Skip to content

Commit 1ea5bb8

Browse files
authored
Merge pull request #44 from the16thpythonist/fix_moleculenet
Fixed a crash of MoleculeNetDataset when faulty smiles are encountered
2 parents 27d7940 + f9f7368 commit 1ea5bb8

File tree

2 files changed

+23
-2
lines changed

2 files changed

+23
-2
lines changed

kgcnn/data/base.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -399,10 +399,23 @@ def assign_property(self, key, value):
399399
return self
400400

401401
def obtain_property(self, key):
402+
r"""Returns a list with the values of all the graphs defined for the string property name `key`. If none of
403+
the graphs in the list have this property, returns None.
404+
405+
Args:
406+
key (str): The string name of the property to be retrieved for all the graphs contained in this list
407+
"""
408+
# "_list" is a list of GraphNumpyContainers, which means "prop_list" here will be a list of all the property
409+
# values for each of the graphs which make up this list.
402410
prop_list = [x.obtain_property(key) for x in self._list]
411+
412+
# If a certain string property is not set for a GraphNumpyContainer, it will still return None. Here we check:
413+
# If all the items for our given property name are None then we know that this property is generally not
414+
# defined for any of the graphs in the list.
403415
if all([x is None for x in prop_list]):
404416
self.logger.warning("Property %s is not set on any graph." % key)
405417
return None
418+
406419
return prop_list
407420

408421
def __setattr__(self, key, value):
@@ -494,6 +507,14 @@ def map_list(self, fun, **kwargs):
494507
return self
495508

496509
def clean(self, inputs: list):
510+
"""Given a list of property names, this method removes all elements from the internal list of
511+
GraphNumpyContainers which are missing at least one of those properties. In other words, only those graphs remain in
512+
the list which definitely define all of the properties.
513+
514+
Args:
515+
inputs (list): A list of strings, where each string is supposed to be a property name, which the graphs
516+
in this list may possess.
517+
"""
497518
invalid_graphs = []
498519
for item in inputs:
499520
if isinstance(item, dict):

kgcnn/data/moleculenet.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ def read_in_memory(self, label_column_name: str = None, has_conformers: bool =
193193
self.assign_property("node_symbol", node_symbol)
194194
self.assign_property("node_coordinates", node_coordinates)
195195
self.assign_property("node_number", node_number)
196-
self.assign_property("graph_size", [len(x) for x in node_number])
196+
self.assign_property("graph_size", [None if x is None else len(x) for x in node_number])
197197
self.assign_property("edge_indices", edge_indices)
198198
self.assign_property("graph_labels", graph_labels)
199199
self.assign_property("edge_number", edge_number)
@@ -302,7 +302,7 @@ def set_attributes(self,
302302
if i % 1000 == 0:
303303
self.info(" ... read molecules {0} from {1}".format(i, num_mols))
304304

305-
self.assign_property("graph_size", [len(x) for x in node_attributes])
305+
self.assign_property("graph_size", [None if x is None else len(x) for x in node_number])
306306
self.assign_property("graph_attributes", graph_attributes)
307307
self.assign_property("node_attributes", node_attributes)
308308
self.assign_property("edge_attributes", edge_attributes)

0 commit comments

Comments
 (0)