Skip to content

Commit 92705d8

Browse files
committed
Update dataset processors.
1 parent ca1f2b5 commit 92705d8

File tree

2 files changed

+90
-0
lines changed

2 files changed

+90
-0
lines changed

gklearn/utils/dataset.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,9 @@ def load_predefined_dataset(self, ds_name):
9393
elif ds_name == 'DD':
9494
ds_file = current_path + '../../datasets/DD/DD_A.txt'
9595
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
96+
elif ds_name == 'ENZYMES':
97+
ds_file = current_path + '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'
98+
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
9699
elif ds_name == 'Fingerprint':
97100
ds_file = current_path + '../../datasets/Fingerprint/Fingerprint_A.txt'
98101
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
@@ -117,6 +120,12 @@ def load_predefined_dataset(self, ds_name):
117120
elif ds_name == 'MUTAG':
118121
ds_file = current_path + '../../datasets/MUTAG/MUTAG_A.txt'
119122
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
123+
elif ds_name == 'NCI1':
124+
ds_file = current_path + '../../datasets/NCI1/NCI1_A.txt'
125+
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
126+
elif ds_name == 'NCI109':
127+
ds_file = current_path + '../../datasets/NCI109/NCI109_A.txt'
128+
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
120129
elif ds_name == 'PAH':
121130
ds_file = current_path + '../../datasets/PAH/dataset.ds'
122131
self.__graphs, self.__targets, label_names = load_dataset(ds_file)

gklearn/utils/graphdataset.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
""" Obtain all kinds of attributes of a graph dataset.
2+
3+
This file is for the old version of graphkit-learn.
24
"""
35

46

@@ -336,3 +338,82 @@ def get_edge_attr_dim(Gn):
336338
from collections import OrderedDict
337339
return OrderedDict(
338340
sorted(attrs.items(), key=lambda i: attr_names.index(i[0])))
341+
342+
343+
def load_predefined_dataset(ds_name):
    """Load one of the datasets bundled with the repository by its name.

    The dataset file is looked up under ``../../datasets/`` relative to the
    directory containing this module and is read with
    ``gklearn.utils.graphfiles.loadDataset``.

    Parameters
    ----------
    ds_name : str
        Name of the pre-defined dataset, e.g. ``'MUTAG'`` or ``'Alkane'``.
        Names are case-sensitive.

    Returns
    -------
    graphs, targets
        The two values returned by ``loadDataset`` for the dataset file.

    Raises
    ------
    NotImplementedError
        If ``ds_name`` is ``'SYNTHETIC'`` or ``'Synthie'``: these names are
        recognised, but no loader is implemented for them yet.
    Exception
        If ``ds_name`` is not a pre-defined dataset name.
    """
    import os

    # name -> (dataset file, targets file or None), both relative to the
    # repository's ``datasets`` directory.
    predefined = {
        'Acyclic': ('Acyclic/dataset_bps.ds', None),
        'AIDS': ('AIDS/AIDS_A.txt', None),
        'Alkane': ('Alkane/dataset.ds',
                   'Alkane/dataset_boiling_point_names.txt'),
        'COIL-DEL': ('COIL-DEL/COIL-DEL_A.txt', None),
        'COIL-RAG': ('COIL-RAG/COIL-RAG_A.txt', None),
        'COLORS-3': ('COLORS-3/COLORS-3_A.txt', None),
        'Cuneiform': ('Cuneiform/Cuneiform_A.txt', None),
        'DD': ('DD/DD_A.txt', None),
        'ENZYMES': ('ENZYMES_txt/ENZYMES_A_sparse.txt', None),
        'Fingerprint': ('Fingerprint/Fingerprint_A.txt', None),
        'FRANKENSTEIN': ('FRANKENSTEIN/FRANKENSTEIN_A.txt', None),
        'Letter-high': ('Letter-high/Letter-high_A.txt', None),  # node non-symb
        'Letter-low': ('Letter-low/Letter-low_A.txt', None),  # node non-symb
        'Letter-med': ('Letter-med/Letter-med_A.txt', None),  # node non-symb
        'MAO': ('MAO/dataset.ds', None),
        'Monoterpenoides': ('Monoterpenoides/dataset_10+.ds', None),
        'MUTAG': ('MUTAG/MUTAG_A.txt', None),
        'NCI1': ('NCI1/NCI1_A.txt', None),
        'NCI109': ('NCI109/NCI109_A.txt', None),
        'PAH': ('PAH/dataset.ds', None),
        'SYNTHETICnew': ('SYNTHETICnew/SYNTHETICnew_A.txt', None),
    }
    # Recognised names that have no loader yet. Previously these branches
    # were no-ops, so the final ``return`` failed with UnboundLocalError.
    not_loadable = ('SYNTHETIC', 'Synthie')

    if ds_name in not_loadable:
        raise NotImplementedError(
            'Loading the pre-defined dataset "{}" is not implemented '
            'yet.'.format(ds_name))
    # The isinstance guard keeps unhashable arguments on the "unknown name"
    # path instead of failing with TypeError on the dict lookup.
    if not isinstance(ds_name, str) or ds_name not in predefined:
        raise Exception(
            'The dataset name "{}" is not pre-defined.'.format(ds_name))

    # Imported only once the name is known to be valid, so an invalid name is
    # reported without loading the graph-file reader.
    from gklearn.utils.graphfiles import loadDataset

    datasets_root = (os.path.dirname(os.path.realpath(__file__)) + '/'
                     + '../../datasets/')
    ds_rel, targets_rel = predefined[ds_name]
    ds_file = datasets_root + ds_rel
    if targets_rel is None:
        graphs, targets = loadDataset(ds_file)
    else:
        # Dataset whose targets are stored in a separate file.
        graphs, targets = loadDataset(
            ds_file, filename_y=datasets_root + targets_rel)

    return graphs, targets

0 commit comments

Comments
 (0)