1
+ # -*- coding: utf-8 -*-
2
+ """example_median_preimege_generator.ipynb
3
+
4
+ Automatically generated by Colaboratory.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1PIDvHOcmiLEQ5Np3bgBDdu0kLOquOMQK
8
+
9
+ **This script demonstrates how to generate a graph preimage using Boria's method.**
10
+ ---
11
+ """
12
+
13
+ """**1. Get dataset.**"""
14
+
15
from gklearn.utils import Dataset, split_dataset_by_target

# Name of the predefined dataset to use ("MAO").
ds_name = 'MAO'
# Node/edge labels that will not be used in the computation.
irrelevant_labels = {
    'node_attrs': ['x', 'y', 'z'],
    'edge_labels': ['bond_stereo'],
}

# Create an empty Dataset and load the predefined dataset "MAO" into it.
dataset_all = Dataset()
dataset_all.load_predefined_dataset(ds_name)
# Remove the irrelevant labels listed above.
dataset_all.remove_labels(**irrelevant_labels)
# Split the whole dataset according to the classification targets.
datasets = split_dataset_by_target(dataset_all)
# Keep the first class of graphs; its median preimage will be computed.
dataset = datasets[0]
# Report how many graphs the selected class contains. A bare expression is
# only echoed by a notebook/REPL, so print it explicitly when run as a script.
print(len(dataset.graphs))
33
+
34
+ """**2. Set parameters.**"""
35
+
36
import multiprocessing

# Parameters for MedianPreimageGenerator (our method).
mpg_options = {
    # How to fit edit costs. "k-graphs" means all graphs in the median set
    # are used when fitting.
    'fit_method': 'k-graphs',
    # Initial edit costs.
    'init_ecc': [4, 4, 2, 1, 1, 1],
    # Name of the dataset.
    'ds_name': ds_name,
    # Whether the parallel scheme is to be used.
    'parallel': True,
    # Maximum time limit to compute the preimage. 0 means no limit.
    'time_limit_in_sec': 0,
    # Maximum number of iterations to optimize edit costs. 0 means no limit.
    'max_itrs': 100,
    # The optimization stops once the edit costs have gone without an update
    # more than this many times.
    'max_itrs_without_update': 3,
    # In optimization, the residual is only considered changed if the change
    # is bigger than this number.
    'epsilon_residual': 0.01,
    # In optimization, the edit costs are only considered changed if the
    # changes are bigger than this number.
    'epsilon_ec': 0.1,
    # Whether to print out results.
    'verbose': 2,
}

# Parameters for graph kernel computation.
kernel_options = {
    # Use the path kernel up to length h.
    'name': 'PathUpToH',
    'depth': 9,
    'k_func': 'MinMax',
    'compute_method': 'trie',
    # 'imap_unordered' or None.
    'parallel': 'imap_unordered',
    'n_jobs': multiprocessing.cpu_count(),
    # Whether to use the normalized Gram matrix to optimize edit costs.
    'normalize': True,
    # Whether to print out results.
    'verbose': 2,
}

# Parameters for GED computation.
ged_options = {
    # Use the IPFP heuristic.
    'method': 'IPFP',
    # 'RANDOM', or 'NODE', etc.
    'initialization_method': 'RANDOM',
    # When bigger than 1, the method is considered mIPFP.
    'initial_solutions': 10,
    # Use CONSTANT cost.
    'edit_cost': 'CONSTANT',
    # The distance between non-symbolic node/edge labels is computed by
    # euclidean distance.
    'attr_distance': 'euclidean',
    'ratio_runs_from_initial_solutions': 1,
    # Number of parallel threads. Has no effect if
    # mpg_options['parallel'] is False.
    'threads': multiprocessing.cpu_count(),
    'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES',
}

# Parameters for MedianGraphEstimator (Boria's method).
mge_options = {
    # How to initialize the median (compute the set-median). "MEDOID" uses
    # the graph with the smallest SOD.
    'init_type': 'MEDOID',
    # Number of random initializations when 'init_type' = 'RANDOM'.
    'random_inits': 10,
    # Maximum time limit to compute the generalized median. 0 means no limit.
    'time_limit': 600,
    # Whether to print out results.
    'verbose': 2,
    # Whether to refine the final SODs or not.
    'refine': False,
}

print('done.')
78
+
79
+ """**3. Run median preimage generator.**"""
80
+
81
from gklearn.preimage import MedianPreimageGenerator

# Instantiate the median preimage generator.
mpg = MedianPreimageGenerator()
# Attach the dataset whose median preimage is to be computed.
mpg.dataset = dataset
# Set parameters. Shallow copies are passed so the generator does not hold
# references to the option dictionaries defined in this script.
mpg.set_options(**dict(mpg_options))
mpg.kernel_options = dict(kernel_options)
mpg.ged_options = dict(ged_options)
mpg.mge_options = dict(mge_options)
# Run the generator.
mpg.run()
94
+
95
+ """**4. Get results.**"""
96
+
97
# Fetch the results from the generator and pretty-print them.
import pprint

results = mpg.get_results()
pp = pprint.PrettyPrinter(indent=4)  # pretty printer with 4-space indent
pp.pprint(results)
102
+
103
+ # Draw generated graphs.
104
def draw_graph(graph):
    """Draw ``graph`` in a new matplotlib figure and show it.

    Node positions come from networkx's spring layout, and nodes are
    labelled with their 'atom_symbol' attribute. After the figure has been
    shown it is cleared and closed.

    ``graph`` is passed straight to the networkx layout and drawing
    functions (presumably a networkx graph -- confirm against callers).
    """
    # The plotting libraries are imported locally, inside the function.
    import matplotlib.pyplot as plt
    import networkx as nx

    plt.figure()
    layout = nx.spring_layout(graph)
    symbols = nx.get_node_attributes(graph, 'atom_symbol')
    nx.draw(
        graph,
        layout,
        node_size=500,
        labels=symbols,
        font_color='w',
        width=3,
        with_labels=True,
    )
    plt.show()
    # Clear and close the current figure once it has been displayed.
    plt.clf()
    plt.close()
113
+
114
# Draw the generator's `set_median` graph, then its `gen_median` graph.
draw_graph(mpg.set_median)
draw_graph(mpg.gen_median)
0 commit comments