1
- CREATE TEMPLATE QUERY GDBMS_ALGO.community.label_prop (SET<STRING> v_type_set, SET<STRING> e_type_set, INT maximum_iteration, INT print_limit,
2
- BOOL print_results = TRUE, STRING file_path = "", STRING result_attribute = "") SYNTAX V1 {
3
-
1
+ CREATE TEMPLATE QUERY GDBMS_ALGO.community.label_prop(
2
+ SET<STRING> v_type_set,
3
+ SET<STRING> e_type_set,
4
+ UINT maximum_iteration = 10,
5
+ UINT sample_edge_num = 1000,
6
+ UINT batch_num = 12,
7
+ INT print_limit,
8
+ BOOL print_results = TRUE,
9
+ STRING file_path="",
10
+ STRING result_attribute = ""
11
+ ) FOR GRAPH MyGraph SYNTAX V1 {
4
12
5
13
/*
6
- First Author: <First Author Name>
7
- First Commit Date: <First Commit Date>
8
-
9
- Recent Author: <Recent Commit Author Name>
10
- Recent Commit Date: <Recent Commit Date>
14
+ First Author: xuanlei.lin@tigergraph.com
15
+ First Commit Date: 2024-07-15
11
16
17
+ Recent Author: xuanlei.lin@tigergraph.com
18
+ Recent Commit Date: 2024-07-15
12
19
13
20
Repository:
14
21
https://github.com/tigergraph/gsql-graph-algorithms/tree/master/algorithms/Community
@@ -17,89 +24,168 @@ CREATE TEMPLATE QUERY GDBMS_ALGO.community.label_prop (SET<STRING> v_type_set, S
17
24
Production
18
25
19
26
Description:
20
- Partition the vertices into communities, according to the Label Propagation method.
21
- Indicate community membership by assigning each vertex a community ID.
22
-
23
- Publications:
24
- NA
27
+ This query partitions vertices into communities using the Label Propagation method.
28
+ It assigns a community ID to each vertex based on its neighbors' community IDs.
25
29
26
30
TigerGraph Documentation:
27
31
https://docs.tigergraph.com/graph-ml/current/community-algorithms/label-propagation
28
32
29
33
Parameters:
30
34
v_type_set:
31
- Names of vertex types to use
35
+ The set of vertex types to traverse.
32
36
e_type_set:
33
- Names of edge types to use
37
+ The set of edge types to traverse.
34
38
maximum_iteration:
35
- Number of maximum iteration of the algorithm
39
+ The maximum number of iterations for the algorithm.
40
+ sample_edge_num:
41
+ The maximum number of edges sampled per vertex; sampling applies only to super nodes, i.e. vertices whose outdegree exceeds this value.
42
+ batch_num:
43
+ The number of batches; must be at least 1 because it is used as a modulus and a divisor. Using batches reduces memory consumption.
36
44
print_limit:
37
- If >=0, max number of vertices to output to JSON.
45
+ If >= 0, the maximum number of vertices to output to JSON.
38
46
print_results:
39
- If True, output JSON to standard output
47
+ If True, output JSON to standard output. WARNING: Avoid printing results for large datasets.
40
48
result_attribute:
41
- If not empty, store community id values (INT) to this attribute
49
+ If not empty, store community ID values (INT) in this attribute.
42
50
file_path:
43
- If not empty, write output to this file .
51
+ If not empty, write CSV output (vertex ID, community ID) to this file.
44
52
*/
45
53
46
- OrAccum @@or_changed = true;
47
- MapAccum<INT, INT> @map; # <communityId, numNeighbors>
48
- MapAccum<INT, INT> @@comm_sizes_map; # <communityId, members>
49
- SumAccum<INT> @sum_label, @sum_num;
50
- FILE f (file_path);
51
- Start = {v_type_set};
54
+ TYPEDEF TUPLE <DOUBLE score, VERTEX community> MoveScore;
55
+ MinAccum<VERTEX> @community_id; // Community ID of the node
56
+ SumAccum<INT> @vid; // Vertex's internal ID
57
+ SumAccum<INT> @batch_id; // Batch ID for the node
58
+ SumAccum<INT> @degree; // Outdegree of the node
59
+ SumAccum<INT> @@vertex_num; // Total number of vertices
60
+ MapAccum<VERTEX, SumAccum<DOUBLE>> @community_k_in_map; // Number of neighbors belonging to each community
61
+ MaxAccum<MoveScore> @best_move; // Best move for the node with the highest score
62
+ MaxAccum<DOUBLE> @@min_double; // Never assigned in this query, so it keeps MaxAccum's initial value (presumably the lowest DOUBLE); used to reset @best_move
63
+ OrAccum @to_change_community; // Flag to check if the node needs to change community
64
+ MapAccum<VERTEX, INT> @@comm_sizes_map; // Map: community ID -> size of the community
65
+ FILE f(file_path); // File to write results to
52
66
53
- # Assign unique labels to each vertex
54
- Start = SELECT s
55
- FROM Start:s
56
- ACCUM s.@sum_label = getvid(s);
67
+ // Initialization
68
+ All_Nodes = {v_type_set};
69
+ Tmp =
70
+ SELECT s
71
+ FROM All_Nodes:s -(e_type_set:e)- :t
72
+ POST-ACCUM
73
+ s.@community_id = s,
74
+ s.@vid = getvid(s),
75
+ s.@batch_id = s.@vid % batch_num,
76
+ s.@degree = s.outdegree(e_type_set);
77
+ @@vertex_num = All_Nodes.size();
78
+ @@vertex_num = @@vertex_num / batch_num;
57
79
58
- # Propagate labels to neighbors until labels converge or the max iterations is reached
59
- WHILE @@or_changed == true LIMIT maximum_iteration DO
60
- @@or_changed = false;
61
- Start = SELECT s
62
- FROM Start:s -(e_type_set:e)- :t
63
- ACCUM t.@map += (s.@sum_label -> 1) # count the occurrences of neighbor's labels
80
+ // Label propagation
81
+ INT hop = 0;
82
+ Candidates = All_Nodes;
83
+ WHILE Candidates.size() > 0 AND hop < maximum_iteration DO
84
+ hop = hop + 1;
85
+ // Find the best move
86
+ IF hop == 1 THEN // First iteration
87
+ ChangedNodes =
88
+ SELECT s
89
+ FROM Candidates:s -(e_type_set:e)- :t
90
+ WHERE s.@degree < t.@degree
91
+ ACCUM s.@best_move += MoveScore(t.@degree, t.@community_id)
92
+ POST-ACCUM
93
+ IF s.@best_move.community != s.@community_id THEN
94
+ s.@to_change_community = TRUE
95
+ END
96
+ HAVING s.@to_change_community == TRUE;
97
+ ELSE // Remaining iterations
98
+ IF Candidates.size() < @@vertex_num OR batch_num == 1 THEN // No batch processing
99
+ ChangedNodes =
100
+ SELECT s
101
+ FROM Candidates:s -(e_type_set:e)- :t
102
+ SAMPLE sample_edge_num EDGE WHEN s.outdegree(e_type_set) > sample_edge_num
103
+ ACCUM s.@community_k_in_map += (t.@community_id -> 1)
104
+ POST-ACCUM
105
+ s.@best_move = MoveScore(@@min_double, s), // Reset best move
106
+ FOREACH (community_id, k_in) IN s.@community_k_in_map DO
107
+ s.@best_move += MoveScore(k_in, community_id)
108
+ END,
109
+ IF s.@best_move.community != s.@community_id THEN
110
+ s.@to_change_community = TRUE
111
+ END,
112
+ s.@community_k_in_map.clear()
113
+ HAVING s.@to_change_community == TRUE;
114
+ ELSE // Use batch processing
115
+ ChangedNodes = {};
116
+ FOREACH batch_id IN RANGE[0, batch_num-1] DO
117
+ Nodes =
118
+ SELECT s
119
+ FROM Candidates:s
120
+ WHERE s.@batch_id == batch_id;
121
+ Nodes =
122
+ SELECT s
123
+ FROM Nodes:s -(e_type_set:e)- :t
124
+ SAMPLE sample_edge_num EDGE WHEN s.outdegree(e_type_set) > sample_edge_num
125
+ ACCUM s.@community_k_in_map += (t.@community_id -> 1)
64
126
POST-ACCUM
65
- INT max_v = 0,
66
- INT label = 0,
67
- # Iterate over the map to get the neighbor label that occurs most often
68
- FOREACH (k,v) IN t.@map DO
69
- CASE WHEN v > max_v THEN
70
- max_v = v,
71
- label = k
72
- END
73
- END,
74
- # When the neighbor search finds a label AND it is a new label
75
- # AND the label's count has increased, update the label.
76
- CASE WHEN label != 0 AND t.@sum_label != label AND max_v > t.@sum_num THEN
77
- @@or_changed += true,
78
- t.@sum_label = label,
79
- t.@sum_num = max_v
80
- END,
81
- t.@map.clear();
82
- END;
127
+ s.@best_move = MoveScore(@@min_double, s), // Reset best move
128
+ FOREACH (community_id, k_in) IN s.@community_k_in_map DO
129
+ s.@best_move += MoveScore(k_in, community_id)
130
+ END,
131
+ IF s.@best_move.community != s.@community_id THEN
132
+ s.@to_change_community = TRUE
133
+ END,
134
+ s.@community_k_in_map.clear()
135
+ HAVING s.@to_change_community == TRUE;
136
+ ChangedNodes = ChangedNodes UNION Nodes;
137
+ END;
138
+ END;
139
+ END;
140
+
141
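+ // Resolve mutual swaps: when two adjacent changing nodes would each adopt the other's community, the one with the lower score (or the larger internal ID on a near-tie) keeps its current community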
142
+ SwapNodes =
143
+ SELECT s
144
+ FROM ChangedNodes:s -(e_type_set:e)- :t
145
+ WHERE s.@best_move.community == t.@community_id
146
+ AND t.@to_change_community == TRUE
147
+ AND t.@best_move.community == s.@community_id
148
+ AND (s.@best_move.score < t.@best_move.score
149
+ OR (abs(s.@best_move.score - t.@best_move.score) < 0.00000000001
150
+ AND s.@vid > t.@vid))
151
+ POST-ACCUM
152
+ s.@to_change_community = FALSE;
153
+ ChangedNodes = ChangedNodes MINUS SwapNodes;
154
+
155
+ // Update community IDs
156
+ ChangedNodes =
157
+ SELECT s
158
+ FROM ChangedNodes:s
159
+ POST-ACCUM
160
+ s.@community_id = s.@best_move.community,
161
+ s.@to_change_community = FALSE;
162
+
163
+ // Find candidates for the next iteration: neighbors of changed nodes that ended up in a different community
164
+ Candidates =
165
+ SELECT t
166
+ FROM ChangedNodes:s -(e_type_set:e)- :t
167
+ WHERE t.@community_id != s.@community_id;
168
+ END;
83
169
84
- Start = {v_type_set};
85
- Start = SELECT s
86
- FROM Start:s
87
- POST-ACCUM
88
- IF result_attribute != "" THEN
89
- s.setAttr(result_attribute, s.@sum_label)
90
- END,
91
-
92
- IF file_path != "" THEN
93
- f.println(s, s.@sum_label)
94
- END,
95
-
96
- IF print_results THEN
97
- @@comm_sizes_map += (s.@sum_label -> 1)
98
- END
99
- LIMIT print_limit;
170
+ // Output results
171
+ Nodes =
172
+ SELECT s
173
+ FROM All_Nodes:s
174
+ POST-ACCUM
175
+ IF result_attribute != "" THEN
176
+ s.setAttr(result_attribute, getvid(s.@community_id))
177
+ END,
178
+ IF print_results THEN
179
+ @@comm_sizes_map += (s.@community_id -> 1)
180
+ END,
181
+ IF file_path != "" THEN
182
+ f.println(s.id, s.@community_id)
183
+ END
184
+ LIMIT print_limit;
100
185
101
- IF print_results THEN
186
+ // Print results if print_results is True
187
+ IF print_results THEN
102
188
PRINT @@comm_sizes_map;
103
- PRINT Start[Start.@sum_label ];
104
- END;
189
+ PRINT Nodes[Nodes.@community_id ];
190
+ END;
105
191
}
0 commit comments