@@ -35,82 +35,42 @@ use crate::pipelines::PipelineBuilder;
35
35
use crate :: sessions:: QueryContext ;
36
36
37
37
impl PipelineBuilder {
38
- pub ( crate ) fn build_range_join ( & mut self , range_join : & RangeJoin ) -> Result < ( ) > {
39
- let state = Arc :: new ( RangeJoinState :: new ( self . ctx . clone ( ) , range_join) ) ;
40
- self . expand_right_side_pipeline ( range_join, state. clone ( ) ) ?;
41
- self . build_left_side ( range_join, state) ?;
42
- Ok ( ( ) )
43
- }
44
-
45
- fn build_left_side (
46
- & mut self ,
47
- range_join : & RangeJoin ,
48
- state : Arc < RangeJoinState > ,
49
- ) -> Result < ( ) > {
50
- self . build_pipeline ( & range_join. left ) ?;
51
- let max_threads = self . settings . get_max_threads ( ) ? as usize ;
52
- self . main_pipeline . try_resize ( max_threads) ?;
53
- self . main_pipeline . add_transform ( |input, output| {
54
- Ok ( ProcessorPtr :: create ( TransformRangeJoinLeft :: create (
55
- input,
56
- output,
57
- state. clone ( ) ,
58
- ) ) )
59
- } ) ?;
60
- Ok ( ( ) )
61
- }
62
-
63
- fn expand_right_side_pipeline (
64
- & mut self ,
65
- range_join : & RangeJoin ,
66
- state : Arc < RangeJoinState > ,
67
- ) -> Result < ( ) > {
68
- let right_side_context = QueryContext :: create_from ( self . ctx . as_ref ( ) ) ;
69
- let mut right_side_builder = PipelineBuilder :: create (
38
+ // Create a new pipeline builder with the same context as the current builder
39
+ fn create_sub_pipeline_builder ( & self ) -> PipelineBuilder {
40
+ let sub_context = QueryContext :: create_from ( self . ctx . as_ref ( ) ) ;
41
+ let mut sub_builder = PipelineBuilder :: create (
70
42
self . func_ctx . clone ( ) ,
71
43
self . settings . clone ( ) ,
72
- right_side_context ,
44
+ sub_context ,
73
45
self . main_pipeline . get_scopes ( ) ,
74
46
) ;
75
- right_side_builder. hash_join_states = self . hash_join_states . clone ( ) ;
76
-
77
- let mut right_res = right_side_builder. finalize ( & range_join. right ) ?;
78
- right_res. main_pipeline . add_sink ( |input| {
79
- Ok ( ProcessorPtr :: create (
80
- Sinker :: < TransformRangeJoinRight > :: create (
81
- input,
82
- TransformRangeJoinRight :: create ( state. clone ( ) ) ,
83
- ) ,
84
- ) )
85
- } ) ?;
86
- self . pipelines . push ( right_res. main_pipeline . finalize ( ) ) ;
87
- self . pipelines . extend ( right_res. sources_pipelines ) ;
88
- Ok ( ( ) )
47
+ sub_builder. hash_join_states = self . hash_join_states . clone ( ) ;
48
+ sub_builder
89
49
}
90
50
91
- pub ( crate ) fn build_join ( & mut self , join : & HashJoin ) -> Result < ( ) > {
92
- // for merge into target table as build side.
93
- let ( enable_merge_into_optimization, merge_into_is_distributed) =
94
- self . merge_into_get_optimization_flag ( join) ;
51
+ pub ( crate ) fn build_hash_join ( & mut self , join : & HashJoin ) -> Result < ( ) > {
52
+ // Get optimization flags for merge-into operations
53
+ let ( enable_optimization, is_distributed) = self . merge_into_get_optimization_flag ( join) ;
95
54
96
- let state = self . build_join_state (
97
- join,
98
- merge_into_is_distributed,
99
- enable_merge_into_optimization,
100
- ) ?;
55
+ // Create the join state with optimization flags
56
+ let state = self . build_hash_join_state ( join, is_distributed, enable_optimization) ?;
101
57
if let Some ( ( build_cache_index, _) ) = join. build_side_cache_info {
102
58
self . hash_join_states
103
59
. insert ( build_cache_index, state. clone ( ) ) ;
104
60
}
105
- self . expand_build_side_pipeline ( & join. build , join, state. clone ( ) ) ?;
106
- self . build_join_probe ( join, state) ?;
107
61
108
- // In the case of spilling, we need to share state among multiple threads. Quickly fetch all data from this round to quickly start the next round.
62
+ // Build both phases of the Hash Join
63
+ self . build_hash_join_build_side ( & join. build , join, state. clone ( ) ) ?;
64
+ self . build_hash_join_probe_side ( join, state) ?;
65
+
66
+ // In the case of spilling, we need to share state among multiple threads
67
+ // Quickly fetch all data from this round to quickly start the next round
109
68
self . main_pipeline
110
69
. resize ( self . main_pipeline . output_len ( ) , true )
111
70
}
112
71
113
- fn build_join_state (
72
+ // Create the Hash Join state
73
+ fn build_hash_join_state (
114
74
& mut self ,
115
75
join : & HashJoin ,
116
76
merge_into_is_distributed : bool ,
@@ -128,20 +88,14 @@ impl PipelineBuilder {
128
88
)
129
89
}
130
90
131
- fn expand_build_side_pipeline (
91
+ // Build the build-side pipeline for Hash Join
92
+ fn build_hash_join_build_side (
132
93
& mut self ,
133
94
build : & PhysicalPlan ,
134
95
hash_join_plan : & HashJoin ,
135
96
join_state : Arc < HashJoinState > ,
136
97
) -> Result < ( ) > {
137
- let build_side_context = QueryContext :: create_from ( self . ctx . as_ref ( ) ) ;
138
- let mut build_side_builder = PipelineBuilder :: create (
139
- self . func_ctx . clone ( ) ,
140
- self . settings . clone ( ) ,
141
- build_side_context,
142
- self . main_pipeline . get_scopes ( ) ,
143
- ) ;
144
- build_side_builder. hash_join_states = self . hash_join_states . clone ( ) ;
98
+ let build_side_builder = self . create_sub_pipeline_builder ( ) ;
145
99
let mut build_res = build_side_builder. finalize ( build) ?;
146
100
147
101
assert ! ( build_res. main_pipeline. is_pulling_pipeline( ) ?) ;
@@ -162,7 +116,7 @@ impl PipelineBuilder {
162
116
build_state. clone ( ) ,
163
117
) ?) )
164
118
} ;
165
- // for distributed merge into when source as build side.
119
+ // For distributed merge-into, when the source is the build side
166
120
if hash_join_plan. need_hold_hash_table {
167
121
self . join_state = Some ( build_state. clone ( ) )
168
122
}
@@ -173,7 +127,12 @@ impl PipelineBuilder {
173
127
Ok ( ( ) )
174
128
}
175
129
176
- fn build_join_probe ( & mut self , join : & HashJoin , state : Arc < HashJoinState > ) -> Result < ( ) > {
130
+ // Build the probe-side pipeline for Hash Join
131
+ fn build_hash_join_probe_side (
132
+ & mut self ,
133
+ join : & HashJoin ,
134
+ state : Arc < HashJoinState > ,
135
+ ) -> Result < ( ) > {
177
136
self . build_pipeline ( & join. probe ) ?;
178
137
179
138
let max_block_size = self . settings . get_max_block_size ( ) ? as usize ;
@@ -203,16 +162,66 @@ impl PipelineBuilder {
203
162
) ?) )
204
163
} ) ?;
205
164
165
+ // For merge-into operations that need to hold the hash table
206
166
if join. need_hold_hash_table {
207
- let mut projected_probe_fields = vec ! [ ] ;
167
+ // Extract projected fields from probe schema
168
+ let mut projected_fields = vec ! [ ] ;
208
169
for ( i, field) in probe_state. probe_schema . fields ( ) . iter ( ) . enumerate ( ) {
209
170
if probe_state. probe_projections . contains ( & i) {
210
- projected_probe_fields . push ( field. clone ( ) ) ;
171
+ projected_fields . push ( field. clone ( ) ) ;
211
172
}
212
173
}
213
- self . merge_into_probe_data_fields = Some ( projected_probe_fields ) ;
174
+ self . merge_into_probe_data_fields = Some ( projected_fields ) ;
214
175
}
215
176
216
177
Ok ( ( ) )
217
178
}
179
+
180
+ pub ( crate ) fn build_range_join ( & mut self , range_join : & RangeJoin ) -> Result < ( ) > {
181
+ let state = Arc :: new ( RangeJoinState :: new ( self . ctx . clone ( ) , range_join) ) ;
182
+ self . build_range_join_right_side ( range_join, state. clone ( ) ) ?;
183
+ self . build_range_join_left_side ( range_join, state) ?;
184
+ Ok ( ( ) )
185
+ }
186
+
187
+ // Build the left-side pipeline for Range Join
188
+ fn build_range_join_left_side (
189
+ & mut self ,
190
+ range_join : & RangeJoin ,
191
+ state : Arc < RangeJoinState > ,
192
+ ) -> Result < ( ) > {
193
+ self . build_pipeline ( & range_join. left ) ?;
194
+ let max_threads = self . settings . get_max_threads ( ) ? as usize ;
195
+ self . main_pipeline . try_resize ( max_threads) ?;
196
+ self . main_pipeline . add_transform ( |input, output| {
197
+ Ok ( ProcessorPtr :: create ( TransformRangeJoinLeft :: create (
198
+ input,
199
+ output,
200
+ state. clone ( ) ,
201
+ ) ) )
202
+ } ) ?;
203
+ Ok ( ( ) )
204
+ }
205
+
206
+ // Build the right-side pipeline for Range Join
207
+ fn build_range_join_right_side (
208
+ & mut self ,
209
+ range_join : & RangeJoin ,
210
+ state : Arc < RangeJoinState > ,
211
+ ) -> Result < ( ) > {
212
+ let right_side_builder = self . create_sub_pipeline_builder ( ) ;
213
+
214
+ let mut right_res = right_side_builder. finalize ( & range_join. right ) ?;
215
+ right_res. main_pipeline . add_sink ( |input| {
216
+ Ok ( ProcessorPtr :: create (
217
+ Sinker :: < TransformRangeJoinRight > :: create (
218
+ input,
219
+ TransformRangeJoinRight :: create ( state. clone ( ) ) ,
220
+ ) ,
221
+ ) )
222
+ } ) ?;
223
+ self . pipelines . push ( right_res. main_pipeline . finalize ( ) ) ;
224
+ self . pipelines . extend ( right_res. sources_pipelines ) ;
225
+ Ok ( ( ) )
226
+ }
218
227
}
0 commit comments