@@ -24,55 +24,62 @@ extern const int TYPE_MISMATCH;
 
 
 namespace KafkaStream
 {
-ChunkPartitioner::ChunkPartitioner(ContextPtr context, const Block & header, const ASTPtr & partitioning_expr_ast)
+void ChunkSharder::useRandomSharding()
 {
-    /// `InterpreterCreateQuery::handleExternalStreamCreation` ensures this
-    assert(partitioning_expr_ast);
+    random_sharding = true;
+    std::random_device r;
+    rand = std::minstd_rand(r());
+}
+
+ChunkSharder::ChunkSharder(ContextPtr context, const Block & header, const ASTPtr & sharding_expr_ast)
+{
+    assert(sharding_expr_ast);
 
-    ASTPtr query = partitioning_expr_ast;
+    ASTPtr query = sharding_expr_ast;
     auto syntax_result = TreeRewriter(context).analyze(query, header.getNamesAndTypesList());
-    partitioning_expr = ExpressionAnalyzer(query, syntax_result, context).getActions(false);
+    sharding_expr = ExpressionAnalyzer(query, syntax_result, context).getActions(false);
 
-    partitioning_key_column_name = partitioning_expr_ast->getColumnName();
+    sharding_key_column_name = sharding_expr_ast->getColumnName();
 
-    if (auto * shard_func = partitioning_expr_ast->as<ASTFunction>())
+    if (auto * shard_func = sharding_expr_ast->as<ASTFunction>())
     {
         if (shard_func->name == "rand" || shard_func->name == "RAND")
-        {
-            random_partitioning = true;
-            std::random_device r;
-            rand = std::minstd_rand(r());
-        }
+            this->useRandomSharding();
     }
 }
 
-BlocksWithShard ChunkPartitioner::partition(Block block, Int32 partition_cnt) const
+ChunkSharder::ChunkSharder()
+{
+    this->useRandomSharding();
+}
+
+BlocksWithShard ChunkSharder::shard(Block block, Int32 shard_cnt) const
 {
     /// no topics have zero partitions
-    assert(partition_cnt > 0);
+    assert(shard_cnt > 0);
 
-    if (partition_cnt == 1)
+    if (shard_cnt == 1)
         return {BlockWithShard{Block(std::move(block)), 0}};
 
-    if (random_partitioning)
-        return {BlockWithShard{Block(std::move(block)), getNextShardIndex(partition_cnt)}};
+    if (random_sharding)
+        return {BlockWithShard{Block(std::move(block)), getNextShardIndex(shard_cnt)}};
 
-    return doParition(std::move(block), partition_cnt);
+    return doSharding(std::move(block), shard_cnt);
 }
 
-BlocksWithShard ChunkPartitioner::doParition(Block block, Int32 partition_cnt) const
+BlocksWithShard ChunkSharder::doSharding(Block block, Int32 shard_cnt) const
 {
-    auto selector = createSelector(block, partition_cnt);
+    auto selector = createSelector(block, shard_cnt);
 
-    Blocks partitioned_blocks{static_cast<size_t>(partition_cnt)};
+    Blocks partitioned_blocks{static_cast<size_t>(shard_cnt)};
 
-    for (Int32 i = 0; i < partition_cnt; ++i)
+    for (Int32 i = 0; i < shard_cnt; ++i)
         partitioned_blocks[i] = block.cloneEmpty();
 
     for (size_t pos = 0; pos < block.columns(); ++pos)
     {
-        MutableColumns partitioned_columns = block.getByPosition(pos).column->scatter(partition_cnt, selector);
-        for (Int32 i = 0; i < partition_cnt; ++i)
+        MutableColumns partitioned_columns = block.getByPosition(pos).column->scatter(shard_cnt, selector);
+        for (Int32 i = 0; i < shard_cnt; ++i)
            partitioned_blocks[i].getByPosition(pos).column = std::move(partitioned_columns[i]);
     }
 
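Note on `getNextShardIndex`: the hunk above seeds a `std::minstd_rand` member in `useRandomSharding()`, but the index helper itself is not part of this diff. Below is a minimal standalone sketch of what such a helper could look like, assuming a simple modulo draw from the engine; the class name, member layout, and modulo mapping are illustrative assumptions, not code from this commit.

```cpp
// Hypothetical sketch only: how a getNextShardIndex()-style helper could map
// a minstd_rand engine (seeded from std::random_device, as in this commit)
// onto a shard index in [0, shard_cnt).
#include <cassert>
#include <cstdint>
#include <random>

class RandomShardPicker
{
public:
    RandomShardPicker() : engine(std::random_device{}()) { }

    // Assumed mapping: one LCG draw reduced modulo the shard count.
    int32_t getNextShardIndex(int32_t shard_cnt)
    {
        assert(shard_cnt > 0);
        return static_cast<int32_t>(engine() % static_cast<uint32_t>(shard_cnt));
    }

private:
    std::minstd_rand engine;
};
```

Since `shard()` is declared `const` in the diff, the real engine member would presumably be `mutable` (or the helper non-const); that detail is not visible in this commit.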
@@ -89,14 +96,14 @@ BlocksWithShard ChunkPartitioner::doParition(Block block, Int32 partition_cnt) c
     return blocks_with_shard;
 }
 
-IColumn::Selector ChunkPartitioner::createSelector(Block block, Int32 partition_cnt) const
+IColumn::Selector ChunkSharder::createSelector(Block block, Int32 shard_cnt) const
 {
-    std::vector<UInt64> slot_to_shard(partition_cnt);
+    std::vector<UInt64> slot_to_shard(shard_cnt);
     std::iota(slot_to_shard.begin(), slot_to_shard.end(), 0);
 
-    partitioning_expr->execute(block);
+    sharding_expr->execute(block);
 
-    const auto & key_column = block.getByName(partitioning_key_column_name);
+    const auto & key_column = block.getByName(sharding_key_column_name);
 
     /// If key_column.type is DataTypeLowCardinality, do shard according to its dictionaryType
 #define CREATE_FOR_TYPE(TYPE) \
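For orientation on what `createSelector` produces: the sharding expression is executed over the block, and each row's key value is mapped to one of the `shard_cnt` slots in `slot_to_shard`; `scatter()` in `doSharding()` then splits every column by that per-row selector. The sketch below illustrates the row-to-slot idea with plain standard-library types rather than the real `IColumn::Selector` / `CREATE_FOR_TYPE` machinery; the modulo mapping is an assumption for illustration only.

```cpp
// Illustration only: map each row's sharding-key value to a shard slot.
// The real code builds an IColumn::Selector over typed key columns; this
// sketch shows the same per-row idea with std::vector.
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<uint64_t> makeSelector(const std::vector<uint64_t> & key_values, int32_t shard_cnt)
{
    std::vector<uint64_t> selector(key_values.size());
    for (size_t row = 0; row < key_values.size(); ++row)
        selector[row] = key_values[row] % static_cast<uint64_t>(shard_cnt); // assumed modulo mapping
    return selector;
}

// Example: keys {3, 4, 5} with shard_cnt = 2 -> slots {1, 0, 1},
// so rows 0 and 2 land on shard 1 and row 1 lands on shard 0.
```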
@@ -236,7 +243,10 @@ KafkaSink::KafkaSink(const Kafka * kafka, const Block & header, ContextPtr conte
     writer = FormatFactory::instance().getOutputFormat(data_format, *wb, header, context);
     writer->setAutoFlush();
 
-    partitioner = std::make_unique<KafkaStream::ChunkPartitioner>(context, header, kafka->partitioning_expr_ast());
+    if (kafka->hasCustomShardingExpr())
+        partitioner = std::make_unique<KafkaStream::ChunkSharder>(context, header, kafka->shardingExprAst());
+    else
+        partitioner = std::make_unique<KafkaStream::ChunkSharder>();
 
     polling_threads.scheduleOrThrowOnError([this]() {
         while (!is_finished.test())
@@ -251,7 +261,7 @@ void KafkaSink::consume(Chunk chunk)
         return;
 
     auto block = getHeader().cloneWithColumns(chunk.detachColumns());
-    auto blocks = partitioner->partition(std::move(block), partition_cnt);
+    auto blocks = partitioner->shard(std::move(block), partition_cnt);
 
     for (auto & blockWithShard : blocks)
     {