@@ -24,55 +24,62 @@ extern const int TYPE_MISMATCH;
2424
2525namespace KafkaStream
2626{
27- ChunkPartitioner::ChunkPartitioner (ContextPtr context, const Block & header, const ASTPtr & partitioning_expr_ast )
// ChunkSharder::useRandomSharding
// Switches this sharder into random mode: sets the `random_sharding` flag and
// replaces `rand` with a std::minstd_rand engine seeded from a fresh
// std::random_device draw.
// Takes no parameters and returns nothing; it only writes the two members above.
27+ void ChunkSharder::useRandomSharding ( )
2828{
// The two removed ("-") lines below were the opening statements of the old
// ChunkPartitioner constructor body; they are not part of this function.
29- // / `InterpreterCreateQuery::handleExternalStreamCreation` ensures this
30- assert (partitioning_expr_ast);
29+ random_sharding = true ;
// Non-deterministic seed source, used once to seed the engine below.
30+ std::random_device r;
31+ rand = std::minstd_rand (r ());
32+ }
33+
// ChunkSharder::ChunkSharder (expression overload)
// Builds a sharder from a sharding expression.
//   context            - passed to TreeRewriter and ExpressionAnalyzer.
//   header             - its names-and-types list is what the expression is analyzed against.
//   sharding_expr_ast  - the sharding expression; must be non-null (asserted below).
// Stores the analyzed actions in `sharding_expr` and the expression's column
// name in `sharding_key_column_name`. If the expression is an ASTFunction whose
// name equals one of the two literals compared below, random sharding is enabled.
34+ ChunkSharder::ChunkSharder (ContextPtr context, const Block & header, const ASTPtr & sharding_expr_ast)
35+ {
// Precondition check; like any assert it is only active when assertions are enabled.
36+ assert (sharding_expr_ast);
3137
32- ASTPtr query = partitioning_expr_ast ;
38+ ASTPtr query = sharding_expr_ast ;
3339 auto syntax_result = TreeRewriter (context).analyze (query, header.getNamesAndTypesList ());
34- partitioning_expr = ExpressionAnalyzer (query, syntax_result, context).getActions (false );
40+ sharding_expr = ExpressionAnalyzer (query, syntax_result, context).getActions (false );
3541
// The key column is later looked up by this name (see the getByName call in createSelector).
36- partitioning_key_column_name = partitioning_expr_ast ->getColumnName ();
42+ sharding_key_column_name = sharding_expr_ast ->getColumnName ();
3743
38- if (auto * shard_func = partitioning_expr_ast ->as <ASTFunction>())
44+ if (auto * shard_func = sharding_expr_ast ->as <ASTFunction>())
3945 {
// NOTE(review): the literals below appear with a leading space in this rendering;
// presumably a scrape artifact of "rand"/"RAND" - confirm against the upstream file.
4046 if (shard_func->name == " rand" || shard_func->name == " RAND" )
41- {
42- random_partitioning = true ;
43- std::random_device r;
44- rand = std::minstd_rand (r ());
45- }
// The removed inline seeding above is replaced by a call to the shared helper.
47+ this ->useRandomSharding ();
4648 }
4749}
4850
49- BlocksWithShard ChunkPartitioner::partition (Block block, Int32 partition_cnt) const
// ChunkSharder::ChunkSharder (default overload)
// Constructs a sharder without a sharding expression; its only action is to
// enable random sharding via useRandomSharding().
// In this diff, KafkaSink uses this overload when kafka->hasCustomShardingExpr() is false.
51+ ChunkSharder::ChunkSharder ()
52+ {
53+ this ->useRandomSharding ();
54+ }
55+
// ChunkSharder::shard
// Assigns `block` to one or more shards and returns the resulting BlocksWithShard.
//   block      - taken by value and moved into the result or into doSharding().
//   shard_cnt  - number of shards; must be > 0 (asserted).
// Three cases, checked in order:
//   1. shard_cnt == 1  -> the whole block goes to shard 0.
//   2. random_sharding -> the whole block goes to the single shard index returned
//                         by getNextShardIndex(shard_cnt).
//   3. otherwise       -> delegated to doSharding(block, shard_cnt).
56+ BlocksWithShard ChunkSharder::shard (Block block, Int32 shard_cnt) const
5057{
5158 // / no topics have zero partitions
52- assert (partition_cnt > 0 );
59+ assert (shard_cnt > 0 );
5360
// Fast path: with a single shard there is nothing to decide.
54- if (partition_cnt == 1 )
61+ if (shard_cnt == 1 )
5562 return {BlockWithShard{Block (std::move (block)), 0 }};
5663
// Random mode does not split the block; one index is picked for the whole block.
57- if (random_partitioning )
58- return {BlockWithShard{Block (std::move (block)), getNextShardIndex (partition_cnt )}};
64+ if (random_sharding )
65+ return {BlockWithShard{Block (std::move (block)), getNextShardIndex (shard_cnt )}};
5966
60- return doParition (std::move (block), partition_cnt );
67+ return doSharding (std::move (block), shard_cnt );
6168}
6269
63- BlocksWithShard ChunkPartitioner::doParition (Block block, Int32 partition_cnt ) const
70+ BlocksWithShard ChunkSharder::doSharding (Block block, Int32 shard_cnt ) const
6471{
65- auto selector = createSelector (block, partition_cnt );
72+ auto selector = createSelector (block, shard_cnt );
6673
67- Blocks partitioned_blocks{static_cast <size_t >(partition_cnt )};
74+ Blocks partitioned_blocks{static_cast <size_t >(shard_cnt )};
6875
69- for (Int32 i = 0 ; i < partition_cnt ; ++i)
76+ for (Int32 i = 0 ; i < shard_cnt ; ++i)
7077 partitioned_blocks[i] = block.cloneEmpty ();
7178
7279 for (size_t pos = 0 ; pos < block.columns (); ++pos)
7380 {
74- MutableColumns partitioned_columns = block.getByPosition (pos).column ->scatter (partition_cnt , selector);
75- for (Int32 i = 0 ; i < partition_cnt ; ++i)
81+ MutableColumns partitioned_columns = block.getByPosition (pos).column ->scatter (shard_cnt , selector);
82+ for (Int32 i = 0 ; i < shard_cnt ; ++i)
7683 partitioned_blocks[i].getByPosition (pos).column = std::move (partitioned_columns[i]);
7784 }
7885
@@ -89,14 +96,14 @@ BlocksWithShard ChunkPartitioner::doParition(Block block, Int32 partition_cnt) c
8996 return blocks_with_shard;
9097}
9198
92- IColumn::Selector ChunkPartitioner ::createSelector (Block block, Int32 partition_cnt ) const
99+ IColumn::Selector ChunkSharder ::createSelector (Block block, Int32 shard_cnt ) const
93100{
94- std::vector<UInt64> slot_to_shard (partition_cnt );
101+ std::vector<UInt64> slot_to_shard (shard_cnt );
95102 std::iota (slot_to_shard.begin (), slot_to_shard.end (), 0 );
96103
97- partitioning_expr ->execute (block);
104+ sharding_expr ->execute (block);
98105
99- const auto & key_column = block.getByName (partitioning_key_column_name );
106+ const auto & key_column = block.getByName (sharding_key_column_name );
100107
101108// / If key_column.type is DataTypeLowCardinality, do shard according to its dictionaryType
102109#define CREATE_FOR_TYPE (TYPE ) \
@@ -236,7 +243,10 @@ KafkaSink::KafkaSink(const Kafka * kafka, const Block & header, ContextPtr conte
236243 writer = FormatFactory::instance ().getOutputFormat (data_format, *wb, header, context);
237244 writer->setAutoFlush ();
238245
239- partitioner = std::make_unique<KafkaStream::ChunkPartitioner>(context, header, kafka->partitioning_expr_ast ());
246+ if (kafka->hasCustomShardingExpr ())
247+ partitioner = std::make_unique<KafkaStream::ChunkSharder>(context, header, kafka->shardingExprAst ());
248+ else
249+ partitioner = std::make_unique<KafkaStream::ChunkSharder>();
240250
241251 polling_threads.scheduleOrThrowOnError ([this ]() {
242252 while (!is_finished.test ())
@@ -251,7 +261,7 @@ void KafkaSink::consume(Chunk chunk)
251261 return ;
252262
253263 auto block = getHeader ().cloneWithColumns (chunk.detachColumns ());
254- auto blocks = partitioner->partition (std::move (block), partition_cnt);
264+ auto blocks = partitioner->shard (std::move (block), partition_cnt);
255265
256266 for (auto & blockWithShard : blocks)
257267 {
0 commit comments