Skip to content

Commit 3f77fd8

Browse files
committed
use metahll
1 parent bbbe468 commit 3f77fd8

File tree

4 files changed

+11
-10
lines changed

4 files changed

+11
-10
lines changed

src/query/storages/common/table_meta/src/meta/statistics.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ pub type SnapshotId = Uuid;
2525
pub type Location = (String, FormatVersion);
2626
pub type ClusterKey = (u32, String);
2727
pub type StatisticsOfColumns = HashMap<ColumnId, ColumnStatistics>;
28-
pub type ColumnDistinctHLL = simple_hll::HyperLogLog<10>;
2928

3029
// Assigned to executors; describes which blocks of a given segment an executor should take care of
3130
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize, PartialEq)]

src/query/storages/common/table_meta/src/meta/v4/block_statistics.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,20 +26,20 @@ use crate::meta::format::compress;
2626
use crate::meta::format::encode;
2727
use crate::meta::format::read_and_deserialize;
2828
use crate::meta::versions::Versioned;
29-
use crate::meta::ColumnDistinctHLL;
3029
use crate::meta::FormatVersion;
3130
use crate::meta::MetaCompression;
3231
use crate::meta::MetaEncoding;
32+
use crate::meta::MetaHLL;
3333

3434
#[derive(Serialize, Deserialize, Clone, Debug)]
3535
pub struct BlockStatistics {
3636
pub format_version: FormatVersion,
3737

38-
pub hll: HashMap<ColumnId, ColumnDistinctHLL>,
38+
pub hll: HashMap<ColumnId, MetaHLL>,
3939
}
4040

4141
impl BlockStatistics {
42-
pub fn new(hll: HashMap<ColumnId, ColumnDistinctHLL>) -> Self {
42+
pub fn new(hll: HashMap<ColumnId, MetaHLL>) -> Self {
4343
Self {
4444
format_version: BlockStatistics::VERSION,
4545
hll,

src/query/storages/fuse/src/io/write/stream/block_builder.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,9 @@ impl StreamBlockBuilder {
318318
.block_stats_location(&block_id);
319319
let block_stats_state = self.block_stats_builder.finalize(block_stats_location)?;
320320
if let Some(state) = &block_stats_state {
321-
column_distinct_count.extend(state.column_distinct_count.clone());
321+
for (key, val) in &state.column_distinct_count {
322+
column_distinct_count.entry(*key).or_insert(*val);
323+
}
322324
}
323325
let col_stats = self.column_stats_state.finalize(column_distinct_count)?;
324326

src/query/storages/fuse/src/io/write/stream/column_ndv_estimator.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,15 +39,15 @@ use databend_common_expression::with_number_type;
3939
use databend_common_expression::Column;
4040
use databend_common_expression::ScalarRef;
4141
use databend_common_expression::SELECTIVITY_THRESHOLD;
42-
use databend_storages_common_table_meta::meta::ColumnDistinctHLL;
42+
use databend_storages_common_table_meta::meta::MetaHLL;
4343
use enum_dispatch::enum_dispatch;
4444

4545
#[enum_dispatch]
4646
pub trait ColumnNDVEstimatorOps: Send + Sync {
4747
fn update_column(&mut self, column: &Column);
4848
fn update_scalar(&mut self, scalar: &ScalarRef);
4949
fn finalize(&self) -> usize;
50-
fn hll(self) -> ColumnDistinctHLL;
50+
fn hll(self) -> MetaHLL;
5151
}
5252

5353
#[enum_dispatch(ColumnNDVEstimatorOps)]
@@ -111,7 +111,7 @@ where
111111
T: ValueType + Send + Sync,
112112
for<'a> T::ScalarRef<'a>: Hash,
113113
{
114-
hll: ColumnDistinctHLL,
114+
hll: MetaHLL,
115115
_phantom: PhantomData<T>,
116116
}
117117

@@ -122,7 +122,7 @@ where
122122
{
123123
pub fn new() -> Self {
124124
Self {
125-
hll: ColumnDistinctHLL::new(),
125+
hll: MetaHLL::new(),
126126
_phantom: Default::default(),
127127
}
128128
}
@@ -181,7 +181,7 @@ where
181181
self.hll.count()
182182
}
183183

184-
fn hll(self) -> ColumnDistinctHLL {
184+
fn hll(self) -> MetaHLL {
185185
self.hll
186186
}
187187
}

0 commit comments

Comments
 (0)