Skip to content

Commit f5b968f

Browse files
Arkenan and jrchatruc authored
perf(l1): rebuild_bloom - insert instead of merge (#5266)
Merging layer filters is very costly due to the cost of iterating the elements of a filter. Inserting has proven to be cheaper for the size of the filter we're using and the amount of elements we are holding. Some measurements: - Early hoodi imports went from 8 blocks per second to about 80 (10x). - Synced hoodi rebuild time went from ~166ms to ~27ms, (about a 6x). The PR also simplifies the rebuild code and avoids using rayon entirely. --------- Co-authored-by: Javier Rodríguez Chatruc <49622509+jrchatruc@users.noreply.github.com> Co-authored-by: Javier Chatruc <jrchatruc@gmail.com>
1 parent 85ef3d3 commit f5b968f

File tree

2 files changed

+18
-34
lines changed

2 files changed

+18
-34
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
### 2025-11-11
66

7+
- Insert instead of merge for bloom rebuilds [#5223](https://github.com/lambdaclass/ethrex/pull/5223)
78
- Replace sha3 keccak to an assembly version using ffi [#5247](https://github.com/lambdaclass/ethrex/pull/5247)
89
- Fix `FlatKeyValue` generation on fullsync mode [#5274](https://github.com/lambdaclass/ethrex/pull/5274)
910

crates/storage/trie_db/layering.rs

Lines changed: 17 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
use ethrex_common::H256;
2-
use rayon::iter::{ParallelBridge, ParallelIterator};
32
use rustc_hash::FxHashMap;
43
use std::sync::Arc;
54

@@ -133,45 +132,29 @@ impl TrieLayerCache {
133132
self.layers.insert(state_root, Arc::new(entry));
134133
}
135134

136-
/// Rebuilds the global bloom filter accruing all current existing layers.
135+
/// Rebuilds the global bloom filter by inserting all keys from all layers.
137136
pub fn rebuild_bloom(&mut self) {
138-
let mut blooms: Vec<_> = self
139-
.layers
140-
.values()
141-
.par_bridge()
142-
.map(|entry| {
143-
let Ok(mut bloom) = Self::create_filter() else {
144-
tracing::warn!("TrieLayerCache: rebuild_bloom could not create filter");
145-
return None;
146-
};
147-
for (p, _) in entry.nodes.iter() {
148-
if let Err(qfilter::Error::CapacityExceeded) = bloom.insert(p) {
149-
tracing::warn!("TrieLayerCache: rebuild_bloom capacity exceeded");
150-
return None;
151-
}
152-
}
153-
Some(bloom)
154-
})
155-
.collect();
156-
157-
let Some(mut ret) = blooms.pop().flatten() else {
158-
tracing::warn!("TrieLayerCache: rebuild_bloom no valid bloom found");
137+
let Ok(mut new_global_filter) = Self::create_filter() else {
138+
tracing::warn!(
139+
"TrieLayerCache: rebuild_bloom could not create new filter. Poisoning bloom."
140+
);
159141
self.bloom = None;
160142
return;
161143
};
162-
for bloom in blooms.iter() {
163-
let Some(bloom) = bloom else {
164-
tracing::warn!("TrieLayerCache: rebuild_bloom no valid bloom found");
165-
self.bloom = None;
166-
return;
167-
};
168-
if let Err(qfilter::Error::CapacityExceeded) = ret.merge(false, bloom) {
169-
tracing::warn!("TrieLayerCache: rebuild_bloom capacity exceeded");
170-
self.bloom = None;
171-
return;
144+
145+
for layer in self.layers.values() {
146+
for path in layer.nodes.keys() {
147+
if let Err(qfilter::Error::CapacityExceeded) = new_global_filter.insert(path) {
148+
tracing::warn!(
149+
"TrieLayerCache: rebuild_bloom capacity exceeded. Poisoning bloom."
150+
);
151+
self.bloom = None;
152+
return;
153+
}
172154
}
173155
}
174-
self.bloom = Some(ret);
156+
157+
self.bloom = Some(new_global_filter);
175158
}
176159

177160
pub fn commit(&mut self, state_root: H256) -> Option<Vec<(Vec<u8>, Vec<u8>)>> {

0 commit comments

Comments
 (0)