Skip to content

Commit 4cf636a

Browse files
committed
repair: fix forest and fec chainer publish edge cases
1 parent bc4ef87 commit 4cf636a

File tree

7 files changed

+377
-88
lines changed

7 files changed

+377
-88
lines changed

src/discof/forest/fd_forest.c

Lines changed: 100 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ fd_forest_new( void * shmem, ulong ele_max, ulong seed ) {
4242
void * ancestry = FD_SCRATCH_ALLOC_APPEND( l, fd_forest_ancestry_align(), fd_forest_ancestry_footprint( ele_max ) );
4343
void * frontier = FD_SCRATCH_ALLOC_APPEND( l, fd_forest_frontier_align(), fd_forest_frontier_footprint( ele_max ) );
4444
void * orphaned = FD_SCRATCH_ALLOC_APPEND( l, fd_forest_orphaned_align(), fd_forest_orphaned_footprint( ele_max ) );
45+
void * deque = FD_SCRATCH_ALLOC_APPEND( l, fd_forest_deque_align(), fd_forest_deque_footprint( ele_max ) );
4546
FD_TEST( FD_SCRATCH_ALLOC_FINI( l, fd_forest_align() ) == (ulong)shmem + footprint );
4647

4748
forest->root = ULONG_MAX;
@@ -51,6 +52,7 @@ fd_forest_new( void * shmem, ulong ele_max, ulong seed ) {
5152
forest->ancestry_gaddr = fd_wksp_gaddr_fast( wksp, fd_forest_ancestry_join( fd_forest_ancestry_new( ancestry, ele_max, seed ) ) );
5253
forest->frontier_gaddr = fd_wksp_gaddr_fast( wksp, fd_forest_frontier_join( fd_forest_frontier_new( frontier, ele_max, seed ) ) );
5354
forest->orphaned_gaddr = fd_wksp_gaddr_fast( wksp, fd_forest_orphaned_join( fd_forest_orphaned_new( orphaned, ele_max, seed ) ) );
55+
forest->deque_gaddr = fd_wksp_gaddr_fast( wksp, fd_forest_deque_join ( fd_forest_deque_new ( deque, ele_max ) ) );
5456

5557
FD_COMPILER_MFENCE();
5658
FD_VOLATILE( forest->magic ) = FD_FOREST_MAGIC;
@@ -126,7 +128,6 @@ fd_forest_init( fd_forest_t * forest, ulong root_slot ) {
126128

127129
fd_forest_ele_t * root_ele = fd_forest_pool_ele_acquire( pool );
128130
root_ele->slot = root_slot;
129-
root_ele->prev = null;
130131
root_ele->parent = null;
131132
root_ele->child = null;
132133
root_ele->sibling = null;
@@ -188,16 +189,14 @@ fd_forest_verify( fd_forest_t const * forest ) {
188189
return 0;
189190
}
190191

191-
/* query queries for a connected ele keyed by slot. does not return
192-
orphaned ele. */
192+
FD_FN_PURE static inline ulong *
193+
fd_forest_deque( fd_forest_t * forest ) {
194+
return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->deque_gaddr );
195+
}
193196

194-
static fd_forest_ele_t *
195-
ancestry_frontier_query( fd_forest_t * forest, ulong slot ) {
196-
fd_forest_ele_t * pool = fd_forest_pool( forest );
197-
fd_forest_ele_t * ele = NULL;
198-
ele = fd_forest_ancestry_ele_query( fd_forest_ancestry( forest ), &slot, NULL, pool );
199-
ele = fd_ptr_if( !ele, fd_forest_frontier_ele_query( fd_forest_frontier( forest ), &slot, NULL, pool ), ele );
200-
return ele;
197+
FD_FN_PURE static inline ulong *
198+
fd_forest_deque_const( fd_forest_t const * forest ) {
199+
return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->deque_gaddr );
201200
}
202201

203202
/* remove removes and returns a connected ele from ancestry or frontier
@@ -233,41 +232,6 @@ link( fd_forest_t * forest, fd_forest_ele_t * parent, fd_forest_ele_t * child )
233232
child->parent = fd_forest_pool_idx( pool, parent );
234233
}
235234

236-
/* link_orphans performs a BFS beginning from head using BFS. head is
237-
the first element of a linked list representing the BFS queue. If the
238-
starting orphan is connected to the ancestry tree (ie. its parent is
239-
in the map), it is linked to the tree and removed from the orphaned
240-
map, and any of its orphaned children are added to the queue (linking
241-
a parent also links its direct children). Otherwise it remains in the
242-
orphaned map. The BFS continues until the queue is empty. */
243-
244-
FD_FN_UNUSED static void
245-
link_orphans( fd_forest_t * forest, fd_forest_ele_t * head ) {
246-
fd_forest_ele_t * pool = fd_forest_pool( forest );
247-
ulong null = fd_forest_pool_idx_null( pool );
248-
fd_forest_ancestry_t * ancestry = fd_forest_ancestry( forest );
249-
fd_forest_orphaned_t * orphaned = fd_forest_orphaned( forest );
250-
fd_forest_ele_t * tail = head;
251-
fd_forest_ele_t * prev = NULL;
252-
while( FD_LIKELY( head ) ) { /* while queue is non-empty */
253-
if( FD_LIKELY( fd_forest_orphaned_ele_remove( orphaned, &head->slot, NULL, pool ) ) ) { /* head is orphan root */
254-
fd_forest_ancestry_ele_insert( ancestry, head, pool );
255-
fd_forest_ele_t * child = fd_forest_pool_ele( pool, head->child );
256-
while( FD_LIKELY( child ) ) { /* append children to frontier */
257-
tail->prev = fd_forest_pool_idx( pool, child ); /* safe to overwrite prev */
258-
tail = fd_forest_pool_ele( pool, tail->prev );
259-
tail->prev = null;
260-
ulong sibling = child->sibling;
261-
child->sibling = null;
262-
child = fd_forest_pool_ele( pool, sibling );
263-
}
264-
}
265-
prev = head;
266-
head = fd_forest_pool_ele( pool, head->prev );
267-
prev->prev = null;
268-
}
269-
}
270-
271235
/* advance_frontier attempts to advance the frontier beginning from slot
272236
using BFS. head is the element popped from the deque that serves as
273237
the BFS queue. A slot can be advanced if all shreds for the block
@@ -276,36 +240,36 @@ link_orphans( fd_forest_t * forest, fd_forest_ele_t * head ) {
276240
static void
277241
advance_frontier( fd_forest_t * forest, ulong slot, ushort parent_off ) {
278242
fd_forest_ele_t * pool = fd_forest_pool( forest );
279-
ulong null = fd_forest_pool_idx_null( pool );
280243
fd_forest_ancestry_t * ancestry = fd_forest_ancestry( forest );
281244
fd_forest_frontier_t * frontier = fd_forest_frontier( forest );
245+
ulong * queue = fd_forest_deque( forest );
282246

283247
fd_forest_ele_t * ele;
284248
ele = fd_forest_frontier_ele_query( fd_forest_frontier( forest ), &slot, NULL, pool );
285249
ulong parent_slot = slot - parent_off;
286250
ele = fd_ptr_if( !ele, fd_forest_frontier_ele_query( fd_forest_frontier( forest ), &parent_slot, NULL, pool ), ele );
251+
if( FD_UNLIKELY( !ele ) ) return;
287252

288-
fd_forest_ele_t * head = ele;
289-
fd_forest_ele_t * tail = head;
290-
fd_forest_ele_t * prev = NULL;
253+
#if FD_FOREST_USE_HANDHOLDING
254+
FD_TEST( fd_forest_deque_cnt( queue ) == 0 );
255+
#endif
291256

292-
while( FD_LIKELY( head ) ) {
257+
/* The BFS queue holds elements as pool idxs */
258+
fd_forest_deque_push_tail( queue, fd_forest_pool_idx( pool, ele ) );
259+
while( FD_LIKELY( fd_forest_deque_cnt( queue ) ) ) {
260+
fd_forest_ele_t * head = fd_forest_pool_ele( pool, fd_forest_deque_pop_head( queue ) );
293261
fd_forest_ele_t * child = fd_forest_pool_ele( pool, head->child );
294262
if( FD_LIKELY( child && head->complete_idx != UINT_MAX && head->buffered_idx == head->complete_idx ) ) {
295263
fd_forest_frontier_ele_remove( frontier, &head->slot, NULL, pool );
296264
fd_forest_ancestry_ele_insert( ancestry, head, pool );
297265
while( FD_LIKELY( child ) ) { /* append children to frontier */
298266
fd_forest_ancestry_ele_remove( ancestry, &child->slot, NULL, pool );
299267
fd_forest_frontier_ele_insert( frontier, child, pool );
300-
tail->prev = fd_forest_pool_idx( pool, child );
301-
tail = fd_forest_pool_ele( pool, tail->prev );
302-
tail->prev = fd_forest_pool_idx_null( pool );
303-
child = fd_forest_pool_ele( pool, child->sibling );
268+
269+
fd_forest_deque_push_tail( queue, fd_forest_pool_idx( pool, child ) );
270+
child = fd_forest_pool_ele( pool, child->sibling );
304271
}
305272
}
306-
prev = head;
307-
head = fd_forest_pool_ele( pool, head->prev );
308-
prev->prev = null;
309273
}
310274
}
311275

@@ -330,7 +294,6 @@ acquire( fd_forest_t * forest, ulong slot ) {
330294
ulong null = fd_forest_pool_idx_null( pool );
331295

332296
ele->slot = slot;
333-
ele->prev = null;
334297
ele->next = null;
335298
ele->parent = null;
336299
ele->child = null;
@@ -382,9 +345,6 @@ insert( fd_forest_t * forest, ulong slot, ushort parent_off ) {
382345

383346
fd_forest_ele_t *
384347
fd_forest_query( fd_forest_t * forest, ulong slot ) {
385-
# if FD_FOREST_USE_HANDHOLDING
386-
FD_TEST( slot > fd_forest_root_slot( forest ) ); /* caller error - inval */
387-
# endif
388348
return query( forest, slot );
389349
}
390350

@@ -433,50 +393,108 @@ fd_forest_publish( fd_forest_t * forest, ulong new_root_slot ) {
433393

434394
fd_forest_ancestry_t * ancestry = fd_forest_ancestry( forest );
435395
fd_forest_frontier_t * frontier = fd_forest_frontier( forest );
396+
fd_forest_orphaned_t * orphaned = fd_forest_orphaned( forest );
436397
fd_forest_ele_t * pool = fd_forest_pool( forest );
437398
ulong null = fd_forest_pool_idx_null( pool );
399+
ulong * queue = fd_forest_deque( forest );
438400

439401
fd_forest_ele_t * old_root_ele = fd_forest_pool_ele( pool, forest->root );
440-
fd_forest_ele_t * new_root_ele = ancestry_frontier_query( forest, new_root_slot );
402+
fd_forest_ele_t * new_root_ele = query( forest, new_root_slot );
441403

442-
# if FD_FOREST_USE_HANDHOLDING
443-
FD_TEST( new_root_ele ); /* caller error - not found */
444-
FD_TEST( new_root_ele->slot > old_root_ele->slot ); /* caller error - inval */
445-
# endif
404+
#if FD_FOREST_USE_HANDHOLDING
405+
if( FD_LIKELY( new_root_ele ) ) {
406+
FD_TEST( new_root_ele->slot > old_root_ele->slot ); /* caller error - inval */
407+
}
408+
#endif
409+
410+
/* Edge case: if we haven't been getting repairs and we have a
411+
gap between the root and orphans, we publish forward to a slot that
412+
we don't have. The only case where this should happen is when we
413+
load a second incremental and that incremental slot lives in the
414+
gap. In that case this isn't a bug, but we should be treating this
415+
new root like the snapshot slot / init root. Should be happening
416+
very rarely given a well-functioning repair. */
417+
418+
if( FD_UNLIKELY( !new_root_ele ) ) {
419+
new_root_ele = acquire( forest, new_root_slot );
420+
new_root_ele->complete_idx = 0;
421+
new_root_ele->buffered_idx = 0;
422+
fd_forest_frontier_ele_insert( frontier, new_root_ele, pool );
423+
}
446424

447425
/* First, remove the previous root, and add it to a FIFO prune queue.
448426
head points to the queue head (initialized with old_root_ele). */
449-
427+
#if FD_FOREST_USE_HANDHOLDING
428+
FD_TEST( fd_forest_deque_cnt( queue ) == 0 );
429+
#endif
450430
fd_forest_ele_t * head = ancestry_frontier_remove( forest, old_root_ele->slot );
451-
head->next = null;
452-
fd_forest_ele_t * tail = head;
431+
if( FD_LIKELY( head ) ) fd_forest_deque_push_tail( queue, fd_forest_pool_idx( pool, head ) );
453432

454433
/* Second, BFS down the tree, inserting each ele into the prune queue
455434
except for the new root. Loop invariant: head always descends from
456435
old_root_ele and never descends from new_root_ele. */
457436

458-
while( head ) {
437+
while( FD_LIKELY( fd_forest_deque_cnt( queue ) ) ) {
438+
head = fd_forest_pool_ele( pool, fd_forest_deque_pop_head( queue ) );
459439
fd_forest_ele_t * child = fd_forest_pool_ele( pool, head->child );
460440
while( FD_LIKELY( child ) ) {
461441
if( FD_LIKELY( child != new_root_ele ) ) { /* do not prune new root or descendants */
462-
ulong idx = fd_forest_ancestry_idx_remove( ancestry, &child->slot, null, pool );
463-
idx = fd_ulong_if( idx != null, idx, fd_forest_frontier_idx_remove( frontier, &child->slot, null, pool ) );
464-
tail->next = idx; /* insert prune queue */
465-
# if FD_FOREST_USE_HANDHOLDING
466-
FD_TEST( tail->next != null ); /* programming error in BFS */
467-
# endif
468-
tail = fd_forest_pool_ele( pool, tail->next ); /* advance prune queue */
469-
tail->next = null;
442+
ulong idx = fd_forest_ancestry_idx_remove( ancestry, &child->slot, null, pool );
443+
idx = fd_ulong_if( idx != null, idx, fd_forest_frontier_idx_remove( frontier, &child->slot, null, pool ) );
444+
fd_forest_deque_push_tail( queue, idx );
470445
}
471446
child = fd_forest_pool_ele( pool, child->sibling );
472447
}
473-
fd_forest_ele_t * next = fd_forest_pool_ele( pool, head->next ); /* FIFO pop */
474-
fd_forest_pool_ele_release( pool, head ); /* free head */
475-
head = next;
448+
fd_forest_pool_ele_release( pool, head );
449+
}
450+
451+
/* If there is nothing on the frontier, we have hit an edge case
452+
during catching up where all of our frontiers were < the new root.
453+
In that case we need to continue repairing from the new root, so
454+
add it to the frontier. */
455+
456+
if( FD_UNLIKELY( fd_forest_frontier_iter_done( fd_forest_frontier_iter_init( frontier, pool ), frontier, pool ) ) ) {
457+
fd_forest_ele_t * remove = fd_forest_ancestry_ele_remove( ancestry, &new_root_ele->slot, NULL, pool );
458+
if( FD_UNLIKELY( !remove ) ) {
459+
/* Very rare case where during second incremental load we could publish to an orphaned slot */
460+
remove = fd_forest_orphaned_ele_remove( orphaned, &new_root_ele->slot, NULL, pool );
461+
}
462+
FD_TEST( remove == new_root_ele );
463+
fd_forest_frontier_ele_insert( frontier, new_root_ele, pool );
464+
new_root_ele->complete_idx = 0;
465+
new_root_ele->buffered_idx = 0;
466+
advance_frontier( forest, new_root_ele->slot, 0 );
476467
}
477468

478469
new_root_ele->parent = null; /* unlink new root from parent */
479-
forest->root = fd_forest_ancestry_idx_query( ancestry, &new_root_slot, null, pool );
470+
forest->root = fd_forest_pool_idx( pool, new_root_ele );
471+
472+
/* Lastly, clean up orphans if there are orphan heads < new_root_slot.
473+
First, add any relevant orphans to the prune queue. */
474+
475+
for( fd_forest_orphaned_iter_t iter = fd_forest_orphaned_iter_init( orphaned, pool );
476+
!fd_forest_orphaned_iter_done( iter, orphaned, pool );
477+
iter = fd_forest_orphaned_iter_next( iter, orphaned, pool ) ) {
478+
fd_forest_ele_t * ele = fd_forest_orphaned_iter_ele( iter, orphaned, pool );
479+
if( FD_UNLIKELY( ele->slot < new_root_slot ) ) {
480+
fd_forest_deque_push_tail( queue, fd_forest_pool_idx( pool, ele ) );
481+
}
482+
}
483+
484+
/* Now BFS and clean up children of these orphan heads */
485+
while( FD_LIKELY( fd_forest_deque_cnt( queue ) ) ) {
486+
head = fd_forest_pool_ele( pool, fd_forest_deque_pop_head( queue ) );
487+
fd_forest_ele_t * child = fd_forest_pool_ele( pool, head->child );
488+
while( FD_LIKELY( child ) ) {
489+
if( FD_LIKELY( child != new_root_ele ) ) {
490+
fd_forest_deque_push_tail( queue, fd_forest_pool_idx( pool, child ) );
491+
}
492+
child = fd_forest_pool_ele( pool, child->sibling );
493+
}
494+
ulong remove = fd_forest_orphaned_idx_remove( orphaned, &head->slot, null, pool ); /* remove myself */
495+
remove = fd_ulong_if( remove == null, fd_forest_ancestry_idx_remove( ancestry, &head->slot, null, pool ), remove );
496+
fd_forest_pool_ele_release( pool, head ); /* free head */
497+
}
480498
return new_root_ele;
481499
}
482500

src/discof/forest/fd_forest.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@
4545

4646
struct __attribute__((aligned(128UL))) fd_forest_ele {
4747
ulong slot; /* map key */
48-
ulong prev; /* internal use by link_orphans */
4948
ulong next; /* internal use by fd_pool, fd_map_chain */
5049
ulong parent; /* pool idx of the parent in the tree */
5150
ulong child; /* pool idx of the left-child */
@@ -79,6 +78,12 @@ typedef struct fd_forest_ele fd_forest_ele_t;
7978
#define MAP_KEY slot
8079
#include "../../util/tmpl/fd_map_chain.c"
8180

81+
/* Internal use only for BFSing */
82+
#define DEQUE_NAME fd_forest_deque
83+
#define DEQUE_T ulong
84+
#include "../../util/tmpl/fd_deque_dynamic.c"
85+
86+
8287
/* fd_forest_t is the top-level structure that holds the root of
8388
the tree, as well as the memory pools and map structures.
8489
@@ -111,6 +116,7 @@ struct __attribute__((aligned(128UL))) fd_forest {
111116
ulong ancestry_gaddr; /* wksp_gaddr of fd_forest_ancestry */
112117
ulong frontier_gaddr; /* map of slot to ele (leaf that needs repair) */
113118
ulong orphaned_gaddr; /* map of parent_slot to singly-linked list of ele orphaned by that parent slot */
119+
ulong deque_gaddr; /* wksp gaddr of fd_forest_deque. internal use only for BFSing */
114120
ulong magic; /* ==FD_FOREST_MAGIC */
115121
};
116122
typedef struct fd_forest fd_forest_t;
@@ -137,13 +143,15 @@ fd_forest_footprint( ulong ele_max ) {
137143
FD_LAYOUT_APPEND(
138144
FD_LAYOUT_APPEND(
139145
FD_LAYOUT_APPEND(
146+
FD_LAYOUT_APPEND(
140147
FD_LAYOUT_INIT,
141148
alignof(fd_forest_t), sizeof(fd_forest_t) ),
142149
fd_fseq_align(), fd_fseq_footprint() ),
143150
fd_forest_pool_align(), fd_forest_pool_footprint( ele_max ) ),
144151
fd_forest_ancestry_align(), fd_forest_ancestry_footprint( ele_max ) ),
145152
fd_forest_frontier_align(), fd_forest_frontier_footprint( ele_max ) ),
146153
fd_forest_orphaned_align(), fd_forest_orphaned_footprint( ele_max ) ),
154+
fd_forest_deque_align(), fd_forest_deque_footprint( ele_max ) ),
147155
fd_forest_align() );
148156
}
149157

0 commit comments

Comments
 (0)