Skip to content

Commit 55600ed

Browse files
committed
repair: fix forest and fec chainer publish edge cases
1 parent bc4ef87 commit 55600ed

File tree

7 files changed

+373
-89
lines changed

7 files changed

+373
-89
lines changed

src/discof/forest/fd_forest.c

Lines changed: 96 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ fd_forest_new( void * shmem, ulong ele_max, ulong seed ) {
4242
void * ancestry = FD_SCRATCH_ALLOC_APPEND( l, fd_forest_ancestry_align(), fd_forest_ancestry_footprint( ele_max ) );
4343
void * frontier = FD_SCRATCH_ALLOC_APPEND( l, fd_forest_frontier_align(), fd_forest_frontier_footprint( ele_max ) );
4444
void * orphaned = FD_SCRATCH_ALLOC_APPEND( l, fd_forest_orphaned_align(), fd_forest_orphaned_footprint( ele_max ) );
45+
void * deque = FD_SCRATCH_ALLOC_APPEND( l, fd_forest_deque_align(), fd_forest_deque_footprint( ele_max ) );
4546
FD_TEST( FD_SCRATCH_ALLOC_FINI( l, fd_forest_align() ) == (ulong)shmem + footprint );
4647

4748
forest->root = ULONG_MAX;
@@ -51,6 +52,7 @@ fd_forest_new( void * shmem, ulong ele_max, ulong seed ) {
5152
forest->ancestry_gaddr = fd_wksp_gaddr_fast( wksp, fd_forest_ancestry_join( fd_forest_ancestry_new( ancestry, ele_max, seed ) ) );
5253
forest->frontier_gaddr = fd_wksp_gaddr_fast( wksp, fd_forest_frontier_join( fd_forest_frontier_new( frontier, ele_max, seed ) ) );
5354
forest->orphaned_gaddr = fd_wksp_gaddr_fast( wksp, fd_forest_orphaned_join( fd_forest_orphaned_new( orphaned, ele_max, seed ) ) );
55+
forest->deque_gaddr = fd_wksp_gaddr_fast( wksp, fd_forest_deque_join ( fd_forest_deque_new ( deque, ele_max ) ) );
5456

5557
FD_COMPILER_MFENCE();
5658
FD_VOLATILE( forest->magic ) = FD_FOREST_MAGIC;
@@ -126,7 +128,6 @@ fd_forest_init( fd_forest_t * forest, ulong root_slot ) {
126128

127129
fd_forest_ele_t * root_ele = fd_forest_pool_ele_acquire( pool );
128130
root_ele->slot = root_slot;
129-
root_ele->prev = null;
130131
root_ele->parent = null;
131132
root_ele->child = null;
132133
root_ele->sibling = null;
@@ -188,16 +189,9 @@ fd_forest_verify( fd_forest_t const * forest ) {
188189
return 0;
189190
}
190191

191-
/* query queries for a connected ele keyed by slot. does not return
192-
orphaned ele. */
193-
194-
static fd_forest_ele_t *
195-
ancestry_frontier_query( fd_forest_t * forest, ulong slot ) {
196-
fd_forest_ele_t * pool = fd_forest_pool( forest );
197-
fd_forest_ele_t * ele = NULL;
198-
ele = fd_forest_ancestry_ele_query( fd_forest_ancestry( forest ), &slot, NULL, pool );
199-
ele = fd_ptr_if( !ele, fd_forest_frontier_ele_query( fd_forest_frontier( forest ), &slot, NULL, pool ), ele );
200-
return ele;
192+
FD_FN_PURE static inline ulong *
193+
fd_forest_deque( fd_forest_t * forest ) {
194+
return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->deque_gaddr );
201195
}
202196

203197
/* remove removes and returns a connected ele from ancestry or frontier
@@ -233,41 +227,6 @@ link( fd_forest_t * forest, fd_forest_ele_t * parent, fd_forest_ele_t * child )
233227
child->parent = fd_forest_pool_idx( pool, parent );
234228
}
235229

236-
/* link_orphans performs a BFS beginning from head using BFS. head is
237-
the first element of a linked list representing the BFS queue. If the
238-
starting orphan is connected to the ancestry tree (ie. its parent is
239-
in the map), it is linked to the tree and removed from the orphaned
240-
map, and any of its orphaned children are added to the queue (linking
241-
a parent also links its direct children). Otherwise it remains in the
242-
orphaned map. The BFS continues until the queue is empty. */
243-
244-
FD_FN_UNUSED static void
245-
link_orphans( fd_forest_t * forest, fd_forest_ele_t * head ) {
246-
fd_forest_ele_t * pool = fd_forest_pool( forest );
247-
ulong null = fd_forest_pool_idx_null( pool );
248-
fd_forest_ancestry_t * ancestry = fd_forest_ancestry( forest );
249-
fd_forest_orphaned_t * orphaned = fd_forest_orphaned( forest );
250-
fd_forest_ele_t * tail = head;
251-
fd_forest_ele_t * prev = NULL;
252-
while( FD_LIKELY( head ) ) { /* while queue is non-empty */
253-
if( FD_LIKELY( fd_forest_orphaned_ele_remove( orphaned, &head->slot, NULL, pool ) ) ) { /* head is orphan root */
254-
fd_forest_ancestry_ele_insert( ancestry, head, pool );
255-
fd_forest_ele_t * child = fd_forest_pool_ele( pool, head->child );
256-
while( FD_LIKELY( child ) ) { /* append children to frontier */
257-
tail->prev = fd_forest_pool_idx( pool, child ); /* safe to overwrite prev */
258-
tail = fd_forest_pool_ele( pool, tail->prev );
259-
tail->prev = null;
260-
ulong sibling = child->sibling;
261-
child->sibling = null;
262-
child = fd_forest_pool_ele( pool, sibling );
263-
}
264-
}
265-
prev = head;
266-
head = fd_forest_pool_ele( pool, head->prev );
267-
prev->prev = null;
268-
}
269-
}
270-
271230
/* advance_frontier attempts to advance the frontier beginning from slot
272231
using BFS. head is the first element of a linked list representing
273232
the BFS queue. A slot can be advanced if all shreds for the block
@@ -276,36 +235,36 @@ link_orphans( fd_forest_t * forest, fd_forest_ele_t * head ) {
276235
static void
277236
advance_frontier( fd_forest_t * forest, ulong slot, ushort parent_off ) {
278237
fd_forest_ele_t * pool = fd_forest_pool( forest );
279-
ulong null = fd_forest_pool_idx_null( pool );
280238
fd_forest_ancestry_t * ancestry = fd_forest_ancestry( forest );
281239
fd_forest_frontier_t * frontier = fd_forest_frontier( forest );
240+
ulong * queue = fd_forest_deque( forest );
282241

283242
fd_forest_ele_t * ele;
284243
ele = fd_forest_frontier_ele_query( fd_forest_frontier( forest ), &slot, NULL, pool );
285244
ulong parent_slot = slot - parent_off;
286245
ele = fd_ptr_if( !ele, fd_forest_frontier_ele_query( fd_forest_frontier( forest ), &parent_slot, NULL, pool ), ele );
246+
if( FD_UNLIKELY( !ele ) ) return;
287247

288-
fd_forest_ele_t * head = ele;
289-
fd_forest_ele_t * tail = head;
290-
fd_forest_ele_t * prev = NULL;
248+
#if FD_FOREST_USE_HANDHOLDING
249+
FD_TEST( fd_forest_deque_cnt( queue ) == 0 );
250+
#endif
291251

292-
while( FD_LIKELY( head ) ) {
252+
/* BFS elements as pool idxs */
253+
fd_forest_deque_push_tail( queue, fd_forest_pool_idx( pool, ele ) );
254+
while( FD_LIKELY( fd_forest_deque_cnt( queue ) ) ) {
255+
fd_forest_ele_t * head = fd_forest_pool_ele( pool, fd_forest_deque_pop_head( queue ) );
293256
fd_forest_ele_t * child = fd_forest_pool_ele( pool, head->child );
294257
if( FD_LIKELY( child && head->complete_idx != UINT_MAX && head->buffered_idx == head->complete_idx ) ) {
295258
fd_forest_frontier_ele_remove( frontier, &head->slot, NULL, pool );
296259
fd_forest_ancestry_ele_insert( ancestry, head, pool );
297260
while( FD_LIKELY( child ) ) { /* append children to frontier */
298261
fd_forest_ancestry_ele_remove( ancestry, &child->slot, NULL, pool );
299262
fd_forest_frontier_ele_insert( frontier, child, pool );
300-
tail->prev = fd_forest_pool_idx( pool, child );
301-
tail = fd_forest_pool_ele( pool, tail->prev );
302-
tail->prev = fd_forest_pool_idx_null( pool );
303-
child = fd_forest_pool_ele( pool, child->sibling );
263+
264+
fd_forest_deque_push_tail( queue, fd_forest_pool_idx( pool, child ) );
265+
child = fd_forest_pool_ele( pool, child->sibling );
304266
}
305267
}
306-
prev = head;
307-
head = fd_forest_pool_ele( pool, head->prev );
308-
prev->prev = null;
309268
}
310269
}
311270

@@ -330,7 +289,6 @@ acquire( fd_forest_t * forest, ulong slot ) {
330289
ulong null = fd_forest_pool_idx_null( pool );
331290

332291
ele->slot = slot;
333-
ele->prev = null;
334292
ele->next = null;
335293
ele->parent = null;
336294
ele->child = null;
@@ -382,9 +340,6 @@ insert( fd_forest_t * forest, ulong slot, ushort parent_off ) {
382340

383341
fd_forest_ele_t *
384342
fd_forest_query( fd_forest_t * forest, ulong slot ) {
385-
# if FD_FOREST_USE_HANDHOLDING
386-
FD_TEST( slot > fd_forest_root_slot( forest ) ); /* caller error - inval */
387-
# endif
388343
return query( forest, slot );
389344
}
390345

@@ -433,50 +388,108 @@ fd_forest_publish( fd_forest_t * forest, ulong new_root_slot ) {
433388

434389
fd_forest_ancestry_t * ancestry = fd_forest_ancestry( forest );
435390
fd_forest_frontier_t * frontier = fd_forest_frontier( forest );
391+
fd_forest_orphaned_t * orphaned = fd_forest_orphaned( forest );
436392
fd_forest_ele_t * pool = fd_forest_pool( forest );
437393
ulong null = fd_forest_pool_idx_null( pool );
394+
ulong * queue = fd_forest_deque( forest );
438395

439396
fd_forest_ele_t * old_root_ele = fd_forest_pool_ele( pool, forest->root );
440-
fd_forest_ele_t * new_root_ele = ancestry_frontier_query( forest, new_root_slot );
397+
fd_forest_ele_t * new_root_ele = query( forest, new_root_slot );
441398

442-
# if FD_FOREST_USE_HANDHOLDING
443-
FD_TEST( new_root_ele ); /* caller error - not found */
444-
FD_TEST( new_root_ele->slot > old_root_ele->slot ); /* caller error - inval */
445-
# endif
399+
#if FD_FOREST_USE_HANDHOLDING
400+
if( FD_LIKELY( new_root_ele ) ) {
401+
FD_TEST( new_root_ele->slot > old_root_ele->slot ); /* caller error - inval */
402+
}
403+
#endif
404+
405+
/* Edge case: if we haven't been getting repairs and there is a
406+
gap between the root and orphans, we may publish forward to a slot that
407+
we don't have. The only case where this should happen is when we
408+
load a second incremental and that incremental slot lives in the
409+
gap. In that case this isn't a bug, but we should be treating this
410+
new root like the snapshot slot / init root. Should be happening
411+
very rarely given a well-functioning repair. */
412+
413+
if( FD_UNLIKELY( !new_root_ele ) ) {
414+
new_root_ele = acquire( forest, new_root_slot );
415+
new_root_ele->complete_idx = 0;
416+
new_root_ele->buffered_idx = 0;
417+
fd_forest_frontier_ele_insert( frontier, new_root_ele, pool );
418+
}
446419

447420
/* First, remove the previous root, and add it to a FIFO prune queue.
448421
head points to the queue head (initialized with old_root_ele). */
449-
422+
#if FD_FOREST_USE_HANDHOLDING
423+
FD_TEST( fd_forest_deque_cnt( queue ) == 0 );
424+
#endif
450425
fd_forest_ele_t * head = ancestry_frontier_remove( forest, old_root_ele->slot );
451-
head->next = null;
452-
fd_forest_ele_t * tail = head;
426+
if( FD_LIKELY( head ) ) fd_forest_deque_push_tail( queue, fd_forest_pool_idx( pool, head ) );
453427

454428
/* Second, BFS down the tree, inserting each ele into the prune queue
455429
except for the new root. Loop invariant: head always descends from
456430
old_root_ele and never descends from new_root_ele. */
457431

458-
while( head ) {
432+
while( FD_LIKELY( fd_forest_deque_cnt( queue ) ) ) {
433+
head = fd_forest_pool_ele( pool, fd_forest_deque_pop_head( queue ) );
459434
fd_forest_ele_t * child = fd_forest_pool_ele( pool, head->child );
460435
while( FD_LIKELY( child ) ) {
461436
if( FD_LIKELY( child != new_root_ele ) ) { /* do not prune new root or descendants */
462-
ulong idx = fd_forest_ancestry_idx_remove( ancestry, &child->slot, null, pool );
463-
idx = fd_ulong_if( idx != null, idx, fd_forest_frontier_idx_remove( frontier, &child->slot, null, pool ) );
464-
tail->next = idx; /* insert prune queue */
465-
# if FD_FOREST_USE_HANDHOLDING
466-
FD_TEST( tail->next != null ); /* programming error in BFS */
467-
# endif
468-
tail = fd_forest_pool_ele( pool, tail->next ); /* advance prune queue */
469-
tail->next = null;
437+
ulong idx = fd_forest_ancestry_idx_remove( ancestry, &child->slot, null, pool );
438+
idx = fd_ulong_if( idx != null, idx, fd_forest_frontier_idx_remove( frontier, &child->slot, null, pool ) );
439+
fd_forest_deque_push_tail( queue, idx );
470440
}
471441
child = fd_forest_pool_ele( pool, child->sibling );
472442
}
473-
fd_forest_ele_t * next = fd_forest_pool_ele( pool, head->next ); /* FIFO pop */
474-
fd_forest_pool_ele_release( pool, head ); /* free head */
475-
head = next;
443+
fd_forest_pool_ele_release( pool, head );
444+
}
445+
446+
/* If there is nothing on the frontier, we have hit an edge case
447+
during catching up where all of our frontiers were < the new root.
448+
In that case we need to continue repairing from the new root, so
449+
add it to the frontier. */
450+
451+
if( FD_UNLIKELY( fd_forest_frontier_iter_done( fd_forest_frontier_iter_init( frontier, pool ), frontier, pool ) ) ) {
452+
fd_forest_ele_t * remove = fd_forest_ancestry_ele_remove( ancestry, &new_root_ele->slot, NULL, pool );
453+
if( FD_UNLIKELY( !remove ) ) {
454+
/* Very rare case where during second incremental load we could publish to an orphaned slot */
455+
remove = fd_forest_orphaned_ele_remove( orphaned, &new_root_ele->slot, NULL, pool );
456+
}
457+
FD_TEST( remove == new_root_ele );
458+
fd_forest_frontier_ele_insert( frontier, new_root_ele, pool );
459+
new_root_ele->complete_idx = 0;
460+
new_root_ele->buffered_idx = 0;
461+
advance_frontier( forest, new_root_ele->slot, 0 );
476462
}
477463

478464
new_root_ele->parent = null; /* unlink new root from parent */
479-
forest->root = fd_forest_ancestry_idx_query( ancestry, &new_root_slot, null, pool );
465+
forest->root = fd_forest_pool_idx( pool, new_root_ele );
466+
467+
/* Lastly, clean up orphans if there are orphan heads < new_root_slot.
468+
First, add any relevant orphans to the prune queue. */
469+
470+
for( fd_forest_orphaned_iter_t iter = fd_forest_orphaned_iter_init( orphaned, pool );
471+
!fd_forest_orphaned_iter_done( iter, orphaned, pool );
472+
iter = fd_forest_orphaned_iter_next( iter, orphaned, pool ) ) {
473+
fd_forest_ele_t * ele = fd_forest_orphaned_iter_ele( iter, orphaned, pool );
474+
if( FD_UNLIKELY( ele->slot < new_root_slot ) ) {
475+
fd_forest_deque_push_tail( queue, fd_forest_pool_idx( pool, ele ) );
476+
}
477+
}
478+
479+
/* Now BFS and clean up children of these orphan heads */
480+
while( FD_UNLIKELY( fd_forest_deque_cnt( queue ) ) ) {
481+
head = fd_forest_pool_ele( pool, fd_forest_deque_pop_head( queue ) );
482+
fd_forest_ele_t * child = fd_forest_pool_ele( pool, head->child );
483+
while( FD_LIKELY( child ) ) {
484+
if( FD_LIKELY( child != new_root_ele ) ) {
485+
fd_forest_deque_push_tail( queue, fd_forest_pool_idx( pool, child ) );
486+
}
487+
child = fd_forest_pool_ele( pool, child->sibling );
488+
}
489+
ulong remove = fd_forest_orphaned_idx_remove( orphaned, &head->slot, null, pool ); /* remove myself */
490+
remove = fd_ulong_if( remove == null, fd_forest_ancestry_idx_remove( ancestry, &head->slot, null, pool ), remove );
491+
fd_forest_pool_ele_release( pool, head ); /* free head */
492+
}
480493
return new_root_ele;
481494
}
482495

src/discof/forest/fd_forest.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@
4545

4646
struct __attribute__((aligned(128UL))) fd_forest_ele {
4747
ulong slot; /* map key */
48-
ulong prev; /* internal use by link_orphans */
4948
ulong next; /* internal use by fd_pool, fd_map_chain */
5049
ulong parent; /* pool idx of the parent in the tree */
5150
ulong child; /* pool idx of the left-child */
@@ -79,6 +78,12 @@ typedef struct fd_forest_ele fd_forest_ele_t;
7978
#define MAP_KEY slot
8079
#include "../../util/tmpl/fd_map_chain.c"
8180

81+
/* Internal use only for BFSing */
82+
#define DEQUE_NAME fd_forest_deque
83+
#define DEQUE_T ulong
84+
#include "../../util/tmpl/fd_deque_dynamic.c"
85+
86+
8287
/* fd_forest_t is the top-level structure that holds the root of
8388
the tree, as well as the memory pools and map structures.
8489
@@ -111,6 +116,7 @@ struct __attribute__((aligned(128UL))) fd_forest {
111116
ulong ancestry_gaddr; /* wksp_gaddr of fd_forest_ancestry */
112117
ulong frontier_gaddr; /* map of slot to ele (leaf that needs repair) */
113118
ulong orphaned_gaddr; /* map of parent_slot to singly-linked list of ele orphaned by that parent slot */
119+
ulong deque_gaddr; /* wksp gaddr of fd_forest_deque. internal use only for BFSing */
114120
ulong magic; /* ==FD_FOREST_MAGIC */
115121
};
116122
typedef struct fd_forest fd_forest_t;
@@ -137,13 +143,15 @@ fd_forest_footprint( ulong ele_max ) {
137143
FD_LAYOUT_APPEND(
138144
FD_LAYOUT_APPEND(
139145
FD_LAYOUT_APPEND(
146+
FD_LAYOUT_APPEND(
140147
FD_LAYOUT_INIT,
141148
alignof(fd_forest_t), sizeof(fd_forest_t) ),
142149
fd_fseq_align(), fd_fseq_footprint() ),
143150
fd_forest_pool_align(), fd_forest_pool_footprint( ele_max ) ),
144151
fd_forest_ancestry_align(), fd_forest_ancestry_footprint( ele_max ) ),
145152
fd_forest_frontier_align(), fd_forest_frontier_footprint( ele_max ) ),
146153
fd_forest_orphaned_align(), fd_forest_orphaned_footprint( ele_max ) ),
154+
fd_forest_deque_align(), fd_forest_deque_footprint( ele_max ) ),
147155
fd_forest_align() );
148156
}
149157

0 commit comments

Comments
 (0)