Skip to content

Commit 336f618

Browse files
committed
repair: fix forest and fec chainer publish edge cases
1 parent 090c57a commit 336f618

File tree

8 files changed

+376
-32
lines changed

8 files changed

+376
-32
lines changed

src/discof/forest/fd_forest.c

Lines changed: 103 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -190,14 +190,14 @@ fd_forest_verify( fd_forest_t const * forest ) {
190190
/* query queries for a connected ele keyed by slot. does not return
191191
orphaned ele. */
192192

193-
static fd_forest_ele_t *
194-
ancestry_frontier_query( fd_forest_t * forest, ulong slot ) {
195-
fd_forest_ele_t * pool = fd_forest_pool( forest );
196-
fd_forest_ele_t * ele = NULL;
197-
ele = fd_forest_ancestry_ele_query( fd_forest_ancestry( forest ), &slot, NULL, pool );
198-
ele = fd_ptr_if( !ele, fd_forest_frontier_ele_query( fd_forest_frontier( forest ), &slot, NULL, pool ), ele );
199-
return ele;
200-
}
193+
//static fd_forest_ele_t * FD_FN_UNUSED
194+
//ancestry_frontier_query( fd_forest_t * forest, ulong slot ) {
195+
//fd_forest_ele_t * pool = fd_forest_pool( forest );
196+
//fd_forest_ele_t * ele = NULL;
197+
//ele = fd_forest_ancestry_ele_query( fd_forest_ancestry( forest ), &slot, NULL, pool );
198+
//ele = fd_ptr_if( !ele, fd_forest_frontier_ele_query( fd_forest_frontier( forest ), &slot, NULL, pool ), ele );
199+
//return ele;
200+
//}
201201

202202
/* remove removes and returns a connected ele from ancestry or frontier
203203
maps. does not remove orphaned ele. does not unlink ele. */
@@ -352,6 +352,9 @@ insert( fd_forest_t * forest, ulong slot, ushort parent_off ) {
352352
# if FD_FOREST_USE_HANDHOLDING
353353
FD_TEST( parent_off <= slot ); /* caller err - inval */
354354
FD_TEST( fd_forest_pool_free( pool ) ); /* impl err - oom */
355+
if ( slot <= fd_forest_root_slot( forest ) ) {
356+
__asm__("int $3");
357+
}
355358
FD_TEST( slot > fd_forest_root_slot( forest ) ); /* caller error - inval */
356359
# endif
357360

@@ -381,9 +384,6 @@ insert( fd_forest_t * forest, ulong slot, ushort parent_off ) {
381384

382385
fd_forest_ele_t *
383386
fd_forest_query( fd_forest_t * forest, ulong slot ) {
384-
# if FD_FOREST_USE_HANDHOLDING
385-
FD_TEST( slot > fd_forest_root_slot( forest ) ); /* caller error - inval */
386-
# endif
387387
return query( forest, slot );
388388
}
389389

@@ -432,22 +432,39 @@ fd_forest_publish( fd_forest_t * forest, ulong new_root_slot ) {
432432

433433
fd_forest_ancestry_t * ancestry = fd_forest_ancestry( forest );
434434
fd_forest_frontier_t * frontier = fd_forest_frontier( forest );
435+
fd_forest_orphaned_t * orphaned = fd_forest_orphaned( forest );
435436
fd_forest_ele_t * pool = fd_forest_pool( forest );
436437
ulong null = fd_forest_pool_idx_null( pool );
437438

438439
fd_forest_ele_t * old_root_ele = fd_forest_pool_ele( pool, forest->root );
439-
fd_forest_ele_t * new_root_ele = ancestry_frontier_query( forest, new_root_slot );
440+
fd_forest_ele_t * new_root_ele = query( forest, new_root_slot );
440441

441-
# if FD_FOREST_USE_HANDHOLDING
442-
FD_TEST( new_root_ele ); /* caller error - not found */
443-
FD_TEST( new_root_ele->slot > old_root_ele->slot ); /* caller error - inval */
444-
# endif
442+
#if FD_FOREST_USE_HANDHOLDING
443+
if( FD_LIKELY( new_root_ele ) ) {
444+
FD_TEST( new_root_ele->slot > old_root_ele->slot ); /* caller error - inval */
445+
}
446+
#endif
447+
448+
/* Edge case: if we haven't been getting repairs and there is a
449+
gap between the root and orphans, we may publish forward to a slot that
450+
we don't have. The only case where this should happen is when we
451+
load a second incremental and that incremental slot lives in the
452+
gap. In that case this isn't a bug, but we should be treating this
453+
new root like the snapshot slot / init root. Should be happening
454+
very rarely given a well-functioning repair. */
455+
456+
if( FD_UNLIKELY( !new_root_ele ) ) {
457+
new_root_ele = acquire( forest, new_root_slot );
458+
new_root_ele->complete_idx = 0;
459+
new_root_ele->buffered_idx = 0;
460+
fd_forest_frontier_ele_insert( frontier, new_root_ele, pool );
461+
}
445462

446463
/* First, remove the previous root, and add it to a FIFO prune queue.
447464
head points to the queue head (initialized with old_root_ele). */
448465

449466
fd_forest_ele_t * head = ancestry_frontier_remove( forest, old_root_ele->slot );
450-
head->next = null;
467+
head->prev = null;
451468
fd_forest_ele_t * tail = head;
452469

453470
/* Second, BFS down the tree, inserting each ele into the prune queue
@@ -460,22 +477,85 @@ fd_forest_publish( fd_forest_t * forest, ulong new_root_slot ) {
460477
if( FD_LIKELY( child != new_root_ele ) ) { /* do not prune new root or descendants */
461478
ulong idx = fd_forest_ancestry_idx_remove( ancestry, &child->slot, null, pool );
462479
idx = fd_ulong_if( idx != null, idx, fd_forest_frontier_idx_remove( frontier, &child->slot, null, pool ) );
463-
tail->next = idx; /* insert prune queue */
480+
tail->prev = idx; /* insert prune queue */
464481
# if FD_FOREST_USE_HANDHOLDING
465-
FD_TEST( tail->next != null ); /* programming error in BFS */
482+
FD_TEST( tail->prev != null ); /* programming error in BFS */
466483
# endif
467-
tail = fd_forest_pool_ele( pool, tail->next ); /* advance prune queue */
468-
tail->next = null;
484+
tail = fd_forest_pool_ele( pool, tail->prev ); /* advance prune queue */
485+
tail->prev = null;
469486
}
470487
child = fd_forest_pool_ele( pool, child->sibling );
471488
}
472-
fd_forest_ele_t * next = fd_forest_pool_ele( pool, head->next ); /* FIFO pop */
489+
fd_forest_ele_t * next = fd_forest_pool_ele( pool, head->prev ); /* FIFO pop */
473490
fd_forest_pool_ele_release( pool, head ); /* free head */
474491
head = next;
475492
}
476493

494+
/* If there is nothing on the frontier, we have hit an edge case
495+
during catching up where all of our frontiers were < the new root.
496+
In that case we need to continue repairing from the new root, so
497+
add it to the frontier. */
498+
499+
if( FD_UNLIKELY( fd_forest_frontier_iter_done( fd_forest_frontier_iter_init( frontier, pool ), frontier, pool ) ) ) {
500+
fd_forest_ele_t * remove = fd_forest_ancestry_ele_remove( ancestry, &new_root_ele->slot, NULL, pool );
501+
if( FD_UNLIKELY( !remove ) ) {
502+
/* Very rare case where during boot we could publish to an orphaned slot */
503+
remove = fd_forest_orphaned_ele_remove( orphaned, &new_root_ele->slot, NULL, pool );
504+
}
505+
FD_TEST( remove == new_root_ele );
506+
fd_forest_frontier_ele_insert( frontier, new_root_ele, pool );
507+
new_root_ele->complete_idx = 0;
508+
new_root_ele->buffered_idx = 0;
509+
advance_frontier( forest, new_root_ele->slot, 0 );
510+
}
511+
477512
new_root_ele->parent = null; /* unlink new root from parent */
478-
forest->root = fd_forest_ancestry_idx_query( ancestry, &new_root_slot, null, pool );
513+
forest->root = fd_forest_pool_idx( pool, new_root_ele );
514+
515+
FD_TEST( !fd_forest_verify( forest ) );
516+
517+
/* Lastly, clean up orphans if there are orphan heads < new_root_slot.
518+
First, add any relevant orphans to the prune queue. FIXME: NEED TO REMOVE BEFORE MODIFYING NEXT PTR */
519+
520+
head = NULL;
521+
for( fd_forest_orphaned_iter_t iter = fd_forest_orphaned_iter_init( orphaned, pool );
522+
!fd_forest_orphaned_iter_done( iter, orphaned, pool );
523+
iter = fd_forest_orphaned_iter_next( iter, orphaned, pool ) ) {
524+
fd_forest_ele_t * ele = fd_forest_orphaned_iter_ele( iter, orphaned, pool );
525+
if( FD_UNLIKELY( ele->slot < new_root_slot ) ) {
526+
if( FD_UNLIKELY( !head ) ) {
527+
head = ele;
528+
head->prev = null;
529+
tail = ele;
530+
} else {
531+
tail->prev = iter.ele_idx;
532+
tail = fd_forest_pool_ele( pool, tail->prev );
533+
tail->prev = null;
534+
}
535+
}
536+
}
537+
538+
FD_TEST( !fd_forest_verify( forest ) );
539+
540+
541+
/* Now BFS and clean up children of these orphan heads */
542+
while( head ) {
543+
fd_forest_ele_t * child = fd_forest_pool_ele( pool, head->child );
544+
while( FD_LIKELY( child ) ) {
545+
if( FD_LIKELY( child != new_root_ele ) ) {
546+
tail->prev = fd_forest_pool_idx( pool, child ); /* insert prune queue */
547+
tail = fd_forest_pool_ele( pool, tail->prev ); /* advance prune queue */
548+
tail->prev = null;
549+
}
550+
child = fd_forest_pool_ele( pool, child->sibling );
551+
}
552+
ulong remove = fd_forest_orphaned_idx_remove( orphaned, &head->slot, null, pool ); /* remove myself */
553+
remove = fd_ulong_if( remove == null, fd_forest_ancestry_idx_remove( ancestry, &head->slot, null, pool ), remove );
554+
555+
fd_forest_ele_t * next = fd_forest_pool_ele( pool, head->prev ); /* FIFO pop */
556+
fd_forest_pool_ele_release( pool, head ); /* free head */
557+
head = next;
558+
}
479559
return new_root_ele;
480560
}
481561

src/discof/forest/fd_forest.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545

4646
struct __attribute__((aligned(128UL))) fd_forest_ele {
4747
ulong slot; /* map key */
48-
ulong prev; /* internal use by link_orphans */
48+
ulong prev; /* internal use by fd_forest_publish */
4949
ulong next; /* internal use by fd_pool, fd_map_chain */
5050
ulong parent; /* pool idx of the parent in the tree */
5151
ulong child; /* pool idx of the left-child */

src/discof/forest/test_forest.c

Lines changed: 114 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,14 +71,126 @@ test_publish( fd_wksp_t * wksp ) {
7171
fd_forest_t * forest = fd_forest_join( fd_forest_new( mem, ele_max, 42UL /* seed */ ) );
7272

7373
FD_TEST( forest );
74-
fd_forest_publish( setup_preorder( forest ), publish_test_cases[i] );
74+
setup_preorder( forest );
75+
fd_forest_publish( forest, publish_test_cases[i] );
7576
FD_TEST( !fd_forest_verify( forest ) );
7677
// fd_forest_print( forest );
7778

7879
fd_wksp_free_laddr( fd_forest_delete( fd_forest_leave( fd_forest_fini( forest ) ) ) );
7980
}
8081
}
8182

83+
void
84+
test_publish_incremental( fd_wksp_t * wksp ){
85+
/* As the name suggests, this tests the complications introduced by loading
86+
two incremental snapshots */
87+
88+
ulong ele_max = 8UL;
89+
void * mem = fd_wksp_alloc_laddr( wksp, fd_forest_align(), fd_forest_footprint( ele_max ), 1UL );
90+
FD_TEST( mem );
91+
fd_forest_t * forest = fd_forest_join( fd_forest_new( mem, ele_max, 42UL /* seed */ ) );
92+
93+
/* 1. Try publishing to a slot that doesn't exist
94+
95+
0 10 -> 11
96+
97+
*/
98+
99+
fd_forest_init( forest, 0 );
100+
fd_forest_data_shred_insert( forest, 11, 1, 0, 0, 1, 1 );
101+
102+
ulong new_root = 1;
103+
fd_forest_publish( forest, new_root );
104+
FD_TEST( fd_forest_root_slot( forest ) == new_root );
105+
FD_TEST( fd_forest_frontier_ele_query( fd_forest_frontier( forest ), &new_root, NULL, fd_forest_pool( forest ) ) );
106+
FD_TEST( !fd_forest_query( forest, 0 ) );
107+
108+
/* 2. Try publishing to a slot on the frontier
109+
110+
1 -> 2 -> 3 10 -> 11
111+
112+
*/
113+
114+
fd_forest_data_shred_insert( forest, 2, 1, 0, 0, 1, 1 );
115+
fd_forest_data_shred_insert( forest, 3, 1, 0, 0, 1, 1 );
116+
117+
ulong frontier = 3;
118+
FD_TEST( fd_forest_frontier_ele_query( fd_forest_frontier( forest ), &frontier, NULL, fd_forest_pool( forest ) ) );
119+
fd_forest_publish( forest, frontier );
120+
FD_TEST( fd_forest_root_slot( forest ) == frontier );
121+
FD_TEST( fd_forest_frontier_ele_query( fd_forest_frontier( forest ), &frontier, NULL, fd_forest_pool( forest ) ) );
122+
FD_TEST( !fd_forest_query( forest, 1 ) );
123+
FD_TEST( !fd_forest_query( forest, 2 ) );
124+
FD_TEST( fd_forest_query( forest, 10 ) );
125+
FD_TEST( fd_forest_query( forest, 11 ) );
126+
127+
/* 3. Try publishing to a slot in ancestry but in front of the frontier
128+
129+
frontier new_root
130+
3 -> 4 -> 5 -> 6 -> 7 10 -> 11
131+
132+
*/
133+
134+
fd_forest_data_shred_insert( forest, 4, 1, 0, 0, 0, 0 );
135+
fd_forest_data_shred_insert( forest, 5, 1, 0, 0, 0, 0 );
136+
fd_forest_data_shred_insert( forest, 6, 1, 0, 0, 0, 0 );
137+
fd_forest_data_shred_insert( forest, 7, 1, 0, 0, 0, 0 );
138+
139+
frontier = 4;
140+
new_root = 6;
141+
FD_TEST( fd_forest_frontier_ele_query( fd_forest_frontier( forest ), &frontier, NULL, fd_forest_pool( forest ) ) );
142+
fd_forest_publish( forest, new_root );
143+
FD_TEST( fd_forest_root_slot( forest ) == new_root );
144+
frontier = 7;
145+
FD_TEST( fd_forest_frontier_ele_query( fd_forest_frontier( forest ), &frontier, NULL, fd_forest_pool( forest ) ) );
146+
FD_TEST( !fd_forest_query( forest, 3 ) );
147+
FD_TEST( !fd_forest_query( forest, 4 ) );
148+
FD_TEST( !fd_forest_query( forest, 5 ) );
149+
150+
/* 4. Try publishing to an orphan slot
151+
152+
6 -> 7 10 -> 11
153+
8 -> 9 (should get pruned)
154+
*/
155+
156+
fd_forest_data_shred_insert( forest, 9, 1, 0, 0, 0, 0 );
157+
158+
new_root = 10;
159+
frontier = 11;
160+
fd_forest_print( forest );
161+
162+
fd_forest_publish( forest, new_root);
163+
FD_TEST( !fd_forest_verify( forest ) );
164+
FD_TEST( fd_forest_root_slot( forest ) == new_root );
165+
fd_forest_print( forest );
166+
FD_TEST( fd_forest_frontier_ele_query( fd_forest_frontier( forest ), &frontier, NULL, fd_forest_pool( forest ) ) );
167+
FD_TEST( !fd_forest_query( forest, 6 ) );
168+
FD_TEST( !fd_forest_query( forest, 7 ) );
169+
FD_TEST( !fd_forest_query( forest, 8 ) );
170+
FD_TEST( !fd_forest_query( forest, 9 ) );
171+
FD_TEST( fd_forest_query( forest, 10 ) );
172+
FD_TEST( fd_forest_query( forest, 11 ) );
173+
174+
/* 5. Try publishing to an orphan slot that is not a "head" of orphans
175+
(publish)
176+
10 -> 11 14 -> 15 -> 16
177+
178+
*/
179+
180+
fd_forest_data_shred_insert( forest, 14, 1, 0, 0, 0, 0 );
181+
fd_forest_data_shred_insert( forest, 15, 1, 0, 0, 0, 0 );
182+
fd_forest_data_shred_insert( forest, 16, 1, 0, 0, 0, 0 );
183+
184+
new_root = 15;
185+
frontier = 16;
186+
fd_forest_publish( forest, new_root );
187+
FD_TEST( !fd_forest_verify( forest ) );
188+
FD_TEST( fd_forest_root_slot( forest ) == new_root );
189+
FD_TEST( fd_forest_frontier_ele_query( fd_forest_frontier( forest ), &frontier, NULL, fd_forest_pool( forest ) ) );
190+
FD_TEST( !fd_forest_query( forest, 10 ) );
191+
FD_TEST( !fd_forest_query( forest, 11 ) );
192+
FD_TEST( !fd_forest_query( forest, 14 ) );
193+
}
82194
#define SORT_NAME sort
83195
#define SORT_KEY_T ulong
84196
#include "../../util/tmpl/fd_sort.c"
@@ -520,6 +632,7 @@ main( int argc, char ** argv ) {
520632
FD_TEST( wksp );
521633

522634
test_publish( wksp );
635+
test_publish_incremental( wksp );
523636
test_out_of_order( wksp );
524637
test_forks( wksp );
525638
// test_print_tree( wksp );

0 commit comments

Comments
 (0)