Skip to content

Commit 71f147a

Browse files
committed
pack,poh: when pack in perf mode, end slot when poh is done
1 parent 5f03408 commit 71f147a

File tree

6 files changed

+67
-8
lines changed

6 files changed

+67
-8
lines changed

src/app/fdctl/topology.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,7 @@ fd_topo_initialize( config_t * config ) {
431431
tile->poh.plugins_enabled = plugins_enabled;
432432
tile->poh.bank_cnt = config->layout.bank_tile_count;
433433
tile->poh.lagged_consecutive_leader_start = config->tiles.poh.lagged_consecutive_leader_start;
434+
tile->poh.perf_mode = (config->tiles.pack.schedule_strategy_enum==0);
434435

435436
if( FD_UNLIKELY( config->tiles.bundle.enabled ) ) {
436437
tile->poh.bundle.enabled = 1;

src/disco/fd_disco_base.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,11 @@
1818
#define POH_PKT_TYPE_MICROBLOCK (0UL)
1919
#define POH_PKT_TYPE_BECAME_LEADER (1UL)
2020
#define POH_PKT_TYPE_DONE_PACKING (2UL)
21+
#define POH_PKT_TYPE_DONE_HASHING (2UL)
2122
#define POH_PKT_TYPE_FEAT_ACT_SLOT (3UL)
23+
/* Since we're out of bits, alias DONE_PACKING and DONE_HASHING.
24+
DONE_PACKING will be sent from pack, but DONE_HASHING will only be
25+
sent from poh to pack, so they don't actually conflict. */
2226

2327
#define REPLAY_FLAG_FINISHED_BLOCK (0x01UL)
2428
#define REPLAY_FLAG_PACKED_MICROBLOCK (0x02UL)

src/disco/gui/fd_gui_tile.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -216,9 +216,10 @@ after_frag( fd_gui_ctx_t * ctx,
216216

217217
if( FD_LIKELY( ctx->in_kind[ in_idx ]==IN_KIND_PLUGIN ) ) fd_gui_plugin_message( ctx->gui, sig, ctx->buf );
218218
else if( FD_UNLIKELY( ctx->in_kind[ in_idx ]==IN_KIND_POH_PACK ) ) {
219-
FD_TEST( fd_disco_poh_sig_pkt_type( sig )==POH_PKT_TYPE_BECAME_LEADER );
220-
fd_became_leader_t * became_leader = (fd_became_leader_t *)ctx->buf;
221-
fd_gui_became_leader( ctx->gui, fd_frag_meta_ts_decomp( tspub, fd_tickcount() ), fd_disco_poh_sig_slot( sig ), became_leader->slot_start_ns, became_leader->slot_end_ns, became_leader->limits.slot_max_cost, became_leader->max_microblocks_in_slot );
219+
if( FD_LIKELY( fd_disco_poh_sig_pkt_type( sig )==POH_PKT_TYPE_BECAME_LEADER ) ) {
220+
fd_became_leader_t * became_leader = (fd_became_leader_t *)ctx->buf;
221+
fd_gui_became_leader( ctx->gui, fd_frag_meta_ts_decomp( tspub, fd_tickcount() ), fd_disco_poh_sig_slot( sig ), became_leader->slot_start_ns, became_leader->slot_end_ns, became_leader->limits.slot_max_cost, became_leader->max_microblocks_in_slot );
222+
}
222223
} else if( FD_UNLIKELY( ctx->in_kind[ in_idx ]==IN_KIND_PACK_BANK ) ) {
223224
if( FD_LIKELY( fd_disco_poh_sig_pkt_type( sig )==POH_PKT_TYPE_MICROBLOCK ) ) {
224225
FD_TEST( sz<ULONG_MAX );

src/disco/pack/fd_pack_tile.c

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,9 @@ typedef struct {
128128

129129
/* One of the FD_PACK_STRATEGY_* values defined above */
130130
int strategy;
131+
/* Set to 1 when we get a message from PoH indicating it's pretty much
132+
done with the slot. Only matters if strategy==PERF */
133+
int poh_done;
131134

132135
/* The value passed to fd_pack_new, etc. */
133136
ulong max_pending_transactions;
@@ -544,6 +547,7 @@ after_credit( fd_pack_ctx_t * ctx,
544547
ctx->drain_banks = 1;
545548
ctx->leader_slot = ULONG_MAX;
546549
ctx->slot_microblock_cnt = 0UL;
550+
ctx->poh_done = 0;
547551
fd_pack_end_block( ctx->pack );
548552
remove_ib( ctx );
549553

@@ -583,6 +587,15 @@ after_credit( fd_pack_ctx_t * ctx,
583587
if( FD_UNLIKELY( (ulong)(now-ctx->last_successful_insert) <
584588
ctx->wait_duration_ticks[ fd_ulong_min( fd_pack_avail_txn_cnt( ctx->pack ), MAX_TXN_PER_MICROBLOCK ) ] ) ) {
585589
update_metric_state( ctx, now, FD_PACK_METRIC_STATE_TRANSACTIONS, 0 );
590+
591+
if( FD_UNLIKELY( (ctx->strategy==FD_PACK_STRATEGY_PERF) &
592+
(ctx->bank_idle_bitset==fd_ulong_mask_lsb( (int)bank_cnt )) &
593+
ctx->poh_done ) ) {
594+
FD_LOG_INFO(( "pack_end_early(ms=%li, reason=txn)", (ctx->slot_end_ns - ctx->approx_wallclock_ns)/1000000L ));
595+
/* In perf mode, if PoH is done, we're out of transactions, and we
596+
can't schedule anything, just try to end the block ASAP. */
597+
ctx->slot_end_ns = ctx->approx_wallclock_ns;
598+
}
586599
return;
587600
}
588601

@@ -715,6 +728,14 @@ after_credit( fd_pack_ctx_t * ctx,
715728

716729
memcpy( ctx->last_sched_metrics->all, (ulong const *)fd_metrics_tl, sizeof(ctx->last_sched_metrics->all) );
717730
ctx->last_sched_metrics->time = now2;
731+
} else if( FD_UNLIKELY( (ctx->strategy==FD_PACK_STRATEGY_PERF) &
732+
(ctx->bank_idle_bitset==fd_ulong_mask_lsb( (int)bank_cnt )) &
733+
ctx->poh_done ) ) {
734+
FD_LOG_INFO(( "pack_end_early(ms=%li, reason=sched)", (ctx->slot_end_ns - ctx->approx_wallclock_ns)/1000000L ));
735+
/* In perf mode, if PoH is done and we can't schedule anything
736+
even though all the banks are idle, just schedule ending the
737+
block ASAP. */
738+
ctx->slot_end_ns = ctx->approx_wallclock_ns;
718739
}
719740
}
720741

@@ -748,6 +769,7 @@ after_credit( fd_pack_ctx_t * ctx,
748769
ctx->drain_banks = 1;
749770
ctx->leader_slot = ULONG_MAX;
750771
ctx->slot_microblock_cnt = 0UL;
772+
ctx->poh_done = 0;
751773
fd_pack_end_block( ctx->pack );
752774
remove_ib( ctx );
753775

@@ -771,7 +793,13 @@ during_frag( fd_pack_ctx_t * ctx,
771793

772794
switch( ctx->in_kind[ in_idx ] ) {
773795
case IN_KIND_POH: {
774-
/* Not interested in stamped microblocks, only leader updates. */
796+
/* Not interested in stamped microblocks, only leader updates and
797+
possibly done hashing messages. */
798+
if( FD_UNLIKELY( fd_disco_poh_sig_pkt_type( sig )==POH_PKT_TYPE_DONE_HASHING ) ) {
799+
ctx->poh_done = ctx->leader_slot == fd_disco_poh_sig_slot( sig );
800+
FD_LOG_INFO(( "PoH done %i hashing for slot %lu", ctx->poh_done, fd_disco_poh_sig_slot( sig ) ));
801+
}
802+
775803
if( fd_disco_poh_sig_pkt_type( sig )!=POH_PKT_TYPE_BECAME_LEADER ) return;
776804

777805
/* There was a leader transition. Handle it. */
@@ -787,6 +815,7 @@ during_frag( fd_pack_ctx_t * ctx,
787815
ctx->drain_banks = 1;
788816
ctx->leader_slot = ULONG_MAX;
789817
ctx->slot_microblock_cnt = 0UL;
818+
ctx->poh_done = 0;
790819
fd_pack_end_block( ctx->pack );
791820
remove_ib( ctx );
792821
}
@@ -796,6 +825,7 @@ during_frag( fd_pack_ctx_t * ctx,
796825
FD_MCNT_INC( PACK, TRANSACTION_EXPIRED, exp_cnt );
797826

798827
fd_became_leader_t * became_leader = (fd_became_leader_t *)dcache_entry;
828+
ctx->poh_done = 0;
799829
ctx->leader_bank = became_leader->bank;
800830
ctx->slot_max_microblocks = became_leader->max_microblocks_in_slot;
801831
/* Reserve some space in the block for ticks */
@@ -1175,6 +1205,7 @@ unprivileged_init( fd_topo_t * topo,
11751205
ctx->cur_spot = NULL;
11761206
ctx->is_bundle = 0;
11771207
ctx->strategy = tile->pack.schedule_strategy;
1208+
ctx->poh_done = 0;
11781209
ctx->max_pending_transactions = tile->pack.max_pending_transactions;
11791210
ctx->leader_slot = ULONG_MAX;
11801211
ctx->leader_bank = NULL;

src/disco/topo/fd_topo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,7 @@ struct fd_topo_tile {
234234
struct {
235235
int lagged_consecutive_leader_start;
236236
int plugins_enabled;
237+
int perf_mode;
237238
ulong bank_cnt;
238239
char identity_key_path[ PATH_MAX ];
239240
struct {

src/discoh/poh/fd_poh_tile.c

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,13 @@ typedef struct {
387387
microblocks that the pack tile can publish in each slot. */
388388
ulong max_microblocks_per_slot;
389389

390+
/* If perf_mode==1, then instead of using slot_duration_ns and
391+
hashcnt_duration_ns to pace hashes during our leader slot, we just
392+
hash as fast as possible. In practice, this means we'll hardly mix
393+
in any transactions for the first portion of the slot, which can
394+
cause backpressure. */
395+
int perf_mode;
396+
390397
/* Consensus-critical slot cost limits. */
391398
struct {
392399
ulong slot_max_cost;
@@ -483,6 +490,10 @@ typedef struct {
483490
/* If an in progress frag should be skipped */
484491
int skip_frag;
485492

493+
/* If the poh tile has sent a DONE_HASHING message to pack for this
494+
slot. */
495+
int sent_done_hashing;
496+
486497
ulong max_active_descendant;
487498

488499
/* If we currently are the leader according to the clock AND we have
@@ -1096,6 +1107,7 @@ fd_ext_poh_begin_leader( void const * bank,
10961107
ctx->microblocks_lower_bound = 0UL;
10971108
ctx->cus_used = 0UL;
10981109
ctx->expect_microblock_idx = 0UL;
1110+
ctx->sent_done_hashing = 0;
10991111

11001112
ctx->limits.slot_max_cost = cus_block_limit;
11011113
ctx->limits.slot_max_vote_cost = cus_vote_cost_limit;
@@ -1631,7 +1643,7 @@ after_credit( fd_poh_ctx_t * ctx,
16311643
ulong target_hashcnt;
16321644
if( FD_LIKELY( !is_leader ) ) {
16331645
target_hashcnt = (ulong)((double)(now - ctx->reset_slot_start_ns) / ctx->hashcnt_duration_ns) - (ctx->slot-ctx->reset_slot)*ctx->hashcnt_per_slot;
1634-
} else {
1646+
} else if( FD_LIKELY( ctx->perf_mode==0 ) ) {
16351647
/* We might have gotten very behind on hashes, but if we are leader
16361648
we want to catch up gradually over the remainder of our leader
16371649
slot, not all at once right now. This helps keep the tile from
@@ -1641,7 +1653,10 @@ after_credit( fd_poh_ctx_t * ctx,
16411653
double actual_slot_duration_ns = ctx->slot_duration_ns<(double)(ctx->leader_bank_start_ns - expected_slot_start_ns) ? 0.0 : ctx->slot_duration_ns - (double)(ctx->leader_bank_start_ns - expected_slot_start_ns);
16421654
double actual_hashcnt_duration_ns = actual_slot_duration_ns / (double)ctx->hashcnt_per_slot;
16431655
target_hashcnt = fd_ulong_if( actual_hashcnt_duration_ns==0.0, restricted_hashcnt, (ulong)((double)(now - ctx->leader_bank_start_ns) / actual_hashcnt_duration_ns) );
1656+
} else {
1657+
target_hashcnt = ULONG_MAX; /* It gets clamped down immediately */
16441658
}
1659+
16451660
/* Clamp to [min_hashcnt, restricted_hashcnt] as above */
16461661
target_hashcnt = fd_ulong_max( fd_ulong_min( target_hashcnt, restricted_hashcnt ), min_hashcnt );
16471662

@@ -1689,9 +1704,13 @@ after_credit( fd_poh_ctx_t * ctx,
16891704

16901705
*charge_busy = 1;
16911706

1692-
if( FD_LIKELY( ctx->hashcnt<target_hashcnt ) ) {
1693-
fd_sha256_hash_32_repeated( ctx->hash, ctx->hash, target_hashcnt-ctx->hashcnt );
1694-
ctx->hashcnt = target_hashcnt;
1707+
fd_sha256_hash_32_repeated( ctx->hash, ctx->hash, target_hashcnt-ctx->hashcnt );
1708+
ctx->hashcnt = target_hashcnt;
1709+
1710+
if( FD_UNLIKELY( (ctx->hashcnt==restricted_hashcnt) && (!ctx->sent_done_hashing) ) ) {
1711+
ulong sig = fd_disco_poh_sig( ctx->slot, POH_PKT_TYPE_DONE_HASHING, 0UL );
1712+
fd_stem_publish( ctx->stem, ctx->pack_out->idx, sig, ctx->pack_out->chunk, 0UL, 0UL, 0UL, 0UL );
1713+
ctx->sent_done_hashing = 1;
16951714
}
16961715

16971716
if( FD_UNLIKELY( ctx->hashcnt==ctx->hashcnt_per_slot ) ) {
@@ -2241,6 +2260,8 @@ unprivileged_init( fd_topo_t * topo,
22412260
ctx->highwater_leader_slot = ULONG_MAX;
22422261
ctx->next_leader_slot = ULONG_MAX;
22432262
ctx->reset_slot = ULONG_MAX;
2263+
ctx->sent_done_hashing = 0;
2264+
ctx->perf_mode = tile->poh.perf_mode;
22442265

22452266
ctx->lagged_consecutive_leader_start = tile->poh.lagged_consecutive_leader_start;
22462267
ctx->expect_sequential_leader_slot = ULONG_MAX;

0 commit comments

Comments
 (0)