Skip to content

Commit af76f0d

Browse files
merge
2 parents cca1b3c + 5e44b59 commit af76f0d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

75 files changed

+6604
-718
lines changed

.gitignore

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -64,19 +64,18 @@ deps-bundle.tar.zst
6464
/book/bun.lockb
6565

6666
# TVU
67-
mainnet.toml
68-
testnet.toml
69-
privatenet.toml
70-
localnet.toml
71-
sim.toml
72-
backtest.toml
73-
mainnet.sh
74-
testnet.sh
75-
privatenet.sh
76-
localnet.sh
77-
sim.sh
78-
backtest.sh
79-
local.toml
67+
/mainnet.toml
68+
/testnet.toml
69+
/privatenet.toml
70+
/localnet.toml
71+
/sim.toml
72+
/backtest.toml
73+
/mainnet.sh
74+
/testnet.sh
75+
/privatenet.sh
76+
/localnet.sh
77+
/sim.sh
78+
/backtest.sh
8079

8180
# Python
8281
venv

book/api/metrics-generated.md

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
| <span class="metrics-name">tile_&#8203;tid</span> | gauge | The thread ID of the tile. Always the same as the Pid in production, but might be different in development. |
2929
| <span class="metrics-name">tile_&#8203;context_&#8203;switch_&#8203;involuntary_&#8203;count</span> | counter | The number of involuntary context switches. |
3030
| <span class="metrics-name">tile_&#8203;context_&#8203;switch_&#8203;voluntary_&#8203;count</span> | counter | The number of voluntary context switches. |
31-
| <span class="metrics-name">tile_&#8203;status</span> | gauge | The current status of the tile. 0 is booting, 1 is running. |
31+
| <span class="metrics-name">tile_&#8203;status</span> | gauge | The current status of the tile. 0 is booting, 1 is running, 2 is shutdown. |
3232
| <span class="metrics-name">tile_&#8203;heartbeat</span> | gauge | The last UNIX timestamp in nanoseconds that the tile heartbeated. |
3333
| <span class="metrics-name">tile_&#8203;in_&#8203;backpressure</span> | gauge | Whether the tile is currently backpressured or not, either 1 or 0. |
3434
| <span class="metrics-name">tile_&#8203;backpressure_&#8203;count</span> | counter | Number of times the tile has had to wait for one or more consumers to catch up to resume publishing. |
@@ -791,3 +791,54 @@
791791
| <span class="metrics-name">send_&#8203;receive_&#8203;duration_&#8203;seconds</span> | histogram | Duration spent receiving packets |
792792

793793
</div>
794+
795+
## Snaprd Tile
796+
797+
<div class="metrics">
798+
799+
| Metric | Type | Description |
800+
|--------|------|-------------|
801+
| <span class="metrics-name">snaprd_&#8203;state</span> | gauge | State of the tile. 0 = waiting for at least one peer from gossip, 1 = collecting peers from gossip, 2 = pinging peers, 3 = collecting ping responses, 4 = reading full snapshot file, 5 = reading incremental snapshot file, 6 = downloading full snapshot file, 7 = downloading incremental snapshot file, 8 = pinging peers before loading the incremental snapshot, 9 = collecting ping responses before loading the incremental snapshot, 10 = waiting for full snapshot to finish loading, 11 = waiting for incremental snapshot to finish loading, 12 = done |
802+
| <span class="metrics-name">snaprd_&#8203;full_&#8203;num_&#8203;retries</span> | counter | Number of times we aborted and retried full snapshot download because the peer was too slow |
803+
| <span class="metrics-name">snaprd_&#8203;incremental_&#8203;num_&#8203;retries</span> | counter | Number of times we aborted and retried incremental snapshot download because the peer was too slow |
804+
| <span class="metrics-name">snaprd_&#8203;full_&#8203;bytes_&#8203;read</span> | gauge | Number of bytes read so far from the full snapshot. Might decrease if snapshot load is aborted and restarted |
805+
| <span class="metrics-name">snaprd_&#8203;full_&#8203;bytes_&#8203;total</span> | gauge | Total size of the full snapshot file. Might change if snapshot load is aborted and restarted |
806+
| <span class="metrics-name">snaprd_&#8203;full_&#8203;download_&#8203;retries</span> | gauge | Number of times we retried the full snapshot download because the peer was too slow |
807+
| <span class="metrics-name">snaprd_&#8203;incremental_&#8203;bytes_&#8203;read</span> | gauge | Number of bytes read so far from the incremental snapshot. Might decrease if snapshot load is aborted and restarted |
808+
| <span class="metrics-name">snaprd_&#8203;incremental_&#8203;bytes_&#8203;total</span> | gauge | Total size of the incremental snapshot file. Might change if snapshot load is aborted and restarted |
809+
| <span class="metrics-name">snaprd_&#8203;incremental_&#8203;download_&#8203;retries</span> | gauge | Number of times we retried the incremental snapshot download because the peer was too slow |
810+
811+
</div>
812+
813+
## Snapdc Tile
814+
815+
<div class="metrics">
816+
817+
| Metric | Type | Description |
818+
|--------|------|-------------|
819+
| <span class="metrics-name">snapdc_&#8203;state</span> | gauge | State of the tile. 0 = waiting for compressed byte stream, 1 = decompressing full snapshot, 2 = decompressing incremental snapshot, 3 = Done |
820+
| <span class="metrics-name">snapdc_&#8203;full_&#8203;compressed_&#8203;bytes_&#8203;read</span> | gauge | Number of bytes read so far from the compressed full snapshot file. Might decrease if snapshot load is aborted and restarted |
821+
| <span class="metrics-name">snapdc_&#8203;full_&#8203;decompressed_&#8203;bytes_&#8203;read</span> | gauge | Number of bytes read so far from the decompressed full snapshot file. Might decrease if snapshot load is aborted and restarted |
822+
| <span class="metrics-name">snapdc_&#8203;full_&#8203;decompressed_&#8203;bytes_&#8203;total</span> | gauge | Total size of the decompressed full snapshot file. Might change if snapshot load is aborted and restarted |
823+
| <span class="metrics-name">snapdc_&#8203;incremental_&#8203;compressed_&#8203;bytes_&#8203;read</span> | gauge | Number of bytes read so far from the compressed incremental snapshot file. Might decrease if snapshot load is aborted and restarted |
824+
| <span class="metrics-name">snapdc_&#8203;incremental_&#8203;decompressed_&#8203;bytes_&#8203;read</span> | gauge | Number of bytes read so far from the decompressed incremental snapshot file. Might decrease if snapshot load is aborted and restarted |
825+
| <span class="metrics-name">snapdc_&#8203;incremental_&#8203;decompressed_&#8203;bytes_&#8203;total</span> | gauge | Total size of the decompressed incremental snapshot file. Might change if snapshot load is aborted and restarted |
826+
827+
</div>
828+
829+
## Snapin Tile
830+
831+
<div class="metrics">
832+
833+
| Metric | Type | Description |
834+
|--------|------|-------------|
835+
| <span class="metrics-name">snapin_&#8203;state</span> | gauge | State of the tile. 0 = waiting for decompressed snapshot bytestream, 1 = processing full snapshot, 2 = processing incremental snapshot, 3 = Done |
836+
| <span class="metrics-name">snapin_&#8203;full_&#8203;accounts_&#8203;files_&#8203;processed</span> | gauge | Number of accounts files (appendvecs) processed in the full snapshot. Might decrease if snapshot load is aborted and restarted |
837+
| <span class="metrics-name">snapin_&#8203;full_&#8203;accounts_&#8203;files_&#8203;total</span> | gauge | Total number of accounts files in the full snapshot. Might change if snapshot load is aborted and restarted |
838+
| <span class="metrics-name">snapin_&#8203;incremental_&#8203;accounts_&#8203;files_&#8203;processed</span> | gauge | Number of accounts files (appendvecs) processed in the incremental snapshot. Might decrease if snapshot load is aborted and restarted |
839+
| <span class="metrics-name">snapin_&#8203;incremental_&#8203;accounts_&#8203;files_&#8203;total</span> | gauge | Total number of accounts files in the incremental snapshot. Might change if snapshot load is aborted and restarted |
840+
| <span class="metrics-name">snapin_&#8203;full_&#8203;accounts_&#8203;processed</span> | gauge | Number of accounts processed in the full snapshot. Might decrease if snapshot load is aborted and restarted |
841+
| <span class="metrics-name">snapin_&#8203;incremental_&#8203;accounts_&#8203;processed</span> | gauge | Number of accounts processed in the incremental snapshot. Might decrease if snapshot load is aborted and restarted |
842+
| <span class="metrics-name">snapin_&#8203;accounts_&#8203;inserted</span> | gauge | Number of accounts inserted during snapshot loading. Might decrease if snapshot load is aborted and restarted |
843+
844+
</div>

src/app/firedancer-dev/Local.mk

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ $(call add-objs,commands/bench,fd_firedancer_dev)
1414
$(call add-objs,commands/dev,fd_firedancer_dev)
1515
$(call add-objs,commands/sim,fd_firedancer_dev)
1616
$(call add-objs,commands/backtest,fd_firedancer_dev)
17+
$(call add-objs,commands/snapshot_load,fd_firedancer_dev)
1718

1819
$(call make-bin,firedancer-dev,main,fd_firedancer_dev fd_firedancer fddev_shared fdctl_shared fdctl_platform fd_discof fd_disco fd_choreo fd_flamenco fd_funk fd_quic fd_tls fd_reedsol fd_waltz fd_tango fd_ballet fd_util firedancer_version,$(SECP256K1_LIBS) $(ROCKSDB_LIBS) $(OPENSSL_LIBS))
1920

src/app/firedancer-dev/commands/backtest.c

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "../../../discof/replay/fd_replay_notif.h"
2626
#include "../../../flamenco/runtime/fd_runtime.h"
2727
#include "../../../flamenco/runtime/fd_txncache.h"
28+
#include "../../../discof/restore/utils/fd_snapshot_messages.h"
2829

2930
#include <unistd.h> /* pause */
3031
extern fd_topo_obj_callbacks_t * CALLBACKS[];
@@ -85,7 +86,17 @@ backtest_topo( config_t * config ) {
8586
FOR(writer_tile_cnt) fd_topob_tile( topo, "writer", "writer", "metric_in", cpu_idx++, 0, 0 );
8687
8788
/**********************************************************************/
88-
/* Setup backtest->replay links in topo */
89+
/* Add the snapshot tiles to topo */
90+
/**********************************************************************/
91+
fd_topob_wksp( topo, "snaprd" );
92+
fd_topob_wksp( topo, "snapdc" );
93+
fd_topob_wksp( topo, "snapin" );
94+
fd_topob_tile( topo, "snaprd", "snaprd", "metric_in", cpu_idx++, 0, 0 );
95+
fd_topob_tile( topo, "snapdc", "snapdc", "metric_in", cpu_idx++, 0, 0 );
96+
fd_topo_tile_t * snapin_tile = fd_topob_tile( topo, "snapin", "snapin", "metric_in", cpu_idx++, 0, 0 );
97+
98+
/**********************************************************************/
99+
/* Setup backtest->replay link (repair_repla) in topo */
89100
/**********************************************************************/
90101

91102
/* The repair tile is replaced by the backtest tile for the repair to
@@ -110,6 +121,25 @@ backtest_topo( config_t * config ) {
110121
topo->links[ replay_tile->in_link_id[ fd_topo_find_tile_in_link( topo, replay_tile, "pack_replay", 0 ) ] ].permit_no_producers = 1;
111122
topo->links[ replay_tile->in_link_id[ fd_topo_find_tile_in_link( topo, replay_tile, "batch_replay", 0 ) ] ].permit_no_producers = 1;
112123

124+
/**********************************************************************/
125+
/* Setup snapshot links in topo */
126+
/**********************************************************************/
127+
fd_topob_wksp( topo, "snap_zstd" );
128+
fd_topob_wksp( topo, "snap_stream");
129+
fd_topob_wksp( topo, "snap_out" );
130+
fd_topob_wksp( topo, "replay_manif" );
131+
fd_topob_link( topo, "snap_out", "snap_out", 128UL, sizeof(fd_snapshot_manifest_t), 1UL );
132+
133+
fd_topob_link( topo, "snap_zstd", "snap_zstd", 512UL, 16384UL, 1UL );
134+
fd_topob_link( topo, "snap_stream", "snap_stream", 512UL, USHORT_MAX, 1UL );
135+
136+
fd_topob_tile_out( topo, "snaprd", 0UL, "snap_zstd", 0UL );
137+
fd_topob_tile_in ( topo, "snapdc", 0UL, "metric_in", "snap_zstd", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
138+
fd_topob_tile_out( topo, "snapdc", 0UL, "snap_stream", 0UL );
139+
fd_topob_tile_in ( topo, "snapin", 0UL, "metric_in", "snap_stream", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
140+
fd_topob_tile_out( topo, "snapin", 0UL, "snap_out", 0UL );
141+
fd_topob_tile_in ( topo, "replay", 0UL, "metric_in", "snap_out", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
142+
113143
/**********************************************************************/
114144
/* More backtest->replay links in topo */
115145
/**********************************************************************/
@@ -267,6 +297,15 @@ backtest_topo( config_t * config ) {
267297
FD_TEST( fd_pod_insertf_ulong( topo->props, busy_obj->id, "bank_busy.%lu", i ) );
268298
}
269299

300+
/* Replay decoded manifest dcache topo obj */
301+
fd_topo_obj_t * replay_manifest_dcache = fd_topob_obj( topo, "dcache", "replay_manif" );
302+
fd_pod_insertf_ulong( topo->props, 2UL << 30UL, "obj.%lu.data_sz", replay_manifest_dcache->id );
303+
fd_pod_insert_ulong( topo->props, "manifest_dcache", replay_manifest_dcache->id );
304+
305+
fd_topob_tile_uses( topo, snapin_tile, funk_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
306+
fd_topob_tile_uses( topo, snapin_tile, replay_manifest_dcache, FD_SHMEM_JOIN_MODE_READ_WRITE );
307+
fd_topob_tile_uses( topo, replay_tile, replay_manifest_dcache, FD_SHMEM_JOIN_MODE_READ_ONLY );
308+
270309
for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
271310
fd_topo_tile_t * tile = &topo->tiles[ i ];
272311
if( !strcmp( tile->name, "rocksdb" ) ) {

src/app/firedancer-dev/commands/sim.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,9 +189,7 @@ sim_topo( config_t * config ) {
189189

190190
/* Override */
191191
if( !strcmp( tile->name, "replay" ) ) {
192-
strncpy( tile->replay.incremental, config->tiles.replay.incremental, sizeof(tile->replay.incremental) );
193192
strncpy( tile->replay.slots_replayed, config->tiles.replay.slots_replayed, sizeof(tile->replay.slots_replayed) );
194-
strncpy( tile->replay.snapshot, config->tiles.replay.snapshot, sizeof(tile->replay.snapshot) );
195193
strncpy( tile->replay.status_cache, config->tiles.replay.status_cache, sizeof(tile->replay.status_cache) );
196194
}
197195
}

0 commit comments

Comments
 (0)