contrib: repair analysis cli improvements #5465

Merged · 1 commit · Jul 10, 2025

Changes from all commits
contrib/repair-analysis/report.py — 91 changes: 68 additions & 23 deletions
@@ -5,7 +5,7 @@
 from matplotlib.backends.backend_pdf import PdfPages
 import sys
 import warnings
-
+import os
 """
 This script generates a report on repair analysis from one testnet run.
 1. Add the following to your testnet config.toml file:
@@ -19,7 +19,7 @@
 - /my/folder/fec_complete.csv

 3. Run this script with the following command:
-python3 report.py <testnet.log path> <request_data.csv path> <shred_data.csv path> <fec_complete.csv path (optional, skips some long-running steps)>
+python3 report.py <testnet.log path> <csv_folder path>

 If you are missing dependencies, make sure to install them with:
 python3 -m pip install pandas numpy matplotlib seaborn
@@ -33,6 +33,14 @@

 def match_repair_requests( requests, responses ):
     rsp = responses[responses['is_turbine'] == False]
+
+    # sanity check that no more than 1 pubkey has the same nonce
+    verify_nonce = rsp.groupby('nonce').agg({'src_ip': 'nunique'}).reset_index()
+    verify_nonce = verify_nonce[verify_nonce['src_ip'] > 1]
+    if len(verify_nonce) > 0:
+        print("Warning: repair responses from different sources share the same nonce:")
+        print(verify_nonce)
+
     rsp = rsp.groupby('nonce').agg({'timestamp': 'min', 'slot':'first', 'idx':'max'}).reset_index()

     # check which nonces are in requests but not in responses
@@ -180,6 +188,18 @@ def execution_stats( log_path, pdf ):
                 last_executed = int(line.split()[12][:-1]) # 13th word (index 12 in 0-based indexing)
                 break

+    if snapshot_slot is None:
+        # get user input from CLI instead
+        snapshot_slot = int(input('Couldn\'t find snapshot slot in log, please enter it manually: '))
+
+    if first_turbine is None:
+        # get user input from CLI instead
+        first_turbine = int(input('Couldn\'t find first turbine slot in log, please enter it manually: '))
+
+    if last_executed is None:
+        # get user input from CLI instead
+        last_executed = int(input('Couldn\'t find last executed slot in log, please enter it manually: '))
+
     # Output the extracted values
     print(f'snapshot_slot = {snapshot_slot}')
     print(f'first_turbine = {first_turbine}')
@@ -556,13 +576,15 @@ def generate_report( log_path, request_data_path, shred_data_path, peers_data_pa
                                on_bad_lines='skip',
                                skipfooter=1 ) # because of the buffered writer the last row is probably incomplete

-    repair_requests = pd.read_csv( request_data_path,
-                                   dtype={'dst_ip': str, 'dst_port': int, 'timestamp': int, 'slot': int, 'idx': int, 'nonce': int },
-                                   skipfooter=1 )
+    if request_data_path:
+        repair_requests = pd.read_csv( request_data_path,
+                                       dtype={'dst_ip': str, 'dst_port': int, 'timestamp': int, 'slot': int, 'idx': int, 'nonce': int },
+                                       skipfooter=1 )

-    peers_data = pd.read_csv( peers_data_path,
-                              dtype={'peer_ip4_addr': int, 'peer_port': int, 'pubkey':str, 'turbine': bool },
-                              on_bad_lines='skip',
+    if peers_data_path:
+        peers_data = pd.read_csv( peers_data_path,
+                                  dtype={'peer_ip4_addr': int, 'peer_port': int, 'pubkey':str, 'turbine': bool },
+                                  on_bad_lines='skip',
                                   skipfooter=1 )

     # if we have a fec complete file, read it in
@@ -588,36 +610,59 @@
     catchup = shreds_data[shreds_data['slot'].between(snapshot_slot, first_turbine - 1)]
     live = shreds_data[shreds_data['slot'].between(first_turbine, last_executed)]

-    catchup_rq = repair_requests[repair_requests['slot'].between(snapshot_slot, first_turbine - 1)]
-    live_rq = repair_requests[repair_requests['slot'].between(first_turbine, last_executed)]
-
-    turbine_stats(catchup, live)
+    if request_data_path:
+        catchup_rq = repair_requests[repair_requests['slot'].between(snapshot_slot, first_turbine - 1)]
+        live_rq = repair_requests[repair_requests['slot'].between(first_turbine, last_executed)]

-    catchup = catchup[catchup['timestamp'] >= first_turbine_accept_ts] # only keep shreds that were accepted after the first turbine
-    shreds_data = shreds_data[shreds_data['timestamp'] >= first_turbine_accept_ts] # only keep shreds that were accepted after the first turbine
-    peer_stats( catchup, catchup_rq, live, live_rq, pdf )
+    turbine_stats(catchup, live)
+
+    catchup = catchup[catchup['timestamp'] >= first_turbine_accept_ts] # only keep shreds that were accepted after the first turbine
+    shreds_data = shreds_data[shreds_data['timestamp'] >= first_turbine_accept_ts] # only keep shreds that were accepted after the first turbine
+
+    if request_data_path: # peer_stats needs catchup_rq/live_rq, which only exist when request_data.csv was found
+        peer_stats( catchup, catchup_rq, live, live_rq, pdf )

     if fec_complete_path:
         completion_times( fec_stats, shreds_data, first_turbine, pdf )

-    print_slots(repair_requests, shreds_data, snapshot_slot, first_turbine, pdf)
+    if request_data_path:
+        print_slots(repair_requests, shreds_data, snapshot_slot, first_turbine, pdf)

 if __name__ == "__main__":
-    if len(sys.argv) < 4:
+    if len(sys.argv) < 3:
         print('Add: [tiles.shredcap] \n\t enabled = true \n\t folder_path = /my/folder_for_csv_dump \n to your testnet config.toml file to enable the report generation.')
-        print('Usage: python report.py <testnet.log path> <request_data.csv path> <shred_data.csv path> <peers_data.csv> <fec_complete.csv path (optional)>')
+        print('Usage: python report.py <testnet.log path> <csv_folder_path>')
         print('Report will automatically be saved as report.pdf in the current directory.')
         sys.exit(1)

-    log_path = sys.argv[1]
-    request_data_path = sys.argv[2]
-    shred_data_path = sys.argv[3]
-    peers_data_path = sys.argv[4]
-    fec_complete_path = sys.argv[5] if len(sys.argv) > 5 else None
+    log_path = sys.argv[1]
+    csv_path = sys.argv[2]
+    # check that the csv folder exists
+    if not os.path.exists(csv_path):
+        print(f'Error: {csv_path} does not exist')
+        sys.exit(1)

+    csv_paths = { 'shred_data.csv' : os.path.join(csv_path, 'shred_data.csv'),
+                  'request_data.csv' : os.path.join(csv_path, 'request_data.csv'),
+                  'peers_data.csv' : os.path.join(csv_path, 'peers.csv'),
+                  'fec_complete.csv' : os.path.join(csv_path, 'fec_complete.csv') }
+
+    # null out entries whose files are missing so downstream loaders can treat them as optional
+    for csv_name, csv_file in csv_paths.items():
+        if not os.path.exists(csv_file):
+            csv_paths[csv_name] = None
+
+    for csv_name, csv_file in csv_paths.items():
+        print(f'Found {csv_name}: {csv_file}')

     output_path = 'report.pdf'
     pdf = PdfPages('report.pdf')
-    generate_report(log_path, request_data_path, shred_data_path, peers_data_path, fec_complete_path, pdf)
+
+    generate_report(log_path,
+                    csv_paths['request_data.csv'],
+                    csv_paths['shred_data.csv'],
+                    csv_paths['peers_data.csv'],
+                    csv_paths['fec_complete.csv'],
+                    pdf)
     print(f'Graphs generated at: {output_path}')

     pdf.close()
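
A note on the new nonce sanity check in match_repair_requests: per the diff's own comment, a nonce should belong to a single peer, so a nonce answered by more than one source IP is suspicious. Below is a minimal sketch of what the groupby/nunique idiom flags, using a toy DataFrame with the column names from the diff (the data is invented):

import pandas as pd

# Toy repair-response data: nonce 7 was answered by two different source IPs,
# which the check should flag; nonce 9 is unremarkable.
rsp = pd.DataFrame({
    'nonce':  [7, 7, 9],
    'src_ip': ['10.0.0.1', '10.0.0.2', '10.0.0.3'],
})

# Count distinct source IPs per nonce, as in match_repair_requests().
verify_nonce = rsp.groupby('nonce').agg({'src_ip': 'nunique'}).reset_index()
print(verify_nonce[verify_nonce['src_ip'] > 1])  # one row: nonce 7, src_ip == 2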
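The read_csv calls pass skipfooter=1 to drop the possibly-incomplete last row. One pandas caveat worth knowing (a library behavior, not something this PR changes): skipfooter is not supported by the C parser, so these calls fall back to the slower python engine and emit a ParserWarning, which is plausibly why the script imports warnings. A hedged sketch of the two ways to make that explicit, assuming a shred_data.csv in the working directory:

import warnings
import pandas as pd

# Option 1: name the engine explicitly, which avoids the fallback warning.
df = pd.read_csv('shred_data.csv', skipfooter=1, engine='python')

# Option 2: keep the call as in the diff and filter the warning instead.
with warnings.catch_warnings():
    warnings.simplefilter('ignore', pd.errors.ParserWarning)
    df = pd.read_csv('shred_data.csv', skipfooter=1)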
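The new __main__ block boils down to: map each expected CSV name to a path under the given folder, null out entries whose files are missing, and let every downstream loader treat its input as optional. The same logic as a standalone helper (a sketch; discover_csvs is a hypothetical name, and the peers_data.csv entry intentionally points at peers.csv, mirroring the diff):

import os

def discover_csvs(folder):
    # Map each expected CSV to its full path, or None when the file is absent.
    expected = { 'shred_data.csv'   : 'shred_data.csv',
                 'request_data.csv' : 'request_data.csv',
                 'peers_data.csv'   : 'peers.csv',       # dumped under a different name
                 'fec_complete.csv' : 'fec_complete.csv' }
    paths = {}
    for name, filename in expected.items():
        full = os.path.join(folder, filename)
        paths[name] = full if os.path.exists(full) else None
    return paths

Building the result in a fresh dict also sidesteps the diff's pattern of assigning into csv_paths while iterating over it, which is legal in Python (the key set never changes) but easy to break in later edits.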
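Finally, the catchup/live split in generate_report relies on pandas Series.between, which is inclusive on both ends — hence the first_turbine - 1 upper bound for the catchup window. A toy illustration (slot numbers invented):

import pandas as pd

shreds_data = pd.DataFrame({'slot': [100, 120, 150, 180, 220]})
snapshot_slot, first_turbine, last_executed = 100, 150, 200

catchup = shreds_data[shreds_data['slot'].between(snapshot_slot, first_turbine - 1)]
live    = shreds_data[shreds_data['slot'].between(first_turbine, last_executed)]

print(catchup['slot'].tolist())  # [100, 120] -- slots 100..149
print(live['slot'].tolist())     # [150, 180] -- slot 220 is past last_executed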