Skip to content

Commit 6af7c8f

Browse files
committed
contrib: easier to use repair analysis cli
1 parent fd9edac commit 6af7c8f

File tree

1 file changed

+68
-23
lines changed

1 file changed

+68
-23
lines changed

contrib/repair-analysis/report.py

Lines changed: 68 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from matplotlib.backends.backend_pdf import PdfPages
66
import sys
77
import warnings
8-
8+
import os
99
"""
1010
This script generates a repair analysis report from a single testnet run.
1111
1. Add the following to your testnet config.toml file:
@@ -19,7 +19,7 @@
1919
- /my/folder/fec_complete.csv
2020
2121
3. Run this script with the following command:
22-
python3 report.py <testnet.log path> <request_data.csv path> <shred_data.csv path> <fec_complete.csv path (optional, skips some long-running steps)>
22+
python3 report.py <testnet.log path> <csv_folder path>
2323
2424
If you are missing dependencies, make sure to install them with:
2525
python3 -m pip install pandas numpy matplotlib seaborn
@@ -33,6 +33,14 @@
3333

3434
def match_repair_requests( requests, responses ):
3535
rsp = responses[responses['is_turbine'] == False]
36+
37+
# sanity check that no nonce is shared by more than one source IP
38+
verify_nonce = rsp.groupby('nonce').agg({'src_ip': 'nunique'}).reset_index()
39+
verify_nonce = verify_nonce[verify_nonce['src_ip'] > 1]
40+
if len(verify_nonce) > 0:
41+
print("Repair responses with strangely the same nonce: ")
42+
print(verify_nonce)
43+
3644
rsp = rsp.groupby('nonce').agg({'timestamp': 'min', 'slot':'first', 'idx':'max'}).reset_index()
3745

3846
# check which nonces are in requests but not in responses
@@ -180,6 +188,18 @@ def execution_stats( log_path, pdf ):
180188
last_executed = int(line.split()[12][:-1]) # 13th word (index 12 in 0-based indexing)
181189
break
182190

191+
if snapshot_slot is None:
192+
# get user input from CLI instead
193+
snapshot_slot = int(input('Couldn\'t find snapshot slot in log, please enter it manually: '))
194+
195+
if first_turbine is None:
196+
# get user input from CLI instead
197+
# parse as int like the other manual prompts: first_turbine is later used in slot arithmetic (first_turbine - 1)
first_turbine = int(input('Couldn\'t find first turbine slot in log, please enter it manually: '))
198+
199+
if last_executed is None:
200+
# get user input from CLI instead
201+
last_executed = int(input('Couldn\'t find last executed slot in log, please enter it manually: '))
202+
183203
# Output the extracted values
184204
print(f'snapshot_slot = {snapshot_slot}')
185205
print(f'first_turbine = {first_turbine}')
@@ -556,13 +576,15 @@ def generate_report( log_path, request_data_path, shred_data_path, peers_data_pa
556576
on_bad_lines='skip',
557577
skipfooter=1 ) # because of the buffered writer the last row is probably incomplete
558578

559-
repair_requests = pd.read_csv( request_data_path,
560-
dtype={'dst_ip': str, 'dst_port': int, 'timestamp': int, 'slot': int, 'idx': int, 'nonce': int },
561-
skipfooter=1 )
579+
if request_data_path:
580+
repair_requests = pd.read_csv( request_data_path,
581+
dtype={'dst_ip': str, 'dst_port': int, 'timestamp': int, 'slot': int, 'idx': int, 'nonce': int },
582+
skipfooter=1 )
562583

563-
peers_data = pd.read_csv( peers_data_path,
564-
dtype={'peer_ip4_addr': int, 'peer_port': int, 'pubkey':str, 'turbine': bool },
565-
on_bad_lines='skip',
584+
if peers_data_path:
585+
peers_data = pd.read_csv( peers_data_path,
586+
dtype={'peer_ip4_addr': int, 'peer_port': int, 'pubkey':str, 'turbine': bool },
587+
on_bad_lines='skip',
566588
skipfooter=1 )
567589

568590
# if we have a fec complete file, read it in
@@ -588,36 +610,59 @@ def generate_report( log_path, request_data_path, shred_data_path, peers_data_pa
588610
catchup = shreds_data[shreds_data['slot'].between(snapshot_slot, first_turbine - 1)]
589611
live = shreds_data[shreds_data['slot'].between(first_turbine, last_executed)]
590612

591-
catchup_rq = repair_requests[repair_requests['slot'].between(snapshot_slot, first_turbine - 1)]
592-
live_rq = repair_requests[repair_requests['slot'].between(first_turbine, last_executed)]
593613

594-
turbine_stats(catchup, live)
614+
if request_data_path:
615+
catchup_rq = repair_requests[repair_requests['slot'].between(snapshot_slot, first_turbine - 1)]
616+
live_rq = repair_requests[repair_requests['slot'].between(first_turbine, last_executed)]
595617

596-
catchup = catchup[catchup['timestamp'] >= first_turbine_accept_ts] # only keep shreds that were accepted after the first turbine
597-
shreds_data = shreds_data[shreds_data['timestamp'] >= first_turbine_accept_ts] # only keep shreds that were accepted after the first turbine
598-
peer_stats( catchup, catchup_rq, live, live_rq, pdf )
618+
turbine_stats(catchup, live)
619+
620+
catchup = catchup[catchup['timestamp'] >= first_turbine_accept_ts] # only keep shreds that were accepted after the first turbine
621+
shreds_data = shreds_data[shreds_data['timestamp'] >= first_turbine_accept_ts] # only keep shreds that were accepted after the first turbine
622+
623+
peer_stats( catchup, catchup_rq, live, live_rq, pdf )
599624

600625
if fec_complete_path:
601626
completion_times( fec_stats, shreds_data, first_turbine, pdf )
602627

603-
print_slots(repair_requests, shreds_data, snapshot_slot, first_turbine, pdf)
628+
if request_data_path:
629+
print_slots(repair_requests, shreds_data, snapshot_slot, first_turbine, pdf)
604630

605631
if __name__ == "__main__":
606-
if len(sys.argv) < 4:
632+
if len(sys.argv) < 3:
607633
print('Add: [tiles.shredcap] \n\t enabled = true \n\t folder_path = /my/folder_for_csv_dump \n to your testnet config.toml file to enable the report generation.')
608-
print('Usage: python report.py <testnet.log path> <request_data.csv path> <shred_data.csv path> <peers_data.csv> <fec_complete.csv path (optional)>')
634+
print('Usage: python report.py <testnet.log path> <csv_folder_path>')
609635
print('Report will automatically be saved as report.pdf in the current directory.')
610636
sys.exit(1)
611637

612-
log_path = sys.argv[1]
613-
request_data_path = sys.argv[2]
614-
shred_data_path = sys.argv[3]
615-
peers_data_path = sys.argv[4]
616-
fec_complete_path = sys.argv[5] if len(sys.argv) > 5 else None
638+
log_path = sys.argv[1]
639+
csv_path = sys.argv[2]
640+
# check if the csvs live in path
641+
if not os.path.exists(csv_path):
642+
print(f'Error: {csv_path} does not exist')
643+
sys.exit(1)
644+
645+
csv_paths = { 'shred_data.csv' : os.path.join(csv_path, 'shred_data.csv'),
646+
'request_data.csv' : os.path.join(csv_path, 'request_data.csv'),
647+
'peers_data.csv' : os.path.join(csv_path, 'peers.csv'),
648+
'fec_complete.csv' : os.path.join(csv_path, 'fec_complete.csv') }
649+
650+
for csv_name, csv_path in csv_paths.items():
651+
if not os.path.exists(csv_path):
652+
csv_paths[csv_name] = None
653+
654+
for csv_name, csv_path in csv_paths.items():
655+
print(f'Found {csv_name}: {csv_path}')
617656

618657
output_path = 'report.pdf'
619658
pdf = PdfPages('report.pdf')
620-
generate_report(log_path, request_data_path, shred_data_path, peers_data_path, fec_complete_path, pdf)
659+
660+
generate_report(log_path,
661+
csv_paths['request_data.csv'],
662+
csv_paths['shred_data.csv'],
663+
csv_paths['peers_data.csv'],
664+
csv_paths['fec_complete.csv'],
665+
pdf)
621666
print(f'Graphs generated at: {output_path}')
622667

623668
pdf.close()

0 commit comments

Comments
 (0)