diff --git a/contrib/repair-analysis/report.py b/contrib/repair-analysis/report.py
index 6a721a5090..ad06dfbbfd 100644
--- a/contrib/repair-analysis/report.py
+++ b/contrib/repair-analysis/report.py
@@ -5,7 +5,7 @@
 from matplotlib.backends.backend_pdf import PdfPages
 import sys
 import warnings
-
+import os
 """
 This script generates a report on repair analysis off of one testnet run.
 1. Add the following to your testnet config.toml file:
@@ -19,7 +19,7 @@
     - /my/folder/fec_complete.csv
 3. Run this script with the following command:
-    python3 report.py <log_path> <request_data_path> <shred_data_path> <peers_data_path> [fec_complete_path]
+    python3 report.py <log_path> <csv_folder>
 If you are missing dependencies, make sure to install them with:
     python3 -m pip install pandas numpy matplotlib seaborn
@@ -33,6 +33,14 @@ def match_repair_requests( requests, responses ):
     rsp = responses[responses['is_turbine'] == False]
+
+    # sanity check: no more than one src_ip should share the same nonce
+    verify_nonce = rsp.groupby('nonce').agg({'src_ip': 'nunique'}).reset_index()
+    verify_nonce = verify_nonce[verify_nonce['src_ip'] > 1]
+    if len(verify_nonce) > 0:
+        print("Repair responses that unexpectedly share the same nonce:")
+        print(verify_nonce)
+
     rsp = rsp.groupby('nonce').agg({'timestamp': 'min', 'slot':'first', 'idx':'max'}).reset_index()

     # check which nonces are in requests but not in responses
@@ -180,6 +188,18 @@ def execution_stats( log_path, pdf ):
             last_executed = int(line.split()[12][:-1]) # 13th word (index 12 in 0-based indexing)
             break

+    if snapshot_slot is None:
+        # get user input from CLI instead
+        snapshot_slot = int(input('Couldn\'t find snapshot slot in log, please enter it manually: '))
+
+    if first_turbine is None:
+        # get user input from CLI instead
+        first_turbine = int(input('Couldn\'t find first turbine slot in log, please enter it manually: '))
+
+    if last_executed is None:
+        # get user input from CLI instead
+        last_executed = int(input('Couldn\'t find last executed slot in log, please enter it manually: '))
+
     # Output the extracted values
     print(f'snapshot_slot = {snapshot_slot}')
     print(f'first_turbine = {first_turbine}')
@@ -556,13 +576,15 @@ def generate_report( log_path, request_data_path, shred_data_path, peers_data_pa
                                  on_bad_lines='skip',
                                  skipfooter=1 ) # because of the buffered writer the last row is probably incomplete

-    repair_requests = pd.read_csv( request_data_path,
-                                   dtype={'dst_ip': str, 'dst_port': int, 'timestamp': int, 'slot': int, 'idx': int, 'nonce': int },
-                                   skipfooter=1 )
+    if request_data_path:
+        repair_requests = pd.read_csv( request_data_path,
+                                       dtype={'dst_ip': str, 'dst_port': int, 'timestamp': int, 'slot': int, 'idx': int, 'nonce': int },
+                                       skipfooter=1 )

-    peers_data = pd.read_csv( peers_data_path,
-                              dtype={'peer_ip4_addr': int, 'peer_port': int, 'pubkey':str, 'turbine': bool },
-                              on_bad_lines='skip',
+    if peers_data_path:
+        peers_data = pd.read_csv( peers_data_path,
+                                  dtype={'peer_ip4_addr': int, 'peer_port': int, 'pubkey':str, 'turbine': bool },
+                                  on_bad_lines='skip',
                                   skipfooter=1 )

     # if we have a fec complete file, read it in
@@ -588,36 +610,59 @@ def generate_report( log_path, request_data_path, shred_data_path, peers_data_pa
     catchup = shreds_data[shreds_data['slot'].between(snapshot_slot, first_turbine - 1)]
     live = shreds_data[shreds_data['slot'].between(first_turbine, last_executed)]

-    catchup_rq = repair_requests[repair_requests['slot'].between(snapshot_slot, first_turbine - 1)]
-    live_rq = repair_requests[repair_requests['slot'].between(first_turbine, last_executed)]
-    turbine_stats(catchup, live)
+    if request_data_path:
+        catchup_rq = repair_requests[repair_requests['slot'].between(snapshot_slot, first_turbine - 1)]
+        live_rq = repair_requests[repair_requests['slot'].between(first_turbine, last_executed)]

-    catchup = catchup[catchup['timestamp'] >= first_turbine_accept_ts] # only keep shreds that were accepted after the first turbine
-    shreds_data = shreds_data[shreds_data['timestamp'] >= first_turbine_accept_ts] # only keep shreds that were accepted after the first turbine
-    peer_stats( catchup, catchup_rq, live, live_rq, pdf )
+        turbine_stats(catchup, live)
+
+        catchup = catchup[catchup['timestamp'] >= first_turbine_accept_ts] # only keep shreds that were accepted after the first turbine
+        shreds_data = shreds_data[shreds_data['timestamp'] >= first_turbine_accept_ts] # only keep shreds that were accepted after the first turbine
+
+        peer_stats( catchup, catchup_rq, live, live_rq, pdf )

     if fec_complete_path:
         completion_times( fec_stats, shreds_data, first_turbine, pdf )

-    print_slots(repair_requests, shreds_data, snapshot_slot, first_turbine, pdf)
+    if request_data_path:
+        print_slots(repair_requests, shreds_data, snapshot_slot, first_turbine, pdf)


 if __name__ == "__main__":
-    if len(sys.argv) < 4:
+    if len(sys.argv) < 3:
         print('Add: [tiles.shredcap] \n\t enabled = true \n\t folder_path = /my/folder_for_csv_dump \n to your testnet config.toml file to enable the report generation.')
-        print('Usage: python report.py <log_path> <request_data_path> <shred_data_path> <peers_data_path> [fec_complete_path]')
+        print('Usage: python report.py <log_path> <csv_folder>')
         print('Report will automatically be saved as report.pdf in the current directory.')
         sys.exit(1)

-    log_path = sys.argv[1]
-    request_data_path = sys.argv[2]
-    shred_data_path = sys.argv[3]
-    peers_data_path = sys.argv[4]
-    fec_complete_path = sys.argv[5] if len(sys.argv) > 5 else None
+    log_path = sys.argv[1]
+    csv_path = sys.argv[2]
+
+    # check that the csv dump folder exists
+    if not os.path.exists(csv_path):
+        print(f'Error: {csv_path} does not exist')
+        sys.exit(1)
+
+    csv_paths = { 'shred_data.csv'   : os.path.join(csv_path, 'shred_data.csv'),
+                  'request_data.csv' : os.path.join(csv_path, 'request_data.csv'),
+                  'peers_data.csv'   : os.path.join(csv_path, 'peers.csv'),
+                  'fec_complete.csv' : os.path.join(csv_path, 'fec_complete.csv') }
+
+    # mark any csv that is absent from the folder as None so its sections are skipped
+    for csv_name, path in csv_paths.items():
+        if not os.path.exists(path):
+            csv_paths[csv_name] = None
+
+    for csv_name, path in csv_paths.items():
+        print(f'Found {csv_name}: {path}' if path else f'Missing {csv_name}')

     output_path = 'report.pdf'
     pdf = PdfPages('report.pdf')
-    generate_report(log_path, request_data_path, shred_data_path, peers_data_path, fec_complete_path, pdf)
+
+    generate_report(log_path,
+                    csv_paths['request_data.csv'],
+                    csv_paths['shred_data.csv'],
+                    csv_paths['peers_data.csv'],
+                    csv_paths['fec_complete.csv'],
+                    pdf)

     print(f'Graphs generated at: {output_path}')
     pdf.close()
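A sample invocation of the updated CLI (the log path below is hypothetical; /my/folder_for_csv_dump matches the [tiles.shredcap] folder_path example above):

    python3 report.py /path/to/validator.log /my/folder_for_csv_dump

If any of shred_data.csv, request_data.csv, peers.csv, or fec_complete.csv is missing from that folder, its path is passed to generate_report as None and the report sections that depend on it are skipped.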