diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..246e2f6
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,45 @@
+# Makefile for redisbench-admin
+
+.PHONY: compliance format compliance-fix test integration-tests all help
+
+# Code quality and compliance checks
+compliance:
+	@echo "๐Ÿ” Running compliance checks..."
+	tox -e compliance
+
+# Fix code formatting issues
+format:
+	@echo "๐Ÿ”ง Fixing code formatting..."
+	tox -e format
+	@echo "โœ… Code formatting fixed!"
+
+# Alias for format
+compliance-fix: format
+
+# Run tests with coverage
+test:
+	@echo "๐Ÿงช Running tests..."
+	poetry run coverage erase
+	poetry run pytest --cov=redisbench_admin --cov-report=term-missing -ra
+	poetry run coverage xml
+	@echo "โœ… Tests completed!"
+
+# Run integration tests (alias for test)
+integration-tests: test
+
+# Run both compliance and tests
+all: compliance test
+
+# Show help
+help:
+	@echo "Available targets:"
+	@echo "  compliance        - Run code quality checks (black, flake8)"
+	@echo "  format            - Fix code formatting with black"
+	@echo "  compliance-fix    - Alias for format"
+	@echo "  test              - Run tests with coverage"
+	@echo "  integration-tests - Alias for test"
+	@echo "  all               - Run compliance checks and tests"
+	@echo "  help              - Show this help message"
+
+# Default target
+.DEFAULT_GOAL := help
diff --git a/README.md b/README.md
index cae7f62..947fe43 100644
--- a/README.md
+++ b/README.md
@@ -110,7 +110,13 @@ $ tox
 To run a specific test:
 
 ```sh
-$ tox -- tests/test_redistimeseries.py
+$ tox -- tests/test_defaults_purpose_built_env.py
+```
+
+To run a specific test and persist the docker container used for timeseries:
+
+```
+tox --docker-dont-stop=rts_datasink -- -vv --log-cli-level=INFO tests/test_defaults_purpose_built_env.py
 ```
 
 To run a specific test with verbose logging:
diff --git a/pyproject.toml b/pyproject.toml
index d99f34f..0a59798 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "redisbench-admin"
-version = "0.11.37"
+version = "0.11.39"
 description = "Redis benchmark run helper. A wrapper around Redis and Redis Modules benchmark tools ( ftsb_redisearch, memtier_benchmark, redis-benchmark, aibench, etc... )."
 authors = ["filipecosta90 ","Redis Performance Group "]
 readme = "README.md"
@@ -44,6 +44,9 @@ certifi = ">=2021.10.8,<2025.0.0"
 pygithub = "^1.57"
 
 [tool.poetry.dev-dependencies]
+tox = ">=4.16.0"
+tox-docker = ">=5.0.0"
+docker = ">=7.1.0"
 pytest = "^4.6"
 pytest-cov = "^2.9.0"
 codecov = "2.1.13"
diff --git a/redisbench_admin/compare/args.py b/redisbench_admin/compare/args.py
index 5aba32c..1ddce9e 100644
--- a/redisbench_admin/compare/args.py
+++ b/redisbench_admin/compare/args.py
@@ -134,7 +134,7 @@ def create_compare_arguments(parser):
     parser.add_argument(
         "--regressions-percent-lower-limit",
         type=float,
-        default=5.0,
+        default=8.0,
         help="Only consider regressions with a percentage over the defined limit. 
(0-100)", ) parser.add_argument( diff --git a/redisbench_admin/compare/compare.py b/redisbench_admin/compare/compare.py index e489f9a..ac740e5 100644 --- a/redisbench_admin/compare/compare.py +++ b/redisbench_admin/compare/compare.py @@ -13,6 +13,7 @@ import humanize import datetime as dt import os +import statistics from tqdm import tqdm from github import Github from slack_sdk.webhook import WebhookClient @@ -270,6 +271,10 @@ def compare_command_logic(args, project_name, project_version): total_stable, total_unstable, total_comparison_points, + total_unstable_baseline, + total_unstable_comparison, + total_latency_confirmed_regressions, + latency_confirmed_regression_details, ) = compute_regression_table( rts, tf_github_org, @@ -303,6 +308,7 @@ def compare_command_logic(args, project_name, project_version): comparison_architecture, first_n_baseline, first_n_comparison, + grafana_link_base, ) comment_body = "" if total_comparison_points > 0: @@ -321,11 +327,63 @@ def compare_command_logic(args, project_name, project_version): ) if total_unstable > 0: + unstable_details = [] + if total_unstable_baseline > 0: + unstable_details.append(f"{total_unstable_baseline} baseline") + if total_unstable_comparison > 0: + unstable_details.append(f"{total_unstable_comparison} comparison") + + unstable_breakdown = ( + " (" + ", ".join(unstable_details) + ")" if unstable_details else "" + ) comparison_summary += ( - "- Detected a total of {} highly unstable benchmarks.\n".format( - total_unstable + "- Detected a total of {} highly unstable benchmarks{}.\n".format( + total_unstable, unstable_breakdown ) ) + + # Add latency confirmation summary if applicable + if total_latency_confirmed_regressions > 0: + comparison_summary += "- Latency analysis confirmed regressions in {} of the unstable tests:\n".format( + total_latency_confirmed_regressions + ) + + # Add detailed breakdown as bullet points with test links + if latency_confirmed_regression_details: + for detail in latency_confirmed_regression_details: + test_name = detail["test_name"] + commands_info = [] + for cmd_detail in detail["commands"]: + commands_info.append( + f"{cmd_detail['command']} +{cmd_detail['change_percent']:.1f}%" + ) + + if commands_info: + # Create test link if grafana_link_base is available + test_display_name = test_name + if grafana_link_base is not None: + grafana_test_link = f"{grafana_link_base}?orgId=1&var-test_case={test_name}" + if baseline_branch is not None: + grafana_test_link += ( + f"&var-branch={baseline_branch}" + ) + if comparison_branch is not None: + grafana_test_link += ( + f"&var-branch={comparison_branch}" + ) + grafana_test_link += "&from=now-30d&to=now" + test_display_name = ( + f"[{test_name}]({grafana_test_link})" + ) + + # Add confidence indicator if available + confidence_indicator = "" + if "high_confidence" in detail: + confidence_indicator = ( + " ๐Ÿ”ด" if detail["high_confidence"] else " โš ๏ธ" + ) + + comparison_summary += f" - {test_display_name}: {', '.join(commands_info)}{confidence_indicator}\n" if total_improvements > 0: comparison_summary += "- Detected a total of {} improvements above the improvement water line.\n".format( total_improvements @@ -484,6 +542,9 @@ def compare_command_logic(args, project_name, project_version): total_stable, total_unstable, total_comparison_points, + total_unstable_baseline, + total_unstable_comparison, + total_latency_confirmed_regressions, ) @@ -531,6 +592,7 @@ def compute_regression_table( comparison_architecture=ARCH_X86, first_n_baseline=-1, 
first_n_comparison=-1, + grafana_link_base=None, ): START_TIME_NOW_UTC, _, _ = get_start_time_vars() START_TIME_LAST_MONTH_UTC = START_TIME_NOW_UTC - datetime.timedelta(days=31) @@ -593,6 +655,10 @@ def compute_regression_table( total_stable, total_unstable, total_comparison_points, + total_unstable_baseline, + total_unstable_comparison, + total_latency_confirmed_regressions, + latency_confirmed_regression_details, ) = from_rts_to_regression_table( baseline_deployment_name, comparison_deployment_name, @@ -621,14 +687,97 @@ def compute_regression_table( comparison_architecture, first_n_baseline, first_n_comparison, + grafana_link_base, + baseline_branch, + baseline_tag, + comparison_branch, + comparison_tag, + from_date, + to_date, ) logging.info( "Printing differential analysis between {} and {}".format( baseline_str, comparison_str ) ) - writer = MarkdownTableWriter( - table_name="Comparison between {} and {}.\n\nTime Period from {}. (environment used: {})\n".format( + + # Split table into improvements, regressions, and no-changes + improvements_table = [] + regressions_table = [] + no_changes_table = [] + + for row in table: + # Check if there's a meaningful change (not stable/unstable) + note = row[4].lower() if len(row) > 4 else "" + percentage_str = row[3] if len(row) > 3 else "0.0%" + + # Extract percentage value + try: + percentage_val = float(percentage_str.replace("%", "").strip()) + except: + percentage_val = 0.0 + + # Categorize based on change type + if "improvement" in note and "potential" not in note: + # Only actual improvements, not potential ones + improvements_table.append(row) + elif ("regression" in note and "potential" not in note) or "unstable" in note: + # Only actual regressions, not potential ones, plus unstable tests + regressions_table.append(row) + elif "no change" in note or "potential" in note: + # No changes and potential changes (below significance threshold) + no_changes_table.append(row) + elif abs(percentage_val) > 3.0: # Significant changes based on percentage + if (percentage_val > 0 and metric_mode == "higher-better") or ( + percentage_val < 0 and metric_mode == "lower-better" + ): + improvements_table.append(row) + else: + regressions_table.append(row) + else: + no_changes_table.append(row) + + # Sort tables by percentage change + def get_percentage_value(row): + """Extract percentage value from row for sorting""" + try: + percentage_str = row[3] if len(row) > 3 else "0.0%" + return float(percentage_str.replace("%", "").strip()) + except: + return 0.0 + + # Sort improvements by percentage change (highest first) + improvements_table.sort(key=get_percentage_value, reverse=True) + + # Sort regressions by percentage change (most negative first for higher-better, most positive first for lower-better) + if metric_mode == "higher-better": + # For higher-better metrics, most negative changes are worst regressions + regressions_table.sort(key=get_percentage_value) + else: + # For lower-better metrics, most positive changes are worst regressions + regressions_table.sort(key=get_percentage_value, reverse=True) + + # Create improvements table (visible) + improvements_writer = MarkdownTableWriter( + table_name="Performance Improvements - Comparison between {} and {}.\n\nTime Period from {}. (environment used: {})\n".format( + baseline_str, + comparison_str, + from_human_str, + baseline_deployment_name, + ), + headers=[ + "Test Case", + "Baseline {} (median obs. +- std.dev)".format(baseline_str), + "Comparison {} (median obs. 
+- std.dev)".format(comparison_str), + "% change ({})".format(metric_mode), + "Note", + ], + value_matrix=improvements_table, + ) + + # Create regressions table (visible) + regressions_writer = MarkdownTableWriter( + table_name="Performance Regressions and Issues - Comparison between {} and {}.\n\nTime Period from {}. (environment used: {})\n".format( baseline_str, comparison_str, from_human_str, @@ -641,8 +790,22 @@ def compute_regression_table( "% change ({})".format(metric_mode), "Note", ], - value_matrix=table, + value_matrix=regressions_table, ) + + # Create no-changes table (hidden in markdown) + no_changes_writer = MarkdownTableWriter( + table_name="Tests with No Significant Changes", + headers=[ + "Test Case", + "Baseline {} (median obs. +- std.dev)".format(baseline_str), + "Comparison {} (median obs. +- std.dev)".format(comparison_str), + "% change ({})".format(metric_mode), + "Note", + ], + value_matrix=no_changes_table, + ) + table_output = "" from io import StringIO @@ -651,7 +814,25 @@ def compute_regression_table( old_stdout = sys.stdout sys.stdout = mystdout = StringIO() - writer.dump(mystdout, False) + # Output improvements table first (if any) + if improvements_table: + improvements_writer.dump(mystdout, False) + mystdout.write("\n\n") + + # Output regressions table (if any) + if regressions_table: + regressions_writer.dump(mystdout, False) + mystdout.write("\n\n") + + # Add hidden no-changes table + if no_changes_table: + mystdout.write( + "
\nTests with No Significant Changes ({} tests)\n\n".format(
+                len(no_changes_table)
+            )
+        )
+        no_changes_writer.dump(mystdout, False)
+        mystdout.write("\n
\n") sys.stdout = old_stdout @@ -665,6 +846,10 @@ def compute_regression_table( total_stable, total_unstable, total_comparison_points, + total_unstable_baseline, + total_unstable_comparison, + total_latency_confirmed_regressions, + latency_confirmed_regression_details, ) @@ -752,6 +937,13 @@ def from_rts_to_regression_table( comparison_architecture=ARCH_X86, first_n_baseline=-1, first_n_comparison=-1, + grafana_link_base=None, + baseline_branch=None, + baseline_tag=None, + comparison_branch=None, + comparison_tag=None, + from_date=None, + to_date=None, ): print_all = print_regressions_only is False and print_improvements_only is False table = [] @@ -759,8 +951,12 @@ def from_rts_to_regression_table( total_improvements = 0 total_stable = 0 total_unstable = 0 + total_unstable_baseline = 0 + total_unstable_comparison = 0 total_regressions = 0 total_comparison_points = 0 + total_latency_confirmed_regressions = 0 + latency_confirmed_regression_details = [] # Track specific test details noise_waterline = 3 progress = tqdm(unit="benchmark time-series", total=len(test_names)) for test_name in test_names: @@ -898,10 +1094,243 @@ def from_rts_to_regression_table( logging.error("Detected a ZeroDivisionError. {}".format(e.__str__())) pass unstable = False + unstable_baseline = False + unstable_comparison = False + latency_confirms_regression = False + if baseline_v != "N/A" and comparison_v != "N/A": if comparison_pct_change > 10.0 or baseline_pct_change > 10.0: - note = "UNSTABLE (very high variance)" unstable = True + unstable_baseline = baseline_pct_change > 10.0 + unstable_comparison = comparison_pct_change > 10.0 + + # Build detailed unstable note + unstable_parts = [] + if unstable_baseline and unstable_comparison: + unstable_parts.append( + "UNSTABLE (baseline & comparison high variance)" + ) + elif unstable_baseline: + unstable_parts.append("UNSTABLE (baseline high variance)") + elif unstable_comparison: + unstable_parts.append("UNSTABLE (comparison high variance)") + + note = unstable_parts[0] + + # Log detailed warning about unstable data detection + logging.warning( + f"UNSTABLE DATA DETECTED for test '{test_name}': " + f"baseline variance={baseline_pct_change:.1f}%, " + f"comparison variance={comparison_pct_change:.1f}% " + f"(threshold=10.0%)" + ) + + # For throughput metrics (higher-better), check both server-side and client-side latency + if metric_mode == "higher-better": + logging.info( + f"Performing 2nd-level latency validation for unstable throughput metric '{test_name}' " + f"(metric_mode={metric_mode})" + ) + + # Check server-side p50 latency + ( + server_latency_note, + server_confirms_regression, + server_regression_details, + ) = check_latency_for_unstable_throughput( + rts, + test_name, + baseline_str, + comparison_str, + by_str_baseline, + by_str_comparison, + baseline_deployment_name, + comparison_deployment_name, + tf_triggering_env, + from_ts_ms, + to_ts_ms, + last_n_baseline, + last_n_comparison, + first_n_baseline, + first_n_comparison, + running_platform, + baseline_architecture, + comparison_architecture, + verbose, + ) + + # Check client-side latency metrics + ( + client_latency_note, + client_confirms_regression, + client_regression_details, + ) = check_client_side_latency( + rts, + test_name, + baseline_str, + comparison_str, + by_str_baseline, + by_str_comparison, + baseline_deployment_name, + comparison_deployment_name, + tf_triggering_env, + from_ts_ms, + to_ts_ms, + last_n_baseline, + last_n_comparison, + first_n_baseline, + first_n_comparison, + 
running_platform, + baseline_architecture, + comparison_architecture, + verbose, + ) + + # Combine results from both server and client side + combined_latency_notes = [] + if server_latency_note: + combined_latency_notes.append(f"server: {server_latency_note}") + if client_latency_note: + combined_latency_notes.append(f"client: {client_latency_note}") + + # Only confirm regression if BOTH server and client side show evidence AND data is stable enough + # Check if either server or client data contains unstable indicators + server_has_unstable = ( + server_latency_note and "UNSTABLE" in server_latency_note + ) + client_has_unstable = ( + client_latency_note and "UNSTABLE" in client_latency_note + ) + + # Don't confirm regression if either side has unstable data + if server_has_unstable or client_has_unstable: + both_confirm_regression = False + unstable_sides = [] + if server_has_unstable: + unstable_sides.append("server") + if client_has_unstable: + unstable_sides.append("client") + blocked_note = f"regression blocked due to unstable {' and '.join(unstable_sides)} latency data" + note += f"; {blocked_note}" + logging.info( + f"Blocking regression confirmation for '{test_name}' due to unstable latency data" + ) + if server_has_unstable: + logging.info(" Server-side latency data is unstable") + if client_has_unstable: + logging.info(" Client-side latency data is unstable") + else: + both_confirm_regression = ( + server_confirms_regression and client_confirms_regression + ) + + if combined_latency_notes: + combined_note = "; ".join(combined_latency_notes) + note += f"; {combined_note}" + logging.info( + f"Combined latency check result for '{test_name}': {combined_note}" + ) + + if both_confirm_regression: + logging.info( + f"BOTH server and client latency analysis CONFIRM regression for '{test_name}'" + ) + + # Set the flag for counting confirmed regressions + latency_confirms_regression = True + + # Combine regression details from both server and client + combined_regression_details = ( + server_regression_details or client_regression_details + ) + if combined_regression_details: + combined_regression_details["server_side"] = ( + server_confirms_regression + ) + combined_regression_details["client_side"] = ( + client_confirms_regression + ) + + # 2nd level confirmation is sufficient - always add to confirmed regressions + logging.info( + f"Adding '{test_name}' to confirmed regressions based on 2nd level validation" + ) + + # Perform 3rd-level analysis: variance + p99 check for additional confidence scoring + logging.info( + f"Performing 3rd-level analysis (variance + p99) for confidence scoring on '{test_name}'" + ) + ( + confidence_note, + high_confidence, + ) = perform_variance_and_p99_analysis( + rts, + test_name, + baseline_str, + comparison_str, + by_str_baseline, + by_str_comparison, + baseline_deployment_name, + comparison_deployment_name, + tf_triggering_env, + from_ts_ms, + to_ts_ms, + last_n_baseline, + last_n_comparison, + first_n_baseline, + first_n_comparison, + running_platform, + baseline_architecture, + comparison_architecture, + verbose, + ) + + if confidence_note: + note += f"; {confidence_note}" + logging.info( + f"Confidence analysis for '{test_name}': {confidence_note}" + ) + # Use 3rd level confidence if available + combined_regression_details["high_confidence"] = ( + high_confidence + ) + else: + # No 3rd level data available - default to moderate confidence since 2nd level confirmed + logging.info( + f"No 3rd level data available for '{test_name}' - using 2nd level 
confirmation" + ) + combined_regression_details["high_confidence"] = ( + True # 2nd level confirmation is reliable + ) + + # Always add to confirmed regressions when 2nd level confirms + latency_confirmed_regression_details.append( + combined_regression_details + ) + elif server_confirms_regression or client_confirms_regression: + side_confirmed = ( + "server" if server_confirms_regression else "client" + ) + side_not_confirmed = ( + "client" if server_confirms_regression else "server" + ) + insufficient_evidence_note = f"only {side_confirmed} side confirms regression ({side_not_confirmed} side stable) - insufficient evidence" + note += f"; {insufficient_evidence_note}" + logging.info( + f"Only {side_confirmed} side confirms regression for '{test_name}' - insufficient evidence" + ) + else: + no_regression_note = ( + "neither server nor client side confirms regression" + ) + note += f"; {no_regression_note}" + logging.info( + f"Neither server nor client side confirms regression for '{test_name}'" + ) + else: + logging.info( + f"No latency data available for secondary check on '{test_name}'" + ) baseline_v_str = prepare_value_str( baseline_pct_change, baseline_v, baseline_values, simplify_table @@ -956,6 +1385,12 @@ def from_rts_to_regression_table( if unstable: total_unstable += 1 + if unstable_baseline: + total_unstable_baseline += 1 + if unstable_comparison: + total_unstable_comparison += 1 + if latency_confirms_regression: + total_latency_confirmed_regressions += 1 should_add_line = False if print_regressions_only and detected_regression: @@ -976,6 +1411,13 @@ def from_rts_to_regression_table( percentage_change, table, test_name, + grafana_link_base, + baseline_branch, + baseline_tag, + comparison_branch, + comparison_tag, + from_date, + to_date, ) return ( detected_regressions, @@ -985,9 +1427,995 @@ def from_rts_to_regression_table( total_stable, total_unstable, total_comparison_points, + total_unstable_baseline, + total_unstable_comparison, + total_latency_confirmed_regressions, + latency_confirmed_regression_details, ) +def check_client_side_latency( + rts, + test_name, + baseline_str, + comparison_str, + by_str_baseline, + by_str_comparison, + baseline_deployment_name, + comparison_deployment_name, + tf_triggering_env, + from_ts_ms, + to_ts_ms, + last_n_baseline, + last_n_comparison, + first_n_baseline, + first_n_comparison, + running_platform, + baseline_architecture, + comparison_architecture, + verbose=False, +): + """ + Check client-side latency metrics to provide additional validation for regression detection. 
+ + Returns: + tuple: (note_string, confirms_regression_bool, regression_details_dict) + """ + logging.info(f"Starting client-side latency check for test: {test_name}") + try: + # Client-side latency metrics to check + client_metrics = [ + "p50_latency_ms", + "Latency", + "OverallQuantiles.allCommands.q50", + "Tests.INSERT.AverageLatency_us_", + "Tests.READ.AverageLatency_us_", + "Tests.SEARCH.AverageLatency_us_", + "Tests.UPDATE.AverageLatency_us_", + ] + + client_latency_notes = [] + significant_client_latency_increases = 0 + regression_details = {"test_name": test_name, "commands": []} + + for metric in client_metrics: + # Build filters for client-side latency metric + filters_baseline = [ + f"{by_str_baseline}={baseline_str}", + f"metric={metric}", + f"test_name={test_name}", + f"deployment_name={baseline_deployment_name}", + f"triggering_env={tf_triggering_env}", + ] + filters_comparison = [ + f"{by_str_comparison}={comparison_str}", + f"metric={metric}", + f"test_name={test_name}", + f"deployment_name={comparison_deployment_name}", + f"triggering_env={tf_triggering_env}", + ] + + # Add optional filters + if running_platform is not None: + filters_baseline.append(f"running_platform={running_platform}") + filters_comparison.append(f"running_platform={running_platform}") + if baseline_architecture != ARCH_X86: + filters_baseline.append(f"arch={baseline_architecture}") + if comparison_architecture != ARCH_X86: + filters_comparison.append(f"arch={comparison_architecture}") + + # Query for client-side latency time-series + baseline_client_ts = rts.ts().queryindex(filters_baseline) + comparison_client_ts = rts.ts().queryindex(filters_comparison) + + if len(baseline_client_ts) == 0 or len(comparison_client_ts) == 0: + if verbose: + logging.info( + f" No client-side data found for metric '{metric}' in {test_name}" + ) + continue + + logging.info( + f" Found client-side metric '{metric}': {len(baseline_client_ts)} baseline, {len(comparison_client_ts)} comparison time-series" + ) + + # Filter out target time-series + baseline_client_ts = [ts for ts in baseline_client_ts if "target" not in ts] + comparison_client_ts = [ + ts for ts in comparison_client_ts if "target" not in ts + ] + + if len(baseline_client_ts) == 0 or len(comparison_client_ts) == 0: + continue + + # Use the first available time-series for each side + baseline_ts = baseline_client_ts[0] + comparison_ts = comparison_client_ts[0] + + # Get client-side latency data + baseline_client_data = rts.ts().revrange(baseline_ts, from_ts_ms, to_ts_ms) + comparison_client_data = rts.ts().revrange( + comparison_ts, from_ts_ms, to_ts_ms + ) + + if len(baseline_client_data) == 0 or len(comparison_client_data) == 0: + if verbose: + logging.info( + f" No data points for metric '{metric}': baseline={len(baseline_client_data)}, comparison={len(comparison_client_data)}" + ) + continue + + # Calculate client-side latency statistics + baseline_client_values = [] + comparison_client_values = [] + + (_, baseline_client_median, _) = get_v_pct_change_and_largest_var( + baseline_client_data, + 0, + 0, + baseline_client_values, + 0, + last_n_baseline, + verbose, + first_n_baseline, + ) + + (_, comparison_client_median, _) = get_v_pct_change_and_largest_var( + comparison_client_data, + 0, + 0, + comparison_client_values, + 0, + last_n_comparison, + verbose, + first_n_comparison, + ) + + if baseline_client_median == "N/A" or comparison_client_median == "N/A": + if verbose: + logging.info( + f" Could not calculate median for metric '{metric}': 
baseline={baseline_client_median}, comparison={comparison_client_median}" + ) + continue + + # Calculate variance (coefficient of variation) for both baseline and comparison + baseline_client_mean = ( + statistics.mean(baseline_client_values) if baseline_client_values else 0 + ) + baseline_client_stdev = ( + statistics.stdev(baseline_client_values) + if len(baseline_client_values) > 1 + else 0 + ) + baseline_client_cv = ( + (baseline_client_stdev / baseline_client_mean * 100) + if baseline_client_mean > 0 + else float("inf") + ) + + comparison_client_mean = ( + statistics.mean(comparison_client_values) + if comparison_client_values + else 0 + ) + comparison_client_stdev = ( + statistics.stdev(comparison_client_values) + if len(comparison_client_values) > 1 + else 0 + ) + comparison_client_cv = ( + (comparison_client_stdev / comparison_client_mean * 100) + if comparison_client_mean > 0 + else float("inf") + ) + + # Calculate client-side latency change (for latency, higher is worse) + client_latency_change = ( + float(comparison_client_median) / float(baseline_client_median) - 1 + ) * 100.0 + + logging.info( + f" Client metric '{metric}': baseline={baseline_client_median:.2f} (CV={baseline_client_cv:.1f}%), comparison={comparison_client_median:.2f} (CV={comparison_client_cv:.1f}%), change={client_latency_change:.1f}%" + ) + + # Check if client latency data is too unstable to be reliable + client_data_unstable = ( + baseline_client_cv > 50.0 or comparison_client_cv > 50.0 + ) + + if client_data_unstable: + # Mark as unstable client latency data + unstable_reason = [] + if baseline_client_cv > 50.0: + unstable_reason.append(f"baseline CV={baseline_client_cv:.1f}%") + if comparison_client_cv > 50.0: + unstable_reason.append(f"comparison CV={comparison_client_cv:.1f}%") + + client_latency_notes.append( + f"{metric} UNSTABLE ({', '.join(unstable_reason)} - data too noisy for reliable analysis)" + ) + logging.warning( + f" Client metric '{metric}': UNSTABLE latency data detected - {', '.join(unstable_reason)}" + ) + elif ( + abs(client_latency_change) > 5.0 + ): # Only report significant client latency changes for stable data + direction = "increased" if client_latency_change > 0 else "decreased" + + # Adjust significance threshold based on baseline variance + if baseline_client_cv < 30.0: + # Low variance - use standard threshold + significance_threshold = 10.0 + elif baseline_client_cv < 50.0: + # Moderate variance - require larger change + significance_threshold = 15.0 + else: + # High variance - require much larger change + significance_threshold = 25.0 + + client_latency_notes.append( + f"{metric} {direction} {abs(client_latency_change):.1f}% (baseline CV={baseline_client_cv:.1f}%)" + ) + logging.info( + f" Client metric '{metric}': SIGNIFICANT latency change detected ({direction} {abs(client_latency_change):.1f}%, baseline CV={baseline_client_cv:.1f}%)" + ) + + # Track significant client latency increases (potential regression confirmation) + if client_latency_change > significance_threshold: + significant_client_latency_increases += 1 + regression_details["commands"].append( + { + "command": metric, + "change_percent": client_latency_change, + "direction": direction, + "baseline_cv": baseline_client_cv, + "comparison_cv": comparison_client_cv, + } + ) + logging.info( + f" Client metric '{metric}': CONFIRMS regression (change={client_latency_change:.1f}% > threshold={significance_threshold:.1f}%)" + ) + else: + logging.info( + f" Client metric '{metric}': Change below significance threshold 
(change={client_latency_change:.1f}% <= threshold={significance_threshold:.1f}%)" + ) + elif verbose: + client_latency_notes.append( + f"{metric} stable (CV={baseline_client_cv:.1f}%)" + ) + logging.info( + f" Client metric '{metric}': latency stable (change={client_latency_change:.1f}%, baseline CV={baseline_client_cv:.1f}%)" + ) + + # Determine if client-side latency confirms regression + confirms_regression = significant_client_latency_increases > 0 + + # Return combined client latency notes + if client_latency_notes: + result = "; ".join(client_latency_notes) + logging.info( + f"Client-side latency check completed for {test_name}: {result}" + ) + return ( + result, + confirms_regression, + regression_details if confirms_regression else None, + ) + else: + result = "client latency stable" if len(client_metrics) > 0 else None + logging.info( + f"Client-side latency check completed for {test_name}: {result or 'no data'}" + ) + return result, False, None + + except Exception as e: + logging.error(f"Error checking client-side latency for {test_name}: {e}") + return None, False, None + + +def perform_variance_and_p99_analysis( + rts, + test_name, + baseline_str, + comparison_str, + by_str_baseline, + by_str_comparison, + baseline_deployment_name, + comparison_deployment_name, + tf_triggering_env, + from_ts_ms, + to_ts_ms, + last_n_baseline, + last_n_comparison, + first_n_baseline, + first_n_comparison, + running_platform, + baseline_architecture, + comparison_architecture, + verbose=False, +): + """ + Perform 3rd-level analysis using variance and p99 metrics to assess confidence in regression detection. + + Returns: + tuple: (confidence_note, high_confidence_bool) + """ + try: + logging.info(f"Starting variance and p99 analysis for {test_name}") + + # Build filters for p99 latency metric using both metric=p99 and metric-type=(latencystats) + filters_baseline = [ + f"{by_str_baseline}={baseline_str}", + "metric=p99", + "metric-type=(latencystats)", + f"test_name={test_name}", + f"deployment_name={baseline_deployment_name}", + f"triggering_env={tf_triggering_env}", + ] + filters_comparison = [ + f"{by_str_comparison}={comparison_str}", + "metric=p99", + "metric-type=(latencystats)", + f"test_name={test_name}", + f"deployment_name={comparison_deployment_name}", + f"triggering_env={tf_triggering_env}", + ] + + # Add optional filters + if running_platform is not None: + filters_baseline.append(f"running_platform={running_platform}") + filters_comparison.append(f"running_platform={running_platform}") + if baseline_architecture != ARCH_X86: + filters_baseline.append(f"arch={baseline_architecture}") + if comparison_architecture != ARCH_X86: + filters_comparison.append(f"arch={comparison_architecture}") + + # Query for p99 latency time-series + logging.info(f"Querying p99 latencystats time-series for {test_name}") + baseline_p99_ts = rts.ts().queryindex(filters_baseline) + comparison_p99_ts = rts.ts().queryindex(filters_comparison) + + logging.info(f"Found {len(baseline_p99_ts)} baseline p99 latency time-series") + logging.info( + f"Found {len(comparison_p99_ts)} comparison p99 latency time-series" + ) + + # Filter out target time-series and unwanted commands (reuse existing function) + def should_exclude_timeseries(ts_name): + """Check if time-series should be excluded based on command""" + if "target" in ts_name: + return True + ts_name_lower = ts_name.lower() + excluded_commands = ["config", "info", "ping", "cluster", "resetstat"] + return any(cmd in ts_name_lower for cmd in excluded_commands) + + 
baseline_p99_ts = [ + ts for ts in baseline_p99_ts if not should_exclude_timeseries(ts) + ] + comparison_p99_ts = [ + ts for ts in comparison_p99_ts if not should_exclude_timeseries(ts) + ] + + if len(baseline_p99_ts) == 0 or len(comparison_p99_ts) == 0: + logging.warning( + f"No p99 latency data found for {test_name} after filtering" + ) + return None, False + + # Extract command names from time-series (reuse existing function) + def extract_command_from_ts(ts_name): + """Extract meaningful command name from time-series name""" + # Look for latencystats_latency_percentiles_usec__p99 pattern + match = re.search( + r"latencystats_latency_percentiles_usec_([^_/]+)_p99", ts_name + ) + if match: + return match.group(1) + # Look for command= pattern in the time-series name + match = re.search(r"command=([^/]+)", ts_name) + if match: + return match.group(1) + # If no specific pattern found, try to extract from the end of the path + parts = ts_name.split("/") + if len(parts) > 0: + return parts[-1] + return "unknown" + + # Group time-series by command + baseline_by_command = {} + comparison_by_command = {} + + for ts in baseline_p99_ts: + cmd = extract_command_from_ts(ts) + if cmd not in baseline_by_command: + baseline_by_command[cmd] = [] + baseline_by_command[cmd].append(ts) + + for ts in comparison_p99_ts: + cmd = extract_command_from_ts(ts) + if cmd not in comparison_by_command: + comparison_by_command[cmd] = [] + comparison_by_command[cmd].append(ts) + + # Find common commands between baseline and comparison + common_commands = set(baseline_by_command.keys()) & set( + comparison_by_command.keys() + ) + + if not common_commands: + logging.warning( + f"No common commands found for p99 variance analysis in {test_name}" + ) + return None, False + + variance_notes = [] + p99_notes = [] + high_confidence_indicators = 0 + total_indicators = 0 + + # Analyze variance and p99 for each command + for command in sorted(common_commands): + total_indicators += 1 + logging.info(f"Analyzing p99 variance for command: {command}") + + baseline_ts_list = baseline_by_command[command] + comparison_ts_list = comparison_by_command[command] + + # If multiple time-series for the same command, try to get the best one + if len(baseline_ts_list) > 1: + baseline_ts_list = get_only_Totals(baseline_ts_list) + if len(comparison_ts_list) > 1: + comparison_ts_list = get_only_Totals(comparison_ts_list) + + if len(baseline_ts_list) != 1 or len(comparison_ts_list) != 1: + logging.warning( + f" Skipping {command}: baseline={len(baseline_ts_list)}, comparison={len(comparison_ts_list)} time-series" + ) + continue + + # Get p99 latency data for this command + baseline_p99_data = [] + comparison_p99_data = [] + + for ts_name in baseline_ts_list: + datapoints = rts.ts().revrange(ts_name, from_ts_ms, to_ts_ms) + baseline_p99_data.extend(datapoints) + + for ts_name in comparison_ts_list: + datapoints = rts.ts().revrange(ts_name, from_ts_ms, to_ts_ms) + comparison_p99_data.extend(datapoints) + + if len(baseline_p99_data) < 3 or len(comparison_p99_data) < 3: + logging.warning( + f" Insufficient p99 data for {command}: baseline={len(baseline_p99_data)}, comparison={len(comparison_p99_data)} datapoints" + ) + continue + + # Extract values for variance calculation + baseline_values = [dp[1] for dp in baseline_p99_data] + comparison_values = [dp[1] for dp in comparison_p99_data] + + # Calculate variance (coefficient of variation) + baseline_mean = statistics.mean(baseline_values) + baseline_stdev = ( + statistics.stdev(baseline_values) if 
len(baseline_values) > 1 else 0 + ) + baseline_cv = ( + (baseline_stdev / baseline_mean * 100) + if baseline_mean > 0 + else float("inf") + ) + + comparison_mean = statistics.mean(comparison_values) + comparison_stdev = ( + statistics.stdev(comparison_values) if len(comparison_values) > 1 else 0 + ) + comparison_cv = ( + (comparison_stdev / comparison_mean * 100) + if comparison_mean > 0 + else float("inf") + ) + + # Calculate p99 change + p99_change = ( + ((comparison_mean - baseline_mean) / baseline_mean * 100) + if baseline_mean > 0 + else 0 + ) + + # Assess confidence based on variance and p99 change + if baseline_cv < 30: # Low variance in baseline (< 30% CV) + if abs(p99_change) > 15: # Significant p99 change + high_confidence_indicators += 1 + p99_notes.append( + f"{command} p99 {'+' if p99_change > 0 else ''}{p99_change:.1f}% (stable baseline)" + ) + else: + p99_notes.append( + f"{command} p99 {'+' if p99_change > 0 else ''}{p99_change:.1f}% (stable baseline, minor change)" + ) + elif baseline_cv < 50: # Moderate variance + if abs(p99_change) > 25: # Need larger change for confidence + high_confidence_indicators += 1 + p99_notes.append( + f"{command} p99 {'+' if p99_change > 0 else ''}{p99_change:.1f}% (moderate baseline variance)" + ) + else: + p99_notes.append( + f"{command} p99 {'+' if p99_change > 0 else ''}{p99_change:.1f}% (moderate baseline variance, uncertain)" + ) + else: # High variance + if abs(p99_change) > 40: # Need very large change for confidence + high_confidence_indicators += 1 + p99_notes.append( + f"{command} p99 {'+' if p99_change > 0 else ''}{p99_change:.1f}% (high baseline variance, large change)" + ) + else: + p99_notes.append( + f"{command} p99 {'+' if p99_change > 0 else ''}{p99_change:.1f}% (high baseline variance, low confidence)" + ) + + variance_notes.append(f"{command} baseline CV={baseline_cv:.1f}%") + + if verbose: + logging.info( + f" Command {command}: baseline CV={baseline_cv:.1f}%, comparison CV={comparison_cv:.1f}%, p99 change={p99_change:.1f}%" + ) + + # Determine overall confidence + confidence_ratio = ( + high_confidence_indicators / total_indicators if total_indicators > 0 else 0 + ) + high_confidence = ( + confidence_ratio >= 0.5 + ) # At least 50% of indicators show high confidence + + # Create confidence note + confidence_parts = [] + if variance_notes: + confidence_parts.extend(variance_notes) + if p99_notes: + confidence_parts.extend(p99_notes) + + confidence_note = "; ".join(confidence_parts) if confidence_parts else None + + if confidence_note: + confidence_level = "HIGH" if high_confidence else "LOW" + cv_explanation = "CV=coefficient of variation (data stability: <30% stable, 30-50% moderate, >50% unstable)" + confidence_note = ( + f"confidence={confidence_level} ({confidence_note}; {cv_explanation})" + ) + + logging.info( + f"Variance and p99 analysis completed for {test_name}: confidence={confidence_ratio:.2f}, high_confidence={high_confidence}" + ) + return confidence_note, high_confidence + + except Exception as e: + logging.error(f"Error in variance and p99 analysis for {test_name}: {e}") + return None, False + + +def check_latency_for_unstable_throughput( + rts, + test_name, + baseline_str, + comparison_str, + by_str_baseline, + by_str_comparison, + baseline_deployment_name, + comparison_deployment_name, + tf_triggering_env, + from_ts_ms, + to_ts_ms, + last_n_baseline, + last_n_comparison, + first_n_baseline, + first_n_comparison, + running_platform, + baseline_architecture, + comparison_architecture, + verbose, +): + """ + 
Check latency (p50) for unstable throughput metrics to provide additional context. + Returns a tuple: (note_string, confirms_regression_bool, regression_details_dict) + """ + logging.info(f"Starting latency check for unstable throughput test: {test_name}") + try: + # Build filters for p50 latency metric using both metric=p50 and metric-type=(latencystats) + filters_baseline = [ + f"{by_str_baseline}={baseline_str}", + "metric=p50", + "metric-type=(latencystats)", + f"test_name={test_name}", + f"deployment_name={baseline_deployment_name}", + f"triggering_env={tf_triggering_env}", + ] + filters_comparison = [ + f"{by_str_comparison}={comparison_str}", + "metric=p50", + "metric-type=(latencystats)", + f"test_name={test_name}", + f"deployment_name={comparison_deployment_name}", + f"triggering_env={tf_triggering_env}", + ] + + # Add optional filters + if running_platform is not None: + filters_baseline.append(f"running_platform={running_platform}") + filters_comparison.append(f"running_platform={running_platform}") + if baseline_architecture != ARCH_X86: + filters_baseline.append(f"arch={baseline_architecture}") + if comparison_architecture != ARCH_X86: + filters_comparison.append(f"arch={comparison_architecture}") + + # Query for p50 latency time-series + logging.info(f"Querying p50 latencystats time-series for {test_name}") + logging.info(f"Baseline filters: {filters_baseline}") + logging.info(f"Comparison filters: {filters_comparison}") + + baseline_latency_ts = rts.ts().queryindex(filters_baseline) + comparison_latency_ts = rts.ts().queryindex(filters_comparison) + + logging.info( + f"Found {len(baseline_latency_ts)} baseline p50 latency time-series" + ) + logging.info( + f"Found {len(comparison_latency_ts)} comparison p50 latency time-series" + ) + + if verbose and baseline_latency_ts: + logging.info(f"Baseline latency time-series: {baseline_latency_ts}") + if verbose and comparison_latency_ts: + logging.info(f"Comparison latency time-series: {comparison_latency_ts}") + + # Filter out target time-series and unwanted commands + def should_exclude_timeseries(ts_name): + """Check if time-series should be excluded based on command""" + # Exclude target time-series + if "target" in ts_name: + return True + + # Convert to lowercase for case-insensitive matching + ts_name_lower = ts_name.lower() + + # Exclude administrative commands (case-insensitive) + excluded_commands = ["config", "info", "ping", "cluster", "resetstat"] + return any(cmd in ts_name_lower for cmd in excluded_commands) + + baseline_latency_ts_before = len(baseline_latency_ts) + comparison_latency_ts_before = len(comparison_latency_ts) + + # Apply filtering and log what gets excluded + baseline_excluded = [ + ts for ts in baseline_latency_ts if should_exclude_timeseries(ts) + ] + comparison_excluded = [ + ts for ts in comparison_latency_ts if should_exclude_timeseries(ts) + ] + + baseline_latency_ts = [ + ts for ts in baseline_latency_ts if not should_exclude_timeseries(ts) + ] + comparison_latency_ts = [ + ts for ts in comparison_latency_ts if not should_exclude_timeseries(ts) + ] + + logging.info( + f"After filtering: baseline {baseline_latency_ts_before} -> {len(baseline_latency_ts)}, " + f"comparison {comparison_latency_ts_before} -> {len(comparison_latency_ts)}" + ) + + if baseline_excluded: + logging.info( + f"Excluded {len(baseline_excluded)} baseline administrative command time-series" + ) + if verbose: + for ts in baseline_excluded: + logging.info(f" Excluded baseline: {ts}") + if comparison_excluded: + logging.info( + 
f"Excluded {len(comparison_excluded)} comparison administrative command time-series" + ) + if verbose: + for ts in comparison_excluded: + logging.info(f" Excluded comparison: {ts}") + + if len(baseline_latency_ts) == 0 or len(comparison_latency_ts) == 0: + logging.warning( + f"No p50 latency data found for {test_name} after filtering" + ) + return None, False, None + + # Extract command names from time-series to match baseline and comparison + def extract_command_from_ts(ts_name): + """Extract meaningful command name from time-series name""" + import re + + # Look for latencystats_latency_percentiles_usec__p50 pattern + match = re.search( + r"latencystats_latency_percentiles_usec_([^_/]+)_p50", ts_name + ) + if match: + return match.group(1) + + # Look for command= pattern in the time-series name + match = re.search(r"command=([^/]+)", ts_name) + if match: + return match.group(1) + + # If no specific pattern found, try to extract from the end of the path + # e.g., .../Ops/sec/GET -> GET + parts = ts_name.split("/") + if len(parts) > 0: + return parts[-1] + return "unknown" + + # Group time-series by command + baseline_by_command = {} + comparison_by_command = {} + + for ts in baseline_latency_ts: + cmd = extract_command_from_ts(ts) + if verbose: + logging.info(f"Baseline time-series '{ts}' -> command '{cmd}'") + if cmd not in baseline_by_command: + baseline_by_command[cmd] = [] + baseline_by_command[cmd].append(ts) + + for ts in comparison_latency_ts: + cmd = extract_command_from_ts(ts) + if verbose: + logging.info(f"Comparison time-series '{ts}' -> command '{cmd}'") + if cmd not in comparison_by_command: + comparison_by_command[cmd] = [] + comparison_by_command[cmd].append(ts) + + # Find common commands between baseline and comparison + common_commands = set(baseline_by_command.keys()) & set( + comparison_by_command.keys() + ) + + logging.info(f"Baseline commands found: {sorted(baseline_by_command.keys())}") + logging.info( + f"Comparison commands found: {sorted(comparison_by_command.keys())}" + ) + logging.info( + f"Common commands for latency comparison: {sorted(common_commands)}" + ) + + if not common_commands: + logging.warning( + f"No common commands found for latency comparison in {test_name}" + ) + return None, False, None + + latency_notes = [] + significant_latency_increases = ( + 0 # Track commands with significant latency increases + ) + regression_details = {"test_name": test_name, "commands": []} + + # Compare latency for each command individually + for command in sorted(common_commands): + logging.info(f"Analyzing latency for command: {command}") + baseline_ts_list = baseline_by_command[command] + comparison_ts_list = comparison_by_command[command] + + logging.info( + f" Command {command}: {len(baseline_ts_list)} baseline, {len(comparison_ts_list)} comparison time-series" + ) + + # If multiple time-series for the same command, try to get the best one + if len(baseline_ts_list) > 1: + logging.info( + f" Multiple baseline time-series for {command}, filtering..." + ) + baseline_ts_list = get_only_Totals(baseline_ts_list) + if len(comparison_ts_list) > 1: + logging.info( + f" Multiple comparison time-series for {command}, filtering..." 
+ ) + comparison_ts_list = get_only_Totals(comparison_ts_list) + + if len(baseline_ts_list) != 1 or len(comparison_ts_list) != 1: + logging.warning( + f" Skipping {command}: baseline={len(baseline_ts_list)}, comparison={len(comparison_ts_list)} time-series" + ) + continue + + # Get latency data for this command + baseline_latency_data = [] + comparison_latency_data = [] + + for ts_name in baseline_ts_list: + datapoints = rts.ts().revrange(ts_name, from_ts_ms, to_ts_ms) + baseline_latency_data.extend(datapoints) + + for ts_name in comparison_ts_list: + datapoints = rts.ts().revrange(ts_name, from_ts_ms, to_ts_ms) + comparison_latency_data.extend(datapoints) + + if len(baseline_latency_data) == 0 or len(comparison_latency_data) == 0: + logging.warning( + f" No latency data for {command}: baseline={len(baseline_latency_data)}, comparison={len(comparison_latency_data)} datapoints" + ) + continue + + logging.info( + f" Command {command}: {len(baseline_latency_data)} baseline, {len(comparison_latency_data)} comparison datapoints" + ) + + # Calculate latency statistics for this command + baseline_latency_values = [] + comparison_latency_values = [] + + (_, baseline_latency_median, _) = get_v_pct_change_and_largest_var( + baseline_latency_data, + 0, + 0, + baseline_latency_values, + 0, + last_n_baseline, + verbose, + first_n_baseline, + ) + + (_, comparison_latency_median, _) = get_v_pct_change_and_largest_var( + comparison_latency_data, + 0, + 0, + comparison_latency_values, + 0, + last_n_comparison, + verbose, + first_n_comparison, + ) + + if baseline_latency_median == "N/A" or comparison_latency_median == "N/A": + logging.warning( + f" Could not calculate median for {command}: baseline={baseline_latency_median}, comparison={comparison_latency_median}" + ) + continue + + # Calculate variance (coefficient of variation) for both baseline and comparison + baseline_latency_mean = ( + statistics.mean(baseline_latency_values) + if baseline_latency_values + else 0 + ) + baseline_latency_stdev = ( + statistics.stdev(baseline_latency_values) + if len(baseline_latency_values) > 1 + else 0 + ) + baseline_latency_cv = ( + (baseline_latency_stdev / baseline_latency_mean * 100) + if baseline_latency_mean > 0 + else float("inf") + ) + + comparison_latency_mean = ( + statistics.mean(comparison_latency_values) + if comparison_latency_values + else 0 + ) + comparison_latency_stdev = ( + statistics.stdev(comparison_latency_values) + if len(comparison_latency_values) > 1 + else 0 + ) + comparison_latency_cv = ( + (comparison_latency_stdev / comparison_latency_mean * 100) + if comparison_latency_mean > 0 + else float("inf") + ) + + # Calculate latency change (for latency, lower is better) + latency_change = ( + float(comparison_latency_median) / float(baseline_latency_median) - 1 + ) * 100.0 + + logging.info( + f" Command {command}: baseline p50={baseline_latency_median:.2f} (CV={baseline_latency_cv:.1f}%), comparison p50={comparison_latency_median:.2f} (CV={comparison_latency_cv:.1f}%), change={latency_change:.1f}%" + ) + + # Check if latency data is too unstable to be reliable + latency_data_unstable = ( + baseline_latency_cv > 50.0 or comparison_latency_cv > 50.0 + ) + + if latency_data_unstable: + # Mark as unstable latency data + unstable_reason = [] + if baseline_latency_cv > 50.0: + unstable_reason.append(f"baseline CV={baseline_latency_cv:.1f}%") + if comparison_latency_cv > 50.0: + unstable_reason.append( + f"comparison CV={comparison_latency_cv:.1f}%" + ) + + latency_notes.append( + f"{command} p50 
UNSTABLE ({', '.join(unstable_reason)} - data too noisy for reliable analysis)" + ) + logging.warning( + f" Command {command}: UNSTABLE latency data detected - {', '.join(unstable_reason)}" + ) + elif ( + abs(latency_change) > 5.0 + ): # Only report significant latency changes for stable data + direction = "increased" if latency_change > 0 else "decreased" + + # Adjust significance threshold based on baseline variance + if baseline_latency_cv < 30.0: + # Low variance - use standard threshold + significance_threshold = 10.0 + elif baseline_latency_cv < 50.0: + # Moderate variance - require larger change + significance_threshold = 15.0 + else: + # High variance - require much larger change + significance_threshold = 25.0 + + latency_notes.append( + f"{command} p50 {direction} {abs(latency_change):.1f}% (baseline CV={baseline_latency_cv:.1f}%)" + ) + logging.info( + f" Command {command}: SIGNIFICANT latency change detected ({direction} {abs(latency_change):.1f}%, baseline CV={baseline_latency_cv:.1f}%)" + ) + + # Track significant latency increases (potential regression confirmation) + if latency_change > significance_threshold: + significant_latency_increases += 1 + regression_details["commands"].append( + { + "command": command, + "change_percent": latency_change, + "direction": direction, + "baseline_cv": baseline_latency_cv, + "comparison_cv": comparison_latency_cv, + } + ) + logging.info( + f" Command {command}: CONFIRMS regression (change={latency_change:.1f}% > threshold={significance_threshold:.1f}%)" + ) + else: + logging.info( + f" Command {command}: Change below significance threshold (change={latency_change:.1f}% <= threshold={significance_threshold:.1f}%)" + ) + elif verbose: + latency_notes.append( + f"{command} p50 stable (CV={baseline_latency_cv:.1f}%)" + ) + logging.info( + f" Command {command}: latency stable (change={latency_change:.1f}%, baseline CV={baseline_latency_cv:.1f}%)" + ) + + # Determine if latency confirms regression + confirms_regression = significant_latency_increases > 0 + + # Return combined latency notes + if latency_notes: + result = "; ".join(latency_notes) + logging.info(f"Latency check completed for {test_name}: {result}") + return ( + result, + confirms_regression, + regression_details if confirms_regression else None, + ) + else: + result = "p50 latency stable" if common_commands else None + logging.info( + f"Latency check completed for {test_name}: {result or 'no data'}" + ) + return result, False, None + + except Exception as e: + logging.error(f"Error checking latency for {test_name}: {e}") + return None, False, None + + def get_only_Totals(baseline_timeseries): logging.warning("\t\tTime-series: {}".format(", ".join(baseline_timeseries))) logging.info("Checking if Totals will reduce timeseries.") @@ -995,6 +2423,37 @@ def get_only_Totals(baseline_timeseries): for ts_name in baseline_timeseries: if "Totals" in ts_name: new_base.append(ts_name) + + # If no "Totals" time-series found, try to pick the best alternative + if len(new_base) == 0: + logging.warning( + "No 'Totals' time-series found, trying to pick best alternative." 
+ ) + # Prefer time-series without quotes in metric names + unquoted_series = [ts for ts in baseline_timeseries if "'" not in ts] + if unquoted_series: + new_base = unquoted_series + else: + # Fall back to original list + new_base = baseline_timeseries + + # If we still have multiple time-series after filtering for "Totals", + # prefer the one without quotes in the metric name + if len(new_base) > 1: + logging.info("Multiple time-series found, preferring unquoted metric names.") + unquoted_series = [ts for ts in new_base if "'" not in ts] + if unquoted_series: + new_base = unquoted_series + + # If we still have multiple, take the first one + if len(new_base) > 1: + logging.warning( + "Still multiple time-series after filtering, taking the first one: {}".format( + new_base[0] + ) + ) + new_base = [new_base[0]] + baseline_timeseries = new_base return baseline_timeseries @@ -1061,11 +2520,38 @@ def add_line( percentage_change, table, test_name, + grafana_link_base=None, + baseline_branch=None, + baseline_version=None, + comparison_branch=None, + comparison_version=None, + from_date=None, + to_date=None, ): + grafana_link = None + if grafana_link_base is not None: + grafana_link = "{}?orgId=1".format(grafana_link_base) + grafana_link += f"&var-test_case={test_name}" + + if baseline_branch is not None: + grafana_link += f"&var-branch={baseline_branch}" + if baseline_version is not None: + grafana_link += f"&var-version={baseline_version}" + if comparison_branch is not None: + grafana_link += f"&var-branch={comparison_branch}" + if comparison_version is not None: + grafana_link += f"&var-version={comparison_version}" + grafana_link += "&from=now-30d&to=now" + + # Create test name with optional Grafana link + test_name_display = test_name + if grafana_link is not None: + test_name_display = f"[{test_name}]({grafana_link})" + percentage_change_str = "{:.1f}% ".format(percentage_change) table.append( [ - test_name, + test_name_display, baseline_v_str, comparison_v_str, percentage_change_str, @@ -1102,9 +2588,9 @@ def get_v_pct_change_and_largest_var( comparison_values.append(tuple[1]) comparison_df = pd.DataFrame(comparison_values) - comparison_median = float(comparison_df.median()) + comparison_median = float(comparison_df.median().iloc[0]) comparison_v = comparison_median - comparison_std = float(comparison_df.std()) + comparison_std = float(comparison_df.std().iloc[0]) if verbose: logging.info( "comparison_datapoints: {} value: {}; std-dev: {}; median: {}".format( diff --git a/redisbench_admin/environments/oss_cluster.py b/redisbench_admin/environments/oss_cluster.py index 526b6c3..8bd526e 100644 --- a/redisbench_admin/environments/oss_cluster.py +++ b/redisbench_admin/environments/oss_cluster.py @@ -89,6 +89,20 @@ def generate_meet_cmds(shard_count, shard_host, start_port): def setup_oss_cluster_from_conns(meet_cmds, redis_conns, shard_count): status = False try: + # Pre-setup validation: check uptime and cluster mode + for primary_pos, redis_conn in enumerate(redis_conns): + redis_conn.ping() + + server_info = redis_conn.info("server") + uptime = server_info.get("uptime_in_seconds", 0) + cluster_enabled = server_info.get("cluster_enabled", 0) + tcp_port = server_info.get("tcp_port", "n/a") + + logging.info( + f"Node {primary_pos} ({tcp_port}): uptime={uptime}s cluster_enabled={cluster_enabled}" + ) + + # Send meet commands for primary_pos, redis_conn in enumerate(redis_conns): logging.info( "Sending to primary #{} a total of {} MEET commands".format( @@ -138,6 +152,29 @@ def 
setup_oss_cluster_from_conns(meet_cmds, redis_conns, shard_count): ) logging.info("Node {}: cluster_state {}".format(n, cluster_state_ok)) sleep(1) + + # Post-setup validation: check uptime and cluster mode + sleep(10) + for primary_pos, redis_conn in enumerate(redis_conns): + redis_conn.ping() + + server_info = redis_conn.info("server") + uptime = server_info.get("uptime_in_seconds", 0) + server_info = redis_conn.info("cluster") + cluster_enabled = server_info.get("cluster_enabled", -1) + tcp_port = server_info.get("tcp_port", "n/a") + + logging.info( + f"Node {primary_pos} ({tcp_port}): uptime={uptime}s cluster_enabled={cluster_enabled}" + ) + + if cluster_enabled != 1: + logging.error( + "Node {}: cluster mode is not enabled (cluster_enabled={})".format( + primary_pos, cluster_enabled + ) + ) + return False status = True except redis.exceptions.RedisError as e: logging.warning("Received an error {}".format(e.__str__())) diff --git a/redisbench_admin/run/cluster.py b/redisbench_admin/run/cluster.py index 270cd1a..8aef21e 100644 --- a/redisbench_admin/run/cluster.py +++ b/redisbench_admin/run/cluster.py @@ -115,6 +115,12 @@ def spin_up_redis_cluster_remote_redis( logname, redis_7=True, ): + # Import the function from standalone module + from redisbench_admin.run_remote.standalone import ensure_redis_server_available + + # Ensure redis-server is available before trying to start cluster + ensure_redis_server_available(server_public_ip, username, private_key, ssh_port) + logging.info("Generating the remote redis-server command arguments") redis_process_commands = [] logfiles = [] diff --git a/redisbench_admin/run/metrics.py b/redisbench_admin/run/metrics.py index 3db76cc..45f16f9 100644 --- a/redisbench_admin/run/metrics.py +++ b/redisbench_admin/run/metrics.py @@ -188,8 +188,6 @@ def get_total_cpu(info_data): def collect_cpu_data(redis_conns=[], delta_secs: float = 5.0, delay_start: float = 1.0): - global BENCHMARK_CPU_STATS_GLOBAL - global BENCHMARK_RUNNING_GLOBAL import time counter = 0 diff --git a/redisbench_admin/run_local/args.py b/redisbench_admin/run_local/args.py index 00a7316..bd5da5b 100644 --- a/redisbench_admin/run_local/args.py +++ b/redisbench_admin/run_local/args.py @@ -40,4 +40,16 @@ def create_run_local_arguments(parser): default=IGNORE_KEYSPACE_ERRORS, help="Ignore keyspace check errors. 
Will still log them as errors", ) + parser.add_argument( + "--dry-run", + default=False, + action="store_true", + help="Setup environment and test connectivity without running benchmarks", + ) + parser.add_argument( + "--dry-run-with-preload", + default=False, + action="store_true", + help="Setup environment, preload data, and test connectivity without running benchmarks", + ) return parser diff --git a/redisbench_admin/run_local/run_local.py b/redisbench_admin/run_local/run_local.py index 622252a..4c7d46f 100644 --- a/redisbench_admin/run_local/run_local.py +++ b/redisbench_admin/run_local/run_local.py @@ -16,6 +16,7 @@ from redisbench_admin.utils.remote import ( get_project_ts_tags, push_data_to_redistimeseries, + perform_connectivity_test, ) import redisbench_admin.run.metrics @@ -326,6 +327,7 @@ def run_local_command_logic(args, project_name, project_version): ) # run the benchmark + cpu_stats_thread = threading.Thread( target=collect_cpu_data, args=(redis_conns, 5.0, 1.0), @@ -335,9 +337,60 @@ def run_local_command_logic(args, project_name, project_version): ) cpu_stats_thread.start() benchmark_start_time = datetime.datetime.now() - stdout, stderr = run_local_benchmark( - benchmark_tool, command + logging.info( + "Running benchmark command: {}".format(command) ) + # Handle dry-run modes + if args.dry_run or args.dry_run_with_preload: + logging.info( + "๐Ÿƒ Dry-run mode detected - performing connectivity tests" + ) + + # Test basic connectivity after setup + connectivity_success = perform_connectivity_test( + redis_conns, "after local environment setup" + ) + + if args.dry_run_with_preload: + logging.info( + "๐Ÿ“ฆ Dry-run with preload - data loading already completed during setup" + ) + # Test connectivity after preload (data was loaded during local_db_spin) + connectivity_success = ( + perform_connectivity_test( + redis_conns, "after data preloading" + ) + and connectivity_success + ) + + # Print dry-run summary + logging.info("=" * 50) + logging.info("๐ŸŽฏ DRY-RUN SUMMARY") + logging.info("=" * 50) + logging.info( + f"โœ… Database: {setup_type} ({'cluster' if cluster_api_enabled else 'standalone'}) started locally" + ) + logging.info( + f"โœ… Client tools: {benchmark_tool} available" + ) + logging.info( + f"{'โœ…' if connectivity_success else 'โŒ'} Connectivity: {len(redis_conns)} connection(s) tested" + ) + if args.dry_run_with_preload: + logging.info( + "โœ… Data preload: Completed during setup" + ) + logging.info("๐Ÿ Dry-run completed successfully") + logging.info( + "โญ๏ธ Benchmark execution skipped (dry-run mode)" + ) + logging.info("=" * 50) + + # Skip benchmark execution and continue to next test + else: + stdout, stderr = run_local_benchmark( + benchmark_tool, command + ) benchmark_end_time = datetime.datetime.now() redisbench_admin.run.metrics.BENCHMARK_RUNNING_GLOBAL = ( False @@ -364,10 +417,10 @@ def run_local_command_logic(args, project_name, project_version): benchmark_end_time, benchmark_start_time ) ) - - logging.info("Extracting the benchmark results") - logging.info("stdout: {}".format(stdout)) - logging.info("stderr: {}".format(stderr)) + if args.dry_run is False: + logging.info("Extracting the benchmark results") + logging.info("stdout: {}".format(stdout)) + logging.info("stderr: {}".format(stderr)) ( _, @@ -420,53 +473,54 @@ def run_local_command_logic(args, project_name, project_version): test_name, tf_triggering_env, ) - - post_process_benchmark_results( - benchmark_tool, - local_benchmark_output_filename, - start_time_ms, - start_time_str, - stdout, - ) 
results_dict = {} - with open( - local_benchmark_output_filename, "r" - ) as json_file: - results_dict = json.load(json_file) - print_results_table_stdout( - benchmark_config, - default_metrics, - results_dict, - setup_name, - setup_type, - test_name, - total_shards_cpu_usage, - overall_end_time_metrics, - [ - "memory_used_memory", - "memory_used_memory_dataset", - ], - ) - export_redis_metrics( - artifact_version, - end_time_ms, - overall_end_time_metrics, - rts, - setup_name, - setup_type, - test_name, - tf_github_branch, - tf_github_org, - tf_github_repo, - tf_triggering_env, - {"metric-type": "redis-metrics"}, - 0, + if args.dry_run is False: + post_process_benchmark_results( + benchmark_tool, + local_benchmark_output_filename, + start_time_ms, + start_time_str, + stdout, ) - # check KPIs - return_code = results_dict_kpi_check( - benchmark_config, results_dict, return_code - ) + with open( + local_benchmark_output_filename, "r" + ) as json_file: + results_dict = json.load(json_file) + print_results_table_stdout( + benchmark_config, + default_metrics, + results_dict, + setup_name, + setup_type, + test_name, + total_shards_cpu_usage, + overall_end_time_metrics, + [ + "memory_used_memory", + "memory_used_memory_dataset", + ], + ) + export_redis_metrics( + artifact_version, + end_time_ms, + overall_end_time_metrics, + rts, + setup_name, + setup_type, + test_name, + tf_github_branch, + tf_github_org, + tf_github_repo, + tf_triggering_env, + {"metric-type": "redis-metrics"}, + 0, + ) + + # check KPIs + return_code = results_dict_kpi_check( + benchmark_config, results_dict, return_code + ) metadata_tags = get_metadata_tags(benchmark_config) ( @@ -508,7 +562,10 @@ def run_local_command_logic(args, project_name, project_version): "Some unexpected exception was caught " "during local work. Failing test...." 
) - logging.critical(sys.exc_info()[0]) + if len(sys.exc_info()) > 0: + logging.critical(sys.exc_info()[0]) + else: + logging.critical(sys.exc_info()) print("-" * 60) traceback.print_exc(file=sys.stdout) print("-" * 60) diff --git a/redisbench_admin/run_remote/args.py b/redisbench_admin/run_remote/args.py index 0b22382..cd643f8 100644 --- a/redisbench_admin/run_remote/args.py +++ b/redisbench_admin/run_remote/args.py @@ -112,6 +112,18 @@ def create_run_remote_arguments(parser): action="store_true", help="skip environment variables check", ) + parser.add_argument( + "--dry-run", + default=False, + action="store_true", + help="Setup environment and test connectivity without running benchmarks", + ) + parser.add_argument( + "--dry-run-with-preload", + default=False, + action="store_true", + help="Setup environment, preload data, and test connectivity without running benchmarks", + ) parser.add_argument( "--continue-on-module-check-error", default=False, diff --git a/redisbench_admin/run_remote/remote_db.py b/redisbench_admin/run_remote/remote_db.py index 7550650..528af0f 100644 --- a/redisbench_admin/run_remote/remote_db.py +++ b/redisbench_admin/run_remote/remote_db.py @@ -406,9 +406,17 @@ def db_error_artifacts( upload_s3, username, ): + # Import the zip check function + from redisbench_admin.run_remote.standalone import ensure_zip_available + + # Ensure zip is available before trying to use it + ensure_zip_available(server_public_ip, username, private_key, db_ssh_port) + local_zipfile = "{}.zip".format(logname) remote_zipfile = "/home/{}/{}".format(username, local_zipfile) - execute_remote_commands( + + # Create zip file + zip_result = execute_remote_commands( server_public_ip, username, private_key, @@ -417,26 +425,57 @@ def db_error_artifacts( ], db_ssh_port, ) - failed_remote_run_artifact_store( - upload_s3, - server_public_ip, - dirname, - remote_zipfile, - local_zipfile, - s3_bucket_name, - s3_bucket_path, - username, - private_key, - ) + + # Check if zip creation was successful + zip_success = True + for pos, res_pos in enumerate(zip_result): + [recv_exit_status, stdout, stderr] = res_pos + if recv_exit_status != 0: + logging.warning( + "Zip creation failed with exit code {}. stdout: {}. stderr: {}".format( + recv_exit_status, stdout, stderr + ) + ) + zip_success = False + + # Only try to upload if zip was created successfully + if zip_success: + try: + failed_remote_run_artifact_store( + upload_s3, + server_public_ip, + dirname, + remote_zipfile, + local_zipfile, + s3_bucket_name, + s3_bucket_path, + username, + private_key, + ) + except Exception as e: + logging.warning( + "Failed to upload zip file to S3: {}. Continuing without upload.".format( + e + ) + ) + else: + logging.warning("Skipping S3 upload due to zip creation failure") if len(full_logfiles) > 0: - failed_remote_run_artifact_store( - upload_s3, - server_public_ip, - dirname, - full_logfiles[0], - logname, - s3_bucket_name, - s3_bucket_path, - username, - private_key, - ) + try: + failed_remote_run_artifact_store( + upload_s3, + server_public_ip, + dirname, + full_logfiles[0], + logname, + s3_bucket_name, + s3_bucket_path, + username, + private_key, + ) + except Exception as e: + logging.warning( + "Failed to upload logfile to S3: {}. 
Continuing without upload.".format( + e + ) + ) diff --git a/redisbench_admin/run_remote/remote_helpers.py b/redisbench_admin/run_remote/remote_helpers.py index c220ef8..7d7af34 100644 --- a/redisbench_admin/run_remote/remote_helpers.py +++ b/redisbench_admin/run_remote/remote_helpers.py @@ -71,6 +71,23 @@ def remote_tool_pre_bench_step( logging.info( f"Settting up remote tool {benchmark_tool} requirements. architecture ={architecture}" ) + + # Check and install benchmark tools if needed + if benchmark_tool == "memtier_benchmark": + from redisbench_admin.run_remote.standalone import ( + ensure_memtier_benchmark_available, + ) + + ensure_memtier_benchmark_available( + client_public_ip, username, private_key, client_ssh_port + ) + elif benchmark_tool == "redis-benchmark": + from redisbench_admin.run_remote.standalone import ensure_redis_server_available + + # redis-benchmark comes with redis-server, so ensure redis-server is installed + ensure_redis_server_available( + client_public_ip, username, private_key, client_ssh_port + ) if benchmark_tool == "redisgraph-benchmark-go": setup_remote_benchmark_tool_redisgraph_benchmark_go( client_public_ip, diff --git a/redisbench_admin/run_remote/run_remote.py b/redisbench_admin/run_remote/run_remote.py index ed1a1a2..ed796ce 100644 --- a/redisbench_admin/run_remote/run_remote.py +++ b/redisbench_admin/run_remote/run_remote.py @@ -69,6 +69,7 @@ get_project_ts_tags, push_data_to_redistimeseries, fetch_remote_id_from_config, + perform_connectivity_test, ) from redisbench_admin.utils.utils import ( @@ -161,7 +162,24 @@ def run_remote_command_logic(args, project_name, project_version): ) webhook_client_slack = WebhookClient(webhook_url) - if args.skip_env_vars_verify is False: + # Only check AWS credentials when actually needed + needs_aws_for_infrastructure = ( + args.inventory is None + ) # No inventory = need to deploy with Terraform + needs_aws_for_s3 = args.upload_results_s3 # S3 upload enabled + + if args.skip_env_vars_verify is False and ( + needs_aws_for_infrastructure or needs_aws_for_s3 + ): + # Log why AWS credentials are being checked + aws_reasons = [] + if needs_aws_for_infrastructure: + aws_reasons.append("infrastructure deployment (no --inventory provided)") + if needs_aws_for_s3: + aws_reasons.append("S3 upload (--upload_results_s3 enabled)") + + logging.info("AWS credentials required for: {}".format(", ".join(aws_reasons))) + env_check_status, failure_reason = check_ec2_env() if env_check_status is False: if webhook_notifications_active: @@ -177,6 +195,10 @@ def run_remote_command_logic(args, project_name, project_version): ) logging.critical("{}. 
Exiting right away!".format(failure_reason)) exit(1) + elif args.skip_env_vars_verify is False: + logging.info( + "AWS credentials check skipped (using --inventory and S3 upload disabled)" + ) continue_on_module_check_error = args.continue_on_module_check_error module_check_status, error_message = redis_modules_check(local_module_files) @@ -674,6 +696,62 @@ def run_remote_command_logic(args, project_name, project_version): ) ) + # Handle dry-run modes + if args.dry_run or args.dry_run_with_preload: + logging.info( + "๐Ÿƒ Dry-run mode detected - performing connectivity tests" + ) + + # Test basic connectivity after setup + connectivity_success = ( + perform_connectivity_test( + redis_conns, "after environment setup" + ) + ) + + if args.dry_run_with_preload: + logging.info( + "๐Ÿ“ฆ Dry-run with preload - data loading already completed during setup" + ) + # Test connectivity after preload (data was loaded during remote_db_spin) + connectivity_success = ( + perform_connectivity_test( + redis_conns, "after data preloading" + ) + and connectivity_success + ) + + # Print dry-run summary + logging.info("=" * 50) + logging.info("๐ŸŽฏ DRY-RUN SUMMARY") + logging.info("=" * 50) + logging.info( + f"โœ… Infrastructure: {'Deployed' if args.inventory is None else 'Using existing'}" + ) + logging.info( + f"โœ… Database: {setup_type} ({'cluster' if cluster_enabled else 'standalone'}) started" + ) + logging.info( + f"โœ… Client tools: Setup completed on {client_public_ip}" + ) + logging.info( + f"{'โœ…' if connectivity_success else 'โŒ'} Connectivity: {len(redis_conns)} connection(s) tested" + ) + if args.dry_run_with_preload: + logging.info( + "โœ… Data preload: Completed during setup" + ) + logging.info( + "๐Ÿ Dry-run completed successfully" + ) + logging.info( + "โญ๏ธ Benchmark execution skipped (dry-run mode)" + ) + logging.info("=" * 50) + + # Skip benchmark execution and continue to next test + continue + logging.info( "Will store benchmark json output to local file {}".format( local_bench_fname diff --git a/redisbench_admin/run_remote/standalone.py b/redisbench_admin/run_remote/standalone.py index 7d3bd23..4c9904e 100644 --- a/redisbench_admin/run_remote/standalone.py +++ b/redisbench_admin/run_remote/standalone.py @@ -15,6 +15,139 @@ from redisbench_admin.utils.utils import redis_server_config_module_part +def ensure_redis_server_available(server_public_ip, username, private_key, port=22): + """Check if redis-server is available, install if not""" + logging.info("Checking if redis-server is available on remote server...") + + # Check if redis-server exists + check_result = execute_remote_commands( + server_public_ip, username, private_key, ["which redis-server"], port + ) + + # Check the result + if len(check_result) > 0: + [recv_exit_status, stdout, stderr] = check_result[0] + if recv_exit_status != 0: + logging.info("redis-server not found, installing Redis...") + + # Install Redis using the provided commands + install_commands = [ + "sudo apt-get install lsb-release curl gpg -y", + "curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o /usr/share/keyrings/redis-archive-keyring.gpg", + "sudo chmod 644 /usr/share/keyrings/redis-archive-keyring.gpg", + 'echo "deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/redis.list', + "sudo apt-get update", + "sudo apt-get install redis -y", + "sudo systemctl disable redis-server", + ] + + install_result = execute_remote_commands( + 
server_public_ip, username, private_key, install_commands, port + ) + + # Check if installation was successful + for pos, res_pos in enumerate(install_result): + [recv_exit_status, stdout, stderr] = res_pos + if recv_exit_status != 0: + logging.warning( + "Redis installation command {} returned exit code {}. stdout: {}. stderr: {}".format( + pos, recv_exit_status, stdout, stderr + ) + ) + + logging.info("Redis installation completed and auto-start disabled") + else: + logging.info("redis-server is already available") + else: + logging.error("Failed to check redis-server availability") + + +def ensure_zip_available(server_public_ip, username, private_key, port=22): + """Check if zip is available, install if not""" + logging.info("Checking if zip is available on remote server...") + + # Check if zip exists + check_result = execute_remote_commands( + server_public_ip, username, private_key, ["which zip"], port + ) + + # Check the result + if len(check_result) > 0: + [recv_exit_status, stdout, stderr] = check_result[0] + if recv_exit_status != 0: + logging.info("zip not found, installing...") + + # Install zip + install_commands = ["sudo apt-get install zip -y"] + + install_result = execute_remote_commands( + server_public_ip, username, private_key, install_commands, port + ) + + # Check if installation was successful + for pos, res_pos in enumerate(install_result): + [recv_exit_status, stdout, stderr] = res_pos + if recv_exit_status != 0: + logging.warning( + "Zip installation command {} returned exit code {}. stdout: {}. stderr: {}".format( + pos, recv_exit_status, stdout, stderr + ) + ) + + logging.info("Zip installation completed") + else: + logging.info("zip is already available") + else: + logging.error("Failed to check zip availability") + + +def ensure_memtier_benchmark_available( + client_public_ip, username, private_key, port=22 +): + """Check if memtier_benchmark is available, install if not""" + logging.info("Checking if memtier_benchmark is available on remote client...") + + # Check if memtier_benchmark exists + check_result = execute_remote_commands( + client_public_ip, username, private_key, ["which memtier_benchmark"], port + ) + + # Check the result + if len(check_result) > 0: + [recv_exit_status, stdout, stderr] = check_result[0] + if recv_exit_status != 0: + logging.info("memtier_benchmark not found, installing...") + + # Install memtier_benchmark using the provided commands + install_commands = [ + "sudo apt install lsb-release curl gpg -y", + "curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o /usr/share/keyrings/redis-archive-keyring.gpg", + 'echo "deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/redis.list', + "sudo apt-get update", + "sudo apt-get install memtier-benchmark -y", + ] + + install_result = execute_remote_commands( + client_public_ip, username, private_key, install_commands, port + ) + + # Check if installation was successful + for pos, res_pos in enumerate(install_result): + [recv_exit_status, stdout, stderr] = res_pos + if recv_exit_status != 0: + logging.warning( + "memtier_benchmark installation command {} returned exit code {}. stdout: {}. 
stderr: {}".format( + pos, recv_exit_status, stdout, stderr + ) + ) + + logging.info("memtier_benchmark installation completed") + else: + logging.info("memtier_benchmark is already available") + else: + logging.error("Failed to check memtier_benchmark availability") + + def spin_up_standalone_remote_redis( temporary_dir, server_public_ip, @@ -27,6 +160,9 @@ def spin_up_standalone_remote_redis( modules_configuration_parameters_map={}, redis_7=True, ): + # Ensure redis-server is available before trying to start it + ensure_redis_server_available(server_public_ip, username, private_key, port) + full_logfile, initial_redis_cmd = generate_remote_standalone_redis_cmd( logfile, redis_configuration_parameters, diff --git a/redisbench_admin/utils/remote.py b/redisbench_admin/utils/remote.py index df415c0..ada875c 100644 --- a/redisbench_admin/utils/remote.py +++ b/redisbench_admin/utils/remote.py @@ -1189,3 +1189,31 @@ def check_ec2_env(): logging.error(error_message) return status, error_message + + +def perform_connectivity_test(redis_conns, test_description=""): + """Perform PING test on all Redis connections""" + logging.info(f"๐Ÿ” Performing connectivity test: {test_description}") + + success_count = 0 + total_count = len(redis_conns) + + for i, conn in enumerate(redis_conns): + try: + result = conn.ping() + if result: + logging.info(f"โœ… Connection {i}: PING successful") + success_count += 1 + else: + logging.error(f"โŒ Connection {i}: PING returned False") + except Exception as e: + logging.error(f"โŒ Connection {i}: PING failed - {e}") + + if success_count == total_count: + logging.info(f"๐ŸŽ‰ All {total_count} connectivity tests passed!") + return True + else: + logging.error( + f"๐Ÿ’ฅ {total_count - success_count}/{total_count} connectivity tests failed!" 
+ ) + return False diff --git a/redisbench_admin/utils/utils.py b/redisbench_admin/utils/utils.py index b86fcb7..de835e1 100644 --- a/redisbench_admin/utils/utils.py +++ b/redisbench_admin/utils/utils.py @@ -128,33 +128,51 @@ def upload_artifacts_to_s3( if region_name is None: region_name = EC2_REGION logging.info("-- uploading results to s3 -- ") - if aws_access_key_id is not None and aws_secret_access_key is not None: - logging.info("-- Using REQUEST PROVIDED AWS credentials -- ") - session = boto3.Session( - aws_access_key_id, aws_secret_access_key, aws_session_token, region_name - ) - s3 = session.resource("s3") - else: - logging.info("-- Using default AWS credentials -- ") - s3 = boto3.resource("s3") - bucket = s3.Bucket(s3_bucket_name) - progress = tqdm(unit="files", total=len(artifacts)) - for full_artifact_path in artifacts: - artifact = os.path.basename(full_artifact_path) - object_key = "{bucket_path}{filename}".format( - bucket_path=s3_bucket_path, filename=artifact - ) + try: + if aws_access_key_id is not None and aws_secret_access_key is not None: + logging.info("-- Using REQUEST PROVIDED AWS credentials -- ") + session = boto3.Session( + aws_access_key_id, aws_secret_access_key, aws_session_token, region_name + ) + s3 = session.resource("s3") + else: + logging.info("-- Using default AWS credentials -- ") + s3 = boto3.resource("s3") + bucket = s3.Bucket(s3_bucket_name) + progress = tqdm(unit="files", total=len(artifacts)) + + for full_artifact_path in artifacts: + try: + artifact = os.path.basename(full_artifact_path) + object_key = "{bucket_path}{filename}".format( + bucket_path=s3_bucket_path, filename=artifact + ) - bucket.upload_file(full_artifact_path, object_key) - object_acl = s3.ObjectAcl(s3_bucket_name, object_key) - object_acl.put(ACL="public-read") - progress.update() - url = "https://s3.{0}.amazonaws.com/{1}/{2}{3}".format( - region_name, s3_bucket_name, s3_bucket_path, quote_plus(artifact) + bucket.upload_file(full_artifact_path, object_key) + object_acl = s3.ObjectAcl(s3_bucket_name, object_key) + object_acl.put(ACL="public-read") + progress.update() + url = "https://s3.{0}.amazonaws.com/{1}/{2}{3}".format( + region_name, s3_bucket_name, s3_bucket_path, quote_plus(artifact) + ) + artifacts_map[artifact] = url + logging.info("Successfully uploaded {} to S3".format(artifact)) + except Exception as e: + logging.warning( + "Failed to upload artifact {} to S3: {}. Skipping this file.".format( + full_artifact_path, e + ) + ) + progress.update() + progress.close() + except Exception as e: + logging.error( + "Failed to initialize S3 connection: {}. No files will be uploaded.".format( + e + ) ) - artifacts_map[artifact] = url - progress.close() + return artifacts_map diff --git a/scripts/compliance.sh b/scripts/compliance.sh new file mode 100644 index 0000000..eb8cc7c --- /dev/null +++ b/scripts/compliance.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +# Compliance checks script for redisbench-admin +set -e + +echo "๐Ÿ” Running compliance checks..." + +echo "๐Ÿ“ Checking code formatting with black..." +poetry run black --check redisbench_admin + +echo "๐Ÿ” Running linting with flake8..." +poetry run flake8 redisbench_admin + +echo "โœ… All compliance checks passed!" 
diff --git a/tests/test_common.py b/tests/test_common.py index 735943c..f42702d 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -172,7 +172,15 @@ def test_common_exporter_logic(): # negative test common_exporter_logic(None, None, None, None, None, None, None, None, None, None) try: - rts = redis.Redis(port=16379) + import os + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) + rts_host = os.getenv("RTS_DATASINK_HOST", None) + rts_pass = "" + if rts_host is None: + return + rts = redis.Redis(port=rts_port, host=rts_host) rts.ping() with open( "./tests/test_data/redis-benchmark-full-suite-1Mkeys-100B.yml", "r" @@ -508,9 +516,16 @@ def test_dbconfig_keyspacelen_check(): from redis import StrictRedis from redis.exceptions import ConnectionError - redis_port = 16379 + import os + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) + rts_host = os.getenv("RTS_DATASINK_HOST", None) + rts_pass = "" + if rts_host is None: + return try: - redis = StrictRedis(port=redis_port) + redis = StrictRedis(port=rts_port, host=rts_host) redis.ping() redis.flushall() redis_conns = [redis] @@ -520,18 +535,19 @@ def test_dbconfig_keyspacelen_check(): ) as yml_file: benchmark_config = yaml.safe_load(yml_file) # no keyspace len check - result = dbconfig_keyspacelen_check(benchmark_config, redis_conns) + result = dbconfig_keyspacelen_check(benchmark_config, redis_conns,False,5) assert result == True with open("./tests/test_data/tsbs-targets.yml", "r") as yml_file: benchmark_config = yaml.safe_load(yml_file) # check and fail try: - result = dbconfig_keyspacelen_check(benchmark_config, redis_conns) + result = dbconfig_keyspacelen_check(benchmark_config, redis_conns,False,5) except Exception as e: + err_str = e.__str__() assert ( - e.__str__() - == "The total numbers of keys in setup does not match the expected spec: 1000!=0. Aborting..." + "The total number of keys in setup does not match the expected spec: 1000 != 0. 
Aborting after" in + err_str ) # check and pass @@ -582,9 +598,13 @@ def test_execute_init_commands(): from redis import StrictRedis from redis.exceptions import ConnectionError - redis_port = 16379 + import os + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) + try: - redis = StrictRedis(port=redis_port) + redis = StrictRedis(port=rts_port) redis.ping() redis.flushall() redis.config_resetstat() diff --git a/tests/test_compare.py b/tests/test_compare.py index a0828b5..bf24c90 100644 --- a/tests/test_compare.py +++ b/tests/test_compare.py @@ -10,12 +10,14 @@ def test_compare_command_logic(): + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) rts_host = os.getenv("RTS_DATASINK_HOST", None) - rts_port = 16379 rts_pass = "" if rts_host is None: return - rts = redis.Redis(port=16379, host=rts_host) + rts = redis.Redis(port=rts_port, host=rts_host) rts.ping() rts.flushall() parser = argparse.ArgumentParser( diff --git a/tests/test_data/defaults-with-purpose-built-env.yml b/tests/test_data/defaults-with-purpose-built-env.yml new file mode 100644 index 0000000..cb4bc53 --- /dev/null +++ b/tests/test_data/defaults-with-purpose-built-env.yml @@ -0,0 +1,344 @@ +exporter: + comparison: + baseline-branch: master + metrics: + - Ops/sec + - $.Tests.Overall.rps + - $.OverallRates.overallOpsRate + mode: higher-better + redistimeseries: + break_by: + - version + - commit + metrics: + - $.Tests.Overall.rps + - $.Tests.Overall.avg_latency_ms + - $.Tests.Overall.p50_latency_ms + - $.Tests.Overall.p95_latency_ms + - $.Tests.Overall.p99_latency_ms + - $.Tests.Overall.max_latency_ms + - $.Tests.Overall.min_latency_ms + - $.build.build_time + - $.build.vector_index_sz_mb + - $."ALL STATS".*."Ops/sec" + - $."ALL STATS".*."Latency" + - $.OverallRates.overallOpsRate + - $.OverallQuantiles.allCommands.q50 + - $.OverallQuantiles.allCommands.q95 + - $.OverallQuantiles.allCommands.q99 + - $.OverallQuantiles.allCommands.q999 + timemetric: $.StartTime +remote: +- type: oss-standalone +- setup: redisearch-m5 +spec: + setups: + - name: oss-standalone + redis_topology: + primaries: 1 + replicas: 0 + resources: + requests: + cpus: '1' + memory: 10g + type: oss-standalone + - name: oss-standalone-1replica + redis_topology: + placement: sparse + primaries: 1 + replicas: 1 + resources: + requests: + cpus: '2' + memory: 10g + type: oss-standalone + - name: oss-cluster-01-primaries + redis_topology: + placement: sparse + primaries: 1 + replicas: 0 + resources: + requests: + cpus: '1' + memory: 10g + type: oss-cluster + - name: oss-cluster-02-primaries + redis_topology: + placement: sparse + primaries: 2 + replicas: 0 + resources: + requests: + cpus: '2' + memory: 10g + type: oss-cluster + - name: oss-cluster-03-primaries + redis_topology: + placement: sparse + primaries: 3 + replicas: 0 + resources: + requests: + cpus: '3' + memory: 30g + type: oss-cluster + - name: oss-cluster-04-primaries + redis_topology: + placement: sparse + primaries: 4 + replicas: 0 + resources: + requests: + cpus: '4' + memory: 40g + type: oss-cluster + - name: oss-cluster-05-primaries + redis_topology: + placement: sparse + primaries: 5 + replicas: 0 + resources: + requests: + cpus: '5' + memory: 50g + type: oss-cluster + - name: oss-cluster-08-primaries + redis_topology: + placement: sparse + primaries: 8 + replicas: 0 + resources: + requests: + cpus: '10' + memory: 90g + type: oss-cluster + - name: 
oss-cluster-09-primaries + redis_topology: + placement: sparse + primaries: 9 + replicas: 0 + resources: + requests: + cpus: '10' + memory: 90g + type: oss-cluster + - name: oss-cluster-15-primaries + redis_topology: + placement: sparse + primaries: 15 + replicas: 0 + resources: + requests: + cpus: '15' + memory: 150g + type: oss-cluster + - name: oss-cluster-16-primaries + redis_topology: + placement: sparse + primaries: 16 + replicas: 0 + resources: + requests: + cpus: '18' + memory: 180g + type: oss-cluster + - name: oss-cluster-20-primaries + redis_topology: + placement: sparse + primaries: 20 + replicas: 0 + resources: + requests: + cpus: '20' + memory: 500g + type: oss-cluster + - name: oss-cluster-21-primaries + redis_topology: + placement: sparse + primaries: 21 + replicas: 0 + resources: + requests: + cpus: '21' + memory: 210g + type: oss-cluster + - name: oss-cluster-24-primaries + redis_topology: + placement: sparse + primaries: 24 + replicas: 0 + resources: + requests: + cpus: '24' + memory: 500g + type: oss-cluster + - name: oss-cluster-30-primaries + redis_topology: + placement: sparse + primaries: 30 + replicas: 0 + resources: + requests: + cpus: '30' + memory: 300g + type: oss-cluster + - name: oss-cluster-32-primaries + redis_topology: + placement: sparse + primaries: 32 + replicas: 0 + resources: + requests: + cpus: '32' + memory: 300g + type: oss-cluster + - name: oss-cluster-16-primaries_master_w10_st10 + redis_topology: + placement: sparse + primaries: 16 + replicas: 0 + resources: + requests: + cpus: '18' + memory: 180g + type: oss-cluster + - name: oss-cluster-16-primaries_master_w10_st20 + redis_topology: + placement: sparse + primaries: 16 + replicas: 0 + resources: + requests: + cpus: '18' + memory: 180g + type: oss-cluster + - name: oss-cluster-16-primaries_master_w20_st10 + redis_topology: + placement: sparse + primaries: 16 + replicas: 0 + resources: + requests: + cpus: '18' + memory: 180g + type: oss-cluster + - name: oss-cluster-16-primaries_master_w20_st20 + redis_topology: + placement: sparse + primaries: 16 + replicas: 0 + resources: + requests: + cpus: '18' + memory: 180g + type: oss-cluster + - name: oss-cluster-16-primaries_joan-uv-threads_w10_st10_sio10 + redis_topology: + placement: sparse + primaries: 16 + replicas: 0 + resources: + requests: + cpus: '18' + memory: 180g + type: oss-cluster + - name: oss-cluster-16-primaries_joan-uv-threads_w10_st20_sio10 + redis_topology: + placement: sparse + primaries: 16 + replicas: 0 + resources: + requests: + cpus: '18' + memory: 180g + type: oss-cluster + - name: oss-cluster-16-primaries_joan-uv-threads_w20_st10_sio10 + redis_topology: + placement: sparse + primaries: 16 + replicas: 0 + resources: + requests: + cpus: '18' + memory: 180g + type: oss-cluster + - name: oss-cluster-16-primaries_joan-uv-threads_w20_st20_sio10 + redis_topology: + placement: sparse + primaries: 16 + replicas: 0 + resources: + requests: + cpus: '18' + memory: 180g + type: oss-cluster + - name: oss-cluster-16-primaries_joan-uv-threads_w10_st10_sio20 + redis_topology: + placement: sparse + primaries: 16 + replicas: 0 + resources: + requests: + cpus: '18' + memory: 180g + type: oss-cluster + - name: oss-cluster-16-primaries_joan-uv-threads_w10_st20_sio20 + redis_topology: + placement: sparse + primaries: 16 + replicas: 0 + resources: + requests: + cpus: '18' + memory: 180g + type: oss-cluster + - name: oss-cluster-16-primaries_joan-uv-threads_w20_st10_sio20 + redis_topology: + placement: sparse + primaries: 16 + replicas: 0 + resources: + 
requests: + cpus: '18' + memory: 180g + type: oss-cluster + - name: oss-cluster-16-primaries_joan-uv-threads_w20_st20_sio20 + type: oss-cluster + redis_topology: + placement: sparse + primaries: 16 + replicas: 0 + resources: + requests: + cpus: '18' + memory: 180g + dbconfig: + module-configuration-parameters: + redisearch: + WORKERS: 6 + MIN_OPERATION_WORKERS: 6 + module-oss: + WORKERS: 6 + MIN_OPERATION_WORKERS: 6 + + - name: oss-cluster-02-primaries_joan-uv-threads_w20_st20_sio20 + type: oss-cluster + redis_topology: + placement: sparse + primaries: 02 + replicas: 0 + resources: + requests: + cpus: '4' + memory: 180g + dbconfig: + module-configuration-parameters: + redisearch: + WORKERS: 6 + MIN_OPERATION_WORKERS: 6 + module-oss: + WORKERS: 6 + MIN_OPERATION_WORKERS: 6 + + +version: 0.2 \ No newline at end of file diff --git a/tests/test_data/test-purpose-built-env.yml b/tests/test_data/test-purpose-built-env.yml new file mode 100644 index 0000000..50a95bd --- /dev/null +++ b/tests/test_data/test-purpose-built-env.yml @@ -0,0 +1,6 @@ +name: test-purpose-built-env-cluster +setups: + - oss-cluster-02-primaries_joan-uv-threads_w20_st20_sio20 +clientconfig: + tool: redis-benchmark + arguments: "-t set -n 100 -c 5" diff --git a/tests/test_defaults_purpose_built_env.py b/tests/test_defaults_purpose_built_env.py new file mode 100644 index 0000000..90dcabd --- /dev/null +++ b/tests/test_defaults_purpose_built_env.py @@ -0,0 +1,276 @@ +import argparse +import unittest.mock +import yaml +import subprocess +import time +import redis +import tempfile +import os + +from redisbench_admin.run_local.args import create_run_local_arguments +from redisbench_admin.run_local.run_local import run_local_command_logic +from redisbench_admin.utils.benchmark_config import ( + get_defaults, + process_default_yaml_properties_file, +) +from redisbench_admin.run.common import extract_test_feasible_setups +from redisbench_admin.environments.oss_cluster import ( + spin_up_local_redis_cluster, + setup_redis_cluster_from_conns, + generate_cluster_redis_server_args, +) + + +def test_defaults_purpose_built_env_parsing(): + """Test that the new defaults-with-purpose-built-env.yml file is properly parsed""" + defaults_filename = "./tests/test_data/defaults-with-purpose-built-env.yml" + + # Test that the file can be loaded and parsed + ( + default_kpis, + default_remote, + default_metrics, + exporter_timemetric_path, + default_specs, + cluster_config, + ) = get_defaults(defaults_filename) + + # Verify basic structure + assert default_specs is not None + assert "setups" in default_specs + + # Verify the specific environment we're testing + setups = default_specs["setups"] + setup_names = [setup["name"] for setup in setups] + + # Check that our target environment exists + assert "oss-cluster-02-primaries_joan-uv-threads_w20_st20_sio20" in setup_names + + # Find and validate the specific setup + target_setup = None + for setup in setups: + if setup["name"] == "oss-cluster-02-primaries_joan-uv-threads_w20_st20_sio20": + target_setup = setup + break + + assert target_setup is not None + + # Validate the setup structure + assert target_setup["type"] == "oss-cluster" + assert "redis_topology" in target_setup + assert "resources" in target_setup + assert "dbconfig" in target_setup + + # Validate topology + topology = target_setup["redis_topology"] + assert topology["primaries"] == 2 # Note: YAML "02" becomes int 2 + assert topology["replicas"] == 0 + assert topology["placement"] == "sparse" + + # Validate resources + resources = 
target_setup["resources"] + assert "requests" in resources + assert resources["requests"]["cpus"] == "4" + assert resources["requests"]["memory"] == "180g" + + # Validate dbconfig + dbconfig = target_setup["dbconfig"] + assert "module-configuration-parameters" in dbconfig + module_params = dbconfig["module-configuration-parameters"] + assert "redisearch" in module_params + assert "module-oss" in module_params + assert module_params["redisearch"]["WORKERS"] == 6 + assert module_params["redisearch"]["MIN_OPERATION_WORKERS"] == 6 + + +def test_extract_feasible_setups_with_purpose_built_env(): + """Test that extract_test_feasible_setups works with the new defaults file""" + defaults_filename = "./tests/test_data/defaults-with-purpose-built-env.yml" + + ( + default_kpis, + default_remote, + default_metrics, + exporter_timemetric_path, + default_specs, + cluster_config, + ) = get_defaults(defaults_filename) + + # Test with a benchmark config that specifies our target environment + benchmark_config = { + "setups": ["oss-cluster-02-primaries_joan-uv-threads_w20_st20_sio20"] + } + + feasible_setups = extract_test_feasible_setups( + benchmark_config, "setups", default_specs, backwards_compatible=False + ) + + # Verify the setup was found and extracted correctly + assert len(feasible_setups) == 1 + assert "oss-cluster-02-primaries_joan-uv-threads_w20_st20_sio20" in feasible_setups + + setup = feasible_setups["oss-cluster-02-primaries_joan-uv-threads_w20_st20_sio20"] + assert setup["type"] == "oss-cluster" + assert setup["redis_topology"]["primaries"] == 2 + assert setup["redis_topology"]["replicas"] == 0 + + +def test_dry_run_with_simple_standalone_env(): + """Test dry-run with real Redis using a simple oss-standalone environment (no modules)""" + parser = argparse.ArgumentParser( + description="test simple env dry-run", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser = create_run_local_arguments(parser) + + # Create a simple test config that uses a standalone environment (no modules) + test_config = { + "name": "test-simple-env", + "setups": ["oss-standalone"], + "clientconfig": [ + {"tool": "redis-benchmark"}, + {"parameters": [ + {"clients": 5}, + {"requests": 100}, + {"test": "set"} + ]} + ] + } + + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) + rts = redis.Redis(port=rts_port,decode_responses=True) + rts.ping() + rts.flushall() + + # Save the test config temporarily + with tempfile.NamedTemporaryFile(mode='w', suffix='.yml', delete=False) as f: + yaml.dump(test_config, f) + test_config_path = f.name + + try: + args = parser.parse_args( + args=[ + "--test", + test_config_path, + "--defaults_filename", + "./tests/test_data/defaults-with-dbconfig.yml", # Use simpler defaults + "--dry-run", + "--allowed-envs", + "oss-standalone", + "--port", + "12000", + "--redis-7", + "True", + "--push_results_redistimeseries", + "--redistimeseries_port", + f"{rts_port}" + ] + ) + + try: + run_local_command_logic(args, "tool", "v0") + success = True + except SystemExit as e: + success = e.code == 0 + + assert success, "Dry-run with real Redis standalone should succeed" + print("โœ… Dry-run completed successfully with real Redis standalone") + + assert rts.info("keyspace")["db0"]["keys"] >= 0 + + + finally: + # Clean up temporary file + os.unlink(test_config_path) + + + + + + +def test_dry_run_with_purpose_built_env(): + """Test dry-run with real Redis using oss-cluster-02-primaries_joan-uv-threads_w20_st20_sio20 
environment""" + parser = argparse.ArgumentParser( + description="test purpose-built env dry-run", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser = create_run_local_arguments(parser) + + # Env name + setup_name = "oss-cluster-02-primaries_joan-uv-threads_w20_st20_sio20" + + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) + rts = redis.Redis(port=rts_port,decode_responses=True) + rts.ping() + rts.flushall() + + # Create a simple test config that uses our target environment + test_config = { + "name": "test-purpose-built-env", + "setups": [setup_name], + "clientconfig": [ + {"tool": "redis-benchmark"}, + {"parameters": [ + {"clients": 5}, + {"requests": 100}, + {"test": "set"} + ]} + ] + } + + # Save the test config temporarily + with tempfile.NamedTemporaryFile(mode='w', suffix='.yml', delete=False) as f: + yaml.dump(test_config, f) + test_config_path = f.name + + triggering_env_name = "test-triggering-env" + try: + args = parser.parse_args( + args=[ + "--test", + test_config_path, + "--defaults_filename", + "./tests/test_data/defaults-with-purpose-built-env.yml", + "--dry-run", + "--allowed-envs", + setup_name, + "--port", + "12100", + "--redis-7", + "True", + "--push_results_redistimeseries", + "--redistimeseries_port", + f"{rts_port}", + "--triggering_env", + triggering_env_name, + ] + ) + + try: + run_local_command_logic(args, "tool", "v0") + success = True + except SystemExit as e: + success = e.code == 0 + + # ensure we have data + assert rts.info("keyspace")["db0"]["keys"] >= 0 + + # ensure that the environemt name is present on timeseries + + keys_from_deployment = rts.ts().queryindex([f"deployment_name={setup_name}"]) + assert len(keys_from_deployment) > 0 + + assert setup_name in rts.zrange(f"ci.benchmarks.redislabs/{triggering_env_name}/redis-performance/redisbench-admin:deployment_names", 0, -1) + + + + assert success, "Dry-run with real Redis should succeed" + print("โœ… Dry-run completed successfully with real Redis cluster") + + finally: + # Clean up temporary file + os.unlink(test_config_path) diff --git a/tests/test_export.py b/tests/test_export.py index b76141a..fa3dc67 100644 --- a/tests/test_export.py +++ b/tests/test_export.py @@ -24,12 +24,15 @@ def test_get_timeserie_name(self): def test_export_command_logic(): + import os + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) rts_host = os.getenv("RTS_DATASINK_HOST", None) - rts_port = 16379 rts_pass = "" if rts_host is None: return - rts = redis.Redis(port=16379, host=rts_host) + rts = redis.Redis(port=rts_port, host=rts_host) rts.ping() rts.flushall() parser = argparse.ArgumentParser( @@ -74,12 +77,16 @@ def test_export_command_logic(): def test_export_command_logic(): + import os + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + + rts_port = os.environ.get("RTS_PORT",None) rts_host = os.getenv("RTS_DATASINK_HOST", None) - rts_port = 16379 rts_pass = "" if rts_host is None: return - rts = redis.Redis(port=16379, host=rts_host) + rts = redis.Redis(port=rts_port, host=rts_host) rts.ping() rts.flushall() parser = argparse.ArgumentParser( @@ -134,12 +141,15 @@ def test_export_opereto_csv_to_timeseries_dict(): def test_export_command_logic_google_benchmark(): + import os + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) rts_host = 
os.getenv("RTS_DATASINK_HOST", None) - rts_port = 16379 rts_pass = "" if rts_host is None: return - rts = redis.Redis(port=16379, host=rts_host) + rts = redis.Redis(port=rts_port, host=rts_host) rts.ping() rts.flushall() parser = argparse.ArgumentParser( diff --git a/tests/test_grafana.py b/tests/test_grafana.py index 9e53dba..6275f41 100644 --- a/tests/test_grafana.py +++ b/tests/test_grafana.py @@ -16,8 +16,11 @@ def test_generate_artifacts_table_grafana_redis(): + import os rts_host = os.getenv("RTS_DATASINK_HOST", None) - rts_port = 16379 + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) if rts_host is None: return redis_conn = redis.Redis(port=rts_port, host=rts_host, decode_responses=True) diff --git a/tests/test_metrics.py b/tests/test_metrics.py index 99821d0..270b772 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -33,8 +33,11 @@ def test_extract_results_table(): def test_collect_redis_metrics(): + import os rts_host = os.getenv("RTS_DATASINK_HOST", None) - rts_port = 16379 + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) if rts_host is None: return rts = redis.Redis(port=rts_port, host=rts_host) diff --git a/tests/test_redistimeseries.py b/tests/test_redistimeseries.py index a6bd759..2c6d61b 100644 --- a/tests/test_redistimeseries.py +++ b/tests/test_redistimeseries.py @@ -19,7 +19,11 @@ def test_timeseries_test_sucess_flow(): try: - rts = redis.Redis(port=16379) + import os + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) + rts = redis.Redis(port=rts_port) rts.ping() rts.flushall() with open( diff --git a/tests/test_remote.py b/tests/test_remote.py index 96afe7d..87b92e2 100644 --- a/tests/test_remote.py +++ b/tests/test_remote.py @@ -125,7 +125,11 @@ def test_fetch_remote_setup_from_config_aarch64(): def test_push_data_to_redistimeseries(): time_series_dict = {} try: - rts = redis.Redis(port=16379) + import os + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) + rts = redis.Redis(port=rts_port) rts.ping() except redis.exceptions.ConnectionError: pass @@ -269,7 +273,11 @@ def test_extract_timeseries_from_results(): def test_exporter_create_ts(): try: - rts = redis.Redis(port=16379) + import os + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) + rts = redis.Redis(port=rts_port) rts.ping() rts.flushall() with open( @@ -475,7 +483,11 @@ def test_exporter_create_ts(): timeseries_name = "ts1" time_series = {"labels": {"metric-type": "commandstats"}} try: - rts = redis.Redis(port=16379) + import os + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) + rts = redis.Redis(port=rts_port) rts.ping() rts.flushall() assert True == exporter_create_ts(rts, time_series, timeseries_name) diff --git a/tests/test_run_local.py b/tests/test_run_local.py index 76dc67b..546455c 100644 --- a/tests/test_run_local.py +++ b/tests/test_run_local.py @@ -192,11 +192,14 @@ def test_run_local_command_logic(): assert e.code == 1 ## run while pushing results to redis_conn + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) rts_host = os.getenv("RTS_DATASINK_HOST", 
None) - rts_port = 16379 + rts_pass = "" if rts_host is None: return - rts = redis.Redis(port=16379, host=rts_host) + rts = redis.Redis(port=rts_port, host=rts_host) rts.ping() rts.flushall() parser = argparse.ArgumentParser( @@ -221,3 +224,4 @@ def test_run_local_command_logic(): run_local_command_logic(args, "tool", "v0") except SystemExit as e: assert e.code == 0 + diff --git a/tests/test_run_remote.py b/tests/test_run_remote.py index 3de5368..57255bc 100644 --- a/tests/test_run_remote.py +++ b/tests/test_run_remote.py @@ -27,8 +27,10 @@ def test_export_redis_metrics(): setup_type = "oss-standalone" artifact_version = None try: - rts_host = os.getenv("RTS_DATASINK_HOST", None) - rts_port = 16379 + rts_host = os.environ.get("RTS_DATASINK_HOST", None) + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) if rts_host is None: return rts = redis.Redis(port=rts_port, host=rts_host) diff --git a/tox.ini b/tox.ini index 7aed770..56e165f 100644 --- a/tox.ini +++ b/tox.ini @@ -1,24 +1,41 @@ [tox] -isolated_build = True -envlist = integration-tests +envlist = compliance,integration-tests -[tox:.package] -# note tox will use the same python version as under what tox is installed to package -# so unless this is python 3 you can require a given python version for the packaging -# environment via the basepython key -basepython = python3 + + +[testenv:compliance] +skip_install = True +deps = + black==24.3.0 + flake8 +stoponfail = True +commands = + black --check redisbench_admin + flake8 redisbench_admin + +[testenv:format] +skip_install = True +deps = + black==24.3.0 +commands = + black redisbench_admin [testenv:integration-tests] -deps = +deps = -r{toxinidir}/dev_requirements.txt pytest-cov + tox-docker +docker = + rts_datasink + db_server + client_server -stoponfail = True +passenv = TST_BUILDER_X,TST_RUNNER_X,GH_TOKEN,TST_REDIS_DIR,DOCKER_HOST,DOCKER_TLS_VERIFY,DOCKER_CERT_PATH,TST_BINARY_REDIS_DIR + +stoponfail = True commands = - black --check redisbench_admin - flake8 redisbench_admin coverage erase pytest --cov=redisbench_admin --cov-report=term-missing -ra {posargs} coverage xml @@ -33,9 +50,15 @@ docker = [docker:rts_datasink] -image = redis/redis-stack-server:7.2.0-v11 -ports = - 16379:6379/tcp +image = redis/redis-stack-server:7.4.0-v0 +# Expose ports to the testenv +expose = + RTS_PORT=6379/tcp + +healthcheck_cmd = redis-cli ping +healthcheck_interval = 1 +healthcheck_timeout = 3 +healthcheck_retries = 30 [docker:db_server] image = ghcr.io/linuxserver/openssh-server