diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..246e2f6
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,45 @@
+# Makefile for redisbench-admin
+
+.PHONY: compliance format compliance-fix test integration-tests all help
+
+# Code quality and compliance checks
+compliance:
+	@echo "๐Ÿ” Running compliance checks..."
+	tox -e compliance
+
+# Fix code formatting issues
+format:
+	@echo "๐Ÿ”ง Fixing code formatting..."
+	tox -e format
+	@echo "โœ… Code formatting fixed!"
+
+# Alias for format
+compliance-fix: format
+
+# Run tests with coverage
+test:
+	@echo "๐Ÿงช Running tests..."
+	poetry run coverage erase
+	poetry run pytest --cov=redisbench_admin --cov-report=term-missing -ra
+	poetry run coverage xml
+	@echo "โœ… Tests completed!"
+
+# Run integration tests (alias for test)
+integration-tests: test
+
+# Run both compliance and tests
+all: compliance test
+
+# Show help
+help:
+	@echo "Available targets:"
+	@echo "  compliance        - Run code quality checks (black, flake8)"
+	@echo "  format            - Fix code formatting with black"
+	@echo "  compliance-fix    - Alias for format"
+	@echo "  test              - Run tests with coverage"
+	@echo "  integration-tests - Alias for test"
+	@echo "  all               - Run compliance checks and tests"
+	@echo "  help              - Show this help message"
+
+# Default target
+.DEFAULT_GOAL := help
diff --git a/README.md b/README.md
index cae7f62..947fe43 100644
--- a/README.md
+++ b/README.md
@@ -110,7 +110,13 @@ $ tox
 To run a specific test:
 
 ```sh
-$ tox -- tests/test_redistimeseries.py
+$ tox -- tests/test_defaults_purpose_built_env.py
+```
+
+To run a specific test and persist the docker container used for timeseries:
+
+```
+tox --docker-dont-stop=rts_datasink -- -vv --log-cli-level=INFO tests/test_defaults_purpose_built_env.py
 ```
 
 To run a specific test with verbose logging:
diff --git a/pyproject.toml b/pyproject.toml
index d99f34f..0a59798 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "redisbench-admin"
-version = "0.11.37"
+version = "0.11.39"
 description = "Redis benchmark run helper. A wrapper around Redis and Redis Modules benchmark tools ( ftsb_redisearch, memtier_benchmark, redis-benchmark, aibench, etc... )."
 authors = ["filipecosta90 ","Redis Performance Group "]
 readme = "README.md"
@@ -44,6 +44,9 @@ certifi = ">=2021.10.8,<2025.0.0"
 pygithub = "^1.57"
 
 [tool.poetry.dev-dependencies]
+tox = ">=4.16.0"
+tox-docker = ">=5.0.0"
+docker = ">=7.1.0"
 pytest = "^4.6"
 pytest-cov = "^2.9.0"
 codecov = "2.1.13"
diff --git a/redisbench_admin/compare/args.py b/redisbench_admin/compare/args.py
index 5aba32c..1ddce9e 100644
--- a/redisbench_admin/compare/args.py
+++ b/redisbench_admin/compare/args.py
@@ -134,7 +134,7 @@ def create_compare_arguments(parser):
     parser.add_argument(
         "--regressions-percent-lower-limit",
         type=float,
-        default=5.0,
+        default=8.0,
         help="Only consider regressions with a percentage over the defined limit. 
(0-100)", ) parser.add_argument( diff --git a/redisbench_admin/compare/compare.py b/redisbench_admin/compare/compare.py index e489f9a..ac740e5 100644 --- a/redisbench_admin/compare/compare.py +++ b/redisbench_admin/compare/compare.py @@ -13,6 +13,7 @@ import humanize import datetime as dt import os +import statistics from tqdm import tqdm from github import Github from slack_sdk.webhook import WebhookClient @@ -270,6 +271,10 @@ def compare_command_logic(args, project_name, project_version): total_stable, total_unstable, total_comparison_points, + total_unstable_baseline, + total_unstable_comparison, + total_latency_confirmed_regressions, + latency_confirmed_regression_details, ) = compute_regression_table( rts, tf_github_org, @@ -303,6 +308,7 @@ def compare_command_logic(args, project_name, project_version): comparison_architecture, first_n_baseline, first_n_comparison, + grafana_link_base, ) comment_body = "" if total_comparison_points > 0: @@ -321,11 +327,63 @@ def compare_command_logic(args, project_name, project_version): ) if total_unstable > 0: + unstable_details = [] + if total_unstable_baseline > 0: + unstable_details.append(f"{total_unstable_baseline} baseline") + if total_unstable_comparison > 0: + unstable_details.append(f"{total_unstable_comparison} comparison") + + unstable_breakdown = ( + " (" + ", ".join(unstable_details) + ")" if unstable_details else "" + ) comparison_summary += ( - "- Detected a total of {} highly unstable benchmarks.\n".format( - total_unstable + "- Detected a total of {} highly unstable benchmarks{}.\n".format( + total_unstable, unstable_breakdown ) ) + + # Add latency confirmation summary if applicable + if total_latency_confirmed_regressions > 0: + comparison_summary += "- Latency analysis confirmed regressions in {} of the unstable tests:\n".format( + total_latency_confirmed_regressions + ) + + # Add detailed breakdown as bullet points with test links + if latency_confirmed_regression_details: + for detail in latency_confirmed_regression_details: + test_name = detail["test_name"] + commands_info = [] + for cmd_detail in detail["commands"]: + commands_info.append( + f"{cmd_detail['command']} +{cmd_detail['change_percent']:.1f}%" + ) + + if commands_info: + # Create test link if grafana_link_base is available + test_display_name = test_name + if grafana_link_base is not None: + grafana_test_link = f"{grafana_link_base}?orgId=1&var-test_case={test_name}" + if baseline_branch is not None: + grafana_test_link += ( + f"&var-branch={baseline_branch}" + ) + if comparison_branch is not None: + grafana_test_link += ( + f"&var-branch={comparison_branch}" + ) + grafana_test_link += "&from=now-30d&to=now" + test_display_name = ( + f"[{test_name}]({grafana_test_link})" + ) + + # Add confidence indicator if available + confidence_indicator = "" + if "high_confidence" in detail: + confidence_indicator = ( + " ๐Ÿ”ด" if detail["high_confidence"] else " โš ๏ธ" + ) + + comparison_summary += f" - {test_display_name}: {', '.join(commands_info)}{confidence_indicator}\n" if total_improvements > 0: comparison_summary += "- Detected a total of {} improvements above the improvement water line.\n".format( total_improvements @@ -484,6 +542,9 @@ def compare_command_logic(args, project_name, project_version): total_stable, total_unstable, total_comparison_points, + total_unstable_baseline, + total_unstable_comparison, + total_latency_confirmed_regressions, ) @@ -531,6 +592,7 @@ def compute_regression_table( comparison_architecture=ARCH_X86, first_n_baseline=-1, 
first_n_comparison=-1, + grafana_link_base=None, ): START_TIME_NOW_UTC, _, _ = get_start_time_vars() START_TIME_LAST_MONTH_UTC = START_TIME_NOW_UTC - datetime.timedelta(days=31) @@ -593,6 +655,10 @@ def compute_regression_table( total_stable, total_unstable, total_comparison_points, + total_unstable_baseline, + total_unstable_comparison, + total_latency_confirmed_regressions, + latency_confirmed_regression_details, ) = from_rts_to_regression_table( baseline_deployment_name, comparison_deployment_name, @@ -621,14 +687,97 @@ def compute_regression_table( comparison_architecture, first_n_baseline, first_n_comparison, + grafana_link_base, + baseline_branch, + baseline_tag, + comparison_branch, + comparison_tag, + from_date, + to_date, ) logging.info( "Printing differential analysis between {} and {}".format( baseline_str, comparison_str ) ) - writer = MarkdownTableWriter( - table_name="Comparison between {} and {}.\n\nTime Period from {}. (environment used: {})\n".format( + + # Split table into improvements, regressions, and no-changes + improvements_table = [] + regressions_table = [] + no_changes_table = [] + + for row in table: + # Check if there's a meaningful change (not stable/unstable) + note = row[4].lower() if len(row) > 4 else "" + percentage_str = row[3] if len(row) > 3 else "0.0%" + + # Extract percentage value + try: + percentage_val = float(percentage_str.replace("%", "").strip()) + except: + percentage_val = 0.0 + + # Categorize based on change type + if "improvement" in note and "potential" not in note: + # Only actual improvements, not potential ones + improvements_table.append(row) + elif ("regression" in note and "potential" not in note) or "unstable" in note: + # Only actual regressions, not potential ones, plus unstable tests + regressions_table.append(row) + elif "no change" in note or "potential" in note: + # No changes and potential changes (below significance threshold) + no_changes_table.append(row) + elif abs(percentage_val) > 3.0: # Significant changes based on percentage + if (percentage_val > 0 and metric_mode == "higher-better") or ( + percentage_val < 0 and metric_mode == "lower-better" + ): + improvements_table.append(row) + else: + regressions_table.append(row) + else: + no_changes_table.append(row) + + # Sort tables by percentage change + def get_percentage_value(row): + """Extract percentage value from row for sorting""" + try: + percentage_str = row[3] if len(row) > 3 else "0.0%" + return float(percentage_str.replace("%", "").strip()) + except: + return 0.0 + + # Sort improvements by percentage change (highest first) + improvements_table.sort(key=get_percentage_value, reverse=True) + + # Sort regressions by percentage change (most negative first for higher-better, most positive first for lower-better) + if metric_mode == "higher-better": + # For higher-better metrics, most negative changes are worst regressions + regressions_table.sort(key=get_percentage_value) + else: + # For lower-better metrics, most positive changes are worst regressions + regressions_table.sort(key=get_percentage_value, reverse=True) + + # Create improvements table (visible) + improvements_writer = MarkdownTableWriter( + table_name="Performance Improvements - Comparison between {} and {}.\n\nTime Period from {}. (environment used: {})\n".format( + baseline_str, + comparison_str, + from_human_str, + baseline_deployment_name, + ), + headers=[ + "Test Case", + "Baseline {} (median obs. +- std.dev)".format(baseline_str), + "Comparison {} (median obs. 
+- std.dev)".format(comparison_str), + "% change ({})".format(metric_mode), + "Note", + ], + value_matrix=improvements_table, + ) + + # Create regressions table (visible) + regressions_writer = MarkdownTableWriter( + table_name="Performance Regressions and Issues - Comparison between {} and {}.\n\nTime Period from {}. (environment used: {})\n".format( baseline_str, comparison_str, from_human_str, @@ -641,8 +790,22 @@ def compute_regression_table( "% change ({})".format(metric_mode), "Note", ], - value_matrix=table, + value_matrix=regressions_table, ) + + # Create no-changes table (hidden in markdown) + no_changes_writer = MarkdownTableWriter( + table_name="Tests with No Significant Changes", + headers=[ + "Test Case", + "Baseline {} (median obs. +- std.dev)".format(baseline_str), + "Comparison {} (median obs. +- std.dev)".format(comparison_str), + "% change ({})".format(metric_mode), + "Note", + ], + value_matrix=no_changes_table, + ) + table_output = "" from io import StringIO @@ -651,7 +814,25 @@ def compute_regression_table( old_stdout = sys.stdout sys.stdout = mystdout = StringIO() - writer.dump(mystdout, False) + # Output improvements table first (if any) + if improvements_table: + improvements_writer.dump(mystdout, False) + mystdout.write("\n\n") + + # Output regressions table (if any) + if regressions_table: + regressions_writer.dump(mystdout, False) + mystdout.write("\n\n") + + # Add hidden no-changes table + if no_changes_table: + mystdout.write( + "
\nTests with No Significant Changes ({} tests)\n\n".format(
+                len(no_changes_table)
+            )
+        )
+        no_changes_writer.dump(mystdout, False)
+        mystdout.write("\n
\n") sys.stdout = old_stdout @@ -665,6 +846,10 @@ def compute_regression_table( total_stable, total_unstable, total_comparison_points, + total_unstable_baseline, + total_unstable_comparison, + total_latency_confirmed_regressions, + latency_confirmed_regression_details, ) @@ -752,6 +937,13 @@ def from_rts_to_regression_table( comparison_architecture=ARCH_X86, first_n_baseline=-1, first_n_comparison=-1, + grafana_link_base=None, + baseline_branch=None, + baseline_tag=None, + comparison_branch=None, + comparison_tag=None, + from_date=None, + to_date=None, ): print_all = print_regressions_only is False and print_improvements_only is False table = [] @@ -759,8 +951,12 @@ def from_rts_to_regression_table( total_improvements = 0 total_stable = 0 total_unstable = 0 + total_unstable_baseline = 0 + total_unstable_comparison = 0 total_regressions = 0 total_comparison_points = 0 + total_latency_confirmed_regressions = 0 + latency_confirmed_regression_details = [] # Track specific test details noise_waterline = 3 progress = tqdm(unit="benchmark time-series", total=len(test_names)) for test_name in test_names: @@ -898,10 +1094,243 @@ def from_rts_to_regression_table( logging.error("Detected a ZeroDivisionError. {}".format(e.__str__())) pass unstable = False + unstable_baseline = False + unstable_comparison = False + latency_confirms_regression = False + if baseline_v != "N/A" and comparison_v != "N/A": if comparison_pct_change > 10.0 or baseline_pct_change > 10.0: - note = "UNSTABLE (very high variance)" unstable = True + unstable_baseline = baseline_pct_change > 10.0 + unstable_comparison = comparison_pct_change > 10.0 + + # Build detailed unstable note + unstable_parts = [] + if unstable_baseline and unstable_comparison: + unstable_parts.append( + "UNSTABLE (baseline & comparison high variance)" + ) + elif unstable_baseline: + unstable_parts.append("UNSTABLE (baseline high variance)") + elif unstable_comparison: + unstable_parts.append("UNSTABLE (comparison high variance)") + + note = unstable_parts[0] + + # Log detailed warning about unstable data detection + logging.warning( + f"UNSTABLE DATA DETECTED for test '{test_name}': " + f"baseline variance={baseline_pct_change:.1f}%, " + f"comparison variance={comparison_pct_change:.1f}% " + f"(threshold=10.0%)" + ) + + # For throughput metrics (higher-better), check both server-side and client-side latency + if metric_mode == "higher-better": + logging.info( + f"Performing 2nd-level latency validation for unstable throughput metric '{test_name}' " + f"(metric_mode={metric_mode})" + ) + + # Check server-side p50 latency + ( + server_latency_note, + server_confirms_regression, + server_regression_details, + ) = check_latency_for_unstable_throughput( + rts, + test_name, + baseline_str, + comparison_str, + by_str_baseline, + by_str_comparison, + baseline_deployment_name, + comparison_deployment_name, + tf_triggering_env, + from_ts_ms, + to_ts_ms, + last_n_baseline, + last_n_comparison, + first_n_baseline, + first_n_comparison, + running_platform, + baseline_architecture, + comparison_architecture, + verbose, + ) + + # Check client-side latency metrics + ( + client_latency_note, + client_confirms_regression, + client_regression_details, + ) = check_client_side_latency( + rts, + test_name, + baseline_str, + comparison_str, + by_str_baseline, + by_str_comparison, + baseline_deployment_name, + comparison_deployment_name, + tf_triggering_env, + from_ts_ms, + to_ts_ms, + last_n_baseline, + last_n_comparison, + first_n_baseline, + first_n_comparison, + 
running_platform, + baseline_architecture, + comparison_architecture, + verbose, + ) + + # Combine results from both server and client side + combined_latency_notes = [] + if server_latency_note: + combined_latency_notes.append(f"server: {server_latency_note}") + if client_latency_note: + combined_latency_notes.append(f"client: {client_latency_note}") + + # Only confirm regression if BOTH server and client side show evidence AND data is stable enough + # Check if either server or client data contains unstable indicators + server_has_unstable = ( + server_latency_note and "UNSTABLE" in server_latency_note + ) + client_has_unstable = ( + client_latency_note and "UNSTABLE" in client_latency_note + ) + + # Don't confirm regression if either side has unstable data + if server_has_unstable or client_has_unstable: + both_confirm_regression = False + unstable_sides = [] + if server_has_unstable: + unstable_sides.append("server") + if client_has_unstable: + unstable_sides.append("client") + blocked_note = f"regression blocked due to unstable {' and '.join(unstable_sides)} latency data" + note += f"; {blocked_note}" + logging.info( + f"Blocking regression confirmation for '{test_name}' due to unstable latency data" + ) + if server_has_unstable: + logging.info(" Server-side latency data is unstable") + if client_has_unstable: + logging.info(" Client-side latency data is unstable") + else: + both_confirm_regression = ( + server_confirms_regression and client_confirms_regression + ) + + if combined_latency_notes: + combined_note = "; ".join(combined_latency_notes) + note += f"; {combined_note}" + logging.info( + f"Combined latency check result for '{test_name}': {combined_note}" + ) + + if both_confirm_regression: + logging.info( + f"BOTH server and client latency analysis CONFIRM regression for '{test_name}'" + ) + + # Set the flag for counting confirmed regressions + latency_confirms_regression = True + + # Combine regression details from both server and client + combined_regression_details = ( + server_regression_details or client_regression_details + ) + if combined_regression_details: + combined_regression_details["server_side"] = ( + server_confirms_regression + ) + combined_regression_details["client_side"] = ( + client_confirms_regression + ) + + # 2nd level confirmation is sufficient - always add to confirmed regressions + logging.info( + f"Adding '{test_name}' to confirmed regressions based on 2nd level validation" + ) + + # Perform 3rd-level analysis: variance + p99 check for additional confidence scoring + logging.info( + f"Performing 3rd-level analysis (variance + p99) for confidence scoring on '{test_name}'" + ) + ( + confidence_note, + high_confidence, + ) = perform_variance_and_p99_analysis( + rts, + test_name, + baseline_str, + comparison_str, + by_str_baseline, + by_str_comparison, + baseline_deployment_name, + comparison_deployment_name, + tf_triggering_env, + from_ts_ms, + to_ts_ms, + last_n_baseline, + last_n_comparison, + first_n_baseline, + first_n_comparison, + running_platform, + baseline_architecture, + comparison_architecture, + verbose, + ) + + if confidence_note: + note += f"; {confidence_note}" + logging.info( + f"Confidence analysis for '{test_name}': {confidence_note}" + ) + # Use 3rd level confidence if available + combined_regression_details["high_confidence"] = ( + high_confidence + ) + else: + # No 3rd level data available - default to moderate confidence since 2nd level confirmed + logging.info( + f"No 3rd level data available for '{test_name}' - using 2nd level 
confirmation" + ) + combined_regression_details["high_confidence"] = ( + True # 2nd level confirmation is reliable + ) + + # Always add to confirmed regressions when 2nd level confirms + latency_confirmed_regression_details.append( + combined_regression_details + ) + elif server_confirms_regression or client_confirms_regression: + side_confirmed = ( + "server" if server_confirms_regression else "client" + ) + side_not_confirmed = ( + "client" if server_confirms_regression else "server" + ) + insufficient_evidence_note = f"only {side_confirmed} side confirms regression ({side_not_confirmed} side stable) - insufficient evidence" + note += f"; {insufficient_evidence_note}" + logging.info( + f"Only {side_confirmed} side confirms regression for '{test_name}' - insufficient evidence" + ) + else: + no_regression_note = ( + "neither server nor client side confirms regression" + ) + note += f"; {no_regression_note}" + logging.info( + f"Neither server nor client side confirms regression for '{test_name}'" + ) + else: + logging.info( + f"No latency data available for secondary check on '{test_name}'" + ) baseline_v_str = prepare_value_str( baseline_pct_change, baseline_v, baseline_values, simplify_table @@ -956,6 +1385,12 @@ def from_rts_to_regression_table( if unstable: total_unstable += 1 + if unstable_baseline: + total_unstable_baseline += 1 + if unstable_comparison: + total_unstable_comparison += 1 + if latency_confirms_regression: + total_latency_confirmed_regressions += 1 should_add_line = False if print_regressions_only and detected_regression: @@ -976,6 +1411,13 @@ def from_rts_to_regression_table( percentage_change, table, test_name, + grafana_link_base, + baseline_branch, + baseline_tag, + comparison_branch, + comparison_tag, + from_date, + to_date, ) return ( detected_regressions, @@ -985,9 +1427,995 @@ def from_rts_to_regression_table( total_stable, total_unstable, total_comparison_points, + total_unstable_baseline, + total_unstable_comparison, + total_latency_confirmed_regressions, + latency_confirmed_regression_details, ) +def check_client_side_latency( + rts, + test_name, + baseline_str, + comparison_str, + by_str_baseline, + by_str_comparison, + baseline_deployment_name, + comparison_deployment_name, + tf_triggering_env, + from_ts_ms, + to_ts_ms, + last_n_baseline, + last_n_comparison, + first_n_baseline, + first_n_comparison, + running_platform, + baseline_architecture, + comparison_architecture, + verbose=False, +): + """ + Check client-side latency metrics to provide additional validation for regression detection. 
+ + Returns: + tuple: (note_string, confirms_regression_bool, regression_details_dict) + """ + logging.info(f"Starting client-side latency check for test: {test_name}") + try: + # Client-side latency metrics to check + client_metrics = [ + "p50_latency_ms", + "Latency", + "OverallQuantiles.allCommands.q50", + "Tests.INSERT.AverageLatency_us_", + "Tests.READ.AverageLatency_us_", + "Tests.SEARCH.AverageLatency_us_", + "Tests.UPDATE.AverageLatency_us_", + ] + + client_latency_notes = [] + significant_client_latency_increases = 0 + regression_details = {"test_name": test_name, "commands": []} + + for metric in client_metrics: + # Build filters for client-side latency metric + filters_baseline = [ + f"{by_str_baseline}={baseline_str}", + f"metric={metric}", + f"test_name={test_name}", + f"deployment_name={baseline_deployment_name}", + f"triggering_env={tf_triggering_env}", + ] + filters_comparison = [ + f"{by_str_comparison}={comparison_str}", + f"metric={metric}", + f"test_name={test_name}", + f"deployment_name={comparison_deployment_name}", + f"triggering_env={tf_triggering_env}", + ] + + # Add optional filters + if running_platform is not None: + filters_baseline.append(f"running_platform={running_platform}") + filters_comparison.append(f"running_platform={running_platform}") + if baseline_architecture != ARCH_X86: + filters_baseline.append(f"arch={baseline_architecture}") + if comparison_architecture != ARCH_X86: + filters_comparison.append(f"arch={comparison_architecture}") + + # Query for client-side latency time-series + baseline_client_ts = rts.ts().queryindex(filters_baseline) + comparison_client_ts = rts.ts().queryindex(filters_comparison) + + if len(baseline_client_ts) == 0 or len(comparison_client_ts) == 0: + if verbose: + logging.info( + f" No client-side data found for metric '{metric}' in {test_name}" + ) + continue + + logging.info( + f" Found client-side metric '{metric}': {len(baseline_client_ts)} baseline, {len(comparison_client_ts)} comparison time-series" + ) + + # Filter out target time-series + baseline_client_ts = [ts for ts in baseline_client_ts if "target" not in ts] + comparison_client_ts = [ + ts for ts in comparison_client_ts if "target" not in ts + ] + + if len(baseline_client_ts) == 0 or len(comparison_client_ts) == 0: + continue + + # Use the first available time-series for each side + baseline_ts = baseline_client_ts[0] + comparison_ts = comparison_client_ts[0] + + # Get client-side latency data + baseline_client_data = rts.ts().revrange(baseline_ts, from_ts_ms, to_ts_ms) + comparison_client_data = rts.ts().revrange( + comparison_ts, from_ts_ms, to_ts_ms + ) + + if len(baseline_client_data) == 0 or len(comparison_client_data) == 0: + if verbose: + logging.info( + f" No data points for metric '{metric}': baseline={len(baseline_client_data)}, comparison={len(comparison_client_data)}" + ) + continue + + # Calculate client-side latency statistics + baseline_client_values = [] + comparison_client_values = [] + + (_, baseline_client_median, _) = get_v_pct_change_and_largest_var( + baseline_client_data, + 0, + 0, + baseline_client_values, + 0, + last_n_baseline, + verbose, + first_n_baseline, + ) + + (_, comparison_client_median, _) = get_v_pct_change_and_largest_var( + comparison_client_data, + 0, + 0, + comparison_client_values, + 0, + last_n_comparison, + verbose, + first_n_comparison, + ) + + if baseline_client_median == "N/A" or comparison_client_median == "N/A": + if verbose: + logging.info( + f" Could not calculate median for metric '{metric}': 
baseline={baseline_client_median}, comparison={comparison_client_median}" + ) + continue + + # Calculate variance (coefficient of variation) for both baseline and comparison + baseline_client_mean = ( + statistics.mean(baseline_client_values) if baseline_client_values else 0 + ) + baseline_client_stdev = ( + statistics.stdev(baseline_client_values) + if len(baseline_client_values) > 1 + else 0 + ) + baseline_client_cv = ( + (baseline_client_stdev / baseline_client_mean * 100) + if baseline_client_mean > 0 + else float("inf") + ) + + comparison_client_mean = ( + statistics.mean(comparison_client_values) + if comparison_client_values + else 0 + ) + comparison_client_stdev = ( + statistics.stdev(comparison_client_values) + if len(comparison_client_values) > 1 + else 0 + ) + comparison_client_cv = ( + (comparison_client_stdev / comparison_client_mean * 100) + if comparison_client_mean > 0 + else float("inf") + ) + + # Calculate client-side latency change (for latency, higher is worse) + client_latency_change = ( + float(comparison_client_median) / float(baseline_client_median) - 1 + ) * 100.0 + + logging.info( + f" Client metric '{metric}': baseline={baseline_client_median:.2f} (CV={baseline_client_cv:.1f}%), comparison={comparison_client_median:.2f} (CV={comparison_client_cv:.1f}%), change={client_latency_change:.1f}%" + ) + + # Check if client latency data is too unstable to be reliable + client_data_unstable = ( + baseline_client_cv > 50.0 or comparison_client_cv > 50.0 + ) + + if client_data_unstable: + # Mark as unstable client latency data + unstable_reason = [] + if baseline_client_cv > 50.0: + unstable_reason.append(f"baseline CV={baseline_client_cv:.1f}%") + if comparison_client_cv > 50.0: + unstable_reason.append(f"comparison CV={comparison_client_cv:.1f}%") + + client_latency_notes.append( + f"{metric} UNSTABLE ({', '.join(unstable_reason)} - data too noisy for reliable analysis)" + ) + logging.warning( + f" Client metric '{metric}': UNSTABLE latency data detected - {', '.join(unstable_reason)}" + ) + elif ( + abs(client_latency_change) > 5.0 + ): # Only report significant client latency changes for stable data + direction = "increased" if client_latency_change > 0 else "decreased" + + # Adjust significance threshold based on baseline variance + if baseline_client_cv < 30.0: + # Low variance - use standard threshold + significance_threshold = 10.0 + elif baseline_client_cv < 50.0: + # Moderate variance - require larger change + significance_threshold = 15.0 + else: + # High variance - require much larger change + significance_threshold = 25.0 + + client_latency_notes.append( + f"{metric} {direction} {abs(client_latency_change):.1f}% (baseline CV={baseline_client_cv:.1f}%)" + ) + logging.info( + f" Client metric '{metric}': SIGNIFICANT latency change detected ({direction} {abs(client_latency_change):.1f}%, baseline CV={baseline_client_cv:.1f}%)" + ) + + # Track significant client latency increases (potential regression confirmation) + if client_latency_change > significance_threshold: + significant_client_latency_increases += 1 + regression_details["commands"].append( + { + "command": metric, + "change_percent": client_latency_change, + "direction": direction, + "baseline_cv": baseline_client_cv, + "comparison_cv": comparison_client_cv, + } + ) + logging.info( + f" Client metric '{metric}': CONFIRMS regression (change={client_latency_change:.1f}% > threshold={significance_threshold:.1f}%)" + ) + else: + logging.info( + f" Client metric '{metric}': Change below significance threshold 
(change={client_latency_change:.1f}% <= threshold={significance_threshold:.1f}%)" + ) + elif verbose: + client_latency_notes.append( + f"{metric} stable (CV={baseline_client_cv:.1f}%)" + ) + logging.info( + f" Client metric '{metric}': latency stable (change={client_latency_change:.1f}%, baseline CV={baseline_client_cv:.1f}%)" + ) + + # Determine if client-side latency confirms regression + confirms_regression = significant_client_latency_increases > 0 + + # Return combined client latency notes + if client_latency_notes: + result = "; ".join(client_latency_notes) + logging.info( + f"Client-side latency check completed for {test_name}: {result}" + ) + return ( + result, + confirms_regression, + regression_details if confirms_regression else None, + ) + else: + result = "client latency stable" if len(client_metrics) > 0 else None + logging.info( + f"Client-side latency check completed for {test_name}: {result or 'no data'}" + ) + return result, False, None + + except Exception as e: + logging.error(f"Error checking client-side latency for {test_name}: {e}") + return None, False, None + + +def perform_variance_and_p99_analysis( + rts, + test_name, + baseline_str, + comparison_str, + by_str_baseline, + by_str_comparison, + baseline_deployment_name, + comparison_deployment_name, + tf_triggering_env, + from_ts_ms, + to_ts_ms, + last_n_baseline, + last_n_comparison, + first_n_baseline, + first_n_comparison, + running_platform, + baseline_architecture, + comparison_architecture, + verbose=False, +): + """ + Perform 3rd-level analysis using variance and p99 metrics to assess confidence in regression detection. + + Returns: + tuple: (confidence_note, high_confidence_bool) + """ + try: + logging.info(f"Starting variance and p99 analysis for {test_name}") + + # Build filters for p99 latency metric using both metric=p99 and metric-type=(latencystats) + filters_baseline = [ + f"{by_str_baseline}={baseline_str}", + "metric=p99", + "metric-type=(latencystats)", + f"test_name={test_name}", + f"deployment_name={baseline_deployment_name}", + f"triggering_env={tf_triggering_env}", + ] + filters_comparison = [ + f"{by_str_comparison}={comparison_str}", + "metric=p99", + "metric-type=(latencystats)", + f"test_name={test_name}", + f"deployment_name={comparison_deployment_name}", + f"triggering_env={tf_triggering_env}", + ] + + # Add optional filters + if running_platform is not None: + filters_baseline.append(f"running_platform={running_platform}") + filters_comparison.append(f"running_platform={running_platform}") + if baseline_architecture != ARCH_X86: + filters_baseline.append(f"arch={baseline_architecture}") + if comparison_architecture != ARCH_X86: + filters_comparison.append(f"arch={comparison_architecture}") + + # Query for p99 latency time-series + logging.info(f"Querying p99 latencystats time-series for {test_name}") + baseline_p99_ts = rts.ts().queryindex(filters_baseline) + comparison_p99_ts = rts.ts().queryindex(filters_comparison) + + logging.info(f"Found {len(baseline_p99_ts)} baseline p99 latency time-series") + logging.info( + f"Found {len(comparison_p99_ts)} comparison p99 latency time-series" + ) + + # Filter out target time-series and unwanted commands (reuse existing function) + def should_exclude_timeseries(ts_name): + """Check if time-series should be excluded based on command""" + if "target" in ts_name: + return True + ts_name_lower = ts_name.lower() + excluded_commands = ["config", "info", "ping", "cluster", "resetstat"] + return any(cmd in ts_name_lower for cmd in excluded_commands) + + 
baseline_p99_ts = [ + ts for ts in baseline_p99_ts if not should_exclude_timeseries(ts) + ] + comparison_p99_ts = [ + ts for ts in comparison_p99_ts if not should_exclude_timeseries(ts) + ] + + if len(baseline_p99_ts) == 0 or len(comparison_p99_ts) == 0: + logging.warning( + f"No p99 latency data found for {test_name} after filtering" + ) + return None, False + + # Extract command names from time-series (reuse existing function) + def extract_command_from_ts(ts_name): + """Extract meaningful command name from time-series name""" + # Look for latencystats_latency_percentiles_usec__p99 pattern + match = re.search( + r"latencystats_latency_percentiles_usec_([^_/]+)_p99", ts_name + ) + if match: + return match.group(1) + # Look for command= pattern in the time-series name + match = re.search(r"command=([^/]+)", ts_name) + if match: + return match.group(1) + # If no specific pattern found, try to extract from the end of the path + parts = ts_name.split("/") + if len(parts) > 0: + return parts[-1] + return "unknown" + + # Group time-series by command + baseline_by_command = {} + comparison_by_command = {} + + for ts in baseline_p99_ts: + cmd = extract_command_from_ts(ts) + if cmd not in baseline_by_command: + baseline_by_command[cmd] = [] + baseline_by_command[cmd].append(ts) + + for ts in comparison_p99_ts: + cmd = extract_command_from_ts(ts) + if cmd not in comparison_by_command: + comparison_by_command[cmd] = [] + comparison_by_command[cmd].append(ts) + + # Find common commands between baseline and comparison + common_commands = set(baseline_by_command.keys()) & set( + comparison_by_command.keys() + ) + + if not common_commands: + logging.warning( + f"No common commands found for p99 variance analysis in {test_name}" + ) + return None, False + + variance_notes = [] + p99_notes = [] + high_confidence_indicators = 0 + total_indicators = 0 + + # Analyze variance and p99 for each command + for command in sorted(common_commands): + total_indicators += 1 + logging.info(f"Analyzing p99 variance for command: {command}") + + baseline_ts_list = baseline_by_command[command] + comparison_ts_list = comparison_by_command[command] + + # If multiple time-series for the same command, try to get the best one + if len(baseline_ts_list) > 1: + baseline_ts_list = get_only_Totals(baseline_ts_list) + if len(comparison_ts_list) > 1: + comparison_ts_list = get_only_Totals(comparison_ts_list) + + if len(baseline_ts_list) != 1 or len(comparison_ts_list) != 1: + logging.warning( + f" Skipping {command}: baseline={len(baseline_ts_list)}, comparison={len(comparison_ts_list)} time-series" + ) + continue + + # Get p99 latency data for this command + baseline_p99_data = [] + comparison_p99_data = [] + + for ts_name in baseline_ts_list: + datapoints = rts.ts().revrange(ts_name, from_ts_ms, to_ts_ms) + baseline_p99_data.extend(datapoints) + + for ts_name in comparison_ts_list: + datapoints = rts.ts().revrange(ts_name, from_ts_ms, to_ts_ms) + comparison_p99_data.extend(datapoints) + + if len(baseline_p99_data) < 3 or len(comparison_p99_data) < 3: + logging.warning( + f" Insufficient p99 data for {command}: baseline={len(baseline_p99_data)}, comparison={len(comparison_p99_data)} datapoints" + ) + continue + + # Extract values for variance calculation + baseline_values = [dp[1] for dp in baseline_p99_data] + comparison_values = [dp[1] for dp in comparison_p99_data] + + # Calculate variance (coefficient of variation) + baseline_mean = statistics.mean(baseline_values) + baseline_stdev = ( + statistics.stdev(baseline_values) if 
len(baseline_values) > 1 else 0 + ) + baseline_cv = ( + (baseline_stdev / baseline_mean * 100) + if baseline_mean > 0 + else float("inf") + ) + + comparison_mean = statistics.mean(comparison_values) + comparison_stdev = ( + statistics.stdev(comparison_values) if len(comparison_values) > 1 else 0 + ) + comparison_cv = ( + (comparison_stdev / comparison_mean * 100) + if comparison_mean > 0 + else float("inf") + ) + + # Calculate p99 change + p99_change = ( + ((comparison_mean - baseline_mean) / baseline_mean * 100) + if baseline_mean > 0 + else 0 + ) + + # Assess confidence based on variance and p99 change + if baseline_cv < 30: # Low variance in baseline (< 30% CV) + if abs(p99_change) > 15: # Significant p99 change + high_confidence_indicators += 1 + p99_notes.append( + f"{command} p99 {'+' if p99_change > 0 else ''}{p99_change:.1f}% (stable baseline)" + ) + else: + p99_notes.append( + f"{command} p99 {'+' if p99_change > 0 else ''}{p99_change:.1f}% (stable baseline, minor change)" + ) + elif baseline_cv < 50: # Moderate variance + if abs(p99_change) > 25: # Need larger change for confidence + high_confidence_indicators += 1 + p99_notes.append( + f"{command} p99 {'+' if p99_change > 0 else ''}{p99_change:.1f}% (moderate baseline variance)" + ) + else: + p99_notes.append( + f"{command} p99 {'+' if p99_change > 0 else ''}{p99_change:.1f}% (moderate baseline variance, uncertain)" + ) + else: # High variance + if abs(p99_change) > 40: # Need very large change for confidence + high_confidence_indicators += 1 + p99_notes.append( + f"{command} p99 {'+' if p99_change > 0 else ''}{p99_change:.1f}% (high baseline variance, large change)" + ) + else: + p99_notes.append( + f"{command} p99 {'+' if p99_change > 0 else ''}{p99_change:.1f}% (high baseline variance, low confidence)" + ) + + variance_notes.append(f"{command} baseline CV={baseline_cv:.1f}%") + + if verbose: + logging.info( + f" Command {command}: baseline CV={baseline_cv:.1f}%, comparison CV={comparison_cv:.1f}%, p99 change={p99_change:.1f}%" + ) + + # Determine overall confidence + confidence_ratio = ( + high_confidence_indicators / total_indicators if total_indicators > 0 else 0 + ) + high_confidence = ( + confidence_ratio >= 0.5 + ) # At least 50% of indicators show high confidence + + # Create confidence note + confidence_parts = [] + if variance_notes: + confidence_parts.extend(variance_notes) + if p99_notes: + confidence_parts.extend(p99_notes) + + confidence_note = "; ".join(confidence_parts) if confidence_parts else None + + if confidence_note: + confidence_level = "HIGH" if high_confidence else "LOW" + cv_explanation = "CV=coefficient of variation (data stability: <30% stable, 30-50% moderate, >50% unstable)" + confidence_note = ( + f"confidence={confidence_level} ({confidence_note}; {cv_explanation})" + ) + + logging.info( + f"Variance and p99 analysis completed for {test_name}: confidence={confidence_ratio:.2f}, high_confidence={high_confidence}" + ) + return confidence_note, high_confidence + + except Exception as e: + logging.error(f"Error in variance and p99 analysis for {test_name}: {e}") + return None, False + + +def check_latency_for_unstable_throughput( + rts, + test_name, + baseline_str, + comparison_str, + by_str_baseline, + by_str_comparison, + baseline_deployment_name, + comparison_deployment_name, + tf_triggering_env, + from_ts_ms, + to_ts_ms, + last_n_baseline, + last_n_comparison, + first_n_baseline, + first_n_comparison, + running_platform, + baseline_architecture, + comparison_architecture, + verbose, +): + """ + 
Check latency (p50) for unstable throughput metrics to provide additional context. + Returns a tuple: (note_string, confirms_regression_bool, regression_details_dict) + """ + logging.info(f"Starting latency check for unstable throughput test: {test_name}") + try: + # Build filters for p50 latency metric using both metric=p50 and metric-type=(latencystats) + filters_baseline = [ + f"{by_str_baseline}={baseline_str}", + "metric=p50", + "metric-type=(latencystats)", + f"test_name={test_name}", + f"deployment_name={baseline_deployment_name}", + f"triggering_env={tf_triggering_env}", + ] + filters_comparison = [ + f"{by_str_comparison}={comparison_str}", + "metric=p50", + "metric-type=(latencystats)", + f"test_name={test_name}", + f"deployment_name={comparison_deployment_name}", + f"triggering_env={tf_triggering_env}", + ] + + # Add optional filters + if running_platform is not None: + filters_baseline.append(f"running_platform={running_platform}") + filters_comparison.append(f"running_platform={running_platform}") + if baseline_architecture != ARCH_X86: + filters_baseline.append(f"arch={baseline_architecture}") + if comparison_architecture != ARCH_X86: + filters_comparison.append(f"arch={comparison_architecture}") + + # Query for p50 latency time-series + logging.info(f"Querying p50 latencystats time-series for {test_name}") + logging.info(f"Baseline filters: {filters_baseline}") + logging.info(f"Comparison filters: {filters_comparison}") + + baseline_latency_ts = rts.ts().queryindex(filters_baseline) + comparison_latency_ts = rts.ts().queryindex(filters_comparison) + + logging.info( + f"Found {len(baseline_latency_ts)} baseline p50 latency time-series" + ) + logging.info( + f"Found {len(comparison_latency_ts)} comparison p50 latency time-series" + ) + + if verbose and baseline_latency_ts: + logging.info(f"Baseline latency time-series: {baseline_latency_ts}") + if verbose and comparison_latency_ts: + logging.info(f"Comparison latency time-series: {comparison_latency_ts}") + + # Filter out target time-series and unwanted commands + def should_exclude_timeseries(ts_name): + """Check if time-series should be excluded based on command""" + # Exclude target time-series + if "target" in ts_name: + return True + + # Convert to lowercase for case-insensitive matching + ts_name_lower = ts_name.lower() + + # Exclude administrative commands (case-insensitive) + excluded_commands = ["config", "info", "ping", "cluster", "resetstat"] + return any(cmd in ts_name_lower for cmd in excluded_commands) + + baseline_latency_ts_before = len(baseline_latency_ts) + comparison_latency_ts_before = len(comparison_latency_ts) + + # Apply filtering and log what gets excluded + baseline_excluded = [ + ts for ts in baseline_latency_ts if should_exclude_timeseries(ts) + ] + comparison_excluded = [ + ts for ts in comparison_latency_ts if should_exclude_timeseries(ts) + ] + + baseline_latency_ts = [ + ts for ts in baseline_latency_ts if not should_exclude_timeseries(ts) + ] + comparison_latency_ts = [ + ts for ts in comparison_latency_ts if not should_exclude_timeseries(ts) + ] + + logging.info( + f"After filtering: baseline {baseline_latency_ts_before} -> {len(baseline_latency_ts)}, " + f"comparison {comparison_latency_ts_before} -> {len(comparison_latency_ts)}" + ) + + if baseline_excluded: + logging.info( + f"Excluded {len(baseline_excluded)} baseline administrative command time-series" + ) + if verbose: + for ts in baseline_excluded: + logging.info(f" Excluded baseline: {ts}") + if comparison_excluded: + logging.info( + 
f"Excluded {len(comparison_excluded)} comparison administrative command time-series" + ) + if verbose: + for ts in comparison_excluded: + logging.info(f" Excluded comparison: {ts}") + + if len(baseline_latency_ts) == 0 or len(comparison_latency_ts) == 0: + logging.warning( + f"No p50 latency data found for {test_name} after filtering" + ) + return None, False, None + + # Extract command names from time-series to match baseline and comparison + def extract_command_from_ts(ts_name): + """Extract meaningful command name from time-series name""" + import re + + # Look for latencystats_latency_percentiles_usec__p50 pattern + match = re.search( + r"latencystats_latency_percentiles_usec_([^_/]+)_p50", ts_name + ) + if match: + return match.group(1) + + # Look for command= pattern in the time-series name + match = re.search(r"command=([^/]+)", ts_name) + if match: + return match.group(1) + + # If no specific pattern found, try to extract from the end of the path + # e.g., .../Ops/sec/GET -> GET + parts = ts_name.split("/") + if len(parts) > 0: + return parts[-1] + return "unknown" + + # Group time-series by command + baseline_by_command = {} + comparison_by_command = {} + + for ts in baseline_latency_ts: + cmd = extract_command_from_ts(ts) + if verbose: + logging.info(f"Baseline time-series '{ts}' -> command '{cmd}'") + if cmd not in baseline_by_command: + baseline_by_command[cmd] = [] + baseline_by_command[cmd].append(ts) + + for ts in comparison_latency_ts: + cmd = extract_command_from_ts(ts) + if verbose: + logging.info(f"Comparison time-series '{ts}' -> command '{cmd}'") + if cmd not in comparison_by_command: + comparison_by_command[cmd] = [] + comparison_by_command[cmd].append(ts) + + # Find common commands between baseline and comparison + common_commands = set(baseline_by_command.keys()) & set( + comparison_by_command.keys() + ) + + logging.info(f"Baseline commands found: {sorted(baseline_by_command.keys())}") + logging.info( + f"Comparison commands found: {sorted(comparison_by_command.keys())}" + ) + logging.info( + f"Common commands for latency comparison: {sorted(common_commands)}" + ) + + if not common_commands: + logging.warning( + f"No common commands found for latency comparison in {test_name}" + ) + return None, False, None + + latency_notes = [] + significant_latency_increases = ( + 0 # Track commands with significant latency increases + ) + regression_details = {"test_name": test_name, "commands": []} + + # Compare latency for each command individually + for command in sorted(common_commands): + logging.info(f"Analyzing latency for command: {command}") + baseline_ts_list = baseline_by_command[command] + comparison_ts_list = comparison_by_command[command] + + logging.info( + f" Command {command}: {len(baseline_ts_list)} baseline, {len(comparison_ts_list)} comparison time-series" + ) + + # If multiple time-series for the same command, try to get the best one + if len(baseline_ts_list) > 1: + logging.info( + f" Multiple baseline time-series for {command}, filtering..." + ) + baseline_ts_list = get_only_Totals(baseline_ts_list) + if len(comparison_ts_list) > 1: + logging.info( + f" Multiple comparison time-series for {command}, filtering..." 
+ ) + comparison_ts_list = get_only_Totals(comparison_ts_list) + + if len(baseline_ts_list) != 1 or len(comparison_ts_list) != 1: + logging.warning( + f" Skipping {command}: baseline={len(baseline_ts_list)}, comparison={len(comparison_ts_list)} time-series" + ) + continue + + # Get latency data for this command + baseline_latency_data = [] + comparison_latency_data = [] + + for ts_name in baseline_ts_list: + datapoints = rts.ts().revrange(ts_name, from_ts_ms, to_ts_ms) + baseline_latency_data.extend(datapoints) + + for ts_name in comparison_ts_list: + datapoints = rts.ts().revrange(ts_name, from_ts_ms, to_ts_ms) + comparison_latency_data.extend(datapoints) + + if len(baseline_latency_data) == 0 or len(comparison_latency_data) == 0: + logging.warning( + f" No latency data for {command}: baseline={len(baseline_latency_data)}, comparison={len(comparison_latency_data)} datapoints" + ) + continue + + logging.info( + f" Command {command}: {len(baseline_latency_data)} baseline, {len(comparison_latency_data)} comparison datapoints" + ) + + # Calculate latency statistics for this command + baseline_latency_values = [] + comparison_latency_values = [] + + (_, baseline_latency_median, _) = get_v_pct_change_and_largest_var( + baseline_latency_data, + 0, + 0, + baseline_latency_values, + 0, + last_n_baseline, + verbose, + first_n_baseline, + ) + + (_, comparison_latency_median, _) = get_v_pct_change_and_largest_var( + comparison_latency_data, + 0, + 0, + comparison_latency_values, + 0, + last_n_comparison, + verbose, + first_n_comparison, + ) + + if baseline_latency_median == "N/A" or comparison_latency_median == "N/A": + logging.warning( + f" Could not calculate median for {command}: baseline={baseline_latency_median}, comparison={comparison_latency_median}" + ) + continue + + # Calculate variance (coefficient of variation) for both baseline and comparison + baseline_latency_mean = ( + statistics.mean(baseline_latency_values) + if baseline_latency_values + else 0 + ) + baseline_latency_stdev = ( + statistics.stdev(baseline_latency_values) + if len(baseline_latency_values) > 1 + else 0 + ) + baseline_latency_cv = ( + (baseline_latency_stdev / baseline_latency_mean * 100) + if baseline_latency_mean > 0 + else float("inf") + ) + + comparison_latency_mean = ( + statistics.mean(comparison_latency_values) + if comparison_latency_values + else 0 + ) + comparison_latency_stdev = ( + statistics.stdev(comparison_latency_values) + if len(comparison_latency_values) > 1 + else 0 + ) + comparison_latency_cv = ( + (comparison_latency_stdev / comparison_latency_mean * 100) + if comparison_latency_mean > 0 + else float("inf") + ) + + # Calculate latency change (for latency, lower is better) + latency_change = ( + float(comparison_latency_median) / float(baseline_latency_median) - 1 + ) * 100.0 + + logging.info( + f" Command {command}: baseline p50={baseline_latency_median:.2f} (CV={baseline_latency_cv:.1f}%), comparison p50={comparison_latency_median:.2f} (CV={comparison_latency_cv:.1f}%), change={latency_change:.1f}%" + ) + + # Check if latency data is too unstable to be reliable + latency_data_unstable = ( + baseline_latency_cv > 50.0 or comparison_latency_cv > 50.0 + ) + + if latency_data_unstable: + # Mark as unstable latency data + unstable_reason = [] + if baseline_latency_cv > 50.0: + unstable_reason.append(f"baseline CV={baseline_latency_cv:.1f}%") + if comparison_latency_cv > 50.0: + unstable_reason.append( + f"comparison CV={comparison_latency_cv:.1f}%" + ) + + latency_notes.append( + f"{command} p50 
UNSTABLE ({', '.join(unstable_reason)} - data too noisy for reliable analysis)" + ) + logging.warning( + f" Command {command}: UNSTABLE latency data detected - {', '.join(unstable_reason)}" + ) + elif ( + abs(latency_change) > 5.0 + ): # Only report significant latency changes for stable data + direction = "increased" if latency_change > 0 else "decreased" + + # Adjust significance threshold based on baseline variance + if baseline_latency_cv < 30.0: + # Low variance - use standard threshold + significance_threshold = 10.0 + elif baseline_latency_cv < 50.0: + # Moderate variance - require larger change + significance_threshold = 15.0 + else: + # High variance - require much larger change + significance_threshold = 25.0 + + latency_notes.append( + f"{command} p50 {direction} {abs(latency_change):.1f}% (baseline CV={baseline_latency_cv:.1f}%)" + ) + logging.info( + f" Command {command}: SIGNIFICANT latency change detected ({direction} {abs(latency_change):.1f}%, baseline CV={baseline_latency_cv:.1f}%)" + ) + + # Track significant latency increases (potential regression confirmation) + if latency_change > significance_threshold: + significant_latency_increases += 1 + regression_details["commands"].append( + { + "command": command, + "change_percent": latency_change, + "direction": direction, + "baseline_cv": baseline_latency_cv, + "comparison_cv": comparison_latency_cv, + } + ) + logging.info( + f" Command {command}: CONFIRMS regression (change={latency_change:.1f}% > threshold={significance_threshold:.1f}%)" + ) + else: + logging.info( + f" Command {command}: Change below significance threshold (change={latency_change:.1f}% <= threshold={significance_threshold:.1f}%)" + ) + elif verbose: + latency_notes.append( + f"{command} p50 stable (CV={baseline_latency_cv:.1f}%)" + ) + logging.info( + f" Command {command}: latency stable (change={latency_change:.1f}%, baseline CV={baseline_latency_cv:.1f}%)" + ) + + # Determine if latency confirms regression + confirms_regression = significant_latency_increases > 0 + + # Return combined latency notes + if latency_notes: + result = "; ".join(latency_notes) + logging.info(f"Latency check completed for {test_name}: {result}") + return ( + result, + confirms_regression, + regression_details if confirms_regression else None, + ) + else: + result = "p50 latency stable" if common_commands else None + logging.info( + f"Latency check completed for {test_name}: {result or 'no data'}" + ) + return result, False, None + + except Exception as e: + logging.error(f"Error checking latency for {test_name}: {e}") + return None, False, None + + def get_only_Totals(baseline_timeseries): logging.warning("\t\tTime-series: {}".format(", ".join(baseline_timeseries))) logging.info("Checking if Totals will reduce timeseries.") @@ -995,6 +2423,37 @@ def get_only_Totals(baseline_timeseries): for ts_name in baseline_timeseries: if "Totals" in ts_name: new_base.append(ts_name) + + # If no "Totals" time-series found, try to pick the best alternative + if len(new_base) == 0: + logging.warning( + "No 'Totals' time-series found, trying to pick best alternative." 
+ ) + # Prefer time-series without quotes in metric names + unquoted_series = [ts for ts in baseline_timeseries if "'" not in ts] + if unquoted_series: + new_base = unquoted_series + else: + # Fall back to original list + new_base = baseline_timeseries + + # If we still have multiple time-series after filtering for "Totals", + # prefer the one without quotes in the metric name + if len(new_base) > 1: + logging.info("Multiple time-series found, preferring unquoted metric names.") + unquoted_series = [ts for ts in new_base if "'" not in ts] + if unquoted_series: + new_base = unquoted_series + + # If we still have multiple, take the first one + if len(new_base) > 1: + logging.warning( + "Still multiple time-series after filtering, taking the first one: {}".format( + new_base[0] + ) + ) + new_base = [new_base[0]] + baseline_timeseries = new_base return baseline_timeseries @@ -1061,11 +2520,38 @@ def add_line( percentage_change, table, test_name, + grafana_link_base=None, + baseline_branch=None, + baseline_version=None, + comparison_branch=None, + comparison_version=None, + from_date=None, + to_date=None, ): + grafana_link = None + if grafana_link_base is not None: + grafana_link = "{}?orgId=1".format(grafana_link_base) + grafana_link += f"&var-test_case={test_name}" + + if baseline_branch is not None: + grafana_link += f"&var-branch={baseline_branch}" + if baseline_version is not None: + grafana_link += f"&var-version={baseline_version}" + if comparison_branch is not None: + grafana_link += f"&var-branch={comparison_branch}" + if comparison_version is not None: + grafana_link += f"&var-version={comparison_version}" + grafana_link += "&from=now-30d&to=now" + + # Create test name with optional Grafana link + test_name_display = test_name + if grafana_link is not None: + test_name_display = f"[{test_name}]({grafana_link})" + percentage_change_str = "{:.1f}% ".format(percentage_change) table.append( [ - test_name, + test_name_display, baseline_v_str, comparison_v_str, percentage_change_str, @@ -1102,9 +2588,9 @@ def get_v_pct_change_and_largest_var( comparison_values.append(tuple[1]) comparison_df = pd.DataFrame(comparison_values) - comparison_median = float(comparison_df.median()) + comparison_median = float(comparison_df.median().iloc[0]) comparison_v = comparison_median - comparison_std = float(comparison_df.std()) + comparison_std = float(comparison_df.std().iloc[0]) if verbose: logging.info( "comparison_datapoints: {} value: {}; std-dev: {}; median: {}".format( diff --git a/redisbench_admin/environments/oss_cluster.py b/redisbench_admin/environments/oss_cluster.py index 526b6c3..8bd526e 100644 --- a/redisbench_admin/environments/oss_cluster.py +++ b/redisbench_admin/environments/oss_cluster.py @@ -89,6 +89,20 @@ def generate_meet_cmds(shard_count, shard_host, start_port): def setup_oss_cluster_from_conns(meet_cmds, redis_conns, shard_count): status = False try: + # Pre-setup validation: check uptime and cluster mode + for primary_pos, redis_conn in enumerate(redis_conns): + redis_conn.ping() + + server_info = redis_conn.info("server") + uptime = server_info.get("uptime_in_seconds", 0) + cluster_enabled = server_info.get("cluster_enabled", 0) + tcp_port = server_info.get("tcp_port", "n/a") + + logging.info( + f"Node {primary_pos} ({tcp_port}): uptime={uptime}s cluster_enabled={cluster_enabled}" + ) + + # Send meet commands for primary_pos, redis_conn in enumerate(redis_conns): logging.info( "Sending to primary #{} a total of {} MEET commands".format( @@ -138,6 +152,29 @@ def 
setup_oss_cluster_from_conns(meet_cmds, redis_conns, shard_count): ) logging.info("Node {}: cluster_state {}".format(n, cluster_state_ok)) sleep(1) + + # Post-setup validation: check uptime and cluster mode + sleep(10) + for primary_pos, redis_conn in enumerate(redis_conns): + redis_conn.ping() + + server_info = redis_conn.info("server") + uptime = server_info.get("uptime_in_seconds", 0) + server_info = redis_conn.info("cluster") + cluster_enabled = server_info.get("cluster_enabled", -1) + tcp_port = server_info.get("tcp_port", "n/a") + + logging.info( + f"Node {primary_pos} ({tcp_port}): uptime={uptime}s cluster_enabled={cluster_enabled}" + ) + + if cluster_enabled != 1: + logging.error( + "Node {}: cluster mode is not enabled (cluster_enabled={})".format( + primary_pos, cluster_enabled + ) + ) + return False status = True except redis.exceptions.RedisError as e: logging.warning("Received an error {}".format(e.__str__())) diff --git a/redisbench_admin/run/cluster.py b/redisbench_admin/run/cluster.py index 270cd1a..8aef21e 100644 --- a/redisbench_admin/run/cluster.py +++ b/redisbench_admin/run/cluster.py @@ -115,6 +115,12 @@ def spin_up_redis_cluster_remote_redis( logname, redis_7=True, ): + # Import the function from standalone module + from redisbench_admin.run_remote.standalone import ensure_redis_server_available + + # Ensure redis-server is available before trying to start cluster + ensure_redis_server_available(server_public_ip, username, private_key, ssh_port) + logging.info("Generating the remote redis-server command arguments") redis_process_commands = [] logfiles = [] diff --git a/redisbench_admin/run/metrics.py b/redisbench_admin/run/metrics.py index 3db76cc..45f16f9 100644 --- a/redisbench_admin/run/metrics.py +++ b/redisbench_admin/run/metrics.py @@ -188,8 +188,6 @@ def get_total_cpu(info_data): def collect_cpu_data(redis_conns=[], delta_secs: float = 5.0, delay_start: float = 1.0): - global BENCHMARK_CPU_STATS_GLOBAL - global BENCHMARK_RUNNING_GLOBAL import time counter = 0 diff --git a/redisbench_admin/run_local/args.py b/redisbench_admin/run_local/args.py index 00a7316..bd5da5b 100644 --- a/redisbench_admin/run_local/args.py +++ b/redisbench_admin/run_local/args.py @@ -40,4 +40,16 @@ def create_run_local_arguments(parser): default=IGNORE_KEYSPACE_ERRORS, help="Ignore keyspace check errors. 
Will still log them as errors", ) + parser.add_argument( + "--dry-run", + default=False, + action="store_true", + help="Setup environment and test connectivity without running benchmarks", + ) + parser.add_argument( + "--dry-run-with-preload", + default=False, + action="store_true", + help="Setup environment, preload data, and test connectivity without running benchmarks", + ) return parser diff --git a/redisbench_admin/run_local/run_local.py b/redisbench_admin/run_local/run_local.py index 622252a..4c7d46f 100644 --- a/redisbench_admin/run_local/run_local.py +++ b/redisbench_admin/run_local/run_local.py @@ -16,6 +16,7 @@ from redisbench_admin.utils.remote import ( get_project_ts_tags, push_data_to_redistimeseries, + perform_connectivity_test, ) import redisbench_admin.run.metrics @@ -326,6 +327,7 @@ def run_local_command_logic(args, project_name, project_version): ) # run the benchmark + cpu_stats_thread = threading.Thread( target=collect_cpu_data, args=(redis_conns, 5.0, 1.0), @@ -335,9 +337,60 @@ def run_local_command_logic(args, project_name, project_version): ) cpu_stats_thread.start() benchmark_start_time = datetime.datetime.now() - stdout, stderr = run_local_benchmark( - benchmark_tool, command + logging.info( + "Running benchmark command: {}".format(command) ) + # Handle dry-run modes + if args.dry_run or args.dry_run_with_preload: + logging.info( + "๐Ÿƒ Dry-run mode detected - performing connectivity tests" + ) + + # Test basic connectivity after setup + connectivity_success = perform_connectivity_test( + redis_conns, "after local environment setup" + ) + + if args.dry_run_with_preload: + logging.info( + "๐Ÿ“ฆ Dry-run with preload - data loading already completed during setup" + ) + # Test connectivity after preload (data was loaded during local_db_spin) + connectivity_success = ( + perform_connectivity_test( + redis_conns, "after data preloading" + ) + and connectivity_success + ) + + # Print dry-run summary + logging.info("=" * 50) + logging.info("๐ŸŽฏ DRY-RUN SUMMARY") + logging.info("=" * 50) + logging.info( + f"โœ… Database: {setup_type} ({'cluster' if cluster_api_enabled else 'standalone'}) started locally" + ) + logging.info( + f"โœ… Client tools: {benchmark_tool} available" + ) + logging.info( + f"{'โœ…' if connectivity_success else 'โŒ'} Connectivity: {len(redis_conns)} connection(s) tested" + ) + if args.dry_run_with_preload: + logging.info( + "โœ… Data preload: Completed during setup" + ) + logging.info("๐Ÿ Dry-run completed successfully") + logging.info( + "โญ๏ธ Benchmark execution skipped (dry-run mode)" + ) + logging.info("=" * 50) + + # Skip benchmark execution and continue to next test + else: + stdout, stderr = run_local_benchmark( + benchmark_tool, command + ) benchmark_end_time = datetime.datetime.now() redisbench_admin.run.metrics.BENCHMARK_RUNNING_GLOBAL = ( False @@ -364,10 +417,10 @@ def run_local_command_logic(args, project_name, project_version): benchmark_end_time, benchmark_start_time ) ) - - logging.info("Extracting the benchmark results") - logging.info("stdout: {}".format(stdout)) - logging.info("stderr: {}".format(stderr)) + if args.dry_run is False: + logging.info("Extracting the benchmark results") + logging.info("stdout: {}".format(stdout)) + logging.info("stderr: {}".format(stderr)) ( _, @@ -420,53 +473,54 @@ def run_local_command_logic(args, project_name, project_version): test_name, tf_triggering_env, ) - - post_process_benchmark_results( - benchmark_tool, - local_benchmark_output_filename, - start_time_ms, - start_time_str, - stdout, - ) 
results_dict = {} - with open( - local_benchmark_output_filename, "r" - ) as json_file: - results_dict = json.load(json_file) - print_results_table_stdout( - benchmark_config, - default_metrics, - results_dict, - setup_name, - setup_type, - test_name, - total_shards_cpu_usage, - overall_end_time_metrics, - [ - "memory_used_memory", - "memory_used_memory_dataset", - ], - ) - export_redis_metrics( - artifact_version, - end_time_ms, - overall_end_time_metrics, - rts, - setup_name, - setup_type, - test_name, - tf_github_branch, - tf_github_org, - tf_github_repo, - tf_triggering_env, - {"metric-type": "redis-metrics"}, - 0, + if args.dry_run is False: + post_process_benchmark_results( + benchmark_tool, + local_benchmark_output_filename, + start_time_ms, + start_time_str, + stdout, ) - # check KPIs - return_code = results_dict_kpi_check( - benchmark_config, results_dict, return_code - ) + with open( + local_benchmark_output_filename, "r" + ) as json_file: + results_dict = json.load(json_file) + print_results_table_stdout( + benchmark_config, + default_metrics, + results_dict, + setup_name, + setup_type, + test_name, + total_shards_cpu_usage, + overall_end_time_metrics, + [ + "memory_used_memory", + "memory_used_memory_dataset", + ], + ) + export_redis_metrics( + artifact_version, + end_time_ms, + overall_end_time_metrics, + rts, + setup_name, + setup_type, + test_name, + tf_github_branch, + tf_github_org, + tf_github_repo, + tf_triggering_env, + {"metric-type": "redis-metrics"}, + 0, + ) + + # check KPIs + return_code = results_dict_kpi_check( + benchmark_config, results_dict, return_code + ) metadata_tags = get_metadata_tags(benchmark_config) ( @@ -508,7 +562,10 @@ def run_local_command_logic(args, project_name, project_version): "Some unexpected exception was caught " "during local work. Failing test...." 
) - logging.critical(sys.exc_info()[0]) + if len(sys.exc_info()) > 0: + logging.critical(sys.exc_info()[0]) + else: + logging.critical(sys.exc_info()) print("-" * 60) traceback.print_exc(file=sys.stdout) print("-" * 60) diff --git a/redisbench_admin/run_remote/args.py b/redisbench_admin/run_remote/args.py index 0b22382..cd643f8 100644 --- a/redisbench_admin/run_remote/args.py +++ b/redisbench_admin/run_remote/args.py @@ -112,6 +112,18 @@ def create_run_remote_arguments(parser): action="store_true", help="skip environment variables check", ) + parser.add_argument( + "--dry-run", + default=False, + action="store_true", + help="Setup environment and test connectivity without running benchmarks", + ) + parser.add_argument( + "--dry-run-with-preload", + default=False, + action="store_true", + help="Setup environment, preload data, and test connectivity without running benchmarks", + ) parser.add_argument( "--continue-on-module-check-error", default=False, diff --git a/redisbench_admin/run_remote/remote_db.py b/redisbench_admin/run_remote/remote_db.py index 7550650..528af0f 100644 --- a/redisbench_admin/run_remote/remote_db.py +++ b/redisbench_admin/run_remote/remote_db.py @@ -406,9 +406,17 @@ def db_error_artifacts( upload_s3, username, ): + # Import the zip check function + from redisbench_admin.run_remote.standalone import ensure_zip_available + + # Ensure zip is available before trying to use it + ensure_zip_available(server_public_ip, username, private_key, db_ssh_port) + local_zipfile = "{}.zip".format(logname) remote_zipfile = "/home/{}/{}".format(username, local_zipfile) - execute_remote_commands( + + # Create zip file + zip_result = execute_remote_commands( server_public_ip, username, private_key, @@ -417,26 +425,57 @@ def db_error_artifacts( ], db_ssh_port, ) - failed_remote_run_artifact_store( - upload_s3, - server_public_ip, - dirname, - remote_zipfile, - local_zipfile, - s3_bucket_name, - s3_bucket_path, - username, - private_key, - ) + + # Check if zip creation was successful + zip_success = True + for pos, res_pos in enumerate(zip_result): + [recv_exit_status, stdout, stderr] = res_pos + if recv_exit_status != 0: + logging.warning( + "Zip creation failed with exit code {}. stdout: {}. stderr: {}".format( + recv_exit_status, stdout, stderr + ) + ) + zip_success = False + + # Only try to upload if zip was created successfully + if zip_success: + try: + failed_remote_run_artifact_store( + upload_s3, + server_public_ip, + dirname, + remote_zipfile, + local_zipfile, + s3_bucket_name, + s3_bucket_path, + username, + private_key, + ) + except Exception as e: + logging.warning( + "Failed to upload zip file to S3: {}. Continuing without upload.".format( + e + ) + ) + else: + logging.warning("Skipping S3 upload due to zip creation failure") if len(full_logfiles) > 0: - failed_remote_run_artifact_store( - upload_s3, - server_public_ip, - dirname, - full_logfiles[0], - logname, - s3_bucket_name, - s3_bucket_path, - username, - private_key, - ) + try: + failed_remote_run_artifact_store( + upload_s3, + server_public_ip, + dirname, + full_logfiles[0], + logname, + s3_bucket_name, + s3_bucket_path, + username, + private_key, + ) + except Exception as e: + logging.warning( + "Failed to upload logfile to S3: {}. 
Continuing without upload.".format( + e + ) + ) diff --git a/redisbench_admin/run_remote/remote_helpers.py b/redisbench_admin/run_remote/remote_helpers.py index c220ef8..7d7af34 100644 --- a/redisbench_admin/run_remote/remote_helpers.py +++ b/redisbench_admin/run_remote/remote_helpers.py @@ -71,6 +71,23 @@ def remote_tool_pre_bench_step( logging.info( f"Settting up remote tool {benchmark_tool} requirements. architecture ={architecture}" ) + + # Check and install benchmark tools if needed + if benchmark_tool == "memtier_benchmark": + from redisbench_admin.run_remote.standalone import ( + ensure_memtier_benchmark_available, + ) + + ensure_memtier_benchmark_available( + client_public_ip, username, private_key, client_ssh_port + ) + elif benchmark_tool == "redis-benchmark": + from redisbench_admin.run_remote.standalone import ensure_redis_server_available + + # redis-benchmark comes with redis-server, so ensure redis-server is installed + ensure_redis_server_available( + client_public_ip, username, private_key, client_ssh_port + ) if benchmark_tool == "redisgraph-benchmark-go": setup_remote_benchmark_tool_redisgraph_benchmark_go( client_public_ip, diff --git a/redisbench_admin/run_remote/run_remote.py b/redisbench_admin/run_remote/run_remote.py index ed1a1a2..ed796ce 100644 --- a/redisbench_admin/run_remote/run_remote.py +++ b/redisbench_admin/run_remote/run_remote.py @@ -69,6 +69,7 @@ get_project_ts_tags, push_data_to_redistimeseries, fetch_remote_id_from_config, + perform_connectivity_test, ) from redisbench_admin.utils.utils import ( @@ -161,7 +162,24 @@ def run_remote_command_logic(args, project_name, project_version): ) webhook_client_slack = WebhookClient(webhook_url) - if args.skip_env_vars_verify is False: + # Only check AWS credentials when actually needed + needs_aws_for_infrastructure = ( + args.inventory is None + ) # No inventory = need to deploy with Terraform + needs_aws_for_s3 = args.upload_results_s3 # S3 upload enabled + + if args.skip_env_vars_verify is False and ( + needs_aws_for_infrastructure or needs_aws_for_s3 + ): + # Log why AWS credentials are being checked + aws_reasons = [] + if needs_aws_for_infrastructure: + aws_reasons.append("infrastructure deployment (no --inventory provided)") + if needs_aws_for_s3: + aws_reasons.append("S3 upload (--upload_results_s3 enabled)") + + logging.info("AWS credentials required for: {}".format(", ".join(aws_reasons))) + env_check_status, failure_reason = check_ec2_env() if env_check_status is False: if webhook_notifications_active: @@ -177,6 +195,10 @@ def run_remote_command_logic(args, project_name, project_version): ) logging.critical("{}. 
Exiting right away!".format(failure_reason)) exit(1) + elif args.skip_env_vars_verify is False: + logging.info( + "AWS credentials check skipped (using --inventory and S3 upload disabled)" + ) continue_on_module_check_error = args.continue_on_module_check_error module_check_status, error_message = redis_modules_check(local_module_files) @@ -674,6 +696,62 @@ def run_remote_command_logic(args, project_name, project_version): ) ) + # Handle dry-run modes + if args.dry_run or args.dry_run_with_preload: + logging.info( + "๐Ÿƒ Dry-run mode detected - performing connectivity tests" + ) + + # Test basic connectivity after setup + connectivity_success = ( + perform_connectivity_test( + redis_conns, "after environment setup" + ) + ) + + if args.dry_run_with_preload: + logging.info( + "๐Ÿ“ฆ Dry-run with preload - data loading already completed during setup" + ) + # Test connectivity after preload (data was loaded during remote_db_spin) + connectivity_success = ( + perform_connectivity_test( + redis_conns, "after data preloading" + ) + and connectivity_success + ) + + # Print dry-run summary + logging.info("=" * 50) + logging.info("๐ŸŽฏ DRY-RUN SUMMARY") + logging.info("=" * 50) + logging.info( + f"โœ… Infrastructure: {'Deployed' if args.inventory is None else 'Using existing'}" + ) + logging.info( + f"โœ… Database: {setup_type} ({'cluster' if cluster_enabled else 'standalone'}) started" + ) + logging.info( + f"โœ… Client tools: Setup completed on {client_public_ip}" + ) + logging.info( + f"{'โœ…' if connectivity_success else 'โŒ'} Connectivity: {len(redis_conns)} connection(s) tested" + ) + if args.dry_run_with_preload: + logging.info( + "โœ… Data preload: Completed during setup" + ) + logging.info( + "๐Ÿ Dry-run completed successfully" + ) + logging.info( + "โญ๏ธ Benchmark execution skipped (dry-run mode)" + ) + logging.info("=" * 50) + + # Skip benchmark execution and continue to next test + continue + logging.info( "Will store benchmark json output to local file {}".format( local_bench_fname diff --git a/redisbench_admin/run_remote/standalone.py b/redisbench_admin/run_remote/standalone.py index 7d3bd23..4c9904e 100644 --- a/redisbench_admin/run_remote/standalone.py +++ b/redisbench_admin/run_remote/standalone.py @@ -15,6 +15,139 @@ from redisbench_admin.utils.utils import redis_server_config_module_part +def ensure_redis_server_available(server_public_ip, username, private_key, port=22): + """Check if redis-server is available, install if not""" + logging.info("Checking if redis-server is available on remote server...") + + # Check if redis-server exists + check_result = execute_remote_commands( + server_public_ip, username, private_key, ["which redis-server"], port + ) + + # Check the result + if len(check_result) > 0: + [recv_exit_status, stdout, stderr] = check_result[0] + if recv_exit_status != 0: + logging.info("redis-server not found, installing Redis...") + + # Install Redis using the provided commands + install_commands = [ + "sudo apt-get install lsb-release curl gpg -y", + "curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o /usr/share/keyrings/redis-archive-keyring.gpg", + "sudo chmod 644 /usr/share/keyrings/redis-archive-keyring.gpg", + 'echo "deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/redis.list', + "sudo apt-get update", + "sudo apt-get install redis -y", + "sudo systemctl disable redis-server", + ] + + install_result = execute_remote_commands( + 
server_public_ip, username, private_key, install_commands, port + ) + + # Check if installation was successful + for pos, res_pos in enumerate(install_result): + [recv_exit_status, stdout, stderr] = res_pos + if recv_exit_status != 0: + logging.warning( + "Redis installation command {} returned exit code {}. stdout: {}. stderr: {}".format( + pos, recv_exit_status, stdout, stderr + ) + ) + + logging.info("Redis installation completed and auto-start disabled") + else: + logging.info("redis-server is already available") + else: + logging.error("Failed to check redis-server availability") + + +def ensure_zip_available(server_public_ip, username, private_key, port=22): + """Check if zip is available, install if not""" + logging.info("Checking if zip is available on remote server...") + + # Check if zip exists + check_result = execute_remote_commands( + server_public_ip, username, private_key, ["which zip"], port + ) + + # Check the result + if len(check_result) > 0: + [recv_exit_status, stdout, stderr] = check_result[0] + if recv_exit_status != 0: + logging.info("zip not found, installing...") + + # Install zip + install_commands = ["sudo apt-get install zip -y"] + + install_result = execute_remote_commands( + server_public_ip, username, private_key, install_commands, port + ) + + # Check if installation was successful + for pos, res_pos in enumerate(install_result): + [recv_exit_status, stdout, stderr] = res_pos + if recv_exit_status != 0: + logging.warning( + "Zip installation command {} returned exit code {}. stdout: {}. stderr: {}".format( + pos, recv_exit_status, stdout, stderr + ) + ) + + logging.info("Zip installation completed") + else: + logging.info("zip is already available") + else: + logging.error("Failed to check zip availability") + + +def ensure_memtier_benchmark_available( + client_public_ip, username, private_key, port=22 +): + """Check if memtier_benchmark is available, install if not""" + logging.info("Checking if memtier_benchmark is available on remote client...") + + # Check if memtier_benchmark exists + check_result = execute_remote_commands( + client_public_ip, username, private_key, ["which memtier_benchmark"], port + ) + + # Check the result + if len(check_result) > 0: + [recv_exit_status, stdout, stderr] = check_result[0] + if recv_exit_status != 0: + logging.info("memtier_benchmark not found, installing...") + + # Install memtier_benchmark using the provided commands + install_commands = [ + "sudo apt install lsb-release curl gpg -y", + "curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o /usr/share/keyrings/redis-archive-keyring.gpg", + 'echo "deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/redis.list', + "sudo apt-get update", + "sudo apt-get install memtier-benchmark -y", + ] + + install_result = execute_remote_commands( + client_public_ip, username, private_key, install_commands, port + ) + + # Check if installation was successful + for pos, res_pos in enumerate(install_result): + [recv_exit_status, stdout, stderr] = res_pos + if recv_exit_status != 0: + logging.warning( + "memtier_benchmark installation command {} returned exit code {}. stdout: {}. 
stderr: {}".format( + pos, recv_exit_status, stdout, stderr + ) + ) + + logging.info("memtier_benchmark installation completed") + else: + logging.info("memtier_benchmark is already available") + else: + logging.error("Failed to check memtier_benchmark availability") + + def spin_up_standalone_remote_redis( temporary_dir, server_public_ip, @@ -27,6 +160,9 @@ def spin_up_standalone_remote_redis( modules_configuration_parameters_map={}, redis_7=True, ): + # Ensure redis-server is available before trying to start it + ensure_redis_server_available(server_public_ip, username, private_key, port) + full_logfile, initial_redis_cmd = generate_remote_standalone_redis_cmd( logfile, redis_configuration_parameters, diff --git a/redisbench_admin/utils/remote.py b/redisbench_admin/utils/remote.py index df415c0..ada875c 100644 --- a/redisbench_admin/utils/remote.py +++ b/redisbench_admin/utils/remote.py @@ -1189,3 +1189,31 @@ def check_ec2_env(): logging.error(error_message) return status, error_message + + +def perform_connectivity_test(redis_conns, test_description=""): + """Perform PING test on all Redis connections""" + logging.info(f"๐Ÿ” Performing connectivity test: {test_description}") + + success_count = 0 + total_count = len(redis_conns) + + for i, conn in enumerate(redis_conns): + try: + result = conn.ping() + if result: + logging.info(f"โœ… Connection {i}: PING successful") + success_count += 1 + else: + logging.error(f"โŒ Connection {i}: PING returned False") + except Exception as e: + logging.error(f"โŒ Connection {i}: PING failed - {e}") + + if success_count == total_count: + logging.info(f"๐ŸŽ‰ All {total_count} connectivity tests passed!") + return True + else: + logging.error( + f"๐Ÿ’ฅ {total_count - success_count}/{total_count} connectivity tests failed!" 
+ ) + return False diff --git a/redisbench_admin/utils/utils.py b/redisbench_admin/utils/utils.py index b86fcb7..de835e1 100644 --- a/redisbench_admin/utils/utils.py +++ b/redisbench_admin/utils/utils.py @@ -128,33 +128,51 @@ def upload_artifacts_to_s3( if region_name is None: region_name = EC2_REGION logging.info("-- uploading results to s3 -- ") - if aws_access_key_id is not None and aws_secret_access_key is not None: - logging.info("-- Using REQUEST PROVIDED AWS credentials -- ") - session = boto3.Session( - aws_access_key_id, aws_secret_access_key, aws_session_token, region_name - ) - s3 = session.resource("s3") - else: - logging.info("-- Using default AWS credentials -- ") - s3 = boto3.resource("s3") - bucket = s3.Bucket(s3_bucket_name) - progress = tqdm(unit="files", total=len(artifacts)) - for full_artifact_path in artifacts: - artifact = os.path.basename(full_artifact_path) - object_key = "{bucket_path}{filename}".format( - bucket_path=s3_bucket_path, filename=artifact - ) + try: + if aws_access_key_id is not None and aws_secret_access_key is not None: + logging.info("-- Using REQUEST PROVIDED AWS credentials -- ") + session = boto3.Session( + aws_access_key_id, aws_secret_access_key, aws_session_token, region_name + ) + s3 = session.resource("s3") + else: + logging.info("-- Using default AWS credentials -- ") + s3 = boto3.resource("s3") + bucket = s3.Bucket(s3_bucket_name) + progress = tqdm(unit="files", total=len(artifacts)) + + for full_artifact_path in artifacts: + try: + artifact = os.path.basename(full_artifact_path) + object_key = "{bucket_path}{filename}".format( + bucket_path=s3_bucket_path, filename=artifact + ) - bucket.upload_file(full_artifact_path, object_key) - object_acl = s3.ObjectAcl(s3_bucket_name, object_key) - object_acl.put(ACL="public-read") - progress.update() - url = "https://s3.{0}.amazonaws.com/{1}/{2}{3}".format( - region_name, s3_bucket_name, s3_bucket_path, quote_plus(artifact) + bucket.upload_file(full_artifact_path, object_key) + object_acl = s3.ObjectAcl(s3_bucket_name, object_key) + object_acl.put(ACL="public-read") + progress.update() + url = "https://s3.{0}.amazonaws.com/{1}/{2}{3}".format( + region_name, s3_bucket_name, s3_bucket_path, quote_plus(artifact) + ) + artifacts_map[artifact] = url + logging.info("Successfully uploaded {} to S3".format(artifact)) + except Exception as e: + logging.warning( + "Failed to upload artifact {} to S3: {}. Skipping this file.".format( + full_artifact_path, e + ) + ) + progress.update() + progress.close() + except Exception as e: + logging.error( + "Failed to initialize S3 connection: {}. No files will be uploaded.".format( + e + ) ) - artifacts_map[artifact] = url - progress.close() + return artifacts_map diff --git a/scripts/compliance.sh b/scripts/compliance.sh new file mode 100644 index 0000000..eb8cc7c --- /dev/null +++ b/scripts/compliance.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +# Compliance checks script for redisbench-admin +set -e + +echo "๐Ÿ” Running compliance checks..." + +echo "๐Ÿ“ Checking code formatting with black..." +poetry run black --check redisbench_admin + +echo "๐Ÿ” Running linting with flake8..." +poetry run flake8 redisbench_admin + +echo "โœ… All compliance checks passed!" 
diff --git a/tests/test_common.py b/tests/test_common.py index 735943c..f42702d 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -172,7 +172,15 @@ def test_common_exporter_logic(): # negative test common_exporter_logic(None, None, None, None, None, None, None, None, None, None) try: - rts = redis.Redis(port=16379) + import os + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) + rts_host = os.getenv("RTS_DATASINK_HOST", None) + rts_pass = "" + if rts_host is None: + return + rts = redis.Redis(port=rts_port, host=rts_host) rts.ping() with open( "./tests/test_data/redis-benchmark-full-suite-1Mkeys-100B.yml", "r" @@ -508,9 +516,16 @@ def test_dbconfig_keyspacelen_check(): from redis import StrictRedis from redis.exceptions import ConnectionError - redis_port = 16379 + import os + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) + rts_host = os.getenv("RTS_DATASINK_HOST", None) + rts_pass = "" + if rts_host is None: + return try: - redis = StrictRedis(port=redis_port) + redis = StrictRedis(port=rts_port, host=rts_host) redis.ping() redis.flushall() redis_conns = [redis] @@ -520,18 +535,19 @@ def test_dbconfig_keyspacelen_check(): ) as yml_file: benchmark_config = yaml.safe_load(yml_file) # no keyspace len check - result = dbconfig_keyspacelen_check(benchmark_config, redis_conns) + result = dbconfig_keyspacelen_check(benchmark_config, redis_conns,False,5) assert result == True with open("./tests/test_data/tsbs-targets.yml", "r") as yml_file: benchmark_config = yaml.safe_load(yml_file) # check and fail try: - result = dbconfig_keyspacelen_check(benchmark_config, redis_conns) + result = dbconfig_keyspacelen_check(benchmark_config, redis_conns,False,5) except Exception as e: + err_str = e.__str__() assert ( - e.__str__() - == "The total numbers of keys in setup does not match the expected spec: 1000!=0. Aborting..." + "The total number of keys in setup does not match the expected spec: 1000 != 0. 
Aborting after" in + err_str ) # check and pass @@ -582,9 +598,13 @@ def test_execute_init_commands(): from redis import StrictRedis from redis.exceptions import ConnectionError - redis_port = 16379 + import os + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) + try: - redis = StrictRedis(port=redis_port) + redis = StrictRedis(port=rts_port) redis.ping() redis.flushall() redis.config_resetstat() diff --git a/tests/test_compare.py b/tests/test_compare.py index a0828b5..bf24c90 100644 --- a/tests/test_compare.py +++ b/tests/test_compare.py @@ -10,12 +10,14 @@ def test_compare_command_logic(): + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) rts_host = os.getenv("RTS_DATASINK_HOST", None) - rts_port = 16379 rts_pass = "" if rts_host is None: return - rts = redis.Redis(port=16379, host=rts_host) + rts = redis.Redis(port=rts_port, host=rts_host) rts.ping() rts.flushall() parser = argparse.ArgumentParser( diff --git a/tests/test_data/defaults-with-purpose-built-env.yml b/tests/test_data/defaults-with-purpose-built-env.yml new file mode 100644 index 0000000..cb4bc53 --- /dev/null +++ b/tests/test_data/defaults-with-purpose-built-env.yml @@ -0,0 +1,344 @@ +exporter: + comparison: + baseline-branch: master + metrics: + - Ops/sec + - $.Tests.Overall.rps + - $.OverallRates.overallOpsRate + mode: higher-better + redistimeseries: + break_by: + - version + - commit + metrics: + - $.Tests.Overall.rps + - $.Tests.Overall.avg_latency_ms + - $.Tests.Overall.p50_latency_ms + - $.Tests.Overall.p95_latency_ms + - $.Tests.Overall.p99_latency_ms + - $.Tests.Overall.max_latency_ms + - $.Tests.Overall.min_latency_ms + - $.build.build_time + - $.build.vector_index_sz_mb + - $."ALL STATS".*."Ops/sec" + - $."ALL STATS".*."Latency" + - $.OverallRates.overallOpsRate + - $.OverallQuantiles.allCommands.q50 + - $.OverallQuantiles.allCommands.q95 + - $.OverallQuantiles.allCommands.q99 + - $.OverallQuantiles.allCommands.q999 + timemetric: $.StartTime +remote: +- type: oss-standalone +- setup: redisearch-m5 +spec: + setups: + - name: oss-standalone + redis_topology: + primaries: 1 + replicas: 0 + resources: + requests: + cpus: '1' + memory: 10g + type: oss-standalone + - name: oss-standalone-1replica + redis_topology: + placement: sparse + primaries: 1 + replicas: 1 + resources: + requests: + cpus: '2' + memory: 10g + type: oss-standalone + - name: oss-cluster-01-primaries + redis_topology: + placement: sparse + primaries: 1 + replicas: 0 + resources: + requests: + cpus: '1' + memory: 10g + type: oss-cluster + - name: oss-cluster-02-primaries + redis_topology: + placement: sparse + primaries: 2 + replicas: 0 + resources: + requests: + cpus: '2' + memory: 10g + type: oss-cluster + - name: oss-cluster-03-primaries + redis_topology: + placement: sparse + primaries: 3 + replicas: 0 + resources: + requests: + cpus: '3' + memory: 30g + type: oss-cluster + - name: oss-cluster-04-primaries + redis_topology: + placement: sparse + primaries: 4 + replicas: 0 + resources: + requests: + cpus: '4' + memory: 40g + type: oss-cluster + - name: oss-cluster-05-primaries + redis_topology: + placement: sparse + primaries: 5 + replicas: 0 + resources: + requests: + cpus: '5' + memory: 50g + type: oss-cluster + - name: oss-cluster-08-primaries + redis_topology: + placement: sparse + primaries: 8 + replicas: 0 + resources: + requests: + cpus: '10' + memory: 90g + type: oss-cluster + - name: 
oss-cluster-09-primaries + redis_topology: + placement: sparse + primaries: 9 + replicas: 0 + resources: + requests: + cpus: '10' + memory: 90g + type: oss-cluster + - name: oss-cluster-15-primaries + redis_topology: + placement: sparse + primaries: 15 + replicas: 0 + resources: + requests: + cpus: '15' + memory: 150g + type: oss-cluster + - name: oss-cluster-16-primaries + redis_topology: + placement: sparse + primaries: 16 + replicas: 0 + resources: + requests: + cpus: '18' + memory: 180g + type: oss-cluster + - name: oss-cluster-20-primaries + redis_topology: + placement: sparse + primaries: 20 + replicas: 0 + resources: + requests: + cpus: '20' + memory: 500g + type: oss-cluster + - name: oss-cluster-21-primaries + redis_topology: + placement: sparse + primaries: 21 + replicas: 0 + resources: + requests: + cpus: '21' + memory: 210g + type: oss-cluster + - name: oss-cluster-24-primaries + redis_topology: + placement: sparse + primaries: 24 + replicas: 0 + resources: + requests: + cpus: '24' + memory: 500g + type: oss-cluster + - name: oss-cluster-30-primaries + redis_topology: + placement: sparse + primaries: 30 + replicas: 0 + resources: + requests: + cpus: '30' + memory: 300g + type: oss-cluster + - name: oss-cluster-32-primaries + redis_topology: + placement: sparse + primaries: 32 + replicas: 0 + resources: + requests: + cpus: '32' + memory: 300g + type: oss-cluster + - name: oss-cluster-16-primaries_master_w10_st10 + redis_topology: + placement: sparse + primaries: 16 + replicas: 0 + resources: + requests: + cpus: '18' + memory: 180g + type: oss-cluster + - name: oss-cluster-16-primaries_master_w10_st20 + redis_topology: + placement: sparse + primaries: 16 + replicas: 0 + resources: + requests: + cpus: '18' + memory: 180g + type: oss-cluster + - name: oss-cluster-16-primaries_master_w20_st10 + redis_topology: + placement: sparse + primaries: 16 + replicas: 0 + resources: + requests: + cpus: '18' + memory: 180g + type: oss-cluster + - name: oss-cluster-16-primaries_master_w20_st20 + redis_topology: + placement: sparse + primaries: 16 + replicas: 0 + resources: + requests: + cpus: '18' + memory: 180g + type: oss-cluster + - name: oss-cluster-16-primaries_joan-uv-threads_w10_st10_sio10 + redis_topology: + placement: sparse + primaries: 16 + replicas: 0 + resources: + requests: + cpus: '18' + memory: 180g + type: oss-cluster + - name: oss-cluster-16-primaries_joan-uv-threads_w10_st20_sio10 + redis_topology: + placement: sparse + primaries: 16 + replicas: 0 + resources: + requests: + cpus: '18' + memory: 180g + type: oss-cluster + - name: oss-cluster-16-primaries_joan-uv-threads_w20_st10_sio10 + redis_topology: + placement: sparse + primaries: 16 + replicas: 0 + resources: + requests: + cpus: '18' + memory: 180g + type: oss-cluster + - name: oss-cluster-16-primaries_joan-uv-threads_w20_st20_sio10 + redis_topology: + placement: sparse + primaries: 16 + replicas: 0 + resources: + requests: + cpus: '18' + memory: 180g + type: oss-cluster + - name: oss-cluster-16-primaries_joan-uv-threads_w10_st10_sio20 + redis_topology: + placement: sparse + primaries: 16 + replicas: 0 + resources: + requests: + cpus: '18' + memory: 180g + type: oss-cluster + - name: oss-cluster-16-primaries_joan-uv-threads_w10_st20_sio20 + redis_topology: + placement: sparse + primaries: 16 + replicas: 0 + resources: + requests: + cpus: '18' + memory: 180g + type: oss-cluster + - name: oss-cluster-16-primaries_joan-uv-threads_w20_st10_sio20 + redis_topology: + placement: sparse + primaries: 16 + replicas: 0 + resources: + 
requests: + cpus: '18' + memory: 180g + type: oss-cluster + - name: oss-cluster-16-primaries_joan-uv-threads_w20_st20_sio20 + type: oss-cluster + redis_topology: + placement: sparse + primaries: 16 + replicas: 0 + resources: + requests: + cpus: '18' + memory: 180g + dbconfig: + module-configuration-parameters: + redisearch: + WORKERS: 6 + MIN_OPERATION_WORKERS: 6 + module-oss: + WORKERS: 6 + MIN_OPERATION_WORKERS: 6 + + - name: oss-cluster-02-primaries_joan-uv-threads_w20_st20_sio20 + type: oss-cluster + redis_topology: + placement: sparse + primaries: 02 + replicas: 0 + resources: + requests: + cpus: '4' + memory: 180g + dbconfig: + module-configuration-parameters: + redisearch: + WORKERS: 6 + MIN_OPERATION_WORKERS: 6 + module-oss: + WORKERS: 6 + MIN_OPERATION_WORKERS: 6 + + +version: 0.2 \ No newline at end of file diff --git a/tests/test_data/test-purpose-built-env.yml b/tests/test_data/test-purpose-built-env.yml new file mode 100644 index 0000000..50a95bd --- /dev/null +++ b/tests/test_data/test-purpose-built-env.yml @@ -0,0 +1,6 @@ +name: test-purpose-built-env-cluster +setups: + - oss-cluster-02-primaries_joan-uv-threads_w20_st20_sio20 +clientconfig: + tool: redis-benchmark + arguments: "-t set -n 100 -c 5" diff --git a/tests/test_defaults_purpose_built_env.py b/tests/test_defaults_purpose_built_env.py new file mode 100644 index 0000000..90dcabd --- /dev/null +++ b/tests/test_defaults_purpose_built_env.py @@ -0,0 +1,276 @@ +import argparse +import unittest.mock +import yaml +import subprocess +import time +import redis +import tempfile +import os + +from redisbench_admin.run_local.args import create_run_local_arguments +from redisbench_admin.run_local.run_local import run_local_command_logic +from redisbench_admin.utils.benchmark_config import ( + get_defaults, + process_default_yaml_properties_file, +) +from redisbench_admin.run.common import extract_test_feasible_setups +from redisbench_admin.environments.oss_cluster import ( + spin_up_local_redis_cluster, + setup_redis_cluster_from_conns, + generate_cluster_redis_server_args, +) + + +def test_defaults_purpose_built_env_parsing(): + """Test that the new defaults-with-purpose-built-env.yml file is properly parsed""" + defaults_filename = "./tests/test_data/defaults-with-purpose-built-env.yml" + + # Test that the file can be loaded and parsed + ( + default_kpis, + default_remote, + default_metrics, + exporter_timemetric_path, + default_specs, + cluster_config, + ) = get_defaults(defaults_filename) + + # Verify basic structure + assert default_specs is not None + assert "setups" in default_specs + + # Verify the specific environment we're testing + setups = default_specs["setups"] + setup_names = [setup["name"] for setup in setups] + + # Check that our target environment exists + assert "oss-cluster-02-primaries_joan-uv-threads_w20_st20_sio20" in setup_names + + # Find and validate the specific setup + target_setup = None + for setup in setups: + if setup["name"] == "oss-cluster-02-primaries_joan-uv-threads_w20_st20_sio20": + target_setup = setup + break + + assert target_setup is not None + + # Validate the setup structure + assert target_setup["type"] == "oss-cluster" + assert "redis_topology" in target_setup + assert "resources" in target_setup + assert "dbconfig" in target_setup + + # Validate topology + topology = target_setup["redis_topology"] + assert topology["primaries"] == 2 # Note: YAML "02" becomes int 2 + assert topology["replicas"] == 0 + assert topology["placement"] == "sparse" + + # Validate resources + resources = 
target_setup["resources"] + assert "requests" in resources + assert resources["requests"]["cpus"] == "4" + assert resources["requests"]["memory"] == "180g" + + # Validate dbconfig + dbconfig = target_setup["dbconfig"] + assert "module-configuration-parameters" in dbconfig + module_params = dbconfig["module-configuration-parameters"] + assert "redisearch" in module_params + assert "module-oss" in module_params + assert module_params["redisearch"]["WORKERS"] == 6 + assert module_params["redisearch"]["MIN_OPERATION_WORKERS"] == 6 + + +def test_extract_feasible_setups_with_purpose_built_env(): + """Test that extract_test_feasible_setups works with the new defaults file""" + defaults_filename = "./tests/test_data/defaults-with-purpose-built-env.yml" + + ( + default_kpis, + default_remote, + default_metrics, + exporter_timemetric_path, + default_specs, + cluster_config, + ) = get_defaults(defaults_filename) + + # Test with a benchmark config that specifies our target environment + benchmark_config = { + "setups": ["oss-cluster-02-primaries_joan-uv-threads_w20_st20_sio20"] + } + + feasible_setups = extract_test_feasible_setups( + benchmark_config, "setups", default_specs, backwards_compatible=False + ) + + # Verify the setup was found and extracted correctly + assert len(feasible_setups) == 1 + assert "oss-cluster-02-primaries_joan-uv-threads_w20_st20_sio20" in feasible_setups + + setup = feasible_setups["oss-cluster-02-primaries_joan-uv-threads_w20_st20_sio20"] + assert setup["type"] == "oss-cluster" + assert setup["redis_topology"]["primaries"] == 2 + assert setup["redis_topology"]["replicas"] == 0 + + +def test_dry_run_with_simple_standalone_env(): + """Test dry-run with real Redis using a simple oss-standalone environment (no modules)""" + parser = argparse.ArgumentParser( + description="test simple env dry-run", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser = create_run_local_arguments(parser) + + # Create a simple test config that uses a standalone environment (no modules) + test_config = { + "name": "test-simple-env", + "setups": ["oss-standalone"], + "clientconfig": [ + {"tool": "redis-benchmark"}, + {"parameters": [ + {"clients": 5}, + {"requests": 100}, + {"test": "set"} + ]} + ] + } + + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) + rts = redis.Redis(port=rts_port,decode_responses=True) + rts.ping() + rts.flushall() + + # Save the test config temporarily + with tempfile.NamedTemporaryFile(mode='w', suffix='.yml', delete=False) as f: + yaml.dump(test_config, f) + test_config_path = f.name + + try: + args = parser.parse_args( + args=[ + "--test", + test_config_path, + "--defaults_filename", + "./tests/test_data/defaults-with-dbconfig.yml", # Use simpler defaults + "--dry-run", + "--allowed-envs", + "oss-standalone", + "--port", + "12000", + "--redis-7", + "True", + "--push_results_redistimeseries", + "--redistimeseries_port", + f"{rts_port}" + ] + ) + + try: + run_local_command_logic(args, "tool", "v0") + success = True + except SystemExit as e: + success = e.code == 0 + + assert success, "Dry-run with real Redis standalone should succeed" + print("โœ… Dry-run completed successfully with real Redis standalone") + + assert rts.info("keyspace")["db0"]["keys"] >= 0 + + + finally: + # Clean up temporary file + os.unlink(test_config_path) + + + + + + +def test_dry_run_with_purpose_built_env(): + """Test dry-run with real Redis using oss-cluster-02-primaries_joan-uv-threads_w20_st20_sio20 
environment""" + parser = argparse.ArgumentParser( + description="test purpose-built env dry-run", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser = create_run_local_arguments(parser) + + # Env name + setup_name = "oss-cluster-02-primaries_joan-uv-threads_w20_st20_sio20" + + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) + rts = redis.Redis(port=rts_port,decode_responses=True) + rts.ping() + rts.flushall() + + # Create a simple test config that uses our target environment + test_config = { + "name": "test-purpose-built-env", + "setups": [setup_name], + "clientconfig": [ + {"tool": "redis-benchmark"}, + {"parameters": [ + {"clients": 5}, + {"requests": 100}, + {"test": "set"} + ]} + ] + } + + # Save the test config temporarily + with tempfile.NamedTemporaryFile(mode='w', suffix='.yml', delete=False) as f: + yaml.dump(test_config, f) + test_config_path = f.name + + triggering_env_name = "test-triggering-env" + try: + args = parser.parse_args( + args=[ + "--test", + test_config_path, + "--defaults_filename", + "./tests/test_data/defaults-with-purpose-built-env.yml", + "--dry-run", + "--allowed-envs", + setup_name, + "--port", + "12100", + "--redis-7", + "True", + "--push_results_redistimeseries", + "--redistimeseries_port", + f"{rts_port}", + "--triggering_env", + triggering_env_name, + ] + ) + + try: + run_local_command_logic(args, "tool", "v0") + success = True + except SystemExit as e: + success = e.code == 0 + + # ensure we have data + assert rts.info("keyspace")["db0"]["keys"] >= 0 + + # ensure that the environemt name is present on timeseries + + keys_from_deployment = rts.ts().queryindex([f"deployment_name={setup_name}"]) + assert len(keys_from_deployment) > 0 + + assert setup_name in rts.zrange(f"ci.benchmarks.redislabs/{triggering_env_name}/redis-performance/redisbench-admin:deployment_names", 0, -1) + + + + assert success, "Dry-run with real Redis should succeed" + print("โœ… Dry-run completed successfully with real Redis cluster") + + finally: + # Clean up temporary file + os.unlink(test_config_path) diff --git a/tests/test_export.py b/tests/test_export.py index b76141a..fa3dc67 100644 --- a/tests/test_export.py +++ b/tests/test_export.py @@ -24,12 +24,15 @@ def test_get_timeserie_name(self): def test_export_command_logic(): + import os + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) rts_host = os.getenv("RTS_DATASINK_HOST", None) - rts_port = 16379 rts_pass = "" if rts_host is None: return - rts = redis.Redis(port=16379, host=rts_host) + rts = redis.Redis(port=rts_port, host=rts_host) rts.ping() rts.flushall() parser = argparse.ArgumentParser( @@ -74,12 +77,16 @@ def test_export_command_logic(): def test_export_command_logic(): + import os + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + + rts_port = os.environ.get("RTS_PORT",None) rts_host = os.getenv("RTS_DATASINK_HOST", None) - rts_port = 16379 rts_pass = "" if rts_host is None: return - rts = redis.Redis(port=16379, host=rts_host) + rts = redis.Redis(port=rts_port, host=rts_host) rts.ping() rts.flushall() parser = argparse.ArgumentParser( @@ -134,12 +141,15 @@ def test_export_opereto_csv_to_timeseries_dict(): def test_export_command_logic_google_benchmark(): + import os + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) rts_host = 
os.getenv("RTS_DATASINK_HOST", None) - rts_port = 16379 rts_pass = "" if rts_host is None: return - rts = redis.Redis(port=16379, host=rts_host) + rts = redis.Redis(port=rts_port, host=rts_host) rts.ping() rts.flushall() parser = argparse.ArgumentParser( diff --git a/tests/test_grafana.py b/tests/test_grafana.py index 9e53dba..6275f41 100644 --- a/tests/test_grafana.py +++ b/tests/test_grafana.py @@ -16,8 +16,11 @@ def test_generate_artifacts_table_grafana_redis(): + import os rts_host = os.getenv("RTS_DATASINK_HOST", None) - rts_port = 16379 + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) if rts_host is None: return redis_conn = redis.Redis(port=rts_port, host=rts_host, decode_responses=True) diff --git a/tests/test_metrics.py b/tests/test_metrics.py index 99821d0..270b772 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -33,8 +33,11 @@ def test_extract_results_table(): def test_collect_redis_metrics(): + import os rts_host = os.getenv("RTS_DATASINK_HOST", None) - rts_port = 16379 + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) if rts_host is None: return rts = redis.Redis(port=rts_port, host=rts_host) diff --git a/tests/test_redistimeseries.py b/tests/test_redistimeseries.py index a6bd759..2c6d61b 100644 --- a/tests/test_redistimeseries.py +++ b/tests/test_redistimeseries.py @@ -19,7 +19,11 @@ def test_timeseries_test_sucess_flow(): try: - rts = redis.Redis(port=16379) + import os + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) + rts = redis.Redis(port=rts_port) rts.ping() rts.flushall() with open( diff --git a/tests/test_remote.py b/tests/test_remote.py index 96afe7d..87b92e2 100644 --- a/tests/test_remote.py +++ b/tests/test_remote.py @@ -125,7 +125,11 @@ def test_fetch_remote_setup_from_config_aarch64(): def test_push_data_to_redistimeseries(): time_series_dict = {} try: - rts = redis.Redis(port=16379) + import os + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) + rts = redis.Redis(port=rts_port) rts.ping() except redis.exceptions.ConnectionError: pass @@ -269,7 +273,11 @@ def test_extract_timeseries_from_results(): def test_exporter_create_ts(): try: - rts = redis.Redis(port=16379) + import os + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) + rts = redis.Redis(port=rts_port) rts.ping() rts.flushall() with open( @@ -475,7 +483,11 @@ def test_exporter_create_ts(): timeseries_name = "ts1" time_series = {"labels": {"metric-type": "commandstats"}} try: - rts = redis.Redis(port=16379) + import os + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) + rts = redis.Redis(port=rts_port) rts.ping() rts.flushall() assert True == exporter_create_ts(rts, time_series, timeseries_name) diff --git a/tests/test_run_local.py b/tests/test_run_local.py index 76dc67b..546455c 100644 --- a/tests/test_run_local.py +++ b/tests/test_run_local.py @@ -192,11 +192,14 @@ def test_run_local_command_logic(): assert e.code == 1 ## run while pushing results to redis_conn + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) rts_host = os.getenv("RTS_DATASINK_HOST", 
None) - rts_port = 16379 + rts_pass = "" if rts_host is None: return - rts = redis.Redis(port=16379, host=rts_host) + rts = redis.Redis(port=rts_port, host=rts_host) rts.ping() rts.flushall() parser = argparse.ArgumentParser( @@ -221,3 +224,4 @@ def test_run_local_command_logic(): run_local_command_logic(args, "tool", "v0") except SystemExit as e: assert e.code == 0 + diff --git a/tests/test_run_remote.py b/tests/test_run_remote.py index 3de5368..57255bc 100644 --- a/tests/test_run_remote.py +++ b/tests/test_run_remote.py @@ -27,8 +27,10 @@ def test_export_redis_metrics(): setup_type = "oss-standalone" artifact_version = None try: - rts_host = os.getenv("RTS_DATASINK_HOST", None) - rts_port = 16379 + rts_host = os.environ.get("RTS_DATASINK_HOST", None) + # Ensure we have the test DB to store results + assert "RTS_PORT" in os.environ + rts_port = os.environ.get("RTS_PORT",None) if rts_host is None: return rts = redis.Redis(port=rts_port, host=rts_host) diff --git a/tox.ini b/tox.ini index 7aed770..56e165f 100644 --- a/tox.ini +++ b/tox.ini @@ -1,24 +1,41 @@ [tox] -isolated_build = True -envlist = integration-tests +envlist = compliance,integration-tests -[tox:.package] -# note tox will use the same python version as under what tox is installed to package -# so unless this is python 3 you can require a given python version for the packaging -# environment via the basepython key -basepython = python3 + + +[testenv:compliance] +skip_install = True +deps = + black==24.3.0 + flake8 +stoponfail = True +commands = + black --check redisbench_admin + flake8 redisbench_admin + +[testenv:format] +skip_install = True +deps = + black==24.3.0 +commands = + black redisbench_admin [testenv:integration-tests] -deps = +deps = -r{toxinidir}/dev_requirements.txt pytest-cov + tox-docker +docker = + rts_datasink + db_server + client_server -stoponfail = True +passenv = TST_BUILDER_X,TST_RUNNER_X,GH_TOKEN,TST_REDIS_DIR,DOCKER_HOST,DOCKER_TLS_VERIFY,DOCKER_CERT_PATH,TST_BINARY_REDIS_DIR + +stoponfail = True commands = - black --check redisbench_admin - flake8 redisbench_admin coverage erase pytest --cov=redisbench_admin --cov-report=term-missing -ra {posargs} coverage xml @@ -33,9 +50,15 @@ docker = [docker:rts_datasink] -image = redis/redis-stack-server:7.2.0-v11 -ports = - 16379:6379/tcp +image = redis/redis-stack-server:7.4.0-v0 +# Expose ports to the testenv +expose = + RTS_PORT=6379/tcp + +healthcheck_cmd = redis-cli ping +healthcheck_interval = 1 +healthcheck_timeout = 3 +healthcheck_retries = 30 [docker:db_server] image = ghcr.io/linuxserver/openssh-server