Introduce compiler timing script & CI job (#4154)

AlexanderPortland · web-flow · commit 9d87281d4736 · 2025-06-23T23:19:07.000Z
We have an existing `benchcomp` script to measure Kani's performance in CI, but it is prone to noise and only focuses on the end-to-end performance of compilation and verification together, leaving a need for more granular measurements. This script focuses solely on changes in the runtime of the Kani compiler and runs with warm ups, repeats and outlier detection (based on the rust compiler's method in [`rustc-perf`](https://github.com/rust-lang/rustc-perf)) in an attempt to limit noise. The new `compile-timer-short` CI job uses this script on a subset of the `perf` tests (currently excluding `s2n-quic`, `kani-lib/arbitrary` & `misc/display-trait`) to produce a `benchcomp`-like table comparing the compiler performance per-crate before and after a given commit. This also modifies our auto-labeller to ensure end-to-end benchmarks (like `benchcomp`) and the new compiler-specific ones are only run when the parts of Kani that they profile have changed. We manually tested the CI job on my personal fork (see [this regressing run](https://github.com/AlexanderPortland/kani/actions/runs/15788016660?pr=6) from a test PR that intentionally slows down the compiler). Resolves #2442 By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 and MIT licenses.
diff --git a/.github/labeler.yml b/.github/labeler.yml
@@ -5,9 +5,20 @@
 #
 # Note that we enable dot, so "**" matches all files in a folder
 
-Z-BenchCI:
+Z-EndToEndBenchCI:
 - any:
   - changed-files:
-    - any-glob-to-any-file: ['kani-compiler/**', 'kani-driver/src/call-*', 'cprover_bindings/**', 'library/**']
+    - any-glob-to-any-file: ['kani-compiler/**', 'kani-driver/src/call_*', 'cprover_bindings/**', 'library/**']
     - any-glob-to-any-file: ['rust-toolchain.toml', 'Cargo.lock']
     - any-glob-to-any-file: ['kani-dependencies']
+
+Z-CompilerBenchCI:
+- any:
+  # we want to run compiler benchmarks if:
+  - changed-files:
+    # any parts of the compiler change
+    - any-glob-to-any-file: ['kani-compiler/**', 'cprover_bindings/**', 'library/**']
+    # the way we call the compiler changes
+    - any-glob-to-any-file: ['kani-driver/src/call_cargo.rs', 'kani-driver/src/call_single_file.rs']
+    # or if our dependencies change
+    - any-glob-to-any-file: ['rust-toolchain.toml', 'Cargo.lock']
diff --git a/.github/workflows/bench-compiler.yml b/.github/workflows/bench-compiler.yml
@@ -0,0 +1,140 @@
+# Copyright Kani Contributors
+# SPDX-License-Identifier: Apache-2.0 OR MIT
+#
+# Run performance benchmarks comparing the compiler performance of two different Kani versions.
+name: Kani Compiler Performance Benchmarks
+on:
+  push:
+    branches:
+      - 'main'
+  workflow_call:
+
+jobs:
+  compile-timer-short:
+    runs-on: ubuntu-24.04
+    steps:
+      - name: Save push event HEAD and HEAD~ to environment variables
+        if: ${{ github.event_name == 'push' }}
+        run: |
+          echo "NEW_REF=${{ github.event.after}}" | tee -a "$GITHUB_ENV"
+          echo "OLD_REF=${{ github.event.before }}" | tee -a "$GITHUB_ENV"
+      - name: Save pull request HEAD and base to environment variables
+        if: ${{ contains(fromJSON('["pull_request", "pull_request_target"]'), github.event_name) }}
+        run: |
+          echo "OLD_REF=${{ github.event.pull_request.base.sha }}" | tee -a "$GITHUB_ENV"
+          echo "NEW_REF=${{ github.event.pull_request.head.sha }}" | tee -a "$GITHUB_ENV"
+      - name: Check out Kani (old variant)
+        uses: actions/checkout@v4
+        with:
+          path: ./old
+          ref: ${{ env.OLD_REF }}
+          fetch-depth: 2
+
+      - name: Check out Kani (new variant)
+        uses: actions/checkout@v4
+        with:
+          path: ./new
+          ref: ${{ env.NEW_REF }}
+          fetch-depth: 1
+
+      - name: Set up Kani Dependencies (old variant)
+        uses: ./old/.github/actions/setup
+        with:
+          os: ubuntu-24.04
+          kani_dir: old
+
+      - name: Set up Kani Dependencies (new variant)
+        uses: ./new/.github/actions/setup
+        with:
+          os: ubuntu-24.04
+          kani_dir: new
+
+      - name: Copy benchmarks from new to old
+        run: rm -rf ./old/tests/perf ; cp -r ./new/tests/perf ./old/tests/
+
+      - name: Build `compile-timer` in old
+        run: cd old/tools/compile-timer && cargo build --release
+      - name: Build `kani` in old
+        run: cd old && cargo build-dev --release
+
+      - name: Build `compile-timer` in new
+        run: cd new/tools/compile-timer && cargo build --release
+      - name: Build `kani` in new
+        run: cd new && cargo build-dev --release
+
+      - name: Run `compile-timer` on old
+        run: |
+          export PATH="${{ github.workspace }}/old/scripts:$PATH"
+          cd old/tests/perf && ../../target/release/compile-timer --out-path compile-times-old.json --ignore kani-lib --ignore display_trait --ignore s2n-quic
+      - name: Run `compile-timer` on new
+        run: |
+          export PATH="${{ github.workspace }}/new/scripts:$PATH"
+          cd new/tests/perf && ../../target/release/compile-timer --out-path compile-times-new.json --ignore kani-lib --ignore display_trait --ignore s2n-quic
+      - name: Run analysis between the two
+        run: ./new/target/release/compile-analyzer --path-pre old/tests/perf/compile-times-old.json --path-post new/tests/perf/compile-times-new.json --only-markdown --suite-name short >> "$GITHUB_STEP_SUMMARY"
+
+  compile-timer-long:
+    runs-on: ubuntu-24.04
+    steps:
+      - name: Save push event HEAD and HEAD~ to environment variables
+        if: ${{ github.event_name == 'push' }}
+        run: |
+          echo "NEW_REF=${{ github.event.after}}" | tee -a "$GITHUB_ENV"
+          echo "OLD_REF=${{ github.event.before }}" | tee -a "$GITHUB_ENV"
+      - name: Save pull request HEAD and base to environment variables
+        if: ${{ contains(fromJSON('["pull_request", "pull_request_target"]'), github.event_name) }}
+        run: |
+          echo "OLD_REF=${{ github.event.pull_request.base.sha }}" | tee -a "$GITHUB_ENV"
+          echo "NEW_REF=${{ github.event.pull_request.head.sha }}" | tee -a "$GITHUB_ENV"
+      
+      - name: Check out Kani (old variant)
+        uses: actions/checkout@v4
+        with:
+          path: ./old
+          ref: ${{ env.OLD_REF }}
+          fetch-depth: 2
+
+      - name: Check out Kani (new variant)
+        uses: actions/checkout@v4
+        with:
+          path: ./new
+          ref: ${{ env.NEW_REF }}
+          fetch-depth: 1
+
+      - name: Set up Kani Dependencies (old variant)
+        uses: ./old/.github/actions/setup
+        with:
+          os: ubuntu-24.04
+          kani_dir: old
+
+      - name: Set up Kani Dependencies (new variant)
+        uses: ./new/.github/actions/setup
+        with:
+          os: ubuntu-24.04
+          kani_dir: new
+
+      # Ensures that a PR changing the benchmarks will have the new benchmarks run 
+      # for both commits.
+      - name: Copy benchmarks from new to old
+        run: rm -rf ./old/tests/perf ; cp -r ./new/tests/perf ./old/tests/
+
+      - name: Build `compile-timer` in old
+        run: cd old/tools/compile-timer && cargo build --release
+      - name: Build `kani` in old
+        run: cd old && cargo build-dev --release
+
+      - name: Build `compile-timer` in new
+        run: cd new/tools/compile-timer && cargo build --release
+      - name: Build `kani` in new
+        run: cd new && cargo build-dev --release
+
+      - name: Run `compile-timer` on old
+        run: |
+          export PATH="${{ github.workspace }}/old/scripts:$PATH"
+          cd old/tests/perf/s2n-quic && ../../../target/release/compile-timer --out-path compile-times-old.json --also-visit quic/s2n-quic-core --also-visit quic/s2n-quic-platform --also-visit common/s2n-codec --skip-current
+      - name: Run `compile-timer` on new
+        run: |
+          export PATH="${{ github.workspace }}/new/scripts:$PATH"
+          cd new/tests/perf/s2n-quic && ../../../target/release/compile-timer --out-path compile-times-new.json --also-visit quic/s2n-quic-core --also-visit quic/s2n-quic-platform --also-visit common/s2n-codec --skip-current
+      - name: Run analysis between the two
+        run: ./new/target/release/compile-analyzer --path-pre old/tests/perf/s2n-quic/compile-times-old.json --path-post new/tests/perf/s2n-quic/compile-times-new.json --only-markdown --suite-name long >> "$GITHUB_STEP_SUMMARY"
diff --git a/.github/workflows/bench-e2e.yml b/.github/workflows/bench-e2e.yml
@@ -6,7 +6,7 @@
 # This workflow will run when:
 #  - Changes are pushed to 'main'.
 #  - Triggered by another workflow
-name: Kani Performance Benchmarks
+name: Kani End-To-End Performance Benchmarks
 on:
   push:
     branches:
diff --git a/.github/workflows/extra_jobs.yml b/.github/workflows/extra_jobs.yml
@@ -43,9 +43,16 @@ jobs:
       with:
         dot: true
 
-  verification-bench:
-    name: Verification Benchmarks
+  end-to-end-bench:
+    name: End-to-End Benchmarks
     needs: auto-label
     permissions: {}
-    if: ${{ contains(needs.auto-label.outputs.all-labels, 'Z-BenchCI') && github.event_name != 'merge_group' }}
-    uses: ./.github/workflows/bench.yml
+    if: ${{ contains(needs.auto-label.outputs.all-labels, 'Z-EndToEndBenchCI') && github.event_name != 'merge_group' }}
+    uses: ./.github/workflows/bench-e2e.yml
+
+  compiler-bench:
+    name: Compiler Benchmarks
+    needs: auto-label
+    permissions: {}
+    if: ${{ contains(needs.auto-label.outputs.all-labels, 'Z-CompilerBenchCI') && github.event_name != 'merge_group' }}
+    uses: ./.github/workflows/bench-compiler.yml
diff --git a/Cargo.lock b/Cargo.lock
@@ -403,6 +403,15 @@ dependencies = [
  "unicode-width",
 ]
 
+[[package]]
+name = "compile-timer"
+version = "0.1.0"
+dependencies = [
+ "clap",
+ "serde",
+ "serde_json",
+]
+
 [[package]]
 name = "compiletest"
 version = "0.0.0"
diff --git a/Cargo.toml b/Cargo.toml
@@ -48,6 +48,7 @@ members = [
   "tools/build-kani",
   "tools/kani-cov",
   "tools/scanner",
+  "tools/compile-timer",
   "kani-driver",
   "kani-compiler",
   "kani_metadata",
diff --git a/kani-driver/src/call_cargo.rs b/kani-driver/src/call_cargo.rs
@@ -357,7 +357,13 @@ crate-type = ["lib"]
                     && t1.doc == t2.doc)
         }
 
+        let compile_start = std::time::Instant::now();
         let artifacts = self.run_build(cargo_cmd)?;
+        if std::env::var("TIME_COMPILER").is_ok() {
+            // Conditionally print the compilation time, for debugging and for use by `compile-timer`.
+            // This is gated on the `TIME_COMPILER` env var rather than the existing `--debug` flag, because the number of prints under `--debug` significantly affects performance.
+            println!("BUILT {} IN {:?}μs", target.name, compile_start.elapsed().as_micros());
+        }
         debug!(?artifacts, "run_build_target");
 
         // We generate kani specific artifacts only for the build target. The build target is
diff --git a/tools/benchcomp/benchcomp/visualizers/__init__.py b/tools/benchcomp/benchcomp/visualizers/__init__.py
@@ -265,12 +265,17 @@ def _get_template():
             Scatterplot axis ranges are {{ d["scaled_metrics"][metric]["min_value"] }} (bottom/left) to {{ d["scaled_metrics"][metric]["max_value"] }} (top/right).
 
             {% endif -%}
+            <details> <summary>Breakdown by harness</summary>
+
             | Benchmark | {% for variant in d["variants"][metric] %} {{ variant }} |{% endfor %}
             | --- |{% for variant in d["variants"][metric] %} --- |{% endfor -%}
             {% for bench_name, bench_variants in benchmarks.items () %}
             | {{ bench_name }} {% for variant in d["variants"][metric] -%}
              | {{ bench_variants[variant] }} {% endfor %}|
             {%- endfor %}
+            
+            </details>
+
             {% endfor -%}
             """)
 
diff --git a/tools/benchcomp/test/test_regression.py b/tools/benchcomp/test/test_regression.py
@@ -477,18 +477,28 @@ def test_markdown_results_table(self):
                     ```
                     Scatterplot axis ranges are 5 (bottom/left) to 10 (top/right).
 
+                    <details> <summary>Breakdown by harness</summary>
+
                     | Benchmark |  variant_1 | variant_2 | ratio |
                     | --- | --- | --- | --- |
                     | bench_1 | 5 | 10 | **2.0** |
                     | bench_2 | 10 | 5 | 0.5 |
 
+                    </details>
+                    
+                    
                     ## success
 
+                    <details> <summary>Breakdown by harness</summary>
+
                     | Benchmark |  variant_1 | variant_2 | notes |
                     | --- | --- | --- | --- |
                     | bench_1 | True | True |  |
                     | bench_2 | True | False | regressed |
                     | bench_3 | False | True | newly passing |
+
+                    </details>
+                    
                     """))
 
 
diff --git a/tools/compile-timer/Cargo.toml b/tools/compile-timer/Cargo.toml
@@ -0,0 +1,24 @@
+# Copyright Kani Contributors
+# SPDX-License-Identifier: Apache-2.0 OR MIT
+
+[package]
+name = "compile-timer"
+version = "0.1.0"
+edition = "2024"
+license = "MIT OR Apache-2.0"
+
+[dependencies]
+clap = { version = "4.5.40", features = ["derive"] }
+serde = {version = "1.0.219", features = ["derive"]}
+serde_json = "1.0.140"
+
+[[bin]]
+name = "compile-timer"
+path = "src/compile-timer.rs"
+
+[[bin]]
+name = "compile-analyzer"
+path = "src/compile-analyzer.rs"
+
+[lints]
+workspace = true
diff --git a/tools/compile-timer/README.md b/tools/compile-timer/README.md
@@ -0,0 +1,19 @@
+# Compile-Timer
+This is a simple script for timing the Kani compiler's end-to-end performance on crates.
+
+## Setup
+You can run it by first compiling Kani (with `cargo build-dev --release` in the project root), then building this script (with `cargo build --release` in this `compile-timer` directory). This will build new `compile-timer` & `compile-analyzer` binaries in `kani/target/release`. 
+
+## Recording Compiler Times with `compile-timer`
+After doing that, make sure you have Kani on your `$PATH` (see the instructions [here](https://model-checking.github.io/kani/build-from-source.html#adding-kani-to-your-path)). You can then run `compile-timer --out-path [OUT_JSON_FILE]` in any directory to profile the compiler's performance on it.
+
+By default, the script recursively goes into directories and will use `cargo kani` to profile any Rust projects it encounters (which it determines by looking for a `Cargo.toml`). You can tell it to ignore specific subtrees by passing in the `--ignore [DIR_NAME]` flag.
+
+## Visualizing Compiler Times with `compile-analyzer`
+`compile-timer` itself will have some debug output including each individual run's time and aggregates for each crate.
+
+`compile-analyzer` is specifically for comparing performance across multiple commits. 
+
+Once you've run `compile-timer` on both commits, you can run `compile-analyzer --path-pre [FIRST_JSON_FILE] --path-post [SECOND_JSON_FILE]` to see the change in performance going from the first to second commit. 
+
+By default, `compile-analyzer` will just print to the console, but if you specify the `--only-markdown` option, its output will be formatted for GitHub-flavored markdown (as is useful in CI).
diff --git a/tools/compile-timer/src/common.rs b/tools/compile-timer/src/common.rs
@@ -0,0 +1,61 @@
+// Copyright Kani Contributors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+#![allow(dead_code)]
+use serde::{Deserialize, Serialize};
+use std::path::{Path, PathBuf};
+use std::time::Duration;
+
+#[derive(Debug, Serialize, Deserialize)]
+pub struct AggrResult {
+    pub krate: PathBuf,
+    pub krate_trimmed_path: String,
+    /// the stats for only the 25th-75th percentile of runs on this crate, i.e., the interquartile range
+    pub iqr_stats: Stats,
+    /// the stats for all runs on this crate
+    full_stats: Stats,
+}
+
+pub fn krate_trimmed_path(krate: &Path) -> String {
+    format!(
+        "{:?}",
+        krate
+            .canonicalize()
+            .unwrap()
+            .strip_prefix(std::env::current_dir().unwrap().parent().unwrap())
+            .unwrap()
+    )
+}
+
+impl AggrResult {
+    pub fn new(krate: PathBuf, iqr_stats: Stats, full_stats: Stats) -> Self {
+        AggrResult { krate_trimmed_path: krate_trimmed_path(&krate), krate, iqr_stats, full_stats }
+    }
+
+    pub fn full_std_dev(&self) -> Duration {
+        self.full_stats.std_dev
+    }
+
+    pub fn iqr(&self) -> Duration {
+        self.iqr_stats.range.1 - self.iqr_stats.range.0
+    }
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+pub struct Stats {
+    pub avg: Duration,
+    pub std_dev: Duration,
+    pub range: (Duration, Duration),
+}
+
+/// Sum the IQR averages and the IQR standard deviations across all crates timed, returning `(total_avg, total_std_dev)`; also prints each crate's IQR average to stdout.
+pub fn aggregate_aggregates(info: &[AggrResult]) -> (Duration, Duration) {
+    for i in info {
+        println!("krate {:?} -- {:?}", i.krate, i.iqr_stats.avg);
+    }
+
+    (info.iter().map(|i| i.iqr_stats.avg).sum(), info.iter().map(|i| i.iqr_stats.std_dev).sum())
+}
+
+pub fn fraction_of_duration(dur: Duration, frac: f64) -> Duration {
+    Duration::from_nanos(((dur.as_nanos() as f64) * frac) as u64)
+}
diff --git a/tools/compile-timer/src/compile-analyzer.rs b/tools/compile-timer/src/compile-analyzer.rs
diff --git a/tools/compile-timer/src/compile-timer.rs b/tools/compile-timer/src/compile-timer.rs