Skip to content

Commit 9d87281

Browse files
Introduce compiler timing script & CI job (#4154)
We have an existing `benchcomp` script to measure Kani's performance in CI, but it is prone to noise and only focuses on the end-to-end performance of compilation and verification together, leaving a need for more granular measurements. This script focuses solely on changes in the runtime of the Kani compiler and runs with warm ups, repeats and outlier detection (based on the rust compiler's method in [`rustc-perf`](https://github.com/rust-lang/rustc-perf)) in an attempt to limit noise. The new `compile-timer-short` CI job uses this script on a subset of the `perf` tests (currently excluding `s2n-quic`, `kani-lib/arbitrary` & `misc/display-trait`) to produce a `benchcomp`-like table comparing the compiler performance per-crate before and after a given commit. This also modifies our auto-labeller to ensure end-to-end benchmarks (like `benchcomp`) and the new compiler-specific ones are only run when the parts of Kani that they profile have changed. We manually tested the CI job on my personal fork (see [this regressing run](https://github.com/AlexanderPortland/kani/actions/runs/15788016660?pr=6) from a test PR that intentionally slows down the compiler). Resolves #2442 By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 and MIT licenses.
1 parent e1c784c commit 9d87281

File tree

14 files changed

+724
-7
lines changed

14 files changed

+724
-7
lines changed

.github/labeler.yml

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,20 @@
55
#
66
# Note that we enable dot, so "**" matches all files in a folder
77

8-
Z-BenchCI:
8+
Z-EndToEndBenchCI:
99
- any:
1010
- changed-files:
11-
- any-glob-to-any-file: ['kani-compiler/**', 'kani-driver/src/call-*', 'cprover_bindings/**', 'library/**']
11+
- any-glob-to-any-file: ['kani-compiler/**', 'kani-driver/src/call_*', 'cprover_bindings/**', 'library/**']
1212
- any-glob-to-any-file: ['rust-toolchain.toml', 'Cargo.lock']
1313
- any-glob-to-any-file: ['kani-dependencies']
14+
15+
Z-CompilerBenchCI:
16+
- any:
17+
# we want to run compiler benchmarks if:
18+
- changed-files:
19+
# any parts of the compiler change
20+
- any-glob-to-any-file: ['kani-compiler/**', 'cprover_bindings/**', 'library/**']
21+
# the way we call the compiler changes
22+
- any-glob-to-any-file: ['kani-driver/src/call_cargo.rs', 'kani-driver/src/call_single_file.rs']
23+
# or if our dependencies change
24+
- any-glob-to-any-file: ['rust-toolchain.toml', 'Cargo.lock']

.github/workflows/bench-compiler.yml

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
# Copyright Kani Contributors
2+
# SPDX-License-Identifier: Apache-2.0 OR MIT
3+
#
4+
# Run performance benchmarks comparing the compiler performance of two different Kani versions.
5+
name: Kani Compiler Performance Benchmarks
6+
on:
7+
push:
8+
branches:
9+
- 'main'
10+
workflow_call:
11+
12+
jobs:
13+
compile-timer-short:
14+
runs-on: ubuntu-24.04
15+
steps:
16+
- name: Save push event HEAD and HEAD~ to environment variables
17+
if: ${{ github.event_name == 'push' }}
18+
run: |
19+
echo "NEW_REF=${{ github.event.after}}" | tee -a "$GITHUB_ENV"
20+
echo "OLD_REF=${{ github.event.before }}" | tee -a "$GITHUB_ENV"
21+
- name: Save pull request HEAD and base to environment variables
22+
if: ${{ contains(fromJSON('["pull_request", "pull_request_target"]'), github.event_name) }}
23+
run: |
24+
echo "OLD_REF=${{ github.event.pull_request.base.sha }}" | tee -a "$GITHUB_ENV"
25+
echo "NEW_REF=${{ github.event.pull_request.head.sha }}" | tee -a "$GITHUB_ENV"
26+
- name: Check out Kani (old variant)
27+
uses: actions/checkout@v4
28+
with:
29+
path: ./old
30+
ref: ${{ env.OLD_REF }}
31+
fetch-depth: 2
32+
33+
- name: Check out Kani (new variant)
34+
uses: actions/checkout@v4
35+
with:
36+
path: ./new
37+
ref: ${{ env.NEW_REF }}
38+
fetch-depth: 1
39+
40+
- name: Set up Kani Dependencies (old variant)
41+
uses: ./old/.github/actions/setup
42+
with:
43+
os: ubuntu-24.04
44+
kani_dir: old
45+
46+
- name: Set up Kani Dependencies (new variant)
47+
uses: ./new/.github/actions/setup
48+
with:
49+
os: ubuntu-24.04
50+
kani_dir: new
51+
52+
- name: Copy benchmarks from new to old
53+
run: rm -rf ./old/tests/perf ; cp -r ./new/tests/perf ./old/tests/
54+
55+
- name: Build `compile-timer` in old
56+
run: cd old/tools/compile-timer && cargo build --release
57+
- name: Build `kani` in old
58+
run: cd old && cargo build-dev --release
59+
60+
- name: Build `compile-timer` in new
61+
run: cd new/tools/compile-timer && cargo build --release
62+
- name: Build `kani` in new
63+
run: cd new && cargo build-dev --release
64+
65+
- name: Run `compile-timer` on old
66+
run: |
67+
export PATH="${{ github.workspace }}/old/scripts:$PATH"
68+
cd old/tests/perf && ../../target/release/compile-timer --out-path compile-times-old.json --ignore kani-lib --ignore display_trait --ignore s2n-quic
69+
- name: Run `compile-timer` on new
70+
run: |
71+
export PATH="${{ github.workspace }}/new/scripts:$PATH"
72+
cd new/tests/perf && ../../target/release/compile-timer --out-path compile-times-new.json --ignore kani-lib --ignore display_trait --ignore s2n-quic
73+
- name: Run analysis between the two
74+
run: ./new/target/release/compile-analyzer --path-pre old/tests/perf/compile-times-old.json --path-post new/tests/perf/compile-times-new.json --only-markdown --suite-name short >> "$GITHUB_STEP_SUMMARY"
75+
76+
compile-timer-long:
77+
runs-on: ubuntu-24.04
78+
steps:
79+
- name: Save push event HEAD and HEAD~ to environment variables
80+
if: ${{ github.event_name == 'push' }}
81+
run: |
82+
echo "NEW_REF=${{ github.event.after}}" | tee -a "$GITHUB_ENV"
83+
echo "OLD_REF=${{ github.event.before }}" | tee -a "$GITHUB_ENV"
84+
- name: Save pull request HEAD and base to environment variables
85+
if: ${{ contains(fromJSON('["pull_request", "pull_request_target"]'), github.event_name) }}
86+
run: |
87+
echo "OLD_REF=${{ github.event.pull_request.base.sha }}" | tee -a "$GITHUB_ENV"
88+
echo "NEW_REF=${{ github.event.pull_request.head.sha }}" | tee -a "$GITHUB_ENV"
89+
90+
- name: Check out Kani (old variant)
91+
uses: actions/checkout@v4
92+
with:
93+
path: ./old
94+
ref: ${{ env.OLD_REF }}
95+
fetch-depth: 2
96+
97+
- name: Check out Kani (new variant)
98+
uses: actions/checkout@v4
99+
with:
100+
path: ./new
101+
ref: ${{ env.NEW_REF }}
102+
fetch-depth: 1
103+
104+
- name: Set up Kani Dependencies (old variant)
105+
uses: ./old/.github/actions/setup
106+
with:
107+
os: ubuntu-24.04
108+
kani_dir: old
109+
110+
- name: Set up Kani Dependencies (new variant)
111+
uses: ./new/.github/actions/setup
112+
with:
113+
os: ubuntu-24.04
114+
kani_dir: new
115+
116+
# Ensures that a PR changing the benchmarks will have the new benchmarks run
117+
# for both commits.
118+
- name: Copy benchmarks from new to old
119+
run: rm -rf ./old/tests/perf ; cp -r ./new/tests/perf ./old/tests/
120+
121+
- name: Build `compile-timer` in old
122+
run: cd old/tools/compile-timer && cargo build --release
123+
- name: Build `kani` in old
124+
run: cd old && cargo build-dev --release
125+
126+
- name: Build `compile-timer` in new
127+
run: cd new/tools/compile-timer && cargo build --release
128+
- name: Build `kani` in new
129+
run: cd new && cargo build-dev --release
130+
131+
- name: Run `compile-timer` on old
132+
run: |
133+
export PATH="${{ github.workspace }}/old/scripts:$PATH"
134+
cd old/tests/perf/s2n-quic && ../../../target/release/compile-timer --out-path compile-times-old.json --also-visit quic/s2n-quic-core --also-visit quic/s2n-quic-platform --also-visit common/s2n-codec --skip-current
135+
- name: Run `compile-timer` on new
136+
run: |
137+
export PATH="${{ github.workspace }}/new/scripts:$PATH"
138+
cd new/tests/perf/s2n-quic && ../../../target/release/compile-timer --out-path compile-times-new.json --also-visit quic/s2n-quic-core --also-visit quic/s2n-quic-platform --also-visit common/s2n-codec --skip-current
139+
- name: Run analysis between the two
140+
run: ./new/target/release/compile-analyzer --path-pre old/tests/perf/s2n-quic/compile-times-old.json --path-post new/tests/perf/s2n-quic/compile-times-new.json --only-markdown --suite-name long >> "$GITHUB_STEP_SUMMARY"

.github/workflows/bench.yml renamed to .github/workflows/bench-e2e.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
# This workflow will run when:
77
# - Changes are pushed to 'main'.
88
# - Triggered by another workflow
9-
name: Kani Performance Benchmarks
9+
name: Kani End-To-End Performance Benchmarks
1010
on:
1111
push:
1212
branches:

.github/workflows/extra_jobs.yml

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,16 @@ jobs:
4343
with:
4444
dot: true
4545

46-
verification-bench:
47-
name: Verification Benchmarks
46+
end-to-end-bench:
47+
name: End-to-End Benchmarks
4848
needs: auto-label
4949
permissions: {}
50-
if: ${{ contains(needs.auto-label.outputs.all-labels, 'Z-BenchCI') && github.event_name != 'merge_group' }}
51-
uses: ./.github/workflows/bench.yml
50+
if: ${{ contains(needs.auto-label.outputs.all-labels, 'Z-EndToEndBenchCI') && github.event_name != 'merge_group' }}
51+
uses: ./.github/workflows/bench-e2e.yml
52+
53+
compiler-bench:
54+
name: Compiler Benchmarks
55+
needs: auto-label
56+
permissions: {}
57+
if: ${{ contains(needs.auto-label.outputs.all-labels, 'Z-CompilerBenchCI') && github.event_name != 'merge_group' }}
58+
uses: ./.github/workflows/bench-compiler.yml

Cargo.lock

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,15 @@ dependencies = [
403403
"unicode-width",
404404
]
405405

406+
[[package]]
407+
name = "compile-timer"
408+
version = "0.1.0"
409+
dependencies = [
410+
"clap",
411+
"serde",
412+
"serde_json",
413+
]
414+
406415
[[package]]
407416
name = "compiletest"
408417
version = "0.0.0"

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ members = [
4848
"tools/build-kani",
4949
"tools/kani-cov",
5050
"tools/scanner",
51+
"tools/compile-timer",
5152
"kani-driver",
5253
"kani-compiler",
5354
"kani_metadata",

kani-driver/src/call_cargo.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,7 +357,13 @@ crate-type = ["lib"]
357357
&& t1.doc == t2.doc)
358358
}
359359

360+
let compile_start = std::time::Instant::now();
360361
let artifacts = self.run_build(cargo_cmd)?;
362+
if std::env::var("TIME_COMPILER").is_ok() {
363+
// conditionally print the compilation time for debugging & use by `compile-timer`
364+
// doesn't just use the existing `--debug` flag because the number of prints significantly affects performance
365+
println!("BUILT {} IN {:?}μs", target.name, compile_start.elapsed().as_micros());
366+
}
361367
debug!(?artifacts, "run_build_target");
362368

363369
// We generate kani specific artifacts only for the build target. The build target is

tools/benchcomp/benchcomp/visualizers/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,12 +265,17 @@ def _get_template():
265265
Scatterplot axis ranges are {{ d["scaled_metrics"][metric]["min_value"] }} (bottom/left) to {{ d["scaled_metrics"][metric]["max_value"] }} (top/right).
266266
267267
{% endif -%}
268+
<details> <summary>Breakdown by harness</summary>
269+
268270
| Benchmark | {% for variant in d["variants"][metric] %} {{ variant }} |{% endfor %}
269271
| --- |{% for variant in d["variants"][metric] %} --- |{% endfor -%}
270272
{% for bench_name, bench_variants in benchmarks.items () %}
271273
| {{ bench_name }} {% for variant in d["variants"][metric] -%}
272274
| {{ bench_variants[variant] }} {% endfor %}|
273275
{%- endfor %}
276+
277+
</details>
278+
274279
{% endfor -%}
275280
""")
276281

tools/benchcomp/test/test_regression.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -477,18 +477,28 @@ def test_markdown_results_table(self):
477477
```
478478
Scatterplot axis ranges are 5 (bottom/left) to 10 (top/right).
479479
480+
<details> <summary>Breakdown by harness</summary>
481+
480482
| Benchmark | variant_1 | variant_2 | ratio |
481483
| --- | --- | --- | --- |
482484
| bench_1 | 5 | 10 | **2.0** |
483485
| bench_2 | 10 | 5 | 0.5 |
484486
487+
</details>
488+
489+
485490
## success
486491
492+
<details> <summary>Breakdown by harness</summary>
493+
487494
| Benchmark | variant_1 | variant_2 | notes |
488495
| --- | --- | --- | --- |
489496
| bench_1 | True | True | |
490497
| bench_2 | True | False | regressed |
491498
| bench_3 | False | True | newly passing |
499+
500+
</details>
501+
492502
"""))
493503

494504

tools/compile-timer/Cargo.toml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Copyright Kani Contributors
2+
# SPDX-License-Identifier: Apache-2.0 OR MIT
3+
4+
[package]
5+
name = "compile-timer"
6+
version = "0.1.0"
7+
edition = "2024"
8+
license = "MIT OR Apache-2.0"
9+
10+
[dependencies]
11+
clap = { version = "4.5.40", features = ["derive"] }
12+
serde = {version = "1.0.219", features = ["derive"]}
13+
serde_json = "1.0.140"
14+
15+
[[bin]]
16+
name = "compile-timer"
17+
path = "src/compile-timer.rs"
18+
19+
[[bin]]
20+
name = "compile-analyzer"
21+
path = "src/compile-analyzer.rs"
22+
23+
[lints]
24+
workspace = true

tools/compile-timer/README.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Compile-Timer
2+
This is a simple script for timing the Kani compiler's end-to-end performance on crates.
3+
4+
## Setup
5+
You can run it by first compiling Kani (with `cargo build-dev --release` in the project root), then building this script (with `cargo build --release` in this `compile-timer` directory). This will build new `compile-timer` & `compile-analyzer` binaries in `kani/target/release`.
6+
7+
## Recording Compiler Times with `compile-timer`
8+
After doing that, you should make sure you have Kani on your $PATH (see instructions [here](https://model-checking.github.io/kani/build-from-source.html#adding-kani-to-your-path)) after which you can run `compile-timer --out-path [OUT_JSON_FILE]` in any directory to profile the compiler's performance on it.
9+
10+
By default, the script recursively goes into directories and will use `cargo kani` to profile any Rust projects it encounters (which it determines by looking for a `Cargo.toml`). You can tell it to ignore specific subtrees by passing in the `--ignore [DIR_NAME]` flag.
11+
12+
## Visualizing Compiler Times with `compile-analyzer`
13+
`compile-timer` itself will have some debug output including each individual run's time and aggregates for each crate.
14+
15+
`compile-analyzer` is specifically for comparing performance across multiple commits.
16+
17+
Once you've run `compile-timer` on both commits, you can run `compile-analyzer --path-pre [FIRST_JSON_FILE] --path-post [SECOND_JSON_FILE]` to see the change in performance going from the first to second commit.
18+
19+
By default, `compile-analyzer` will just print to the console, but if you specify the `--only-markdown` option, it's output will be formatted for GitHub flavored markdown (as is useful in CI).

tools/compile-timer/src/common.rs

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
// Copyright Kani Contributors
2+
// SPDX-License-Identifier: Apache-2.0 OR MIT
3+
#![allow(dead_code)]
4+
use serde::{Deserialize, Serialize};
5+
use std::path::{Path, PathBuf};
6+
use std::time::Duration;
7+
8+
#[derive(Debug, Serialize, Deserialize)]
9+
pub struct AggrResult {
10+
pub krate: PathBuf,
11+
pub krate_trimmed_path: String,
12+
/// the stats for only the 25th-75th percentile of runs on this crate, i.e., the interquartile range
13+
pub iqr_stats: Stats,
14+
/// the stats for all runs on this crate
15+
full_stats: Stats,
16+
}
17+
18+
pub fn krate_trimmed_path(krate: &Path) -> String {
19+
format!(
20+
"{:?}",
21+
krate
22+
.canonicalize()
23+
.unwrap()
24+
.strip_prefix(std::env::current_dir().unwrap().parent().unwrap())
25+
.unwrap()
26+
)
27+
}
28+
29+
impl AggrResult {
30+
pub fn new(krate: PathBuf, iqr_stats: Stats, full_stats: Stats) -> Self {
31+
AggrResult { krate_trimmed_path: krate_trimmed_path(&krate), krate, iqr_stats, full_stats }
32+
}
33+
34+
pub fn full_std_dev(&self) -> Duration {
35+
self.full_stats.std_dev
36+
}
37+
38+
pub fn iqr(&self) -> Duration {
39+
self.iqr_stats.range.1 - self.iqr_stats.range.0
40+
}
41+
}
42+
43+
#[derive(Debug, Serialize, Deserialize)]
44+
pub struct Stats {
45+
pub avg: Duration,
46+
pub std_dev: Duration,
47+
pub range: (Duration, Duration),
48+
}
49+
50+
/// Sum the IQR averages and IQR standard deviations respectively for all crates timed.
51+
pub fn aggregate_aggregates(info: &[AggrResult]) -> (Duration, Duration) {
52+
for i in info {
53+
println!("krate {:?} -- {:?}", i.krate, i.iqr_stats.avg);
54+
}
55+
56+
(info.iter().map(|i| i.iqr_stats.avg).sum(), info.iter().map(|i| i.iqr_stats.std_dev).sum())
57+
}
58+
59+
pub fn fraction_of_duration(dur: Duration, frac: f64) -> Duration {
60+
Duration::from_nanos(((dur.as_nanos() as f64) * frac) as u64)
61+
}

0 commit comments

Comments
 (0)