Skip to content

Commit 9be1101

Browse files
committed
Add LinuxPerf extension for branch + instruction counts
This updates the core BenchmarkTools types to include `instructions` and `branches` fields. These fields support serialization and all of the usual stats / judgements via the Trial / TrialEstimate / TrialRatio interface. If the extension is not available or `perf` is not configured correctly on your system, these are `NaN`. This also keeps the serialization format backwards-compatible, reporting any missing measurements as `NaN`.
1 parent b9f4c5e commit 9be1101

15 files changed

+628
-93
lines changed

.github/workflows/CI.yml

+1-7
Original file line numberDiff line numberDiff line change
@@ -16,20 +16,14 @@ jobs:
1616
fail-fast: false
1717
matrix:
1818
version:
19-
- '1.6'
19+
- '1.10'
2020
- '1'
2121
- 'nightly'
2222
arch:
2323
- x64
2424
os:
2525
- ubuntu-latest
2626
include:
27-
- version: '1.7'
28-
arch: x64
29-
os: ubuntu-20.04
30-
- version: '1.8'
31-
arch: x64
32-
os: ubuntu-22.04
3327
- version: '1.9'
3428
arch: x64
3529
os: ubuntu-22.04

Project.toml

+9-2
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,12 @@ Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
1111
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
1212
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
1313

14+
[weakdeps]
15+
LinuxPerf = "b4c46c6c-4fb0-484d-a11a-41bc3392d094"
16+
17+
[extensions]
18+
LinuxPerfExt = "LinuxPerf"
19+
1420
[compat]
1521
Aqua = "0.8"
1622
Compat = ">= 4.11.0"
@@ -22,7 +28,8 @@ Profile = "<0.0.1, 1"
2228
Statistics = "<0.0.1, 1"
2329
Test = "<0.0.1, 1"
2430
UUIDs = "<0.0.1, 1"
25-
julia = "1.6"
31+
julia = "1.9"
32+
LinuxPerf = ">= 0.4"
2633

2734
[extras]
2835
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
@@ -31,4 +38,4 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
3138
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
3239

3340
[targets]
34-
test = ["Aqua", "JuliaFormatter", "Statistics", "Test"]
41+
test = ["Aqua", "JuliaFormatter", "Statistics", "Test", "LinuxPerf"]

ext/LinuxPerfExt/LinuxPerfExt.jl

+49
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
"""
    LinuxPerfExt

Package extension wiring `LinuxPerf.jl` hardware counters (instructions and
branches) into BenchmarkTools' `PerfInterface` hooks. Loaded automatically
when `LinuxPerf` is available; on systems where `perf` is unusable, the
no-op default interface is returned instead.
"""
module LinuxPerfExt

import BenchmarkTools: PerfInterface
import LinuxPerf: LinuxPerf, PerfBench, EventGroup, EventType
import LinuxPerf: enable!, disable!, enable_all!, disable_all!, close, read!

"""
    interface() -> PerfInterface

Probe whether `perf` events can be opened on this system and, if so, return a
`PerfInterface` whose hooks measure retired instructions and branches per
sample. Returns the default (no-op, `NaN`-reporting) `PerfInterface()` when
perf is unavailable or the event group cannot supply both counters.
"""
function interface()
    let g = try
            EventGroup([EventType(:hw, :instructions), EventType(:hw, :branches)])
        catch
            # If perf is not working on the system, the above constructor will throw an
            # ioctl or perf_event_open error (after presenting a warning to the user)
            return PerfInterface()
        end
        close(g)
        # Both events must have been opened for the measurements to be meaningful.
        length(g.fds) != 2 && return PerfInterface()
    end

    # If we made it here, perf seems to be working on this system
    return PerfInterface(;
        setup=() ->
            let g = EventGroup([EventType(:hw, :instructions), EventType(:hw, :branches)])
                PerfBench(0, EventGroup[g])
            end,
        start=(bench) -> enable_all!(),
        stop=(bench) -> disable_all!(),
        teardown=(bench) -> close(bench),
        read=(bench) -> let g = only(bench.groups)
            # Group read layout: nr events, time_enabled, time_running, then
            # one value per event (instructions, branches).
            (N, time_enabled, time_running, insts, branches) = read!(
                g.leader_io, Vector{UInt64}(undef, 5)
            )
            if 2 * time_running <= time_enabled
                # enabled less than 50% of the time
                # (most likely due to PMU contention with other perf events)
                return (NaN, NaN)
            else
                # account for partially-active measurement by scaling up
                k = time_enabled / time_running
                estimated_instructions = Float64(insts) * k
                estimated_branches = Float64(branches) * k
                return (estimated_instructions, estimated_branches)
            end
        end,
    )
end

end

src/BenchmarkTools.jl

+2
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ export loadparams!
2525
include("trials.jl")
2626

2727
export gctime,
28+
instructions,
29+
branches,
2830
memory,
2931
allocs,
3032
params,

src/execution.jl

+48-9
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,24 @@ macro benchmarkable(args...)
506506
end
507507
end
508508

509+
"""
    PerfInterface(; setup, start, stop, read, teardown)

Hook bundle used by the benchmarking loop to collect hardware performance
counters. Each field is a function invoked at the corresponding phase of a
sample; the defaults are no-ops, with `read` reporting `(NaN, NaN)` for
(instructions, branches) when no counter backend (e.g. the LinuxPerf
extension) is active.
"""
struct PerfInterface
    setup::Function     # () -> backend state (passed to the other hooks)
    start::Function     # (bench) -> nothing; begin counting
    stop::Function      # (bench) -> nothing; stop counting
    read::Function      # (bench) -> (instructions, branches) as Float64s
    teardown::Function  # (bench) -> nothing; release backend resources

    function PerfInterface(;
        setup=Returns(nothing),
        start=Returns(nothing),
        stop=Returns(nothing),
        read=Returns((NaN, NaN)),
        teardown=Returns(nothing),
    )
        return new(setup, start, stop, read, teardown)
    end
end
526+
509527
# `eval` an expression that forcibly defines the specified benchmark at
510528
# top-level in order to allow transfer of locally-scoped variables into
511529
# benchmark scope.
@@ -553,6 +571,8 @@ function generate_benchmark_definition(
553571
end
554572
)
555573
end
574+
ext = Base.get_extension(BenchmarkTools, :LinuxPerfExt)
575+
LinuxPerf = isnothing(ext) ? PerfInterface() : ext.interface()
556576
return Core.eval(
557577
eval_module,
558578
quote
@@ -563,17 +583,34 @@ function generate_benchmark_definition(
563583
$(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters
564584
)
565585
$(setup)
586+
__perf_bench = $(LinuxPerf.setup)()
587+
__gcdiff = nothing
588+
__return_val = nothing
589+
__sample_time::Int64 = 0
590+
__sample_instructions::Float64 = 0
591+
__sample_branches::Float64 = 0
566592
__evals = __params.evals
567-
__gc_start = Base.gc_num()
568-
__start_time = time_ns()
569-
__return_val = $(invocation)
570-
for __iter in 2:__evals
571-
$(invocation)
593+
try
594+
__gc_start = Base.gc_num()
595+
$(LinuxPerf.start)(__perf_bench)
596+
__start_time = time_ns()
597+
__return_val = $(invocation)
598+
for __iter in 2:__evals
599+
$(invocation)
600+
end
601+
__sample_time = time_ns() - __start_time
602+
$(LinuxPerf.stop)(__perf_bench)
603+
__gcdiff = Base.GC_Diff(Base.gc_num(), __gc_start)
604+
__sample_instructions, __sample_branches = $(LinuxPerf.read)(
605+
__perf_bench
606+
)
607+
finally
608+
$(LinuxPerf.teardown)(__perf_bench)
609+
$(teardown)
572610
end
573-
__sample_time = time_ns() - __start_time
574-
__gcdiff = Base.GC_Diff(Base.gc_num(), __gc_start)
575-
$(teardown)
576611
__time = max((__sample_time / __evals) - __params.overhead, 0.001)
612+
__instructions = max(__sample_instructions / __evals, 0.0) # may be NaN
613+
__branches = max(__sample_branches / __evals, 0.0) # may be NaN
577614
__gctime = max((__gcdiff.total_time / __evals) - __params.overhead, 0.0)
578615
__memory = Int(Base.fld(__gcdiff.allocd, __evals))
579616
__allocs = Int(
@@ -585,7 +622,9 @@ function generate_benchmark_definition(
585622
__evals,
586623
),
587624
)
588-
return __time, __gctime, __memory, __allocs, __return_val
625+
return __time,
626+
__instructions, __branches, __gctime, __memory, __allocs,
627+
__return_val
589628
end
590629
$BenchmarkTools.Benchmark($(samplefunc), $(quote_vals), $(params))
591630
end,

src/groups.jl

+2
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,8 @@ Base.min(groups::BenchmarkGroup...) = mapvals(min, groups...)
113113
Base.max(groups::BenchmarkGroup...) = mapvals(max, groups...)
114114

115115
Base.time(group::BenchmarkGroup) = mapvals(time, group)
116+
instructions(group::BenchmarkGroup) = mapvals(instructions, group)
117+
branches(group::BenchmarkGroup) = mapvals(branches, group)
116118
gctime(group::BenchmarkGroup) = mapvals(gctime, group)
117119
memory(group::BenchmarkGroup) = mapvals(memory, group)
118120
allocs(group::BenchmarkGroup) = mapvals(allocs, group)

src/parameters.jl

+27-2
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,14 @@ mutable struct Parameters
1414
gctrial::Bool
1515
gcsample::Bool
1616
time_tolerance::Float64
17+
instruction_tolerance::Float64
18+
branch_tolerance::Float64
1719
memory_tolerance::Float64
1820
end
1921

20-
const DEFAULT_PARAMETERS = Parameters(5.0, 10000, 1, false, 0, true, false, 0.05, 0.01)
22+
const DEFAULT_PARAMETERS = Parameters(
23+
5.0, 10000, 1, false, 0, true, false, 0.05, 0.05, 0.05, 0.01
24+
)
2125

2226
function Parameters(;
2327
seconds=DEFAULT_PARAMETERS.seconds,
@@ -28,6 +32,8 @@ function Parameters(;
2832
gctrial=DEFAULT_PARAMETERS.gctrial,
2933
gcsample=DEFAULT_PARAMETERS.gcsample,
3034
time_tolerance=DEFAULT_PARAMETERS.time_tolerance,
35+
instruction_tolerance=DEFAULT_PARAMETERS.instruction_tolerance,
36+
branch_tolerance=DEFAULT_PARAMETERS.branch_tolerance,
3137
memory_tolerance=DEFAULT_PARAMETERS.memory_tolerance,
3238
)
3339
return Parameters(
@@ -39,6 +45,8 @@ function Parameters(;
3945
gctrial,
4046
gcsample,
4147
time_tolerance,
48+
instruction_tolerance,
49+
branch_tolerance,
4250
memory_tolerance,
4351
)
4452
end
@@ -52,6 +60,8 @@ function Parameters(
5260
gctrial=nothing,
5361
gcsample=nothing,
5462
time_tolerance=nothing,
63+
instruction_tolerance=nothing,
64+
branch_tolerance=nothing,
5565
memory_tolerance=nothing,
5666
)
5767
params = Parameters()
@@ -63,6 +73,13 @@ function Parameters(
6373
params.gcsample = gcsample != nothing ? gcsample : default.gcsample
6474
params.time_tolerance =
6575
time_tolerance != nothing ? time_tolerance : default.time_tolerance
76+
params.instruction_tolerance = if instruction_tolerance != nothing
77+
instruction_tolerance
78+
else
79+
default.instruction_tolerance
80+
end
81+
params.branch_tolerance =
82+
branch_tolerance != nothing ? branch_tolerance : default.branch_tolerance
6683
params.memory_tolerance =
6784
memory_tolerance != nothing ? memory_tolerance : default.memory_tolerance
6885
return params::BenchmarkTools.Parameters
@@ -76,6 +93,8 @@ function Base.:(==)(a::Parameters, b::Parameters)
7693
a.gctrial == b.gctrial &&
7794
a.gcsample == b.gcsample &&
7895
a.time_tolerance == b.time_tolerance &&
96+
a.instruction_tolerance == b.instruction_tolerance &&
97+
a.branch_tolerance == b.branch_tolerance &&
7998
a.memory_tolerance == b.memory_tolerance
8099
end
81100

@@ -89,6 +108,8 @@ function Base.copy(p::Parameters)
89108
p.gctrial,
90109
p.gcsample,
91110
p.time_tolerance,
111+
p.instruction_tolerance,
112+
p.branch_tolerance,
92113
p.memory_tolerance,
93114
)
94115
end
@@ -109,7 +130,11 @@ end
109130

110131
@noinline function overhead_sample(evals)
111132
start_time = time_ns()
112-
for _ in 1:evals
133+
try
134+
for _ in 1:evals
135+
nullfunc()
136+
end
137+
finally
113138
nullfunc()
114139
end
115140
sample_time = time_ns() - start_time

src/serialization.jl

+32-2
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,38 @@ function recover(x::Vector)
5555
else
5656
xsi = if fn == "evals_set" && !haskey(fields, fn)
5757
false
58-
elseif fn in ("seconds", "overhead", "time_tolerance", "memory_tolerance") &&
59-
fields[fn] === nothing
58+
elseif fn in ("instructions", "branches")
59+
# JSON spec doesn't support NaN, so handle it specially here
60+
if !haskey(fields, fn)
61+
if ft === Vector{Float64}
62+
Float64[NaN for _ in length(fields["time"])]
63+
elseif ft === Float64
64+
NaN
65+
else
66+
@assert false
67+
end
68+
else
69+
if ft === Vector{Float64}
70+
Float64[
71+
elem === nothing ? NaN : convert(Float64, elem) for
72+
elem in fields[fn]
73+
]
74+
else
75+
fields[fn] === nothing ? NaN : convert(ft, fields[fn])
76+
end
77+
end
78+
elseif fn == "instruction_tolerance" && !haskey(fields, fn)
79+
DEFAULT_PARAMETERS.instruction_tolerance
80+
elseif fn == "branch_tolerance" && !haskey(fields, fn)
81+
DEFAULT_PARAMETERS.branch_tolerance
82+
elseif fn in (
83+
"seconds",
84+
"overhead",
85+
"time_tolerance",
86+
"instruction_tolerance",
87+
"branch_tolerance",
88+
"memory_tolerance",
89+
) && fields[fn] === nothing
6090
# JSON spec doesn't support Inf
6191
# These fields should all be >= 0, so we can ignore -Inf case
6292
typemax(ft)

0 commit comments

Comments (0)