diff --git a/Project.toml b/Project.toml
index a91c231d..b48447a1 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,14 +4,17 @@ version = "1.3.2"
 
 [deps]
 JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
+LinuxPerf = "b4c46c6c-4fb0-484d-a11a-41bc3392d094"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
 
 [compat]
 JSON = "0.18, 0.19, 0.20, 0.21"
+LinuxPerf = "= 0.3.5"
 julia = "1"
 
 [extras]
diff --git a/src/BenchmarkTools.jl b/src/BenchmarkTools.jl
index c5d23077..445ad3a5 100644
--- a/src/BenchmarkTools.jl
+++ b/src/BenchmarkTools.jl
@@ -9,6 +9,8 @@
 using UUIDs: uuid4
 using Printf
 using Profile
+import LinuxPerf
+import Random
 
 const BENCHMARKTOOLS_VERSION = v"1.0.0"
 
diff --git a/src/execution.jl b/src/execution.jl
index 16bb6532..e8325b66 100644
--- a/src/execution.jl
+++ b/src/execution.jl
@@ -96,9 +96,9 @@ function _run(b::Benchmark, p::Parameters; verbose = false, pad = "", kwargs...)
     start_time = Base.time()
     trial = Trial(params)
     params.gcsample && gcscrub()
-    s = b.samplefunc(b.quote_vals, params)
-    push!(trial, s[1:end-1]...)
-    return_val = s[end]
+    trial_contents = b.samplefunc(b.quote_vals, params)
+    push!(trial, trial_contents)
+    return_val = trial_contents.__return_val
     iters = 2
     while (Base.time() - start_time) < params.seconds && iters ≤ params.samples
         params.gcsample && gcscrub()
@@ -492,6 +492,9 @@ function generate_benchmark_definition(eval_module, out_vars, setup_vars, quote_
             x
         end)
     end
+    experimental_enable_linux_perf = true # TODO: take this as input from the user
+    # TODO: let the user actually provide these options.
+    linux_perf_opts = LinuxPerf.parse_pstats_options([])
     return Core.eval(eval_module, quote
         @noinline $(signature_def) = begin $(core_body) end
         @noinline function $(samplefunc)($(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters)
@@ -512,7 +515,39 @@ function generate_benchmark_definition(eval_module, out_vars, setup_vars, quote_
                 __allocs = Int(Base.fld(__gcdiff.malloc + __gcdiff.realloc +
                                         __gcdiff.poolalloc + __gcdiff.bigalloc,
                                         __evals))
-                return __time, __gctime, __memory, __allocs, __return_val
+                if $(experimental_enable_linux_perf)
+                    # Based on https://github.com/JuliaPerf/LinuxPerf.jl/blob/a7fee0ff261a5b5ce7a903af7b38d1b5c27dd931/src/LinuxPerf.jl#L1043-L1061
+                    __linux_perf_groups = LinuxPerf.set_default_spaces(
+                        $(linux_perf_opts.events),
+                        $(linux_perf_opts.spaces),
+                    )
+                    __linux_perf_bench = LinuxPerf.make_bench_threaded(
+                        __linux_perf_groups;
+                        threads = $(linux_perf_opts.threads),
+                    )
+                    LinuxPerf.enable!(__linux_perf_bench)
+                    # We'll just run it one time.
+                    __return_val_2 = $(invocation)
+                    LinuxPerf.disable!(__linux_perf_bench)
+                    # trick the compiler not to eliminate the code
+                    if rand() < 0
+                        __linux_perf_stats = __return_val_2
+                    else
+                        __linux_perf_stats = LinuxPerf.Stats(__linux_perf_bench)
+                    end
+                else
+                    # Keep the NamedTuple shape identical in both branches so
+                    # `push!(::Trial, ::TrialContents)` dispatches either way.
+                    __linux_perf_stats = nothing
+                end
+                return (;
+                    __time,
+                    __gctime,
+                    __memory,
+                    __allocs,
+                    __return_val,
+                    __linux_perf_stats,
+                )
             end
             $BenchmarkTools.Benchmark($(samplefunc), $(quote_vals), $(params))
         end)
diff --git a/src/trials.jl b/src/trials.jl
index e24d9271..38fa7296 100644
--- a/src/trials.jl
+++ b/src/trials.jl
@@ -8,6 +8,7 @@ mutable struct Trial
     gctimes::Vector{Float64}
     memory::Int
     allocs::Int
+    linux_perf_stats::Union{LinuxPerf.Stats, Nothing}
 end
 
-Trial(params::Parameters) = Trial(params, Float64[], Float64[], typemax(Int), typemax(Int))
+Trial(params::Parameters) = Trial(params, Float64[], Float64[], typemax(Int), typemax(Int), nothing)
@@ -22,11 +23,26 @@
 end
 
-Base.copy(t::Trial) = Trial(copy(t.params), copy(t.times), copy(t.gctimes), t.memory, t.allocs)
+Base.copy(t::Trial) =
+    Trial(copy(t.params), copy(t.times), copy(t.gctimes), t.memory, t.allocs, t.linux_perf_stats)
 
-function Base.push!(t::Trial, time, gctime, memory, allocs)
+# Exact name set (and order) of the NamedTuple produced by the generated
+# `samplefunc` in src/execution.jl; `push!` dispatches on it.
+const TrialContents = NamedTuple{(
+    :__time,
+    :__gctime,
+    :__memory,
+    :__allocs,
+    :__return_val,
+    :__linux_perf_stats,
+)}
+
+function Base.push!(t::Trial, trial_contents::TrialContents)
+    time = trial_contents.__time
+    gctime = trial_contents.__gctime
+    memory = trial_contents.__memory
+    allocs = trial_contents.__allocs
     push!(t.times, time)
     push!(t.gctimes, gctime)
     memory < t.memory && (t.memory = memory)
     allocs < t.allocs && (t.allocs = allocs)
+    t.linux_perf_stats = trial_contents.__linux_perf_stats
     return t
 end