diff --git a/docs/src/index.md b/docs/src/index.md index e30c0440..38a018e5 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -17,19 +17,22 @@ julia> using BenchmarkTools # timing results. Note that each sample can require multiple evaluations # benchmark kernel evaluations. See the BenchmarkTools manual for details. julia> @benchmark sort(data) setup=(data=rand(10)) -BenchmarkTools.Trial: - 10000 samples with 968 evaulations took a median time of 90.902 ns (0.00% GC) - Time (mean ± σ): 94.936 ns ± 47.797 ns (GC: 2.78% ± 5.03%) - Range (min … max): 77.655 ns … 954.823 ns (GC: 0.00% … 87.94%) - - ▁▃▅▆▇█▇▆▅▂▁ - ▂▂▃▃▄▅▆▇███████████▇▆▄▄▃▃▂▂▂▂▂▂▂▂▂▂▂▁▂▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂ - 77.7 ns Histogram: frequency by time 137 ns - - Memory estimate: 160 bytes, allocs estimate: 1. +┌ Trial: +│ min 46.954 ns, median 59.475 ns, mean 61.344 ns, 99ᵗʰ 80.203 ns +│ 1 allocation, 144 bytes +│ GC time: mean 1.092 ns (1.78%), max 537.224 ns (88.05%) +│ ◔ ◑ * +│ ▂▄▅▇▇█▆▆▄▂ +│ ▁▂▁▁▂▂▂▂▁▂▂▁▁▂▂▂▂▃▃▅▆████████████▇▅▅▃▃▃▃▃▃▃▃▃▃▃▃▂▃▂▂▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁ ▄ +└ 46 ns 10_000 samples, each 985 evaluations 81 ns + ``` +In the histogram of sample times, the median is marked `◑` and the mean `*`; on most displays +these will be indicated by color too (but not in the documentation).
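+The statistics marked in the histogram can also be computed directly from the returned
+`Trial` object. A minimal sketch (assuming `t` holds the result of the `@benchmark` call
+above; `quantile` comes from the `Statistics` standard library and here is applied to the
+raw sample times):
+
+```julia
+julia> using Statistics
+
+julia> t = @benchmark sort(data) setup=(data=rand(10));
+
+julia> minimum(t), median(t), mean(t);   # the left edge, ◑, and * markers
+
+julia> quantile(t.times, 0.99)           # the 99ᵗʰ-percentile sample time, in nanoseconds
+```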
+ For quick sanity checks, one can use the [`@btime` macro](https://juliaci.github.io/BenchmarkTools.jl/stable/manual/#Benchmarking-basics), which is a convenience wrapper around `@benchmark` whose output is analogous to Julia's built-in [`@time` macro](https://docs.julialang.org/en/v1/base/base/#Base.@time). +This prints only the **minimum** time, which is often the most informative for fast-running +calculations: ```julia julia> @btime sin(x) setup=(x=rand()) diff --git a/docs/src/manual.md b/docs/src/manual.md index 8fd5105a..095e0002 100644 --- a/docs/src/manual.md +++ b/docs/src/manual.md @@ -23,38 +23,33 @@ recording the total time `t` it takes to record `n` evaluations, and estimating To quickly benchmark a Julia expression, use `@benchmark`: ```julia -julia> @benchmark sin(1) -BenchmarkTools.Trial: 10000 samples with 1000 evaluations. - Range (min … max): 1.442 ns … 53.028 ns ┊ GC (min … max): 0.00% … 0.00% - Time (median): 1.453 ns ┊ GC (median): 0.00% - Time (mean ± σ): 1.462 ns ± 0.566 ns ┊ GC (mean ± σ): 0.00% ± 0.00% - - █ - ▂▁▁▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃▁▁▃ - 1.44 ns Histogram: frequency by time 1.46 ns (top 1%) - - Memory estimate: 0 bytes, allocs estimate: 0. +julia> @benchmark sum(sin, range(0, 2pi, length=17)) +┌ Trial: +│ min 277.822 ns, median 278.523 ns, mean 282.786 ns, 99ᵗʰ 353.745 ns +│ 0 allocations +│ ◑ * +│ █ +│ ▁▁▁▁▁▁█▄▂▂▂▂▂▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁ ▂ +└ 270 ns 10_000 samples, each 298 evaluations 360 ns + ``` The `@benchmark` macro is essentially shorthand for defining a benchmark, auto-tuning the benchmark's configuration parameters, and running the benchmark.
These three steps can be done explicitly using `@benchmarkable`, `tune!` and `run`: ```julia -julia> b = @benchmarkable sin(1); # define the benchmark with default parameters +# define the benchmark with default parameters +julia> b = @benchmarkable sum(sin, range(0, 2pi, length=17)); # find the right evals/sample and number of samples to take for this benchmark julia> tune!(b); julia> run(b) -BenchmarkTools.Trial: 10000 samples with 1000 evaluations. - Range (min … max): 1.442 ns … 4.308 ns ┊ GC (min … max): 0.00% … 0.00% - Time (median): 1.453 ns ┊ GC (median): 0.00% - Time (mean ± σ): 1.456 ns ± 0.056 ns ┊ GC (mean ± σ): 0.00% ± 0.00% - - █ - ▂▁▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▃ - 1.44 ns Histogram: frequency by time 1.46 ns (top 1%) - - Memory estimate: 0 bytes, allocs estimate: 0. +┌ Trial: +│ min 277.914 ns, median 278.465 ns, mean 281.920 ns, 99ᵗʰ 331.963 ns +│ 0 allocations +│ ◔◑ * +│ █ +│ ▁▁▁▁▁▁▁▁█▂▂▂▂▂▂▂▂▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁ ▂ +└ 270 ns 10_000 samples, each 303 evaluations 340 ns + ``` Alternatively, you can use the `@btime` or `@belapsed` macros. @@ -65,12 +60,15 @@ before returning the value of the expression, while `@belapsed` returns the minimum time in seconds. ```julia -julia> @btime sin(1) - 13.612 ns (0 allocations: 0 bytes) -0.8414709848078965 +julia> @btime sum(sin, range(0, 2pi, length=17)) + 277.960 ns (0 allocations: 0 bytes) +-3.559516622919863e-16 + +julia> @belapsed sum(sin, range(0, 2pi, length=17)) +2.7796013289036544e-7 -julia> @belapsed sin(1) -1.3614228456913828e-8 +julia> ans * 1e9 # convert to nanoseconds +277.96013289036546 ``` ### Benchmark `Parameters` @@ -108,31 +106,26 @@ You can interpolate values into `@benchmark` and `@benchmarkable` expressions: ```julia # rand(1000) is executed for each evaluation -julia> @benchmark sum(rand(1000)) -BenchmarkTools.Trial: 10000 samples with 10 evaluations. 
- Range (min … max): 1.153 μs … 142.253 μs ┊ GC (min … max): 0.00% … 96.43% - Time (median): 1.363 μs ┊ GC (median): 0.00% - Time (mean ± σ): 1.786 μs ± 4.612 μs ┊ GC (mean ± σ): 9.58% ± 3.70% - - ▄▆██▇▇▆▄▃▂▁ ▁▁▂▂▂▂▂▂▂▁▂▁ - ████████████████▆▆▇▅▆▇▆▆▆▇▆▇▆▆▅▄▄▄▅▃▄▇██████████████▇▇▇▇▆▆▇▆▆▅▅▅▅ - 1.15 μs Histogram: log(frequency) by time 3.8 μs (top 1%) - - Memory estimate: 7.94 KiB, allocs estimate: 1. +julia> @benchmark sum(sqrt, rand(1000)) +┌ Trial: +│ min 1.629 μs, median 2.021 μs, mean 2.464 μs, 99ᵗʰ 4.063 μs +│ 1 allocation, 7.94 KiB +│ GC time: mean 385.130 ns (15.63%), max 266.488 μs (98.02%) +│ ◔ ◑ ◕ * +│ ▆ ▂█▇▅ +│ ▃█▄▄▃▃▃▃▅██████▆▅▄▄▄▃▃▃▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁ ▃ +└ 1.6 μs 10_000 samples, each 10 evaluations 4.1 μs + # rand(1000) is evaluated at definition time, and the resulting # value is interpolated into the benchmark expression -julia> @benchmark sum($(rand(1000))) -BenchmarkTools.Trial: 10000 samples with 963 evaluations. - Range (min … max): 84.477 ns … 241.602 ns ┊ GC (min … max): 0.00% … 0.00% - Time (median): 84.497 ns ┊ GC (median): 0.00% - Time (mean ± σ): 85.125 ns ± 5.262 ns ┊ GC (mean ± σ): 0.00% ± 0.00% - - █ - █▅▇▅▄███▇▇▆▆▆▄▄▅▅▄▄▅▄▄▅▄▄▄▄▁▃▄▁▁▃▃▃▄▃▁▃▁▁▁▁▁▃▁▁▁▁▁▁▁▁▁▁▃▃▁▁▁▃▁▁▁▁▆ - 84.5 ns Histogram: log(frequency) by time 109 ns (top 1%) - - Memory estimate: 0 bytes, allocs estimate: 0. +julia> @benchmark sum(sqrt, $(rand(1000))) +┌ Trial: +│ min 900.786 ns, median 904.762 ns, mean 916.071 ns, 99ᵗʰ 1.161 μs +│ 0 allocations +│ ◑◕ * +│ █ +│ ▁▁▁▁▂█▄▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁ ▂ +└ 880 ns 10_000 samples, each 42 evaluations 1.2 μs + ``` A good rule of thumb is that **external variables should be explicitly interpolated into the benchmark expression**: @@ -142,29 +135,25 @@ julia> A = rand(1000); # BAD: A is a global variable in the benchmarking context julia> @benchmark [i*i for i in A] -BenchmarkTools.Trial: 10000 samples with 54 evaluations. 
- Range (min … max): 889.241 ns … 29.584 μs ┊ GC (min … max): 0.00% … 93.33% - Time (median): 1.073 μs ┊ GC (median): 0.00% - Time (mean ± σ): 1.296 μs ± 2.004 μs ┊ GC (mean ± σ): 14.31% ± 8.76% - - ▃█▆ - ▂▂▄▆███▇▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▂▂▂▁▂▂▁▁▁▁▁▂▁▁▁▁▂▂▁▁▁▁▂▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂ - 889 ns Histogram: frequency by time 2.92 μs (top 1%) - - Memory estimate: 7.95 KiB, allocs estimate: 2. +┌ Trial: +│ min 550.043 ns, median 1.011 μs, mean 1.385 μs, 99ᵗʰ 13.571 μs +│ 2 allocations, total 7.95 KiB +│ GC time: mean 396.300 ns (28.61%), max 15.675 μs (89.99%) +│ ◑◕* +│ █ +│ ▅▅█▇▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁ ▂ +└ 550 ns 7_811 samples, each 184 evaluations 14 μs + # GOOD: A is a constant value in the benchmarking context julia> @benchmark [i*i for i in $A] -BenchmarkTools.Trial: 10000 samples with 121 evaluations. - Range (min … max): 742.455 ns … 11.846 μs ┊ GC (min … max): 0.00% … 88.05% - Time (median): 909.959 ns ┊ GC (median): 0.00% - Time (mean ± σ): 1.135 μs ± 1.366 μs ┊ GC (mean ± σ): 16.94% ± 12.58% - - ▇█▅▂ ▁ - ████▇▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▄▅▆██ - 742 ns Histogram: log(frequency) by time 10.3 μs (top 1%) - - Memory estimate: 7.94 KiB, allocs estimate: 1. +┌ Trial: +│ min 490.885 ns, median 944.875 ns, mean 1.288 μs, 99ᵗʰ 12.160 μs +│ 1 allocation, 7.94 KiB +│ GC time: mean 367.793 ns (28.54%), max 14.558 μs (94.32%) +│ ◔◑ * +│ █ +│ ▅▃██▃▂▂▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁ ▂ +└ 490 ns 8_049 samples, each 192 evaluations 13 μs + ``` (Note that "KiB" is the SI prefix for a [kibibyte](https://en.wikipedia.org/wiki/Kibibyte): 1024 bytes.) @@ -213,7 +202,7 @@ julia> let x = 1 BenchmarkTools allows you to pass `setup` and `teardown` expressions to `@benchmark` and `@benchmarkable`. The `setup` expression is evaluated just before sample execution, while the `teardown` expression is evaluated just after sample execution. 
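+For instance, `teardown` can clean up resources created by `setup`. A minimal sketch (this
+exact benchmark is illustrative, not from the manual; it assumes a scratch file named
+`tmp.bin` may be created and removed in the working directory):
+
+```julia
+using BenchmarkTools
+
+# Write the file before each sample and delete it afterwards, so that neither
+# the write nor the cleanup is included in the measured time:
+b = @benchmarkable read("tmp.bin") setup=(write("tmp.bin", rand(UInt8, 1024))) teardown=(rm("tmp.bin"))
+```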
Here's an example where this kind of thing is useful: ```julia -julia> x = rand(100000); +julia> x = rand(100_000); # For each sample, bind a variable `y` to a fresh copy of `x`. As you # can see, `y` is accessible within the scope of the core expression. @@ -221,16 +210,13 @@ julia> b = @benchmarkable sort!(y) setup=(y = copy($x)) Benchmark(evals=1, seconds=5.0, samples=10000) julia> run(b) -BenchmarkTools.Trial: 819 samples with 1 evaluations. - Range (min … max): 5.983 ms … 6.954 ms ┊ GC (min … max): 0.00% … 0.00% - Time (median): 6.019 ms ┊ GC (median): 0.00% - Time (mean ± σ): 6.029 ms ± 46.222 μs ┊ GC (mean ± σ): 0.00% ± 0.00% - - ▃▂▂▄█▄▂▃ - ▂▃▃▄▆▅████████▇▆▆▅▄▄▄▅▆▄▃▄▅▄▃▂▃▃▃▂▂▃▁▂▂▂▁▂▂▂▂▂▂▁▁▁▁▂▂▁▁▁▂▂▁▁▂▁▁▂ - 5.98 ms Histogram: frequency by time 6.18 ms (top 1%) - - Memory estimate: 0 bytes, allocs estimate: 0. +┌ Trial: +│ min 4.595 ms, median 4.647 ms, mean 4.658 ms, 99ᵗʰ 4.826 ms +│ 0 allocations +│ ◔ ◑ * ◕ +│ ▁█ +│ ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁██▇▆▆▅▄▅▆▄▅▆▅▃▆▆▅▆▄▄▅▅▄▄▄▃▂▁▂▂▃▂▃▁▂▁▂▂▂▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁ ▃ +└ 4.5 ms 419 samples, each 1 evaluation 4.9 ms + ``` In the above example, we wish to benchmark Julia's in-place sorting method. Without a setup phase, we'd have to either allocate a new input vector for each sample (such that the allocation time would pollute our results) or use the same input vector every sample (such that all samples but the first would benchmark the wrong thing - sorting an already sorted vector). The setup phase solves the problem by allowing us to do some work that can be utilized by the core expression, without that work being erroneously included in our performance results. @@ -241,33 +227,29 @@ Note that the `setup` and `teardown` phases are **executed for each sample, not It's possible for LLVM and Julia's compiler to perform optimizations on `@benchmarkable` expressions. In some cases, these optimizations can elide a computation altogether, resulting in unexpectedly "fast" benchmarks. 
For example, the following expression is non-allocating: ```julia -julia> @benchmark (view(a, 1:2, 1:2); 1) setup=(a = rand(3, 3)) -BenchmarkTools.Trial: 10000 samples with 1000 evaluations. - Range (min … max): 2.885 ns … 14.797 ns ┊ GC (min … max): 0.00% … 0.00% - Time (median): 2.895 ns ┊ GC (median): 0.00% - Time (mean ± σ): 3.320 ns ± 0.909 ns ┊ GC (mean ± σ): 0.00% ± 0.00% - - █ ▁ ▁ ▁▁▁ ▂▃▃▁ - █▁▁▇█▇▆█▇████████████████▇█▇█▇▇▇▇█▇█▇▅▅▄▁▁▁▁▄▃▁▃▃▁▄▃▁▄▁▃▅▅██████ - 2.88 ns Histogram: log(frequency) by time 5.79 ns (top 1%) +# TODO: This needs a new example!! View is in fact non-allocating. - Memory estimate: 0 bytes, allocs estimate: 0.0 +julia> @benchmark (view(a, 1:2, 1:2); 1) setup=(a = rand(3, 3)) +┌ Trial: +│ min 1.292 ns, median 1.417 ns, mean 1.417 ns, 99ᵗʰ 1.500 ns +│ 0 allocations +│ *◑ +│ ▁█ +│ ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▂▂▁▁▁▁▁▁▁▁▆▁▁▁▁▁▁▁▁▁██▁▁▁▁▁▁▁▁▂▂▁▁▁▁▁▁▁▁▂ ▂ +└ 1.2 ns 10_000 samples, each 1_000 evaluations 1.5 ns + ``` Note, however, that this does not mean that `view(a, 1:2, 1:2)` is non-allocating: ```julia julia> @benchmark view(a, 1:2, 1:2) setup=(a = rand(3, 3)) -BenchmarkTools.Trial: 10000 samples with 1000 evaluations. - Range (min … max): 3.175 ns … 18.314 ns ┊ GC (min … max): 0.00% … 0.00% - Time (median): 3.176 ns ┊ GC (median): 0.00% - Time (mean ± σ): 3.262 ns ± 0.882 ns ┊ GC (mean ± σ): 0.00% ± 0.00% - - █ - █▁▂▁▁▁▂▁▂▁▂▁▁▂▁▁▂▂▂▂▂▂▁▁▂▁▁▂▁▁▁▂▂▁▁▁▂▁▂▂▁▂▁▁▂▂▂▁▂▂▂▂▂▂▂▂▂▂▂▁▂▂▁▂ - 3.18 ns Histogram: frequency by time 4.78 ns (top 1%) - - Memory estimate: 0 bytes, allocs estimate: 0.8 +┌ Trial: +│ min 1.666 ns, median 1.750 ns, mean 1.739 ns, 99ᵗʰ 1.792 ns +│ 0 allocations +│ ◔ * ◑ +│ █ +│ ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▆▄▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▁▁ ▂ +└ 1.6 ns 10_000 samples, each 1_000 evaluations 1.8 ns + ``` The key point here is that these two benchmarks measure different things, even though their code is similar. 
In the first example, Julia was able to optimize away `view(a, 1:2, 1:2)` because it could prove that the value wasn't being returned and `a` wasn't being mutated. In the second example, the optimization is not performed because `view(a, 1:2, 1:2)` is a return value of the benchmark expression. @@ -306,22 +288,22 @@ This section provides a limited number of examples demonstrating these types. Fo Running a benchmark produces an instance of the `Trial` type: ```julia -julia> t = @benchmark eigen(rand(10, 10)) -BenchmarkTools.Trial: 10000 samples with 1 evaluations. - Range (min … max): 26.549 μs … 1.503 ms ┊ GC (min … max): 0.00% … 93.21% - Time (median): 30.818 μs ┊ GC (median): 0.00% - Time (mean ± σ): 31.777 μs ± 25.161 μs ┊ GC (mean ± σ): 1.31% ± 1.63% - - ▂▃▅▆█▇▇▆▆▄▄▃▁▁ - ▁▁▁▁▁▁▂▃▄▆████████████████▆▆▅▅▄▄▃▃▃▂▂▂▂▂▂▁▂▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ - 26.5 μs Histogram: frequency by time 41.3 μs (top 1%) +julia> using LinearAlgebra - Memory estimate: 16.36 KiB, allocs estimate: 19. +julia> t = @benchmark eigen(rand(10, 10)) +┌ Trial: +│ min 12.708 μs, median 15.125 μs, mean 16.126 μs, 99ᵗʰ 28.626 μs +│ 19 allocations, total 16.16 KiB +│ GC time: mean 519.554 ns (3.22%), max 1.787 ms (98.16%) +│ ◔ ◑ ◕* +│ ▂▃▇▇▇█▆▄▃ +│ ▁▁▂▂▂▃▅▇██████████▆▅▄▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁ ▃ +└ 12 μs 10_000 samples, each 1 evaluation 29 μs + julia> dump(t) # here's what's actually stored in a Trial BenchmarkTools.Trial params: BenchmarkTools.Parameters - seconds: Float64 5.0 + seconds: Float64 2.0 samples: Int64 10000 evals: Int64 1 overhead: Float64 0.0 @@ -329,51 +311,65 @@ BenchmarkTools.Trial gcsample: Bool false time_tolerance: Float64 0.05 memory_tolerance: Float64 0.01 - times: Array{Float64}((10000,)) [26549.0, 26960.0, 27030.0, 27171.0, 27211.0, 27261.0, 27270.0, 27311.0, 27311.0, 27321.0 … 55383.0, 55934.0, 58649.0, 62847.0, 68547.0, 75761.0, 247081.0, 1.421718e6, 1.488322e6, 1.50329e6] - gctimes: Array{Float64}((10000,)) [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0 … 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.366184e6, 1.389518e6, 1.40116e6] - memory: Int64 16752 + times: Array{Float64}((10000,)) [35084.0, 16708.0, 16542.0, 15750.0, 14917.0, 15500.0, 16459.0, 15917.0, 14667.0, 16875.0 … 15750.0, 15667.0, 17000.0, 14167.0, 16000.0, 15583.0, 15500.0, 15167.0, 16125.0, 15250.0] + gctimes: Array{Float64}((10000,)) [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 … 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] + memory: Int64 16544 allocs: Int64 19 ``` -As you can see from the above, a couple of different timing estimates are pretty-printed with the `Trial`. You can calculate these estimates yourself using the `minimum`, `median`, `mean`, `maximum`, and `std` functions: +As you can see from the above, a couple of different timing estimates are pretty-printed with the `Trial`. You can calculate these estimates yourself using the `minimum`, `median`, `mean`, `maximum`, `quantile`, and `std` functions: ```julia +julia> using Statistics + julia> minimum(t) BenchmarkTools.TrialEstimate: - time: 26.549 μs + time: 12.708 μs gctime: 0.000 ns (0.00%) - memory: 16.36 KiB + memory: 16.16 KiB allocs: 19 -julia> median(t) +julia> median(t) # The same as quantile(t, 0.5) BenchmarkTools.TrialEstimate: - time: 30.818 μs + time: 15.125 μs gctime: 0.000 ns (0.00%) - memory: 16.36 KiB + memory: 16.16 KiB allocs: 19 julia> mean(t) BenchmarkTools.TrialEstimate: - time: 31.777 μs - gctime: 415.686 ns (1.31%) - memory: 16.36 KiB + time: 16.126 μs + gctime: 519.554 ns (3.22%) + memory: 16.16 KiB + allocs: 19 + +julia> quantile(t, 0.99) +BenchmarkTools.TrialEstimate: + time: 28.626 μs + gctime: 0.000 ns (0.00%) + memory: 16.16 KiB allocs: 19 julia> maximum(t) BenchmarkTools.TrialEstimate: - time: 1.503 ms - gctime: 1.401 ms (93.21%) - memory: 16.36 KiB + time: 1.821 ms + gctime: 1.787 ms (98.16%) + memory: 16.16 KiB allocs: 19 julia> std(t) BenchmarkTools.TrialEstimate: - time: 25.161 μs - gctime: 23.999 μs (95.38%) - memory: 16.36 KiB + time:
30.401 μs + gctime: 30.002 μs (98.69%) + memory: 16.16 KiB allocs: 19 ``` +Note that `maximum(t)` finds the run with the maximum time, and displays its `gctime`. +This will often be the longest `gctime`, since garbage collection is often what makes +the slowest run take so long, but this is not guaranteed. The display of `@benchmark` +instead uses `maximum(t.gctimes)`. + ### Which estimator should I use? Time distributions are always right-skewed for the benchmarks we've tested. This phenomena can be justified by considering that the machine noise affecting the benchmarking process is, in some sense, inherently positive - there aren't really sources of noise that would regularly cause your machine to execute a series of instructions *faster* than the theoretical "ideal" time prescribed by your hardware. Following this characterization of benchmark noise, we can describe the behavior of our estimators: @@ -901,45 +897,44 @@ For comparing two or more benchmarks against one another, you can manually speci `IOContext` to set `:histmin` and `:histmax`: ```julia -julia> io = IOContext(stdout, :histmin=>0.5, :histmax=>8, :logbins=>true) +julia> io = IOContext(stdout, :histmin=>0.5, :histmax=>10, :logbins=>true) IOContext(Base.TTY(RawFD(13) open, 0 bytes waiting)) -julia> b = @benchmark x^3 setup=(x = rand()); show(io, MIME("text/plain"), b) -BenchmarkTools.Trial: 10000 samples with 1000 evaluations.
- Range (min … max): 5.636 ns … 38.756 ns ┊ GC (min … max): 0.00% … 0.00% - Time (median): 5.662 ns ┊ GC (median): 0.00% - Time (mean ± σ): 5.767 ns ± 1.384 ns ┊ GC (mean ± σ): 0.00% ± 0.00% - - █▆ ▂ ▁ - ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁███▄▄▃█▁▁▁▁▁▁▁▁▁▁▁▁ █ - 0.5 ns Histogram: log(frequency) by time 8 ns < - - Memory estimate: 0 bytes, allocs estimate: 0. - -``` - -Set `:logbins` to `true` or `false` to ensure that all use the same vertical scaling (log frequency or frequency). - +julia> b1 = @benchmark x^3 setup=(x = rand()); show(io, MIME("text/plain"), b1) +┌ Trial: +│ min 0.875 ns, median 0.958 ns, mean 0.958 ns, 99ᵗʰ 1.042 ns +│ 0 allocations +│ ◑* +│ █▁ ▁ +│ ▁▁▇██▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ █ +└ 0.5 ns log(counts) from 10_000 samples, each 1_000 evaluations 10 ns + + +julia> b2 = @benchmark x^3.0 setup=(x = rand()); show(io, MIME("text/plain"), b2) +┌ Trial: +│ min 8.258 ns, median 8.342 ns, mean 8.440 ns, 99ᵗʰ 8.718 ns +│ 0 allocations +│ ◑* +│ █▅▄ ▁ +│ ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▇████▁▁▁▁▁▁▁▁▁ █ +└ 0.5 ns log(counts) from 10_000 samples, each 999 evaluations 10 ns + +``` + +Setting `:logbins` to `true` plots a graph with the log of the number of samples on +its vertical axis, instead of an ordinary histogram with the linear count of samples. +The lowest bar `▁` (drawn in grey when the terminal supports color) still indicates +zero counts. The `Trial` object can be visualized using the `BenchmarkPlots` package: ```julia using BenchmarkPlots, StatsPlots -b = @benchmarkable lu(rand(10,10)) +b = @benchmarkable lu(rand(1000, 1000)) t = run(b) plot(t) + +# TODO -- yaxis=:log10 leads to Warning: No strict ticks found +# and st=:box leads to Warning: Keyword argument hover not supported with Plots.GRBackend ``` This will show the timing results of the trial as a violin plot. 
You can use diff --git a/src/trials.jl b/src/trials.jl index 639ee794..a0abd8d6 100644 --- a/src/trials.jl +++ b/src/trials.jl @@ -118,6 +118,7 @@ function Base.maximum(trial::Trial) return TrialEstimate(trial, trial.times[i], trial.gctimes[i]) end +Statistics.quantile(trial::Trial, p::Real) = TrialEstimate(trial, quantile(trial.times, p), quantile(trial.gctimes, p)) Statistics.median(trial::Trial) = TrialEstimate(trial, median(trial.times), median(trial.gctimes)) Statistics.mean(trial::Trial) = TrialEstimate(trial, mean(trial.times), mean(trial.gctimes)) Statistics.std(trial::Trial) = TrialEstimate(trial, std(trial.times), std(trial.gctimes)) @@ -279,6 +280,12 @@ function prettymemory(b) return string(@sprintf("%.2f", value), " ", units) end +# This returns a string like "16_384", used for number of samples & allocations. +function prettycount(n::Integer) + groups = map(join, Iterators.partition(digits(n), 3)) + return reverse(join(groups, '_')) +end + function withtypename(f, io, t) needtype = get(io, :typeinfo, Nothing) !== typeof(t) if needtype @@ -290,18 +297,17 @@ function withtypename(f, io, t) end end -function bindata(sorteddata, nbins, min, max) - Δ = (max - min) / nbins - bins = zeros(nbins) - lastpos = 0 - for i ∈ 1:nbins - pos = searchsortedlast(sorteddata, min + i * Δ) - bins[i] = pos - lastpos - lastpos = pos +function histogram_bindata(data, fences::AbstractRange) + @assert step(fences) > 0 + bins = zeros(Int, length(fences)) + for t in data + i = searchsortedlast(fences, t) + # Any data to the left of the leftmost divider is ignored: + iszero(i) && continue + bins[i] += 1 end bins end -bindata(sorteddata, nbins) = bindata(sorteddata, nbins, first(sorteddata), last(sorteddata)) function asciihist(bins, height=1) histbars = ['▁', '▂', '▃', '▄', '▅', '▆', '▇', '█'] @@ -316,6 +322,16 @@ function asciihist(bins, height=1) map(height -> if height < 1; ' ' else histbars[height] end, heightmatrix) end +function hist_round_low(times, lo=minimum(times), 
av=mean(times)) + av < 0.1 && return 0.0 # stop at 0, not 0.01 ns, in trivial cases + raw = min(lo, av / 1.03) # demand low edge 3% from mean, or further + return round(raw, RoundDown; sigdigits = 2) +end +function hist_round_high(times, av=mean(times), hi=quantile(times, 0.99)) + raw = max(1, hi, 1.03 * av) # demand high edge 3% above mean, and at least 1ns + return round(raw, RoundUp; sigdigits = 2) +end + _summary(io, t, args...) = withtypename(() -> print(io, args...), io, t) Base.summary(io::IO, t::Trial) = _summary(io, t, prettytime(time(t))) @@ -339,168 +355,229 @@ Base.show(io::IO, t::TrialRatio) = _show(io, t) Base.show(io::IO, t::TrialJudgement) = _show(io, t) function Base.show(io::IO, ::MIME"text/plain", t::Trial) - pad = get(io, :pad, "") - print(io, "BenchmarkTools.Trial: ", length(t), " sample", if length(t) > 1 "s" else "" end, - " with ", t.params.evals, " evaluation", if t.params.evals > 1 "s" else "" end ,".\n") - - perm = sortperm(t.times) - times = t.times[perm] - gctimes = t.gctimes[perm] - - if length(t) > 1 - med = median(t) - avg = mean(t) - std = Statistics.std(t) - min = minimum(t) - max = maximum(t) - - medtime, medgc = prettytime(time(med)), prettypercent(gcratio(med)) - avgtime, avggc = prettytime(time(avg)), prettypercent(gcratio(avg)) - stdtime, stdgc = prettytime(time(std)), prettypercent(Statistics.std(gctimes ./ times)) - mintime, mingc = prettytime(time(min)), prettypercent(gcratio(min)) - maxtime, maxgc = prettytime(time(max)), prettypercent(gcratio(max)) - - memorystr = string(prettymemory(memory(min))) - allocsstr = string(allocs(min)) - elseif length(t) == 1 - print(io, pad, " Single result which took ") - printstyled(io, prettytime(times[1]); color=:blue) - print(io, " (", prettypercent(gctimes[1]/times[1]), " GC) ") - print(io, "to evaluate,\n") - print(io, pad, " with a memory estimate of ") - printstyled(io, prettymemory(t.memory[1]); color=:yellow) - print(io, ", over ") - printstyled(io, t.allocs[1]; color=:yellow) - 
print(io, " allocations.") - return + histnumber = get(io, :histnumber, "") # for Vector{Trial} printing + + boxcolor = :light_black + boxspace = " " + modulestr = "" # "BenchmarkTools." + avgcolor = :green + medcolor = :blue + alloccolor = :yellow + captioncolor = :light_black + showpercentile = 99 # used both for display time, and to set right cutoff of histogram + + alloc_strings = if allocs(t) == 0 + ("0 allocations", "") + elseif allocs(t) == 1 + ("1 allocation, ", prettymemory(memory(t))) # divided in two to colour the second else - print(io, pad, " No results.") - return + (prettycount(allocs(t)) * " allocations, total ", prettymemory(memory(t))) end - lmaxtimewidth = maximum(length.((medtime, avgtime, mintime))) - rmaxtimewidth = maximum(length.((stdtime, maxtime))) - lmaxgcwidth = maximum(length.((medgc, avggc, mingc))) - rmaxgcwidth = maximum(length.((stdgc, maxgc))) - - # Main stats - - print(io, pad, " Range ") - printstyled(io, "("; color=:light_black) - printstyled(io, "min"; color=:cyan, bold=true) - print(io, " … ") - printstyled(io, "max"; color=:magenta) - printstyled(io, "): "; color=:light_black) - printstyled(io, lpad(mintime, lmaxtimewidth); color=:cyan, bold=true) - print(io, " … ") - printstyled(io, lpad(maxtime, rmaxtimewidth); color=:magenta) - print(io, " ") - printstyled(io, "┊"; color=:light_black) - print(io, " GC ") - printstyled(io, "("; color=:light_black) - print(io, "min … max") - printstyled(io, "): "; color=:light_black) - print(io, lpad(mingc, lmaxgcwidth), " … ", lpad(maxgc, rmaxgcwidth)) - - print(io, "\n", pad, " Time ") - printstyled(io, "("; color=:light_black) - printstyled(io, "median"; color=:blue, bold=true) - printstyled(io, "): "; color=:light_black) - printstyled(io, lpad(medtime, lmaxtimewidth), rpad(" ", rmaxtimewidth + 5); color=:blue, bold=true) - printstyled(io, "┊"; color=:light_black) - print(io, " GC ") - printstyled(io, "("; color=:light_black) - print(io, "median") - printstyled(io, "): "; 
color=:light_black) - print(io, lpad(medgc, lmaxgcwidth)) - - print(io, "\n", pad, " Time ") - printstyled(io, "("; color=:light_black) - printstyled(io, "mean"; color=:green, bold=true) - print(io, " ± ") - printstyled(io, "σ"; color=:green) - printstyled(io, "): "; color=:light_black) - printstyled(io, lpad(avgtime, lmaxtimewidth); color=:green, bold=true) - print(io, " ± ") - printstyled(io, lpad(stdtime, rmaxtimewidth); color=:green) - print(io, " ") - printstyled(io, "┊"; color=:light_black) - print(io, " GC ") - printstyled(io, "("; color=:light_black) - print(io, "mean ± σ") - printstyled(io, "): "; color=:light_black) - print(io, lpad(avggc, lmaxgcwidth), " ± ", lpad(stdgc, rmaxgcwidth)) + samplesstr = string( + prettycount(length(t)), + if length(t) == 1 " sample, with " else " samples, each " end, + prettycount(t.params.evals), + if t.params.evals == 1 " evaluation" else " evaluations" end, + ) + + if length(t) == 0 + print(io, modulestr, "Trial$histnumber: 0 samples") + return + elseif length(t) == 1 + printstyled(io, "┌ ", modulestr, "Trial$histnumber:\n"; color=boxcolor) + + # Time + printstyled(io, pad, "│", boxspace; color=boxcolor) + print(io, "time ") + printstyled(io, prettytime(t.times[1]); color=medcolor, bold=true) + + # Memory + println(io) + printstyled(io, pad, "│", boxspace; color=boxcolor) + # print(io, allocsstr) + print(io, alloc_strings[1]) + printstyled(io, alloc_strings[2]; color=alloccolor) + + # GC time + if t.gctimes[1] > 0 + println(io) + printstyled(io, pad, "│", boxspace; color=boxcolor) + print(io, "GC time: ", prettytime(t.gctimes[1])) + printstyled(io, " (", prettypercent(t.gctimes[1] / t.times[1]),")"; color=avgcolor) + end + + # Samples + println(io) + printstyled(io, pad, "└", boxspace; color=boxcolor) + printstyled(io, samplesstr; color=:light_black) + + return + end # done with trivial cases. 
+ + # Main text block + + printstyled(io, "┌ ", modulestr, "Trial$histnumber:\n"; color=boxcolor) + + printstyled(io, pad, "│", boxspace; color=boxcolor) + printstyled(io, "min "; color=:default) + mintime = minimum(t.times) + printstyled(io, prettytime(mintime); color=:default, bold=true) + print(io, ", ") + printstyled(io, "median "; color=medcolor) + medtime = median(t.times) + printstyled(io, prettytime(medtime); color=medcolor, bold=true) + # printstyled(io, " (½)"; color=medcolor) + print(io, ", ") + printstyled(io, "mean "; color=avgcolor) + avgtime = mean(t.times) + printstyled(io, prettytime(avgtime); color=avgcolor, bold=true) + # printstyled(io, " (*)"; color=avgcolor) + print(io, ", ") + print(io, showpercentile, "ᵗʰ ") + quantime = quantile(t.times, showpercentile/100) + printstyled(io, prettytime(quantime); bold=true) + println(io) + + printstyled(io, pad, "│", boxspace; color=boxcolor) + # println(io, allocsstr) + print(io, alloc_strings[1]) + printstyled(io, alloc_strings[2] * "\n"; color=alloccolor) + + if !all(iszero, t.gctimes) + # Mean GC time is just that; then we take the percentage of the mean time + printstyled(io, pad, "│", boxspace; color=boxcolor) + _avgc = mean(t.gctimes) + print(io, "GC time: mean ", prettytime(_avgc)) + printstyled(io, " (", prettypercent(_avgc / avgtime), ")"; color=avgcolor) + + # Maximum GC time is _not_ taken as the GC time of the slowest run, maximum(t). + # The percentage shown is of the same max-GC run, again not a percentage of the longest time. + # Of course, very often the slowest run is due to GC, and these distinctions won't matter. + _t, _i = findmax(t.gctimes) + println(io, ", max ", prettytime(_t), " (", prettypercent(_t / t.times[_i]), ")") + end # Histogram - histquantile = 0.99 - # The height and width of the printed histogram in characters.
- histheight = 2 - histwidth = 42 + lmaxtimewidth + rmaxtimewidth - - histtimes = times[1:round(Int, histquantile*end)] - histmin = get(io, :histmin, first(histtimes)) - histmax = get(io, :histmax, last(histtimes)) - logbins = get(io, :logbins, nothing) - bins = bindata(histtimes, histwidth - 1, histmin, histmax) - append!(bins, [1, floor((1-histquantile) * length(times))]) - # if median size of (bins with >10% average data/bin) is less than 5% of max bin size, log the bin sizes - if logbins === true || (logbins === nothing && median(filter(b -> b > 0.1 * length(times) / histwidth, bins)) / maximum(bins) < 0.05) - bins, logbins = log.(1 .+ bins), true - else - logbins = false + logbins = get(io, :logbins, nothing) === true + caption = logbins ? ("log(counts) from " * samplesstr) : samplesstr + + # The height and width of the printed histogram in characters: + histheight = 2 + _nonhistwidth = 5 + length(boxspace) + _minhistwidth = 18 + length(caption) + histwidth = max(_minhistwidth, min(90, displaysize(io)[2]) - _nonhistwidth) + # This should fit it within your terminal, but stops growing at 90 columns. Below about + # 55 columns it will stop shrinking, by which point the first line has already wrapped. + + histmin = get(io, :histmin, hist_round_low(t.times, mintime, avgtime)) + histmax = get(io, :histmax, hist_round_high(t.times, avgtime, quantime)) + + # Here nextfloat() ensures both endpoints included, will only matter for + # artificial cases such as: Trial(Parameters(), [3,4,5], [0,0,0], 0, 0) + fences = range(histmin, stop=nextfloat(float(histmax)), length=histwidth) # stop necc. 
for Julia 1.0
+    bins = histogram_bindata(t.times, fences)
+    # Last bin is everything right of last fence, introduce a gap for printing:
+    _lastbin = pop!(bins)
+    push!(bins, 0, _lastbin)
+    if logbins
+        bins = log.(1 .+ bins)
+    end
     hist = asciihist(bins, histheight)
-    hist[:,end-1] .= ' '
-    maxbin = maximum(bins)
-
-    delta1 = (histmax - histmin) / (histwidth - 1)
-    if delta1 > 0
-        medpos = 1 + round(Int, (histtimes[length(times) ÷ 2] - histmin) / delta1)
-        avgpos = 1 + round(Int, (mean(times) - histmin) / delta1)
-    else
-        medpos, avgpos = 1, 1
+    hist[:, end-1] .= ' '
+
+    avgpos = searchsortedlast(fences, avgtime)
+    medpos = searchsortedlast(fences, medtime)
+    q25pos = searchsortedlast(fences, quantile(t.times, 0.25))  # might be 0, that's OK
+    q75pos = searchsortedlast(fences, quantile(t.times, 0.75))
+
+    # Above the histogram bars, print markers for special ones:
+    printstyled(io, pad, "│", boxspace; color=boxcolor)
+    istop = maximum(filter(i -> i in axes(hist,2), [avgpos, medpos+1, q75pos]))
+    for i in axes(hist, 2)
+        i > istop && break
+        if i == avgpos
+            printstyled(io, "*", color=avgcolor, bold=true)
+        elseif i == medpos ||
+            (medpos==avgpos && i==medpos-1 && medtime<=avgtime) ||
+            (medpos==avgpos && i==medpos+1 && medtime>avgtime)
+            # Marker for "median" is moved one to the left if they collide exactly
+            # printstyled(io, "½", color=medcolor)
+            printstyled(io, "◑", color=medcolor)
+        elseif i == q25pos
+            # Quartile markers exist partly to explain the median marker, without needing a legend
+            # printstyled(io, "¼", color=:light_black)
+            if VERSION > v"1.7-"
+                printstyled(io, "◔", color=:light_black, hidden=true)
+            else
+                printstyled(io, "◔", color=:light_black)
+            end
+        elseif i == q75pos
+            # printstyled(io, "¾", color=:light_black)
+            if VERSION > v"1.7-"
+                printstyled(io, "◕", color=:light_black, hidden=true)
+            else
+                printstyled(io, "◕", color=:light_black)
+            end
+        else
+            print(io, " ")
+        end
     end
-    print(io, "\n")
     for r in axes(hist, 1)
-        print(io, "\n", pad, " ")
+        println(io)
+        printstyled(io, pad, "│", boxspace; color=boxcolor)
+        istop = findlast(c -> c != ' ', view(hist, r, :))
         for (i, bar) in enumerate(view(hist, r, :))
-            color = :default
-            if i == avgpos color = :green end
-            if i == medpos color = :blue end
-            printstyled(io, bar; color=color)
+            i > istop && break  # don't print trailing spaces, as they waste space when line-wrapped
+            if i == avgpos
+                printstyled(io, bar; color=avgcolor)
+            # If mean & median bars co-incide, colour the mean. Matches markers above.
+            elseif i == medpos
+                printstyled(io, bar; color=medcolor)
+            elseif bins[i] == 0
+                printstyled(io, bar; color=:light_black)
+            else
+                printstyled(io, bar; color=:default)
+            end
+        end
     end
-    remtrailingzeros(timestr) = replace(timestr, r"\.?0+ " => " ")
-    minhisttime, maxhisttime = remtrailingzeros.(prettytime.(round.([histmin, histmax], sigdigits=3)))
+    # Strings for axis labels, rounded again in case you supply :histmin => 123.456
+    minhisttime = replace(prettytime(round(histmin, sigdigits=3)), r"\.?0+ " => " ")
+    maxhisttime = replace(prettytime(round(histmax, sigdigits=3)), r"\.?0+ " => " ")
-    print(io, "\n", pad, " ", minhisttime)
-    caption = "Histogram: " * ( logbins ? "log(frequency)" : "frequency" ) * " by time"
-    if logbins
-        printstyled(io, " " ^ ((histwidth - length(caption)) ÷ 2 - length(minhisttime)); color=:light_black)
-        printstyled(io, "Histogram: "; color=:light_black)
-        printstyled(io, "log("; bold=true, color=:light_black)
-        printstyled(io, "frequency"; color=:light_black)
-        printstyled(io, ")"; bold=true, color=:light_black)
-        printstyled(io, " by time"; color=:light_black)
-    else
-        printstyled(io, " " ^ ((histwidth - length(caption)) ÷ 2 - length(minhisttime)), caption; color=:light_black)
-    end
+    println(io)
+    printstyled(io, pad, "└", boxspace; color=boxcolor)
+    print(io, minhisttime)
+    printstyled(io, " " ^ ((histwidth - length(caption)) ÷ 2 - length(minhisttime)), caption; color=captioncolor)
     print(io, lpad(maxhisttime, ceil(Int, (histwidth - length(caption)) / 2) - 1), " ")
-    printstyled(io, "<"; bold=true)
-
-    # Memory info
-
-    print(io, "\n\n", pad, " Memory estimate")
-    printstyled(io, ": "; color=:light_black)
-    printstyled(io, memorystr; color=:yellow)
-    print(io, ", allocs estimate")
-    printstyled(io, ": "; color=:light_black)
-    printstyled(io, allocsstr; color=:yellow)
-    print(io, ".")
+    print(io, "+")
+    # printstyled(io, "●", color=:light_black)  # other options "⋯" "¹⁰⁰"
+end
+
+function Base.show(io::IO, m::MIME"text/plain", vt::AbstractVector{<:Trial})
+    bounds = map(vt) do t
+        # compute these exactly as in individual show methods:
+        mintime = minimum(t.times)
+        avgtime = mean(t.times)
+        quantime = quantile(t.times, 99/100)
+        lo = hist_round_low(t.times, mintime, avgtime)
+        hi = hist_round_high(t.times, avgtime, quantime)
+        (lo, hi)
+    end
+    histmin = get(io, :histmin, minimum(first, bounds))
+    histmax = get(io, :histmax, maximum(last, bounds))
+    ioc = IOContext(io, :histmin => histmin, :histmax => histmax)
+
+    print(io, summary(vt), ":")
+    for (i,t) in pairs(vt)
+        println(io)
+        show(IOContext(ioc, :histnumber => " [$i]"), m, t)
+    end
+end

 function Base.show(io::IO, ::MIME"text/plain", t::TrialEstimate)
@@ -531,3 +608,5 @@ function Base.show(io::IO, ::MIME"text/plain", t::TrialJudgement)
     printmemoryjudge(io, t)
     println(io, " (", prettypercent(params(t).memory_tolerance), " tolerance)")
 end
+
+
diff --git a/test/ExecutionTests.jl b/test/ExecutionTests.jl
index 4e72f76d..78f5b7de 100644
--- a/test/ExecutionTests.jl
+++ b/test/ExecutionTests.jl
@@ -135,8 +135,9 @@ tune!(b)
 # test kwargs separated by `,`
 @benchmark(output=sin(x), setup=(x=1.0; output=0.0), teardown=(@test output == sin(x)))

-for (tf, rex1, rex2) in ((false, r"0.5 ns +Histogram: frequency by time +8 ns", r"Histogram: frequency"),
-                         (true, r"0.5 ns +Histogram: log\(frequency\) by time +8 ns", r"Histogram: log\(frequency\)"))
+for (tf, rex1, rex2) in ((false, r"0.5 ns ", r" [0-9_]+ samples, each"),
+                         (missing, r"0.5 ns ", r" [0-9_]+ samples, each"),
+                         (true, r"0.5 ns +log\(counts\) from ", r"log\(counts\) from [0-9_]+ samples"))
     io = IOBuffer()
     ioctx = IOContext(io, :histmin=>0.5, :histmax=>8, :logbins=>tf)
     @show tf
@@ -154,13 +155,6 @@ for (tf, rex1, rex2) in ((false, r"0.5 ns +Histogram: frequency by time +8 ns",
     str = String(take!(io))
     idx = findfirst(rex2, str)
     @test isa(idx, UnitRange)
-    # A peaked distribution will trigger log by default
-    t = [fill(1, 21); 2]
-    b = BenchmarkTools.Trial(BenchmarkTools.DEFAULT_PARAMETERS, t/sum(t)*1e9*BenchmarkTools.DEFAULT_PARAMETERS.seconds, zeros(100), 0, 0)
-    show(ioctx, MIME("text/plain"), b)
-    str = String(take!(io))
-    idx = findfirst(rex2, str)
-    @test isa(idx, UnitRange)
 end

 #############

diff --git a/test/TrialsTests.jl b/test/TrialsTests.jl
index c9b9681f..64a6954a 100644
--- a/test/TrialsTests.jl
+++ b/test/TrialsTests.jl
@@ -3,6 +3,11 @@ module TrialsTests
 using BenchmarkTools
 using Test

+if !isdefined(@__MODULE__(), :contains)
+    # added in Julia 1.5, defined here to make tests pass on 1.0
+    contains(haystack::AbstractString, needle) = occursin(needle, haystack)
+end
+
 #########
 # Trial #
 #########
@@ -198,6 +203,10 @@ tj_r_2 = judge(tr; time_tolerance = 2.0, memory_tolerance = 2.0)
 @test BenchmarkTools.prettymemory(1073741823) == "1024.00 MiB"
 @test BenchmarkTools.prettymemory(1073741824) == "1.00 GiB"

+@test BenchmarkTools.prettycount(10) == "10"
+@test BenchmarkTools.prettycount(1023) == "1_023"
+@test BenchmarkTools.prettycount(40560789) == "40_560_789"
+
 @test sprint(show, "text/plain", ta) == sprint(show, ta; context=:compact => false) == """
 BenchmarkTools.TrialEstimate:
   time:             0.490 ns
@@ -233,17 +242,96 @@ else
 end

-trial = BenchmarkTools.Trial(BenchmarkTools.Parameters(), [1.0, 1.01], [0.0, 0.0], 0, 0)
-@test sprint(show, "text/plain", trial) == """
-BenchmarkTools.Trial: 2 samples with 1 evaluation.
- Range (min … max):  1.000 ns … 1.010 ns  ┊ GC (min … max): 0.00% … 0.00%
- Time  (median):     1.005 ns             ┊ GC (median):    0.00%
- Time  (mean ± σ):   1.005 ns ± 0.007 ns  ┊ GC (mean ± σ):  0.00% ± 0.00%
+# Trial with 0 samples
+t0 = BenchmarkTools.Trial(BenchmarkTools.Parameters(), [], [], 0, 0)
+@test sprint(show, "text/plain", t0) == "Trial: 0 samples"
+
+# Trial with 1 sample
+t001 = BenchmarkTools.Trial(BenchmarkTools.Parameters(), [pi * 10^6], [0], 0, 0)
+s001 = sprint(show, "text/plain", t001)
+@test contains(s001, "┌ Trial:")  # box starting at the type
+@test contains(s001, "│ time 3.142 ms")
+@test contains(s001, "│ 0 allocations\n")  # doesn't print 0 bytes after this
+@test contains(s001, "└ 1 sample, with 1 evaluation")
+
+# Histogram utils
+@test BenchmarkTools.asciihist([1,2,3]) == ['▃' '▆' '█']
+@test BenchmarkTools.asciihist([1,2,0,3], 2) == [' ' '▃' ' ' '█'; '▇' '█' '▁' '█']
+
+@test BenchmarkTools.histogram_bindata([1.1, 3.1, 99], 1:3) == [1,0,2]
+@test BenchmarkTools.histogram_bindata([1.1, -99, 3.1], 1:3.0) == [1,0,1]
+
+# Trials with several samples
+t003 = BenchmarkTools.Trial(BenchmarkTools.Parameters(), [0.01, 0.02, 0.04], [0,0,0], 0, 0)
+s003 = sprint(show, "text/plain", t003)
+@test contains(s003, " 1 ns +")  # right limit is 1ns
+@test contains(s003, "min 0.010 ns, median 0.020 ns, mean 0.023 ns, 99ᵗʰ 0.040 ns")
+
+t123 = BenchmarkTools.Trial(BenchmarkTools.Parameters(), [1,2,3.], [0,0,0.], 0, 0)
+s123 = sprint(show, "text/plain", t123)
+@test contains(s123, "min 1.000 ns, median 2.000 ns, mean 2.000 ns")
+@test contains(s123, " 0 allocations\n")  # doesn't print 0 bytes after this
+@test contains(s123, " ◑* ")  # median ◑ is shifted left
+@test contains(s123, "▁▁█▁▁")  # has a histogram, mostly zero
+@test contains(s123, "▁▁▁█ ▁\n")  # 3.0 fits in last bin, not the overflow
+@test endswith(s123, "3 ns +")  # right endpoint rounded to 3, no decimals
+@test contains(s123, "3 samples, each 1 evaluation")  # caption
+
+t456 = BenchmarkTools.Trial(BenchmarkTools.Parameters(), 100 * [1,1,3,14,16.], [0,0,2,0,0.], 456, 7)
+s456 = sprint(show, "text/plain", t456)
+@test contains(s456, "7 allocations, total 456 bytes")
+@test contains(s456, "GC time: mean 0.400 ns (0.06%), max 2.000 ns (0.67%)")
+@test contains(s456, "┌ Trial:")  # box starting at the type
+@test contains(s456, "│ ◔ ")  # 1st quartile lines up with bar
+@test contains(s456, "│ █▁▁▁▁▁▁▁")
+@test contains(s456, "└ 100 ns ")  # box closing + left endpoint without decimals
+
+# Compact show & arrays of Trials
+@test sprint(show, t001) == "Trial(3.142 ms)"
+@test sprint(show, t003) == "Trial(0.010 ns)"
+if VERSION >= v"1.6"  # 1.5 prints Array{T,1}
+    @test sprint(show, "text/plain", [t001, t003]) == "2-element Vector{BenchmarkTools.Trial}:\n 3.142 ms\n 0.010 ns"
+    @test_skip sprint(show, "text/plain", [t0]) == "1-element Vector{BenchmarkTools.Trial}:\n ??"
+    # this is an error on BenchmarkTools v1.2.1, and v0.4.3, probably long before:
+    # MethodError: reducing over an empty collection is not allowed
+end
+
+#=
+
+# Some visual histogram checks, in which mean/median should highlight a bar, or not:
+
+using BenchmarkTools: Trial, Parameters
+Trial(Parameters(), [pi * 10^9], [0], 0, 0)  # one sample
+
+# mean == median, one bar. Symbol for median moves to the left.
+Trial(Parameters(), [pi, pi], [0, 0], 0, 0)
+Trial(Parameters(), fill(101, 33), vcat(zeros(32), 50), 0, 0)
+
+# mean == median, three bars
+Trial(Parameters(), [3,4,5], [0,0,0], 0, 0)
+
+# three bars, including mean not median
+Trial(Parameters(), pi * [1,3,4,4], [0,0,0,100], 1, 1)
+
+# three bars, including median & both quartiles, but not mean
+Trial(Parameters(), 99.9 * [1,1,3,14,16], [0,0,99,0,0], 222, 2)
+
+# same, but smaller range. Note also max GC is not max time.
+Trial(Parameters(), 999 .+ [1,1,3,14,16], [0,0,123,0,0], 45e6, 7)
+
+
+# Check that auto-sizing stops on very small widths:
+io = IOContext(stdout, :displaysize => (25,30))
+show(io, MIME("text/plain"), Trial(Parameters(), [3,4,5], [0,0,0], 0, 0))
+show(io, MIME("text/plain"), Trial(Parameters(), repeat(100 * [3,4,5], 10^6), zeros(3*10^6), 0, 0))
+
+io = IOContext(stdout, :displaysize => (25,50), :logbins => true)  # this is wider
+show(io, MIME("text/plain"), Trial(Parameters(), 100 * [3,4,5], [0,0,0], 0, 0))

-  █                                                        █
-  █▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█ ▁
-  1 ns          Histogram: frequency by time          1.01 ns <
+# Check that data off the left is OK, and median still highlighted:
+io = IOContext(stdout, :histmin => 200.123)
+show(io, MIME("text/plain"), Trial(Parameters(), 99.9 * [1,1,3,14,16], [0,0,99,0,0], 222, 2))

- Memory estimate: 0 bytes, allocs estimate: 0."""
+=#

 end # module
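The `histogram_bindata` and `asciihist` tests in this patch pin down two small behaviours: fence-based binning via `searchsortedlast`, and scaling bin counts to Unicode block glyphs. A minimal standalone sketch of those two behaviours, where the `minibindata`/`miniasciihist` names and the single-row 8-glyph scaling rule are illustrative assumptions, not the package's actual implementation:

```julia
# Hypothetical helpers, NOT BenchmarkTools' own functions; they mimic the
# behaviour the tests above assert for histogram_bindata and asciihist.

# Count how many times fall into each fence-delimited bin.
# searchsortedlast returns 0 for values left of the first fence (dropped);
# the last bin collects everything to the right of the last fence.
function minibindata(times, fences)
    bins = zeros(Int, length(fences))
    for t in times
        i = searchsortedlast(fences, t)
        i >= 1 && (bins[i] += 1)
    end
    return bins
end

# Map bin counts to a single row of the eight Unicode block glyphs,
# scaling the tallest bin to the full-height bar.
const MINIBARS = collect("▁▂▃▄▅▆▇█")
function miniasciihist(bins)
    m = maximum(bins)
    m == 0 && return fill(' ', length(bins))
    return [MINIBARS[clamp(ceil(Int, 8b / m), 1, 8)] for b in bins]
end

minibindata([1.1, 3.1, 99], 1:3)             # → [1, 0, 2]
miniasciihist(minibindata([1.1, 3.1, 99], 1:3))  # bars for bins [1, 0, 2]
```

Values left of the first fence are simply dropped, which matches the `[1, 0, 1]` result asserted for `histogram_bindata([1.1, -99, 3.1], 1:3.0)`, while everything right of the last fence lands in the final bin.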