From 05b980e356d46c9b98a6c1dd3e2cc3ae6820f74b Mon Sep 17 00:00:00 2001 From: Ben Arthur Date: Mon, 24 Mar 2025 07:39:28 -0400 Subject: [PATCH 1/5] add dims kwarg --- src/mapreduce.jl | 28 +++++++++++++++++++++++++--- test/runtests.jl | 21 +++++++++++++++++++++ 2 files changed, 46 insertions(+), 3 deletions(-) diff --git a/src/mapreduce.jl b/src/mapreduce.jl index a28e431..4577c2f 100644 --- a/src/mapreduce.jl +++ b/src/mapreduce.jl @@ -51,9 +51,13 @@ end # Implementation for special cases and if fallback breaks in future julia versions -for fname in [:sum, :prod, :all, :any, :minimum, :maximum] - @eval Base.$fname(v::AbstractDiskArray) = Base.$fname(identity, v::AbstractDiskArray) - @eval function Base.$fname(f::Function, v::AbstractDiskArray) +for (fname, _fname) in ((:sum, :_sum), (:prod, :_prod), + (:all, :_all), (:any, :_any), + (:minimum, :_minimum), (:maximum, :_maximum)) + @eval Base.$fname(v::AbstractDiskArray; dims=:) = Base.$_fname(v, dims) + @eval Base.$fname(f::Function, v::AbstractDiskArray; dims=:) = Base.$_fname(f, v, dims) + @eval Base.$_fname(v::AbstractDiskArray, ::Colon) = Base.$_fname(identity, v, :) + @eval function Base.$_fname(f::Function, v::AbstractDiskArray, ::Colon) $fname(eachchunk(v)) do chunk $fname(f, v[chunk...]) end @@ -66,3 +70,21 @@ function Base.count(f, v::AbstractDiskArray) count(f, v[chunk...]) end end + +for (_fname, init, acum) in ((:_sum, :zero, :+), (:_prod, :one, :*), + (:_all, _->:true, :&), (:_any, _->:false, :|), + (:_maximum, :typemin, :max), (:_minimum, :typemax, :min)) + @eval Base.$_fname(a::AbstractDiskArray, dims) = Base.$_fname(identity, a, dims) + @eval function Base.$_fname(f::Function, a::AbstractDiskArray, dims) + _dims = typeof(dims)<:Tuple ? [dims...] : typeof(dims)<:Number ? [dims] : dims + out_dims = [size(a)...] + out_dims[_dims] .= 1 + out = fill($init(eltype(a)), out_dims...) + for c in eachchunk(a) + out_c = [c...] + out_c[_dims] .= Ref(1:1) + out[out_c...] .= $acum.(out[out_c...], Base.$_fname(f, a[c...], dims)) + end + return out + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 8ecefa2..6eaba41 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1059,3 +1059,24 @@ end end @test count(a) + count(!, a) == length(a) end + +@testset "dims kwarg" begin + a = ChunkedDiskArray(reshape(1:3*4*5, 3,4,5); chunksize=(2,3,4)) + for fname in (:sum, :prod, :minimum, :maximum) + @test @eval $fname(a) == $fname(Array(a)) + @test @eval $fname(a, dims=1) == $fname(Array(a), dims=1) + @test @eval $fname(a, dims=2) == $fname(Array(a), dims=2) + @test @eval $fname(a, dims=(1,2)) == $fname(Array(a), dims=(1,2)) + @eval out = @capture_out @trace $fname($a) DiskArrays + @test occursin("DiskGenerator", out) == false + end + b = ChunkedDiskArray(reshape(1:3*4*5 .> 10, 3,4,5); chunksize=(2,3,4)) + for fname in (:all, :any) + @test @eval $fname(b) == $fname(Array(b)) + @test @eval $fname(b, dims=1) == $fname(Array(b), dims=1) + @test @eval $fname(b, dims=2) == $fname(Array(b), dims=2) + @test @eval $fname(b, dims=(1,2)) == $fname(Array(b), dims=(1,2)) + @eval out = @capture_out @trace $fname($b) DiskArrays + @test occursin("DiskGenerator", out) == false + end +end From 597f4698685fef1169b16eef982089ce4dff08aa Mon Sep 17 00:00:00 2001 From: Ben Arthur Date: Mon, 24 Mar 2025 09:40:18 -0400 Subject: [PATCH 2/5] fix predicate functions --- src/mapreduce.jl | 2 +- test/runtests.jl | 32 ++++++++++++++++++-------------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/src/mapreduce.jl b/src/mapreduce.jl index 4577c2f..b4d7c34 100644 --- a/src/mapreduce.jl +++ b/src/mapreduce.jl @@ -79,7 +79,7 @@ for (_fname, init, acum) in ((:_sum, :zero, :+), (:_prod, :one, :*), _dims = typeof(dims)<:Tuple ? [dims...] : typeof(dims)<:Number ? [dims] : dims out_dims = [size(a)...] out_dims[_dims] .= 1 - out = fill($init(eltype(a)), out_dims...) + out = fill($init(Base.return_types(f, (eltype(a),))[1]), out_dims...) for c in eachchunk(a) out_c = [c...] out_c[_dims] .= Ref(1:1) diff --git a/test/runtests.jl b/test/runtests.jl index 6eaba41..e466ebb 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1062,21 +1062,25 @@ end @testset "dims kwarg" begin a = ChunkedDiskArray(reshape(1:3*4*5, 3,4,5); chunksize=(2,3,4)) - for fname in (:sum, :prod, :minimum, :maximum) - @test @eval $fname(a) == $fname(Array(a)) - @test @eval $fname(a, dims=1) == $fname(Array(a), dims=1) - @test @eval $fname(a, dims=2) == $fname(Array(a), dims=2) - @test @eval $fname(a, dims=(1,2)) == $fname(Array(a), dims=(1,2)) - @eval out = @capture_out @trace $fname($a) DiskArrays - @test occursin("DiskGenerator", out) == false + for fname in (:sum, :prod, :minimum, :maximum) + @test @eval $fname($a) == $fname(Array($a)) + @test @eval $fname($a, dims=1) == $fname(Array($a), dims=1) + @test @eval $fname($a, dims=2) == $fname(Array($a), dims=2) + @test @eval $fname($a, dims=(1,2)) == $fname(Array($a), dims=(1,2)) + @test @eval $fname(x->x/2, $a) ≈ $fname(x->x/2, Array($a)) + @test @eval $fname(x->x/2, $a, dims=(1,2)) ≈ $fname(x->x/2, Array($a), dims=(1,2)) + @eval out = @capture_out @trace $fname($a) DiskArrays + @test occursin("DiskGenerator", out) == false end b = ChunkedDiskArray(reshape(1:3*4*5 .> 10, 3,4,5); chunksize=(2,3,4)) - for fname in (:all, :any) - @test @eval $fname(b) == $fname(Array(b)) - @test @eval $fname(b, dims=1) == $fname(Array(b), dims=1) - @test @eval $fname(b, dims=2) == $fname(Array(b), dims=2) - @test @eval $fname(b, dims=(1,2)) == $fname(Array(b), dims=(1,2)) - @eval out = @capture_out @trace $fname($b) DiskArrays - @test occursin("DiskGenerator", out) == false + for fname in (:all, :any) + @test @eval $fname($b) == $fname(Array($b)) + @test @eval $fname($b, dims=1) == $fname(Array($b), dims=1) + @test @eval $fname($b, dims=2) == $fname(Array($b), dims=2) + @test @eval $fname($b, dims=(1,2)) == $fname(Array($b), dims=(1,2)) + @test @eval $fname(!, $b) == $fname(!, Array($b)) + @test @eval $fname(!, $b, dims=(1,2)) == $fname(!, Array($b), dims=(1,2)) + @eval out = @capture_out @trace $fname($b) DiskArrays + @test occursin("DiskGenerator", out) == false end end From 78e1181e796a70a715e34e0df9bc17d026ece9e3 Mon Sep 17 00:00:00 2001 From: Ben Arthur Date: Mon, 24 Mar 2025 10:34:14 -0400 Subject: [PATCH 3/5] more type stable per rafaqz review --- src/mapreduce.jl | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/mapreduce.jl b/src/mapreduce.jl index b4d7c34..f002fee 100644 --- a/src/mapreduce.jl +++ b/src/mapreduce.jl @@ -75,14 +75,16 @@ for (_fname, init, acum) in ((:_sum, :zero, :+), (:_prod, :one, :*), (:_all, _->:true, :&), (:_any, _->:false, :|), (:_maximum, :typemin, :max), (:_minimum, :typemax, :min)) @eval Base.$_fname(a::AbstractDiskArray, dims) = Base.$_fname(identity, a, dims) - @eval function Base.$_fname(f::Function, a::AbstractDiskArray, dims) + @eval function Base.$_fname(f::Function, a::AbstractDiskArray{T,N}, dims) where {T,N} _dims = typeof(dims)<:Tuple ? [dims...] : typeof(dims)<:Number ? [dims] : dims - out_dims = [size(a)...] - out_dims[_dims] .= 1 - out = fill($init(Base.return_types(f, (eltype(a),))[1]), out_dims...) + out_dims = ntuple(Val(N)) do i + i in _dims ? 1 : size(a)[i] + end + out = fill($init(Base.return_types(f, (T,))[1]), out_dims...) for c in eachchunk(a) - out_c = [c...] - out_c[_dims] .= Ref(1:1) + out_c = ntuple(Val(N)) do i + i in _dims ? (1:1) : c[i] + end out[out_c...] .= $acum.(out[out_c...], Base.$_fname(f, a[c...], dims)) end return out From c7dc455c7203042ff112f12c5ed6adfde5b33a7d Mon Sep 17 00:00:00 2001 From: Ben Arthur Date: Tue, 25 Mar 2025 07:44:02 -0400 Subject: [PATCH 4/5] even more type stable thanks to rafaqz --- src/mapreduce.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mapreduce.jl b/src/mapreduce.jl index f002fee..f7040e2 100644 --- a/src/mapreduce.jl +++ b/src/mapreduce.jl @@ -80,7 +80,8 @@ for (_fname, init, acum) in ((:_sum, :zero, :+), (:_prod, :one, :*), out_dims = ntuple(Val(N)) do i i in _dims ? 1 : size(a)[i] end - out = fill($init(Base.return_types(f, (T,))[1]), out_dims...) + T1 = Base.promote_op(f, T) + out = fill($init(T1), out_dims...) for c in eachchunk(a) out_c = ntuple(Val(N)) do i i in _dims ? (1:1) : c[i] From a2c4132432cb49102bc1bd82be0baa790c35ae37 Mon Sep 17 00:00:00 2001 From: Ben Arthur Date: Tue, 25 Mar 2025 07:47:30 -0400 Subject: [PATCH 5/5] fix tabs --- src/mapreduce.jl | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/mapreduce.jl b/src/mapreduce.jl index f7040e2..f5af91e 100644 --- a/src/mapreduce.jl +++ b/src/mapreduce.jl @@ -74,20 +74,20 @@ end for (_fname, init, acum) in ((:_sum, :zero, :+), (:_prod, :one, :*), (:_all, _->:true, :&), (:_any, _->:false, :|), (:_maximum, :typemin, :max), (:_minimum, :typemax, :min)) - @eval Base.$_fname(a::AbstractDiskArray, dims) = Base.$_fname(identity, a, dims) - @eval function Base.$_fname(f::Function, a::AbstractDiskArray{T,N}, dims) where {T,N} - _dims = typeof(dims)<:Tuple ? [dims...] : typeof(dims)<:Number ? [dims] : dims - out_dims = ntuple(Val(N)) do i - i in _dims ? 1 : size(a)[i] - end - T1 = Base.promote_op(f, T) - out = fill($init(T1), out_dims...) - for c in eachchunk(a) - out_c = ntuple(Val(N)) do i - i in _dims ? (1:1) : c[i] - end - out[out_c...] .= $acum.(out[out_c...], Base.$_fname(f, a[c...], dims)) - end - return out - end + @eval Base.$_fname(a::AbstractDiskArray, dims) = Base.$_fname(identity, a, dims) + @eval function Base.$_fname(f::Function, a::AbstractDiskArray{T,N}, dims) where {T,N} + _dims = typeof(dims)<:Tuple ? [dims...] : typeof(dims)<:Number ? [dims] : dims + out_dims = ntuple(Val(N)) do i + i in _dims ? 1 : size(a)[i] + end + T1 = Base.promote_op(f, T) + out = fill($init(T1), out_dims...) + for c in eachchunk(a) + out_c = ntuple(Val(N)) do i + i in _dims ? (1:1) : c[i] + end + out[out_c...] .= $acum.(out[out_c...], Base.$_fname(f, a[c...], dims)) + end + return out + end end