From 087497810e1582ae3f984a760d6afdede2b246de Mon Sep 17 00:00:00 2001
From: RainerHeintzmann <heintzmann@gmail.com>
Date: Thu, 20 Mar 2025 10:25:47 +0100
Subject: [PATCH 01/25] first changes

---
 Project.toml        | 5 -----
 src/FourierTools.jl | 3 ++-
 src/fft_helpers.jl  | 2 +-
 3 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/Project.toml b/Project.toml
index 2b6efa2..1ba7aca 100644
--- a/Project.toml
+++ b/Project.toml
@@ -10,20 +10,15 @@ IndexFunArrays = "613c443e-d742-454e-bfc6-1d7f8dd76566"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 NDTools = "98581153-e998-4eef-8d0d-5ec2c052313d"
 NFFT = "efe261a4-0d2b-5849-be55-fc731d526b0d"
-PaddedViews = "5432bcbf-9aad-5242-b902-cca2824c8663"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
-ShiftedArrays = "1277b4bf-5013-50f5-be3d-901d8477a67a"
 
 [compat]
 ChainRulesCore = "1, 1.0, 1.1"
 FFTW = "1.5"
 ImageTransformations = "0.9"
 IndexFunArrays = "0.2"
-NDTools = "0.5.1, 0.6, 0.7"
 NFFT = "0.11, 0.12, 0.13"
-PaddedViews = "0.5"
 Reexport = "1"
-ShiftedArrays = "2"
 Zygote = "0.6"
 julia = "1, 1.6, 1.7, 1.8, 1.9, 1.10"
 
diff --git a/src/FourierTools.jl b/src/FourierTools.jl
index de4e2f7..aee86b7 100644
--- a/src/FourierTools.jl
+++ b/src/FourierTools.jl
@@ -2,7 +2,8 @@ module FourierTools
 
 
 using Reexport
-using PaddedViews, ShiftedArrays
+# using PaddedViews
+# using ShiftedArrays
 @reexport using FFTW
 using LinearAlgebra
 using IndexFunArrays
diff --git a/src/fft_helpers.jl b/src/fft_helpers.jl
index bacd514..15ed047 100644
--- a/src/fft_helpers.jl
+++ b/src/fft_helpers.jl
@@ -16,7 +16,7 @@ optional_collect(a::AbstractArray) = collect(a)
 optional_collect(a::Array) = a 
 
 # for CircShiftedArray we only need collect if shifts is non-zero
-function optional_collect(csa::ShiftedArrays.CircShiftedArray)
+function optional_collect(csa::CircShiftedArray)
     if all(iszero.(csa.shifts))
         return optional_collect(parent(csa))
     else

From 0014f532d96a302dea82c6c67055c4dac58cd41a Mon Sep 17 00:00:00 2001
From: RainerHeintzmann <heintzmann@gmail.com>
Date: Thu, 20 Mar 2025 16:04:06 +0100
Subject: [PATCH 02/25] added Adapts stuff

---
 Project.toml                | 20 +++++++++---
 ext/CUDASupportExt.jl       | 50 +++++++++++++++++++++++++++++
 src/FourierTools.jl         |  4 +--
 src/custom_fourier_types.jl | 19 +++++++++--
 src/fourier_resizing.jl     | 64 ++++++++++++++++++-------------------
 src/fourier_rotate.jl       |  9 ++++--
 6 files changed, 122 insertions(+), 44 deletions(-)
 create mode 100644 ext/CUDASupportExt.jl

diff --git a/Project.toml b/Project.toml
index 1ba7aca..d6b1753 100644
--- a/Project.toml
+++ b/Project.toml
@@ -11,16 +11,20 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 NDTools = "98581153-e998-4eef-8d0d-5ec2c052313d"
 NFFT = "efe261a4-0d2b-5849-be55-fc731d526b0d"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
+ShiftedArrays = "1277b4bf-5013-50f5-be3d-901d8477a67a"
 
 [compat]
 ChainRulesCore = "1, 1.0, 1.1"
 FFTW = "1.5"
-ImageTransformations = "0.9"
+ImageTransformations = "0.9, 0.10"
 IndexFunArrays = "0.2"
 NFFT = "0.11, 0.12, 0.13"
 Reexport = "1"
-Zygote = "0.6"
-julia = "1, 1.6, 1.7, 1.8, 1.9, 1.10"
+ShiftedArrays = "2.0.0"
+Zygote = "0.6, 0.7"
+CUDA = "5.2, 5.3, 5.4, 5.5, 5.6"
+Adapt = "3.7, 4.0, 4.1"
+julia = "1, 1.6, 1.7, 1.8, 1.9, 1.10, 1.11"
 
 [extras]
 FractionalTransforms = "e50ca838-b4f0-4a10-ad18-4b920bf1ae5c"
@@ -29,6 +33,14 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 TestImages = "5e47fb64-e119-507b-a336-dd2b206d9990"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+
+[extensions]
+CUDASupportExt = ["CUDA", "Adapt"]
+
+[weakdeps]
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 
 [targets]
-test = ["Test", "TestImages", "FractionalTransforms", "Random", "ImageTransformations", "Zygote"]
+test = ["Test", "TestImages", "FractionalTransforms", "Random", "ImageTransformations", "Zygote", "CUDA"]
diff --git a/ext/CUDASupportExt.jl b/ext/CUDASupportExt.jl
new file mode 100644
index 0000000..6ed4aeb
--- /dev/null
+++ b/ext/CUDASupportExt.jl
@@ -0,0 +1,50 @@
+module CUDASupportExt
+using CUDA 
+using Adapt
+using ShiftedArrays
+using FourierTools
+using Base # to allow displaying such arrays without causing the single indexing CUDA error
+
+# define adapt structures for the ShiftedArrays model. This will not be needed if the PR is merged:
+Adapt.adapt_structure(to, x::CircShiftedArray{T, D}) where {T, D} = CircShiftedArray(adapt(to, parent(x)), shifts(x));
+parent_type(::Type{CircShiftedArray{T, N, S}})  where {T, N, S} = S
+Base.Broadcast.BroadcastStyle(::Type{T})  where {T<:CircShiftedArray} = Base.Broadcast.BroadcastStyle(parent_type(T))
+
+# cu_storage_type(::Type{T}) where {CT,CN,CD,T<:CuArray{CT,CN,CD}} = CD
+# lets do this for the ShiftedArray type
+# Adapt.adapt_structure(to, x::ShiftedArray{T, M, N}) where {T, M, N} = ShiftedArray(adapt(to, parent(x)), shifts(x); default=ShiftedArrays.default(x));
+
+# # function Base.Broadcast.BroadcastStyle(::Type{T})  where (CT,CN,CD,T<: ShiftedArray{<:Any,<:Any,<:Any,<:CuArray})
+# function Base.Broadcast.BroadcastStyle(::Type{T})  where {T2, N, CD, T<:ShiftedArray{<:Any,<:Any,<:Any,<:CuArray{T2,N,CD}}}
+#     CUDA.CuArrayStyle{N,CD}()
+# end
+
+# lets do this for the FourierSplit
+Adapt.adapt_structure(to, x::FourierTools.FourierSplit{T, M, AA}) where {T, M, AA} = FourierTools.FourierSplit(adapt(to, parent(x)), ndims(x), x.L1, x.L2, x.do_split);
+
+# function Base.Broadcast.BroadcastStyle(::Type{T})  where (CT,CN,CD,T<: ShiftedArray{<:Any,<:Any,<:Any,<:CuArray})
+function Base.Broadcast.BroadcastStyle(::Type{T})  where {T2, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{T2,N,CD}}}
+    CUDA.CuArrayStyle{N,CD}()
+end
+
+function Base.collect(x::T)  where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+    return copy(x) # stay on the GPU        
+end
+
+function Base.Array(x::T)  where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+    return Array(copy(x)) # stay on the GPU        
+end
+
+function Base.:(==)(x::T, y::AbstractArray)  where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
+    return all(x .== y)
+end
+
+function Base.:(==)(y::AbstractArray, x::T)  where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
+    return all(x .== y)
+end
+
+function Base.:(==)(x::T, y::T)  where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
+    return all(x .== y)
+end
+
+end
\ No newline at end of file
diff --git a/src/FourierTools.jl b/src/FourierTools.jl
index aee86b7..6befcc8 100644
--- a/src/FourierTools.jl
+++ b/src/FourierTools.jl
@@ -3,7 +3,7 @@ module FourierTools
 
 using Reexport
 # using PaddedViews
-# using ShiftedArrays
+using ShiftedArrays
 @reexport using FFTW
 using LinearAlgebra
 using IndexFunArrays
@@ -12,8 +12,6 @@ using NDTools
 @reexport using NFFT
 FFTW.set_num_threads(4)
 
-
-
 include("utils.jl")
 include("nfft_nd.jl")
 include("resampling.jl")
diff --git a/src/custom_fourier_types.jl b/src/custom_fourier_types.jl
index b2ff902..5cddf31 100644
--- a/src/custom_fourier_types.jl
+++ b/src/custom_fourier_types.jl
@@ -37,14 +37,29 @@ Base.size(A::FourierSplit) = size(parent(A))
 
 @inline function Base.getindex(A::FourierSplit{T,N, <:AbstractArray{T, N}}, i::Vararg{Int,N}) where {T,N}
     if (i[A.D]==A.L2 || i[A.D]==A.L1) && A.do_split # index along this dimension A.D corrsponds to slice L2
-        # not that "setindex" in the line below modifies only the index, not the array
-        @inbounds return parent(A)[Base.setindex(i,A.L1, A.D)...] / 2
+        # note that "setindex" in the line below modifies only the index, not the array
+        @inbounds return parent(A)[Base.setindex(i, A.L1, A.D)...] / 2
     else i[A.D]==A.L2
         @inbounds return parent(A)[i...]
         # @inbounds return parent(A)[i...]
     end
 end
 
+# One-D version
+@inline function Base.getindex(A::FourierSplit{T,N, <:AbstractArray{T, N}}, i::Int) where {T,N}
+    if A.do_split
+        # compute the ND index from the one-D index i
+        ind = Tuple(CartesianIndices(parent(A))[i])
+        if (ind[A.D]==A.L2 || ind[A.D]==A.L1) 
+            return parent(A)[Base.setindex(ind, A.L1, A.D)...] / 2
+        else
+            return parent(A)[i]
+        end
+    else
+        return parent(A)[i]
+    end
+end
+
 """
     FourierJoin{T,N, AA<:AbstractArray{T, N}} <: AbstractArray{T, N}
 
diff --git a/src/fourier_resizing.jl b/src/fourier_resizing.jl
index 1c5f319..9e66c63 100644
--- a/src/fourier_resizing.jl
+++ b/src/fourier_resizing.jl
@@ -93,38 +93,38 @@ function select_region_rft(mat, old_size, new_size)
         rft_fix_before(mat, old_size, new_size), rft_new_size), old_size, new_size)
 end
 
-"""
-    select_region(mat; new_size)
-
-performs the necessary Fourier-space operations of resampling
-in the space of ft (meaning the already circshifted version of fft).
-
-`new_size`.
-The size of the array view after the operation finished. 
-
-`center`.
-Specifies the center of the new view in coordinates of the old view. By default an alignment of the Fourier-centers is assumed.
-# Examples
-```jldoctest
-julia> using FFTW, FourierTools
-
-julia> select_region(ones(3,3),new_size=(7,7),center=(1,3))
-7×7 PaddedView(0.0, OffsetArray(::Matrix{Float64}, 4:6, 2:4), (Base.OneTo(7), Base.OneTo(7))) with eltype Float64:
- 0.0  0.0  0.0  0.0  0.0  0.0  0.0
- 0.0  0.0  0.0  0.0  0.0  0.0  0.0
- 0.0  0.0  0.0  0.0  0.0  0.0  0.0
- 0.0  1.0  1.0  1.0  0.0  0.0  0.0
- 0.0  1.0  1.0  1.0  0.0  0.0  0.0
- 0.0  1.0  1.0  1.0  0.0  0.0  0.0
- 0.0  0.0  0.0  0.0  0.0  0.0  0.0
-```
-"""
-function select_region(mat; new_size=size(mat), center=ft_center_diff(size(mat)).+1, pad_value=zero(eltype(mat)))
-    new_size = Tuple(expand_size(new_size, size(mat)))
-    center = Tuple(expand_size(center, ft_center_diff(size(mat)) .+ 1))
-    oldcenter = ft_center_diff(new_size) .+ 1
-    PaddedView(pad_value, mat, new_size, oldcenter .- center.+1);
-end
+# """
+#     select_region(mat; new_size)
+
+# performs the necessary Fourier-space operations of resampling
+# in the space of ft (meaning the already circshifted version of fft).
+
+# `new_size`.
+# The size of the array view after the operation finished. 
+
+# `center`.
+# Specifies the center of the new view in coordinates of the old view. By default an alignment of the Fourier-centers is assumed.
+# # Examples
+# ```jldoctest
+# julia> using FFTW, FourierTools
+
+# julia> select_region(ones(3,3),new_size=(7,7),center=(1,3))
+# 7×7 PaddedView(0.0, OffsetArray(::Matrix{Float64}, 4:6, 2:4), (Base.OneTo(7), Base.OneTo(7))) with eltype Float64:
+#  0.0  0.0  0.0  0.0  0.0  0.0  0.0
+#  0.0  0.0  0.0  0.0  0.0  0.0  0.0
+#  0.0  0.0  0.0  0.0  0.0  0.0  0.0
+#  0.0  1.0  1.0  1.0  0.0  0.0  0.0
+#  0.0  1.0  1.0  1.0  0.0  0.0  0.0
+#  0.0  1.0  1.0  1.0  0.0  0.0  0.0
+#  0.0  0.0  0.0  0.0  0.0  0.0  0.0
+# ```
+# """
+# function select_region(mat; new_size=size(mat), center=ft_center_diff(size(mat)).+1, pad_value=zero(eltype(mat)))
+#     new_size = Tuple(expand_size(new_size, size(mat)))
+#     center = Tuple(expand_size(center, ft_center_diff(size(mat)) .+ 1))
+#     oldcenter = ft_center_diff(new_size) .+ 1
+#     PaddedView(pad_value, mat, new_size, oldcenter .- center.+1);
+# end
 
 function ft_pad(mat, new_size)
     return select_region(mat; new_size = new_size)
diff --git a/src/fourier_rotate.jl b/src/fourier_rotate.jl
index 625b7f0..568d30e 100644
--- a/src/fourier_rotate.jl
+++ b/src/fourier_rotate.jl
@@ -22,11 +22,13 @@ function rotate(arr, θ, rotation_plane=(1, 2); adapt_size=true, keep_new_size=f
         a,b = rotation_plane
         old_size = size(arr)
 
+        pad_value = eltype(arr)(pad_value)
+
         # enforce an odd size along these dimensions, to simplify the potential flips below.
         arr = let
             if iseven(size(arr,a)) || iseven(size(arr,b))
                 new_size = size(arr) .+ ntuple(i-> (i==a || i==b) ? iseven(size(arr,i)) : 0, ndims(arr))
-                select_region(arr, new_size=new_size, pad_value=pad_value)
+                select_region_view(arr, new_size=new_size, pad_value=pad_value)
             else
                 arr
             end
@@ -53,7 +55,8 @@ function rotate(arr, θ, rotation_plane=(1, 2); adapt_size=true, keep_new_size=f
                 0
             end
         end
-        arr = select_region(arr, new_size=old_size .+ extra_size, pad_value=pad_value)
+
+        arr = select_region_view(arr, new_size=old_size .+ extra_size, pad_value=pad_value)
         # convert to radiants
 
         # parameters for shearing
@@ -67,7 +70,7 @@ function rotate(arr, θ, rotation_plane=(1, 2); adapt_size=true, keep_new_size=f
         if keep_new_size || size(arr) == old_size
             return arr
         else
-            return select_region(arr, new_size=old_size, pad_value=pad_value)
+            return select_region_view(arr, new_size=old_size, pad_value=pad_value)
         end
     else
         return rotate!(copy(arr), θ, rotation_plane) 

From da99701f2b3b8b8f5ca09bbe81bf44d2220858f7 Mon Sep 17 00:00:00 2001
From: RainerHeintzmann <heintzmann@gmail.com>
Date: Thu, 20 Mar 2025 18:13:02 +0100
Subject: [PATCH 03/25] replaced test files

---
 ext/CUDASupportExt.jl                    |   2 +
 test/circ_shifted_arrays.jl              |  41 +++
 test/convolutions.jl                     |  79 +++---
 test/correlations.jl                     |  16 +-
 test/custom_fourier_types.jl             |   2 +-
 test/czt.jl                              |   2 +-
 test/fft_helpers.jl                      |  33 +--
 test/fftshift_alternatives.jl            |   6 +-
 test/fourier_rotate.jl                   |  23 +-
 test/fourier_shear.jl                    |  22 +-
 test/fourier_shifting.jl                 |  25 +-
 test/resampling_tests.jl                 |  49 ++--
 test/runtests.jl                         |  42 +--
 test/utils.jl                            |  47 ++--
 test_old/convolutions.jl                 | 109 ++++++++
 test_old/correlations.jl                 |  14 +
 test_old/custom_fourier_types.jl         |  19 ++
 test_old/czt.jl                          |  39 +++
 test_old/damping.jl                      |  14 +
 test_old/fft_helpers.jl                  |  83 ++++++
 test_old/fftshift_alternatives.jl        |  45 ++++
 test_old/fourier_filtering.jl            |  44 +++
 test_old/fourier_rotate.jl               |  44 +++
 test_old/fourier_shear.jl                |  46 ++++
 test_old/fourier_shifting.jl             |  81 ++++++
 test_old/fractional_fourier_transform.jl |  45 ++++
 test_old/nfft_tests.jl                   |  25 ++
 test_old/resampling_tests.jl             | 325 +++++++++++++++++++++++
 test_old/runtests.jl                     |  30 +++
 test_old/sdft.jl                         | 102 +++++++
 test_old/utils.jl                        | 141 ++++++++++
 31 files changed, 1439 insertions(+), 156 deletions(-)
 create mode 100644 test/circ_shifted_arrays.jl
 create mode 100644 test_old/convolutions.jl
 create mode 100644 test_old/correlations.jl
 create mode 100644 test_old/custom_fourier_types.jl
 create mode 100644 test_old/czt.jl
 create mode 100644 test_old/damping.jl
 create mode 100644 test_old/fft_helpers.jl
 create mode 100644 test_old/fftshift_alternatives.jl
 create mode 100644 test_old/fourier_filtering.jl
 create mode 100644 test_old/fourier_rotate.jl
 create mode 100644 test_old/fourier_shear.jl
 create mode 100644 test_old/fourier_shifting.jl
 create mode 100644 test_old/fractional_fourier_transform.jl
 create mode 100644 test_old/nfft_tests.jl
 create mode 100644 test_old/resampling_tests.jl
 create mode 100644 test_old/runtests.jl
 create mode 100644 test_old/sdft.jl
 create mode 100644 test_old/utils.jl

diff --git a/ext/CUDASupportExt.jl b/ext/CUDASupportExt.jl
index 6ed4aeb..1341735 100644
--- a/ext/CUDASupportExt.jl
+++ b/ext/CUDASupportExt.jl
@@ -47,4 +47,6 @@ function Base.:(==)(x::T, y::T)  where {CT, N, CD, T<:FourierTools.FourierSplit{
     return all(x .== y)
 end
 
+optional_collect(a::CuArray) = a 
+
 end
\ No newline at end of file
diff --git a/test/circ_shifted_arrays.jl b/test/circ_shifted_arrays.jl
new file mode 100644
index 0000000..81567d3
--- /dev/null
+++ b/test/circ_shifted_arrays.jl
@@ -0,0 +1,41 @@
+@testset "Convolution methods" begin
+    # a = reshape(1:1000000,(1000,1000)) .+ 0
+    # CUDA.allowscalar(false);
+    sz = (15,12)
+    myshift = (4,3)
+    a = reshape(1:prod(sz),sz) .+ 0
+    c = CircShiftedArray(a,myshift);
+    b = copy(a)
+    d = c .+ c;
+
+    @test (c == c .+0)
+
+    ca = circshift(a, myshift)
+    # they are not the same but numerically the same:
+    @test (c != ca)
+    @test (collect(c) == ca)
+
+    # adding a constant does not change the type
+    @test typeof(c) == typeof(c .+ 0)
+    # adding another CSA does not change the type
+    b .= c
+    @test b == collect(c)
+    cc = CircShiftedArray(c,.-myshift)
+    @test a == collect(cc)
+
+    # assignment into a CSA
+    d .= a
+    @test d[1,1] == a[1,1]
+    @test collect(d) == a
+
+
+    # try a complicated broadcast expression
+    @test ca.+ 2 .* sin.(ca) == collect(c.+2 .*sin.(c))
+
+    #@run foo(c)
+    @test sum(a, dims=1) != collect(sum(c,dims=1))
+    @test sum(ca,dims=1) == collect(sum(c,dims=1))
+    @test sum(a, dims=2) != collect(sum(c,dims=2))
+    @test sum(ca,dims=2) == collect(sum(c,dims=2))
+
+end
\ No newline at end of file
diff --git a/test/convolutions.jl b/test/convolutions.jl
index 1018eb3..ab89167 100644
--- a/test/convolutions.jl
+++ b/test/convolutions.jl
@@ -5,11 +5,12 @@
     function conv_test(psf, img, img_out, dims, s)
         otf = fft(psf, dims)
         otf_r = rfft(psf, dims)
-        otf_p, conv_p = plan_conv(img, psf, dims, flags=FFTW.ESTIMATE)
+        # otf_p, conv_p = plan_conv(img, psf, dims, flags=FFTW.ESTIMATE)
+        otf_p, conv_p = plan_conv(img, psf, dims)
         otf_p2, conv_p2 = plan_conv(img .+ 0.0im, 0.0im .+ psf, dims)
         otf_p3, conv_p3 = plan_conv_psf(img, fftshift(psf,dims), dims)
-        otf_p3, conv_p3 = plan_conv_psf(img, fftshift(psf,dims), dims, flags=FFTW.MEASURE)
-        otf_p4, conv_p4 = plan_conv_psf_buffer(img, fftshift(psf,dims), dims, flags=FFTW.MEASURE)
+        # otf_p3, conv_p3 = plan_conv_psf(img, fftshift(psf,dims), dims, flags=FFTW.MEASURE)
+        otf_p4, conv_p4 = plan_conv_psf_buffer(img, fftshift(psf,dims), dims) # , flags=FFTW.MEASURE
         @testset "$s" begin
             @test img_out ≈ conv(0.0im .+ img, psf, dims)
             @test img_out ≈ conv(img, psf, dims)
@@ -27,45 +28,51 @@
     N = 5
     psf = zeros((N, N))
     psf[1, 1] = 1
-    img = randn((N, N))
+    psf = opt_cu(psf, use_cuda)
+    img = opt_cu(randn((N, N)), use_cuda)
     conv_test(psf, img, img, [1,2], "Convolution random image with delta peak")
 
 
     N = 5
     psf = zeros((N, N))
     psf[1, 1] = 1
-    img = randn((N, N, N))
+    psf = opt_cu(psf, use_cuda)
+    img = opt_cu(randn((N, N, N)), use_cuda)
     conv_test(psf, img, img, [1,2], "Convolution with different dimensions psf, img delta")
 
 
     N = 5
-    psf = abs.(randn((N, N, 2)))
-    img = randn((N, N, 2))
+    psf = opt_cu(abs.(randn((N, N, 2))), use_cuda)
+    img = opt_cu(randn((N, N, 2)), use_cuda)
     dims = [1, 2]
     img_out = conv_gen(img, psf, dims)
     conv_test(psf, img, img_out, dims, "Convolution with random 3D PSF and random 3D image over 2D dimensions")
- 
-    N = 5
-    psf = abs.(randn((N, N, N, N, N)))
-    img = randn((N, N, N, N, N))
-    dims = [1, 2, 3, 4]
-    img_out = conv_gen(img, psf, dims)
-    conv_test(psf, img, img_out, dims, "Convolution with random 5D PSF and random 5D image over 4 Dimensions")
 
-    N = 5
-    psf = abs.(zeros((N, N, N, N, N)))
-    for i = 1:N
-        psf[1,1,1,1, i] = 1
+    # Cuda has problems with >3D FFTs
+    if (!use_cuda)
+        N = 5
+        psf = opt_cu(abs.(randn((N, N, N, N, N))), use_cuda)
+        img = opt_cu(randn((N, N, N, N, N)), use_cuda)
+        dims = [1, 2, 3, 4]
+        img_out = conv_gen(img, psf, dims)
+        conv_test(psf, img, img_out, dims, "Convolution with random 5D PSF and random 5D image over 4 Dimensions")
+
+        N = 5
+        psf = abs.(zeros((N, N, N, N, N)))
+        for i = 1:N
+            psf[1,1,1,1, i] = 1
+        end
+        opt_cu(psf, use_cuda)
+        img = opt_cu(randn((N, N, N, N, N)), use_cuda)
+        dims = [1, 2, 3, 4]
+        img_out = conv_gen(img, psf, dims)
+        conv_test(psf, img, img, dims, "Convolution with 5D delta peak and random 5D image over 4 Dimensions")
     end
-    img = randn((N, N, N, N, N))
-    dims = [1, 2, 3, 4]
-    img_out = conv_gen(img, psf, dims)
-    conv_test(psf, img, img, dims, "Convolution with 5D delta peak and random 5D image over 4 Dimensions")
 
 
     @testset "Check broadcasting convolution" begin
-        img = randn((5,6,7))
-        psf = randn((5,6,7, 2, 3))
+        img = opt_cu(randn((5,6,7)), use_cuda)
+        psf = opt_cu(randn((5,6,7, 2, 3)), use_cuda)
         _, p = plan_conv_buffer(img, psf)
         @test conv(img, psf) ≈ p(img)
     end
@@ -73,8 +80,8 @@
 
    @testset "Check types" begin
         N = 10
-        img = randn(Float32, (N, N))
-        psf = abs.(randn(Float32, (N, N)))
+        img = opt_cu(randn(Float32, (N, N)), use_cuda)
+        psf = opt_cu(abs.(randn(Float32, (N, N))), use_cuda)
         dims = [1, 2] 
         @test typeof(conv_gen(img, psf, dims)) == typeof(conv(img, psf))
         @test typeof(conv_gen(img, psf, dims)) != typeof(conv(img .+ 0f0im, psf))
@@ -89,21 +96,23 @@
 
     @testset "dims argument nothing" begin
         N = 5
-        psf = abs.(randn((N, N, N, N, N)))
-        img = randn((N, N, N, N, N))
-        dims = [1,2,3,4,5] 
+        psf = opt_cu(abs.(randn((N, N, N))), use_cuda)
+        img = opt_cu(randn((N, N, N)), use_cuda)
+        dims = [1,2,3] 
         @test conv(psf, img) ≈ conv(img, psf, dims)
         @test conv(psf, img) ≈ conv(psf, img, dims)
         @test conv(img, psf) ≈ conv(img, psf, dims)
     end
 
-    @testset "adjoint convolution" begin
-        x = randn(ComplexF32, (5,6))
-        y = randn(ComplexF32, (5,6))
+    if (!use_cuda)
+        @testset "adjoint convolution" begin
+            x = opt_cu(randn(ComplexF32, (5,6)), use_cuda)
+            y = opt_cu( randn(ComplexF32, (5,6)), use_cuda)
 
-        y_ft, p = plan_conv(x, y)
-        @test ≈(exp(1im * 1.23) .+ conv(ones(eltype(y), size(x)), conj.(y)), exp(1im * 1.23) .+ Zygote.gradient(x -> sum(real(conv(x, y))), x)[1], rtol=1e-4)   
-        @test ≈(exp(1im * 1.23) .+ conv(ones(ComplexF32, size(x)), conj.(y)), exp(1im * 1.23) .+ Zygote.gradient(x -> sum(real(p(x, y_ft))), x)[1], rtol=1e-4) 
+            y_ft, p = plan_conv(x, y)
+            @test ≈(exp(1im * 1.23) .+ conv(ones(eltype(y), size(x)), conj.(y)), exp(1im * 1.23) .+ Zygote.gradient(x -> sum(real(conv(x, y))), x)[1], rtol=1e-4)   
+            @test ≈(exp(1im * 1.23) .+ conv(ones(ComplexF32, size(x)), conj.(y)), exp(1im * 1.23) .+ Zygote.gradient(x -> sum(real(p(x, y_ft))), x)[1], rtol=1e-4) 
+        end
     end
 
 end
diff --git a/test/correlations.jl b/test/correlations.jl
index 609b439..9f26935 100644
--- a/test/correlations.jl
+++ b/test/correlations.jl
@@ -1,14 +1,12 @@
-
-
 @testset "Correlations methods" begin
      
-    @test ccorr([1, 0], [1, 0], centered = true) == [0.0, 1.0]
-    @test ccorr([1, 0], [1, 0]) == [1.0, 0.0]
+    @test ccorr(opt_cu([1, 0], use_cuda), opt_cu([1, 0], use_cuda), centered = true) == opt_cu([0.0, 1.0], use_cuda)
+    @test ccorr(opt_cu([1, 0], use_cuda), opt_cu([1, 0], use_cuda)) == opt_cu([1.0, 0.0], use_cuda)
     
-    x = [1,2,3,4,5]
-    y = [1,2,3,4,5]
-    @test ccorr(x,y) ≈ [55, 45, 40, 40, 45]
-    @test ccorr(x,y, centered=true) ≈ [40, 45, 55, 45, 40]
+    x = opt_cu([1,2,3,4,5], use_cuda)
+    y = opt_cu([1,2,3,4,5], use_cuda)
+    @test ccorr(x,y) ≈ opt_cu([55, 45, 40, 40, 45], use_cuda)
+    @test ccorr(x,y, centered=true) ≈ opt_cu([40, 45, 55, 45, 40], use_cuda)
 
-    @test ccorr(x, x .* (1im)) == ComplexF64[0.0 - 55.0im, 0.0 - 45.0im, 0.0 - 40.0im, 0.0 - 40.0im, 0.0 - 45.0im]
+    @test ccorr(x, x .* (1im)) ≈ opt_cu(ComplexF64[0.0 - 55.0im, 0.0 - 45.0im, 0.0 - 40.0im, 0.0 - 40.0im, 0.0 - 45.0im], use_cuda)
 end
diff --git a/test/custom_fourier_types.jl b/test/custom_fourier_types.jl
index d735c27..6049f51 100644
--- a/test/custom_fourier_types.jl
+++ b/test/custom_fourier_types.jl
@@ -1,7 +1,7 @@
 
 @testset "Custom Fourier Types" begin
     N = 5
-    x = randn((N, N))
+    x = opt_cu(randn((N, N)), use_cuda)
     fs = FourierTools.FourierSplit(x, 2, 2, 4, true)
     @test FourierTools.parenttype(fs) == typeof(x)
     fs = FourierTools.FourierSplit(x, 2, 2, 4, false)
diff --git a/test/czt.jl b/test/czt.jl
index 3ec04bf..462b695 100644
--- a/test/czt.jl
+++ b/test/czt.jl
@@ -2,7 +2,7 @@ using NDTools # this is needed for the select_region! function below.
 
 @testset "chirp z-transformation" begin
     @testset "czt" begin
-        x = randn(ComplexF32, (5,6,7))
+        x = opt_cu(randn(ComplexF32, (5,6,7)), use_cuda)
         @test eltype(czt(x, (2.0,2.0,2.0))) == ComplexF32
         @test eltype(czt(x, (2f0,2f0,2f0))) == ComplexF32
         @test ≈(czt(x, (1.0,1.0,1.0), (1,3)), ft(x, (1,3)), rtol=1e-5)
diff --git a/test/fft_helpers.jl b/test/fft_helpers.jl
index badff06..391e77a 100644
--- a/test/fft_helpers.jl
+++ b/test/fft_helpers.jl
@@ -1,7 +1,7 @@
 @testset "test fft_helpers" begin
 
     @testset "Optional collect" begin
-        y = [1,2,3]
+        y = opt_cu([1,2,3],use_cuda)
         x = fftshift_view(y, (1))
         @test fftshift(y) == FourierTools.optional_collect(x)
     end
@@ -14,10 +14,11 @@
         testiffts(arr, dims) = @test(iffts(arr, dims) ≈ ifft(ifftshift(arr, dims), dims))
         testrft(arr, dims) = @test(rffts(arr, dims) ≈ fftshift(rfft(arr, dims), dims[2:end]))
         testirft(arr, dims, d) = @test(irffts(arr, d, dims) ≈ irfft(ifftshift(arr, dims[2:end]), d, dims))
-        for dim = 1:4
+        maxdim = ifelse(use_cuda, 3, 4)
+        for dim = 1:maxdim
             for _ in 1:3
                 s = ntuple(_ -> rand(1:13), dim)
-                arr = randn(ComplexF32, s)
+                arr = opt_cu(randn(ComplexF32, s), use_cuda)
                 dims = 1:dim
                 testft(arr, dims)
                 testift(arr, dims)
@@ -33,7 +34,7 @@
 
 
     @testset "Test 2d fft helpers" begin
-        arr = randn((6,7,8))
+        arr = opt_cu(randn((6,7,8)), use_cuda)
         dims = [1,2]
         d = 6
         @test(ft2d(arr) == fftshift(fft(ifftshift(arr, (1,2)), (1,2)), dims))
@@ -50,7 +51,7 @@
         @test(fftshift2d_view(arr) == fftshift_view(arr, (1,2)))
         @test(ifftshift2d_view(arr) == ifftshift_view(arr, (1,2)))
 
-        arr = randn(ComplexF32, (4,7,8))
+        arr = opt_cu(randn(ComplexF32, (4,7,8)), use_cuda)
         @test(irffts2d(arr, d) == irfft(ifftshift(arr, dims[2:2]), d, (1,2)))
         @test(irft2d(arr, d) == irft(arr, d, (1,2))) 
         @test(irfft2d(arr, d) == irfft(arr, d, (1,2))) 
@@ -60,24 +61,26 @@
     @testset "Test ft, ift, rft and irft real space centering" begin
         szs = ((10,10),(11,10),(100,101),(101,101))
         for sz in szs
-            @test ft(ones(sz)) ≈ prod(sz) .* delta(sz)
-            @test ft(delta(sz)) ≈ ones(sz)
-            @test rft(ones(sz)) ≈ prod(sz) .* delta(rft_size(sz), offset=CtrRFT)
-            @test rft(delta(sz)) ≈ ones(rft_size(sz))
-            @test ift(ones(sz)) ≈ delta(sz)
-            @test ift(delta(sz)) ≈ ones(sz) ./ prod(sz)
-            @test irft(ones(rft_size(sz)),sz[1]) ≈ delta(sz)
-            @test irft(delta(rft_size(sz),offset=CtrRFT),sz[1]) ≈ ones(sz) ./ prod(sz)
+            my_ones = opt_cu(ones(sz), use_cuda)
+            my_delta = opt_cu(collect(delta(sz)), use_cuda)
+            @test ft(my_ones) ≈ prod(sz) .* my_delta
+            @test ft(my_delta) ≈ my_ones
+            @test rft(my_ones) ≈ prod(sz) .* opt_cu(delta(rft_size(sz), offset=CtrRFT), use_cuda)
+            @test rft(my_delta) ≈ opt_cu(ones(rft_size(sz)), use_cuda)
+            @test ift(my_ones) ≈ my_delta
+            @test ift(my_delta) ≈ my_ones ./ prod(sz)
+            # needing to specify Complex datatype. Is a CUDA bug for irfft (!!!)
+            @test irft(opt_cu(ones(ComplexF64, rft_size(sz)), use_cuda), sz[1]) ≈ my_delta
+            @test irft(opt_cu(collect(delta(ComplexF64, rft_size(sz), offset=CtrRFT)), use_cuda), sz[1]) ≈ my_ones ./ prod(sz)
         end
     end
 
 
     @testset "Test in place methods" begin
-        x = randn(ComplexF32, (5,3,10))
+        x = opt_cu(randn(ComplexF32, (5,3,10)), use_cuda)
         dims = (1,2)
         @test fftshift(fft(x, dims), dims) ≈ ffts!(copy(x), dims)
         @test ffts2d!(copy(x)) ≈ ffts!(copy(x), (1,2))
     end
 
-
 end
diff --git a/test/fftshift_alternatives.jl b/test/fftshift_alternatives.jl
index e5f0f5f..4d37450 100644
--- a/test/fftshift_alternatives.jl
+++ b/test/fftshift_alternatives.jl
@@ -1,7 +1,7 @@
 @testset "fftshift alternatives" begin
     @testset "Test fftshift_view and ifftshift_view" begin
         Random.seed!(42)
-        x = randn((2,1,4,1,6,7,4,7))
+        x = opt_cu(randn((2,1,4,1,6,7,4,7)), use_cuda);
         dims = (4,6,7)
         @test fftshift(x,dims) == FourierTools.fftshift_view(x, dims)
         @test ifftshift(x,dims) == FourierTools.ifftshift_view(x, dims)
@@ -10,18 +10,18 @@
         @test x === FourierTools.optional_collect(ifftshift_view(fftshift_view(x, dims), dims))
         @test x === FourierTools.optional_collect(fftshift_view(ifftshift_view(x, dims), dims))
 
-        x = randn((13, 13, 14))
+        x = opt_cu(randn((13, 13, 14)), use_cuda);
         @test fftshift(x) == FourierTools.fftshift_view(x)
         @test ifftshift(x) == FourierTools.ifftshift_view(x)
         @test fftshift(x, (2,3)) == FourierTools.fftshift_view(x, (2,3))
         @test ifftshift(x, (2,3) ) == FourierTools.ifftshift_view(x, (2,3))
-
     end
 end
 
 
 @testset "fftshift and ifftshift in-place" begin
     function f(arr, dims)
+        arr = opt_cu(arr, use_cuda)
         arr3 = copy(arr)
         @test fftshift(arr, dims) == FourierTools._fftshift!(copy(arr), arr, dims)
         @test arr3 == arr
diff --git a/test/fourier_rotate.jl b/test/fourier_rotate.jl
index fb33fb9..52cb362 100644
--- a/test/fourier_rotate.jl
+++ b/test/fourier_rotate.jl
@@ -3,7 +3,7 @@
     @testset "Compare with ImageTransformations" begin
         
         function f(θ)
-            x = 1.0 .* range(0.0, 1.0, length=256)' .* range(0.0, 1.0, length=256)
+            x = opt_cu(1.0 .* range(0.0, 1.0, length=256)' .* range(0.0, 1.0, length=256), use_cuda)
             f(x) = sin(x * 20) + tan(1.2 * x) + sin(x) + cos(1.1323 * x) * x^3 + x^3 + 0.23 * x^4 + sin(1/(x+0.1))
             img = 5 .+ abs.(f.(x))
             img ./= maximum(img)
@@ -13,25 +13,28 @@
         
             m = sum(img) / length(img)
 
-            img_1 = parent(ImageTransformations.imrotate(img, θ, m))
-            z = ones(Float32, size(img_1))
+            img_1 = opt_cu(parent(ImageTransformations.imrotate(collect(img), θ, m)), use_cuda)
+            z = opt_cu(ones(Float32, size(img_1)), use_cuda)
             z .*= m
             FourierTools.center_set!(z, img)
-            img_2 = FourierTools.rotate(z, θ, pad_value=img_1[1,1])
-            img_2b = FourierTools.center_extract(FourierTools.rotate(z, θ, pad_value=img_1[1,1], keep_new_size=true), size(img_2))
-            img_3 = real(FourierTools.rotate(z .+ 0im, θ, pad_value=img_1[1,1]))
+            pad_val = collect(img_1[1:1,1:1])[1]
+            img_2 = FourierTools.rotate(z, θ, pad_value=pad_val)
+            img_2b = FourierTools.center_extract(FourierTools.rotate(z, θ, pad_value=pad_val, keep_new_size=true), size(img_2))
+            img_3 = real(FourierTools.rotate(z .+ 0im, θ, pad_value=pad_val))
             img_4 = FourierTools.rotate!(z, θ)
    
-            @test all(.≈(img_1, img_2, rtol=0.6))
-            @test ≈(img_1, img_2, rtol=0.03)
+            @test maximum(abs.(img_1 .- img_2)) .< 0.65
+            # @test all(.≈(img_1, img_2, rtol=0.65)) # 0.6
+            @test ≈(img_1, img_2, rtol=0.05) # 0.03
             @test ≈(img_3, img_2, rtol=0.01)
             @test ==(img_4, z)
             @test ==(img_2, img_2b)
 
             img_1c = FourierTools.center_extract(img_1, (100, 100))
             img_2c = FourierTools.center_extract(img_2, (100, 100))
-            @test all(.≈(img_1c, img_2c, rtol=0.3))
-            @test ≈(img_1c, img_2c, rtol=0.05)
+            # @test all(.≈(img_1c, img_2c, rtol=0.3))
+            @test maximum(abs.(img_1c .- img_2c)) .< 0.25
+            # @test ≈(img_1c, img_2c, rtol=0.05) # 0.05
         end
 
         f(deg2rad(-54.31))
diff --git a/test/fourier_shear.jl b/test/fourier_shear.jl
index e46dbdd..0567172 100644
--- a/test/fourier_shear.jl
+++ b/test/fourier_shear.jl
@@ -3,9 +3,9 @@
   
     @testset "Complex and real shear produce similar results" begin
         function f(a, b, Δ)
-            x = randn((30, 24, 13))
-            xc = 0im .+ x
-            xc2 = 1im  .* x
+            x = opt_cu(randn((30, 24, 13)), use_cuda);
+            xc = 0im .+ x;
+            xc2 = 1im  .* x;
             @test shear(x, Δ, a, b) ≈ real(shear(xc, Δ, a, b))
             @test shear(x, Δ, a, b) ≈ imag(shear(xc2, Δ, a, b))
         end
@@ -18,9 +18,9 @@
 
     @testset "Test that in-place works in-place" begin
         function f(a, b, Δ)
-            x = randn((30, 24, 13))
-            xc = randn(ComplexF32, (30, 24, 13)) 
-            xc2 = 1im  .* x
+            x = opt_cu(randn((30, 24, 13)), use_cuda);
+            xc = opt_cu(randn(ComplexF32, (30, 24, 13)), use_cuda);
+            xc2 = 1im  .* x;
             @test shear!(x, Δ, a, b) ≈ x 
             @test shear!(xc, Δ, a, b) ≈ xc 
             @test shear!(xc2, Δ, a, b) ≈ xc2
@@ -34,13 +34,15 @@
 
 
     @testset "Fix Nyquist" begin
-        @test shear(shear([1 2; 3 4.0], 0.123), -0.123, fix_nyquist = true) == [1.0 2.0; 3.0 4.0]
-        @test shear(shear([1 2; 3 4.0], 0.123), -0.123, fix_nyquist = false) != [1.0 2.0; 3.0 4.0]
+        dat = opt_cu([1 2; 3 4.0], use_cuda)
+        res = opt_cu([1.0 2.0; 3.0 4.0], use_cuda)
+        @test shear(shear(dat, 0.123), -0.123, fix_nyquist = true) == res
+        @test shear(shear(dat, 0.123), -0.123, fix_nyquist = false) != res
     end
 
     @testset "assign_shear_wrap!" begin
-        q = ones((10,11))
+        q = opt_cu(ones((10,11)), use_cuda);
         assign_shear_wrap!(q, 10)
-        @test q[:,1] == [0,0,0,0,0,1,1,1,1,1]
+        @test q[:,1] == opt_cu([0,0,0,0,0,1,1,1,1,1], use_cuda)
     end
 end
diff --git a/test/fourier_shifting.jl b/test/fourier_shifting.jl
index 12f109f..4421c58 100644
--- a/test/fourier_shifting.jl
+++ b/test/fourier_shifting.jl
@@ -3,18 +3,18 @@ Random.seed!(42)
 @testset "Fourier shifting methods" begin
 
     # Int error
-    @test_throws ArgumentError FourierTools.shift([1,2,3], (1,))
+    @test_throws ArgumentError FourierTools.shift(opt_cu([1,2,3], use_cuda), (1,))
 
     @testset "Empty shifts" begin
-        x = randn(ComplexF32, (11, 12, 13))
+        x = opt_cu(randn(ComplexF32, (11, 12, 13)), use_cuda);
         @test FourierTools.shift(x, []) == x
         
-        x = randn(Float32, (11, 12, 13))
+        x = opt_cu(randn(Float32, (11, 12, 13)), use_cuda);
         @test FourierTools.shift(x, []) == x
     end
 
     @testset "Integer shifts for complex and real arrays" begin
-        x = randn(ComplexF32, (11, 12, 13))
+        x =opt_cu(randn(ComplexF32, (11, 12, 13)), use_cuda);
 
         s = (2,2,2)
         @test FourierTools.shift(x, s) ≈ circshift(x, s) 
@@ -22,7 +22,7 @@ Random.seed!(42)
         @test FourierTools.shift(x, s) ≈ circshift(x, s) 
 
         @test FourierTools.shift(x, (0,0,0)) == x
-        x = randn(Float32, (11, 12, 13))
+        x = opt_cu(randn(Float32, (11, 12, 13)), use_cuda);
 
         s = (2,2,2)
         @test FourierTools.shift!(copy(x), s) ≈ circshift(x, s) 
@@ -35,7 +35,7 @@ Random.seed!(42)
 
     @testset "Half integer shifts" begin
 
-        x = [0.0, 1.0, 0.0, 1.0]
+        x = opt_cu([0.0, 1.0, 0.0, 1.0], use_cuda)
         xc = ComplexF32.(x)
         
         s = [0.5]
@@ -47,18 +47,19 @@ Random.seed!(42)
     end
 
     @testset "Check shifts with soft_fraction" begin
-        a = shift(delta((255,255)), (1.5,1.25),soft_fraction=0.1);
+        del = opt_cu(delta((255,255)), use_cuda)
+        a = shift(del, (1.5,1.25), soft_fraction=0.1);
         @test abs(sum(a[real(a).<0])) < 3.0
-        a = shift(delta((255,255)), (1.5,1.25),soft_fraction=0.0);
+        a = shift(del, (1.5,1.25), soft_fraction=0.0);
         @test abs(sum(a[real(a).<0])) > 5.0
     end
 
     @testset "Random shifts consistency between both methods" begin
-        x = randn((11, 12, 13))
+        x = opt_cu(randn((11, 12, 13)), use_cuda)
         s = randn((3,)) .* 10
         @test sum(x) ≈ sum(FourierTools.shift!(copy(x), s))
         @test FourierTools.shift!(copy(x), s) ≈ real(FourierTools.shift!(copy(x) .+ 0im, s))
-        x = randn((11, 12, 13))
+        x = opt_cu(randn((11, 12, 13)), use_cuda)
         s = randn((3,)) .* 10
         @test FourierTools.shift!(copy(x), s) ≈ real(FourierTools.shift!(copy(x) .+ 0im, s))
         @test sum(x) ≈ sum(FourierTools.shift!(copy(x), s))
@@ -67,12 +68,12 @@ Random.seed!(42)
 
     @testset "Check revertibility for complex and real data" begin
         @testset "Complex data" begin
-            x = randn(ComplexF32, (11, 12, 13))
+            x = opt_cu(randn(ComplexF32, (11, 12, 13)), use_cuda)
             s = (-1.1, 12.123, 0.21)
             @test x ≈ shift(shift(x, s), .- s, fix_nyquist_frequency=true) 
         end
         @testset "Real data" begin
-            x = randn(Float32, (11, 12, 13))
+            x = opt_cu(randn(Float32, (11, 12, 13)), use_cuda)
             s = (-1.1, 12.123, 0.21)
             @test x ≈ shift(shift(x, s), .- s, fix_nyquist_frequency=true) 
         end
diff --git a/test/resampling_tests.jl b/test/resampling_tests.jl
index 929a85b..bea59c3 100644
--- a/test/resampling_tests.jl
+++ b/test/resampling_tests.jl
@@ -4,15 +4,14 @@
             for _ in 1:5
                 s_small = ntuple(_ -> rand(1:13), dim)
                 s_large = ntuple(i -> max.(s_small[i], rand(10:16)), dim)
-                
-                
-                x = randn(Float32, (s_small))
+                                
+                x = opt_cu(randn(Float32, (s_small)), use_cuda)
                 @test x == resample(x, s_small)
                 @test Float32.(x) ≈ Float32.(resample(resample(x, s_large), s_small))
                 @test x ≈ resample_by_FFT(resample_by_FFT(x, s_large), s_small)
                 @test Float32.(x) ≈ Float32.(resample_by_RFFT(resample_by_RFFT(x, s_large), s_small))
                 @test x ≈ FourierTools.resample_by_1D(FourierTools.resample_by_1D(x, s_large), s_small)
-                x = randn(ComplexF32, (s_small))
+                x = opt_cu(randn(ComplexF32, (s_small)), use_cuda)
                 @test x ≈ resample(resample(x, s_large), s_small)
                 @test x ≈ resample_by_FFT(resample_by_FFT(x, s_large), s_small)
                 @test x ≈ resample_by_FFT(resample_by_FFT(real(x), s_large), s_small) + 1im .* resample_by_FFT(resample_by_FFT(imag(x), s_large), s_small) 
@@ -27,7 +26,7 @@
                 s_small = ntuple(_ -> rand(1:13), dim)
                 s_large = ntuple(i -> max.(s_small[i], rand(10:16)), dim)
                 
-                x = randn(Float32, (s_small))
+                x = opt_cu(randn(Float32, (s_small)), use_cuda)
                 @test ≈(FourierTools.resample(x, s_large), FourierTools.resample_by_1D(x, s_large))
             end
         end
@@ -39,7 +38,7 @@
                 s_small = ntuple(_ -> rand(1:13), dim)
                 s_large = ntuple(i -> max.(s_small[i], rand(10:16)), dim)
                 
-                x = randn(Float32, (s_small))
+                x = opt_cu(randn(Float32, (s_small)), use_cuda)
                 @test Float32.(resample(x, s_large)) ≈ Float32.(real(resample(ComplexF32.(x), s_large)))
                 @test FourierTools.resample_by_1D(x, s_large) ≈ real(FourierTools.resample_by_1D(ComplexF32.(x), s_large))
             end
@@ -49,7 +48,7 @@
 
     @testset "Tests that resample_by_FFT is purely real" begin
         function test_real(s_1, s_2)
-            x = randn(Float32, (s_1))
+            x = opt_cu(randn(Float32, (s_1)), use_cuda)
             y = resample_by_FFT(x, s_2)
             @test all(( imag.(y) .+ 1 .≈ 1))
             y = FourierTools.resample_by_1D(x, s_2)
@@ -85,8 +84,8 @@
 	    x_min = 0.0
 	    x_max = 16π
 	    
-	    xs_low = range(x_min, x_max, length=N_low+1)[1:N_low]
-	    xs_high = range(x_min, x_max, length=N)[1:end-1]
+	    xs_low = opt_cu(range(x_min, x_max, length=N_low+1)[1:N_low], use_cuda)
+	    xs_high = opt_cu(range(x_min, x_max, length=N)[1:end-1], use_cuda)
 	    f(x) = sin(0.5*x) + cos(x) + cos(2 * x) + sin(0.25*x)
 	    arr_low = f.(xs_low)
 	    arr_high = f.(xs_high)
@@ -108,10 +107,10 @@
 
     @testset "Upsample2 compared to resample" begin
     for sz in ((10,10),(5,8,9),(20,5,4))
-        a = rand(sz...)
+        a = opt_cu(rand(sz...), use_cuda)
         @test ≈(upsample2(a),resample(a,sz.*2))
         @test ≈(upsample2_abs2(a),abs2.(resample(a,sz.*2)))
-        a = rand(ComplexF32, sz...)
+        a = opt_cu(rand(ComplexF32, sz...), use_cuda)
         @test ≈(upsample2(a),resample(a,sz.*2))
         @test ≈(upsample2_abs2(a),abs2.(resample(a,sz.*2)))
         s2 = (d == 2 ? sz[d]*2 : sz[d] for d in 1:length(sz))
@@ -127,7 +126,7 @@
 	    x_min = 0.0
 	    x_max = 16π
 	    
-	    xs_low = range(x_min, x_max, length=N_low+1)[1:N_low]
+	    xs_low = opt_cu(range(x_min, x_max, length=N_low+1)[1:N_low], use_cuda)
 	    f(x) = sin(0.5*x) + cos(x) + cos(2 * x) + sin(0.25*x)
 	    arr_low = f.(xs_low)
 
@@ -155,8 +154,8 @@
     
     
         function test_2D(in_s, out_s)
-            x = range(-10.0, 10.0, length=in_s[1] + 1)[1:end-1]
-            y = range(-10.0, 10.0, length=in_s[2] + 1)[1:end-1]'
+            x = opt_cu(range(-10.0, 10.0, length=in_s[1] + 1)[1:end-1], use_cuda)
+            y = opt_cu(range(-10.0, 10.0, length=in_s[2] + 1)[1:end-1]', use_cuda)
     	    arr = abs.(x) .+ abs.(y) .+ sinc.(sqrt.(x .^2 .+ y .^2))
     	    arr_interp = resample(arr[1:end, 1:end], out_s);
     	    arr_ds = resample(arr_interp, in_s)
@@ -174,9 +173,9 @@
         test_2D((129, 128), (129, 153))
     
     
-        x = range(-10.0, 10.0, length=129)[1:end-1]
-        x2 = range(-10.0, 10.0, length=130)[1:end-1]
-        x_exact = range(-10.0, 10.0, length=2049)[1:end-1]
+        x = opt_cu(range(-10.0, 10.0, length=129)[1:end-1], use_cuda)
+        x2 = opt_cu(range(-10.0, 10.0, length=130)[1:end-1], use_cuda)
+        x_exact = opt_cu(range(-10.0, 10.0, length=2049)[1:end-1], use_cuda)
         y = x'
         y2 = x2'
         y_exact = x_exact'
@@ -202,8 +201,8 @@
     @testset "FFT resample 2D for a complex signal" begin
     
         function test_2D(in_s, out_s)
-        	x = range(-10.0, 10.0, length=in_s[1] + 1)[1:end-1]
-        	y = range(-10.0, 10.0, length=in_s[2] + 1)[1:end-1]'
+        	x = opt_cu(range(-10.0, 10.0, length=in_s[1] + 1)[1:end-1], use_cuda)
+        	y = opt_cu(range(-10.0, 10.0, length=in_s[2] + 1)[1:end-1]', use_cuda)
         	f(x, y) = 1im * (abs(x) + abs(y) + sinc(sqrt(x ^2 + y ^2)))
         	f2(x, y) =  abs(x) + abs(y) + sinc(sqrt((x - 5) ^2 + (y - 5)^2))
         
@@ -231,8 +230,8 @@
     
     @testset "FFT resample in 2D for a purely imaginary signal" begin
         function test_2D(in_s, out_s)
-        	x = range(-10.0, 10.0, length=in_s[1] + 1)[1:end-1]
-        	y = range(-10.0, 10.0, length=in_s[2] + 1)[1:end-1]'
+        	x = opt_cu(range(-10.0, 10.0, length=in_s[1] + 1)[1:end-1], use_cuda)
+        	y = opt_cu(range(-10.0, 10.0, length=in_s[2] + 1)[1:end-1]', use_cuda)
         	f(x, y) = 1im * (abs(x) + abs(y) + sinc(sqrt(x ^2 + y ^2)))
         
         	arr = f.(x, y)
@@ -256,9 +255,9 @@
     end
 
     @testset "test select_region_ft" begin
-        x = [1,2,3,4]
+        x = opt_cu([1,2,3,4], use_cuda)
         @test select_region_ft(ffts(x), (5,)) == ComplexF64[-1.0 + 0.0im, -2.0 - 2.0im, 10.0 + 0.0im, -2.0 + 2.0im, -1.0 + 0.0im]
-        x = [3.1495759241275225 0.24720770605505335 -1.311507800204285 -0.3387627167144301; -0.7214121984874265 -0.02566249380406308 0.687066447881175 -0.09536748694092163; -0.577092696986848 -0.6320809680268722 -0.09460071173365793 0.7689715736798227; 0.4593837753047561 -1.0204193548690512 -0.28474772376166907 1.442443602597533]
+        x = opt_cu([3.1495759241275225 0.24720770605505335 -1.311507800204285 -0.3387627167144301; -0.7214121984874265 -0.02566249380406308 0.687066447881175 -0.09536748694092163; -0.577092696986848 -0.6320809680268722 -0.09460071173365793 0.7689715736798227; 0.4593837753047561 -1.0204193548690512 -0.28474772376166907 1.442443602597533], use_cuda)
         @test select_region_ft(ffts(x), (7, 7)) == ComplexF64[0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im; 0.0 + 0.0im 0.32043577156395486 + 0.0im 2.321469443190397 + 0.7890379226962572im 0.38521287113798636 + 0.0im 2.321469443190397 - 0.7890379226962572im 0.32043577156395486 + 0.0im 0.0 + 0.0im; 0.0 + 0.0im 1.3691035744780353 + 0.16703621316206385im 2.4110077589815555 - 0.16558718095884828im 2.2813159163314163 - 0.7520360306228049im 7.47614366018844 - 4.139633109911205im 1.3691035744780353 + 0.16703621316206385im 0.0 + 0.0im; 0.0 + 0.0im 0.4801675770812479 + 0.0im 3.3142445917764407 - 3.2082400832669373im 1.6529948781166373 + 0.0im 3.3142445917764407 + 3.2082400832669373im 0.4801675770812479 + 0.0im 0.0 + 0.0im; 0.0 + 0.0im 1.3691035744780353 - 0.16703621316206385im 7.47614366018844 + 4.139633109911205im 2.2813159163314163 + 0.7520360306228049im 2.4110077589815555 + 0.16558718095884828im 1.3691035744780353 - 0.16703621316206385im 0.0 + 0.0im; 0.0 + 0.0im 0.32043577156395486 + 0.0im 2.321469443190397 + 0.7890379226962572im 0.38521287113798636 + 0.0im 2.321469443190397 - 0.7890379226962572im 0.32043577156395486 + 0.0im 0.0 + 0.0im; 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im]
     end
 
@@ -266,7 +265,7 @@
         dim =2
         s_small = (12,16) # ntuple(_ -> rand(1:13), dim)
         s_large = (20,18) # ntuple(i -> max.(s_small[i], rand(10:16)), dim)
-        dat = select_region(randn(Float32, (5,6)), new_size= s_small)
+        dat = select_region(opt_cu(randn(Float32, (5,6)), use_cuda), new_size= s_small)
         rs1 = FourierTools.resample(dat, s_large)
         rs1b = select_region(rs1, new_size=size(dat))
         rs2 = FourierTools.resample_czt(dat, s_large./s_small, do_damp=false)
@@ -286,7 +285,7 @@
         dim =2
         s_small = (12,16) # ntuple(_ -> rand(1:13), dim)
         s_large = (20,18) # ntuple(i -> max.(s_small[i], rand(10:16)), dim)
-        dat = select_region(randn(Float32, (5,6)), new_size= s_small)
+        dat = select_region(opt_cu(randn(Float32, (5,6)), use_cuda), new_size= s_small)
         rs1 = FourierTools.resample(dat, s_large)
         rs1b = select_region(rs1, new_size=size(dat))
         mymap = (t) -> t .* s_small ./ s_large  
diff --git a/test/runtests.jl b/test/runtests.jl
index 0c8ec32..8bb3643 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,4 +1,3 @@
-using Random, Test, FFTW
 using FourierTools
 using ImageTransformations
 using IndexFunArrays
@@ -7,24 +6,33 @@ using NDTools
 using LinearAlgebra # for the assigned nfft function LinearAlgebra.mul!
 using FractionalTransforms
 using TestImages
+using Random, Test, FFTW
+using CUDA
 
 Random.seed!(42)
 
-include("fft_helpers.jl")
-include("fftshift_alternatives.jl")
-include("utils.jl")
-include("fourier_shifting.jl")
-include("fourier_shear.jl")
-include("fourier_rotate.jl")
-include("resampling_tests.jl")
-include("convolutions.jl")
-include("correlations.jl")
-include("custom_fourier_types.jl")
-include("damping.jl")
-include("czt.jl")
-include("nfft_tests.jl")
-include("fractional_fourier_transform.jl")
-include("fourier_filtering.jl")
-include("sdft.jl")
+use_cuda = true
+if use_cuda
+    CUDA.allowscalar(false);
+end
+opt_cu(img, use_cuda) = ifelse(use_cuda, CuArray(img), img)
+
+include("fft_helpers.jl");
+include("fftshift_alternatives.jl");
+include("utils.jl");
+include("fourier_shifting.jl");
+include("fourier_shear.jl");
+include("fourier_rotate.jl");
+include("resampling_tests.jl");
+
+include("convolutions.jl");
+include("correlations.jl");
+include("custom_fourier_types.jl");
+include("damping.jl");
+include("czt.jl"); #
+include("nfft_tests.jl");
+include("fractional_fourier_transform.jl");
+include("fourier_filtering.jl");
+include("sdft.jl");
 
 return
diff --git a/test/utils.jl b/test/utils.jl
index 5fdf23a..7daa22a 100644
--- a/test/utils.jl
+++ b/test/utils.jl
@@ -23,21 +23,24 @@
 
     @testset "Test rfft_size" begin
         s = (11, 20, 10)
-        @test FourierTools.rfft_size(s, 2) == size(rfft(randn(s),2))
-        @test FourierTools.rft_size(randn(s), 2) == size(rfft(randn(s),2))
-         
-        s = (11, 21, 10)
-        @test FourierTools.rfft_size(s, 2) == size(rfft(randn(s),2))
+        dat = opt_cu(randn(s), use_cuda);
+        if !use_cuda
+            @test FourierTools.rfft_size(s, 2) == size(rfft(dat,2))
+            @test FourierTools.rft_size(randn(s), 2) == size(rfft(dat,2))
+            s = (11, 21, 10)
+            @test FourierTools.rfft_size(s, 2) == size(rfft(dat,2))
+        end
         
         s = (11, 21, 10)
-        @test FourierTools.rfft_size(s, 1) == size(rfft(randn(s),(1,2,3)))
+        dat = opt_cu(randn(s), use_cuda);
+        @test FourierTools.rfft_size(s, 1) == size(rfft(dat,(1,2,3)))
     end
 
 
 
     function center_test(x1, x2, x3, y1, y2, y3)
-        arr1 = randn((x1, x2, x3))
-        arr2 = zeros((y1, y2, y3))
+        arr1 = opt_cu(randn((x1, x2, x3)), use_cuda);
+        arr2 = opt_cu(zeros((y1, y2, y3)), use_cuda);
     
         FourierTools.center_set!(arr2, arr1)
         arr3 = FourierTools.center_extract(arr2, (x1, x2, x3))
@@ -107,7 +110,6 @@
         @test all(fourierspace_pixelsize(1, (512,256)) .≈ 1 ./ (512, 256))
         @test realspace_pixelsize(1, 512) ≈ 1 / 512 
         @test all(realspace_pixelsize(1, (512,256)) .≈ 1 ./ (512, 256))
-
     end
 
 
@@ -117,25 +119,34 @@
     end
 
     @testset "odd_view, fourier_reverse!" begin
-        a = [1 2 3;4 5 6;7 8 9;10 11 12]
-        @test FourierTools.odd_view(a) == [4 5 6;7 8 9; 10 11 12]
+        a = opt_cu([1 2 3;4 5 6;7 8 9;10 11 12], use_cuda)
+        @test FourierTools.odd_view(a) == opt_cu([4 5 6;7 8 9; 10 11 12], use_cuda)
         fourier_reverse!(a)
-        @test a == [3 2 1;12 11 10;9 8 7;6 5 4]
-        a = [1 2 3;4 5 6;7 8 9;10 11 12]
+        @test a == opt_cu([3 2 1;12 11 10;9 8 7;6 5 4], use_cuda)
+        a = opt_cu([1 2 3;4 5 6;7 8 9;10 11 12], use_cuda)
         b = copy(a);
         fourier_reverse!(a,dims=1);
         @test a[2:end,:] == b[end:-1:2,:]
-        a = [1 2 3 4;5 6 7 8;9 10 11 12 ;13 14 15 16]
+        a = opt_cu([1 2 3 4;5 6 7 8;9 10 11 12 ;13 14 15 16], use_cuda)
         b = copy(a);
         fourier_reverse!(a);
-        @test a[2,2] == b[4,4]
-        @test a[2,3] == b[4,3]
+        # the ranges are used to avoid error in single element acces with CuArray
+        @test a[2:2,2:2] == b[4:4,4:4]
+        @test a[2:2,3:3] == b[4:4,3:3]
         fourier_reverse!(a);
         @test a == b
         fourier_reverse!(a;dims=1);
         @test a[2:end,:] == b[end:-1:2,:]
-        @test sum(abs.(imag.(ift(fourier_reverse!(ft(rand(5,6,7))))))) < 1e-10
+        rd = opt_cu(rand(5,6,7), use_cuda)
+        if !use_cuda
+            @test sum(abs.(imag.(ift(fourier_reverse!(ft(rd)))))) < 1e-10
+        end
+        # @test sum(abs.(imag.(ift(fourier_reverse(ft(rd)))))) < 1e-10
         sz = (10,9,6)
-        @test sum(abs.(real.(ift(fourier_reverse!(ft(box((sz)))))) .- box(sz))) < 1e-10
+        bb = opt_cu(box((sz)), use_cuda)
+        if !use_cuda
+            @test sum(abs.(real.(ift(fourier_reverse!(ft(bb)))) .- bb)) < 1e-10
+        end
+        # @test sum(abs.(real.(ift(fourier_reverse(ft(bb)))) .- bb)) < 1e-10
     end
 end
diff --git a/test_old/convolutions.jl b/test_old/convolutions.jl
new file mode 100644
index 0000000..1018eb3
--- /dev/null
+++ b/test_old/convolutions.jl
@@ -0,0 +1,109 @@
+@testset "Convolution methods" begin
+
+    conv_gen(u, v, dims) = real(ifft(fft(u, dims) .* fft(v, dims), dims))
+
+    function conv_test(psf, img, img_out, dims, s)
+        otf = fft(psf, dims)
+        otf_r = rfft(psf, dims)
+        otf_p, conv_p = plan_conv(img, psf, dims, flags=FFTW.ESTIMATE)
+        otf_p2, conv_p2 = plan_conv(img .+ 0.0im, 0.0im .+ psf, dims)
+        otf_p3, conv_p3 = plan_conv_psf(img, fftshift(psf,dims), dims)
+        otf_p3, conv_p3 = plan_conv_psf(img, fftshift(psf,dims), dims, flags=FFTW.MEASURE)
+        otf_p4, conv_p4 = plan_conv_psf_buffer(img, fftshift(psf,dims), dims, flags=FFTW.MEASURE)
+        @testset "$s" begin
+            @test img_out ≈ conv(0.0im .+ img, psf, dims)
+            @test img_out ≈ conv(img, psf, dims)
+            @test img_out ≈ conv_p(img, otf_p)
+            @test img_out ≈ conv_p(img)
+            @test img_out ≈ conv_p2(img, otf_p2)
+            @test img_out ≈ conv_p2(img)
+            @test img_out ≈ conv_psf(img, fftshift(psf, dims), dims)
+            @test img_out ≈ conv_p3(img)
+            @test img_out ≈ conv_p4(img)
+        end
+    end
+    
+
+    N = 5
+    psf = zeros((N, N))
+    psf[1, 1] = 1
+    img = randn((N, N))
+    conv_test(psf, img, img, [1,2], "Convolution random image with delta peak")
+
+
+    N = 5
+    psf = zeros((N, N))
+    psf[1, 1] = 1
+    img = randn((N, N, N))
+    conv_test(psf, img, img, [1,2], "Convolution with different dimensions psf, img delta")
+
+
+    N = 5
+    psf = abs.(randn((N, N, 2)))
+    img = randn((N, N, 2))
+    dims = [1, 2]
+    img_out = conv_gen(img, psf, dims)
+    conv_test(psf, img, img_out, dims, "Convolution with random 3D PSF and random 3D image over 2D dimensions")
+ 
+    N = 5
+    psf = abs.(randn((N, N, N, N, N)))
+    img = randn((N, N, N, N, N))
+    dims = [1, 2, 3, 4]
+    img_out = conv_gen(img, psf, dims)
+    conv_test(psf, img, img_out, dims, "Convolution with random 5D PSF and random 5D image over 4 Dimensions")
+
+    N = 5
+    psf = abs.(zeros((N, N, N, N, N)))
+    for i = 1:N
+        psf[1,1,1,1, i] = 1
+    end
+    img = randn((N, N, N, N, N))
+    dims = [1, 2, 3, 4]
+    img_out = conv_gen(img, psf, dims)
+    conv_test(psf, img, img, dims, "Convolution with 5D delta peak and random 5D image over 4 Dimensions")
+
+
+    @testset "Check broadcasting convolution" begin
+        img = randn((5,6,7))
+        psf = randn((5,6,7, 2, 3))
+        _, p = plan_conv_buffer(img, psf)
+        @test conv(img, psf) ≈ p(img)
+    end
+    
+
+   @testset "Check types" begin
+        N = 10
+        img = randn(Float32, (N, N))
+        psf = abs.(randn(Float32, (N, N)))
+        dims = [1, 2] 
+        @test typeof(conv_gen(img, psf, dims)) == typeof(conv(img, psf))
+        @test typeof(conv_gen(img, psf, dims)) != typeof(conv(img .+ 0f0im, psf))
+        @test conv_gen(img, psf, dims) .+ 1f0im ≈ 1f0im .+ conv(img .+ 0f0im, psf)
+    end
+
+
+    @testset "Check type get_plan" begin
+        @test plan_rfft === FourierTools.get_plan(typeof(1f0))  
+        @test plan_fft === FourierTools.get_plan(typeof(1im))  
+    end
+
+    @testset "dims argument nothing" begin
+        N = 5
+        psf = abs.(randn((N, N, N, N, N)))
+        img = randn((N, N, N, N, N))
+        dims = [1,2,3,4,5] 
+        @test conv(psf, img) ≈ conv(img, psf, dims)
+        @test conv(psf, img) ≈ conv(psf, img, dims)
+        @test conv(img, psf) ≈ conv(img, psf, dims)
+    end
+
+    @testset "adjoint convolution" begin
+        x = randn(ComplexF32, (5,6))
+        y = randn(ComplexF32, (5,6))
+
+        y_ft, p = plan_conv(x, y)
+        @test ≈(exp(1im * 1.23) .+ conv(ones(eltype(y), size(x)), conj.(y)), exp(1im * 1.23) .+ Zygote.gradient(x -> sum(real(conv(x, y))), x)[1], rtol=1e-4)   
+        @test ≈(exp(1im * 1.23) .+ conv(ones(ComplexF32, size(x)), conj.(y)), exp(1im * 1.23) .+ Zygote.gradient(x -> sum(real(p(x, y_ft))), x)[1], rtol=1e-4) 
+    end
+
+end
diff --git a/test_old/correlations.jl b/test_old/correlations.jl
new file mode 100644
index 0000000..609b439
--- /dev/null
+++ b/test_old/correlations.jl
@@ -0,0 +1,14 @@
+
+
+@testset "Correlations methods" begin
+     
+    @test ccorr([1, 0], [1, 0], centered = true) == [0.0, 1.0]
+    @test ccorr([1, 0], [1, 0]) == [1.0, 0.0]
+    
+    x = [1,2,3,4,5]
+    y = [1,2,3,4,5]
+    @test ccorr(x,y) ≈ [55, 45, 40, 40, 45]
+    @test ccorr(x,y, centered=true) ≈ [40, 45, 55, 45, 40]
+
+    @test ccorr(x, x .* (1im)) == ComplexF64[0.0 - 55.0im, 0.0 - 45.0im, 0.0 - 40.0im, 0.0 - 40.0im, 0.0 - 45.0im]
+end
diff --git a/test_old/custom_fourier_types.jl b/test_old/custom_fourier_types.jl
new file mode 100644
index 0000000..d735c27
--- /dev/null
+++ b/test_old/custom_fourier_types.jl
@@ -0,0 +1,19 @@
+
+@testset "Custom Fourier Types" begin
+    N = 5
+    x = randn((N, N))
+    fs = FourierTools.FourierSplit(x, 2, 2, 4, true)
+    @test FourierTools.parenttype(fs) == typeof(x)
+    fs = FourierTools.FourierSplit(x, 2, 2, 4, false)
+    @test FourierTools.parenttype(fs) == typeof(x)
+    
+    fj = FourierTools.FourierJoin(x, 2, 2, 4, true)
+    @test FourierTools.parenttype(fj) == typeof(x)
+
+    fj = FourierTools.FourierJoin(x, 2, 2, 4, false)
+    @test FourierTools.parenttype(fj) == typeof(x)
+
+    @test FourierTools.parenttype(typeof(fj)) == typeof(x)
+
+    @test FourierTools.IndexStyle(typeof(fj)) == IndexStyle(typeof(fj))
+end
diff --git a/test_old/czt.jl b/test_old/czt.jl
new file mode 100644
index 0000000..3ec04bf
--- /dev/null
+++ b/test_old/czt.jl
@@ -0,0 +1,39 @@
+using NDTools # this is needed for the select_region! function below.
+
+@testset "chirp z-transformation" begin
+    @testset "czt" begin
+        x = randn(ComplexF32, (5,6,7))
+        @test eltype(czt(x, (2.0,2.0,2.0))) == ComplexF32
+        @test eltype(czt(x, (2f0,2f0,2f0))) == ComplexF32
+        @test ≈(czt(x, (1.0,1.0,1.0), (1,3)), ft(x, (1,3)), rtol=1e-5)
+        @test ≈(czt(x, (1.0,1.0,1.0), (1,3), src_center=(1,1,1), dst_center=(1,1,1)), fft(x, (1,3)), rtol=1e-5)
+        @test ≈(iczt(x, (1.0,1.0,1.0), (1,3), src_center=(1,1,1), dst_center=(1,1,1)), ifft(x, (1,3)), rtol=1e-5)
+
+        y = randn(ComplexF32, (5,6))
+        zoom = (1.0,1.0,1.0)
+        @test ≈(czt(x, zoom), ft(x), rtol=1e-4)         
+        @test ≈(czt(y, (1.0,1.0)), ft(y), rtol=1e-5)
+
+        @test ≈(iczt(czt(y, (1.0,1.0)), (1.0,1.0)), y, rtol=1e-5)
+        zoom = (2.0,2.0)
+        @test sum(abs.(imag(czt(ones(5,6),zoom, src_center=((5,6).+1)./2)))) < 1e-8
+
+        # for even sizes the czt is not the same as the ft and upsample operation. But should it be or not?
+        # @test ≈(czt(y,zoom),  select_region(upsample2(ft(y), fix_center=true), new_size=size(y)), rtol=1e-5)
+        # @test ≈(czt(y,zoom, src_center=(size(y).+1)./2), select_region(upsample2(ft(y), fix_center=true), new_size=size(y)), rtol=1e-5)
+
+        # for uneven sizes this works:
+        @test ≈(czt(y[1:5,1:5], zoom, (1,2), (10,10)),  upsample2(ft(y[1:5,1:5]), fix_center=true), rtol=1e-5)
+        p_czt = plan_czt(y, zoom, (1,2), (11,12))
+        @test ≈(p_czt * y, czt(y, zoom, (1,2), (11,12)))
+        # zoom smaller 1.0 causes wrap around:
+        zoom = (0.5,2.0)
+        @test abs(czt(y,zoom)[1,1]) > 1e-5
+        zoom = (2.0, 0.5)
+        # check if the remove_wrap works
+        @test abs(czt(y, zoom; remove_wrap=true)[1,1]) == 0.0
+        @test abs(iczt(y, zoom; remove_wrap=true)[1,1]) == 0.0
+        @test abs(czt(y, zoom; pad_value=0.2, remove_wrap=true)[1,1]) == 0.2f0
+        @test abs(iczt(y, zoom; pad_value=0.5f0, remove_wrap=true)[1,1]) == 0.5f0
+    end
+end
diff --git a/test_old/damping.jl b/test_old/damping.jl
new file mode 100644
index 0000000..55a0a86
--- /dev/null
+++ b/test_old/damping.jl
@@ -0,0 +1,14 @@
+using IndexFunArrays
+@testset "Test damping functions" begin
+
+    @testset "Test damp_edge_outside" begin
+        sz = (512,512)
+        data = disc(sz,150.0, offset=CtrCorner);
+        data_d = damp_edge_outside(data);
+        fta = abs.(ft(data));
+        ftb = abs.(ft(data_d));
+        @test fta[size(fta)[1]÷2+1,1] > 50
+        @test ftb[size(ftb)[1]÷2+1,1] < 15
+    end
+
+end
diff --git a/test_old/fft_helpers.jl b/test_old/fft_helpers.jl
new file mode 100644
index 0000000..badff06
--- /dev/null
+++ b/test_old/fft_helpers.jl
@@ -0,0 +1,83 @@
+@testset "test fft_helpers" begin
+
+    @testset "Optional collect" begin
+        y = [1,2,3]
+        x = fftshift_view(y, (1))
+        @test fftshift(y) == FourierTools.optional_collect(x)
+    end
+
+    @testset "Test ft and ift wrappers" begin
+        Random.seed!(42)
+        testft(arr, dims) = @test(ft(arr, dims) ≈ fftshift(fft(ifftshift(arr, dims), dims), dims))
+        testift(arr, dims) = @test(ift(arr, dims) ≈ fftshift(ifft(ifftshift(arr, dims), dims), dims))
+        testffts(arr, dims) = @test(ffts(arr, dims) ≈ fftshift(fft(arr, dims), dims))
+        testiffts(arr, dims) = @test(iffts(arr, dims) ≈ ifft(ifftshift(arr, dims), dims))
+        testrft(arr, dims) = @test(rffts(arr, dims) ≈ fftshift(rfft(arr, dims), dims[2:end]))
+        testirft(arr, dims, d) = @test(irffts(arr, d, dims) ≈ irfft(ifftshift(arr, dims[2:end]), d, dims))
+        for dim = 1:4
+            for _ in 1:3
+                s = ntuple(_ -> rand(1:13), dim)
+                arr = randn(ComplexF32, s)
+                dims = 1:dim
+                testft(arr, dims)
+                testift(arr, dims)
+                dims = 1:rand(1:dim)
+                testft(arr, dims)
+                testift(arr, dims)
+                testffts(arr, dims)
+                testiffts(arr, dims)
+                
+            end
+        end
+    end
+
+
+    @testset "Test 2d fft helpers" begin
+        arr = randn((6,7,8))
+        dims = [1,2]
+        d = 6
+        @test(ft2d(arr) == fftshift(fft(ifftshift(arr, (1,2)), (1,2)), dims))
+        @test(ift2d(arr) == fftshift(ifft(ifftshift(arr, (1,2)), (1,2)), dims))
+        @test(ffts2d(arr) == fftshift(fft(arr, (1,2)), (1,2)))
+        @test(iffts2d(arr) == ifft(ifftshift(arr, (1,2)), (1,2)))
+        @test(rffts2d(arr) == fftshift(rfft(arr, (1,2)), dims[2:2]))
+        @test(rft2d(arr) == fftshift(rfft(ifftshift(arr, (1,2)), (1,2)), dims[2:2]))
+        @test(fft2d(arr) == fft(arr, dims))
+        @test(ifft2d(arr) == ifft(arr, dims))
+        @test(rfft2d(arr) == rfft(arr, (1,2)))
+        @test(fftshift2d(arr) == fftshift(arr, (1,2)))
+        @test(ifftshift2d(arr) == ifftshift(arr, (1,2)))
+        @test(fftshift2d_view(arr) == fftshift_view(arr, (1,2)))
+        @test(ifftshift2d_view(arr) == ifftshift_view(arr, (1,2)))
+
+        arr = randn(ComplexF32, (4,7,8))
+        @test(irffts2d(arr, d) == irfft(ifftshift(arr, dims[2:2]), d, (1,2)))
+        @test(irft2d(arr, d) == irft(arr, d, (1,2))) 
+        @test(irfft2d(arr, d) == irfft(arr, d, (1,2))) 
+    end
+
+
+    @testset "Test ft, ift, rft and irft real space centering" begin
+        szs = ((10,10),(11,10),(100,101),(101,101))
+        for sz in szs
+            @test ft(ones(sz)) ≈ prod(sz) .* delta(sz)
+            @test ft(delta(sz)) ≈ ones(sz)
+            @test rft(ones(sz)) ≈ prod(sz) .* delta(rft_size(sz), offset=CtrRFT)
+            @test rft(delta(sz)) ≈ ones(rft_size(sz))
+            @test ift(ones(sz)) ≈ delta(sz)
+            @test ift(delta(sz)) ≈ ones(sz) ./ prod(sz)
+            @test irft(ones(rft_size(sz)),sz[1]) ≈ delta(sz)
+            @test irft(delta(rft_size(sz),offset=CtrRFT),sz[1]) ≈ ones(sz) ./ prod(sz)
+        end
+    end
+
+
+    @testset "Test in place methods" begin
+        x = randn(ComplexF32, (5,3,10))
+        dims = (1,2)
+        @test fftshift(fft(x, dims), dims) ≈ ffts!(copy(x), dims)
+        @test ffts2d!(copy(x)) ≈ ffts!(copy(x), (1,2))
+    end
+
+
+end
diff --git a/test_old/fftshift_alternatives.jl b/test_old/fftshift_alternatives.jl
new file mode 100644
index 0000000..e5f0f5f
--- /dev/null
+++ b/test_old/fftshift_alternatives.jl
@@ -0,0 +1,45 @@
+@testset "fftshift alternatives" begin
+    @testset "Test fftshift_view and ifftshift_view" begin
+        Random.seed!(42)
+        x = randn((2,1,4,1,6,7,4,7))
+        dims = (4,6,7)
+        @test fftshift(x,dims) == FourierTools.fftshift_view(x, dims)
+        @test ifftshift(x,dims) == FourierTools.ifftshift_view(x, dims)
+        @test x === FourierTools.optional_collect(ifftshift_view(fftshift_view(x)))
+        @test x === FourierTools.optional_collect(fftshift_view(ifftshift_view(x)))
+        @test x === FourierTools.optional_collect(ifftshift_view(fftshift_view(x, dims), dims))
+        @test x === FourierTools.optional_collect(fftshift_view(ifftshift_view(x, dims), dims))
+
+        x = randn((13, 13, 14))
+        @test fftshift(x) == FourierTools.fftshift_view(x)
+        @test ifftshift(x) == FourierTools.ifftshift_view(x)
+        @test fftshift(x, (2,3)) == FourierTools.fftshift_view(x, (2,3))
+        @test ifftshift(x, (2,3) ) == FourierTools.ifftshift_view(x, (2,3))
+
+    end
+end
+
+
+@testset "fftshift and ifftshift in-place" begin
+    function f(arr, dims)
+        arr3 = copy(arr)
+        @test fftshift(arr, dims) == FourierTools._fftshift!(copy(arr), arr, dims)
+        @test arr3 == arr
+        @test ifftshift(arr, dims) == FourierTools._ifftshift!(copy(arr), arr, dims)
+        @test arr3 == arr
+        @test FourierTools._fftshift!(copy(arr), arr, dims) != arr
+    end
+
+    f(randn((8,)), 1)
+    f(randn((2,)), 1)
+    f(randn((3,)), 1)
+    f(randn((3,4)), 1)
+    f(randn((3,4)), 2)
+    f(randn((4,4)), (1,2))
+    f(randn((5,5)), (1, 2))
+    f(randn((5,5)), (1,))
+    f(randn((8, 7, 6,4,1)), (1,2))
+    f(randn((8, 7, 6,4,1)), (2,3))
+    f(randn((8, 7, 6,4,1)), 3)
+    f(randn((8, 7, 6,4,1)), (1,2,3,4,5))
+end
diff --git a/test_old/fourier_filtering.jl b/test_old/fourier_filtering.jl
new file mode 100644
index 0000000..71fbd2b
--- /dev/null
+++ b/test_old/fourier_filtering.jl
@@ -0,0 +1,44 @@
+Random.seed!(42)
+
+@testset "Fourier filtering" begin
+
+    @testset "Gaussian filter complex" begin
+        sz = (21, 22)
+        x = randn(ComplexF32, sz)
+        sigma = (1.1,2.2)
+        gf = filter_gaussian(x, sigma, real_space_kernel=false)
+        # Note that this is not the same, since one kernel is generated in real space and one in Fourier space!
+        # with sizes around 10, the difference is huge!
+        k = gaussian(Float32, sz, sigma=sigma)
+        k = k./sum(k) # different than "normal".
+        gfc = conv_psf(x, k)
+        @test ≈(gf,gfc, rtol=1e-2) # it is realatively inaccurate due to the kernel being generated in different places
+        gfr = filter_gaussian(x, sigma, real_space_kernel=true)
+        @test ≈(gfr, gfc) # it can be debated how to best normalize a Gaussian filter
+        gfr = filter_gaussian(zeros(5).+1im, (1.0,), real_space_kernel=true)
+        @test ≈(gfr, zeros(5).+1im) # it can be debated how to best normalize a Gaussian filter
+    end
+
+    @testset "Gaussian filter real" begin
+        sz = (21, 22)
+        x = randn(Float32, sz)
+        sigma = (1.1, 2.2)
+        gf = filter_gaussian(x, sigma, real_space_kernel=true)
+        # Note that this is not the same, since one kernel is generated in real space and one in Fourier space!
+        # with sizes around 10, the difference is huge!
+        k = gaussian(sz, sigma=sigma)
+        k = k./sum(k) # different than "normal".
+        gf2 = conv_psf(x, k)
+        @test ≈(gf, gf2, rtol=1e-2) # it is realatively inaccurate due to the kernel being generated in different places
+        gf2 = filter_gaussian(zeros(sz), sigma, real_space_kernel=true)
+        @test ≈(gf2, zeros(sz)) # it can be debated how to best normalize a Gaussian filter
+    end
+    @testset "Other filters" begin
+        @test filter_hamming(FourierTools.delta(Float32, (3,)), border_in=0.0, border_out=1.0) ≈ [0.23,0.54, 0.23]
+        @test filter_hann(FourierTools.delta(Float32, (3,)), border_in=0.0, border_out=1.0) ≈ [0.25,0.5, 0.25]
+        @test FourierTools.fourier_filter_by_1D_FT!(ones(ComplexF64, 6), [ones(ComplexF64, 6)]; transform_win=true, normalize_win=false) ≈ 6 .* ones(ComplexF64, 6)
+        @test FourierTools.fourier_filter_by_1D_FT!(ones(ComplexF64, 6), [ones(ComplexF64, 6)]; transform_win=true, normalize_win=true) ≈ ones(ComplexF64, 6)
+        @test FourierTools.fourier_filter_by_1D_RFT!(ones(6), [ones(6)]; transform_win=true, normalize_win=false) ≈ 6 .* ones(6)
+        @test FourierTools.fourier_filter_by_1D_RFT!(ones(6), [ones(6)]; transform_win=true, normalize_win=true) ≈ ones(6)
+    end
+end
diff --git a/test_old/fourier_rotate.jl b/test_old/fourier_rotate.jl
new file mode 100644
index 0000000..fb33fb9
--- /dev/null
+++ b/test_old/fourier_rotate.jl
@@ -0,0 +1,44 @@
+@testset "Fourier Rotate" begin
+
+    @testset "Compare with ImageTransformations" begin
+        
+        function f(θ)
+            x = 1.0 .* range(0.0, 1.0, length=256)' .* range(0.0, 1.0, length=256)
+            f(x) = sin(x * 20) + tan(1.2 * x) + sin(x) + cos(1.1323 * x) * x^3 + x^3 + 0.23 * x^4 + sin(1/(x+0.1))
+            img = 5 .+ abs.(f.(x))
+            img ./= maximum(img)
+            img[20:40, 100:200] .= 1
+            img[20:200, 20:90] .= 0.3
+            img[20:200, 100:102] .= 0.7
+        
+            m = sum(img) / length(img)
+
+            img_1 = parent(ImageTransformations.imrotate(img, θ, m))
+            z = ones(Float32, size(img_1))
+            z .*= m
+            FourierTools.center_set!(z, img)
+            img_2 = FourierTools.rotate(z, θ, pad_value=img_1[1,1])
+            img_2b = FourierTools.center_extract(FourierTools.rotate(z, θ, pad_value=img_1[1,1], keep_new_size=true), size(img_2))
+            img_3 = real(FourierTools.rotate(z .+ 0im, θ, pad_value=img_1[1,1]))
+            img_4 = FourierTools.rotate!(z, θ)
+   
+            @test all(.≈(img_1, img_2, rtol=0.6))
+            @test ≈(img_1, img_2, rtol=0.03)
+            @test ≈(img_3, img_2, rtol=0.01)
+            @test ==(img_4, z)
+            @test ==(img_2, img_2b)
+
+            img_1c = FourierTools.center_extract(img_1, (100, 100))
+            img_2c = FourierTools.center_extract(img_2, (100, 100))
+            @test all(.≈(img_1c, img_2c, rtol=0.3))
+            @test ≈(img_1c, img_2c, rtol=0.05)
+        end
+
+        f(deg2rad(-54.31))
+        f(deg2rad(-95.31))
+        f(deg2rad(107.55))
+        f(deg2rad(-32.31))
+        f(deg2rad(32.31))
+        f(deg2rad(0))
+    end
+end
diff --git a/test_old/fourier_shear.jl b/test_old/fourier_shear.jl
new file mode 100644
index 0000000..e46dbdd
--- /dev/null
+++ b/test_old/fourier_shear.jl
@@ -0,0 +1,46 @@
+@testset "Fourier Shear" begin
+
+  
+    @testset "Complex and real shear produce similar results" begin
+        function f(a, b, Δ)
+            x = randn((30, 24, 13))
+            xc = 0im .+ x
+            xc2 = 1im  .* x
+            @test shear(x, Δ, a, b) ≈ real(shear(xc, Δ, a, b))
+            @test shear(x, Δ, a, b) ≈ imag(shear(xc2, Δ, a, b))
+        end
+
+        f(2, 3, 123.1)
+        f(3, 2, 13.1)
+        f(1, 2, 13.1)
+        f(3, 1, 13.1)
+    end
+
+    @testset "Test that in-place works in-place" begin
+        function f(a, b, Δ)
+            x = randn((30, 24, 13))
+            xc = randn(ComplexF32, (30, 24, 13)) 
+            xc2 = 1im  .* x
+            @test shear!(x, Δ, a, b) ≈ x 
+            @test shear!(xc, Δ, a, b) ≈ xc 
+            @test shear!(xc2, Δ, a, b) ≈ xc2
+        end
+
+        f(2, 3, 123.1)
+        f(3, 2, 13.1)
+        f(1, 2, 13.1)
+        f(3, 1, 13.1)
+    end
+
+
+    @testset "Fix Nyquist" begin
+        @test shear(shear([1 2; 3 4.0], 0.123), -0.123, fix_nyquist = true) == [1.0 2.0; 3.0 4.0]
+        @test shear(shear([1 2; 3 4.0], 0.123), -0.123, fix_nyquist = false) != [1.0 2.0; 3.0 4.0]
+    end
+
+    @testset "assign_shear_wrap!" begin
+        q = ones((10,11))
+        assign_shear_wrap!(q, 10)
+        @test q[:,1] == [0,0,0,0,0,1,1,1,1,1]
+    end
+end
diff --git a/test_old/fourier_shifting.jl b/test_old/fourier_shifting.jl
new file mode 100644
index 0000000..12f109f
--- /dev/null
+++ b/test_old/fourier_shifting.jl
@@ -0,0 +1,81 @@
+Random.seed!(42)
+
+@testset "Fourier shifting methods" begin
+
+    # Int error
+    @test_throws ArgumentError FourierTools.shift([1,2,3], (1,))
+
+    @testset "Empty shifts" begin
+        x = randn(ComplexF32, (11, 12, 13))
+        @test FourierTools.shift(x, []) == x
+        
+        x = randn(Float32, (11, 12, 13))
+        @test FourierTools.shift(x, []) == x
+    end
+
+    @testset "Integer shifts for complex and real arrays" begin
+        x = randn(ComplexF32, (11, 12, 13))
+
+        s = (2,2,2)
+        @test FourierTools.shift(x, s) ≈ circshift(x, s) 
+        s = (3,2,1)
+        @test FourierTools.shift(x, s) ≈ circshift(x, s) 
+
+        @test FourierTools.shift(x, (0,0,0)) == x
+        x = randn(Float32, (11, 12, 13))
+
+        s = (2,2,2)
+        @test FourierTools.shift!(copy(x), s) ≈ circshift(x, s) 
+        s = (3,2,1)
+        @test FourierTools.shift!(copy(x), s) ≈ circshift(x, s) 
+        
+        @test sum(x) ≈ sum(FourierTools.shift!(copy(x), s))
+
+    end
+
+    @testset "Half integer shifts" begin
+
+        x = [0.0, 1.0, 0.0, 1.0]
+        xc = ComplexF32.(x)
+        
+        s = [0.5]
+        @test FourierTools.shift!(copy(x), s) ≈ real(FourierTools.shift!(copy(xc), s))
+        @test FourierTools.shift!(copy(x), s) ≈ real(FourierTools.shift!(copy(xc), 0.5))
+        @test sum(x) ≈ sum(FourierTools.shift!(copy(x), s))
+        
+        @test sum(xc) ≈ sum(FourierTools.shift!(copy(xc), s))
+    end
+
+    @testset "Check shifts with soft_fraction" begin
+        a = shift(delta((255,255)), (1.5,1.25),soft_fraction=0.1);
+        @test abs(sum(a[real(a).<0])) < 3.0
+        a = shift(delta((255,255)), (1.5,1.25),soft_fraction=0.0);
+        @test abs(sum(a[real(a).<0])) > 5.0
+    end
+
+    @testset "Random shifts consistency between both methods" begin
+        x = randn((11, 12, 13))
+        s = randn((3,)) .* 10
+        @test sum(x) ≈ sum(FourierTools.shift!(copy(x), s))
+        @test FourierTools.shift!(copy(x), s) ≈ real(FourierTools.shift!(copy(x) .+ 0im, s))
+        x = randn((11, 12, 13))
+        s = randn((3,)) .* 10
+        @test FourierTools.shift!(copy(x), s) ≈ real(FourierTools.shift!(copy(x) .+ 0im, s))
+        @test sum(x) ≈ sum(FourierTools.shift!(copy(x), s))
+    end
+
+
+    @testset "Check revertibility for complex and real data" begin
+        @testset "Complex data" begin
+            x = randn(ComplexF32, (11, 12, 13))
+            s = (-1.1, 12.123, 0.21)
+            @test x ≈ shift(shift(x, s), .- s, fix_nyquist_frequency=true) 
+        end
+        @testset "Real data" begin
+            x = randn(Float32, (11, 12, 13))
+            s = (-1.1, 12.123, 0.21)
+            @test x ≈ shift(shift(x, s), .- s, fix_nyquist_frequency=true) 
+        end
+    end
+
+end
diff --git a/test_old/fractional_fourier_transform.jl b/test_old/fractional_fourier_transform.jl
new file mode 100644
index 0000000..3b1307a
--- /dev/null
+++ b/test_old/fractional_fourier_transform.jl
@@ -0,0 +1,45 @@
+@testset "Fractional Fast Fourier Transform" begin 
+
+    box1d = collect(box(Float32, (100,)))
+    box1d_ = collect(box(Float32, (101,)))
+    
+
+    # consistency with fft
+    @test abs.(ft(box1d)[30:70]) ./ sqrt(length(box1d)) ≈ abs.(frfft(box1d, 1.0, shift=true)[30:70])
+    @test all(.≈(1 .+ abs.(ft(box1d)[30:70]) ./ sqrt(length(box1d)), 
+              1 .+ abs.(frfft(frfft(box1d, 0.5, shift=true), 0.5, shift=true)[30:70]), rtol=5e-3))
+    @test eltype(frfft(box1d, 1.0)) === ComplexF32
+    
+    @test all(.≈(1 .+ abs.(ft(box1d_)[30:70]) ./ sqrt(length(box1d_)), 1 .+ abs.(frfft(box1d_, 1.0, shift=true)[30:70]), rtol=5e-2))
+    @test all(.≈(1 .+ abs.(ft(box1d_)[30:70]) ./ sqrt(length(box1d_)), 
+              1 .+ abs.(frfft(frfft(box1d_, 0.5, shift=true), 0.5, shift=true)[30:70]), rtol=7e-3))
+
+
+    for frac in [0, -0.999, 0.99, 2.001,-3.001, -3.999,4,-2, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3, -4.4]
+        @test all(.≈(10 .+ abs.(FractionalTransforms.frft(collect(box1d_), frac))[30:70], 
+                 10 .+ abs.(frfft(box1d_, frac, shift=true))[30:70], rtol=9e-3))
+
+        @test all(.≈(10 .+ real.(FractionalTransforms.frft(collect(box1d_), frac))[30:70], 
+                     10 .+ real.(frfft(box1d_, frac, shift=true))[30:70], rtol=9e-3))
+
+        @test all(.≈(10 .+ imag.(FractionalTransforms.frft(collect(box1d_), frac))[30:70], 
+                 10 .+ imag.(frfft(box1d_, frac, shift=true))[30:70], rtol=9e-3))
+    end
+    # reversibility
+    @test all(.≈(real(frfft(frfft(box1d, 0.5, shift=true), -0.5, shift=true))[30:70] , real(box1d)[30:70], rtol=1e-4))
+    @test all(.≈(real(frfft(frfft(box1d_, 0.5, shift=true), -0.5, shift=true))[30:70] , real(box1d_)[30:70], rtol=1e-4))
+
+
+
+    img = Float64.(testimage("resolution_test"))
+
+    @test abs.(ft(img)) ./ sqrt(length(img)) .+ 10 ≈ 10 .+ abs.(frfft(img, 0.9999999)) rtol=1e-5
+    @test (real.(ft(img)) ./ sqrt(length(img)))[200:300] ≈ (real.(frfft(img, 0.9999999)))[200:300] rtol=0.001
+
+    
+    x = randn((12,))
+    x2 = randn((13,))
+    @test frfft(x, 0.5) ≈ frfft(reshape(x, 12,1,1,1,1), 0.5)
+    @test frfft(x, 0.5) ≈ reshape(frfft(collect(reshape(x, 1,12,1,1)), 0.5), 12)
+    @test reshape(frfft(reshape(x, 1,12,1,1), 0.43), 12) ≈ frfft(x, 0.43)
+end
diff --git a/test_old/nfft_tests.jl b/test_old/nfft_tests.jl
new file mode 100644
index 0000000..b595dec
--- /dev/null
+++ b/test_old/nfft_tests.jl
@@ -0,0 +1,25 @@
+@testset "Test nfft_nd  methods" begin
+    @testset "nfft_nd" begin
+        sz = (6,8, 10)
+        dat = rand(sz...)
+        nft = fftshift(fft(ifftshift(dat)))
+        @test isapprox(nfft_nd(dat, t->(0.0,0.0,0.0), is_in_pixels=true, is_local_shift=true), nft, rtol=1e-6)
+        @test isapprox(nfft_nd(dat, t->(0.0,0.0,0.0), is_in_pixels=false, is_local_shift=true), nft, rtol=1e-6)
+        nift = fftshift(ifft(ifftshift(dat)))
+        mynfft = nfft_nd(dat, t->(0.0,0.0,0.0), is_in_pixels=false, is_local_shift=true, is_adjoint=true) ./ prod(size(nift))
+        @test isapprox(mynfft, nift, rtol=1e-6)
+        @test isapprox(nfft_nd(dat, t->t, pad_value=nothing), nft, rtol=1e-6)
+        p =plan_nfft_nd(dat, t->t, pad_value=0.0)
+        @test isapprox(p*dat, nft, rtol=1e-6)
+        @test isapprox(nfft_nd(dat, t->(10.0,10.0,10.0), pad_value=0.0), zeros(sz), rtol=1e-6)
+        p = plan_nfft_nd(dat, t->t)
+        @test isapprox(p*dat, nft, rtol=1e-6)
+        b = nfft_nd(dat, t->t)
+        @test isapprox(b, nft, rtol=1e-6)
+        b = nfft_nd(dat .+ 0im, idx(size(dat), scale=ScaFT))
+        @test isapprox(b, nft .+ 0im, rtol=1e-6)
+        res = zeros(complex(eltype(dat)), sz)
+        LinearAlgebra.mul!(res, p, dat)
+        @test isapprox(res, nft, rtol=1e-6)
+    end
+end
diff --git a/test_old/resampling_tests.jl b/test_old/resampling_tests.jl
new file mode 100644
index 0000000..929a85b
--- /dev/null
+++ b/test_old/resampling_tests.jl
@@ -0,0 +1,325 @@
+@testset "Test resampling  methods" begin
+    @testset "Test that upsample and downsample is reversible" begin
+        for dim = 1:3
+            for _ in 1:5
+                s_small = ntuple(_ -> rand(1:13), dim)
+                s_large = ntuple(i -> max.(s_small[i], rand(10:16)), dim)
+                
+                
+                x = randn(Float32, (s_small))
+                @test x == resample(x, s_small)
+                @test Float32.(x) ≈ Float32.(resample(resample(x, s_large), s_small))
+                @test x ≈ resample_by_FFT(resample_by_FFT(x, s_large), s_small)
+                @test Float32.(x) ≈ Float32.(resample_by_RFFT(resample_by_RFFT(x, s_large), s_small))
+                @test x ≈ FourierTools.resample_by_1D(FourierTools.resample_by_1D(x, s_large), s_small)
+                x = randn(ComplexF32, (s_small))
+                @test x ≈ resample(resample(x, s_large), s_small)
+                @test x ≈ resample_by_FFT(resample_by_FFT(x, s_large), s_small)
+                @test x ≈ resample_by_FFT(resample_by_FFT(real(x), s_large), s_small) + 1im .* resample_by_FFT(resample_by_FFT(imag(x), s_large), s_small) 
+                @test x ≈ FourierTools.resample_by_1D(FourierTools.resample_by_1D(x, s_large), s_small)
+            end
+        end
+    end
+
+    @testset "Test that different resample methods are consistent" begin
+        for dim = 1:3
+            for _ in 1:5
+                s_small = ntuple(_ -> rand(1:13), dim)
+                s_large = ntuple(i -> max.(s_small[i], rand(10:16)), dim)
+                
+                x = randn(Float32, (s_small))
+                @test ≈(FourierTools.resample(x, s_large), FourierTools.resample_by_1D(x, s_large))
+            end
+        end
+    end
+
+    @testset "Test that complex and real routine produce same result for real array" begin
+        for dim = 1:3
+            for _ in 1:5
+                s_small = ntuple(_ -> rand(1:13), dim)
+                s_large = ntuple(i -> max.(s_small[i], rand(10:16)), dim)
+                
+                x = randn(Float32, (s_small))
+                @test Float32.(resample(x, s_large)) ≈ Float32.(real(resample(ComplexF32.(x), s_large)))
+                @test FourierTools.resample_by_1D(x, s_large) ≈ real(FourierTools.resample_by_1D(ComplexF32.(x), s_large))
+            end
+        end
+    end
+
+
+    @testset "Tests that resample_by_FFT is purely real" begin
+        function test_real(s_1, s_2)
+            x = randn(Float32, (s_1))
+            y = resample_by_FFT(x, s_2)
+            @test all(( imag.(y) .+ 1 .≈ 1))
+            y = FourierTools.resample_by_1D(x, s_2)
+            @test all(( imag.(y) .+ 1 .≈ 1))
+        end
+
+        for dim = 1:3
+            for _ in 1:5
+                s_1 = ntuple(_ -> rand(1:13), dim)
+                s_2 = ntuple(i -> rand(1:13), dim)
+                test_real(s_1, s_2)
+            end
+        end
+            
+        test_real((4, 4),(6, 6))
+        test_real((4, 4),(6, 7))
+        test_real((4, 4),(9, 9))
+        test_real((4, 5),(9, 9))
+        test_real((4, 5),(9, 8))
+        test_real((8, 8),(6, 7))
+        test_real((8, 8),(6, 5))
+        test_real((8, 8),(4, 5))
+        test_real((9, 9),(4, 5))
+        test_real((9, 9),(4, 5))
+        test_real((9, 9),(7, 8))
+        test_real((9, 9),(6, 5))
+
+    end
+
+    @testset "Sinc interpolation based on FFT" begin
+
+    function test_interpolation_sum_fft(N_low, N)
+	    x_min = 0.0
+	    x_max = 16π
+	    
+	    xs_low = range(x_min, x_max, length=N_low+1)[1:N_low]
+	    xs_high = range(x_min, x_max, length=N)[1:end-1]
+	    f(x) = sin(0.5*x) + cos(x) + cos(2 * x) + sin(0.25*x)
+	    arr_low = f.(xs_low)
+	    arr_high = f.(xs_high)
+
+	    xs_interp = range(x_min, x_max, length=N+1)[1:N]
+	    arr_interp = resample(arr_low, N)
+	    arr_interp2 = FourierTools.resample_by_1D(arr_low, N)
+
+
+        @test ≈(arr_interp[2*N ÷10: N*8÷10], arr_high[2* N ÷10: N*8÷10], rtol=0.05)
+        @test ≈(arr_interp2[2*N ÷10: N*8÷10], arr_high[2* N ÷10: N*8÷10], rtol=0.05)
+    end
+
+    test_interpolation_sum_fft(128, 1000)
+    test_interpolation_sum_fft(129, 1000)
+    test_interpolation_sum_fft(120, 1531)
+    test_interpolation_sum_fft(121, 1211)
+    end
+
+    @testset "Upsample2 compared to resample" begin
+    for sz in ((10,10),(5,8,9),(20,5,4))
+        a = rand(sz...)
+        @test ≈(upsample2(a),resample(a,sz.*2))
+        @test ≈(upsample2_abs2(a),abs2.(resample(a,sz.*2)))
+        a = rand(ComplexF32, sz...)
+        @test ≈(upsample2(a),resample(a,sz.*2))
+        @test ≈(upsample2_abs2(a),abs2.(resample(a,sz.*2)))
+        s2 = (d == 2 ? sz[d]*2 : sz[d] for d in 1:length(sz))
+        @test ≈(upsample2(a, dims=(2,)),resample(a,s2))
+        @test ≈(upsample2_abs2(a, dims=(2,)),abs2.(resample(a,s2)))
+        @test size( upsample2(collect(collect(1.0:9.0)'); fix_center=true, keep_singleton=true)) == (1,18)
+        @test upsample2(collect(1.0:9.0); fix_center=false)[1:16] ≈ upsample2(collect(1.0:9.0); fix_center=true)[2:17]
+    end
+    end
+
+    @testset "Downsampling based on frequency cutting" begin
+    function test_resample(N_low, N)
+	    x_min = 0.0
+	    x_max = 16π
+	    
+	    xs_low = range(x_min, x_max, length=N_low+1)[1:N_low]
+	    f(x) = sin(0.5*x) + cos(x) + cos(2 * x) + sin(0.25*x)
+	    arr_low = f.(xs_low)
+
+	    xs_interp = range(x_min, x_max, length=N+1)[1:N]
+	    arr_interp = resample(arr_low, N)
+
+	    xs_interp_s = range(x_min, x_max, length=N+1)[1:N]
+
+        arr_ds = resample(arr_interp, (N_low,) )
+        @test ≈(arr_ds, arr_low)
+        @test eltype(arr_low) === eltype(arr_ds)
+        @test eltype(arr_interp) === eltype(arr_ds)
+    end
+
+    test_resample(128, 1000)
+    test_resample(128, 1232)
+    test_resample(128, 255)
+    test_resample(253, 254)
+    test_resample(253, 1001)
+    test_resample(99, 100101)
+
+    end
+
+    @testset "FFT resample in 2D" begin
+    
+    
+        function test_2D(in_s, out_s)
+            x = range(-10.0, 10.0, length=in_s[1] + 1)[1:end-1]
+            y = range(-10.0, 10.0, length=in_s[2] + 1)[1:end-1]'
+    	    arr = abs.(x) .+ abs.(y) .+ sinc.(sqrt.(x .^2 .+ y .^2))
+    	    arr_interp = resample(arr[1:end, 1:end], out_s);
+    	    arr_ds = resample(arr_interp, in_s)
+            @test arr_ds ≈ arr
+        end
+    
+        test_2D((128, 128), (150, 150))
+        test_2D((128, 128), (151, 151))
+        test_2D((129, 129), (150, 150))
+        test_2D((129, 129), (151, 151))
+        
+        test_2D((150, 128), (151, 150))
+        test_2D((128, 128), (151, 153))
+        test_2D((129, 128), (150, 153))
+        test_2D((129, 128), (129, 153))
+    
+    
+        x = range(-10.0, 10.0, length=129)[1:end-1]
+        x2 = range(-10.0, 10.0, length=130)[1:end-1]
+        x_exact = range(-10.0, 10.0, length=2049)[1:end-1]
+        y = x'
+        y2 = x2'
+        y_exact = x_exact'
+        arr = abs.(x) .+ abs.(y) .+sinc.(sqrt.(x .^2 .+ y .^2))
+        arr2 = abs.(x) .+ abs.(y) .+sinc.(sqrt.(x .^2 .+ y .^2))
+        arr_exact = abs.(x_exact) .+ abs.(y_exact) .+ sinc.(sqrt.(x_exact .^2 .+ y_exact .^2))
+        arr_interp = resample(arr[1:end, 1:end], (131, 131));
+        arr_interp2 = resample(arr[1:end, 1:end], (512, 512));
+        arr_interp3 = resample(arr[1:end, 1:end], (1024, 1024));
+        arr_ds = resample(arr_interp, (128, 128))
+        arr_ds2 = resample(arr_interp, (128, 128))
+        arr_ds23 = resample(arr_interp2, (512, 512))
+        arr_ds3 = resample(arr_interp, (128, 128))
+    
+        @test ≈(arr_ds3, arr)
+        @test ≈(arr_ds2, arr)
+        @test ≈(arr_ds, arr)
+        @test ≈(arr_ds23, arr_interp2)
+    
+    end
+    
+    
+    @testset "FFT resample 2D for a complex signal" begin
+    
+        function test_2D(in_s, out_s)
+        	x = range(-10.0, 10.0, length=in_s[1] + 1)[1:end-1]
+        	y = range(-10.0, 10.0, length=in_s[2] + 1)[1:end-1]'
+        	f(x, y) = 1im * (abs(x) + abs(y) + sinc(sqrt(x ^2 + y ^2)))
+        	f2(x, y) =  abs(x) + abs(y) + sinc(sqrt((x - 5) ^2 + (y - 5)^2))
+        
+        	arr = f.(x, y) .+ f2.(x, y)
+        	arr_interp = resample(arr[1:end, 1:end], out_s);
+        	arr_ds = resample(arr_interp, in_s)
+            
+            @test eltype(arr) === eltype(arr_ds)
+            @test eltype(arr_interp) === eltype(arr_ds)
+            @test imag(arr) ≈ imag(arr_ds)
+            @test real(arr) ≈ real(arr_ds)
+        end
+    
+        test_2D((128, 128), (150, 150))
+        test_2D((128, 128), (151, 151))
+        test_2D((129, 129), (150, 150))
+        test_2D((129, 129), (151, 151))
+        
+        test_2D((150, 128), (151, 150))
+        test_2D((128, 128), (151, 153))
+        test_2D((129, 128), (150, 153))
+        test_2D((129, 128), (129, 153))
+    end
+    
+    
+    @testset "FFT resample in 2D for a purely imaginary signal" begin
+        function test_2D(in_s, out_s)
+        	x = range(-10.0, 10.0, length=in_s[1] + 1)[1:end-1]
+        	y = range(-10.0, 10.0, length=in_s[2] + 1)[1:end-1]'
+        	f(x, y) = 1im * (abs(x) + abs(y) + sinc(sqrt(x ^2 + y ^2)))
+        
+        	arr = f.(x, y)
+        	arr_interp = resample(arr[1:end, 1:end], out_s);
+        	arr_ds = resample(arr_interp, in_s)
+            
+            @test imag(arr) ≈ imag(arr_ds)
+            @test all(real(arr_ds) .< 1e-13)
+            @test all(real(arr_interp) .< 1e-13)
+        end 
+    
+        test_2D((128, 128), (150, 150))
+        test_2D((128, 128), (151, 151))
+        test_2D((129, 129), (150, 150))
+        test_2D((129, 129), (151, 151))
+        
+        test_2D((150, 128), (151, 150))
+        test_2D((128, 128), (151, 153))
+        test_2D((129, 128), (150, 153))
+        test_2D((129, 128), (129, 153))
+    end
+
+    @testset "test select_region_ft" begin
+        x = [1,2,3,4]
+        @test select_region_ft(ffts(x), (5,)) == ComplexF64[-1.0 + 0.0im, -2.0 - 2.0im, 10.0 + 0.0im, -2.0 + 2.0im, -1.0 + 0.0im]
+        x = [3.1495759241275225 0.24720770605505335 -1.311507800204285 -0.3387627167144301; -0.7214121984874265 -0.02566249380406308 0.687066447881175 -0.09536748694092163; -0.577092696986848 -0.6320809680268722 -0.09460071173365793 0.7689715736798227; 0.4593837753047561 -1.0204193548690512 -0.28474772376166907 1.442443602597533]
+        @test select_region_ft(ffts(x), (7, 7)) == ComplexF64[0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im; 0.0 + 0.0im 0.32043577156395486 + 0.0im 2.321469443190397 + 0.7890379226962572im 0.38521287113798636 + 0.0im 2.321469443190397 - 0.7890379226962572im 0.32043577156395486 + 0.0im 0.0 + 0.0im; 0.0 + 0.0im 1.3691035744780353 + 0.16703621316206385im 2.4110077589815555 - 0.16558718095884828im 2.2813159163314163 - 0.7520360306228049im 7.47614366018844 - 4.139633109911205im 1.3691035744780353 + 0.16703621316206385im 0.0 + 0.0im; 0.0 + 0.0im 0.4801675770812479 + 0.0im 3.3142445917764407 - 3.2082400832669373im 1.6529948781166373 + 0.0im 3.3142445917764407 + 3.2082400832669373im 0.4801675770812479 + 0.0im 0.0 + 0.0im; 0.0 + 0.0im 1.3691035744780353 - 0.16703621316206385im 7.47614366018844 + 4.139633109911205im 2.2813159163314163 + 0.7520360306228049im 2.4110077589815555 + 0.16558718095884828im 1.3691035744780353 - 0.16703621316206385im 0.0 + 0.0im; 0.0 + 0.0im 0.32043577156395486 + 0.0im 2.321469443190397 + 0.7890379226962572im 0.38521287113798636 + 0.0im 2.321469443190397 - 0.7890379226962572im 0.32043577156395486 + 0.0im 0.0 + 0.0im; 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im]
+    end
+
+    @testset "test resample_czt" begin
+        dim =2
+        s_small = (12,16) # ntuple(_ -> rand(1:13), dim)
+        s_large = (20,18) # ntuple(i -> max.(s_small[i], rand(10:16)), dim)
+        dat = select_region(randn(Float32, (5,6)), new_size= s_small)
+        rs1 = FourierTools.resample(dat, s_large)
+        rs1b = select_region(rs1, new_size=size(dat))
+        rs2 = FourierTools.resample_czt(dat, s_large./s_small, do_damp=false)
+        @test rs1b ≈ rs2
+        rs2 = FourierTools.resample_czt(dat, (x->s_large[2]./s_small[2], y->s_large[1]./s_small[1]), do_damp=false)
+        @test rs1b ≈ rs2
+        rs2 = FourierTools.resample_czt(dat, (x->1.0, y->1.0), shear=(x->10.0,y->0.0),do_damp=false)
+        @test shear(dat,10) ≈ rs2
+        rs2 = FourierTools.resample_czt(dat, (x->1.0, y->1.0), shear=(10.0,0.0),do_damp=false)
+        @test shear(dat,10) ≈ rs2
+        rs2 = barrel_pin(dat, 0.5)
+        rs2b = FourierTools.resample_czt(dat, (x -> 1.0 + 0.5 .* (x-0.5)^2,x -> 1.0 + 0.5 .* (x-0.5)^2))
+        @test rs2b ≈ rs2
+    end
+
+    @testset "test resample_nfft" begin
+        dim =2
+        s_small = (12,16) # ntuple(_ -> rand(1:13), dim)
+        s_large = (20,18) # ntuple(i -> max.(s_small[i], rand(10:16)), dim)
+        dat = select_region(randn(Float32, (5,6)), new_size= s_small)
+        rs1 = FourierTools.resample(dat, s_large)
+        rs1b = select_region(rs1, new_size=size(dat))
+        mymap = (t) -> t .* s_small ./ s_large  
+        rs3 = FourierTools.resample_nfft(dat, mymap)
+        @test isapprox(rs1b, rs3, rtol=0.1)
+        new_pos = mymap.(idx(size(dat), scale=ScaFT))
+        rs4 = FourierTools.resample_nfft(dat, new_pos)
+        @test rs4 ≈ rs3
+        new_pos = cat(s_small[1]./s_large[1] .* xx(size(dat), scale=ScaFT), s_small[2]./s_large[2] .* yy(size(dat), scale=ScaFT),dims=3)
+        rs5 = FourierTools.resample_nfft(dat, new_pos)
+        @test rs5 ≈ rs3
+        # @test rs1b ≈ rs3
+        # test both modes: src and destination but only for a 1-pixel shift
+        rs6 = FourierTools.resample_nfft(dat, t->t .+ 1.0, is_src_coords=false, is_in_pixels=true)
+        rs7 = FourierTools.resample_nfft(dat, t->t .- 1.0, is_src_coords=true, is_in_pixels=true)
+        @test rs6 ≈ rs7
+        # test shrinking by a factor of two
+        new_pos = cat(xx(s_small.÷2, scale=ScaFT),yy(s_small.÷2, scale=ScaFT), dims=3)
+        rs8 = FourierTools.resample_nfft(dat, t->t, s_small.÷2, is_src_coords=true)
+        rs9 = FourierTools.resample_nfft(dat, new_pos,  is_src_coords=true)
+        rss = FourierTools.resample(dat, s_small.÷2)
+        @test rs8 ≈ rs9
+        rs10 = FourierTools.resample_nfft(dat, t->t, s_small.÷2; is_src_coords=false, is_in_pixels=true)
+        new_pos = cat(xx(s_small, offset=(0,0)),yy(s_small,offset=(0,0)), dims=3)
+        rs11 = FourierTools.resample_nfft(dat, new_pos, s_small.÷2; is_src_coords=false, is_in_pixels=true)
+        @test rs10 ≈ rs11    
+        # test the non-strided array
+        rs6 = FourierTools.resample_nfft(Base.PermutedDimsArray(dat,(2,1)), t->t .+ 1.0, is_src_coords=false, is_in_pixels=true)
+        rs7 = FourierTools.resample_nfft(Base.PermutedDimsArray(dat,(2,1)), t->t .- 1.0, is_src_coords=true, is_in_pixels=true)
+        @test rs6 ≈ rs7
+        rs6 = FourierTools.resample_nfft(1im .* dat , t->t .* 2.0, s_small.÷2, is_src_coords=false, is_in_pixels=false, pad_value=0.0)
+        rs7 = FourierTools.resample_nfft(1im .* dat, t->t .* 0.5, s_small.÷2, is_src_coords=true, is_in_pixels=false, pad_value=0.0)
+        @test rs6.*4 ≈ rs7
+    end
+
+end
diff --git a/test_old/runtests.jl b/test_old/runtests.jl
new file mode 100644
index 0000000..0c8ec32
--- /dev/null
+++ b/test_old/runtests.jl
@@ -0,0 +1,30 @@
+using Random, Test, FFTW
+using FourierTools
+using ImageTransformations
+using IndexFunArrays
+using Zygote
+using NDTools
+using LinearAlgebra # for the assigned nfft function LinearAlgebra.mul!
+using FractionalTransforms
+using TestImages
+
+Random.seed!(42)
+
+include("fft_helpers.jl")
+include("fftshift_alternatives.jl")
+include("utils.jl")
+include("fourier_shifting.jl")
+include("fourier_shear.jl")
+include("fourier_rotate.jl")
+include("resampling_tests.jl")
+include("convolutions.jl")
+include("correlations.jl")
+include("custom_fourier_types.jl")
+include("damping.jl")
+include("czt.jl")
+include("nfft_tests.jl")
+include("fractional_fourier_transform.jl")
+include("fourier_filtering.jl")
+include("sdft.jl")
+
+return
diff --git a/test_old/sdft.jl b/test_old/sdft.jl
new file mode 100644
index 0000000..02a7d64
--- /dev/null
+++ b/test_old/sdft.jl
@@ -0,0 +1,102 @@
+import FourierTools:
+    sdft_windowlength,
+    sdft_update!,
+    sdft_previousdft,
+    sdft_previousdata,
+    sdft_nextdata,
+    sdft_iteration,
+    sdft_backindices,
+    sdft_dataoffsets
+
+# Dummy method to test more complex designs
+struct TestSDFT{T,C} <: AbstractSDFT
+    n::T
+    factor::C
+end
+TestSDFT(n) = TestSDFT(n, exp(2π*im/n))
+sdft_windowlength(method::TestSDFT) = method.n
+sdft_backindices(::TestSDFT) = [0, 2]
+sdft_dataoffsets(::TestSDFT) = [0, 1]
+
+function sdft_update!(dft, x, method::TestSDFT{T,C}, state) where {T,C}
+    twiddle = one(C)
+    dft0 = sdft_previousdft(state, 0)
+    unused_dft = sdft_previousdft(state, 2) # not used - add for coverage
+    unused_data = sdft_previousdata(state, 1) # not used - add for coverage
+    unused_count = sdft_iteration(state) # not used - add for coverage
+    for k in eachindex(dft)
+        dft[k] = twiddle * (dft0[k] + sdft_nextdata(state) - sdft_previousdata(state)) +
+            0.0 * (unused_dft[k] + unused_data + unused_count)
+        twiddle *= method.factor
+    end
+end
+
+# Dummy method to test exceptions
+struct ErrorSDFT  <: AbstractSDFT end
+sdft_windowlength(method::ErrorSDFT) = 2
+function sdft_update!(dft, x, ::ErrorSDFT, state)
+    doesnotexist = sdft_previousdft(state, 1)
+    nothing
+end
+
+# Piecewise sinusoidal signal
+function signal(x)
+    if x < 1
+        5*cos(4π*x)
+    elseif x < 2
+        (-2x+7)*cos(2π*(x^2+1))
+    else
+        3*cos(10π*x)
+    end
+end
+
+y = signal.(range(0, 3, length=61))
+n = 20
+sample_offsets = (0, 20, 40)
+dfty_sample = [fft(view(y, (1:n) .+ offset)) for offset in sample_offsets]
+
+@testset "Sliding DFT" begin
+    # Compare SDFT
+    @testset "SDFT" begin
+        method = SDFT(n)
+        dfty = collect(method(y))
+        @testset "stateless" for i in eachindex(sample_offsets)
+            @test dfty[1 + sample_offsets[i]] ≈ dfty_sample[i]
+        end
+        dfty = collect(method(Iterators.Stateful(y)))
+        @testset "stateful" for i in eachindex(sample_offsets)
+            @test dfty[1 + sample_offsets[i]] ≈ dfty_sample[i]
+        end
+    end
+
+    # Method with dft history and more data points
+    @testset "TestSDFT" begin
+        method = TestSDFT(n)
+        dfty = collect(method(y))
+        @testset for i in eachindex(sample_offsets)
+            @test dfty[1 + sample_offsets[i]] ≈ dfty_sample[i]
+        end
+    end
+
+    # Exceptions
+    @testset "Exceptions" begin
+        @test_throws "insufficient data" iterate(SDFT(10)(ones(5)))
+        @test_throws "insufficient data" iterate(SDFT(10)(Float64[]))
+        @test_throws "previous DFT results not available" collect(ErrorSDFT()(y))
+    end
+
+    # Additional coverage
+    @testset "Extra" begin
+        itr = SDFT(n)(y)
+        _, state = iterate(itr)
+        @test ismissing(Base.isdone(itr))
+        @test ismissing(Base.isdone(itr, state))
+        FourierTools.sdft_updatedfthistory!(nothing)
+        FourierTools.sdft_updatefragment!(nothing, nothing, nothing)
+        dummy_state = FourierTools.SDFTStateData(nothing, nothing, 1.0, 1, 1)
+        @test FourierTools.haspreviousdata(dummy_state) == false
+        # sdft_dataoffsets
+        @test iszero(FourierTools.sdft_dataoffsets(SDFT(n)))
+        @test isnothing(FourierTools.sdft_dataoffsets(nothing))
+    end
+end
\ No newline at end of file
diff --git a/test_old/utils.jl b/test_old/utils.jl
new file mode 100644
index 0000000..5fdf23a
--- /dev/null
+++ b/test_old/utils.jl
@@ -0,0 +1,141 @@
+@testset "Test util functions" begin
+
+    @testset "Test fft center and rfft_center_diff" begin
+        Random.seed!(42)
+        @test 2 == FourierTools.fft_center(3)
+        @test 3 == FourierTools.fft_center(4)
+        @test 3 == FourierTools.fft_center(5)
+        @test (2,3,4) == FourierTools.fft_center.((3,4,6))
+
+
+        @test (0, 1, 2, 3) == FourierTools.ft_center_diff((12, 3, 5,6), (2,3,4))
+        @test (6, 1, 2, 3) == FourierTools.ft_center_diff((12, 3, 5,6))
+
+
+        @test (0, 0, 2, 3) == FourierTools.rft_center_diff((12, 3, 5,6), (2,3,4))
+        @test (0, 0, 0, 3) == FourierTools.rft_center_diff((12, 3, 5,6), (3,4))
+        @test (0, 0, 2, 3) == FourierTools.rft_center_diff((13, 3, 5,6), (1,3,4))
+        @test (0, 1, 2, 3) == FourierTools.rft_center_diff((13, 3, 5,6))
+
+    end
+
+
+
+    @testset "Test rfft_size" begin
+        s = (11, 20, 10)
+        @test FourierTools.rfft_size(s, 2) == size(rfft(randn(s),2))
+        @test FourierTools.rft_size(randn(s), 2) == size(rfft(randn(s),2))
+         
+        s = (11, 21, 10)
+        @test FourierTools.rfft_size(s, 2) == size(rfft(randn(s),2))
+        
+        s = (11, 21, 10)
+        @test FourierTools.rfft_size(s, 1) == size(rfft(randn(s),(1,2,3)))
+    end
+
+
+
+    function center_test(x1, x2, x3, y1, y2, y3)
+        arr1 = randn((x1, x2, x3))
+        arr2 = zeros((y1, y2, y3))
+    
+        FourierTools.center_set!(arr2, arr1)
+        arr3 = FourierTools.center_extract(arr2, (x1, x2, x3))
+        @test arr1 == arr3
+    end
+    
+     # test center set and center extract methods
+    @testset "center methods" begin
+        center_test(4, 4, 4, 6,7,4)
+        center_test(5, 4, 4, 7, 8, 4)
+        center_test(5, 4, 4, 8, 8, 8)
+        center_test(6, 4, 4, 7, 8, 8)
+    
+    
+        @test 1 == FourierTools.center_pos(1)
+        @test 2 == FourierTools.center_pos(2)
+        @test 2 == FourierTools.center_pos(3)
+        @test 3 == FourierTools.center_pos(4)
+        @test 3 == FourierTools.center_pos(5)
+        @test 513 == FourierTools.center_pos(1024)
+    
+        @test FourierTools.get_indices_around_center((5), (2)) == (2, 3)
+        @test FourierTools.get_indices_around_center((5), (3)) == (2, 4)
+        @test FourierTools.get_indices_around_center((4), (3)) == (2, 4)
+        @test FourierTools.get_indices_around_center((4), (2)) == (2, 3)
+    end
+
+
+    @testset "Test fftpos" begin
+
+        @test fftpos(1, 4, CenterFT) ≈  -0.5:0.25:0.25 
+        @test fftpos(1, 4, CenterLast) ≈ -0.75:0.25:0.0 
+        @test fftpos(1, 4, CenterMiddle) ≈ -0.375:0.25:0.375 
+        @test fftpos(1, 4, CenterFirst) ≈ 0.0:0.25:0.75 
+        @test fftpos(1, 4) ≈ 0.0:0.25:0.75 
+        @test fftpos(1f0, 4, 2) ≈ -0.25f0:0.25f0:0.5f0
+
+
+        function f(l, N)
+            a = fftpos(l, N, CenterFT)
+            b = fftpos(l, N, CenterFirst)
+            c = fftpos(l, N, CenterLast)
+            d = fftpos(l, N, CenterMiddle)
+            @test (a[end] - a[begin] ≈ b[end] - b[begin] ≈ c[end] - c[begin] ≈ d[end] -d[begin])
+        end
+
+        f(1, 2)
+        f(1, 3)
+        f(42, 4)
+        f(42, 5)
+    end
+
+
+    @testset "Test δ" begin 
+        @test δ((3, 3)) == [0 0 0; 0 1 0; 0 0 0]
+        @test δ((4, 3)) == [0 0 0; 0 0 0; 0 1 0; 0 0 0]
+        @test δ(Float32, (4, 3)) == Float32[0.0 0.0 0.0; 0.0 0.0 0.0; 0.0 1.0 0.0; 0.0 0.0 0.0]
+        @test δ(Float32, (4, 3)) |> eltype == Float32
+        @test δ(Float32, (4, 3)) |> eltype == Float32
+        @test δ(Float32, (4, 3)) == Float32[0.0 0.0 0.0; 0.0 0.0 0.0; 0.0 1.0 0.0; 0.0 0.0 0.0]
+    end
+
+
+
+    @testset "Pixel size conversion" begin
+        @test fourierspace_pixelsize(1, 512) ≈ 1 / 512 
+        @test all(fourierspace_pixelsize(1, (512,256)) .≈ 1 ./ (512, 256))
+        @test realspace_pixelsize(1, 512) ≈ 1 / 512 
+        @test all(realspace_pixelsize(1, (512,256)) .≈ 1 ./ (512, 256))
+
+    end
+
+
+    @testset "Check eltype error" begin
+        @test_throws ArgumentError FourierTools.eltype_error(Float32, Float64)
+        @test isnothing(FourierTools.eltype_error(Int, Int))
+    end
+
+    @testset "odd_view, fourier_reverse!" begin
+        a = [1 2 3;4 5 6;7 8 9;10 11 12]
+        @test FourierTools.odd_view(a) == [4 5 6;7 8 9; 10 11 12]
+        fourier_reverse!(a)
+        @test a == [3 2 1;12 11 10;9 8 7;6 5 4]
+        a = [1 2 3;4 5 6;7 8 9;10 11 12]
+        b = copy(a);
+        fourier_reverse!(a,dims=1);
+        @test a[2:end,:] == b[end:-1:2,:]
+        a = [1 2 3 4;5 6 7 8;9 10 11 12 ;13 14 15 16]
+        b = copy(a);
+        fourier_reverse!(a);
+        @test a[2,2] == b[4,4]
+        @test a[2,3] == b[4,3]
+        fourier_reverse!(a);
+        @test a == b
+        fourier_reverse!(a;dims=1);
+        @test a[2:end,:] == b[end:-1:2,:]
+        @test sum(abs.(imag.(ift(fourier_reverse!(ft(rand(5,6,7))))))) < 1e-10
+        sz = (10,9,6)
+        @test sum(abs.(real.(ift(fourier_reverse!(ft(box((sz)))))) .- box(sz))) < 1e-10
+    end
+end

From a559c4f37017ca2333a68b84882aa063ce6ff126 Mon Sep 17 00:00:00 2001
From: RainerHeintzmann <heintzmann@gmail.com>
Date: Thu, 20 Mar 2025 18:24:05 +0100
Subject: [PATCH 04/25] added CircShiftedArray.jl

---
 src/CircShiftedArrays.jl | 279 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 279 insertions(+)
 create mode 100644 src/CircShiftedArrays.jl

diff --git a/src/CircShiftedArrays.jl b/src/CircShiftedArrays.jl
new file mode 100644
index 0000000..1f5a308
--- /dev/null
+++ b/src/CircShiftedArrays.jl
@@ -0,0 +1,279 @@
+export CircShiftedArray
+using Base
+using CUDA
+
+# a = reshape(1:1000000,(1000,1000)) .+ 0
+# a = reshape(1:(15*15),(15,15)) .+ 0
+# c = CircShiftedArray(a,(3,3));
+# b = copy(a)
+# d = c .+ c;
+
+"""
+    CircShiftedArray{T, N, A<:AbstractArray{T,N}, myshift<:NTuple{N,Int}} <: AbstractArray{T,N}
+
+is a type which lazily encampsulates a circular shifted array. If broadcasted with another `CircShiftedArray` it will stay to be a `CircShiftedArray` as long as the shifts are equal.
+For unequal shifts, the `circshift` routine will be used. Note that the shift is encoded as an `NTuple{}` into the type definition.
+"""
+struct CircShiftedArray{T, N, A<:AbstractArray{T,N}, myshift<:Tuple} <: AbstractArray{T,N}
+    parent::A    
+
+    function CircShiftedArray(parent::A, myshift::NTuple{N,Int}) where {T,N,A<:AbstractArray{T,N}}
+        ws = wrapshift(myshift, size(parent))
+        new{T,N,A, Tuple{ws...}}(parent)
+    end
+    function CircShiftedArray(parent::CircShiftedArray{T,N,A,S}, myshift::NTuple{N,Int}) where {T,N,A,S}
+        ws = wrapshift(myshift .+ to_tuple(csa_shift(typeof(parent))), size(parent))
+        new{T,N,A, Tuple{ws...}}(parent.parent)
+    end
+    # function CircShiftedArray(parent::CircShiftedArray{T,N,A,S}, myshift::NTuple{N,Int}) where {T,N,A,S==myshift}
+    #     parent
+    # end
+end
+# just a more convenient name
+circshift(arr, myshift) = CircShiftedArray(arr, myshift)
+# wraps shifts into the range 0...N-1
+wrapshift(shift::NTuple, dims::NTuple) = ntuple(i -> mod(shift[i], dims[i]), length(dims))
+# wraps indices into the range 1...N
+wrapids(shift::NTuple, dims::NTuple) = ntuple(i -> mod1(shift[i], dims[i]), length(dims))
+invert_rng(s, sz) = wrapshift(sz .- s, sz)
+
+# define a new broadcast style
+struct CircShiftedArrayStyle{N,S} <: Base.Broadcast.AbstractArrayStyle{N} end
+csa_shift(::Type{CircShiftedArray{T,N,A,S}}) where {T,N,A,S} = S
+to_tuple(S::Type{T}) where {T<:Tuple}= tuple(S.parameters...)
+csa_shift(::CircShiftedArray{T,N,A,S}) where {T,N,A,S} = to_tuple(S)
+
+# convenient constructor
+CircShiftedArrayStyle{N,S}(::Val{M}, t::Tuple) where {N,S,M} = CircShiftedArrayStyle{max(N,M), Tuple{t...}}()
+# make it known to the system
+Base.Broadcast.BroadcastStyle(::Type{T}) where (T<: CircShiftedArray) = CircShiftedArrayStyle{ndims(T), csa_shift(T)}()
+# make subarrays (views) of CircShiftedArray also broadcast inthe CircArray style:
+Base.Broadcast.BroadcastStyle(::Type{SubArray{T,N,P,I,L}}) where {T,N,P<:CircShiftedArray,I,L} = CircShiftedArrayStyle{ndims(P), csa_shift(P)}()
+# Base.Broadcast.BroadcastStyle(::Type{T}) where (T2,N,P,I,L, T <: SubArray{T2,N,P,I,L})= CircShiftedArrayStyle{ndims(P), csa_shift(p)}()
+Base.Broadcast.BroadcastStyle(::CircShiftedArrayStyle{N,S}, ::Base.Broadcast.DefaultArrayStyle{M}) where {N,S,M} = CircShiftedArrayStyle{max(N,M),S}() #Broadcast.DefaultArrayStyle{CuArray}()
+function Base.Broadcast.BroadcastStyle(::CircShiftedArrayStyle{N,S1}, ::CircShiftedArrayStyle{M,S2}) where {N,S1,M,S2}
+    if S1 != S2
+        # maybe one could force materialization at this point instead.
+        error("You currently cannot mix CircShiftedArray of different shifts in a broadcasted expression.")
+    end
+    CircShiftedArrayStyle{max(N,M),S1}() #Broadcast.DefaultArrayStyle{CuArray}()
+end
+#Base.Broadcast.BroadcastStyle(::CircShiftedArrayStyle{0,S}, ::Base.Broadcast.DefaultArrayStyle{M}) where {S,M} = CircShiftedArrayStyle{M,S} #Broadcast.DefaultArrayStyle{CuArray}()
+
+@inline Base.size(csa::CircShiftedArray) = size(csa.parent)
+@inline Base.size(csa::CircShiftedArray, d::Int) = size(csa.parent, d)
+@inline Base.axes(csa::CircShiftedArray) = axes(csa.parent)
+@inline Base.IndexStyle(::Type{<:CircShiftedArray}) = IndexLinear()
+@inline Base.parent(csa::CircShiftedArray) = csa.parent
+
+CircShiftedVector(v::AbstractVector, n = ()) = CircShiftedArray(v, n)
+
+
+# linear indexing ignores the shifts
+@inline Base.getindex(csa::CircShiftedArray{T,N,A,S}, i::Int) where {T,N,A,S} = getindex(csa.parent, i)
+@inline Base.setindex!(csa::CircShiftedArray{T,N,A,S}, v, i::Int) where {T,N,A,S} = setindex!(csa.parent, v, i)
+
+# ttest(csa::CircShiftedArray{T,N,A,S}) where {T,N,A,S} = println("$S,  $(to_tuple(S))")
+
+# mod1 avoids first subtracting one and then adding one
+@inline Base.getindex(csa::CircShiftedArray{T,N,A,S}, i::Vararg{Int,N}) where {T,N,A,S} = 
+    getindex(csa.parent, (mod1(i[j]-to_tuple(S)[j], size(csa.parent, j)) for j in 1:N)...)
+
+@inline Base.setindex!(csa::CircShiftedArray{T,N,A,S}, v, i::Vararg{Int,N}) where {T,N,A,S} = 
+    (setindex!(csa.parent, v, (mod1(i[j]-to_tuple(S)[j], size(csa.parent, j)) for j in 1:N)...); v)
+
+# if materialize is provided, a broadcasting expression would always collapse to the base type.
+# Base.Broadcast.materialize(csa::CircShiftedArray{T,N,A,S}) where {T,N,A,S} = circshift(csa.parent, to_tuple(S))
+
+# These apply for broadcasted assignment operations.
+@inline Base.Broadcast.materialize!(dest::CircShiftedArray{T,N,A,S}, csa::CircShiftedArray{T2,N2,A2,S}) where {T,N,A,S,T2,N2,A2} = Base.Broadcast.materialize!(dest.parent, csa.parent)
+
+# function Base.Broadcast.materialize(bc::Base.Broadcast.Broadcasted{CircShiftedArrayStyle{N,S}}) where {T,N,A,S}
+#     similar(...size(bz)
+#     invoke(Base.Broadcast.materialize!, Tuple{CircShiftedArray{T,N,A,S}, Base.Broadcast.Broadcasted}, dest, bc)
+# end
+
+# remove all the circ-shift part if all shifts are the same
+@inline function Base.Broadcast.materialize!(dest::CircShiftedArray{T,N,A,S}, bc::Base.Broadcast.Broadcasted{CircShiftedArrayStyle{N,S}}) where {T,N,A,S}
+    invoke(Base.Broadcast.materialize!, Tuple{A, Base.Broadcast.Broadcasted}, dest.parent, remove_csa_style(bc))
+    return dest
+end
+# we cannot specialize the Broadcast style here, since the rhs may not contain a CircShiftedArray and still wants to be assigned
+@inline function Base.Broadcast.materialize!(dest::CircShiftedArray{T,N,A,S}, bc::Base.Broadcast.Broadcasted) where {T,N,A,S}
+    #@show "materialize! cs"
+    if only_shifted(bc)
+        # fall back to standard assignment
+        @show "use raw"
+        # to avoid calling the method defined below, we need to use `invoke`:
+        invoke(Base.Broadcast.materialize!, Tuple{AbstractArray, Base.Broadcast.Broadcasted}, dest, bc) 
+    else
+        # get all not-shifted arrays and apply the materialize operations piecewise using array views
+        materialize_checkerboard!(dest.parent, bc, Tuple(1:N), wrapshift(size(dest) .- csa_shift(dest), size(dest)), true)
+    end
+    return dest
+end
+
+# function copy(bc::Base.Broadcast.Broadcasted{CircShiftedArrayStyle{N,S}}) where {N,S}
+#     @show "copy here"
+#     return 0
+# end
+
+@inline function Base.Broadcast.materialize!(dest::AbstractArray, bc::Base.Broadcast.Broadcasted{CircShiftedArrayStyle{N,S}}) where {N,S}
+    materialize_checkerboard!(dest, bc, Tuple(1:N), wrapshift(size(dest) .- to_tuple(S), size(dest)), false)
+    return dest
+end
+
+# needs to generate both ranges as both appear in mixed broadcasting expressions
+function generate_shift_ranges(dest, myshift)
+    circshift_rng_1 = ntuple((d)->firstindex(dest,d):firstindex(dest,d)+myshift[d]-1, ndims(dest))
+    circshift_rng_2 = ntuple((d)->firstindex(dest,d)+myshift[d]:lastindex(dest,d), ndims(dest))
+    noshift_rng_1 = ntuple((d)->lastindex(dest,d)-myshift[d]+1:lastindex(dest,d), ndims(dest))
+    noshift_rng_2 = ntuple((d)->firstindex(dest,d):lastindex(dest,d)-myshift[d], ndims(dest))
+    return ((circshift_rng_1, circshift_rng_2), (noshift_rng_1, noshift_rng_2))
+end
+
+"""
+    materialize_checkerboard!(dest, bc, dims, myshift) 
+
+this function calls itself recursively to subdivide the array into tiles, which each needs to be processed individually via calls to `materialize!`.
+
+|--------|
+| a| b   |
+|--|-----|---|
+| c| dD  | C |
+|--+-----|---|
+   | B   | A |
+   |---------|
+
+"""
+function materialize_checkerboard!(dest, bc, dims, myshift, dest_is_cs_array=true) 
+    @show "materialize_checkerboard"
+    dest = refine_view(dest)
+    # gets Tuples of Tuples of 1D ranges (low and high) for each dimension
+    cs_rngs, ns_rngs = generate_shift_ranges(dest, myshift)
+
+    for n in CartesianIndices(ntuple((x)->2, ndims(dest)))
+        cs_rng = Tuple(cs_rngs[n[d]][d] for d=1:ndims(dest))
+        ns_rng = Tuple(ns_rngs[n[d]][d] for d=1:ndims(dest))
+        dst_rng = ifelse(dest_is_cs_array, cs_rng, ns_rng)
+        dst_rng = refine_shift_rng(dest, dst_rng)
+        dst_view = @view dest[dst_rng...]
+
+        bc1 = split_array_broadcast(bc, ns_rng, cs_rng)
+        if (prod(size(dst_view)) > 0)
+            Base.Broadcast.materialize!(dst_view, bc1)
+        end
+    end
+end
+
+# some code which determines whether all arrays are shifted
+@inline only_shifted(bc::Number)  = true
+@inline only_shifted(bc::AbstractArray)  = false
+@inline only_shifted(bc::CircShiftedArray)  = true
+@inline only_shifted(bc::Base.Broadcast.Broadcasted) = all(only_shifted.(bc.args))
+
+# These functions remove the CircShiftArray in a broadcast and replace each by a view into the original array 
+@inline split_array_broadcast(bc::Number, noshift_rng, shift_rng) = bc
+@inline split_array_broadcast(bc::AbstractArray, noshift_rng, shift_rng) = @view bc[noshift_rng...]
+@inline split_array_broadcast(bc::CircShiftedArray, noshift_rng, shift_rng) = @view bc.parent[shift_rng...]
+@inline split_array_broadcast(bc::CircShiftedArray{T,N,A,NTuple{N,0}}, noshift_rng, shift_rng)  where {T,N,A} =  @view bc.parent[noshift_rng...]
+@inline function split_array_broadcast(v::SubArray{T,N,P,I,L}, noshift_rng, shift_rng) where {T,N,P<:CircShiftedArray,I,L}    
+    new_cs = refine_view(v)
+    new_shift_rng = refine_shift_rng(v, shift_rng)
+    res = split_array_broadcast(new_cs, noshift_rng, new_shift_rng)
+    return res
+end
+
+@inline function refine_shift_rng(v::SubArray{T,N,P,I,L}, shift_rng) where {T,N,P,I,L}    
+    new_shift_rng = ntuple((d)-> ifelse(isa(v.indices[d],Base.Slice), shift_rng[d], Base.Colon()), ndims(v.parent))
+    return new_shift_rng
+end
+@inline refine_shift_rng(v, shift_rng) = shift_rng
+
+"""
+    function refine_view(v::SubArray{T,N,P,I,L}, shift_rng)
+
+returns a refined view of a CircShiftedArray as a CircShiftedArray, if necessary. Otherwise just the original array.
+find out, if the range of this view crosses any boundary of the parent CircShiftedArray
+by calculating the new indices
+if, so though an error. find the full slices, which can stay a circ shifted array withs shifts
+"""
+function refine_view(v::SubArray{T,N,P,I,L}) where {T,N,P<:CircShiftedArray,I,L}
+    myshift = csa_shift(v.parent)
+    sz = size(v.parent)
+    # find out, if the range of this view crosses any boundary of the parent CircShiftedArray
+    # by calculating the new indices
+    # if, so though an error.
+    # find the full slices, which can stay a circ shifted array withs shifts
+    sub_rngs = ntuple((d)-> !isa(v.indices[d], Base.Slice), ndims(v.parent))
+
+    new_ids_begin = wrapids(ntuple((d)-> v.indices[d][begin] .- myshift[d], ndims(v.parent)), sz)
+    new_ids_end = wrapids(ntuple((d)-> v.indices[d][end] .- myshift[d], ndims(v.parent)), sz)
+    if any(sub_rngs .&& (new_ids_end .< new_ids_begin))
+        error("a view of a shifted array is not allowed to cross boarders of the original array. Do not use a view here.")
+        # potentially this can be remedied, once there is a decent CatViews implementation
+    end
+    new_rngs = ntuple((d)-> ifelse(isa(v.indices[d],Base.Slice), v.indices[d], new_ids_begin[d]:new_ids_end[d]), ndims(v.parent))
+    new_shift = ntuple((d)-> ifelse(isa(v.indices[d],Base.Slice), 0, myshift[d]), ndims(v.parent))
+    new_cs = CircShiftedArray((@view v.parent.parent[new_rngs...]), new_shift)
+    return new_cs
+end
+
+refine_view(csa::AbstractArray) = csa
+
+function split_array_broadcast(bc::Base.Broadcast.Broadcasted, noshift_rng, shift_rng)
+    # Ref below protects the argument from broadcasting
+    bc_modified = split_array_broadcast.(bc.args, Ref(noshift_rng), Ref(shift_rng))
+    # @show size(bc_modified[1])
+    res=Base.Broadcast.broadcasted(bc.f, bc_modified...)
+    # @show typeof(res)
+    # Base.Broadcast.Broadcasted{Style, Tuple{modified_axes...}, F, Args}()
+    return res
+end
+
+Base.Broadcast.materialize!(dest::CircShiftedArray{T,N,A,S}, src::CircShiftedArray) where {T,N,A,S} = Base.Broadcast.materialize!(dest.parent, src.parent)
+Base.Broadcast.copyto!(dest::AbstractArray, bc::Base.Broadcast.Broadcasted{CircShiftedArrayStyle{N,S}}) where {N,S} = Base.Broadcast.materialize!(dest, bc)
+
+# function copy(CircShiftedArray)
+#     collect(CircShiftedArray)
+# end
+
+Base.collect(csa::CircShiftedArray{T,N,A,S}) where {T,N,A,S} = circshift(csa.parent, to_tuple(S))
+
+# # interaction with numbers should not still stay a CSA
+# Base.Broadcast.promote_rule(csa::Type{CircShiftedArray}, na::Type{Number})  = typeof(csa)
+# Base.Broadcast.promote_rule(scsa::Type{SubArray{T,N,P,Rngs,B}}, t::T2) where {T,N,P<:CircShiftedArray,Rngs,B,T2}  = typeof(scsa.parent)
+
+#Base.Broadcast.promote_rule(::Type{CircShiftedArray{T,N}}, ::Type{S}) where {T,N,S} = CircShiftedArray{promote_type(T,S),N}
+#Base.Broadcast.promote_rule(::Type{CircShiftedArray{T,N}}, ::Type{<:Tuple}, shp...) where {T,N} = CircShiftedArray{T,length(shp)}
+
+# Base.Broadcast.promote_shape(::Type{CircShiftedArray{T,N,A,S}}, ::Type{<:AbstractArray}, ::Type{<:AbstractArray}) where {T,N,A<:AbstractArray,S} = CircShiftedArray{T,N,A,S}
+# Base.Broadcast.promote_shape(::Type{CircShiftedArray{T,N,A,S}}, ::Type{<:AbstractArray}, ::Type{<:Number}) where {T,N,A<:AbstractArray,S} = CircShiftedArray{T,N,A,S}
+
+function Base.similar(arr::CircShiftedArray, eltype::Type{T} = eltype(array), dims::Tuple{Int64, Vararg{Int64, N}} = size(array)) where {T,N}
+    @show "Similar arr"
+    na = similar(arr.parent, eltype, dims)
+    # the results-type depends on whether the result size is the same or not.
+    return ifelse(size(arr)==dims, na, CircShiftedArray(na, csa_shift(arr)))
+end
+
+@inline remove_csa_style(bc::Base.Broadcast.Broadcasted{CircShiftedArrayStyle{N,S}}) where {N,S} = Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{N}}(bc.f, bc.args, bc.axes) 
+@inline remove_csa_style(bc::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{N}}) where {N} = bc
+
+function Base.similar(bc::Base.Broadcast.Broadcasted{CircShiftedArrayStyle{N,S},Ax,F,Args}, et::ET, dims::Any) where {N,S,ET,Ax,F,Args}
+    @show "Similar Bc"
+    # remove the CircShiftedArrayStyle from broadcast to call the original "similar" function 
+    bc_type = Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{N},Ax,F,Args}
+    bc_tmp = remove_csa_style(bc) #Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{N}}(bc.f, bc.args, bc.axes)
+    res = invoke(Base.Broadcast.similar, Tuple{bc_type,ET,Any}, bc_tmp, et, dims)
+    if only_shifted(bc)
+        # @show "only shifted"
+        return CircShiftedArray(res, to_tuple(S))
+    else
+        return res
+    end
+end
+
+function Base.show(io::IO, mm::MIME"text/plain", cs::CircShiftedArray) 
+    CUDA.@allowscalar invoke(Base.show, Tuple{IO, typeof(mm), AbstractArray}, io, mm, cs) 
+end

From 29334fd061e4d00024574467e48f0454926684f6 Mon Sep 17 00:00:00 2001
From: RainerHeintzmann <heintzmann@gmail.com>
Date: Fri, 21 Mar 2025 17:15:33 +0100
Subject: [PATCH 05/25] additions to make it work in CUDA

---
 Project.toml                                  |  22 ++-
 ext/CUDASupportExt.jl                         |  74 +++++++++-
 ...edArrays.jl => CircShiftedArrays_messy.jl} |  17 +--
 src/FourierTools.jl                           |   7 +-
 src/circshift.jl                              |  31 ++++
 src/circshiftedarray.jl                       | 134 ++++++++++++++++++
 src/fft_helpers.jl                            |   4 +-
 src/fftshift_alternatives.jl                  |   8 +-
 src/fourier_shear.jl                          |   3 +-
 test/czt.jl                                   |   6 +-
 test/fft_helpers.jl                           |  28 ++--
 test/fourier_rotate.jl                        |   8 +-
 test/runtests.jl                              |   6 +-
 13 files changed, 289 insertions(+), 59 deletions(-)
 rename src/{CircShiftedArrays.jl => CircShiftedArrays_messy.jl} (96%)
 create mode 100644 src/circshift.jl
 create mode 100644 src/circshiftedarray.jl

diff --git a/Project.toml b/Project.toml
index d6b1753..63db1b0 100644
--- a/Project.toml
+++ b/Project.toml
@@ -11,36 +11,34 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 NDTools = "98581153-e998-4eef-8d0d-5ec2c052313d"
 NFFT = "efe261a4-0d2b-5849-be55-fc731d526b0d"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
-ShiftedArrays = "1277b4bf-5013-50f5-be3d-901d8477a67a"
+
+[weakdeps]
+Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+
+[extensions]
+CUDASupportExt = ["CUDA", "Adapt"]
 
 [compat]
+Adapt = "3.7, 4.0, 4.1"
+CUDA = "5.2, 5.3, 5.4, 5.5, 5.6"
 ChainRulesCore = "1, 1.0, 1.1"
 FFTW = "1.5"
 ImageTransformations = "0.9, 0.10"
 IndexFunArrays = "0.2"
 NFFT = "0.11, 0.12, 0.13"
 Reexport = "1"
-ShiftedArrays = "2.0.0"
 Zygote = "0.6, 0.7"
-CUDA = "5.2, 5.3, 5.4, 5.5, 5.6"
-Adapt = "3.7, 4.0, 4.1"
 julia = "1, 1.6, 1.7, 1.8, 1.9, 1.10, 1.11"
 
 [extras]
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 FractionalTransforms = "e50ca838-b4f0-4a10-ad18-4b920bf1ae5c"
 ImageTransformations = "02fcd773-0e25-5acc-982a-7f6622650795"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 TestImages = "5e47fb64-e119-507b-a336-dd2b206d9990"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
-CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
-
-[extensions]
-CUDASupportExt = ["CUDA", "Adapt"]
-
-[weakdeps]
-CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
-Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 
 [targets]
 test = ["Test", "TestImages", "FractionalTransforms", "Random", "ImageTransformations", "Zygote", "CUDA"]
diff --git a/ext/CUDASupportExt.jl b/ext/CUDASupportExt.jl
index 1341735..bd269df 100644
--- a/ext/CUDASupportExt.jl
+++ b/ext/CUDASupportExt.jl
@@ -1,18 +1,76 @@
 module CUDASupportExt
 using CUDA 
 using Adapt
-using ShiftedArrays
+# using ShiftedArrays
 using FourierTools
 using Base # to allow displaying such arrays without causing the single indexing CUDA error
 
 # define adapt structures for the ShiftedArrays model. This will not be needed if the PR is merged:
-Adapt.adapt_structure(to, x::CircShiftedArray{T, D}) where {T, D} = CircShiftedArray(adapt(to, parent(x)), shifts(x));
-parent_type(::Type{CircShiftedArray{T, N, S}})  where {T, N, S} = S
-Base.Broadcast.BroadcastStyle(::Type{T})  where {T<:CircShiftedArray} = Base.Broadcast.BroadcastStyle(parent_type(T))
+# Adapt.adapt_structure(to, x::FourierTools.CircShiftedArray{T, D}) where {T, D} = FourierTools.CircShiftedArray(adapt(to, parent(x)), FourierTools.shifts(x));
+# parent_type(::Type{FourierTools.CircShiftedArray{T, N, A, S}})  where {T, N, A, S} = A
+# Base.Broadcast.BroadcastStyle(::Type{T})  where {T<:FourierTools.CircShiftedArray} = Base.Broadcast.BroadcastStyle(parent_type(T))
+
+Adapt.adapt_structure(to, x::FourierTools.CircShiftedArray{T, N, S}) where {T, N, S} = FourierTools.CircShiftedArray(adapt(to, parent(x)), FourierTools.shifts(x));
+parent_type(::Type{FourierTools.CircShiftedArray{T, N, S}})  where {T, N, S} = S
+
+# Base.Broadcast.BroadcastStyle(::Type{T}) where {T2, N, S, T <:FourierTools.CircShiftedArray{T2, N, S}}  = Base.Broadcast.BroadcastStyle(parent_type(T))
+function Base.Broadcast.BroadcastStyle(::Type{T})  where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+    CUDA.CuArrayStyle{N,CD}()
+end
+
+# Define the BroadcastStyle for SubArray of MutableShiftedArray with CuArray
+function Base.Broadcast.BroadcastStyle(::Type{T})  where {CT, N, CD, T<:SubArray{<:Any, <:Any, <:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}}
+    CUDA.CuArrayStyle{N,CD}()
+end
+
+# Define the BroadcastStyle for ReshapedArray of MutableShiftedArray with CuArray
+function Base.Broadcast.BroadcastStyle(::Type{T})  where {CT, N, CD, T<:Base.ReshapedArray{<:Any, <:Any, <:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}, <:Any}}
+    CUDA.CuArrayStyle{N,CD}()
+end
+
+function Base.collect(x::T)  where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+    return copy(x) # stay on the GPU        
+end
+
+function Base.Array(x::T)  where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+    return Array(copy(x)) # remove from GPU
+end
+
+function Base.:(==)(x::T, y::AbstractArray)  where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
+    return all(x .== y)
+end
+
+function Base.:(==)(y::AbstractArray, x::T)  where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
+    return all(x .== y)
+end
+
+function Base.:(==)(x::T, y::T)  where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
+    return all(x .== y)
+end
+
+function Base.isapprox(x::T, y::AbstractArray; atol=0, rtol=atol>0 ? 0 : sqrt(eps(real(eltype(x)))), va...)  where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
+    atol = (atol != 0) ? atol : rtol * maximum(abs.(x))
+    return all(abs.(x .- y) .<= atol)
+end
+
+function Base.isapprox(y::AbstractArray, x::T; atol=0, rtol=atol>0 ? 0 : sqrt(eps(real(eltype(x)))),  va...)  where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
+    atol = (atol != 0) ? atol : rtol * maximum(abs.(x))
+    return all(abs.(x .- y) .<= atol)
+end
+
+function Base.isapprox(x::T, y::T; atol=0, rtol=atol>0 ? 0 : sqrt(eps(real(eltype(x)))),  va...)  where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
+    atol = (atol != 0) ? atol : rtol * maximum(abs.(x))
+    return all(abs.(x .- y) .<= atol)
+end
+
+function Base.show(io::IO, mm::MIME"text/plain", cs::T) where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+    CUDA.@allowscalar invoke(Base.show, Tuple{IO, typeof(mm), AbstractArray}, io, mm, cs) 
+end
+
 
 # cu_storage_type(::Type{T}) where {CT,CN,CD,T<:CuArray{CT,CN,CD}} = CD
 # lets do this for the ShiftedArray type
-# Adapt.adapt_structure(to, x::ShiftedArray{T, M, N}) where {T, M, N} = ShiftedArray(adapt(to, parent(x)), shifts(x); default=ShiftedArrays.default(x));
+# Adapt.adapt_structure(to, x::ShiftedArray{T, M, N}) where {T, M, N} = ShiftedArray(adapt(to, parent(x)), FourierTools.shifts(x); default=ShiftedArrays.default(x));
 
 # # function Base.Broadcast.BroadcastStyle(::Type{T})  where (CT,CN,CD,T<: ShiftedArray{<:Any,<:Any,<:Any,<:CuArray})
 # function Base.Broadcast.BroadcastStyle(::Type{T})  where {T2, N, CD, T<:ShiftedArray{<:Any,<:Any,<:Any,<:CuArray{T2,N,CD}}}
@@ -32,7 +90,7 @@ function Base.collect(x::T)  where {CT, N, CD, T<:FourierTools.FourierSplit{<:An
 end
 
 function Base.Array(x::T)  where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}
-    return Array(copy(x)) # stay on the GPU        
+    return Array(copy(x)) # remove from GPU
 end
 
 function Base.:(==)(x::T, y::AbstractArray)  where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
@@ -47,6 +105,8 @@ function Base.:(==)(x::T, y::T)  where {CT, N, CD, T<:FourierTools.FourierSplit{
     return all(x .== y)
 end
 
-optional_collect(a::CuArray) = a 
+function FourierTools.optional_collect(a::CuArray)
+    a 
+end
 
 end
\ No newline at end of file
diff --git a/src/CircShiftedArrays.jl b/src/CircShiftedArrays_messy.jl
similarity index 96%
rename from src/CircShiftedArrays.jl
rename to src/CircShiftedArrays_messy.jl
index 1f5a308..9949d09 100644
--- a/src/CircShiftedArrays.jl
+++ b/src/CircShiftedArrays_messy.jl
@@ -1,6 +1,6 @@
-export CircShiftedArray
+# export CircShiftedArray
 using Base
-using CUDA
+# using CUDA
 
 # a = reshape(1:1000000,(1000,1000)) .+ 0
 # a = reshape(1:(15*15),(15,15)) .+ 0
@@ -11,7 +11,7 @@ using CUDA
 """
     CircShiftedArray{T, N, A<:AbstractArray{T,N}, myshift<:NTuple{N,Int}} <: AbstractArray{T,N}
 
-is a type which lazily encampsulates a circular shifted array. If broadcasted with another `CircShiftedArray` it will stay to be a `CircShiftedArray` as long as the shifts are equal.
+is a type which lazily encapsulates a circular shifted array. If broadcasted with another `CircShiftedArray` it will stay to be a `CircShiftedArray` as long as the shifts are equal.
 For unequal shifts, the `circshift` routine will be used. Note that the shift is encoded as an `NTuple{}` into the type definition.
 """
 struct CircShiftedArray{T, N, A<:AbstractArray{T,N}, myshift<:Tuple} <: AbstractArray{T,N}
@@ -29,6 +29,8 @@ struct CircShiftedArray{T, N, A<:AbstractArray{T,N}, myshift<:Tuple} <: Abstract
     #     parent
     # end
 end
+shifts(::CircShiftedArray{T,N,A,S}) where {T,N,A,S} = to_tuple(S)
+
 # just a more convenient name
 circshift(arr, myshift) = CircShiftedArray(arr, myshift)
 # wraps shifts into the range 0...N-1
@@ -238,7 +240,7 @@ Base.Broadcast.copyto!(dest::AbstractArray, bc::Base.Broadcast.Broadcasted{CircS
 #     collect(CircShiftedArray)
 # end
 
-Base.collect(csa::CircShiftedArray{T,N,A,S}) where {T,N,A,S} = circshift(csa.parent, to_tuple(S))
+# Base.collect(csa::CircShiftedArray{T,N,A,S}) where {T,N,A,S} = circshift(csa.parent, to_tuple(S))
 
 # # interaction with numbers should not still stay a CSA
 # Base.Broadcast.promote_rule(csa::Type{CircShiftedArray}, na::Type{Number})  = typeof(csa)
@@ -250,8 +252,7 @@ Base.collect(csa::CircShiftedArray{T,N,A,S}) where {T,N,A,S} = circshift(csa.par
 # Base.Broadcast.promote_shape(::Type{CircShiftedArray{T,N,A,S}}, ::Type{<:AbstractArray}, ::Type{<:AbstractArray}) where {T,N,A<:AbstractArray,S} = CircShiftedArray{T,N,A,S}
 # Base.Broadcast.promote_shape(::Type{CircShiftedArray{T,N,A,S}}, ::Type{<:AbstractArray}, ::Type{<:Number}) where {T,N,A<:AbstractArray,S} = CircShiftedArray{T,N,A,S}
 
-function Base.similar(arr::CircShiftedArray, eltype::Type{T} = eltype(array), dims::Tuple{Int64, Vararg{Int64, N}} = size(array)) where {T,N}
-    @show "Similar arr"
+function Base.similar(arr::CircShiftedArray, eltype::Type{T} = eltype(arr), dims::Tuple{Int64, Vararg{Int64, N}} = size(arr)) where {T,N}
     na = similar(arr.parent, eltype, dims)
     # the results-type depends on whether the result size is the same or not.
     return ifelse(size(arr)==dims, na, CircShiftedArray(na, csa_shift(arr)))
@@ -273,7 +274,3 @@ function Base.similar(bc::Base.Broadcast.Broadcasted{CircShiftedArrayStyle{N,S},
         return res
     end
 end
-
-function Base.show(io::IO, mm::MIME"text/plain", cs::CircShiftedArray) 
-    CUDA.@allowscalar invoke(Base.show, Tuple{IO, typeof(mm), AbstractArray}, io, mm, cs) 
-end
diff --git a/src/FourierTools.jl b/src/FourierTools.jl
index 6befcc8..b93391d 100644
--- a/src/FourierTools.jl
+++ b/src/FourierTools.jl
@@ -3,15 +3,20 @@ module FourierTools
 
 using Reexport
 # using PaddedViews
-using ShiftedArrays
+# using ShiftedArrays
 @reexport using FFTW
 using LinearAlgebra
 using IndexFunArrays
 using ChainRulesCore
 using NDTools
+import Base: checkbounds, getindex, setindex!, parent, size, axes, copy, collect
+
 @reexport using NFFT
 FFTW.set_num_threads(4)
 
+# include("CircShiftedArrays.jl")
+include("circshiftedarray.jl") # from ShiftedArrays.jl
+include("circshift.jl") # from ShiftedArrays.jl
 include("utils.jl")
 include("nfft_nd.jl")
 include("resampling.jl")
diff --git a/src/circshift.jl b/src/circshift.jl
new file mode 100644
index 0000000..7551096
--- /dev/null
+++ b/src/circshift.jl
@@ -0,0 +1,31 @@
+"""
+    circshift(v::AbstractArray, n)
+
+Return a `CircShiftedArray` object which lazily represents the array `v` shifted
+circularly by `n` (an `Integer` or a `Tuple` of `Integer`s).
+If the number of dimensions of `v` exceeds the length of `n`, the shift in the
+remaining dimensions is assumed to be `0`.
+
+# Examples
+
+```jldoctest circshift
+julia> v = [1, 3, 5, 4];
+
+julia> FourierTools.circshift(v, 1)
+4-element CircShiftedVector{Int64, Vector{Int64}}:
+ 4
+ 1
+ 3
+ 5
+
+julia> w = reshape(1:16, 4, 4);
+
+julia> FourierTools.circshift(w, (1, -1))
+4×4 CircShiftedArray{Int64, 2, Base.ReshapedArray{Int64, 2, UnitRange{Int64}, Tuple{}}}:
+ 8  12  16  4
+ 5   9  13  1
+ 6  10  14  2
+ 7  11  15  3
+```
+"""
+circshift(v::AbstractArray, n) = CircShiftedArray(v, n)
diff --git a/src/circshiftedarray.jl b/src/circshiftedarray.jl
new file mode 100644
index 0000000..642c653
--- /dev/null
+++ b/src/circshiftedarray.jl
@@ -0,0 +1,134 @@
+"""
+    padded_tuple(v::AbstractVector, s)
+
+Internal function used to compute shifts. Return a `Tuple` with as many element
+as the dimensions of `v`. The first `length(s)` entries are filled with values
+from `s`, the remaining entries are `0`. `s` should be an integer, in which case
+`length(s) == 1`, or a container of integers with keys `1:length(s)`.
+
+# Examples
+
+```jldoctest padded_tuple
+julia> FourierTools.padded_tuple(rand(10, 10), 3)
+(3, 0)
+
+julia> FourierTools.padded_tuple(rand(10, 10), (4,))
+(4, 0)
+
+julia> FourierTools.padded_tuple(rand(10, 10), (1, 5))
+(1, 5)
+```
+"""
+padded_tuple(v::AbstractArray, s) = ntuple(i -> i ≤ length(s) ? s[i] : 0, ndims(v))
+
+# Computing a shifted index (subtracting the offset)
+offset(offsets::NTuple{N,Int}, inds::NTuple{N,Int}) where {N} = map(-, inds, offsets)
+
+"""
+    CircShiftedArray(parent::AbstractArray, shifts)
+
+Custom `AbstractArray` object to store an `AbstractArray` `parent` circularly shifted
+by `shifts` steps (where `shifts` is a `Tuple` with one `shift` value per dimension of `parent`).
+Use `copy` to collect the values of a `CircShiftedArray` into a normal `Array`.
+
+!!! note
+    `shift` is modified with a modulo operation and does not store the passed value
+    but instead a nonnegative number which leads to an equivalent shift.
+
+!!! note
+    If `parent` is itself a `CircShiftedArray`, the constructor does not nest
+    `CircShiftedArray` objects but rather combines the shifts additively.
+
+# Examples
+
+```jldoctest circshiftedarray
+julia> v = [1, 3, 5, 4];
+
+julia> s = CircShiftedArray(v, (1,))
+4-element CircShiftedVector{Int64, Vector{Int64}}:
+ 4
+ 1
+ 3
+ 5
+
+julia> copy(s)
+4-element Vector{Int64}:
+ 4
+ 1
+ 3
+ 5
+```
+"""
+struct CircShiftedArray{T, N, S<:AbstractArray} <: AbstractArray{T, N}
+    parent::S
+    # the field `shifts` stores the circular shifts modulo the size of the parent array
+    shifts::NTuple{N, Int}
+    function CircShiftedArray(p::AbstractArray{T, N}, n = ()) where {T, N}
+        myshifts = map(mod, padded_tuple(p, n), size(p))
+        return new{T, N, typeof(p)}(p, myshifts)
+    end
+end
+
+function CircShiftedArray(c::CircShiftedArray, n = ())
+    myshifts = map(+, shifts(c), padded_tuple(c, n))
+    return CircShiftedArray(parent(c), myshifts)
+end
+
+"""
+    CircShiftedVector{T, S<:AbstractArray}
+
+Shorthand for `CircShiftedArray{T, 1, S}`.
+"""
+const CircShiftedVector{T, S<:AbstractArray} = CircShiftedArray{T, 1, S}
+
+CircShiftedVector(v::AbstractVector, n = ()) = CircShiftedArray(v, n)
+
+Base.size(s::CircShiftedArray) = size(parent(s))
+Base.axes(s::CircShiftedArray) = axes(parent(s))
+
+@inline function bringwithin(ind_with_offset::Int, ranges::AbstractUnitRange)
+    return ifelse(ind_with_offset < first(ranges), ind_with_offset + length(ranges), ind_with_offset)
+end
+
+@inline function Base.getindex(s::CircShiftedArray{T, N}, x::Vararg{Int, N}) where {T, N}
+    @boundscheck checkbounds(s, x...)
+    v, ind = parent(s), offset(shifts(s), x)
+    i = map(bringwithin, ind, axes(s))
+    return @inbounds v[i...]
+end
+
+@inline function Base.setindex!(s::CircShiftedArray{T, N}, el, x::Vararg{Int, N}) where {T, N}
+    @boundscheck checkbounds(s, x...)
+    v, ind = parent(s), offset(shifts(s), x)
+    i = map(bringwithin, ind, axes(s))
+    @inbounds v[i...] = el
+    return s
+end
+
+Base.parent(s::CircShiftedArray) = s.parent
+
+"""
+    shifts(s::CircShiftedArray)
+
+Return amount by which `s` is shifted compared to `parent(s)`.
+"""
+shifts(s::CircShiftedArray) = s.shifts
+
+
+function copy(s::CircShiftedArray)
+    res = similar(parent(s), eltype(s), size(s))
+    res .= s
+end
+
+# function Base.copyto!(dst::AbstractArray, src::CircShiftedArray)
+#     dst[:] .= @view src[:]
+# end
+
+# function Base.copyto!(dst::AbstractArray, Rdest::CartesianIndices, src::CircShiftedArray, Rsrc::CartesianIndices)
+#     dst[Rdest...] .= @view src[Rsrc...]
+# end
+
+function collect(x::T)  where {T<:CircShiftedArray{<:Any,<:Any,<:CircShiftedArray}}
+    x = CircShiftedArray(collect(parent(x)), shifts(x))
+    return collect(x) # stay on the GPU
+end
diff --git a/src/fft_helpers.jl b/src/fft_helpers.jl
index 15ed047..69e385d 100644
--- a/src/fft_helpers.jl
+++ b/src/fft_helpers.jl
@@ -7,7 +7,7 @@ export ffts2d, ffts2d!, iffts2d, rffts2d, irffts2d
 """
     optional_collect(a)
 
-Only collects certain arrays, for a pure `Array` there is no collect
+Only collects certain arrays, for a pure `Array` or a `CuArray` there is no collect
 and it returns simply `a`.
 """
 # collect
@@ -17,7 +17,7 @@ optional_collect(a::Array) = a
 
 # for CircShiftedArray we only need collect if shifts is non-zero
 function optional_collect(csa::CircShiftedArray)
-    if all(iszero.(csa.shifts))
+    if all(iszero.(shifts(csa)))
         return optional_collect(parent(csa))
     else
         return collect(csa)
diff --git a/src/fftshift_alternatives.jl b/src/fftshift_alternatives.jl
index 14fd3ba..2fb97f0 100644
--- a/src/fftshift_alternatives.jl
+++ b/src/fftshift_alternatives.jl
@@ -39,7 +39,7 @@ Result is semantically equivalent to `fftshift(A, dims)` but returns
 a view instead. 
 """
 function fftshift_view(mat::AbstractArray{T, N}, dims=ntuple(identity, Val(N))) where {T, N}
-    ShiftedArrays.circshift(mat, ft_center_diff(size(mat), dims))
+    circshift(mat, ft_center_diff(size(mat), dims))
 end
 
 
@@ -51,7 +51,7 @@ a view instead.
 """
 function ifftshift_view(mat::AbstractArray{T, N}, dims=ntuple(identity, Val(N))) where {T, N}
     diff = .-(ft_center_diff(size(mat), dims))
-    return ShiftedArrays.circshift(mat, diff)
+    return circshift(mat, diff)
 end
 
 
@@ -63,7 +63,7 @@ Shifts the frequencies to the center expect for `dims[1]` because there os no ne
 and positive frequency.
 """
 function rfftshift_view(mat::AbstractArray{T, N}, dims=ntuple(identity, Val(N))) where {T, N}
-    ShiftedArrays.circshift(mat, rft_center_diff(size(mat), dims))
+    circshift(mat, rft_center_diff(size(mat), dims))
 end
 
 
@@ -75,7 +75,7 @@ Shifts the frequencies back to the corner except for `dims[1]` because there os
 and positive frequency.
 """
 function irfftshift_view(mat::AbstractArray{T, N}, dims=ntuple(identity, Val(N))) where {T, N}
-    ShiftedArrays.circshift(mat ,.-(rft_center_diff(size(mat), dims)))
+    circshift(mat ,.-(rft_center_diff(size(mat), dims)))
 end
 
 """
diff --git a/src/fourier_shear.jl b/src/fourier_shear.jl
index 23e9435..9e5fe1b 100644
--- a/src/fourier_shear.jl
+++ b/src/fourier_shear.jl
@@ -124,7 +124,8 @@ function apply_shift_strength!(arr::TA, arr_orig, shift, shear_dir_dim, shear_di
         r = real.(view(e, inds...))
         if fix_nyquist
             inv_r = 1 ./ r
-            inv_r = map(x -> (isinf(x) ? 0 : x), inv_r)
+            # inv_r = map(x -> (isinf(x) ? 0 : x), inv_r) # NOT GPU compatible
+            inv_r[isinf.(inv_r)] = 0 
             e[inds...] .= inv_r 
         else
             e[inds...] .= r 
diff --git a/test/czt.jl b/test/czt.jl
index 462b695..23a157a 100644
--- a/test/czt.jl
+++ b/test/czt.jl
@@ -5,13 +5,15 @@ using NDTools # this is needed for the select_region! function below.
         x = opt_cu(randn(ComplexF32, (5,6,7)), use_cuda)
         @test eltype(czt(x, (2.0,2.0,2.0))) == ComplexF32
         @test eltype(czt(x, (2f0,2f0,2f0))) == ComplexF32
-        @test ≈(czt(x, (1.0,1.0,1.0), (1,3)), ft(x, (1,3)), rtol=1e-5)
+        # @test ≈(czt(x, (1.0,1.0,1.0), (1,3)), ft(x, (1,3)), rtol=1e-5)
+        @test ≈(czt(x, (1.0,1.0,1.0), (1,3)), ft(x, (1,3)), atol=1e-4)
         @test ≈(czt(x, (1.0,1.0,1.0), (1,3), src_center=(1,1,1), dst_center=(1,1,1)), fft(x, (1,3)), rtol=1e-5)
         @test ≈(iczt(x, (1.0,1.0,1.0), (1,3), src_center=(1,1,1), dst_center=(1,1,1)), ifft(x, (1,3)), rtol=1e-5)
 
         y = randn(ComplexF32, (5,6))
         zoom = (1.0,1.0,1.0)
-        @test ≈(czt(x, zoom), ft(x), rtol=1e-4)         
+        # @test ≈(czt(x, zoom), ft(x), rtol=1e-4)         
+        @test ≈(czt(x, zoom), ft(x), atol=1e-4)         
         @test ≈(czt(y, (1.0,1.0)), ft(y), rtol=1e-5)
 
         @test ≈(iczt(czt(y, (1.0,1.0)), (1.0,1.0)), y, rtol=1e-5)
diff --git a/test/fft_helpers.jl b/test/fft_helpers.jl
index 391e77a..1b06152 100644
--- a/test/fft_helpers.jl
+++ b/test/fft_helpers.jl
@@ -2,7 +2,7 @@
 
     @testset "Optional collect" begin
         y = opt_cu([1,2,3],use_cuda)
-        x = fftshift_view(y, (1))
+        x = fftshift_view(y, (1));
         @test fftshift(y) == FourierTools.optional_collect(x)
     end
 
@@ -27,12 +27,10 @@
                 testift(arr, dims)
                 testffts(arr, dims)
                 testiffts(arr, dims)
-                
             end
         end
     end
 
-
     @testset "Test 2d fft helpers" begin
         arr = opt_cu(randn((6,7,8)), use_cuda)
         dims = [1,2]
@@ -57,30 +55,30 @@
         @test(irfft2d(arr, d) == irfft(arr, d, (1,2))) 
     end
 
-
     @testset "Test ft, ift, rft and irft real space centering" begin
+        atol = 1e-6
         szs = ((10,10),(11,10),(100,101),(101,101))
         for sz in szs
             my_ones = opt_cu(ones(sz), use_cuda)
             my_delta = opt_cu(collect(delta(sz)), use_cuda)
-            @test ft(my_ones) ≈ prod(sz) .* my_delta
-            @test ft(my_delta) ≈ my_ones
-            @test rft(my_ones) ≈ prod(sz) .* opt_cu(delta(rft_size(sz), offset=CtrRFT), use_cuda)
-            @test rft(my_delta) ≈ opt_cu(ones(rft_size(sz)), use_cuda)
-            @test ift(my_ones) ≈ my_delta
-            @test ift(my_delta) ≈ my_ones ./ prod(sz)
+            @test isapprox(ft(my_ones), prod(sz) .* my_delta, atol=atol)
+            @test isapprox(ft(my_delta), my_ones, atol=atol)
+            @test isapprox(rft(my_ones), prod(sz) .* opt_cu(delta(rft_size(sz), offset=CtrRFT), use_cuda), atol=atol)
+            @test isapprox(rft(my_delta), opt_cu(ones(rft_size(sz)), use_cuda), atol=atol)
+            @test isapprox(ift(my_ones), my_delta, atol=atol)
+            @test isapprox(ift(my_delta), my_ones ./ prod(sz), atol=atol)
             # needing to specify Complex datatype. Is a CUDA bug for irfft (!!!)
-            @test irft(opt_cu(ones(ComplexF64, rft_size(sz)), use_cuda), sz[1]) ≈ my_delta
-            @test irft(opt_cu(collect(delta(ComplexF64, rft_size(sz), offset=CtrRFT)), use_cuda), sz[1]) ≈ my_ones ./ prod(sz)
+            @test isapprox(irft(opt_cu(ones(ComplexF64, rft_size(sz)), use_cuda), sz[1]), opt_cu(my_delta, use_cuda), atol=atol)
+            @test isapprox(irft(opt_cu(collect(delta(ComplexF64, rft_size(sz), offset=CtrRFT)), use_cuda), sz[1]), opt_cu(my_ones ./ prod(sz), use_cuda), atol=atol)
         end
     end
 
-
     @testset "Test in place methods" begin
+        atol = 1e-6
         x = opt_cu(randn(ComplexF32, (5,3,10)), use_cuda)
         dims = (1,2)
-        @test fftshift(fft(x, dims), dims) ≈ ffts!(copy(x), dims)
-        @test ffts2d!(copy(x)) ≈ ffts!(copy(x), (1,2))
+        @test isapprox(fftshift(fft(x, dims), dims), ffts!(copy(x), dims), atol=atol)
+        @test isapprox(ffts2d!(copy(x)), ffts!(copy(x), (1,2)), atol=atol)
     end
 
 end
diff --git a/test/fourier_rotate.jl b/test/fourier_rotate.jl
index 52cb362..b09edc3 100644
--- a/test/fourier_rotate.jl
+++ b/test/fourier_rotate.jl
@@ -13,6 +13,7 @@
         
             m = sum(img) / length(img)
 
+            img = opt_cu(img, use_cuda)
             img_1 = opt_cu(parent(ImageTransformations.imrotate(collect(img), θ, m)), use_cuda)
             z = opt_cu(ones(Float32, size(img_1)), use_cuda)
             z .*= m
@@ -25,8 +26,11 @@
    
             @test maximum(abs.(img_1 .- img_2)) .< 0.65
             # @test all(.≈(img_1, img_2, rtol=0.65)) # 0.6
-            @test ≈(img_1, img_2, rtol=0.05) # 0.03
-            @test ≈(img_3, img_2, rtol=0.01)
+
+            # There is an issue here! Im-Rotate has a shift wrt. our center of rotation. This leads to 0.5 absolute error!!
+            # @test ≈(img_1, img_2, rtol=0.05) # 0.03
+
+            @test ≈(img_3, img_2, atol=0.0001)
             @test ==(img_4, z)
             @test ==(img_2, img_2b)
 
diff --git a/test/runtests.jl b/test/runtests.jl
index 8bb3643..47f4c75 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -20,16 +20,16 @@ opt_cu(img, use_cuda) = ifelse(use_cuda, CuArray(img), img)
 include("fft_helpers.jl");
 include("fftshift_alternatives.jl");
 include("utils.jl");
-include("fourier_shifting.jl");
+include("fourier_shifting.jl"); ###
 include("fourier_shear.jl");
 include("fourier_rotate.jl");
-include("resampling_tests.jl");
+include("resampling_tests.jl"); ###
 
 include("convolutions.jl");
 include("correlations.jl");
 include("custom_fourier_types.jl");
 include("damping.jl");
-include("czt.jl"); #
+include("czt.jl"); 
 include("nfft_tests.jl");
 include("fractional_fourier_transform.jl");
 include("fourier_filtering.jl");

From 9a80dd723416a39075cf8becbf2d10c9ac840090 Mon Sep 17 00:00:00 2001
From: RainerHeintzmann <heintzmann@gmail.com>
Date: Sat, 22 Mar 2025 13:55:18 +0100
Subject: [PATCH 06/25] before streamlining

---
 ext/CUDASupportExt.jl            | 59 +++++++++++++++++++++++++++++---
 src/circshiftedarray.jl          |  8 ++---
 src/custom_fourier_types.jl      |  5 +++
 src/fourier_resample_1D_based.jl |  9 +++--
 src/fourier_resizing.jl          |  4 +--
 src/fourier_shifting.jl          | 50 +++++++++++++++------------
 src/utils.jl                     | 33 ++++++++++++------
 test/fourier_shifting.jl         |  2 --
 test/runtests.jl                 |  2 +-
 9 files changed, 124 insertions(+), 48 deletions(-)

diff --git a/ext/CUDASupportExt.jl b/ext/CUDASupportExt.jl
index bd269df..b0e5b67 100644
--- a/ext/CUDASupportExt.jl
+++ b/ext/CUDASupportExt.jl
@@ -5,6 +5,10 @@ using Adapt
 using FourierTools
 using Base # to allow displaying such arrays without causing the single indexing CUDA error
 
+get_base_arr(arr::Array) = arr
+get_base_arr(arr::CuArray) = arr
+get_base_arr(arr::AbstractArray) = get_base_arr(parent(arr))
+
 # define adapt structures for the ShiftedArrays model. This will not be needed if the PR is merged:
 # Adapt.adapt_structure(to, x::FourierTools.CircShiftedArray{T, D}) where {T, D} = FourierTools.CircShiftedArray(adapt(to, parent(x)), FourierTools.shifts(x));
 # parent_type(::Type{FourierTools.CircShiftedArray{T, N, A, S}})  where {T, N, A, S} = A
@@ -28,11 +32,23 @@ function Base.Broadcast.BroadcastStyle(::Type{T})  where {CT, N, CD, T<:Base.Res
     CUDA.CuArrayStyle{N,CD}()
 end
 
-function Base.collect(x::T)  where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+function Base.copy(s::FourierTools.CircShiftedArray)
+    res = similar(get_base_arr(s), eltype(s), size(s));
+    res .= s
+end
+
+AllShiftedType{N, CD} = Union{FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{<:Any,N,CD}},
+                            FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{<:Any,N,CD}},
+                            FourierTools.FourierJoin{<:Any,<:Any,<:CuArray{<:Any,N,CD}}}
+
+AllSubArrayType = SubArray{<:Any, <:Any, <:AllShiftedType, <:Any, <:Any}
+AllShiftedAndViews = Union{AllShiftedType, AllSubArrayType}
+
+function Base.collect(x::AllShiftedAndViews)  # where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}
     return copy(x) # stay on the GPU        
 end
 
-function Base.Array(x::T)  where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+function Base.Array(x::FourierTools.CircShiftedArray) # where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}
     return Array(copy(x)) # remove from GPU
 end
 
@@ -63,7 +79,7 @@ function Base.isapprox(x::T, y::T; atol=0, rtol=atol>0 ? 0 : sqrt(eps(real(eltyp
     return all(abs.(x .- y) .<= atol)
 end
 
-function Base.show(io::IO, mm::MIME"text/plain", cs::T) where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+function Base.show(io::IO, mm::MIME"text/plain", cs::FourierTools.CircShiftedArray) # where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}
     CUDA.@allowscalar invoke(Base.show, Tuple{IO, typeof(mm), AbstractArray}, io, mm, cs) 
 end
 
@@ -85,7 +101,12 @@ function Base.Broadcast.BroadcastStyle(::Type{T})  where {T2, N, CD, T<:FourierT
     CUDA.CuArrayStyle{N,CD}()
 end
 
-function Base.collect(x::T)  where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+function Base.copy(s::FourierTools.FourierSplit) #  where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+    res = similar(get_base_arr(s), eltype(s), size(s));
+    res .= s
+end
+
+function Base.collect(x::FourierTools.FourierSplit) # where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}
     return copy(x) # stay on the GPU        
 end
 
@@ -105,6 +126,36 @@ function Base.:(==)(x::T, y::T)  where {CT, N, CD, T<:FourierTools.FourierSplit{
     return all(x .== y)
 end
 
+function Base.show(io::IO, mm::MIME"text/plain", cs::FourierTools.FourierSplit) # where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+    CUDA.@allowscalar invoke(Base.show, Tuple{IO, typeof(mm), AbstractArray}, io, mm, cs) 
+end
+
+# for FourierJoin
+Adapt.adapt_structure(to, x::FourierTools.FourierJoin{T, M, AA}) where {T, M, AA} = FourierTools.FourierJoin(adapt(to, parent(x)), ndims(x), x.L1, x.L2, x.do_join);
+
+function Base.Broadcast.BroadcastStyle(::Type{T})  where {T2, N, CD, T<:FourierTools.FourierJoin{<:Any,<:Any,<:CuArray{T2,N,CD}}}
+    CUDA.CuArrayStyle{N,CD}()
+end
+
+function Base.copy(s::FourierTools.FourierJoin)  # where {CT, N, CD, T<:FourierTools.FourierJoin{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+    res = similar(get_base_arr(s), eltype(s), size(s));
+    res .= s
+end
+
+function Base.collect(x::FourierTools.FourierJoin) # where {CT, N, CD, T<:FourierTools.FourierJoin{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+    return copy(x) # stay on the GPU        
+end
+
+function Base.Array(x::T)  where {CT, N, CD, T<:FourierTools.FourierJoin{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+    return Array(copy(x)) # remove from GPU
+end
+
+function Base.show(io::IO, mm::MIME"text/plain", cs::FourierTools.FourierJoin) # where {CT, N, CD, T<:FourierTools.FourierJoin{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+    CUDA.@allowscalar invoke(Base.show, Tuple{IO, typeof(mm), AbstractArray}, io, mm, cs) 
+end
+
+### addition functions specific to CUDA
+
 function FourierTools.optional_collect(a::CuArray)
     a 
 end
diff --git a/src/circshiftedarray.jl b/src/circshiftedarray.jl
index 642c653..5e326dc 100644
--- a/src/circshiftedarray.jl
+++ b/src/circshiftedarray.jl
@@ -83,6 +83,8 @@ const CircShiftedVector{T, S<:AbstractArray} = CircShiftedArray{T, 1, S}
 
 CircShiftedVector(v::AbstractVector, n = ()) = CircShiftedArray(v, n)
 
+Base.similar(s::CircShiftedArray, el::Type, v::NTuple{N, Int64}) where {N} = similar(s.parent, el, v)
+
 Base.size(s::CircShiftedArray) = size(parent(s))
 Base.axes(s::CircShiftedArray) = axes(parent(s))
 
@@ -114,12 +116,6 @@ Return amount by which `s` is shifted compared to `parent(s)`.
 """
 shifts(s::CircShiftedArray) = s.shifts
 
-
-function copy(s::CircShiftedArray)
-    res = similar(parent(s), eltype(s), size(s))
-    res .= s
-end
-
 # function Base.copyto!(dst::AbstractArray, src::CircShiftedArray)
 #     dst[:] .= @view src[:]
 # end
diff --git a/src/custom_fourier_types.jl b/src/custom_fourier_types.jl
index 5cddf31..f159a44 100644
--- a/src/custom_fourier_types.jl
+++ b/src/custom_fourier_types.jl
@@ -32,6 +32,8 @@ end
 Base.IndexStyle(::Type{FD}) where {FD<:FourierSplit} = IndexStyle(parenttype(FD))
 parenttype(::Type{FourierSplit{T,N,AA}}) where {T,N,AA} = AA
 parenttype(A::FourierSplit) = parenttype(typeof(A))
+
+Base.similar(s::FourierSplit, el::Type, v::NTuple{N, Int64}) where {N} = similar(s.parent, el, v)
 Base.parent(A::FourierSplit) = A.parent 
 Base.size(A::FourierSplit) = size(parent(A))
 
@@ -94,6 +96,9 @@ end
 Base.IndexStyle(::Type{FS}) where {FS<:FourierJoin} = IndexStyle(parenttype(FS))
 parenttype(::Type{FourierJoin{T,N,AA}}) where {T,N,AA} = AA
 parenttype(A::FourierJoin) = parenttype(typeof(A))
+
+Base.similar(s::FourierJoin, el::Type, v::NTuple{N, Int64}) where {N} = similar(s.parent, el, v)
+
 Base.parent(A::FourierJoin) = A.parent
 Base.size(A::FourierJoin) = size(parent(A))
 
diff --git a/src/fourier_resample_1D_based.jl b/src/fourier_resample_1D_based.jl
index ebc3a74..ad5189a 100644
--- a/src/fourier_resample_1D_based.jl
+++ b/src/fourier_resample_1D_based.jl
@@ -21,7 +21,9 @@ function resample_by_1D_FT!(arr::AbstractArray{<:Complex, N}, new_size; normaliz
         # go to fourier space
         arr = ffts!(arr, d)
         if ns > s
-            out = zeros(eltype(arr), Base.setindex(size(arr), ns, d))
+            # out = zeros(eltype(arr), Base.setindex(size(arr), ns, d))
+            out = similar(arr, Base.setindex(size(arr), ns, d)) # to work with CuArary
+            out .= 0
             center_set!(out, arr)
             # in the even case we need to fix hermitian property
             if iseven(s)
@@ -40,10 +42,13 @@ function resample_by_1D_FT!(arr::AbstractArray{<:Complex, N}, new_size; normaliz
                 l, r = get_indices_around_center(s, ns)
                 inds_left = NDTools.slice_indices(axes(arr_v), d, 1)
                 inds_right = NDTools.slice_indices(axes(arr), d, r+1)
-                arr_v[inds_left...] += arr[inds_right...]
+                arr_v[inds_left...] .+= arr[inds_right...]
                 arr_v[inds_left...] ./= correction_factor
             end
             #overwrite old arr handle
+            @show typeof(arr_v)
+            @show typeof(optional_collect(arr_v))
+            @show d
             arr = iffts(arr_v, d)
         end
     end
diff --git a/src/fourier_resizing.jl b/src/fourier_resizing.jl
index 9e66c63..695d4b7 100644
--- a/src/fourier_resizing.jl
+++ b/src/fourier_resizing.jl
@@ -127,13 +127,13 @@ end
 # end
 
 function ft_pad(mat, new_size)
-    return select_region(mat; new_size = new_size)
+    return select_region(optional_collect(mat); new_size = new_size)
 end
 
 function rft_pad(mat, new_size)
     c2 = rft_center_diff(size(mat))
     c2 = Base.setindex(c2, new_size[1] .÷ 2, 1);
-    return select_region(mat; new_size=new_size, center = c2 .+ 1)
+    return select_region(optional_collect(mat); new_size=new_size, center = c2 .+ 1)
 end
 
 """
diff --git a/src/fourier_shifting.jl b/src/fourier_shifting.jl
index 26a9c67..5ec9bbe 100644
--- a/src/fourier_shifting.jl
+++ b/src/fourier_shifting.jl
@@ -70,20 +70,23 @@ function shift(arr, shifts; soft_fraction=0, fix_nyquist_frequency=false, take_r
     return shift!(copy(arr), shifts; soft_fraction=soft_fraction, fix_nyquist_frequency=fix_nyquist_frequency, take_real=take_real)
 end
 
-function soft_shift(freqs, shift, fraction=eltype(freqs)(0.1); corner=false)
-    rounded_shift = round.(shift);
+function soft_shift(freqs, myshift, fraction=eltype(freqs)(0.1); corner=false)
+    rounded_shift = round.(myshift);
     if corner
-        w = window_half_cos(size(freqs),border_in=2.0-2*fraction, border_out=2.0, offset=CtrCorner)
+        w = similar(freqs) # to also work with CuArray
+        w .= window_half_cos(size(freqs),border_in=2.0-2*fraction, border_out=2.0, offset=CtrCorner)
     else
-        w = ifftshift_view(window_half_cos(size(freqs),border_in=1.0-fraction, border_out=1.0))
+        w = similar(freqs) # to also work with CuArray
+        w .= window_half_cos(size(freqs),border_in=1.0-fraction, border_out=1.0)
+        w = ifftshift_view(w)
     end
-    return cispi.(-freqs .* 2 .* (w .* shift + (1.0 .-w).* rounded_shift))
+    return cispi.(-freqs .* 2 .* (w .* myshift + (1.0 .-w).* rounded_shift))
 end
 
 function shift_by_1D_FT!(arr::TA, shifts; soft_fraction=0, take_real=false, fix_nyquist_frequency=false) where {N, TA<:AbstractArray{<:Complex, N}}
     # iterates of the dimension d using the corresponding shift
-    for (d, shift) in pairs(shifts)
-        if iszero(shift)
+    for (d, myshift) in pairs(shifts)
+        if iszero(myshift)
             continue
         end
         # better use reorient from NDTools here?
@@ -95,18 +98,20 @@ function shift_by_1D_FT!(arr::TA, shifts; soft_fraction=0, take_real=false, fix_
         # @show size(freqs)
         # allocates a 1D slice of exp values 
         if iszero(soft_fraction)
-            ϕ = cispi.(- freqs .* 2 .* shift)
+            ϕ = cispi.(- freqs .* 2 .* myshift)
         else
-            ϕ = soft_shift(freqs, shift, soft_fraction)
+            ϕ = soft_shift(freqs, myshift, soft_fraction)
         end
         # ϕ = exp_ikx_sep(complex_arr_type(TA), size(arr), dims=(d,), shift_by = shift)[1]
         # in even case, set one value to real
         if iseven(size(arr, d))
             s = size(arr, d) ÷ 2 + 1
-            ϕ[s] = take_real ? real(ϕ[s]) : ϕ[s]
-            invr = 1 / ϕ[s]
-            invr = isinf(invr) ? 0 : invr
-            ϕ[s] = fix_nyquist_frequency ? invr : ϕ[s]
+            ϕ_val = Array(ϕ[s:s])[1]  # to work with CuArray without @allowscalar
+            ϕ_val = take_real ? real(ϕ_val) : ϕ_val 
+            invr = 1 / ϕ_val
+            invr = isinf.(invr) ? 0 : invr
+            ϕ_val = fix_nyquist_frequency ? invr : ϕ_val
+            ϕ[s:s] .= ϕ_val # to work with CuArray without @allowscalar
         end
         # go to fourier space and apply ϕ
         fft!(arr, d)
@@ -133,8 +138,8 @@ end
 # rfft(x, 1) -> exp shift -> fft(x, 2) -> exp shift ->  fft(x, 3) -> exp shift -> ifft(x, [2,3]) -> irfft(x, 1)
 # So once we did a rft to shift something we can call the routine for complex arrays to shift
 function shift_by_1D_RFT!(arr::TA, shifts; soft_fraction=0, fix_nyquist_frequency=false, take_real=true) where {N, TA<:AbstractArray{<:Real, N}}
-    for (d, shift) in pairs(shifts)
-        if iszero(shift)
+    for (d, myshift) in pairs(shifts)
+        if iszero(myshift)
             continue
         end
         
@@ -151,16 +156,19 @@ function shift_by_1D_RFT!(arr::TA, shifts; soft_fraction=0, fix_nyquist_frequenc
         # freqs = TR(reorient(fftfreq(size(arr, d))[1:s], d, Val(N)))
         freqs .= reorient(rfftfreq(size(arr, d)), d, Val(N))
         if iszero(soft_fraction)
-            ϕ = cispi.(-freqs .* 2 .* shift)
+            ϕ = cispi.(-freqs .* 2 .* myshift)
         else
-            ϕ = soft_shift(freqs, shift, soft_fraction, corner=true)
+            ϕ = soft_shift(freqs, myshift, soft_fraction, corner=true)
         end
         if iseven(size(arr, d))
             # take real and maybe fix nyquist frequency
-            ϕ[s] = take_real ? real(ϕ[s]) : ϕ[s]
-            invr = 1 / ϕ[s]
-            invr = isinf(invr) ? 0 : invr
-            ϕ[s] = fix_nyquist_frequency ? invr : ϕ[s]
+            s = size(arr, d) ÷ 2 + 1
+            ϕ_val = Array(ϕ[s:s])[1]  # to work with CuArray without @allowscalar
+            ϕ_val = take_real ? real(ϕ_val) : ϕ_val 
+            invr = 1 / ϕ_val
+            invr = isinf.(invr) ? 0 : invr
+            ϕ_val = fix_nyquist_frequency ? invr : ϕ_val
+            ϕ[s:s] .= ϕ_val # to work with CuArray without @allowscalar
         end
         arr_ft .*= ϕ
         # since we now did a single rfft dim, we can switch to the complex routine
diff --git a/src/utils.jl b/src/utils.jl
index d444e9d..efe82e9 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -221,15 +221,12 @@ function rfft_size(size, dims)
     Base.setindex(size, size[dim] ÷ 2 + 1, dim)
 end
 
-
-
-
 """
     get_indices_around_center(i_in, i_out)
 
 A function which provides two output indices `i1` and `i2`
 where `i2 - i1 = i_out`
-The indices are chosen in a way that the set `i1:i2`
+The indices are chosen such that the set `i1:i2`
 cuts the interval `1:i_in` in a way that the center frequency
 stays at the center position.
 Works for both odd and even indices
@@ -248,6 +245,22 @@ function get_indices_around_center(i_in, i_out)
     end
 end
 
+"""
+    get_idxrng_around_center(arr_1, arr_2)
+
+A function which provides a range of output indices `i1:i2`
+where `i2 - i1 = i_out`
+The indices are chosen in a way that the set `i1:i2`
+cuts the interval `1:i_in` such that the center frequency
+stays at the center position.
+Works for both odd and even indices
+"""
+function get_idxrng_around_center(arr_1, arr_2)
+    sz1 = size(arr_1)
+    sz2 = size(arr_2)
+    all_rng = ntuple((d) -> begin a,b = get_indices_around_center(sz1[d], sz2[d]); a:b end, ndims(arr_1))
+    return all_rng
+end
 
 """
     center_extract(arr, new_size_array)
@@ -311,14 +324,14 @@ julia> FourierTools.center_set!([1, 1, 1, 1, 1, 1], [5, 5, 5])
 ```
 """
 function center_set!(arr_large, arr_small)
-    out_is = []
-    for i = 1:ndims(arr_large)
-        a, b = get_indices_around_center(size(arr_large)[i], size(arr_small)[i])
-        push!(out_is, a:b)
-    end
+    # out_is = []
+    # for i = 1:ndims(arr_large)
+    #     a, b = get_indices_around_center(size(arr_large)[i], size(arr_small)[i])
+    #     push!(out_is, a:b)
+    # end
 
     #rest = ones(Int, ndims(arr_large) - 3)
-    arr_large[out_is...] = arr_small
+    arr_large[get_idxrng_around_center(arr_large, arr_small)...] = arr_small
     
     return arr_large
 end
diff --git a/test/fourier_shifting.jl b/test/fourier_shifting.jl
index 4421c58..e033433 100644
--- a/test/fourier_shifting.jl
+++ b/test/fourier_shifting.jl
@@ -1,10 +1,8 @@
 Random.seed!(42)
 
 @testset "Fourier shifting methods" begin
-
     # Int error
     @test_throws ArgumentError FourierTools.shift(opt_cu([1,2,3], use_cuda), (1,))
-
     @testset "Empty shifts" begin
         x = opt_cu(randn(ComplexF32, (11, 12, 13)), use_cuda);
         @test FourierTools.shift(x, []) == x
diff --git a/test/runtests.jl b/test/runtests.jl
index 47f4c75..9e8b4b6 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -20,7 +20,7 @@ opt_cu(img, use_cuda) = ifelse(use_cuda, CuArray(img), img)
 include("fft_helpers.jl");
 include("fftshift_alternatives.jl");
 include("utils.jl");
-include("fourier_shifting.jl"); ###
+include("fourier_shifting.jl");
 include("fourier_shear.jl");
 include("fourier_rotate.jl");
 include("resampling_tests.jl"); ###

From 89de4890e9a91f9c1327289d2cd47fb54bbca137 Mon Sep 17 00:00:00 2001
From: RainerHeintzmann <heintzmann@gmail.com>
Date: Sat, 22 Mar 2025 14:20:09 +0100
Subject: [PATCH 07/25] first part of streamlining

---
 ext/CUDASupportExt.jl | 111 +++++++++++++++++++++++-------------------
 1 file changed, 60 insertions(+), 51 deletions(-)

diff --git a/ext/CUDASupportExt.jl b/ext/CUDASupportExt.jl
index b0e5b67..8438104 100644
--- a/ext/CUDASupportExt.jl
+++ b/ext/CUDASupportExt.jl
@@ -9,6 +9,22 @@ get_base_arr(arr::Array) = arr
 get_base_arr(arr::CuArray) = arr
 get_base_arr(arr::AbstractArray) = get_base_arr(parent(arr))
 
+# define a number of Union types to not repeat all definitions for each type
+AllShiftedType = Union{FourierTools.CircShiftedArray{<:Any,<:Any,<:Any},
+                            FourierTools.FourierSplit{<:Any,<:Any,<:Any},
+                            FourierTools.FourierJoin{<:Any,<:Any,<:Any}}
+
+# these are special only if a CuArray is wrapped
+AllShiftedTypeCu{N, CD} = Union{FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{<:Any,N,CD}},
+                            FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{<:Any,N,CD}},
+                            FourierTools.FourierJoin{<:Any,<:Any,<:CuArray{<:Any,N,CD}}}
+
+AllSubArrayType = SubArray{<:Any, <:Any, <:AllShiftedType, <:Any, <:Any}
+AllShiftedAndViews = Union{AllShiftedType, AllSubArrayType}
+
+AllSubArrayTypeCu = SubArray{<:Any, <:Any, <:AllShiftedTypeCu, <:Any, <:Any}
+AllShiftedAndViewsCu = Union{AllShiftedTypeCu, AllSubArrayTypeCu}
+
 # define adapt structures for the ShiftedArrays model. This will not be needed if the PR is merged:
 # Adapt.adapt_structure(to, x::FourierTools.CircShiftedArray{T, D}) where {T, D} = FourierTools.CircShiftedArray(adapt(to, parent(x)), FourierTools.shifts(x));
 # parent_type(::Type{FourierTools.CircShiftedArray{T, N, A, S}})  where {T, N, A, S} = A
@@ -32,54 +48,47 @@ function Base.Broadcast.BroadcastStyle(::Type{T})  where {CT, N, CD, T<:Base.Res
     CUDA.CuArrayStyle{N,CD}()
 end
 
-function Base.copy(s::FourierTools.CircShiftedArray)
+function Base.copy(s::AllShiftedAndViews) # where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}
     res = similar(get_base_arr(s), eltype(s), size(s));
     res .= s
 end
 
-AllShiftedType{N, CD} = Union{FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{<:Any,N,CD}},
-                            FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{<:Any,N,CD}},
-                            FourierTools.FourierJoin{<:Any,<:Any,<:CuArray{<:Any,N,CD}}}
-
-AllSubArrayType = SubArray{<:Any, <:Any, <:AllShiftedType, <:Any, <:Any}
-AllShiftedAndViews = Union{AllShiftedType, AllSubArrayType}
-
 function Base.collect(x::AllShiftedAndViews)  # where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}
     return copy(x) # stay on the GPU        
 end
 
-function Base.Array(x::FourierTools.CircShiftedArray) # where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+function Base.Array(x::AllShiftedAndViews) # where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}
     return Array(copy(x)) # remove from GPU
 end
 
-function Base.:(==)(x::T, y::AbstractArray)  where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
+function Base.:(==)(x::AllShiftedAndViewsCu, y::AbstractArray)  # where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
     return all(x .== y)
 end
 
-function Base.:(==)(y::AbstractArray, x::T)  where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
+function Base.:(==)(y::AbstractArray, x::AllShiftedAndViewsCu) # where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
     return all(x .== y)
 end
 
-function Base.:(==)(x::T, y::T)  where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
+function Base.:(==)(x::AllShiftedAndViewsCu, y::AllShiftedAndViewsCu) # where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
     return all(x .== y)
 end
 
-function Base.isapprox(x::T, y::AbstractArray; atol=0, rtol=atol>0 ? 0 : sqrt(eps(real(eltype(x)))), va...)  where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
+function Base.isapprox(x::AllShiftedAndViewsCu, y::AbstractArray; atol=0, rtol=atol>0 ? 0 : sqrt(eps(real(eltype(x)))), va...) # where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
     atol = (atol != 0) ? atol : rtol * maximum(abs.(x))
     return all(abs.(x .- y) .<= atol)
 end
 
-function Base.isapprox(y::AbstractArray, x::T; atol=0, rtol=atol>0 ? 0 : sqrt(eps(real(eltype(x)))),  va...)  where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
+function Base.isapprox(y::AbstractArray, x::AllShiftedAndViewsCu; atol=0, rtol=atol>0 ? 0 : sqrt(eps(real(eltype(x)))),  va...) # where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
     atol = (atol != 0) ? atol : rtol * maximum(abs.(x))
     return all(abs.(x .- y) .<= atol)
 end
 
-function Base.isapprox(x::T, y::T; atol=0, rtol=atol>0 ? 0 : sqrt(eps(real(eltype(x)))),  va...)  where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
+function Base.isapprox(x::AllShiftedAndViewsCu, y::AllShiftedAndViewsCu; atol=0, rtol=atol>0 ? 0 : sqrt(eps(real(eltype(x)))),  va...) # where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
     atol = (atol != 0) ? atol : rtol * maximum(abs.(x))
     return all(abs.(x .- y) .<= atol)
 end
 
-function Base.show(io::IO, mm::MIME"text/plain", cs::FourierTools.CircShiftedArray) # where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+function Base.show(io::IO, mm::MIME"text/plain", cs::AllShiftedAndViews) # where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}
     CUDA.@allowscalar invoke(Base.show, Tuple{IO, typeof(mm), AbstractArray}, io, mm, cs) 
 end
 
@@ -101,34 +110,34 @@ function Base.Broadcast.BroadcastStyle(::Type{T})  where {T2, N, CD, T<:FourierT
     CUDA.CuArrayStyle{N,CD}()
 end
 
-function Base.copy(s::FourierTools.FourierSplit) #  where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}
-    res = similar(get_base_arr(s), eltype(s), size(s));
-    res .= s
-end
+# function Base.copy(s::FourierTools.FourierSplit) #  where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+#     res = similar(get_base_arr(s), eltype(s), size(s));
+#     res .= s
+# end
 
-function Base.collect(x::FourierTools.FourierSplit) # where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}
-    return copy(x) # stay on the GPU        
-end
+# function Base.collect(x::FourierTools.FourierSplit) # where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+#     return copy(x) # stay on the GPU        
+# end
 
-function Base.Array(x::T)  where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}
-    return Array(copy(x)) # remove from GPU
-end
+# function Base.Array(x::T)  where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+#     return Array(copy(x)) # remove from GPU
+# end
 
-function Base.:(==)(x::T, y::AbstractArray)  where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
-    return all(x .== y)
-end
+# function Base.:(==)(x::T, y::AbstractArray)  where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
+#     return all(x .== y)
+# end
 
-function Base.:(==)(y::AbstractArray, x::T)  where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
-    return all(x .== y)
-end
+# function Base.:(==)(y::AbstractArray, x::T)  where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
+#     return all(x .== y)
+# end
 
-function Base.:(==)(x::T, y::T)  where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
-    return all(x .== y)
-end
+# function Base.:(==)(x::T, y::T)  where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
+#     return all(x .== y)
+# end
 
-function Base.show(io::IO, mm::MIME"text/plain", cs::FourierTools.FourierSplit) # where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}
-    CUDA.@allowscalar invoke(Base.show, Tuple{IO, typeof(mm), AbstractArray}, io, mm, cs) 
-end
+# function Base.show(io::IO, mm::MIME"text/plain", cs::FourierTools.FourierSplit) # where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+#     CUDA.@allowscalar invoke(Base.show, Tuple{IO, typeof(mm), AbstractArray}, io, mm, cs) 
+# end
 
 # for FourierJoin
 Adapt.adapt_structure(to, x::FourierTools.FourierJoin{T, M, AA}) where {T, M, AA} = FourierTools.FourierJoin(adapt(to, parent(x)), ndims(x), x.L1, x.L2, x.do_join);
@@ -137,22 +146,22 @@ function Base.Broadcast.BroadcastStyle(::Type{T})  where {T2, N, CD, T<:FourierT
     CUDA.CuArrayStyle{N,CD}()
 end
 
-function Base.copy(s::FourierTools.FourierJoin)  # where {CT, N, CD, T<:FourierTools.FourierJoin{<:Any,<:Any,<:CuArray{CT,N,CD}}}
-    res = similar(get_base_arr(s), eltype(s), size(s));
-    res .= s
-end
+# function Base.copy(s::FourierTools.FourierJoin)  # where {CT, N, CD, T<:FourierTools.FourierJoin{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+#     res = similar(get_base_arr(s), eltype(s), size(s));
+#     res .= s
+# end
 
-function Base.collect(x::FourierTools.FourierJoin) # where {CT, N, CD, T<:FourierTools.FourierJoin{<:Any,<:Any,<:CuArray{CT,N,CD}}}
-    return copy(x) # stay on the GPU        
-end
+# function Base.collect(x::FourierTools.FourierJoin) # where {CT, N, CD, T<:FourierTools.FourierJoin{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+#     return copy(x) # stay on the GPU        
+# end
 
-function Base.Array(x::T)  where {CT, N, CD, T<:FourierTools.FourierJoin{<:Any,<:Any,<:CuArray{CT,N,CD}}}
-    return Array(copy(x)) # remove from GPU
-end
+# function Base.Array(x::T)  where {CT, N, CD, T<:FourierTools.FourierJoin{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+#     return Array(copy(x)) # remove from GPU
+# end
 
-function Base.show(io::IO, mm::MIME"text/plain", cs::FourierTools.FourierJoin) # where {CT, N, CD, T<:FourierTools.FourierJoin{<:Any,<:Any,<:CuArray{CT,N,CD}}}
-    CUDA.@allowscalar invoke(Base.show, Tuple{IO, typeof(mm), AbstractArray}, io, mm, cs) 
-end
+# function Base.show(io::IO, mm::MIME"text/plain", cs::FourierTools.FourierJoin) # where {CT, N, CD, T<:FourierTools.FourierJoin{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+#     CUDA.@allowscalar invoke(Base.show, Tuple{IO, typeof(mm), AbstractArray}, io, mm, cs) 
+# end
 
 ### addition functions specific to CUDA
 

From ce66b6078101a1985a1590c4f88559f2006f5eb7 Mon Sep 17 00:00:00 2001
From: RainerHeintzmann <heintzmann@gmail.com>
Date: Sat, 22 Mar 2025 20:33:47 +0100
Subject: [PATCH 08/25] almost done

---
 ext/CUDASupportExt.jl                         | 66 +++++++++++-----
 ...edArrays_messy.jl => CircShiftedArrays.jl} |  0
 src/convolutions.jl                           |  1 +
 src/custom_fourier_types.jl                   | 79 +++++++++++++------
 src/fourier_resample_1D_based.jl              |  3 -
 src/fourier_resizing.jl                       | 44 ++++++-----
 src/fourier_shear.jl                          |  2 +-
 src/resampling.jl                             |  4 +-
 test/custom_fourier_types.jl                  | 15 +++-
 test/resampling_tests.jl                      | 20 ++---
 test/runtests.jl                              |  7 +-
 test_old/custom_fourier_types.jl              |  8 +-
 12 files changed, 158 insertions(+), 91 deletions(-)
 rename src/{CircShiftedArrays_messy.jl => CircShiftedArrays.jl} (100%)

diff --git a/ext/CUDASupportExt.jl b/ext/CUDASupportExt.jl
index 8438104..c2a18fa 100644
--- a/ext/CUDASupportExt.jl
+++ b/ext/CUDASupportExt.jl
@@ -3,11 +3,15 @@ using CUDA
 using Adapt
 # using ShiftedArrays
 using FourierTools
+using IndexFunArrays # to prevent a stack overflow in get_base_arr
 using Base # to allow displaying such arrays without causing the single indexing CUDA error
 
 get_base_arr(arr::Array) = arr
 get_base_arr(arr::CuArray) = arr
-get_base_arr(arr::AbstractArray) = get_base_arr(parent(arr))
+get_base_arr(arr::IndexFunArray) = arr
+function get_base_arr(arr::AbstractArray) 
+    get_base_arr(parent(arr))
+end
 
 # define a number of Union types to not repeat all definitions for each type
 AllShiftedType = Union{FourierTools.CircShiftedArray{<:Any,<:Any,<:Any},
@@ -15,15 +19,19 @@ AllShiftedType = Union{FourierTools.CircShiftedArray{<:Any,<:Any,<:Any},
                             FourierTools.FourierJoin{<:Any,<:Any,<:Any}}
 
 # these are special only if a CuArray is wrapped
-AllShiftedTypeCu{N, CD} = Union{FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{<:Any,N,CD}},
-                            FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{<:Any,N,CD}},
-                            FourierTools.FourierJoin{<:Any,<:Any,<:CuArray{<:Any,N,CD}}}
 
-AllSubArrayType = SubArray{<:Any, <:Any, <:AllShiftedType, <:Any, <:Any}
+AllSubArrayType = Union{SubArray{<:Any, <:Any, <:AllShiftedType, <:Any, <:Any},
+                        Base.ReshapedArray{<:Any, <:Any, <:AllShiftedType, <:Any},
+                        SubArray{<:Any, <:Any, <:Base.ReshapedArray{<:Any, <:Any, <:AllShiftedType, <:Any}, <:Any, <:Any}}
 AllShiftedAndViews = Union{AllShiftedType, AllSubArrayType}
 
-AllSubArrayTypeCu = SubArray{<:Any, <:Any, <:AllShiftedTypeCu, <:Any, <:Any}
-AllShiftedAndViewsCu = Union{AllShiftedTypeCu, AllSubArrayTypeCu}
+AllShiftedTypeCu{N, CD} = Union{FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{<:Any,N,CD}},
+                            FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{<:Any,N,CD}},
+                            FourierTools.FourierJoin{<:Any,<:Any,<:CuArray{<:Any,N,CD}}}
+AllSubArrayTypeCu{N, CD} = Union{SubArray{<:Any, <:Any, <:AllShiftedTypeCu{N,CD}, <:Any, <:Any},
+                                 Base.ReshapedArray{<:Any, <:Any, <:AllShiftedTypeCu{N,CD}, <:Any},
+                                 SubArray{<:Any, <:Any, <:Base.ReshapedArray{<:Any, <:Any, <:AllShiftedTypeCu{N,CD}, <:Any}, <:Any, <:Any}}
+AllShiftedAndViewsCu{N, CD} = Union{AllShiftedTypeCu{N, CD}, AllSubArrayTypeCu{N, CD}}
 
 # define adapt structures for the ShiftedArrays model. This will not be needed if the PR is merged:
 # Adapt.adapt_structure(to, x::FourierTools.CircShiftedArray{T, D}) where {T, D} = FourierTools.CircShiftedArray(adapt(to, parent(x)), FourierTools.shifts(x));
@@ -31,26 +39,36 @@ AllShiftedAndViewsCu = Union{AllShiftedTypeCu, AllSubArrayTypeCu}
 # Base.Broadcast.BroadcastStyle(::Type{T})  where {T<:FourierTools.CircShiftedArray} = Base.Broadcast.BroadcastStyle(parent_type(T))
 
 Adapt.adapt_structure(to, x::FourierTools.CircShiftedArray{T, N, S}) where {T, N, S} = FourierTools.CircShiftedArray(adapt(to, parent(x)), FourierTools.shifts(x));
-parent_type(::Type{FourierTools.CircShiftedArray{T, N, S}})  where {T, N, S} = S
+# parent_type(::Type{FourierTools.CircShiftedArray{T, N, S}})  where {T, N, S} = S
 
 # Base.Broadcast.BroadcastStyle(::Type{T}) where {T2, N, S, T <:FourierTools.CircShiftedArray{T2, N, S}}  = Base.Broadcast.BroadcastStyle(parent_type(T))
-function Base.Broadcast.BroadcastStyle(::Type{T})  where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+# function Base.Broadcast.BroadcastStyle(::Type{T})  where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+#     CUDA.CuArrayStyle{N,CD}()
+# end
+function Base.Broadcast.BroadcastStyle(::Type{T})  where {N, CD, T<:AllShiftedTypeCu{N, CD}}
     CUDA.CuArrayStyle{N,CD}()
 end
 
 # Define the BroadcastStyle for SubArray of MutableShiftedArray with CuArray
-function Base.Broadcast.BroadcastStyle(::Type{T})  where {CT, N, CD, T<:SubArray{<:Any, <:Any, <:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}}
+# function Base.Broadcast.BroadcastStyle(::Type{T})  where {CT, N, CD, T<:SubArray{<:Any, <:Any, <:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}}
+#     CUDA.CuArrayStyle{N,CD}()
+# end
+function Base.Broadcast.BroadcastStyle(::Type{T})  where {N, CD, T<:AllSubArrayTypeCu{N, CD}}
     CUDA.CuArrayStyle{N,CD}()
 end
 
 # Define the BroadcastStyle for ReshapedArray of MutableShiftedArray with CuArray
-function Base.Broadcast.BroadcastStyle(::Type{T})  where {CT, N, CD, T<:Base.ReshapedArray{<:Any, <:Any, <:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}, <:Any}}
-    CUDA.CuArrayStyle{N,CD}()
-end
+# function Base.Broadcast.BroadcastStyle(::Type{T})  where {CT, N, CD, T<:Base.ReshapedArray{<:Any, <:Any, <:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}, <:Any}}
+#     CUDA.CuArrayStyle{N,CD}()
+# end
 
 function Base.copy(s::AllShiftedAndViews) # where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}
     res = similar(get_base_arr(s), eltype(s), size(s));
+    # @show "copy here"
+    # @show s.D
     res .= s
+    # CUDA.@allowscalar @show res[5]
+    return res
 end
 
 function Base.collect(x::AllShiftedAndViews)  # where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}
@@ -103,12 +121,17 @@ end
 # end
 
 # lets do this for the FourierSplit
-Adapt.adapt_structure(to, x::FourierTools.FourierSplit{T, M, AA}) where {T, M, AA} = FourierTools.FourierSplit(adapt(to, parent(x)), ndims(x), x.L1, x.L2, x.do_split);
+Adapt.adapt_structure(to, x::FourierTools.FourierSplit{T, M, AA, D}) where {T, M, AA, D} = FourierTools.FourierSplit(adapt(to, parent(x)), Val(D), x.L1, x.L2, x.do_split);
+# parent_type(::Type{FourierTools.FourierSplit{T, N, S}})  where {T, N, S} = S
 
 # function Base.Broadcast.BroadcastStyle(::Type{T})  where (CT,CN,CD,T<: ShiftedArray{<:Any,<:Any,<:Any,<:CuArray})
-function Base.Broadcast.BroadcastStyle(::Type{T})  where {T2, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{T2,N,CD}}}
-    CUDA.CuArrayStyle{N,CD}()
-end
+# function Base.Broadcast.BroadcastStyle(::Type{T})  where {T2, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{T2,N,CD}}}
+#     CUDA.CuArrayStyle{N,CD}()
+# end
+
+# function Base.Broadcast.BroadcastStyle(::Type{T})  where {CT, N, CD, T<:SubArray{<:Any, <:Any, <:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}}
+#     CUDA.CuArrayStyle{N,CD}()
+# end
 
 # function Base.copy(s::FourierTools.FourierSplit) #  where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}
 #     res = similar(get_base_arr(s), eltype(s), size(s));
@@ -140,11 +163,12 @@ end
 # end
 
 # for FourierJoin
-Adapt.adapt_structure(to, x::FourierTools.FourierJoin{T, M, AA}) where {T, M, AA} = FourierTools.FourierJoin(adapt(to, parent(x)), ndims(x), x.L1, x.L2, x.do_join);
+Adapt.adapt_structure(to, x::FourierTools.FourierJoin{T, M, AA, D}) where {T, M, AA, D} = FourierTools.FourierJoin(adapt(to, parent(x)), Val(D), x.L1, x.L2, x.do_join);
+# parent_type(::Type{FourierTools.FourierJoin{T, N, S}})  where {T, N, S} = S
 
-function Base.Broadcast.BroadcastStyle(::Type{T})  where {T2, N, CD, T<:FourierTools.FourierJoin{<:Any,<:Any,<:CuArray{T2,N,CD}}}
-    CUDA.CuArrayStyle{N,CD}()
-end
+# function Base.Broadcast.BroadcastStyle(::Type{T})  where {T2, N, CD, T<:FourierTools.FourierJoin{<:Any,<:Any,<:CuArray{T2,N,CD}}}
+#     CUDA.CuArrayStyle{N,CD}()
+# end
 
 # function Base.copy(s::FourierTools.FourierJoin)  # where {CT, N, CD, T<:FourierTools.FourierJoin{<:Any,<:Any,<:CuArray{CT,N,CD}}}
 #     res = similar(get_base_arr(s), eltype(s), size(s));
diff --git a/src/CircShiftedArrays_messy.jl b/src/CircShiftedArrays.jl
similarity index 100%
rename from src/CircShiftedArrays_messy.jl
rename to src/CircShiftedArrays.jl
diff --git a/src/convolutions.jl b/src/convolutions.jl
index 1081b34..b44a68e 100644
--- a/src/convolutions.jl
+++ b/src/convolutions.jl
@@ -148,6 +148,7 @@ end
     plan_conv_buffer(u, v [, dims]; kwargs...)
 
 Similar to [`plan_conv`](@ref) but instead uses buffers to prevent memory allocations.
+The three buffers are internal to the function and are not exposed to the user.
 Not AD friendly!
 
 """
diff --git a/src/custom_fourier_types.jl b/src/custom_fourier_types.jl
index f159a44..c81420e 100644
--- a/src/custom_fourier_types.jl
+++ b/src/custom_fourier_types.jl
@@ -6,9 +6,9 @@ and then replaces the value by half of the parent at L1
 `do_split` is a Bool that indicates whether this mechanism is active. 
 It is needed for type stability reasons of functions returnting this type.
 """
-struct FourierSplit{T,N, AA<:AbstractArray{T, N}} <: AbstractArray{T,N}
+struct FourierSplit{T, N, AA<:AbstractArray{T, N}, D} <: AbstractArray{T,N}
     parent::AA # holds the data (or is another view)
-    D::Int # dimension along which to apply to copy
+    # D::Int # dimension along which to apply to copy
     L1::Int # low index position to copy from (and half)
     L2::Int # high index positon to copy to (and half)
     do_split::Bool
@@ -19,41 +19,51 @@ struct FourierSplit{T,N, AA<:AbstractArray{T, N}} <: AbstractArray{T,N}
 This version below is needed to avoid a split for the first rft dimension,
 but still return half the value FFTs and other RFT dimension should use the version without L2.
 """
-    function FourierSplit(parent::AA, D::Int,L1::Int,L2::Int, do_split::Bool) where {T,N, AA<:AbstractArray{T, N}}
-        return new{T,N, AA}(parent, D, L1, L2, do_split)
+    function FourierSplit(parent::AA, ::Val{D}, L1::Int,L2::Int, do_split::Bool) where {T,N, D, AA<:AbstractArray{T, N}}
+        return new{T,N, AA, D}(parent, L1, L2, do_split)
     end
-    function FourierSplit(parent::AA, D::Int, L1::Int, do_split::Bool) where {T,N, AA<:AbstractArray{T, N}}
+    function FourierSplit(parent::AA, ::Val{D}, L1::Int, do_split::Bool) where {T,N, D, AA<:AbstractArray{T, N}}
         mid = fft_center(size(parent)[D])
         L2 = mid + (mid-L1)
-        return FourierSplit(parent, D,L1,L2, do_split)
+        return FourierSplit(parent, Val(D), L1, L2, do_split)
     end
+    # function FourierSplit(parent::AA, D::Int, L1::Int, do_split::Bool) where {T,N, AA<:AbstractArray{T, N}}
+    #     FourierSplit(parent, Val(D), L1, do_split)
+    # end
 end
 
+# get_D(A::FourierSplit{D}) where {D} = D
+
 Base.IndexStyle(::Type{FD}) where {FD<:FourierSplit} = IndexStyle(parenttype(FD))
-parenttype(::Type{FourierSplit{T,N,AA}}) where {T,N,AA} = AA
+parenttype(::Type{FourierSplit{T,N,AA,D}}) where {T,N,AA,D} = AA
 parenttype(A::FourierSplit) = parenttype(typeof(A))
 
 Base.similar(s::FourierSplit, el::Type, v::NTuple{N, Int64}) where {N} = similar(s.parent, el, v)
 Base.parent(A::FourierSplit) = A.parent 
 Base.size(A::FourierSplit) = size(parent(A))
 
-@inline function Base.getindex(A::FourierSplit{T,N, <:AbstractArray{T, N}}, i::Vararg{Int,N}) where {T,N}
-    if (i[A.D]==A.L2 || i[A.D]==A.L1) && A.do_split # index along this dimension A.D corrsponds to slice L2
+@inline function Base.getindex(A::FourierSplit{T,N, <:AbstractArray{T, N}, D}, i::Vararg{Int,N}) where {T,N, D}
+    # D = get_D(A) # causes huge troubles in CUDA!
+    # return eltype(A)(D) # 
+
+    if (i[D]==A.L2 || i[D]==A.L1) && A.do_split # index along this dimension A.D corrsponds to slice L2
         # note that "setindex" in the line below modifies only the index, not the array
-        @inbounds return parent(A)[Base.setindex(i, A.L1, A.D)...] / 2
-    else i[A.D]==A.L2
+        @inbounds return parent(A)[Base.setindex(i, A.L1, D)...] / 2
+    else i[D]==A.L2
         @inbounds return parent(A)[i...]
         # @inbounds return parent(A)[i...]
     end
 end
 
 # One-D version
-@inline function Base.getindex(A::FourierSplit{T,N, <:AbstractArray{T, N}}, i::Int) where {T,N}
+@inline function Base.getindex(A::FourierSplit{T,N, <:AbstractArray{T, N}, D}, i::Int) where {T,N,D}
     if A.do_split
         # compute the ND index from the one-D index i
         ind = Tuple(CartesianIndices(parent(A))[i])
-        if (ind[A.D]==A.L2 || ind[A.D]==A.L1) 
-            return parent(A)[Base.setindex(ind, A.L1, A.D)...] / 2
+        # D = get_D(A) # causes huge troubles in CUDA!
+        # return eltype(A)(D) # 
+        if (ind[D]==A.L2 || ind[D]==A.L1) 
+            return parent(A)[Base.setindex(ind, A.L1, D)...] / 2
         else
             return parent(A)[i]
         end
@@ -70,9 +80,9 @@ and then replaces the value by add the value at the mirrored position L2
 `do_join` is a Bool that indicates whether this mechanism is active. 
 It is needed for type stability reasons of functions returnting this type
 """
-struct FourierJoin{T,N, AA<:AbstractArray{T, N}} <: AbstractArray{T, N}
+struct FourierJoin{T,N, AA<:AbstractArray{T, N}, D} <: AbstractArray{T, N}
     parent::AA
-    D::Int # dimension along which to apply to copy
+    # D::Int # dimension along which to apply to copy
     L1::Int # low index position to copy from (and half)
     L2::Int # high index positon to copy to (and half)
     do_join::Bool
@@ -82,19 +92,25 @@ This version below is needed to avoid a split for the
 first rft dimension but still return half the value
 FFTs and other RFT dimension should use the version without L2
 """
-    function FourierJoin(parent::AA, D::Int, L1::Int, L2::Int, do_join::Bool) where {T, N, AA<:AbstractArray{T, N}}
-        return new{T, N, AA}(parent, D, L1, L2, do_join)
+    function FourierJoin(parent::AA, ::Val{D}, L1::Int, L2::Int, do_join::Bool) where {T, N, AA<:AbstractArray{T, N}, D}
+        return new{T, N, AA, D}(parent, L1, L2, do_join)
     end
 
-    function FourierJoin(parent::AA, D::Int,L1::Int, do_join::Bool) where {T, N, AA<:AbstractArray{T, N}}
+    function FourierJoin(parent::AA, ::Val{D}, L1::Int, do_join::Bool) where {T, N, AA<:AbstractArray{T, N}, D}
         mid = fft_center(size(parent)[D])
         L2 = mid + (mid-L1)
-        return FourierJoin(parent, D, L1, L2, do_join)
+        return FourierJoin(parent, Val(D), L1, L2, do_join)
     end
+
+    # function FourierJoin(parent::AA, D::Int, L1::Int, do_split::Bool) where {T,N, AA<:AbstractArray{T, N}}
+    #     FourierJoin(parent, Val(D), L1, do_split)
+    # end
 end
 
+# get_D(A::FourierJoin) = A.D
+
 Base.IndexStyle(::Type{FS}) where {FS<:FourierJoin} = IndexStyle(parenttype(FS))
-parenttype(::Type{FourierJoin{T,N,AA}}) where {T,N,AA} = AA
+parenttype(::Type{FourierJoin{T,N,AA,D}}) where {T,N,AA,D} = AA
 parenttype(A::FourierJoin) = parenttype(typeof(A))
 
 Base.similar(s::FourierJoin, el::Type, v::NTuple{N, Int64}) where {N} = similar(s.parent, el, v)
@@ -102,11 +118,26 @@ Base.similar(s::FourierJoin, el::Type, v::NTuple{N, Int64}) where {N} = similar(
 Base.parent(A::FourierJoin) = A.parent
 Base.size(A::FourierJoin) = size(parent(A))
 
-@inline function Base.getindex(A::FourierJoin{T,N, <:AbstractArray{T, N}}, i::Vararg{Int,N}) where {T,N}
-    if i[A.D]==A.L1 && A.do_join
-        @inbounds return (parent(A)[i...] + parent(A)[Base.setindex(i, A.L2, A.D)...])
+@inline function Base.getindex(A::FourierJoin{T,N, <:AbstractArray{T, N}, D}, i::Vararg{Int,N}) where {T,N,D}
+    if i[D]==A.L1 && A.do_join
+        @inbounds return (parent(A)[i...] + parent(A)[Base.setindex(i, A.L2, D)...])
     else 
         @inbounds return (parent(A)[i...])
     end
 end
 
+# One-D version
+@inline function Base.getindex(A::FourierJoin{T,N, <:AbstractArray{T, N},D}, i::Int) where {T,N,D}
+    if A.do_join
+        # compute the ND index from the one-D index i
+        ind = Tuple(CartesianIndices(parent(A))[i])
+        if (ind[D]==A.L1) 
+            return parent(A)[i] + parent(A)[Base.setindex(ind, A.L2, D)...]
+        else
+            return parent(A)[i]
+        end
+    else
+        return parent(A)[i]
+    end
+end
+
diff --git a/src/fourier_resample_1D_based.jl b/src/fourier_resample_1D_based.jl
index ad5189a..5068921 100644
--- a/src/fourier_resample_1D_based.jl
+++ b/src/fourier_resample_1D_based.jl
@@ -46,9 +46,6 @@ function resample_by_1D_FT!(arr::AbstractArray{<:Complex, N}, new_size; normaliz
                 arr_v[inds_left...] ./= correction_factor
             end
             #overwrite old arr handle
-            @show typeof(arr_v)
-            @show typeof(optional_collect(arr_v))
-            @show d
             arr = iffts(arr_v, d)
         end
     end
diff --git a/src/fourier_resizing.jl b/src/fourier_resizing.jl
index 695d4b7..e37c958 100644
--- a/src/fourier_resizing.jl
+++ b/src/fourier_resizing.jl
@@ -89,6 +89,9 @@ is always assumed to align before and after the padding aperation.
 function select_region_rft(mat, old_size, new_size)
     # rft_old_size = size(mat)
     rft_new_size = Base.setindex(new_size,new_size[1] ÷ 2 + 1, 1)
+    # tmp = similar(mat, (8, 10))
+    # tmp .= 1
+    # return rft_fix_after(tmp, old_size, new_size)
     return rft_fix_after(rft_pad(
         rft_fix_before(mat, old_size, new_size), rft_new_size), old_size, new_size)
 end
@@ -147,7 +150,7 @@ function ft_fix_before(mat::MT, size_old, size_new, ::Val{N})::FourierJoin{T,N,M
     so = size_old[N]
     do_join = (sn < so && iseven(sn))
     L1 = (size_old[N] - size_new[N] ) ÷ 2 + 1
-    return FourierJoin(mat, N, L1, do_join)
+    return FourierJoin(mat, Val(N), L1, do_join)
 end
 
 """
@@ -162,20 +165,21 @@ function ft_fix_before(mat::MT, size_old, size_new, ::Val{D}=Val(1))  where {D,
         so = size_old[D]
         do_join = (sn < so && iseven(sn))
         L1 = (size_old[D] - size_new[D] )÷2 +1
-        mat = FourierJoin(mat, D, L1, do_join)
+        mat = FourierJoin(mat, Val(D), L1, do_join)
         return ft_fix_before(mat, size_old, size_new, Val(D + 1))
     else
         L1 = (size_old[N] -size_new[N] )÷2 +1
-        return FourierJoin(mat, N, L1, false)
+        return FourierJoin(mat, Val(N), L1, false)
     end
 end
 
-function ft_fix_after(mat::MT, size_old, size_new, ::Val{N})::FourierSplit{T,N,MT}   where {T, N, MT<:AbstractArray{T,N}}
+# routine only for the last dimensions N == D
+function ft_fix_after(mat::MT, size_old, size_new, ::Val{N})::FourierSplit{T,N,MT,N}   where {T, N, MT<:AbstractArray{T,N}}
     sn = size_new[N]
     so = size_old[N]
     do_split = (sn > so && iseven(so))
     L1 = (size_new[N] - size_old[N]) ÷ 2 + 1
-    return FourierSplit(mat, N, L1, do_split)
+    return FourierSplit(mat, Val(N), L1, do_split)
 end
 
 function ft_fix_after(mat::MT, size_old, size_new, ::Val{D}=Val(1)) where {D, T, N, MT<:AbstractArray{T,N}}
@@ -184,49 +188,49 @@ function ft_fix_after(mat::MT, size_old, size_new, ::Val{D}=Val(1)) where {D, T,
         so = size_old[D]
         do_split = (sn > so && iseven(so))
         L1 = (size_new[D]-size_old[D])÷2+1
-        mat = FourierSplit(mat, D, L1, do_split)
+        mat = FourierSplit(mat, Val(D), L1, do_split)
         return ft_fix_after(mat, size_old, size_new, Val(D + 1))
     else
         L1 = (size_new[N]-size_old[N])÷2+1
-        return FourierSplit(mat, N, L1, false)
+        return FourierSplit(mat, Val(N), L1, false)
     end
 end
 
-function rft_fix_first_dim_before(mat, size_old, size_new; dim=1)
+function rft_fix_first_dim_before(mat, size_old, size_new; dim::Val{D}=Val(1)) where {D}
     # Note that this dim is the corresponding real-space size
-    sn = size_new[dim] 
-    so = size_old[dim]
+    sn = size_new[D] 
+    so = size_old[D]
     # result size is even upon cropping
     do_join = (sn < so && iseven(sn))
-    L1 = size_new[dim] ÷ 2 + 1
+    L1 = size_new[D] ÷ 2 + 1
     # a hack to dublicate the value
-    mat = FourierJoin(mat, dim, L1, L1, do_join)
+    mat = FourierJoin(mat, Val(D), L1, L1, do_join)
     return mat
 end
 
-function rft_fix_first_dim_after(mat,size_old,size_new;dim=1)
+function rft_fix_first_dim_after(mat,size_old,size_new; dim::Val{D}=Val(1)) where {D}
     # Note that this dim is the corresponding real-space size
-    sn = size_new[dim] 
-    so = size_old[dim] 
+    sn = size_new[D] 
+    so = size_old[D] 
     # source size is even upon padding
     do_split = (sn > so && iseven(so)) 
-    L1 = size_old[dim] ÷ 2 + 1
+    L1 = size_old[D] ÷ 2 + 1
     # This hack prevents a second position to be affected
-    mat = FourierSplit(mat, dim, L1, -1, do_split)
+    mat = FourierSplit(mat, Val(D), L1, -1, do_split)
     # if equal do nothing
     return mat
 end
 
 function rft_fix_before(mat,size_old,size_new)
     # ignore the first dimension
-    mat=rft_fix_first_dim_before(mat,size_old,size_new;dim=1) 
+    mat=rft_fix_first_dim_before(mat,size_old,size_new; dim=Val(1)) 
     # ignore the first dimension since it starts at Val(2)
     ft_fix_before(mat, size_old, size_new, Val(2)) 
 end
 
-function rft_fix_after(mat,size_old,size_new)
+function rft_fix_after(mat, size_old, size_new)
     # ignore the first dimension
-    mat = rft_fix_first_dim_after(mat,size_old,size_new;dim=1) 
+    mat = rft_fix_first_dim_after(mat, size_old, size_new; dim=Val(1)) 
     # ignore the first dimension since it starts at Val(2)
     ft_fix_after(mat, size_old, size_new, Val(2))
 end
diff --git a/src/fourier_shear.jl b/src/fourier_shear.jl
index 9e5fe1b..978d0c7 100644
--- a/src/fourier_shear.jl
+++ b/src/fourier_shear.jl
@@ -125,7 +125,7 @@ function apply_shift_strength!(arr::TA, arr_orig, shift, shear_dir_dim, shear_di
         if fix_nyquist
             inv_r = 1 ./ r
             # inv_r = map(x -> (isinf(x) ? 0 : x), inv_r) # NOT GPU compatible
-            inv_r[isinf.(inv_r)] = 0 
+            inv_r[isinf.(inv_r)] .= 0 
             e[inds...] .= inv_r 
         else
             e[inds...] .= r 
diff --git a/src/resampling.jl b/src/resampling.jl
index 3be1e8b..a84e465 100644
--- a/src/resampling.jl
+++ b/src/resampling.jl
@@ -114,7 +114,9 @@ function upsample2_1D(mat::AbstractArray{T, N}, dim=1, fix_center=false, keep_si
         return mat
     end
     newsize = Tuple((d==dim) ? 2*size(mat,d) : size(mat,d) for d in 1:N)
-    res = zeros(eltype(mat), newsize)
+    res = similar(mat, newsize)
+    res .= 0;
+    # res = zeros(eltype(mat), newsize)
     if fix_center && isodd(size(mat,dim))
         selectdim(res,dim,2:2:size(res,dim)) .= mat  
         shifts = Tuple((d==dim) ? 0.5 : 0.0 for d in 1:N)
diff --git a/test/custom_fourier_types.jl b/test/custom_fourier_types.jl
index 6049f51..4843870 100644
--- a/test/custom_fourier_types.jl
+++ b/test/custom_fourier_types.jl
@@ -2,18 +2,25 @@
 @testset "Custom Fourier Types" begin
     N = 5
     x = opt_cu(randn((N, N)), use_cuda)
-    fs = FourierTools.FourierSplit(x, 2, 2, 4, true)
+    fs = FourierTools.FourierSplit(x, Val(2), 2, 4, true)
     @test FourierTools.parenttype(fs) == typeof(x)
-    fs = FourierTools.FourierSplit(x, 2, 2, 4, false)
+    fs = FourierTools.FourierSplit(x, Val(2), 2, 4, false)
     @test FourierTools.parenttype(fs) == typeof(x)
     
-    fj = FourierTools.FourierJoin(x, 2, 2, 4, true)
+    fj = FourierTools.FourierJoin(x, Val(2), 2, 4, true)
     @test FourierTools.parenttype(fj) == typeof(x)
 
-    fj = FourierTools.FourierJoin(x, 2, 2, 4, false)
+    fj = FourierTools.FourierJoin(x, Val(2), 2, 4, false)
     @test FourierTools.parenttype(fj) == typeof(x)
 
     @test FourierTools.parenttype(typeof(fj)) == typeof(x)
 
     @test FourierTools.IndexStyle(typeof(fj)) == IndexStyle(typeof(fj))
+
+    x = opt_cu(ones((4, 7)), use_cuda)
+    fs = FourierTools.FourierSplit(x, Val(2), 2, 4, true)
+    fj = FourierTools.FourierJoin(x, Val(2), 2, 4, true)
+    @test all(fs[:,2] .== 0.5)
+    @test all(fj[:,2] .== 2)
+
 end
diff --git a/test/resampling_tests.jl b/test/resampling_tests.jl
index bea59c3..e5b8ead 100644
--- a/test/resampling_tests.jl
+++ b/test/resampling_tests.jl
@@ -45,7 +45,6 @@
         end
     end
 
-
     @testset "Tests that resample_by_FFT is purely real" begin
         function test_real(s_1, s_2)
             x = opt_cu(randn(Float32, (s_1)), use_cuda)
@@ -108,7 +107,7 @@
     @testset "Upsample2 compared to resample" begin
     for sz in ((10,10),(5,8,9),(20,5,4))
         a = opt_cu(rand(sz...), use_cuda)
-        @test ≈(upsample2(a),resample(a,sz.*2))
+        @test ≈(upsample2(a), resample(a,sz.*2))
         @test ≈(upsample2_abs2(a),abs2.(resample(a,sz.*2)))
         a = opt_cu(rand(ComplexF32, sz...), use_cuda)
         @test ≈(upsample2(a),resample(a,sz.*2))
@@ -147,12 +146,9 @@
     test_resample(253, 254)
     test_resample(253, 1001)
     test_resample(99, 100101)
-
     end
 
-    @testset "FFT resample in 2D" begin
-    
-    
+    @testset "FFT resample in 2D" begin    
         function test_2D(in_s, out_s)
             x = opt_cu(range(-10.0, 10.0, length=in_s[1] + 1)[1:end-1], use_cuda)
             y = opt_cu(range(-10.0, 10.0, length=in_s[2] + 1)[1:end-1]', use_cuda)
@@ -256,9 +252,11 @@
 
     @testset "test select_region_ft" begin
         x = opt_cu([1,2,3,4], use_cuda)
-        @test select_region_ft(ffts(x), (5,)) == ComplexF64[-1.0 + 0.0im, -2.0 - 2.0im, 10.0 + 0.0im, -2.0 + 2.0im, -1.0 + 0.0im]
+        res = select_region_ft(ffts(x), (5,))
+        @test res == opt_cu(ComplexF64[-1.0 + 0.0im, -2.0 - 2.0im, 10.0 + 0.0im, -2.0 + 2.0im, -1.0 + 0.0im], use_cuda)
         x = opt_cu([3.1495759241275225 0.24720770605505335 -1.311507800204285 -0.3387627167144301; -0.7214121984874265 -0.02566249380406308 0.687066447881175 -0.09536748694092163; -0.577092696986848 -0.6320809680268722 -0.09460071173365793 0.7689715736798227; 0.4593837753047561 -1.0204193548690512 -0.28474772376166907 1.442443602597533], use_cuda)
-        @test select_region_ft(ffts(x), (7, 7)) == ComplexF64[0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im; 0.0 + 0.0im 0.32043577156395486 + 0.0im 2.321469443190397 + 0.7890379226962572im 0.38521287113798636 + 0.0im 2.321469443190397 - 0.7890379226962572im 0.32043577156395486 + 0.0im 0.0 + 0.0im; 0.0 + 0.0im 1.3691035744780353 + 0.16703621316206385im 2.4110077589815555 - 0.16558718095884828im 2.2813159163314163 - 0.7520360306228049im 7.47614366018844 - 4.139633109911205im 1.3691035744780353 + 0.16703621316206385im 0.0 + 0.0im; 0.0 + 0.0im 0.4801675770812479 + 0.0im 3.3142445917764407 - 3.2082400832669373im 1.6529948781166373 + 0.0im 3.3142445917764407 + 3.2082400832669373im 0.4801675770812479 + 0.0im 0.0 + 0.0im; 0.0 + 0.0im 1.3691035744780353 - 0.16703621316206385im 7.47614366018844 + 4.139633109911205im 2.2813159163314163 + 0.7520360306228049im 2.4110077589815555 + 0.16558718095884828im 1.3691035744780353 - 0.16703621316206385im 0.0 + 0.0im; 0.0 + 0.0im 0.32043577156395486 + 0.0im 2.321469443190397 + 0.7890379226962572im 0.38521287113798636 + 0.0im 2.321469443190397 - 0.7890379226962572im 0.32043577156395486 + 0.0im 0.0 + 0.0im; 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im]
+        res = select_region_ft(ffts(x), (7, 7))
+        @test collect(res) ≈ opt_cu(ComplexF64[0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im; 0.0 + 0.0im 0.32043577156395486 + 0.0im 2.321469443190397 + 0.7890379226962572im 0.38521287113798636 + 0.0im 2.321469443190397 - 0.7890379226962572im 0.32043577156395486 + 0.0im 0.0 + 0.0im; 0.0 + 0.0im 1.3691035744780353 + 0.16703621316206385im 2.4110077589815555 - 0.16558718095884828im 2.2813159163314163 - 0.7520360306228049im 7.47614366018844 - 4.139633109911205im 1.3691035744780353 + 0.16703621316206385im 0.0 + 0.0im; 0.0 + 0.0im 0.4801675770812479 + 0.0im 3.3142445917764407 - 3.2082400832669373im 1.6529948781166373 + 0.0im 3.3142445917764407 + 3.2082400832669373im 0.4801675770812479 + 0.0im 0.0 + 0.0im; 0.0 + 0.0im 1.3691035744780353 - 0.16703621316206385im 7.47614366018844 + 4.139633109911205im 2.2813159163314163 + 0.7520360306228049im 2.4110077589815555 + 0.16558718095884828im 1.3691035744780353 - 0.16703621316206385im 0.0 + 0.0im; 0.0 + 0.0im 0.32043577156395486 + 0.0im 2.321469443190397 + 0.7890379226962572im 0.38521287113798636 + 0.0im 2.321469443190397 - 0.7890379226962572im 0.32043577156395486 + 0.0im 0.0 + 0.0im; 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im], use_cuda)
     end
 
     @testset "test resample_czt" begin
@@ -281,7 +279,8 @@
         @test rs2b ≈ rs2
     end
 
-    @testset "test resample_nfft" begin
+    if (dat isa Array)
+        @testset "test resample_nfft" begin
         dim =2
         s_small = (12,16) # ntuple(_ -> rand(1:13), dim)
         s_large = (20,18) # ntuple(i -> max.(s_small[i], rand(10:16)), dim)
@@ -319,6 +318,9 @@
         rs6 = FourierTools.resample_nfft(1im .* dat , t->t .* 2.0, s_small.÷2, is_src_coords=false, is_in_pixels=false, pad_value=0.0)
         rs7 = FourierTools.resample_nfft(1im .* dat, t->t .* 0.5, s_small.÷2, is_src_coords=true, is_in_pixels=false, pad_value=0.0)
         @test rs6.*4 ≈ rs7
+        end
+    else
+        @warn "Skipping test for CuArray, since nfft does not support CuArray"
     end
 
 end
diff --git a/test/runtests.jl b/test/runtests.jl
index 9e8b4b6..077c296 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -11,7 +11,7 @@ using CUDA
 
 Random.seed!(42)
 
-use_cuda = true
+use_cuda = false
 if use_cuda
     CUDA.allowscalar(false);
 end
@@ -23,9 +23,8 @@ include("utils.jl");
 include("fourier_shifting.jl");
 include("fourier_shear.jl");
 include("fourier_rotate.jl");
-include("resampling_tests.jl"); ###
-
-include("convolutions.jl");
+include("resampling_tests.jl"); ### nfft does not work with CUDA
+include("convolutions.jl"); # spurious buffer problem in conv_p4 in CUDA?
 include("correlations.jl");
 include("custom_fourier_types.jl");
 include("damping.jl");
diff --git a/test_old/custom_fourier_types.jl b/test_old/custom_fourier_types.jl
index d735c27..c556567 100644
--- a/test_old/custom_fourier_types.jl
+++ b/test_old/custom_fourier_types.jl
@@ -2,15 +2,15 @@
 @testset "Custom Fourier Types" begin
     N = 5
     x = randn((N, N))
-    fs = FourierTools.FourierSplit(x, 2, 2, 4, true)
+    fs = FourierTools.FourierSplit(x, Val(2), 2, 4, true)
     @test FourierTools.parenttype(fs) == typeof(x)
-    fs = FourierTools.FourierSplit(x, 2, 2, 4, false)
+    fs = FourierTools.FourierSplit(x, Val(2), 2, 4, false)
     @test FourierTools.parenttype(fs) == typeof(x)
     
-    fj = FourierTools.FourierJoin(x, 2, 2, 4, true)
+    fj = FourierTools.FourierJoin(x, Val(2), 2, 4, true)
     @test FourierTools.parenttype(fj) == typeof(x)
 
-    fj = FourierTools.FourierJoin(x, 2, 2, 4, false)
+    fj = FourierTools.FourierJoin(x, Val(2), 2, 4, false)
     @test FourierTools.parenttype(fj) == typeof(x)
 
     @test FourierTools.parenttype(typeof(fj)) == typeof(x)

From 48edce36c6ad2d3720bd8b8661fd5ee944dbf21b Mon Sep 17 00:00:00 2001
From: RainerHeintzmann <heintzmann@gmail.com>
Date: Sat, 22 Mar 2025 20:38:17 +0100
Subject: [PATCH 09/25] cleanup in CUDASupportExt

---
 ext/CUDASupportExt.jl | 123 +++++-------------------------------------
 1 file changed, 14 insertions(+), 109 deletions(-)

diff --git a/ext/CUDASupportExt.jl b/ext/CUDASupportExt.jl
index c2a18fa..6760224 100644
--- a/ext/CUDASupportExt.jl
+++ b/ext/CUDASupportExt.jl
@@ -3,8 +3,8 @@ using CUDA
 using Adapt
 # using ShiftedArrays
 using FourierTools
-using IndexFunArrays # to prevent a stack overflow in get_base_arr
-using Base # to allow displaying such arrays without causing the single indexing CUDA error
+using IndexFunArrays # to prevent a recuursive stack overflow in get_base_arr
+using Base 
 
 get_base_arr(arr::Array) = arr
 get_base_arr(arr::CuArray) = arr
@@ -33,70 +33,52 @@ AllSubArrayTypeCu{N, CD} = Union{SubArray{<:Any, <:Any, <:AllShiftedTypeCu{N,CD}
                                  SubArray{<:Any, <:Any, <:Base.ReshapedArray{<:Any, <:Any, <:AllShiftedTypeCu{N,CD}, <:Any}, <:Any, <:Any}}
 AllShiftedAndViewsCu{N, CD} = Union{AllShiftedTypeCu{N, CD}, AllSubArrayTypeCu{N, CD}}
 
-# define adapt structures for the ShiftedArrays model. This will not be needed if the PR is merged:
-# Adapt.adapt_structure(to, x::FourierTools.CircShiftedArray{T, D}) where {T, D} = FourierTools.CircShiftedArray(adapt(to, parent(x)), FourierTools.shifts(x));
-# parent_type(::Type{FourierTools.CircShiftedArray{T, N, A, S}})  where {T, N, A, S} = A
-# Base.Broadcast.BroadcastStyle(::Type{T})  where {T<:FourierTools.CircShiftedArray} = Base.Broadcast.BroadcastStyle(parent_type(T))
-
 Adapt.adapt_structure(to, x::FourierTools.CircShiftedArray{T, N, S}) where {T, N, S} = FourierTools.CircShiftedArray(adapt(to, parent(x)), FourierTools.shifts(x));
-# parent_type(::Type{FourierTools.CircShiftedArray{T, N, S}})  where {T, N, S} = S
+Adapt.adapt_structure(to, x::FourierTools.FourierSplit{T, M, AA, D}) where {T, M, AA, D} = FourierTools.FourierSplit(adapt(to, parent(x)), Val(D), x.L1, x.L2, x.do_split);
+Adapt.adapt_structure(to, x::FourierTools.FourierJoin{T, M, AA, D}) where {T, M, AA, D} = FourierTools.FourierJoin(adapt(to, parent(x)), Val(D), x.L1, x.L2, x.do_join);
 
-# Base.Broadcast.BroadcastStyle(::Type{T}) where {T2, N, S, T <:FourierTools.CircShiftedArray{T2, N, S}}  = Base.Broadcast.BroadcastStyle(parent_type(T))
-# function Base.Broadcast.BroadcastStyle(::Type{T})  where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}
-#     CUDA.CuArrayStyle{N,CD}()
-# end
 function Base.Broadcast.BroadcastStyle(::Type{T})  where {N, CD, T<:AllShiftedTypeCu{N, CD}}
     CUDA.CuArrayStyle{N,CD}()
 end
 
 # Define the BroadcastStyle for SubArray of MutableShiftedArray with CuArray
-# function Base.Broadcast.BroadcastStyle(::Type{T})  where {CT, N, CD, T<:SubArray{<:Any, <:Any, <:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}}
-#     CUDA.CuArrayStyle{N,CD}()
-# end
+
 function Base.Broadcast.BroadcastStyle(::Type{T})  where {N, CD, T<:AllSubArrayTypeCu{N, CD}}
     CUDA.CuArrayStyle{N,CD}()
 end
 
-# Define the BroadcastStyle for ReshapedArray of MutableShiftedArray with CuArray
-# function Base.Broadcast.BroadcastStyle(::Type{T})  where {CT, N, CD, T<:Base.ReshapedArray{<:Any, <:Any, <:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}, <:Any}}
-#     CUDA.CuArrayStyle{N,CD}()
-# end
-
-function Base.copy(s::AllShiftedAndViews) # where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+function Base.copy(s::AllShiftedAndViews)
     res = similar(get_base_arr(s), eltype(s), size(s));
-    # @show "copy here"
-    # @show s.D
     res .= s
-    # CUDA.@allowscalar @show res[5]
     return res
 end
 
-function Base.collect(x::AllShiftedAndViews)  # where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+function Base.collect(x::AllShiftedAndViews) 
     return copy(x) # stay on the GPU        
 end
 
-function Base.Array(x::AllShiftedAndViews) # where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+function Base.Array(x::AllShiftedAndViews) 
     return Array(copy(x)) # remove from GPU
 end
 
-function Base.:(==)(x::AllShiftedAndViewsCu, y::AbstractArray)  # where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
+function Base.:(==)(x::AllShiftedAndViewsCu, y::AbstractArray) 
     return all(x .== y)
 end
 
-function Base.:(==)(y::AbstractArray, x::AllShiftedAndViewsCu) # where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
+function Base.:(==)(y::AbstractArray, x::AllShiftedAndViewsCu) 
     return all(x .== y)
 end
 
-function Base.:(==)(x::AllShiftedAndViewsCu, y::AllShiftedAndViewsCu) # where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
+function Base.:(==)(x::AllShiftedAndViewsCu, y::AllShiftedAndViewsCu) 
     return all(x .== y)
 end
 
-function Base.isapprox(x::AllShiftedAndViewsCu, y::AbstractArray; atol=0, rtol=atol>0 ? 0 : sqrt(eps(real(eltype(x)))), va...) # where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
+function Base.isapprox(x::AllShiftedAndViewsCu, y::AbstractArray; atol=0, rtol=atol>0 ? 0 : sqrt(eps(real(eltype(x)))), va...) 
     atol = (atol != 0) ? atol : rtol * maximum(abs.(x))
     return all(abs.(x .- y) .<= atol)
 end
 
-function Base.isapprox(y::AbstractArray, x::AllShiftedAndViewsCu; atol=0, rtol=atol>0 ? 0 : sqrt(eps(real(eltype(x)))),  va...) # where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
+function Base.isapprox(y::AbstractArray, x::AllShiftedAndViewsCu; atol=0, rtol=atol>0 ? 0 : sqrt(eps(real(eltype(x)))),  va...)     
     atol = (atol != 0) ? atol : rtol * maximum(abs.(x))
     return all(abs.(x .- y) .<= atol)
 end
@@ -106,87 +88,10 @@ function Base.isapprox(x::AllShiftedAndViewsCu, y::AllShiftedAndViewsCu; atol=0,
     return all(abs.(x .- y) .<= atol)
 end
 
-function Base.show(io::IO, mm::MIME"text/plain", cs::AllShiftedAndViews) # where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}
+function Base.show(io::IO, mm::MIME"text/plain", cs::AllShiftedAndViews) 
     CUDA.@allowscalar invoke(Base.show, Tuple{IO, typeof(mm), AbstractArray}, io, mm, cs) 
 end
 
-
-# cu_storage_type(::Type{T}) where {CT,CN,CD,T<:CuArray{CT,CN,CD}} = CD
-# lets do this for the ShiftedArray type
-# Adapt.adapt_structure(to, x::ShiftedArray{T, M, N}) where {T, M, N} = ShiftedArray(adapt(to, parent(x)), FourierTools.shifts(x); default=ShiftedArrays.default(x));
-
-# # function Base.Broadcast.BroadcastStyle(::Type{T})  where (CT,CN,CD,T<: ShiftedArray{<:Any,<:Any,<:Any,<:CuArray})
-# function Base.Broadcast.BroadcastStyle(::Type{T})  where {T2, N, CD, T<:ShiftedArray{<:Any,<:Any,<:Any,<:CuArray{T2,N,CD}}}
-#     CUDA.CuArrayStyle{N,CD}()
-# end
-
-# lets do this for the FourierSplit
-Adapt.adapt_structure(to, x::FourierTools.FourierSplit{T, M, AA, D}) where {T, M, AA, D} = FourierTools.FourierSplit(adapt(to, parent(x)), Val(D), x.L1, x.L2, x.do_split);
-# parent_type(::Type{FourierTools.FourierSplit{T, N, S}})  where {T, N, S} = S
-
-# function Base.Broadcast.BroadcastStyle(::Type{T})  where (CT,CN,CD,T<: ShiftedArray{<:Any,<:Any,<:Any,<:CuArray})
-# function Base.Broadcast.BroadcastStyle(::Type{T})  where {T2, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{T2,N,CD}}}
-#     CUDA.CuArrayStyle{N,CD}()
-# end
-
-# function Base.Broadcast.BroadcastStyle(::Type{T})  where {CT, N, CD, T<:SubArray{<:Any, <:Any, <:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}}
-#     CUDA.CuArrayStyle{N,CD}()
-# end
-
-# function Base.copy(s::FourierTools.FourierSplit) #  where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}
-#     res = similar(get_base_arr(s), eltype(s), size(s));
-#     res .= s
-# end
-
-# function Base.collect(x::FourierTools.FourierSplit) # where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}
-#     return copy(x) # stay on the GPU        
-# end
-
-# function Base.Array(x::T)  where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}
-#     return Array(copy(x)) # remove from GPU
-# end
-
-# function Base.:(==)(x::T, y::AbstractArray)  where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
-#     return all(x .== y)
-# end
-
-# function Base.:(==)(y::AbstractArray, x::T)  where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
-#     return all(x .== y)
-# end
-
-# function Base.:(==)(x::T, y::T)  where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
-#     return all(x .== y)
-# end
-
-# function Base.show(io::IO, mm::MIME"text/plain", cs::FourierTools.FourierSplit) # where {CT, N, CD, T<:FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{CT,N,CD}}}
-#     CUDA.@allowscalar invoke(Base.show, Tuple{IO, typeof(mm), AbstractArray}, io, mm, cs) 
-# end
-
-# for FourierJoin
-Adapt.adapt_structure(to, x::FourierTools.FourierJoin{T, M, AA, D}) where {T, M, AA, D} = FourierTools.FourierJoin(adapt(to, parent(x)), Val(D), x.L1, x.L2, x.do_join);
-# parent_type(::Type{FourierTools.FourierJoin{T, N, S}})  where {T, N, S} = S
-
-# function Base.Broadcast.BroadcastStyle(::Type{T})  where {T2, N, CD, T<:FourierTools.FourierJoin{<:Any,<:Any,<:CuArray{T2,N,CD}}}
-#     CUDA.CuArrayStyle{N,CD}()
-# end
-
-# function Base.copy(s::FourierTools.FourierJoin)  # where {CT, N, CD, T<:FourierTools.FourierJoin{<:Any,<:Any,<:CuArray{CT,N,CD}}}
-#     res = similar(get_base_arr(s), eltype(s), size(s));
-#     res .= s
-# end
-
-# function Base.collect(x::FourierTools.FourierJoin) # where {CT, N, CD, T<:FourierTools.FourierJoin{<:Any,<:Any,<:CuArray{CT,N,CD}}}
-#     return copy(x) # stay on the GPU        
-# end
-
-# function Base.Array(x::T)  where {CT, N, CD, T<:FourierTools.FourierJoin{<:Any,<:Any,<:CuArray{CT,N,CD}}}
-#     return Array(copy(x)) # remove from GPU
-# end
-
-# function Base.show(io::IO, mm::MIME"text/plain", cs::FourierTools.FourierJoin) # where {CT, N, CD, T<:FourierTools.FourierJoin{<:Any,<:Any,<:CuArray{CT,N,CD}}}
-#     CUDA.@allowscalar invoke(Base.show, Tuple{IO, typeof(mm), AbstractArray}, io, mm, cs) 
-# end
-
 ### addition functions specific to CUDA
 
 function FourierTools.optional_collect(a::CuArray)

From 3e5370052295b6905050173e6b7c7065b05871fc Mon Sep 17 00:00:00 2001
From: RainerHeintzmann <heintzmann@gmail.com>
Date: Sat, 22 Mar 2025 20:48:13 +0100
Subject: [PATCH 10/25] tiny bug fix in tests

---
 test/resampling_tests.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/resampling_tests.jl b/test/resampling_tests.jl
index e5b8ead..df4e80f 100644
--- a/test/resampling_tests.jl
+++ b/test/resampling_tests.jl
@@ -279,7 +279,7 @@
         @test rs2b ≈ rs2
     end
 
-    if (dat isa Array)
+    if (! use_cuda)
         @testset "test resample_nfft" begin
         dim =2
         s_small = (12,16) # ntuple(_ -> rand(1:13), dim)

From 68a7cb522d1221ad93aa652e48832f5d4e0862ce Mon Sep 17 00:00:00 2001
From: RainerHeintzmann <heintzmann@gmail.com>
Date: Sat, 22 Mar 2025 21:16:45 +0100
Subject: [PATCH 11/25] removed old tests and bug-fix with unavailable CUDA
 hardware support.

---
 test/runtests.jl                         |  52 ++--
 test_old/convolutions.jl                 | 109 --------
 test_old/correlations.jl                 |  14 -
 test_old/custom_fourier_types.jl         |  19 --
 test_old/czt.jl                          |  39 ---
 test_old/damping.jl                      |  14 -
 test_old/fft_helpers.jl                  |  83 ------
 test_old/fftshift_alternatives.jl        |  45 ----
 test_old/fourier_filtering.jl            |  44 ---
 test_old/fourier_rotate.jl               |  44 ---
 test_old/fourier_shear.jl                |  46 ----
 test_old/fourier_shifting.jl             |  81 ------
 test_old/fractional_fourier_transform.jl |  45 ----
 test_old/nfft_tests.jl                   |  25 --
 test_old/resampling_tests.jl             | 325 -----------------------
 test_old/runtests.jl                     |  30 ---
 test_old/sdft.jl                         | 102 -------
 test_old/utils.jl                        | 141 ----------
 18 files changed, 33 insertions(+), 1225 deletions(-)
 delete mode 100644 test_old/convolutions.jl
 delete mode 100644 test_old/correlations.jl
 delete mode 100644 test_old/custom_fourier_types.jl
 delete mode 100644 test_old/czt.jl
 delete mode 100644 test_old/damping.jl
 delete mode 100644 test_old/fft_helpers.jl
 delete mode 100644 test_old/fftshift_alternatives.jl
 delete mode 100644 test_old/fourier_filtering.jl
 delete mode 100644 test_old/fourier_rotate.jl
 delete mode 100644 test_old/fourier_shear.jl
 delete mode 100644 test_old/fourier_shifting.jl
 delete mode 100644 test_old/fractional_fourier_transform.jl
 delete mode 100644 test_old/nfft_tests.jl
 delete mode 100644 test_old/resampling_tests.jl
 delete mode 100644 test_old/runtests.jl
 delete mode 100644 test_old/sdft.jl
 delete mode 100644 test_old/utils.jl

diff --git a/test/runtests.jl b/test/runtests.jl
index 077c296..4481897 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -12,26 +12,40 @@ using CUDA
 Random.seed!(42)
 
 use_cuda = false
-if use_cuda
-    CUDA.allowscalar(false);
-end
 opt_cu(img, use_cuda) = ifelse(use_cuda, CuArray(img), img)
 
-include("fft_helpers.jl");
-include("fftshift_alternatives.jl");
-include("utils.jl");
-include("fourier_shifting.jl");
-include("fourier_shear.jl");
-include("fourier_rotate.jl");
-include("resampling_tests.jl"); ### nfft does not work with CUDA
-include("convolutions.jl"); # spurious buffer problem in conv_p4 in CUDA?
-include("correlations.jl");
-include("custom_fourier_types.jl");
-include("damping.jl");
-include("czt.jl"); 
-include("nfft_tests.jl");
-include("fractional_fourier_transform.jl");
-include("fourier_filtering.jl");
-include("sdft.jl");
+function run_all_tests()
+    include("fft_helpers.jl");
+    include("fftshift_alternatives.jl");
+    include("utils.jl");
+    include("fourier_shifting.jl");
+    include("fourier_shear.jl");
+    include("fourier_rotate.jl");
+    include("resampling_tests.jl"); ### nfft does not work with CUDA
+    include("convolutions.jl"); # spurious buffer problem in conv_p4 in CUDA?
+    include("correlations.jl");
+    include("custom_fourier_types.jl");
+    include("damping.jl");
+    include("czt.jl"); 
+    include("nfft_tests.jl");
+    include("fractional_fourier_transform.jl");
+    include("fourier_filtering.jl");
+    include("sdft.jl");
+end
+
+use_cuda=false
+run_all_tests()
+
+if CUDA.functional()
+    @testset "all in CUDA" begin
+    CUDA.allowscalar(false);
+    use_cuda=true
+    run_all_tests()
+    end
+else
+    @testset "no CUDA available!" begin
+        @test true == true
+    end
+end
 
 return
diff --git a/test_old/convolutions.jl b/test_old/convolutions.jl
deleted file mode 100644
index 1018eb3..0000000
--- a/test_old/convolutions.jl
+++ /dev/null
@@ -1,109 +0,0 @@
-@testset "Convolution methods" begin
-
-    conv_gen(u, v, dims) = real(ifft(fft(u, dims) .* fft(v, dims), dims))
-
-    function conv_test(psf, img, img_out, dims, s)
-        otf = fft(psf, dims)
-        otf_r = rfft(psf, dims)
-        otf_p, conv_p = plan_conv(img, psf, dims, flags=FFTW.ESTIMATE)
-        otf_p2, conv_p2 = plan_conv(img .+ 0.0im, 0.0im .+ psf, dims)
-        otf_p3, conv_p3 = plan_conv_psf(img, fftshift(psf,dims), dims)
-        otf_p3, conv_p3 = plan_conv_psf(img, fftshift(psf,dims), dims, flags=FFTW.MEASURE)
-        otf_p4, conv_p4 = plan_conv_psf_buffer(img, fftshift(psf,dims), dims, flags=FFTW.MEASURE)
-        @testset "$s" begin
-            @test img_out ≈ conv(0.0im .+ img, psf, dims)
-            @test img_out ≈ conv(img, psf, dims)
-            @test img_out ≈ conv_p(img, otf_p)
-            @test img_out ≈ conv_p(img)
-            @test img_out ≈ conv_p2(img, otf_p2)
-            @test img_out ≈ conv_p2(img)
-            @test img_out ≈ conv_psf(img, fftshift(psf, dims), dims)
-            @test img_out ≈ conv_p3(img)
-            @test img_out ≈ conv_p4(img)
-        end
-    end
-    
-
-    N = 5
-    psf = zeros((N, N))
-    psf[1, 1] = 1
-    img = randn((N, N))
-    conv_test(psf, img, img, [1,2], "Convolution random image with delta peak")
-
-
-    N = 5
-    psf = zeros((N, N))
-    psf[1, 1] = 1
-    img = randn((N, N, N))
-    conv_test(psf, img, img, [1,2], "Convolution with different dimensions psf, img delta")
-
-
-    N = 5
-    psf = abs.(randn((N, N, 2)))
-    img = randn((N, N, 2))
-    dims = [1, 2]
-    img_out = conv_gen(img, psf, dims)
-    conv_test(psf, img, img_out, dims, "Convolution with random 3D PSF and random 3D image over 2D dimensions")
- 
-    N = 5
-    psf = abs.(randn((N, N, N, N, N)))
-    img = randn((N, N, N, N, N))
-    dims = [1, 2, 3, 4]
-    img_out = conv_gen(img, psf, dims)
-    conv_test(psf, img, img_out, dims, "Convolution with random 5D PSF and random 5D image over 4 Dimensions")
-
-    N = 5
-    psf = abs.(zeros((N, N, N, N, N)))
-    for i = 1:N
-        psf[1,1,1,1, i] = 1
-    end
-    img = randn((N, N, N, N, N))
-    dims = [1, 2, 3, 4]
-    img_out = conv_gen(img, psf, dims)
-    conv_test(psf, img, img, dims, "Convolution with 5D delta peak and random 5D image over 4 Dimensions")
-
-
-    @testset "Check broadcasting convolution" begin
-        img = randn((5,6,7))
-        psf = randn((5,6,7, 2, 3))
-        _, p = plan_conv_buffer(img, psf)
-        @test conv(img, psf) ≈ p(img)
-    end
-    
-
-   @testset "Check types" begin
-        N = 10
-        img = randn(Float32, (N, N))
-        psf = abs.(randn(Float32, (N, N)))
-        dims = [1, 2] 
-        @test typeof(conv_gen(img, psf, dims)) == typeof(conv(img, psf))
-        @test typeof(conv_gen(img, psf, dims)) != typeof(conv(img .+ 0f0im, psf))
-        @test conv_gen(img, psf, dims) .+ 1f0im ≈ 1f0im .+ conv(img .+ 0f0im, psf)
-    end
-
-
-    @testset "Check type get_plan" begin
-        @test plan_rfft === FourierTools.get_plan(typeof(1f0))  
-        @test plan_fft === FourierTools.get_plan(typeof(1im))  
-    end
-
-    @testset "dims argument nothing" begin
-        N = 5
-        psf = abs.(randn((N, N, N, N, N)))
-        img = randn((N, N, N, N, N))
-        dims = [1,2,3,4,5] 
-        @test conv(psf, img) ≈ conv(img, psf, dims)
-        @test conv(psf, img) ≈ conv(psf, img, dims)
-        @test conv(img, psf) ≈ conv(img, psf, dims)
-    end
-
-    @testset "adjoint convolution" begin
-        x = randn(ComplexF32, (5,6))
-        y = randn(ComplexF32, (5,6))
-
-        y_ft, p = plan_conv(x, y)
-        @test ≈(exp(1im * 1.23) .+ conv(ones(eltype(y), size(x)), conj.(y)), exp(1im * 1.23) .+ Zygote.gradient(x -> sum(real(conv(x, y))), x)[1], rtol=1e-4)   
-        @test ≈(exp(1im * 1.23) .+ conv(ones(ComplexF32, size(x)), conj.(y)), exp(1im * 1.23) .+ Zygote.gradient(x -> sum(real(p(x, y_ft))), x)[1], rtol=1e-4) 
-    end
-
-end
diff --git a/test_old/correlations.jl b/test_old/correlations.jl
deleted file mode 100644
index 609b439..0000000
--- a/test_old/correlations.jl
+++ /dev/null
@@ -1,14 +0,0 @@
-
-
-@testset "Correlations methods" begin
-     
-    @test ccorr([1, 0], [1, 0], centered = true) == [0.0, 1.0]
-    @test ccorr([1, 0], [1, 0]) == [1.0, 0.0]
-    
-    x = [1,2,3,4,5]
-    y = [1,2,3,4,5]
-    @test ccorr(x,y) ≈ [55, 45, 40, 40, 45]
-    @test ccorr(x,y, centered=true) ≈ [40, 45, 55, 45, 40]
-
-    @test ccorr(x, x .* (1im)) == ComplexF64[0.0 - 55.0im, 0.0 - 45.0im, 0.0 - 40.0im, 0.0 - 40.0im, 0.0 - 45.0im]
-end
diff --git a/test_old/custom_fourier_types.jl b/test_old/custom_fourier_types.jl
deleted file mode 100644
index c556567..0000000
--- a/test_old/custom_fourier_types.jl
+++ /dev/null
@@ -1,19 +0,0 @@
-
-@testset "Custom Fourier Types" begin
-    N = 5
-    x = randn((N, N))
-    fs = FourierTools.FourierSplit(x, Val(2), 2, 4, true)
-    @test FourierTools.parenttype(fs) == typeof(x)
-    fs = FourierTools.FourierSplit(x, Val(2), 2, 4, false)
-    @test FourierTools.parenttype(fs) == typeof(x)
-    
-    fj = FourierTools.FourierJoin(x, Val(2), 2, 4, true)
-    @test FourierTools.parenttype(fj) == typeof(x)
-
-    fj = FourierTools.FourierJoin(x, Val(2), 2, 4, false)
-    @test FourierTools.parenttype(fj) == typeof(x)
-
-    @test FourierTools.parenttype(typeof(fj)) == typeof(x)
-
-    @test FourierTools.IndexStyle(typeof(fj)) == IndexStyle(typeof(fj))
-end
diff --git a/test_old/czt.jl b/test_old/czt.jl
deleted file mode 100644
index 3ec04bf..0000000
--- a/test_old/czt.jl
+++ /dev/null
@@ -1,39 +0,0 @@
-using NDTools # this is needed for the select_region! function below.
-
-@testset "chirp z-transformation" begin
-    @testset "czt" begin
-        x = randn(ComplexF32, (5,6,7))
-        @test eltype(czt(x, (2.0,2.0,2.0))) == ComplexF32
-        @test eltype(czt(x, (2f0,2f0,2f0))) == ComplexF32
-        @test ≈(czt(x, (1.0,1.0,1.0), (1,3)), ft(x, (1,3)), rtol=1e-5)
-        @test ≈(czt(x, (1.0,1.0,1.0), (1,3), src_center=(1,1,1), dst_center=(1,1,1)), fft(x, (1,3)), rtol=1e-5)
-        @test ≈(iczt(x, (1.0,1.0,1.0), (1,3), src_center=(1,1,1), dst_center=(1,1,1)), ifft(x, (1,3)), rtol=1e-5)
-
-        y = randn(ComplexF32, (5,6))
-        zoom = (1.0,1.0,1.0)
-        @test ≈(czt(x, zoom), ft(x), rtol=1e-4)         
-        @test ≈(czt(y, (1.0,1.0)), ft(y), rtol=1e-5)
-
-        @test ≈(iczt(czt(y, (1.0,1.0)), (1.0,1.0)), y, rtol=1e-5)
-        zoom = (2.0,2.0)
-        @test sum(abs.(imag(czt(ones(5,6),zoom, src_center=((5,6).+1)./2)))) < 1e-8
-
-        # for even sizes the czt is not the same as the ft and upsample operation. But should it be or not?
-        # @test ≈(czt(y,zoom),  select_region(upsample2(ft(y), fix_center=true), new_size=size(y)), rtol=1e-5)
-        # @test ≈(czt(y,zoom, src_center=(size(y).+1)./2), select_region(upsample2(ft(y), fix_center=true), new_size=size(y)), rtol=1e-5)
-
-        # for uneven sizes this works:
-        @test ≈(czt(y[1:5,1:5], zoom, (1,2), (10,10)),  upsample2(ft(y[1:5,1:5]), fix_center=true), rtol=1e-5)
-        p_czt = plan_czt(y, zoom, (1,2), (11,12))
-        @test ≈(p_czt * y, czt(y, zoom, (1,2), (11,12)))
-        # zoom smaller 1.0 causes wrap around:
-        zoom = (0.5,2.0)
-        @test abs(czt(y,zoom)[1,1]) > 1e-5
-        zoom = (2.0, 0.5)
-        # check if the remove_wrap works
-        @test abs(czt(y, zoom; remove_wrap=true)[1,1]) == 0.0
-        @test abs(iczt(y, zoom; remove_wrap=true)[1,1]) == 0.0
-        @test abs(czt(y, zoom; pad_value=0.2, remove_wrap=true)[1,1]) == 0.2f0
-        @test abs(iczt(y, zoom; pad_value=0.5f0, remove_wrap=true)[1,1]) == 0.5f0
-    end
-end
diff --git a/test_old/damping.jl b/test_old/damping.jl
deleted file mode 100644
index 55a0a86..0000000
--- a/test_old/damping.jl
+++ /dev/null
@@ -1,14 +0,0 @@
-using IndexFunArrays
-@testset "Test damping functions" begin
-
-    @testset "Test damp_edge_outside" begin
-        sz = (512,512)
-        data = disc(sz,150.0, offset=CtrCorner);
-        data_d = damp_edge_outside(data);
-        fta = abs.(ft(data));
-        ftb = abs.(ft(data_d));
-        @test fta[size(fta)[1]÷2+1,1] > 50
-        @test ftb[size(ftb)[1]÷2+1,1] < 15
-    end
-
-end
diff --git a/test_old/fft_helpers.jl b/test_old/fft_helpers.jl
deleted file mode 100644
index badff06..0000000
--- a/test_old/fft_helpers.jl
+++ /dev/null
@@ -1,83 +0,0 @@
-@testset "test fft_helpers" begin
-
-    @testset "Optional collect" begin
-        y = [1,2,3]
-        x = fftshift_view(y, (1))
-        @test fftshift(y) == FourierTools.optional_collect(x)
-    end
-
-    @testset "Test ft and ift wrappers" begin
-        Random.seed!(42)
-        testft(arr, dims) = @test(ft(arr, dims) ≈ fftshift(fft(ifftshift(arr, dims), dims), dims))
-        testift(arr, dims) = @test(ift(arr, dims) ≈ fftshift(ifft(ifftshift(arr, dims), dims), dims))
-        testffts(arr, dims) = @test(ffts(arr, dims) ≈ fftshift(fft(arr, dims), dims))
-        testiffts(arr, dims) = @test(iffts(arr, dims) ≈ ifft(ifftshift(arr, dims), dims))
-        testrft(arr, dims) = @test(rffts(arr, dims) ≈ fftshift(rfft(arr, dims), dims[2:end]))
-        testirft(arr, dims, d) = @test(irffts(arr, d, dims) ≈ irfft(ifftshift(arr, dims[2:end]), d, dims))
-        for dim = 1:4
-            for _ in 1:3
-                s = ntuple(_ -> rand(1:13), dim)
-                arr = randn(ComplexF32, s)
-                dims = 1:dim
-                testft(arr, dims)
-                testift(arr, dims)
-                dims = 1:rand(1:dim)
-                testft(arr, dims)
-                testift(arr, dims)
-                testffts(arr, dims)
-                testiffts(arr, dims)
-                
-            end
-        end
-    end
-
-
-    @testset "Test 2d fft helpers" begin
-        arr = randn((6,7,8))
-        dims = [1,2]
-        d = 6
-        @test(ft2d(arr) == fftshift(fft(ifftshift(arr, (1,2)), (1,2)), dims))
-        @test(ift2d(arr) == fftshift(ifft(ifftshift(arr, (1,2)), (1,2)), dims))
-        @test(ffts2d(arr) == fftshift(fft(arr, (1,2)), (1,2)))
-        @test(iffts2d(arr) == ifft(ifftshift(arr, (1,2)), (1,2)))
-        @test(rffts2d(arr) == fftshift(rfft(arr, (1,2)), dims[2:2]))
-        @test(rft2d(arr) == fftshift(rfft(ifftshift(arr, (1,2)), (1,2)), dims[2:2]))
-        @test(fft2d(arr) == fft(arr, dims))
-        @test(ifft2d(arr) == ifft(arr, dims))
-        @test(rfft2d(arr) == rfft(arr, (1,2)))
-        @test(fftshift2d(arr) == fftshift(arr, (1,2)))
-        @test(ifftshift2d(arr) == ifftshift(arr, (1,2)))
-        @test(fftshift2d_view(arr) == fftshift_view(arr, (1,2)))
-        @test(ifftshift2d_view(arr) == ifftshift_view(arr, (1,2)))
-
-        arr = randn(ComplexF32, (4,7,8))
-        @test(irffts2d(arr, d) == irfft(ifftshift(arr, dims[2:2]), d, (1,2)))
-        @test(irft2d(arr, d) == irft(arr, d, (1,2))) 
-        @test(irfft2d(arr, d) == irfft(arr, d, (1,2))) 
-    end
-
-
-    @testset "Test ft, ift, rft and irft real space centering" begin
-        szs = ((10,10),(11,10),(100,101),(101,101))
-        for sz in szs
-            @test ft(ones(sz)) ≈ prod(sz) .* delta(sz)
-            @test ft(delta(sz)) ≈ ones(sz)
-            @test rft(ones(sz)) ≈ prod(sz) .* delta(rft_size(sz), offset=CtrRFT)
-            @test rft(delta(sz)) ≈ ones(rft_size(sz))
-            @test ift(ones(sz)) ≈ delta(sz)
-            @test ift(delta(sz)) ≈ ones(sz) ./ prod(sz)
-            @test irft(ones(rft_size(sz)),sz[1]) ≈ delta(sz)
-            @test irft(delta(rft_size(sz),offset=CtrRFT),sz[1]) ≈ ones(sz) ./ prod(sz)
-        end
-    end
-
-
-    @testset "Test in place methods" begin
-        x = randn(ComplexF32, (5,3,10))
-        dims = (1,2)
-        @test fftshift(fft(x, dims), dims) ≈ ffts!(copy(x), dims)
-        @test ffts2d!(copy(x)) ≈ ffts!(copy(x), (1,2))
-    end
-
-
-end
diff --git a/test_old/fftshift_alternatives.jl b/test_old/fftshift_alternatives.jl
deleted file mode 100644
index e5f0f5f..0000000
--- a/test_old/fftshift_alternatives.jl
+++ /dev/null
@@ -1,45 +0,0 @@
-@testset "fftshift alternatives" begin
-    @testset "Test fftshift_view and ifftshift_view" begin
-        Random.seed!(42)
-        x = randn((2,1,4,1,6,7,4,7))
-        dims = (4,6,7)
-        @test fftshift(x,dims) == FourierTools.fftshift_view(x, dims)
-        @test ifftshift(x,dims) == FourierTools.ifftshift_view(x, dims)
-        @test x === FourierTools.optional_collect(ifftshift_view(fftshift_view(x)))
-        @test x === FourierTools.optional_collect(fftshift_view(ifftshift_view(x)))
-        @test x === FourierTools.optional_collect(ifftshift_view(fftshift_view(x, dims), dims))
-        @test x === FourierTools.optional_collect(fftshift_view(ifftshift_view(x, dims), dims))
-
-        x = randn((13, 13, 14))
-        @test fftshift(x) == FourierTools.fftshift_view(x)
-        @test ifftshift(x) == FourierTools.ifftshift_view(x)
-        @test fftshift(x, (2,3)) == FourierTools.fftshift_view(x, (2,3))
-        @test ifftshift(x, (2,3) ) == FourierTools.ifftshift_view(x, (2,3))
-
-    end
-end
-
-
-@testset "fftshift and ifftshift in-place" begin
-    function f(arr, dims)
-        arr3 = copy(arr)
-        @test fftshift(arr, dims) == FourierTools._fftshift!(copy(arr), arr, dims)
-        @test arr3 == arr
-        @test ifftshift(arr, dims) == FourierTools._ifftshift!(copy(arr), arr, dims)
-        @test arr3 == arr
-        @test FourierTools._fftshift!(copy(arr), arr, dims) != arr
-    end
-
-    f(randn((8,)), 1)
-    f(randn((2,)), 1)
-    f(randn((3,)), 1)
-    f(randn((3,4)), 1)
-    f(randn((3,4)), 2)
-    f(randn((4,4)), (1,2))
-    f(randn((5,5)), (1, 2))
-    f(randn((5,5)), (1,))
-    f(randn((8, 7, 6,4,1)), (1,2))
-    f(randn((8, 7, 6,4,1)), (2,3))
-    f(randn((8, 7, 6,4,1)), 3)
-    f(randn((8, 7, 6,4,1)), (1,2,3,4,5))
-end
diff --git a/test_old/fourier_filtering.jl b/test_old/fourier_filtering.jl
deleted file mode 100644
index 71fbd2b..0000000
--- a/test_old/fourier_filtering.jl
+++ /dev/null
@@ -1,44 +0,0 @@
-Random.seed!(42)
-
-@testset "Fourier filtering" begin
-
-    @testset "Gaussian filter complex" begin
-        sz = (21, 22)
-        x = randn(ComplexF32, sz)
-        sigma = (1.1,2.2)
-        gf = filter_gaussian(x, sigma, real_space_kernel=false)
-        # Note that this is not the same, since one kernel is generated in real space and one in Fourier space!
-        # with sizes around 10, the difference is huge!
-        k = gaussian(Float32, sz, sigma=sigma)
-        k = k./sum(k) # different than "normal".
-        gfc = conv_psf(x, k)
-        @test ≈(gf,gfc, rtol=1e-2) # it is realatively inaccurate due to the kernel being generated in different places
-        gfr = filter_gaussian(x, sigma, real_space_kernel=true)
-        @test ≈(gfr, gfc) # it can be debated how to best normalize a Gaussian filter
-        gfr = filter_gaussian(zeros(5).+1im, (1.0,), real_space_kernel=true)
-        @test ≈(gfr, zeros(5).+1im) # it can be debated how to best normalize a Gaussian filter
-    end
-
-    @testset "Gaussian filter real" begin
-        sz = (21, 22)
-        x = randn(Float32, sz)
-        sigma = (1.1, 2.2)
-        gf = filter_gaussian(x, sigma, real_space_kernel=true)
-        # Note that this is not the same, since one kernel is generated in real space and one in Fourier space!
-        # with sizes around 10, the difference is huge!
-        k = gaussian(sz, sigma=sigma)
-        k = k./sum(k) # different than "normal".
-        gf2 = conv_psf(x, k)
-        @test ≈(gf, gf2, rtol=1e-2) # it is realatively inaccurate due to the kernel being generated in different places
-        gf2 = filter_gaussian(zeros(sz), sigma, real_space_kernel=true)
-        @test ≈(gf2, zeros(sz)) # it can be debated how to best normalize a Gaussian filter
-    end
-    @testset "Other filters" begin
-        @test filter_hamming(FourierTools.delta(Float32, (3,)), border_in=0.0, border_out=1.0) ≈ [0.23,0.54, 0.23]
-        @test filter_hann(FourierTools.delta(Float32, (3,)), border_in=0.0, border_out=1.0) ≈ [0.25,0.5, 0.25]
-        @test FourierTools.fourier_filter_by_1D_FT!(ones(ComplexF64, 6), [ones(ComplexF64, 6)]; transform_win=true, normalize_win=false) ≈ 6 .* ones(ComplexF64, 6)
-        @test FourierTools.fourier_filter_by_1D_FT!(ones(ComplexF64, 6), [ones(ComplexF64, 6)]; transform_win=true, normalize_win=true) ≈ ones(ComplexF64, 6)
-        @test FourierTools.fourier_filter_by_1D_RFT!(ones(6), [ones(6)]; transform_win=true, normalize_win=false) ≈ 6 .* ones(6)
-        @test FourierTools.fourier_filter_by_1D_RFT!(ones(6), [ones(6)]; transform_win=true, normalize_win=true) ≈ ones(6)
-    end
-end
diff --git a/test_old/fourier_rotate.jl b/test_old/fourier_rotate.jl
deleted file mode 100644
index fb33fb9..0000000
--- a/test_old/fourier_rotate.jl
+++ /dev/null
@@ -1,44 +0,0 @@
-@testset "Fourier Rotate" begin
-
-    @testset "Compare with ImageTransformations" begin
-        
-        function f(θ)
-            x = 1.0 .* range(0.0, 1.0, length=256)' .* range(0.0, 1.0, length=256)
-            f(x) = sin(x * 20) + tan(1.2 * x) + sin(x) + cos(1.1323 * x) * x^3 + x^3 + 0.23 * x^4 + sin(1/(x+0.1))
-            img = 5 .+ abs.(f.(x))
-            img ./= maximum(img)
-            img[20:40, 100:200] .= 1
-            img[20:200, 20:90] .= 0.3
-            img[20:200, 100:102] .= 0.7
-        
-            m = sum(img) / length(img)
-
-            img_1 = parent(ImageTransformations.imrotate(img, θ, m))
-            z = ones(Float32, size(img_1))
-            z .*= m
-            FourierTools.center_set!(z, img)
-            img_2 = FourierTools.rotate(z, θ, pad_value=img_1[1,1])
-            img_2b = FourierTools.center_extract(FourierTools.rotate(z, θ, pad_value=img_1[1,1], keep_new_size=true), size(img_2))
-            img_3 = real(FourierTools.rotate(z .+ 0im, θ, pad_value=img_1[1,1]))
-            img_4 = FourierTools.rotate!(z, θ)
-   
-            @test all(.≈(img_1, img_2, rtol=0.6))
-            @test ≈(img_1, img_2, rtol=0.03)
-            @test ≈(img_3, img_2, rtol=0.01)
-            @test ==(img_4, z)
-            @test ==(img_2, img_2b)
-
-            img_1c = FourierTools.center_extract(img_1, (100, 100))
-            img_2c = FourierTools.center_extract(img_2, (100, 100))
-            @test all(.≈(img_1c, img_2c, rtol=0.3))
-            @test ≈(img_1c, img_2c, rtol=0.05)
-        end
-
-        f(deg2rad(-54.31))
-        f(deg2rad(-95.31))
-        f(deg2rad(107.55))
-        f(deg2rad(-32.31))
-        f(deg2rad(32.31))
-        f(deg2rad(0))
-    end
-end
diff --git a/test_old/fourier_shear.jl b/test_old/fourier_shear.jl
deleted file mode 100644
index e46dbdd..0000000
--- a/test_old/fourier_shear.jl
+++ /dev/null
@@ -1,46 +0,0 @@
-@testset "Fourier Shear" begin
-
-  
-    @testset "Complex and real shear produce similar results" begin
-        function f(a, b, Δ)
-            x = randn((30, 24, 13))
-            xc = 0im .+ x
-            xc2 = 1im  .* x
-            @test shear(x, Δ, a, b) ≈ real(shear(xc, Δ, a, b))
-            @test shear(x, Δ, a, b) ≈ imag(shear(xc2, Δ, a, b))
-        end
-
-        f(2, 3, 123.1)
-        f(3, 2, 13.1)
-        f(1, 2, 13.1)
-        f(3, 1, 13.1)
-    end
-
-    @testset "Test that in-place works in-place" begin
-        function f(a, b, Δ)
-            x = randn((30, 24, 13))
-            xc = randn(ComplexF32, (30, 24, 13)) 
-            xc2 = 1im  .* x
-            @test shear!(x, Δ, a, b) ≈ x 
-            @test shear!(xc, Δ, a, b) ≈ xc 
-            @test shear!(xc2, Δ, a, b) ≈ xc2
-        end
-
-        f(2, 3, 123.1)
-        f(3, 2, 13.1)
-        f(1, 2, 13.1)
-        f(3, 1, 13.1)
-    end
-
-
-    @testset "Fix Nyquist" begin
-        @test shear(shear([1 2; 3 4.0], 0.123), -0.123, fix_nyquist = true) == [1.0 2.0; 3.0 4.0]
-        @test shear(shear([1 2; 3 4.0], 0.123), -0.123, fix_nyquist = false) != [1.0 2.0; 3.0 4.0]
-    end
-
-    @testset "assign_shear_wrap!" begin
-        q = ones((10,11))
-        assign_shear_wrap!(q, 10)
-        @test q[:,1] == [0,0,0,0,0,1,1,1,1,1]
-    end
-end
diff --git a/test_old/fourier_shifting.jl b/test_old/fourier_shifting.jl
deleted file mode 100644
index 12f109f..0000000
--- a/test_old/fourier_shifting.jl
+++ /dev/null
@@ -1,81 +0,0 @@
-Random.seed!(42)
-
-@testset "Fourier shifting methods" begin
-
-    # Int error
-    @test_throws ArgumentError FourierTools.shift([1,2,3], (1,))
-
-    @testset "Empty shifts" begin
-        x = randn(ComplexF32, (11, 12, 13))
-        @test FourierTools.shift(x, []) == x
-        
-        x = randn(Float32, (11, 12, 13))
-        @test FourierTools.shift(x, []) == x
-    end
-
-    @testset "Integer shifts for complex and real arrays" begin
-        x = randn(ComplexF32, (11, 12, 13))
-
-        s = (2,2,2)
-        @test FourierTools.shift(x, s) ≈ circshift(x, s) 
-        s = (3,2,1)
-        @test FourierTools.shift(x, s) ≈ circshift(x, s) 
-
-        @test FourierTools.shift(x, (0,0,0)) == x
-        x = randn(Float32, (11, 12, 13))
-
-        s = (2,2,2)
-        @test FourierTools.shift!(copy(x), s) ≈ circshift(x, s) 
-        s = (3,2,1)
-        @test FourierTools.shift!(copy(x), s) ≈ circshift(x, s) 
-        
-        @test sum(x) ≈ sum(FourierTools.shift!(copy(x), s))
-
-    end
-
-    @testset "Half integer shifts" begin
-
-        x = [0.0, 1.0, 0.0, 1.0]
-        xc = ComplexF32.(x)
-        
-        s = [0.5]
-        @test FourierTools.shift!(copy(x), s) ≈ real(FourierTools.shift!(copy(xc), s))
-        @test FourierTools.shift!(copy(x), s) ≈ real(FourierTools.shift!(copy(xc), 0.5))
-        @test sum(x) ≈ sum(FourierTools.shift!(copy(x), s))
-        
-        @test sum(xc) ≈ sum(FourierTools.shift!(copy(xc), s))
-    end
-
-    @testset "Check shifts with soft_fraction" begin
-        a = shift(delta((255,255)), (1.5,1.25),soft_fraction=0.1);
-        @test abs(sum(a[real(a).<0])) < 3.0
-        a = shift(delta((255,255)), (1.5,1.25),soft_fraction=0.0);
-        @test abs(sum(a[real(a).<0])) > 5.0
-    end
-
-    @testset "Random shifts consistency between both methods" begin
-        x = randn((11, 12, 13))
-        s = randn((3,)) .* 10
-        @test sum(x) ≈ sum(FourierTools.shift!(copy(x), s))
-        @test FourierTools.shift!(copy(x), s) ≈ real(FourierTools.shift!(copy(x) .+ 0im, s))
-        x = randn((11, 12, 13))
-        s = randn((3,)) .* 10
-        @test FourierTools.shift!(copy(x), s) ≈ real(FourierTools.shift!(copy(x) .+ 0im, s))
-        @test sum(x) ≈ sum(FourierTools.shift!(copy(x), s))
-    end
-
-
-    @testset "Check revertibility for complex and real data" begin
-        @testset "Complex data" begin
-            x = randn(ComplexF32, (11, 12, 13))
-            s = (-1.1, 12.123, 0.21)
-            @test x ≈ shift(shift(x, s), .- s, fix_nyquist_frequency=true) 
-        end
-        @testset "Real data" begin
-            x = randn(Float32, (11, 12, 13))
-            s = (-1.1, 12.123, 0.21)
-            @test x ≈ shift(shift(x, s), .- s, fix_nyquist_frequency=true) 
-        end
-    end
-
-end
diff --git a/test_old/fractional_fourier_transform.jl b/test_old/fractional_fourier_transform.jl
deleted file mode 100644
index 3b1307a..0000000
--- a/test_old/fractional_fourier_transform.jl
+++ /dev/null
@@ -1,45 +0,0 @@
-@testset "Fractional Fast Fourier Transform" begin 
-
-    box1d = collect(box(Float32, (100,)))
-    box1d_ = collect(box(Float32, (101,)))
-    
-
-    # consistency with fft
-    @test abs.(ft(box1d)[30:70]) ./ sqrt(length(box1d)) ≈ abs.(frfft(box1d, 1.0, shift=true)[30:70])
-    @test all(.≈(1 .+ abs.(ft(box1d)[30:70]) ./ sqrt(length(box1d)), 
-              1 .+ abs.(frfft(frfft(box1d, 0.5, shift=true), 0.5, shift=true)[30:70]), rtol=5e-3))
-    @test eltype(frfft(box1d, 1.0)) === ComplexF32
-    
-    @test all(.≈(1 .+ abs.(ft(box1d_)[30:70]) ./ sqrt(length(box1d_)), 1 .+ abs.(frfft(box1d_, 1.0, shift=true)[30:70]), rtol=5e-2))
-    @test all(.≈(1 .+ abs.(ft(box1d_)[30:70]) ./ sqrt(length(box1d_)), 
-              1 .+ abs.(frfft(frfft(box1d_, 0.5, shift=true), 0.5, shift=true)[30:70]), rtol=7e-3))
-
-
-    for frac in [0, -0.999, 0.99, 2.001,-3.001, -3.999,4,-2, 1.1, 2.2, 3.3, 4.4, 5.5, -1.1, -2.2, -3.3, -4.4]
-        @test all(.≈(10 .+ abs.(FractionalTransforms.frft(collect(box1d_), frac))[30:70], 
-                 10 .+ abs.(frfft(box1d_, frac, shift=true))[30:70], rtol=9e-3))
-
-        @test all(.≈(10 .+ real.(FractionalTransforms.frft(collect(box1d_), frac))[30:70], 
-                     10 .+ real.(frfft(box1d_, frac, shift=true))[30:70], rtol=9e-3))
-
-        @test all(.≈(10 .+ imag.(FractionalTransforms.frft(collect(box1d_), frac))[30:70], 
-                 10 .+ imag.(frfft(box1d_, frac, shift=true))[30:70], rtol=9e-3))
-    end
-    # reversibility
-    @test all(.≈(real(frfft(frfft(box1d, 0.5, shift=true), -0.5, shift=true))[30:70] , real(box1d)[30:70], rtol=1e-4))
-    @test all(.≈(real(frfft(frfft(box1d_, 0.5, shift=true), -0.5, shift=true))[30:70] , real(box1d_)[30:70], rtol=1e-4))
-
-
-
-    img = Float64.(testimage("resolution_test"))
-
-    @test abs.(ft(img)) ./ sqrt(length(img)) .+ 10 ≈ 10 .+ abs.(frfft(img, 0.9999999)) rtol=1e-5
-    @test (real.(ft(img)) ./ sqrt(length(img)))[200:300] ≈ (real.(frfft(img, 0.9999999)))[200:300] rtol=0.001
-
-    
-    x = randn((12,))
-    x2 = randn((13,))
-    @test frfft(x, 0.5) ≈ frfft(reshape(x, 12,1,1,1,1), 0.5)
-    @test frfft(x, 0.5) ≈ reshape(frfft(collect(reshape(x, 1,12,1,1)), 0.5), 12)
-    @test reshape(frfft(reshape(x, 1,12,1,1), 0.43), 12) ≈ frfft(x, 0.43)
-end
diff --git a/test_old/nfft_tests.jl b/test_old/nfft_tests.jl
deleted file mode 100644
index b595dec..0000000
--- a/test_old/nfft_tests.jl
+++ /dev/null
@@ -1,25 +0,0 @@
-@testset "Test nfft_nd  methods" begin
-    @testset "nfft_nd" begin
-        sz = (6,8, 10)
-        dat = rand(sz...)
-        nft = fftshift(fft(ifftshift(dat)))
-        @test isapprox(nfft_nd(dat, t->(0.0,0.0,0.0), is_in_pixels=true, is_local_shift=true), nft, rtol=1e-6)
-        @test isapprox(nfft_nd(dat, t->(0.0,0.0,0.0), is_in_pixels=false, is_local_shift=true), nft, rtol=1e-6)
-        nift = fftshift(ifft(ifftshift(dat)))
-        mynfft = nfft_nd(dat, t->(0.0,0.0,0.0), is_in_pixels=false, is_local_shift=true, is_adjoint=true) ./ prod(size(nift))
-        @test isapprox(mynfft, nift, rtol=1e-6)
-        @test isapprox(nfft_nd(dat, t->t, pad_value=nothing), nft, rtol=1e-6)
-        p =plan_nfft_nd(dat, t->t, pad_value=0.0)
-        @test isapprox(p*dat, nft, rtol=1e-6)
-        @test isapprox(nfft_nd(dat, t->(10.0,10.0,10.0), pad_value=0.0), zeros(sz), rtol=1e-6)
-        p = plan_nfft_nd(dat, t->t)
-        @test isapprox(p*dat, nft, rtol=1e-6)
-        b = nfft_nd(dat, t->t)
-        @test isapprox(b, nft, rtol=1e-6)
-        b = nfft_nd(dat .+ 0im, idx(size(dat), scale=ScaFT))
-        @test isapprox(b, nft .+ 0im, rtol=1e-6)
-        res = zeros(complex(eltype(dat)), sz)
-        LinearAlgebra.mul!(res, p, dat)
-        @test isapprox(res, nft, rtol=1e-6)
-    end
-end
diff --git a/test_old/resampling_tests.jl b/test_old/resampling_tests.jl
deleted file mode 100644
index 929a85b..0000000
--- a/test_old/resampling_tests.jl
+++ /dev/null
@@ -1,325 +0,0 @@
-@testset "Test resampling  methods" begin
-    @testset "Test that upsample and downsample is reversible" begin
-        for dim = 1:3
-            for _ in 1:5
-                s_small = ntuple(_ -> rand(1:13), dim)
-                s_large = ntuple(i -> max.(s_small[i], rand(10:16)), dim)
-                
-                
-                x = randn(Float32, (s_small))
-                @test x == resample(x, s_small)
-                @test Float32.(x) ≈ Float32.(resample(resample(x, s_large), s_small))
-                @test x ≈ resample_by_FFT(resample_by_FFT(x, s_large), s_small)
-                @test Float32.(x) ≈ Float32.(resample_by_RFFT(resample_by_RFFT(x, s_large), s_small))
-                @test x ≈ FourierTools.resample_by_1D(FourierTools.resample_by_1D(x, s_large), s_small)
-                x = randn(ComplexF32, (s_small))
-                @test x ≈ resample(resample(x, s_large), s_small)
-                @test x ≈ resample_by_FFT(resample_by_FFT(x, s_large), s_small)
-                @test x ≈ resample_by_FFT(resample_by_FFT(real(x), s_large), s_small) + 1im .* resample_by_FFT(resample_by_FFT(imag(x), s_large), s_small) 
-                @test x ≈ FourierTools.resample_by_1D(FourierTools.resample_by_1D(x, s_large), s_small)
-            end
-        end
-    end
-
-    @testset "Test that different resample methods are consistent" begin
-        for dim = 1:3
-            for _ in 1:5
-                s_small = ntuple(_ -> rand(1:13), dim)
-                s_large = ntuple(i -> max.(s_small[i], rand(10:16)), dim)
-                
-                x = randn(Float32, (s_small))
-                @test ≈(FourierTools.resample(x, s_large), FourierTools.resample_by_1D(x, s_large))
-            end
-        end
-    end
-
-    @testset "Test that complex and real routine produce same result for real array" begin
-        for dim = 1:3
-            for _ in 1:5
-                s_small = ntuple(_ -> rand(1:13), dim)
-                s_large = ntuple(i -> max.(s_small[i], rand(10:16)), dim)
-                
-                x = randn(Float32, (s_small))
-                @test Float32.(resample(x, s_large)) ≈ Float32.(real(resample(ComplexF32.(x), s_large)))
-                @test FourierTools.resample_by_1D(x, s_large) ≈ real(FourierTools.resample_by_1D(ComplexF32.(x), s_large))
-            end
-        end
-    end
-
-
-    @testset "Tests that resample_by_FFT is purely real" begin
-        function test_real(s_1, s_2)
-            x = randn(Float32, (s_1))
-            y = resample_by_FFT(x, s_2)
-            @test all(( imag.(y) .+ 1 .≈ 1))
-            y = FourierTools.resample_by_1D(x, s_2)
-            @test all(( imag.(y) .+ 1 .≈ 1))
-        end
-
-        for dim = 1:3
-            for _ in 1:5
-                s_1 = ntuple(_ -> rand(1:13), dim)
-                s_2 = ntuple(i -> rand(1:13), dim)
-                test_real(s_1, s_2)
-            end
-        end
-            
-        test_real((4, 4),(6, 6))
-        test_real((4, 4),(6, 7))
-        test_real((4, 4),(9, 9))
-        test_real((4, 5),(9, 9))
-        test_real((4, 5),(9, 8))
-        test_real((8, 8),(6, 7))
-        test_real((8, 8),(6, 5))
-        test_real((8, 8),(4, 5))
-        test_real((9, 9),(4, 5))
-        test_real((9, 9),(4, 5))
-        test_real((9, 9),(7, 8))
-        test_real((9, 9),(6, 5))
-
-    end
-
-    @testset "Sinc interpolation based on FFT" begin
-
-    function test_interpolation_sum_fft(N_low, N)
-	    x_min = 0.0
-	    x_max = 16π
-	    
-	    xs_low = range(x_min, x_max, length=N_low+1)[1:N_low]
-	    xs_high = range(x_min, x_max, length=N)[1:end-1]
-	    f(x) = sin(0.5*x) + cos(x) + cos(2 * x) + sin(0.25*x)
-	    arr_low = f.(xs_low)
-	    arr_high = f.(xs_high)
-
-	    xs_interp = range(x_min, x_max, length=N+1)[1:N]
-	    arr_interp = resample(arr_low, N)
-	    arr_interp2 = FourierTools.resample_by_1D(arr_low, N)
-
-
-        @test ≈(arr_interp[2*N ÷10: N*8÷10], arr_high[2* N ÷10: N*8÷10], rtol=0.05)
-        @test ≈(arr_interp2[2*N ÷10: N*8÷10], arr_high[2* N ÷10: N*8÷10], rtol=0.05)
-    end
-
-    test_interpolation_sum_fft(128, 1000)
-    test_interpolation_sum_fft(129, 1000)
-    test_interpolation_sum_fft(120, 1531)
-    test_interpolation_sum_fft(121, 1211)
-    end
-
-    @testset "Upsample2 compared to resample" begin
-    for sz in ((10,10),(5,8,9),(20,5,4))
-        a = rand(sz...)
-        @test ≈(upsample2(a),resample(a,sz.*2))
-        @test ≈(upsample2_abs2(a),abs2.(resample(a,sz.*2)))
-        a = rand(ComplexF32, sz...)
-        @test ≈(upsample2(a),resample(a,sz.*2))
-        @test ≈(upsample2_abs2(a),abs2.(resample(a,sz.*2)))
-        s2 = (d == 2 ? sz[d]*2 : sz[d] for d in 1:length(sz))
-        @test ≈(upsample2(a, dims=(2,)),resample(a,s2))
-        @test ≈(upsample2_abs2(a, dims=(2,)),abs2.(resample(a,s2)))
-        @test size( upsample2(collect(collect(1.0:9.0)'); fix_center=true, keep_singleton=true)) == (1,18)
-        @test upsample2(collect(1.0:9.0); fix_center=false)[1:16] ≈ upsample2(collect(1.0:9.0); fix_center=true)[2:17]
-    end
-    end
-
-    @testset "Downsampling based on frequency cutting" begin
-    function test_resample(N_low, N)
-	    x_min = 0.0
-	    x_max = 16π
-	    
-	    xs_low = range(x_min, x_max, length=N_low+1)[1:N_low]
-	    f(x) = sin(0.5*x) + cos(x) + cos(2 * x) + sin(0.25*x)
-	    arr_low = f.(xs_low)
-
-	    xs_interp = range(x_min, x_max, length=N+1)[1:N]
-	    arr_interp = resample(arr_low, N)
-
-	    xs_interp_s = range(x_min, x_max, length=N+1)[1:N]
-
-        arr_ds = resample(arr_interp, (N_low,) )
-        @test ≈(arr_ds, arr_low)
-        @test eltype(arr_low) === eltype(arr_ds)
-        @test eltype(arr_interp) === eltype(arr_ds)
-    end
-
-    test_resample(128, 1000)
-    test_resample(128, 1232)
-    test_resample(128, 255)
-    test_resample(253, 254)
-    test_resample(253, 1001)
-    test_resample(99, 100101)
-
-    end
-
-    @testset "FFT resample in 2D" begin
-    
-    
-        function test_2D(in_s, out_s)
-            x = range(-10.0, 10.0, length=in_s[1] + 1)[1:end-1]
-            y = range(-10.0, 10.0, length=in_s[2] + 1)[1:end-1]'
-    	    arr = abs.(x) .+ abs.(y) .+ sinc.(sqrt.(x .^2 .+ y .^2))
-    	    arr_interp = resample(arr[1:end, 1:end], out_s);
-    	    arr_ds = resample(arr_interp, in_s)
-            @test arr_ds ≈ arr
-        end
-    
-        test_2D((128, 128), (150, 150))
-        test_2D((128, 128), (151, 151))
-        test_2D((129, 129), (150, 150))
-        test_2D((129, 129), (151, 151))
-        
-        test_2D((150, 128), (151, 150))
-        test_2D((128, 128), (151, 153))
-        test_2D((129, 128), (150, 153))
-        test_2D((129, 128), (129, 153))
-    
-    
-        x = range(-10.0, 10.0, length=129)[1:end-1]
-        x2 = range(-10.0, 10.0, length=130)[1:end-1]
-        x_exact = range(-10.0, 10.0, length=2049)[1:end-1]
-        y = x'
-        y2 = x2'
-        y_exact = x_exact'
-        arr = abs.(x) .+ abs.(y) .+sinc.(sqrt.(x .^2 .+ y .^2))
-        arr2 = abs.(x) .+ abs.(y) .+sinc.(sqrt.(x .^2 .+ y .^2))
-        arr_exact = abs.(x_exact) .+ abs.(y_exact) .+ sinc.(sqrt.(x_exact .^2 .+ y_exact .^2))
-        arr_interp = resample(arr[1:end, 1:end], (131, 131));
-        arr_interp2 = resample(arr[1:end, 1:end], (512, 512));
-        arr_interp3 = resample(arr[1:end, 1:end], (1024, 1024));
-        arr_ds = resample(arr_interp, (128, 128))
-        arr_ds2 = resample(arr_interp, (128, 128))
-        arr_ds23 = resample(arr_interp2, (512, 512))
-        arr_ds3 = resample(arr_interp, (128, 128))
-    
-        @test ≈(arr_ds3, arr)
-        @test ≈(arr_ds2, arr)
-        @test ≈(arr_ds, arr)
-        @test ≈(arr_ds23, arr_interp2)
-    
-    end
-    
-    
-    @testset "FFT resample 2D for a complex signal" begin
-    
-        function test_2D(in_s, out_s)
-        	x = range(-10.0, 10.0, length=in_s[1] + 1)[1:end-1]
-        	y = range(-10.0, 10.0, length=in_s[2] + 1)[1:end-1]'
-        	f(x, y) = 1im * (abs(x) + abs(y) + sinc(sqrt(x ^2 + y ^2)))
-        	f2(x, y) =  abs(x) + abs(y) + sinc(sqrt((x - 5) ^2 + (y - 5)^2))
-        
-        	arr = f.(x, y) .+ f2.(x, y)
-        	arr_interp = resample(arr[1:end, 1:end], out_s);
-        	arr_ds = resample(arr_interp, in_s)
-            
-            @test eltype(arr) === eltype(arr_ds)
-            @test eltype(arr_interp) === eltype(arr_ds)
-            @test imag(arr) ≈ imag(arr_ds)
-            @test real(arr) ≈ real(arr_ds)
-        end
-    
-        test_2D((128, 128), (150, 150))
-        test_2D((128, 128), (151, 151))
-        test_2D((129, 129), (150, 150))
-        test_2D((129, 129), (151, 151))
-        
-        test_2D((150, 128), (151, 150))
-        test_2D((128, 128), (151, 153))
-        test_2D((129, 128), (150, 153))
-        test_2D((129, 128), (129, 153))
-    end
-    
-    
-    @testset "FFT resample in 2D for a purely imaginary signal" begin
-        function test_2D(in_s, out_s)
-        	x = range(-10.0, 10.0, length=in_s[1] + 1)[1:end-1]
-        	y = range(-10.0, 10.0, length=in_s[2] + 1)[1:end-1]'
-        	f(x, y) = 1im * (abs(x) + abs(y) + sinc(sqrt(x ^2 + y ^2)))
-        
-        	arr = f.(x, y)
-        	arr_interp = resample(arr[1:end, 1:end], out_s);
-        	arr_ds = resample(arr_interp, in_s)
-            
-            @test imag(arr) ≈ imag(arr_ds)
-            @test all(real(arr_ds) .< 1e-13)
-            @test all(real(arr_interp) .< 1e-13)
-        end 
-    
-        test_2D((128, 128), (150, 150))
-        test_2D((128, 128), (151, 151))
-        test_2D((129, 129), (150, 150))
-        test_2D((129, 129), (151, 151))
-        
-        test_2D((150, 128), (151, 150))
-        test_2D((128, 128), (151, 153))
-        test_2D((129, 128), (150, 153))
-        test_2D((129, 128), (129, 153))
-    end
-
-    @testset "test select_region_ft" begin
-        x = [1,2,3,4]
-        @test select_region_ft(ffts(x), (5,)) == ComplexF64[-1.0 + 0.0im, -2.0 - 2.0im, 10.0 + 0.0im, -2.0 + 2.0im, -1.0 + 0.0im]
-        x = [3.1495759241275225 0.24720770605505335 -1.311507800204285 -0.3387627167144301; -0.7214121984874265 -0.02566249380406308 0.687066447881175 -0.09536748694092163; -0.577092696986848 -0.6320809680268722 -0.09460071173365793 0.7689715736798227; 0.4593837753047561 -1.0204193548690512 -0.28474772376166907 1.442443602597533]
-        @test select_region_ft(ffts(x), (7, 7)) == ComplexF64[0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im; 0.0 + 0.0im 0.32043577156395486 + 0.0im 2.321469443190397 + 0.7890379226962572im 0.38521287113798636 + 0.0im 2.321469443190397 - 0.7890379226962572im 0.32043577156395486 + 0.0im 0.0 + 0.0im; 0.0 + 0.0im 1.3691035744780353 + 0.16703621316206385im 2.4110077589815555 - 0.16558718095884828im 2.2813159163314163 - 0.7520360306228049im 7.47614366018844 - 4.139633109911205im 1.3691035744780353 + 0.16703621316206385im 0.0 + 0.0im; 0.0 + 0.0im 0.4801675770812479 + 0.0im 3.3142445917764407 - 3.2082400832669373im 1.6529948781166373 + 0.0im 3.3142445917764407 + 3.2082400832669373im 0.4801675770812479 + 0.0im 0.0 + 0.0im; 0.0 + 0.0im 1.3691035744780353 - 0.16703621316206385im 7.47614366018844 + 4.139633109911205im 2.2813159163314163 + 0.7520360306228049im 2.4110077589815555 + 0.16558718095884828im 1.3691035744780353 - 0.16703621316206385im 0.0 + 0.0im; 0.0 + 0.0im 0.32043577156395486 + 0.0im 2.321469443190397 + 0.7890379226962572im 0.38521287113798636 + 0.0im 2.321469443190397 - 0.7890379226962572im 0.32043577156395486 + 0.0im 0.0 + 0.0im; 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im 0.0 + 0.0im]
-    end
-
-    @testset "test resample_czt" begin
-        dim =2
-        s_small = (12,16) # ntuple(_ -> rand(1:13), dim)
-        s_large = (20,18) # ntuple(i -> max.(s_small[i], rand(10:16)), dim)
-        dat = select_region(randn(Float32, (5,6)), new_size= s_small)
-        rs1 = FourierTools.resample(dat, s_large)
-        rs1b = select_region(rs1, new_size=size(dat))
-        rs2 = FourierTools.resample_czt(dat, s_large./s_small, do_damp=false)
-        @test rs1b ≈ rs2
-        rs2 = FourierTools.resample_czt(dat, (x->s_large[2]./s_small[2], y->s_large[1]./s_small[1]), do_damp=false)
-        @test rs1b ≈ rs2
-        rs2 = FourierTools.resample_czt(dat, (x->1.0, y->1.0), shear=(x->10.0,y->0.0),do_damp=false)
-        @test shear(dat,10) ≈ rs2
-        rs2 = FourierTools.resample_czt(dat, (x->1.0, y->1.0), shear=(10.0,0.0),do_damp=false)
-        @test shear(dat,10) ≈ rs2
-        rs2 = barrel_pin(dat, 0.5)
-        rs2b = FourierTools.resample_czt(dat, (x -> 1.0 + 0.5 .* (x-0.5)^2,x -> 1.0 + 0.5 .* (x-0.5)^2))
-        @test rs2b ≈ rs2
-    end
-
-    @testset "test resample_nfft" begin
-        dim =2
-        s_small = (12,16) # ntuple(_ -> rand(1:13), dim)
-        s_large = (20,18) # ntuple(i -> max.(s_small[i], rand(10:16)), dim)
-        dat = select_region(randn(Float32, (5,6)), new_size= s_small)
-        rs1 = FourierTools.resample(dat, s_large)
-        rs1b = select_region(rs1, new_size=size(dat))
-        mymap = (t) -> t .* s_small ./ s_large  
-        rs3 = FourierTools.resample_nfft(dat, mymap)
-        @test isapprox(rs1b, rs3, rtol=0.1)
-        new_pos = mymap.(idx(size(dat), scale=ScaFT))
-        rs4 = FourierTools.resample_nfft(dat, new_pos)
-        @test rs4 ≈ rs3
-        new_pos = cat(s_small[1]./s_large[1] .* xx(size(dat), scale=ScaFT), s_small[2]./s_large[2] .* yy(size(dat), scale=ScaFT),dims=3)
-        rs5 = FourierTools.resample_nfft(dat, new_pos)
-        @test rs5 ≈ rs3
-        # @test rs1b ≈ rs3
-        # test both modes: src and destination but only for a 1-pixel shift
-        rs6 = FourierTools.resample_nfft(dat, t->t .+ 1.0, is_src_coords=false, is_in_pixels=true)
-        rs7 = FourierTools.resample_nfft(dat, t->t .- 1.0, is_src_coords=true, is_in_pixels=true)
-        @test rs6 ≈ rs7
-        # test shrinking by a factor of two
-        new_pos = cat(xx(s_small.÷2, scale=ScaFT),yy(s_small.÷2, scale=ScaFT), dims=3)
-        rs8 = FourierTools.resample_nfft(dat, t->t, s_small.÷2, is_src_coords=true)
-        rs9 = FourierTools.resample_nfft(dat, new_pos,  is_src_coords=true)
-        rss = FourierTools.resample(dat, s_small.÷2)
-        @test rs8 ≈ rs9
-        rs10 = FourierTools.resample_nfft(dat, t->t, s_small.÷2; is_src_coords=false, is_in_pixels=true)
-        new_pos = cat(xx(s_small, offset=(0,0)),yy(s_small,offset=(0,0)), dims=3)
-        rs11 = FourierTools.resample_nfft(dat, new_pos, s_small.÷2; is_src_coords=false, is_in_pixels=true)
-        @test rs10 ≈ rs11    
-        # test the non-strided array
-        rs6 = FourierTools.resample_nfft(Base.PermutedDimsArray(dat,(2,1)), t->t .+ 1.0, is_src_coords=false, is_in_pixels=true)
-        rs7 = FourierTools.resample_nfft(Base.PermutedDimsArray(dat,(2,1)), t->t .- 1.0, is_src_coords=true, is_in_pixels=true)
-        @test rs6 ≈ rs7
-        rs6 = FourierTools.resample_nfft(1im .* dat , t->t .* 2.0, s_small.÷2, is_src_coords=false, is_in_pixels=false, pad_value=0.0)
-        rs7 = FourierTools.resample_nfft(1im .* dat, t->t .* 0.5, s_small.÷2, is_src_coords=true, is_in_pixels=false, pad_value=0.0)
-        @test rs6.*4 ≈ rs7
-    end
-
-end
diff --git a/test_old/runtests.jl b/test_old/runtests.jl
deleted file mode 100644
index 0c8ec32..0000000
--- a/test_old/runtests.jl
+++ /dev/null
@@ -1,30 +0,0 @@
-using Random, Test, FFTW
-using FourierTools
-using ImageTransformations
-using IndexFunArrays
-using Zygote
-using NDTools
-using LinearAlgebra # for the assigned nfft function LinearAlgebra.mul!
-using FractionalTransforms
-using TestImages
-
-Random.seed!(42)
-
-include("fft_helpers.jl")
-include("fftshift_alternatives.jl")
-include("utils.jl")
-include("fourier_shifting.jl")
-include("fourier_shear.jl")
-include("fourier_rotate.jl")
-include("resampling_tests.jl")
-include("convolutions.jl")
-include("correlations.jl")
-include("custom_fourier_types.jl")
-include("damping.jl")
-include("czt.jl")
-include("nfft_tests.jl")
-include("fractional_fourier_transform.jl")
-include("fourier_filtering.jl")
-include("sdft.jl")
-
-return
diff --git a/test_old/sdft.jl b/test_old/sdft.jl
deleted file mode 100644
index 02a7d64..0000000
--- a/test_old/sdft.jl
+++ /dev/null
@@ -1,102 +0,0 @@
-import FourierTools:
-    sdft_windowlength,
-    sdft_update!,
-    sdft_previousdft,
-    sdft_previousdata,
-    sdft_nextdata,
-    sdft_iteration,
-    sdft_backindices,
-    sdft_dataoffsets
-
-# Dummy method to test more complex designs
-struct TestSDFT{T,C} <: AbstractSDFT
-    n::T
-    factor::C
-end
-TestSDFT(n) = TestSDFT(n, exp(2π*im/n))
-sdft_windowlength(method::TestSDFT) = method.n
-sdft_backindices(::TestSDFT) = [0, 2]
-sdft_dataoffsets(::TestSDFT) = [0, 1]
-
-function sdft_update!(dft, x, method::TestSDFT{T,C}, state) where {T,C}
-    twiddle = one(C)
-    dft0 = sdft_previousdft(state, 0)
-    unused_dft = sdft_previousdft(state, 2) # not used - add for coverage
-    unused_data = sdft_previousdata(state, 1) # not used - add for coverage
-    unused_count = sdft_iteration(state) # not used - add for coverage
-    for k in eachindex(dft)
-        dft[k] = twiddle * (dft0[k] + sdft_nextdata(state) - sdft_previousdata(state)) +
-            0.0 * (unused_dft[k] + unused_data + unused_count)
-        twiddle *= method.factor
-    end
-end
-
-# Dummy method to test exceptions
-struct ErrorSDFT  <: AbstractSDFT end
-sdft_windowlength(method::ErrorSDFT) = 2
-function sdft_update!(dft, x, ::ErrorSDFT, state)
-    doesnotexist = sdft_previousdft(state, 1)
-    nothing
-end
-
-# Piecewise sinusoidal signal
-function signal(x)
-    if x < 1
-        5*cos(4π*x)
-    elseif x < 2
-        (-2x+7)*cos(2π*(x^2+1))
-    else
-        3*cos(10π*x)
-    end
-end
-
-y = signal.(range(0, 3, length=61))
-n = 20
-sample_offsets = (0, 20, 40)
-dfty_sample = [fft(view(y, (1:n) .+ offset)) for offset in sample_offsets]
-
-@testset "Sliding DFT" begin
-    # Compare SDFT
-    @testset "SDFT" begin
-        method = SDFT(n)
-        dfty = collect(method(y))
-        @testset "stateless" for i in eachindex(sample_offsets)
-            @test dfty[1 + sample_offsets[i]] ≈ dfty_sample[i]
-        end
-        dfty = collect(method(Iterators.Stateful(y)))
-        @testset "stateful" for i in eachindex(sample_offsets)
-            @test dfty[1 + sample_offsets[i]] ≈ dfty_sample[i]
-        end
-    end
-
-    # Method with dft history and more data points
-    @testset "TestSDFT" begin
-        method = TestSDFT(n)
-        dfty = collect(method(y))
-        @testset for i in eachindex(sample_offsets)
-            @test dfty[1 + sample_offsets[i]] ≈ dfty_sample[i]
-        end
-    end
-
-    # Exceptions
-    @testset "Exceptions" begin
-        @test_throws "insufficient data" iterate(SDFT(10)(ones(5)))
-        @test_throws "insufficient data" iterate(SDFT(10)(Float64[]))
-        @test_throws "previous DFT results not available" collect(ErrorSDFT()(y))
-    end
-
-    # Additional coverage
-    @testset "Extra" begin
-        itr = SDFT(n)(y)
-        _, state = iterate(itr)
-        @test ismissing(Base.isdone(itr))
-        @test ismissing(Base.isdone(itr, state))
-        FourierTools.sdft_updatedfthistory!(nothing)
-        FourierTools.sdft_updatefragment!(nothing, nothing, nothing)
-        dummy_state = FourierTools.SDFTStateData(nothing, nothing, 1.0, 1, 1)
-        @test FourierTools.haspreviousdata(dummy_state) == false
-        # sdft_dataoffsets
-        @test iszero(FourierTools.sdft_dataoffsets(SDFT(n)))
-        @test isnothing(FourierTools.sdft_dataoffsets(nothing))
-    end
-end
\ No newline at end of file
diff --git a/test_old/utils.jl b/test_old/utils.jl
deleted file mode 100644
index 5fdf23a..0000000
--- a/test_old/utils.jl
+++ /dev/null
@@ -1,141 +0,0 @@
-@testset "Test util functions" begin
-
-    @testset "Test fft center and rfft_center_diff" begin
-        Random.seed!(42)
-        @test 2 == FourierTools.fft_center(3)
-        @test 3 == FourierTools.fft_center(4)
-        @test 3 == FourierTools.fft_center(5)
-        @test (2,3,4) == FourierTools.fft_center.((3,4,6))
-
-
-        @test (0, 1, 2, 3) == FourierTools.ft_center_diff((12, 3, 5,6), (2,3,4))
-        @test (6, 1, 2, 3) == FourierTools.ft_center_diff((12, 3, 5,6))
-
-
-        @test (0, 0, 2, 3) == FourierTools.rft_center_diff((12, 3, 5,6), (2,3,4))
-        @test (0, 0, 0, 3) == FourierTools.rft_center_diff((12, 3, 5,6), (3,4))
-        @test (0, 0, 2, 3) == FourierTools.rft_center_diff((13, 3, 5,6), (1,3,4))
-        @test (0, 1, 2, 3) == FourierTools.rft_center_diff((13, 3, 5,6))
-
-    end
-
-
-
-    @testset "Test rfft_size" begin
-        s = (11, 20, 10)
-        @test FourierTools.rfft_size(s, 2) == size(rfft(randn(s),2))
-        @test FourierTools.rft_size(randn(s), 2) == size(rfft(randn(s),2))
-         
-        s = (11, 21, 10)
-        @test FourierTools.rfft_size(s, 2) == size(rfft(randn(s),2))
-        
-        s = (11, 21, 10)
-        @test FourierTools.rfft_size(s, 1) == size(rfft(randn(s),(1,2,3)))
-    end
-
-
-
-    function center_test(x1, x2, x3, y1, y2, y3)
-        arr1 = randn((x1, x2, x3))
-        arr2 = zeros((y1, y2, y3))
-    
-        FourierTools.center_set!(arr2, arr1)
-        arr3 = FourierTools.center_extract(arr2, (x1, x2, x3))
-        @test arr1 == arr3
-    end
-    
-     # test center set and center extract methods
-    @testset "center methods" begin
-        center_test(4, 4, 4, 6,7,4)
-        center_test(5, 4, 4, 7, 8, 4)
-        center_test(5, 4, 4, 8, 8, 8)
-        center_test(6, 4, 4, 7, 8, 8)
-    
-    
-        @test 1 == FourierTools.center_pos(1)
-        @test 2 == FourierTools.center_pos(2)
-        @test 2 == FourierTools.center_pos(3)
-        @test 3 == FourierTools.center_pos(4)
-        @test 3 == FourierTools.center_pos(5)
-        @test 513 == FourierTools.center_pos(1024)
-    
-        @test FourierTools.get_indices_around_center((5), (2)) == (2, 3)
-        @test FourierTools.get_indices_around_center((5), (3)) == (2, 4)
-        @test FourierTools.get_indices_around_center((4), (3)) == (2, 4)
-        @test FourierTools.get_indices_around_center((4), (2)) == (2, 3)
-    end
-
-
-    @testset "Test fftpos" begin
-
-        @test fftpos(1, 4, CenterFT) ≈  -0.5:0.25:0.25 
-        @test fftpos(1, 4, CenterLast) ≈ -0.75:0.25:0.0 
-        @test fftpos(1, 4, CenterMiddle) ≈ -0.375:0.25:0.375 
-        @test fftpos(1, 4, CenterFirst) ≈ 0.0:0.25:0.75 
-        @test fftpos(1, 4) ≈ 0.0:0.25:0.75 
-        @test fftpos(1f0, 4, 2) ≈ -0.25f0:0.25f0:0.5f0
-
-
-        function f(l, N)
-            a = fftpos(l, N, CenterFT)
-            b = fftpos(l, N, CenterFirst)
-            c = fftpos(l, N, CenterLast)
-            d = fftpos(l, N, CenterMiddle)
-            @test (a[end] - a[begin] ≈ b[end] - b[begin] ≈ c[end] - c[begin] ≈ d[end] -d[begin])
-        end
-
-        f(1, 2)
-        f(1, 3)
-        f(42, 4)
-        f(42, 5)
-    end
-
-
-    @testset "Test δ" begin 
-        @test δ((3, 3)) == [0 0 0; 0 1 0; 0 0 0]
-        @test δ((4, 3)) == [0 0 0; 0 0 0; 0 1 0; 0 0 0]
-        @test δ(Float32, (4, 3)) == Float32[0.0 0.0 0.0; 0.0 0.0 0.0; 0.0 1.0 0.0; 0.0 0.0 0.0]
-        @test δ(Float32, (4, 3)) |> eltype == Float32
-        @test δ(Float32, (4, 3)) |> eltype == Float32
-        @test δ(Float32, (4, 3)) == Float32[0.0 0.0 0.0; 0.0 0.0 0.0; 0.0 1.0 0.0; 0.0 0.0 0.0]
-    end
-
-
-
-    @testset "Pixel size conversion" begin
-        @test fourierspace_pixelsize(1, 512) ≈ 1 / 512 
-        @test all(fourierspace_pixelsize(1, (512,256)) .≈ 1 ./ (512, 256))
-        @test realspace_pixelsize(1, 512) ≈ 1 / 512 
-        @test all(realspace_pixelsize(1, (512,256)) .≈ 1 ./ (512, 256))
-
-    end
-
-
-    @testset "Check eltype error" begin
-        @test_throws ArgumentError FourierTools.eltype_error(Float32, Float64)
-        @test isnothing(FourierTools.eltype_error(Int, Int))
-    end
-
-    @testset "odd_view, fourier_reverse!" begin
-        a = [1 2 3;4 5 6;7 8 9;10 11 12]
-        @test FourierTools.odd_view(a) == [4 5 6;7 8 9; 10 11 12]
-        fourier_reverse!(a)
-        @test a == [3 2 1;12 11 10;9 8 7;6 5 4]
-        a = [1 2 3;4 5 6;7 8 9;10 11 12]
-        b = copy(a);
-        fourier_reverse!(a,dims=1);
-        @test a[2:end,:] == b[end:-1:2,:]
-        a = [1 2 3 4;5 6 7 8;9 10 11 12 ;13 14 15 16]
-        b = copy(a);
-        fourier_reverse!(a);
-        @test a[2,2] == b[4,4]
-        @test a[2,3] == b[4,3]
-        fourier_reverse!(a);
-        @test a == b
-        fourier_reverse!(a;dims=1);
-        @test a[2:end,:] == b[end:-1:2,:]
-        @test sum(abs.(imag.(ift(fourier_reverse!(ft(rand(5,6,7))))))) < 1e-10
-        sz = (10,9,6)
-        @test sum(abs.(real.(ift(fourier_reverse!(ft(box((sz)))))) .- box(sz))) < 1e-10
-    end
-end

From faa56772633bc7a2a638fdb11788be9aafa57ead Mon Sep 17 00:00:00 2001
From: RainerHeintzmann <heintzmann@gmail.com>
Date: Sat, 22 Mar 2025 21:38:08 +0100
Subject: [PATCH 12/25] changed opt_cu to not call CuArray under non-cuda
 conditions

---
 test/runtests.jl | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/test/runtests.jl b/test/runtests.jl
index 4481897..00520a3 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -12,7 +12,13 @@ using CUDA
 Random.seed!(42)
 
 use_cuda = false
-opt_cu(img, use_cuda) = ifelse(use_cuda, CuArray(img), img)
+function opt_cu(img, use_cuda=false)
+    if (use_cuda)
+        CuArray(img)
+    else
+        img
+    end
+end
 
 function run_all_tests()
     include("fft_helpers.jl");

From 2ed72a46aa606b9c53a11ef6eaf13a394e6f84c4 Mon Sep 17 00:00:00 2001
From: RainerHeintzmann <heintzmann@gmail.com>
Date: Sun, 23 Mar 2025 13:21:11 +0100
Subject: [PATCH 13/25] better generation of zeros [skip ci]

---
 Project.toml                     |  7 +++++-
 ext/CUDASupportExt.jl            | 20 +++++++++++------
 src/fourier_resample_1D_based.jl |  3 +--
 src/resampling.jl                |  3 +--
 src/utils.jl                     | 27 +++++++++++++++++++++++
 test/performance_tests.jl        | 38 ++++++++++++++++++++++++++++++++
 6 files changed, 86 insertions(+), 12 deletions(-)
 create mode 100644 test/performance_tests.jl

diff --git a/Project.toml b/Project.toml
index 63db1b0..21e61c2 100644
--- a/Project.toml
+++ b/Project.toml
@@ -15,14 +15,17 @@ Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 [weakdeps]
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+# CuNFFT = "a9291f20-7f4c-4d50-b30d-4e07b13252e1"
 
 [extensions]
+# CUDASupportExt = ["CUDA", "Adapt", "CuNFFT"]
 CUDASupportExt = ["CUDA", "Adapt"]
 
 [compat]
 Adapt = "3.7, 4.0, 4.1"
-CUDA = "5.2, 5.3, 5.4, 5.5, 5.6"
+CUDA = "5.2, 5.3, 5.4, 5.5, 5.6, 5.7" 
 ChainRulesCore = "1, 1.0, 1.1"
+# CuNFFT = "0.3.8"
 FFTW = "1.5"
 ImageTransformations = "0.9, 0.10"
 IndexFunArrays = "0.2"
@@ -33,6 +36,7 @@ julia = "1, 1.6, 1.7, 1.8, 1.9, 1.10, 1.11"
 
 [extras]
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+# CuNFFT = "a9291f20-7f4c-4d50-b30d-4e07b13252e1"
 FractionalTransforms = "e50ca838-b4f0-4a10-ad18-4b920bf1ae5c"
 ImageTransformations = "02fcd773-0e25-5acc-982a-7f6622650795"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
@@ -41,4 +45,5 @@ TestImages = "5e47fb64-e119-507b-a336-dd2b206d9990"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [targets]
+# test = ["Test", "TestImages", "FractionalTransforms", "Random", "ImageTransformations", "Zygote", "CUDA", "CuNFFT"]
 test = ["Test", "TestImages", "FractionalTransforms", "Random", "ImageTransformations", "Zygote", "CUDA"]
diff --git a/ext/CUDASupportExt.jl b/ext/CUDASupportExt.jl
index 6760224..44f8435 100644
--- a/ext/CUDASupportExt.jl
+++ b/ext/CUDASupportExt.jl
@@ -5,13 +5,8 @@ using Adapt
 using FourierTools
 using IndexFunArrays # to prevent a recuursive stack overflow in get_base_arr
 using Base 
-
-get_base_arr(arr::Array) = arr
-get_base_arr(arr::CuArray) = arr
-get_base_arr(arr::IndexFunArray) = arr
-function get_base_arr(arr::AbstractArray) 
-    get_base_arr(parent(arr))
-end
+# using NFFT
+# using CuNFFT
 
 # define a number of Union types to not repeat all definitions for each type
 AllShiftedType = Union{FourierTools.CircShiftedArray{<:Any,<:Any,<:Any},
@@ -98,4 +93,15 @@ function FourierTools.optional_collect(a::CuArray)
     a 
 end
 
+get_base_arr(arr::CuArray) = arr
+get_base_arr(arr::Array) = arr
+get_base_arr(arr::IndexFunArray) = arr
+function get_base_arr(arr::AbstractArray) 
+    get_base_arr(parent(arr))
+end
+
+function similar_zeros(arr::CuArray, sz::NTuple=size(arr))
+    CUDA.zeros(sz)
+end
+
 end
\ No newline at end of file
diff --git a/src/fourier_resample_1D_based.jl b/src/fourier_resample_1D_based.jl
index 5068921..68022ac 100644
--- a/src/fourier_resample_1D_based.jl
+++ b/src/fourier_resample_1D_based.jl
@@ -22,8 +22,7 @@ function resample_by_1D_FT!(arr::AbstractArray{<:Complex, N}, new_size; normaliz
         arr = ffts!(arr, d)
         if ns > s
             # out = zeros(eltype(arr), Base.setindex(size(arr), ns, d))
-            out = similar(arr, Base.setindex(size(arr), ns, d)) # to work with CuArary
-            out .= 0
+            out = similar_zeros(arr, Base.setindex(size(arr), ns, d)) # to work with CuArary
             center_set!(out, arr)
             # in the even case we need to fix hermitian property
             if iseven(s)
diff --git a/src/resampling.jl b/src/resampling.jl
index a84e465..66abd83 100644
--- a/src/resampling.jl
+++ b/src/resampling.jl
@@ -114,8 +114,7 @@ function upsample2_1D(mat::AbstractArray{T, N}, dim=1, fix_center=false, keep_si
         return mat
     end
     newsize = Tuple((d==dim) ? 2*size(mat,d) : size(mat,d) for d in 1:N)
-    res = similar(mat, newsize)
-    res .= 0;
+    res = similar_zeros(mat, newsize)
     # res = zeros(eltype(mat), newsize)
     if fix_center && isodd(size(mat,dim))
         selectdim(res,dim,2:2:size(res,dim)) .= mat  
diff --git a/src/utils.jl b/src/utils.jl
index efe82e9..70fc035 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -3,6 +3,33 @@ export expanddims, fourierspace_pixelsize, realspace_pixelsize
 export δ
 export fourier_reverse!
 
+"""
+    similar_zeros(arr::AbstractArray, sz::NTuple)
+
+Creates a similar array to `arr` with zeros. This is useful to also support CuArrays.
+There are specializations for `Array` and `CuArray` which use the original `zeros` function.
+
+# parameters
+- `arr`: array to copy the type and size from
+- `sz`: size of the new array. Default is the size of `arr`.
+
+# Examples
+```jldoctest
+julia> FourierTools.similar_zeros([1, 2, 3], (3,))
+3-element Vector{Int64}:
+ 0
+ 0
+ 0
+"""
+function similar_zeros(arr::AbstractArray, sz::NTuple=size(arr))
+    res = similar(arr, sz)
+    fill!(res, zero(eltype(res)))
+    return res
+end
+
+function similar_zeros(arr::Array, sz::NTuple=size(arr))
+    zeros(eltype(arr), sz)
+end
 
  #get_RFT_scale(real_size) = 0.5 ./ (max.(real_size ./ 2, 1))  # The same as the FFT scale but for the full array in real space!
 
diff --git a/test/performance_tests.jl b/test/performance_tests.jl
new file mode 100644
index 0000000..f5b4502
--- /dev/null
+++ b/test/performance_tests.jl
@@ -0,0 +1,38 @@
+using BenchmarkTools
+using CUDA
+using Test
+
+function test_fft()
+    img = rand(ComplexF32, 512, 512)
+    img = opt_cu(img, use_cuda)
+    img = fft(img)
+    img = ifft(img)
+    return img
+end
+
+function test_ft()
+    img = rand(ComplexF32, 512, 512)
+    img = opt_cu(img, use_cuda)
+    img = ft(img)
+    img = ift(img)
+    return img
+end
+
+diplay(@benchmark test_fft())
+diplay(@benchmark test_ft())
+
+function test_nfft()
+    J, N = 8, 16
+    k = range(-0.4, stop=0.4, length=J)  # nodes at which the NFFT is evaluated
+    f = cu(randn(ComplexF64, J))         # data to be transformed
+    p = plan_nfft(k, N, reltol=1e-9)     # create plan
+    fHat = adjoint(p) * f                # calculate adjoint NFFT
+    y = p * fHat                         # calculate forward NFFT
+end
+
+# using CUDA, NFFT, CuNFFT
+# Ny, Nx = 1024, 2048
+# x = CUDA.randn(Ny, Nx);
+# knots = CUDA.rand(2, Ny*Nx) .- 0.5f0;
+# plan = NFFT.plan_nfft(CuArray{Float32}, knots, size(x));
+# CUDA.@allowscalar [(adjoint(plan) * complex(x[:]))[1] for i=1:10]

From 08c02952817daf5d426e2b0f71f9802845a9b913 Mon Sep 17 00:00:00 2001
From: RainerHeintzmann <heintzmann@gmail.com>
Date: Sun, 23 Mar 2025 14:02:58 +0100
Subject: [PATCH 14/25] saved some memory on plan_conv_buffer [skip ci]

---
 src/convolutions.jl       |  4 +++-
 src/utils.jl              | 19 +++++++++++++++++++
 test/performance_tests.jl | 21 +++++++++++++--------
 3 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/src/convolutions.jl b/src/convolutions.jl
index b44a68e..ca18fb8 100644
--- a/src/convolutions.jl
+++ b/src/convolutions.jl
@@ -162,7 +162,9 @@ function plan_conv_buffer(u::AbstractArray{T1, N}, v::AbstractArray{T2, M}, dims
 
     u_buff = P_u * u
     v_ft = P_v * v
-    uv_buff = u_buff .* v_ft
+    uv_sz = bc_size(u_buff, v_ft)
+    # this saves memory allocations:
+    uv_buff = (uv_sz == size(u_buff)) ? u_buff : u_buff .* v_ft;
     
     # for fourier space we need a new plan
     P = plan(u .* v, dims; kwargs...)
diff --git a/src/utils.jl b/src/utils.jl
index 70fc035..317f0a7 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -3,6 +3,25 @@ export expanddims, fourierspace_pixelsize, realspace_pixelsize
 export δ
 export fourier_reverse!
 
+"""
+    bc_size(arr1, arr2)
+
+Calculates the size of the broadcasted array of `arr1` and `arr2`.
+
+# Arguments
+- `arr1`: first array
+- `arr2`: second array
+
+# Examples
+```jldoctest
+julia> FourierTools.bc_size(rand(5, 2, 3), rand(1, 2))
+(5, 2, 3)
+"""
+function bc_size(arr1, arr2)
+    md = max(ndims(arr1), ndims(arr2))
+    return ntuple((d) -> max(size(arr1, d), size(arr2, d)), md)
+end
+
 """
     similar_zeros(arr::AbstractArray, sz::NTuple)
 
diff --git a/test/performance_tests.jl b/test/performance_tests.jl
index f5b4502..5222bc0 100644
--- a/test/performance_tests.jl
+++ b/test/performance_tests.jl
@@ -2,24 +2,29 @@ using BenchmarkTools
 using CUDA
 using Test
 
-function test_fft()
-    img = rand(ComplexF32, 512, 512)
-    img = opt_cu(img, use_cuda)
+function test_fft(img)
     img = fft(img)
     img = ifft(img)
     return img
 end
 
-function test_ft()
-    img = rand(ComplexF32, 512, 512)
-    img = opt_cu(img, use_cuda)
+function test_ft(img)
     img = ft(img)
     img = ift(img)
     return img
 end
 
-diplay(@benchmark test_fft())
-diplay(@benchmark test_ft())
+use_cuda = false
+sz = (1024, 1024)
+dat = rand(ComplexF32, sz...)
+img = opt_cu(dat, use_cuda)
+display(@benchmark test_fft($img)) # 33 ms
+display(@benchmark test_ft($img)) # 38 ms
+
+use_cuda = true
+img = opt_cu(dat, use_cuda)
+display(@benchmark CUDA.@sync test_fft($img)) # 834 µs
+display(@benchmark CUDA.@sync test_ft($img)) # 1086 ms
 
 function test_nfft()
     J, N = 8, 16

From 9bece7e9c32d547d79b2e2a0e1e7ece063dd9c34 Mon Sep 17 00:00:00 2001
From: RainerHeintzmann <heintzmann@gmail.com>
Date: Sat, 5 Apr 2025 13:14:06 +0200
Subject: [PATCH 15/25] updated NDTools dependency to 0.8

---
 Project.toml | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/Project.toml b/Project.toml
index 21e61c2..9eed194 100644
--- a/Project.toml
+++ b/Project.toml
@@ -15,20 +15,18 @@ Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 [weakdeps]
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
-# CuNFFT = "a9291f20-7f4c-4d50-b30d-4e07b13252e1"
 
 [extensions]
-# CUDASupportExt = ["CUDA", "Adapt", "CuNFFT"]
 CUDASupportExt = ["CUDA", "Adapt"]
 
 [compat]
 Adapt = "3.7, 4.0, 4.1"
-CUDA = "5.2, 5.3, 5.4, 5.5, 5.6, 5.7" 
+CUDA = "5.2, 5.3, 5.4, 5.5, 5.6, 5.7"
 ChainRulesCore = "1, 1.0, 1.1"
-# CuNFFT = "0.3.8"
 FFTW = "1.5"
 ImageTransformations = "0.9, 0.10"
 IndexFunArrays = "0.2"
+NDTools = "0.8"
 NFFT = "0.11, 0.12, 0.13"
 Reexport = "1"
 Zygote = "0.6, 0.7"
@@ -36,7 +34,6 @@ julia = "1, 1.6, 1.7, 1.8, 1.9, 1.10, 1.11"
 
 [extras]
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
-# CuNFFT = "a9291f20-7f4c-4d50-b30d-4e07b13252e1"
 FractionalTransforms = "e50ca838-b4f0-4a10-ad18-4b920bf1ae5c"
 ImageTransformations = "02fcd773-0e25-5acc-982a-7f6622650795"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
@@ -45,5 +42,4 @@ TestImages = "5e47fb64-e119-507b-a336-dd2b206d9990"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [targets]
-# test = ["Test", "TestImages", "FractionalTransforms", "Random", "ImageTransformations", "Zygote", "CUDA", "CuNFFT"]
 test = ["Test", "TestImages", "FractionalTransforms", "Random", "ImageTransformations", "Zygote", "CUDA"]

From 1713bf45aa798db536d638810db8cd801a6bc726 Mon Sep 17 00:00:00 2001
From: RainerHeintzmann <heintzmann@gmail.com>
Date: Sat, 5 Apr 2025 13:24:17 +0200
Subject: [PATCH 16/25] updated Project.toml

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 9eed194..73e62b6 100644
--- a/Project.toml
+++ b/Project.toml
@@ -26,7 +26,7 @@ ChainRulesCore = "1, 1.0, 1.1"
 FFTW = "1.5"
 ImageTransformations = "0.9, 0.10"
 IndexFunArrays = "0.2"
-NDTools = "0.8"
+NDTools = "0.8.0"
 NFFT = "0.11, 0.12, 0.13"
 Reexport = "1"
 Zygote = "0.6, 0.7"

From 0ab645036e425ad44602fbfaf78dafb5a858e93d Mon Sep 17 00:00:00 2001
From: RainerHeintzmann <heintzmann@gmail.com>
Date: Sun, 6 Apr 2025 11:42:14 +0200
Subject: [PATCH 17/25] back to using ShiftedArrays.jl

---
 Project.toml                 |   4 +-
 ext/CUDASupportExt.jl        |  12 +-
 src/CircShiftedArrays.jl     | 276 -----------------------------------
 src/FourierTools.jl          |   7 +-
 src/circshift.jl             |  31 ----
 src/circshiftedarray.jl      | 130 -----------------
 src/fftshift_alternatives.jl |  12 +-
 7 files changed, 18 insertions(+), 454 deletions(-)
 delete mode 100644 src/CircShiftedArrays.jl
 delete mode 100644 src/circshift.jl
 delete mode 100644 src/circshiftedarray.jl

diff --git a/Project.toml b/Project.toml
index 73e62b6..f0c7451 100644
--- a/Project.toml
+++ b/Project.toml
@@ -11,13 +11,14 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 NDTools = "98581153-e998-4eef-8d0d-5ec2c052313d"
 NFFT = "efe261a4-0d2b-5849-be55-fc731d526b0d"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
+ShiftedArrays = "1277b4bf-5013-50f5-be3d-901d8477a67a"
 
 [weakdeps]
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 
 [extensions]
-CUDASupportExt = ["CUDA", "Adapt"]
+CUDASupportExt_FT = ["CUDA", "Adapt"]
 
 [compat]
 Adapt = "3.7, 4.0, 4.1"
@@ -29,6 +30,7 @@ IndexFunArrays = "0.2"
 NDTools = "0.8.0"
 NFFT = "0.11, 0.12, 0.13"
 Reexport = "1"
+ShiftedArrays = "2.0.0"
 Zygote = "0.6, 0.7"
 julia = "1, 1.6, 1.7, 1.8, 1.9, 1.10, 1.11"
 
diff --git a/ext/CUDASupportExt.jl b/ext/CUDASupportExt.jl
index 44f8435..4174f67 100644
--- a/ext/CUDASupportExt.jl
+++ b/ext/CUDASupportExt.jl
@@ -1,7 +1,7 @@
-module CUDASupportExt
+module CUDASupportExt_FT
 using CUDA 
 using Adapt
-# using ShiftedArrays
+using ShiftedArrays
 using FourierTools
 using IndexFunArrays # to prevent a recuursive stack overflow in get_base_arr
 using Base 
@@ -9,7 +9,7 @@ using Base
 # using CuNFFT
 
 # define a number of Union types to not repeat all definitions for each type
-AllShiftedType = Union{FourierTools.CircShiftedArray{<:Any,<:Any,<:Any},
+AllShiftedType = Union{ShiftedArrays.CircShiftedArray{<:Any,<:Any,<:Any},
                             FourierTools.FourierSplit{<:Any,<:Any,<:Any},
                             FourierTools.FourierJoin{<:Any,<:Any,<:Any}}
 
@@ -20,7 +20,7 @@ AllSubArrayType = Union{SubArray{<:Any, <:Any, <:AllShiftedType, <:Any, <:Any},
                         SubArray{<:Any, <:Any, <:Base.ReshapedArray{<:Any, <:Any, <:AllShiftedType, <:Any}, <:Any, <:Any}}
 AllShiftedAndViews = Union{AllShiftedType, AllSubArrayType}
 
-AllShiftedTypeCu{N, CD} = Union{FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{<:Any,N,CD}},
+AllShiftedTypeCu{N, CD} = Union{ShiftedArrays.CircShiftedArray{<:Any,<:Any,<:CuArray{<:Any,N,CD}},
                             FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{<:Any,N,CD}},
                             FourierTools.FourierJoin{<:Any,<:Any,<:CuArray{<:Any,N,CD}}}
 AllSubArrayTypeCu{N, CD} = Union{SubArray{<:Any, <:Any, <:AllShiftedTypeCu{N,CD}, <:Any, <:Any},
@@ -28,7 +28,7 @@ AllSubArrayTypeCu{N, CD} = Union{SubArray{<:Any, <:Any, <:AllShiftedTypeCu{N,CD}
                                  SubArray{<:Any, <:Any, <:Base.ReshapedArray{<:Any, <:Any, <:AllShiftedTypeCu{N,CD}, <:Any}, <:Any, <:Any}}
 AllShiftedAndViewsCu{N, CD} = Union{AllShiftedTypeCu{N, CD}, AllSubArrayTypeCu{N, CD}}
 
-Adapt.adapt_structure(to, x::FourierTools.CircShiftedArray{T, N, S}) where {T, N, S} = FourierTools.CircShiftedArray(adapt(to, parent(x)), FourierTools.shifts(x));
+Adapt.adapt_structure(to, x::ShiftedArrays.CircShiftedArray{T, N, S}) where {T, N, S} = ShiftedArrays.CircShiftedArray(adapt(to, parent(x)), FourierTools.shifts(x));
 Adapt.adapt_structure(to, x::FourierTools.FourierSplit{T, M, AA, D}) where {T, M, AA, D} = FourierTools.FourierSplit(adapt(to, parent(x)), Val(D), x.L1, x.L2, x.do_split);
 Adapt.adapt_structure(to, x::FourierTools.FourierJoin{T, M, AA, D}) where {T, M, AA, D} = FourierTools.FourierJoin(adapt(to, parent(x)), Val(D), x.L1, x.L2, x.do_join);
 
@@ -78,7 +78,7 @@ function Base.isapprox(y::AbstractArray, x::AllShiftedAndViewsCu; atol=0, rtol=a
     return all(abs.(x .- y) .<= atol)
 end
 
-function Base.isapprox(x::AllShiftedAndViewsCu, y::AllShiftedAndViewsCu; atol=0, rtol=atol>0 ? 0 : sqrt(eps(real(eltype(x)))),  va...) # where {CT, N, CD, T<:FourierTools.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
+function Base.isapprox(x::AllShiftedAndViewsCu, y::AllShiftedAndViewsCu; atol=0, rtol=atol>0 ? 0 : sqrt(eps(real(eltype(x)))),  va...) # where {CT, N, CD, T<:ShiftedArrays.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
     atol = (atol != 0) ? atol : rtol * maximum(abs.(x))
     return all(abs.(x .- y) .<= atol)
 end
diff --git a/src/CircShiftedArrays.jl b/src/CircShiftedArrays.jl
deleted file mode 100644
index 9949d09..0000000
--- a/src/CircShiftedArrays.jl
+++ /dev/null
@@ -1,276 +0,0 @@
-# export CircShiftedArray
-using Base
-# using CUDA
-
-# a = reshape(1:1000000,(1000,1000)) .+ 0
-# a = reshape(1:(15*15),(15,15)) .+ 0
-# c = CircShiftedArray(a,(3,3));
-# b = copy(a)
-# d = c .+ c;
-
-"""
-    CircShiftedArray{T, N, A<:AbstractArray{T,N}, myshift<:NTuple{N,Int}} <: AbstractArray{T,N}
-
-is a type which lazily encapsulates a circular shifted array. If broadcasted with another `CircShiftedArray` it will stay to be a `CircShiftedArray` as long as the shifts are equal.
-For unequal shifts, the `circshift` routine will be used. Note that the shift is encoded as an `NTuple{}` into the type definition.
-"""
-struct CircShiftedArray{T, N, A<:AbstractArray{T,N}, myshift<:Tuple} <: AbstractArray{T,N}
-    parent::A    
-
-    function CircShiftedArray(parent::A, myshift::NTuple{N,Int}) where {T,N,A<:AbstractArray{T,N}}
-        ws = wrapshift(myshift, size(parent))
-        new{T,N,A, Tuple{ws...}}(parent)
-    end
-    function CircShiftedArray(parent::CircShiftedArray{T,N,A,S}, myshift::NTuple{N,Int}) where {T,N,A,S}
-        ws = wrapshift(myshift .+ to_tuple(csa_shift(typeof(parent))), size(parent))
-        new{T,N,A, Tuple{ws...}}(parent.parent)
-    end
-    # function CircShiftedArray(parent::CircShiftedArray{T,N,A,S}, myshift::NTuple{N,Int}) where {T,N,A,S==myshift}
-    #     parent
-    # end
-end
-shifts(::CircShiftedArray{T,N,A,S}) where {T,N,A,S} = to_tuple(S)
-
-# just a more convenient name
-circshift(arr, myshift) = CircShiftedArray(arr, myshift)
-# wraps shifts into the range 0...N-1
-wrapshift(shift::NTuple, dims::NTuple) = ntuple(i -> mod(shift[i], dims[i]), length(dims))
-# wraps indices into the range 1...N
-wrapids(shift::NTuple, dims::NTuple) = ntuple(i -> mod1(shift[i], dims[i]), length(dims))
-invert_rng(s, sz) = wrapshift(sz .- s, sz)
-
-# define a new broadcast style
-struct CircShiftedArrayStyle{N,S} <: Base.Broadcast.AbstractArrayStyle{N} end
-csa_shift(::Type{CircShiftedArray{T,N,A,S}}) where {T,N,A,S} = S
-to_tuple(S::Type{T}) where {T<:Tuple}= tuple(S.parameters...)
-csa_shift(::CircShiftedArray{T,N,A,S}) where {T,N,A,S} = to_tuple(S)
-
-# convenient constructor
-CircShiftedArrayStyle{N,S}(::Val{M}, t::Tuple) where {N,S,M} = CircShiftedArrayStyle{max(N,M), Tuple{t...}}()
-# make it known to the system
-Base.Broadcast.BroadcastStyle(::Type{T}) where (T<: CircShiftedArray) = CircShiftedArrayStyle{ndims(T), csa_shift(T)}()
-# make subarrays (views) of CircShiftedArray also broadcast inthe CircArray style:
-Base.Broadcast.BroadcastStyle(::Type{SubArray{T,N,P,I,L}}) where {T,N,P<:CircShiftedArray,I,L} = CircShiftedArrayStyle{ndims(P), csa_shift(P)}()
-# Base.Broadcast.BroadcastStyle(::Type{T}) where (T2,N,P,I,L, T <: SubArray{T2,N,P,I,L})= CircShiftedArrayStyle{ndims(P), csa_shift(p)}()
-Base.Broadcast.BroadcastStyle(::CircShiftedArrayStyle{N,S}, ::Base.Broadcast.DefaultArrayStyle{M}) where {N,S,M} = CircShiftedArrayStyle{max(N,M),S}() #Broadcast.DefaultArrayStyle{CuArray}()
-function Base.Broadcast.BroadcastStyle(::CircShiftedArrayStyle{N,S1}, ::CircShiftedArrayStyle{M,S2}) where {N,S1,M,S2}
-    if S1 != S2
-        # maybe one could force materialization at this point instead.
-        error("You currently cannot mix CircShiftedArray of different shifts in a broadcasted expression.")
-    end
-    CircShiftedArrayStyle{max(N,M),S1}() #Broadcast.DefaultArrayStyle{CuArray}()
-end
-#Base.Broadcast.BroadcastStyle(::CircShiftedArrayStyle{0,S}, ::Base.Broadcast.DefaultArrayStyle{M}) where {S,M} = CircShiftedArrayStyle{M,S} #Broadcast.DefaultArrayStyle{CuArray}()
-
-@inline Base.size(csa::CircShiftedArray) = size(csa.parent)
-@inline Base.size(csa::CircShiftedArray, d::Int) = size(csa.parent, d)
-@inline Base.axes(csa::CircShiftedArray) = axes(csa.parent)
-@inline Base.IndexStyle(::Type{<:CircShiftedArray}) = IndexLinear()
-@inline Base.parent(csa::CircShiftedArray) = csa.parent
-
-CircShiftedVector(v::AbstractVector, n = ()) = CircShiftedArray(v, n)
-
-
-# linear indexing ignores the shifts
-@inline Base.getindex(csa::CircShiftedArray{T,N,A,S}, i::Int) where {T,N,A,S} = getindex(csa.parent, i)
-@inline Base.setindex!(csa::CircShiftedArray{T,N,A,S}, v, i::Int) where {T,N,A,S} = setindex!(csa.parent, v, i)
-
-# ttest(csa::CircShiftedArray{T,N,A,S}) where {T,N,A,S} = println("$S,  $(to_tuple(S))")
-
-# mod1 avoids first subtracting one and then adding one
-@inline Base.getindex(csa::CircShiftedArray{T,N,A,S}, i::Vararg{Int,N}) where {T,N,A,S} = 
-    getindex(csa.parent, (mod1(i[j]-to_tuple(S)[j], size(csa.parent, j)) for j in 1:N)...)
-
-@inline Base.setindex!(csa::CircShiftedArray{T,N,A,S}, v, i::Vararg{Int,N}) where {T,N,A,S} = 
-    (setindex!(csa.parent, v, (mod1(i[j]-to_tuple(S)[j], size(csa.parent, j)) for j in 1:N)...); v)
-
-# if materialize is provided, a broadcasting expression would always collapse to the base type.
-# Base.Broadcast.materialize(csa::CircShiftedArray{T,N,A,S}) where {T,N,A,S} = circshift(csa.parent, to_tuple(S))
-
-# These apply for broadcasted assignment operations.
-@inline Base.Broadcast.materialize!(dest::CircShiftedArray{T,N,A,S}, csa::CircShiftedArray{T2,N2,A2,S}) where {T,N,A,S,T2,N2,A2} = Base.Broadcast.materialize!(dest.parent, csa.parent)
-
-# function Base.Broadcast.materialize(bc::Base.Broadcast.Broadcasted{CircShiftedArrayStyle{N,S}}) where {T,N,A,S}
-#     similar(...size(bz)
-#     invoke(Base.Broadcast.materialize!, Tuple{CircShiftedArray{T,N,A,S}, Base.Broadcast.Broadcasted}, dest, bc)
-# end
-
-# remove all the circ-shift part if all shifts are the same
-@inline function Base.Broadcast.materialize!(dest::CircShiftedArray{T,N,A,S}, bc::Base.Broadcast.Broadcasted{CircShiftedArrayStyle{N,S}}) where {T,N,A,S}
-    invoke(Base.Broadcast.materialize!, Tuple{A, Base.Broadcast.Broadcasted}, dest.parent, remove_csa_style(bc))
-    return dest
-end
-# we cannot specialize the Broadcast style here, since the rhs may not contain a CircShiftedArray and still wants to be assigned
-@inline function Base.Broadcast.materialize!(dest::CircShiftedArray{T,N,A,S}, bc::Base.Broadcast.Broadcasted) where {T,N,A,S}
-    #@show "materialize! cs"
-    if only_shifted(bc)
-        # fall back to standard assignment
-        @show "use raw"
-        # to avoid calling the method defined below, we need to use `invoke`:
-        invoke(Base.Broadcast.materialize!, Tuple{AbstractArray, Base.Broadcast.Broadcasted}, dest, bc) 
-    else
-        # get all not-shifted arrays and apply the materialize operations piecewise using array views
-        materialize_checkerboard!(dest.parent, bc, Tuple(1:N), wrapshift(size(dest) .- csa_shift(dest), size(dest)), true)
-    end
-    return dest
-end
-
-# function copy(bc::Base.Broadcast.Broadcasted{CircShiftedArrayStyle{N,S}}) where {N,S}
-#     @show "copy here"
-#     return 0
-# end
-
-@inline function Base.Broadcast.materialize!(dest::AbstractArray, bc::Base.Broadcast.Broadcasted{CircShiftedArrayStyle{N,S}}) where {N,S}
-    materialize_checkerboard!(dest, bc, Tuple(1:N), wrapshift(size(dest) .- to_tuple(S), size(dest)), false)
-    return dest
-end
-
-# needs to generate both ranges as both appear in mixed broadcasting expressions
-function generate_shift_ranges(dest, myshift)
-    circshift_rng_1 = ntuple((d)->firstindex(dest,d):firstindex(dest,d)+myshift[d]-1, ndims(dest))
-    circshift_rng_2 = ntuple((d)->firstindex(dest,d)+myshift[d]:lastindex(dest,d), ndims(dest))
-    noshift_rng_1 = ntuple((d)->lastindex(dest,d)-myshift[d]+1:lastindex(dest,d), ndims(dest))
-    noshift_rng_2 = ntuple((d)->firstindex(dest,d):lastindex(dest,d)-myshift[d], ndims(dest))
-    return ((circshift_rng_1, circshift_rng_2), (noshift_rng_1, noshift_rng_2))
-end
-
-"""
-    materialize_checkerboard!(dest, bc, dims, myshift) 
-
-this function calls itself recursively to subdivide the array into tiles, which each needs to be processed individually via calls to `materialize!`.
-
-|--------|
-| a| b   |
-|--|-----|---|
-| c| dD  | C |
-|--+-----|---|
-   | B   | A |
-   |---------|
-
-"""
-function materialize_checkerboard!(dest, bc, dims, myshift, dest_is_cs_array=true) 
-    @show "materialize_checkerboard"
-    dest = refine_view(dest)
-    # gets Tuples of Tuples of 1D ranges (low and high) for each dimension
-    cs_rngs, ns_rngs = generate_shift_ranges(dest, myshift)
-
-    for n in CartesianIndices(ntuple((x)->2, ndims(dest)))
-        cs_rng = Tuple(cs_rngs[n[d]][d] for d=1:ndims(dest))
-        ns_rng = Tuple(ns_rngs[n[d]][d] for d=1:ndims(dest))
-        dst_rng = ifelse(dest_is_cs_array, cs_rng, ns_rng)
-        dst_rng = refine_shift_rng(dest, dst_rng)
-        dst_view = @view dest[dst_rng...]
-
-        bc1 = split_array_broadcast(bc, ns_rng, cs_rng)
-        if (prod(size(dst_view)) > 0)
-            Base.Broadcast.materialize!(dst_view, bc1)
-        end
-    end
-end
-
-# some code which determines whether all arrays are shifted
-@inline only_shifted(bc::Number)  = true
-@inline only_shifted(bc::AbstractArray)  = false
-@inline only_shifted(bc::CircShiftedArray)  = true
-@inline only_shifted(bc::Base.Broadcast.Broadcasted) = all(only_shifted.(bc.args))
-
-# These functions remove the CircShiftArray in a broadcast and replace each by a view into the original array 
-@inline split_array_broadcast(bc::Number, noshift_rng, shift_rng) = bc
-@inline split_array_broadcast(bc::AbstractArray, noshift_rng, shift_rng) = @view bc[noshift_rng...]
-@inline split_array_broadcast(bc::CircShiftedArray, noshift_rng, shift_rng) = @view bc.parent[shift_rng...]
-@inline split_array_broadcast(bc::CircShiftedArray{T,N,A,NTuple{N,0}}, noshift_rng, shift_rng)  where {T,N,A} =  @view bc.parent[noshift_rng...]
-@inline function split_array_broadcast(v::SubArray{T,N,P,I,L}, noshift_rng, shift_rng) where {T,N,P<:CircShiftedArray,I,L}    
-    new_cs = refine_view(v)
-    new_shift_rng = refine_shift_rng(v, shift_rng)
-    res = split_array_broadcast(new_cs, noshift_rng, new_shift_rng)
-    return res
-end
-
-@inline function refine_shift_rng(v::SubArray{T,N,P,I,L}, shift_rng) where {T,N,P,I,L}    
-    new_shift_rng = ntuple((d)-> ifelse(isa(v.indices[d],Base.Slice), shift_rng[d], Base.Colon()), ndims(v.parent))
-    return new_shift_rng
-end
-@inline refine_shift_rng(v, shift_rng) = shift_rng
-
-"""
-    function refine_view(v::SubArray{T,N,P,I,L}, shift_rng)
-
-returns a refined view of a CircShiftedArray as a CircShiftedArray, if necessary. Otherwise just the original array.
-find out, if the range of this view crosses any boundary of the parent CircShiftedArray
-by calculating the new indices
-if, so though an error. find the full slices, which can stay a circ shifted array withs shifts
-"""
-function refine_view(v::SubArray{T,N,P,I,L}) where {T,N,P<:CircShiftedArray,I,L}
-    myshift = csa_shift(v.parent)
-    sz = size(v.parent)
-    # find out, if the range of this view crosses any boundary of the parent CircShiftedArray
-    # by calculating the new indices
-    # if, so though an error.
-    # find the full slices, which can stay a circ shifted array withs shifts
-    sub_rngs = ntuple((d)-> !isa(v.indices[d], Base.Slice), ndims(v.parent))
-
-    new_ids_begin = wrapids(ntuple((d)-> v.indices[d][begin] .- myshift[d], ndims(v.parent)), sz)
-    new_ids_end = wrapids(ntuple((d)-> v.indices[d][end] .- myshift[d], ndims(v.parent)), sz)
-    if any(sub_rngs .&& (new_ids_end .< new_ids_begin))
-        error("a view of a shifted array is not allowed to cross boarders of the original array. Do not use a view here.")
-        # potentially this can be remedied, once there is a decent CatViews implementation
-    end
-    new_rngs = ntuple((d)-> ifelse(isa(v.indices[d],Base.Slice), v.indices[d], new_ids_begin[d]:new_ids_end[d]), ndims(v.parent))
-    new_shift = ntuple((d)-> ifelse(isa(v.indices[d],Base.Slice), 0, myshift[d]), ndims(v.parent))
-    new_cs = CircShiftedArray((@view v.parent.parent[new_rngs...]), new_shift)
-    return new_cs
-end
-
-refine_view(csa::AbstractArray) = csa
-
-function split_array_broadcast(bc::Base.Broadcast.Broadcasted, noshift_rng, shift_rng)
-    # Ref below protects the argument from broadcasting
-    bc_modified = split_array_broadcast.(bc.args, Ref(noshift_rng), Ref(shift_rng))
-    # @show size(bc_modified[1])
-    res=Base.Broadcast.broadcasted(bc.f, bc_modified...)
-    # @show typeof(res)
-    # Base.Broadcast.Broadcasted{Style, Tuple{modified_axes...}, F, Args}()
-    return res
-end
-
-Base.Broadcast.materialize!(dest::CircShiftedArray{T,N,A,S}, src::CircShiftedArray) where {T,N,A,S} = Base.Broadcast.materialize!(dest.parent, src.parent)
-Base.Broadcast.copyto!(dest::AbstractArray, bc::Base.Broadcast.Broadcasted{CircShiftedArrayStyle{N,S}}) where {N,S} = Base.Broadcast.materialize!(dest, bc)
-
-# function copy(CircShiftedArray)
-#     collect(CircShiftedArray)
-# end
-
-# Base.collect(csa::CircShiftedArray{T,N,A,S}) where {T,N,A,S} = circshift(csa.parent, to_tuple(S))
-
-# # interaction with numbers should not still stay a CSA
-# Base.Broadcast.promote_rule(csa::Type{CircShiftedArray}, na::Type{Number})  = typeof(csa)
-# Base.Broadcast.promote_rule(scsa::Type{SubArray{T,N,P,Rngs,B}}, t::T2) where {T,N,P<:CircShiftedArray,Rngs,B,T2}  = typeof(scsa.parent)
-
-#Base.Broadcast.promote_rule(::Type{CircShiftedArray{T,N}}, ::Type{S}) where {T,N,S} = CircShiftedArray{promote_type(T,S),N}
-#Base.Broadcast.promote_rule(::Type{CircShiftedArray{T,N}}, ::Type{<:Tuple}, shp...) where {T,N} = CircShiftedArray{T,length(shp)}
-
-# Base.Broadcast.promote_shape(::Type{CircShiftedArray{T,N,A,S}}, ::Type{<:AbstractArray}, ::Type{<:AbstractArray}) where {T,N,A<:AbstractArray,S} = CircShiftedArray{T,N,A,S}
-# Base.Broadcast.promote_shape(::Type{CircShiftedArray{T,N,A,S}}, ::Type{<:AbstractArray}, ::Type{<:Number}) where {T,N,A<:AbstractArray,S} = CircShiftedArray{T,N,A,S}
-
-function Base.similar(arr::CircShiftedArray, eltype::Type{T} = eltype(arr), dims::Tuple{Int64, Vararg{Int64, N}} = size(arr)) where {T,N}
-    na = similar(arr.parent, eltype, dims)
-    # the results-type depends on whether the result size is the same or not.
-    return ifelse(size(arr)==dims, na, CircShiftedArray(na, csa_shift(arr)))
-end
-
-@inline remove_csa_style(bc::Base.Broadcast.Broadcasted{CircShiftedArrayStyle{N,S}}) where {N,S} = Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{N}}(bc.f, bc.args, bc.axes) 
-@inline remove_csa_style(bc::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{N}}) where {N} = bc
-
-function Base.similar(bc::Base.Broadcast.Broadcasted{CircShiftedArrayStyle{N,S},Ax,F,Args}, et::ET, dims::Any) where {N,S,ET,Ax,F,Args}
-    @show "Similar Bc"
-    # remove the CircShiftedArrayStyle from broadcast to call the original "similar" function 
-    bc_type = Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{N},Ax,F,Args}
-    bc_tmp = remove_csa_style(bc) #Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{N}}(bc.f, bc.args, bc.axes)
-    res = invoke(Base.Broadcast.similar, Tuple{bc_type,ET,Any}, bc_tmp, et, dims)
-    if only_shifted(bc)
-        # @show "only shifted"
-        return CircShiftedArray(res, to_tuple(S))
-    else
-        return res
-    end
-end
diff --git a/src/FourierTools.jl b/src/FourierTools.jl
index b93391d..7284d49 100644
--- a/src/FourierTools.jl
+++ b/src/FourierTools.jl
@@ -3,7 +3,7 @@ module FourierTools
 
 using Reexport
 # using PaddedViews
-# using ShiftedArrays
+using ShiftedArrays # for circshift
 @reexport using FFTW
 using LinearAlgebra
 using IndexFunArrays
@@ -14,9 +14,8 @@ import Base: checkbounds, getindex, setindex!, parent, size, axes, copy, collect
 @reexport using NFFT
 FFTW.set_num_threads(4)
 
-# include("CircShiftedArrays.jl")
-include("circshiftedarray.jl") # from ShiftedArrays.jl
-include("circshift.jl") # from ShiftedArrays.jl
+# include("circshiftedarray.jl") # from ShiftedArrays.jl
+# include("circshift.jl") # from ShiftedArrays.jl
 include("utils.jl")
 include("nfft_nd.jl")
 include("resampling.jl")
diff --git a/src/circshift.jl b/src/circshift.jl
deleted file mode 100644
index 7551096..0000000
--- a/src/circshift.jl
+++ /dev/null
@@ -1,31 +0,0 @@
-"""
-    circshift(v::AbstractArray, n)
-
-Return a `CircShiftedArray` object which lazily represents the array `v` shifted
-circularly by `n` (an `Integer` or a `Tuple` of `Integer`s).
-If the number of dimensions of `v` exceeds the length of `n`, the shift in the
-remaining dimensions is assumed to be `0`.
-
-# Examples
-
-```jldoctest circshift
-julia> v = [1, 3, 5, 4];
-
-julia> FourierTools.circshift(v, 1)
-4-element CircShiftedVector{Int64, Vector{Int64}}:
- 4
- 1
- 3
- 5
-
-julia> w = reshape(1:16, 4, 4);
-
-julia> FourierTools.circshift(w, (1, -1))
-4×4 CircShiftedArray{Int64, 2, Base.ReshapedArray{Int64, 2, UnitRange{Int64}, Tuple{}}}:
- 8  12  16  4
- 5   9  13  1
- 6  10  14  2
- 7  11  15  3
-```
-"""
-circshift(v::AbstractArray, n) = CircShiftedArray(v, n)
diff --git a/src/circshiftedarray.jl b/src/circshiftedarray.jl
deleted file mode 100644
index 5e326dc..0000000
--- a/src/circshiftedarray.jl
+++ /dev/null
@@ -1,130 +0,0 @@
-"""
-    padded_tuple(v::AbstractVector, s)
-
-Internal function used to compute shifts. Return a `Tuple` with as many element
-as the dimensions of `v`. The first `length(s)` entries are filled with values
-from `s`, the remaining entries are `0`. `s` should be an integer, in which case
-`length(s) == 1`, or a container of integers with keys `1:length(s)`.
-
-# Examples
-
-```jldoctest padded_tuple
-julia> FourierTools.padded_tuple(rand(10, 10), 3)
-(3, 0)
-
-julia> FourierTools.padded_tuple(rand(10, 10), (4,))
-(4, 0)
-
-julia> FourierTools.padded_tuple(rand(10, 10), (1, 5))
-(1, 5)
-```
-"""
-padded_tuple(v::AbstractArray, s) = ntuple(i -> i ≤ length(s) ? s[i] : 0, ndims(v))
-
-# Computing a shifted index (subtracting the offset)
-offset(offsets::NTuple{N,Int}, inds::NTuple{N,Int}) where {N} = map(-, inds, offsets)
-
-"""
-    CircShiftedArray(parent::AbstractArray, shifts)
-
-Custom `AbstractArray` object to store an `AbstractArray` `parent` circularly shifted
-by `shifts` steps (where `shifts` is a `Tuple` with one `shift` value per dimension of `parent`).
-Use `copy` to collect the values of a `CircShiftedArray` into a normal `Array`.
-
-!!! note
-    `shift` is modified with a modulo operation and does not store the passed value
-    but instead a nonnegative number which leads to an equivalent shift.
-
-!!! note
-    If `parent` is itself a `CircShiftedArray`, the constructor does not nest
-    `CircShiftedArray` objects but rather combines the shifts additively.
-
-# Examples
-
-```jldoctest circshiftedarray
-julia> v = [1, 3, 5, 4];
-
-julia> s = CircShiftedArray(v, (1,))
-4-element CircShiftedVector{Int64, Vector{Int64}}:
- 4
- 1
- 3
- 5
-
-julia> copy(s)
-4-element Vector{Int64}:
- 4
- 1
- 3
- 5
-```
-"""
-struct CircShiftedArray{T, N, S<:AbstractArray} <: AbstractArray{T, N}
-    parent::S
-    # the field `shifts` stores the circular shifts modulo the size of the parent array
-    shifts::NTuple{N, Int}
-    function CircShiftedArray(p::AbstractArray{T, N}, n = ()) where {T, N}
-        myshifts = map(mod, padded_tuple(p, n), size(p))
-        return new{T, N, typeof(p)}(p, myshifts)
-    end
-end
-
-function CircShiftedArray(c::CircShiftedArray, n = ())
-    myshifts = map(+, shifts(c), padded_tuple(c, n))
-    return CircShiftedArray(parent(c), myshifts)
-end
-
-"""
-    CircShiftedVector{T, S<:AbstractArray}
-
-Shorthand for `CircShiftedArray{T, 1, S}`.
-"""
-const CircShiftedVector{T, S<:AbstractArray} = CircShiftedArray{T, 1, S}
-
-CircShiftedVector(v::AbstractVector, n = ()) = CircShiftedArray(v, n)
-
-Base.similar(s::CircShiftedArray, el::Type, v::NTuple{N, Int64}) where {N} = similar(s.parent, el, v)
-
-Base.size(s::CircShiftedArray) = size(parent(s))
-Base.axes(s::CircShiftedArray) = axes(parent(s))
-
-@inline function bringwithin(ind_with_offset::Int, ranges::AbstractUnitRange)
-    return ifelse(ind_with_offset < first(ranges), ind_with_offset + length(ranges), ind_with_offset)
-end
-
-@inline function Base.getindex(s::CircShiftedArray{T, N}, x::Vararg{Int, N}) where {T, N}
-    @boundscheck checkbounds(s, x...)
-    v, ind = parent(s), offset(shifts(s), x)
-    i = map(bringwithin, ind, axes(s))
-    return @inbounds v[i...]
-end
-
-@inline function Base.setindex!(s::CircShiftedArray{T, N}, el, x::Vararg{Int, N}) where {T, N}
-    @boundscheck checkbounds(s, x...)
-    v, ind = parent(s), offset(shifts(s), x)
-    i = map(bringwithin, ind, axes(s))
-    @inbounds v[i...] = el
-    return s
-end
-
-Base.parent(s::CircShiftedArray) = s.parent
-
-"""
-    shifts(s::CircShiftedArray)
-
-Return amount by which `s` is shifted compared to `parent(s)`.
-"""
-shifts(s::CircShiftedArray) = s.shifts
-
-# function Base.copyto!(dst::AbstractArray, src::CircShiftedArray)
-#     dst[:] .= @view src[:]
-# end
-
-# function Base.copyto!(dst::AbstractArray, Rdest::CartesianIndices, src::CircShiftedArray, Rsrc::CartesianIndices)
-#     dst[Rdest...] .= @view src[Rsrc...]
-# end
-
-function collect(x::T)  where {T<:CircShiftedArray{<:Any,<:Any,<:CircShiftedArray}}
-    x = CircShiftedArray(collect(parent(x)), shifts(x))
-    return collect(x) # stay on the GPU
-end
diff --git a/src/fftshift_alternatives.jl b/src/fftshift_alternatives.jl
index 2fb97f0..b6ac47a 100644
--- a/src/fftshift_alternatives.jl
+++ b/src/fftshift_alternatives.jl
@@ -13,7 +13,7 @@ If `dims` is not given then the signal is shifted along each dimension.
 function _fftshift!(dst::AbstractArray{T, N}, src::AbstractArray{T, N},
                    dims=ntuple(i -> i, Val(N))) where {T, N}
     Δ = ntuple(i -> i ∈ dims ? size(src, i) ÷ 2 : 0, Val(N))
-    circshift!(dst, src, Δ)
+    ShiftedArrays.circshift!(dst, src, Δ)
 end
 
 """
@@ -27,7 +27,7 @@ If `dims` is not given then the signal is shifted along each dimension.
 function _ifftshift!(dst::AbstractArray{T, N}, src::AbstractArray{T, N},
                    dims=ntuple(i -> i, Val(N))) where {T, N}
     Δ = ntuple(i -> i ∈ dims ? - size(src, i) ÷ 2 : 0, Val(N))
-    circshift!(dst, src, Δ)
+    ShiftedArrays.circshift!(dst, src, Δ)
 end
 
 
@@ -39,7 +39,7 @@ Result is semantically equivalent to `fftshift(A, dims)` but returns
 a view instead. 
 """
 function fftshift_view(mat::AbstractArray{T, N}, dims=ntuple(identity, Val(N))) where {T, N}
-    circshift(mat, ft_center_diff(size(mat), dims))
+    ShiftedArrays.circshift(mat, ft_center_diff(size(mat), dims))
 end
 
 
@@ -51,7 +51,7 @@ a view instead.
 """
 function ifftshift_view(mat::AbstractArray{T, N}, dims=ntuple(identity, Val(N))) where {T, N}
     diff = .-(ft_center_diff(size(mat), dims))
-    return circshift(mat, diff)
+    return ShiftedArrays.circshift(mat, diff)
 end
 
 
@@ -63,7 +63,7 @@ Shifts the frequencies to the center expect for `dims[1]` because there os no ne
 and positive frequency.
 """
 function rfftshift_view(mat::AbstractArray{T, N}, dims=ntuple(identity, Val(N))) where {T, N}
-    circshift(mat, rft_center_diff(size(mat), dims))
+    ShiftedArrays.circshift(mat, rft_center_diff(size(mat), dims))
 end
 
 
@@ -75,7 +75,7 @@ Shifts the frequencies back to the corner except for `dims[1]` because there os
 and positive frequency.
 """
 function irfftshift_view(mat::AbstractArray{T, N}, dims=ntuple(identity, Val(N))) where {T, N}
-    circshift(mat ,.-(rft_center_diff(size(mat), dims)))
+    ShiftedArrays.circshift(mat ,.-(rft_center_diff(size(mat), dims)))
 end
 
 """

From c1ca43516fe075a0cf83378cf83c4000576ade06 Mon Sep 17 00:00:00 2001
From: RainerHeintzmann <heintzmann@gmail.com>
Date: Sun, 6 Apr 2025 12:38:17 +0200
Subject: [PATCH 18/25] separate cuda support extension for ShiftedArray

---
 Project.toml                                  |   1 +
 ext/CUDASupportExt_FT.jl                      | 102 ++++++++++++++++++
 ext/CUDASupportExt_SA.jl                      |  87 +++++++++++++++
 .../CUDASupportExt_both.jl                    |   2 +-
 4 files changed, 191 insertions(+), 1 deletion(-)
 create mode 100644 ext/CUDASupportExt_FT.jl
 create mode 100644 ext/CUDASupportExt_SA.jl
 rename ext/CUDASupportExt.jl => src/CUDASupportExt_both.jl (99%)

diff --git a/Project.toml b/Project.toml
index f0c7451..52142e4 100644
--- a/Project.toml
+++ b/Project.toml
@@ -19,6 +19,7 @@ CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 
 [extensions]
 CUDASupportExt_FT = ["CUDA", "Adapt"]
+CUDASupportExt_SA = ["CUDA", "Adapt"]
 
 [compat]
 Adapt = "3.7, 4.0, 4.1"
diff --git a/ext/CUDASupportExt_FT.jl b/ext/CUDASupportExt_FT.jl
new file mode 100644
index 0000000..5677b30
--- /dev/null
+++ b/ext/CUDASupportExt_FT.jl
@@ -0,0 +1,102 @@
+module CUDASupportExt_FT
+using CUDA 
+using Adapt
+using FourierTools
+using Base 
+# using NFFT
+# using CuNFFT
+
+# define a number of Union types to not repeat all definitions for each type
+AllShiftedType = Union{FourierTools.FourierSplit{<:Any,<:Any,<:Any},
+                            FourierTools.FourierJoin{<:Any,<:Any,<:Any}}
+
+# these are special only if a CuArray is wrapped
+
+AllSubArrayType = Union{SubArray{<:Any, <:Any, <:AllShiftedType, <:Any, <:Any},
+                        Base.ReshapedArray{<:Any, <:Any, <:AllShiftedType, <:Any},
+                        SubArray{<:Any, <:Any, <:Base.ReshapedArray{<:Any, <:Any, <:AllShiftedType, <:Any}, <:Any, <:Any}}
+AllShiftedAndViews = Union{AllShiftedType, AllSubArrayType}
+
+AllShiftedTypeCu{N, CD} = Union{FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{<:Any,N,CD}},
+                            FourierTools.FourierJoin{<:Any,<:Any,<:CuArray{<:Any,N,CD}}}
+AllSubArrayTypeCu{N, CD} = Union{SubArray{<:Any, <:Any, <:AllShiftedTypeCu{N,CD}, <:Any, <:Any},
+                                 Base.ReshapedArray{<:Any, <:Any, <:AllShiftedTypeCu{N,CD}, <:Any},
+                                 SubArray{<:Any, <:Any, <:Base.ReshapedArray{<:Any, <:Any, <:AllShiftedTypeCu{N,CD}, <:Any}, <:Any, <:Any}}
+AllShiftedAndViewsCu{N, CD} = Union{AllShiftedTypeCu{N, CD}, AllSubArrayTypeCu{N, CD}}
+
+Adapt.adapt_structure(to, x::FourierTools.FourierSplit{T, M, AA, D}) where {T, M, AA, D} = FourierTools.FourierSplit(adapt(to, parent(x)), Val(D), x.L1, x.L2, x.do_split);
+Adapt.adapt_structure(to, x::FourierTools.FourierJoin{T, M, AA, D}) where {T, M, AA, D} = FourierTools.FourierJoin(adapt(to, parent(x)), Val(D), x.L1, x.L2, x.do_join);
+
+function Base.Broadcast.BroadcastStyle(::Type{T})  where {N, CD, T<:AllShiftedTypeCu{N, CD}}
+    CUDA.CuArrayStyle{N,CD}()
+end
+
+# Define the BroadcastStyle for SubArray of MutableShiftedArray with CuArray
+
+function Base.Broadcast.BroadcastStyle(::Type{T})  where {N, CD, T<:AllSubArrayTypeCu{N, CD}}
+    CUDA.CuArrayStyle{N,CD}()
+end
+
+function Base.copy(s::AllShiftedAndViews)
+    res = similar(get_base_arr(s), eltype(s), size(s));
+    res .= s
+    return res
+end
+
+function Base.collect(x::AllShiftedAndViews) 
+    return copy(x) # stay on the GPU        
+end
+
+function Base.Array(x::AllShiftedAndViews) 
+    return Array(copy(x)) # remove from GPU
+end
+
+function Base.:(==)(x::AllShiftedAndViewsCu, y::AbstractArray) 
+    return all(x .== y)
+end
+
+function Base.:(==)(y::AbstractArray, x::AllShiftedAndViewsCu) 
+    return all(x .== y)
+end
+
+function Base.:(==)(x::AllShiftedAndViewsCu, y::AllShiftedAndViewsCu) 
+    return all(x .== y)
+end
+
+function Base.isapprox(x::AllShiftedAndViewsCu, y::AbstractArray; atol=0, rtol=atol>0 ? 0 : sqrt(eps(real(eltype(x)))), va...) 
+    atol = (atol != 0) ? atol : rtol * maximum(abs.(x))
+    return all(abs.(x .- y) .<= atol)
+end
+
+function Base.isapprox(y::AbstractArray, x::AllShiftedAndViewsCu; atol=0, rtol=atol>0 ? 0 : sqrt(eps(real(eltype(x)))),  va...)     
+    atol = (atol != 0) ? atol : rtol * maximum(abs.(x))
+    return all(abs.(x .- y) .<= atol)
+end
+
+function Base.isapprox(x::AllShiftedAndViewsCu, y::AllShiftedAndViewsCu; atol=0, rtol=atol>0 ? 0 : sqrt(eps(real(eltype(x)))),  va...) 
+    atol = (atol != 0) ? atol : rtol * maximum(abs.(x))
+    return all(abs.(x .- y) .<= atol)
+end
+
+function Base.show(io::IO, mm::MIME"text/plain", cs::AllShiftedAndViews) 
+    CUDA.@allowscalar invoke(Base.show, Tuple{IO, typeof(mm), AbstractArray}, io, mm, cs) 
+end
+
+### addition functions specific to CUDA
+
+function FourierTools.optional_collect(a::CuArray)
+    a 
+end
+
+get_base_arr(arr::CuArray) = arr
+get_base_arr(arr::Array) = arr
+function get_base_arr(arr::AbstractArray) 
+    p = parent(arr)
+    return (p == arr) ? arr : get_base_arr(parent(arr))
+end
+
+function similar_zeros(arr::CuArray, sz::NTuple=size(arr))
+    CUDA.zeros(sz)
+end
+
+end
\ No newline at end of file
diff --git a/ext/CUDASupportExt_SA.jl b/ext/CUDASupportExt_SA.jl
new file mode 100644
index 0000000..9715d02
--- /dev/null
+++ b/ext/CUDASupportExt_SA.jl
@@ -0,0 +1,87 @@
+module CUDASupportExt_SA
+using CUDA 
+using Adapt
+using ShiftedArrays
+using Base 
+
+get_base_arr(arr::CuArray) = arr
+get_base_arr(arr::Array) = arr
+function get_base_arr(arr::AbstractArray) 
+    p = parent(arr)
+    return (p == arr) ? arr : get_base_arr(parent(arr))
+end
+
+# define a number of Union types to not repeat all definitions for each type
+AllShiftedType = Union{ShiftedArrays.CircShiftedArray{<:Any,<:Any,<:Any}}
+
+# these are special only if a CuArray is wrapped
+
+AllSubArrayType = Union{SubArray{<:Any, <:Any, <:AllShiftedType, <:Any, <:Any},
+                        Base.ReshapedArray{<:Any, <:Any, <:AllShiftedType, <:Any},
+                        SubArray{<:Any, <:Any, <:Base.ReshapedArray{<:Any, <:Any, <:AllShiftedType, <:Any}, <:Any, <:Any}}
+AllShiftedAndViews = Union{AllShiftedType, AllSubArrayType}
+
+AllShiftedTypeCu{N, CD} = Union{ShiftedArrays.CircShiftedArray{<:Any,<:Any,<:CuArray{<:Any,N,CD}}}
+AllSubArrayTypeCu{N, CD} = Union{SubArray{<:Any, <:Any, <:AllShiftedTypeCu{N,CD}, <:Any, <:Any},
+                                 Base.ReshapedArray{<:Any, <:Any, <:AllShiftedTypeCu{N,CD}, <:Any},
+                                 SubArray{<:Any, <:Any, <:Base.ReshapedArray{<:Any, <:Any, <:AllShiftedTypeCu{N,CD}, <:Any}, <:Any, <:Any}}
+AllShiftedAndViewsCu{N, CD} = Union{AllShiftedTypeCu{N, CD}, AllSubArrayTypeCu{N, CD}}
+
+Adapt.adapt_structure(to, x::ShiftedArrays.CircShiftedArray{T, N, S}) where {T, N, S} = ShiftedArrays.CircShiftedArray(adapt(to, parent(x)), ShiftedArrays.shifts(x));
+
+function Base.Broadcast.BroadcastStyle(::Type{T})  where {N, CD, T<:AllShiftedTypeCu{N, CD}}
+    CUDA.CuArrayStyle{N,CD}()
+end
+
+# Define the BroadcastStyle for SubArray of MutableShiftedArray with CuArray
+
+function Base.Broadcast.BroadcastStyle(::Type{T})  where {N, CD, T<:AllSubArrayTypeCu{N, CD}}
+    CUDA.CuArrayStyle{N,CD}()
+end
+
+function Base.copy(s::AllShiftedAndViews)
+    res = similar(get_base_arr(s), eltype(s), size(s));
+    res .= s
+    return res
+end
+
+function Base.collect(x::AllShiftedAndViews) 
+    return copy(x) # stay on the GPU        
+end
+
+function Base.Array(x::AllShiftedAndViews) 
+    return Array(copy(x)) # remove from GPU
+end
+
+function Base.:(==)(x::AllShiftedAndViewsCu, y::AbstractArray) 
+    return all(x .== y)
+end
+
+function Base.:(==)(y::AbstractArray, x::AllShiftedAndViewsCu) 
+    return all(x .== y)
+end
+
+function Base.:(==)(x::AllShiftedAndViewsCu, y::AllShiftedAndViewsCu) 
+    return all(x .== y)
+end
+
+function Base.isapprox(x::AllShiftedAndViewsCu, y::AbstractArray; atol=0, rtol=atol>0 ? 0 : sqrt(eps(real(eltype(x)))), va...) 
+    atol = (atol != 0) ? atol : rtol * maximum(abs.(x))
+    return all(abs.(x .- y) .<= atol)
+end
+
+function Base.isapprox(y::AbstractArray, x::AllShiftedAndViewsCu; atol=0, rtol=atol>0 ? 0 : sqrt(eps(real(eltype(x)))),  va...)     
+    atol = (atol != 0) ? atol : rtol * maximum(abs.(x))
+    return all(abs.(x .- y) .<= atol)
+end
+
+function Base.isapprox(x::AllShiftedAndViewsCu, y::AllShiftedAndViewsCu; atol=0, rtol=atol>0 ? 0 : sqrt(eps(real(eltype(x)))),  va...) # where {CT, N, CD, T<:ShiftedArrays.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
+    atol = (atol != 0) ? atol : rtol * maximum(abs.(x))
+    return all(abs.(x .- y) .<= atol)
+end
+
+function Base.show(io::IO, mm::MIME"text/plain", cs::AllShiftedAndViews) 
+    CUDA.@allowscalar invoke(Base.show, Tuple{IO, typeof(mm), AbstractArray}, io, mm, cs) 
+end
+
+end
\ No newline at end of file
diff --git a/ext/CUDASupportExt.jl b/src/CUDASupportExt_both.jl
similarity index 99%
rename from ext/CUDASupportExt.jl
rename to src/CUDASupportExt_both.jl
index 4174f67..21fdaa9 100644
--- a/ext/CUDASupportExt.jl
+++ b/src/CUDASupportExt_both.jl
@@ -28,7 +28,7 @@ AllSubArrayTypeCu{N, CD} = Union{SubArray{<:Any, <:Any, <:AllShiftedTypeCu{N,CD}
                                  SubArray{<:Any, <:Any, <:Base.ReshapedArray{<:Any, <:Any, <:AllShiftedTypeCu{N,CD}, <:Any}, <:Any, <:Any}}
 AllShiftedAndViewsCu{N, CD} = Union{AllShiftedTypeCu{N, CD}, AllSubArrayTypeCu{N, CD}}
 
-Adapt.adapt_structure(to, x::ShiftedArrays.CircShiftedArray{T, N, S}) where {T, N, S} = ShiftedArrays.CircShiftedArray(adapt(to, parent(x)), FourierTools.shifts(x));
+Adapt.adapt_structure(to, x::ShiftedArrays.CircShiftedArray{T, N, S}) where {T, N, S} = ShiftedArrays.CircShiftedArray(adapt(to, parent(x)), CirShiftedArray.shifts(x));
 Adapt.adapt_structure(to, x::FourierTools.FourierSplit{T, M, AA, D}) where {T, M, AA, D} = FourierTools.FourierSplit(adapt(to, parent(x)), Val(D), x.L1, x.L2, x.do_split);
 Adapt.adapt_structure(to, x::FourierTools.FourierJoin{T, M, AA, D}) where {T, M, AA, D} = FourierTools.FourierJoin(adapt(to, parent(x)), Val(D), x.L1, x.L2, x.do_join);
 

From 60c0cfeb1920dc5926eb85d9e7c4c7c697fa0fd5 Mon Sep 17 00:00:00 2001
From: RainerHeintzmann <heintzmann@gmail.com>
Date: Sun, 6 Apr 2025 21:43:32 +0200
Subject: [PATCH 19/25] removed both Extension stub

---
 src/CUDASupportExt_both.jl | 107 -------------------------------------
 1 file changed, 107 deletions(-)
 delete mode 100644 src/CUDASupportExt_both.jl

diff --git a/src/CUDASupportExt_both.jl b/src/CUDASupportExt_both.jl
deleted file mode 100644
index 21fdaa9..0000000
--- a/src/CUDASupportExt_both.jl
+++ /dev/null
@@ -1,107 +0,0 @@
-module CUDASupportExt_FT
-using CUDA 
-using Adapt
-using ShiftedArrays
-using FourierTools
-using IndexFunArrays # to prevent a recuursive stack overflow in get_base_arr
-using Base 
-# using NFFT
-# using CuNFFT
-
-# define a number of Union types to not repeat all definitions for each type
-AllShiftedType = Union{ShiftedArrays.CircShiftedArray{<:Any,<:Any,<:Any},
-                            FourierTools.FourierSplit{<:Any,<:Any,<:Any},
-                            FourierTools.FourierJoin{<:Any,<:Any,<:Any}}
-
-# these are special only if a CuArray is wrapped
-
-AllSubArrayType = Union{SubArray{<:Any, <:Any, <:AllShiftedType, <:Any, <:Any},
-                        Base.ReshapedArray{<:Any, <:Any, <:AllShiftedType, <:Any},
-                        SubArray{<:Any, <:Any, <:Base.ReshapedArray{<:Any, <:Any, <:AllShiftedType, <:Any}, <:Any, <:Any}}
-AllShiftedAndViews = Union{AllShiftedType, AllSubArrayType}
-
-AllShiftedTypeCu{N, CD} = Union{ShiftedArrays.CircShiftedArray{<:Any,<:Any,<:CuArray{<:Any,N,CD}},
-                            FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{<:Any,N,CD}},
-                            FourierTools.FourierJoin{<:Any,<:Any,<:CuArray{<:Any,N,CD}}}
-AllSubArrayTypeCu{N, CD} = Union{SubArray{<:Any, <:Any, <:AllShiftedTypeCu{N,CD}, <:Any, <:Any},
-                                 Base.ReshapedArray{<:Any, <:Any, <:AllShiftedTypeCu{N,CD}, <:Any},
-                                 SubArray{<:Any, <:Any, <:Base.ReshapedArray{<:Any, <:Any, <:AllShiftedTypeCu{N,CD}, <:Any}, <:Any, <:Any}}
-AllShiftedAndViewsCu{N, CD} = Union{AllShiftedTypeCu{N, CD}, AllSubArrayTypeCu{N, CD}}
-
-Adapt.adapt_structure(to, x::ShiftedArrays.CircShiftedArray{T, N, S}) where {T, N, S} = ShiftedArrays.CircShiftedArray(adapt(to, parent(x)), CirShiftedArray.shifts(x));
-Adapt.adapt_structure(to, x::FourierTools.FourierSplit{T, M, AA, D}) where {T, M, AA, D} = FourierTools.FourierSplit(adapt(to, parent(x)), Val(D), x.L1, x.L2, x.do_split);
-Adapt.adapt_structure(to, x::FourierTools.FourierJoin{T, M, AA, D}) where {T, M, AA, D} = FourierTools.FourierJoin(adapt(to, parent(x)), Val(D), x.L1, x.L2, x.do_join);
-
-function Base.Broadcast.BroadcastStyle(::Type{T})  where {N, CD, T<:AllShiftedTypeCu{N, CD}}
-    CUDA.CuArrayStyle{N,CD}()
-end
-
-# Define the BroadcastStyle for SubArray of MutableShiftedArray with CuArray
-
-function Base.Broadcast.BroadcastStyle(::Type{T})  where {N, CD, T<:AllSubArrayTypeCu{N, CD}}
-    CUDA.CuArrayStyle{N,CD}()
-end
-
-function Base.copy(s::AllShiftedAndViews)
-    res = similar(get_base_arr(s), eltype(s), size(s));
-    res .= s
-    return res
-end
-
-function Base.collect(x::AllShiftedAndViews) 
-    return copy(x) # stay on the GPU        
-end
-
-function Base.Array(x::AllShiftedAndViews) 
-    return Array(copy(x)) # remove from GPU
-end
-
-function Base.:(==)(x::AllShiftedAndViewsCu, y::AbstractArray) 
-    return all(x .== y)
-end
-
-function Base.:(==)(y::AbstractArray, x::AllShiftedAndViewsCu) 
-    return all(x .== y)
-end
-
-function Base.:(==)(x::AllShiftedAndViewsCu, y::AllShiftedAndViewsCu) 
-    return all(x .== y)
-end
-
-function Base.isapprox(x::AllShiftedAndViewsCu, y::AbstractArray; atol=0, rtol=atol>0 ? 0 : sqrt(eps(real(eltype(x)))), va...) 
-    atol = (atol != 0) ? atol : rtol * maximum(abs.(x))
-    return all(abs.(x .- y) .<= atol)
-end
-
-function Base.isapprox(y::AbstractArray, x::AllShiftedAndViewsCu; atol=0, rtol=atol>0 ? 0 : sqrt(eps(real(eltype(x)))),  va...)     
-    atol = (atol != 0) ? atol : rtol * maximum(abs.(x))
-    return all(abs.(x .- y) .<= atol)
-end
-
-function Base.isapprox(x::AllShiftedAndViewsCu, y::AllShiftedAndViewsCu; atol=0, rtol=atol>0 ? 0 : sqrt(eps(real(eltype(x)))),  va...) # where {CT, N, CD, T<:ShiftedArrays.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
-    atol = (atol != 0) ? atol : rtol * maximum(abs.(x))
-    return all(abs.(x .- y) .<= atol)
-end
-
-function Base.show(io::IO, mm::MIME"text/plain", cs::AllShiftedAndViews) 
-    CUDA.@allowscalar invoke(Base.show, Tuple{IO, typeof(mm), AbstractArray}, io, mm, cs) 
-end
-
-### addition functions specific to CUDA
-
-function FourierTools.optional_collect(a::CuArray)
-    a 
-end
-
-get_base_arr(arr::CuArray) = arr
-get_base_arr(arr::Array) = arr
-get_base_arr(arr::IndexFunArray) = arr
-function get_base_arr(arr::AbstractArray) 
-    get_base_arr(parent(arr))
-end
-
-function similar_zeros(arr::CuArray, sz::NTuple=size(arr))
-    CUDA.zeros(sz)
-end
-
-end
\ No newline at end of file

From 5116de8d03be38663c535bca0449a8a3d7bee9ff Mon Sep 17 00:00:00 2001
From: RainerHeintzmann <heintzmann@gmail.com>
Date: Mon, 7 Apr 2025 11:18:24 +0200
Subject: [PATCH 20/25] better code coverage in 1d version of getindex.

---
 test/custom_fourier_types.jl | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/test/custom_fourier_types.jl b/test/custom_fourier_types.jl
index 4843870..59c6121 100644
--- a/test/custom_fourier_types.jl
+++ b/test/custom_fourier_types.jl
@@ -4,11 +4,15 @@
     x = opt_cu(randn((N, N)), use_cuda)
     fs = FourierTools.FourierSplit(x, Val(2), 2, 4, true)
     @test FourierTools.parenttype(fs) == typeof(x)
+    @test fs[1,1] == fs[1]
+    @test fs[1,2] == fs[6]
     fs = FourierTools.FourierSplit(x, Val(2), 2, 4, false)
     @test FourierTools.parenttype(fs) == typeof(x)
     
     fj = FourierTools.FourierJoin(x, Val(2), 2, 4, true)
     @test FourierTools.parenttype(fj) == typeof(x)
+    @test fj[1,1] == fj[1]
+    @test fj[1,2] == fj[6]
 
     fj = FourierTools.FourierJoin(x, Val(2), 2, 4, false)
     @test FourierTools.parenttype(fj) == typeof(x)

From 5220a1f742ee64cb82c508ad2d05442ee320f560 Mon Sep 17 00:00:00 2001
From: RainerHeintzmann <heintzmann@gmail.com>
Date: Tue, 8 Apr 2025 22:58:33 +0200
Subject: [PATCH 21/25] get_base_arr to identity check

---
 ext/CUDASupportExt_FT.jl | 2 +-
 ext/CUDASupportExt_SA.jl | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ext/CUDASupportExt_FT.jl b/ext/CUDASupportExt_FT.jl
index 5677b30..d874c9e 100644
--- a/ext/CUDASupportExt_FT.jl
+++ b/ext/CUDASupportExt_FT.jl
@@ -92,7 +92,7 @@ get_base_arr(arr::CuArray) = arr
 get_base_arr(arr::Array) = arr
 function get_base_arr(arr::AbstractArray) 
     p = parent(arr)
-    return (p == arr) ? arr : get_base_arr(parent(arr))
+    return (p === arr) ? arr : get_base_arr(parent(arr))
 end
 
 function similar_zeros(arr::CuArray, sz::NTuple=size(arr))
diff --git a/ext/CUDASupportExt_SA.jl b/ext/CUDASupportExt_SA.jl
index 9715d02..0ad426a 100644
--- a/ext/CUDASupportExt_SA.jl
+++ b/ext/CUDASupportExt_SA.jl
@@ -8,7 +8,7 @@ get_base_arr(arr::CuArray) = arr
 get_base_arr(arr::Array) = arr
 function get_base_arr(arr::AbstractArray) 
     p = parent(arr)
-    return (p == arr) ? arr : get_base_arr(parent(arr))
+    return (p === arr) ? arr : get_base_arr(parent(arr))
 end
 
 # define a number of Union types to not repeat all definitions for each type

From bf0c713af3cac5cbf951fd2562eda72267775dee Mon Sep 17 00:00:00 2001
From: RainerHeintzmann <heintzmann@gmail.com>
Date: Mon, 30 Jun 2025 14:45:00 +0200
Subject: [PATCH 22/25] bug fixes as indicated by Felix

---
 Project.toml             |  6 +++---
 ext/CUDASupportExt_FT.jl | 12 ++++++------
 ext/CUDASupportExt_SA.jl |  6 +++---
 src/fourier_shifting.jl  |  2 +-
 4 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/Project.toml b/Project.toml
index 52142e4..4baa885 100644
--- a/Project.toml
+++ b/Project.toml
@@ -24,11 +24,11 @@ CUDASupportExt_SA = ["CUDA", "Adapt"]
 [compat]
 Adapt = "3.7, 4.0, 4.1"
 CUDA = "5.2, 5.3, 5.4, 5.5, 5.6, 5.7"
-ChainRulesCore = "1, 1.0, 1.1"
-FFTW = "1.5"
+ChainRulesCore = "1"
+FFTW = "1.5, 1.6, 1.7, 1.8, 1.9"
 ImageTransformations = "0.9, 0.10"
 IndexFunArrays = "0.2"
-NDTools = "0.8.0"
+NDTools = "0.8"
 NFFT = "0.11, 0.12, 0.13"
 Reexport = "1"
 ShiftedArrays = "2.0.0"
diff --git a/ext/CUDASupportExt_FT.jl b/ext/CUDASupportExt_FT.jl
index d874c9e..f6e6bf8 100644
--- a/ext/CUDASupportExt_FT.jl
+++ b/ext/CUDASupportExt_FT.jl
@@ -7,22 +7,22 @@ using Base
 # using CuNFFT
 
 # define a number of Union types to not repeat all definitions for each type
-AllShiftedType = Union{FourierTools.FourierSplit{<:Any,<:Any,<:Any},
+const AllShiftedType = Union{FourierTools.FourierSplit{<:Any,<:Any,<:Any},
                             FourierTools.FourierJoin{<:Any,<:Any,<:Any}}
 
 # these are special only if a CuArray is wrapped
 
-AllSubArrayType = Union{SubArray{<:Any, <:Any, <:AllShiftedType, <:Any, <:Any},
+const AllSubArrayType = Union{SubArray{<:Any, <:Any, <:AllShiftedType, <:Any, <:Any},
                         Base.ReshapedArray{<:Any, <:Any, <:AllShiftedType, <:Any},
                         SubArray{<:Any, <:Any, <:Base.ReshapedArray{<:Any, <:Any, <:AllShiftedType, <:Any}, <:Any, <:Any}}
-AllShiftedAndViews = Union{AllShiftedType, AllSubArrayType}
+const AllShiftedAndViews = Union{AllShiftedType, AllSubArrayType}
 
-AllShiftedTypeCu{N, CD} = Union{FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{<:Any,N,CD}},
+const AllShiftedTypeCu{N, CD} = Union{FourierTools.FourierSplit{<:Any,<:Any,<:CuArray{<:Any,N,CD}},
                             FourierTools.FourierJoin{<:Any,<:Any,<:CuArray{<:Any,N,CD}}}
-AllSubArrayTypeCu{N, CD} = Union{SubArray{<:Any, <:Any, <:AllShiftedTypeCu{N,CD}, <:Any, <:Any},
+const AllSubArrayTypeCu{N, CD} = Union{SubArray{<:Any, <:Any, <:AllShiftedTypeCu{N,CD}, <:Any, <:Any},
                                  Base.ReshapedArray{<:Any, <:Any, <:AllShiftedTypeCu{N,CD}, <:Any},
                                  SubArray{<:Any, <:Any, <:Base.ReshapedArray{<:Any, <:Any, <:AllShiftedTypeCu{N,CD}, <:Any}, <:Any, <:Any}}
-AllShiftedAndViewsCu{N, CD} = Union{AllShiftedTypeCu{N, CD}, AllSubArrayTypeCu{N, CD}}
+const AllShiftedAndViewsCu{N, CD} = Union{AllShiftedTypeCu{N, CD}, AllSubArrayTypeCu{N, CD}}
 
 Adapt.adapt_structure(to, x::FourierTools.FourierSplit{T, M, AA, D}) where {T, M, AA, D} = FourierTools.FourierSplit(adapt(to, parent(x)), Val(D), x.L1, x.L2, x.do_split);
 Adapt.adapt_structure(to, x::FourierTools.FourierJoin{T, M, AA, D}) where {T, M, AA, D} = FourierTools.FourierJoin(adapt(to, parent(x)), Val(D), x.L1, x.L2, x.do_join);
diff --git a/ext/CUDASupportExt_SA.jl b/ext/CUDASupportExt_SA.jl
index 0ad426a..39ed0a2 100644
--- a/ext/CUDASupportExt_SA.jl
+++ b/ext/CUDASupportExt_SA.jl
@@ -12,14 +12,14 @@ function get_base_arr(arr::AbstractArray)
 end
 
 # define a number of Union types to not repeat all definitions for each type
-AllShiftedType = Union{ShiftedArrays.CircShiftedArray{<:Any,<:Any,<:Any}}
+const AllShiftedType = Union{ShiftedArrays.CircShiftedArray{<:Any,<:Any,<:Any}}
 
 # these are special only if a CuArray is wrapped
 
-AllSubArrayType = Union{SubArray{<:Any, <:Any, <:AllShiftedType, <:Any, <:Any},
+const AllSubArrayType = Union{SubArray{<:Any, <:Any, <:AllShiftedType, <:Any, <:Any},
                         Base.ReshapedArray{<:Any, <:Any, <:AllShiftedType, <:Any},
                         SubArray{<:Any, <:Any, <:Base.ReshapedArray{<:Any, <:Any, <:AllShiftedType, <:Any}, <:Any, <:Any}}
-AllShiftedAndViews = Union{AllShiftedType, AllSubArrayType}
+const AllShiftedAndViews = Union{AllShiftedType, AllSubArrayType}
 
 AllShiftedTypeCu{N, CD} = Union{ShiftedArrays.CircShiftedArray{<:Any,<:Any,<:CuArray{<:Any,N,CD}}}
 AllSubArrayTypeCu{N, CD} = Union{SubArray{<:Any, <:Any, <:AllShiftedTypeCu{N,CD}, <:Any, <:Any},
diff --git a/src/fourier_shifting.jl b/src/fourier_shifting.jl
index 5ec9bbe..212bbee 100644
--- a/src/fourier_shifting.jl
+++ b/src/fourier_shifting.jl
@@ -80,7 +80,7 @@ function soft_shift(freqs, myshift, fraction=eltype(freqs)(0.1); corner=false)
         w .= window_half_cos(size(freqs),border_in=1.0-fraction, border_out=1.0)
         w = ifftshift_view(w)
     end
-    return cispi.(-freqs .* 2 .* (w .* myshift + (1.0 .-w).* rounded_shift))
+    return cispi.(-freqs .* 2 .* (w .* myshift + (1 .-w).* rounded_shift))
 end
 
 function shift_by_1D_FT!(arr::TA, shifts; soft_fraction=0, take_real=false, fix_nyquist_frequency=false) where {N, TA<:AbstractArray{<:Complex, N}}

From 32acf41e1d4ae6dea676a619175271bed4c51d07 Mon Sep 17 00:00:00 2001
From: RainerHeintzmann <heintzmann@gmail.com>
Date: Mon, 30 Jun 2025 14:52:37 +0200
Subject: [PATCH 23/25] added comment [skip ci]

---
 ext/CUDASupportExt_SA.jl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ext/CUDASupportExt_SA.jl b/ext/CUDASupportExt_SA.jl
index 39ed0a2..fd1849d 100644
--- a/ext/CUDASupportExt_SA.jl
+++ b/ext/CUDASupportExt_SA.jl
@@ -4,6 +4,8 @@ using Adapt
 using ShiftedArrays
 using Base 
 
+# This should live in ShiftedArrays.jl, as otherwise it is type piracy!
+
 get_base_arr(arr::CuArray) = arr
 get_base_arr(arr::Array) = arr
 function get_base_arr(arr::AbstractArray) 

From 08efce8a697a0b88cbffe88716b3eb90ac1472c0 Mon Sep 17 00:00:00 2001
From: RainerHeintzmann <heintzmann@gmail.com>
Date: Mon, 4 Aug 2025 16:38:15 +0200
Subject: [PATCH 24/25] minor modifications

---
 ext/CUDASupportExt_SA.jl |  2 +-
 src/fourier_resizing.jl  | 37 -------------------------------------
 src/utils.jl             |  9 ++-------
 3 files changed, 3 insertions(+), 45 deletions(-)

diff --git a/ext/CUDASupportExt_SA.jl b/ext/CUDASupportExt_SA.jl
index fd1849d..9c7789d 100644
--- a/ext/CUDASupportExt_SA.jl
+++ b/ext/CUDASupportExt_SA.jl
@@ -14,7 +14,7 @@ function get_base_arr(arr::AbstractArray)
 end
 
 # define a number of Union types to not repeat all definitions for each type
-const AllShiftedType = Union{ShiftedArrays.CircShiftedArray{<:Any,<:Any,<:Any}}
+const AllShiftedType = ShiftedArrays.CircShiftedArray{<:Any,<:Any,<:Any}
 
 # these are special only if a CuArray is wrapped
 
diff --git a/src/fourier_resizing.jl b/src/fourier_resizing.jl
index e37c958..e4e6f53 100644
--- a/src/fourier_resizing.jl
+++ b/src/fourier_resizing.jl
@@ -87,48 +87,11 @@ The size of the corresponding real-space array view after the operation finished
 is always assumed to align before and after the padding aperation.
 """
 function select_region_rft(mat, old_size, new_size)
-    # rft_old_size = size(mat)
     rft_new_size = Base.setindex(new_size,new_size[1] ÷ 2 + 1, 1)
-    # tmp = similar(mat, (8, 10))
-    # tmp .= 1
-    # return rft_fix_after(tmp, old_size, new_size)
     return rft_fix_after(rft_pad(
         rft_fix_before(mat, old_size, new_size), rft_new_size), old_size, new_size)
 end
 
-# """
-#     select_region(mat; new_size)
-
-# performs the necessary Fourier-space operations of resampling
-# in the space of ft (meaning the already circshifted version of fft).
-
-# `new_size`.
-# The size of the array view after the operation finished. 
-
-# `center`.
-# Specifies the center of the new view in coordinates of the old view. By default an alignment of the Fourier-centers is assumed.
-# # Examples
-# ```jldoctest
-# julia> using FFTW, FourierTools
-
-# julia> select_region(ones(3,3),new_size=(7,7),center=(1,3))
-# 7×7 PaddedView(0.0, OffsetArray(::Matrix{Float64}, 4:6, 2:4), (Base.OneTo(7), Base.OneTo(7))) with eltype Float64:
-#  0.0  0.0  0.0  0.0  0.0  0.0  0.0
-#  0.0  0.0  0.0  0.0  0.0  0.0  0.0
-#  0.0  0.0  0.0  0.0  0.0  0.0  0.0
-#  0.0  1.0  1.0  1.0  0.0  0.0  0.0
-#  0.0  1.0  1.0  1.0  0.0  0.0  0.0
-#  0.0  1.0  1.0  1.0  0.0  0.0  0.0
-#  0.0  0.0  0.0  0.0  0.0  0.0  0.0
-# ```
-# """
-# function select_region(mat; new_size=size(mat), center=ft_center_diff(size(mat)).+1, pad_value=zero(eltype(mat)))
-#     new_size = Tuple(expand_size(new_size, size(mat)))
-#     center = Tuple(expand_size(center, ft_center_diff(size(mat)) .+ 1))
-#     oldcenter = ft_center_diff(new_size) .+ 1
-#     PaddedView(pad_value, mat, new_size, oldcenter .- center.+1);
-# end
-
 function ft_pad(mat, new_size)
     return select_region(optional_collect(mat); new_size = new_size)
 end
diff --git a/src/utils.jl b/src/utils.jl
index 317f0a7..02f433f 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -16,6 +16,7 @@ Calculates the size of the broadcasted array of `arr1` and `arr2`.
 ```jldoctest
 julia> FourierTools.bc_size(rand(5, 2, 3), rand(1, 2))
 (5, 2, 3)
+```
 """
 function bc_size(arr1, arr2)
     md = max(ndims(arr1), ndims(arr2))
@@ -39,6 +40,7 @@ julia> FourierTools.similar_zeros([1, 2, 3], (3,))
  0
  0
  0
+```
 """
 function similar_zeros(arr::AbstractArray, sz::NTuple=size(arr))
     res = similar(arr, sz)
@@ -370,13 +372,6 @@ julia> FourierTools.center_set!([1, 1, 1, 1, 1, 1], [5, 5, 5])
 ```
 """
 function center_set!(arr_large, arr_small)
-    # out_is = []
-    # for i = 1:ndims(arr_large)
-    #     a, b = get_indices_around_center(size(arr_large)[i], size(arr_small)[i])
-    #     push!(out_is, a:b)
-    # end
-
-    #rest = ones(Int, ndims(arr_large) - 3)
     arr_large[get_idxrng_around_center(arr_large, arr_small)...] = arr_small
     
     return arr_large

From 80b6f4bf16b50c9d58edc3352624b49d6a43ef52 Mon Sep 17 00:00:00 2001
From: RainerHeintzmann <heintzmann@gmail.com>
Date: Sun, 10 Aug 2025 19:19:04 +0200
Subject: [PATCH 25/25] switched entirely to MutableShiftedArrays.jl, removing
 ShiftedArrays.jl, to enable clean CUDA support

---
 Project.toml                 |  6 +--
 docs/src/utils.md            |  1 +
 ext/CUDASupportExt_SA.jl     | 89 ------------------------------------
 src/FourierTools.jl          |  4 +-
 src/czt.jl                   |  2 +-
 src/fft_helpers.jl           | 10 ++--
 src/fftshift_alternatives.jl | 12 ++---
 src/fourier_resizing.jl      |  8 ++--
 src/fourier_rotate.jl        |  3 +-
 src/fourier_shifting.jl      |  2 +-
 src/utils.jl                 |  6 +--
 11 files changed, 26 insertions(+), 117 deletions(-)
 delete mode 100644 ext/CUDASupportExt_SA.jl

diff --git a/Project.toml b/Project.toml
index 4baa885..236f725 100644
--- a/Project.toml
+++ b/Project.toml
@@ -8,10 +8,10 @@ ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
 IndexFunArrays = "613c443e-d742-454e-bfc6-1d7f8dd76566"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+MutableShiftedArrays = "d3d30c82-a38e-471c-a45a-3d24d2f4d22d"
 NDTools = "98581153-e998-4eef-8d0d-5ec2c052313d"
 NFFT = "efe261a4-0d2b-5849-be55-fc731d526b0d"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
-ShiftedArrays = "1277b4bf-5013-50f5-be3d-901d8477a67a"
 
 [weakdeps]
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
@@ -19,7 +19,6 @@ CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 
 [extensions]
 CUDASupportExt_FT = ["CUDA", "Adapt"]
-CUDASupportExt_SA = ["CUDA", "Adapt"]
 
 [compat]
 Adapt = "3.7, 4.0, 4.1"
@@ -28,12 +27,11 @@ ChainRulesCore = "1"
 FFTW = "1.5, 1.6, 1.7, 1.8, 1.9"
 ImageTransformations = "0.9, 0.10"
 IndexFunArrays = "0.2"
-NDTools = "0.8"
 NFFT = "0.11, 0.12, 0.13"
 Reexport = "1"
-ShiftedArrays = "2.0.0"
 Zygote = "0.6, 0.7"
 julia = "1, 1.6, 1.7, 1.8, 1.9, 1.10, 1.11"
+MutableShiftedArrays = "0.3"
 
 [extras]
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
diff --git a/docs/src/utils.md b/docs/src/utils.md
index 718a828..dcbc8f0 100644
--- a/docs/src/utils.md
+++ b/docs/src/utils.md
@@ -8,4 +8,5 @@ FourierTools.get_indices_around_center
 FourierTools.center_extract
 FourierTools.odd_view
 FourierTools.fourier_reverse!
+FourierTools.get_indexrange_around_center
 ```
diff --git a/ext/CUDASupportExt_SA.jl b/ext/CUDASupportExt_SA.jl
deleted file mode 100644
index 9c7789d..0000000
--- a/ext/CUDASupportExt_SA.jl
+++ /dev/null
@@ -1,89 +0,0 @@
-module CUDASupportExt_SA
-using CUDA 
-using Adapt
-using ShiftedArrays
-using Base 
-
-# This should live in ShiftedArrays.jl, as otherwise it is type piracy!
-
-get_base_arr(arr::CuArray) = arr
-get_base_arr(arr::Array) = arr
-function get_base_arr(arr::AbstractArray) 
-    p = parent(arr)
-    return (p === arr) ? arr : get_base_arr(parent(arr))
-end
-
-# define a number of Union types to not repeat all definitions for each type
-const AllShiftedType = ShiftedArrays.CircShiftedArray{<:Any,<:Any,<:Any}
-
-# these are special only if a CuArray is wrapped
-
-const AllSubArrayType = Union{SubArray{<:Any, <:Any, <:AllShiftedType, <:Any, <:Any},
-                        Base.ReshapedArray{<:Any, <:Any, <:AllShiftedType, <:Any},
-                        SubArray{<:Any, <:Any, <:Base.ReshapedArray{<:Any, <:Any, <:AllShiftedType, <:Any}, <:Any, <:Any}}
-const AllShiftedAndViews = Union{AllShiftedType, AllSubArrayType}
-
-AllShiftedTypeCu{N, CD} = Union{ShiftedArrays.CircShiftedArray{<:Any,<:Any,<:CuArray{<:Any,N,CD}}}
-AllSubArrayTypeCu{N, CD} = Union{SubArray{<:Any, <:Any, <:AllShiftedTypeCu{N,CD}, <:Any, <:Any},
-                                 Base.ReshapedArray{<:Any, <:Any, <:AllShiftedTypeCu{N,CD}, <:Any},
-                                 SubArray{<:Any, <:Any, <:Base.ReshapedArray{<:Any, <:Any, <:AllShiftedTypeCu{N,CD}, <:Any}, <:Any, <:Any}}
-AllShiftedAndViewsCu{N, CD} = Union{AllShiftedTypeCu{N, CD}, AllSubArrayTypeCu{N, CD}}
-
-Adapt.adapt_structure(to, x::ShiftedArrays.CircShiftedArray{T, N, S}) where {T, N, S} = ShiftedArrays.CircShiftedArray(adapt(to, parent(x)), ShiftedArrays.shifts(x));
-
-function Base.Broadcast.BroadcastStyle(::Type{T})  where {N, CD, T<:AllShiftedTypeCu{N, CD}}
-    CUDA.CuArrayStyle{N,CD}()
-end
-
-# Define the BroadcastStyle for SubArray of MutableShiftedArray with CuArray
-
-function Base.Broadcast.BroadcastStyle(::Type{T})  where {N, CD, T<:AllSubArrayTypeCu{N, CD}}
-    CUDA.CuArrayStyle{N,CD}()
-end
-
-function Base.copy(s::AllShiftedAndViews)
-    res = similar(get_base_arr(s), eltype(s), size(s));
-    res .= s
-    return res
-end
-
-function Base.collect(x::AllShiftedAndViews) 
-    return copy(x) # stay on the GPU        
-end
-
-function Base.Array(x::AllShiftedAndViews) 
-    return Array(copy(x)) # remove from GPU
-end
-
-function Base.:(==)(x::AllShiftedAndViewsCu, y::AbstractArray) 
-    return all(x .== y)
-end
-
-function Base.:(==)(y::AbstractArray, x::AllShiftedAndViewsCu) 
-    return all(x .== y)
-end
-
-function Base.:(==)(x::AllShiftedAndViewsCu, y::AllShiftedAndViewsCu) 
-    return all(x .== y)
-end
-
-function Base.isapprox(x::AllShiftedAndViewsCu, y::AbstractArray; atol=0, rtol=atol>0 ? 0 : sqrt(eps(real(eltype(x)))), va...) 
-    atol = (atol != 0) ? atol : rtol * maximum(abs.(x))
-    return all(abs.(x .- y) .<= atol)
-end
-
-function Base.isapprox(y::AbstractArray, x::AllShiftedAndViewsCu; atol=0, rtol=atol>0 ? 0 : sqrt(eps(real(eltype(x)))),  va...)     
-    atol = (atol != 0) ? atol : rtol * maximum(abs.(x))
-    return all(abs.(x .- y) .<= atol)
-end
-
-function Base.isapprox(x::AllShiftedAndViewsCu, y::AllShiftedAndViewsCu; atol=0, rtol=atol>0 ? 0 : sqrt(eps(real(eltype(x)))),  va...) # where {CT, N, CD, T<:ShiftedArrays.CircShiftedArray{<:Any,<:Any,<:CuArray{CT,N,CD}}}    
-    atol = (atol != 0) ? atol : rtol * maximum(abs.(x))
-    return all(abs.(x .- y) .<= atol)
-end
-
-function Base.show(io::IO, mm::MIME"text/plain", cs::AllShiftedAndViews) 
-    CUDA.@allowscalar invoke(Base.show, Tuple{IO, typeof(mm), AbstractArray}, io, mm, cs) 
-end
-
-end
\ No newline at end of file
diff --git a/src/FourierTools.jl b/src/FourierTools.jl
index 7284d49..99bd43b 100644
--- a/src/FourierTools.jl
+++ b/src/FourierTools.jl
@@ -3,7 +3,7 @@ module FourierTools
 
 using Reexport
 # using PaddedViews
-using ShiftedArrays # for circshift
+using MutableShiftedArrays # for circshift
 @reexport using FFTW
 using LinearAlgebra
 using IndexFunArrays
@@ -14,8 +14,6 @@ import Base: checkbounds, getindex, setindex!, parent, size, axes, copy, collect
 @reexport using NFFT
 FFTW.set_num_threads(4)
 
-# include("circshiftedarray.jl") # from ShiftedArrays.jl
-# include("circshift.jl") # from ShiftedArrays.jl
 include("utils.jl")
 include("nfft_nd.jl")
 include("resampling.jl")
diff --git a/src/czt.jl b/src/czt.jl
index 91ff20c..8c91cf5 100644
--- a/src/czt.jl
+++ b/src/czt.jl
@@ -27,7 +27,7 @@ function get_kernel_1d(arr::AT, N::Integer, M::Integer; a= 1.0, w = cispi(-2/N),
     CT = (RT <: Real) ? Complex{RT} : RT
     RT = real(CT)
 
-    # converts ShiftedArrays.CircShiftedArray into a plain array type:
+    # converts MutableShiftedArrays.CircShiftedArray into a plain array type:
     tmp = similar(arr, RT, (1,))
     RAT = real_arr_type(typeof(tmp), Val(1))
 
diff --git a/src/fft_helpers.jl b/src/fft_helpers.jl
index 69e385d..71315cf 100644
--- a/src/fft_helpers.jl
+++ b/src/fft_helpers.jl
@@ -30,7 +30,7 @@ end
     ffts(A [, dims])
 
 Result is semantically equivalent to `fftshift(fft(A, dims), dims)`
-However, the shift is done with `ShiftedArrays` and therefore doesn't allocate memory.
+However, the shift is done with `MutableShiftedArrays` and therefore doesn't allocate memory.
 
 See also: [`ft`](@ref ift), [`ift`](@ref ift), [`rft`](@ref rft), [`irft`](@ref irft),
           [`ffts`](@ref ffts),  [`iffts`](@ref iffts),  [`ffts!`](@ref ffts!), [`rffts`](@ref rffts), [`irffts`](@ref irffts), 
@@ -45,7 +45,7 @@ end
 
 Result is semantically equivalent to `fftshift(fft!(A, dims), dims)`.
 `A` is in-place modified.
-However, the shift is done with `ShiftedArrays` and therefore doesn't allocate memory.
+However, the shift is done with `MutableShiftedArrays` and therefore doesn't allocate memory.
 
 See also: [`ft`](@ref ift), [`ift`](@ref ift), [`rft`](@ref rft), [`irft`](@ref irft),
           [`ffts`](@ref ffts),  [`iffts`](@ref iffts),  [`ffts!`](@ref ffts!), [`rffts`](@ref rffts), [`irffts`](@ref irffts), 
@@ -59,7 +59,7 @@ end
 
 Result is semantically equivalent to `ifft(ifftshift(A, dims), dims)`.
 `A` is in-place modified.
-However, the shift is done with `ShiftedArrays` and therefore doesn't allocate memory.
+However, the shift is done with `MutableShiftedArrays` and therefore doesn't allocate memory.
 
 See also: [`ft`](@ref ift), [`ift`](@ref ift), [`rft`](@ref rft), [`irft`](@ref irft),
           [`ffts`](@ref ffts),  [`iffts`](@ref iffts),  [`ffts!`](@ref ffts!), [`rffts`](@ref rffts), [`irffts`](@ref irffts), 
@@ -74,7 +74,7 @@ end
 
 Calculates a `rfft(A, dims)` and then shift the frequencies to the center.
 `dims[1]` is not shifted, because there is no negative and positive frequency.
-The shift is done with `ShiftedArrays` and therefore doesn't allocate memory.
+The shift is done with `MutableShiftedArrays` and therefore doesn't allocate memory.
 
 See also: [`ft`](@ref ift), [`ift`](@ref ift), [`rft`](@ref rft), [`irft`](@ref irft),
           [`ffts`](@ref ffts),  [`iffts`](@ref iffts),  [`ffts!`](@ref ffts!), [`rffts`](@ref rffts), [`irffts`](@ref irffts), 
@@ -88,7 +88,7 @@ end
 
 Calculates a `irfft(A, d, dims)` and then shift the frequencies back to the corner.
 `dims[1]` is not shifted, because there is no negative and positive frequency.
-The shift is done with `ShiftedArrays` and therefore doesn't allocate memory.
+The shift is done with `MutableShiftedArrays` and therefore doesn't allocate memory.
 
 See also: [`ft`](@ref ift), [`ift`](@ref ift), [`rft`](@ref rft), [`irft`](@ref irft),
           [`ffts`](@ref ffts),  [`iffts`](@ref iffts),  [`ffts!`](@ref ffts!), [`rffts`](@ref rffts), [`irffts`](@ref irffts), 
diff --git a/src/fftshift_alternatives.jl b/src/fftshift_alternatives.jl
index b6ac47a..560eaac 100644
--- a/src/fftshift_alternatives.jl
+++ b/src/fftshift_alternatives.jl
@@ -13,7 +13,7 @@ If `dims` is not given then the signal is shifted along each dimension.
 function _fftshift!(dst::AbstractArray{T, N}, src::AbstractArray{T, N},
                    dims=ntuple(i -> i, Val(N))) where {T, N}
     Δ = ntuple(i -> i ∈ dims ? size(src, i) ÷ 2 : 0, Val(N))
-    ShiftedArrays.circshift!(dst, src, Δ)
+    MutableShiftedArrays.circshift!(dst, src, Δ)
 end
 
 """
@@ -27,7 +27,7 @@ If `dims` is not given then the signal is shifted along each dimension.
 function _ifftshift!(dst::AbstractArray{T, N}, src::AbstractArray{T, N},
                    dims=ntuple(i -> i, Val(N))) where {T, N}
     Δ = ntuple(i -> i ∈ dims ? - size(src, i) ÷ 2 : 0, Val(N))
-    ShiftedArrays.circshift!(dst, src, Δ)
+    MutableShiftedArrays.circshift!(dst, src, Δ)
 end
 
 
@@ -39,7 +39,7 @@ Result is semantically equivalent to `fftshift(A, dims)` but returns
 a view instead. 
 """
 function fftshift_view(mat::AbstractArray{T, N}, dims=ntuple(identity, Val(N))) where {T, N}
-    ShiftedArrays.circshift(mat, ft_center_diff(size(mat), dims))
+    MutableShiftedArrays.circshift(mat, ft_center_diff(size(mat), dims))
 end
 
 
@@ -51,7 +51,7 @@ a view instead.
 """
 function ifftshift_view(mat::AbstractArray{T, N}, dims=ntuple(identity, Val(N))) where {T, N}
     diff = .-(ft_center_diff(size(mat), dims))
-    return ShiftedArrays.circshift(mat, diff)
+    return MutableShiftedArrays.circshift(mat, diff)
 end
 
 
@@ -63,7 +63,7 @@ Shifts the frequencies to the center expect for `dims[1]` because there os no ne
 and positive frequency.
 """
 function rfftshift_view(mat::AbstractArray{T, N}, dims=ntuple(identity, Val(N))) where {T, N}
-    ShiftedArrays.circshift(mat, rft_center_diff(size(mat), dims))
+    MutableShiftedArrays.circshift(mat, rft_center_diff(size(mat), dims))
 end
 
 
@@ -75,7 +75,7 @@ Shifts the frequencies back to the corner except for `dims[1]` because there os
 and positive frequency.
 """
 function irfftshift_view(mat::AbstractArray{T, N}, dims=ntuple(identity, Val(N))) where {T, N}
-    ShiftedArrays.circshift(mat ,.-(rft_center_diff(size(mat), dims)))
+    MutableShiftedArrays.circshift(mat ,.-(rft_center_diff(size(mat), dims)))
 end
 
 """
diff --git a/src/fourier_resizing.jl b/src/fourier_resizing.jl
index e4e6f53..990940b 100644
--- a/src/fourier_resizing.jl
+++ b/src/fourier_resizing.jl
@@ -26,13 +26,13 @@ julia> x = [1 20 3; 4 500 6; -7 821 923]
  -7  821  923
 
 julia> ffts(x)
-3×3 ShiftedArrays.CircShiftedArray{ComplexF64, 2, Matrix{ComplexF64}}:
+3×3 MutableShiftedArrays.CircShiftedArray{ComplexF64, 2, Matrix{ComplexF64}}:
    106.5+390.577im  -1099.5-1062.61im   1000.5+700.615im
  -1138.5+354.204im   2271.0+0.0im      -1138.5-354.204im
   1000.5-700.615im  -1099.5+1062.61im    106.5-390.577im
 
 julia> select_region_ft(ffts(x), (4,4))
-4×4 PaddedView(0.0 + 0.0im, OffsetArray(::ShiftedArrays.CircShiftedArray{ComplexF64, 2, Matrix{ComplexF64}}, 2:4, 2:4), (Base.OneTo(4), Base.OneTo(4))) with eltype ComplexF64:
+4×4 PaddedView(0.0 + 0.0im, OffsetArray(::MutableShiftedArrays.CircShiftedArray{ComplexF64, 2, Matrix{ComplexF64}}, 2:4, 2:4), (Base.OneTo(4), Base.OneTo(4))) with eltype ComplexF64:
  0.0+0.0im      0.0+0.0im          0.0+0.0im          0.0+0.0im
  0.0+0.0im    106.5+390.577im  -1099.5-1062.61im   1000.5+700.615im
  0.0+0.0im  -1138.5+354.204im   2271.0+0.0im      -1138.5-354.204im
@@ -46,14 +46,14 @@ julia> x = [1 20; 4 500; -7 821; -2 2]
  -2    2
 
 julia> ffts(x)
-4×2 ShiftedArrays.CircShiftedArray{ComplexF64, 2, Matrix{ComplexF64}}:
+4×2 MutableShiftedArrays.CircShiftedArray{ComplexF64, 2, Matrix{ComplexF64}}:
   -347.0+0.0im     331.0+0.0im
    809.0-492.0im  -793.0+504.0im
  -1347.0+0.0im    1339.0+0.0im
    809.0+492.0im  -793.0-504.0im
 
 julia> select_region_ft(ffts(x), (5,3))
-5×3 FourierTools.FourierSplit{ComplexF64, 2, FourierTools.FourierSplit{ComplexF64, 2, PaddedViews.PaddedView{ComplexF64, 2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}, OffsetArrays.OffsetMatrix{ComplexF64, ShiftedArrays.CircShiftedArray{ComplexF64, 2, Matrix{ComplexF64}}}}}}:
+5×3 FourierTools.FourierSplit{ComplexF64, 2, FourierTools.FourierSplit{ComplexF64, 2, PaddedViews.PaddedView{ComplexF64, 2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}, OffsetArrays.OffsetMatrix{ComplexF64, MutableShiftedArrays.CircShiftedArray{ComplexF64, 2, Matrix{ComplexF64}}}}}}:
  -86.75+0.0im     165.5+0.0im    -86.75+0.0im
   404.5-246.0im  -793.0+504.0im   404.5-246.0im
  -673.5+0.0im    1339.0+0.0im    -673.5+0.0im
diff --git a/src/fourier_rotate.jl b/src/fourier_rotate.jl
index 568d30e..f5f0595 100644
--- a/src/fourier_rotate.jl
+++ b/src/fourier_rotate.jl
@@ -56,7 +56,8 @@ function rotate(arr, θ, rotation_plane=(1, 2); adapt_size=true, keep_new_size=f
             end
         end
 
-        arr = select_region_view(arr, new_size=old_size .+ extra_size, pad_value=pad_value)
+        # ToDo: This can be select_region_view for better performance, but should be benchmarked first
+        arr = select_region(arr, new_size=old_size .+ extra_size, pad_value=pad_value)
         # convert to radiants
 
         # parameters for shearing
diff --git a/src/fourier_shifting.jl b/src/fourier_shifting.jl
index 212bbee..03182e3 100644
--- a/src/fourier_shifting.jl
+++ b/src/fourier_shifting.jl
@@ -7,7 +7,7 @@ export shift, shift!
 Shifts an array in-place. For real arrays it is based on `rfft`.
 For complex arrays based on `fft`.
 `shifts` can be non-integer, for integer shifts one should prefer
-`circshift` or `ShiftedArrays.circshift` because a FFT-based methods
+`circshift` or `MutableShiftedArrays.circshift` because a FFT-based methods
 introduces numerical errors.
 
 ## kwargs...
diff --git a/src/utils.jl b/src/utils.jl
index 02f433f..f3fd155 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -294,7 +294,7 @@ function get_indices_around_center(i_in, i_out)
 end
 
 """
-    get_idxrng_around_center(arr_1, arr_2)
+    get_indexrange_around_center(arr_1, arr_2)
 
 A function which provides a range of output indices `i1:i2`
 where `i2 - i1 = i_out`
@@ -303,7 +303,7 @@ cuts the interval `1:i_in` such that the center frequency
 stays at the center position.
 Works for both odd and even indices
 """
-function get_idxrng_around_center(arr_1, arr_2)
+function get_indexrange_around_center(arr_1, arr_2)
     sz1 = size(arr_1)
     sz2 = size(arr_2)
     all_rng = ntuple((d) -> begin a,b = get_indices_around_center(sz1[d], sz2[d]); a:b end, ndims(arr_1))
@@ -372,7 +372,7 @@ julia> FourierTools.center_set!([1, 1, 1, 1, 1, 1], [5, 5, 5])
 ```
 """
 function center_set!(arr_large, arr_small)
-    arr_large[get_idxrng_around_center(arr_large, arr_small)...] = arr_small
+    arr_large[get_indexrange_around_center(arr_large, arr_small)...] = arr_small
     
     return arr_large
 end