File tree Expand file tree Collapse file tree 1 file changed +5
-5
lines changed Expand file tree Collapse file tree 1 file changed +5
-5
lines changed Original file line number Diff line number Diff line change @@ -145,14 +145,14 @@ end
145
145
# Call the generic kernel that is defined below, which only calls a function with
146
146
# the global GPU index.
147
147
n_gpus = length (CUDA. devices ())
148
- ndrange_local = [ div (ndrange, n_gpus) for _ in 1 : n_gpus]
149
- ndrange_local[ end ] += ndrange % n_gpus
148
+ indices_split = Iterators . partition (indices, ceil (Int, length (indices) / n_gpus))
149
+ @assert length (indices_split) == n_gpus
150
150
151
- @sync for i in 1 : n_gpus
151
+ @sync for (i, indices_) in enumerate (indices_split)
152
152
Threads. @spawn begin
153
153
CUDA. device! (i - 1 )
154
- generic_kernel (backend)(ndrange = ndrange_local[i] ) do j
155
- @inbounds @inline f (iterator[indices [j]])
154
+ generic_kernel (backend)(ndrange = length (indices_) ) do j
155
+ @inbounds @inline f (iterator[indices_ [j]])
156
156
end
157
157
KernelAbstractions. synchronize (backend)
158
158
end
You can’t perform that action at this time.
0 commit comments