Skip to content

Commit 2413f86

Browse files
authored
Merge pull request #3600 from chacha21:remap_relative
first proposal of cv::remap with relative displacement field

Relates to opencv/opencv#24621, opencv/opencv#24603

CUDA implementation of the feature

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [X] I agree to contribute to the project under Apache 2 License.
- [X] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
- [X] The PR is proposed to the proper branch
- [X] There is a reference to the original bug report and related work
- [X] There is accuracy test, performance test and test data in opencv_extra repository, if applicable. Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
1 parent 667a66e commit 2413f86

File tree

4 files changed

+149
-31
lines changed

4 files changed

+149
-31
lines changed

modules/cudawarping/include/opencv2/cudawarping.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ namespace cv { namespace cuda {
7070
@param ymap Y values. Only CV_32FC1 type is supported.
7171
@param interpolation Interpolation method (see resize ). INTER_NEAREST , INTER_LINEAR and
7272
INTER_CUBIC are supported for now.
73+
The extra flag WARP_RELATIVE_MAP can be ORed with the interpolation method
74+
(e.g. INTER_LINEAR | WARP_RELATIVE_MAP)
7375
@param borderMode Pixel extrapolation method (see borderInterpolate ). BORDER_REFLECT101 ,
7476
BORDER_REPLICATE , BORDER_CONSTANT , BORDER_REFLECT and BORDER_WRAP are supported for now.
7577
@param borderValue Value used in case of a constant border. By default, it is 0.
@@ -79,6 +81,10 @@ The function transforms the source image using the specified map:
7981
8082
\f[\texttt{dst} (x,y) = \texttt{src} (xmap(x,y), ymap(x,y))\f]
8183
84+
With the WARP_RELATIVE_MAP flag, the map values are offsets added to the destination coordinates:
85+
86+
\f[\texttt{dst} (x,y) = \texttt{src} (x+xmap(x,y), y+ymap(x,y))\f]
87+
8288
Values of pixels with non-integer coordinates are computed using the bilinear interpolation.
8389
8490
@sa remap

modules/cudawarping/src/cuda/remap.cu

Lines changed: 60 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,23 @@ namespace cv { namespace cuda { namespace device
6868
}
6969
}
7070

71+
// Relative-map remap kernel. Each thread computes one destination pixel (x, y) from its
// 2D block/thread indices and treats the map values as offsets added to (x, y), instead
// of using them directly as source coordinates.
//   src  - callable sampled with float coordinates as src(y, x)
//   mapx - per-pixel x offsets, read at row y, column x
//   mapy - per-pixel y offsets, read at row y, column x
//   dst  - output image; dst.cols / dst.rows bound the threads that do any work
template <typename Ptr2D, typename T> __global__ void remap_relative(const Ptr2D src, const PtrStepf mapx, const PtrStepf mapy, PtrStepSz<T> dst)
72+
{
73+
const int x = blockDim.x * blockIdx.x + threadIdx.x;
74+
const int y = blockDim.y * blockIdx.y + threadIdx.y;
75+
76+
// Threads that land outside the destination image do nothing.
if (x < dst.cols && y < dst.rows)
77+
{
78+
// Source coordinate = destination coordinate + offset stored in the map.
const float xcoo = x+mapx.ptr(y)[x];
79+
const float ycoo = y+mapy.ptr(y)[x];
80+
81+
// src is called with the y coordinate first; the sample is converted to T via saturate_cast.
dst.ptr(y)[x] = saturate_cast<T>(src(ycoo, xcoo));
82+
}
83+
}
84+
7185
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherStream
7286
{
73-
static void call(PtrStepSz<T> src, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool)
87+
static void call(PtrStepSz<T> src, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool, bool isRelative)
7488
{
7589
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
7690

@@ -81,14 +95,17 @@ namespace cv { namespace cuda { namespace device
8195
BorderReader<PtrStep<T>, B<work_type>> brdSrc(src, brd);
8296
Filter<BorderReader<PtrStep<T>, B<work_type>>> filter_src(brdSrc);
8397

84-
remap<<<grid, block, 0, stream>>>(filter_src, mapx, mapy, dst);
98+
if (isRelative)
99+
remap_relative<<<grid, block, 0, stream>>>(filter_src, mapx, mapy, dst);
100+
else
101+
remap<<<grid, block, 0, stream>>>(filter_src, mapx, mapy, dst);
85102
cudaSafeCall( cudaGetLastError() );
86103
}
87104
};
88105

89106
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherNonStream
90107
{
91-
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, bool)
108+
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, bool, bool isRelative)
92109
{
93110
CV_UNUSED(srcWhole);
94111
CV_UNUSED(xoff);
@@ -102,7 +119,10 @@ namespace cv { namespace cuda { namespace device
102119
BorderReader<PtrStep<T>, B<work_type>> brdSrc(src, brd);
103120
Filter<BorderReader<PtrStep<T>, B<work_type>>> filter_src(brdSrc);
104121

105-
remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
122+
if (isRelative)
123+
remap_relative<<<grid, block>>>(filter_src, mapx, mapy, dst);
124+
else
125+
remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
106126
cudaSafeCall( cudaGetLastError() );
107127

108128
cudaSafeCall( cudaDeviceSynchronize() );
@@ -112,7 +132,7 @@ namespace cv { namespace cuda { namespace device
112132
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherNonStreamTex
113133
{
114134
static void call(PtrStepSz< T > src, PtrStepSz< T > srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy,
115-
PtrStepSz< T > dst, const float* borderValue, bool cc20)
135+
PtrStepSz< T > dst, const float* borderValue, bool cc20, bool isRelative)
116136
{
117137
typedef typename TypeVec<float, VecTraits< T >::cn>::vec_type work_type;
118138
dim3 block(32, cc20 ? 8 : 4);
@@ -123,15 +143,21 @@ namespace cv { namespace cuda { namespace device
123143
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
124144
BorderReader<cudev::TexturePtr<T>, B<work_type>> brdSrc(texSrcWhole, brd);
125145
Filter<BorderReader<cudev::TexturePtr<T>, B<work_type>>> filter_src(brdSrc);
126-
remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
146+
if (isRelative)
147+
remap_relative<<<grid, block>>>(filter_src, mapx, mapy, dst);
148+
else
149+
remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
127150

128151
}
129152
else {
130153
cudev::TextureOff<T> texSrcWhole(srcWhole, yoff, xoff);
131154
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
132155
BorderReader<cudev::TextureOffPtr<T>, B<work_type>> brdSrc(texSrcWhole, brd);
133156
Filter<BorderReader<cudev::TextureOffPtr<T>, B<work_type>>> filter_src(brdSrc);
134-
remap<<<grid, block >>>(filter_src, mapx, mapy, dst);
157+
if (isRelative)
158+
remap_relative<<<grid, block>>>(filter_src, mapx, mapy, dst);
159+
else
160+
remap<<<grid, block >>>(filter_src, mapx, mapy, dst);
135161
}
136162

137163
cudaSafeCall( cudaGetLastError() );
@@ -142,23 +168,29 @@ namespace cv { namespace cuda { namespace device
142168
template <template <typename> class Filter, typename T> struct RemapDispatcherNonStreamTex<Filter, BrdReplicate, T>
143169
{
144170
static void call(PtrStepSz< T > src, PtrStepSz< T > srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy,
145-
PtrStepSz< T > dst, const float*, bool)
171+
PtrStepSz< T > dst, const float*, bool, bool isRelative)
146172
{
147173
dim3 block(32, 8);
148174
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
149175
if (srcWhole.cols == src.cols && srcWhole.rows == src.rows)
150176
{
151177
cudev::Texture<T> texSrcWhole(srcWhole);
152178
Filter<cudev::TexturePtr<T>> filter_src(texSrcWhole);
153-
remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
179+
if (isRelative)
180+
remap_relative<<<grid, block>>>(filter_src, mapx, mapy, dst);
181+
else
182+
remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
154183
}
155184
else
156185
{
157186
cudev::TextureOff<T> texSrcWhole(srcWhole, yoff, xoff);
158187
BrdReplicate<T> brd(src.rows, src.cols);
159188
BorderReader<cudev::TextureOffPtr<T>, BrdReplicate<T>> brdSrc(texSrcWhole, brd);
160189
Filter<BorderReader<cudev::TextureOffPtr<T>, BrdReplicate<T>>> filter_src(brdSrc);
161-
remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
190+
if (isRelative)
191+
remap_relative<<<grid, block>>>(filter_src, mapx, mapy, dst);
192+
else
193+
remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
162194
}
163195
cudaSafeCall( cudaGetLastError() );
164196
cudaSafeCall( cudaDeviceSynchronize() );
@@ -203,20 +235,20 @@ namespace cv { namespace cuda { namespace device
203235
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcher
204236
{
205237
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy,
206-
PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20)
238+
PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative)
207239
{
208240
if (stream == 0)
209-
RemapDispatcherNonStream<Filter, B, T>::call(src, srcWhole, xoff, yoff, mapx, mapy, dst, borderValue, cc20);
241+
RemapDispatcherNonStream<Filter, B, T>::call(src, srcWhole, xoff, yoff, mapx, mapy, dst, borderValue, cc20, isRelative);
210242
else
211-
RemapDispatcherStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue, stream, cc20);
243+
RemapDispatcherStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue, stream, cc20, isRelative);
212244
}
213245
};
214246

215247
template <typename T> void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap,
216-
PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20)
248+
PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative)
217249
{
218250
typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap,
219-
PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20);
251+
PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
220252

221253
static const caller_t callers[3][5] =
222254
{
@@ -244,24 +276,24 @@ namespace cv { namespace cuda { namespace device
244276
};
245277

246278
callers[interpolation][borderMode](static_cast<PtrStepSz<T>>(src), static_cast<PtrStepSz<T>>(srcWhole), xoff, yoff, xmap, ymap,
247-
static_cast<PtrStepSz<T>>(dst), borderValue, stream, cc20);
279+
static_cast<PtrStepSz<T>>(dst), borderValue, stream, cc20, isRelative);
248280
}
249281

250-
template void remap_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
251-
template void remap_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
252-
template void remap_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
282+
template void remap_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
283+
template void remap_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
284+
template void remap_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
253285

254-
template void remap_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
255-
template void remap_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
256-
template void remap_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
286+
template void remap_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
287+
template void remap_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
288+
template void remap_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
257289

258-
template void remap_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
259-
template void remap_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
260-
template void remap_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
290+
template void remap_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
291+
template void remap_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
292+
template void remap_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
261293

262-
template void remap_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
263-
template void remap_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
264-
template void remap_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
294+
template void remap_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
295+
template void remap_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
296+
template void remap_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
265297
} // namespace imgproc
266298
}}} // namespace cv { namespace cuda { namespace device
267299

modules/cudawarping/src/remap.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,16 +54,19 @@ namespace cv { namespace cuda { namespace device
5454
{
5555
template <typename T>
5656
void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst,
57-
int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
57+
int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
5858
}
5959
}}}
6060

6161
void cv::cuda::remap(InputArray _src, OutputArray _dst, InputArray _xmap, InputArray _ymap, int interpolation, int borderMode, Scalar borderValue, Stream& stream)
6262
{
6363
using namespace cv::cuda::device::imgproc;
6464

65+
const bool hasRelativeFlag = ((interpolation & WARP_RELATIVE_MAP) != 0);
66+
interpolation &= ~WARP_RELATIVE_MAP;
67+
6568
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation,
66-
int borderMode, const float* borderValue, cudaStream_t stream, bool cc20);
69+
int borderMode, const float* borderValue, cudaStream_t stream, bool cc20, bool isRelative);
6770
static const func_t funcs[6][4] =
6871
{
6972
{remap_gpu<uchar> , 0 /*remap_gpu<uchar2>*/ , remap_gpu<uchar3> , remap_gpu<uchar4> },
@@ -98,7 +101,7 @@ void cv::cuda::remap(InputArray _src, OutputArray _dst, InputArray _xmap, InputA
98101
src.locateROI(wholeSize, ofs);
99102

100103
func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, xmap, ymap,
101-
dst, interpolation, borderMode, borderValueFloat.val, StreamAccessor::getStream(stream), deviceSupports(FEATURE_SET_COMPUTE_20));
104+
dst, interpolation, borderMode, borderValueFloat.val, StreamAccessor::getStream(stream), deviceSupports(FEATURE_SET_COMPUTE_20), hasRelativeFlag);
102105
}
103106

104107
#endif // HAVE_CUDA

0 commit comments

Comments
 (0)