@@ -68,9 +68,23 @@ namespace cv { namespace cuda { namespace device
68
68
}
69
69
}
70
70
71
// Kernel introduced by this change: relative-map variant of remap.
// Each thread derives one (x, y) position from its block and thread indices
// and does nothing when that position lies outside dst.cols x dst.rows.
// The values read from mapx/mapy at (y, x) are added to the thread's own
// x and y, so the maps act as per-pixel offsets rather than absolute
// coordinates.
//   src  - callable evaluated as src(ycoo, xcoo); its concrete type is the
//          Ptr2D template argument chosen by the caller
//   mapx - per-pixel x offsets, read at mapx.ptr(y)[x]
//   mapy - per-pixel y offsets, read at mapy.ptr(y)[x]
//   dst  - output; dst.ptr(y)[x] receives the result converted with
//          saturate_cast<T>
+ template <typename Ptr2D, typename T> __global__ void remap_relative (const Ptr2D src, const PtrStepf mapx, const PtrStepf mapy, PtrStepSz<T> dst)
72
+ {
73
+ const int x = blockDim .x * blockIdx .x + threadIdx .x ;
74
+ const int y = blockDim .y * blockIdx .y + threadIdx .y ;
75
+
76
// Guard: the launch grid may cover more positions than dst holds.
+ if (x < dst.cols && y < dst.rows )
77
+ {
78
// Source coordinate = destination coordinate + offset stored in the map.
+ const float xcoo = x+mapx.ptr (y)[x];
79
+ const float ycoo = y+mapy.ptr (y)[x];
80
+
81
// Note the argument order: src is called with (row, column) = (ycoo, xcoo).
+ dst.ptr (y)[x] = saturate_cast<T>(src (ycoo, xcoo));
82
+ }
83
+ }
84
+
71
85
template <template <typename > class Filter , template <typename > class B , typename T> struct RemapDispatcherStream
72
86
{
73
- static void call (PtrStepSz<T> src, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float * borderValue, cudaStream_t stream, bool )
87
+ static void call (PtrStepSz<T> src, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float * borderValue, cudaStream_t stream, bool , bool isRelative )
74
88
{
75
89
typedef typename TypeVec<float , VecTraits<T>::cn>::vec_type work_type;
76
90
@@ -81,14 +95,17 @@ namespace cv { namespace cuda { namespace device
81
95
BorderReader<PtrStep<T>, B<work_type>> brdSrc (src, brd);
82
96
Filter<BorderReader<PtrStep<T>, B<work_type>>> filter_src (brdSrc);
83
97
84
- remap<<<grid, block, 0 , stream>>> (filter_src, mapx, mapy, dst);
98
+ if (isRelative)
99
+ remap_relative<<<grid, block, 0 , stream>>> (filter_src, mapx, mapy, dst);
100
+ else
101
+ remap<<<grid, block, 0 , stream>>> (filter_src, mapx, mapy, dst);
85
102
cudaSafeCall ( cudaGetLastError () );
86
103
}
87
104
};
88
105
89
106
template <template <typename > class Filter , template <typename > class B , typename T> struct RemapDispatcherNonStream
90
107
{
91
- static void call (PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float * borderValue, bool )
108
+ static void call (PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float * borderValue, bool , bool isRelative )
92
109
{
93
110
CV_UNUSED (srcWhole);
94
111
CV_UNUSED (xoff);
@@ -102,7 +119,10 @@ namespace cv { namespace cuda { namespace device
102
119
BorderReader<PtrStep<T>, B<work_type>> brdSrc (src, brd);
103
120
Filter<BorderReader<PtrStep<T>, B<work_type>>> filter_src (brdSrc);
104
121
105
- remap<<<grid, block>>> (filter_src, mapx, mapy, dst);
122
+ if (isRelative)
123
+ remap_relative<<<grid, block>>> (filter_src, mapx, mapy, dst);
124
+ else
125
+ remap<<<grid, block>>> (filter_src, mapx, mapy, dst);
106
126
cudaSafeCall ( cudaGetLastError () );
107
127
108
128
cudaSafeCall ( cudaDeviceSynchronize () );
@@ -112,7 +132,7 @@ namespace cv { namespace cuda { namespace device
112
132
template <template <typename > class Filter , template <typename > class B , typename T> struct RemapDispatcherNonStreamTex
113
133
{
114
134
static void call (PtrStepSz< T > src, PtrStepSz< T > srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy,
115
- PtrStepSz< T > dst, const float * borderValue, bool cc20)
135
+ PtrStepSz< T > dst, const float * borderValue, bool cc20, bool isRelative )
116
136
{
117
137
typedef typename TypeVec<float , VecTraits< T >::cn>::vec_type work_type;
118
138
dim3 block (32 , cc20 ? 8 : 4 );
@@ -123,15 +143,21 @@ namespace cv { namespace cuda { namespace device
123
143
B<work_type> brd (src.rows , src.cols , VecTraits<work_type>::make (borderValue));
124
144
BorderReader<cudev::TexturePtr<T>, B<work_type>> brdSrc (texSrcWhole, brd);
125
145
Filter<BorderReader<cudev::TexturePtr<T>, B<work_type>>> filter_src (brdSrc);
126
- remap<<<grid, block>>> (filter_src, mapx, mapy, dst);
146
+ if (isRelative)
147
+ remap_relative<<<grid, block>>> (filter_src, mapx, mapy, dst);
148
+ else
149
+ remap<<<grid, block>>> (filter_src, mapx, mapy, dst);
127
150
128
151
}
129
152
else {
130
153
cudev::TextureOff<T> texSrcWhole (srcWhole, yoff, xoff);
131
154
B<work_type> brd (src.rows , src.cols , VecTraits<work_type>::make (borderValue));
132
155
BorderReader<cudev::TextureOffPtr<T>, B<work_type>> brdSrc (texSrcWhole, brd);
133
156
Filter<BorderReader<cudev::TextureOffPtr<T>, B<work_type>>> filter_src (brdSrc);
134
- remap<<<grid, block >>> (filter_src, mapx, mapy, dst);
157
+ if (isRelative)
158
+ remap_relative<<<grid, block>>> (filter_src, mapx, mapy, dst);
159
+ else
160
+ remap<<<grid, block >>> (filter_src, mapx, mapy, dst);
135
161
}
136
162
137
163
cudaSafeCall ( cudaGetLastError () );
@@ -142,23 +168,29 @@ namespace cv { namespace cuda { namespace device
142
168
template <template <typename > class Filter , typename T> struct RemapDispatcherNonStreamTex <Filter, BrdReplicate, T>
143
169
{
144
170
static void call (PtrStepSz< T > src, PtrStepSz< T > srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy,
145
- PtrStepSz< T > dst, const float *, bool )
171
+ PtrStepSz< T > dst, const float *, bool , bool isRelative )
146
172
{
147
173
dim3 block (32 , 8 );
148
174
dim3 grid (divUp (dst.cols , block.x ), divUp (dst.rows , block.y ));
149
175
if (srcWhole.cols == src.cols && srcWhole.rows == src.rows )
150
176
{
151
177
cudev::Texture<T> texSrcWhole (srcWhole);
152
178
Filter<cudev::TexturePtr<T>> filter_src (texSrcWhole);
153
- remap<<<grid, block>>> (filter_src, mapx, mapy, dst);
179
+ if (isRelative)
180
+ remap_relative<<<grid, block>>> (filter_src, mapx, mapy, dst);
181
+ else
182
+ remap<<<grid, block>>> (filter_src, mapx, mapy, dst);
154
183
}
155
184
else
156
185
{
157
186
cudev::TextureOff<T> texSrcWhole (srcWhole, yoff, xoff);
158
187
BrdReplicate<T> brd (src.rows , src.cols );
159
188
BorderReader<cudev::TextureOffPtr<T>, BrdReplicate<T>> brdSrc (texSrcWhole, brd);
160
189
Filter<BorderReader<cudev::TextureOffPtr<T>, BrdReplicate<T>>> filter_src (brdSrc);
161
- remap<<<grid, block>>> (filter_src, mapx, mapy, dst);
190
+ if (isRelative)
191
+ remap_relative<<<grid, block>>> (filter_src, mapx, mapy, dst);
192
+ else
193
+ remap<<<grid, block>>> (filter_src, mapx, mapy, dst);
162
194
}
163
195
cudaSafeCall ( cudaGetLastError () );
164
196
cudaSafeCall ( cudaDeviceSynchronize () );
@@ -203,20 +235,20 @@ namespace cv { namespace cuda { namespace device
203
235
// Entry point that picks a launch path based on the stream argument.
// In the added (+) lines the new trailing `isRelative` flag is accepted
// here and forwarded unchanged to whichever dispatcher is selected.
template <template <typename > class Filter , template <typename > class B , typename T> struct RemapDispatcher
204
236
{
205
237
static void call (PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy,
206
- PtrStepSz<T> dst, const float * borderValue, cudaStream_t stream, bool cc20)
238
+ PtrStepSz<T> dst, const float * borderValue, cudaStream_t stream, bool cc20, bool isRelative )
207
239
{
208
240
// stream == 0 selects the non-stream dispatcher, which also receives
// srcWhole, xoff and yoff.
if (stream == 0 )
209
- RemapDispatcherNonStream<Filter, B, T>::call (src, srcWhole, xoff, yoff, mapx, mapy, dst, borderValue, cc20);
241
+ RemapDispatcherNonStream<Filter, B, T>::call (src, srcWhole, xoff, yoff, mapx, mapy, dst, borderValue, cc20, isRelative );
210
242
// Any other stream goes to the stream dispatcher, which is passed the
// stream but not srcWhole, xoff or yoff.
else
211
- RemapDispatcherStream<Filter, B, T>::call (src, mapx, mapy, dst, borderValue, stream, cc20);
243
+ RemapDispatcherStream<Filter, B, T>::call (src, mapx, mapy, dst, borderValue, stream, cc20, isRelative );
212
244
}
213
245
};
214
246
215
247
template <typename T> void remap_gpu (PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap,
216
- PtrStepSzb dst, int interpolation, int borderMode, const float * borderValue, cudaStream_t stream, bool cc20)
248
+ PtrStepSzb dst, int interpolation, int borderMode, const float * borderValue, cudaStream_t stream, bool cc20, bool isRelative )
217
249
{
218
250
typedef void (*caller_t )(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap,
219
- PtrStepSz<T> dst, const float * borderValue, cudaStream_t stream, bool cc20);
251
+ PtrStepSz<T> dst, const float * borderValue, cudaStream_t stream, bool cc20, bool isRelative );
220
252
221
253
static const caller_t callers[3 ][5 ] =
222
254
{
@@ -244,24 +276,24 @@ namespace cv { namespace cuda { namespace device
244
276
};
245
277
246
278
callers[interpolation][borderMode](static_cast <PtrStepSz<T>>(src), static_cast <PtrStepSz<T>>(srcWhole), xoff, yoff, xmap, ymap,
247
- static_cast <PtrStepSz<T>>(dst), borderValue, stream, cc20);
279
+ static_cast <PtrStepSz<T>>(dst), borderValue, stream, cc20, isRelative );
248
280
}
249
281
250
// Explicit instantiations of remap_gpu for uchar, ushort, short and float,
// each in its plain, 3-component and 4-component form. Every removed (-)
// line is replaced by an added (+) line whose only difference is the new
// trailing `bool isRelative` parameter.
- template void remap_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float * borderValue, cudaStream_t stream, bool cc20);
251
- template void remap_gpu<uchar3 >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float * borderValue, cudaStream_t stream, bool cc20);
252
- template void remap_gpu<uchar4 >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float * borderValue, cudaStream_t stream, bool cc20);
282
+ template void remap_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float * borderValue, cudaStream_t stream, bool cc20, bool isRelative );
283
+ template void remap_gpu<uchar3 >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float * borderValue, cudaStream_t stream, bool cc20, bool isRelative );
284
+ template void remap_gpu<uchar4 >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float * borderValue, cudaStream_t stream, bool cc20, bool isRelative );
253
285
254
- template void remap_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float * borderValue, cudaStream_t stream, bool cc20);
255
- template void remap_gpu<ushort3 >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float * borderValue, cudaStream_t stream, bool cc20);
256
- template void remap_gpu<ushort4 >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float * borderValue, cudaStream_t stream, bool cc20);
286
+ template void remap_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float * borderValue, cudaStream_t stream, bool cc20, bool isRelative );
287
+ template void remap_gpu<ushort3 >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float * borderValue, cudaStream_t stream, bool cc20, bool isRelative );
288
+ template void remap_gpu<ushort4 >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float * borderValue, cudaStream_t stream, bool cc20, bool isRelative );
257
289
258
- template void remap_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float * borderValue, cudaStream_t stream, bool cc20);
259
- template void remap_gpu<short3 >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float * borderValue, cudaStream_t stream, bool cc20);
260
- template void remap_gpu<short4 >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float * borderValue, cudaStream_t stream, bool cc20);
290
+ template void remap_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float * borderValue, cudaStream_t stream, bool cc20, bool isRelative );
291
+ template void remap_gpu<short3 >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float * borderValue, cudaStream_t stream, bool cc20, bool isRelative );
292
+ template void remap_gpu<short4 >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float * borderValue, cudaStream_t stream, bool cc20, bool isRelative );
261
293
262
- template void remap_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float * borderValue, cudaStream_t stream, bool cc20);
263
- template void remap_gpu<float3 >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float * borderValue, cudaStream_t stream, bool cc20);
264
- template void remap_gpu<float4 >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float * borderValue, cudaStream_t stream, bool cc20);
294
+ template void remap_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float * borderValue, cudaStream_t stream, bool cc20, bool isRelative );
295
+ template void remap_gpu<float3 >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float * borderValue, cudaStream_t stream, bool cc20, bool isRelative );
296
+ template void remap_gpu<float4 >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float * borderValue, cudaStream_t stream, bool cc20, bool isRelative );
265
297
} // namespace imgproc
266
298
}}} // namespace cv { namespace cuda { namespace cudev
267
299
0 commit comments