45
45
using namespace cv ;
46
46
using namespace cv ::cuda;
47
47
48
// Compile-time switch between the stream-context NPP entry points and the
// plain ones. It is defined exactly once, and only when nothing earlier has
// already set it, so the version check below is not silently overridden by a
// second definition.
// NOTE(review): 10100 is presumably the NPP_VERSION value of NPP 10.1.0 - confirm
// against the NPP version header.
#ifndef USE_NPP_STREAM_CONTEXT
#define USE_NPP_STREAM_CONTEXT (NPP_VERSION >= (10 * 1000 + 1 * 100 + 0))
#endif

// Token that TRY_CTX appends to a function name: `_Ctx` when the
// stream-context API is enabled, nothing otherwise.
#if USE_NPP_STREAM_CONTEXT
#define CTX_PREFIX _Ctx
#else
#define CTX_PREFIX
#endif

// Two-level paste: PPCAT expands its arguments first, PPCAT_NX then joins them.
// There must be no whitespace between the macro name and '(' - otherwise the
// macro becomes object-like and no pasting happens.
#define PPCAT_NX(A, B) A##B
#define PPCAT(A, B) PPCAT_NX(A, B)

// TRY_CTX(func) yields `func` followed by the suffix selected above.
#define TRY_CTX(func) PPCAT(func, CTX_PREFIX)
48
58
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
49
59
50
60
// Stub compiled when HAVE_CUDA is not defined or CUDA_DISABLER is defined:
// every argument is ignored and the call is forwarded to throw_no_cuda().
void cv::cuda::warpAffine(InputArray, OutputArray, InputArray, Size, int, int, Scalar, Stream&)
{
    throw_no_cuda();
}
@@ -135,13 +145,16 @@ void cv::cuda::buildWarpPerspectiveMaps(InputArray _M, bool inverse, Size dsize,
135
145
136
146
namespace
137
147
{
138
- template <int DEPTH> struct NppWarpFunc
148
+ template <int DEPTH>
149
+ struct NppWarpFunc
139
150
{
140
151
typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type;
141
-
142
- typedef NppStatus (*func_t )(const npp_type* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, npp_type* pDst,
143
- int dstStep, NppiRect dstRoi, const double coeffs[][3 ],
144
- int interpolation);
152
+ #if USE_NPP_STREAM_CONTEXT
153
+ typedef NppStatus (*func_t )(const npp_type* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, npp_type* pDst, int dstStep, NppiRect dstRoi, const double coeffs[][3 ], int interpolation,
154
+ NppStreamContext stream_ctx);
155
+ #else
156
+ typedef NppStatus (*func_t )(const npp_type* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, npp_type* pDst, int dstStep, NppiRect dstRoi, const double coeffs[][3 ], int interpolation);
157
+ #endif
145
158
};
146
159
147
160
template <int DEPTH, typename NppWarpFunc<DEPTH>::func_t func> struct NppWarp
@@ -168,11 +181,15 @@ namespace
168
181
dstroi.height = dst.rows ;
169
182
dstroi.width = dst.cols ;
170
183
171
- cv::cuda::NppStreamHandler h (stream);
172
-
173
- nppSafeCall ( func (src.ptr <npp_type>(), srcsz, static_cast <int >(src.step ), srcroi,
174
- dst.ptr <npp_type>(), static_cast <int >(dst.step ), dstroi,
175
- coeffs, npp_inter[interpolation]) );
184
+ #if USE_NPP_STREAM_CONTEXT
185
+ NppStreamContext nppStreamContext{};
186
+ nppSafeCall (nppGetStreamContext (&nppStreamContext));
187
+ nppStreamContext.hStream = stream;
188
+ nppSafeCall (func (src.ptr <npp_type>(), srcsz, static_cast <int >(src.step ), srcroi, dst.ptr <npp_type>(), static_cast <int >(dst.step ), dstroi, coeffs, npp_inter[interpolation], nppStreamContext));
189
+ #else
190
+ NppStreamHandler h (stream);
191
+ nppSafeCall (func (src.ptr <npp_type>(), srcsz, static_cast <int >(src.step ), srcroi, dst.ptr <npp_type>(), static_cast <int >(dst.step ), dstroi, coeffs, npp_inter[interpolation]));
192
+ #endif
176
193
177
194
if (stream == 0 )
178
195
cudaSafeCall ( cudaDeviceSynchronize () );
@@ -251,20 +268,20 @@ void cv::cuda::warpAffine(InputArray _src, OutputArray _dst, InputArray _M, Size
251
268
static const func_t funcs[2 ][6 ][4 ] =
252
269
{
253
270
{
254
- {NppWarp<CV_8U, nppiWarpAffine_8u_C1R>::call, 0 , NppWarp<CV_8U, nppiWarpAffine_8u_C3R>::call, NppWarp<CV_8U, nppiWarpAffine_8u_C4R>::call},
271
+ {NppWarp<CV_8U, TRY_CTX ( nppiWarpAffine_8u_C1R) >::call, 0 , NppWarp<CV_8U, TRY_CTX ( nppiWarpAffine_8u_C3R) >::call, NppWarp<CV_8U, TRY_CTX ( nppiWarpAffine_8u_C4R) >::call},
255
272
{0 , 0 , 0 , 0 },
256
- {NppWarp<CV_16U, nppiWarpAffine_16u_C1R>::call, 0 , NppWarp<CV_16U, nppiWarpAffine_16u_C3R>::call, NppWarp<CV_16U, nppiWarpAffine_16u_C4R>::call},
273
+ {NppWarp<CV_16U, TRY_CTX ( nppiWarpAffine_16u_C1R) >::call, 0 , NppWarp<CV_16U, TRY_CTX ( nppiWarpAffine_16u_C3R) >::call, NppWarp<CV_16U, TRY_CTX ( nppiWarpAffine_16u_C4R) >::call},
257
274
{0 , 0 , 0 , 0 },
258
- {NppWarp<CV_32S, nppiWarpAffine_32s_C1R>::call, 0 , NppWarp<CV_32S, nppiWarpAffine_32s_C3R>::call, NppWarp<CV_32S, nppiWarpAffine_32s_C4R>::call},
259
- {NppWarp<CV_32F, nppiWarpAffine_32f_C1R>::call, 0 , NppWarp<CV_32F, nppiWarpAffine_32f_C3R>::call, NppWarp<CV_32F, nppiWarpAffine_32f_C4R>::call}
275
+ {NppWarp<CV_32S, TRY_CTX ( nppiWarpAffine_32s_C1R) >::call, 0 , NppWarp<CV_32S, TRY_CTX ( nppiWarpAffine_32s_C3R) >::call, NppWarp<CV_32S, TRY_CTX ( nppiWarpAffine_32s_C4R) >::call},
276
+ {NppWarp<CV_32F, TRY_CTX ( nppiWarpAffine_32f_C1R) >::call, 0 , NppWarp<CV_32F, TRY_CTX ( nppiWarpAffine_32f_C3R) >::call, NppWarp<CV_32F, TRY_CTX ( nppiWarpAffine_32f_C4R) >::call}
260
277
},
261
278
{
262
- {NppWarp<CV_8U, nppiWarpAffineBack_8u_C1R>::call, 0 , NppWarp<CV_8U, nppiWarpAffineBack_8u_C3R>::call, NppWarp<CV_8U, nppiWarpAffineBack_8u_C4R>::call},
279
+ {NppWarp<CV_8U, TRY_CTX ( nppiWarpAffineBack_8u_C1R) >::call, 0 , NppWarp<CV_8U, TRY_CTX ( nppiWarpAffineBack_8u_C3R) >::call, NppWarp<CV_8U, TRY_CTX ( nppiWarpAffineBack_8u_C4R) >::call},
263
280
{0 , 0 , 0 , 0 },
264
- {NppWarp<CV_16U, nppiWarpAffineBack_16u_C1R>::call, 0 , NppWarp<CV_16U, nppiWarpAffineBack_16u_C3R>::call, NppWarp<CV_16U, nppiWarpAffineBack_16u_C4R>::call},
281
+ {NppWarp<CV_16U, TRY_CTX ( nppiWarpAffineBack_16u_C1R) >::call, 0 , NppWarp<CV_16U, TRY_CTX ( nppiWarpAffineBack_16u_C3R) >::call, NppWarp<CV_16U, TRY_CTX ( nppiWarpAffineBack_16u_C4R) >::call},
265
282
{0 , 0 , 0 , 0 },
266
- {NppWarp<CV_32S, nppiWarpAffineBack_32s_C1R>::call, 0 , NppWarp<CV_32S, nppiWarpAffineBack_32s_C3R>::call, NppWarp<CV_32S, nppiWarpAffineBack_32s_C4R>::call},
267
- {NppWarp<CV_32F, nppiWarpAffineBack_32f_C1R>::call, 0 , NppWarp<CV_32F, nppiWarpAffineBack_32f_C3R>::call, NppWarp<CV_32F, nppiWarpAffineBack_32f_C4R>::call}
283
+ {NppWarp<CV_32S, TRY_CTX ( nppiWarpAffineBack_32s_C1R) >::call, 0 , NppWarp<CV_32S, TRY_CTX ( nppiWarpAffineBack_32s_C3R) >::call, NppWarp<CV_32S, TRY_CTX ( nppiWarpAffineBack_32s_C4R) >::call},
284
+ {NppWarp<CV_32F, TRY_CTX ( nppiWarpAffineBack_32f_C1R) >::call, 0 , NppWarp<CV_32F, TRY_CTX ( nppiWarpAffineBack_32f_C3R) >::call, NppWarp<CV_32F, TRY_CTX ( nppiWarpAffineBack_32f_C4R) >::call}
268
285
}
269
286
};
270
287
@@ -390,20 +407,20 @@ void cv::cuda::warpPerspective(InputArray _src, OutputArray _dst, InputArray _M,
390
407
static const func_t funcs[2 ][6 ][4 ] =
391
408
{
392
409
{
393
- {NppWarp<CV_8U, nppiWarpPerspective_8u_C1R>::call, 0 , NppWarp<CV_8U, nppiWarpPerspective_8u_C3R>::call, NppWarp<CV_8U, nppiWarpPerspective_8u_C4R>::call},
410
+ {NppWarp<CV_8U, TRY_CTX ( nppiWarpPerspective_8u_C1R) >::call, 0 , NppWarp<CV_8U, TRY_CTX ( nppiWarpPerspective_8u_C3R) >::call, NppWarp<CV_8U, TRY_CTX ( nppiWarpPerspective_8u_C4R) >::call},
394
411
{0 , 0 , 0 , 0 },
395
- {NppWarp<CV_16U, nppiWarpPerspective_16u_C1R>::call, 0 , NppWarp<CV_16U, nppiWarpPerspective_16u_C3R>::call, NppWarp<CV_16U, nppiWarpPerspective_16u_C4R>::call},
412
+ {NppWarp<CV_16U, TRY_CTX ( nppiWarpPerspective_16u_C1R) >::call, 0 , NppWarp<CV_16U, TRY_CTX ( nppiWarpPerspective_16u_C3R) >::call, NppWarp<CV_16U, TRY_CTX ( nppiWarpPerspective_16u_C4R) >::call},
396
413
{0 , 0 , 0 , 0 },
397
- {NppWarp<CV_32S, nppiWarpPerspective_32s_C1R>::call, 0 , NppWarp<CV_32S, nppiWarpPerspective_32s_C3R>::call, NppWarp<CV_32S, nppiWarpPerspective_32s_C4R>::call},
398
- {NppWarp<CV_32F, nppiWarpPerspective_32f_C1R>::call, 0 , NppWarp<CV_32F, nppiWarpPerspective_32f_C3R>::call, NppWarp<CV_32F, nppiWarpPerspective_32f_C4R>::call}
414
+ {NppWarp<CV_32S, TRY_CTX ( nppiWarpPerspective_32s_C1R) >::call, 0 , NppWarp<CV_32S, TRY_CTX ( nppiWarpPerspective_32s_C3R) >::call, NppWarp<CV_32S, TRY_CTX ( nppiWarpPerspective_32s_C4R) >::call},
415
+ {NppWarp<CV_32F, TRY_CTX ( nppiWarpPerspective_32f_C1R) >::call, 0 , NppWarp<CV_32F, TRY_CTX ( nppiWarpPerspective_32f_C3R) >::call, NppWarp<CV_32F, TRY_CTX ( nppiWarpPerspective_32f_C4R) >::call}
399
416
},
400
417
{
401
- {NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C1R>::call, 0 , NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C3R>::call, NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C4R>::call},
418
+ {NppWarp<CV_8U, TRY_CTX ( nppiWarpPerspectiveBack_8u_C1R) >::call, 0 , NppWarp<CV_8U, TRY_CTX ( nppiWarpPerspectiveBack_8u_C3R) >::call, NppWarp<CV_8U, TRY_CTX ( nppiWarpPerspectiveBack_8u_C4R) >::call},
402
419
{0 , 0 , 0 , 0 },
403
- {NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C1R>::call, 0 , NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C3R>::call, NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C4R>::call},
420
+ {NppWarp<CV_16U, TRY_CTX ( nppiWarpPerspectiveBack_16u_C1R) >::call, 0 , NppWarp<CV_16U, TRY_CTX ( nppiWarpPerspectiveBack_16u_C3R) >::call, NppWarp<CV_16U, TRY_CTX ( nppiWarpPerspectiveBack_16u_C4R) >::call},
404
421
{0 , 0 , 0 , 0 },
405
- {NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C1R>::call, 0 , NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C3R>::call, NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C4R>::call},
406
- {NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C1R>::call, 0 , NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C3R>::call, NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C4R>::call}
422
+ {NppWarp<CV_32S, TRY_CTX ( nppiWarpPerspectiveBack_32s_C1R) >::call, 0 , NppWarp<CV_32S, TRY_CTX ( nppiWarpPerspectiveBack_32s_C3R) >::call, NppWarp<CV_32S, TRY_CTX ( nppiWarpPerspectiveBack_32s_C4R) >::call},
423
+ {NppWarp<CV_32F, TRY_CTX ( nppiWarpPerspectiveBack_32f_C1R) >::call, 0 , NppWarp<CV_32F, TRY_CTX ( nppiWarpPerspectiveBack_32f_C3R) >::call, NppWarp<CV_32F, TRY_CTX ( nppiWarpPerspectiveBack_32f_C4R) >::call}
407
424
}
408
425
};
409
426
0 commit comments