Skip to content

Commit ec9d7bc

Browse files
committed
Add nppStreamContext to cuda::warpAffine
1 parent d6102ef commit ec9d7bc

File tree

2 files changed

+45
-27
lines changed

2 files changed

+45
-27
lines changed

modules/cudawarping/src/warp.cpp

Lines changed: 43 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,16 @@
4545
using namespace cv;
4646
using namespace cv::cuda;
4747

48+
#define USE_NPP_STREAM_CONTEXT NPP_VERSION >= (10 * 1000 + 1 * 100 + 0)
49+
#define USE_NPP_STREAM_CONTEXT 0
50+
#if USE_NPP_STREAM_CONTEXT
51+
#define CTX_PREFIX _Ctx
52+
#else
53+
#define CTX_PREFIX
54+
#endif
55+
#define PPCAT_NX(A, B) A ## B
56+
#define PPCAT(A, B) PPCAT_NX(A, B)
57+
#define TRY_CTX(func) PPCAT(func, CTX_PREFIX)
4858
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
4959

5060
void cv::cuda::warpAffine(InputArray, OutputArray, InputArray, Size, int, int, Scalar, Stream&) { throw_no_cuda(); }
@@ -135,13 +145,16 @@ void cv::cuda::buildWarpPerspectiveMaps(InputArray _M, bool inverse, Size dsize,
135145

136146
namespace
137147
{
138-
template <int DEPTH> struct NppWarpFunc
148+
template <int DEPTH>
149+
struct NppWarpFunc
139150
{
140151
typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type;
141-
142-
typedef NppStatus (*func_t)(const npp_type* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, npp_type* pDst,
143-
int dstStep, NppiRect dstRoi, const double coeffs[][3],
144-
int interpolation);
152+
#if USE_NPP_STREAM_CONTEXT
153+
typedef NppStatus (*func_t)(const npp_type* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, npp_type* pDst, int dstStep, NppiRect dstRoi, const double coeffs[][3], int interpolation,
154+
NppStreamContext stream_ctx);
155+
#else
156+
typedef NppStatus(*func_t)(const npp_type* pSrc, NppiSize srcSize, int srcStep, NppiRect srcRoi, npp_type* pDst, int dstStep, NppiRect dstRoi, const double coeffs[][3], int interpolation);
157+
#endif
145158
};
146159

147160
template <int DEPTH, typename NppWarpFunc<DEPTH>::func_t func> struct NppWarp
@@ -168,11 +181,15 @@ namespace
168181
dstroi.height = dst.rows;
169182
dstroi.width = dst.cols;
170183

171-
cv::cuda::NppStreamHandler h(stream);
172-
173-
nppSafeCall( func(src.ptr<npp_type>(), srcsz, static_cast<int>(src.step), srcroi,
174-
dst.ptr<npp_type>(), static_cast<int>(dst.step), dstroi,
175-
coeffs, npp_inter[interpolation]) );
184+
#if USE_NPP_STREAM_CONTEXT
185+
NppStreamContext nppStreamContext{};
186+
nppSafeCall(nppGetStreamContext(&nppStreamContext));
187+
nppStreamContext.hStream = stream;
188+
nppSafeCall(func(src.ptr<npp_type>(), srcsz, static_cast<int>(src.step), srcroi, dst.ptr<npp_type>(), static_cast<int>(dst.step), dstroi, coeffs, npp_inter[interpolation], nppStreamContext));
189+
#else
190+
NppStreamHandler h(stream);
191+
nppSafeCall(func(src.ptr<npp_type>(), srcsz, static_cast<int>(src.step), srcroi, dst.ptr<npp_type>(), static_cast<int>(dst.step), dstroi, coeffs, npp_inter[interpolation]));
192+
#endif
176193

177194
if (stream == 0)
178195
cudaSafeCall( cudaDeviceSynchronize() );
@@ -251,20 +268,20 @@ void cv::cuda::warpAffine(InputArray _src, OutputArray _dst, InputArray _M, Size
251268
static const func_t funcs[2][6][4] =
252269
{
253270
{
254-
{NppWarp<CV_8U, nppiWarpAffine_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpAffine_8u_C3R>::call, NppWarp<CV_8U, nppiWarpAffine_8u_C4R>::call},
271+
{NppWarp<CV_8U, TRY_CTX(nppiWarpAffine_8u_C1R)>::call, 0, NppWarp<CV_8U, TRY_CTX(nppiWarpAffine_8u_C3R)>::call, NppWarp<CV_8U, TRY_CTX(nppiWarpAffine_8u_C4R)>::call},
255272
{0, 0, 0, 0},
256-
{NppWarp<CV_16U, nppiWarpAffine_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpAffine_16u_C3R>::call, NppWarp<CV_16U, nppiWarpAffine_16u_C4R>::call},
273+
{NppWarp<CV_16U, TRY_CTX(nppiWarpAffine_16u_C1R)>::call, 0, NppWarp<CV_16U, TRY_CTX(nppiWarpAffine_16u_C3R)>::call, NppWarp<CV_16U, TRY_CTX(nppiWarpAffine_16u_C4R)>::call},
257274
{0, 0, 0, 0},
258-
{NppWarp<CV_32S, nppiWarpAffine_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpAffine_32s_C3R>::call, NppWarp<CV_32S, nppiWarpAffine_32s_C4R>::call},
259-
{NppWarp<CV_32F, nppiWarpAffine_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpAffine_32f_C3R>::call, NppWarp<CV_32F, nppiWarpAffine_32f_C4R>::call}
275+
{NppWarp<CV_32S, TRY_CTX(nppiWarpAffine_32s_C1R)>::call, 0, NppWarp<CV_32S, TRY_CTX(nppiWarpAffine_32s_C3R)>::call, NppWarp<CV_32S, TRY_CTX(nppiWarpAffine_32s_C4R)>::call},
276+
{NppWarp<CV_32F, TRY_CTX(nppiWarpAffine_32f_C1R)>::call, 0, NppWarp<CV_32F, TRY_CTX(nppiWarpAffine_32f_C3R)>::call, NppWarp<CV_32F, TRY_CTX(nppiWarpAffine_32f_C4R)>::call}
260277
},
261278
{
262-
{NppWarp<CV_8U, nppiWarpAffineBack_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpAffineBack_8u_C3R>::call, NppWarp<CV_8U, nppiWarpAffineBack_8u_C4R>::call},
279+
{NppWarp<CV_8U, TRY_CTX(nppiWarpAffineBack_8u_C1R)>::call, 0, NppWarp<CV_8U, TRY_CTX(nppiWarpAffineBack_8u_C3R)>::call, NppWarp<CV_8U, TRY_CTX(nppiWarpAffineBack_8u_C4R)>::call},
263280
{0, 0, 0, 0},
264-
{NppWarp<CV_16U, nppiWarpAffineBack_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpAffineBack_16u_C3R>::call, NppWarp<CV_16U, nppiWarpAffineBack_16u_C4R>::call},
281+
{NppWarp<CV_16U, TRY_CTX(nppiWarpAffineBack_16u_C1R)>::call, 0, NppWarp<CV_16U, TRY_CTX(nppiWarpAffineBack_16u_C3R)>::call, NppWarp<CV_16U, TRY_CTX(nppiWarpAffineBack_16u_C4R)>::call},
265282
{0, 0, 0, 0},
266-
{NppWarp<CV_32S, nppiWarpAffineBack_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpAffineBack_32s_C3R>::call, NppWarp<CV_32S, nppiWarpAffineBack_32s_C4R>::call},
267-
{NppWarp<CV_32F, nppiWarpAffineBack_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpAffineBack_32f_C3R>::call, NppWarp<CV_32F, nppiWarpAffineBack_32f_C4R>::call}
283+
{NppWarp<CV_32S, TRY_CTX(nppiWarpAffineBack_32s_C1R)>::call, 0, NppWarp<CV_32S, TRY_CTX(nppiWarpAffineBack_32s_C3R)>::call, NppWarp<CV_32S, TRY_CTX(nppiWarpAffineBack_32s_C4R)>::call},
284+
{NppWarp<CV_32F, TRY_CTX(nppiWarpAffineBack_32f_C1R)>::call, 0, NppWarp<CV_32F, TRY_CTX(nppiWarpAffineBack_32f_C3R)>::call, NppWarp<CV_32F, TRY_CTX(nppiWarpAffineBack_32f_C4R)>::call}
268285
}
269286
};
270287

@@ -390,20 +407,20 @@ void cv::cuda::warpPerspective(InputArray _src, OutputArray _dst, InputArray _M,
390407
static const func_t funcs[2][6][4] =
391408
{
392409
{
393-
{NppWarp<CV_8U, nppiWarpPerspective_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpPerspective_8u_C3R>::call, NppWarp<CV_8U, nppiWarpPerspective_8u_C4R>::call},
410+
{NppWarp<CV_8U, TRY_CTX(nppiWarpPerspective_8u_C1R)>::call, 0, NppWarp<CV_8U, TRY_CTX(nppiWarpPerspective_8u_C3R)>::call, NppWarp<CV_8U, TRY_CTX(nppiWarpPerspective_8u_C4R)>::call},
394411
{0, 0, 0, 0},
395-
{NppWarp<CV_16U, nppiWarpPerspective_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpPerspective_16u_C3R>::call, NppWarp<CV_16U, nppiWarpPerspective_16u_C4R>::call},
412+
{NppWarp<CV_16U, TRY_CTX(nppiWarpPerspective_16u_C1R)>::call, 0, NppWarp<CV_16U, TRY_CTX(nppiWarpPerspective_16u_C3R)>::call, NppWarp<CV_16U, TRY_CTX(nppiWarpPerspective_16u_C4R)>::call},
396413
{0, 0, 0, 0},
397-
{NppWarp<CV_32S, nppiWarpPerspective_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpPerspective_32s_C3R>::call, NppWarp<CV_32S, nppiWarpPerspective_32s_C4R>::call},
398-
{NppWarp<CV_32F, nppiWarpPerspective_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpPerspective_32f_C3R>::call, NppWarp<CV_32F, nppiWarpPerspective_32f_C4R>::call}
414+
{NppWarp<CV_32S, TRY_CTX(nppiWarpPerspective_32s_C1R)>::call, 0, NppWarp<CV_32S, TRY_CTX(nppiWarpPerspective_32s_C3R)>::call, NppWarp<CV_32S, TRY_CTX(nppiWarpPerspective_32s_C4R)>::call},
415+
{NppWarp<CV_32F, TRY_CTX(nppiWarpPerspective_32f_C1R)>::call, 0, NppWarp<CV_32F, TRY_CTX(nppiWarpPerspective_32f_C3R)>::call, NppWarp<CV_32F, TRY_CTX(nppiWarpPerspective_32f_C4R)>::call}
399416
},
400417
{
401-
{NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C3R>::call, NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C4R>::call},
418+
{NppWarp<CV_8U, TRY_CTX(nppiWarpPerspectiveBack_8u_C1R)>::call, 0, NppWarp<CV_8U, TRY_CTX(nppiWarpPerspectiveBack_8u_C3R)>::call, NppWarp<CV_8U, TRY_CTX(nppiWarpPerspectiveBack_8u_C4R)>::call},
402419
{0, 0, 0, 0},
403-
{NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C3R>::call, NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C4R>::call},
420+
{NppWarp<CV_16U, TRY_CTX(nppiWarpPerspectiveBack_16u_C1R)>::call, 0, NppWarp<CV_16U, TRY_CTX(nppiWarpPerspectiveBack_16u_C3R)>::call, NppWarp<CV_16U, TRY_CTX(nppiWarpPerspectiveBack_16u_C4R)>::call},
404421
{0, 0, 0, 0},
405-
{NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C3R>::call, NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C4R>::call},
406-
{NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C3R>::call, NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C4R>::call}
422+
{NppWarp<CV_32S, TRY_CTX(nppiWarpPerspectiveBack_32s_C1R)>::call, 0, NppWarp<CV_32S, TRY_CTX(nppiWarpPerspectiveBack_32s_C3R)>::call, NppWarp<CV_32S, TRY_CTX(nppiWarpPerspectiveBack_32s_C4R)>::call},
423+
{NppWarp<CV_32F, TRY_CTX(nppiWarpPerspectiveBack_32f_C1R)>::call, 0, NppWarp<CV_32F, TRY_CTX(nppiWarpPerspectiveBack_32f_C3R)>::call, NppWarp<CV_32F, TRY_CTX(nppiWarpPerspectiveBack_32f_C4R)>::call}
407424
}
408425
};
409426

modules/cudawarping/test/test_warp_affine.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,8 +262,9 @@ CUDA_TEST_P(WarpAffineNPP, Accuracy)
262262
if (inverse)
263263
flags |= cv::WARP_INVERSE_MAP;
264264

265+
cv::cuda::Stream stream;
265266
cv::cuda::GpuMat dst;
266-
cv::cuda::warpAffine(loadMat(src), dst, M, src.size(), flags);
267+
cv::cuda::warpAffine(loadMat(src), dst, M, src.size(), flags, 0, cv::Scalar(), stream);
267268

268269
cv::Mat dst_gold;
269270
warpAffineGold(src, M, inverse, src.size(), dst_gold, interpolation, cv::BORDER_CONSTANT, cv::Scalar::all(0));

0 commit comments

Comments
 (0)