-
Notifications
You must be signed in to change notification settings - Fork 5.8k
More data types supported in cv::cuda::transpose() #3371
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: 4.x
Are you sure you want to change the base?
Changes from 6 commits
51180e9
be2dd8d
9e3eb3d
00abef3
e5b152e
4f0e470
43a1691
29cb16a
3672694
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -60,34 +60,202 @@ void cv::cuda::transpose(InputArray _src, OutputArray _dst, Stream& stream) | |
{ | ||
GpuMat src = getInputMat(_src, stream); | ||
|
||
const int srcType = src.type(); | ||
const int srcDepth = src.depth(); | ||
const int srcCn = src.channels(); | ||
const size_t elemSize = src.elemSize(); | ||
|
||
CV_Assert( elemSize == 1 || elemSize == 4 || elemSize == 8 ); | ||
const size_t elemSize1 = src.elemSize1(); | ||
|
||
GpuMat dst = getOutputMat(_dst, src.cols, src.rows, src.type(), stream); | ||
|
||
if (elemSize == 1) | ||
{ | ||
NppStreamHandler h(StreamAccessor::getStream(stream)); | ||
const bool isSupported = | ||
(elemSize == 1) || (elemSize == 2) || (elemSize == 3) || (elemSize == 4) || | ||
(elemSize == 6) || (elemSize == 8) || (elemSize == 12) || (elemSize == 16); | ||
|
||
if (!isSupported) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would CV_Assert() be better here? |
||
CV_Error(Error::StsUnsupportedFormat, ""); | ||
else if (src.empty()) | ||
CV_Error(Error::StsBadArg,"image is empty"); | ||
|
||
if ((src.rows == 1) && (src.cols == 1)) | ||
src.copyTo(dst, stream); | ||
else if (src.rows == 1) | ||
src.reshape(0, src.cols).copyTo(dst, stream); | ||
else if ((src.cols == 1) && src.isContinuous()) | ||
src.reshape(0, src.cols).copyTo(dst, stream); | ||
else | ||
{ | ||
NppiSize sz; | ||
sz.width = src.cols; | ||
sz.height = src.rows; | ||
|
||
nppSafeCall( nppiTranspose_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) ); | ||
if (!stream) | ||
{ | ||
//native implementation | ||
if (srcType == CV_8UC1) | ||
nppSafeCall( nppiTranspose_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) ); | ||
else if (srcType == CV_8UC3) | ||
nppSafeCall( nppiTranspose_8u_C3R(src.ptr<Npp8u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) ); | ||
else if (srcType == CV_8UC4) | ||
nppSafeCall( nppiTranspose_8u_C4R(src.ptr<Npp8u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) ); | ||
else if (srcType == CV_16UC1) | ||
nppSafeCall( nppiTranspose_16u_C1R(src.ptr<Npp16u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp16u>(), static_cast<int>(dst.step), sz) ); | ||
else if (srcType == CV_16UC3) | ||
nppSafeCall( nppiTranspose_16u_C3R(src.ptr<Npp16u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp16u>(), static_cast<int>(dst.step), sz) ); | ||
else if (srcType == CV_16UC4) | ||
nppSafeCall( nppiTranspose_16u_C4R(src.ptr<Npp16u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp16u>(), static_cast<int>(dst.step), sz) ); | ||
else if (srcType == CV_16SC1) | ||
nppSafeCall( nppiTranspose_16s_C1R(src.ptr<Npp16s>(), static_cast<int>(src.step), | ||
dst.ptr<Npp16s>(), static_cast<int>(dst.step), sz) ); | ||
else if (srcType == CV_16SC3) | ||
nppSafeCall( nppiTranspose_16s_C3R(src.ptr<Npp16s>(), static_cast<int>(src.step), | ||
dst.ptr<Npp16s>(), static_cast<int>(dst.step), sz) ); | ||
else if (srcType == CV_16SC4) | ||
nppSafeCall( nppiTranspose_16s_C4R(src.ptr<Npp16s>(), static_cast<int>(src.step), | ||
dst.ptr<Npp16s>(), static_cast<int>(dst.step), sz) ); | ||
else if (srcType == CV_32SC1) | ||
nppSafeCall( nppiTranspose_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), | ||
dst.ptr<Npp32s>(), static_cast<int>(dst.step), sz) ); | ||
else if (srcType == CV_32SC3) | ||
nppSafeCall( nppiTranspose_32s_C3R(src.ptr<Npp32s>(), static_cast<int>(src.step), | ||
dst.ptr<Npp32s>(), static_cast<int>(dst.step), sz) ); | ||
else if (srcType == CV_32SC4) | ||
nppSafeCall( nppiTranspose_32s_C4R(src.ptr<Npp32s>(), static_cast<int>(src.step), | ||
dst.ptr<Npp32s>(), static_cast<int>(dst.step), sz) ); | ||
else if (srcType == CV_32FC1) | ||
nppSafeCall( nppiTranspose_32f_C1R(src.ptr<Npp32f>(), static_cast<int>(src.step), | ||
dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) ); | ||
else if (srcType == CV_32FC3) | ||
nppSafeCall( nppiTranspose_32f_C3R(src.ptr<Npp32f>(), static_cast<int>(src.step), | ||
dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) ); | ||
else if (srcType == CV_32FC4) | ||
nppSafeCall( nppiTranspose_32f_C4R(src.ptr<Npp32f>(), static_cast<int>(src.step), | ||
dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) ); | ||
//reinterpretation | ||
else if (elemSize == 1) | ||
nppSafeCall( nppiTranspose_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) ); | ||
else if (elemSize == 2) | ||
nppSafeCall( nppiTranspose_16u_C1R(src.ptr<Npp16u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp16u>(), static_cast<int>(dst.step), sz) ); | ||
else if (elemSize == 3) | ||
nppSafeCall( nppiTranspose_8u_C3R(src.ptr<Npp8u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) ); | ||
else if (elemSize == 4) | ||
nppSafeCall( nppiTranspose_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), | ||
dst.ptr<Npp32s>(), static_cast<int>(dst.step), sz) ); | ||
else if (elemSize == 6) | ||
nppSafeCall( nppiTranspose_16u_C3R(src.ptr<Npp16u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp16u>(), static_cast<int>(dst.step), sz) ); | ||
else if (elemSize == 8) | ||
nppSafeCall( nppiTranspose_16u_C4R(src.ptr<Npp16u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp16u>(), static_cast<int>(dst.step), sz) ); | ||
else if (elemSize == 12) | ||
nppSafeCall( nppiTranspose_32s_C3R(src.ptr<Npp32s>(), static_cast<int>(src.step), | ||
dst.ptr<Npp32s>(), static_cast<int>(dst.step), sz) ); | ||
else if (elemSize == 16) | ||
nppSafeCall( nppiTranspose_32s_C4R(src.ptr<Npp32s>(), static_cast<int>(src.step), | ||
dst.ptr<Npp32s>(), static_cast<int>(dst.step), sz) ); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The interpretation block does the transpose second time, if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Well... no ? This is still an "else if" |
||
}//end if (!stream) | ||
else//if (stream != 0) | ||
{ | ||
NppStreamContext ctx; | ||
nppSafeCall( nppGetStreamContext(&ctx) ); | ||
ctx.hStream = StreamAccessor::getStream(stream); | ||
|
||
//native implementation | ||
if (srcType == CV_8UC1) | ||
nppSafeCall( nppiTranspose_8u_C1R_Ctx(src.ptr<Npp8u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, ctx) ); | ||
else if (srcType == CV_8UC3) | ||
nppSafeCall( nppiTranspose_8u_C3R_Ctx(src.ptr<Npp8u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, ctx) ); | ||
else if (srcType == CV_8UC4) | ||
nppSafeCall( nppiTranspose_8u_C4R_Ctx(src.ptr<Npp8u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, ctx) ); | ||
else if (srcType == CV_16UC1) | ||
nppSafeCall( nppiTranspose_16u_C1R_Ctx(src.ptr<Npp16u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp16u>(), static_cast<int>(dst.step), sz, ctx) ); | ||
else if (srcType == CV_16UC3) | ||
nppSafeCall( nppiTranspose_16u_C3R_Ctx(src.ptr<Npp16u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp16u>(), static_cast<int>(dst.step), sz, ctx) ); | ||
else if (srcType == CV_16UC4) | ||
nppSafeCall( nppiTranspose_16u_C4R_Ctx(src.ptr<Npp16u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp16u>(), static_cast<int>(dst.step), sz, ctx) ); | ||
else if (srcType == CV_16SC1) | ||
nppSafeCall( nppiTranspose_16s_C1R_Ctx(src.ptr<Npp16s>(), static_cast<int>(src.step), | ||
dst.ptr<Npp16s>(), static_cast<int>(dst.step), sz, ctx) ); | ||
else if (srcType == CV_16SC3) | ||
nppSafeCall( nppiTranspose_16s_C3R_Ctx(src.ptr<Npp16s>(), static_cast<int>(src.step), | ||
dst.ptr<Npp16s>(), static_cast<int>(dst.step), sz, ctx) ); | ||
else if (srcType == CV_16SC4) | ||
nppSafeCall( nppiTranspose_16s_C4R_Ctx(src.ptr<Npp16s>(), static_cast<int>(src.step), | ||
dst.ptr<Npp16s>(), static_cast<int>(dst.step), sz, ctx) ); | ||
else if (srcType == CV_32SC1) | ||
nppSafeCall( nppiTranspose_32s_C1R_Ctx(src.ptr<Npp32s>(), static_cast<int>(src.step), | ||
dst.ptr<Npp32s>(), static_cast<int>(dst.step), sz, ctx) ); | ||
else if (srcType == CV_32SC3) | ||
nppSafeCall( nppiTranspose_32s_C3R_Ctx(src.ptr<Npp32s>(), static_cast<int>(src.step), | ||
dst.ptr<Npp32s>(), static_cast<int>(dst.step), sz, ctx) ); | ||
else if (srcType == CV_32SC4) | ||
nppSafeCall( nppiTranspose_32s_C4R_Ctx(src.ptr<Npp32s>(), static_cast<int>(src.step), | ||
dst.ptr<Npp32s>(), static_cast<int>(dst.step), sz, ctx) ); | ||
else if (srcType == CV_32FC1) | ||
nppSafeCall( nppiTranspose_32f_C1R_Ctx(src.ptr<Npp32f>(), static_cast<int>(src.step), | ||
dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, ctx) ); | ||
else if (srcType == CV_32FC3) | ||
nppSafeCall( nppiTranspose_32f_C3R_Ctx(src.ptr<Npp32f>(), static_cast<int>(src.step), | ||
dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, ctx) ); | ||
else if (srcType == CV_32FC4) | ||
nppSafeCall( nppiTranspose_32f_C4R_Ctx(src.ptr<Npp32f>(), static_cast<int>(src.step), | ||
dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, ctx) ); | ||
//reinterpretation | ||
else if (elemSize == 1) | ||
nppSafeCall( nppiTranspose_8u_C1R_Ctx(src.ptr<Npp8u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, ctx) ); | ||
else if (elemSize == 2) | ||
nppSafeCall( nppiTranspose_16u_C1R_Ctx(src.ptr<Npp16u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp16u>(), static_cast<int>(dst.step), sz, ctx) ); | ||
else if (elemSize == 3) | ||
nppSafeCall( nppiTranspose_8u_C3R_Ctx(src.ptr<Npp8u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, ctx) ); | ||
else if (elemSize == 4) | ||
nppSafeCall( nppiTranspose_32s_C1R_Ctx(src.ptr<Npp32s>(), static_cast<int>(src.step), | ||
dst.ptr<Npp32s>(), static_cast<int>(dst.step), sz, ctx) ); | ||
else if (elemSize == 6) | ||
nppSafeCall( nppiTranspose_16u_C3R_Ctx(src.ptr<Npp16u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp16u>(), static_cast<int>(dst.step), sz, ctx) ); | ||
else if (elemSize == 8) | ||
nppSafeCall( nppiTranspose_16u_C4R_Ctx(src.ptr<Npp16u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp16u>(), static_cast<int>(dst.step), sz, ctx) ); | ||
else if (elemSize == 12) | ||
nppSafeCall( nppiTranspose_32s_C3R_Ctx(src.ptr<Npp32s>(), static_cast<int>(src.step), | ||
dst.ptr<Npp32s>(), static_cast<int>(dst.step), sz, ctx) ); | ||
else if (elemSize == 16) | ||
nppSafeCall( nppiTranspose_32s_C4R_Ctx(src.ptr<Npp32s>(), static_cast<int>(src.step), | ||
dst.ptr<Npp32s>(), static_cast<int>(dst.step), sz, ctx) ); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The same here. |
||
}//end if (stream != 0) | ||
|
||
if (!stream) | ||
CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() ); | ||
} | ||
CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() ); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Syncronious NPP call is used if |
||
}//end if | ||
|
||
/* | ||
if (elemSize == 1) | ||
gridTranspose(globPtr<unsigned char>(src), globPtr<unsigned char>(dst), stream); | ||
else if (elemSize == 2) | ||
gridTranspose(globPtr<unsigned short>(src), globPtr<unsigned short>(dst), stream); | ||
else if (elemSize == 4) | ||
{ | ||
gridTranspose(globPtr<int>(src), globPtr<int>(dst), stream); | ||
} | ||
else // if (elemSize == 8) | ||
{ | ||
gridTranspose(globPtr<double>(src), globPtr<double>(dst), stream); | ||
} | ||
gridTranspose(globPtr<signed int>(src), globPtr<signed int>(dst), stream); | ||
else if (elemSize == 8) | ||
gridTranspose(globPtr<double>(src), globPtr<double>(dst), stream); | ||
*/ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please remove dead code. |
||
|
||
syncOutput(dst, _dst, stream); | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this redundant?