-
Notifications
You must be signed in to change notification settings - Fork 5.8k
More data types supported in cv::cuda::transpose() #3371
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: 4.x
Are you sure you want to change the base?
Changes from 1 commit
51180e9
be2dd8d
9e3eb3d
00abef3
e5b152e
4f0e470
43a1691
29cb16a
3672694
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -60,34 +60,147 @@ void cv::cuda::transpose(InputArray _src, OutputArray _dst, Stream& stream) | |
{ | ||
GpuMat src = getInputMat(_src, stream); | ||
|
||
const int srcType = src.type(); | ||
const int srcDepth = src.depth(); | ||
const int srcCn = src.channels(); | ||
const size_t elemSize = src.elemSize(); | ||
const size_t elemSize1 = src.elemSize1(); | ||
|
||
CV_Assert( elemSize == 1 || elemSize == 4 || elemSize == 8 ); | ||
//CV_Assert( elemSize == 1 || elemSize == 4 || elemSize == 8 ); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What if What kind of error message would we show to the user? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is handled by
|
||
|
||
GpuMat dst = getOutputMat(_dst, src.cols, src.rows, src.type(), stream); | ||
|
||
if (elemSize == 1) | ||
const bool isNppiNativelySupported = | ||
(srcType == CV_8UC1) || (srcType == CV_8UC3) || (srcType == CV_8UC4) || | ||
(srcType == CV_16UC1) || (srcType == CV_16UC3) || (srcType == CV_16UC4) || | ||
(srcType == CV_16SC1) || (srcType == CV_16SC3) || (srcType == CV_16SC4) || | ||
(srcType == CV_32SC1) || (srcType == CV_32SC3) || (srcType == CV_32SC4) || | ||
(srcType == CV_32FC1) || (srcType == CV_32FC3) || (srcType == CV_32FC4); | ||
const bool isElemSizeSupportedByNppi = | ||
((elemSize != 0) && !(elemSize%1) && ((elemSize/1)<=4)) || | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Apologies if I have completely misunderstood the logic but There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You are right, but it was on purpose for code clarity/readability. OK to get rid of the (elemSize != 0) check. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For clarity I would use elemSize1 and channels? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is overlap between isNppiNativelySupported, isElemSizeSupportedByNppi and isElemSizeSupportedByGridTranspose. For instance, 16UC2 is not supported by NPP but can be mapped to 32SC1 (isNppiNativelySupported == false and isElemSizeSupportedByNppi == true). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there a complete overlap, i.e. is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There are cases where (isNppiNativelySupported == false) and (isElemSizeSupportedByNppi == true) (e.g. 16UC2). But there are currently no cases where (isElemSizeSupportedByNppi == false) and (isElemSizeSupportedByGridTranspose == true). This is a fallback implementation if :
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is
If so wouldn't it be better to have this under
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Exactly, this is not a native NPPI call (isNppiNativelySupported == false), but it can be handled by cheating on the data type (isElemSizeSupportedByNppi == true). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure if this makes it easier or harder for the next person who looks at it to see what's going on, what do you think @alalek? Either way, I think having overlap between both makes it difficult to understand; maybe if the "cheat" only included the cases where it was applicable it would be more obvious? |
||
((elemSize != 0) && !(elemSize%2) && ((elemSize/2)<=4)) || | ||
((elemSize != 0) && !(elemSize%4) && ((elemSize/4)<=4)) || | ||
((elemSize != 0) && !(elemSize%8) && ((elemSize/8)<=2)); | ||
if (src.empty()) | ||
dst.release(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Empty input should be an error, as nobody wants to process "nothing" in real use cases: opencv/opencv#8300 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok, I thought it was better since Allow empty matrices in most functions (personally I prefer a no-op rather than an exception). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will also add a fast path for size == (1, 1), where transpose is just a copy. |
||
else if (isNppiNativelySupported) | ||
{ | ||
NppStreamHandler h(StreamAccessor::getStream(stream)); | ||
|
||
NppiSize sz; | ||
sz.width = src.cols; | ||
sz.height = src.rows; | ||
|
||
nppSafeCall( nppiTranspose_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), | ||
if (srcType == CV_8UC1) | ||
nppSafeCall( nppiTranspose_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) ); | ||
else if (srcType == CV_8UC3) | ||
nppSafeCall( nppiTranspose_8u_C3R(src.ptr<Npp8u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) ); | ||
else if (srcType == CV_8UC4) | ||
nppSafeCall( nppiTranspose_8u_C4R(src.ptr<Npp8u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) ); | ||
else if (srcType == CV_16UC1) | ||
nppSafeCall( nppiTranspose_16u_C1R(src.ptr<Npp16u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp16u>(), static_cast<int>(dst.step), sz) ); | ||
else if (srcType == CV_16UC3) | ||
nppSafeCall( nppiTranspose_16u_C3R(src.ptr<Npp16u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp16u>(), static_cast<int>(dst.step), sz) ); | ||
else if (srcType == CV_16UC4) | ||
nppSafeCall( nppiTranspose_16u_C4R(src.ptr<Npp16u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp16u>(), static_cast<int>(dst.step), sz) ); | ||
else if (srcType == CV_16SC1) | ||
nppSafeCall( nppiTranspose_16s_C1R(src.ptr<Npp16s>(), static_cast<int>(src.step), | ||
dst.ptr<Npp16s>(), static_cast<int>(dst.step), sz) ); | ||
else if (srcType == CV_16SC3) | ||
nppSafeCall( nppiTranspose_16s_C3R(src.ptr<Npp16s>(), static_cast<int>(src.step), | ||
dst.ptr<Npp16s>(), static_cast<int>(dst.step), sz) ); | ||
else if (srcType == CV_16SC4) | ||
nppSafeCall( nppiTranspose_16s_C4R(src.ptr<Npp16s>(), static_cast<int>(src.step), | ||
dst.ptr<Npp16s>(), static_cast<int>(dst.step), sz) ); | ||
else if (srcType == CV_32SC1) | ||
nppSafeCall( nppiTranspose_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), | ||
dst.ptr<Npp32s>(), static_cast<int>(dst.step), sz) ); | ||
else if (srcType == CV_32SC3) | ||
nppSafeCall( nppiTranspose_32s_C3R(src.ptr<Npp32s>(), static_cast<int>(src.step), | ||
dst.ptr<Npp32s>(), static_cast<int>(dst.step), sz) ); | ||
else if (srcType == CV_32SC4) | ||
nppSafeCall( nppiTranspose_32s_C4R(src.ptr<Npp32s>(), static_cast<int>(src.step), | ||
dst.ptr<Npp32s>(), static_cast<int>(dst.step), sz) ); | ||
else if (srcType == CV_32FC1) | ||
nppSafeCall( nppiTranspose_32f_C1R(src.ptr<Npp32f>(), static_cast<int>(src.step), | ||
dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) ); | ||
else if (srcType == CV_32FC3) | ||
nppSafeCall( nppiTranspose_32f_C3R(src.ptr<Npp32f>(), static_cast<int>(src.step), | ||
dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) ); | ||
else if (srcType == CV_32FC4) | ||
nppSafeCall( nppiTranspose_32f_C4R(src.ptr<Npp32f>(), static_cast<int>(src.step), | ||
dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) ); | ||
|
||
if (!stream) | ||
CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() ); | ||
} | ||
else if (elemSize == 4) | ||
{ | ||
gridTranspose(globPtr<int>(src), globPtr<int>(dst), stream); | ||
} | ||
else // if (elemSize == 8) | ||
}//end if (isNppiNativelySupported) | ||
else if (isElemSizeSupportedByNppi) | ||
{ | ||
gridTranspose(globPtr<double>(src), globPtr<double>(dst), stream); | ||
} | ||
NppStreamHandler h(StreamAccessor::getStream(stream)); | ||
|
||
NppiSize sz; | ||
sz.width = src.cols; | ||
sz.height = src.rows; | ||
|
||
if (!(elemSize%1) && ((elemSize/1)==1)) | ||
nppSafeCall( nppiTranspose_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) ); | ||
else if (!(elemSize%1) && ((elemSize/1)==2)) | ||
nppSafeCall( nppiTranspose_16u_C1R(src.ptr<Npp16u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp16u>(), static_cast<int>(dst.step), sz) ); | ||
else if (!(elemSize%1) && ((elemSize/1)==3)) | ||
nppSafeCall( nppiTranspose_8u_C3R(src.ptr<Npp8u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) ); | ||
else if (!(elemSize%1) && ((elemSize/1)==4)) | ||
nppSafeCall( nppiTranspose_8u_C4R(src.ptr<Npp8u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) ); | ||
else if (!(elemSize%2) && ((elemSize/2)==1)) | ||
nppSafeCall( nppiTranspose_16u_C1R(src.ptr<Npp16u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp16u>(), static_cast<int>(dst.step), sz) ); | ||
else if (!(elemSize%2) && ((elemSize/2)==2)) | ||
nppSafeCall( nppiTranspose_8u_C4R(src.ptr<Npp8u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) ); | ||
else if (!(elemSize%2) && ((elemSize/2)==3)) | ||
nppSafeCall( nppiTranspose_16u_C3R(src.ptr<Npp16u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp16u>(), static_cast<int>(dst.step), sz) ); | ||
else if (!(elemSize%2) && ((elemSize/2)==4)) | ||
nppSafeCall( nppiTranspose_16u_C4R(src.ptr<Npp16u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp16u>(), static_cast<int>(dst.step), sz) ); | ||
else if (!(elemSize%4) && ((elemSize/4)==1)) | ||
nppSafeCall( nppiTranspose_32f_C1R(src.ptr<Npp32f>(), static_cast<int>(src.step), | ||
dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) ); | ||
else if (!(elemSize%4) && ((elemSize/4)==2)) | ||
nppSafeCall( nppiTranspose_16u_C4R(src.ptr<Npp16u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp16u>(), static_cast<int>(dst.step), sz) ); | ||
else if (!(elemSize%4) && ((elemSize/4)==3)) | ||
nppSafeCall( nppiTranspose_32f_C3R(src.ptr<Npp32f>(), static_cast<int>(src.step), | ||
dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) ); | ||
else if (!(elemSize%4) && ((elemSize/4)==4)) | ||
nppSafeCall( nppiTranspose_32f_C4R(src.ptr<Npp32f>(), static_cast<int>(src.step), | ||
dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) ); | ||
else if (!(elemSize%8) && ((elemSize/8)==1)) | ||
nppSafeCall( nppiTranspose_16u_C4R(src.ptr<Npp16u>(), static_cast<int>(src.step), | ||
dst.ptr<Npp16u>(), static_cast<int>(dst.step), sz) ); | ||
else if (!(elemSize%8) && ((elemSize/8)==2)) | ||
nppSafeCall( nppiTranspose_32f_C4R(src.ptr<Npp32f>(), static_cast<int>(src.step), | ||
dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) ); | ||
}//end if (isElemSizeSupportedByNppi) | ||
else if (elemSize == 1) | ||
gridTranspose(globPtr<unsigned char>(src), globPtr<unsigned char>(dst), stream); | ||
else if (elemSize == 2) | ||
gridTranspose(globPtr<unsigned short>(src), globPtr<unsigned short>(dst), stream); | ||
else if (elemSize == 4) | ||
gridTranspose(globPtr<signed int>(src), globPtr<signed int>(dst), stream); | ||
else if (elemSize == 8) | ||
gridTranspose(globPtr<double>(src), globPtr<float>(dst), stream); | ||
else | ||
CV_Error(Error::StsUnsupportedFormat, ""); | ||
|
||
syncOutput(dst, _dst, stream); | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this redundant?