Skip to content

Commit c0b1f13

Browse files
authored
Merge pull request #1240 from cppchedy/chedy/fix-bi-copy-image
[Bindless][exp] 1D Image copy fix
2 parents 475ce8b + cf806c3 commit c0b1f13

File tree

1 file changed

+48
-8
lines changed

1 file changed

+48
-8
lines changed

source/adapters/cuda/image.cpp

Lines changed: 48 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -628,15 +628,37 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
628628
ScopedContext Active(hQueue->getContext());
629629
CUstream Stream = hQueue->getNextTransferStream();
630630
enqueueEventsWait(hQueue, Stream, numEventsInWaitList, phEventWaitList);
631+
631632
// We have to use a different copy function for each image dimensionality.
632633

633634
if (imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE) {
634635
if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) {
636+
CUmemorytype memType;
637+
638+
// Check what type of memory is pDst. If cuPointerGetAttribute returns
639+
// something different from CUDA_SUCCESS then we know that pDst memory
640+
// type is a CuArray. Otherwise, it's CU_MEMORYTYPE_DEVICE.
641+
bool isCudaArray =
642+
cuPointerGetAttribute(&memType, CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
643+
(CUdeviceptr)pDst) != CUDA_SUCCESS;
644+
635645
size_t CopyExtentBytes = PixelSizeBytes * copyExtent.width;
636646
char *SrcWithOffset = (char *)pSrc + (srcOffset.x * PixelSizeBytes);
637-
UR_CHECK_ERROR(
638-
cuMemcpyHtoAAsync((CUarray)pDst, dstOffset.x * PixelSizeBytes,
639-
(void *)SrcWithOffset, CopyExtentBytes, Stream));
647+
648+
if (isCudaArray) {
649+
UR_CHECK_ERROR(cuMemcpyHtoAAsync(
650+
(CUarray)pDst, dstOffset.x * PixelSizeBytes,
651+
(void *)SrcWithOffset, CopyExtentBytes, Stream));
652+
} else if (memType == CU_MEMORYTYPE_DEVICE) {
653+
void *DstWithOffset =
654+
(void *)((char *)pDst + (PixelSizeBytes * dstOffset.x));
655+
UR_CHECK_ERROR(cuMemcpyHtoDAsync((CUdeviceptr)DstWithOffset,
656+
(void *)SrcWithOffset,
657+
CopyExtentBytes, Stream));
658+
} else {
659+
// This should be unreachable.
660+
return UR_RESULT_ERROR_INVALID_VALUE;
661+
}
640662
} else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) {
641663
CUDA_MEMCPY2D cpy_desc = {};
642664
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
@@ -679,13 +701,31 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
679701
}
680702
} else if (imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) {
681703
if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) {
704+
CUmemorytype memType;
705+
// Check what type of memory is pSrc. If cuPointerGetAttribute returns
706+
// something different from CUDA_SUCCESS then we know that pSrc memory
707+
// type is a CuArray. Otherwise, it's CU_MEMORYTYPE_DEVICE.
708+
bool isCudaArray =
709+
cuPointerGetAttribute(&memType, CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
710+
(CUdeviceptr)pSrc) != CUDA_SUCCESS;
711+
682712
size_t CopyExtentBytes = PixelSizeBytes * copyExtent.width;
683-
size_t src_offset_bytes = PixelSizeBytes * srcOffset.x;
684-
void *dst_with_offset =
713+
void *DstWithOffset =
685714
(void *)((char *)pDst + (PixelSizeBytes * dstOffset.x));
686-
UR_CHECK_ERROR(cuMemcpyAtoHAsync(dst_with_offset, (CUarray)pSrc,
687-
src_offset_bytes, CopyExtentBytes,
688-
Stream));
715+
716+
if (isCudaArray) {
717+
UR_CHECK_ERROR(cuMemcpyAtoHAsync(DstWithOffset, (CUarray)pSrc,
718+
PixelSizeBytes * srcOffset.x,
719+
CopyExtentBytes, Stream));
720+
} else if (memType == CU_MEMORYTYPE_DEVICE) {
721+
char *SrcWithOffset = (char *)pSrc + (srcOffset.x * PixelSizeBytes);
722+
UR_CHECK_ERROR(cuMemcpyDtoHAsync(DstWithOffset,
723+
(CUdeviceptr)SrcWithOffset,
724+
CopyExtentBytes, Stream));
725+
} else {
726+
// This should be unreachable.
727+
return UR_RESULT_ERROR_INVALID_VALUE;
728+
}
689729
} else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) {
690730
CUDA_MEMCPY2D cpy_desc = {};
691731
cpy_desc.srcXInBytes = srcOffset.x;

0 commit comments

Comments
 (0)