@@ -628,15 +628,37 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
628
628
ScopedContext Active (hQueue->getContext ());
629
629
CUstream Stream = hQueue->getNextTransferStream ();
630
630
enqueueEventsWait (hQueue, Stream, numEventsInWaitList, phEventWaitList);
631
+
631
632
// We have to use a different copy function for each image dimensionality.
632
633
633
634
if (imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE) {
634
635
if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) {
636
+ CUmemorytype memType;
637
+
638
+ // Check what type of memory is pDst. If cuPointerGetAttribute returns
639
+ // something different from CUDA_SUCCESS then we know that pDst memory
640
+ // type is a CuArray. Otherwise, it's CU_MEMORYTYPE_DEVICE.
641
+ bool isCudaArray =
642
+ cuPointerGetAttribute (&memType, CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
643
+ (CUdeviceptr)pDst) != CUDA_SUCCESS;
644
+
635
645
size_t CopyExtentBytes = PixelSizeBytes * copyExtent.width ;
636
646
char *SrcWithOffset = (char *)pSrc + (srcOffset.x * PixelSizeBytes);
637
- UR_CHECK_ERROR (
638
- cuMemcpyHtoAAsync ((CUarray)pDst, dstOffset.x * PixelSizeBytes,
639
- (void *)SrcWithOffset, CopyExtentBytes, Stream));
647
+
648
+ if (isCudaArray) {
649
+ UR_CHECK_ERROR (cuMemcpyHtoAAsync (
650
+ (CUarray)pDst, dstOffset.x * PixelSizeBytes,
651
+ (void *)SrcWithOffset, CopyExtentBytes, Stream));
652
+ } else if (memType == CU_MEMORYTYPE_DEVICE) {
653
+ void *DstWithOffset =
654
+ (void *)((char *)pDst + (PixelSizeBytes * dstOffset.x ));
655
+ UR_CHECK_ERROR (cuMemcpyHtoDAsync ((CUdeviceptr)DstWithOffset,
656
+ (void *)SrcWithOffset,
657
+ CopyExtentBytes, Stream));
658
+ } else {
659
+ // This should be unreachable.
660
+ return UR_RESULT_ERROR_INVALID_VALUE;
661
+ }
640
662
} else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) {
641
663
CUDA_MEMCPY2D cpy_desc = {};
642
664
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
@@ -679,13 +701,31 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
679
701
}
680
702
} else if (imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) {
681
703
if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) {
704
+ CUmemorytype memType;
705
+ // Check what type of memory is pSrc. If cuPointerGetAttribute returns
706
+ // something different from CUDA_SUCCESS then we know that pSrc memory
707
+ // type is a CuArray. Otherwise, it's CU_MEMORYTYPE_DEVICE.
708
+ bool isCudaArray =
709
+ cuPointerGetAttribute (&memType, CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
710
+ (CUdeviceptr)pSrc) != CUDA_SUCCESS;
711
+
682
712
size_t CopyExtentBytes = PixelSizeBytes * copyExtent.width ;
683
- size_t src_offset_bytes = PixelSizeBytes * srcOffset.x ;
684
- void *dst_with_offset =
713
+ void *DstWithOffset =
685
714
(void *)((char *)pDst + (PixelSizeBytes * dstOffset.x ));
686
- UR_CHECK_ERROR (cuMemcpyAtoHAsync (dst_with_offset, (CUarray)pSrc,
687
- src_offset_bytes, CopyExtentBytes,
688
- Stream));
715
+
716
+ if (isCudaArray) {
717
+ UR_CHECK_ERROR (cuMemcpyAtoHAsync (DstWithOffset, (CUarray)pSrc,
718
+ PixelSizeBytes * srcOffset.x ,
719
+ CopyExtentBytes, Stream));
720
+ } else if (memType == CU_MEMORYTYPE_DEVICE) {
721
+ char *SrcWithOffset = (char *)pSrc + (srcOffset.x * PixelSizeBytes);
722
+ UR_CHECK_ERROR (cuMemcpyDtoHAsync (DstWithOffset,
723
+ (CUdeviceptr)SrcWithOffset,
724
+ CopyExtentBytes, Stream));
725
+ } else {
726
+ // This should be unreachable.
727
+ return UR_RESULT_ERROR_INVALID_VALUE;
728
+ }
689
729
} else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) {
690
730
CUDA_MEMCPY2D cpy_desc = {};
691
731
cpy_desc.srcXInBytes = srcOffset.x ;
0 commit comments