@@ -1663,8 +1663,57 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D(
1663
1663
UR_CHECK_ERROR (RetImplEvent->start ());
1664
1664
}
1665
1665
1666
+ // There is an issue with hipMemcpy2D* when hipMemcpyDefault is used, which
1667
+ // makes the HIP runtime not correctly derive the copy kind (direction) for
1668
+ // the copies since ROCm 5.6.0+. See: https://github.com/ROCm/clr/issues/40
1669
+ // TODO: Add maximum HIP_VERSION when bug has been fixed.
1670
+ #if HIP_VERSION >= 50600000
1671
+ hipPointerAttribute_t srcAttribs{};
1672
+ hipPointerAttribute_t dstAttribs{};
1673
+
1674
+ bool srcIsSystemAlloc{false };
1675
+ bool dstIsSystemAlloc{false };
1676
+
1677
+ hipError_t hipRes{};
1678
+ // hipErrorInvalidValue returned from hipPointerGetAttributes for a non-null
1679
+ // pointer refers to an OS-allocation, hence pageable host memory. However,
1680
+ // this means we cannot rely on the attributes result, hence we mark system
1681
+ // pageable memory allocation manually as host memory. The HIP runtime can
1682
+ // handle the registering/unregistering of the memory as long as the right
1683
+ // copy-kind (direction) is provided to hipMemcpy2DAsync for this case.
1684
+ hipRes = hipPointerGetAttributes (&srcAttribs, (const void *)pSrc);
1685
+ if (hipRes == hipErrorInvalidValue && pSrc)
1686
+ srcIsSystemAlloc = true ;
1687
+ hipRes = hipPointerGetAttributes (&dstAttribs, (const void *)pDst);
1688
+ if (hipRes == hipErrorInvalidValue && pDst)
1689
+ dstIsSystemAlloc = true ;
1690
+
1691
+ const unsigned int srcMemType{srcAttribs.type };
1692
+ const unsigned int dstMemType{dstAttribs.type };
1693
+
1694
+ const bool srcIsHost{(srcMemType == hipMemoryTypeHost) || srcIsSystemAlloc};
1695
+ const bool srcIsDevice{srcMemType == hipMemoryTypeDevice};
1696
+ const bool dstIsHost{(dstMemType == hipMemoryTypeHost) || dstIsSystemAlloc};
1697
+ const bool dstIsDevice{dstMemType == hipMemoryTypeDevice};
1698
+
1699
+ unsigned int cpyKind{};
1700
+ if (srcIsHost && dstIsHost)
1701
+ cpyKind = hipMemcpyHostToHost;
1702
+ else if (srcIsHost && dstIsDevice)
1703
+ cpyKind = hipMemcpyHostToDevice;
1704
+ else if (srcIsDevice && dstIsHost)
1705
+ cpyKind = hipMemcpyDeviceToHost;
1706
+ else if (srcIsDevice && dstIsDevice)
1707
+ cpyKind = hipMemcpyDeviceToDevice;
1708
+ else
1709
+ cpyKind = hipMemcpyDefault;
1710
+
1711
+ UR_CHECK_ERROR (hipMemcpy2DAsync (pDst, dstPitch, pSrc, srcPitch, width,
1712
+ height, (hipMemcpyKind)cpyKind, HIPStream));
1713
+ #else
1666
1714
UR_CHECK_ERROR (hipMemcpy2DAsync (pDst, dstPitch, pSrc, srcPitch, width,
1667
1715
height, hipMemcpyDefault, HIPStream));
1716
+ #endif
1668
1717
1669
1718
if (phEvent) {
1670
1719
UR_CHECK_ERROR (RetImplEvent->record ());
0 commit comments