Skip to content

Commit e4b5b89

Browse files
committed
Merge branch 'main' into fix-coverity/sanitizer-layer
2 parents 6ea45a4 + cd97e17 commit e4b5b89

File tree

8 files changed

+193
-45
lines changed

8 files changed

+193
-45
lines changed

include/ur_api.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3237,13 +3237,16 @@ typedef enum ur_usm_advice_flag_t {
32373237
UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_HOST = UR_BIT(12), ///< Removes the affect of ::UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_HOST
32383238
UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION_HOST = UR_BIT(13), ///< Hint that the preferred memory location is the host
32393239
UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST = UR_BIT(14), ///< Removes the affect of ::UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION_HOST
3240+
UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY = UR_BIT(15), ///< Hint that memory coherence will be coarse-grained (up-to-date only at
3241+
///< kernel boundaries)
3242+
UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY = UR_BIT(16), ///< Removes the effect of ::UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY
32403243
/// @cond
32413244
UR_USM_ADVICE_FLAG_FORCE_UINT32 = 0x7fffffff
32423245
/// @endcond
32433246

32443247
} ur_usm_advice_flag_t;
32453248
/// @brief Bit Mask for validating ur_usm_advice_flags_t
3246-
#define UR_USM_ADVICE_FLAGS_MASK 0xffff8000
3249+
#define UR_USM_ADVICE_FLAGS_MASK 0xfffe0000
32473250

32483251
///////////////////////////////////////////////////////////////////////////////
32493252
/// @brief Handle of USM pool

include/ur_print.hpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6277,6 +6277,12 @@ inline std::ostream &operator<<(std::ostream &os, ur_usm_advice_flag_t value) {
62776277
case UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST:
62786278
os << "UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST";
62796279
break;
6280+
case UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY:
6281+
os << "UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY";
6282+
break;
6283+
case UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY:
6284+
os << "UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY";
6285+
break;
62806286
default:
62816287
os << "unknown enumerator";
62826288
break;
@@ -6441,6 +6447,26 @@ inline ur_result_t printFlag<ur_usm_advice_flag_t>(std::ostream &os, uint32_t fl
64416447
}
64426448
os << UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST;
64436449
}
6450+
6451+
if ((val & UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY) == (uint32_t)UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY) {
6452+
val ^= (uint32_t)UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY;
6453+
if (!first) {
6454+
os << " | ";
6455+
} else {
6456+
first = false;
6457+
}
6458+
os << UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY;
6459+
}
6460+
6461+
if ((val & UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY) == (uint32_t)UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY) {
6462+
val ^= (uint32_t)UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY;
6463+
if (!first) {
6464+
os << " | ";
6465+
} else {
6466+
first = false;
6467+
}
6468+
os << UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY;
6469+
}
64446470
if (val != 0) {
64456471
std::bitset<32> bits(val);
64466472
if (!first) {

scripts/core/usm.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,12 @@ etors:
126126
- name: CLEAR_PREFERRED_LOCATION_HOST
127127
value: "$X_BIT(14)"
128128
desc: "Removes the affect of $X_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION_HOST"
129+
- name: SET_NON_COHERENT_MEMORY
130+
value: "$X_BIT(15)"
131+
desc: "Hint that memory coherence will be coarse-grained (up-to-date only at kernel boundaries)"
132+
- name: CLEAR_NON_COHERENT_MEMORY
133+
value: "$X_BIT(16)"
134+
desc: "Removes the effect of $X_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY"
129135
--- #--------------------------------------------------------------------------
130136
type: handle
131137
desc: "Handle of USM pool"

source/adapters/cuda/enqueue.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,10 +114,13 @@ ur_result_t setCuMemAdvise(CUdeviceptr DevPtr, size_t Size,
114114
}
115115
}
116116

117-
std::array<ur_usm_advice_flags_t, 4> UnmappedMemAdviceFlags = {
117+
std::array<ur_usm_advice_flags_t, 6> UnmappedMemAdviceFlags = {
118118
UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY,
119119
UR_USM_ADVICE_FLAG_CLEAR_NON_ATOMIC_MOSTLY,
120-
UR_USM_ADVICE_FLAG_BIAS_CACHED, UR_USM_ADVICE_FLAG_BIAS_UNCACHED};
120+
UR_USM_ADVICE_FLAG_BIAS_CACHED,
121+
UR_USM_ADVICE_FLAG_BIAS_UNCACHED,
122+
UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY,
123+
UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY};
121124

122125
for (auto &UnmappedFlag : UnmappedMemAdviceFlags) {
123126
if (URAdviceFlags & UnmappedFlag) {

source/adapters/hip/device.hpp

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,37 @@ struct ur_device_handle_t_ {
2626
ur_platform_handle_t Platform;
2727
hipCtx_t HIPContext;
2828
uint32_t DeviceIndex;
29+
int MaxWorkGroupSize{0};
30+
int MaxBlockDimX{0};
31+
int MaxBlockDimY{0};
32+
int MaxBlockDimZ{0};
33+
int DeviceMaxLocalMem{0};
34+
int ManagedMemSupport{0};
35+
int ConcurrentManagedAccess{0};
2936

3037
public:
3138
ur_device_handle_t_(native_type HipDevice, hipCtx_t Context,
3239
ur_platform_handle_t Platform, uint32_t DeviceIndex)
3340
: HIPDevice(HipDevice), RefCount{1}, Platform(Platform),
34-
HIPContext(Context), DeviceIndex(DeviceIndex) {}
41+
HIPContext(Context), DeviceIndex(DeviceIndex) {
42+
43+
UR_CHECK_ERROR(hipDeviceGetAttribute(
44+
&MaxWorkGroupSize, hipDeviceAttributeMaxThreadsPerBlock, HIPDevice));
45+
UR_CHECK_ERROR(hipDeviceGetAttribute(
46+
&MaxBlockDimX, hipDeviceAttributeMaxBlockDimX, HIPDevice));
47+
UR_CHECK_ERROR(hipDeviceGetAttribute(
48+
&MaxBlockDimY, hipDeviceAttributeMaxBlockDimY, HIPDevice));
49+
UR_CHECK_ERROR(hipDeviceGetAttribute(
50+
&MaxBlockDimZ, hipDeviceAttributeMaxBlockDimZ, HIPDevice));
51+
UR_CHECK_ERROR(hipDeviceGetAttribute(
52+
&DeviceMaxLocalMem, hipDeviceAttributeMaxSharedMemoryPerBlock,
53+
HIPDevice));
54+
UR_CHECK_ERROR(hipDeviceGetAttribute(
55+
&ManagedMemSupport, hipDeviceAttributeManagedMemory, HIPDevice));
56+
UR_CHECK_ERROR(hipDeviceGetAttribute(
57+
&ConcurrentManagedAccess, hipDeviceAttributeConcurrentManagedAccess,
58+
HIPDevice));
59+
}
3560

3661
~ur_device_handle_t_() noexcept(false) {
3762
UR_CHECK_ERROR(hipDevicePrimaryCtxRelease(HIPDevice));
@@ -48,6 +73,22 @@ struct ur_device_handle_t_ {
4873
// Returns the index of the device relative to the other devices in the same
4974
// platform
5075
uint32_t getIndex() const noexcept { return DeviceIndex; };
76+
77+
int getMaxWorkGroupSize() const noexcept { return MaxWorkGroupSize; };
78+
79+
int getMaxBlockDimX() const noexcept { return MaxBlockDimX; };
80+
81+
int getMaxBlockDimY() const noexcept { return MaxBlockDimY; };
82+
83+
int getMaxBlockDimZ() const noexcept { return MaxBlockDimZ; };
84+
85+
int getDeviceMaxLocalMem() const noexcept { return DeviceMaxLocalMem; };
86+
87+
int getManagedMemSupport() const noexcept { return ManagedMemSupport; };
88+
89+
int getConcurrentManagedAccess() const noexcept {
90+
return ConcurrentManagedAccess;
91+
};
5192
};
5293

5394
int getAttribute(ur_device_handle_t Device, hipDeviceAttribute_t Attribute);

source/adapters/hip/enqueue.cpp

Lines changed: 99 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -75,30 +75,46 @@ ur_result_t setHipMemAdvise(const void *DevPtr, const size_t Size,
7575
if (URAdviceFlags &
7676
(UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY |
7777
UR_USM_ADVICE_FLAG_CLEAR_NON_ATOMIC_MOSTLY |
78-
UR_USM_ADVICE_FLAG_BIAS_CACHED | UR_USM_ADVICE_FLAG_BIAS_UNCACHED)) {
78+
UR_USM_ADVICE_FLAG_BIAS_CACHED | UR_USM_ADVICE_FLAG_BIAS_UNCACHED
79+
#if !defined(__HIP_PLATFORM_AMD__)
80+
| UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY |
81+
UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY
82+
#endif
83+
)) {
7984
return UR_RESULT_ERROR_INVALID_ENUMERATION;
8085
}
8186

8287
using ur_to_hip_advice_t = std::pair<ur_usm_advice_flags_t, hipMemoryAdvise>;
8388

84-
static constexpr std::array<ur_to_hip_advice_t, 6>
85-
URToHIPMemAdviseDeviceFlags{
86-
std::make_pair(UR_USM_ADVICE_FLAG_SET_READ_MOSTLY,
87-
hipMemAdviseSetReadMostly),
88-
std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY,
89-
hipMemAdviseUnsetReadMostly),
90-
std::make_pair(UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION,
91-
hipMemAdviseSetPreferredLocation),
92-
std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION,
93-
hipMemAdviseUnsetPreferredLocation),
94-
std::make_pair(UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE,
95-
hipMemAdviseSetAccessedBy),
96-
std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE,
97-
hipMemAdviseUnsetAccessedBy),
98-
};
99-
for (auto &FlagPair : URToHIPMemAdviseDeviceFlags) {
100-
if (URAdviceFlags & FlagPair.first) {
101-
UR_CHECK_ERROR(hipMemAdvise(DevPtr, Size, FlagPair.second, Device));
89+
#if defined(__HIP_PLATFORM_AMD__)
90+
constexpr size_t DeviceFlagCount = 8;
91+
#else
92+
constexpr size_t DeviceFlagCount = 6;
93+
#endif
94+
static constexpr std::array<ur_to_hip_advice_t, DeviceFlagCount>
95+
URToHIPMemAdviseDeviceFlags {
96+
std::make_pair(UR_USM_ADVICE_FLAG_SET_READ_MOSTLY,
97+
hipMemAdviseSetReadMostly),
98+
std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY,
99+
hipMemAdviseUnsetReadMostly),
100+
std::make_pair(UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION,
101+
hipMemAdviseSetPreferredLocation),
102+
std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION,
103+
hipMemAdviseUnsetPreferredLocation),
104+
std::make_pair(UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE,
105+
hipMemAdviseSetAccessedBy),
106+
std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE,
107+
hipMemAdviseUnsetAccessedBy),
108+
#if defined(__HIP_PLATFORM_AMD__)
109+
std::make_pair(UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY,
110+
hipMemAdviseSetCoarseGrain),
111+
std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY,
112+
hipMemAdviseUnsetCoarseGrain),
113+
#endif
114+
};
115+
for (const auto &[URAdvice, HIPAdvice] : URToHIPMemAdviseDeviceFlags) {
116+
if (URAdviceFlags & URAdvice) {
117+
UR_CHECK_ERROR(hipMemAdvise(DevPtr, Size, HIPAdvice, Device));
102118
}
103119
}
104120

@@ -113,10 +129,9 @@ ur_result_t setHipMemAdvise(const void *DevPtr, const size_t Size,
113129
hipMemAdviseUnsetAccessedBy),
114130
};
115131

116-
for (auto &FlagPair : URToHIPMemAdviseHostFlags) {
117-
if (URAdviceFlags & FlagPair.first) {
118-
UR_CHECK_ERROR(
119-
hipMemAdvise(DevPtr, Size, FlagPair.second, hipCpuDeviceId));
132+
for (const auto &[URAdvice, HIPAdvice] : URToHIPMemAdviseHostFlags) {
133+
if (URAdviceFlags & URAdvice) {
134+
UR_CHECK_ERROR(hipMemAdvise(DevPtr, Size, HIPAdvice, hipCpuDeviceId));
120135
}
121136
}
122137

@@ -300,15 +315,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
300315
bool ProvidedLocalWorkGroupSize = (pLocalWorkSize != nullptr);
301316

302317
{
303-
ur_result_t Result = urDeviceGetInfo(
304-
hQueue->Device, UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES,
305-
sizeof(MaxThreadsPerBlock), MaxThreadsPerBlock, nullptr);
306-
UR_ASSERT(Result == UR_RESULT_SUCCESS, Result);
318+
MaxThreadsPerBlock[0] = hQueue->Device->getMaxBlockDimX();
319+
MaxThreadsPerBlock[1] = hQueue->Device->getMaxBlockDimY();
320+
MaxThreadsPerBlock[2] = hQueue->Device->getMaxBlockDimZ();
307321

308-
Result =
309-
urDeviceGetInfo(hQueue->Device, UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE,
310-
sizeof(MaxWorkGroupSize), &MaxWorkGroupSize, nullptr);
311-
UR_ASSERT(Result == UR_RESULT_SUCCESS, Result);
322+
MaxWorkGroupSize = hQueue->Device->getMaxWorkGroupSize();
312323

313324
// The MaxWorkGroupSize = 1024 for AMD GPU
314325
// The MaxThreadsPerBlock = {1024, 1024, 1024}
@@ -423,11 +434,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
423434
: (LocalMemSzPtrPI ? LocalMemSzPtrPI : nullptr);
424435

425436
if (LocalMemSzPtr) {
426-
int DeviceMaxLocalMem = 0;
427-
UR_CHECK_ERROR(hipDeviceGetAttribute(
428-
&DeviceMaxLocalMem, hipDeviceAttributeMaxSharedMemoryPerBlock,
429-
Dev->get()));
430-
437+
int DeviceMaxLocalMem = Dev->getDeviceMaxLocalMem();
431438
static const int EnvVal = std::atoi(LocalMemSzPtr);
432439
if (EnvVal <= 0 || EnvVal > DeviceMaxLocalMem) {
433440
setErrorMessage(LocalMemSzPtrUR ? "Invalid value specified for "
@@ -1484,7 +1491,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch(
14841491

14851492
// If the device does not support managed memory access, we can't set
14861493
// mem_advise.
1487-
if (!getAttribute(Device, hipDeviceAttributeManagedMemory)) {
1494+
if (!Device->getManagedMemSupport()) {
14881495
releaseEvent();
14891496
setErrorMessage("mem_advise ignored as device does not support "
14901497
"managed memory access",
@@ -1558,7 +1565,7 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size,
15581565

15591566
// If the device does not support managed memory access, we can't set
15601567
// mem_advise.
1561-
if (!getAttribute(Device, hipDeviceAttributeManagedMemory)) {
1568+
if (!Device->getManagedMemSupport()) {
15621569
releaseEvent();
15631570
setErrorMessage("mem_advise ignored as device does not support "
15641571
"managed memory access",
@@ -1575,7 +1582,7 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size,
15751582
UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE |
15761583
UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE |
15771584
UR_USM_ADVICE_FLAG_DEFAULT)) {
1578-
if (!getAttribute(Device, hipDeviceAttributeConcurrentManagedAccess)) {
1585+
if (!Device->getConcurrentManagedAccess()) {
15791586
releaseEvent();
15801587
setErrorMessage("mem_advise ignored as device does not support "
15811588
"concurrent managed access",
@@ -1598,6 +1605,10 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size,
15981605
pMem, size, hipMemAdviseUnsetPreferredLocation, DeviceID));
15991606
UR_CHECK_ERROR(
16001607
hipMemAdvise(pMem, size, hipMemAdviseUnsetAccessedBy, DeviceID));
1608+
#if defined(__HIP_PLATFORM_AMD__)
1609+
UR_CHECK_ERROR(
1610+
hipMemAdvise(pMem, size, hipMemAdviseUnsetCoarseGrain, DeviceID));
1611+
#endif
16011612
} else {
16021613
Result = setHipMemAdvise(HIPDevicePtr, size, advice, DeviceID);
16031614
// UR_RESULT_ERROR_INVALID_ENUMERATION is returned when using a valid but
@@ -1663,8 +1674,57 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D(
16631674
UR_CHECK_ERROR(RetImplEvent->start());
16641675
}
16651676

1677+
// There is an issue with hipMemcpy2D* when hipMemcpyDefault is used, which
1678+
// makes the HIP runtime not correctly derive the copy kind (direction) for
1679+
// the copies since ROCm 5.6.0+. See: https://github.com/ROCm/clr/issues/40
1680+
// TODO: Add maximum HIP_VERSION when bug has been fixed.
1681+
#if HIP_VERSION >= 50600000
1682+
hipPointerAttribute_t srcAttribs{};
1683+
hipPointerAttribute_t dstAttribs{};
1684+
1685+
bool srcIsSystemAlloc{false};
1686+
bool dstIsSystemAlloc{false};
1687+
1688+
hipError_t hipRes{};
1689+
// hipErrorInvalidValue returned from hipPointerGetAttributes for a non-null
1690+
// pointer refers to an OS-allocation, hence pageable host memory. However,
1691+
// this means we cannot rely on the attributes result, hence we mark system
1692+
// pageable memory allocation manually as host memory. The HIP runtime can
1693+
// handle the registering/unregistering of the memory as long as the right
1694+
// copy-kind (direction) is provided to hipMemcpy2DAsync for this case.
1695+
hipRes = hipPointerGetAttributes(&srcAttribs, (const void *)pSrc);
1696+
if (hipRes == hipErrorInvalidValue && pSrc)
1697+
srcIsSystemAlloc = true;
1698+
hipRes = hipPointerGetAttributes(&dstAttribs, (const void *)pDst);
1699+
if (hipRes == hipErrorInvalidValue && pDst)
1700+
dstIsSystemAlloc = true;
1701+
1702+
const unsigned int srcMemType{srcAttribs.type};
1703+
const unsigned int dstMemType{dstAttribs.type};
1704+
1705+
const bool srcIsHost{(srcMemType == hipMemoryTypeHost) || srcIsSystemAlloc};
1706+
const bool srcIsDevice{srcMemType == hipMemoryTypeDevice};
1707+
const bool dstIsHost{(dstMemType == hipMemoryTypeHost) || dstIsSystemAlloc};
1708+
const bool dstIsDevice{dstMemType == hipMemoryTypeDevice};
1709+
1710+
unsigned int cpyKind{};
1711+
if (srcIsHost && dstIsHost)
1712+
cpyKind = hipMemcpyHostToHost;
1713+
else if (srcIsHost && dstIsDevice)
1714+
cpyKind = hipMemcpyHostToDevice;
1715+
else if (srcIsDevice && dstIsHost)
1716+
cpyKind = hipMemcpyDeviceToHost;
1717+
else if (srcIsDevice && dstIsDevice)
1718+
cpyKind = hipMemcpyDeviceToDevice;
1719+
else
1720+
cpyKind = hipMemcpyDefault;
1721+
1722+
UR_CHECK_ERROR(hipMemcpy2DAsync(pDst, dstPitch, pSrc, srcPitch, width,
1723+
height, (hipMemcpyKind)cpyKind, HIPStream));
1724+
#else
16661725
UR_CHECK_ERROR(hipMemcpy2DAsync(pDst, dstPitch, pSrc, srcPitch, width,
16671726
height, hipMemcpyDefault, HIPStream));
1727+
#endif
16681728

16691729
if (phEvent) {
16701730
UR_CHECK_ERROR(RetImplEvent->record());

source/adapters/hip/usm.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,10 @@ USMFreeImpl([[maybe_unused]] ur_context_handle_t hContext, void *pMem) {
7878
#else
7979
const auto Type = hipPointerAttributeType.memoryType;
8080
#endif
81-
UR_ASSERT(Type == hipMemoryTypeDevice || Type == hipMemoryTypeHost,
81+
UR_ASSERT(Type == hipMemoryTypeDevice || Type == hipMemoryTypeHost ||
82+
Type == hipMemoryTypeManaged,
8283
UR_RESULT_ERROR_INVALID_MEM_OBJECT);
83-
if (Type == hipMemoryTypeDevice) {
84+
if (Type == hipMemoryTypeDevice || Type == hipMemoryTypeManaged) {
8485
UR_CHECK_ERROR(hipFree(pMem));
8586
}
8687
if (Type == hipMemoryTypeHost) {

test/conformance/enqueue/urEnqueueUSMAdvise.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,11 @@ TEST_P(urEnqueueUSMAdviseTest, InvalidSizeTooLarge) {
6868
urEnqueueUSMAdvise(queue, ptr, allocation_size * 2,
6969
UR_USM_ADVICE_FLAG_DEFAULT, nullptr));
7070
}
71+
72+
TEST_P(urEnqueueUSMAdviseTest, NonCoherentDeviceMemorySuccessOrWarning) {
73+
ur_result_t result =
74+
urEnqueueUSMAdvise(queue, ptr, allocation_size,
75+
UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY, nullptr);
76+
ASSERT_EQ(result,
77+
result & (UR_RESULT_SUCCESS | UR_RESULT_ERROR_ADAPTER_SPECIFIC));
78+
}

0 commit comments

Comments
 (0)