Skip to content

Commit cd97e17

Browse files
authored
Merge pull request #1249 from GeorgeWeb/georgi/hip_memadvise_coarse_grained
[HIP] Implement coarse-grained memory advice for the HIP adapter
2 parents bd745d1 + 2a9ded6 commit cd97e17

File tree

6 files changed

+91
-26
lines changed

6 files changed

+91
-26
lines changed

include/ur_api.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3237,13 +3237,16 @@ typedef enum ur_usm_advice_flag_t {
32373237
UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_HOST = UR_BIT(12), ///< Removes the effect of ::UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_HOST
32383238
UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION_HOST = UR_BIT(13), ///< Hint that the preferred memory location is the host
32393239
UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST = UR_BIT(14), ///< Removes the effect of ::UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION_HOST
3240+
UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY = UR_BIT(15), ///< Hint that memory coherence will be coarse-grained (up-to-date only at
3241+
///< kernel boundaries)
3242+
UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY = UR_BIT(16), ///< Removes the effect of ::UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY
32403243
/// @cond
32413244
UR_USM_ADVICE_FLAG_FORCE_UINT32 = 0x7fffffff
32423245
/// @endcond
32433246

32443247
} ur_usm_advice_flag_t;
32453248
/// @brief Bit Mask for validating ur_usm_advice_flags_t
3246-
#define UR_USM_ADVICE_FLAGS_MASK 0xffff8000
3249+
#define UR_USM_ADVICE_FLAGS_MASK 0xfffe0000
32473250

32483251
///////////////////////////////////////////////////////////////////////////////
32493252
/// @brief Handle of USM pool

include/ur_print.hpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6277,6 +6277,12 @@ inline std::ostream &operator<<(std::ostream &os, ur_usm_advice_flag_t value) {
62776277
case UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST:
62786278
os << "UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST";
62796279
break;
6280+
case UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY:
6281+
os << "UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY";
6282+
break;
6283+
case UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY:
6284+
os << "UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY";
6285+
break;
62806286
default:
62816287
os << "unknown enumerator";
62826288
break;
@@ -6441,6 +6447,26 @@ inline ur_result_t printFlag<ur_usm_advice_flag_t>(std::ostream &os, uint32_t fl
64416447
}
64426448
os << UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION_HOST;
64436449
}
6450+
6451+
if ((val & UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY) == (uint32_t)UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY) {
6452+
val ^= (uint32_t)UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY;
6453+
if (!first) {
6454+
os << " | ";
6455+
} else {
6456+
first = false;
6457+
}
6458+
os << UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY;
6459+
}
6460+
6461+
if ((val & UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY) == (uint32_t)UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY) {
6462+
val ^= (uint32_t)UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY;
6463+
if (!first) {
6464+
os << " | ";
6465+
} else {
6466+
first = false;
6467+
}
6468+
os << UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY;
6469+
}
64446470
if (val != 0) {
64456471
std::bitset<32> bits(val);
64466472
if (!first) {

scripts/core/usm.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,12 @@ etors:
126126
- name: CLEAR_PREFERRED_LOCATION_HOST
127127
value: "$X_BIT(14)"
128128
desc: "Removes the effect of $X_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION_HOST"
129+
- name: SET_NON_COHERENT_MEMORY
130+
value: "$X_BIT(15)"
131+
desc: "Hint that memory coherence will be coarse-grained (up-to-date only at kernel boundaries)"
132+
- name: CLEAR_NON_COHERENT_MEMORY
133+
value: "$X_BIT(16)"
134+
desc: "Removes the effect of $X_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY"
129135
--- #--------------------------------------------------------------------------
130136
type: handle
131137
desc: "Handle of USM pool"

source/adapters/cuda/enqueue.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,10 +114,13 @@ ur_result_t setCuMemAdvise(CUdeviceptr DevPtr, size_t Size,
114114
}
115115
}
116116

117-
std::array<ur_usm_advice_flags_t, 4> UnmappedMemAdviceFlags = {
117+
std::array<ur_usm_advice_flags_t, 6> UnmappedMemAdviceFlags = {
118118
UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY,
119119
UR_USM_ADVICE_FLAG_CLEAR_NON_ATOMIC_MOSTLY,
120-
UR_USM_ADVICE_FLAG_BIAS_CACHED, UR_USM_ADVICE_FLAG_BIAS_UNCACHED};
120+
UR_USM_ADVICE_FLAG_BIAS_CACHED,
121+
UR_USM_ADVICE_FLAG_BIAS_UNCACHED,
122+
UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY,
123+
UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY};
121124

122125
for (auto &UnmappedFlag : UnmappedMemAdviceFlags) {
123126
if (URAdviceFlags & UnmappedFlag) {

source/adapters/hip/enqueue.cpp

Lines changed: 42 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -75,30 +75,46 @@ ur_result_t setHipMemAdvise(const void *DevPtr, const size_t Size,
7575
if (URAdviceFlags &
7676
(UR_USM_ADVICE_FLAG_SET_NON_ATOMIC_MOSTLY |
7777
UR_USM_ADVICE_FLAG_CLEAR_NON_ATOMIC_MOSTLY |
78-
UR_USM_ADVICE_FLAG_BIAS_CACHED | UR_USM_ADVICE_FLAG_BIAS_UNCACHED)) {
78+
UR_USM_ADVICE_FLAG_BIAS_CACHED | UR_USM_ADVICE_FLAG_BIAS_UNCACHED
79+
#if !defined(__HIP_PLATFORM_AMD__)
80+
| UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY |
81+
UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY
82+
#endif
83+
)) {
7984
return UR_RESULT_ERROR_INVALID_ENUMERATION;
8085
}
8186

8287
using ur_to_hip_advice_t = std::pair<ur_usm_advice_flags_t, hipMemoryAdvise>;
8388

84-
static constexpr std::array<ur_to_hip_advice_t, 6>
85-
URToHIPMemAdviseDeviceFlags{
86-
std::make_pair(UR_USM_ADVICE_FLAG_SET_READ_MOSTLY,
87-
hipMemAdviseSetReadMostly),
88-
std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY,
89-
hipMemAdviseUnsetReadMostly),
90-
std::make_pair(UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION,
91-
hipMemAdviseSetPreferredLocation),
92-
std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION,
93-
hipMemAdviseUnsetPreferredLocation),
94-
std::make_pair(UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE,
95-
hipMemAdviseSetAccessedBy),
96-
std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE,
97-
hipMemAdviseUnsetAccessedBy),
98-
};
99-
for (auto &FlagPair : URToHIPMemAdviseDeviceFlags) {
100-
if (URAdviceFlags & FlagPair.first) {
101-
UR_CHECK_ERROR(hipMemAdvise(DevPtr, Size, FlagPair.second, Device));
89+
#if defined(__HIP_PLATFORM_AMD__)
90+
constexpr size_t DeviceFlagCount = 8;
91+
#else
92+
constexpr size_t DeviceFlagCount = 6;
93+
#endif
94+
static constexpr std::array<ur_to_hip_advice_t, DeviceFlagCount>
95+
URToHIPMemAdviseDeviceFlags {
96+
std::make_pair(UR_USM_ADVICE_FLAG_SET_READ_MOSTLY,
97+
hipMemAdviseSetReadMostly),
98+
std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_READ_MOSTLY,
99+
hipMemAdviseUnsetReadMostly),
100+
std::make_pair(UR_USM_ADVICE_FLAG_SET_PREFERRED_LOCATION,
101+
hipMemAdviseSetPreferredLocation),
102+
std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_PREFERRED_LOCATION,
103+
hipMemAdviseUnsetPreferredLocation),
104+
std::make_pair(UR_USM_ADVICE_FLAG_SET_ACCESSED_BY_DEVICE,
105+
hipMemAdviseSetAccessedBy),
106+
std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_DEVICE,
107+
hipMemAdviseUnsetAccessedBy),
108+
#if defined(__HIP_PLATFORM_AMD__)
109+
std::make_pair(UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY,
110+
hipMemAdviseSetCoarseGrain),
111+
std::make_pair(UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY,
112+
hipMemAdviseUnsetCoarseGrain),
113+
#endif
114+
};
115+
for (const auto &[URAdvice, HIPAdvice] : URToHIPMemAdviseDeviceFlags) {
116+
if (URAdviceFlags & URAdvice) {
117+
UR_CHECK_ERROR(hipMemAdvise(DevPtr, Size, HIPAdvice, Device));
102118
}
103119
}
104120

@@ -113,10 +129,9 @@ ur_result_t setHipMemAdvise(const void *DevPtr, const size_t Size,
113129
hipMemAdviseUnsetAccessedBy),
114130
};
115131

116-
for (auto &FlagPair : URToHIPMemAdviseHostFlags) {
117-
if (URAdviceFlags & FlagPair.first) {
118-
UR_CHECK_ERROR(
119-
hipMemAdvise(DevPtr, Size, FlagPair.second, hipCpuDeviceId));
132+
for (const auto &[URAdvice, HIPAdvice] : URToHIPMemAdviseHostFlags) {
133+
if (URAdviceFlags & URAdvice) {
134+
UR_CHECK_ERROR(hipMemAdvise(DevPtr, Size, HIPAdvice, hipCpuDeviceId));
120135
}
121136
}
122137

@@ -1590,6 +1605,10 @@ urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size,
15901605
pMem, size, hipMemAdviseUnsetPreferredLocation, DeviceID));
15911606
UR_CHECK_ERROR(
15921607
hipMemAdvise(pMem, size, hipMemAdviseUnsetAccessedBy, DeviceID));
1608+
#if defined(__HIP_PLATFORM_AMD__)
1609+
UR_CHECK_ERROR(
1610+
hipMemAdvise(pMem, size, hipMemAdviseUnsetCoarseGrain, DeviceID));
1611+
#endif
15931612
} else {
15941613
Result = setHipMemAdvise(HIPDevicePtr, size, advice, DeviceID);
15951614
// UR_RESULT_ERROR_INVALID_ENUMERATION is returned when using a valid but

test/conformance/enqueue/urEnqueueUSMAdvise.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,11 @@ TEST_P(urEnqueueUSMAdviseTest, InvalidSizeTooLarge) {
6868
urEnqueueUSMAdvise(queue, ptr, allocation_size * 2,
6969
UR_USM_ADVICE_FLAG_DEFAULT, nullptr));
7070
}
71+
72+
// Requests coarse-grained (non-coherent) memory advice on the fixture's
// allocation and accepts exactly two outcomes: success, or the
// adapter-specific result code (presumably used by adapters that cannot honor
// the hint and report it as a warning -- confirm against the adapters).
// ur_result_t enumerators are result codes, not bit flags, so the accepted
// values are compared explicitly; masking them together would also let
// unrelated error codes whose bits overlap the mask pass.
TEST_P(urEnqueueUSMAdviseTest, NonCoherentDeviceMemorySuccessOrWarning) {
73+
const ur_result_t result =
74+
urEnqueueUSMAdvise(queue, ptr, allocation_size,
75+
UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY, nullptr);
76+
ASSERT_TRUE(result == UR_RESULT_SUCCESS ||
77+
result == UR_RESULT_ERROR_ADAPTER_SPECIFIC);
78+
}

0 commit comments

Comments
 (0)