Skip to content

Commit d152bce

Browse files
committed
more
1 parent dddfeb2 commit d152bce

File tree

3 files changed

+42
-14
lines changed

3 files changed

+42
-14
lines changed

Src/Base/AMReX_BaseFab.H

Lines changed: 16 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1205,8 +1205,14 @@ BaseFab<T>::prefetchToHost () const noexcept
12051205
#elif defined(AMREX_USE_CUDA) && !defined(_WIN32)
12061206
if (Gpu::Device::devicePropMajor() >= 6) {
12071207
std::size_t s = sizeof(T)*this->nvar*this->domain.numPts();
1208-
AMREX_CUDA_SAFE_CALL(cudaMemPrefetchAsync(this->dptr, s,
1209-
cudaCpuDeviceId,
1208+
#if defined(__CUDACC__) && (__CUDACC_VER_MAJOR__ >= 13)
1209+
cudaMemLocation location = {};
1210+
location.type = cudaMemLocationTypeDevice;
1211+
location.id = cudaCpuDeviceId;
1212+
#else
1213+
auto location = cudaCpuDeviceId;
1214+
#endif
1215+
AMREX_CUDA_SAFE_CALL(cudaMemPrefetchAsync(this->dptr, s, location,
12101216
Gpu::gpuStream()));
12111217
}
12121218
#elif defined(AMREX_USE_HIP)
@@ -1229,8 +1235,14 @@ BaseFab<T>::prefetchToDevice () const noexcept
12291235
#elif defined(AMREX_USE_CUDA) && !defined(_WIN32)
12301236
if (Gpu::Device::devicePropMajor() >= 6) {
12311237
std::size_t s = sizeof(T)*this->nvar*this->domain.numPts();
1232-
AMREX_CUDA_SAFE_CALL(cudaMemPrefetchAsync(this->dptr, s,
1233-
Gpu::Device::deviceId(),
1238+
#if defined(__CUDACC__) && (__CUDACC_VER_MAJOR__ >= 13)
1239+
cudaMemLocation location = {};
1240+
location.type = cudaMemLocationTypeDevice;
1241+
location.id = Gpu::Device::deviceId();
1242+
#else
1243+
auto location = Gpu::Device::deviceId();
1244+
#endif
1245+
AMREX_CUDA_SAFE_CALL(cudaMemPrefetchAsync(this->dptr, s, location,
12341246
Gpu::gpuStream()));
12351247
}
12361248
#elif defined(AMREX_USE_HIP)

Src/Base/AMReX_GpuContainers.H

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -344,9 +344,16 @@ namespace amrex::Gpu {
344344
// Currently only implemented for CUDA.
345345
#if defined(AMREX_USE_CUDA) && !defined(_WIN32)
346346
if (Gpu::Device::devicePropMajor() >= 6) {
347+
#if defined(__CUDACC__) && (__CUDACC_VER_MAJOR__ >= 13)
348+
cudaMemLocation location = {};
349+
location.type = cudaMemLocationTypeDevice;
350+
location.id = cudaCpuDeviceId;
351+
#else
352+
auto location = cudaCpuDeviceId;
353+
#endif
347354
AMREX_CUDA_SAFE_CALL(cudaMemPrefetchAsync(&(*begin),
348355
size*sizeof(value_type),
349-
cudaCpuDeviceId,
356+
location,
350357
Gpu::gpuStream()));
351358
}
352359
#endif
@@ -375,9 +382,16 @@ namespace amrex::Gpu {
375382
// Currently only implemented for CUDA.
376383
#if defined(AMREX_USE_CUDA) && !defined(_WIN32)
377384
if (Gpu::Device::devicePropMajor() >= 6) {
385+
#if defined(__CUDACC__) && (__CUDACC_VER_MAJOR__ >= 13)
386+
cudaMemLocation location = {};
387+
location.type = cudaMemLocationTypeDevice;
388+
location.id = Gpu::Device::deviceId();
389+
#else
390+
auto location = Gpu::Device::deviceId();
391+
#endif
378392
AMREX_CUDA_SAFE_CALL(cudaMemPrefetchAsync(&(*begin),
379393
size*sizeof(value_type),
380-
Gpu::Device::deviceId(),
394+
location,
381395
Gpu::gpuStream()));
382396
}
383397
#endif

Src/Base/AMReX_GpuDevice.cpp

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -828,19 +828,20 @@ Device::mem_advise_set_preferred (void* p, std::size_t sz, int device)
828828
#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
829829
if (device_prop.managedMemory == 1 && device_prop.concurrentManagedAccess == 1)
830830
{
831+
#if defined(AMREX_USE_CUDA)
831832
#if defined(__CUDACC__) && (__CUDACC_VER_MAJOR__ >= 13)
832833
cudaMemLocation location = {};
833834
location.type = cudaMemLocationTypeDevice;
834835
location.id = device;
835-
AMREX_CUDA_SAFE_CALL(
836-
cudaMemAdvise(p, sz, cudaMemAdviseSetPreferredLocation, location));
837836
#else
837+
auto location = device;
838+
#endif
839+
#endif
838840
AMREX_HIP_OR_CUDA
839841
(AMREX_HIP_SAFE_CALL(
840842
hipMemAdvise(p, sz, hipMemAdviseSetPreferredLocation, device)),
841843
AMREX_CUDA_SAFE_CALL(
842-
cudaMemAdvise(p, sz, cudaMemAdviseSetPreferredLocation, device)));
843-
#endif
844+
cudaMemAdvise(p, sz, cudaMemAdviseSetPreferredLocation, location)));
844845
}
845846
#elif defined(AMREX_USE_SYCL)
846847
// xxxxx SYCL todo: mem_advise
@@ -859,19 +860,20 @@ Device::mem_advise_set_readonly (void* p, std::size_t sz)
859860
#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
860861
if (device_prop.managedMemory == 1 && device_prop.concurrentManagedAccess == 1)
861862
{
863+
#if defined(AMREX_USE_CUDA)
862864
#if defined(__CUDACC__) && (__CUDACC_VER_MAJOR__ >= 13)
863865
cudaMemLocation location = {};
864866
location.type = cudaMemLocationTypeDevice;
865867
location.id = cudaCpuDeviceId;
866-
AMREX_CUDA_SAFE_CALL(
867-
cudaMemAdvise(p, sz, cudaMemAdviseSetReadMostly, location));
868868
#else
869+
auto location = cudaCpuDeviceId;
870+
#endif
871+
#endif
869872
AMREX_HIP_OR_CUDA
870873
(AMREX_HIP_SAFE_CALL(
871874
hipMemAdvise(p, sz, hipMemAdviseSetReadMostly, hipCpuDeviceId)),
872875
AMREX_CUDA_SAFE_CALL(
873-
cudaMemAdvise(p, sz, cudaMemAdviseSetReadMostly, cudaCpuDeviceId)));
874-
#endif
876+
cudaMemAdvise(p, sz, cudaMemAdviseSetReadMostly, location)));
875877
}
876878
#elif defined(AMREX_USE_SYCL)
877879
// xxxxx SYCL todo: mem_advise

0 commit comments

Comments
 (0)