From 7b178977642b934de9ed03ff16e06771685f60d7 Mon Sep 17 00:00:00 2001 From: Jeff Daily Date: Tue, 22 Jul 2025 22:10:53 +0000 Subject: [PATCH] [ROCm] do not use __shfl sync functions Guard the at::Half overloads of __shfl_up_sync and __shfl_down_sync with #ifndef USE_ROCM so that they are not compiled in ROCm builds. This avoids a static_assert compile-time failure for HIP. --- csrc/cuda/utils.cuh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/csrc/cuda/utils.cuh b/csrc/cuda/utils.cuh index 396b4fa1..53d328cd 100644 --- a/csrc/cuda/utils.cuh +++ b/csrc/cuda/utils.cuh @@ -6,6 +6,7 @@ AT_ASSERTM(x.device().is_cuda(), #x " must be CUDA tensor") #define CHECK_INPUT(x) AT_ASSERTM(x, "Input mismatch") +#ifndef USE_ROCM __device__ __inline__ at::Half __shfl_up_sync(const unsigned mask, const at::Half var, const unsigned int delta) { @@ -17,6 +18,7 @@ __device__ __inline__ at::Half __shfl_down_sync(const unsigned mask, const unsigned int delta) { return __shfl_down_sync(mask, var.operator __half(), delta); } +#endif __device__ __inline__ at::Half __shfl_up(const at::Half var, const unsigned int delta) {