Skip to content

Commit 5977f97

Browse files
jeffdaily authored and
facebook-github-bot committed
more hipify v2 fixes (#4921)
Summary: Pull Request resolved: #4921 X-link: facebookresearch/FBGEMM#1898 Before the PyTorch hipify v2 PR lands, additional fixes are needed for the experimental gen_ai HIP sources. The fbgemm_gpu *.hip sources do not undergo additional hipify steps, and they were written assuming PyTorch's hipify v1 interfaces. Some small changes are necessary to make the sources work with either the hipify v1 or the hipify v2 torch APIs. Pull Request resolved: #4854 Reviewed By: atalman Differential Revision: D82186865 Pulled By: q10
1 parent 1eacb9e commit 5977f97

File tree

20 files changed

+70
-45
lines changed

20 files changed

+70
-45
lines changed

cmake/modules/RocmSetup.cmake

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,24 @@ if(FBGEMM_BUILD_VARIANT STREQUAL BUILD_VARIANT_ROCM)
2727
-Wno-ignored-attributes
2828
-Wno-unused-result)
2929

30+
# Is this hipify v2?
#
# Query torch.utils.hipify.__version__ through the configured Python
# interpreter. When the reported version is 2 or newer, append -DHIPIFY_V2 to
# HIP_HCC_FLAGS so that HIP sources guarded by `#ifdef HIPIFY_V2` take the
# v2 code path.
execute_process(
  COMMAND "${Python_EXECUTABLE}" -c
    "from torch.utils.hipify import __version__; print(__version__)"
  WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
  OUTPUT_VARIABLE _tempvar
  RESULT_VARIABLE _resvar
  ERROR_VARIABLE _errvar
  # Drop the newline emitted by print() so the value is a bare version string.
  OUTPUT_STRIP_TRAILING_WHITESPACE)

if(NOT "${_resvar}" EQUAL "0")
  # The query failed; warn and leave HIPIFY_V2 undefined rather than guessing
  # from whatever (possibly partial) output was captured.
  message(WARNING "Failed to execute Python (${Python_EXECUTABLE})\n"
    "Result: ${_resvar}\n"
    "Error: ${_errvar}\n")
elseif("${_tempvar}" MATCHES "^[0-9]+(\\.[0-9]+)*" AND
       "${_tempvar}" VERSION_GREATER_EQUAL "2")
  # Compare as a version, not as a substring: searching for the character "2"
  # would also match v1 releases such as "1.2.0" or "1.0.2".
  list(APPEND HIP_HCC_FLAGS -DHIPIFY_V2)
endif()
47+
3048
BLOCK_PRINT(
3149
"HIP found: ${HIP_FOUND}"
3250
"HIPCC compiler flags:"

fbgemm_gpu/experimental/gen_ai/src/quantize/ck_extensions/bf16_grouped/bf16_grouped_gemm.hip

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@
2323
#include "ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_xdl_cshuffle_tile_loop.hpp"
2424
#include "kernels/bf16_grouped_kernel_manifest.h"
2525

26+
#ifdef HIPIFY_V2
27+
#define getCurrentHIPStream getCurrentCUDAStream
28+
#endif
29+
2630
namespace fbgemm_gpu {
2731

2832
// Define useful types that are needed for various kernels.

fbgemm_gpu/experimental/gen_ai/src/quantize/ck_extensions/bf16_grouped/kernels/bf16_grouped_common.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@
77
*/
88

99
#include <ATen/ATen.h>
10-
#ifdef USE_ROCM
1110
#include <c10/hip/HIPStream.h>
12-
#else
13-
#include <c10/cuda/CUDAStream.h>
11+
12+
#ifdef HIPIFY_V2
13+
#define getCurrentHIPStream getCurrentCUDAStream
1414
#endif
1515

1616
#include "ck/ck.hpp"

fbgemm_gpu/experimental/gen_ai/src/quantize/ck_extensions/ck_utility.hip

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@
1717
#include <ATen/ATen.h>
1818
#include <c10/hip/HIPStream.h>
1919

20+
#ifdef HIPIFY_V2
21+
#define getCurrentHIPStream getCurrentCUDAStream
22+
#endif
23+
2024
#if defined(USE_ROCM)
2125

2226
#include "ck/ck.hpp"

fbgemm_gpu/experimental/gen_ai/src/quantize/ck_extensions/fp8_blockwise_gemm.hip

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@
1414
#include <ATen/ATen.h>
1515
#include <c10/hip/HIPStream.h>
1616

17+
#ifdef HIPIFY_V2
18+
#define getCurrentHIPStream getCurrentCUDAStream
19+
#endif
20+
1721
#include "ck/ck.hpp"
1822
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
1923
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"

fbgemm_gpu/experimental/gen_ai/src/quantize/ck_extensions/fp8_rowwise/kernels/fp8_rowwise_common.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,10 @@
99
#include <iostream>
1010

1111
#include <ATen/ATen.h>
12-
#ifdef USE_ROCM
1312
#include <c10/hip/HIPStream.h>
14-
#else
15-
#include <c10/cuda/CUDAStream.h>
13+
14+
#ifdef HIPIFY_V2
15+
#define getCurrentHIPStream getCurrentCUDAStream
1616
#endif
1717

1818
#include "ck/ck.hpp"

fbgemm_gpu/experimental/gen_ai/src/quantize/ck_extensions/fp8_rowwise_batched/kernels/fp8_rowwise_batched_common.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@
77
*/
88

99
#include <ATen/ATen.h>
10-
#ifdef USE_ROCM
1110
#include <c10/hip/HIPStream.h>
12-
#else
13-
#include <c10/cuda/CUDAStream.h>
11+
12+
#ifdef HIPIFY_V2
13+
#define getCurrentHIPStream getCurrentCUDAStream
1414
#endif
1515

1616
#include "ck/ck.hpp"

fbgemm_gpu/experimental/gen_ai/src/quantize/ck_extensions/fp8_rowwise_grouped/fp8_rowwise_grouped_gemm.hip

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,15 @@
1414
#include <ATen/core/Tensor.h>
1515
#include <c10/hip/HIPStream.h>
1616

17+
#ifdef HIPIFY_V2
18+
#define getCurrentHIPStream getCurrentCUDAStream
19+
#endif
20+
1721
#include "ck/ck.hpp"
1822
#include "ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_xdl_cshuffle_tile_loop.hpp"
1923
#include "kernels/fp8_rowwise_grouped_kernel_manifest.h"
2024
#include "kernels/fp8_rowwise_grouped_heuristic.hpp"
21-
#include "fbgemm_gpu/quantize/tuning_cache.hpp"
25+
#include "fbgemm_gpu/quantize/tuning_cache.cuh"
2226
#include "fbgemm_gpu/quantize/utils.h"
2327

2428
namespace fbgemm_gpu {

fbgemm_gpu/experimental/gen_ai/src/quantize/ck_extensions/fp8_rowwise_grouped/kernels/fp8_rowwise_grouped_common.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@
77
*/
88
#undef __HIP_NO_HALF_CONVERSIONS__
99
#include <ATen/ATen.h>
10-
#ifdef USE_ROCM
1110
#include <c10/hip/HIPStream.h>
12-
#else
13-
#include <c10/cuda/CUDAStream.h>
11+
12+
#ifdef HIPIFY_V2
13+
#define getCurrentHIPStream getCurrentCUDAStream
1414
#endif
1515

1616
#include "ck/ck.hpp"

fbgemm_gpu/experimental/gen_ai/src/quantize/ck_extensions/fp8_rowwise_preshuffle/kernels/fp8_rowwise_preshuffle_common.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,10 @@
99
#include <iostream>
1010

1111
#include <ATen/ATen.h>
12-
#ifdef USE_ROCM
1312
#include <c10/hip/HIPStream.h>
14-
#else
15-
#include <c10/cuda/CUDAStream.h>
13+
14+
#ifdef HIPIFY_V2
15+
#define getCurrentHIPStream getCurrentCUDAStream
1616
#endif
1717

1818
#include "ck/ck.hpp"

0 commit comments

Comments
 (0)