File tree Expand file tree Collapse file tree 6 files changed +12
-14
lines changed
fbgemm_gpu/experimental/gen_ai/src/quantize
common/include/fbgemm_gpu/quantize Expand file tree Collapse file tree 6 files changed +12
-14
lines changed Original file line number Diff line number Diff line change @@ -19,6 +19,4 @@ constexpr int64_t nextPowerOf2(int64_t num) {
19
19
return 1 << (CHAR_BIT * sizeof (num) - __builtin_clz (num - 1 ));
20
20
}
21
21
22
- int getDeviceArch ();
23
-
24
22
} // namespace fbgemm_gpu
Original file line number Diff line number Diff line change 6
6
* LICENSE file in the root directory of this source tree.
7
7
*/
8
8
9
- #include " fbgemm_gpu/quantize/utils.h " // @manual
9
+ #pragma once
10
10
11
- #include < ATen/ATen.h>
12
- #include < c10/cuda/CUDAException.h>
13
- #include < cuda_runtime.h>
11
+ #include < ATen/cuda/CUDAContext.h>
14
12
15
13
namespace fbgemm_gpu {
16
14
17
- int getDeviceArch () {
15
+ inline int getDeviceArch () {
18
16
static int arch = []() {
19
- // Avoid expensive cudaGetDeviceProperties call.
20
- cudaDeviceProp prop;
21
- cudaGetDeviceProperties (&prop, 0 );
22
-
23
- if (prop.major >= 10 ) {
17
+ const int majorVersion =
18
+ at::cuda::getDeviceProperties (at::cuda::current_device ())->major ;
19
+ if (majorVersion >= 10 ) {
24
20
int runtimeVersion = 0 ;
25
21
C10_CUDA_CHECK (cudaRuntimeGetVersion (&runtimeVersion));
26
22
TORCH_CHECK (
27
23
runtimeVersion >= 12080 , " SM100a+ kernels require cuda >= 12.8" );
28
24
}
29
-
30
- return prop.major ;
25
+ return majorVersion;
31
26
}();
32
27
return arch;
33
28
}
29
+
34
30
} // namespace fbgemm_gpu
Original file line number Diff line number Diff line change 12
12
#include " bf16bf16bf16_grouped/bf16bf16bf16_grouped_manifest.cuh"
13
13
#include " fbgemm_gpu/quantize/tuning_cache.hpp"
14
14
#include " fbgemm_gpu/quantize/utils.h"
15
+ #include " fbgemm_gpu/quantize/utils_gpu.h"
15
16
16
17
namespace fbgemm_gpu {
17
18
Original file line number Diff line number Diff line change 13
13
#include " f8f8bf16_groupwise/f8f8bf16_groupwise_manifest.cuh"
14
14
#include " fbgemm_gpu/quantize/tuning_cache.hpp"
15
15
#include " fbgemm_gpu/quantize/utils.h"
16
+ #include " fbgemm_gpu/quantize/utils_gpu.h"
16
17
17
18
namespace fbgemm_gpu {
18
19
Original file line number Diff line number Diff line change 11
11
#include " f8f8bf16_rowwise_batched/f8f8bf16_rowwise_batched_manifest.cuh"
12
12
13
13
#include " fbgemm_gpu/quantize/utils.h"
14
+ #include " fbgemm_gpu/quantize/utils_gpu.h"
14
15
15
16
namespace fbgemm_gpu {
16
17
Original file line number Diff line number Diff line change 14
14
#include " f8f8bf16_rowwise_grouped_sm100/f8f8bf16_rowwise_grouped_manifest.cuh"
15
15
#include " fbgemm_gpu/quantize/tuning_cache.hpp"
16
16
#include " fbgemm_gpu/quantize/utils.h"
17
+ #include " fbgemm_gpu/quantize/utils_gpu.h"
17
18
18
19
namespace fbgemm_gpu {
19
20
You can’t perform that action at this time.
0 commit comments