Skip to content

Commit dddfecf

Browse files
author
pytorchbot
committed
2025-09-21 nightly release (3cefe05)
1 parent d600291 commit dddfecf

File tree

6 files changed, with 12 additions and 14 deletions.

fbgemm_gpu/experimental/gen_ai/src/quantize/common/include/fbgemm_gpu/quantize/utils.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,4 @@ constexpr int64_t nextPowerOf2(int64_t num) {
1919
return 1 << (CHAR_BIT * sizeof(num) - __builtin_clz(num - 1));
2020
}
2121

22-
int getDeviceArch();
23-
2422
} // namespace fbgemm_gpu

fbgemm_gpu/experimental/gen_ai/src/quantize/common/utils.cpp renamed to fbgemm_gpu/experimental/gen_ai/src/quantize/common/include/fbgemm_gpu/quantize/utils_gpu.h

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,29 +6,25 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88

9-
#include "fbgemm_gpu/quantize/utils.h" // @manual
9+
#pragma once
1010

11-
#include <ATen/ATen.h>
12-
#include <c10/cuda/CUDAException.h>
13-
#include <cuda_runtime.h>
11+
#include <ATen/cuda/CUDAContext.h>
1412

1513
namespace fbgemm_gpu {
1614

17-
int getDeviceArch() {
15+
inline int getDeviceArch() {
1816
static int arch = []() {
19-
// Avoid expensive cudaGetDeviceProperties call.
20-
cudaDeviceProp prop;
21-
cudaGetDeviceProperties(&prop, 0);
22-
23-
if (prop.major >= 10) {
17+
const int majorVersion =
18+
at::cuda::getDeviceProperties(at::cuda::current_device())->major;
19+
if (majorVersion >= 10) {
2420
int runtimeVersion = 0;
2521
C10_CUDA_CHECK(cudaRuntimeGetVersion(&runtimeVersion));
2622
TORCH_CHECK(
2723
runtimeVersion >= 12080, "SM100a+ kernels require cuda >= 12.8");
2824
}
29-
30-
return prop.major;
25+
return majorVersion;
3126
}();
3227
return arch;
3328
}
29+
3430
} // namespace fbgemm_gpu

fbgemm_gpu/experimental/gen_ai/src/quantize/cutlass_extensions/bf16bf16bf16_grouped.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "bf16bf16bf16_grouped/bf16bf16bf16_grouped_manifest.cuh"
1313
#include "fbgemm_gpu/quantize/tuning_cache.hpp"
1414
#include "fbgemm_gpu/quantize/utils.h"
15+
#include "fbgemm_gpu/quantize/utils_gpu.h"
1516

1617
namespace fbgemm_gpu {
1718

fbgemm_gpu/experimental/gen_ai/src/quantize/cutlass_extensions/f8f8bf16_groupwise.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "f8f8bf16_groupwise/f8f8bf16_groupwise_manifest.cuh"
1414
#include "fbgemm_gpu/quantize/tuning_cache.hpp"
1515
#include "fbgemm_gpu/quantize/utils.h"
16+
#include "fbgemm_gpu/quantize/utils_gpu.h"
1617

1718
namespace fbgemm_gpu {
1819

fbgemm_gpu/experimental/gen_ai/src/quantize/cutlass_extensions/f8f8bf16_rowwise_batched.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "f8f8bf16_rowwise_batched/f8f8bf16_rowwise_batched_manifest.cuh"
1212

1313
#include "fbgemm_gpu/quantize/utils.h"
14+
#include "fbgemm_gpu/quantize/utils_gpu.h"
1415

1516
namespace fbgemm_gpu {
1617

fbgemm_gpu/experimental/gen_ai/src/quantize/cutlass_extensions/f8f8bf16_rowwise_grouped.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "f8f8bf16_rowwise_grouped_sm100/f8f8bf16_rowwise_grouped_manifest.cuh"
1515
#include "fbgemm_gpu/quantize/tuning_cache.hpp"
1616
#include "fbgemm_gpu/quantize/utils.h"
17+
#include "fbgemm_gpu/quantize/utils_gpu.h"
1718

1819
namespace fbgemm_gpu {
1920

0 commit comments

Comments
 (0)