Skip to content

Commit c87cc9b

Browse files
Fix 20x slowdown of FP6 kernel due to device properties query (#1092)
Replace `cudaGetDeviceProperties` with `cudaDeviceGetAttribute`
1 parent: 893cafe · commit: c87cc9b

File tree

1 file changed: 8 additions (+8) and 9 deletions (-9)

torchao/csrc/cuda/fp6_llm/fp6_linear.cu

Lines changed: 8 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -29,17 +29,16 @@
2929
inline bool isSM75GPU() {
3030
int device;
3131
cudaError_t err = cudaGetDevice(&device);
32-
if (err != cudaSuccess) {
33-
return false;
34-
}
32+
if (err != cudaSuccess) return false;
3533

36-
cudaDeviceProp props;
37-
err = cudaGetDeviceProperties(&props, device);
38-
if (err != cudaSuccess) {
39-
return false;
40-
}
34+
int major, minor;
35+
err = cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, device);
36+
if (err != cudaSuccess) return false;
37+
38+
err = cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, device);
39+
if (err != cudaSuccess) return false;
4140

42-
return (props.major == 7) && (props.minor == 5);
41+
return (major == 7) && (minor == 5);
4342
}
4443

4544
template<typename TilingConfig, typename OutputDataType, int EXPONENT, int MANTISSA>

0 commit comments

Comments
 (0)