Skip to content

Commit 8c36bca

Browse files
JChunX authored and facebook-github-bot committed
CMF v0 FP8 triton tune (#4873)
Summary:
X-link: facebookresearch/FBGEMM#1895

Pull Request resolved: #4873

Boost Triton FP8 perf for CMF v0 model, shape (2048, 5120, 5376)

Reviewed By: RandySheriff

Differential Revision: D82254779

fbshipit-source-id: e13bd8a68195904db37db8cc8748226a77564db7
1 parent 12a1be8 commit 8c36bca

File tree

1 file changed

+30
-2
lines changed

1 file changed

+30
-2
lines changed

fbgemm_gpu/experimental/gemm/triton_gemm/fp8_gemm.py

Lines changed: 30 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3868,6 +3868,20 @@ def get_full_non_persistent_tuning_space():
38683868
num_warps=8,
38693869
num_stages=2,
38703870
),
3871+
triton.Config(
3872+
{
3873+
"BLOCK_M": 256,
3874+
"BLOCK_N": 256,
3875+
"BLOCK_K": 128,
3876+
"GROUP_M": 2,
3877+
"SPLIT_K": 1,
3878+
"waves_per_eu": 0,
3879+
"matrix_instr_nonkdim": 16,
3880+
"kpack": 1,
3881+
},
3882+
num_warps=8,
3883+
num_stages=2,
3884+
),
38713885
triton.Config(
38723886
{
38733887
"BLOCK_M": 256,
@@ -3876,12 +3890,26 @@ def get_full_non_persistent_tuning_space():
38763890
"GROUP_M": 2,
38773891
"SPLIT_K": 1,
38783892
"waves_per_eu": 2,
3879-
"matrix_instr_nonkdim": 32,
3880-
"kpack": 2,
3893+
"matrix_instr_nonkdim": 16,
3894+
"kpack": 1,
38813895
},
38823896
num_warps=8,
38833897
num_stages=2,
38843898
),
3899+
triton.Config(
3900+
{
3901+
"BLOCK_M": 128,
3902+
"BLOCK_N": 256,
3903+
"BLOCK_K": 64,
3904+
"GROUP_M": 2,
3905+
"SPLIT_K": 1,
3906+
"waves_per_eu": 2,
3907+
"matrix_instr_nonkdim": 16,
3908+
"kpack": 1,
3909+
},
3910+
num_warps=4,
3911+
num_stages=2,
3912+
),
38853913
triton.Config(
38863914
{
38873915
"BLOCK_M": 256,

0 commit comments

Comments
 (0)