We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 6ef0978 · commit d54fb85 · Copy full SHA for d54fb85
fbgemm_gpu/experimental/gemm/triton_gemm/fp8_gemm.py
@@ -3866,6 +3866,20 @@ def get_full_non_persistent_tuning_space():
3866
num_warps=8,
3867
num_stages=2,
3868
),
3869
+ triton.Config(
3870
+ {
3871
+ "BLOCK_M": 256,
3872
+ "BLOCK_N": 256,
3873
+ "BLOCK_K": 64,
3874
+ "GROUP_M": 2,
3875
+ "SPLIT_K": 1,
3876
+ "waves_per_eu": 2,
3877
+ "matrix_instr_nonkdim": 32,
3878
+ "kpack": 2,
3879
+ },
3880
+ num_warps=8,
3881
+ num_stages=2,
3882
+ ),
3883
triton.Config(
3884
{
3885
"BLOCK_M": 256,
0 commit comments