We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent fff3fcb · commit 9aaf36c — Copy full SHA for 9aaf36c
vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py
@@ -33,7 +33,7 @@ def flashinfer_fused_moe_blockscale_fp8(
     routing_method_type: int = 2,
 ) -> torch.Tensor:
     from vllm.utils.flashinfer import flashinfer_trtllm_fp8_block_scale_moe
-
+    topk_group = topk_group if topk_group is not None else 0
     assert top_k <= global_num_experts
     assert top_k <= 10
     # assert topk_group <= 4
0 commit comments