We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 51254d0, commit 7f680d0 (Copy full SHA for 7f680d0)
vllm_ascend/ops/moe/moe_mlp.py
@@ -86,7 +86,7 @@ def quant_apply_mlp(hidden_states: torch.Tensor,
86
87
bias1, bias2 = None, None
88
_output_dtype = w2_scale.dtype
89
-
+ wait_prefetch_done()
90
is_mc2 = get_forward_context().moe_comm_method_name == "mc2commimpl"
91
if w1_scale_bias is None and is_mc2:
92
if w1_scale.dtype != torch.float32:
0 commit comments