在 quant_apply_mlp 中调用 wait_prefetch_done(),使计算流等待预取流结束

yuzhup · yuzhup · commit 7f680d0a54c5 · 2025-09-22T09:46:51.000+08:00
diff --git a/vllm_ascend/ops/moe/moe_mlp.py b/vllm_ascend/ops/moe/moe_mlp.py
@@ -86,7 +86,7 @@ def quant_apply_mlp(hidden_states: torch.Tensor,
 
     bias1, bias2 = None, None
     _output_dtype = w2_scale.dtype
-
+    wait_prefetch_done()
     is_mc2 = get_forward_context().moe_comm_method_name == "mc2commimpl"
     if w1_scale_bias is None and is_mc2:
         if w1_scale.dtype != torch.float32: