[Feature] qwen3_moe: QK norm supports multi_stream.

weijinqian_v1 · weijinqian_v1 · commit 70bde36ca6d0 · 2025-09-20T15:57:19.000+08:00
Signed-off-by: weijinqian_v1 <weijinqian@huawei.com>
diff --git a/vllm_ascend/models/qwen3.py b/vllm_ascend/models/qwen3.py
@@ -28,7 +28,7 @@
 
 from vllm_ascend import envs
 from vllm_ascend.ops.layernorm import AddRMSNormW8A8Quant
-from vllm_ascend.utils import npu_stream_switch
+from vllm_ascend.utils import npu_stream_switch_aclgraph
 
 
 def pad(tensor, x):
@@ -158,7 +158,7 @@ def forward(
     ) -> torch.Tensor:
         qkv, _ = self.qkv_proj(hidden_states)
         q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
-        with npu_stream_switch(self.alt_stream):
+        with npu_stream_switch_aclgraph(self.alt_stream):
             # Add qk-norm
             q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim,
                                self.head_dim)
diff --git a/vllm_ascend/utils.py b/vllm_ascend/utils.py
@@ -456,7 +456,7 @@ def delete_torchair_cache_file():
         pass
 
 
-def npu_stream_switch(target_stream: torch.npu.Stream,
+def npu_stream_switch_aclgraph(target_stream: torch.npu.Stream,
                       *,
                       enabled: bool = True):
     """