Skip to content

Commit 70bde36

Browse files
author
weijinqian_v1
committed
[Feature] qwen3_moe qk norm supports multi_stream.
Signed-off-by: weijinqian_v1 <weijinqian@huawei.com>
1 parent e3ede4c commit 70bde36

File tree

2 files changed

+3
-3
lines changed

2 files changed

+3
-3
lines changed

vllm_ascend/models/qwen3.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
28 28

29 29
from vllm_ascend import envs
30 30
from vllm_ascend.ops.layernorm import AddRMSNormW8A8Quant
31 -
from vllm_ascend.utils import npu_stream_switch
31 +
from vllm_ascend.utils import npu_stream_switch_aclgraph
32 32

33 33

34 34
def pad(tensor, x):
@@ -158,7 +158,7 @@ def forward(
158 158
) -> torch.Tensor:
159 159
qkv, _ = self.qkv_proj(hidden_states)
160 160
q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
161 -
with npu_stream_switch(self.alt_stream):
161 +
with npu_stream_switch_aclgraph(self.alt_stream):
162 162
# Add qk-norm
163 163
q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim,
164 164
self.head_dim)

vllm_ascend/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -456,7 +456,7 @@ def delete_torchair_cache_file():
456 456
pass
457 457

458 458

459 -
def npu_stream_switch(target_stream: torch.npu.Stream,
459 +
def npu_stream_switch_aclgraph(target_stream: torch.npu.Stream,
460 460
*,
461 461
enabled: bool = True):
462 462
"""

0 commit comments

Comments
 (0)