Commit 0fbf909

fix lint

Signed-off-by: whx-sjtu <2952154980@qq.com>
Parent: 77ab9ef

4 files changed: +10, -5 lines

tests/e2e/singlecard/test_multistream_overlap_shared_expert.py

Lines changed: 4 additions & 2 deletions

@@ -52,7 +52,8 @@ def test_models_with_multistream_overlap_shared_expert(
             "multistream_overlap_shared_expert": True,
         },
     ) as runner:
-        vllm_moe_ms_eager_outputs = runner.model.generate(prompts, sampling_params)
+        vllm_moe_ms_eager_outputs = runner.model.generate(
+            prompts, sampling_params)

     with VllmRunner(
             model,
@@ -62,7 +63,8 @@ def test_models_with_multistream_overlap_shared_expert(
             "multistream_overlap_shared_expert": True,
         },
     ) as runner:
-        vllm_moe_ms_aclgraph_outputs = runner.model.generate(prompts, sampling_params)
+        vllm_moe_ms_aclgraph_outputs = runner.model.generate(
+            prompts, sampling_params)

     with VllmRunner(
             model,
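The lint fix only rewraps the two generate() calls; the test still runs the same model twice, once eager and once with ACL graphs, with the overlap enabled through additional_config. A minimal sketch of the kind of comparison such a test performs, assuming greedy sampling and vllm's standard RequestOutput shape (the check_outputs_match helper and the prompt values are illustrative, not from this repo):

```python
# Hypothetical sketch: with temperature=0.0, eager and ACL-graph runs of
# the same model should produce identical generations.
from vllm import SamplingParams

prompts = ["Hello, my name is"]
sampling_params = SamplingParams(temperature=0.0, max_tokens=8)

def check_outputs_match(eager_outputs, aclgraph_outputs):
    for eager, graph in zip(eager_outputs, aclgraph_outputs):
        # RequestOutput.outputs[0].text holds the generated completion.
        assert eager.outputs[0].text == graph.outputs[0].text
```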

vllm_ascend/ops/common_fused_moe.py

Lines changed: 2 additions & 1 deletion

@@ -439,7 +439,8 @@ def forward(
     ) -> tuple[torch.Tensor, torch.Tensor]:
         # Make sure the shared experts stream begins after hidden_states are ready.
         if self.multistream_overlap_shared_expert:
-            self.shared_expert_stream.wait_stream(torch.npu.current_stream())
+            self.shared_expert_stream.wait_stream(  # type: ignore
+                torch.npu.current_stream())
         with npu_stream_switch(self.shared_expert_stream,
                                enabled=self.multistream_overlap_shared_expert):
             # Use a separate stream to run shared experts.
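For readers unfamiliar with the pattern this hunk rewraps: wait_stream makes the side stream block until work already queued on the current stream (the producer of hidden_states) has finished, after which the shared experts can run concurrently with the routed experts. A minimal sketch of the same pattern, using torch.cuda streams as a stand-in for torch.npu (whose stream API mirrors CUDA's); all names here are illustrative, not the vllm-ascend implementation:

```python
# Sketch of two-stream overlap: shared experts on a side stream, routed
# experts on the main stream, with explicit ordering at both boundaries.
import torch

side_stream = torch.cuda.Stream()

def overlapped_forward(hidden_states, routed_experts, shared_experts):
    # The side stream must not start until hidden_states is ready on the
    # main (current) stream.
    side_stream.wait_stream(torch.cuda.current_stream())
    with torch.cuda.stream(side_stream):
        shared_out = shared_experts(hidden_states)
    # Routed experts run concurrently on the main stream.
    routed_out = routed_experts(hidden_states)
    # Before consuming shared_out, the main stream waits for the side stream.
    torch.cuda.current_stream().wait_stream(side_stream)
    return routed_out, shared_out
```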

vllm_ascend/torchair/ops/torchair_fused_moe.py

Lines changed: 2 additions & 1 deletion

@@ -1254,7 +1254,8 @@ def forward(self,
             log2phy=self.log2phy,
             global_redundant_expert_num=self.global_redundant_expert_num,
             shared_experts=shared_experts if self.torchair_graph_enabled
-            and self.multistream_overlap_shared_expert and not is_prefill else None,
+            and self.multistream_overlap_shared_expert and not is_prefill else
+            None,
             mc2_mask=mc2_mask,
             quantized_x_for_share=quantized_x_for_share,
             dynamic_scale_for_share=dynamic_scale_for_share,
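The rewrapped conditional passes shared_experts into the fused call only when torchair graph mode and the overlap are both enabled and the step is decode rather than prefill. A purely illustrative alternative (not the committed code) that some codebases prefer for lint-driven wraps is hoisting the condition into a named flag:

```python
# Illustrative refactor sketch; standalone variables stand in for the
# attributes used in torchair_fused_moe.py.
torchair_graph_enabled = True
multistream_overlap_shared_expert = True
is_prefill = False
shared_experts = object()  # placeholder for the shared-experts module

use_fused_shared_experts = (torchair_graph_enabled
                            and multistream_overlap_shared_expert
                            and not is_prefill)
shared_experts_arg = shared_experts if use_fused_shared_experts else None
```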

vllm_ascend/utils.py

Lines changed: 2 additions & 1 deletion

@@ -322,7 +322,8 @@ def update_aclgraph_sizes(vllm_config: VllmConfig) -> None:
     # TODO: Find out whether we need to take into account the pp_size
     parallel_factor = 1 + num_comm_groups + int(
         parallel_config.enable_expert_parallel) + int(
-            vllm_config.additional_config.get("multistream_overlap_shared_expert", False))
+            vllm_config.additional_config.get(
+                "multistream_overlap_shared_expert", False))
     if is_moe_model(vllm_config):
         parallel_factor += (parallel_config.data_parallel_size > 1)
     # Calculate maximum supported batch sizes considering model architecture on the A2 Hardware Device
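For intuition about the expression this hunk rewraps: each enabled feature adds one to parallel_factor, which update_aclgraph_sizes then uses when bounding the captured graph sizes. A worked example with assumed inputs (real values come from VllmConfig and the parallel configuration):

```python
# Worked example with assumed inputs: 2 comm groups, expert parallelism
# on, shared-expert overlap on, and a MoE model with data parallelism.
num_comm_groups = 2
enable_expert_parallel = True
multistream_overlap_shared_expert = True
is_moe = True
data_parallel_size = 2

parallel_factor = 1 + num_comm_groups + int(enable_expert_parallel) + int(
    multistream_overlap_shared_expert)
if is_moe:
    parallel_factor += (data_parallel_size > 1)

print(parallel_factor)  # 6: each extra stream/group raises the factor by one
```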
