Skip to content

Commit 382c29f

Browse files
authored
[BugFix] Fix world size bug in model_runner (#2915)
- Fix world size bug in model_runner to make sure ep>16 runs with MC2
- Enable e2e test for VL

Co-Authored-By: whx-sjtu <2952154980@qq.com>
Co-Authored-By: Icey <1790571317@qq.com>

- vLLM version: v0.10.2
- vLLM main: vllm-project/vllm@3e903b6

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
1 parent c5a502f commit 382c29f

File tree

3 files changed

+13
-8
lines changed

3 files changed

+13
-8
lines changed

tests/e2e/singlecard/test_vlm.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,17 +22,16 @@
2222
"""
2323
import os
2424

25-
import pytest
2625
from vllm import SamplingParams
2726
from vllm.assets.audio import AudioAsset
2827
from vllm.assets.image import ImageAsset
2928

3029
from tests.e2e.conftest import VllmRunner
3130

31+
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
3232
os.environ["PYTORCH_NPU_ALLOC_CONF"] = "max_split_size_mb:256"
3333

3434

35-
@pytest.mark.skip(reason="fix me")
3635
def test_multimodal_vl(prompt_template):
3736
image = ImageAsset("cherry_blossom") \
3837
.pil_image.convert("RGB")
@@ -52,9 +51,12 @@ def test_multimodal_vl(prompt_template):
5251
"fps": 1,
5352
},
5453
enforce_eager=True) as vllm_model:
55-
vllm_model.generate_greedy(prompts=prompts,
56-
images=images,
57-
max_tokens=64)
54+
outputs = vllm_model.generate_greedy(prompts=prompts,
55+
images=images,
56+
max_tokens=64)
57+
assert len(outputs) == len(prompts)
58+
for _, output_str in outputs:
59+
assert output_str, "Generated output should not be empty."
5860

5961

6062
def test_multimodal_audio():
@@ -86,4 +88,7 @@ def test_multimodal_audio():
8688
dtype="bfloat16",
8789
limit_mm_per_prompt={"audio": 2},
8890
gpu_memory_utilization=0.9) as runner:
89-
runner.generate(inputs, sampling_params=sampling_params)
91+
outputs = runner.generate(inputs, sampling_params=sampling_params)
92+
93+
assert outputs is not None, "Generated outputs should not be None."
94+
assert len(outputs) > 0, "Generated outputs should not be empty."

tests/ut/worker/test_model_runner_v1.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def test_select_moe_comm_method(soc_version, enable_expert_parallel,
5757
mock_runner = MagicMock(spec=NPUModelRunner)
5858
mock_runner.parallel_config = MagicMock()
5959
mock_runner.parallel_config.enable_expert_parallel = enable_expert_parallel
60-
mock_runner.parallel_config.world_size = world_size
60+
mock_runner.parallel_config.world_size_across_dp = world_size
6161
mock_runner.mc2_tokens_capacity = mc2_tokens_capacity
6262

6363
# Patch the helper functions

vllm_ascend/worker/model_runner_v1.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1539,7 +1539,7 @@ def _select_moe_comm_method(self, num_tokens: int) -> str:
15391539
if not self.parallel_config.enable_expert_parallel:
15401540
moe_comm_method = "allgather"
15411541
elif soc_version in {AscendSocVersion.A2}:
1542-
if num_tokens <= self.mc2_tokens_capacity and self.parallel_config.world_size >= 16:
1542+
if num_tokens <= self.mc2_tokens_capacity and self.parallel_config.world_size_across_dp >= 16:
15431543
moe_comm_method = "mc2"
15441544
else:
15451545
moe_comm_method = "allgather"

0 commit comments

Comments
 (0)