Commit e99edf9

support pipeline parallel in V1 engine
Signed-off-by: weiguihua2 <weiguihua2@huawei.com>
1 parent: 509548f

File tree

1 file changed: +8 -24 lines

tests/e2e/multicard/test_pipeline_parallel.py

Lines changed: 8 additions & 24 deletions
@@ -15,11 +15,10 @@
 # This file is a part of the vllm-ascend project.
 #
 import pytest
-import torch
-from vllm import LLM, SamplingParams
+from tests.conftest import VllmRunner
 
 MODELS = [
-    "deepseek-ai/DeepSeek-V2-Lite",
+    "Qwen/Qwen3-0.6B",
 ]
 
 TENSOR_PARALLELS = [2]
@@ -35,24 +34,9 @@
 @pytest.mark.parametrize("tp_size", TENSOR_PARALLELS)
 @pytest.mark.parametrize("pp_size", PIPELINE_PARALLELS)
 def test_models(model: str, tp_size: int, pp_size: int) -> None:
-    # Create an LLM.
-    llm = LLM(
-        model=model,
-        tensor_parallel_size=tp_size,
-        pipeline_parallel_size=pp_size,
-        trust_remote_code=True,
-        enforce_eager=True,
-    )
-    # Prepare sampling_parames
-    sampling_params = SamplingParams(
-        max_tokens=64,
-        temperature=0,
-        ignore_eos=True,
-    )
-
-    # Generate texts from the prompts.
-    # The output is a list of RequestOutput objects
-    outputs = llm.generate(prompts, sampling_params)
-    torch.npu.synchronize()
-    # The output length should be equal to prompts length.
-    assert len(outputs) == len(prompts)
+    with VllmRunner(model,
+                    tensor_parallel_size=tp_size,
+                    pipeline_parallel_size=pp_size,
+                    enforce_eager=True,
+                    gpu_memory_utilization=0.7) as vllm_model:
+        vllm_model.generate_greedy(prompts, 64)
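
For context, below is a rough sketch of what tests/e2e/multicard/test_pipeline_parallel.py could look like as a whole after this commit. The prompts list, the PIPELINE_PARALLELS value, and the parametrization over MODELS are defined outside the hunks shown above, so the values used for them here are illustrative assumptions rather than the file's actual contents.

import pytest

from tests.conftest import VllmRunner

MODELS = [
    "Qwen/Qwen3-0.6B",
]

TENSOR_PARALLELS = [2]
PIPELINE_PARALLELS = [2]  # assumed value; defined outside the diff context

# Assumed example prompts; the real list also lives outside the diff context.
prompts = [
    "Hello, my name is",
    "The future of AI is",
]


@pytest.mark.parametrize("model", MODELS)  # assumed; implied by the model argument
@pytest.mark.parametrize("tp_size", TENSOR_PARALLELS)
@pytest.mark.parametrize("pp_size", PIPELINE_PARALLELS)
def test_models(model: str, tp_size: int, pp_size: int) -> None:
    # VllmRunner (from tests/conftest.py) wraps engine construction and teardown
    # in a context manager, so the test no longer builds LLM/SamplingParams directly.
    with VllmRunner(model,
                    tensor_parallel_size=tp_size,
                    pipeline_parallel_size=pp_size,
                    enforce_eager=True,
                    gpu_memory_utilization=0.7) as vllm_model:
        # Greedy decoding of up to 64 new tokens per prompt.
        vllm_model.generate_greedy(prompts, 64)

Assuming a multi-NPU host with vllm-ascend installed, the test would typically be invoked with something like pytest -sv tests/e2e/multicard/test_pipeline_parallel.py.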
