@@ -15,11 +15,10 @@
 # This file is a part of the vllm-ascend project.
 #
 import pytest
-import torch
-from vllm import LLM, SamplingParams
+from tests.conftest import VllmRunner

 MODELS = [
-    "deepseek-ai/DeepSeek-V2-Lite",
+    "Qwen/Qwen3-0.6B",
 ]

 TENSOR_PARALLELS = [2]
@@ -35,24 +34,9 @@
 @pytest.mark.parametrize("tp_size", TENSOR_PARALLELS)
 @pytest.mark.parametrize("pp_size", PIPELINE_PARALLELS)
 def test_models(model: str, tp_size: int, pp_size: int) -> None:
-    # Create an LLM.
-    llm = LLM(
-        model=model,
-        tensor_parallel_size=tp_size,
-        pipeline_parallel_size=pp_size,
-        trust_remote_code=True,
-        enforce_eager=True,
-    )
-    # Prepare sampling_parames
-    sampling_params = SamplingParams(
-        max_tokens=64,
-        temperature=0,
-        ignore_eos=True,
-    )
-
-    # Generate texts from the prompts.
-    # The output is a list of RequestOutput objects
-    outputs = llm.generate(prompts, sampling_params)
-    torch.npu.synchronize()
-    # The output length should be equal to prompts length.
-    assert len(outputs) == len(prompts)
+    with VllmRunner(model,
+                    tensor_parallel_size=tp_size,
+                    pipeline_parallel_size=pp_size,
+                    enforce_eager=True,
+                    gpu_memory_utilization=0.7) as vllm_model:
+        vllm_model.generate_greedy(prompts, 64)
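For reference, a minimal sketch of how the updated test module might read once this diff is applied. The `prompts` list, the `PIPELINE_PARALLELS` value, and the `@pytest.mark.parametrize("model", MODELS)` decorator are not shown in the diff hunks and are assumed here for illustration; `VllmRunner` and `generate_greedy` come from `tests.conftest` as in the diff.

    import pytest

    from tests.conftest import VllmRunner

    MODELS = [
        "Qwen/Qwen3-0.6B",
    ]

    TENSOR_PARALLELS = [2]
    PIPELINE_PARALLELS = [2]  # assumed value; defined outside the shown hunks

    # Assumed prompts; the real list lives in the part of the file not shown here.
    prompts = [
        "Hello, my name is",
        "The future of AI is",
    ]


    @pytest.mark.parametrize("model", MODELS)  # assumed; implied by the `model` argument
    @pytest.mark.parametrize("tp_size", TENSOR_PARALLELS)
    @pytest.mark.parametrize("pp_size", PIPELINE_PARALLELS)
    def test_models(model: str, tp_size: int, pp_size: int) -> None:
        # VllmRunner owns engine construction and teardown, so the explicit
        # LLM/SamplingParams setup and torch.npu.synchronize() are no longer needed;
        # generate_greedy replaces SamplingParams(temperature=0) with max_tokens=64.
        with VllmRunner(model,
                        tensor_parallel_size=tp_size,
                        pipeline_parallel_size=pp_size,
                        enforce_eager=True,
                        gpu_memory_utilization=0.7) as vllm_model:
            vllm_model.generate_greedy(prompts, 64)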