
Commit 34dbdfe

[Test] Remove VLLM_USE_V1 in example and tests

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>

1 parent 3c404de, commit 34dbdfe

30 files changed: 295 additions, 786 deletions

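The same pattern repeats across every example and test touched below: the explicit V1-engine override is deleted and only the remaining environment settings are kept. A minimal before/after sketch of that pattern, pieced together from the diffs in this commit (not an excerpt from any single file):

    import os

    # Before this commit the examples forced the V1 engine explicitly:
    #     os.environ["VLLM_USE_V1"] = "1"
    # After this commit that line is gone; engine selection is left to vLLM's
    # default, and only settings like the following remain:
    os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
    os.environ["VLLM_USE_MODELSCOPE"] = "True"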
.github/workflows/vllm_ascend_test.yaml
Lines changed: 12 additions & 51 deletions

@@ -41,13 +41,9 @@ concurrency:
 
 jobs:
   lint:
-    # Only trigger lint on pull request
-    if: ${{ github.event_name == 'pull_request' }}
     uses: ./.github/workflows/pre-commit.yml
 
   changes:
-    # Only trigger changes on pull request
-    if: ${{ github.event_name == 'pull_request' }}
     runs-on: ubuntu-latest
     permissions:
       pull-requests: read
@@ -60,20 +56,24 @@ jobs:
         with:
           filters: |
             e2e_tracker:
+              - '.github/workflows/vllm_ascend_test.yaml'
               - 'vllm_ascend/**'
               - 'csrc/**'
               - 'cmake/**'
               - 'tests/e2e/**'
-              - 'tests/conftest.py'
-              - 'tests/model_utils.py'
-              - 'tests/utils.py'
+              - 'CMakeLists.txt'
+              - 'setup.py'
+              - 'requirements.txt'
+              - 'requirements-dev.txt'
+              - 'requirements-lint.txt'
+              - 'packages.txt'
             ut_tracker:
               - 'tests/ut/**'
   ut:
     needs: [lint, changes]
     name: unit test
-    # only trigger unit test after lint passed and the change is e2e and ut related. Or the PR is merged.
-    if: ${{ github.event_name == 'push' || (needs.lint.result == 'success' && (needs.changes.outputs.e2e_tracker == 'true' || needs.changes.outputs.ut_tracker == 'true')) }}
+    # only trigger unit test after lint passed and the change is e2e and ut related.
+    if: ${{ needs.lint.result == 'success' && (needs.changes.outputs.e2e_tracker == 'true' || needs.changes.outputs.ut_tracker == 'true') }}
     runs-on: ubuntu-latest
     container:
       image: quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
@@ -112,9 +112,8 @@ jobs:
           python3 -m pip install -r requirements-dev.txt --extra-index https://download.pytorch.org/whl/cpu/
           python3 -m pip install -v . --extra-index https://download.pytorch.org/whl/cpu/
 
-      - name: Run unit test for V1 Engine
+      - name: Run unit test
         env:
-          VLLM_USE_V1: 1
           VLLM_WORKER_MULTIPROC_METHOD: spawn
           TORCH_DEVICE_BACKEND_AUTOLOAD: 0
         run: |
@@ -189,9 +188,8 @@ jobs:
           pip install -r requirements-dev.txt
           pip install -v -e .
 
-      - name: Run e2e test for V1 Engine
+      - name: Run e2e test
         env:
-          VLLM_USE_V1: 1
           VLLM_WORKER_MULTIPROC_METHOD: spawn
           VLLM_USE_MODELSCOPE: True
         run: |
@@ -213,26 +211,6 @@ jobs:
           # TODO: revert me when test_v1_spec_decode.py::test_ngram_correctness is fixed
           VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
 
-      - name: Run e2e test on V0 engine
-        if: ${{ github.event_name == 'schedule' }}
-        env:
-          VLLM_USE_V1: 0
-          VLLM_USE_MODELSCOPE: True
-        run: |
-          pytest -sv tests/e2e/singlecard/test_offline_inference.py
-          pytest -sv tests/e2e/singlecard/test_ilama_lora.py
-          pytest -sv tests/e2e/singlecard/test_guided_decoding.py
-          pytest -sv tests/e2e/singlecard/test_camem.py
-          pytest -sv tests/e2e/singlecard/test_prompt_embedding.py
-          pytest -sv tests/e2e/singlecard/test_embedding.py
-          pytest -sv tests/e2e/singlecard/ \
-            --ignore=tests/e2e/singlecard/test_offline_inference.py \
-            --ignore=tests/e2e/singlecard/test_ilama_lora.py \
-            --ignore=tests/e2e/singlecard/test_guided_decoding.py \
-            --ignore=tests/e2e/singlecard/test_camem.py \
-            --ignore=tests/e2e/singlecard/test_prompt_embedding.py \
-            --ignore=tests/e2e/singlecard/test_embedding.py
-
   e2e-4-cards:
     needs: [e2e]
     if: ${{ needs.e2e.result == 'success' }}
@@ -290,9 +268,8 @@ jobs:
           pip install -r requirements-dev.txt
           pip install -v -e .
 
-      - name: Run vllm-project/vllm-ascend test for V1 Engine
+      - name: Run vllm-project/vllm-ascend test
         env:
-          VLLM_USE_V1: 1
           VLLM_WORKER_MULTIPROC_METHOD: spawn
           VLLM_USE_MODELSCOPE: True
         run: |
@@ -308,19 +285,3 @@ jobs:
           pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py \
             --ignore=tests/e2e/multicard/test_offline_inference_distributed.py \
             --ignore=tests/e2e/multicard/test_data_parallel.py
-
-      - name: Run vllm-project/vllm-ascend test on V0 engine
-        if: ${{ github.event_name == 'schedule' }}
-        env:
-          VLLM_USE_V1: 0
-          VLLM_USE_MODELSCOPE: True
-        run: |
-          pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
-          # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py will raise error.
-          # To avoid oom, we need to run the test in a single process.
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
-          pytest -sv tests/e2e/multicard/test_data_parallel.py
-          pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py \
-            --ignore=tests/e2e/multicard/test_offline_inference_distributed.py \
-            --ignore=tests/e2e/multicard/test_data_parallel.py

examples/offline_data_parallel.py
Lines changed: 0 additions & 1 deletion

@@ -120,7 +120,6 @@ def main(
     trust_remote_code,
 ):
     # DP only support on V1 engine
-    os.environ["VLLM_USE_V1"] = "1"
     os.environ["VLLM_DP_RANK"] = str(global_dp_rank)
     os.environ["VLLM_DP_RANK_LOCAL"] = str(local_dp_rank)
     os.environ["VLLM_DP_SIZE"] = str(dp_size)

examples/offline_dualbatch_overlap_npu.py
Lines changed: 0 additions & 1 deletion

@@ -5,7 +5,6 @@
 
 # enable dual-batch overlap for vllm ascend
 os.environ["VLLM_ASCEND_ENABLE_DBO"] = "1"
-os.environ["VLLM_USE_V1"] = "1"
 
 # Sample prompts.
 prompts = ["The president of the United States is"] * 41

examples/offline_inference_sleep_mode_npu.py
Lines changed: 0 additions & 1 deletion

@@ -22,7 +22,6 @@
 from vllm import LLM, SamplingParams
 from vllm.utils import GiB_bytes
 
-os.environ["VLLM_USE_V1"] = "1"
 os.environ["VLLM_USE_MODELSCOPE"] = "True"
 os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
 
examples/run_dp_attention_etp16.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
export VLLM_USE_V1=1
21
export TASK_QUEUE_ENABLE=1
32
source /usr/local/Ascend/ascend-toolkit/set_env.sh
43
source /usr/local/Ascend/nnal/atb/set_env.sh

requirements-dev.txt
Lines changed: 1 addition & 0 deletions

@@ -12,4 +12,5 @@ xgrammar
 zmq
 types-psutil
 pytest-cov
+regex
 sentence_transformers

requirements-lint.txt
Lines changed: 1 addition & 0 deletions

@@ -4,5 +4,6 @@ pre-commit==4.0.1
 # type checking
 mypy==1.11.1
 types-PyYAML
+types-regex
 types-requests
 types-setuptools

tests/conftest.py renamed to tests/e2e/conftest.py
Lines changed: 2 additions & 2 deletions

@@ -39,8 +39,8 @@
 from vllm.transformers_utils.utils import maybe_model_redirect
 from vllm.utils import is_list_of
 
-from tests.model_utils import (PROMPT_TEMPLATES, TokensTextLogprobs,
-                               TokensTextLogprobsPromptLogprobs)
+from tests.e2e.model_utils import (PROMPT_TEMPLATES, TokensTextLogprobs,
+                                   TokensTextLogprobsPromptLogprobs)
 # TODO: remove this part after the patch merged into vllm, if
 # we not explicitly patch here, some of them might be effectiveless
 # in pytest scenario
File renamed without changes.

tests/e2e/multicard/test_fused_moe_allgather_ep.py
Lines changed: 6 additions & 9 deletions

@@ -26,12 +26,11 @@
 from modelscope import snapshot_download  # type: ignore
 from vllm import SamplingParams
 
-from tests.conftest import VllmRunner
+from tests.e2e.conftest import VllmRunner
 
 
 @patch.dict(
     os.environ, {
-        "VLLM_USE_V1": "1",
         "VLLM_WORKER_MULTIPROC_METHOD": "spawn",
         "TASK_QUEUE_ENABLE": "1",
         "VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP": "1"
@@ -56,12 +55,10 @@ def test_generate_with_allgather():
         vllm_model.generate(example_prompts, sampling_params)
 
 
-@patch.dict(
-    os.environ, {
-        "VLLM_USE_V1": "1",
-        "VLLM_WORKER_MULTIPROC_METHOD": "spawn",
-        "TASK_QUEUE_ENABLE": "1"
-    })
+@patch.dict(os.environ, {
+    "VLLM_WORKER_MULTIPROC_METHOD": "spawn",
+    "TASK_QUEUE_ENABLE": "1"
+})
 def test_generate_with_alltoall():
     example_prompts = ["Hello, my name is"]
     sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
@@ -79,4 +76,4 @@ def test_generate_with_alltoall():
         },
         "expert_tensor_parallel_size": 1
     }) as vllm_model:
-        vllm_model.generate(example_prompts, sampling_params)
+        vllm_model.generate(example_prompts, sampling_params)

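For reference, a standalone sketch of the patch.dict decorator pattern these tests now use, with the VLLM_USE_V1 entry dropped; check_env is a hypothetical stand-in for the real test functions:

    import os
    from unittest.mock import patch

    # Patch only the env vars the test actually needs; VLLM_USE_V1 is no longer set.
    @patch.dict(os.environ, {
        "VLLM_WORKER_MULTIPROC_METHOD": "spawn",
        "TASK_QUEUE_ENABLE": "1",
    })
    def check_env():
        # Inside the decorated function the patched values are visible.
        assert os.environ["TASK_QUEUE_ENABLE"] == "1"

    check_env()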