Commit d55529a

Merge branch 'vllm-project:main' into flash-ascend
2 parents: 98783ca + bb1f0d5; commit: d55529a

58 files changed: +2139 / -971 lines

.github/workflows/_e2e_test.yaml

Lines changed: 192 additions & 0 deletions
@@ -0,0 +1,192 @@
name: 'e2e test'

on:
  workflow_call:
    inputs:
      vllm:
        required: true
        type: string
      runner:
        required: true
        type: string
      image:
        required: true
        type: string
      type:
        required: true
        type: string

jobs:
  e2e:
    name: singlecard
    runs-on: ${{ inputs.runner }}-1
    container:
      image: ${{ inputs.image }}
      env:
        VLLM_LOGGING_LEVEL: ERROR
        VLLM_USE_MODELSCOPE: True
    steps:
      - name: Check npu and CANN info
        run: |
          npu-smi info
          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info

      - name: Config mirrors
        run: |
          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
          apt-get update -y
          apt install git -y

      - name: Checkout vllm-project/vllm-ascend repo
        uses: actions/checkout@v4

      - name: Install system dependencies
        run: |
          apt-get -y install `cat packages.txt`
          apt-get -y install gcc g++ cmake libnuma-dev

      - name: Checkout vllm-project/vllm repo
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
          ref: ${{ inputs.vllm }}
          path: ./vllm-empty
          fetch-depth: 1

      - name: Install vllm-project/vllm from source
        working-directory: ./vllm-empty
        run: |
          VLLM_TARGET_DEVICE=empty pip install -e .

      - name: Install vllm-project/vllm-ascend
        env:
          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
        run: |
          pip install -r requirements-dev.txt
          pip install -v -e .

      - name: Run vllm-project/vllm-ascend test
        env:
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          VLLM_USE_MODELSCOPE: True
        if: ${{ inputs.type == 'light' }}
        run: |
          pytest -sv tests/e2e/singlecard/test_aclgraph.py
          pytest -sv tests/e2e/singlecard/test_quantization.py
          pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl

      - name: Run e2e test
        env:
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          VLLM_USE_MODELSCOPE: True
        if: ${{ inputs.type == 'full' }}
        run: |
          # We found that running the aclgraph tests in a batch causes an AclmdlRICaptureBegin error,
          # so we run that test separately.

          pytest -sv tests/e2e/singlecard/test_aclgraph.py
          pytest -sv tests/e2e/singlecard/test_ascend_scheduler.py
          pytest -sv tests/e2e/singlecard/test_camem.py
          pytest -sv tests/e2e/singlecard/test_chunked.py
          pytest -sv tests/e2e/singlecard/test_embedding.py
          pytest -sv tests/e2e/singlecard/test_guided_decoding.py
          #pytest -sv tests/e2e/singlecard/test_ilama_lora.py
          pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
          pytest -sv tests/e2e/singlecard/test_quantization.py
          pytest -sv tests/e2e/singlecard/test_sampler.py
          pytest -sv tests/e2e/singlecard/test_vlm.py

          # ------------------------------------ v1 spec decode test ------------------------------------ #
          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py

          pytest -sv tests/e2e/singlecard/ops/

  e2e-2-cards:
    name: multicard
    runs-on: ${{ inputs.runner }}-2
    container:
      image: ${{ inputs.image }}
      env:
        VLLM_LOGGING_LEVEL: ERROR
        VLLM_USE_MODELSCOPE: True
    steps:
      - name: Check npu and CANN info
        run: |
          npu-smi info
          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info

      - name: Config mirrors
        run: |
          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
          apt-get update -y
          apt install git -y

      - name: Checkout vllm-project/vllm-ascend repo
        uses: actions/checkout@v4

      - name: Install system dependencies
        run: |
          apt-get -y install `cat packages.txt`
          apt-get -y install gcc g++ cmake libnuma-dev

      - name: Checkout vllm-project/vllm repo
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
          ref: ${{ inputs.vllm }}
          path: ./vllm-empty
          fetch-depth: 1

      - name: Install vllm-project/vllm from source
        working-directory: ./vllm-empty
        run: |
          VLLM_TARGET_DEVICE=empty pip install -e .

      - name: Install vllm-project/vllm-ascend
        env:
          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
        run: |
          pip install -r requirements-dev.txt
          pip install -v -e .

      - name: Run vllm-project/vllm-ascend test (light)
        env:
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          VLLM_USE_MODELSCOPE: True
        if: ${{ inputs.type == 'light' }}
        run: |
          pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP

      - name: Run vllm-project/vllm-ascend test (full)
        env:
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          VLLM_USE_MODELSCOPE: True
        if: ${{ inputs.type == 'full' }}
        run: |
          pytest -sv tests/e2e/multicard/test_data_parallel.py
          pytest -sv tests/e2e/multicard/test_expert_parallel.py
          # The external_launcher test is not stable enough; fix it later.
          # pytest -sv tests/e2e/multicard/test_external_launcher.py
          pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py
          #pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py

          # To avoid OOM, we need to run these tests in a single process.
          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W8A8
          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC
          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe
          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_flashcomm_v1
          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_prefetch_mlp_weight

          #pytest -sv tests/e2e/multicard/test_pipeline_parallel.py
          pytest -sv tests/e2e/multicard/test_prefix_caching.py
          pytest -sv tests/e2e/multicard/test_qwen3_moe.py
          pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py

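Because this new file is driven entirely by workflow_call inputs, it only runs when another workflow invokes it as a reusable workflow. The sketch below shows one way a caller job could wire it up; the caller's job id and all input values here are illustrative assumptions and are not part of this commit.

# Hypothetical caller workflow (not in this diff); job id and input values are assumptions.
jobs:
  e2e-light:
    uses: ./.github/workflows/_e2e_test.yaml
    with:
      vllm: main                            # vLLM ref to check out (assumed value)
      runner: linux-ascend                  # expanded to "<runner>-1" / "<runner>-2" for the single/multicard jobs (assumed label)
      image: my-registry/cann-image:latest  # container image with CANN installed (assumed)
      type: light                           # 'light' runs the reduced test set; 'full' runs everything
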
.github/workflows/accuracy_test.yaml

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@
 # 1. PR labeled with: '*accuracy-test' (ONLY 1 label valid) & 'ready-for-test'
 # 2. workflow_dispatch with models input
 # See detail rule in strategy.matrix note
-name: Benchmarks / accuracy
+name: ascend test / accuracy

 on:
   schedule:

.github/workflows/format_pr_body.yaml

Lines changed: 1 addition & 7 deletions
@@ -33,16 +33,10 @@ jobs:
     runs-on: ubuntu-latest

     steps:
-      - name: Checkout vllm-project/vllm repo
-        uses: actions/checkout@v4
-        with:
-          repository: vllm-project/vllm
-          path: ./vllm-empty

       - name: Get vLLM version
-        working-directory: ./vllm-empty
         run: |
-          VLLM_COMMIT=$(git rev-parse HEAD)
+          VLLM_COMMIT=9607d5eb449711b349d4c2bee0a9c94afcc7ed14
           echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV

       - name: Checkout repository

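The rewritten step above writes the pinned commit link into $GITHUB_ENV, which GitHub Actions exposes to later steps as an environment variable. A minimal illustration of consuming it is below; this consumer step is an assumption for illustration and is not part of the hunk.

      - name: Use the pinned vLLM link (illustrative only, not in this diff)
        run: |
          echo "Formatting PR body with vLLM commit link: ${VLLM_COMMIT}"
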
.github/workflows/labeler.yml

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ jobs:
       pull-requests: write
     steps:
       - name: Label the PR
-        uses: actions/labeler@v5
+        uses: actions/labeler@v6
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
          configuration-path: .github/labeler.yml

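The labeler action reads its rules from the configuration-path shown above. The repository's actual .github/labeler.yml is not part of this diff; as a reminder of the shape that file takes in the v5+ glob-based format, here is a minimal sketch with assumed label names and globs.

# Hypothetical labeler rules; the real labels and globs in this repo may differ.
documentation:
  - changed-files:
      - any-glob-to-any-file: 'docs/**'
ci:
  - changed-files:
      - any-glob-to-any-file: '.github/workflows/**'
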
.github/workflows/nightly_benchmarks.yaml

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@
 # limitations under the License.
 #

-name: 'Benchmarks / Performance'
+name: 'ascend test / performance'
 # This workflow runs nightly benchmarks for vllm-ascend.

 on:

.github/workflows/pre-commit.yml

Lines changed: 5 additions & 0 deletions
@@ -2,6 +2,10 @@ name: pre-commit

 on:
   workflow_call:
+    inputs:
+      vllm:
+        required: true
+        type: string

 permissions:
   contents: read
@@ -22,6 +26,7 @@ jobs:
         with:
           repository: vllm-project/vllm
           path: ./vllm-empty
+          ref: ${{ inputs.vllm }}
       - name: Install vllm
         working-directory: vllm-empty
         run: |

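Since the new vllm input is required, every workflow that calls pre-commit.yml must now pass a vLLM ref explicitly. A sketch of such a call site is below; the job id and the ref value are assumptions, not part of this commit.

# Hypothetical call site; job id and ref value are assumptions.
jobs:
  lint:
    uses: ./.github/workflows/pre-commit.yml
    with:
      vllm: main   # or a pinned vLLM commit SHA, matching whatever the CI entry point uses
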
.github/workflows/vllm_ascend_doctest.yaml

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@
 # This file is a part of the vllm-ascend project.
 #

-name: 'e2e test / doctest'
+name: 'ascend test / doctest'

 on:
   workflow_dispatch:
