Skip to content

Commit a652b83

Browse files
committed
Merge branch 'main' into async-sched-dp
2 parents 12e2d25 + aa4d2a9 commit a652b83

File tree

113 files changed

+3890
-4665
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

113 files changed

+3890
-4665
lines changed

.github/actionlint.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,3 +15,4 @@ self-hosted-runner:
1515
- linux-aarch64-a3-2
1616
- linux-aarch64-a3-4
1717
- linux-aarch64-a3-8
18+
- linux-amd64-cpu-0

.github/workflows/accuracy_test.yaml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -112,7 +112,7 @@ jobs:
112112
uses: actions/checkout@v4
113113
with:
114114
repository: vllm-project/vllm
115-
ref: v0.10.1.1
115+
ref: main
116116
path: ./vllm-empty
117117

118118
- name: Install vllm-project/vllm from source
@@ -303,7 +303,7 @@ jobs:
303303
git push -f origin "${{ env.BRANCH_NAME }}"
304304
305305
- name: Create PR in upstream via API
306-
uses: actions/github-script@v7
306+
uses: actions/github-script@v8
307307
with:
308308
github-token: ${{ secrets.PAT_TOKEN }}
309309
script: |

.github/workflows/nightly_benchmarks.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ jobs:
5151
strategy:
5252
matrix:
5353
include:
54-
- vllm_branch: v0.10.1.1
54+
- vllm_branch: main
5555
vllm_ascend_branch: main
5656
vllm_use_v1: 1
5757
max-parallel: 1

.github/workflows/pre-commit.yml

Lines changed: 15 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -8,13 +8,19 @@ permissions:
88

99
jobs:
1010
pre-commit:
11-
runs-on: ubuntu-latest
11+
runs-on: linux-amd64-cpu-0
12+
container:
13+
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
1214
steps:
15+
- name: Config mirrors
16+
run: |
17+
sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
18+
pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
19+
pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
20+
apt-get update -y
21+
apt install git -y
1322
- name: Checkout vllm-project/vllm-ascend repo
1423
uses: actions/checkout@v4
15-
- uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0
16-
with:
17-
python-version: "3.11"
1824
- run: echo "::add-matcher::.github/workflows/matchers/actionlint.json"
1925
- run: echo "::add-matcher::.github/workflows/matchers/mypy.json"
2026
- name: Checkout vllm-project/vllm repo
@@ -30,8 +36,9 @@ jobs:
3036
- name: Install vllm-ascend dev
3137
run: |
3238
pip install -r requirements-dev.txt --extra-index-url https://download.pytorch.org/whl/cpu
33-
- uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
39+
git config --global --add safe.directory '*'
40+
- name: Run pre-commit check
3441
env:
35-
SHELLCHECK_OPTS: "--exclude=SC2046,SC2006,SC2086" # Exclude SC2046, SC2006, SC2086 for actionlint
36-
with:
37-
extra_args: --all-files --hook-stage manual
42+
SHELLCHECK_OPTS: "--exclude=SC2046,SC2006,SC2086"
43+
GOPROXY: "https://goproxy.cn,direct"
44+
run: pre-commit run --all-files --hook-stage manual

.github/workflows/reminder_comment.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ jobs:
99
runs-on: ubuntu-latest
1010
steps:
1111
- name: Remind to run full CI on PR
12-
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
12+
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
1313
with:
1414
script: |
1515
github.rest.issues.createComment({

.github/workflows/vllm_ascend_dist.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ jobs:
4343
strategy:
4444
matrix:
4545
os: [linux-aarch64-a3-8]
46-
vllm_version: [v0.10.1.1, main]
46+
vllm_version: [main]
4747
name: vLLM Ascend test
4848
runs-on: ${{ matrix.os }}
4949
container:

.github/workflows/vllm_ascend_test.yaml

Lines changed: 16 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,6 @@ on:
2525
branches:
2626
- 'main'
2727
- '*-dev'
28-
2928
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
3029
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
3130
# It's used to activate ascend-toolkit environment variables.
@@ -44,7 +43,9 @@ jobs:
4443
uses: ./.github/workflows/pre-commit.yml
4544

4645
changes:
47-
runs-on: ubuntu-latest
46+
runs-on: linux-amd64-cpu-0
47+
container:
48+
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
4849
outputs:
4950
e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
5051
ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
@@ -68,6 +69,7 @@ jobs:
6869
- 'packages.txt'
6970
ut_tracker:
7071
- 'tests/ut/**'
72+
7173
ut:
7274
needs: [lint, changes]
7375
name: unit test
@@ -81,7 +83,7 @@ jobs:
8183
VLLM_USE_MODELSCOPE: True
8284
strategy:
8385
matrix:
84-
vllm_version: [v0.10.1.1, main]
86+
vllm_version: [main]
8587
steps:
8688
- name: Install packages
8789
run: |
@@ -129,16 +131,16 @@ jobs:
129131
name: vllm-ascend
130132
verbose: true
131133

132-
e2e:
134+
e2e-light:
133135
needs: [lint, changes]
134136
# only trigger e2e test after lint passed and the change is e2e related with pull request.
135-
if: ${{ github.event_name == 'pull_request' && needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' }}
137+
if: ${{ github.event_name == 'pull_request' && needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' && !contains(github.event.pull_request.labels.*.name, 'ready') }}
136138
strategy:
137139
max-parallel: 2
138140
matrix:
139141
os: [linux-aarch64-a2-1]
140-
vllm_version: [v0.10.1.1, main]
141-
name: singlecard e2e test
142+
vllm_version: [main]
143+
name: singlecard e2e test - light
142144
runs-on: ${{ matrix.os }}
143145
container:
144146
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
@@ -191,38 +193,19 @@ jobs:
191193
VLLM_WORKER_MULTIPROC_METHOD: spawn
192194
VLLM_USE_MODELSCOPE: True
193195
run: |
194-
# We found that if running aclgraph tests in batch, it will cause AclmdlRICaptureBegin error. So we run
195-
# the test separately.
196-
197196
pytest -sv tests/e2e/singlecard/test_aclgraph.py
198-
pytest -sv tests/e2e/singlecard/test_ascend_scheduler.py
199-
pytest -sv tests/e2e/singlecard/test_camem.py
200-
pytest -sv tests/e2e/singlecard/test_chunked.py
201-
pytest -sv tests/e2e/singlecard/test_embedding.py
202-
pytest -sv tests/e2e/singlecard/test_guided_decoding.py
203-
# TODO: Fix lora accuracy error
204-
pytest -sv tests/e2e/singlecard/test_ilama_lora.py
205-
pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
206197
pytest -sv tests/e2e/singlecard/test_quantization.py
207-
pytest -sv tests/e2e/singlecard/test_sampler.py
208-
pytest -sv tests/e2e/singlecard/test_vlm.py
209-
210-
# ------------------------------------ v1 spec decode test ------------------------------------ #
211-
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
212-
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
213-
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
214-
215-
pytest -sv tests/e2e/singlecard/ops/
198+
pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
216199
217-
e2e-2-cards:
218-
needs: [e2e]
219-
if: ${{ needs.e2e.result == 'success' }}
200+
e2e-2-cards-light:
201+
needs: [e2e-light]
202+
if: ${{ needs.e2e-light.result == 'success' }}
220203
strategy:
221204
max-parallel: 2
222205
matrix:
223206
os: [linux-aarch64-a2-2]
224-
vllm_version: [v0.10.1.1, main]
225-
name: multicard e2e test
207+
vllm_version: [main]
208+
name: multicard e2e test - light
226209
runs-on: ${{ matrix.os }}
227210
container:
228211
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
@@ -275,22 +258,4 @@ jobs:
275258
VLLM_WORKER_MULTIPROC_METHOD: spawn
276259
VLLM_USE_MODELSCOPE: True
277260
run: |
278-
pytest -sv tests/e2e/multicard/test_data_parallel.py
279-
pytest -sv tests/e2e/multicard/test_expert_parallel.py
280-
# external_launcher test is not stable enough. Fix it later
281-
# pytest -sv tests/e2e/multicard/test_external_launcher.py
282-
pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py
283-
pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
284-
285-
# To avoid oom, we need to run the test in a single process.
286-
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
287-
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
288-
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W8A8
289-
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC
290-
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
291-
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe
292-
293-
#pytest -sv tests/e2e/multicard/test_pipeline_parallel.py
294-
pytest -sv tests/e2e/multicard/test_prefix_caching.py
295-
pytest -sv tests/e2e/multicard/test_qwen3_moe.py
296-
pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py
261+
pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP

.github/workflows/vllm_ascend_test_310p.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@ jobs:
5353
max-parallel: 2
5454
matrix:
5555
os: [linux-aarch64-310p-1, linux-aarch64-310p-4]
56-
vllm_version: [v0.10.1.1, main]
56+
vllm_version: [main]
5757
name: 310p e2e test
5858
runs-on: ${{ matrix.os }}
5959
container:

0 commit comments

Comments
 (0)