Skip to content

Commit b0f3771

Browse files
[Test] Refactor accuracy test to nightly test
Signed-off-by: hfadzxy <starmoon_zhang@163.com>
1 parent 49e6983 commit b0f3771

File tree

4 files changed

+131
-145
lines changed

4 files changed

+131
-145
lines changed

.github/workflows/_accuracy_test.yaml renamed to .github/workflows/_e2e_nightly_single_node_models.yaml

Lines changed: 83 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,21 @@
1-
name: 'accuracy test'
1+
#
2+
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
# This file is a part of the vllm-ascend project.
16+
#
17+
18+
name: 'e2e nightly models test'
219

320
on:
421
workflow_call:
@@ -16,46 +33,52 @@ on:
1633
image:
1734
required: true
1835
type: string
19-
model_name:
36+
model_list:
2037
required: true
2138
type: string
2239
upload:
2340
required: false
2441
type: boolean
2542
default: false
2643

27-
jobs:
28-
accuracy_tests:
44+
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
45+
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
46+
# It's used to activate ascend-toolkit environment variables.
47+
defaults:
48+
run:
49+
shell: bash -el {0}
50+
51+
# only cancel in-progress runs of the same workflow
52+
# and ignore the lint / 1 card / 2 cards / 4 cards test type
53+
concurrency:
54+
group: ${{ github.workflow }}-${{ github.ref }}-${{ inputs.runner }}-${{inputs.model_list}}
55+
cancel-in-progress: true
2956

57+
jobs:
58+
e2e-nightly:
59+
name: ${{inputs.model_list}} accuracy test
3060
runs-on: ${{ inputs.runner }}
31-
name: ${{ inputs.model_name }} accuracy
3261
container:
3362
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
3463
env:
3564
VLLM_USE_MODELSCOPE: True
36-
# 1. If version specified (work_dispatch), do specified branch accuracy test
37-
# 2. If no version (labeled PR), do accuracy test by default ref:
38-
# The branch, tag or SHA to checkout. When checking out the repository that
39-
# triggered a workflow, this defaults to the reference or SHA for that event.
40-
# Otherwise, uses the default branch.
4165
GHA_VLLM_ASCEND_VERSION: ${{ inputs.vllm-ascend }}
42-
4366
steps:
44-
- name: Checkout repository
45-
uses: actions/checkout@v4
46-
47-
- name: Set model name as output
48-
id: set_output
67+
- name: Check npu and CANN info
4968
run: |
50-
echo "model_name=${{ inputs.model_name }}" >> $GITHUB_OUTPUT
69+
npu-smi info
70+
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
5171
5272
- name: Config mirrors
5373
run: |
54-
sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
55-
pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
56-
pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
74+
sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
75+
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
5776
apt-get update -y
5877
apt install git -y
78+
git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
79+
80+
- name: Checkout vllm-project/vllm-ascend repo
81+
uses: actions/checkout@v4
5982

6083
- name: Install system dependencies
6184
run: |
@@ -73,9 +96,16 @@ jobs:
7396
working-directory: ./vllm-empty
7497
run: |
7598
VLLM_TARGET_DEVICE=empty pip install -e .
76-
99+
100+
- name: Install vllm-project/vllm-ascend
101+
env:
102+
PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
103+
run: |
104+
pip install -r requirements-dev.txt
105+
pip install -v -e .
106+
77107
- name: Install Ascend toolkit & triton_ascend (for Qwen3-Next-80B-A3B-Instruct)
78-
if: ${{ inputs.model_name == 'Qwen3-Next-80B-A3B-Instruct' }}
108+
if: ${{ inputs.runner == 'linux-aarch64-a2-4' && contains(inputs.model_list, 'Qwen3-Next-80B-A3B-Instruct') }}
79109
shell: bash -l {0}
80110
run: |
81111
wget -q https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/Ascend-BiSheng-toolkit_aarch64.run -O /tmp/Ascend-BiSheng-toolkit_aarch64.run
@@ -108,14 +138,6 @@ jobs:
108138
path: ./vllm-ascend
109139
ref: ${{ env.GHA_VLLM_ASCEND_VERSION }}
110140

111-
- name: Install vllm-project/vllm-ascend
112-
working-directory: ./vllm-ascend
113-
env:
114-
PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
115-
run: |
116-
pip install -r requirements-dev.txt
117-
pip install -v -e .
118-
119141
- name: Get vLLM commit hash and URL
120142
working-directory: ./vllm-empty
121143
run: |
@@ -149,11 +171,12 @@ jobs:
149171
pip show vllm | grep "Version:" | awk '{print "GHA_VLLM_VERSION="$2}' | sed 's/+.*//'
150172
} >> "$GITHUB_ENV"
151173
152-
- name: Run accuracy test
174+
- name: Run vllm-project/vllm-ascend accuracy test
153175
id: report
154176
env:
155177
VLLM_WORKER_MULTIPROC_METHOD: spawn
156178
VLLM_USE_MODELSCOPE: True
179+
VLLM_CI_RUNNER: ${{ inputs.runner }}
157180
VLLM_VERSION: ${{ env.GHA_VLLM_VERSION }}
158181
VLLM_COMMIT: ${{ env.VLLM_COMMIT }}
159182
VLLM_ASCEND_VERSION: ${{ env.GHA_VLLM_ASCEND_VERSION || github.ref }}
@@ -162,24 +185,44 @@ jobs:
162185
TORCH_VERSION: ${{ env.GHA_TORCH_VERSION }}
163186
TORCH_NPU_VERSION: ${{ env.GHA_TORCH_NPU_VERSION }}
164187
run: |
165-
model_base_name=$(basename ${{ inputs.model_name }})
166-
markdown_name="${model_base_name}"
167-
echo "markdown_name=$markdown_name" >> $GITHUB_OUTPUT
168188
mkdir -p ./benchmarks/accuracy
169-
pytest -sv ./tests/e2e/models/test_lm_eval_correctness.py \
170-
--config ./tests/e2e/models/configs/${{ inputs.model_name }}.yaml
189+
echo "Received model_list: ${{ inputs.model_list }}"
190+
models=$(echo '${{ inputs.model_list }}' | jq -r '.[]')
191+
any_failure=0
192+
for model in $models; do
193+
echo "Running test for model: $model"
194+
pytest -sv ./tests/e2e/models/test_lm_eval_correctness.py \
195+
--config "./tests/e2e/models/configs/${model}.yaml" || {
196+
echo "Test failed for model: $model"
197+
any_failure=1
198+
}
199+
done
200+
201+
if [ $any_failure -ne 0 ]; then
202+
exit 1
203+
fi
171204
172205
- name: Generate step summary
173206
if: ${{ always() }}
174207
run: |
175-
cat ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md >> $GITHUB_STEP_SUMMARY
208+
models=$(echo '${{ inputs.model_list }}' | jq -r '.[]')
209+
for model in $models; do
210+
echo "Processing model: $model"
211+
model_base_name=$(basename "$model")
212+
cat ./benchmarks/accuracy/${model_base_name}.md >> $GITHUB_STEP_SUMMARY
213+
done
214+
215+
- name: Set artifact timestamp
216+
id: ts
217+
run: |
218+
echo "artifact_ts=$(date -u +%Y%m%dT%H%M%SZ)" >> $GITHUB_OUTPUT
176219
177220
- name: Upload Report
178221
if: ${{ inputs.upload == true }}
179222
uses: actions/upload-artifact@v5
180223
with:
181-
name: "report-${{ env.GHA_VLLM_ASCEND_VERSION }}-${{ steps.report.outputs.markdown_name }}"
182-
path: ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md
224+
name: report-${{ env.GHA_VLLM_ASCEND_VERSION }}-${{ steps.ts.outputs.artifact_ts }}
225+
path: ./benchmarks/accuracy/
183226
if-no-files-found: warn
184227
retention-days: 90
185-
overwrite: true
228+
overwrite: true

.github/workflows/accuracy_test.yaml

Lines changed: 0 additions & 85 deletions
This file was deleted.

.github/workflows/vllm_ascend_test_nightly_a2.yaml

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ jobs:
6666
multi-node-tests:
6767
name: multi-node
6868
needs: single-node-tests
69-
if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
69+
if: always() && github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
7070
strategy:
7171
fail-fast: false
7272
max-parallel: 1
@@ -88,3 +88,36 @@ jobs:
8888
config_file_path: ${{ matrix.test_config.config_file_path }}
8989
secrets:
9090
KUBECONFIG_B64: ${{ secrets.KUBECONFIG_A2_B64 }}
91+
92+
single-node-accuracy-tests:
93+
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
94+
strategy:
95+
fail-fast: false
96+
matrix:
97+
test_config:
98+
- os: linux-aarch64-a2-1
99+
model_list:
100+
- Qwen3-8B
101+
- Qwen2.5-VL-7B-Instruct
102+
# TODO: This model has a bug; re-add it once the bug is fixed
103+
# - Qwen2-Audio-7B-Instruct
104+
- Qwen3-8B-W8A8
105+
- Qwen3-VL-8B-Instruct
106+
- Qwen2.5-Omni-7B
107+
- Meta-Llama-3.1-8B-Instruct
108+
- os: linux-aarch64-a2-2
109+
model_list:
110+
- Qwen3-30B-A3B
111+
- Qwen3-VL-30B-A3B-Instruct
112+
- DeepSeek-V2-Lite
113+
- os: linux-aarch64-a2-4
114+
model_list:
115+
- Qwen3-Next-80B-A3B-Instruct
116+
- Qwen3-30B-A3B-W8A8
117+
uses: ./.github/workflows/_e2e_nightly_single_node_models.yaml
118+
with:
119+
vllm: v0.11.0
120+
runner: ${{ matrix.test_config.os }}
121+
model_list: ${{ toJson(matrix.test_config.model_list) }}
122+
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
123+
upload: false

.github/workflows/vllm_ascend_test_models.yaml renamed to .github/workflows/vllm_ascend_test_report.yaml

Lines changed: 14 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -20,18 +20,15 @@
2020
# 2. pull_request that changes the related files
2121
# 3. workflow_dispatch with models input
2222

23-
name: ascend test / models
23+
name: ascend test / accuracy report
2424

2525
on:
26-
schedule:
27-
# Runs every 6 hours
28-
- cron: '0 */6 * * *'
2926
pull_request:
3027
branches:
3128
- 'main'
3229
- '*-dev'
3330
paths:
34-
- '.github/workflows/vllm_ascend_test_models.yaml'
31+
- '.github/workflows/vllm_ascend_test_report.yaml'
3532
- 'tests/e2e/models/test_lm_eval_correctness.py'
3633
workflow_dispatch:
3734
inputs:
@@ -60,22 +57,20 @@ concurrency:
6057
jobs:
6158
run:
6259
strategy:
60+
fail-fast: false
6361
matrix:
6462
include:
65-
- model_name: Qwen3-8B
66-
runner: a2-1
67-
- model_name: Qwen2.5-VL-7B-Instruct
68-
runner: a2-1
69-
- model_name: Qwen2-Audio-7B-Instruct
70-
runner: a2-1
71-
- model_name: Qwen3-30B-A3B
72-
runner: a2-2
73-
- model_name: Qwen3-VL-30B-A3B-Instruct
74-
runner: a2-2
75-
- model_name: DeepSeek-V2-Lite
76-
runner: a2-2
77-
fail-fast: false
78-
uses: ./.github/workflows/_accuracy_test.yaml
63+
- runner: linux-aarch64-a2-1
64+
model_list:
65+
- Qwen3-8B
66+
- Qwen2.5-VL-7B-Instruct
67+
- Qwen2-Audio-7B-Instruct
68+
- runner: linux-aarch64-a2-2
69+
model_list:
70+
- Qwen3-30B-A3B
71+
- Qwen3-VL-30B-A3B-Instruct
72+
- DeepSeek-V2-Lite
73+
uses: ./.github/workflows/_e2e_nightly_single_node_models.yaml
7974
with:
8075
vllm: v0.11.0
8176
runner: ${{ matrix.runner }}

0 commit comments

Comments
 (0)