22
22
name : Benchmarks / accuracy
23
23
24
24
on :
25
+ schedule :
26
+ # Runs every 6 hours, at minute 0
27
+ - cron : '0 */6 * * *'
25
28
pull_request :
26
29
types : [ labeled ]
27
30
workflow_dispatch :
34
37
# Current supported vLLM versions
35
38
options :
36
39
- main
40
+ - v0.9.2
37
41
- v0.9.1
38
42
- v0.7.3
39
43
vllm-ascend-version :
42
46
type : choice
43
47
options :
44
48
- main
49
+ - v0.9.1-dev
45
50
- v0.7.3-dev
46
51
models :
47
52
description : ' model:'
48
53
required : true
49
54
type : choice
50
55
options :
51
56
- all
52
- - Qwen/Qwen2.5-7B-Instruct
53
57
- Qwen/Qwen2.5-VL-7B-Instruct
54
58
- Qwen/Qwen3-8B-Base
59
+ - Qwen/Qwen3-30B-A3B
55
60
default : 'all'  # must equal one of the options above exactly; the matrix tests for == 'all'
56
61
57
62
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
@@ -73,56 +78,57 @@ jobs:
73
78
${{
74
79
(contains(github.event.pull_request.labels.*.name, 'accuracy-test') ||
75
80
contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') ||
81
+ contains(github.event.pull_request.labels.*.name, 'moe-accuracy-test') ||
76
82
contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test')) &&
77
83
contains(github.event.pull_request.labels.*.name, 'ready-for-test') ||
78
- github.event_name == 'workflow_dispatch'
84
+ github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
79
85
}}
80
86
runs-on : >-
81
87
${{
82
- (matrix.model_name == 'Qwen/Qwen2.5-VL-7B-Instruct ' && 'linux-arm64-npu-4') ||
88
+ (matrix.model_name == 'Qwen/Qwen3-30B-A3B' && 'linux-arm64-npu-4') ||
83
89
'linux-arm64-npu-2'
84
90
}}
85
91
strategy :
86
92
matrix :
87
- vllm_use_version : [0, 1]
93
+ vllm_use_version : [1]
88
94
# the accuracy test will run (model names must match the dispatch options and the runs-on check exactly, no padding spaces):
89
95
# 1. workflow_dispatch with models input (a scheduled run uses the same model list as "all")
90
- # - all: Qwen/Qwen2.5-7B-Instruct , Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
91
- # - specified but not all: Qwen/Qwen2.5-7B-Instruct , Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
96
+ # - all: Qwen/Qwen3-30B-A3B, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
97
+ # - specified but not all: Qwen/Qwen3-30B-A3B, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
92
98
# 2. PR labeled with "*-accuracy-test"
93
- # - accuracy-test: Qwen/Qwen2.5-7B-Instruct , Qwen/Qwen2.5-VL-7B-Instruct
94
- # - dense-accuracy-test: Qwen/Qwen2.5-7B-Instruct
99
+ # - accuracy-test: Qwen/Qwen3-8B-Base, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-30B-A3B
100
+ # - dense-accuracy-test: Qwen/Qwen3-8B-Base
95
101
# - vl-accuracy-test: Qwen/Qwen2.5-VL-7B-Instruct
102
+ # - moe-accuracy-test: Qwen/Qwen3-30B-A3B
96
103
model_name : ${{ fromJSON(
104
+ (github.event_name == 'schedule' &&
105
+ ' ["Qwen/Qwen3-30B-A3B","Qwen/Qwen2.5-VL-7B-Instruct","Qwen/Qwen3-8B-Base"]' ) ||
97
106
(github.event.inputs.models == 'all' &&
98
- ' ["Qwen/Qwen2.5-7B-Instruct ","Qwen/Qwen2.5-VL-7B-Instruct","Qwen/Qwen3-8B-Base"]' ) ||
99
- (github.event.inputs.models == 'Qwen/Qwen2.5-7B-Instruct ' &&
100
- ' ["Qwen/Qwen2.5-7B-Instruct "]' ) ||
107
+ ' ["Qwen/Qwen3-30B-A3B","Qwen/Qwen2.5-VL-7B-Instruct","Qwen/Qwen3-8B-Base"]' ) ||
108
+ (github.event.inputs.models == 'Qwen/Qwen3-30B-A3B' &&
109
+ ' ["Qwen/Qwen3-30B-A3B"]' ) ||
101
110
(github.event.inputs.models == 'Qwen/Qwen2.5-VL-7B-Instruct' &&
102
111
' ["Qwen/Qwen2.5-VL-7B-Instruct"]' ) ||
103
112
(github.event.inputs.models == 'Qwen/Qwen3-8B-Base' &&
104
113
' ["Qwen/Qwen3-8B-Base"]' ) ||
105
114
contains(github.event.pull_request.labels.*.name, 'accuracy-test') &&
106
- ' ["Qwen/Qwen2.5-7B-Instruct ","Qwen/Qwen2.5-VL-7B-Instruct"]' ||
115
+ ' ["Qwen/Qwen3-8B-Base","Qwen/Qwen2.5-VL-7B-Instruct", "Qwen/Qwen3-30B-A3B"]' ||
107
116
contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test') &&
108
- ' ["Qwen/Qwen2.5-7B-Instruct "]' ||
117
+ ' ["Qwen/Qwen3-8B-Base"]' ||
109
118
contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') &&
110
- ' ["Qwen/Qwen2.5-VL-7B-Instruct"]'
119
+ ' ["Qwen/Qwen2.5-VL-7B-Instruct"]' ||
120
+ contains(github.event.pull_request.labels.*.name, 'moe-accuracy-test') &&
121
+ ' ["Qwen/Qwen3-30B-A3B"]'
111
122
) }}
112
- # Remove exclude after https://github.com/vllm-project/vllm-ascend/issues/1044 resolved
113
- exclude :
114
- - model_name : Qwen/Qwen2.5-VL-7B-Instruct
115
- vllm_use_version : 1
116
123
117
124
fail-fast : false
118
125
name : ${{ matrix.model_name }} accuracy V${{ matrix.vllm_use_version }}
119
126
container :
120
127
image : m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
121
128
env :
122
- HF_ENDPOINT : https://hf-mirror.com
123
- HF_TOKEN : ${{ secrets.HF_TOKEN }}
124
129
DATASET_SOURCE : ModelScope
125
130
VLLM_USE_MODELSCOPE : True
131
+ USE_MODELSCOPE_HUB : 1
126
132
# 1. If version specified (workflow_dispatch), do specified branch accuracy test
127
133
# 2. If no version (labeled PR), do accuracy test by default ref:
128
134
# The branch, tag or SHA to checkout. When checking out the repository that
@@ -158,7 +164,7 @@ jobs:
158
164
repository : vllm-project/vllm
159
165
path : ./vllm-empty
160
166
# Default vLLM ref used when no vllm-version input is given; please also update this when bumping the matched version
161
- ref : ${{ github.event.inputs.vllm-version || 'v0.9.1 ' }}
167
+ ref : ${{ github.event.inputs.vllm-version || 'v0.9.2' }}
162
168
163
169
- name : Install vllm-project/vllm from source
164
170
working-directory : ./vllm-empty
@@ -177,11 +183,28 @@ jobs:
177
183
PIP_EXTRA_INDEX_URL : https://mirrors.huaweicloud.com/ascend/repos/pypi
178
184
run : |
179
185
pip install -r requirements-dev.txt
180
- pip install -e .
186
+ pip install -v -e .
187
+
188
+ - name : Get vLLM commit hash and URL
189
+ working-directory : ./vllm-empty
190
+ run : |
191
+ VLLM_COMMIT=$(git rev-parse --short=7 HEAD)
192
+ echo "VLLM_COMMIT=$VLLM_COMMIT" >> $GITHUB_ENV
193
+
194
+ - name : Get vLLM-Ascend commit hash and URL
195
+ working-directory : ./vllm-ascend
196
+ run : |
197
+ VLLM_ASCEND_COMMIT=$(git rev-parse --short=7 HEAD)
198
+ echo "VLLM_ASCEND_COMMIT=$VLLM_ASCEND_COMMIT" >> $GITHUB_ENV
199
+
200
+ - name : Print resolved hashes
201
+ run : |
202
+ echo "vLLM : ${{ env.VLLM_COMMIT }}"
203
+ echo "vLLM-Ascend: ${{ env.VLLM_ASCEND_COMMIT }}"
181
204
182
205
- name : Install lm-eval, ray, and datasets
183
206
run : |
184
- pip install lm-eval
207
+ pip install lm-eval==0.4.8
185
208
186
209
- name : Collect version info
187
210
run : |
@@ -233,7 +256,10 @@ jobs:
233
256
--cann_version "${{ env.GHA_CANN_VERSION }}" \
234
257
--torch_npu_version "${{ env.GHA_TORCH_NPU_VERSION }}" \
235
258
--torch_version "${{ env.GHA_TORCH_VERSION }}" \
236
- --vllm_version "${{ env.GHA_VLLM_VERSION }}"
259
+ --vllm_version "${{ env.GHA_VLLM_VERSION }}" \
260
+ --vllm_commit "${{ env.VLLM_COMMIT }}" \
261
+ --vllm_ascend_commit "${{ env.VLLM_ASCEND_COMMIT }}" \
262
+ --vllm_use_v1 "$VLLM_USE_V1"
237
263
238
264
- name : Generate step summary
239
265
if : ${{ always() }}
@@ -245,12 +271,122 @@ jobs:
245
271
SAFE_VLLM_ASCEND_VERSION="${GHA_VLLM_ASCEND_VERSION//\//-}"
246
272
echo "SAFE_VLLM_ASCEND_VERSION=$SAFE_VLLM_ASCEND_VERSION" >> "$GITHUB_ENV"
247
273
274
+ - name : Check report for failure markers
275
+ id : check_report
276
+ run : |
277
+ REPORT_PATH="./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md"
278
+ echo "Scanning $REPORT_PATH for ❌ …"
279
+ if [ ! -f "$REPORT_PATH" ] || grep -q '❌' "$REPORT_PATH"; then  # a missing report is treated as a failure, not a pass
280
+ echo "contains_fail=true" >> "$GITHUB_OUTPUT"
281
+ else
282
+ echo "contains_fail=false" >> "$GITHUB_OUTPUT"
283
+ fi
284
+
248
285
- name : Upload Report for V${{ matrix.vllm_use_version }}
249
- if : ${{ github.event_name == 'workflow_dispatch' }}
286
+ if : ${{ github.event_name == 'workflow_dispatch' && steps.check_report.outputs.contains_fail == 'false' }}
250
287
uses : actions/upload-artifact@v4
251
288
with :
252
- name : " ${{ env.SAFE_VLLM_ASCEND_VERSION }}-${{ steps.report.outputs.markdown_name }}-report "
289
+ name : " report- ${{ env.SAFE_VLLM_ASCEND_VERSION }}-${{ steps.report.outputs.markdown_name }}"
253
290
path : ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md
254
291
if-no-files-found : warn
255
292
retention-days : 90
256
293
overwrite : true
294
+
295
+ create_pr :
296
+ runs-on : ubuntu-latest
297
+ needs : accuracy_tests
298
+ if : ${{ github.event_name == 'workflow_dispatch' }}
299
+ env :
300
+ UPSTREAM_REPO : vllm-project/vllm-ascend
301
+ steps :
302
+ - name : Checkout repository
303
+ uses : actions/checkout@v4
304
+ with :
305
+ repository : vllm-ascend-ci/vllm-ascend
306
+ token : ${{ secrets.PAT_TOKEN }}
307
+ ref : main
308
+
309
+ - name : Add upstream remote
310
+ run : |
311
+ git remote add upstream https://github.com/${{ env.UPSTREAM_REPO }}.git
312
+ git fetch upstream
313
+ git remote -v
314
+
315
+ - name : Set Git user info dynamically
316
+ run : |
317
+ git config user.name "${{ github.actor }}"
318
+ git config user.email "${{ github.actor }}@users.noreply.github.com"
319
+
320
+ - name : Create or switch to branch
321
+ run : |
322
+ TIMESTAMP=$(date +%Y%m%d%H%M%S)
323
+ BRANCH_NAME="auto-pr/accuracy-report-${TIMESTAMP}"
324
+ echo "BRANCH_NAME=${BRANCH_NAME}" >> $GITHUB_ENV
325
+ git checkout -B "${BRANCH_NAME}" upstream/${{ github.event.inputs.vllm-ascend-version }}
326
+
327
+ - name : Download only current run reports
328
+ uses : actions/download-artifact@v4
329
+ with :
330
+ path : ./docs/source/developer_guide/evaluation/accuracy_report
331
+ pattern : report-*
332
+ github-token : ${{ secrets.GITHUB_TOKEN }}
333
+ run-id : ${{ github.run_id }}
334
+
335
+ - name : Delete old report
336
+ run : |
337
+ find ./docs/source/developer_guide/evaluation/accuracy_report -maxdepth 1 -type f -name '*.md' ! -name 'index.md' -delete
338
+ find ./docs/source/developer_guide/evaluation/accuracy_report -mindepth 2 -type f -name '*.md' -exec mv -f {} ./docs/source/developer_guide/evaluation/accuracy_report \;
339
+ find ./docs/source/developer_guide/evaluation/accuracy_report -mindepth 1 -type d -empty -delete
340
+
341
+ - name : Update accuracy_report/index.md
342
+ run : |
343
+ REPORT_DIR="./docs/source/developer_guide/evaluation/accuracy_report"
344
+ INDEX_MD="$REPORT_DIR/index.md"
345
+ {
346
+ echo "# Accuracy Report"
347
+ echo ""
348
+ echo ":::{toctree}"
349
+ echo ":caption: Accuracy Report"
350
+ echo ":maxdepth: 1"
351
+
352
+ for report in "$REPORT_DIR"/*.md; do
353
+ filename="$(basename "$report" .md)"
354
+ if [ "$filename" != "index" ]; then
355
+ echo "$filename"
356
+ fi
357
+ done
358
+ echo ":::"
359
+ } > "$INDEX_MD"
360
+
361
+ - name : push accuracy report
362
+ env :
363
+ GITHUB_TOKEN : ${{ secrets.PAT_TOKEN }}
364
+ run : |
365
+ git add ./docs/source/developer_guide/evaluation/accuracy_report/*.md
366
+ git commit -s -m "[Doc] Update accuracy reports for ${{ github.event.inputs.vllm-ascend-version }}"
367
+ git push -f origin "${{ env.BRANCH_NAME }}"
368
+
369
+ - name : Create PR in upstream via API
370
+ uses : actions/github-script@v7
371
+ with :
372
+ github-token : ${{ secrets.PAT_TOKEN }}
373
+ script : |
374
+ const pr = await github.rest.pulls.create({
375
+ owner: 'vllm-project',
376
+ repo: 'vllm-ascend',
377
+ head: `vllm-ascend-ci:${{ env.BRANCH_NAME }}`, // branch pushed to the vllm-ascend-ci fork by the push step
378
+ base: '${{ github.event.inputs.vllm-ascend-version }}', // same branch the report branch was created from
379
+ title: `[Doc] Update accuracy reports for ${{ github.event.inputs.vllm-ascend-version }}`,
380
+ body: `The accuracy results running on NPU Atlas A2 have changed, updating reports for:
381
+ ${{
382
+ github.event.inputs.models == 'all'
383
+ && 'All models (Qwen/Qwen3-30B-A3B, Qwen2.5-VL-7B-Instruct, Qwen3-8B-Base)'
384
+ || github.event.inputs.models
385
+ }}
386
+
387
+ - [Workflow run][1]
388
+
389
+ [1]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}`
390
+ });
391
+ core.info(`Created PR #${pr.data.number}`);
392
+
0 commit comments