@@ -62,6 +62,10 @@ defaults:
62
62
run :
63
63
shell : bash -el {0}
64
64
65
+ # Cancel superseded runs of the same pull request. Manual (workflow_dispatch)
+ # runs have no PR number, so fall back to the unique run id; otherwise they
+ # would all share the group "pr-" and cancel one another.
+ concurrency :
66
+ group : pr-${{ github.event.pull_request.number || github.run_id }}
67
+ cancel-in-progress : true
68
+
65
69
jobs :
66
70
accuracy_tests :
67
71
# Runs when the PR has both a '*-accuracy-test' label and the 'ready-for-test' label, or when triggered via workflow_dispatch
73
77
contains(github.event.pull_request.labels.*.name, 'ready-for-test') ||
74
78
github.event_name == 'workflow_dispatch'
75
79
}}
76
- runs-on : ${{ matrix.runner || 'linux-arm64-npu-2' }}
80
+ runs-on : >-
81
+ ${{
82
+ (matrix.model_name == 'Qwen/Qwen2.5-VL-7B-Instruct' && 'linux-arm64-npu-4') ||
83
+ 'linux-arm64-npu-2'
84
+ }}
77
85
strategy :
78
86
matrix :
87
+ vllm_use_version : [0, 1]
79
88
# the accuracy test will run:
80
89
# 1. workflow_dispatch with models input
81
90
# - all: Qwen/Qwen2.5-7B-Instruct, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
@@ -84,17 +93,24 @@ jobs:
84
93
# - accuracy-test: Qwen/Qwen2.5-7B-Instruct, Qwen/Qwen2.5-VL-7B-Instruct
85
94
# - dense-accuracy-test: Qwen/Qwen2.5-7B-Instruct
86
95
# - vl-accuracy-test: Qwen/Qwen2.5-VL-7B-Instruct
87
- include : ${{ fromJSON(
88
- (github.event.inputs.models == 'all' && '[{"model_name":"Qwen/Qwen2.5-7B-Instruct"},{"model_name":"Qwen/Qwen2.5-VL-7B-Instruct", "runner":"linux-arm64-npu-4"},{"model_name":"Qwen/Qwen3-8B-Base"}]') ||
89
- (github.event.inputs.models == 'Qwen/Qwen2.5-7B-Instruct' && '[{"model_name":"Qwen/Qwen2.5-7B-Instruct"}]') ||
90
- (github.event.inputs.models == 'Qwen/Qwen2.5-VL-7B-Instruct' && '[{"model_name":"Qwen/Qwen2.5-VL-7B-Instruct", "runner":"linux-arm64-npu-4"}]') ||
91
- (github.event.inputs.models == 'Qwen/Qwen3-8B-Base' && '[{"model_name":"Qwen/Qwen3-8B-Base"}]') ||
92
- contains(github.event.pull_request.labels.*.name, 'accuracy-test') && '[{"model_name":"Qwen/Qwen2.5-7B-Instruct"},{"model_name":"Qwen/Qwen2.5-VL-7B-Instruct", "runner":"linux-arm64-npu-4"}]' ||
93
- contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test') && '[{"model_name":"Qwen/Qwen2.5-7B-Instruct"}]' ||
94
- contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') && '[{"model_name":"Qwen/Qwen2.5-VL-7B-Instruct", "runner":"linux-arm64-npu-4"}]'
96
+ # Every branch must be a JSON array of plain model-name strings; fromJSON
+ # turns the selected array into the model_name matrix axis.
+ model_name : ${{ fromJSON(
97
+ (github.event.inputs.models == 'all' &&
98
+ ' ["Qwen/Qwen2.5-7B-Instruct","Qwen/Qwen2.5-VL-7B-Instruct","Qwen/Qwen3-8B-Base"]' ) ||
99
+ (github.event.inputs.models == 'Qwen/Qwen2.5-7B-Instruct' &&
100
+ ' ["Qwen/Qwen2.5-7B-Instruct"]' ) ||
101
+ (github.event.inputs.models == 'Qwen/Qwen2.5-VL-7B-Instruct' &&
102
+ ' ["Qwen/Qwen2.5-VL-7B-Instruct"]' ) ||
103
+ (github.event.inputs.models == 'Qwen/Qwen3-8B-Base' &&
104
+ ' ["Qwen/Qwen3-8B-Base"]' ) ||
105
+ contains(github.event.pull_request.labels.*.name, 'accuracy-test') &&
106
+ ' ["Qwen/Qwen2.5-7B-Instruct","Qwen/Qwen2.5-VL-7B-Instruct"]' ||
107
+ contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test') &&
108
+ ' ["Qwen/Qwen2.5-7B-Instruct"]' ||
109
+ contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') &&
110
+ ' ["Qwen/Qwen2.5-VL-7B-Instruct"]'
95
111
) }}
96
112
fail-fast : false
97
- name : ${{ matrix.model_name }} accuracy
113
+ name : ${{ matrix.model_name }} accuracy V${{ matrix.vllm_use_version }}
98
114
container :
99
115
image : m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
100
116
env :
@@ -189,34 +205,38 @@ jobs:
189
205
echo "vLLM: ${{ env.GHA_VLLM_VERSION }}"
190
206
echo "vLLM Ascend: ${{ env.GHA_VLLM_ASCEND_VERSION || github.ref }}"
191
207
192
- - name : Run Accuracy Test for V0
208
+ - name : Run Accuracy Test for V${{ matrix.vllm_use_version }}
193
209
id : report
194
210
working-directory : ./benchmarks
195
211
env :
196
- VLLM_USE_V1 : 0
197
212
PYTORCH_NPU_ALLOC_CONF : max_split_size_mb:256
213
+ VLLM_USE_V1 : ${{ matrix.vllm_use_version }}
198
214
run : |
199
215
model_base_name=$(basename ${{ matrix.model_name }})
200
- echo "model_base_name=$model_base_name"
201
- echo "model_base_name=$model_base_name" >> $GITHUB_OUTPUT
202
- mkdir -p ./accuracy/V0
216
+ markdown_name="${model_base_name}-V${{ matrix.vllm_use_version }}"
217
+ echo "markdown_name=$markdown_name"
218
+ echo "markdown_name=$markdown_name" >> $GITHUB_OUTPUT
219
+ mkdir -p ./accuracy
203
220
204
221
python ./scripts/run_accuracy.py \
205
222
--model "${{ matrix.model_name }}" \
206
- --output "./accuracy/V0/${model_base_name }.md" \
223
+ --output "./accuracy/${markdown_name}.md" \
207
224
--vllm_ascend_version "${{ env.GHA_VLLM_ASCEND_VERSION || github.ref }}" \
208
225
--cann_version "${{ env.GHA_CANN_VERSION }}" \
209
226
--torch_npu_version "${{ env.GHA_TORCH_NPU_VERSION }}" \
210
227
--torch_version "${{ env.GHA_TORCH_VERSION }}" \
211
228
--vllm_version "${{ env.GHA_VLLM_VERSION }}"
212
229
213
- cat ./accuracy/V0/${model_base_name}.md >> $GITHUB_STEP_SUMMARY
230
+ - name : Generate step summary
231
+ if : ${{ always() }}
232
+ run : |
233
+ # This step also runs after a failed accuracy run, when the report may not
+ # exist; note that in the summary instead of failing a second time.
+ report="./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md"
+ if [ -f "$report" ]; then
+ cat "$report" >> "$GITHUB_STEP_SUMMARY"
+ else
+ echo "Accuracy report not found: $report" >> "$GITHUB_STEP_SUMMARY"
+ fi
214
234
215
- - name : Upload Report for V0
235
+ - name : Upload Report for V${{ matrix.vllm_use_version }}
216
236
uses : actions/upload-artifact@v4
217
237
with :
218
- name : " ${{ env.GHA_VLLM_ASCEND_VERSION }}-${{ steps.report.outputs.model_base_name }}-V0 -report"
219
- path : ./benchmarks/accuracy/V0/ ${{ steps.report.outputs.model_base_name }}.md
238
+ name : " ${{ env.GHA_VLLM_ASCEND_VERSION }}-${{ steps.report.outputs.markdown_name }}-report"
239
+ path : ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md
220
240
if-no-files-found : warn
221
241
retention-days : 90
222
242
overwrite : true
0 commit comments