@@ -62,6 +62,10 @@ defaults:
62
62
run :
63
63
shell : bash -el {0}
64
64
65
# One concurrency group per pull request: a new push to a PR cancels that
# PR's in-flight run. Events that carry no pull request (for example
# workflow_dispatch) have an empty PR number, so fall back to the unique
# run id; otherwise every manual run would share the literal group "pr-"
# and cancel the others.
concurrency:
  group: pr-${{ github.event.pull_request.number || github.run_id }}
  cancel-in-progress: true
68
+
65
69
jobs :
66
70
accuracy_tests :
67
71
# Triggered for PRs labeled with both an '*-accuracy-test' label and 'ready-for-test', or by a workflow_dispatch run
73
77
contains(github.event.pull_request.labels.*.name, 'ready-for-test') ||
74
78
github.event_name == 'workflow_dispatch'
75
79
}}
76
# Qwen2.5-VL runs on linux-arm64-npu-4; every other model runs on
# linux-arm64-npu-2. matrix.model_name holds the full "Qwen/..." id, so the
# comparison has to include the "Qwen/" prefix or it never matches.
runs-on: ${{ matrix.model_name == 'Qwen/Qwen2.5-VL-7B-Instruct' && 'linux-arm64-npu-4' || 'linux-arm64-npu-2' }}
77
81
strategy :
78
82
matrix :
83
+ vllm_use_version : [0, 1]
79
84
# the accuracy test will run:
80
85
# 1. workflow_dispatch with models input
81
86
# - all: Qwen/Qwen2.5-7B-Instruct, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
@@ -85,16 +90,26 @@ jobs:
85
90
# - dense-accuracy-test: Qwen/Qwen2.5-7B-Instruct
86
91
# - vl-accuracy-test: Qwen/Qwen2.5-VL-7B-Instruct
87
92
# Model axis of the matrix, selected from the workflow_dispatch `models`
# input or from the PR labels. It is a real axis (not `include` entries) so
# that every model is crossed with every vllm_use_version: `include` objects
# that only add `model_name` are merged into the existing combinations and
# overwrite one another, which would leave only the last model listed.
model_name: ${{ fromJSON(
  (github.event.inputs.models == 'all' &&
    '["Qwen/Qwen2.5-7B-Instruct","Qwen/Qwen2.5-VL-7B-Instruct","Qwen/Qwen3-8B-Base"]') ||
  (github.event.inputs.models == 'Qwen/Qwen2.5-7B-Instruct' &&
    '["Qwen/Qwen2.5-7B-Instruct"]') ||
  (github.event.inputs.models == 'Qwen/Qwen2.5-VL-7B-Instruct' &&
    '["Qwen/Qwen2.5-VL-7B-Instruct"]') ||
  (github.event.inputs.models == 'Qwen/Qwen3-8B-Base' &&
    '["Qwen/Qwen3-8B-Base"]') ||
  contains(github.event.pull_request.labels.*.name, 'accuracy-test') &&
    '["Qwen/Qwen2.5-7B-Instruct","Qwen/Qwen2.5-VL-7B-Instruct"]' ||
  contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test') &&
    '["Qwen/Qwen2.5-7B-Instruct"]' ||
  contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') &&
    '["Qwen/Qwen2.5-VL-7B-Instruct"]'
  ) }}
96
111
fail-fast : false
97
- name : ${{ matrix.model_name }} accuracy
112
+ name : ${{ matrix.model_name }} accuracy V${{ matrix.vllm_use_version }}
98
113
container :
99
114
image : m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
100
115
env :
@@ -189,34 +204,38 @@ jobs:
189
204
echo "vLLM: ${{ env.GHA_VLLM_VERSION }}"
190
205
echo "vLLM Ascend: ${{ env.GHA_VLLM_ASCEND_VERSION || github.ref }}"
191
206
192
# Runs scripts/run_accuracy.py for one (model, vLLM engine version) matrix
# entry, asking it to write its report to
# ./benchmarks/accuracy/<model basename>-V<version>.md, and publishes
# `markdown_name` as a step output so later steps can locate that file.
- name: Run Accuracy Test for V${{ matrix.vllm_use_version }}
  id: report
  working-directory: ./benchmarks
  env:
    PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
    VLLM_USE_V1: ${{ matrix.vllm_use_version }}
  run: |
    # Strip the "Qwen/" organisation prefix; quoted so the value is always
    # passed to basename as a single argument.
    model_base_name=$(basename "${{ matrix.model_name }}")
    markdown_name="${model_base_name}-V${{ matrix.vllm_use_version }}"
    echo "markdown_name=$markdown_name"
    echo "markdown_name=$markdown_name" >> "$GITHUB_OUTPUT"
    mkdir -p ./accuracy

    python ./scripts/run_accuracy.py \
      --model "${{ matrix.model_name }}" \
      --output "./accuracy/${markdown_name}.md" \
      --vllm_ascend_version "${{ env.GHA_VLLM_ASCEND_VERSION || github.ref }}" \
      --cann_version "${{ env.GHA_CANN_VERSION }}" \
      --torch_npu_version "${{ env.GHA_TORCH_NPU_VERSION }}" \
      --torch_version "${{ env.GHA_TORCH_VERSION }}" \
      --vllm_version "${{ env.GHA_VLLM_VERSION }}"
212
228
213
- cat ./accuracy/V0/${model_base_name}.md >> $GITHUB_STEP_SUMMARY
229
# Appends the accuracy report to the job summary. The step runs even when
# the accuracy step failed, so the report may not exist; in that case it
# logs a note instead of failing a second time on a missing file.
- name: Generate step summary
  if: ${{ always() }}
  env:
    REPORT_PATH: ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md
  run: |
    if [ -f "$REPORT_PATH" ]; then
      cat "$REPORT_PATH" >> "$GITHUB_STEP_SUMMARY"
    else
      echo "No accuracy report found at $REPORT_PATH; skipping step summary."
    fi
214
233
215
- - name : Upload Report for V0
234
+ - name : Upload Report for V${{ matrix.vllm_use_version }}
216
235
uses : actions/upload-artifact@v4
217
236
with :
218
- name : " ${{ env.GHA_VLLM_ASCEND_VERSION }}-${{ steps.report.outputs.model_base_name }}-V0 -report"
219
- path : ./benchmarks/accuracy/V0/ ${{ steps.report.outputs.model_base_name }}.md
237
+ name : " ${{ env.GHA_VLLM_ASCEND_VERSION }}-${{ steps.report.outputs.markdown_name }}-report"
238
+ path : ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md
220
239
if-no-files-found : warn
221
240
retention-days : 90
222
241
overwrite : true
0 commit comments