90
90
}}
91
91
strategy :
92
92
matrix :
93
- vllm_use_version : [1]
94
93
# the accuracy test will run:
95
94
# 1. workflow_dispatch with models input
96
95
# - all: Qwen/Qwen3-30B-A3B, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
@@ -122,7 +121,7 @@ jobs:
122
121
) }}
123
122
124
123
fail-fast : false
125
- name : ${{ matrix.model_name }} accuracy V${{ matrix.vllm_use_version }}
124
+ name : ${{ matrix.model_name }} accuracy
126
125
container :
127
126
image : m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
128
127
env :
@@ -236,15 +235,14 @@ jobs:
236
235
echo "vLLM: ${{ env.GHA_VLLM_VERSION }}"
237
236
echo "vLLM Ascend: ${{ env.GHA_VLLM_ASCEND_VERSION }}"
238
237
239
- - name : Run Accuracy Test for V${{ matrix.vllm_use_version }}
238
+ - name : Run Accuracy Test
240
239
id : report
241
240
working-directory : ./benchmarks
242
241
env :
243
242
PYTORCH_NPU_ALLOC_CONF : max_split_size_mb:256
244
- VLLM_USE_V1 : ${{ matrix.vllm_use_version }}
245
243
run : |
246
244
model_base_name=$(basename ${{ matrix.model_name }})
247
- markdown_name="${model_base_name}-V${{ matrix.vllm_use_version }} "
245
+ markdown_name="${model_base_name}"
248
246
echo "markdown_name=$markdown_name"
249
247
echo "markdown_name=$markdown_name" >> $GITHUB_OUTPUT
250
248
mkdir -p ./accuracy
@@ -259,7 +257,6 @@ jobs:
259
257
--vllm_version "${{ env.GHA_VLLM_VERSION }}" \
260
258
--vllm_commit "${{ env.VLLM_COMMIT }}" \
261
259
--vllm_ascend_commit "${{ env.VLLM_ASCEND_COMMIT }}" \
262
- --vllm_use_v1 "$VLLM_USE_V1"
263
260
264
261
- name : Generate step summary
265
262
if : ${{ always() }}
@@ -282,7 +279,7 @@ jobs:
282
279
echo "contains_fail=false" >> $GITHUB_OUTPUT
283
280
fi
284
281
285
- - name : Upload Report for V${{ matrix.vllm_use_version }}
282
+ - name : Upload Report
286
283
if : ${{ github.event_name == 'workflow_dispatch' && steps.check_report.outputs.contains_fail == 'false' }}
287
284
uses : actions/upload-artifact@v4
288
285
with :
0 commit comments