[Test] Refactor accuracy test to nightly test

zhangxinyuehfad · zhangxinyuehfad · commit b0f377135404 · 2025-11-04T19:01:19.000+08:00
Signed-off-by: hfadzxy <starmoon_zhang@163.com>
diff --git a/.github/workflows/_e2e_nightly_single_node_models.yaml b/.github/workflows/_e2e_nightly_single_node_models.yaml
@@ -1,4 +1,21 @@
-name: 'accuracy test'
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+#
+
+name: 'e2e nightly models test'
 
 on:
   workflow_call:
@@ -16,46 +33,52 @@ on:
       image:
         required: true
         type: string
-      model_name:
+      model_list:  # JSON-encoded array of model names (callers pass toJson(...)); decoded with jq in the test step
         required: true
         type: string
       upload:
         required: false
         type: boolean
         default: false
 
-jobs:
-  accuracy_tests:
+# Run every `run:` step in a login shell (-l) that exits on the first failing
+# command (-e). Bash does not read ~/.profile or ~/.bashrc otherwise, and the
+# login shell is what activates the ascend-toolkit environment variables.
+defaults:
+  run:
+    shell: bash -el {0}
+
+# One concurrency group per workflow, ref, runner and model list: a newer run
+# cancels an in-progress run only when all four of these match.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}-${{ inputs.runner }}-${{inputs.model_list}}
+  cancel-in-progress: true
 
+jobs:
+  e2e-nightly:  # NOTE(review): container.image below is pinned; confirm whether inputs.image should be used
+    name: ${{inputs.model_list}} accuracy test
     runs-on: ${{ inputs.runner }}
-    name: ${{ inputs.model_name }} accuracy
     container:
       image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
       env:
         VLLM_USE_MODELSCOPE: True
-        # 1. If version specified (work_dispatch), do specified branch accuracy test
-        # 2. If no version (labeled PR), do accuracy test by default ref:
-        # The branch, tag or SHA to checkout. When checking out the repository that
-        # triggered a workflow, this defaults to the reference or SHA for that event.
-        # Otherwise, uses the default branch.
         GHA_VLLM_ASCEND_VERSION: ${{ inputs.vllm-ascend }}
-
     steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-
-      - name: Set model name as output
-        id: set_output
+      - name: Check npu and CANN info  # dumps `npu-smi info` and the toolkit install info into the job log
         run: |
-          echo "model_name=${{ inputs.model_name }}" >> $GITHUB_OUTPUT
+          npu-smi info
+          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
 
       - name: Config mirrors
         run: |
-          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
-          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
-          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
+          sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
+          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
           apt-get update -y
           apt install git -y
+          git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
+
+      - name: Checkout vllm-project/vllm-ascend repo  # placed after the step that installs git and sets the GitHub proxy rewrite
+        uses: actions/checkout@v4
 
       - name: Install system dependencies
         run: |
@@ -73,9 +96,16 @@ jobs:
         working-directory: ./vllm-empty
         run: |
           VLLM_TARGET_DEVICE=empty pip install -e .
-        
+
+      - name: Install vllm-project/vllm-ascend  # no working-directory: installs from the workspace root checkout
+        env:
+          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
+        run: |
+          pip install -r requirements-dev.txt
+          pip install -v -e .
+
       - name: Install Ascend toolkit & triton_ascend (for Qwen3-Next-80B-A3B-Instruct)
-        if: ${{ inputs.model_name == 'Qwen3-Next-80B-A3B-Instruct' }}
+        if: ${{ inputs.runner == 'linux-aarch64-a2-4' && contains(inputs.model_list, 'Qwen3-Next-80B-A3B-Instruct') }}  # model_list is a string input, so contains() is a substring match
         shell: bash -l {0}
         run: |
           wget -q https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/Ascend-BiSheng-toolkit_aarch64.run -O /tmp/Ascend-BiSheng-toolkit_aarch64.run
@@ -108,14 +138,6 @@ jobs:
           path: ./vllm-ascend
           ref: ${{ env.GHA_VLLM_ASCEND_VERSION }}
 
-      - name: Install vllm-project/vllm-ascend
-        working-directory: ./vllm-ascend
-        env:
-          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
-        run: |
-          pip install -r requirements-dev.txt
-          pip install -v -e .
-
       - name: Get vLLM commit hash and URL
         working-directory: ./vllm-empty
         run: |
@@ -149,11 +171,12 @@ jobs:
             pip show vllm | grep "Version:" | awk '{print "GHA_VLLM_VERSION="$2}' | sed 's/+.*//'
           } >> "$GITHUB_ENV"
 
-      - name: Run accuracy test
+      - name: Run vllm-project/vllm-ascend accuracy test
         id: report
         env:
           VLLM_WORKER_MULTIPROC_METHOD: spawn
           VLLM_USE_MODELSCOPE: True
+          VLLM_CI_RUNNER: ${{ inputs.runner }}  # runner label exposed to the test process; NOTE(review): consumer not visible here
           VLLM_VERSION: ${{ env.GHA_VLLM_VERSION }}
           VLLM_COMMIT: ${{ env.VLLM_COMMIT }}
           VLLM_ASCEND_VERSION: ${{ env.GHA_VLLM_ASCEND_VERSION || github.ref }}
@@ -162,24 +185,62 @@ jobs:
           TORCH_VERSION: ${{ env.GHA_TORCH_VERSION }}
           TORCH_NPU_VERSION: ${{ env.GHA_TORCH_NPU_VERSION }}
+          # Hand the JSON list to the script through the environment rather than
+          # expanding it inside the shell source, where its quotes would be re-parsed.
+          MODEL_LIST: ${{ inputs.model_list }}
         run: |
-          model_base_name=$(basename ${{ inputs.model_name }})
-          markdown_name="${model_base_name}"
-          echo "markdown_name=$markdown_name" >> $GITHUB_OUTPUT
           mkdir -p ./benchmarks/accuracy
-          pytest -sv ./tests/e2e/models/test_lm_eval_correctness.py \
-          --config ./tests/e2e/models/configs/${{ inputs.model_name }}.yaml
+          echo "Received model_list: $MODEL_LIST"
+          models=$(jq -r '.[]' <<< "$MODEL_LIST")
+          # An empty list would otherwise skip the loop and report success.
+          if [ -z "$models" ]; then
+            echo "model_list is empty, nothing to test"
+            exit 1
+          fi
+          any_failure=0
+          for model in $models; do
+            echo "Running test for model: $model"
+            # Record the failure and continue so the remaining models still run.
+            pytest -sv ./tests/e2e/models/test_lm_eval_correctness.py \
+              --config "./tests/e2e/models/configs/${model}.yaml" || {
+              echo "Test failed for model: $model"
+              any_failure=1
+            }
+          done
+
+          if [ "$any_failure" -ne 0 ]; then
+            exit 1
+          fi
 
       - name: Generate step summary
         if: ${{ always() }}
+        env:
+          MODEL_LIST: ${{ inputs.model_list }}
         run: |
-          cat ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md >> $GITHUB_STEP_SUMMARY
+          models=$(jq -r '.[]' <<< "$MODEL_LIST")
+          for model in $models; do
+            echo "Processing model: $model"
+            report="./benchmarks/accuracy/$(basename "$model").md"
+            # This step also runs after failures, so a report may be missing;
+            # skip it instead of letting `bash -e` abort the remaining summaries.
+            if [ -f "$report" ]; then
+              cat "$report" >> "$GITHUB_STEP_SUMMARY"
+            else
+              echo "No accuracy report found for model: $model"
+            fi
+          done
+
+      - name: Set artifact timestamp
+        id: ts
+        run: |
+          echo "artifact_ts=$(date -u +%Y%m%dT%H%M%SZ)" >> "$GITHUB_OUTPUT"
 
       - name: Upload Report
         if: ${{ inputs.upload == true }}
         uses: actions/upload-artifact@v5
         with:
-          name: "report-${{ env.GHA_VLLM_ASCEND_VERSION }}-${{ steps.report.outputs.markdown_name }}"
-          path: ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md
+          # The runner label keeps parallel matrix jobs from colliding on one artifact name.
+          name: report-${{ env.GHA_VLLM_ASCEND_VERSION }}-${{ inputs.runner }}-${{ steps.ts.outputs.artifact_ts }}
+          path: ./benchmarks/accuracy/
           if-no-files-found: warn
           retention-days: 90
-          overwrite: true
+          overwrite: true
diff --git a/.github/workflows/accuracy_test.yaml b/.github/workflows/accuracy_test.yaml
diff --git a/.github/workflows/vllm_ascend_test_nightly_a2.yaml b/.github/workflows/vllm_ascend_test_nightly_a2.yaml
@@ -66,7 +66,7 @@ jobs:
   multi-node-tests:
     name: multi-node
     needs: single-node-tests
-    if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
+    if: ${{ always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') }}  # runs even if single-node-tests failed, but only for scheduled or manual runs
     strategy:
       fail-fast: false
       max-parallel: 1
@@ -88,3 +88,39 @@ jobs:
       config_file_path: ${{ matrix.test_config.config_file_path }}
     secrets:
       KUBECONFIG_B64: ${{ secrets.KUBECONFIG_A2_B64 }}
+
+  # Nightly accuracy runs: one reusable-workflow call per runner, each with its own model list.
+  single-node-accuracy-tests:
+    if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
+    strategy:
+      fail-fast: false
+      matrix:
+        test_config:
+          - os: linux-aarch64-a2-1
+            model_list:
+              - Qwen3-8B
+              - Qwen2.5-VL-7B-Instruct
+              # TODO: This model has a bug that needs to be fixed and readded
+              # - Qwen2-Audio-7B-Instruct
+              - Qwen3-8B-W8A8
+              - Qwen3-VL-8B-Instruct
+              - Qwen2.5-Omni-7B
+              - Meta-Llama-3.1-8B-Instruct
+          - os: linux-aarch64-a2-2
+            model_list:
+              - Qwen3-30B-A3B
+              - Qwen3-VL-30B-A3B-Instruct
+              - DeepSeek-V2-Lite
+          - os: linux-aarch64-a2-4
+            model_list:
+              - Qwen3-Next-80B-A3B-Instruct
+              - Qwen3-30B-A3B-W8A8
+    uses: ./.github/workflows/_e2e_nightly_single_node_models.yaml
+    with:
+      vllm: v0.11.0
+      runner: ${{ matrix.test_config.os }}
+      model_list: ${{ toJson(matrix.test_config.model_list) }}
+      # NOTE(review): the reusable workflow's container image is pinned to cann:8.3.rc1
+      # in its job definition; confirm that this 8.2.rc1 value is actually consumed.
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
+      upload: false
diff --git a/.github/workflows/vllm_ascend_test_report.yaml b/.github/workflows/vllm_ascend_test_report.yaml
@@ -20,18 +20,15 @@
 # 2. pull_request change the related files
 # 3. workflow_dispatch with models input
 
-name: ascend test / models
+name: ascend test / accuracy report
 
 on:
-  schedule:
-    # Runs every 6 hours
-    - cron:  '0 */6 * * *'
   pull_request:
     branches:
       - 'main'
       - '*-dev'
     paths:
-      - '.github/workflows/vllm_ascend_test_models.yaml'
+      - '.github/workflows/vllm_ascend_test_report.yaml'  # this workflow file itself
       - 'tests/e2e/models/test_lm_eval_correctness.py'
   workflow_dispatch:
     inputs:
@@ -60,22 +57,20 @@ concurrency:
 jobs:
   run:
     strategy:
+      fail-fast: false  # let the other runner's model list finish when one matrix entry fails
       matrix:
         include:
-          - model_name: Qwen3-8B
-            runner: a2-1
-          - model_name: Qwen2.5-VL-7B-Instruct
-            runner: a2-1
-          - model_name: Qwen2-Audio-7B-Instruct
-            runner: a2-1
-          - model_name: Qwen3-30B-A3B
-            runner: a2-2
-          - model_name: Qwen3-VL-30B-A3B-Instruct
-            runner: a2-2
-          - model_name: DeepSeek-V2-Lite
-            runner: a2-2
-      fail-fast: false
-    uses: ./.github/workflows/_accuracy_test.yaml
+          - runner: linux-aarch64-a2-1
+            model_list:
+              - Qwen3-8B
+              - Qwen2.5-VL-7B-Instruct
+              - Qwen2-Audio-7B-Instruct
+          - runner: linux-aarch64-a2-2
+            model_list:
+              - Qwen3-30B-A3B
+              - Qwen3-VL-30B-A3B-Instruct
+              - DeepSeek-V2-Lite
+    uses: ./.github/workflows/_e2e_nightly_single_node_models.yaml
     with:
       vllm: v0.11.0
-      runner:  linux-aarch64-${{ matrix.runner }}
+      runner: ${{ matrix.runner }}  # matrix values already carry the linux-aarch64- prefix