Switch Infra to linux-aarch64-a2 and python to 3.11

Yikun · Yikun · commit cf9ffd40638d · 2025-07-31T08:14:06.000+08:00
Signed-off-by: Yikun Jiang <yikunkero@gmail.com>
diff --git a/.github/Dockerfile.buildwheel b/.github/Dockerfile.buildwheel
@@ -14,7 +14,7 @@
 # limitations under the License.
 # This file is a part of the vllm-ascend project.
 #
-ARG PY_VERSION=3.10
+ARG PY_VERSION=3.11
 FROM quay.io/ascend/manylinux:8.0.0-910b-manylinux_2_28-py${PY_VERSION}
 
 ARG COMPILE_CUSTOM_KERNELS=1
diff --git a/.github/actionlint.yaml b/.github/actionlint.yaml
@@ -1,8 +1,10 @@
 self-hosted-runner:
   # Labels of self-hosted runner in array of strings.
   labels:
-    - linux-arm64-npu-1
-    - linux-arm64-npu-2
-    - linux-arm64-npu-4
+    - linux-aarch64-a2-0
+    - linux-aarch64-a2-1
+    - linux-aarch64-a2-2
+    - linux-aarch64-a2-4
+    - linux-aarch64-a2-8
     - linux-arm64-npu-static-8
     - ubuntu-24.04-arm
diff --git a/.github/workflows/accuracy_test.yaml b/.github/workflows/accuracy_test.yaml
@@ -79,8 +79,8 @@ jobs:
       }}
     runs-on: >-
       ${{
-          (matrix.model_name == 'Qwen/Qwen2.5-VL-7B-Instruct' && 'linux-arm64-npu-4') ||
-          'linux-arm64-npu-2'
+          (matrix.model_name == 'Qwen/Qwen2.5-VL-7B-Instruct' && 'linux-aarch64-a2-4') ||
+          'linux-aarch64-a2-2'
       }}
     strategy:
       matrix:
diff --git a/.github/workflows/release_code.yml b/.github/workflows/release_code.yml
@@ -53,7 +53,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.10"]
+        python-version: ["3.11"]
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
 
diff --git a/.github/workflows/vllm_ascend_doctest.yaml b/.github/workflows/vllm_ascend_doctest.yaml
@@ -45,7 +45,7 @@ jobs:
       matrix:
         vllm_verison: [main, v0.7.3-dev, main-openeuler, v0.7.3-dev-openeuler]
     name: vLLM Ascend test
-    runs-on: linux-arm64-npu-1
+    runs-on: linux-aarch64-a2-1
     container:
       image: m.daocloud.io/quay.io/ascend/vllm-ascend:${{ matrix.vllm_verison }}
     steps:
diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml
@@ -47,7 +47,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.10"]
+        python-version: ["3.11"]
         vllm_version: [v0.9.1]
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@@ -114,12 +114,12 @@ jobs:
     strategy:
       max-parallel: 2
       matrix:
-        os: [linux-arm64-npu-1, linux-arm64-npu-4]
+        os: [linux-aarch64-a2-1, linux-aarch64-a2-4]
         vllm_version: [v0.9.1]
     concurrency:
       group: >
         ${{
-        matrix.os == 'linux-arm64-npu-4'
+        matrix.os == 'linux-aarch64-a2-4'
           && github.event.pull_request.number
           && format('pr-{0}-limit-npu-4', github.event.pull_request.number)
         || format('job-{0}-{1}-{2}', matrix.os, matrix.vllm_version, github.event.pull_request.number)
@@ -179,7 +179,7 @@ jobs:
           VLLM_USE_V1: 1
           VLLM_WORKER_MULTIPROC_METHOD: spawn
         run: |
-          if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
+          if [[ "${{ matrix.os }}" == "linux-aarch64-a2-1" ]]; then
             VLLM_USE_MODELSCOPE=True pytest -sv tests/singlecard/test_offline_inference.py
             # guided decoding doesn't work, fix it later
             # pytest -sv tests/singlecard/test_guided_decoding.py.py
@@ -216,7 +216,7 @@ jobs:
         env:
           VLLM_USE_V1: 0
         run: |
-          if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
+          if [[ "${{ matrix.os }}" == "linux-aarch64-a2-1" ]]; then
             VLLM_USE_MODELSCOPE=True  pytest -sv tests/singlecard/test_offline_inference.py
             # guided decoding doesn't work, fix it later
             # pytest -sv tests/singlecard/test_guided_decoding.py.py
diff --git a/.github/workflows/vllm_ascend_test_long_term.yaml b/.github/workflows/vllm_ascend_test_long_term.yaml
@@ -39,7 +39,7 @@ jobs:
     strategy:
       max-parallel: 2
       matrix:
-        os: [linux-arm64-npu-1, linux-arm64-npu-4]
+        os: [linux-aarch64-a2-1, linux-aarch64-a2-4]
         vllm_version: [v0.9.1]
     name: vLLM Ascend long term test
     runs-on: ${{ matrix.os }}
@@ -93,7 +93,7 @@ jobs:
 
       - name: Run vllm-project/vllm-ascend long term test
         run: |
-          if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
+          if [[ "${{ matrix.os }}" == "linux-aarch64-a2-1" ]]; then
             # v0 spec decode test
             # VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/spec_decode_v0/e2e/test_mtp_correctness.py  # it needs a clean process
             # pytest -sv tests/long_term/spec_decode_v0 --ignore=tests/long_term/spec_decode_v0/e2e/test_mtp_correctness.py

Original file line number	Diff line number	Diff line change
`@@ -14,7 +14,7 @@`
`14`	`14`	`# limitations under the License.`
`15`	`15`	`# This file is a part of the vllm-ascend project.`
`16`	`16`	`#`
`17`		`-ARG PY_VERSION=3.10`
	`17`	`+ARG PY_VERSION=3.11`
`18`	`18`	`FROM quay.io/ascend/manylinux:8.0.0-910b-manylinux_2_28-py${PY_VERSION}`
`19`	`19`
`20`	`20`	`ARG COMPILE_CUSTOM_KERNELS=1`