vllm-project
diff --git a/.github/actionlint.yaml b/.github/actionlint.yaml
Lines changed: 1 addition & 0 deletions
diff --git a/.github/workflows/accuracy_test.yaml b/.github/workflows/accuracy_test.yaml
Lines changed: 2 additions & 2 deletions
diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
Lines changed: 15 additions & 8 deletions
diff --git a/.github/workflows/reminder_comment.yml b/.github/workflows/reminder_comment.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/vllm_ascend_dist.yaml b/.github/workflows/vllm_ascend_dist.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml
Lines changed: 16 additions & 51 deletions
diff --git a/.github/workflows/vllm_ascend_test_310p.yaml b/.github/workflows/vllm_ascend_test_310p.yaml
Lines changed: 1 addition & 1 deletion
@@ -15,3 +15,4 @@ self-hosted-runner:
     - linux-aarch64-a3-2
     - linux-aarch64-a3-4
     - linux-aarch64-a3-8
+    - linux-amd64-cpu-0
@@ -112,7 +112,7 @@ jobs:
         uses: actions/checkout@v4
         with:
           repository: vllm-project/vllm
-          ref: v0.10.1.1
+          ref: main
           path: ./vllm-empty
 
       - name: Install vllm-project/vllm from source
@@ -303,7 +303,7 @@ jobs:
           git push -f origin "${{ env.BRANCH_NAME }}"
 
       - name: Create PR in upstream via API
-        uses: actions/github-script@v7
+        uses: actions/github-script@v8
         with:
           github-token: ${{ secrets.PAT_TOKEN }}
           script: |
 
@@ -51,7 +51,7 @@ jobs:
     strategy:
       matrix:
         include:
-          - vllm_branch: v0.10.1.1
+          - vllm_branch: main
             vllm_ascend_branch: main
             vllm_use_v1: 1
       max-parallel: 1
 
@@ -8,13 +8,19 @@ permissions:
 
 jobs:
   pre-commit:
-    runs-on: ubuntu-latest
+    runs-on: linux-amd64-cpu-0
+    container:
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
     steps:
+    - name: Config mirrors  # point apt and pip at the cache-service mirror, then install git for checkout
+      run: |
+        sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
+        pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
+        pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
+        apt-get update -y
+        apt-get install -y git
     - name: Checkout vllm-project/vllm-ascend repo
       uses: actions/checkout@v4
-    - uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0
-      with:
-        python-version: "3.11"
     - run: echo "::add-matcher::.github/workflows/matchers/actionlint.json"
     - run: echo "::add-matcher::.github/workflows/matchers/mypy.json"
     - name: Checkout vllm-project/vllm repo
@@ -30,8 +36,9 @@ jobs:
     - name: Install vllm-ascend dev
       run: |
         pip install -r requirements-dev.txt --extra-index-url https://download.pytorch.org/whl/cpu
-    - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
+        git config --global --add safe.directory '*'
+    - name: Run pre-commit check
       env:
-        SHELLCHECK_OPTS: "--exclude=SC2046,SC2006,SC2086" # Exclude SC2046, SC2006, SC2086 for actionlint
-      with:
-        extra_args: --all-files --hook-stage manual
+        SHELLCHECK_OPTS: "--exclude=SC2046,SC2006,SC2086"  # Exclude SC2046, SC2006, SC2086 for actionlint
+        GOPROXY: "https://goproxy.cn,direct"  # Go module proxy; presumably for Go-based hooks such as actionlint — TODO confirm
+      run: pre-commit run --all-files --hook-stage manual
@@ -9,7 +9,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Remind to run full CI on PR
-        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
         with:
           script: |
             github.rest.issues.createComment({
 
@@ -43,7 +43,7 @@ jobs:
     strategy:
       matrix:
         os: [linux-aarch64-a3-8]
-        vllm_version: [v0.10.1.1, main]
+        vllm_version: [main]
     name: vLLM Ascend test
     runs-on: ${{ matrix.os }}
     container:
 
@@ -25,7 +25,6 @@ on:
     branches:
       - 'main'
       - '*-dev'
-
 # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
 # declared as "shell: bash -el {0}" on steps that need to be properly activated.
 # It's used to activate ascend-toolkit environment variables.
@@ -44,7 +43,9 @@ jobs:
     uses: ./.github/workflows/pre-commit.yml
 
   changes:
-    runs-on: ubuntu-latest
+    runs-on: linux-amd64-cpu-0
+    container:
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
     outputs:
       e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
       ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
@@ -68,6 +69,7 @@ jobs:
               - 'packages.txt'
             ut_tracker:
               - 'tests/ut/**'
+
   ut:
     needs: [lint, changes]
     name: unit test
@@ -81,7 +83,7 @@ jobs:
         VLLM_USE_MODELSCOPE: True
     strategy:
       matrix:
-        vllm_version: [v0.10.1.1, main]
+        vllm_version: [main]
     steps:
       - name: Install packages
         run: |
@@ -129,16 +131,16 @@ jobs:
           name: vllm-ascend
           verbose: true
 
-  e2e:
+  e2e-light:
     needs: [lint, changes]
     # only trigger e2e test after lint passed and the change is e2e related with pull request.
-    if: ${{ github.event_name == 'pull_request' && needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' }}
+    if: ${{ github.event_name == 'pull_request' && needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' && !contains(github.event.pull_request.labels.*.name, 'ready') }}
     strategy:
       max-parallel: 2
       matrix:
         os: [linux-aarch64-a2-1]
-        vllm_version: [v0.10.1.1, main]
-    name: singlecard e2e test
+        vllm_version: [main]
+    name: singlecard e2e test - light
     runs-on: ${{ matrix.os }}
     container:
       image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
@@ -191,38 +193,19 @@ jobs:
           VLLM_WORKER_MULTIPROC_METHOD: spawn
           VLLM_USE_MODELSCOPE: True
         run: |
-          # We found that if running aclgraph tests in batch, it will cause AclmdlRICaptureBegin error. So we run
-          # the test separately.
-
           pytest -sv tests/e2e/singlecard/test_aclgraph.py
-          pytest -sv tests/e2e/singlecard/test_ascend_scheduler.py
-          pytest -sv tests/e2e/singlecard/test_camem.py
-          pytest -sv tests/e2e/singlecard/test_chunked.py
-          pytest -sv tests/e2e/singlecard/test_embedding.py
-          pytest -sv tests/e2e/singlecard/test_guided_decoding.py
-          # TODO: Fix lora accuracy error
-          pytest -sv tests/e2e/singlecard/test_ilama_lora.py
-          pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
           pytest -sv tests/e2e/singlecard/test_quantization.py
-          pytest -sv tests/e2e/singlecard/test_sampler.py
-          pytest -sv tests/e2e/singlecard/test_vlm.py
-
-          # ------------------------------------ v1 spec decode test ------------------------------------ #
-          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
-          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
-          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
-
-          pytest -sv tests/e2e/singlecard/ops/
+          pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
 
-  e2e-2-cards:
-    needs: [e2e]
-    if: ${{ needs.e2e.result == 'success' }}
+  e2e-2-cards-light:
+    needs: [e2e-light]
+    if: ${{ needs.e2e-light.result == 'success' }}
     strategy:
       max-parallel: 2
       matrix:
         os: [linux-aarch64-a2-2]
-        vllm_version: [v0.10.1.1, main]
-    name: multicard e2e test
+        vllm_version: [main]
+    name: multicard e2e test - light
     runs-on: ${{ matrix.os }}
     container:
       image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
@@ -275,22 +258,4 @@ jobs:
           VLLM_WORKER_MULTIPROC_METHOD: spawn
           VLLM_USE_MODELSCOPE: True
         run: |
-          pytest -sv tests/e2e/multicard/test_data_parallel.py
-          pytest -sv tests/e2e/multicard/test_expert_parallel.py
-          # external_launcher test is not stable enough. Fix it later
-          # pytest -sv tests/e2e/multicard/test_external_launcher.py
-          pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py
-          pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
-
-          # To avoid oom, we need to run the test in a single process.
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W8A8
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe
-
-          #pytest -sv tests/e2e/multicard/test_pipeline_parallel.py
-          pytest -sv tests/e2e/multicard/test_prefix_caching.py
-          pytest -sv tests/e2e/multicard/test_qwen3_moe.py
-          pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py
+          pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP
@@ -53,7 +53,7 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-aarch64-310p-1, linux-aarch64-310p-4]
-        vllm_version: [v0.10.1.1, main]
+        vllm_version: [main]
     name: 310p e2e test
     runs-on: ${{ matrix.os }}
     container: