Commit f65273c

Merge branch 'main' into v1lora

2 parents ad89a73 + 217211d

File tree: 28 files changed (+870, −216 lines)

.github/workflows/accuracy_report.yaml

Lines changed: 9 additions & 9 deletions
```diff
@@ -70,12 +70,12 @@ jobs:
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

-      - name: Query artifact run id for Qwen3-8B V0 latest artifact
-        id: get_Qwen3_8B_latest_run_id_V0
+      - name: Query artifact run id for Qwen3-8B-Base V0 latest artifact
+        id: get_Qwen3_8B_Base_latest_run_id_V0
         run: |
           ARTIFACT_JSON=$(gh api "repos/${{ github.repository }}/actions/artifacts")
           RUN_ID=$(echo "$ARTIFACT_JSON" | \
-            jq -r '[.artifacts[] | select(.name=="${{ github.event.inputs.vllm-ascend-version }}-Qwen3-8B-V0-report")] | sort_by(.created_at) | last | .workflow_run.id')
+            jq -r '[.artifacts[] | select(.name=="${{ github.event.inputs.vllm-ascend-version }}-Qwen3-8B-Base-V0-report")] | sort_by(.created_at) | last | .workflow_run.id')
           echo "runid=$RUN_ID" >> "$GITHUB_OUTPUT"
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -107,22 +107,22 @@ jobs:
           repository: vllm-project/vllm-ascend
           run-id: ${{ steps.get_Llama_3_1_8B_Instruct_latest_run_id_V0.outputs.runid }}

-      - name: Download Qwen/Qwen3-8B Artifact
+      - name: Download Qwen/Qwen3-8B-Base Artifact
         uses: actions/download-artifact@v4
         with:
-          name: ${{ github.event.inputs.vllm-ascend-version }}-Qwen3-8B-V0-report
+          name: ${{ github.event.inputs.vllm-ascend-version }}-Qwen3-8B-Base-V0-report
           path: ./docs/source/developer_guide/evaluation/accuracy_report
           github-token: ${{ secrets.GITHUB_TOKEN }}
           repository: vllm-project/vllm-ascend
-          run-id: ${{ steps.get_Qwen3_8B_latest_run_id_V0.outputs.runid }}
+          run-id: ${{ steps.get_Qwen3_8B_Base_latest_run_id_V0.outputs.runid }}

       - name: Display Files
         working-directory: ./docs/source/developer_guide/evaluation/accuracy_report
         run: |
           cat ./Qwen2.5-VL-7B-Instruct.md
           cat ./Llama-3.1-8B-Instruct.md
           cat ./Qwen2.5-7B-Instruct.md
-          cat ./Qwen3-8B.md
+          cat ./Qwen3-8B-Base.md

       - name: Create Pull Request for markdown update
         uses: peter-evans/create-pull-request@v7
@@ -141,10 +141,10 @@ jobs:
            - [Qwen2.5-7B-Instruct accuracy report][2]
            - [Llama-3.1-8B-Instruct accuracy report][3]
            - [Qwen2.5-VL-7B-Instruct accuracy report][4]
-            - [Qwen3-8B accuracy report][5]
+            - [Qwen3-8B-Base accuracy report][5]

            [1]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
            [2]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ steps.get_Qwen2_5_7B_Instruct_latest_run_id_V0.outputs.runid }}
            [3]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ steps.get_Llama_3_1_8B_Instruct_latest_run_id_V0.outputs.runid }}
            [4]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ steps.get_Qwen2_5_VL_7B_Instruct_latest_run_id_V0.outputs.runid }}
-            [5]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ steps.get_Qwen3_8B_latest_run_id_V0.outputs.runid }}
+            [5]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ steps.get_Qwen3_8B_Base_latest_run_id_V0.outputs.runid }}
```
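The renamed step keeps the same pattern: list the repository's workflow artifacts via the GitHub API and pick the newest one whose name matches. A minimal local sketch of that lookup, assuming `gh` and `jq` are installed and authenticated (the VERSION value here is a hypothetical workflow input):

```bash
# Local sketch of the artifact lookup the workflow performs.
# REPO matches the workflow; VERSION stands in for the vllm-ascend-version input.
REPO="vllm-project/vllm-ascend"
VERSION="v0.8.4rc2"

ARTIFACT_JSON=$(gh api "repos/${REPO}/actions/artifacts")
RUN_ID=$(echo "$ARTIFACT_JSON" | \
  jq -r --arg name "${VERSION}-Qwen3-8B-Base-V0-report" \
    '[.artifacts[] | select(.name==$name)] | sort_by(.created_at) | last | .workflow_run.id')
echo "runid=$RUN_ID"
```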

.github/workflows/accuracy_test.yaml

Lines changed: 6 additions & 12 deletions
```diff
@@ -29,15 +29,15 @@ on:
         required: true
         type: string
       models:
-        description: 'choose model(all/Qwen2.5-7B-Instruct/Llama-3.1-8B-Instruct/Qwen2.5-VL-7B-Instruct/Qwen3-8B)'
+        description: 'choose model(all/Qwen2.5-7B-Instruct/Llama-3.1-8B-Instruct/Qwen2.5-VL-7B-Instruct/Qwen3-8B-Base)'
         required: true
         type: choice
         options:
           - all
           - Qwen/Qwen2.5-7B-Instruct
           - meta-llama/Llama-3.1-8B-Instruct
           - Qwen/Qwen2.5-VL-7B-Instruct
-          - Qwen/Qwen3-8B
+          - Qwen/Qwen3-8B-Base
         default: 'all'

 # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
@@ -54,16 +54,16 @@ jobs:
     strategy:
       matrix:
         include: ${{ fromJSON(
-          (github.event.inputs.models == 'all' && '[{"model_name":"Qwen/Qwen2.5-7B-Instruct","output_file":"Qwen2.5-7B-Instruct"},{"model_name":"meta-llama/Llama-3.1-8B-Instruct","output_file":"Llama-3.1-8B-Instruct"},{"model_name":"Qwen/Qwen2.5-VL-7B-Instruct","output_file":"Qwen2.5-VL-7B-Instruct"}, {"model_name":"Qwen/Qwen3-8B","output_file":"Qwen3-8B"}]') ||
+          (github.event.inputs.models == 'all' && '[{"model_name":"Qwen/Qwen2.5-7B-Instruct","output_file":"Qwen2.5-7B-Instruct"},{"model_name":"meta-llama/Llama-3.1-8B-Instruct","output_file":"Llama-3.1-8B-Instruct"},{"model_name":"Qwen/Qwen2.5-VL-7B-Instruct","output_file":"Qwen2.5-VL-7B-Instruct"}, {"model_name":"Qwen/Qwen3-8B-Base","output_file":"Qwen3-8B-Base"}]') ||
           (github.event.inputs.models == 'Qwen/Qwen2.5-7B-Instruct' && '[{"model_name":"Qwen/Qwen2.5-7B-Instruct","output_file":"Qwen2.5-7B-Instruct"}]') ||
           (github.event.inputs.models == 'meta-llama/Llama-3.1-8B-Instruct' && '[{"model_name":"meta-llama/Llama-3.1-8B-Instruct","output_file":"Llama-3.1-8B-Instruct"}]') ||
           (github.event.inputs.models == 'Qwen/Qwen2.5-VL-7B-Instruct' && '[{"model_name":"Qwen/Qwen2.5-VL-7B-Instruct","output_file":"Qwen2.5-VL-7B-Instruct"}]') ||
-          (github.event.inputs.models == 'Qwen/Qwen3-8B' && '[{"model_name":"Qwen/Qwen3-8B","output_file":"Qwen3-8B"}]')
+          (github.event.inputs.models == 'Qwen/Qwen3-8B-Base' && '[{"model_name":"Qwen/Qwen3-8B-Base","output_file":"Qwen3-8B-Base"}]')
         ) }}
       fail-fast: false

     container:
-      image: quay.io/ascend/cann:8.0.0-910b-ubuntu22.04-py3.10
+      image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
       env:
         HF_ENDPOINT: https://hf-mirror.com
         HF_TOKEN: ${{ secrets.HF_TOKEN }}
@@ -86,12 +86,6 @@ jobs:
           apt install git -y
           git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/

-      - name: Install system dependencies
-        run: |
-          apt-get -y install `cat packages.txt`
-          apt-get -y install gcc g++ cmake libnuma-dev
-
       - name: Install system dependencies
         run: |
           apt-get -y install `cat packages.txt`
@@ -147,7 +141,7 @@ jobs:
         working-directory: ./lm-eval
         run: |
           pip install -e .
-          pip install ray datasets==2.16.0 transformers==4.50.3 huggingface-hub==0.29.3
+          pip install ray datasets==2.16.0

       - name: Collect version info
         run: |
```
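The `fromJSON(...)` expression above emulates a switch over the `models` input with chained `&&`/`||` operators; each branch yields the JSON array used as the job matrix. A rough bash sketch of the single-model branch after the rename (MODELS stands in for `github.event.inputs.models`; `jq` is used only for pretty-printing):

```bash
# Sketch of the matrix selection for the renamed model.
MODELS="Qwen/Qwen3-8B-Base"
if [ "$MODELS" = "Qwen/Qwen3-8B-Base" ]; then
  MATRIX='[{"model_name":"Qwen/Qwen3-8B-Base","output_file":"Qwen3-8B-Base"}]'
fi
echo "$MATRIX" | jq .
```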

.github/workflows/codespell.yml

Lines changed: 47 additions & 0 deletions
```diff
@@ -0,0 +1,47 @@
+#
+# Copyright 2023 The vLLM team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Adapted from vllm-project/vllm/blob/main/.github
+#
+
+name: codespell
+
+on:
+  pull_request:
+    branches:
+      - 'main'
+      - '*-dev'
+
+jobs:
+  codespell:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.12"]
+    steps:
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements-lint.txt
+      - name: Run codespell check
+        run: |
+          CODESPELL_EXCLUDES=('--skip' 'tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**')
+          CODESPELL_IGNORE_WORDS=('-L' 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue')
+
+          codespell --toml pyproject.toml "${CODESPELL_EXCLUDES[@]}" "${CODESPELL_IGNORE_WORDS[@]}"
```
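The same check can be reproduced locally before pushing; a sketch reusing the workflow's own skip list and ignore words (assumes `codespell` is available, e.g. via `pip install -r requirements-lint.txt`):

```bash
# Local run of the codespell check with the workflow's arguments.
CODESPELL_EXCLUDES=('--skip' 'tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**')
CODESPELL_IGNORE_WORDS=('-L' 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue')
codespell --toml pyproject.toml "${CODESPELL_EXCLUDES[@]}" "${CODESPELL_IGNORE_WORDS[@]}"
```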

.github/workflows/vllm_ascend_test.yaml

Lines changed: 3 additions & 2 deletions
```diff
@@ -116,7 +116,7 @@ jobs:
           pytest -sv tests/ops
           pytest -sv tests/compile
         else
-          pytest -sv tests/multicard/test_offline_inference_distributed.py
+          pytest -sv -k "QwQ" tests/multicard/test_offline_inference_distributed.py
           pytest -sv tests/ops
           pytest -sv tests/compile
         fi
@@ -130,7 +130,8 @@ jobs:
           pytest -sv tests/singlecard/test_lora_functions.py
           pytest -sv tests/ops
         else
-          pytest -sv tests/multicard/test_offline_inference_distributed.py
+          pytest -sv -k "QwQ" tests/multicard/test_offline_inference_distributed.py
+          pytest -sv -k "DeepSeek" tests/multicard/test_offline_inference_distributed.py
           pytest -sv tests/ops
         fi
```
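Splitting the multicard file with `-k` runs each keyword group in its own pytest invocation; `-k` selects tests whose names match the given expression, so the QwQ and DeepSeek cases execute in separate processes (presumably to keep the runs independent). The commands below simply repeat what the workflow now does, assuming tests matching those keywords exist in the file:

```bash
# Run the QwQ and DeepSeek multicard cases in separate pytest invocations;
# -k selects tests whose names contain the given keyword.
pytest -sv -k "QwQ" tests/multicard/test_offline_inference_distributed.py
pytest -sv -k "DeepSeek" tests/multicard/test_offline_inference_distributed.py
```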

docs/source/_templates/sections/header.html

Lines changed: 58 additions & 0 deletions

```diff
@@ -0,0 +1,58 @@
+<!--
+ **********************************************************************
+ * Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+ * Copyright 2023 The vLLM team.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * This file is a part of the vllm-ascend project.
+ * Adapted from https://github.com/vllm-project/vllm/blob/main/docs/source/_templates/sections/header.html
+ **********************************************************************
+-->
+<style>
+  .notification-bar {
+    width: 100vw;
+    display: flex;
+    justify-content: center;
+    align-items: center;
+    font-size: 16px;
+  }
+  .notification-bar p {
+    margin: 0;
+  }
+  .notification-bar a {
+    font-weight: bold;
+    text-decoration: none;
+  }
+
+  /* Light mode styles (default) */
+  .notification-bar {
+    background-color: #fff3cd;
+    color: #856404;
+  }
+  .notification-bar a {
+    color: #d97706;
+  }
+
+  /* Dark mode styles */
+  html[data-theme=dark] .notification-bar {
+    background-color: #333;
+    color: #ddd;
+  }
+  html[data-theme=dark] .notification-bar a {
+    color: #ffa500; /* Brighter color for visibility */
+  }
+</style>
+
+<div class="notification-bar">
+  <p>You are viewing the latest developer preview docs. <a href="https://vllm-ascend.readthedocs.io/en/stable/">Click here</a> to view docs for the latest stable release(v0.7.3).</p>
+</div>
```

docs/source/conf.py

Lines changed: 12 additions & 1 deletion
```diff
@@ -23,7 +23,8 @@
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
-# import os
+import os
+
 # import sys
 # sys.path.insert(0, os.path.abspath('.'))

@@ -119,6 +120,16 @@
 # so a file named "default.css" will overwrite the builtin "default.css".
 # html_static_path = ['_static']

+READTHEDOCS_VERSION_TYPE = os.environ.get('READTHEDOCS_VERSION_TYPE')
+if READTHEDOCS_VERSION_TYPE == "tag":
+    # remove the warning banner if the version is a tagged release
+    header_file = os.path.join(os.path.dirname(__file__),
+                               "_templates/sections/header.html")
+    # The file might be removed already if the build is triggered multiple times
+    # (readthedocs build both HTML and PDF versions separately)
+    if os.path.exists(header_file):
+        os.remove(header_file)
+

 def setup(app):
     pass
```
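In effect, the new conf.py block deletes the developer-preview banner template when Read the Docs builds a tagged release. A shell sketch of what it does (READTHEDOCS_VERSION_TYPE is normally set by Read the Docs at build time; it is faked here for illustration):

```bash
# Shell sketch of the banner removal performed by conf.py on tagged builds.
export READTHEDOCS_VERSION_TYPE=tag
if [ "$READTHEDOCS_VERSION_TYPE" = "tag" ]; then
  # -f: the file may already be gone, since HTML and PDF builds run separately
  rm -f docs/source/_templates/sections/header.html
fi
```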

docs/source/developer_guide/versioning_policy.md

Lines changed: 1 addition & 1 deletion
```diff
@@ -74,7 +74,7 @@ Usually, each minor version of vLLM (such as 0.7) will correspond to a vLLM Asce

 For main branch, vLLM Ascend should works with vLLM main branch and latest 1 or 2 release version. So to ensure the backward compatibility, we will do the following:
 - Both main branch and target vLLM release is tested by Ascend E2E CI. For example, currently, vLLM main branch and vLLM 0.8.4 are tested now.
-- For code changes, we will make sure that the changes are compatible with the latest 1 or 2 vLLM release version as well. In this case, vLLM Ascend introduced a version check machinism inner the code. It'll check the version of installed vLLM pacakge first to decide which code logic to use. If users hit the `InvalidVersion` error, it sometimes means that they have installed an dev/editable version of vLLM package. In this case, we provide the env variable `VLLM_VERSION` to let users specify the version of vLLM package to use.
+- For code changes, we will make sure that the changes are compatible with the latest 1 or 2 vLLM release version as well. In this case, vLLM Ascend introduced a version check machinism inner the code. It'll check the version of installed vLLM package first to decide which code logic to use. If users hit the `InvalidVersion` error, it sometimes means that they have installed an dev/editable version of vLLM package. In this case, we provide the env variable `VLLM_VERSION` to let users specify the version of vLLM package to use.
 - For documentation changes, we will make sure that the changes are compatible with the latest 1 or 2 vLLM release version as well. Note should be added if there are any breaking changes.

 ## Document Branch Policy
```

docs/source/faqs.md

Lines changed: 1 addition & 1 deletion
```diff
@@ -84,7 +84,7 @@ Currently, only 1P1D is supported by vllm. For vllm-ascend, it'll be done by [th

 ### 10. Does vllm-ascend support quantization method?

-Currently, w8a8 quantization is already supported by vllm-ascend originally on v0.8.4rc2 or heigher, If you're using vllm 0.7.3 version, w8a8 quantization is supporeted with the integration of vllm-ascend and mindie-turbo, please use `pip install vllm-ascend[mindie-turbo]`.
+Currently, w8a8 quantization is already supported by vllm-ascend originally on v0.8.4rc2 or higher, If you're using vllm 0.7.3 version, w8a8 quantization is supporeted with the integration of vllm-ascend and mindie-turbo, please use `pip install vllm-ascend[mindie-turbo]`.

 ### 11. How to run w8a8 DeepSeek model?
```

docs/source/tutorials/multi_npu_quantization.md

Lines changed: 1 addition & 1 deletion
````diff
@@ -2,7 +2,7 @@

 ## Run docker container:
 :::{note}
-w8a8 quantization feature is supported by v0.8.4rc2 or highter
+w8a8 quantization feature is supported by v0.8.4rc2 or higher
 :::

 ```{code-block} bash
````

docs/source/user_guide/release_notes.md

Lines changed: 2 additions & 2 deletions
```diff
@@ -33,8 +33,8 @@ This is the second release candidate of v0.8.4 for vllm-ascend. Please follow th
 - DeepSeek V3/R1 works with DP, TP and MTP now. Please note that it's still in experimental status. Let us know if you hit any problem. [#429](https://github.com/vllm-project/vllm-ascend/pull/429) [#585](https://github.com/vllm-project/vllm-ascend/pull/585) [#626](https://github.com/vllm-project/vllm-ascend/pull/626) [#636](https://github.com/vllm-project/vllm-ascend/pull/636) [#671](https://github.com/vllm-project/vllm-ascend/pull/671)

 ### Core
-- ACLGraph feature is supported with V1 engine now. It's disabled by default because this feature rely on CANN 8.1 release. We'll make it avaiable by default in the next release [#426](https://github.com/vllm-project/vllm-ascend/pull/426)
-- Upgrade PyTorch to 2.5.1. vLLM Ascend no longer relies on the dev version of torch-npu now. Now users don't need to install the torch-npu by hand. The 2.5.1 version of torch-npu will be installed automaticlly. [#661](https://github.com/vllm-project/vllm-ascend/pull/661)
+- ACLGraph feature is supported with V1 engine now. It's disabled by default because this feature rely on CANN 8.1 release. We'll make it available by default in the next release [#426](https://github.com/vllm-project/vllm-ascend/pull/426)
+- Upgrade PyTorch to 2.5.1. vLLM Ascend no longer relies on the dev version of torch-npu now. Now users don't need to install the torch-npu by hand. The 2.5.1 version of torch-npu will be installed automatically. [#661](https://github.com/vllm-project/vllm-ascend/pull/661)

 ### Other
 - MiniCPM model works now. [#645](https://github.com/vllm-project/vllm-ascend/pull/645)
```
