@@ -126,20 +126,20 @@ jobs:
126
126
VLLM_WORKER_MULTIPROC_METHOD : spawn
127
127
VLLM_USE_MODELSCOPE : True
128
128
run : |
129
- # We found that if running aclgraph tests in batch, it will cause AclmdlRICaptureBegin error. So we run
130
- # the test separately.
131
-
132
- pytest -sv tests/e2e/singlecard/test_aclgraph.py
133
- pytest -sv tests/e2e/singlecard/test_ascend_scheduler.py
134
- pytest -sv tests/e2e/singlecard/test_camem.py
135
- pytest -sv tests/e2e/singlecard/test_chunked.py
136
- pytest -sv tests/e2e/singlecard/test_embedding.py
137
- #pytest -sv tests/e2e/singlecard/test_guided_decoding.py
138
- #pytest -sv tests/e2e/singlecard/test_ilama_lora.py
139
- pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
140
- pytest -sv tests/e2e/singlecard/test_quantization.py
141
- pytest -sv tests/e2e/singlecard/test_sampler.py
142
- pytest -sv tests/e2e/singlecard/test_vlm.py
129
+ # # We found that if running aclgraph tests in batch, it will cause AclmdlRICaptureBegin error. So we run
130
+ # # the test separately.
131
+
132
+ # pytest -sv tests/e2e/singlecard/test_aclgraph.py
133
+ # pytest -sv tests/e2e/singlecard/test_ascend_scheduler.py
134
+ # pytest -sv tests/e2e/singlecard/test_camem.py
135
+ # pytest -sv tests/e2e/singlecard/test_chunked.py
136
+ # pytest -sv tests/e2e/singlecard/test_embedding.py
137
+ # # pytest -sv tests/e2e/singlecard/test_guided_decoding.py
138
+ # # pytest -sv tests/e2e/singlecard/test_ilama_lora.py
139
+ # pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
140
+ # pytest -sv tests/e2e/singlecard/test_quantization.py
141
+ # pytest -sv tests/e2e/singlecard/test_sampler.py
142
+ # pytest -sv tests/e2e/singlecard/test_vlm.py
143
143
144
144
# ------------------------------------ v1 spec decode test ------------------------------------ #
145
145
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
@@ -148,86 +148,86 @@ jobs:
148
148
149
149
pytest -sv tests/e2e/singlecard/ops/
150
150
151
- e2e-2-cards-full :
152
- # only trigger full test when pull request is approved
153
- needs : [changes]
154
- if : ${{ needs.changes.outputs.e2e_tracker == 'true' }}
155
- strategy :
156
- max-parallel : 2
157
- matrix :
158
- os : [linux-aarch64-a2-2]
159
- vllm_version : [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2]
160
- name : multicard e2e test - full
161
- runs-on : ${{ matrix.os }}
162
- container :
163
- image : swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
164
- env :
165
- VLLM_LOGGING_LEVEL : ERROR
166
- VLLM_USE_MODELSCOPE : True
167
- steps :
168
- - name : Check npu and CANN info
169
- run : |
170
- npu-smi info
171
- cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
172
-
173
- - name : Config mirrors
174
- run : |
175
- sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
176
- pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
177
- pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
178
- apt-get update -y
179
- apt install git -y
180
-
181
- - name : Checkout vllm-project/vllm-ascend repo
182
- uses : actions/checkout@v4
183
-
184
- - name : Install system dependencies
185
- run : |
186
- apt-get -y install `cat packages.txt`
187
- apt-get -y install gcc g++ cmake libnuma-dev
188
-
189
- - name : Checkout vllm-project/vllm repo
190
- uses : actions/checkout@v4
191
- with :
192
- repository : vllm-project/vllm
193
- ref : ${{ matrix.vllm_version }}
194
- path : ./vllm-empty
195
-
196
- - name : Install vllm-project/vllm from source
197
- working-directory : ./vllm-empty
198
- run : |
199
- VLLM_TARGET_DEVICE=empty pip install -e .
200
-
201
- - name : Install vllm-project/vllm-ascend
202
- env :
203
- PIP_EXTRA_INDEX_URL : https://mirrors.huaweicloud.com/ascend/repos/pypi
204
- run : |
205
- pip install -r requirements-dev.txt
206
- pip install -v -e .
207
-
208
- - name : Run vllm-project/vllm-ascend test
209
- env :
210
- VLLM_WORKER_MULTIPROC_METHOD : spawn
211
- VLLM_USE_MODELSCOPE : True
212
- run : |
213
- #pytest -sv tests/e2e/multicard/test_data_parallel.py
214
- pytest -sv tests/e2e/multicard/test_expert_parallel.py
215
- # external_launcher test is not stable enough. Fix it later
216
- # pytest -sv tests/e2e/multicard/test_external_launcher.py
217
- pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py
218
- #pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
219
-
220
- # To avoid oom, we need to run the test in a single process.
221
- pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
222
- pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
223
- pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W8A8
224
- pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC
225
- pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
226
- pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe
227
- pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_flashcomm_v1
228
- pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_prefetch_mlp_weight
229
-
230
- #pytest -sv tests/e2e/multicard/test_pipeline_parallel.py
231
- pytest -sv tests/e2e/multicard/test_prefix_caching.py
232
- pytest -sv tests/e2e/multicard/test_qwen3_moe.py
233
- pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py
151
+ # e2e-2-cards-full:
152
+ # # only trigger full test when pull request is approved
153
+ # needs: [changes]
154
+ # if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
155
+ # strategy:
156
+ # max-parallel: 2
157
+ # matrix:
158
+ # os: [linux-aarch64-a2-2]
159
+ # vllm_version: [6d8246aaffff3ebec84767e373212a7b8da328e2, v0.10.2]
160
+ # name: multicard e2e test - full
161
+ # runs-on: ${{ matrix.os }}
162
+ # container:
163
+ # image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
164
+ # env:
165
+ # VLLM_LOGGING_LEVEL: ERROR
166
+ # VLLM_USE_MODELSCOPE: True
167
+ # steps:
168
+ # - name: Check npu and CANN info
169
+ # run: |
170
+ # npu-smi info
171
+ # cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
172
+
173
+ # - name: Config mirrors
174
+ # run: |
175
+ # sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
176
+ # pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
177
+ # pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
178
+ # apt-get update -y
179
+ # apt install git -y
180
+
181
+ # - name: Checkout vllm-project/vllm-ascend repo
182
+ # uses: actions/checkout@v4
183
+
184
+ # - name: Install system dependencies
185
+ # run: |
186
+ # apt-get -y install `cat packages.txt`
187
+ # apt-get -y install gcc g++ cmake libnuma-dev
188
+
189
+ # - name: Checkout vllm-project/vllm repo
190
+ # uses: actions/checkout@v4
191
+ # with:
192
+ # repository: vllm-project/vllm
193
+ # ref: ${{ matrix.vllm_version }}
194
+ # path: ./vllm-empty
195
+
196
+ # - name: Install vllm-project/vllm from source
197
+ # working-directory: ./vllm-empty
198
+ # run: |
199
+ # VLLM_TARGET_DEVICE=empty pip install -e .
200
+
201
+ # - name: Install vllm-project/vllm-ascend
202
+ # env:
203
+ # PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
204
+ # run: |
205
+ # pip install -r requirements-dev.txt
206
+ # pip install -v -e .
207
+
208
+ # - name: Run vllm-project/vllm-ascend test
209
+ # env:
210
+ # VLLM_WORKER_MULTIPROC_METHOD: spawn
211
+ # VLLM_USE_MODELSCOPE: True
212
+ # run: |
213
+ # #pytest -sv tests/e2e/multicard/test_data_parallel.py
214
+ # pytest -sv tests/e2e/multicard/test_expert_parallel.py
215
+ # # external_launcher test is not stable enough. Fix it later
216
+ # # pytest -sv tests/e2e/multicard/test_external_launcher.py
217
+ # pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py
218
+ # #pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
219
+
220
+ # # To avoid oom, we need to run the test in a single process.
221
+ # pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
222
+ # pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
223
+ # pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W8A8
224
+ # pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC
225
+ # pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
226
+ # pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe
227
+ # pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_flashcomm_v1
228
+ # pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_prefetch_mlp_weight
229
+
230
+ # #pytest -sv tests/e2e/multicard/test_pipeline_parallel.py
231
+ # pytest -sv tests/e2e/multicard/test_prefix_caching.py
232
+ # pytest -sv tests/e2e/multicard/test_qwen3_moe.py
233
+ # pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py
0 commit comments