[DON'T MERGE] Test eagle OOM #1
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Reusable e2e test workflow. Invoked via `workflow_call` from the repo's CI
# entry-point workflows, which supply the vLLM ref to build against, the
# runner label, the container image, and the test scope.
name: 'e2e test'

on:
  workflow_call:
    inputs:
      # vLLM git ref (branch, tag, or SHA) checked out and built from source.
      vllm:
        required: true
        type: string
      # Runner label prefix; jobs append "-1" / "-2" to select card count.
      runner:
        required: true
        type: string
      # Container image the jobs execute in.
      image:
        required: true
        type: string
      # Test scope selector: 'light' runs a small smoke subset, 'full' runs
      # the complete suite (see the `if:` guards on the test steps).
      type:
        required: true
        type: string
jobs:
  # Single-NPU e2e test job.
  e2e:
    name: singlecard
    runs-on: ${{ inputs.runner }}-1
    container:
      image: ${{ inputs.image }}
      # NOTE(review): env assumed to be scoped to the container, matching the
      # `image:`/`env:` grouping in the original — confirm intended scope.
      env:
        VLLM_LOGGING_LEVEL: ERROR
        # Quoted so the program sees the literal string "True"; an unquoted
        # `True` is a YAML 1.1 boolean and would be serialized as "true".
        VLLM_USE_MODELSCOPE: "True"
    steps:
      - name: Check npu and CANN info
        run: |
          npu-smi info
          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
      - name: Config mirrors
        run: |
          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
          apt-get update -y
          apt install git -y
      - name: Checkout vllm-project/vllm-ascend repo
        uses: actions/checkout@v4
      - name: Install system dependencies
        run: |
          apt-get -y install `cat packages.txt`
          apt-get -y install gcc g++ cmake libnuma-dev
      - name: Checkout vllm-project/vllm repo
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
          ref: ${{ inputs.vllm }}
          path: ./vllm-empty
          fetch-depth: 1
      - name: Install vllm-project/vllm from source
        working-directory: ./vllm-empty
        run: |
          VLLM_TARGET_DEVICE=empty pip install -e .
      - name: Install vllm-project/vllm-ascend
        env:
          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
        run: |
          pip install -r requirements-dev.txt
          pip install -v -e .
      # Smoke subset: only runs when the caller requests the 'light' scope.
      - name: Run vllm-project/vllm-ascend test
        env:
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          VLLM_USE_MODELSCOPE: "True"
          # Quoted: the value contains colons, so make it an explicit string.
          PYTORCH_NPU_ALLOC_CONF: "max_split_size_mb:256"
        if: ${{ inputs.type == 'light' }}
        run: |
          pytest -sv tests/e2e/singlecard/test_aclgraph.py
          pytest -sv tests/e2e/singlecard/test_quantization.py
          pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
      # Full suite: only runs when the caller requests the 'full' scope.
      - name: Run e2e test
        env:
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          VLLM_USE_MODELSCOPE: "True"
          # Fix: the original declared PYTORCH_NPU_ALLOC_CONF twice (a
          # duplicate YAML key — most parsers silently keep only the last
          # value). Both allocator options are merged into the single
          # comma-separated form the allocator config accepts.
          PYTORCH_NPU_ALLOC_CONF: "max_split_size_mb:256,expandable_segments:True"
          NPU_MEMORY_FRACTION: "0.96"
          OMP_NUM_THREADS: "1"
        if: ${{ inputs.type == 'full' }}
        run: |
          # We found that if running aclgraph tests in batch, it will cause AclmdlRICaptureBegin error. So we run
          # the test separately.
          pytest -sv tests/e2e/singlecard/test_aclgraph.py
          pytest -sv tests/e2e/singlecard/test_ascend_scheduler.py
          pytest -sv tests/e2e/singlecard/test_camem.py
          pytest -sv tests/e2e/singlecard/test_chunked.py
          pytest -sv tests/e2e/singlecard/test_embedding.py
          pytest -sv tests/e2e/singlecard/test_guided_decoding.py
          pytest -sv tests/e2e/singlecard/test_ilama_lora.py
          pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
          pytest -sv tests/e2e/singlecard/test_quantization.py
          pytest -sv tests/e2e/singlecard/test_sampler.py
          pytest -sv tests/e2e/singlecard/test_vlm.py
          # ------------------------------------ v1 spec decode test ------------------------------------ #
          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
          pytest -sv tests/e2e/singlecard/ops/
e2e-2-cards: | ||
name: multicard | ||
runs-on: ${{ inputs.runner }}-2 | ||
container: | ||
image: ${{ inputs.image }} | ||
env: | ||
VLLM_LOGGING_LEVEL: ERROR | ||
VLLM_USE_MODELSCOPE: True | ||
steps: | ||
- name: Check npu and CANN info | ||
run: | | ||
npu-smi info | ||
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info | ||
- name: Config mirrors | ||
run: | | ||
sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list | ||
pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple | ||
pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local | ||
apt-get update -y | ||
apt install git -y | ||
- name: Checkout vllm-project/vllm-ascend repo | ||
uses: actions/checkout@v4 | ||
- name: Install system dependencies | ||
run: | | ||
apt-get -y install `cat packages.txt` | ||
apt-get -y install gcc g++ cmake libnuma-dev | ||
- name: Checkout vllm-project/vllm repo | ||
uses: actions/checkout@v4 | ||
with: | ||
repository: vllm-project/vllm | ||
ref: ${{ inputs.vllm }} | ||
path: ./vllm-empty | ||
fetch-depth: 1 | ||
- name: Install vllm-project/vllm from source | ||
working-directory: ./vllm-empty | ||
run: | | ||
VLLM_TARGET_DEVICE=empty pip install -e . | ||
- name: Install vllm-project/vllm-ascend | ||
env: | ||
PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi | ||
run: | | ||
pip install -r requirements-dev.txt | ||
pip install -v -e . | ||
- name: Run vllm-project/vllm-ascend test (light) | ||
env: | ||
VLLM_WORKER_MULTIPROC_METHOD: spawn | ||
VLLM_USE_MODELSCOPE: True | ||
if: ${{ inputs.type == 'light' }} | ||
run: | | ||
pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP | ||
- name: Run vllm-project/vllm-ascend test (full) | ||
env: | ||
VLLM_WORKER_MULTIPROC_METHOD: spawn | ||
VLLM_USE_MODELSCOPE: True | ||
if: ${{ inputs.type == 'full' }} | ||
run: | | ||
pytest -sv tests/e2e/multicard/test_data_parallel.py | ||
pytest -sv tests/e2e/multicard/test_expert_parallel.py | ||
# external_launcher test is not stable enough. Fix it later | ||
# pytest -sv tests/e2e/multicard/test_external_launcher.py | ||
pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py | ||
pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py | ||
# To avoid oom, we need to run the test in a single process. | ||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ | ||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe | ||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W8A8 | ||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC | ||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC | ||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe | ||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_flashcomm_v1 | ||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen_Dense_with_prefetch_mlp_weight | ||
#pytest -sv tests/e2e/multicard/test_pipeline_parallel.py | ||
pytest -sv tests/e2e/multicard/test_prefix_caching.py | ||
pytest -sv tests/e2e/multicard/test_qwen3_moe.py | ||
pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py |