Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/_e2e_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ jobs:
run: |
# pytest -sv tests/e2e/singlecard/test_aclgraph.py
# pytest -sv tests/e2e/singlecard/test_quantization.py
pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
# pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl

- name: Run e2e test
env:
Expand All @@ -102,7 +102,7 @@ jobs:
pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
pytest -sv tests/e2e/singlecard/test_quantization.py
pytest -sv tests/e2e/singlecard/test_sampler.py
pytest -sv tests/e2e/singlecard/test_vlm.py
#pytest -sv tests/e2e/singlecard/test_vlm.py
pytest -sv tests/e2e/singlecard/multi-modal/test_internvl.py

# ------------------------------------ v1 spec decode test ------------------------------------ #
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/format_pr_body.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:

- name: Get vLLM version
run: |
VLLM_COMMIT=83f478bb19489b41e9d208b47b4bb5a95ac171ac
VLLM_COMMIT=releases/v0.11.1
echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
- name: Checkout repository
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/vllm_ascend_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ jobs:
lint:
uses: ./.github/workflows/pre-commit.yml
with:
vllm: 83f478bb19489b41e9d208b47b4bb5a95ac171ac
vllm: releases/v0.11.1
changes:
runs-on: ubuntu-latest
outputs:
Expand Down Expand Up @@ -83,7 +83,7 @@ jobs:
VLLM_USE_MODELSCOPE: True
strategy:
matrix:
vllm_version: [83f478bb19489b41e9d208b47b4bb5a95ac171ac, v0.11.0]
vllm_version: [releases/v0.11.1, v0.11.0]
steps:
- name: Install packages
run: |
Expand Down Expand Up @@ -138,7 +138,7 @@ jobs:
name: e2e-light
strategy:
matrix:
vllm_version: [83f478bb19489b41e9d208b47b4bb5a95ac171ac, v0.11.0]
vllm_version: [releases/v0.11.1, v0.11.0]
# Note (yikun): If CI resource are limited we can split job into two chain jobs
needs: [lint, changes]
# only trigger e2e test after lint passed and the change is e2e related with pull request.
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/vllm_ascend_test_full.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ jobs:
name: e2e-full
strategy:
matrix:
vllm_version: [83f478bb19489b41e9d208b47b4bb5a95ac171ac, v0.11.0]
vllm_version: [releases/v0.11.1, v0.11.0]
needs: [changes]
if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
uses: ./.github/workflows/_e2e_test.yaml
Expand Down
9 changes: 6 additions & 3 deletions tests/ut/worker/test_worker_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
from tests.ut.base import TestBase
from vllm_ascend.utils import vllm_version_is

init_cached_hf_modules_path = "vllm.utils.init_cached_hf_modules" if vllm_version_is(
"0.11.0") else "vllm.utils.import_utils.init_cached_hf_modules"


class TestNPUWorker(TestBase):

Expand Down Expand Up @@ -46,7 +49,7 @@ def setUp(self):
@patch("vllm_ascend.worker.worker_v1.init_ascend_config")
@patch("vllm_ascend.worker.worker_v1.init_ascend_soc_version")
@patch("vllm_ascend.worker.worker_v1.try_register_lib")
@patch("vllm.utils.init_cached_hf_modules")
@patch(init_cached_hf_modules_path)
@patch("vllm_ascend.worker.worker_v1.NPUWorker._init_profiler")
def test_init_npu_worker_normal_case(
self,
Expand Down Expand Up @@ -108,7 +111,7 @@ def test_init_npu_worker_normal_case(
@patch("vllm_ascend.worker.worker_v1.init_ascend_config")
@patch("vllm_ascend.worker.worker_v1.init_ascend_soc_version")
@patch("vllm_ascend.worker.worker_v1.try_register_lib")
@patch("vllm.utils.init_cached_hf_modules")
@patch(init_cached_hf_modules_path)
@patch("vllm_ascend.worker.worker_v1.NPUWorker._init_profiler")
def test_init_npu_worker_with_trust_remote_code(
self,
Expand Down Expand Up @@ -153,7 +156,7 @@ def test_init_npu_worker_with_trust_remote_code(
@patch("vllm_ascend.worker.worker_v1.init_ascend_config")
@patch("vllm_ascend.worker.worker_v1.init_ascend_soc_version")
@patch("vllm_ascend.worker.worker_v1.try_register_lib")
@patch("vllm.utils.init_cached_hf_modules")
@patch(init_cached_hf_modules_path)
@patch("vllm_ascend.worker.worker_v1.NPUWorker._init_profiler")
def test_init_npu_worker_with_custom_cache_dtype(
self,
Expand Down
9 changes: 8 additions & 1 deletion vllm_ascend/attention/attention_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,14 @@
get_decode_context_model_parallel_rank,
get_decode_context_model_parallel_world_size)
from vllm.forward_context import ForwardContext, get_forward_context
from vllm.utils import cdiv

from vllm_ascend.utils import vllm_version_is

if vllm_version_is("0.11.0"):
from vllm.utils import cdiv
else:
from vllm.utils.math_utils import cdiv

from vllm.v1.attention.backends.utils import AttentionCGSupport
from vllm.v1.core.sched.output import SchedulerOutput
from vllm.v1.kv_cache_interface import AttentionSpec
Expand Down
9 changes: 8 additions & 1 deletion vllm_ascend/attention/mla_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,14 @@
from vllm.logger import logger
from vllm.model_executor.layers.linear import (LinearBase,
UnquantizedLinearMethod)
from vllm.utils import cdiv, round_down

from vllm_ascend.utils import vllm_version_is

if vllm_version_is("0.11.0"):
from vllm.utils import cdiv, round_down
else:
from vllm.utils.math_utils import cdiv, round_down

from vllm.v1.attention.backends.utils import AttentionCGSupport

from vllm_ascend import envs
Expand Down
9 changes: 8 additions & 1 deletion vllm_ascend/core/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,14 @@
from vllm.distributed.kv_events import KVEventBatch
from vllm.logger import logger
from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry
from vllm.utils import cdiv

from vllm_ascend.utils import vllm_version_is

if vllm_version_is("0.11.0"):
from vllm.utils import cdiv
else:
from vllm.utils.math_utils import cdiv

from vllm.v1.core.kv_cache_manager import KVCacheBlocks
from vllm.v1.core.sched.output import NewRequestData, SchedulerOutput
from vllm.v1.core.sched.scheduler import Scheduler
Expand Down
10 changes: 9 additions & 1 deletion vllm_ascend/distributed/mooncake/config_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,15 @@
import torch
from vllm.distributed.kv_transfer.kv_connector.v1.base import \
KVConnectorMetadata
from vllm.utils import cdiv, logger
from vllm.utils import logger

from vllm_ascend.utils import vllm_version_is

if vllm_version_is("0.11.0"):
from vllm.utils import cdiv
else:
from vllm.utils.math_utils import cdiv

from vllm.v1.core.sched.output import NewRequestData


Expand Down
9 changes: 8 additions & 1 deletion vllm_ascend/patch/platform/patch_mamba_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,14 @@
from vllm.logger import init_logger
from vllm.model_executor.models import ModelRegistry
from vllm.model_executor.models.config import MambaModelConfig
from vllm.utils import cdiv

from vllm_ascend.utils import vllm_version_is

if vllm_version_is("0.11.0"):
from vllm.utils import cdiv
else:
from vllm.utils.math_utils import cdiv

from vllm.v1.kv_cache_interface import FullAttentionSpec, MambaSpec

from vllm_ascend.utils import vllm_version_is
Expand Down
9 changes: 8 additions & 1 deletion vllm_ascend/spec_decode/mtp_proposer.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,14 @@
from vllm.model_executor.models.deepseek_mtp import DeepSeekMTP
from vllm.model_executor.models.deepseek_v2 import DeepseekV32IndexerCache
from vllm.model_executor.models.llama_eagle3 import Eagle3LlamaForCausalLM
from vllm.utils import cdiv

from vllm_ascend.utils import vllm_version_is

if vllm_version_is("0.11.0"):
from vllm.utils import cdiv
else:
from vllm.utils.math_utils import cdiv

from vllm.v1.attention.backends.utils import (AttentionMetadataBuilder,
CommonAttentionMetadata)
from vllm.v1.core.sched.output import SchedulerOutput
Expand Down
2 changes: 2 additions & 0 deletions vllm_ascend/torchair/models/torchair_deepseek_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,8 @@ def __init__(
if self.q_lora_rank is not None else None,
q_proj=self.q_proj
if self.q_lora_rank is None else self.q_b_proj,
q_b_proj=self.q_b_proj
if self.q_lora_rank is not None else None,
kv_a_proj_with_mqa=self.kv_a_proj_with_mqa,
kv_a_layernorm=self.kv_a_layernorm,
kv_b_proj=self.kv_b_proj,
Expand Down
8 changes: 7 additions & 1 deletion vllm_ascend/torchair/torchair_attention.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,13 @@
AttentionType)
from vllm.attention.backends.utils import PAD_SLOT_ID
from vllm.config import VllmConfig
from vllm.utils import cdiv

from vllm_ascend.utils import vllm_version_is

if vllm_version_is("0.11.0"):
from vllm.utils import cdiv
else:
from vllm.utils.math_utils import cdiv

from vllm_ascend.attention.attention_v1 import (AscendAttentionBackend,
AscendAttentionMetadataBuilder,
Expand Down
8 changes: 7 additions & 1 deletion vllm_ascend/torchair/torchair_mla.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,13 @@
from vllm.distributed import get_tensor_model_parallel_world_size
from vllm.model_executor.layers.linear import (LinearBase,
UnquantizedLinearMethod)
from vllm.utils import cdiv, round_down

from vllm_ascend.utils import vllm_version_is

if vllm_version_is("0.11.0"):
from vllm.utils import cdiv, round_down
else:
from vllm.utils.math_utils import cdiv, round_down

import vllm_ascend.envs as envs_ascend
from vllm_ascend.ascend_config import get_ascend_config
Expand Down
8 changes: 7 additions & 1 deletion vllm_ascend/torchair/torchair_sfa.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,13 @@
from vllm.distributed import get_tensor_model_parallel_world_size, get_tp_group
from vllm.model_executor.layers.linear import (LinearBase,
UnquantizedLinearMethod)
from vllm.utils import cdiv, round_down

from vllm_ascend.utils import vllm_version_is

if vllm_version_is("0.11.0"):
from vllm.utils import cdiv, round_down
else:
from vllm.utils.math_utils import cdiv, round_down

import vllm_ascend.envs as envs_ascend
from vllm_ascend.ascend_config import get_ascend_config
Expand Down
8 changes: 7 additions & 1 deletion vllm_ascend/worker/block_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,13 @@
import numpy as np
import torch
from vllm.distributed import get_dcp_group
from vllm.utils import cdiv

from vllm_ascend.utils import vllm_version_is

if vllm_version_is("0.11.0"):
from vllm.utils import cdiv
else:
from vllm.utils.math_utils import cdiv

from vllm_ascend.utils import prefill_context_parallel_enable

Expand Down
10 changes: 9 additions & 1 deletion vllm_ascend/worker/model_runner_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,15 @@
from vllm.sampling_params import SamplingType
from vllm.sequence import IntermediateTensors
from vllm.tasks import GenerationTask, PoolingTask, SupportedTask
from vllm.utils import cdiv, length_from_prompt_token_ids_or_embeds
from vllm.utils import length_from_prompt_token_ids_or_embeds

from vllm_ascend.utils import vllm_version_is

if vllm_version_is("0.11.0"):
from vllm.utils import cdiv
else:
from vllm.utils.math_utils import cdiv

from vllm.utils.jsontree import json_map_leaves
from vllm.v1.attention.backends.gdn_attn import GDNAttentionMetadataBuilder
from vllm.v1.attention.backends.utils import (
Expand Down
6 changes: 5 additions & 1 deletion vllm_ascend/worker/worker_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,11 @@ def __init__(

if self.model_config.trust_remote_code:
# note: lazy import to avoid importing torch before initializing
from vllm.utils import init_cached_hf_modules
if vllm_version_is("0.11.0"):
from vllm.utils import init_cached_hf_modules
else:
from vllm.utils.import_utils import init_cached_hf_modules

init_cached_hf_modules()

self.profiler = self._init_profiler()
Expand Down
Loading