[Misc] Remove VLLM_USE_V1 usage in code #1764

Merged (1 commit, Jul 15, 2025)
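The change removes every VLLM_USE_V1 check from vllm-ascend, along with the V0-only branches those checks guarded, so the V1 engine is treated as the only code path. A minimal sketch of the pattern, using a hypothetical helper (the worker class strings appear in the diff below, but this function itself is illustrative, not repo code):

```python
import os

def select_worker_cls_before() -> str:
    # Old style: the V0 or V1 worker was chosen from the VLLM_USE_V1 environment flag.
    if os.environ.get("VLLM_USE_V1", "1") == "1":
        return "vllm_ascend.worker.worker_v1.NPUWorker"
    return "vllm_ascend.worker.worker.NPUWorker"

def select_worker_cls_after() -> str:
    # New style: the flag is gone and only the V1 worker remains.
    return "vllm_ascend.worker.worker_v1.NPUWorker"
```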
107 changes: 42 additions & 65 deletions tests/ut/test_ascend_config.py
@@ -193,71 +193,48 @@ def test_check_ascend_config_pass(self):
@_clean_up_ascend_config
def test_check_ascend_config_wrong_case(self):
test_vllm_config = VllmConfig()
# For V0 engine
with mock.patch.dict(os.environ, {"VLLM_USE_V1": "0"}):
with self.assertRaises(NotImplementedError):
test_vllm_config.additional_config = {
"torchair_graph_config": {
"enabled": True,
},
"refresh": True
}
init_ascend_config(test_vllm_config)
check_ascend_config(test_vllm_config, False)
with self.assertRaises(NotImplementedError):
test_vllm_config.additional_config = {
"ascend_scheduler_config": {
"enabled": True,
},
"refresh": True
}
init_ascend_config(test_vllm_config)
check_ascend_config(test_vllm_config, True)
# For V1 engine
with mock.patch.dict(os.environ, {"VLLM_USE_V1": "1"}):
# torchair + eager mode
with self.assertRaises(RuntimeError):
test_vllm_config.additional_config = {
"torchair_graph_config": {
"enabled": True,
},
"refresh": True
}
init_ascend_config(test_vllm_config)
enforce_eager = True
check_ascend_config(test_vllm_config, enforce_eager)
# torchair + non deepseek model
with self.assertRaises(NotImplementedError):
test_vllm_config.additional_config = {
"torchair_graph_config": {
"enabled": True,
},
"refresh": True
}
model_path = os.path.join(os.path.dirname(__file__),
"fake_weight")
fake_model_config = ModelConfig(model=model_path)
fake_model_config.hf_config = PretrainedConfig()
fake_model_config.hf_config.model_type = "llama"
test_vllm_config.model_config = fake_model_config
init_ascend_config(test_vllm_config)
check_ascend_config(test_vllm_config, False)
# aclgraph + deepseek model
with self.assertRaises(NotImplementedError):
test_vllm_config.additional_config = {
"torchair_graph_config": {
"enabled": False,
},
"refresh": True
}
model_path = os.path.join(os.path.dirname(__file__),
"fake_weight")
fake_model_config = ModelConfig(model=model_path)
fake_model_config.hf_config = PretrainedConfig()
fake_model_config.hf_config.model_type = "deepseek"
test_vllm_config.model_config = fake_model_config
init_ascend_config(test_vllm_config)
check_ascend_config(test_vllm_config, False)

# torchair + eager mode
with self.assertRaises(RuntimeError):
test_vllm_config.additional_config = {
"torchair_graph_config": {
"enabled": True,
},
"refresh": True
}
init_ascend_config(test_vllm_config)
enforce_eager = True
check_ascend_config(test_vllm_config, enforce_eager)
# torchair + non deepseek model
with self.assertRaises(NotImplementedError):
test_vllm_config.additional_config = {
"torchair_graph_config": {
"enabled": True,
},
"refresh": True
}
model_path = os.path.join(os.path.dirname(__file__), "fake_weight")
fake_model_config = ModelConfig(model=model_path)
fake_model_config.hf_config = PretrainedConfig()
fake_model_config.hf_config.model_type = "llama"
test_vllm_config.model_config = fake_model_config
init_ascend_config(test_vllm_config)
check_ascend_config(test_vllm_config, False)
# aclgraph + deepseek model
with self.assertRaises(NotImplementedError):
test_vllm_config.additional_config = {
"torchair_graph_config": {
"enabled": False,
},
"refresh": True
}
model_path = os.path.join(os.path.dirname(__file__), "fake_weight")
fake_model_config = ModelConfig(model=model_path)
fake_model_config.hf_config = PretrainedConfig()
fake_model_config.hf_config.model_type = "deepseek"
test_vllm_config.model_config = fake_model_config
init_ascend_config(test_vllm_config)
check_ascend_config(test_vllm_config, False)

def test_check_torchair_supported(self):
test_cases = [('deepseek_v3', True), ('PanguProMoE', True),
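As a minimal sketch (assuming vllm and vllm-ascend are importable; not part of this PR), the failure mode the trimmed test still exercises now needs no VLLM_USE_V1 handling at all:

```python
from vllm.config import VllmConfig

from vllm_ascend.ascend_config import check_ascend_config, init_ascend_config

vllm_config = VllmConfig()
vllm_config.additional_config = {
    "torchair_graph_config": {"enabled": True},
    "refresh": True,
}
init_ascend_config(vllm_config)

try:
    # Torchair graph mode combined with eager mode is rejected up front.
    check_ascend_config(vllm_config, enforce_eager=True)
except RuntimeError as err:
    print(f"expected rejection: {err}")
```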
63 changes: 0 additions & 63 deletions tests/ut/test_platform.py
@@ -389,69 +389,6 @@ def test_check_and_update_config_v1_worker_class_selection(
"vllm_ascend.worker.worker_v1.NPUWorker",
)

@patch("vllm_ascend.ascend_config.check_ascend_config")
@patch("vllm_ascend.ascend_config.init_ascend_config")
@patch("vllm.envs.VLLM_USE_V1", False)
def test_check_and_update_config_speculative_worker_config(
self, mock_init_ascend, mock_check_ascend):
mock_init_ascend.return_value = self.mock_ascend_config
self.mock_vllm_config.speculative_config = MagicMock()
self.mock_vllm_config.speculative_config.disable_logprobs = True
self.mock_vllm_config.parallel_config.worker_cls = "auto"

with patch.dict("os.environ", {}):
from vllm_ascend import platform

importlib.reload(platform)
self.platform.check_and_update_config(self.mock_vllm_config)
import os

self.assertEqual(os.environ.get("ACL_OP_INIT_MODE"), "1")
self.assertEqual(
self.mock_vllm_config.parallel_config.worker_cls,
"vllm.spec_decode.spec_decode_worker.create_spec_worker",
)
self.assertEqual(
self.mock_vllm_config.parallel_config.sd_worker_cls,
"vllm_ascend.worker.worker.NPUWorker",
)

@patch("vllm_ascend.ascend_config.check_ascend_config")
@patch("vllm_ascend.ascend_config.init_ascend_config")
@patch("vllm.envs.VLLM_USE_V1", False)
def test_check_and_update_config_multi_step_worker_config(
self, mock_init_ascend, mock_check_ascend):
mock_init_ascend.return_value = self.mock_ascend_config
self.mock_vllm_config.scheduler_config.is_multi_step = True
self.mock_vllm_config.parallel_config.worker_cls = "auto"

from vllm_ascend import platform

importlib.reload(platform)
self.platform.check_and_update_config(self.mock_vllm_config)
self.assertEqual(
self.mock_vllm_config.parallel_config.worker_cls,
"vllm_ascend.worker.multi_step_worker.MultiStepWorker",
)

@patch("vllm_ascend.ascend_config.check_ascend_config")
@patch("vllm_ascend.ascend_config.init_ascend_config")
@patch("vllm.envs.VLLM_USE_V1", False)
def test_check_and_update_config_default_worker_config(
self, mock_init_ascend, mock_check_ascend):
mock_init_ascend.return_value = self.mock_ascend_config
self.mock_vllm_config.parallel_config.worker_cls = "auto"
self.mock_vllm_config.scheduler_config.is_multi_step = False

from vllm_ascend import platform

importlib.reload(platform)
self.platform.check_and_update_config(self.mock_vllm_config)
self.assertEqual(
self.mock_vllm_config.parallel_config.worker_cls,
"vllm_ascend.worker.worker.NPUWorker",
)

@patch("vllm_ascend.ascend_config.check_ascend_config")
@patch("vllm_ascend.ascend_config.init_ascend_config")
@patch("vllm_ascend.utils.is_310p", return_value=True)
71 changes: 30 additions & 41 deletions vllm_ascend/ascend_config.py
@@ -15,7 +15,6 @@
# limitations under the License.
from typing import Optional

import vllm.envs as envs
from vllm.logger import logger

TORCHAIR_MODEL_LIST = ["deepseek", "pangu"]
@@ -126,46 +125,36 @@ def get_ascend_config():
def check_ascend_config(vllm_config, enforce_eager):
ascend_config = get_ascend_config()

# for v0 engine
if not envs.VLLM_USE_V1:
# for eager mode
if enforce_eager:
# torchair_graph cannot be enabled with eager mode.
if ascend_config.torchair_graph_config.enabled:
raise NotImplementedError(
"Torchair graph mode is only supported for V1 Engine.")
if ascend_config.ascend_scheduler_config.enabled:
raise NotImplementedError(
"Ascend scheduler is only supported for V1 Engine.")
# for v1 engine
raise RuntimeError(
"Can't enable graph mode and eager mode at the same time. Please set `enforce_eager=False` if you attempt to enable NPU graph mode."
)
# for graph mode
else:
# for eager mode
if enforce_eager:
# torchair_graph cannot be enabled with eager mode.
if ascend_config.torchair_graph_config.enabled:
raise RuntimeError(
"Can't enable graph mode and eager mode at the same time. Please set `enforce_eager=False` if you attempt to enable NPU graph mode."
)
# for graph mode
# torchair_graph case
if ascend_config.torchair_graph_config.enabled:
# torchair_graph is supported for deepseek/pangu model only.
if vllm_config.model_config:
model_type = vllm_config.model_config.hf_config.model_type
if not _check_torchair_supported(model_type):
raise NotImplementedError(
"Torchair graph mode only works with following model types:"
f"{TORCHAIR_MODEL_LIST}.")
# aclgraph case
else:
# torchair_graph case
if ascend_config.torchair_graph_config.enabled:
# torchair_graph is supported for deepseek/pangu model only.
if vllm_config.model_config:
model_type = vllm_config.model_config.hf_config.model_type
if not _check_torchair_supported(model_type):
raise NotImplementedError(
"Torchair graph mode only works with following model types:"
f"{TORCHAIR_MODEL_LIST}.")
# aclgraph case
else:
# aclgraph doesn't work with deepseek model and only qwen model is well tested.
if vllm_config.model_config:
model_type = vllm_config.model_config.hf_config.model_type
if "deepseek" in model_type:
raise NotImplementedError(
"ACL Graph does not support deepseek. Please "
"try torchair graph mode to serve deepseek models on vllm-ascend."
" Or set `enforce_eager=True` to use eager mode.")
if "qwen" not in model_type:
logger.warning(
"ACL Graph is currently experimental. Please "
"raise an issue on https://github.com/vllm-project/vllm-ascend/issues"
" if you encourage any Error")
# aclgraph doesn't work with deepseek model and only qwen model is well tested.
if vllm_config.model_config:
model_type = vllm_config.model_config.hf_config.model_type
if "deepseek" in model_type:
raise NotImplementedError(
"ACL Graph does not support deepseek. Please "
"try torchair graph mode to serve deepseek models on vllm-ascend."
" Or set `enforce_eager=True` to use eager mode.")
if "qwen" not in model_type:
logger.warning(
"ACL Graph is currently experimental. Please "
"raise an issue on https://github.com/vllm-project/vllm-ascend/issues"
" if you encourage any Error")
18 changes: 7 additions & 11 deletions vllm_ascend/models/deepseek_dbo.py
@@ -30,7 +30,6 @@
import torch
import torch.distributed as dist
import torch_npu # noqa: F401
import vllm.envs as envs
from torch import nn
from transformers import PretrainedConfig
from vllm.attention import Attention, AttentionMetadata
@@ -397,20 +396,17 @@
hidden_states_or_q_c = hidden_states
if self.torchair_graph_enabled:
forward_kwargs = {}
if envs.VLLM_USE_V1:
output_shape = hidden_states.shape
output = torch.empty(output_shape,
dtype=hidden_states_or_q_c.dtype,
device=hidden_states_or_q_c.device)
forward_kwargs['output'] = output

output_shape = hidden_states.shape
output = torch.empty(output_shape,
dtype=hidden_states_or_q_c.dtype,
device=hidden_states_or_q_c.device)
forward_kwargs['output'] = output
output = self.mla_attn.impl.forward(self.mla_attn,
hidden_states_or_q_c,
hidden_states, None, kv_cache,
attn_metadata,
**forward_kwargs)
if envs.VLLM_USE_V1:
output = output.view(-1, output_shape[-1])
output = output.view(-1, output_shape[-1])
return output
else:
kv_c, k_pe = self.kv_a_proj_with_mqa(hidden_states)[0].split(
@@ -885,7 +881,7 @@
def can_run_ms(self):
attn_metadata = get_forward_context().attn_metadata
# support mla attention and V1 engine at present
if not self.use_mla or not envs.VLLM_USE_V1:
if not self.use_mla:
return False
# enable prefill overlap
if attn_metadata is None or attn_metadata.num_prefills == 0:
18 changes: 7 additions & 11 deletions vllm_ascend/models/deepseek_v2.py
@@ -29,7 +29,6 @@

import torch
import torch_npu
import vllm.envs as envs
from torch import nn
from transformers import PretrainedConfig
from vllm.attention import Attention, AttentionMetadata
@@ -579,20 +578,17 @@
else:
hidden_states_or_q_c = hidden_states
if self.torchair_graph_enabled:
if envs.VLLM_USE_V1:
output_shape = hidden_states.shape
output = torch.empty(output_shape,
dtype=hidden_states_or_q_c.dtype,
device=hidden_states_or_q_c.device)
forward_kwargs['output'] = output

output_shape = hidden_states.shape
output = torch.empty(output_shape,
dtype=hidden_states_or_q_c.dtype,
device=hidden_states_or_q_c.device)
forward_kwargs['output'] = output
output = self.mla_attn.impl.forward(self.mla_attn,
hidden_states_or_q_c,
hidden_states, None, kv_cache,
attn_metadata,
**forward_kwargs)
if envs.VLLM_USE_V1:
output = output.view(-1, output_shape[-1])
output = output.view(-1, output_shape[-1])
return output
else:
kv_c, k_pe = self.kv_a_proj_with_mqa(hidden_states)[0].split(
@@ -660,7 +656,7 @@
prefix=f"{prefix}.mlp",
)
self.mla_moe_communication = ascend_config.torchair_graph_config.enable_multistream_moe \
and model_config.use_mla and envs.VLLM_USE_V1 and self.tp_size > 1
and model_config.use_mla and self.tp_size > 1
else:
self.mlp = CustomDeepseekV2MLP(
hidden_size=config.hidden_size,
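A standalone sketch of the now-unconditional output handling in the torchair MLA path above (illustrative shapes, plain CPU tensors; not repo code): preallocate a buffer shaped like hidden_states, let the attention implementation write into it via forward_kwargs['output'], then flatten it to (num_tokens, hidden_size) before returning:

```python
import torch

hidden_states = torch.randn(4, 8, 16)  # stand-in for (batch, seq_len, hidden_size)
output_shape = hidden_states.shape
output = torch.empty(output_shape,
                     dtype=hidden_states.dtype,
                     device=hidden_states.device)
# ... the MLA attention impl would fill `output` via forward_kwargs['output'] ...
output = output.view(-1, output_shape[-1])  # (batch * seq_len, hidden_size)
print(output.shape)  # torch.Size([32, 16])
```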