Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/_e2e_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ jobs:
pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py
pytest -sv tests/e2e/multicard/test_data_parallel.py
pytest -sv tests/e2e/multicard/test_expert_parallel.py
# pytest -sv tests/e2e/multicard/test_external_launcher.py
pytest -sv tests/e2e/multicard/test_external_launcher.py
pytest -sv tests/e2e/multicard/test_single_request_aclgraph.py
pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py
pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
Expand Down
2 changes: 2 additions & 0 deletions tests/e2e/multicard/test_external_launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ def test_moe_external_launcher(model):
assert proc.returncode == 0


@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_NZ": "0"})
def test_external_launcher_and_sleepmode():
script = Path(
__file__
Expand Down Expand Up @@ -154,6 +155,7 @@ def test_external_launcher_and_sleepmode():
assert proc.returncode == 0


@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_NZ": "0"})
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Disabling `VLLM_ASCEND_ENABLE_NZ` makes this test pass, but it appears to be a workaround for a deeper issue. This change effectively prevents testing the `sleep_mode_level=2` functionality when the `FRACTAL_NZ` weight format optimization is enabled, reducing test coverage.

The root cause seems to be that when `sleep_mode_level=2` is used, the test manually reloads model weights from disk (see `examples/offline_external_launcher.py`, line 250). This reloading process does not account for the `FRACTAL_NZ` format that weights are converted to when `VLLM_ASCEND_ENABLE_NZ=1`. Consequently, the model receives weights in an unexpected format, leading to failure.

While this fix may be acceptable to unblock CI, the underlying incompatibility should be addressed. Possible long-term solutions include:

  • Updating the weight reloading logic in the test to correctly handle the `FRACTAL_NZ` format.
  • If `sleep_mode_level=2` is fundamentally incompatible with `VLLM_ASCEND_ENABLE_NZ`, this should be documented and ideally enforced with a runtime check.

As an immediate step, please add a code comment above this decorator to explain why this environment variable is being disabled for this specific test. This will improve code clarity and help future developers understand the context.

def test_external_launcher_and_sleepmode_level2():
script = Path(
__file__
Expand Down
3 changes: 3 additions & 0 deletions tests/e2e/singlecard/test_camem.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#

import gc
import os
from unittest.mock import patch

import torch
from vllm import SamplingParams
Expand Down Expand Up @@ -71,6 +73,7 @@ def test_basic_camem():


@fork_new_process_for_each_test
@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_NZ": "0"})
def test_end_to_end():
free, total = torch.npu.mem_get_info()
used_bytes_baseline = total - free # in case other process is running
Expand Down
3 changes: 3 additions & 0 deletions tests/ut/worker/test_worker_v1.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import unittest
from unittest.mock import MagicMock, patch

Expand Down Expand Up @@ -273,6 +274,7 @@ def test_sleep_mode_disabled_raises_error(self, mock_sleep_mode_enabled):

@patch("vllm_ascend.worker.worker_v1.sleep_mode_enabled")
@patch("vllm_ascend.worker.worker_v1.CaMemAllocator")
@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_NZ": "0"})
def test_wake_up_mode_enabled(self, mock_allocator_class,
mock_sleep_mode_enabled):
"""Test wake_up method when sleep mode is enabled"""
Expand All @@ -295,6 +297,7 @@ def test_wake_up_mode_enabled(self, mock_allocator_class,
mock_allocator.wake_up.assert_called_once_with(tags=["test_tag"])

@patch("vllm_ascend.worker.worker_v1.sleep_mode_enabled")
@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_NZ": "0"})
def test_wake_up_mode_disabled_raises_error(self, mock_sleep_mode_enabled):
"""Test wake_up method raises exception when sleep mode is disabled"""
from vllm_ascend.worker.worker_v1 import NPUWorker
Expand Down
7 changes: 6 additions & 1 deletion vllm_ascend/worker/worker_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
from vllm_ascend.device_allocator.camem import CaMemAllocator
from vllm_ascend.distributed.parallel_state import init_ascend_model_parallel
from vllm_ascend.platform import NPUPlatform
from vllm_ascend.utils import (init_ascend_soc_version,
from vllm_ascend.utils import (init_ascend_soc_version, is_enable_nz,
prefill_context_parallel_enable,
register_ascend_customop, sleep_mode_enabled,
try_register_lib, vllm_version_is)
Expand Down Expand Up @@ -184,6 +184,11 @@ def wake_up(self, tags: Optional[list[str]] = None) -> None:
raise ValueError(
"Sleep mode is not enabled. Please compile vllm-ascend with COMPILE_CUSTOM_KERNELS=1."
)

if is_enable_nz():
raise ValueError(
"FRACTAL_NZ mode is enabled. This may cause model parameter precision issues "
"in the RL scenarios. Please set VLLM_ASCEND_ENABLE_NZ=0.")
allocator = CaMemAllocator.get_instance()
allocator.wake_up(tags=tags)

Expand Down
Loading