Commit f93c728

vllm-ascend: support msMonitor tool
Signed-off-by: mei-feiyao <1332490378@qq.com>
1 parent: d01fd1d

File tree

3 files changed, +33 -0 lines changed

tests/ut/worker/test_worker_v1.py
vllm_ascend/envs.py
vllm_ascend/worker/worker_v1.py

tests/ut/worker/test_worker_v1.py

Lines changed: 20 additions & 0 deletions
@@ -355,6 +355,26 @@ def test_profile_no_profiler_raises_error(self):
 
         self.assertIn("Profiler is not enabled", str(cm.exception))
 
+    @patch("vllm_ascend.worker.worker_v1.envs_vllm")
+    @patch("vllm_ascend.worker.worker_v1.envs_ascend")
+    def test_profile_and_msmonitor_both_enabled_raises_error(self, mock_envs_ascend, mock_envs_vllm):
+        """Test that _init_profiler raises when both the torch profiler and msMonitor are enabled"""
+        from vllm_ascend.worker.worker_v1 import NPUWorker
+
+        mock_envs_vllm.VLLM_TORCH_PROFILER_DIR = "/path/to/traces"
+        mock_envs_ascend.MSMONITOR_USE_DAEMON = 1
+
+        # Bypass the real __init__ so no NPU device is needed
+        with patch.object(NPUWorker, "__init__", lambda x, **kwargs: None):
+            worker = NPUWorker()
+
+        # Initializing the profiler with both tools enabled must fail
+        with self.assertRaises(RuntimeError) as cm:
+            _ = worker._init_profiler()
+
+        self.assertIn("MSMONITOR_USE_DAEMON and VLLM_TORCH_PROFILER_DIR cannot be both set at the same time.",
+                      str(cm.exception))
+
     def test_lora_methods(self):
         """Test LoRA related methods"""
         from vllm_ascend.worker.worker_v1 import NPUWorker
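Note on the mock arguments in the new test: stacked @patch decorators are applied bottom-up, so the decorator nearest the function supplies the first mock argument (here the envs_ascend patch, then the envs_vllm patch). A minimal standalone illustration of this ordering; the os.getpid/os.getcwd targets are placeholders, not part of this commit:

import os
from unittest import mock

@mock.patch("os.getcwd")  # outer decorator -> second mock argument
@mock.patch("os.getpid")  # inner decorator -> first mock argument
def demo(mock_getpid, mock_getcwd):
    mock_getpid.return_value = 1234
    mock_getcwd.return_value = "/tmp"
    # Both targets are replaced by mocks inside the decorated function.
    print(os.getpid(), os.getcwd())  # -> 1234 /tmp

demo()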

vllm_ascend/envs.py

Lines changed: 3 additions & 0 deletions
@@ -159,6 +159,9 @@
     # caused by the initialization of the Mooncake connector.
     "PHYSICAL_DEVICES":
     lambda: os.getenv("PHYSICAL_DEVICES", None),
+    # Whether to enable the msMonitor tool to monitor the performance of vllm-ascend.
+    "MSMONITOR_USE_DAEMON":
+    lambda: bool(int(os.getenv("MSMONITOR_USE_DAEMON", '0'))),
 }
 
 # end-env-vars-definition
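For reference, the lambda above parses the flag with bool(int(...)): unset or "0" maps to False, "1" (or any nonzero integer) to True. A standalone sketch of the same parsing; note that a non-integer value such as "true" would raise ValueError:

import os

def msmonitor_use_daemon() -> bool:
    # Mirrors the env table entry above: unset/"0" -> False, "1" -> True.
    return bool(int(os.getenv("MSMONITOR_USE_DAEMON", "0")))

os.environ["MSMONITOR_USE_DAEMON"] = "1"
print(msmonitor_use_daemon())  # -> True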

vllm_ascend/worker/worker_v1.py

Lines changed: 10 additions & 0 deletions
@@ -25,6 +25,7 @@
 import torch_npu
 import vllm.envs as envs_vllm
 from torch_npu.op_plugin.atb._atb_ops import _register_atb_extensions
+from torch_npu.profiler import dynamic_profile as dp
 from vllm.config import VllmConfig
 from vllm.distributed import (ensure_model_parallel_initialized,
                               init_distributed_environment)
@@ -41,6 +42,7 @@
                                DraftTokenIds, ModelRunnerOutput)
 from vllm.v1.worker.worker_base import WorkerBase
 
+import vllm_ascend.envs as envs_ascend
 from vllm_ascend.ascend_config import init_ascend_config
 from vllm_ascend.device_allocator.camem import CaMemAllocator
 from vllm_ascend.distributed.parallel_state import init_ascend_model_parallel
@@ -221,6 +223,10 @@ def execute_model(
         self,
         scheduler_output: "SchedulerOutput",
     ) -> Optional[Union[ModelRunnerOutput, AsyncModelRunnerOutput]]:
+        # enable msMonitor to monitor the performance of vllm-ascend
+        if envs_ascend.MSMONITOR_USE_DAEMON:
+            dp.step()
+
         intermediate_tensors = None
         forward_pass = scheduler_output.total_num_scheduled_tokens > 0
         if forward_pass and not get_pp_group().is_first_rank:
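The execute_model hunk above calls dp.step() once per scheduler step when MSMONITOR_USE_DAEMON is set, marking a step boundary for torch_npu's dynamic profiler. A minimal sketch of this guarded per-step hook pattern; step_hook and run_forward_pass are hypothetical stand-ins for dp.step() and the real model execution:

MSMONITOR_USE_DAEMON = True  # stands in for envs_ascend.MSMONITOR_USE_DAEMON

def step_hook():
    # Stand-in for torch_npu.profiler.dynamic_profile.step()
    print("step boundary recorded")

def run_forward_pass():
    pass  # the real worker runs the model here

def execute_model_step():
    if MSMONITOR_USE_DAEMON:
        step_hook()
    run_forward_pass()

for _ in range(3):
    execute_model_step()  # records three step boundaries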
@@ -350,6 +356,10 @@ def _init_profiler(self):
         # Torch profiler. Enabled and configured through env vars:
         # VLLM_TORCH_PROFILER_DIR=/path/to/save/trace
         if envs_vllm.VLLM_TORCH_PROFILER_DIR:
+            if envs_ascend.MSMONITOR_USE_DAEMON:
+                raise RuntimeError(
+                    "MSMONITOR_USE_DAEMON and VLLM_TORCH_PROFILER_DIR cannot be both set at the same time."
+                )
             torch_profiler_trace_dir = envs_vllm.VLLM_TORCH_PROFILER_DIR
             logger.info("Profiling enabled. Traces will be saved to: %s",
                         torch_profiler_trace_dir)
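To close, a standalone sketch of the mutual-exclusion guard added to _init_profiler, reproducing the intended failure mode outside the worker; check_profiler_conflict is a hypothetical helper, the real check lives inside NPUWorker._init_profiler:

import os

def check_profiler_conflict() -> None:
    # Mirrors the guard above: the torch profiler and the msMonitor
    # daemon must not both be enabled.
    torch_profiler_dir = os.getenv("VLLM_TORCH_PROFILER_DIR")
    msmonitor_on = bool(int(os.getenv("MSMONITOR_USE_DAEMON", "0")))
    if torch_profiler_dir and msmonitor_on:
        raise RuntimeError(
            "MSMONITOR_USE_DAEMON and VLLM_TORCH_PROFILER_DIR cannot be both set at the same time."
        )

os.environ["VLLM_TORCH_PROFILER_DIR"] = "/path/to/traces"
os.environ["MSMONITOR_USE_DAEMON"] = "1"
try:
    check_profiler_conflict()
except RuntimeError as e:
    print(e)  # -> ... cannot be both set at the same time.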
