25 | 25 | import torch_npu
26 | 26 | import vllm.envs as envs_vllm
27 | 27 | from torch_npu.op_plugin.atb._atb_ops import _register_atb_extensions
   | 28 | +from torch_npu.profiler import dynamic_profile as dp
28 | 29 | from vllm.config import VllmConfig
29 | 30 | from vllm.distributed import (ensure_model_parallel_initialized,
30 | 31 |                               init_distributed_environment)

41 | 42 |                               DraftTokenIds, ModelRunnerOutput)
42 | 43 | from vllm.v1.worker.worker_base import WorkerBase
43 | 44 |
   | 45 | +import vllm_ascend.envs as envs_ascend
44 | 46 | from vllm_ascend.ascend_config import init_ascend_config
45 | 47 | from vllm_ascend.device_allocator.camem import CaMemAllocator
46 | 48 | from vllm_ascend.distributed.parallel_state import init_ascend_model_parallel
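
The two new imports wire msMonitor's dynamic profiling into the worker: `dynamic_profile` comes from `torch_npu.profiler`, and the on/off switch is exposed through `vllm_ascend.envs` as `MSMONITOR_USE_DAEMON`. A minimal sketch of how such an environment switch could be read, assuming a plain `os.getenv` lookup rather than the exact parsing used in `vllm_ascend/envs.py`:

```python
# Hypothetical sketch of reading the MSMONITOR_USE_DAEMON switch; the real
# definition lives in vllm_ascend/envs.py and may parse the value differently.
import os


def _msmonitor_use_daemon() -> bool:
    # Treat "1"/"true"/"yes" (case-insensitive) as enabled; anything else is off.
    return os.getenv("MSMONITOR_USE_DAEMON", "0").strip().lower() in ("1", "true", "yes")


MSMONITOR_USE_DAEMON: bool = _msmonitor_use_daemon()
```
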
@@ -221,6 +223,10 @@ def execute_model(
221 | 223 |         self,
222 | 224 |         scheduler_output: "SchedulerOutput",
223 | 225 |     ) -> Optional[Union[ModelRunnerOutput, AsyncModelRunnerOutput]]:
    | 226 | +        # enable msMonitor to monitor the performance of vllm-ascend
    | 227 | +        if envs_ascend.MSMONITOR_USE_DAEMON:
    | 228 | +            dp.step()
    | 229 | +
224 | 230 |         intermediate_tensors = None
225 | 231 |         forward_pass = scheduler_output.total_num_scheduled_tokens > 0
226 | 232 |         if forward_pass and not get_pp_group().is_first_rank:
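
With the flag enabled, `dp.step()` runs once at the top of every `execute_model()` call, marking a step boundary for msMonitor's dynamic profiler before the forward pass. A hedged sketch of the same per-step hook in isolation, where `profiled_step`, `run_forward`, and the boolean argument are illustrative stand-ins rather than vllm-ascend APIs:

```python
# Hedged sketch of the per-step msMonitor hook added above. `profiled_step`,
# `run_forward`, and `msmonitor_use_daemon` are illustrative stand-ins, not
# vllm-ascend APIs; dp.step() is the same call the diff introduces.
from typing import Callable, Optional

from torch_npu.profiler import dynamic_profile as dp


def profiled_step(run_forward: Callable[[], Optional[object]],
                  msmonitor_use_daemon: bool) -> Optional[object]:
    # Mark a step boundary for msMonitor's dynamic profiler before the forward
    # pass, but only when the daemon flag is enabled.
    if msmonitor_use_daemon:
        dp.step()
    return run_forward()
```
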
@@ -350,6 +356,10 @@ def _init_profiler(self):
350 | 356 |         # Torch profiler. Enabled and configured through env vars:
351 | 357 |         # VLLM_TORCH_PROFILER_DIR=/path/to/save/trace
352 | 358 |         if envs_vllm.VLLM_TORCH_PROFILER_DIR:
    | 359 | +            if envs_ascend.MSMONITOR_USE_DAEMON:
    | 360 | +                raise RuntimeError(
    | 361 | +                    "MSMONITOR_USE_DAEMON and VLLM_TORCH_PROFILER_DIR cannot both be set at the same time."
    | 362 | +                )
353 | 363 |             torch_profiler_trace_dir = envs_vllm.VLLM_TORCH_PROFILER_DIR
354 | 364 |             logger.info("Profiling enabled. Traces will be saved to: %s",
355 | 365 |                         torch_profiler_trace_dir)
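
This guard makes the two profiling paths mutually exclusive: if the msMonitor daemon is enabled, the torch profiler must stay off. A standalone sketch of the same check, reading both flags straight from the environment instead of through `vllm.envs` / `vllm_ascend.envs`:

```python
# Hedged sketch of the mutual-exclusion guard added in _init_profiler; the
# flags are read straight from the environment here instead of going through
# vllm.envs / vllm_ascend.envs.
import os


def check_profiler_flags() -> None:
    msmonitor_on = os.getenv("MSMONITOR_USE_DAEMON", "0").strip().lower() in ("1", "true", "yes")
    torch_profiler_dir = os.getenv("VLLM_TORCH_PROFILER_DIR", "")
    if msmonitor_on and torch_profiler_dir:
        raise RuntimeError(
            "MSMONITOR_USE_DAEMON and VLLM_TORCH_PROFILER_DIR cannot both be "
            "set at the same time.")


if __name__ == "__main__":
    check_profiler_flags()
```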