25 | 25 | import torch_npu
26 | 26 | import vllm.envs as envs_vllm
27 | 27 | from torch_npu.op_plugin.atb._atb_ops import _register_atb_extensions
   | 28 | +from torch_npu.profiler import dynamic_profile as dp
28 | 29 | from vllm.config import VllmConfig
29 | 30 | from vllm.distributed import (ensure_model_parallel_initialized,
30 | 31 |                               init_distributed_environment)

41 | 42 |                               DraftTokenIds, ModelRunnerOutput)
42 | 43 | from vllm.v1.worker.worker_base import WorkerBase
43 | 44 |
   | 45 | +import vllm_ascend.envs as envs_ascend
44 | 46 | from vllm_ascend.ascend_config import init_ascend_config
45 | 47 | from vllm_ascend.device_allocator.camem import CaMemAllocator
46 | 48 | from vllm_ascend.distributed.parallel_state import init_ascend_model_parallel
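
The two new imports wire msMonitor's dynamic profiling into the worker: `dynamic_profile` comes from `torch_npu.profiler`, and the on/off switch is exposed through `vllm_ascend.envs` as `MSMONITOR_USE_DAEMON`. A minimal sketch of how such an environment switch could be read, assuming a plain `os.getenv` lookup rather than the exact parsing used in `vllm_ascend/envs.py`:

```python
# Hypothetical sketch of reading the MSMONITOR_USE_DAEMON switch; the real
# definition lives in vllm_ascend/envs.py and may parse the value differently.
import os


def _msmonitor_use_daemon() -> bool:
    # Treat "1"/"true"/"yes" (case-insensitive) as enabled; anything else is off.
    return os.getenv("MSMONITOR_USE_DAEMON", "0").strip().lower() in ("1", "true", "yes")


MSMONITOR_USE_DAEMON: bool = _msmonitor_use_daemon()
```
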
@@ -221,6 +223,10 @@ def execute_model(
221 | 223 |         self,
222 | 224 |         scheduler_output: "SchedulerOutput",
223 | 225 |     ) -> Optional[Union[ModelRunnerOutput, AsyncModelRunnerOutput]]:
    | 226 | +        # enable msMonitor to monitor the performance of vllm-ascend
    | 227 | +        if envs_ascend.MSMONITOR_USE_DAEMON:
    | 228 | +            dp.step()
    | 229 | +
224 | 230 |         intermediate_tensors = None
225 | 231 |         forward_pass = scheduler_output.total_num_scheduled_tokens > 0
226 | 232 |         if forward_pass and not get_pp_group().is_first_rank:
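
With the flag enabled, `dp.step()` runs once at the top of every `execute_model()` call, marking a step boundary for msMonitor's dynamic profiler before the forward pass. A hedged sketch of the same per-step hook in isolation, where `profiled_step`, `run_forward`, and the boolean argument are illustrative stand-ins rather than vllm-ascend APIs:

```python
# Hedged sketch of the per-step msMonitor hook added above. `profiled_step`,
# `run_forward`, and `msmonitor_use_daemon` are illustrative stand-ins, not
# vllm-ascend APIs; dp.step() is the same call the diff introduces.
from typing import Callable, Optional

from torch_npu.profiler import dynamic_profile as dp


def profiled_step(run_forward: Callable[[], Optional[object]],
                  msmonitor_use_daemon: bool) -> Optional[object]:
    # Mark a step boundary for msMonitor's dynamic profiler before the forward
    # pass, but only when the daemon flag is enabled.
    if msmonitor_use_daemon:
        dp.step()
    return run_forward()
```
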
@@ -350,6 +356,10 @@ def _init_profiler(self):
350 | 356 |         # Torch profiler. Enabled and configured through env vars:
351 | 357 |         # VLLM_TORCH_PROFILER_DIR=/path/to/save/trace
352 | 358 |         if envs_vllm.VLLM_TORCH_PROFILER_DIR:
    | 359 | +            if envs_ascend.MSMONITOR_USE_DAEMON:
    | 360 | +                raise RuntimeError(
    | 361 | +                    "MSMONITOR_USE_DAEMON and VLLM_TORCH_PROFILER_DIR cannot both be set at the same time."
    | 362 | +                )
353 | 363 |             torch_profiler_trace_dir = envs_vllm.VLLM_TORCH_PROFILER_DIR
354 | 364 |             logger.info("Profiling enabled. Traces will be saved to: %s",
355 | 365 |                         torch_profiler_trace_dir)
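
This guard makes the two profiling paths mutually exclusive: if the msMonitor daemon is enabled, the torch profiler must stay off. A standalone sketch of the same check, reading both flags straight from the environment instead of through `vllm.envs` / `vllm_ascend.envs`:

```python
# Hedged sketch of the mutual-exclusion guard added in _init_profiler; the
# flags are read straight from the environment here instead of going through
# vllm.envs / vllm_ascend.envs.
import os


def check_profiler_flags() -> None:
    msmonitor_on = os.getenv("MSMONITOR_USE_DAEMON", "0").strip().lower() in ("1", "true", "yes")
    torch_profiler_dir = os.getenv("VLLM_TORCH_PROFILER_DIR", "")
    if msmonitor_on and torch_profiler_dir:
        raise RuntimeError(
            "MSMONITOR_USE_DAEMON and VLLM_TORCH_PROFILER_DIR cannot both be "
            "set at the same time.")


if __name__ == "__main__":
    check_profiler_flags()
```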