From adf4a9ce79b6d1ee9bec20f46d46bd8eec2318f8 Mon Sep 17 00:00:00 2001 From: shen-shanshan <467638484@qq.com> Date: Thu, 29 May 2025 11:44:21 +0000 Subject: [PATCH] remove metrics patch Signed-off-by: shen-shanshan <467638484@qq.com> --- .../patch/worker/patch_common/__init__.py | 1 - .../worker/patch_common/patch_metrics.py | 53 ------------------- 2 files changed, 54 deletions(-) delete mode 100644 vllm_ascend/patch/worker/patch_common/patch_metrics.py diff --git a/vllm_ascend/patch/worker/patch_common/__init__.py b/vllm_ascend/patch/worker/patch_common/__init__.py index 7618823ba6..08a4b608e4 100644 --- a/vllm_ascend/patch/worker/patch_common/__init__.py +++ b/vllm_ascend/patch/worker/patch_common/__init__.py @@ -20,7 +20,6 @@ import vllm_ascend.patch.worker.patch_common.patch_utils # noqa isort:skip import vllm_ascend.patch.worker.patch_common.patch_distributed # noqa import vllm_ascend.patch.worker.patch_common.patch_eagle # noqa -import vllm_ascend.patch.worker.patch_common.patch_metrics # noqa import vllm_ascend.patch.worker.patch_common.patch_minicpm # noqa import vllm_ascend.patch.worker.patch_common.patch_multi_step_worker # noqa import vllm_ascend.patch.worker.patch_common.patch_sampler # noqa diff --git a/vllm_ascend/patch/worker/patch_common/patch_metrics.py b/vllm_ascend/patch/worker/patch_common/patch_metrics.py deleted file mode 100644 index 6d1f2dc0a9..0000000000 --- a/vllm_ascend/patch/worker/patch_common/patch_metrics.py +++ /dev/null @@ -1,53 +0,0 @@ -# -# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. -# This file is a part of the vllm-ascend project. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -from typing import Callable - -import torch -from vllm.spec_decode.metrics import AsyncMetricsCollector - -Timer = Callable[[], float] - - -def _copy_rejsample_metrics_async(self) -> torch.npu.Event: - """ - TODO: torch.cuda.xxx --> torch.npu.xxx - Copy rejection/typical-acceptance sampling metrics - (number of accepted tokens, etc) to CPU asynchronously. - - Returns a NPU event recording when the copy is complete. - """ - assert self._copy_stream is not None - self._copy_stream.wait_stream(torch.npu.current_stream()) - - with torch.npu.stream(self._copy_stream): - self._aggregate_num_accepted_tokens.copy_( - self.spec_decode_sampler.num_accepted_tokens, non_blocking=True) - self._aggregate_num_emitted_tokens.copy_( - self.spec_decode_sampler.num_emitted_tokens, non_blocking=True) - # Number of draft tokens is calculated on CPU, so no copy is - # required. - self._aggregate_num_draft_tokens = ( - self.spec_decode_sampler.num_draft_tokens) - - aggregate_metrics_ready = torch.npu.Event() - aggregate_metrics_ready.record(self._copy_stream) - - return aggregate_metrics_ready - - -AsyncMetricsCollector._copy_rejsample_metrics_async = _copy_rejsample_metrics_async