
Commit 1f43715

MengqingCao authored and 1092626063 committed
tiny fix
Signed-off-by: MengqingCao <cmq0113@163.com>
1 parent bb4521b · commit 1f43715

File tree

3 files changed: +16 -10 lines changed


tests/ut/worker/test_worker_v1.py

Lines changed: 2 additions & 1 deletion
@@ -429,7 +429,8 @@ def test_execute_dummy_batch(self):
         worker.execute_dummy_batch()
 
         # Verify call
-        mock_model_runner._dummy_run.assert_called_once_with(1)
+        mock_model_runner._dummy_run.assert_called_once_with(
+            1, uniform_decode=True, force_attention=True)
 
     @patch("vllm_ascend.worker.worker_v1.envs_vllm")
     @patch("vllm_ascend.worker.worker_v1.logger")

vllm_ascend/worker/model_runner_v1.py

Lines changed: 5 additions & 5 deletions
@@ -2377,11 +2377,11 @@ def _dummy_run(
             with_prefill = True
 
         attn_metadata = self._build_attention_metadata(
-            with_prefill,
-            num_reqs,
-            num_tokens,
-            max_query_len,
-            force_attention,
+            create_mixed_batch=False,
+            num_reqs=num_reqs,
+            num_tokens=num_tokens,
+            max_query_len=max_query_len,
+            force_attention=force_attention,
         )
 
         if not self.in_profile_run and self.dynamic_eplb:
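The substantive change in this hunk is binding arguments by name: the diff suggests the first parameter of _build_attention_metadata is create_mixed_batch, so the old positional call was dropping with_prefill into that slot. A minimal sketch of the hazard, using a hypothetical reduced signature (the real method takes more parameters):

    # Hypothetical reduced signature, for illustration only; the real
    # _build_attention_metadata in model_runner_v1.py takes more arguments.
    def build_attention_metadata(create_mixed_batch, num_reqs, num_tokens,
                                 max_query_len, force_attention):
        return {"create_mixed_batch": create_mixed_batch,
                "num_reqs": num_reqs, "force_attention": force_attention}

    with_prefill = True

    # Positional style: with_prefill silently lands in the
    # create_mixed_batch slot, although the two flags mean different things.
    old = build_attention_metadata(with_prefill, 4, 16, 8, False)
    assert old["create_mixed_batch"] is True

    # Keyword style, as in the diff: every argument binds by name, and a
    # future rename or reorder fails loudly with a TypeError instead.
    new = build_attention_metadata(create_mixed_batch=False, num_reqs=4,
                                   num_tokens=16, max_query_len=8,
                                   force_attention=False)
    assert new["create_mixed_batch"] is False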

vllm_ascend/worker/worker_v1.py

Lines changed: 9 additions & 4 deletions
@@ -25,7 +25,8 @@
 import torch_npu
 import vllm.envs as envs_vllm
 from torch_npu.op_plugin.atb._atb_ops import _register_atb_extensions
-from vllm.config import VllmConfig
+from torch_npu.profiler import dynamic_profile as dp
+from vllm.config import CUDAGraphMode, VllmConfig
 from vllm.distributed import (ensure_model_parallel_initialized,
                               init_distributed_environment)
 from vllm.distributed.kv_transfer import ensure_kv_transfer_initialized
@@ -333,9 +334,13 @@ def pin_lora(self, lora_id: int) -> bool:
         return self.model_runner.pin_lora(lora_id)
 
     def execute_dummy_batch(self) -> None:
-        self.model_runner._dummy_run(1,
-                                     uniform_decode=True,
-                                     force_attention=True)
+        uniform_decode = self.compilation_config.cudagraph_mode in [
+            CUDAGraphMode.FULL, CUDAGraphMode.PIECEWISE
+        ]
+        force_attention = self.compilation_config.cudagraph_mode == CUDAGraphMode.FULL
+        self.model_runner._dummy_run(num_tokens=1,
+                                     uniform_decode=uniform_decode,
+                                     force_attention=force_attention)
 
     def _init_worker_distributed_environment(self) -> None:
         """Initialize the distributed environment."""
