Skip to content

Commit bb4521b

Browse files
MengqingCao1092626063
authored and committed
fix fullgraph
Signed-off-by: MengqingCao <cmq0113@163.com>
1 parent efafee9 commit bb4521b

File tree

2 files changed

+9
-1
lines changed

2 files changed

+9
-1
lines changed

vllm_ascend/worker/model_runner_v1.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2298,6 +2298,12 @@ def _generate_dummy_run_hidden_states(self, with_prefill,
22982298
positions=positions,
22992299
intermediate_tensors=intermediate_tensors,
23002300
inputs_embeds=inputs_embeds)
2301+
forward_context = get_forward_context()
2302+
assert forward_context is not None
2303+
if forward_context.cudagraph_runtime_mode == CUDAGraphMode.FULL:
2304+
update_attn_params(self.update_stream, forward_context,
2305+
positions.shape[0])
2306+
23012307
if self.drafter and self.drafter.name == SpecDcodeType.EAGLE3:
23022308
hidden_states, _ = hidden_states
23032309
else:

vllm_ascend/worker/worker_v1.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -333,7 +333,9 @@ def pin_lora(self, lora_id: int) -> bool:
333333
return self.model_runner.pin_lora(lora_id)
334334

335335
def execute_dummy_batch(self) -> None:
336-
self.model_runner._dummy_run(1, uniform_decode=True)
336+
self.model_runner._dummy_run(1,
337+
uniform_decode=True,
338+
force_attention=True)
337339

338340
def _init_worker_distributed_environment(self) -> None:
339341
"""Initialize the distributed environment."""

0 commit comments

Comments
 (0)