update

shen-shanshan · shen-shanshan · commit 24235b4eb37d · 2025-08-14T02:43:27.000Z
Signed-off-by: shen-shanshan <467638484@qq.com>
diff --git a/vllm_ascend/torchair/torchair_attention.py b/vllm_ascend/torchair/torchair_attention.py
@@ -164,11 +164,16 @@ def build(self,
               num_reqs,
               num_actual_tokens,
               max_query_len,
-              graph_pad_size: int = -1,
               enable_dbo_across_dp: bool = False,
+              is_only_prefill: bool = False,
               *args,
               **kwargs):
 
+        if 'graph_pad_size' in kwargs:
+            graph_pad_size = kwargs['graph_pad_size']
+        else:
+            graph_pad_size = -1  # default value
+
         device = self.runner.device
 
         block_table = self.runner.input_batch.block_table[0].get_device_tensor(
diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py
@@ -84,8 +84,6 @@
 from vllm_ascend.platform import NPUPlatform
 from vllm_ascend.sample.rejection_sampler import AscendRejectionSampler
 from vllm_ascend.torchair.torchair_attention import AscendTorchairMetadata
-from vllm_ascend.torchair.utils import (check_torchair_cache_exist,
-                                        write_kv_cache_bytes_to_file)
 from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_ND, ACL_FORMAT_FRACTAL_NZ,
                                ProfileExecuteDuration, is_310p,
                                maybe_converting_weight_acl_format,