Fix may_reinitialize_input_batch: check kernel_block_sizes instead of block_sizes

wangxiyuan · wangxiyuan · commit c0b8af77191c · 2025-09-14T16:14:21.000+08:00
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py
@@ -2815,8 +2815,7 @@ def may_reinitialize_input_batch(self,
                 # This is likely Mamba or other non-attention cache,
                 # no splitting.
                 kernel_block_sizes.append([0])
-
-        if block_sizes != [self.cache_config.block_size]:
+        if kernel_block_sizes != [self.cache_config.block_size]:
             assert self.cache_config.cpu_offload_gb == 0, (
                 "Cannot re-initialize the input batch when CPU weight "
                 "offloading is enabled. See https://github.com/vllm-project/vllm/pull/18298 "  # noqa: E501