Skip to content

Commit 2d6582c

Browse files
committed
fix deepseek prefix cache
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
1 parent fc99a05 commit 2d6582c

File tree

1 file changed: 3 additions (+3) and 2 deletions (-2).

vllm_ascend/platform.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -247,8 +247,9 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
             if cache_config.block_size is None:
                 cache_config.block_size = 128
             else:
-                cache_config.block_size = cdiv(cache_config.block_size,
-                                               64) * 64
+                if not vllm_config.model_config.is_deepseek_mla:
+                    cache_config.block_size = cdiv(cache_config.block_size,
+                                                   64) * 64
 
             if cache_config.enable_prefix_caching and cache_config.block_size != 128:
                 logger.warning(

0 commit comments

Comments
 (0)