Skip to content

Commit 5de847a

Browse files
committed
[fix] avoid unnecessarily reducing KV cache memory when use_cached_kv_cache_bytes is disabled
Signed-off-by: linfeng-yuan <1102311262@qq.com>
1 parent f0a96e7 commit 5de847a

File tree

1 file changed

+19
-20
lines changed

1 file changed

+19
-20
lines changed

vllm_ascend/torchair/torchair_worker.py

Lines changed: 19 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -35,26 +35,25 @@ def determine_available_memory(self) -> int:
3535
ascend_config = get_ascend_config()
3636
if ascend_config.enable_shared_expert_dp:
3737
return available_kv_cache_memory
38-
if ascend_config.torchair_graph_config.use_cached_kv_cache_bytes and check_kv_cache_bytes_cache_exist(
39-
):
40-
old_kv_cache_bytes = read_kv_cache_bytes_from_file(
41-
torch.distributed.get_rank())
42-
if 0 < old_kv_cache_bytes <= available_kv_cache_memory:
43-
logger.info(
44-
f"Use cached torchair kv_cache_bytes: {old_kv_cache_bytes}"
45-
)
46-
self.model_runner.new_kv_cache_bytes = old_kv_cache_bytes
47-
return old_kv_cache_bytes
48-
else:
49-
logger.info(
50-
"Cached torchair kv_cache_bytes is too big, invalidate old torchair_cache"
51-
)
52-
delete_torchair_cache_file()
53-
bytes_floating_tolerance = 1024 * 1024 * envs_ascend.VLLM_ASCEND_KV_CACHE_MEGABYTES_FLOATING_TOLERANCE
54-
available_kv_cache_memory -= bytes_floating_tolerance
55-
logger.info(f"Use new kv_cache_bytes: {available_kv_cache_memory}")
56-
self.model_runner.new_kv_cache_bytes = available_kv_cache_memory
57-
38+
if ascend_config.torchair_graph_config.use_cached_kv_cache_bytes:
39+
if check_kv_cache_bytes_cache_exist():
40+
old_kv_cache_bytes = read_kv_cache_bytes_from_file(
41+
torch.distributed.get_rank())
42+
if 0 < old_kv_cache_bytes <= available_kv_cache_memory:
43+
logger.info(
44+
f"Use cached torchair kv_cache_bytes: {old_kv_cache_bytes}"
45+
)
46+
self.model_runner.new_kv_cache_bytes = old_kv_cache_bytes
47+
return old_kv_cache_bytes
48+
else:
49+
logger.info(
50+
"Cached torchair kv_cache_bytes is too big, invalidate old torchair_cache"
51+
)
52+
delete_torchair_cache_file()
53+
bytes_floating_tolerance = 1024 * 1024 * envs_ascend.VLLM_ASCEND_KV_CACHE_MEGABYTES_FLOATING_TOLERANCE
54+
available_kv_cache_memory -= bytes_floating_tolerance
55+
logger.info(f"Use new kv_cache_bytes: {available_kv_cache_memory}")
56+
self.model_runner.new_kv_cache_bytes = available_kv_cache_memory
5857
return available_kv_cache_memory
5958

6059
def init_device(self):

0 commit comments

Comments
 (0)