@@ -35,26 +35,25 @@ def determine_available_memory(self) -> int:
35
35
ascend_config = get_ascend_config ()
36
36
if ascend_config .enable_shared_expert_dp :
37
37
return available_kv_cache_memory
38
- if ascend_config .torchair_graph_config .use_cached_kv_cache_bytes and check_kv_cache_bytes_cache_exist (
39
- ):
40
- old_kv_cache_bytes = read_kv_cache_bytes_from_file (
41
- torch .distributed .get_rank ())
42
- if 0 < old_kv_cache_bytes <= available_kv_cache_memory :
43
- logger .info (
44
- f"Use cached torchair kv_cache_bytes: { old_kv_cache_bytes } "
45
- )
46
- self .model_runner .new_kv_cache_bytes = old_kv_cache_bytes
47
- return old_kv_cache_bytes
48
- else :
49
- logger .info (
50
- "Cached torchair kv_cache_bytes is too big, invalidate old torchair_cache"
51
- )
52
- delete_torchair_cache_file ()
53
- bytes_floating_tolerance = 1024 * 1024 * envs_ascend .VLLM_ASCEND_KV_CACHE_MEGABYTES_FLOATING_TOLERANCE
54
- available_kv_cache_memory -= bytes_floating_tolerance
55
- logger .info (f"Use new kv_cache_bytes: { available_kv_cache_memory } " )
56
- self .model_runner .new_kv_cache_bytes = available_kv_cache_memory
57
-
38
+ if ascend_config .torchair_graph_config .use_cached_kv_cache_bytes :
39
+ if check_kv_cache_bytes_cache_exist ():
40
+ old_kv_cache_bytes = read_kv_cache_bytes_from_file (
41
+ torch .distributed .get_rank ())
42
+ if 0 < old_kv_cache_bytes <= available_kv_cache_memory :
43
+ logger .info (
44
+ f"Use cached torchair kv_cache_bytes: { old_kv_cache_bytes } "
45
+ )
46
+ self .model_runner .new_kv_cache_bytes = old_kv_cache_bytes
47
+ return old_kv_cache_bytes
48
+ else :
49
+ logger .info (
50
+ "Cached torchair kv_cache_bytes is too big, invalidate old torchair_cache"
51
+ )
52
+ delete_torchair_cache_file ()
53
+ bytes_floating_tolerance = 1024 * 1024 * envs_ascend .VLLM_ASCEND_KV_CACHE_MEGABYTES_FLOATING_TOLERANCE
54
+ available_kv_cache_memory -= bytes_floating_tolerance
55
+ logger .info (f"Use new kv_cache_bytes: { available_kv_cache_memory } " )
56
+ self .model_runner .new_kv_cache_bytes = available_kv_cache_memory
58
57
return available_kv_cache_memory
59
58
60
59
def init_device (self ):
0 commit comments