Commit a4e590e

ut support both 090 and main
Signed-off-by: whx-sjtu <2952154980@qq.com>
1 parent e98663d · commit a4e590e
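
The change gates version-specific test setup on vllm_ascend.utils.vllm_version_is, so the same unit test runs against both vLLM 0.9.0 and main. As a rough, hypothetical sketch (not the actual implementation), such a helper could simply compare the installed vllm package version string:

from importlib.metadata import version


def vllm_version_is(target: str) -> bool:
    # Hypothetical sketch only; vllm_ascend.utils.vllm_version_is may differ.
    # Returns True when the installed vllm package reports exactly `target`.
    return version("vllm") == target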

File tree

1 file changed: +10 -9 lines changed


tests/singlecard/core/test_ascend_scheduler.py

Lines changed: 10 additions & 9 deletions
@@ -16,6 +16,7 @@
 from vllm.v1.structured_output import StructuredOutputManager
 
 from vllm_ascend.core.scheduler import AscendScheduler
+from vllm_ascend.utils import vllm_version_is
 
 EOS_TOKEN_ID = 50256
 
@@ -97,7 +98,7 @@ def create_scheduler(
     )
     kv_cache_config = KVCacheConfig(
         num_blocks=num_blocks,  # A large number of blocks to hold all requests
-        kv_cache_tensors=[],
+        **({"tensors": {}} if vllm_version_is("0.9.0") else {"kv_cache_tensors": []}),
         kv_cache_groups=[
             KVCacheGroupSpec(['layer'],
                              FullAttentionSpec(block_size, 1, 1, torch.float32,
@@ -139,6 +140,7 @@ def create_requests(num_requests: int,
             multi_modal_placeholders=mm_position,
             multi_modal_hashes=None,
             eos_token_id=EOS_TOKEN_ID,
+            **({"arrival_time": 0.0} if vllm_version_is("0.9.0") else {}),
         )
         requests.append(request)
     return requests
@@ -557,6 +559,8 @@ def test_schedule_spec_decoding_stats(spec_tokens, output_tokens, expected):
     1. Speculated tokens get scheduled correctly
     2. Spec decoding stats properly count number of draft and accepted tokens
     """
+    if vllm_version_is("0.9.0"):
+        return
     num_spec_tokens = max(1, max(len(t) for t in spec_tokens))
     scheduler = create_scheduler(num_speculative_tokens=num_spec_tokens)
     requests = create_requests(num_requests=len(spec_tokens), num_tokens=1)
@@ -734,11 +738,12 @@ def assert_scheduler_empty(scheduler: AscendScheduler):
     assert len(scheduler.encoder_cache_manager.cached) == 0
 
     # KVCache Manager.
-    assert len(scheduler.kv_cache_manager.coordinator.single_type_managers[0].
-               req_to_blocks) == 0
+    if not vllm_version_is("0.9.0"):
+        assert len(scheduler.kv_cache_manager.coordinator.single_type_managers[0].
+                   req_to_blocks) == 0
+        assert len(scheduler.kv_cache_manager.coordinator.single_type_managers[0].
+                   num_cached_block) == 0
     assert len(scheduler.kv_cache_manager.req_to_block_hashes) == 0
-    assert len(scheduler.kv_cache_manager.coordinator.single_type_managers[0].
-               num_cached_block) == 0
     num_free_blocks = (
         scheduler.kv_cache_manager.block_pool.free_block_queue.num_free_blocks)
     assert num_free_blocks == (
@@ -748,10 +753,6 @@ def assert_scheduler_empty(scheduler: AscendScheduler):
     # value, etc will remain since we lazily evict for prefix cache.
     for block in scheduler.kv_cache_manager.block_pool.blocks:
         assert block.ref_cnt == 0
-        # assert block._block_hash is None
-        # assert (
-        #     len(scheduler.kv_cache_manager.block_pool.cached_block_hash_to_block
-        #     ) == 0)
 
 
 def test_memory_leak():
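
Keeping one call site compatible with both APIs is done by unpacking a version-dependent kwargs dict, as in the KVCacheConfig and Request calls above. A small self-contained illustration of the pattern; build_config and use_legacy_api are made-up names for this example only:

def build_config(use_legacy_api: bool) -> dict:
    # Only the keyword expected by the installed API is ever passed;
    # the key from the other branch does not appear in the call at all.
    return dict(
        num_blocks=128,
        **({"tensors": {}} if use_legacy_api else {"kv_cache_tensors": []}),
    )


assert "tensors" in build_config(True)
assert "kv_cache_tensors" in build_config(False)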
