Skip to content

Commit aa007f1

Browse files
committed
UT: support both vLLM 0.9.0 and main
Signed-off-by: whx-sjtu <2952154980@qq.com>
1 parent e98663d commit aa007f1

File tree

1 file changed

+16
-9
lines changed

1 file changed

+16
-9
lines changed

tests/singlecard/core/test_ascend_scheduler.py

Lines changed: 16 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
from vllm.v1.structured_output import StructuredOutputManager
1717

1818
from vllm_ascend.core.scheduler import AscendScheduler
19+
from vllm_ascend.utils import vllm_version_is
1920

2021
EOS_TOKEN_ID = 50256
2122

@@ -97,7 +98,11 @@ def create_scheduler(
9798
)
9899
kv_cache_config = KVCacheConfig(
99100
num_blocks=num_blocks, # A large number of blocks to hold all requests
100-
kv_cache_tensors=[],
101+
**({
102+
"tensors": {}
103+
} if vllm_version_is("0.9.0") else {
104+
"kv_cache_tensors": []
105+
}),
101106
kv_cache_groups=[
102107
KVCacheGroupSpec(['layer'],
103108
FullAttentionSpec(block_size, 1, 1, torch.float32,
@@ -139,6 +144,9 @@ def create_requests(num_requests: int,
139144
multi_modal_placeholders=mm_position,
140145
multi_modal_hashes=None,
141146
eos_token_id=EOS_TOKEN_ID,
147+
**({
148+
"arrival_time": 0.0
149+
} if vllm_version_is("0.9.0") else {}),
142150
)
143151
requests.append(request)
144152
return requests
@@ -557,6 +565,8 @@ def test_schedule_spec_decoding_stats(spec_tokens, output_tokens, expected):
557565
1. Speculated tokens get scheduled correctly
558566
2. Spec decoding stats properly count number of draft and accepted tokens
559567
"""
568+
if vllm_version_is("0.9.0"):
569+
return
560570
num_spec_tokens = max(1, max(len(t) for t in spec_tokens))
561571
scheduler = create_scheduler(num_speculative_tokens=num_spec_tokens)
562572
requests = create_requests(num_requests=len(spec_tokens), num_tokens=1)
@@ -734,11 +744,12 @@ def assert_scheduler_empty(scheduler: AscendScheduler):
734744
assert len(scheduler.encoder_cache_manager.cached) == 0
735745

736746
# KVCache Manager.
737-
assert len(scheduler.kv_cache_manager.coordinator.single_type_managers[0].
738-
req_to_blocks) == 0
747+
if not vllm_version_is("0.9.0"):
748+
assert len(scheduler.kv_cache_manager.coordinator.
749+
single_type_managers[0].req_to_blocks) == 0
750+
assert len(scheduler.kv_cache_manager.coordinator.
751+
single_type_managers[0].num_cached_block) == 0
739752
assert len(scheduler.kv_cache_manager.req_to_block_hashes) == 0
740-
assert len(scheduler.kv_cache_manager.coordinator.single_type_managers[0].
741-
num_cached_block) == 0
742753
num_free_blocks = (
743754
scheduler.kv_cache_manager.block_pool.free_block_queue.num_free_blocks)
744755
assert num_free_blocks == (
@@ -748,10 +759,6 @@ def assert_scheduler_empty(scheduler: AscendScheduler):
748759
# value, etc will remain since we lazily evict for prefix cache.
749760
for block in scheduler.kv_cache_manager.block_pool.blocks:
750761
assert block.ref_cnt == 0
751-
# assert block._block_hash is None
752-
# assert (
753-
# len(scheduler.kv_cache_manager.block_pool.cached_block_hash_to_block
754-
# ) == 0)
755762

756763

757764
def test_memory_leak():

0 commit comments

Comments
 (0)