8
8
SchedulerConfig , SpeculativeConfig , VllmConfig )
9
9
from vllm .multimodal .inputs import PlaceholderRange
10
10
from vllm .sampling_params import SamplingParams
11
+ from vllm .v1 .core .kv_cache_utils import (get_request_block_hasher ,
12
+ init_none_hash )
11
13
from vllm .v1 .core .sched .output import SchedulerOutput
12
14
from vllm .v1 .kv_cache_interface import (FullAttentionSpec , KVCacheConfig ,
13
15
KVCacheGroupSpec )
@@ -36,7 +38,10 @@ def create_requests(
36
38
mm_positions : Optional [list [PlaceholderRange ]] = None ,
37
39
max_tokens : int = 16 ,
38
40
stop_token_ids : Optional [list [int ]] = None ,
41
+ block_size : int = 3 ,
42
+ hash_fn = hash ,
39
43
):
44
+ init_none_hash (hash_fn )
40
45
prompt_logprobs = PROMPT_LOGPROBS
41
46
sampling_params = SamplingParams (ignore_eos = False ,
42
47
max_tokens = max_tokens ,
@@ -46,16 +51,16 @@ def create_requests(
46
51
for i in range (num_requests ):
47
52
mm_position = None
48
53
mm_inputs = None
49
- request = Request (
50
- request_id = f" { i } " ,
51
- prompt_token_ids = [ i ] * num_tokens ,
52
- sampling_params = sampling_params ,
53
- multi_modal_kwargs = mm_inputs ,
54
- multi_modal_placeholders = mm_position ,
55
- multi_modal_hashes = None ,
56
- eos_token_id = EOS_TOKEN_ID ,
57
- pooling_params = None ,
58
- )
54
+ request = Request (request_id = f" { i } " ,
55
+ prompt_token_ids = [ i ] * num_tokens ,
56
+ sampling_params = sampling_params ,
57
+ multi_modal_kwargs = mm_inputs ,
58
+ multi_modal_placeholders = mm_position ,
59
+ multi_modal_hashes = None ,
60
+ eos_token_id = EOS_TOKEN_ID ,
61
+ pooling_params = None ,
62
+ block_hasher = get_request_block_hasher (
63
+ block_size , hash_fn ) )
59
64
requests .append (request )
60
65
return requests
61
66
0 commit comments