Skip to content

Commit 39d6cd1

Browse files
whx-sjtu, hw_whx
authored and committed
fix ci problems
Signed-off-by: whx-sjtu <2952154980@qq.com>
1 parent 6d35d5d commit 39d6cd1

File tree

1 file changed

+13
-16
lines changed

1 file changed

+13
-16
lines changed

vllm_ascend/core/scheduler.py

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,10 @@
2323
from vllm.logger import logger
2424
from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry
2525
from vllm.utils import cdiv
26+
from vllm.v1.core.kv_cache_manager import KVCacheBlocks
2627
from vllm.v1.core.sched.output import NewRequestData, SchedulerOutput
2728
from vllm.v1.core.sched.scheduler import Scheduler
28-
from vllm.v1.core.kv_cache_manager import KVCacheBlocks
29-
from vllm.v1.core.sched.utils import check_stop
30-
from vllm.v1.engine import EngineCoreOutput, EngineCoreOutputs, EngineCoreEventType
29+
from vllm.v1.engine import EngineCoreEventType, EngineCoreOutputs
3130
from vllm.v1.kv_cache_interface import KVCacheConfig
3231
from vllm.v1.outputs import ModelRunnerOutput
3332
from vllm.v1.request import Request, RequestStatus
@@ -55,7 +54,6 @@ def __init__(
5554
self.scheduled_req_ids: set[str] = set()
5655
self.running: list[Request] = []
5756

58-
5957
def schedule(self) -> SchedulerOutput:
6058
if self.scheduler_config.chunked_prefill_enabled:
6159
return super().schedule()
@@ -129,7 +127,7 @@ def skip_cur_request():
129127

130128
# Total computed tokens (local + external).
131129
num_computed_tokens = (num_native_computed_tokens +
132-
num_external_computed_tokens)
130+
num_external_computed_tokens)
133131
else:
134132
# P/D: skip checking prefix cache if loaded from remote kvs.
135133
new_computed_blocks = KVCacheBlocks.create_empty()
@@ -154,7 +152,7 @@ def skip_cur_request():
154152
# requests, which have output tokens.
155153
num_new_tokens = request.num_tokens - num_computed_tokens
156154
max_tokens_in_kvcache = (self.kv_cache_config.num_blocks *
157-
self.block_size)
155+
self.block_size)
158156
prompt_limit = min(prompt_limit, max_tokens_in_kvcache)
159157

160158
# Finish request that exceeds prompt_limit or kv cache size.
@@ -166,7 +164,8 @@ def skip_cur_request():
166164
prompt_limit,
167165
)
168166
request.status = RequestStatus.FINISHED_IGNORED
169-
self.finished_req_ids.add(request.request_id) # type: ignore
167+
self.finished_req_ids.add( # type: ignore
168+
request.request_id) # type: ignore
170169
self.waiting.popleft()
171170
continue
172171

@@ -175,12 +174,11 @@ def skip_cur_request():
175174
skip_cur_request()
176175
continue
177176
assert num_new_tokens > 0
178-
179177
if vllm_version_is("0.9.0"):
180178
blocks = computed_blocks.blocks
181179
else:
182180
blocks = computed_blocks.blocks[0]
183-
181+
184182
watermark = getattr(self.scheduler_config, "watermark", 0.01)
185183
if not self._check_watermark_for_prefill(request, num_new_tokens,
186184
blocks, watermark):
@@ -194,8 +192,7 @@ def skip_cur_request():
194192
num_native_computed_tokens,
195193
new_computed_blocks=computed_blocks,
196194
num_lookahead_tokens=self.num_lookahead_tokens,
197-
delay_cache_blocks=load_kv_async
198-
)
195+
delay_cache_blocks=load_kv_async)
199196
if new_blocks is None:
200197
# The request cannot be scheduled.
201198
break
@@ -221,7 +218,7 @@ def skip_cur_request():
221218
self.running.append(request)
222219
if self.log_stats:
223220
request.record_event(EngineCoreEventType.SCHEDULED,
224-
scheduled_timestamp)
221+
scheduled_timestamp)
225222
self.scheduled_req_ids.add(request.request_id)
226223
# Check request status.
227224
if request.status == RequestStatus.WAITING:
@@ -299,8 +296,7 @@ def skip_cur_request():
299296
request,
300297
num_new_tokens,
301298
num_draft_tokens=num_draft_tokens,
302-
num_lookahead_tokens=self.num_lookahead_tokens
303-
)
299+
num_lookahead_tokens=self.num_lookahead_tokens)
304300
if new_blocks is None:
305301
# The request cannot be scheduled.
306302
# Preempt the lowest-priority request.
@@ -310,7 +306,8 @@ def skip_cur_request():
310306
preempted_req.num_computed_tokens = 0
311307
if self.log_stats:
312308
preempted_req.record_event(
313-
EngineCoreEventType.PREEMPTED, scheduled_timestamp)
309+
EngineCoreEventType.PREEMPTED,
310+
scheduled_timestamp)
314311
self.waiting.appendleft(preempted_req)
315312
preempted_reqs.append(preempted_req)
316313
if preempted_req == request:
@@ -344,7 +341,7 @@ def skip_cur_request():
344341
del request.spec_token_ids[num_scheduled_spec_tokens:]
345342
scheduled_spec_decode_tokens[request.request_id] = (
346343
request.spec_token_ids)
347-
344+
348345
# Record scheduled LoRA requests.
349346
if self.lora_config and request.lora_request:
350347
scheduled_loras.add(request.lora_request.lora_int_id)

0 commit comments

Comments
 (0)