Skip to content

Commit 155e954

Browse files
cleanup
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
1 parent 2c21577 commit 155e954

File tree

1 file changed

+1
-11
lines changed

1 file changed

+1
-11
lines changed

vllm/v1/attention/backends/flashinfer.py

Lines changed: 1 addition & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -159,7 +159,7 @@ class FlashInferMetadata:
159159
# (batch_size + 1,). The cumulative subquery lengths of the sequences in
160160
# the batch, used to index into subquery. E.g., if the subquery length
161161
# is [4, 6], it is [0, 4, 10].
162-
qo_indptr: torch.Tensor
162+
qo_indptr_cpu: torch.Tensor
163163
# An example for paged_kv_indices, paged_kv_indptr:
164164
# request 1, page indices [0, 5, 8]
165165
# request 2, page indices [1, 6, 7]
@@ -213,14 +213,6 @@ class FlashInferMetadata:
213213
decode_wrapper: Optional[BatchDecodeWithPagedKVCacheWrapper] = None
214214
cascade_wrapper: Optional[MultiLevelCascadeAttentionWrapper] = None
215215

216-
# CPU version for FlashInfer planning
217-
qo_indptr_cpu: Optional[torch.Tensor] = None
218-
219-
@property
220-
def query_start_loc(self):
221-
# The GPUModelRunner expects to be able to access this property.
222-
return self.qo_indptr
223-
224216
def __post_init__(self):
225217
if self.head_dim is not None:
226218
FlashInferBackend.validate_head_size(self.head_dim)
@@ -396,7 +388,6 @@ def build(self,
396388
split_decodes_and_prefills(common_attn_metadata)
397389

398390
page_size = self.kv_cache_spec.block_size
399-
qo_indptr = common_attn_metadata.query_start_loc
400391
max_seq_len = common_attn_metadata.seq_lens_cpu.max()
401392
seq_lens = common_attn_metadata.seq_lens
402393
seq_lens_cpu = common_attn_metadata.seq_lens_cpu
@@ -457,7 +448,6 @@ def build(self,
457448
kv_cache_dtype = self.kv_cache_spec.dtype
458449
attn_metadata = FlashInferMetadata(
459450
num_actual_tokens=num_actual_tokens,
460-
qo_indptr=qo_indptr,
461451
qo_indptr_cpu=common_attn_metadata.query_start_loc_cpu,
462452
paged_kv_indptr_cpu=paged_kv_indptr_cpu,
463453
paged_kv_indices=paged_kv_indices,

0 commit comments

Comments
 (0)