Skip to content

Commit 12e0a8b

Browse files
committed
Fix for placeholder_attn TP > 1
1 parent: 7dc8f38 · commit: 12e0a8b

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed

vllm/attention/backends/placeholder_attn.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -266,9 +266,13 @@ def build(self, seq_lens: List[int], query_lens: List[int],
266266
dtype=query_start_loc.dtype,
267267
out=query_start_loc[1:])
268268

269+
# Placeholders
270+
slot_mapping = torch.empty(0)
271+
block_tables = torch.empty(0)
272+
269273
return PlaceholderAttentionMetadata(
270274
num_prefills=self.num_prefills,
271-
slot_mapping=None,
275+
slot_mapping=slot_mapping,
272276
num_prefill_tokens=self.num_prefill_tokens,
273277
num_decode_tokens=num_decode_tokens,
274278
seq_lens=seq_lens,
@@ -279,7 +283,7 @@ def build(self, seq_lens: List[int], query_lens: List[int],
279283
query_start_loc=query_start_loc,
280284
seq_start_loc=seq_start_loc,
281285
context_lens_tensor=context_lens_tensor,
282-
block_tables=None,
286+
block_tables=block_tables,
283287
use_cuda_graph=use_captured_graph,
284288
)
285289

0 commit comments

Comments (0)