Skip to content

Commit 4b2a9c4

Browse files
committed
bugfix for mtp>1
Signed-off-by: zouyida2052 <zouyida2002@gmail.com>
1 parent 4a12841 commit 4b2a9c4

File tree

1 file changed: 1 addition (+1) and 1 deletion (-1).

vllm_ascend/attention/mla_v1.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,10 +198,10 @@ def __init__(self,
(Columns: old line number, new line number, change marker, content. Leading indentation was not preserved in this extract.)

198 198   if self.speculative_config:
199 199   spec_token_num = self.speculative_config.num_speculative_tokens
200 200   self.decode_threshold += spec_token_num
201     - self.reorder_batch_threshold = self.decode_threshold
202 201   assert self.decode_threshold <= 16, f"decode_threshold exceeded \
203 202   npu_fused_infer_attention_score TND layout's limit of 16, \
204 203   got {self.decode_threshold}"
    204 + self.reorder_batch_threshold = self.decode_threshold
205 205
206 206   if self.chunked_prefill_enabled:
207 207   self.chunked_prefill_workspace_size = min(

0 commit comments

Comments
 (0)