We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 4b2a9c4 commit 47c43dbCopy full SHA for 47c43db
vllm_ascend/attention/mla_v1.py
@@ -201,6 +201,7 @@ def __init__(self,
201
assert self.decode_threshold <= 16, f"decode_threshold exceeded \
202
npu_fused_infer_attention_score TND layout's limit of 16, \
203
got {self.decode_threshold}"
204
+
205
self.reorder_batch_threshold = self.decode_threshold
206
207
if self.chunked_prefill_enabled:
0 commit comments