Skip to content

Commit 35acbb2

Browse files
committed
remove another compressed
Signed-off-by: Angazenn <supperccell@163.com>
1 parent 821253c commit 35acbb2

File tree

1 file changed: 1 addition, 4 deletions

1 file changed: 1 addition, 4 deletions

vllm_ascend/attention/attention_v1.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -304,9 +304,6 @@ def __init__(
304304
self.key_cache = None
305305
self.value_cache = None
306306

307-
pta_version_support_compressed_mask = "2.7.1.dev20250918"
308-
self.compressed_mask = verify_torch_npu_version(pta_version_support_compressed_mask, "compressed mask")
309-
310307
def _forward_prefill_no_cache(
311308
self,
312309
query: torch.Tensor,
@@ -459,7 +456,7 @@ def _forward_v1_style(
459456
attn_metadata.seq_lens = \
460457
attn_metadata.seq_lens.to(device=query.device)
461458

462-
if self.compressed_mask:
459+
if torch.version.cann.startswith("8.3"):
463460
# TODO:The npu_fused_infer_attention_score op is planned to
464461
# be utilized in a wider range in upcoming versions.
465462
num_block, block_size, head_num, head_dim = self.key_cache.shape

0 commit comments

Comments (0)