Skip to content

Commit 821253c

Browse files
committed
change to cann version
Signed-off-by: Angazenn <supperccell@163.com>
1 parent 6ef548d commit 821253c

File tree

3 files changed

+4
-9
lines changed

3 files changed

+4
-9
lines changed

vllm_ascend/attention/attention_mask.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,7 @@ def __init__(
         self._seq_len_cached = attn_mask.shape[0]
         self.attn_mask_cache = attn_mask
         self.device = device
-        if self.device:
-
+        if torch.version.cann.startswith("8.3"):
             assigned_mask_dim = 2048
             self.chunked_prefill_attn_mask = torch.triu(torch.ones(assigned_mask_dim, assigned_mask_dim), diagonal=1
                                                         ).to(torch.int8).to(device)

vllm_ascend/utils.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@
 _SLEEP_MODE_ENABLED = None
 _CURRENT_STREAM = None
 _ASCEND_CUSTOMOP_IS_REIGISTERED = False
-_CURRENT_TORCH_NPU_VERSION = torch_npu.__version__


 def is_310p():
     global _IS_310P

vllm_ascend/worker/model_runner_v1.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@
 from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_ND, ACL_FORMAT_FRACTAL_NZ,
                                AscendSocVersion, ProfileExecuteDuration,
                                get_ascend_soc_version, is_310p,
-                               lmhead_tp_enable, verify_torch_npu_version)
+                               lmhead_tp_enable)
 from vllm_ascend.worker.npu_input_batch import CachedRequestState, InputBatch

 if TYPE_CHECKING:
@@ -301,10 +301,7 @@ def __init__(self, vllm_config: VllmConfig, device: torch.device):
             use_mla=self.model_config.use_mla,
         )

-        pta_version_support_compressed_mask = "2.7.1.dev20250918"
-        self.compressed_mask = verify_torch_npu_version(pta_version_support_compressed_mask, "compressed mask")
-
-        if self.compressed_mask:
+        if torch.version.cann.startswith("8.3"):
             self.attn_mask_builder = AttentionMaskBuilder(
                 self.scheduler_config.max_num_batched_tokens, self.dtype, self.device)
         else:
@@ -825,7 +822,7 @@ def _make_attention_mask(self, seq_lens, position, attn_state) -> torch.Tensor:
         # Chunk Prefill situation.
         if attn_state == AscendAttentionState.ChunkedPrefill and not self.vllm_config.model_config.use_mla:
-            if self.compressed_mask:
+            if torch.version.cann.startswith("8.3"):
                 return self.attn_mask_builder.get_splitfuse_attn_mask()
             else:
                 return self.attn_mask_builder.get_splitfuse_attn_mask(

0 commit comments

Comments
 (0)