Skip to content

Commit 026b1a5

Browse files
committed
Change to CANN version check
Signed-off-by: Angazenn <supperccell@163.com>
1 parent e2e335b commit 026b1a5

File tree

3 files changed

+4
-34
lines changed

3 files changed

+4
-34
lines changed

vllm_ascend/attention/attention_mask.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,7 @@ def __init__(
5050
self._seq_len_cached = attn_mask.shape[0]
5151
self.attn_mask_cache = attn_mask
5252
self.device = device
53-
if self.device:
54-
53+
if torch.version.cann.startswith("8.3"):
5554
assigned_mask_dim = 2048
5655
self.chunked_prefill_attn_mask = torch.triu(torch.ones(assigned_mask_dim, assigned_mask_dim), diagonal=1
5756
).to(torch.int8).to(device)

vllm_ascend/utils.py

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@
6060
_SLEEP_MODE_ENABLED = None
6161
_CURRENT_STREAM = None
6262
_ASCEND_CUSTOMOP_IS_REIGISTERED = False
63-
_CURRENT_TORCH_NPU_VERSION = torch_npu.__version__
6463

6564
def is_310p():
6665
global _IS_310P
@@ -617,28 +616,3 @@ def weak_ref_tensors(
617616
if isinstance(tensors, tuple):
618617
return tuple(weak_ref_tensor(t) for t in tensors)
619618
raise ValueError("Invalid type for tensors")
620-
621-
622-
def verify_torch_npu_version(
623-
aim_version: str = "",
624-
aim: str = "usage",
625-
) -> bool:
626-
"""
627-
Verify if the current torch_npu version is recent enough for the aimed functionality.
628-
If the current version is sufficiently recent or is in a private branch,
629-
return True.
630-
"""
631-
632-
if any(keyword in _CURRENT_TORCH_NPU_VERSION for keyword in ["dev"]):
633-
# version of format, which needs verification
634-
current_ver_date = int(_CURRENT_TORCH_NPU_VERSION.split("dev")[1])
635-
aim_ver_date = int(aim_version.split("dev")[1])
636-
if current_ver_date >= aim_ver_date:
637-
logger.debug(
638-
f"The current torch_npu version {_CURRENT_TORCH_NPU_VERSION} is a daily build that satisfies the \
639-
version requirements by default for {aim}.")
640-
return True
641-
logger.debug(
642-
f"The current torch_npu version {_CURRENT_TORCH_NPU_VERSION} cannot satisfy the \
643-
version requirements by default for {aim}.")
644-
return False

vllm_ascend/worker/model_runner_v1.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@
105105
from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_ND, ACL_FORMAT_FRACTAL_NZ,
106106
AscendSocVersion, ProfileExecuteDuration,
107107
get_ascend_soc_version, is_310p,
108-
lmhead_tp_enable, verify_torch_npu_version)
108+
lmhead_tp_enable)
109109
from vllm_ascend.worker.npu_input_batch import CachedRequestState, InputBatch
110110

111111
if TYPE_CHECKING:
@@ -292,10 +292,7 @@ def __init__(self, vllm_config: VllmConfig, device: torch.device):
292292
use_mla=self.model_config.use_mla,
293293
)
294294

295-
pta_version_support_compressed_mask = "2.7.1.dev20250918"
296-
self.compressed_mask = verify_torch_npu_version(pta_version_support_compressed_mask, "compressed mask")
297-
298-
if self.compressed_mask:
295+
if torch.version.cann.startswith("8.3"):
299296
self.attn_mask_builder = AttentionMaskBuilder(
300297
self.scheduler_config.max_num_batched_tokens, self.dtype, self.device)
301298
else:
@@ -804,7 +801,7 @@ def _make_attention_mask(self, seq_lens, position,
804801
attn_state) -> torch.Tensor:
805802
# Chunk Prefill situation.
806803
if attn_state == AscendAttentionState.ChunkedPrefill and not self.vllm_config.model_config.use_mla:
807-
if self.compressed_mask:
804+
if torch.version.cann.startswith("8.3"):
808805
return self.attn_mask_builder.get_splitfuse_attn_mask()
809806
else:
810807
return self.attn_mask_builder.get_splitfuse_attn_mask(

0 commit comments

Comments
 (0)