Skip to content

Commit 2caec59

Browse files
committed
update
Signed-off-by: Jade Zheng <zheng.shoujian@outlook.com>
1 parent 3b9f409 commit 2caec59

File tree

1 file changed

+3
-7
lines changed

1 file changed

+3
-7
lines changed

vllm_ascend/attention/attention_mask.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,13 @@
1717

1818
def _generate_attn_mask(max_seq_len, dtype):
1919
# Construct lower triangle matrix.
20-
mask_flag = torch.ones(
21-
(max_seq_len, max_seq_len),
22-
dtype=torch.bool).tril_().view(max_seq_len, max_seq_len)
20+
mask_flag = torch.ones((max_seq_len, max_seq_len),
21+
dtype=torch.bool).tril_()
2322
# Create upper triangle matrix used to mark mask positions.
2423
mask_flag = ~mask_flag
2524
# Currently for fp16 dtype, the mask value should be set to -inf.
2625
# TODO: Eliminate this part in the future.
27-
if dtype == torch.float16:
28-
mask_value = torch.finfo(torch.float32).min
29-
else:
30-
mask_value = 1
26+
mask_value = float('-inf') if dtype == torch.float16 else 1
3127
attn_mask = torch.zeros(size=(max_seq_len, max_seq_len), dtype=dtype) \
3228
.masked_fill_(mask_flag, mask_value)
3329
return attn_mask

0 commit comments

Comments (0)