Skip to content

Commit 0407ffc

Browse files
Yikun authored and wangxiyuan committed
Fix ut
Signed-off-by: Yikun Jiang <yikunkero@gmail.com>
1 parent 6008f12 commit 0407ffc

File tree

2 files changed

+24
-18
lines changed

2 files changed

+24
-18
lines changed

tests/ut/attention/test_attention_v1.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,15 @@
33
import torch
44

55
from tests.ut.base import TestBase
6+
from vllm_ascend.attention.attention_v1 import \
7+
AscendAttentionBackendImpl092 # isort: skip
68
from vllm_ascend.attention.attention_v1 import (AscendAttentionBackend,
79
AscendAttentionBackendImpl,
810
AscendAttentionMetadataBuilder,
911
AscendAttentionState,
1012
AscendMetadata,
1113
CommonAttentionState)
14+
from vllm_ascend.utils import vllm_version_is
1215

1316

1417
class TestAscendAttentionBackend(TestBase):
@@ -17,8 +20,12 @@ def test_get_name(self):
1720
self.assertEqual(AscendAttentionBackend.get_name(), "ASCEND")
1821

1922
def test_get_impl_cls(self):
20-
self.assertEqual(AscendAttentionBackend.get_impl_cls(),
21-
AscendAttentionBackendImpl)
23+
if vllm_version_is("0.9.2"):
24+
self.assertEqual(AscendAttentionBackend.get_impl_cls(),
25+
AscendAttentionBackendImpl092)
26+
else:
27+
self.assertEqual(AscendAttentionBackend.get_impl_cls(),
28+
AscendAttentionBackendImpl)
2229

2330
def test_get_metadata_cls(self):
2431
self.assertEqual(AscendAttentionBackend.get_metadata_cls(),

vllm_ascend/attention/mla_v1.py

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1233,21 +1233,20 @@ def forward(
12331233

12341234
class AscendMLAImpl092(AscendMLAImpl):
12351235

1236-
def __init__(
1237-
self,
1238-
num_heads: int,
1239-
head_size: int,
1240-
scale: float,
1241-
num_kv_heads: int,
1242-
alibi_slopes: Optional[List[float]],
1243-
sliding_window: Optional[int],
1244-
kv_cache_dtype: str,
1245-
blocksparse_params: Optional[Dict[str, Any]] = None,
1246-
logits_soft_cap: Optional[float] = None,
1247-
attn_type: str = AttentionType.DECODER,
1248-
kv_sharing_target_layer_name: Optional[str] = None,
1249-
use_irope: bool = False,
1250-
) -> None:
1236+
def __init__(self,
1237+
num_heads: int,
1238+
head_size: int,
1239+
scale: float,
1240+
num_kv_heads: int,
1241+
alibi_slopes: Optional[List[float]],
1242+
sliding_window: Optional[int],
1243+
kv_cache_dtype: str,
1244+
blocksparse_params: Optional[Dict[str, Any]] = None,
1245+
logits_soft_cap: Optional[float] = None,
1246+
attn_type: str = AttentionType.DECODER,
1247+
kv_sharing_target_layer_name: Optional[str] = None,
1248+
use_irope: bool = False,
1249+
**kwargs) -> None:
12511250
super().__init__(
12521251
num_heads=num_heads,
12531252
head_size=head_size,
@@ -1260,4 +1259,4 @@ def __init__(
12601259
attn_type=attn_type,
12611260
kv_sharing_target_layer_name=kv_sharing_target_layer_name,
12621261
use_irope=use_irope,
1263-
)
1262+
**kwargs)

0 commit comments

Comments (0)