@@ -23,10 +23,8 @@
 from vllm.model_executor.layers.fused_moe import FusedMoEMethodBase
 
 from vllm_ascend.ascend_forward_context import _get_fused_moe_state
-# from vllm_ascend.ops.fused_moe import (AscendFusedMoE,
-#                                        AscendUnquantizedFusedMoEMethod)
-from vllm_ascend.torchair.ops.torchair_fused_moe import (TorchairAscendFusedMoE,
-                                                         TorchairAscendUnquantizedFusedMoEMethod)
+from vllm_ascend.torchair.ops.torchair_fused_moe import (
+    TorchairAscendFusedMoE, TorchairAscendUnquantizedFusedMoEMethod)
 from vllm_ascend.utils import AscendSocVersion, adapt_patch  # noqa E402
 
 adapt_patch(True)

@@ -57,33 +55,33 @@ def mock_dist_env(mocker: MockerFixture):
 
     with patch('torch.distributed.get_rank', return_value=0), \
         patch('torch.distributed.get_world_size', return_value=4), \
-        patch('vllm_ascend.ops.fused_moe.get_ep_group', return_value=mock_ep_and_mc2_group(mocker)), \
-        patch('vllm_ascend.ops.fused_moe.get_mc2_group', return_value=mock_ep_and_mc2_group(mocker)), \
-        patch('vllm_ascend.ops.fused_moe.get_tp_group', return_value=mock_dp_and_tp_group(mocker)), \
+        patch('vllm_ascend.torchair.ops.torchair_fused_moe.get_ep_group', return_value=mock_ep_and_mc2_group(mocker)), \
+        patch('vllm_ascend.torchair.ops.torchair_fused_moe.get_mc2_group', return_value=mock_ep_and_mc2_group(mocker)), \
+        patch('vllm_ascend.torchair.ops.torchair_fused_moe.get_tp_group', return_value=mock_dp_and_tp_group(mocker)), \
         patch('vllm.distributed.parallel_state.get_tp_group', return_value=mock_dp_and_tp_group(mocker)), \
-        patch('vllm_ascend.ops.fused_moe.get_dp_group', return_value=mock_dp_and_tp_group(mocker)), \
+        patch('vllm_ascend.torchair.ops.torchair_fused_moe.get_dp_group', return_value=mock_dp_and_tp_group(mocker)), \
         patch('vllm.model_executor.layers.fused_moe.layer.get_dp_group', return_value=mock_dp_and_tp_group(mocker)), \
         patch('torch.distributed.all_gather', return_value=MagicMock(return_value=torch.randn(10,32))), \
         patch('torch.distributed.all_to_all_single', return_value=torch.randn(8, 32)), \
-        patch('vllm_ascend.ops.fused_moe.tensor_model_parallel_all_reduce',
+        patch('vllm_ascend.torchair.ops.torchair_fused_moe.tensor_model_parallel_all_reduce',
              return_value=torch.randn(5, 32)), \
-        patch('vllm_ascend.ops.fused_moe.data_parallel_reduce_scatter',
+        patch('vllm_ascend.torchair.ops.torchair_fused_moe.data_parallel_reduce_scatter',
              return_value=torch.randn(5, 32)), \
         patch('vllm.model_executor.layers.fused_moe.config.get_dp_group',
              return_value=mock_dp_and_tp_group(mocker)), \
-        patch('vllm_ascend.ops.fused_moe.get_ascend_config',
+        patch('vllm_ascend.torchair.ops.torchair_fused_moe.get_ascend_config',
              return_value=MagicMock(
                  torchair_graph_config=MagicMock(enabled=False, enable_multistream_moe=False),
                  expert_map_path=None
              )), \
-        patch('vllm_ascend.ops.fused_moe.determine_expert_map',
+        patch('vllm_ascend.torchair.ops.torchair_fused_moe.determine_expert_map',
              return_value=(3, torch.tensor([0, 1, 2, -1, -1, -1, -1, -1]))), \
-        patch('vllm_ascend.ops.fused_moe.get_forward_context',
+        patch('vllm_ascend.torchair.ops.torchair_fused_moe.get_forward_context',
              return_value=MagicMock(
                  max_tokens_across_dp=10,
                  dp_metadata=MagicMock(cu_tokens_across_dp_cpu=[5, 10])
              )), \
-        patch('vllm_ascend.ops.fused_moe.get_current_vllm_config',
+        patch('vllm_ascend.torchair.ops.torchair_fused_moe.get_current_vllm_config',
              return_value=MagicMock(
                  parallel_config=MagicMock(tensor_parallel_size=2),
                  scheduler_config=MagicMock(max_num_seqs=4),

@@ -196,7 +194,7 @@ def apply(self, hidden_states: torch.Tensor,
         pass
 
 
-class TestAscendFusedMoe:
+class TestTorchairAscendFusedMoe:
 
     def test_init_no_quant(self, mock_dist_env, default_moe_config):
         layer = TorchairAscendFusedMoE(**default_moe_config)

@@ -233,7 +231,7 @@ def test_init_with_quant(self, mock_dist_env, default_moe_config):
         mock_quant_config.get_quant_method.return_value = mock_quant_method
 
         moe = TorchairAscendFusedMoE(**default_moe_config,
-                             quant_config=mock_quant_config)
+                                     quant_config=mock_quant_config)
 
         assert moe.quant_method is not None
         assert moe.quant_method == mock_quant_method

@@ -266,7 +264,7 @@ def test_forward(self, mock_dist_env, default_moe_config, others_param):
         forward_context = MagicMock(mc2_mask=torch.zeros(num_tokens,
                                                          dtype=torch.bool),
                                     padded_num_tokens=num_tokens)
-        with patch("vllm_ascend.ops.fused_moe.get_forward_context",
+        with patch("vllm_ascend.torchair.ops.torchair_fused_moe.get_forward_context",
                    return_value=forward_context):
             output = moe.forward(inputs,
                                  router_logits,

@@ -299,7 +297,7 @@ def test_forward_ms_fused_moe_comp(self, mock_dist_env,
         assert output.shape == (5, 32)
 
 
-class TestAscendUnquantizedFusedMoEMethod:
+class TestTorchairAscendUnquantizedFusedMoEMethod:
 
     def test_process_weights_after_loading(self, moe_method, mock_dist_env):
         layer = MagicMock()

@@ -328,7 +326,7 @@ def test_apply_without_expert_map(self, moe_method, mock_dist_env,
         is_deepseek_v3_r1 = global_num_experts == 256
         forward_context = MagicMock(fused_moe_state=_get_fused_moe_state(
             ep_size, is_prefill, is_deepseek_v3_r1))
-        with patch("vllm_ascend.ops.fused_moe.get_forward_context",
+        with patch("vllm_ascend.torchair.ops.torchair_fused_moe.get_forward_context",
                    return_value=forward_context):
             moe_method.ep_size = ep_size
             x = torch.randn(8, 2, 2)

@@ -363,10 +361,10 @@ def test_apply_with_expert_map(self, moe_method, mock_dist_env,
         is_prefill = False
         forward_context = MagicMock(
             fused_moe_state=_get_fused_moe_state(ep_size, is_prefill, True))
-        with patch("vllm_ascend.ops.fused_moe.MOE_ALL2ALL_BUFFER",
+        with patch("vllm_ascend.torchair.ops.torchair_fused_moe.MOE_ALL2ALL_BUFFER",
                    alltoall_buffer), \
-            patch("vllm_ascend.ops.fused_moe.get_forward_context", return_value=forward_context), \
-            patch("vllm_ascend.ops.fused_moe.get_ascend_soc_version", return_value=AscendSocVersion.A3):
+            patch("vllm_ascend.torchair.ops.torchair_fused_moe.get_forward_context", return_value=forward_context), \
+            patch("vllm_ascend.torchair.ops.torchair_fused_moe.get_ascend_soc_version", return_value=AscendSocVersion.A3):
             expert_map = torch.tensor([0, 1, 2, -1, -1, -1, -1, -1])
             moe_method.ep_size = ep_size
             x = torch.randn(8, 2, 2)
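
A note on the retargeted patch paths: unittest.mock.patch replaces a name in the namespace where it is looked up, not where it is defined. Because torchair_fused_moe binds helpers such as get_ascend_config and get_forward_context into its own namespace at import time, the fixture must patch 'vllm_ascend.torchair.ops.torchair_fused_moe.<name>' once the tests target that module. Below is a minimal sketch of the rule; the in-memory modules toy_defs and toy_consumer are hypothetical stand-ins, not part of this repo.

import sys
from types import ModuleType
from unittest.mock import patch

# Hypothetical "defining" module, standing in for the module that provides
# a helper like get_ascend_config.
defs = ModuleType('toy_defs')
defs.get_config = lambda: 'real'
sys.modules['toy_defs'] = defs

# Hypothetical "consuming" module, standing in for torchair_fused_moe, which
# binds the helper into its own namespace via a from-import at load time.
consumer = ModuleType('toy_consumer')
consumer.get_config = defs.get_config
consumer.use = lambda: consumer.get_config()
sys.modules['toy_consumer'] = consumer

# Patching the defining module leaves the consumer's binding untouched,
# so the code under test never sees the mock:
with patch('toy_defs.get_config', return_value='mocked'):
    assert consumer.use() == 'real'

# Patching the name where it is looked up is what the fixture above does:
with patch('toy_consumer.get_config', return_value='mocked'):
    assert consumer.use() == 'mocked'

This is why moving the tests from vllm_ascend.ops.fused_moe to the torchair module requires every patch target in the fixture to move with it.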
|
|