
Commit 3a2ec56

Author: weijinqian_v1 (committed)
[Refactor][MOE] remove redundant code.
Signed-off-by: weijinqian_v1 <weijinqian@huawei.com>
1 parent 6eea19b commit 3a2ec56

File tree: 1 file changed, +38 −41 lines

tests/ut/ops/test_fused_ops.py

Lines changed: 38 additions & 41 deletions
@@ -129,36 +129,38 @@ def capture_register(dispatcher_instance):
             with_quant=False)
 
     with patch('torch.distributed.get_rank', return_value=0), \
-        patch('torch.distributed.get_world_size', return_value=4), \
-        patch('vllm_ascend.ops.fused_moe.get_ep_group', return_value=mock_ep_and_mc2_group(mocker)), \
-        patch('vllm_ascend.ops.fused_moe.get_mc2_group', return_value=mock_ep_and_mc2_group(mocker)), \
-        patch('vllm_ascend.ops.fused_moe.get_tp_group', return_value=mock_dp_and_tp_group(mocker)), \
-        patch('vllm.distributed.parallel_state.get_tp_group', return_value=mock_dp_and_tp_group(mocker)), \
-        patch('vllm_ascend.ops.fused_moe.get_dp_group', return_value=mock_dp_and_tp_group(mocker)), \
-        patch('vllm.model_executor.layers.fused_moe.layer.get_dp_group', return_value=mock_dp_and_tp_group(mocker)), \
-        patch('torch.distributed.all_gather'), \
-        patch('torch.distributed.all_to_all_single'), \
-        patch('vllm_ascend.ops.fused_moe.tensor_model_parallel_all_reduce'), \
-        patch('vllm_ascend.ops.fused_moe.data_parallel_reduce_scatter'), \
-        patch('vllm.model_executor.layers.fused_moe.config.get_dp_group',
-              return_value=mock_dp_and_tp_group(mocker)), \
-        patch('vllm_ascend.ops.fused_moe.get_ascend_config',
-              return_value=MagicMock(
-                  torchair_graph_config=MagicMock(enabled=False, enable_multistream_moe=False),
-                  expert_map_path=None
-              )), \
-        patch('vllm_ascend.ops.fused_moe.determine_expert_map',
-              return_value=(3, torch.tensor([0, 1, 2, -1, -1, -1, -1, -1]))), \
-        patch('vllm_ascend.ops.fused_moe.get_forward_context',
-              return_value=mock_forward_context_obj), \
+        patch('torch.distributed.get_world_size', return_value=4), \
+        patch('vllm_ascend.ops.fused_moe.get_ep_group', return_value=mock_ep_and_mc2_group(mocker)), \
+        patch('vllm_ascend.ops.fused_moe.get_mc2_group', return_value=mock_ep_and_mc2_group(mocker)), \
+        patch('vllm_ascend.ops.fused_moe.get_tp_group', return_value=mock_dp_and_tp_group(mocker)), \
+        patch('vllm.distributed.parallel_state.get_tp_group', return_value=mock_dp_and_tp_group(mocker)), \
+        patch('vllm_ascend.ops.fused_moe.get_dp_group', return_value=mock_dp_and_tp_group(mocker)), \
+        patch('vllm.model_executor.layers.fused_moe.layer.get_dp_group', return_value=mock_dp_and_tp_group(mocker)), \
+        patch('torch.distributed.all_gather'), \
+        patch('torch.distributed.all_to_all_single'), \
+        patch('vllm_ascend.ops.fused_moe.tensor_model_parallel_all_reduce'), \
+        patch('vllm_ascend.ops.fused_moe.data_parallel_reduce_scatter'), \
+        patch('vllm.model_executor.layers.fused_moe.config.get_dp_group',
+              return_value=mock_dp_and_tp_group(mocker)), \
+        patch('vllm_ascend.ops.fused_moe.get_ascend_config',
+              return_value=MagicMock(
+                  torchair_graph_config=MagicMock(enabled=False, enable_multistream_moe=False),
+                  expert_map_path=None
+              )), \
+        patch('vllm_ascend.ops.fused_moe.determine_expert_map',
+              return_value=(3, torch.tensor([0, 1, 2, -1, -1, -1, -1, -1]))), \
+        patch('vllm_ascend.ops.fused_moe.get_forward_context',
+              return_value=mock_forward_context_obj), \
         patch('vllm_ascend.ops.fused_moe.get_current_vllm_config',
-              return_value=MagicMock(
-                  parallel_config=MagicMock(tensor_parallel_size=2),
-                  scheduler_config=MagicMock(max_num_seqs=4),
-                  model_config=MagicMock(max_model_len=2048)
-              )), \
+              return_value=MagicMock(
+                  parallel_config=MagicMock(tensor_parallel_size=2),
+                  scheduler_config=MagicMock(max_num_seqs=4),
+                  model_config=MagicMock(max_model_len=2048)
+              )), \
         patch("vllm_ascend.utils.get_ascend_soc_version", return_value=AscendSocVersion.A3), \
-        patch.object(token_dispatcher_module, 'setup_token_dispatchers', mock_setup_token_dispatchers):
+        patch.object(token_dispatcher_module, 'setup_token_dispatchers', mock_setup_token_dispatchers), \
+        patch('vllm_ascend.ops.layers.moe_mlp.get_forward_context',
+              return_value=mock_forward_context_obj):
 
         yield {
             'mock_forward_context_obj': mock_forward_context_obj,
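
Note on the fixture hunk above: the new context manager patches get_forward_context on vllm_ascend.ops.layers.moe_mlp as well, because unittest.mock.patch replaces a name in the namespace where it is looked up, not where it is defined. The sketch below illustrates that rule with throw-away modules; provider, consumer and get_value are hypothetical names invented for the example, not part of this repository.

# Minimal, self-contained sketch (hypothetical modules, not vllm-ascend code):
# mock.patch must target the namespace that performs the lookup.
import sys
import types
from unittest.mock import patch

provider = types.ModuleType("provider")
provider.get_value = lambda: "real"

consumer = types.ModuleType("consumer")
consumer.get_value = provider.get_value       # simulates `from provider import get_value`
consumer.use_value = lambda: consumer.get_value()

sys.modules["provider"] = provider
sys.modules["consumer"] = consumer

# Patching the defining module does not affect the consumer's own reference...
with patch("provider.get_value", return_value="mocked"):
    assert consumer.use_value() == "real"

# ...patching the consuming module's namespace does.
with patch("consumer.get_value", return_value="mocked"):
    assert consumer.use_value() == "mocked"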
@@ -524,27 +526,22 @@ def test_select_experts(self, mock_dist_env, mock_moe_env,
 
 class TestUnifiedApplyMLP(TestBase):
 
-    @patch('vllm_ascend.ops.fused_moe.get_forward_context')
-    @patch('vllm_ascend.ops.fused_moe.get_mc2_group')
-    @patch('vllm_ascend.ops.fused_moe.is_310p')
+    @patch('vllm_ascend.ops.layers.moe_mlp.get_forward_context')
+    @patch('vllm_ascend.ops.layers.moe_mlp.is_310p')
     @patch('torch_npu.npu_grouped_matmul')
     @patch('torch_npu.npu_dynamic_quant')
     @patch('torch_npu.npu_dequant_swiglu_quant')
     def test_unified_apply_mlp_with_quantization_mc2(self, mock_npu_dequant,
                                                      mock_npu_dynamic_quant,
                                                      mock_npu_grouped_matmul,
                                                      mock_is_310p,
-                                                     mock_get_mc2_group,
                                                      mock_get_forward_context):
 
         mock_forward_context = MagicMock()
         mock_forward_context.with_quant = True
         mock_forward_context.fused_moe_state = FusedMoEState.MC2
         mock_get_forward_context.return_value = mock_forward_context
 
-        mock_mc2_group = MagicMock()
-        mock_get_mc2_group.return_value = mock_mc2_group
-
         mock_is_310p.return_value = False
 
         mock_npu_dynamic_quant.return_value = (torch.randint(-128,
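
On the hunk above: stacked @patch decorators inject their mocks bottom-up as positional arguments, so dropping the @patch('vllm_ascend.ops.fused_moe.get_mc2_group') decorator also means dropping the matching mock_get_mc2_group parameter, exactly as the diff does. A small illustrative example of that ordering rule, using standard-library targets chosen purely for demonstration:

# Illustrative only: the decorator closest to the function supplies the first mock.
import os
from unittest.mock import patch

@patch("os.path.exists")    # outermost -> injected last  -> mock_exists
@patch("os.path.getsize")   # innermost -> injected first -> mock_getsize
def check(mock_getsize, mock_exists):
    mock_getsize.return_value = 42
    mock_exists.return_value = True
    return os.path.getsize("dummy") if os.path.exists("dummy") else 0

assert check() == 42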
@@ -597,8 +594,8 @@ def test_unified_apply_mlp_with_quantization_mc2(self, mock_npu_dequant,
 
         self.assertEqual(result.dtype, torch.bfloat16)
 
-    @patch('vllm_ascend.ops.fused_moe.get_forward_context')
-    @patch('vllm_ascend.ops.fused_moe.is_310p')
+    @patch('vllm_ascend.ops.layers.moe_mlp.get_forward_context')
+    @patch('vllm_ascend.ops.layers.moe_mlp.is_310p')
     @patch('torch_npu.npu_grouped_matmul')
     @patch('torch_npu.npu_swiglu')
     @patch('torch_npu.npu_dynamic_quant')
@@ -645,7 +642,7 @@ def test_unified_apply_mlp_without_quantization(
         self.assertEqual(result.shape, hidden_states.shape)
         self.assertEqual(result.dtype, torch.float16)
 
-    @patch('vllm_ascend.ops.fused_moe.get_forward_context')
+    @patch('vllm_ascend.ops.layers.moe_mlp.get_forward_context')
     @patch('torch_npu.npu_grouped_matmul')
     @patch('torch_npu.npu_swiglu')
     @patch('torch_npu.npu_dynamic_quant')
@@ -705,8 +702,8 @@ def test_unified_apply_mlp_with_quantization_and_dynamic_scale(
         self.assertEqual(result.shape, hidden_states.shape)
         self.assertEqual(result.dtype, torch.bfloat16)
 
-    @patch('vllm_ascend.ops.fused_moe.get_forward_context')
-    @patch('vllm_ascend.ops.fused_moe.is_310p')
+    @patch('vllm_ascend.ops.layers.moe_mlp.get_forward_context')
+    @patch('vllm_ascend.ops.layers.moe_mlp.is_310p')
     @patch('torch_npu.npu_grouped_matmul')
     @patch('torch_npu.npu_swiglu')
     @patch('torch_npu.npu_dynamic_quant')
@@ -755,4 +752,4 @@ def test_unified_apply_mlp_without_quantization_310p(
         mock_npu_swiglu.assert_called_once()
 
         self.assertEqual(result.shape, hidden_states.shape)
-        self.assertEqual(result.dtype, torch.float16)
+        self.assertEqual(result.dtype, torch.float16)
