
Commit 5a32cb8 (parent: 89c1a0f)

[Dist][EP] Remove ETP/EP maintained in vllm-ascend

Signed-off-by: MengqingCao <cmq0113@163.com>

20 files changed: +57 / -417 lines

docs/source/user_guide/additional_config.md

Lines changed: 0 additions & 2 deletions

@@ -28,7 +28,6 @@ The following table lists the additional configuration options available in vLLM
 |-------------------------------| ---- |------|-----------------------------------------------------------------------------------------------|
 | `torchair_graph_config` | dict | `{}` | The config options for torchair graph mode |
 | `ascend_scheduler_config` | dict | `{}` | The config options for ascend scheduler |
-| `expert_tensor_parallel_size` | str | `0` | Expert tensor parallel size the model to use. |
 | `refresh` | bool | `false` | Whether to refresh global ascend config content. This value is usually used by rlhf or ut/e2e test case. |
 | `expert_map_path` | str | `None` | When using expert load balancing for the MOE model, an expert map path needs to be passed in. |
 | `chunked_prefill_for_mla` | bool | `False` | Whether to enable the fused operator-like chunked_prefill. |
@@ -75,7 +74,6 @@ An example of additional configuration is as follows:
         "enabled": True,
         "enable_chunked_prefill": True,
     },
-    "expert_tensor_parallel_size": 1,
     "refresh": False,
 }
 ```
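With the `expert_tensor_parallel_size` key removed from `additional_config`, expert parallelism is driven by vLLM's native engine argument instead, which is what the new e2e test below exercises. A minimal offline sketch, assuming vLLM's standard `LLM` entry point and its `enable_expert_parallel` flag (not part of this diff):

```python
from vllm import LLM, SamplingParams

# Expert parallelism now comes from vLLM's own engine argument,
# not from vllm-ascend's additional_config.
llm = LLM(
    model="deepseek-ai/DeepSeek-V2-Lite-Chat",  # model used by the new e2e test
    tensor_parallel_size=2,
    enable_expert_parallel=True,  # replaces the removed additional_config key
)
# Greedy decoding, mirroring the test's generate_greedy call.
outputs = llm.generate(["The capital of France is"],
                       SamplingParams(temperature=0.0, max_tokens=5))
```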

tests/e2e/multicard/test_ep.py

Lines changed: 34 additions & 0 deletions

@@ -0,0 +1,34 @@
+import os
+
+import pytest
+
+from tests.conftest import VllmRunner
+from tests.model_utils import check_outputs_equal
+
+
+@pytest.mark.skipif(os.getenv("VLLM_USE_V1") == "0",
+                    reason="ep is not supported on v0")
+@pytest.mark.parametrize("model_name", ["deepseek-ai/DeepSeek-V2-Lite-Chat"])
+def test_e2e_ep_correctness(model_name):
+    example_prompts = [
+        "Hello, my name is",
+        "The president of the United States is",
+        "The capital of France is",
+        "The future of AI is",
+    ]
+    max_tokens = 5
+
+    with VllmRunner(model_name, tensor_parallel_size=2) as vllm_model:
+        tp_output = vllm_model.generate_greedy(example_prompts, max_tokens)
+
+    with VllmRunner(model_name,
+                    tensor_parallel_size=2,
+                    enable_expert_parallel=True) as vllm_model:
+        ep_output = vllm_model.generate_greedy(example_prompts, max_tokens)
+
+    check_outputs_equal(
+        outputs_0_lst=ep_output,
+        outputs_1_lst=tp_output,
+        name_0="ep_output",
+        name_1="tp_output",
+    )
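The test treats the pure-TP run as the reference: its premise is that greedy decoding is insensitive to whether the MoE experts are sharded (TP) or distributed (EP), so `check_outputs_equal` asserts token-for-token identity between the two runs. On a multi-card node it can be invoked with, e.g., `pytest tests/e2e/multicard/test_ep.py`.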

tests/e2e/multicard/test_fused_moe_allgather_ep.py

Lines changed: 0 additions & 2 deletions

@@ -51,7 +51,6 @@ def test_generate_with_allgather():
                         "enabled": True,
                         "chunked_prefill_enabled": False,
                     },
-                    "expert_tensor_parallel_size": 1
                 }) as vllm_model:
         vllm_model.generate(example_prompts, sampling_params)

@@ -77,6 +76,5 @@ def test_generate_with_alltoall():
                         "enabled": True,
                         "chunked_prefill_enabled": False,
                     },
-                    "expert_tensor_parallel_size": 1
                 }) as vllm_model:
         vllm_model.generate(example_prompts, sampling_params)
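For reference, a sketch of the trimmed call site after this change; everything outside the hunk (the model argument and any other `VllmRunner` keywords) is a placeholder, not taken from the diff:

```python
# Hypothetical reconstruction: only additional_config reflects the hunk above.
with VllmRunner(
        model_name,  # placeholder; not shown in the hunk
        additional_config={
            "ascend_scheduler_config": {
                "enabled": True,
                "chunked_prefill_enabled": False,
            },
        }) as vllm_model:
    vllm_model.generate(example_prompts, sampling_params)
```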

tests/ut/distributed/test_parallel_state.py

Lines changed: 0 additions & 208 deletions
This file was deleted.

tests/ut/test_ascend_config.py

Lines changed: 0 additions & 3 deletions

@@ -43,7 +43,6 @@ def test_init_ascend_config_without_additional_config(self):
         test_vllm_config = VllmConfig()
         # No additional config given, check the default value here.
         ascend_config = init_ascend_config(test_vllm_config)
-        self.assertEqual(ascend_config.expert_tensor_parallel_size, 0)
         self.assertIsNone(ascend_config.expert_map_path)

         torchair_graph_config = ascend_config.torchair_graph_config
@@ -76,12 +75,10 @@ def test_init_ascend_config_with_additional_config(self):
             "ascend_scheduler_config": {
                 "enabled": True
             },
-            "expert_tensor_parallel_size": 1,
             "expert_map_path": "test_expert_map_path",
             "refresh": True
         }
         ascend_config = init_ascend_config(test_vllm_config)
-        self.assertEqual(ascend_config.expert_tensor_parallel_size, 1)
         self.assertEqual(ascend_config.expert_map_path, "test_expert_map_path")

         torchair_graph_config = ascend_config.torchair_graph_config

tests/ut/test_platform.py

Lines changed: 0 additions & 25 deletions

@@ -28,7 +28,6 @@ def setUp(self):
         self.mock_vllm_config.speculative_config = None

         self.mock_ascend_config = MagicMock()
-        self.mock_ascend_config.expert_tensor_parallel_size = 0
         self.mock_ascend_config.torchair_graph_config.enabled = False
         self.mock_ascend_config.ascend_scheduler_config.enabled = False

@@ -253,30 +252,6 @@ def test_check_and_update_config_basic_config_update(
         mock_init_ascend.assert_called_once_with(self.mock_vllm_config)
         mock_check_ascend.assert_called_once()

-    @patch("vllm_ascend.utils.is_310p", return_value=False)
-    @patch("vllm_ascend.ascend_config.check_ascend_config")
-    @patch("vllm_ascend.ascend_config.init_ascend_config")
-    def test_check_and_update_config_expert_parallel_enabled(
-            self, mock_init_ascend, mock_check_ascend, mock_is_310p):
-        mock_init_ascend.return_value = self.mock_ascend_config
-        self.mock_vllm_config.parallel_config.enable_expert_parallel = True
-        self.mock_vllm_config.parallel_config.tensor_parallel_size = 2
-        self.mock_vllm_config.parallel_config.world_size_across_dp = 4
-
-        from vllm_ascend import platform
-
-        importlib.reload(platform)
-
-        self.platform.check_and_update_config(self.mock_vllm_config)
-
-        self.assertEqual(
-            self.mock_vllm_config.parallel_config.expert_tensor_parallel_size,
-            1)
-        self.assertEqual(
-            self.mock_vllm_config.parallel_config.expert_parallel_size,
-            self.mock_vllm_config.parallel_config.world_size_across_dp,
-        )
-
     @patch("vllm_ascend.utils.is_310p", return_value=False)
     @patch("vllm_ascend.ascend_config.check_ascend_config")
     @patch("vllm_ascend.ascend_config.init_ascend_config")

vllm_ascend/ascend_config.py

Lines changed: 0 additions & 2 deletions

@@ -45,8 +45,6 @@ def __init__(self, vllm_config):
         self.ascend_scheduler_config = AscendSchedulerConfig(
             ascend_scheduler_config)

-        self.expert_tensor_parallel_size = int(
-            additional_config.get("expert_tensor_parallel_size", 0))
         self.expert_map_path = additional_config.get("expert_map_path", None)
         self.chunked_prefill_for_mla = additional_config.get(
             "chunked_prefill_for_mla", False)
