
Commit f73bbbc

update doc and test

Signed-off-by: MengqingCao <cmq0113@163.com>

1 parent: b680c8b

2 files changed: +2, -4 lines


docs/source/tutorials/multi_npu_moge.md

Lines changed: 2 additions & 0 deletions
````diff
@@ -48,6 +48,7 @@ Run the following script to start the vLLM server on Multi-NPU:
 ```bash
 vllm serve /path/to/pangu-pro-moe-model \
 --tensor-parallel-size 4 \
+--enable-expert-parallel \
 --trust-remote-code \
 --enforce-eager
 ```
@@ -113,6 +114,7 @@ if __name__ == "__main__":
 
     llm = LLM(model="/path/to/pangu-pro-moe-model",
              tensor_parallel_size=4,
+             enable_expert_parallel=True,
              distributed_executor_backend="mp",
              max_model_len=1024,
              trust_remote_code=True,
````
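Once started with expert parallelism enabled, the server from the tutorial command above exposes vLLM's OpenAI-compatible API. A quick smoke test is a completion request (a minimal sketch, assuming the default port 8000 and the tutorial's placeholder model path; the prompt text is arbitrary):

```bash
# Sanity-check the expert-parallel deployment with a single completion
# request (sketch; assumes the server listens on the default port 8000).
curl http://localhost:8000/v1/completions \
    -H "Content-Type: application/json" \
    -d '{
        "model": "/path/to/pangu-pro-moe-model",
        "prompt": "The future of AI is",
        "max_tokens": 64
    }'
```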

tests/e2e/multicard/test_ep.py

Lines changed: 0 additions & 4 deletions
```diff
@@ -1,13 +1,9 @@
-import os
-
 import pytest
 
 from tests.conftest import VllmRunner
 from tests.model_utils import check_outputs_equal
 
 
-@pytest.mark.skipif(os.getenv("VLLM_USE_V1") == "0",
-                    reason="ep is not supported on v0")
 @pytest.mark.parametrize("model_name", ["deepseek-ai/DeepSeek-V2-Lite-Chat"])
 def test_e2e_ep_correctness(model_name):
     example_prompts = [
```
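The now unconditionally-run test can be invoked on its own (a sketch, assuming a multi-NPU host with the repository's test dependencies installed):

```bash
# Run the expert-parallel end-to-end correctness test directly
# (sketch; assumes a multicard environment set up for the e2e suite).
pytest -s tests/e2e/multicard/test_ep.py::test_e2e_ep_correctness
```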
