File tree Expand file tree Collapse file tree 2 files changed +2
-4
lines changed Expand file tree Collapse file tree 2 files changed +2
-4
lines changed Original file line number Diff line number Diff line change @@ -48,6 +48,7 @@ Run the following script to start the vLLM server on Multi-NPU:
48
48
```bash
49
49
vllm serve /path/to/pangu-pro-moe-model \
50
50
--tensor-parallel-size 4 \
51
+ --enable-expert-parallel \
51
52
--trust-remote-code \
52
53
--enforce-eager
53
54
```
@@ -113,6 +114,7 @@ if __name__ == "__main__":
113
114
114
115
llm = LLM(model="/path/to/pangu-pro-moe-model",
115
116
tensor_parallel_size = 4 ,
117
+ enable_expert_parallel = True ,
116
118
distributed_executor_backend="mp",
117
119
max_model_len = 1024 ,
118
120
trust_remote_code = True ,
Original file line number Diff line number Diff line change 1
- import os
2
-
3
1
import pytest
4
2
5
3
from tests .conftest import VllmRunner
6
4
from tests .model_utils import check_outputs_equal
7
5
8
6
9
- @pytest .mark .skipif (os .getenv ("VLLM_USE_V1" ) == "0" ,
10
- reason = "ep is not supported on v0" )
11
7
@pytest .mark .parametrize ("model_name" , ["deepseek-ai/DeepSeek-V2-Lite-Chat" ])
12
8
def test_e2e_ep_correctness (model_name ):
13
9
example_prompts = [
You can’t perform that action at this time.
0 commit comments