Skip to content

Commit 68afe74

Browse files
test_pd_connector
Signed-off-by: wangxiaoteng <wangxiaoteng@huawei.com>
1 parent 36f4b9e commit 68afe74

File tree

2 files changed

+282
-0
lines changed

2 files changed

+282
-0
lines changed

tests/e2e/multi_node/config/config.json

Lines changed: 256 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,5 +48,261 @@
4848
"request_rate": 1
4949
},
5050
"accuracy_parameters": {}
51+
},
52+
{
53+
"test_name": "test_deepseek_pd_llmdatadist",
54+
"disaggregate_prefill": true,
55+
"num_nodes": 2,
56+
"server_parameters": {
57+
"leader_config": {
58+
"model": "vllm-ascend/DeepSeek-V3-W8A8",
59+
"enforce_eager": true,
60+
"enable_expert_parallel": true,
61+
"data_parallel_size": 2,
62+
"data_parallel_size_local": 2,
63+
"quantization": "ascend",
64+
"tensor_parallel_size": 8,
65+
"kv_transfer_config": {
66+
"kv_connector": "LLMDataDistCMgrConnector",
67+
"kv_buffer_device": "npu",
68+
"kv_role": "kv_producer",
69+
"kv_connector_module_path": "vllm_ascend.distributed.llmdatadist_c_mgr_connector"
70+
},
71+
"additional_config": {}
72+
},
73+
"worker_config": {
74+
"model": "vllm-ascend/DeepSeek-V3-W8A8",
75+
"enable_expert_parallel": true,
76+
"data_parallel_size": 2,
77+
"data_parallel_size_local": 2,
78+
"quantization": "ascend",
79+
"tensor_parallel_size": 8,
80+
"max_num_seqs": 28,
81+
"kv_transfer_config": {
82+
"kv_connector": "LLMDataDistCMgrConnector",
83+
"kv_buffer_device": "npu",
84+
"kv_role": "kv_consumer",
85+
"kv_connector_module_path": "vllm_ascend.distributed.llmdatadist_c_mgr_connector"
86+
},
87+
"additional_config": {
88+
"torchair_graph_config": {
89+
"enabled": true,
90+
"graph_batch_sizes": [28]
91+
}
92+
}
93+
}
94+
},
95+
"client_parameters": {
96+
"model": "vllm-ascend/DeepSeek-V3-W8A8",
97+
"endpoint_type": "vllm",
98+
"dataset_name": "sharegpt",
99+
"dataset_path": "/root/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
100+
"num_prompts": 200,
101+
"request_rate": 1
102+
},
103+
"accuracy_parameters": {}
104+
},
105+
{
106+
"test_name": "test_qwen_pd_llmdatadist",
107+
"disaggregate_prefill": true,
108+
"num_nodes": 2,
109+
"server_parameters": {
110+
"leader_config": {
111+
"model": "vllm-ascend/Qwen3-235B-A22B-W8A8",
112+
"enforce_eager": true,
113+
"enable_expert_parallel": true,
114+
"data_parallel_size": 4,
115+
"data_parallel_size_local": 4,
116+
"quantization": "ascend",
117+
"tensor_parallel_size": 4,
118+
"kv_transfer_config": {
119+
"kv_connector": "LLMDataDistCMgrConnector",
120+
"kv_buffer_device": "npu",
121+
"kv_role": "kv_producer",
122+
"kv_connector_module_path": "vllm_ascend.distributed.llmdatadist_c_mgr_connector"
123+
},
124+
"additional_config": {}
125+
},
126+
"worker_config": {
127+
"model": "vllm-ascend/Qwen3-235B-A22B-W8A8",
128+
"enable_expert_parallel": true,
129+
"data_parallel_size": 4,
130+
"data_parallel_size_local": 4,
131+
"quantization": "ascend",
132+
"tensor_parallel_size": 4,
133+
"max_num_seqs": 32,
134+
"compilation_config": {
135+
"cudagraph_capture_sizes": [1,4,8,16,32]
136+
},
137+
"kv_transfer_config": {
138+
"kv_connector": "LLMDataDistCMgrConnector",
139+
"kv_buffer_device": "npu",
140+
"kv_role": "kv_consumer",
141+
"kv_connector_module_path": "vllm_ascend.distributed.llmdatadist_c_mgr_connector"
142+
},
143+
"additional_config": {}
144+
}
145+
},
146+
"client_parameters": {
147+
"model": "vllm-ascend/Qwen3-235B-A22B-W8A8",
148+
"endpoint_type": "vllm",
149+
"dataset_name": "sharegpt",
150+
"dataset_path": "/root/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
151+
"num_prompts": 200,
152+
"request_rate": 1
153+
},
154+
"accuracy_parameters": {}
155+
},
156+
{
157+
"test_name": "test_deepseek_pd_mooncake",
158+
"disaggregate_prefill": true,
159+
"num_nodes": 2,
160+
"server_parameters": {
161+
"leader_config": {
162+
"model": "vllm-ascend/DeepSeek-V3-W8A8",
163+
"enforce_eager": true,
164+
"enable_expert_parallel": true,
165+
"data_parallel_size": 2,
166+
"data_parallel_size_local": 2,
167+
"quantization": "ascend",
168+
"tensor_parallel_size": 8,
169+
"kv_transfer_config": {
170+
"kv_connector": "MooncakeConnector",
171+
"kv_role": "kv_producer",
172+
"kv_port": "36010",
173+
"engine_id": "0",
174+
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
175+
"kv_connector_extra_config": {
176+
"use_ascend_direct": true,
177+
"prefill": {
178+
"dp_size": 2,
179+
"tp_size": 8
180+
},
181+
"decode": {
182+
"dp_size": 2,
183+
"tp_size": 8
184+
}
185+
}
186+
},
187+
"additional_config": {}
188+
},
189+
"worker_config": {
190+
"model": "vllm-ascend/DeepSeek-V3-W8A8",
191+
"enable_expert_parallel": true,
192+
"data_parallel_size": 2,
193+
"data_parallel_size_local": 2,
194+
"quantization": "ascend",
195+
"tensor_parallel_size": 8,
196+
"max_num_seqs": 28,
197+
"kv_transfer_config": {
198+
"kv_connector": "MooncakeConnector",
199+
"kv_role": "kv_consumer",
200+
"kv_port": "36010",
201+
"engine_id": "1",
202+
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
203+
"kv_connector_extra_config": {
204+
"use_ascend_direct": true,
205+
"prefill": {
206+
"dp_size": 2,
207+
"tp_size": 8
208+
},
209+
"decode": {
210+
"dp_size": 2,
211+
"tp_size": 8
212+
}
213+
}
214+
},
215+
"additional_config": {
216+
"torchair_graph_config": {
217+
"enabled": true,
218+
"graph_batch_sizes": [28]
219+
}
220+
}
221+
}
222+
},
223+
"client_parameters": {
224+
"model": "vllm-ascend/DeepSeek-V3-W8A8",
225+
"endpoint_type": "vllm",
226+
"dataset_name": "sharegpt",
227+
"dataset_path": "/root/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
228+
"num_prompts": 200,
229+
"request_rate": 1
230+
},
231+
"accuracy_parameters": {}
232+
},
233+
{
234+
"test_name": "test_qwen_pd_mooncake",
235+
"disaggregate_prefill": true,
236+
"num_nodes": 2,
237+
"server_parameters": {
238+
"leader_config": {
239+
"model": "vllm-ascend/Qwen3-235B-A22B-W8A8",
240+
"enforce_eager": true,
241+
"enable_expert_parallel": true,
242+
"data_parallel_size": 4,
243+
"data_parallel_size_local": 4,
244+
"quantization": "ascend",
245+
"tensor_parallel_size": 4,
246+
"kv_transfer_config": {
247+
"kv_connector": "MooncakeConnector",
248+
"kv_role": "kv_producer",
249+
"kv_port": "36010",
250+
"engine_id": "0",
251+
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
252+
"kv_connector_extra_config": {
253+
"use_ascend_direct": true,
254+
"prefill": {
255+
"dp_size": 4,
256+
"tp_size": 4
257+
},
258+
"decode": {
259+
"dp_size": 4,
260+
"tp_size": 4
261+
}
262+
}
263+
},
264+
"additional_config": {}
265+
},
266+
"worker_config": {
267+
"model": "vllm-ascend/Qwen3-235B-A22B-W8A8",
268+
"enable_expert_parallel": true,
269+
"data_parallel_size": 4,
270+
"data_parallel_size_local": 4,
271+
"quantization": "ascend",
272+
"tensor_parallel_size": 4,
273+
"max_num_seqs": 32,
274+
"compilation_config": {
275+
"cudagraph_capture_sizes": [1,4,8,16,32]
276+
},
277+
"kv_transfer_config": {
278+
"kv_connector": "MooncakeConnector",
279+
"kv_role": "kv_consumer",
280+
"kv_port": "36010",
281+
"engine_id": "1",
282+
"kv_connector_module_path": "vllm_ascend.distributed.mooncake_connector",
283+
"kv_connector_extra_config": {
284+
"use_ascend_direct": true,
285+
"prefill": {
286+
"dp_size": 4,
287+
"tp_size": 4
288+
},
289+
"decode": {
290+
"dp_size": 4,
291+
"tp_size": 4
292+
}
293+
}
294+
},
295+
"additional_config": {}
296+
}
297+
},
298+
"client_parameters": {
299+
"model": "vllm-ascend/Qwen3-235B-A22B-W8A8",
300+
"endpoint_type": "vllm",
301+
"dataset_name": "sharegpt",
302+
"dataset_path": "/root/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
303+
"num_prompts": 200,
304+
"request_rate": 1
305+
},
306+
"accuracy_parameters": {}
51307
}
52308
]

tests/e2e/multi_node/scripts/run.sh

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,14 @@ checkout_src() {
2828
if [ ! -d "$SRC_DIR/vllm" ]; then
2929
git clone -b v0.10.2 https://github.com/vllm-project/vllm.git "$SRC_DIR/vllm"
3030
fi
31+
32+
#mooncake
33+
if [ ! -d "$SRC_DIR/Mooncake" ]; then
34+
git clone https://github.com/kvcache-ai/Mooncake.git "$SRC_DIR/Mooncake"
35+
cd "$SRC_DIR/Mooncake"
36+
git checkout 06cc217504a6f1b0cdaa26b096b985651b262748
37+
cd -
38+
fi
3139
}
3240

3341
install_sys_dependencies() {
@@ -51,6 +59,23 @@ install_vllm() {
5159
pip install -r "$SRC_DIR/vllm-ascend/requirements-dev.txt"
5260
}
5361

62+
# Build and install Mooncake from the checkout at "$SRC_DIR/Mooncake".
#
# Steps:
#   1. Install the system packages the build needs (python3, mpich).
#   2. Flip the USE_ASCEND_DIRECT CMake option from OFF to ON.
#   3. Run Mooncake's own dependencies.sh, then configure, build and install.
#
# Reads:   SRC_DIR (directory that contains the Mooncake checkout).
# Returns: the exit status of the build subshell (non-zero if the checkout
#          or build directory cannot be entered, otherwise that of
#          `make install`).
install_mooncake() {
    echo "====> Install mooncake"
    apt-get update
    # apt-get (not apt) is the interface intended for scripted use.
    apt-get install -y --allow-change-held-packages python3 python-is-python3
    apt-get install -y --no-install-recommends mpich libmpich-dev

    # Run the build in a subshell so the caller's working directory is left
    # untouched without needing matching `cd -` calls.
    (
        # Abort instead of running sed/cmake/make in the wrong directory.
        cd "$SRC_DIR/Mooncake" || exit 1
        sed -i '/option(USE_ASCEND_DIRECT)/s/OFF/ON/' mooncake-common/common.cmake
        bash dependencies.sh --yes

        # -p keeps a re-run from failing when build/ already exists.
        mkdir -p build
        cd build || exit 1
        cmake ..
        # A bare `make -j` places no limit on parallel jobs; cap it at the
        # number of available CPUs.
        make -j"$(nproc)"
        make install
    )
}
78+
5479
run_tests() {
5580
echo "====> Run tests"
5681
cd "$SRC_DIR/vllm-ascend"
@@ -63,6 +88,7 @@ main() {
6388
checkout_src
6489
install_sys_dependencies
6590
install_vllm
91+
install_mooncake
6692
run_tests
6793
}
6894

0 commit comments

Comments
 (0)