Skip to content

Commit 70339c1

Browse files
withHades (offline0806)
authored and committed
[Feat] allow using aclgraph in ray backend (vllm-project#2589)
### What this PR does / why we need it?
Allow using aclgraph in the ray backend, for tp + pp + aclgraph on multiple machines.
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
- vLLM version: v0.10.1.1
- vLLM main: vllm-project/vllm@4ba0c58
Signed-off-by: withHades <244036962@qq.com>
Signed-off-by: offline0806 <z00858301@china.huawei.com>
1 parent 7f65a0c commit 70339c1

File tree

2 files changed

+0
-36
lines changed

2 files changed

+0
-36
lines changed

tests/ut/test_platform.py

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -363,36 +363,6 @@ def test_check_and_update_config_unsupported_cudagraph_mode(
363363
CUDAGraphMode.NONE,
364364
)
365365

366-
@patch("vllm_ascend.utils.is_310p", return_value=False)
367-
@patch("vllm_ascend.ascend_config.check_ascend_config")
368-
@patch("vllm_ascend.ascend_config.init_ascend_config")
369-
def test_check_and_update_config_disable_aclgraph_when_ray_enabled(
370-
self, mock_init_ascend, mock_check_ascend, mock_is_310p):
371-
mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config(
372-
)
373-
vllm_config = TestNPUPlatform.mock_vllm_config()
374-
vllm_config.model_config.enforce_eager = False
375-
vllm_config.compilation_config.level = CompilationLevel.PIECEWISE
376-
vllm_config.parallel_config.distributed_executor_backend = "ray"
377-
378-
with self.assertLogs(logger="vllm", level="WARNING") as cm:
379-
from vllm_ascend import platform
380-
381-
importlib.reload(platform)
382-
self.platform.check_and_update_config(vllm_config)
383-
print(30 * "=", f"cm.output: {cm.output}")
384-
self.assertTrue(
385-
"Ray distributed executor backend is not compatible with ACL Graph mode"
386-
in cm.output[0])
387-
self.assertEqual(
388-
vllm_config.compilation_config.level,
389-
CompilationLevel.NO_COMPILATION,
390-
)
391-
self.assertEqual(
392-
vllm_config.compilation_config.cudagraph_mode,
393-
CUDAGraphMode.NONE,
394-
)
395-
396366
@patch("vllm_ascend.utils.is_310p", return_value=False)
397367
@patch("vllm_ascend.ascend_config.check_ascend_config")
398368
@patch("vllm_ascend.ascend_config.init_ascend_config")

vllm_ascend/platform.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -185,12 +185,6 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
185185
"and use_cached_kv_cache_bytes in torchair_graph_config.")
186186
delete_torchair_cache_file()
187187

188-
if parallel_config.distributed_executor_backend == "ray":
189-
logger.warning(
190-
"Ray distributed executor backend is not compatible with ACL Graph mode "
191-
"right now. Setting CUDAGraphMode to NONE")
192-
compilation_config.cudagraph_mode = CUDAGraphMode.NONE
193-
194188
# set cudaprah sizes before extending `compilation_config.splitting_ops`
195189
vllm_config._set_cudagraph_sizes()
196190

0 commit comments

Comments (0)