Skip to content

Commit ea53f90

Browse files
panchao-hub and zhangdepeng authored
support torchair mode (vllm-project#2641)
### What this PR does / why we need it?
Support torchair mode.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
- vLLM version: v0.10.1.1
- vLLM main: vllm-project/vllm@5438967

Signed-off-by: zhangdepeng <zhangdepeng2@huawei.com>
Signed-off-by: p00465316 <panchao13@huawei.com>
Co-authored-by: zhangdepeng <zhangdepeng2@huawei.com>
1 parent b72e340 commit ea53f90

File tree

4 files changed

+19
-0
lines changed

4 files changed

+19
-0
lines changed

docs/source/user_guide/configuration/additional_config.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ The details of each config option are as follows:
4343
| Name | Type | Default | Description |
4444
| ---- | ---- | ------- | ----------- |
4545
| `enabled` | bool | `False` | Whether to enable torchair graph mode. Currently only DeepSeek series models and PanguProMoE are supported to use torchair graph mode |
46+
| `mode` | str | `''` | The torchair compilation mode (e.g. `reduce-overhead`). May only be set when torchair graph mode is enabled |
4647
| `enable_multistream_mla`| bool | `False` | Whether to put vector ops of MLA to another stream. This option only takes effects on models using MLA (e.g., DeepSeek). |
4748
| `enable_multistream_moe`| bool | `False` | Whether to enable multistream shared expert. This option only takes effects on DeepSeek moe models. |
4849
| `enable_view_optimize` | bool | `True` | Whether to enable torchair view optimization |

tests/ut/test_ascend_config.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ def test_init_ascend_config_without_additional_config(self):
4646

4747
torchair_graph_config = ascend_config.torchair_graph_config
4848
self.assertFalse(torchair_graph_config.enabled)
49+
self.assertEqual(torchair_graph_config.mode, '')
4950
self.assertFalse(torchair_graph_config.use_cached_graph)
5051
self.assertEqual(torchair_graph_config.graph_batch_sizes, [])
5152
self.assertFalse(torchair_graph_config.graph_batch_sizes_init)
@@ -294,6 +295,17 @@ def test_ascend_config_load_error(self):
294295
}
295296
init_ascend_config(test_vllm_config)
296297

298+
# mode should not be configured without torchair graph mode
299+
with self.assertRaises(RuntimeError):
300+
test_vllm_config.additional_config = {
301+
"torchair_graph_config": {
302+
"enabled": False,
303+
"mode": 'max-autotune',
304+
},
305+
"refresh": True
306+
}
307+
init_ascend_config(test_vllm_config)
308+
297309
# enable_kv_nz should not be enabled without torchair graph mode
298310
with self.assertRaises(RuntimeError):
299311
test_vllm_config.additional_config = {

vllm_ascend/ascend_config.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ class TorchairGraphConfig:
7070

7171
def __init__(self, torchair_graph_config):
7272
self.enabled = torchair_graph_config.get("enabled", False)
73+
self.mode = torchair_graph_config.get("mode", '')
7374
self.use_cached_graph = torchair_graph_config.get(
7475
"use_cached_graph", False)
7576
self.graph_batch_sizes = torchair_graph_config.get(
@@ -91,6 +92,9 @@ def __init__(self, torchair_graph_config):
9192
"graph_batch_sizes_init is only valid when graph_batch_sizes is empty"
9293
)
9394
if not self.enabled:
95+
if self.mode:
96+
raise RuntimeError(
97+
"mode is valid only when Torchair graph mode is enabled")
9498
if self.use_cached_graph:
9599
raise RuntimeError(
96100
"use_cached_graph is valid only when Torchair graph mode is enabled"

vllm_ascend/torchair/torchair_model_runner.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,8 @@ def _get_torchair_lazy_compiled_model(self, batch_size: int):
324324
communication_adaptation_310p()
325325

326326
config = torchair.CompilerConfig()
327+
if get_ascend_config().torchair_graph_config.mode:
328+
config.mode = get_ascend_config().torchair_graph_config.mode
327329
config.experimental_config.frozen_parameter = True
328330
# enabling tiling_schedule_optimize on 300I Duo has some bugs, so we have to
329331
# disable it on 300I Duo platform now.

0 commit comments

Comments (0)