Skip to content

Commit 0f87cfe

Browse files
authored
Merge branch 'vllm-project:main' into main
2 parents: c846746 + 1f6465c · commit 0f87cfe

File tree

4 files changed: +8 additions, −1 deletion

docs/source/user_guide/configuration/additional_config.md

Lines changed: 1 addition & 0 deletions
@@ -48,6 +48,7 @@ The details of each config option are as follows:
 | `enable_multistream_mla`| bool | `False` | Whether to put vector ops of MLA to another stream. This option only takes effects on models using MLA (e.g., DeepSeek). |
 | `enable_multistream_moe`| bool | `False` | Whether to enable multistream shared expert. This option only takes effects on DeepSeek moe models. |
 | `enable_view_optimize` | bool | `True` | Whether to enable torchair view optimization |
+| `enable_frozen_parameter` | bool | `True` | Whether to fix the memory address of weights during inference to reduce the input address refresh time during graph execution. |
 | `use_cached_graph` | bool | `False` | Whether to use cached graph |
 | `graph_batch_sizes` | list[int] | `[]` | The batch size for torchair graph cache |
 | `graph_batch_sizes_init` | bool | `False` | Init graph batch size dynamically if `graph_batch_sizes` is empty |

tests/ut/test_ascend_config.py

Lines changed: 3 additions & 0 deletions
@@ -53,6 +53,7 @@ def test_init_ascend_config_without_additional_config(self):
         self.assertFalse(torchair_graph_config.enable_multistream_mla)
         self.assertFalse(torchair_graph_config.enable_multistream_moe)
         self.assertTrue(torchair_graph_config.enable_view_optimize)
+        self.assertTrue(torchair_graph_config.enable_frozen_parameter)
         self.assertFalse(torchair_graph_config.enable_kv_nz)

         ascend_scheduler_config = ascend_config.ascend_scheduler_config
@@ -70,6 +71,7 @@ def test_init_ascend_config_with_additional_config(self):
                 "enable_multistream_mla": True,
                 "enable_multistream_moe": True,
                 "enable_view_optimize": True,
+                "enable_frozen_parameter": True,
                 "enable_kv_nz": True
             },
             "ascend_scheduler_config": {
@@ -89,6 +91,7 @@ def test_init_ascend_config_with_additional_config(self):
         self.assertTrue(torchair_graph_config.enable_multistream_mla)
         self.assertTrue(torchair_graph_config.enable_multistream_moe)
         self.assertTrue(torchair_graph_config.enable_view_optimize)
+        self.assertTrue(torchair_graph_config.enable_frozen_parameter)
         self.assertTrue(torchair_graph_config.enable_kv_nz)

         ascend_scheduler_config = ascend_config.ascend_scheduler_config

vllm_ascend/ascend_config.py

Lines changed: 2 additions & 0 deletions
@@ -114,6 +114,8 @@ def __init__(self, torchair_graph_config):
             "enable_multistream_moe", False)
         self.enable_view_optimize = torchair_graph_config.get(
             "enable_view_optimize", True)
+        self.enable_frozen_parameter = torchair_graph_config.get(
+            "enable_frozen_parameter", True)
         self.enable_kv_nz = torchair_graph_config.get("enable_kv_nz", False)

         if not isinstance(self.graph_batch_sizes, list):

vllm_ascend/torchair/torchair_model_runner.py

Lines changed: 2 additions & 1 deletion
@@ -359,7 +359,8 @@ def _get_torchair_lazy_compiled_model(self, batch_size: int):
         config = torchair.CompilerConfig()
         if get_ascend_config().torchair_graph_config.mode:
             config.mode = get_ascend_config().torchair_graph_config.mode
-        config.experimental_config.frozen_parameter = True
+        config.experimental_config.frozen_parameter = \
+            get_ascend_config().torchair_graph_config.enable_frozen_parameter
         # enabling tiling_schedule_optimize on 300I Duo has some bugs, so we have to
         # disable it on 300I Duo platform now.
         config.experimental_config.tiling_schedule_optimize = not is_310p()

0 commit comments

Comments (0)