Commit 2bef0e6
Author: zhangdepeng

bugfix for torchair

Signed-off-by: panchao <panchao13@huawei.com>

1 parent 6c97336

4 files changed: 5 additions, 2 deletions

vllm_ascend/ascend_config.py (1 addition, 0 deletions)

@@ -60,6 +60,7 @@ class TorchairGraphConfig:
 
     def __init__(self, torchair_graph_config):
         self.enabled = torchair_graph_config.get("enabled", False)
+        self.mode = torchair_graph_config.get("mode", 'max-autotune')
         self.use_cached_graph = torchair_graph_config.get(
             "use_cached_graph", False)
         self.graph_batch_sizes = torchair_graph_config.get(
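The new "mode" key travels in the same torchair_graph_config dictionary that already carries "enabled" and "use_cached_graph", defaulting to 'max-autotune'. A minimal usage sketch, assuming the usual vllm-ascend convention of passing this dictionary through vLLM's additional_config; the surrounding call and the model name are illustrative and not part of this commit, and the accepted mode values are whatever torchair's CompilerConfig supports.

from vllm import LLM

# Hypothetical example: only the "enabled" and "mode" keys appear in this
# commit; the additional_config plumbing follows the usual vllm-ascend
# convention and the model name is a placeholder.
llm = LLM(
    model="Qwen/Qwen3-30B-A3B",
    additional_config={
        "torchair_graph_config": {
            "enabled": True,
            "mode": "max-autotune",  # default value added by this commit
        },
    },
)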

vllm_ascend/torchair/torchair_attention.py (2 additions, 1 deletion)

@@ -304,6 +304,7 @@ def __init__(
         self.num_queries_per_kv = self.num_heads // self.num_kv_heads
         self.key_cache = None
         self.value_cache = None
+        self.scale_tensor = torch.zeros((), device='npu', dtype=torch.int32)
 
     def forward(
         self,
@@ -366,7 +367,7 @@ def forward(
             key_cache, value_cache = kv_cache[0], kv_cache[1]
             slots = attn_metadata.slot_mapping
 
-            block_size = key_cache.shape[1]
+            block_size = self.scale_tensor + key_cache.shape[1]
             slots_indices = slots.reshape(-1, 1)
            block_indices = slots_indices // block_size
            slots_indices = slots_indices % block_size
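The forward-path change replaces a plain Python int (key_cache.shape[1]) with that int added to the zero-dimensional int32 tensor created in __init__, so the following // and % operate on tensors rather than host scalars. The commit message only says "bugfix for torchair", but keeping this arithmetic in tensor form is the usual way to make it traceable inside a torchair graph. A CPU-only sketch of the slot decomposition, with illustrative values:

import torch

# Illustrative values; in the real code block_size is key_cache.shape[1]
# and the zero scalar tensor lives on the 'npu' device.
block_size = 16
scale_tensor = torch.zeros((), dtype=torch.int32)
slots = torch.tensor([0, 5, 16, 33])           # flat slot ids from slot_mapping

slots_indices = slots.reshape(-1, 1)
block_size_t = scale_tensor + block_size       # 0-dim tensor, not a Python int
block_indices = slots_indices // block_size_t  # which KV-cache block each slot hits
offsets = slots_indices % block_size_t         # position inside that block
print(block_indices.flatten().tolist())        # [0, 0, 1, 2]
print(offsets.flatten().tolist())              # [0, 5, 0, 1]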

vllm_ascend/torchair/torchair_model_runner.py (1 addition, 0 deletions)

@@ -316,6 +316,7 @@ def _get_torchair_lazy_compiled_model(self, batch_size: int):
            communication_adaptation_310p()
 
        config = torchair.CompilerConfig()
+       config.mode = get_ascend_config().torchair_graph_config.mode
        config.experimental_config.frozen_parameter = True
        # enabling tiling_schedule_optimize on 300I Duo has some bugs, so we have to
        # disable it on 300I Duo platform now.
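With this line, the mode chosen in torchair_graph_config reaches torchair's CompilerConfig before the model is lazily compiled. A sketch of how such a config is typically consumed: CompilerConfig, mode, and experimental_config.frozen_parameter come from the diff, while get_npu_backend and torch.compile reflect the common torchair pattern and are an assumption here, as is an Ascend environment with torch_npu available.

import torch
import torchair

config = torchair.CompilerConfig()
config.mode = "max-autotune"  # in this commit, sourced from torchair_graph_config.mode
config.experimental_config.frozen_parameter = True

# Assumed usage: torchair exposes an NPU backend for torch.compile.
npu_backend = torchair.get_npu_backend(compiler_config=config)
model = torch.nn.Linear(8, 8)  # stand-in for the real model
compiled_model = torch.compile(model, backend=npu_backend, dynamic=False)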

vllm_ascend/torchair/utils.py (1 addition, 1 deletion)

@@ -168,7 +168,7 @@ def register_torchair_model():
         "vllm_ascend.torchair.models.qwen2:CustomQwen2ForCausalLM")
 
     ModelRegistry.register_model(
-        "Qwen3ForCausalLM",
+        "Qwen3MoeForCausalLM",
         "vllm_ascend.torchair.models.qwen3_moe:CustomQwen3MoeForCausalLM")
