Skip to content

Commit c0da054

Browse files
committed
fix
Signed-off-by: Icey <1790571317@qq.com>
1 parent 8c9d5d3 commit c0da054

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

vllm_ascend/models/qwen3_next.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,7 @@ def forward(
363363
output: torch.Tensor,
364364
cache_params: Optional[MambaCacheParams] = None,
365365
):
366-
return torch.ops.vllm.gdn_attention(
366+
return torch.ops.vllm.npu_gdn_attention(
367367
hidden_states,
368368
output,
369369
self.prefix,
@@ -1107,7 +1107,7 @@ def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
11071107
return self.model.get_expert_mapping()
11081108

11091109

1110-
def gdn_attention(
1110+
def npu_gdn_attention(
11111111
hidden_states: torch.Tensor,
11121112
output: torch.Tensor,
11131113
layer_name: str,
@@ -1117,7 +1117,7 @@ def gdn_attention(
11171117
self._forward(hidden_states=hidden_states, output=output)
11181118

11191119

1120-
def gdn_attention_fake(
1120+
def npu_gdn_attention_fake(
11211121
hidden_states: torch.Tensor,
11221122
output: torch.Tensor,
11231123
layer_name: str,
@@ -1126,9 +1126,9 @@ def gdn_attention_fake(
11261126

11271127

11281128
direct_register_custom_op(
1129-
op_name="gdn_attention",
1130-
op_func=gdn_attention,
1129+
op_name="npu_gdn_attention",
1130+
op_func=npu_gdn_attention,
11311131
mutates_args=["output"],
1132-
fake_impl=gdn_attention_fake,
1132+
fake_impl=npu_gdn_attention_fake,
11331133
dispatch_key=current_platform.dispatch_key,
11341134
)

0 commit comments

Comments (0)