@@ -363,7 +363,7 @@ def forward(
         output: torch.Tensor,
         cache_params: Optional[MambaCacheParams] = None,
     ):
-        return torch.ops.vllm.gdn_attention(
+        return torch.ops.vllm.npu_gdn_attention(
            hidden_states,
            output,
            self.prefix,
@@ -1107,7 +1107,7 @@ def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
         return self.model.get_expert_mapping()
 
 
-def gdn_attention(
+def npu_gdn_attention(
     hidden_states: torch.Tensor,
     output: torch.Tensor,
     layer_name: str,
@@ -1117,7 +1117,7 @@ def gdn_attention(
     self._forward(hidden_states=hidden_states, output=output)
 
 
-def gdn_attention_fake(
+def npu_gdn_attention_fake(
     hidden_states: torch.Tensor,
     output: torch.Tensor,
     layer_name: str,
@@ -1126,9 +1126,9 @@ def gdn_attention_fake(
 
 
 direct_register_custom_op(
-    op_name="gdn_attention",
-    op_func=gdn_attention,
+    op_name="npu_gdn_attention",
+    op_func=npu_gdn_attention,
     mutates_args=["output"],
-    fake_impl=gdn_attention_fake,
+    fake_impl=npu_gdn_attention_fake,
     dispatch_key=current_platform.dispatch_key,
 )
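
Note on the pattern being renamed here (not part of the diff): direct_register_custom_op registers the op under the vllm namespace together with a fake (meta) implementation, which is what lets torch.compile trace the torch.ops.vllm.npu_gdn_attention call in forward() without executing the NPU kernel. Below is a minimal sketch of the same register-then-call pattern using plain torch.library (PyTorch >= 2.4); the "demo" namespace, the copy_ stand-in body, and the shapes are illustrative assumptions, not vLLM's actual implementation.

import torch
from torch.library import Library

demo_lib = Library("demo", "FRAGMENT")  # hypothetical namespace, not "vllm"

# Schema: Tensor(a!) marks `output` as mutated in place, mirroring
# mutates_args=["output"] in the registration above.
demo_lib.define(
    "npu_gdn_attention(Tensor hidden_states, Tensor(a!) output, str layer_name) -> ()"
)

def _npu_gdn_attention_impl(hidden_states, output, layer_name):
    # Real impl: writes the result into `output` in place.
    output.copy_(hidden_states)  # stand-in for the actual GDN attention kernel

def _npu_gdn_attention_fake(hidden_states, output, layer_name):
    # Fake impl: does no computation; it only has to be traceable on
    # meta tensors, like npu_gdn_attention_fake in the diff.
    return

demo_lib.impl("npu_gdn_attention", _npu_gdn_attention_impl, "CPU")
torch.library.register_fake("demo::npu_gdn_attention", _npu_gdn_attention_fake)

# Call site mirrors the forward() change above: torch.ops.<namespace>.<op>(...)
h = torch.randn(4, 8)
out = torch.empty_like(h)
torch.ops.demo.npu_gdn_attention(h, out, "layer0")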