Skip to content

Commit 3fdb901

Browse files
committed
reset attention_factor to old behaviour
1 parent e0a7c80 commit 3fdb901

File tree

1 file changed: +4 additions, -1 deletion

fast_llm/layers/transformer/rotary/rotary.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,10 @@ class YarnRotary[ConfigType: YarnRotaryConfig](DefaultRotary[YarnRotaryConfig]):
 181 181          """
 182 182
 183 183      def _get_frequencies(self, sequence_length: int, kv_channels: int, device="cuda") -> torch.Tensor:
 184     -        return super()._get_frequencies(sequence_length, kv_channels, device) * self._config.attention_factor
     184 +        attention_factor = self._config.attention_factor
     185 +        if attention_factor is None:
     186 +            attention_factor = 0.1 * math.log(self._config.scale_factor) + 1.0
     187 +        return super()._get_frequencies(sequence_length, kv_channels, device) * attention_factor
 185 188
 186 189      def _get_angle_scales(self, kv_channels: int, device="cuda") -> torch.Tensor:
 187 190          scales = super()._get_angle_scales(kv_channels, device)

0 commit comments

Comments
 (0)