
Commit 8d7de9c

further simplification

1 parent 75469e8 · commit 8d7de9c

2 files changed (+7, −9 lines changed)


memory_efficient_attention_pytorch/flash_attention.py (6 additions, 8 deletions)
@@ -8,7 +8,7 @@

 # constants

-EPSILON = 1e-6
+EPSILON = 1e-10

 # helper functions

@@ -81,24 +81,22 @@ def forward(ctx, q, k, v, mask, causal, q_bucket_size, k_bucket_size):
                     attn_weights.masked_fill_(causal_mask, max_neg_value)

                 block_row_maxes = attn_weights.amax(dim = -1, keepdims = True)
-                attn_weights -= block_row_maxes
-                exp_weights = torch.exp(attn_weights)
+                new_row_maxes = torch.maximum(block_row_maxes, row_maxes)
+
+                exp_weights = torch.exp(attn_weights - new_row_maxes)

                 if exists(col_mask):
                     exp_weights.masked_fill_(~col_mask, 0.)

                 block_row_sums = exp_weights.sum(dim = -1, keepdims = True).clamp(min = EPSILON)

-                new_row_maxes = torch.maximum(block_row_maxes, row_maxes)
-
                 exp_values = einsum('... i j, ... j d -> ... i d', exp_weights, vc)

                 exp_row_max_diff = torch.exp(row_maxes - new_row_maxes)
-                exp_block_row_max_diff = torch.exp(block_row_maxes - new_row_maxes)

-                new_row_sums = exp_row_max_diff * row_sums + exp_block_row_max_diff * block_row_sums
+                new_row_sums = exp_row_max_diff * row_sums + block_row_sums

-                oc.mul_(exp_row_max_diff).add_(exp_block_row_max_diff * exp_values)
+                oc.mul_(exp_row_max_diff).add_(exp_values)

                 row_maxes.copy_(new_row_maxes)
                 row_sums.copy_(new_row_sums)
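
The simplification in this hunk folds the running-max update into the exponentiation step: by exponentiating the block's attention weights against new_row_maxes directly, the current block's contribution is already on the final scale, so the separate exp_block_row_max_diff rescale can be dropped. A minimal standalone sketch of the resulting update, reusing the variable names from the diff (the update_block helper itself is hypothetical and not part of the library):

import torch
from torch import einsum

EPSILON = 1e-10

def update_block(attn_weights, vc, oc, row_sums, row_maxes):
    # running softmax statistics for one (query block, key/value block) pair;
    # oc, row_sums and row_maxes are updated in place, as in the diff above
    block_row_maxes = attn_weights.amax(dim = -1, keepdim = True)
    new_row_maxes = torch.maximum(block_row_maxes, row_maxes)

    # exponentiate against the updated running max, so no separate
    # exp(block_row_maxes - new_row_maxes) correction is needed later
    exp_weights = torch.exp(attn_weights - new_row_maxes)
    block_row_sums = exp_weights.sum(dim = -1, keepdim = True).clamp(min = EPSILON)

    exp_values = einsum('... i j, ... j d -> ... i d', exp_weights, vc)

    # rescale only the previously accumulated output and row sums
    exp_row_max_diff = torch.exp(row_maxes - new_row_maxes)
    new_row_sums = exp_row_max_diff * row_sums + block_row_sums

    oc.mul_(exp_row_max_diff).add_(exp_values)
    row_maxes.copy_(new_row_maxes)
    row_sums.copy_(new_row_sums)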

setup.py (1 addition, 1 deletion)
@@ -3,7 +3,7 @@
 setup(
   name = 'memory-efficient-attention-pytorch',
   packages = find_packages(exclude=[]),
-  version = '0.1.5',
+  version = '0.1.6',
   license='MIT',
   description = 'Memory Efficient Attention - Pytorch',
   long_description_content_type = 'text/markdown',
