
Commit e6251f1

bertmaher authored and facebook-github-bot committed
Enable flash_v3 backward (#2445)
Summary: Pull Request resolved: #2445
Reviewed By: xuzhao9
Differential Revision: D61924864
Pulled By: bertmaher
fbshipit-source-id: 760036820c1196a921eaff4d99bf8647e25264ee
1 parent c0409aa commit e6251f1

1 file changed: 2 additions, 4 deletions

torchbenchmark/operators/flash_attention/operator.py

Lines changed: 2 additions & 4 deletions
@@ -73,7 +73,7 @@
 try:
     torch_lib_path = os.path.join(os.path.dirname(__file__), "lib")
     with add_ld_library_path(torch_lib_path):
-        import flashattn_hopper_cuda
+        from flash_attn_interface import flash_attn_func as flash_attn_v3
 except (ImportError, IOError, AttributeError):
     HAS_FLASH_V3 = False
     pass
@@ -223,9 +223,7 @@ def flash_v3(
         q = q.transpose(1, 2).contiguous()
         k = k.transpose(1, 2).contiguous()
         v = v.transpose(1, 2).contiguous()
-        fn = lambda: flashattn_hopper_cuda.fwd(
-            q, k, v, None, self.sm_scale, self.causal
-        )
+        fn = lambda: flash_attn_v3(q, k, v, self.sm_scale, self.causal)
         return fn

     @register_benchmark()
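
The substantive change is swapping the raw flashattn_hopper_cuda.fwd binding for the flash_attn_func wrapper exported by flash_attn_interface (imported as flash_attn_v3). The Python wrapper goes through PyTorch autograd, which is what allows the backward pass the commit title refers to. Below is a minimal sketch of exercising that call path; it is not code from the commit. It assumes a flash-attn v3 (Hopper) build that exposes flash_attn_interface.flash_attn_func and a GPU that supports it; the shapes, dtypes, and tuple-return handling are illustrative assumptions.

import torch
from flash_attn_interface import flash_attn_func as flash_attn_v3

# Illustrative shapes: batch, sequence length, heads, head dimension.
B, S, H, D = 4, 1024, 16, 128
q = torch.randn(B, S, H, D, device="cuda", dtype=torch.bfloat16, requires_grad=True)
k = torch.randn(B, S, H, D, device="cuda", dtype=torch.bfloat16, requires_grad=True)
v = torch.randn(B, S, H, D, device="cuda", dtype=torch.bfloat16, requires_grad=True)

# Same positional call shape as the benchmark: (q, k, v, softmax_scale, causal).
out = flash_attn_v3(q, k, v, D ** -0.5, True)
if isinstance(out, tuple):
    # Assumption: some flash-attn v3 builds also return softmax_lse; keep only the output.
    out = out[0]

# Backward works because the call goes through the autograd-aware Python wrapper
# rather than the raw flashattn_hopper_cuda.fwd kernel binding.
out.sum().backward()
print(q.grad.shape)  # torch.Size([4, 1024, 16, 128])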

Comments (0)