
Commit 2feadb6

jasonjk-park authored and facebook-github-bot committed
Fix imports
Summary: Update imports for the latest module locations and the silu_mul interface change.

Reviewed By: jianyuh

Differential Revision: D64516452

fbshipit-source-id: b9b98a6eda45a093661e8b23f6b8ec300b559960
1 parent: 58f3b1f · commit: 2feadb6

1 file changed (+5, -4)

torchbenchmark/operators/fp8_fused_quant_gemm_rowwise/operator.py

Lines changed: 5 additions & 4 deletions
@@ -8,10 +8,12 @@
 import triton
 
 try:
-    from gen_ai.llm_inference.fb.llm.llama_layers import (
-        quantize_fp8_row,
+    from fbgemm_gpu.experimental.gemm.triton_gemm.fp8_gemm import quantize_fp8_row
+    from gen_ai.llm_inference.fb.llm.kernel.rms_norm import (
         rms_norm,
         rms_norm_fp8_rowwise_quant,
+    )
+    from gen_ai.llm_inference.fb.llm.kernel.silu_mul import (
         silu_mul,
         silu_mul_fp8_rowwise_quant,
     )
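For context, this hunk sits inside an optional-import guard: the try: pairs with an except clause (outside the diff) that records whether the fb-internal kernels are available, and that flag gates benchmark registration in the next hunk via @register_benchmark(enabled=HAS_FB_IMPORT). A minimal sketch of the pattern, assuming only the HAS_FB_IMPORT name shown in the second hunk; the actual except body in operator.py is not part of this diff:

    # Optional-import guard: the internal kernels only exist in the fb
    # environment, so record whether they imported cleanly.
    try:
        from fbgemm_gpu.experimental.gemm.triton_gemm.fp8_gemm import quantize_fp8_row
        from gen_ai.llm_inference.fb.llm.kernel.rms_norm import (
            rms_norm,
            rms_norm_fp8_rowwise_quant,
        )
        from gen_ai.llm_inference.fb.llm.kernel.silu_mul import (
            silu_mul,
            silu_mul_fp8_rowwise_quant,
        )

        HAS_FB_IMPORT = True
    except ImportError:
        # Without the internal modules, benchmarks gated on this flag are
        # skipped at registration time instead of failing on import.
        HAS_FB_IMPORT = False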
@@ -120,8 +122,7 @@ def _impl(x1, x2, wq, w_scale, wd):
     @register_benchmark(enabled=HAS_FB_IMPORT)
     def silu_mul_quant(self, x1, x2, wq, w_scale, wd) -> Callable:
         def _impl(x1, x2, wq, w_scale, wd):
-            y = torch.empty_like(x1)
-            x = silu_mul(x1, x2, y)
+            x = silu_mul(x1, x2)
             xq, x_scale = quantize_fp8_row(x, use_triton=True)
             if torch.version.hip:
                 # use CK kernel for AMD
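The second hunk tracks the silu_mul interface change called out in the summary: the kernel no longer takes a caller-allocated output tensor and instead allocates and returns its result. A before/after sketch with an eager reference body; the reference semantics (elementwise SiLU(x1) * x2, the usual SwiGLU-style fused op) are an assumption for illustration, not taken from this diff:

    import torch

    # Old interface: the caller preallocated the output buffer.
    #   y = torch.empty_like(x1)
    #   x = silu_mul(x1, x2, y)
    # New interface: the kernel returns the output itself.
    #   x = silu_mul(x1, x2)

    def silu_mul_reference(x1: torch.Tensor, x2: torch.Tensor) -> torch.Tensor:
        # Assumed semantics of the fused kernel: SiLU(x1) * x2, elementwise.
        return torch.nn.functional.silu(x1) * x2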
