Skip to content

Commit 9bf0741

Browse files
garroud authored and
facebook-github-bot committed
add meta impl for int4 preshuffle kernels (#4384)
Summary: X-link: facebookresearch/FBGEMM#1458. As the title says: add fake (meta) implementations so the int4 preshuffle kernels integrate with AOTI. Reviewed By: jianyuh. Differential Revision: D76834825.
1 parent 170b79d commit 9bf0741

File tree

1 file changed

+22
-0
lines changed

1 file changed

+22
-0
lines changed

fbgemm_gpu/experimental/gen_ai/src/quantize/quantize.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -588,6 +588,26 @@ at::Tensor f8i4bf16_rowwise_meta(
588588
return Y;
589589
}
590590

591+
// Meta (shape-only) implementation registered for the "preshuffle_i4" op.
//
// Allocates placeholder outputs without touching tensor data:
//   - first element:  same shape and options as WQ (via at::empty_like);
//   - second element: shape {w_scale dim 0, 8, w_scale dim 1} with w_scale's
//     options.
// NOTE(review): the constant 8 presumably mirrors the layout produced by the
// real preshuffle kernel -- confirm against the device implementation.
//
// Sizes are read with sym_size() and the output is built with
// at::empty_symint() so that symbolic (dynamic) dimensions are propagated
// instead of being forced to concrete integers, matching
// f8i4bf16_shuffled_meta in this file.
std::tuple<at::Tensor, at::Tensor> preshuffle_i4_meta(
    at::Tensor WQ,
    at::Tensor w_scale) {
  const at::SymInt scale_dim0 = w_scale.sym_size(0);
  const at::SymInt scale_dim1 = w_scale.sym_size(1);
  return {
      at::empty_like(WQ),
      at::empty_symint(
          {scale_dim0, at::SymInt(8), scale_dim1}, w_scale.options())};
}
598+
599+
// Meta (shape-only) implementation registered for the "f8i4bf16_shuffled" op.
//
// Produces an uninitialized BF16 tensor of shape
// {XQ.sym_size(0), WQ.sym_size(0)} that otherwise inherits XQ's tensor
// options. The three scale arguments are accepted to match the op signature
// but are not inspected here.
at::Tensor f8i4bf16_shuffled_meta(
    at::Tensor XQ, // FP8
    at::Tensor WQ, // INT4
    at::Tensor /* x_scale */,
    at::Tensor /* w_scale */,
    at::Tensor /* w_scale_group */) {
  // Output keeps XQ's options except for the dtype, which is BF16.
  const auto out_options = XQ.options().dtype(at::kBFloat16);
  // Symbolic sizes are used so dynamic dimensions survive tracing.
  return at::empty_symint({XQ.sym_size(0), WQ.sym_size(0)}, out_options);
}
610+
591611
at::Tensor bf16i4bf16_rowwise_meta(
592612
at::Tensor X, // BF16
593613
at::Tensor W, // INT4
@@ -723,6 +743,8 @@ TORCH_LIBRARY_IMPL(fbgemm, Meta, m) {
723743
m.impl("bf16i4bf16_rowwise_batched", bf16i4bf16_rowwise_batched_meta);
724744
m.impl("f8f8bf16_lite", f8f8bf16_lite_meta);
725745
m.impl("scaled_fp4_quant", scaled_fp4_quant_meta);
746+
m.impl("preshuffle_i4", preshuffle_i4_meta);
747+
m.impl("f8i4bf16_shuffled", f8i4bf16_shuffled_meta);
726748
#endif
727749
#ifdef USE_ROCM
728750
m.impl("f8f8f16_rowwise", f8f8f16_rowwise_meta);

0 commit comments

Comments
 (0)