Skip to content

Commit a334d73

Browse files
garroud and facebook-github-bot
authored and committed
add meta impl for int4 preshuffle kernels (#4384)
Summary: X-link: facebookresearch/FBGEMM#1458. Pull Request resolved: #4384. As per the title: add a fake (meta) implementation so the int4 preshuffle kernels integrate with AOTI. Reviewed By: jianyuh. Differential Revision: D76834825.
1 parent 0bef87a commit a334d73

File tree

1 file changed

+22
-0
lines changed

1 file changed

+22
-0
lines changed

fbgemm_gpu/experimental/gen_ai/src/quantize/quantize.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -581,6 +581,26 @@ at::Tensor f8i4bf16_rowwise_meta(
581581
return Y;
582582
}
583583

584+
/// Meta (shape-only) implementation registered for `preshuffle_i4`.
///
/// Allocates the two outputs without computing any values:
///   1. a tensor created with `at::empty_like(WQ)`;
///   2. a tensor of shape `{w_scale.size(0), 8, w_scale.size(1)}` created with
///      `w_scale`'s options.
///
/// Sizes are read as `at::SymInt` and the allocation goes through
/// `at::empty_symint`, matching `f8i4bf16_shuffled_meta`, so the function can
/// be called on tensors whose dimensions are symbolic.
///
/// @param WQ      Tensor whose metadata the first output copies.
/// @param w_scale Tensor with at least two dimensions; dims 0 and 1 determine
///                the shape of the second output.
/// @return        Tuple of the two uninitialized tensors described above.
std::tuple<at::Tensor, at::Tensor> preshuffle_i4_meta(
    at::Tensor WQ,
    at::Tensor w_scale) {
  const at::SymInt scale_dim0 = w_scale.sym_size(0);
  const at::SymInt scale_dim1 = w_scale.sym_size(1);
  // NOTE(review): the middle extent of 8 presumably mirrors the packed scale
  // layout produced by the device kernel -- confirm against that kernel.
  const at::SymInt packed_factor(8);
  return {
      at::empty_like(WQ),
      at::empty_symint(
          {scale_dim0, packed_factor, scale_dim1}, w_scale.options())};
}
591+
592+
/// Meta (shape-only) implementation registered for `f8i4bf16_shuffled`.
///
/// Produces an uninitialized 2-D tensor of shape
/// `{XQ.sym_size(0), WQ.sym_size(0)}` with dtype BFloat16; every other tensor
/// option is taken from `XQ`. The three scale arguments are accepted to match
/// the operator signature but are not read.
///
/// @param XQ Tensor supplying the first output dimension and the base options.
/// @param WQ Tensor supplying the second output dimension.
/// @return   The uninitialized output tensor.
at::Tensor f8i4bf16_shuffled_meta(
    at::Tensor XQ, // FP8
    at::Tensor WQ, // INT4
    at::Tensor /* x_scale */,
    at::Tensor /* w_scale */,
    at::Tensor /* w_scale_group */) {
  // Same options as the input, with only the dtype overridden.
  const auto out_options = XQ.options().dtype(at::kBFloat16);
  const at::SymInt rows = XQ.sym_size(0);
  const at::SymInt cols = WQ.sym_size(0);
  return at::empty_symint({rows, cols}, out_options);
}
603+
584604
at::Tensor bf16i4bf16_rowwise_meta(
585605
at::Tensor X, // BF16
586606
at::Tensor W, // INT4
@@ -702,6 +722,8 @@ TORCH_LIBRARY_IMPL(fbgemm, Meta, m) {
702722
m.impl("bf16i4bf16_rowwise_batched", bf16i4bf16_rowwise_batched_meta);
703723
m.impl("f8f8bf16_lite", f8f8bf16_lite_meta);
704724
m.impl("scaled_fp4_quant", scaled_fp4_quant_meta);
725+
m.impl("preshuffle_i4", preshuffle_i4_meta);
726+
m.impl("f8i4bf16_shuffled", f8i4bf16_shuffled_meta);
705727
#endif
706728
#ifdef USE_ROCM
707729
m.impl("f8f8f16_rowwise", f8f8f16_rowwise_meta);

0 commit comments

Comments
 (0)