-
Notifications
You must be signed in to change notification settings - Fork 29
Open
Description
Hi team,
I came across an error when compiling scalar bfloat16 code (x := x ^ 2, i.e. x * x) for Phoenix through MLIR-AIR and MLIR-AIE:
LLVM ERROR: unable to legalize instruction: %164:_(<2 x s16>) = G_FMUL %163:_, %163:_ (in function: core_0_2)
Please see the input IRs below:
MLIR-AIE:
// Core program attached to %tile_0_2. It loops forever: acquire two locks,
// store the element-wise square of %buf0 into %buf1, release two locks, repeat.
// NOTE(review): %tile_0_2, the four locks and %buf0/%buf1 are defined outside this excerpt.
%core_0_2 = aie.core(%tile_0_2) {
// Loop bounds/step for the 32x32 traversal below.
%c32 = arith.constant 32 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
cf.br ^bb1
^bb1: // 2 preds: ^bb0, ^bb1
// Acquire both locks (AcquireGreaterEqual, value 1) before touching the buffers.
aie.use_lock(%lock_0_2_3, AcquireGreaterEqual, 1)
aie.use_lock(%lock_0_2_2, AcquireGreaterEqual, 1)
// Visit every [%arg0, %arg1] position of the 32x32 bf16 memrefs, one scalar at a time.
scf.for %arg0 = %c0 to %c32 step %c1 {
scf.for %arg1 = %c0 to %c32 step %c1 {
%0 = memref.load %buf0[%arg0, %arg1] : memref<32x32xbf16, 2 : i32>
// Scalar bf16 multiply of the loaded value by itself (x * x).
// NOTE(review): presumably the op that ends up as the G_FMUL named in the reported error — confirm.
%1 = arith.mulf %0, %0 : bf16
memref.store %1, %buf1[%arg0, %arg1] : memref<32x32xbf16, 2 : i32>
}
}
// Release two locks (value 1) once the whole buffer has been written.
aie.use_lock(%lock_0_2, Release, 1)
aie.use_lock(%lock_0_2_4, Release, 1)
// Unconditional branch back to ^bb1: this region has no exit path.
cf.br ^bb1
} {elf_file = "gelu_seg_core_0_2.elf"}
LLVM IR (MLIR LLVM dialect):
// LLVM-dialect module with target triple "aie2". It declares 32x32 bf16 buffer
// globals, several external functions, and defines @core_0_2.
// Within this module only @buf0, @buf1, @llvm.aie2.acquire and @llvm.aie2.release
// are referenced by @core_0_2; the other globals/declarations are unused here.
module attributes {llvm.target_triple = "aie2"} {
llvm.mlir.global external @buf0() {addr_space = 0 : i32} : !llvm.array<32 x array<32 x bf16>>
llvm.mlir.global external @buf1() {addr_space = 0 : i32} : !llvm.array<32 x array<32 x bf16>>
llvm.mlir.global external @buf2() {addr_space = 0 : i32} : !llvm.array<32 x array<32 x bf16>>
llvm.mlir.global external @buf3() {addr_space = 0 : i32} : !llvm.array<32 x array<32 x bf16>>
llvm.func @debug_i32(i32) attributes {sym_visibility = "private"}
llvm.func @llvm.aie2.put.ms(i32, i32) attributes {sym_visibility = "private"}
llvm.func @llvm.aie2.get.ss() -> !llvm.struct<(i32, i32)> attributes {sym_visibility = "private"}
llvm.func @llvm.aie2.mcd.write.vec(vector<16xi32>, i32) attributes {sym_visibility = "private"}
llvm.func @llvm.aie2.scd.read.vec(i32) -> vector<16xi32> attributes {sym_visibility = "private"}
llvm.func @llvm.aie2.acquire(i32, i32) attributes {sym_visibility = "private"}
llvm.func @llvm.aie2.release(i32, i32) attributes {sym_visibility = "private"}
llvm.mlir.global external @air_channel_3() {addr_space = 0 : i32} : !llvm.array<32 x array<32 x bf16>>
llvm.mlir.global external @air_channel_0() {addr_space = 0 : i32} : !llvm.array<32 x array<32 x bf16>>
// Endless loop: two acquire calls, a 32x32 scalar pass that stores the square
// of each @buf0 element into @buf1, two release calls, then back to the start.
llvm.func @core_0_2() {
%0 = llvm.mlir.addressof @buf1 : !llvm.ptr
%1 = llvm.mlir.addressof @buf0 : !llvm.ptr
// %2..%5 are the first arguments of the acquire/release calls below
// (presumably lock IDs — confirm); %7 (-1) accompanies acquire, %6 (1) accompanies release.
%2 = llvm.mlir.constant(48 : i32) : i32
%3 = llvm.mlir.constant(51 : i32) : i32
%4 = llvm.mlir.constant(50 : i32) : i32
%5 = llvm.mlir.constant(49 : i32) : i32
%6 = llvm.mlir.constant(1 : i32) : i32
%7 = llvm.mlir.constant(-1 : i32) : i32
// Loop bound (32), step (1) and start (0) as i64.
%8 = llvm.mlir.constant(32 : index) : i64
%9 = llvm.mlir.constant(1 : index) : i64
%10 = llvm.mlir.constant(0 : index) : i64
llvm.br ^bb1
^bb1: // 2 preds: ^bb0, ^bb6
llvm.call @llvm.aie2.acquire(%5, %7) : (i32, i32) -> ()
llvm.call @llvm.aie2.acquire(%4, %7) : (i32, i32) -> ()
llvm.br ^bb2(%10 : i64)
// Outer loop header: continue while %11 < 32, otherwise go to the release block.
^bb2(%11: i64): // 2 preds: ^bb1, ^bb5
%12 = llvm.icmp "slt" %11, %8 : i64
llvm.cond_br %12, ^bb3(%10 : i64), ^bb6
// Inner loop header: continue while %13 < 32.
^bb3(%13: i64): // 2 preds: ^bb2, ^bb4
%14 = llvm.icmp "slt" %13, %8 : i64
llvm.cond_br %14, ^bb4, ^bb5
// Loop body: linear element index %17 = %11 * 32 + %13, used for both load and store.
^bb4: // pred: ^bb3
%15 = llvm.getelementptr %1[0, 0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<32 x array<32 x bf16>>
%16 = llvm.mul %11, %8 overflow<nsw, nuw> : i64
%17 = llvm.add %16, %13 overflow<nsw, nuw> : i64
%18 = llvm.getelementptr inbounds|nuw %15[%17] : (!llvm.ptr, i64) -> !llvm.ptr, bf16
%19 = llvm.load %18 : !llvm.ptr -> bf16
// Scalar bf16 multiply of the loaded element by itself.
// NOTE(review): likely the instruction behind the reported G_FMUL legalization failure — confirm.
%20 = llvm.fmul %19, %19 : bf16
%21 = llvm.getelementptr %0[0, 0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<32 x array<32 x bf16>>
%22 = llvm.getelementptr inbounds|nuw %21[%17] : (!llvm.ptr, i64) -> !llvm.ptr, bf16
llvm.store %20, %22 : bf16, !llvm.ptr
%23 = llvm.add %13, %9 : i64
llvm.br ^bb3(%23 : i64)
// Inner loop finished: advance the outer index.
^bb5: // pred: ^bb3
%24 = llvm.add %11, %9 : i64
llvm.br ^bb2(%24 : i64)
// Both loops finished: issue the two release calls and restart from ^bb1.
^bb6: // pred: ^bb2
llvm.call @llvm.aie2.release(%3, %6) : (i32, i32) -> ()
llvm.call @llvm.aie2.release(%2, %6) : (i32, i32) -> ()
llvm.br ^bb1
}
}
Could you please advise on how to get around this issue? Or is there a reason why this code pattern isn't supported?
Metadata
Metadata
Assignees
Labels
No labels