@@ -261,3 +261,27 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32, "ttg.thr
261
261
tt.return
262
262
}
263
263
}
264
+
265
// -----

// COM: Case 5:
// COM: Checks that the block encoding has been forwarded to the store op
// COM: and that the ttg.convert_layout operation has been removed.
// COM: Input: a constant built with #blocked is converted to #blocked1 and
// COM: stored through a block pointer typed with #blocked1. Expected output:
// COM: the block pointer and the store are both typed with #blocked (the
// COM: layout with warpsPerCTA = [2, 2]), and no layout conversion is left.
// CHECK: #[[BLOCKED:.+]] = #ttg.blocked<{sizePerThread = [1, 1], threadsPerWarp = [1, 16], warpsPerCTA = [2, 2], order = [1, 0]}>
#blocked = #ttg.blocked<{sizePerThread = [1, 1], threadsPerWarp = [1, 16], warpsPerCTA = [2, 2], order = [1, 0]}>
#blocked1 = #ttg.blocked<{sizePerThread = [1, 1], threadsPerWarp = [1, 16], warpsPerCTA = [1, 4], order = [1, 0]}>
module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32, "ttg.threads-per-warp" = 16 : i32, "ttig.support_sg_2d_block"} {
  tt.func public @matmul_kernel_with_block_pointers(%arg0: !tt.ptr<f16>, %arg1: !tt.ptr<f16>, %arg2: !tt.ptr<f16>, %arg3: i32, %arg4: i32, %arg5: i32, %arg6: i32, %arg7: i32, %arg8: i32) {
    %c8_i32 = arith.constant 8 : i32
    %c64_i64 = arith.constant 64 : i64
    %c1_i64 = arith.constant 1 : i64
    %c256_i64 = arith.constant 256 : i64
    // COM: The stored value is created in #blocked and only reaches #blocked1
    // COM: through the conversion below, which is the op expected to go away.
    %cst = arith.constant dense<0.000000e+00> : tensor<64x256xf16, #blocked>
    %25 = ttg.convert_layout %cst : tensor<64x256xf16, #blocked> -> tensor<64x256xf16, #blocked1>
    // COM: Enforce the "conversion has been removed" half of the case description.
    // CHECK-NOT: ttg.convert_layout
    // CHECK: tt.make_tensor_ptr {{.*}}, {{\[}}{{.*}}, {{.*}}], {{\[}}{{.*}}, {{.*}}], {{\[}}{{.*}}, {{.*}}] {order = array<i32: 1, 0>} : <tensor<64x256xf16, #[[BLOCKED]]>>
    %27 = tt.make_tensor_ptr %arg2, [%c256_i64, %c256_i64], [%c64_i64, %c1_i64], [%c8_i32, %c8_i32] {order = array<i32: 1, 0>} : <tensor<64x256xf16, #blocked1>>
    // CHECK-NOT: ttg.convert_layout
    // CHECK: tt.store {{.*}}, {{.*}} {boundaryCheck = array<i32: 0, 1>} : !tt.ptr<tensor<64x256xf16, #[[BLOCKED]]>>
    tt.store %27, %25 {boundaryCheck = array<i32: 0, 1>} : !tt.ptr<tensor<64x256xf16, #blocked1>>
    tt.return
  }
}
0 commit comments