This repository was archived by the owner on Aug 30, 2024. It is now read-only.
File tree Expand file tree Collapse file tree 2 files changed +4
-2
lines changed
include/subgroup/tile/impl Expand file tree Collapse file tree 2 files changed +4
-2
lines changed Original file line number Diff line number Diff line change @@ -93,6 +93,7 @@ tile_load(tile_t& tile, payload_t& payload) {
93
93
static constexpr gpu_arch arch_tag = payload_t ::arch_tag;
94
94
95
95
static constexpr reg_layout reg_layout_ = tile_desc::register_layout;
96
+ // In the case of pack, transpose is in vnni format
96
97
static constexpr bool is_vnni_reverse =
97
98
payload_t ::mem_transpose_dtype_less4bytes &&
98
99
((reg_layout_ == reg_layout::tiled) ||
@@ -188,14 +189,13 @@ tile_load(tile_t& tile, payload_t& payload) {
188
189
((block_size_y * sizeof (dtype)) % sizeof (load_dtype) == 0 ),
189
190
" check vnni limitation for DW transpose" );
190
191
191
- // auto payload_2d = payload.payloads.xetla_format<uint32_t, num_block, 16>();
192
192
#pragma unroll
193
193
for (uint32_t i = 0 ; i < num_block_y; ++i) {
194
- constexpr uint32_t load_block_elems = block_elems * arr_len;
195
194
int offset_y = i * block_size_y;
196
195
#pragma unroll
197
196
for (uint32_t j = 0 ; j < num_block_x; j += arr_len) {
198
197
int32_t offset_x = j * block_size_x;
198
+ constexpr uint32_t load_block_elems = block_elems * arr_len;
199
199
auto reg_blk = tile.reg .xetla_select <load_block_elems, 1 >(
200
200
(i * num_block_x + j) * block_elems);
201
201
constexpr uint32_t ld_blk_height = (reg_transpose && trans)
Original file line number Diff line number Diff line change @@ -230,12 +230,14 @@ struct mem_payload_t<
230
230
__XETLA_API void update_tdesc (int offset) {
231
231
auto payloads_2d = payloads.xetla_format <uint32_t , num_block, 16 >();
232
232
if constexpr (update_dir == tdesc_update_dir::x_dir) {
233
+ offset_x += offset / scale_factor;
233
234
#pragma unroll
234
235
for (uint32_t i = 0 ; i < num_block; i++) {
235
236
xetla_update_tdesc_offsetx (
236
237
payloads_2d.row (i), offset / int32_t (scale_factor));
237
238
}
238
239
} else {
240
+ offset_y += offset;
239
241
#pragma unroll
240
242
for (uint32_t i = 0 ; i < num_block; i++) {
241
243
xetla_update_tdesc_offsety (payloads_2d.row (i), offset);
You can’t perform that action at this time.
0 commit comments