Skip to content
This repository was archived by the owner on Aug 30, 2024. It is now read-only.

Commit 631b2a3

Browse files
committed
update scalar load/store
1 parent 177f21a commit 631b2a3

File tree

1 file changed

+12
-6
lines changed

1 file changed

+12
-6
lines changed

include/common/core/memory.hpp

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -355,9 +355,13 @@ __XETLA_API xetla_vector<T, N> xetla_load_global(
355355
__ESIMD_NS::cache_hint_L1<gpu::xetla::detail::get_cache_hint(L1H)>,
356356
__ESIMD_NS::cache_hint_L2<gpu::xetla::detail::get_cache_hint(L2H)>,
357357
__ESIMD_NS::alignment<alignment>};
358-
if constexpr (sizeof(T) * N < sizeof(uint32_t)) {
359-
xetla_vector<uint32_t, N> offsets(byte_offset, sizeof(T));
360-
return __ESIMD_NS::gather<T, N, uint32_t>(ptr, offsets);
358+
if constexpr (sizeof(T) * N < sizeof(uint32_t) || N == 1) {
359+
xetla_vector<T, N> ret;
360+
#pragma unroll
361+
for (uint32_t i = 0; i < N; i++) {
362+
ret[i] = ptr[i + byte_offset / sizeof(T)];
363+
}
364+
return ret;
361365
} else {
362366
return __ESIMD_NS::block_load<T, N>(ptr, byte_offset, props);
363367
}
@@ -501,9 +505,11 @@ __XETLA_API void xetla_store_global(
501505
__ESIMD_NS::cache_hint_L2<gpu::xetla::detail::get_cache_hint(L2H)>,
502506
__ESIMD_NS::alignment<alignment>};
503507

504-
if constexpr (sizeof(T) * N < sizeof(uint32_t)) {
505-
xetla_vector<uint32_t, N> offsets(byte_offset, sizeof(T));
506-
return __ESIMD_NS::scatter<T, N, uint32_t>(ptr, offsets, vals);
508+
if constexpr (sizeof(T) * N < sizeof(uint32_t) || N == 1) {
509+
#pragma unroll
510+
for (uint32_t i = 0; i < N; i++) {
511+
ptr[i + byte_offset / sizeof(T)] = vals[i];
512+
}
507513
} else {
508514
__ESIMD_NS::block_store<T, N>(ptr, byte_offset, vals, props);
509515
}

0 commit comments

Comments
 (0)