Skip to content

Commit 0716570

Browse files
abhilash1910Wang, Yihan
andauthored
[SYCLomatic-Test] Add Help Function for Exchange Headers (#594)
Signed-off-by: Wang, Yihan <yihan.wang@intel.com> Co-authored-by: Wang, Yihan <yihan.wang@intel.com>
1 parent 48158e7 commit 0716570

File tree

2 files changed

+248
-0
lines changed

2 files changed

+248
-0
lines changed

help_function/help_function.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@
142142
<test testName="onedpl_test_unique_by_key" configFile="config/TEMPLATE_help_function_skip_cuda_backend.xml" />
143143
<test testName="onedpl_test_unique" configFile="config/TEMPLATE_help_function_skip_cuda_backend.xml" />
144144
<test testName="onedpl_test_vector" configFile="config/TEMPLATE_help_function_skip_cuda_backend.xml" />
145+
<test testName="onedpl_test_group_exchange" configFile="config/TEMPLATE_help_function.xml" />
145146
<test testName="util_select_from_sub_group" configFile="config/TEMPLATE_help_function_usm.xml" />
146147
<test testName="util_shift_sub_group_left" configFile="config/TEMPLATE_help_function_usm.xml" />
147148
<test testName="util_shift_sub_group_right" configFile="config/TEMPLATE_help_function_usm.xml" />
Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
// ====------ onedpl_test_group_exchange.cpp-------------- -*- C++ -* ----===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//
8+
// ===---------------------------------------------------------------------===//
9+
10+
#include <sycl/sycl.hpp>
11+
#include <dpct/dpct.hpp>
12+
#include <dpct/dpl_utils.hpp>
13+
#include <iostream>
14+
15+
template <int GROUP_THREADS, typename InputT, int ITEMS_PER_THREAD,
16+
typename InputIteratorT>
17+
void load_striped(int linear_tid, InputIteratorT block_itr,
18+
InputT (&items)[ITEMS_PER_THREAD]) {
19+
#pragma unroll
20+
for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) {
21+
items[ITEM] = block_itr[linear_tid + ITEM * GROUP_THREADS];
22+
}
23+
}
24+
25+
template <int GROUP_THREADS, typename T, int ITEMS_PER_THREAD,
26+
typename OutputIteratorT>
27+
void store_striped(int linear_tid, OutputIteratorT block_itr,
28+
T (&items)[ITEMS_PER_THREAD]) {
29+
OutputIteratorT thread_itr = block_itr + linear_tid;
30+
#pragma unroll
31+
for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) {
32+
thread_itr[(ITEM * GROUP_THREADS)] = items[ITEM];
33+
}
34+
}
35+
36+
bool test_striped_to_blocked() {
37+
sycl::queue q;
38+
int data[512];
39+
for (int i = 0; i < 128; i++) {
40+
data[4 * i + 0] = i;
41+
data[4 * i + 1] = i + 1 * 128;
42+
data[4 * i + 2] = i + 2 * 128;
43+
data[4 * i + 3] = i + 3 * 128;
44+
}
45+
46+
sycl::buffer<int, 1> buffer(data, 512);
47+
q.submit([&](sycl::handler &h) {
48+
using group_exchange = dpct::group::exchange<int, 4>;
49+
size_t temp_storage_size = group_exchange::get_local_memory_size(128);
50+
sycl::local_accessor<uint8_t, 1> tacc(
51+
sycl::range<1>(temp_storage_size), h);
52+
sycl::accessor dacc(buffer, h, sycl::read_write);
53+
h.parallel_for(
54+
sycl::nd_range<3>(sycl::range<3>(1, 1, 128), sycl::range<3>(1, 1, 128)),
55+
[=](sycl::nd_item<3> item) {
56+
int thread_data[4];
57+
auto *d = dacc.get_multi_ptr<sycl::access::decorated::yes>().get();
58+
auto *tmp = tacc.get_multi_ptr<sycl::access::decorated::yes>().get();
59+
load_striped<128>(item.get_local_linear_id(), d, thread_data);
60+
group_exchange(tmp).striped_to_blocked(item, thread_data);
61+
store_striped<128>(item.get_local_linear_id(), d, thread_data);
62+
});
63+
});
64+
q.wait_and_throw();
65+
66+
sycl::host_accessor data_accessor(buffer, sycl::read_only);
67+
const int *ptr = data_accessor.get_multi_ptr<sycl::access::decorated::yes>();
68+
for (int i = 0; i < 512; ++i) {
69+
if (ptr[i] != i) {
70+
std::cout << "test_striped_to_blocked failed\n";
71+
std::ostream_iterator<int> Iter(std::cout, ", ");
72+
std::copy(ptr, ptr + 512, Iter);
73+
std::cout << std::endl;
74+
return false;
75+
}
76+
}
77+
78+
std::cout << "test_striped_to_blocked pass\n";
79+
return true;
80+
}
81+
82+
bool test_blocked_to_striped() {
83+
sycl::queue q;
84+
int data[512];
85+
for (int i = 0; i < 512; i++) data[i] = i;
86+
87+
sycl::buffer<int, 1> buffer(data, 512);
88+
89+
q.submit([&](sycl::handler &h) {
90+
using group_exchange = dpct::group::exchange<int, 4>;
91+
size_t temp_storage_size = group_exchange::get_local_memory_size(128);
92+
sycl::local_accessor<uint8_t, 1> tacc(
93+
sycl::range<1>(temp_storage_size), h);
94+
sycl::accessor data_accessor(buffer, h, sycl::read_write);
95+
h.parallel_for(
96+
sycl::nd_range<3>(sycl::range<3>(1, 1, 128), sycl::range<3>(1, 1, 128)),
97+
[=](sycl::nd_item<3> item) {
98+
int thread_data[4];
99+
auto *d = data_accessor.get_multi_ptr<sycl::access::decorated::yes>().get();
100+
auto *tmp = tacc.get_multi_ptr<sycl::access::decorated::yes>().get();
101+
load_striped<128>(item.get_local_linear_id(), d, thread_data);
102+
group_exchange(tmp).blocked_to_striped(item, thread_data);
103+
store_striped<128>(item.get_local_linear_id(), d, thread_data);
104+
});
105+
});
106+
q.wait_and_throw();
107+
108+
sycl::host_accessor data_accessor(buffer, sycl::read_only);
109+
const int *ptr = data_accessor.get_multi_ptr<sycl::access::decorated::yes>();
110+
int expected[512];
111+
for (int i = 0; i < 128; i++) {
112+
expected[4 * i + 0] = i;
113+
expected[4 * i + 1] = i + 1 * 128;
114+
expected[4 * i + 2] = i + 2 * 128;
115+
expected[4 * i + 3] = i + 3 * 128;
116+
}
117+
for (int i = 0; i < 512; i++) {
118+
if (expected[i] != ptr[i]) {
119+
std::cout << "test_blocked_to_striped failed\n";
120+
std::ostream_iterator<int> Iter(std::cout, ", ");
121+
std::copy(ptr, ptr + 512, Iter);
122+
std::cout << std::endl;
123+
return false;
124+
}
125+
}
126+
std::cout << "test_blocked_to_striped pass\n";
127+
return true;
128+
}
129+
130+
bool test_scatter_to_blocked() {
131+
sycl::queue q;
132+
int data[512];
133+
int rank[512];
134+
for (int i = 0; i < 128; i++) {
135+
data[4 * i + 0] = i;
136+
data[4 * i + 1] = i + 1 * 128;
137+
data[4 * i + 2] = i + 2 * 128;
138+
data[4 * i + 3] = i + 3 * 128;
139+
rank[4 * i + 0] = i * 4 + 0;
140+
rank[4 * i + 1] = i * 4 + 1;
141+
rank[4 * i + 2] = i * 4 + 2;
142+
rank[4 * i + 3] = i * 4 + 3;
143+
}
144+
145+
sycl::buffer<int, 1> dbuffer(data, 512);
146+
sycl::buffer<int, 1> rbuffer(rank, 512);
147+
148+
q.submit([&](sycl::handler &h) {
149+
using group_exchange = dpct::group::exchange<int, 4>;
150+
size_t tmp_size = group_exchange::get_local_memory_size(128);
151+
sycl::local_accessor<uint8_t, 1> tacc(sycl::range<1>(tmp_size), h);
152+
sycl::accessor dacc(dbuffer, h, sycl::read_write);
153+
sycl::accessor racc(rbuffer, h, sycl::read_only);
154+
h.parallel_for(
155+
sycl::nd_range<3>(sycl::range<3>(1, 1, 128), sycl::range<3>(1, 1, 128)),
156+
[=](sycl::nd_item<3> item) {
157+
int thread_data[4], thread_rank[4];
158+
auto *d = dacc.get_multi_ptr<sycl::access::decorated::yes>().get();
159+
auto *r = racc.get_multi_ptr<sycl::access::decorated::yes>().get();
160+
auto *tmp = tacc.get_multi_ptr<sycl::access::decorated::yes>().get();
161+
load_striped<128>(item.get_local_linear_id(), d, thread_data);
162+
load_striped<128>(item.get_local_linear_id(), r, thread_rank);
163+
group_exchange(tmp).scatter_to_blocked(item, thread_data, thread_rank);
164+
store_striped<128>(item.get_local_linear_id(), d, thread_data);
165+
});
166+
});
167+
q.wait_and_throw();
168+
169+
sycl::host_accessor data_accessor(dbuffer, sycl::read_only);
170+
const int *ptr = data_accessor.get_multi_ptr<sycl::access::decorated::yes>();
171+
for (int i = 0; i < 512; ++i) {
172+
if (ptr[i] != i) {
173+
std::cout << "test_scatter_to_blocked failed\n";
174+
std::ostream_iterator<int> Iter(std::cout, ", ");
175+
std::copy(ptr, ptr + 512, Iter);
176+
std::cout << std::endl;
177+
return false;
178+
}
179+
}
180+
std::cout << "test_scatter_to_blocked pass\n";
181+
return true;
182+
}
183+
184+
bool test_scatter_to_striped() {
185+
sycl::queue q;
186+
int data[512];
187+
int rank[512];
188+
for (int i = 0; i < 512; i++) data[i] = i;
189+
rank[0] = 0;
190+
rank[128] = 1;
191+
rank[256] = 2;
192+
rank[384] = 3;
193+
for (int i = 1; i < 128; i++) {
194+
rank[0 * 128 + i] = rank[0 * 128 + i - 1] + 4;
195+
rank[1 * 128 + i] = rank[1 * 128 + i - 1] + 4;
196+
rank[2 * 128 + i] = rank[2 * 128 + i - 1] + 4;
197+
rank[3 * 128 + i] = rank[3 * 128 + i - 1] + 4;
198+
}
199+
sycl::buffer<int, 1> dbuffer(data, 512);
200+
sycl::buffer<int, 1> rbuffer(rank, 512);
201+
q.submit([&](sycl::handler &h) {
202+
using group_exchange = dpct::group::exchange<int, 4>;
203+
size_t tmp_size = group_exchange::get_local_memory_size(128);
204+
sycl::local_accessor<uint8_t, 1> tacc(sycl::range<1>(tmp_size), h);
205+
sycl::accessor dacc(dbuffer, h, sycl::read_write);
206+
sycl::accessor racc(rbuffer, h, sycl::read_only);
207+
h.parallel_for(
208+
sycl::nd_range<3>(sycl::range<3>(1, 1, 128), sycl::range<3>(1, 1, 128)),
209+
[=](sycl::nd_item<3> item) {
210+
int thread_data[4], thread_rank[4];
211+
auto *d = dacc.get_multi_ptr<sycl::access::decorated::yes>().get();
212+
auto *r = racc.get_multi_ptr<sycl::access::decorated::yes>().get();
213+
auto *tmp = tacc.get_multi_ptr<sycl::access::decorated::yes>().get();
214+
load_striped<128>(item.get_local_linear_id(), d, thread_data);
215+
load_striped<128>(item.get_local_linear_id(), r, thread_rank);
216+
group_exchange(tmp).scatter_to_striped(item, thread_data, thread_rank);
217+
store_striped<128>(item.get_local_linear_id(), d, thread_data);
218+
});
219+
});
220+
q.wait_and_throw();
221+
222+
sycl::host_accessor data_accessor(dbuffer, sycl::read_only);
223+
const int *ptr = data_accessor.get_multi_ptr<sycl::access::decorated::yes>();
224+
int expected[512];
225+
for (int i = 0; i < 128; i++) {
226+
expected[4 * i + 0] = i;
227+
expected[4 * i + 1] = i + 1 * 128;
228+
expected[4 * i + 2] = i + 2 * 128;
229+
expected[4 * i + 3] = i + 3 * 128;
230+
}
231+
for (int i = 0; i < 512; i++) {
232+
if (expected[i] != ptr[i]) {
233+
std::cout << "test_scatter_to_striped failed\n";
234+
std::ostream_iterator<int> Iter(std::cout, ", ");
235+
std::copy(ptr, ptr + 512, Iter);
236+
std::cout << std::endl;
237+
return false;
238+
}
239+
}
240+
std::cout << "test_scatter_to_striped pass\n";
241+
return true;
242+
}
243+
244+
int main() {
245+
return !(test_blocked_to_striped() && test_striped_to_blocked() &&
246+
test_scatter_to_blocked() && test_scatter_to_striped());
247+
}

0 commit comments

Comments
 (0)