Skip to content

Commit 80110d2

Browse files
authored
[SYCLomatic] Update memory free migration by calling device synchronization before device pointer free (#1511)
Signed-off-by: Wang, Hao3 <hao3.wang@intel.com>
1 parent de00708 commit 80110d2

20 files changed

+59
-37
lines changed

clang/lib/DPCT/ASTTraversal.cpp

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10654,9 +10654,23 @@ void MemoryMigrationRule::freeMigration(const MatchFinder::MatchResult &Result,
1065410654
std::ostringstream Repl;
1065510655
buildTempVariableMap(Index, C, HelperFuncType::HFT_DefaultQueue);
1065610656
if (hasManagedAttr(0)(C)) {
10657-
ArgStr = "*(" + ArgStr + ".get_ptr())";
10657+
ArgStr = "*(" + ArgStr + ".get_ptr())";
1065810658
}
10659-
Repl << MapNames::getClNamespace() + "free(" << ArgStr
10659+
auto &SM = DpctGlobalInfo::getSourceManager();
10660+
auto Indent = getIndent(SM.getExpansionLoc(C->getBeginLoc()), SM).str();
10661+
if (DpctGlobalInfo::isOptimizeMigration()) {
10662+
Repl << MapNames::getClNamespace() << "free";
10663+
} else {
10664+
if (DpctGlobalInfo::useNoQueueDevice()) {
10665+
Repl << Indent << "{{NEEDREPLACEQ" << std::to_string(Index)
10666+
<< "}}.wait_and_throw();\n"
10667+
<< Indent << MapNames::getClNamespace() << "free";
10668+
} else {
10669+
requestFeature(HelperFeatureEnum::device_ext);
10670+
Repl << MapNames::getDpctNamespace() << "dpct_free";
10671+
}
10672+
}
10673+
Repl << "(" << ArgStr
1066010674
<< ", {{NEEDREPLACEQ" + std::to_string(Index) + "}})";
1066110675
emplaceTransformation(new ReplaceStmt(C, std::move(Repl.str())));
1066210676
} else {

clang/runtime/dpct-rt/include/dpct/memory.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -832,6 +832,9 @@ static inline void *dpct_malloc(size_t &pitch, size_t x, size_t y,
832832
/// \returns no return value.
833833
static inline void dpct_free(void *ptr,
834834
sycl::queue &q = get_default_queue()) {
835+
#ifndef DPCT_USM_LEVEL_NONE
836+
dpct::get_current_device().queues_wait_and_throw();
837+
#endif
835838
detail::dpct_free(ptr, q);
836839
}
837840

clang/test/dpct/cub/devicelevel/device_reduce_arg_max.cu

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ void test1() {
3434
// CHECK-NOT: size_t temp_storage_bytes = 0;
3535
// CHECK: DPCT1026:{{.*}}: The call to cub::DeviceReduce::ArgMax was removed because this functionality is redundant in SYCL.
3636
// CHECK: dpct::reduce_argmax(oneapi::dpl::execution::device_policy(q_ct1), d_in, d_out, num_items);
37-
// CHECK-NOT: sycl::free({{.*}})
37+
// CHECK-NOT: dpct::dpct_free({{.*}})
3838
// CHECK: }
3939

4040
void test2() {
@@ -51,7 +51,7 @@ void test2() {
5151
// CHECK: DPCT1027:{{.*}}: The call to cub::DeviceReduce::ArgMax was replaced with 0 because this functionality is redundant in SYCL.
5252
// CHECK: auto res = 0;
5353
// CHECK: dpct::reduce_argmax(oneapi::dpl::execution::device_policy(q_ct1), d_in, d_out, num_items);
54-
// CHECK-NOT: sycl::free({{.*}})
54+
// CHECK-NOT: dpct::dpct_free({{.*}})
5555
// CHECK: }
5656

5757
void test3() {
@@ -73,7 +73,7 @@ void test3() {
7373
// CHECK: DPCT1026:{{.*}}: The call to cub::DeviceReduce::ArgMax was removed because this functionality is redundant in SYCL.
7474
// CHECK: dpct::reduce_argmax(oneapi::dpl::execution::device_policy(*s), d_in, d_out, num_items);
7575
// CHECK: dev_ct1.destroy_queue(s);
76-
// CHECK-NOT: sycl::free({{.*}})
76+
// CHECK-NOT: dpct::dpct_free({{.*}})
7777
// CHECK: }
7878

7979
int main() {

clang/test/dpct/cub/devicelevel/device_reduce_arg_min.cu

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ void test1() {
3434
// CHECK-NOT: size_t temp_storage_bytes = 0;
3535
// CHECK: DPCT1026:{{.*}}: The call to cub::DeviceReduce::ArgMin was removed because this functionality is redundant in SYCL.
3636
// CHECK: dpct::reduce_argmin(oneapi::dpl::execution::device_policy(q_ct1), d_in, d_out, num_items);
37-
// CHECK-NOT: sycl::free({{.*}})
37+
// CHECK-NOT: dpct::dpct_free({{.*}})
3838
// CHECK: }
3939

4040
void test2() {
@@ -51,7 +51,7 @@ void test2() {
5151
// CHECK: DPCT1027:{{.*}}: The call to cub::DeviceReduce::ArgMin was replaced with 0 because this functionality is redundant in SYCL.
5252
// CHECK: auto res = 0;
5353
// CHECK: dpct::reduce_argmin(oneapi::dpl::execution::device_policy(q_ct1), d_in, d_out, num_items);
54-
// CHECK-NOT: sycl::free({{.*}})
54+
// CHECK-NOT: dpct::dpct_free({{.*}})
5555
// CHECK: }
5656

5757
void test3() {
@@ -73,7 +73,7 @@ void test3() {
7373
// CHECK: DPCT1026:{{.*}}: The call to cub::DeviceReduce::ArgMin was removed because this functionality is redundant in SYCL.
7474
// CHECK: dpct::reduce_argmin(oneapi::dpl::execution::device_policy(*s), d_in, d_out, num_items);
7575
// CHECK: dev_ct1.destroy_queue(s);
76-
// CHECK-NOT: sycl::free({{.*}})
76+
// CHECK-NOT: dpct::dpct_free({{.*}})
7777
// CHECK: }
7878

7979
int main() {

clang/test/dpct/cub/devicelevel/device_reduce_max.cu

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ void test1() {
3434
// CHECK-NOT: size_t temp_storage_bytes = 0;
3535
// CHECK: DPCT1026:{{.*}}: The call to cub::DeviceReduce::Max was removed because this functionality is redundant in SYCL.
3636
// CHECK: q_ct1.fill(d_out, oneapi::dpl::reduce(oneapi::dpl::execution::device_policy(q_ct1), d_in, d_in + num_items, typename std::iterator_traits<decltype(d_out)>::value_type{}, sycl::maximum<>()), 1).wait();
37-
// CHECK-NOT: sycl::free({{.*}})
37+
// CHECK-NOT: dpct::dpct_free({{.*}})
3838
// CHECK: }
3939

4040
void test2() {
@@ -51,7 +51,7 @@ void test2() {
5151
// CHECK: DPCT1027:{{.*}}: The call to cub::DeviceReduce::Max was replaced with 0 because this functionality is redundant in SYCL.
5252
// CHECK: auto res = 0;
5353
// CHECK: q_ct1.fill(d_out, oneapi::dpl::reduce(oneapi::dpl::execution::device_policy(q_ct1), d_in, d_in + num_items, typename std::iterator_traits<decltype(d_out)>::value_type{}, sycl::maximum<>()), 1).wait();
54-
// CHECK-NOT: sycl::free({{.*}})
54+
// CHECK-NOT: dpct::dpct_free({{.*}})
5555
// CHECK: }
5656

5757
void test3() {
@@ -73,7 +73,7 @@ void test3() {
7373
// CHECK: DPCT1026:{{.*}}: The call to cub::DeviceReduce::Max was removed because this functionality is redundant in SYCL.
7474
// CHECK: s->fill(d_out, oneapi::dpl::reduce(oneapi::dpl::execution::device_policy(*s), d_in, d_in + num_items, typename std::iterator_traits<decltype(d_out)>::value_type{}, sycl::maximum<>()), 1).wait();
7575
// CHECK: dev_ct1.destroy_queue(s);
76-
// CHECK-NOT: sycl::free({{.*}})
76+
// CHECK-NOT: dpct::dpct_free({{.*}})
7777
// CHECK: }
7878

7979
int main() {

clang/test/dpct/cub/devicelevel/device_reduce_min.cu

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ void test1() {
3434
// CHECK-NOT: size_t temp_storage_bytes = 0;
3535
// CHECK: DPCT1026:{{.*}}: The call to cub::DeviceReduce::Min was removed because this functionality is redundant in SYCL.
3636
// CHECK: q_ct1.fill(d_out, oneapi::dpl::reduce(oneapi::dpl::execution::device_policy(q_ct1), d_in, d_in + num_items, typename std::iterator_traits<decltype(d_out)>::value_type{}, sycl::minimum<>()), 1).wait();
37-
// CHECK-NOT: sycl::free({{.*}})
37+
// CHECK-NOT: dpct::dpct_free({{.*}})
3838
// CHECK: }
3939

4040
void test2() {
@@ -51,7 +51,7 @@ void test2() {
5151
// CHECK: DPCT1027:{{.*}}: The call to cub::DeviceReduce::Min was replaced with 0 because this functionality is redundant in SYCL.
5252
// CHECK: auto res = 0;
5353
// CHECK: q_ct1.fill(d_out, oneapi::dpl::reduce(oneapi::dpl::execution::device_policy(q_ct1), d_in, d_in + num_items, typename std::iterator_traits<decltype(d_out)>::value_type{}, sycl::minimum<>()), 1).wait();
54-
// CHECK-NOT: sycl::free({{.*}})
54+
// CHECK-NOT: dpct::dpct_free({{.*}})
5555
// CHECK: }
5656

5757
void test3() {
@@ -73,7 +73,7 @@ void test3() {
7373
// CHECK: DPCT1026:{{.*}}: The call to cub::DeviceReduce::Min was removed because this functionality is redundant in SYCL.
7474
// CHECK: s->fill(d_out, oneapi::dpl::reduce(oneapi::dpl::execution::device_policy(*s), d_in, d_in + num_items, typename std::iterator_traits<decltype(d_out)>::value_type{}, sycl::minimum<>()), 1).wait();
7575
// CHECK: dev_ct1.destroy_queue(s);
76-
// CHECK-NOT: sycl::free({{.*}})
76+
// CHECK-NOT: dpct::dpct_free({{.*}})
7777
// CHECK: }
7878

7979
int main() {

clang/test/dpct/cub/devicelevel/device_select_if.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ void test1() {
4444
// CHECK-NOT: size_t temp_storage_bytes = 0;
4545
// CHECK: DPCT1026:{{.*}}: The call to cub::DeviceSelect::If was removed because this functionality is redundant in SYCL.
4646
// CHECK: q_ct1.fill(d_num_selected_out, std::distance(d_out, oneapi::dpl::copy_if(oneapi::dpl::execution::device_policy(q_ct1), d_in, d_in + num_items, d_out, select_op)), 1).wait();
47-
// CHECK-NOT: sycl::free(d_temp_storage, q_ct1);
47+
// CHECK-NOT: dpct::dpct_free(d_temp_storage, q_ct1);
4848
// CHECK: }
4949

5050
void test2() {
@@ -61,7 +61,7 @@ void test2() {
6161
// CHECK: DPCT1027:{{.*}}: The call to cub::DeviceSelect::If was replaced with 0 because this functionality is redundant in SYCL.
6262
// CHECK: auto res = 0;
6363
// CHECK: q_ct1.fill(d_num_selected_out, std::distance(d_out, oneapi::dpl::copy_if(oneapi::dpl::execution::device_policy(q_ct1), d_in, d_in + num_items, d_out, select_op)), 1).wait();
64-
// CHECK-NOT: sycl::free(d_temp_storage, q_ct1);
64+
// CHECK-NOT: dpct::dpct_free(d_temp_storage, q_ct1);
6565
// CHECK: }
6666

6767
void test3() {

clang/test/dpct/cub/devicelevel/device_unique_by_key.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ void test1() {
3737
// CHECK-NOT: size_t temp_storage_bytes = 0;
3838
// CHECK: DPCT1026:{{.*}}: The call to cub::DeviceSelect::UniqueByKey was removed because this functionality is redundant in SYCL.
3939
// CHECK: q_ct1.fill(d_num_selected_out, std::distance(d_keys_out, std::get<0>(dpct::unique_copy(oneapi::dpl::execution::device_policy(q_ct1), d_keys_in, d_keys_in + num_items, d_values_in, d_keys_out, d_values_out))), 1).wait();
40-
// CHECK-NOT: sycl::free(d_temp_storage, q_ct1);
40+
// CHECK-NOT: dpct::dpct_free(d_temp_storage, q_ct1);
4141
// CHECK: }
4242

4343
void test2() {
@@ -53,7 +53,7 @@ void test2() {
5353
// CHECK-NOT: size_t temp_storage_bytes = 0;
5454
// CHECK: DPCT1027:{{.*}}: The call to cub::DeviceSelect::UniqueByKey was replaced with 0 because this functionality is redundant in SYCL.
5555
// CHECK: q_ct1.fill(d_num_selected_out, std::distance(d_keys_out, std::get<0>(dpct::unique_copy(oneapi::dpl::execution::device_policy(q_ct1), d_keys_in, d_keys_in + num_items, d_values_in, d_keys_out, d_values_out))), 1).wait();
56-
// CHECK-NOT: sycl::free(d_temp_storage, q_ct1);
56+
// CHECK-NOT: dpct::dpct_free(d_temp_storage, q_ct1);
5757
// CHECK: }
5858

5959
void test3() {

clang/test/dpct/curand-usm.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ private:
131131
//CHECK-NEXT: }
132132
//CHECK-NEXT: ~B(){
133133
//CHECK-NEXT: rng.reset();
134-
//CHECK-NEXT: sycl::free(karg1, dpct::get_in_order_queue());
134+
//CHECK-NEXT: dpct::dpct_free(karg1, dpct::get_in_order_queue());
135135
//CHECK-NEXT: }
136136
//CHECK:private:
137137
//CHECK-NEXT: dpct::rng::host_rng_ptr rng;

clang/test/dpct/helper_function_preference/no-queue-device/kernel1.cu

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ static uint *d_Data1;
99
// CHECK: void malloc1() { d_Data1 = (uint *)sycl::malloc_device(SIZE * sizeof(int), q_ct1); }
1010
void malloc1() { cudaMalloc((void **)&d_Data1, SIZE * sizeof(int)); }
1111

12-
// CHECK: void free1() { sycl::free(d_Data1, q_ct1); }
12+
// CHECK: void free1() { q_ct1.wait_and_throw();
13+
// CHECK: sycl::free(d_Data1, q_ct1); }
1314
void free1() { cudaFree(d_Data1); }
1415

1516
// CHECK: void kernelWrapper1(int *d_Data) {

0 commit comments

Comments
 (0)