Skip to content

Commit 5f6d807

Browse files
committed
add cudaStreamSynchronize
1 parent 19b8fcc commit 5f6d807

File tree

1 file changed

+4
-0
lines changed

1 file changed

+4
-0
lines changed

ChASE-MPI/impl/mgpu_cudaDLA.hpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -756,6 +756,8 @@ namespace chase {
756 756
assert(CUSOLVER_STATUS_SUCCESS == cusolver_status_);
757 757
cudaSetDevice(shmrank_*num_devices_per_rank);
758 758
cuda_exec(cudaMemcpyAsync(approxV, d_V_, sizeof(T)*N*nevex, cudaMemcpyDeviceToHost, stream2_[0]));
759 +
cudaStreamSynchronize(stream2_[0]);
760 +
759 761
}
760 762

761 763
/*!
@@ -865,6 +867,8 @@ namespace chase {
865 867
cudaSetDevice(shmrank_*num_devices_per_rank);
866 868
cuda_exec(cudaMemcpyAsync(approxV + locked * N, d_V_, sizeof(T)* N * block, cudaMemcpyDeviceToHost, stream_[0]));
867 869
cuda_exec(cudaMemcpyAsync(workspace + locked * N, d_W_, sizeof(T)* N * block, cudaMemcpyDeviceToHost, stream_[0]));
870 +
cudaStreamSynchronize(stream_[0]);
871 +
868 872
//related to cusolver HEEVD
869 873
cudaDeviceSynchronize();
870 874
if (d_A_) cudaFree(d_A_);

0 commit comments

Comments
 (0)