Skip to content

Commit 670dad3

Browse files
committed
update curand sequence for ChASE
1 parent d75ab35 commit 670dad3

File tree

5 files changed

+24
-32
lines changed

5 files changed

+24
-32
lines changed

ChASE-MPI/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,6 @@ if(SCALAPACK_FOUND)
9494
)
9595
endif()
9696

97-
9897
if(OpenMP_FOUND)
9998
target_link_libraries( chase_mpi INTERFACE
10099
OpenMP::OpenMP_CXX

ChASE-MPI/chase_mpi.hpp

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -507,14 +507,6 @@ class ChaseMpi : public chase::Chase<T>
507507
#ifdef USE_NSIGHT
508508
nvtxRangePop();
509509
#endif
510-
/*
511-
std::mt19937 gen(2342.0);
512-
std::normal_distribution<> normal_distribution;
513-
514-
for (std::size_t k = 0; k < N_; ++k){
515-
v1[k] = getRandomT<T>([&]() { return normal_distribution(gen); });
516-
}
517-
*/
518510
#ifdef HAS_OMP
519511
char* omp_threads;
520512
omp_threads = getenv("OMP_NUM_THREADS");
@@ -587,7 +579,7 @@ class ChaseMpi : public chase::Chase<T>
587579
#endif
588580
#ifdef HAS_OMP
589581
omp_set_num_threads(num_threads);
590-
#endif
582+
#endif
591583
delete[] ritzv;
592584
delete[] isuppz;
593585
delete[] d;
@@ -644,7 +636,7 @@ class ChaseMpi : public chase::Chase<T>
644636
num_threads = std::atoi(omp_threads);
645637
}
646638
omp_set_num_threads(1);
647-
#endif
639+
#endif
648640
// ENSURE that v1 has one norm
649641
#ifdef USE_NSIGHT
650642
nvtxRangePushA("Lanczos: loop");
@@ -706,7 +698,7 @@ class ChaseMpi : public chase::Chase<T>
706698
}
707699
#ifdef HAS_OMP
708700
omp_set_num_threads(num_threads);
709-
#endif
701+
#endif
710702
delete[] isuppz;
711703
delete[] d;
712704
delete[] e;

ChASE-MPI/impl/chase_mpidla_blaslapack.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,7 @@ class ChaseMpiDLABlaslapack : public ChaseMpiDLAInterface<T>
275275
num_threads = std::atoi(omp_threads);
276276
}
277277
omp_set_num_threads(1);
278-
#endif
278+
#endif
279279
for (auto i = 0; i < unconverged; i++)
280280
{
281281
T alpha = -ritzv[i];
@@ -286,7 +286,7 @@ class ChaseMpiDLABlaslapack : public ChaseMpiDLAInterface<T>
286286
}
287287
#ifdef HAS_OMP
288288
omp_set_num_threads(num_threads);
289-
#endif
289+
#endif
290290
}
291291

292292
//! - This function performs the local computation for ChaseMpiDLA::heevd()

ChASE-MPI/impl/chase_mpidla_mgpu.hpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
//! generated numbers
4141
//! @param[in] stream_ an asynchronous CUDA stream which allows to run this
4242
//! function asynchronously
43-
void chase_rand_normal(unsigned long long seed, curandState* states, float* v,
43+
void chase_rand_normal(unsigned long long seed, curandStatePhilox4_32_10_t* states, float* v,
4444
int n, cudaStream_t stream_);
4545
//! generate `n` random double numbers in normal distribution on each GPU
4646
//! device.
@@ -51,7 +51,7 @@ void chase_rand_normal(unsigned long long seed, curandState* states, float* v,
5151
//! generated numbers
5252
//! @param[in] stream_ an asynchronous CUDA stream which allows to run this
5353
//! function asynchronously
54-
void chase_rand_normal(unsigned long long seed, curandState* states, double* v,
54+
void chase_rand_normal(unsigned long long seed, curandStatePhilox4_32_10_t* states, double* v,
5555
int n, cudaStream_t stream_);
5656
//! generate `n` random complex float numbers in normal distribution on each GPU
5757
//! device. The real part and the imaginary part of each individual random
@@ -63,7 +63,7 @@ void chase_rand_normal(unsigned long long seed, curandState* states, double* v,
6363
//! generated numbers
6464
//! @param[in] stream_ an asynchronous CUDA stream which allows to run this
6565
//! function asynchronously
66-
void chase_rand_normal(unsigned long long seed, curandState* states,
66+
void chase_rand_normal(unsigned long long seed, curandStatePhilox4_32_10_t* states,
6767
std::complex<float>* v, int n, cudaStream_t stream_);
6868
//! generate `n` random complex double numbers in normal distribution on each
6969
//! GPU device. The real part and the imaginary part of each individual random
@@ -75,7 +75,7 @@ void chase_rand_normal(unsigned long long seed, curandState* states,
7575
//! generated numbers
7676
//! @param[in] stream_ an asynchronous CUDA stream which allows to run this
7777
//! function asynchronously
78-
void chase_rand_normal(unsigned long long seed, curandState* states,
78+
void chase_rand_normal(unsigned long long seed, curandStatePhilox4_32_10_t* states,
7979
std::complex<double>* v, int n, cudaStream_t stream_);
8080

8181
//! shift the diagonal of a `nxn` square matrix `A` in float real data type.
@@ -258,7 +258,7 @@ class ChaseMpiDLAMultiGPU : public ChaseMpiDLAInterface<T>
258258
cuda_exec(
259259
cudaMalloc((void**)&d_ritz_, sizeof(Base<T>) * (nev_ + nex_)));
260260
cuda_exec(
261-
cudaMalloc((void**)&states_, sizeof(curandState) * (256 * 32)));
261+
cudaMalloc((void**)&states_, sizeof(curandStatePhilox4_32_10_t) * (256 * 32)));
262262

263263
cublasCreate(&cublasH_);
264264
cusolverDnCreate(&cusolverH_);
@@ -786,7 +786,8 @@ class ChaseMpiDLAMultiGPU : public ChaseMpiDLAInterface<T>
786786
stream1_; //!< CUDA stream for asynchronous execution of kernels
787787
cudaStream_t
788788
stream2_; //!< CUDA stream for asynchronous execution of kernels
789-
curandState* states_ = NULL; //!< a pointer of `curandState` for the cuRAND
789+
//curandState* states_ = NULL; //!< a pointer of `curandState` for the cuRAND
790+
curandStatePhilox4_32_10_t *states_ = NULL;
790791
T* d_H_; //!< a pointer to a local buffer of size `m_*n_` on GPU, which is
791792
//!< mapped to `H_`.
792793
T* d_C_; //!< a pointer to a local buffer of size `m_*(nev_+nex_)` on GPU,

ChASE-MPI/kernels/shift.cu

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@
1616
#define GRIDDIM 32
1717

1818
// generate `n` random float numbers on GPU
19-
__global__ void s_normal_kernel(unsigned long long seed, curandState* states,
19+
__global__ void s_normal_kernel(unsigned long long seed, curandStatePhilox4_32_10_t* states,
2020
float* v, int n)
2121
{
2222
int tid = blockIdx.x * blockDim.x + threadIdx.x;
23-
curandState* state = states + tid;
23+
curandStatePhilox4_32_10_t* state = states + tid;
2424
curand_init(seed, tid, 0, state);
2525

2626
int i;
@@ -33,11 +33,11 @@ __global__ void s_normal_kernel(unsigned long long seed, curandState* states,
3333
}
3434

3535
// generate `n` random double numbers on GPU
36-
__global__ void d_normal_kernel(unsigned long long seed, curandState* states,
36+
__global__ void d_normal_kernel(unsigned long long seed, curandStatePhilox4_32_10_t* states,
3737
double* v, int n)
3838
{
3939
int tid = blockIdx.x * blockDim.x + threadIdx.x;
40-
curandState* state = states + tid;
40+
curandStatePhilox4_32_10_t* state = states + tid;
4141
curand_init(seed, tid, 0, state);
4242

4343
int i;
@@ -49,11 +49,11 @@ __global__ void d_normal_kernel(unsigned long long seed, curandState* states,
4949
}
5050
}
5151
// generate `n` random complex single numbers on GPU
52-
__global__ void c_normal_kernel(unsigned long long seed, curandState* states,
52+
__global__ void c_normal_kernel(unsigned long long seed, curandStatePhilox4_32_10_t* states,
5353
cuComplex* v, int n)
5454
{
5555
int tid = blockIdx.x * blockDim.x + threadIdx.x;
56-
curandState* state = states + tid;
56+
curandStatePhilox4_32_10_t* state = states + tid;
5757
curand_init(seed, tid, 0, state);
5858

5959
int i;
@@ -68,11 +68,11 @@ __global__ void c_normal_kernel(unsigned long long seed, curandState* states,
6868
}
6969

7070
// generate `n` random complex double numbers on GPU
71-
__global__ void z_normal_kernel(unsigned long long seed, curandState* states,
71+
__global__ void z_normal_kernel(unsigned long long seed, curandStatePhilox4_32_10_t* states,
7272
cuDoubleComplex* v, int n)
7373
{
7474
int tid = blockIdx.x * blockDim.x + threadIdx.x;
75-
curandState* state = states + tid;
75+
curandStatePhilox4_32_10_t* state = states + tid;
7676
curand_init(seed, tid, 0, state);
7777

7878
int i;
@@ -166,26 +166,26 @@ __global__ void zshift_mgpu_matrix(cuDoubleComplex* A, std::size_t* off_m,
166166
}
167167
}
168168

169-
void chase_rand_normal(unsigned long long seed, curandState* states, float* v,
169+
void chase_rand_normal(unsigned long long seed, curandStatePhilox4_32_10_t* states, float* v,
170170
int n, cudaStream_t stream_)
171171
{
172172
s_normal_kernel<<<GRIDDIM, BLOCKDIM, 0, stream_>>>(seed, states, v, n);
173173
}
174174

175-
void chase_rand_normal(unsigned long long seed, curandState* states, double* v,
175+
void chase_rand_normal(unsigned long long seed, curandStatePhilox4_32_10_t* states, double* v,
176176
int n, cudaStream_t stream_)
177177
{
178178
d_normal_kernel<<<GRIDDIM, BLOCKDIM, 0, stream_>>>(seed, states, v, n);
179179
}
180180

181-
void chase_rand_normal(unsigned long long seed, curandState* states,
181+
void chase_rand_normal(unsigned long long seed, curandStatePhilox4_32_10_t* states,
182182
std::complex<float>* v, int n, cudaStream_t stream_)
183183
{
184184
c_normal_kernel<<<GRIDDIM, BLOCKDIM, 0, stream_>>>(
185185
seed, states, reinterpret_cast<cuComplex*>(v), n);
186186
}
187187

188-
void chase_rand_normal(unsigned long long seed, curandState* states,
188+
void chase_rand_normal(unsigned long long seed, curandStatePhilox4_32_10_t* states,
189189
std::complex<double>* v, int n, cudaStream_t stream_)
190190
{
191191
z_normal_kernel<<<GRIDDIM, BLOCKDIM, 0, stream_>>>(

0 commit comments

Comments
 (0)