8
8
namespace H2O4GPU {
9
9
namespace KMeans {
10
10
11
namespace kernel {

/*
 * Compute the minimum value of each row. One thread scans one row
 * sequentially; threads whose index is past the last row exit early.
 *
 * @tparam T    Numeric type of the data.
 * @param _res  Output matrix with shape m x 1.
 * @param _val  Input matrix with shape m x n (row-major, per the
 *              `idx * cols + i` addressing below).
 */
template <typename T>
__global__ void row_min_sequential (kParam <T> _res, kParam <T> _val) {
  size_t row = global_thread_idx ();
  if (row >= _val.rows) return;

  // NOTE(review): std::numeric_limits in device code relies on nvcc's
  // relaxed-constexpr support — presumably enabled by the build; verify.
  T best = std::numeric_limits<T>::max ();
  const T* row_ptr = _val.ptr + row * _val.cols;
  for (size_t col = 0; col < _val.cols; ++col) {
    T candidate = row_ptr[col];
    if (candidate < best) {
      best = candidate;
    }
  }
  _res.ptr[row] = best;
}

/*
 * Compute the column index of the minimum value of each row. One thread
 * scans one row sequentially.
 *
 * @tparam T    Numeric type of the data.
 * @param _res  Output matrix with shape m x 1 holding column indices.
 * @param _val  Input matrix with shape m x n (row-major).
 */
template <typename T>
__global__ void row_argmin_sequential (kParam <int > _res, kParam <T> _val) {
  size_t row = global_thread_idx ();
  if (row >= _val.rows) return;

  T best = std::numeric_limits<T>::max ();
  int best_col = -1;  // stays -1 only when the row has zero columns
  const T* row_ptr = _val.ptr + row * _val.cols;
  for (size_t col = 0; col < _val.cols; ++col) {
    T candidate = row_ptr[col];
    if (candidate < best) {
      best = candidate;
      best_col = static_cast<int>(col);
    }
  }
  _res.ptr[row] = best_col;
}

}  // namespace kernel
11
// FIXME: Using struct for operations is just keeping the possibility of
// creating unified operations for KmMatrix. For example, let KmMatrix
// inherit those left associative ops, or create an interface for elementwise
// FIXME: Use return value instead.
61
17
// Dense matrix product via cuBLAS gemm.
template <typename T>
struct DotOp {
  // Square form: _res <- _val * _val.
  void dot (KmMatrix<T>& _res, KmMatrix<T>& _val) {
    dot (_res, _val, _val);
  }
  // General form: _res <- alpha * _lhs * _rhs + beta * _res.
  // NOTE(review): beta == 1 accumulates into _res — correct only if callers
  // pass a zero-initialised _res; otherwise beta should be 0. Verify.
  void dot (KmMatrix<T>& _res, KmMatrix<T>& _lhs,
            KmMatrix<T>& _rhs) {
    cublasHandle_t handle = GpuInfo::ins ().cublas_handle ();
    constexpr T alpha = 1.0;
    constexpr T beta = 1.0;
    Blas::gemm (handle,
                CUBLAS_OP_N, CUBLAS_OP_N,  // FIXME: expose transpose options
                _lhs.rows (), _rhs.cols (), _lhs.cols (),
                &alpha,
                _lhs.dev_ptr (), _lhs.cols (),
                _rhs.dev_ptr (), _rhs.cols (),
                &beta,
                _res.dev_ptr (), _res.cols ());
  }
};
81
22
82
23
// Row-wise (batched) dot products via strided-batched cuBLAS gemm: each
// row of _lhs is multiplied with the matching row of _rhs as a 1 x k by
// k x 1 product.
template <typename T>
struct VecBatchDotOp {
  // Square form: per-row dot of _val with itself.
  void dot (KmMatrix<T>& _res, KmMatrix<T>& _val) {
    dot (_res, _val, _val);
  }
  // NOTE(review): beta == 1 accumulates into _res — callers presumably
  // supply a zero-initialised _res; verify.
  void dot (KmMatrix<T>& _res, KmMatrix<T>& _lhs, KmMatrix<T>& _rhs) {
    cublasHandle_t handle = GpuInfo::ins ().cublas_handle ();
    constexpr T alpha = 1.0;
    constexpr T beta = 1.0;
    Blas::gemm_strided_batched (
        handle,
        CUBLAS_OP_N, CUBLAS_OP_T,
        1, 1, _rhs.cols (),  // m, n, k
        &alpha,
        _lhs.dev_ptr (), 1, _lhs.cols (),
        _rhs.dev_ptr (), 1, _rhs.cols (),
        &beta,
        _res.dev_ptr (), _res.cols (), 1,  // c should be a column vector
        _lhs.rows ());
  }
};
103
28
104
29
// Sum of all elements of a matrix, reduced on the device with thrust.
template <typename T>
struct SumOp {
  T sum (KmMatrix<T>& _val) {
    thrust::device_ptr<T> begin (_val.dev_ptr ());
    thrust::device_ptr<T> end = begin + _val.size ();
    return thrust::reduce (begin, end, static_cast<T>(0), thrust::plus<T>());
  }
};
113
33
114
34
// Scalar multiply via cuBLAS axpy: _res <- _rhs * _lhs + _res.
// NOTE(review): despite the name this ACCUMULATES into _res rather than
// overwriting it (axpy semantics) — callers presumably pass a
// zero-initialised _res; verify.
template <typename T>
struct MulOp {
  void mul (KmMatrix<T>& _res, KmMatrix<T>& _lhs, T _rhs) {
    cublasHandle_t handle = GpuInfo::ins ().cublas_handle ();
    Blas::axpy (handle, _lhs.size (),  // handle, n
                &_rhs,                 // alpha
                _lhs.dev_ptr (), 1,
                _res.dev_ptr (), 1);
  }
};
125
38
126
39
127
40
// Arithmetic mean of all elements of a matrix (device-side sum, then a
// host-side divide by the element count).
template <typename T>
struct MeanOp {
  T mean (KmMatrix<T>& _val) {
    T total = SumOp<T>().sum (_val);
    return total / _val.size ();
  }
};
135
44
136
45
// Index of the minimum along a dimension of a matrix.
template <typename T>
struct ArgMinOp {

  // Returns an m x 1 matrix of column indices: for each row of _val, the
  // column of its minimum value. Only KmMatrixDim::ROW is implemented;
  // anything else reaches M_ERROR.
  KmMatrix<int > argmin (KmMatrix<T>& _val, KmMatrixDim _dim) {
    if (_dim != KmMatrixDim::ROW) {
      // FIXME: column-wise argmin not implemented
      M_ERROR ("Not implemented");
    }
    KmMatrix<int > _res (_val.rows (), 1);
    // One thread per row, 256 threads per block.
    kernel::row_argmin_sequential<<<div_roundup (_val.rows (), 256), 256>>>(
        _res.k_param (), _val.k_param ());
    return _res;
  }
};
151
49
152
50
// Minimum value along a dimension of a matrix.
template <typename T>
struct MinOp {

  // Returns an m x 1 matrix holding the minimum of each row of _val.
  // Only KmMatrixDim::ROW is implemented; anything else reaches M_ERROR
  // (presumably aborts/throws — if it returns, control falls off the end).
  KmMatrix<T> min (KmMatrix<T>& _val, KmMatrixDim _dim) {
    // Removed dead local: `size_t blocks = GpuInfo::ins().blocks(32);` was
    // computed but never used — the launch below sizes its own grid.
    if (_dim == KmMatrixDim::ROW) {
      KmMatrix<T> _res (_val.rows (), 1);
      // One thread per row, 256 threads per block.
      kernel::row_min_sequential<<<div_roundup (_val.rows (), 256), 256>>>(
          _res.k_param (), _val.k_param ());
      return _res;
    } else {
      // FIXME: column-wise min not implemented
      M_ERROR ("Not implemented");
    }
  }
};
168
54
169
55
} // namespace KMeans