@@ -50,6 +50,7 @@ __global__ void construct_distance_pairs_kernel(
50
50
}
51
51
}
52
52
53
+ // See SelfArgMinOp
53
54
template <typename T>
54
55
__global__ void self_row_argmin_sequential (kParam <int > _res, kParam <T> _val) {
55
56
@@ -68,6 +69,22 @@ __global__ void self_row_argmin_sequential(kParam<int> _res, kParam<T> _val) {
68
69
}
69
70
}
70
71
72
+ template <typename T>
73
+ __global__ void self_row_min_sequential (kParam <T> _res, kParam <T> _val) {
74
+
75
+ size_t idx = global_thread_idx ();
76
+ if (idx < _val.rows ) {
77
+ T min = std::numeric_limits<T>::max ();
78
+ for (size_t i = 0 ; i < _val.cols ; ++i) {
79
+ T value = _val.ptr [idx * _val.cols + i];
80
+ if (value < min && value != 0 ) {
81
+ min = value;
82
+ }
83
+ }
84
+ _res.ptr [idx] = min;
85
+ }
86
+ }
87
+
71
88
} // namespace kernel
72
89
73
90
namespace detail {
@@ -143,6 +160,22 @@ struct SelfArgMinOp {
143
160
144
161
};
145
162
163
+ template <typename T>
164
+ struct SelfMinOp {
165
+ KmMatrix<T> min (KmMatrix<T>& _val, KmMatrixDim _dim) {
166
+ size_t blocks = GpuInfo::ins ().blocks (32 );
167
+ if (_dim == KmMatrixDim::ROW) {
168
+ KmMatrix<T> _res (_val.rows (), 1 );
169
+ kernel::self_row_min_sequential<<<div_roundup(_val.rows(), 256 ), 256 >>> (
170
+ _res.k_param (), _val.k_param ());
171
+ return _res;
172
+ } else {
173
+ // FIXME
174
+ M_ERROR (" Not implemented" );
175
+ }
176
+ }
177
+ };
178
+
146
179
// We use counting to construct the weight as described in the paper. Counting
147
180
// is performed by histogram algorithm.
148
181
// For re-cluster, the paper suggests using K-Means++, but that will require
@@ -199,36 +232,43 @@ KmMatrix<T> GreedyRecluster<T>::recluster(KmMatrix<T>& _centroids, size_t _k) {
199
232
200
233
int * min_indices_ptr = min_indices.dev_ptr ();
201
234
202
- KmMatrix<T> centroids (_k, _centroids.cols ());
235
+ KmMatrix<T> new_centroids (_k, _centroids.cols ());
236
+ T * new_centroids_ptr = new_centroids.dev_ptr ();
203
237
int cols = _centroids.cols ();
204
238
205
- thrust::copy_if (
239
+ T * old_centroids_ptr = _centroids.dev_ptr ();
240
+
241
+ auto k_iter = thrust::make_counting_iterator (0 );
242
+ thrust::for_each (
206
243
thrust::device,
207
- _centroids.dev_ptr (), _centroids.dev_ptr () + _centroids.size (),
208
- centroids.dev_ptr (),
244
+ k_iter, k_iter + _k,
209
245
[=] __device__ (int idx) {
210
- size_t row = idx / cols;
211
- for (size_t i = 0 ; i < _k; ++i) {
212
- if (row == min_indices_ptr[i])
213
- return true ;
246
+ size_t index = min_indices_ptr[idx];
247
+
248
+ size_t in_begin = index * cols;
249
+ size_t in_end = (index + 1 ) * cols;
250
+
251
+ size_t res_begin = idx * cols;
252
+ size_t res_end = (idx + 1 ) * cols;
253
+ for (size_t i = in_begin, j = res_begin; i < in_end; ++i, ++j) {
254
+ new_centroids_ptr[j] = old_centroids_ptr[i];
214
255
}
215
- return false ;
216
256
});
217
257
218
- return centroids ;
258
+ return new_centroids ;
219
259
}
220
260
221
261
} // namespace detail
222
262
223
263
224
264
225
265
/* ============== KmeansLlInit Class member functions ============== */
266
+
226
267
template <typename T, template <class > class ReclusterPolicy >
227
268
KmMatrix<T> KmeansLlInit<T, ReclusterPolicy>::probability(
228
269
KmMatrix<T>& _data, KmMatrix<T>& _centroids) {
229
270
230
271
KmMatrix<T> centroids_dot (_centroids.rows (), 1 );
231
-
232
272
VecBatchDotOp<T>().dot (centroids_dot, _centroids);
233
273
234
274
// FIXME: Time this
@@ -237,7 +277,8 @@ KmMatrix<T> KmeansLlInit<T, ReclusterPolicy>::probability(
237
277
data_dot_, centroids_dot, distance_pairs_);
238
278
distance_pairs_ = distance_op (_data, _centroids);
239
279
240
- KmMatrix<T> min_distances = MinOp<T>().min (distance_pairs_, KmMatrixDim::ROW);
280
+ KmMatrix<T> min_distances = detail::SelfMinOp<T>().min (distance_pairs_,
281
+ KmMatrixDim::ROW);
241
282
242
283
T cost = SumOp<T>().sum (min_distances);
243
284
@@ -321,6 +362,8 @@ KmeansLlInit<T, ReclusterPolicy>::operator()(KmMatrix<T>& _data, size_t _k) {
321
362
// Calculate X^2 (point-wise)
322
363
data_dot_ = KmMatrix<T>(_data.rows (), 1 );
323
364
VecBatchDotOp<T>().dot (data_dot_, _data);
365
+ data_dot_.set_name (" data dot" );
366
+ std::cout << data_dot_ << std::endl;
324
367
325
368
// First centroid
326
369
KmMatrix<T> centroids = _data.row (idx);
@@ -329,7 +372,8 @@ KmeansLlInit<T, ReclusterPolicy>::operator()(KmMatrix<T>& _data, size_t _k) {
329
372
330
373
T cost = SumOp<T>().sum (prob);
331
374
332
- for (size_t i = 0 ; i < std::log (cost); ++i) {
375
+ size_t max_iter = std::max (T (MAX_ITER), std::log (cost));
376
+ for (size_t i = 0 ; i < max_iter; ++i) {
333
377
prob = probability (_data, centroids);
334
378
KmMatrix<T> new_centroids = sample_centroids (_data, prob);
335
379
centroids = stack (centroids, new_centroids, KmMatrixDim::ROW);
@@ -341,8 +385,9 @@ KmeansLlInit<T, ReclusterPolicy>::operator()(KmMatrix<T>& _data, size_t _k) {
341
385
M_ERROR (" Not implemented." );
342
386
}
343
387
344
- std::cout << centroids << std::endl;
345
388
centroids = ReclusterPolicy<T>::recluster (centroids, k_);
389
+ std::cout << centroids << std::endl;
390
+
346
391
return centroids;
347
392
}
348
393
0 commit comments