Skip to content

Commit b96b927

Browse files
committed
Fix zero distance prob, and re-cluster with large centroids.
1 parent ad4abc5 commit b96b927

File tree

2 files changed

+65
-15
lines changed

2 files changed

+65
-15
lines changed

src/gpu/kmeans/kmeans_init.cu

Lines changed: 59 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ __global__ void construct_distance_pairs_kernel(
5050
}
5151
}
5252

53+
// See SelfArgMinOp
5354
template <typename T>
5455
__global__ void self_row_argmin_sequential(kParam<int> _res, kParam<T> _val) {
5556

@@ -68,6 +69,22 @@ __global__ void self_row_argmin_sequential(kParam<int> _res, kParam<T> _val) {
6869
}
6970
}
7071

72+
// Row-wise minimum over the non-zero entries of `_val`.
//
// Each thread whose index is below `_val.rows` scans one row sequentially,
// reading `_val.ptr[idx * _val.cols + i]`, and stores the result in
// `_res.ptr[idx]`. Entries that compare equal to zero are skipped.
//
// _res: output buffer; must hold at least `_val.rows` elements.
// _val: input buffer, addressed as rows x cols in the layout above.
//
// A row with no non-zero entry (all zeros, or `_val.cols == 0`) yields 0.
// Without this, the search sentinel numeric_limits<T>::max() would be written
// out and later dominate any sum taken over the result.
//
// NOTE(review): assumes global_thread_idx() returns a unique flat index per
// launched thread -- confirm against its definition.
template <typename T>
__global__ void self_row_min_sequential(kParam<T> _res, kParam<T> _val) {

  size_t idx = global_thread_idx();
  if (idx < _val.rows) {
    T min = std::numeric_limits<T>::max();
    // Tracks whether any non-zero entry was seen, so the sentinel is never
    // mistaken for a real minimum.
    bool found = false;
    for (size_t i = 0; i < _val.cols; ++i) {
      T value = _val.ptr[idx * _val.cols + i];
      if (value != 0) {
        found = true;
        if (value < min) {
          min = value;
        }
      }
    }
    _res.ptr[idx] = found ? min : T(0);
  }
}
87+
7188
} // namespace kernel
7289

7390
namespace detail {
@@ -143,6 +160,22 @@ struct SelfArgMinOp {
143160
144161
};
145162
163+
// Host-side launcher for kernel::self_row_min_sequential.
//
// min(_val, _dim):
//   _val: input matrix; its k_param() is handed to the kernel.
//   _dim: reduction dimension. Only KmMatrixDim::ROW is implemented.
//   Returns a (_val.rows() x 1) matrix filled by the kernel.
//
// Any other `_dim` reports "Not implemented" through M_ERROR.
// NOTE(review): M_ERROR is presumed not to return (abort/throw) -- confirm.
// The guard-clause layout keeps every path of this non-void function ending
// in a return statement either way.
template <typename T>
struct SelfMinOp {
  KmMatrix<T> min(KmMatrix<T>& _val, KmMatrixDim _dim) {
    if (_dim != KmMatrixDim::ROW) {
      // FIXME: column-wise reduction is not implemented.
      M_ERROR("Not implemented");
    }

    constexpr size_t kThreadsPerBlock = 256;

    KmMatrix<T> _res(_val.rows(), 1);
    // A grid of 0 blocks is an invalid launch configuration, so skip the
    // launch entirely when there are no rows to reduce.
    if (_val.rows() > 0) {
      kernel::self_row_min_sequential<<<
          div_roundup(_val.rows(), kThreadsPerBlock), kThreadsPerBlock>>>(
          _res.k_param(), _val.k_param());
    }
    return _res;
  }
};
178+
146179
// We use counting to construct the weight as described in the paper. Counting
147180
// is performed by histogram algorithm.
148181
// For re-cluster, the paper suggests using K-Means++, but that will require
@@ -199,36 +232,43 @@ KmMatrix<T> GreedyRecluster<T>::recluster(KmMatrix<T>& _centroids, size_t _k) {
199232
200233
int * min_indices_ptr = min_indices.dev_ptr();
201234
202-
KmMatrix<T> centroids (_k, _centroids.cols());
235+
KmMatrix<T> new_centroids (_k, _centroids.cols());
236+
T * new_centroids_ptr = new_centroids.dev_ptr();
203237
int cols = _centroids.cols();
204238
205-
thrust::copy_if(
239+
T * old_centroids_ptr = _centroids.dev_ptr();
240+
241+
auto k_iter = thrust::make_counting_iterator(0);
242+
thrust::for_each(
206243
thrust::device,
207-
_centroids.dev_ptr(), _centroids.dev_ptr() + _centroids.size(),
208-
centroids.dev_ptr(),
244+
k_iter, k_iter + _k,
209245
[=] __device__ (int idx) {
210-
size_t row = idx / cols;
211-
for (size_t i = 0; i < _k; ++i) {
212-
if (row == min_indices_ptr[i])
213-
return true;
246+
size_t index = min_indices_ptr[idx];
247+
248+
size_t in_begin = index * cols;
249+
size_t in_end = (index + 1) * cols;
250+
251+
size_t res_begin = idx * cols;
252+
size_t res_end = (idx + 1) * cols;
253+
for (size_t i = in_begin, j = res_begin; i < in_end; ++i, ++j) {
254+
new_centroids_ptr[j] = old_centroids_ptr[i];
214255
}
215-
return false;
216256
});
217257
218-
return centroids;
258+
return new_centroids;
219259
}
220260
221261
} // namespace detail
222262
223263
224264
225265
/* ============== KmeansLlInit Class member functions ============== */
266+
226267
template <typename T, template <class> class ReclusterPolicy >
227268
KmMatrix<T> KmeansLlInit<T, ReclusterPolicy>::probability(
228269
KmMatrix<T>& _data, KmMatrix<T>& _centroids) {
229270
230271
KmMatrix<T> centroids_dot (_centroids.rows(), 1);
231-
232272
VecBatchDotOp<T>().dot(centroids_dot, _centroids);
233273
234274
// FIXME: Time this
@@ -237,7 +277,8 @@ KmMatrix<T> KmeansLlInit<T, ReclusterPolicy>::probability(
237277
data_dot_, centroids_dot, distance_pairs_);
238278
distance_pairs_ = distance_op(_data, _centroids);
239279
240-
KmMatrix<T> min_distances = MinOp<T>().min(distance_pairs_, KmMatrixDim::ROW);
280+
KmMatrix<T> min_distances = detail::SelfMinOp<T>().min(distance_pairs_,
281+
KmMatrixDim::ROW);
241282
242283
T cost = SumOp<T>().sum(min_distances);
243284
@@ -321,6 +362,8 @@ KmeansLlInit<T, ReclusterPolicy>::operator()(KmMatrix<T>& _data, size_t _k) {
321362
// Calculate X^2 (point-wise)
322363
data_dot_ = KmMatrix<T>(_data.rows(), 1);
323364
VecBatchDotOp<T>().dot(data_dot_, _data);
365+
data_dot_.set_name("data dot");
366+
std::cout << data_dot_ << std::endl;
324367
325368
// First centroid
326369
KmMatrix<T> centroids = _data.row(idx);
@@ -329,7 +372,8 @@ KmeansLlInit<T, ReclusterPolicy>::operator()(KmMatrix<T>& _data, size_t _k) {
329372
330373
T cost = SumOp<T>().sum(prob);
331374
332-
for (size_t i = 0; i < std::log(cost); ++i) {
375+
size_t max_iter = std::max(T(MAX_ITER), std::log(cost));
376+
for (size_t i = 0; i < max_iter; ++i) {
333377
prob = probability(_data, centroids);
334378
KmMatrix<T> new_centroids = sample_centroids(_data, prob);
335379
centroids = stack(centroids, new_centroids, KmMatrixDim::ROW);
@@ -341,8 +385,9 @@ KmeansLlInit<T, ReclusterPolicy>::operator()(KmMatrix<T>& _data, size_t _k) {
341385
M_ERROR("Not implemented.");
342386
}
343387
344-
std::cout << centroids << std::endl;
345388
centroids = ReclusterPolicy<T>::recluster(centroids, k_);
389+
std::cout << centroids << std::endl;
390+
346391
return centroids;
347392
}
348393

src/gpu/kmeans/kmeans_init.cuh

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@
1616
#include "KmMatrix/Generator.cuh"
1717
#include "KmMatrix/GpuInfo.cuh"
1818

19-
namespace H2O4GPU{
19+
constexpr double ESP = 1e-8;
20+
21+
namespace H2O4GPU {
2022
namespace KMeans {
2123

2224
namespace detail {
@@ -92,6 +94,9 @@ struct KmeansLlInit : public KmeansInitBase<T> {
9294
int seed_;
9395
int k_;
9496

97+
// Suggested in original paper, 8 is usually enough.
98+
constexpr static float MAX_ITER = 8;
99+
95100
std::unique_ptr<GeneratorBase<T>> generator_;
96101

97102
// Buffer like variables

0 commit comments

Comments
 (0)