MRPT  2.0.1
KmTree.cpp
Go to the documentation of this file.
1 /* +------------------------------------------------------------------------+
2  | Mobile Robot Programming Toolkit (MRPT) |
3  | https://www.mrpt.org/ |
4  | |
5  | Copyright (c) 2005-2020, Individual contributors, see AUTHORS file |
6  | See: https://www.mrpt.org/Authors - All rights reserved. |
7  | Released under BSD License. See: https://www.mrpt.org/License |
8  +------------------------------------------------------------------------+ */
9 // See KmTree.cpp
10 //
11 // Author: David Arthur (darthur@gmail.com), 2009
12 
13 // Includes
14 #include "KmTree.h"
15 #include <cstdlib>
16 #include <iostream>
17 using namespace std;
18 
19 KmTree::KmTree(int n, int d, Scalar* points) : n_(n), d_(d), points_(points)
20 {
21  // Initialize memory
22  int node_size = sizeof(Node) + d_ * 3 * sizeof(Scalar);
23  node_data_ = (char*)malloc((2 * n - 1) * node_size);
24  point_indices_ = (int*)malloc(n * sizeof(int));
25  for (int i = 0; i < n; i++) point_indices_[i] = i;
26  KM_ASSERT(node_data_ != nullptr && point_indices_ != nullptr);
27 
28  // Calculate the bounding box for the points
29  Scalar* bound_v1 = PointAllocate(d_);
30  Scalar* bound_v2 = PointAllocate(d_);
31  KM_ASSERT(bound_v1 != nullptr && bound_v2 != nullptr);
32  PointCopy(bound_v1, points, d_);
33  PointCopy(bound_v2, points, d_);
34  for (int i = 1; i < n; i++)
35  for (int j = 0; j < d; j++)
36  {
37  if (bound_v1[j] > points[i * d_ + j])
38  bound_v1[j] = points[i * d_ + j];
39  if (bound_v2[j] < points[i * d_ + j])
40  bound_v1[j] = points[i * d_ + j];
41  }
42 
43  // Build the tree
44  char* temp_node_data = node_data_;
45  top_node_ = BuildNodes(points, 0, n - 1, &temp_node_data);
46 
47  // Cleanup
48  PointFree(bound_v1);
49  PointFree(bound_v2);
50 }
51 
53 {
54  free(point_indices_);
55  free(node_data_);
56 }
57 
58 Scalar KmTree::DoKMeansStep(int k, Scalar* centers, int* assignment) const
59 {
60  // Create an invalid center for comparison purposes
61  Scalar* bad_center = PointAllocate(d_);
62  KM_ASSERT(bad_center != nullptr);
63  memset(bad_center, 0xff, d_ * sizeof(Scalar));
64 
65  // Allocate data
66  auto* sums = (Scalar*)calloc(k * d_, sizeof(Scalar));
67  int* counts = (int*)calloc(k, sizeof(int));
68  int num_candidates = 0;
69  int* candidates = (int*)malloc(k * sizeof(int));
70  KM_ASSERT(sums != nullptr && counts != nullptr && candidates != nullptr);
71  for (int i = 0; i < k; i++)
72  if (memcmp(centers + i * d_, bad_center, d_ * sizeof(Scalar)) != 0)
73  candidates[num_candidates++] = i;
74 
75  // Find nodes
76  Scalar result = DoKMeansStepAtNode(
77  top_node_, num_candidates, candidates, centers, sums, counts,
78  assignment);
79 
80  // Set the new centers
81  for (int i = 0; i < k; i++)
82  {
83  if (counts[i] > 0)
84  {
85  PointScale(sums + i * d_, Scalar(1) / counts[i], d_);
86  PointCopy(centers + i * d_, sums + i * d_, d_);
87  }
88  else
89  {
90  memcpy(centers + i * d_, bad_center, d_ * sizeof(Scalar));
91  }
92  }
93 
94  // Cleanup memory
95  PointFree(bad_center);
96  free(candidates);
97  free(counts);
98  free(sums);
99  return result;
100 }
101 
102 // Helper functions for constructor
103 // ================================
104 
105 // Build a kd tree from the given set of points
107  Scalar* points, int first_index, int last_index, char** next_node_data)
108 {
109  // Allocate the node
110  Node* node = (Node*)(*next_node_data);
111  (*next_node_data) += sizeof(Node);
112  node->sum = (Scalar*)(*next_node_data);
113  (*next_node_data) += sizeof(Scalar) * d_;
114  node->median = (Scalar*)(*next_node_data);
115  (*next_node_data) += sizeof(Scalar) * d_;
116  node->radius = (Scalar*)(*next_node_data);
117  (*next_node_data) += sizeof(Scalar) * d_;
118 
119  // Fill in basic info
120  node->num_points = (last_index - first_index + 1);
121  node->first_point_index = first_index;
122 
123  // Calculate the bounding box
124  Scalar* first_point = points + point_indices_[first_index] * d_;
125  Scalar* bound_p1 = PointAllocate(d_);
126  Scalar* bound_p2 = PointAllocate(d_);
127  KM_ASSERT(bound_p1 != nullptr && bound_p2 != nullptr);
128  PointCopy(bound_p1, first_point, d_);
129  PointCopy(bound_p2, first_point, d_);
130  for (int i = first_index + 1; i <= last_index; i++)
131  for (int j = 0; j < d_; j++)
132  {
133  Scalar c = points[point_indices_[i] * d_ + j];
134  if (bound_p1[j] > c) bound_p1[j] = c;
135  if (bound_p2[j] < c) bound_p2[j] = c;
136  }
137 
138  // Calculate bounding box stats and delete the bounding box memory
139  Scalar max_radius = -1;
140  int split_d = -1;
141  for (int j = 0; j < d_; j++)
142  {
143  node->median[j] = (bound_p1[j] + bound_p2[j]) / 2;
144  node->radius[j] = (bound_p2[j] - bound_p1[j]) / 2;
145  if (node->radius[j] > max_radius)
146  {
147  max_radius = node->radius[j];
148  split_d = j;
149  }
150  }
151  PointFree(bound_p2);
152  PointFree(bound_p1);
153 
154  // If the max spread is 0, make this a leaf node
155  if (max_radius == 0)
156  {
157  node->lower_node = node->upper_node = nullptr;
158  PointCopy(node->sum, first_point, d_);
159  if (last_index != first_index)
160  PointScale(node->sum, Scalar(last_index - first_index + 1), d_);
161  node->opt_cost = 0;
162  return node;
163  }
164 
165  // Partition the points around the midpoint in this dimension. The
166  // partitioning is done in-place
167  // by iterating from left-to-right and right-to-left in the same way that
168  // partioning is done for
169  // quicksort.
170  Scalar split_pos = node->median[split_d];
171  int i1 = first_index, i2 = last_index, size1 = 0;
172  while (i1 <= i2)
173  {
174  bool is_i1_good =
175  (points[point_indices_[i1] * d_ + split_d] < split_pos);
176  bool is_i2_good =
177  (points[point_indices_[i2] * d_ + split_d] >= split_pos);
178  if (!is_i1_good && !is_i2_good)
179  {
180  int temp = point_indices_[i1];
181  point_indices_[i1] = point_indices_[i2];
182  point_indices_[i2] = temp;
183  is_i1_good = is_i2_good = true;
184  }
185  if (is_i1_good)
186  {
187  i1++;
188  size1++;
189  }
190  if (is_i2_good)
191  {
192  i2--;
193  }
194  }
195 
196  // Create the child nodes
197  KM_ASSERT(size1 >= 1 && size1 <= last_index - first_index);
198  node->lower_node = BuildNodes(
199  points, first_index, first_index + size1 - 1, next_node_data);
200  node->upper_node =
201  BuildNodes(points, first_index + size1, last_index, next_node_data);
202 
203  // Calculate the new sum and opt cost
204  PointCopy(node->sum, node->lower_node->sum, d_);
205  PointAdd(node->sum, node->upper_node->sum, d_);
206  Scalar* center = PointAllocate(d_);
207  KM_ASSERT(center != nullptr);
208  PointCopy(center, node->sum, d_);
209  PointScale(center, Scalar(1) / node->num_points, d_);
210  node->opt_cost = GetNodeCost(node->lower_node, center) +
211  GetNodeCost(node->upper_node, center);
212  PointFree(center);
213  return node;
214 }
215 
216 // Returns the total contribution of all points in the given kd-tree node,
217 // assuming they are all
218 // assigned to a center at the given location. We need to return:
219 //
220 // sum_{x \in node} ||x - center||^2.
221 //
222 // If c denotes the center of mass of the points in this node and n denotes the
223 // number of points in
224 // it, then this quantity is given by
225 //
226 // n * ||c - center||^2 + sum_{x \in node} ||x - c||^2
227 //
228 // The sum is precomputed for each node as opt_cost. This formula follows from
229 // expanding both sides
230 // as dot products. See Kanungo/Mount for more info.
231 Scalar KmTree::GetNodeCost(const Node* node, Scalar* center) const
232 {
233  Scalar dist_sq = 0;
234  for (int i = 0; i < d_; i++)
235  {
236  Scalar x = (node->sum[i] / node->num_points) - center[i];
237  dist_sq += x * x;
238  }
239  return node->opt_cost + node->num_points * dist_sq;
240 }
241 
242 // Helper functions for DoKMeans step
243 // ==================================
244 
245 // A recursive version of DoKMeansStep. This determines which clusters all
246 // points that are rooted
247 // node will be assigned to, and updates sums, counts and assignment (if not
248 // null) accordingly.
249 // candidates maintains the set of cluster indices which could possibly be the
250 // closest clusters
251 // for points in this subtree.
253  const Node* node, int k, int* candidates, Scalar* centers, Scalar* sums,
254  int* counts, int* assignment) const
255 {
256  // Determine which center the node center is closest to
257  Scalar min_dist_sq =
258  PointDistSq(node->median, centers + candidates[0] * d_, d_);
259  int closest_i = candidates[0];
260  for (int i = 1; i < k; i++)
261  {
262  Scalar dist_sq =
263  PointDistSq(node->median, centers + candidates[i] * d_, d_);
264  if (dist_sq < min_dist_sq)
265  {
266  min_dist_sq = dist_sq;
267  closest_i = candidates[i];
268  }
269  }
270 
271  // If this is a non-leaf node, recurse if necessary
272  if (node->lower_node != nullptr)
273  {
274  // Build the new list of candidates
275  int new_k = 0;
276  int* new_candidates = (int*)malloc(k * sizeof(int));
277  KM_ASSERT(new_candidates != nullptr);
278  for (int i = 0; i < k; i++)
279  if (!ShouldBePruned(
280  node->median, node->radius, centers, closest_i,
281  candidates[i]))
282  new_candidates[new_k++] = candidates[i];
283 
284  // Recurse if there's at least two
285  if (new_k > 1)
286  {
287  Scalar result = DoKMeansStepAtNode(
288  node->lower_node, new_k, new_candidates,
289  centers, sums, counts, assignment) +
291  node->upper_node, new_k, new_candidates,
292  centers, sums, counts, assignment);
293  free(new_candidates);
294  return result;
295  }
296  else
297  {
298  free(new_candidates);
299  }
300  }
301 
302  // Assigns all points within this node to a single center
303  PointAdd(sums + closest_i * d_, node->sum, d_);
304  counts[closest_i] += node->num_points;
305  if (assignment != nullptr)
306  {
307  for (int i = node->first_point_index;
308  i < node->first_point_index + node->num_points; i++)
309  assignment[point_indices_[i]] = closest_i;
310  }
311  return GetNodeCost(node, centers + closest_i * d_);
312 }
313 
314 // Determines whether every point in the box is closer to centers[best_index]
315 // than to
316 // centers[test_index].
317 //
318 // If x is a point, c_0 = centers[best_index], c = centers[test_index], then:
319 // (x-c).(x-c) < (x-c_0).(x-c_0)
320 // <=> (c-c_0).(c-c_0) < 2(x-c_0).(c-c_0)
321 //
322 // The right-hand side is maximized for a vertex of the box where for each
323 // dimension, we choose
324 // the low or high value based on the sign of x-c_0 in that dimension.
326  Scalar* box_median, Scalar* box_radius, Scalar* centers, int best_index,
327  int test_index) const
328 {
329  if (best_index == test_index) return false;
330 
331  Scalar* best = centers + best_index * d_;
332  Scalar* test = centers + test_index * d_;
333  Scalar lhs = 0, rhs = 0;
334  for (int i = 0; i < d_; i++)
335  {
336  Scalar component = test[i] - best[i];
337  lhs += component * component;
338  if (component > 0)
339  rhs += (box_median[i] + box_radius[i] - best[i]) * component;
340  else
341  rhs += (box_median[i] - box_radius[i] - best[i]) * component;
342  }
343  return (lhs >= 2 * rhs);
344 }
345 
347 {
348  auto* dist_sq = (Scalar*)malloc(n_ * sizeof(Scalar));
349  KM_ASSERT(dist_sq != nullptr);
350 
351  // Choose an initial center uniformly at random
353  int i = GetRandom(n_);
354  memcpy(centers, points_ + point_indices_[i] * d_, d_ * sizeof(Scalar));
355  Scalar total_cost = 0;
356  for (int j = 0; j < n_; j++)
357  {
358  dist_sq[j] = PointDistSq(points_ + point_indices_[j] * d_, centers, d_);
359  total_cost += dist_sq[j];
360  }
361 
362  // Repeatedly choose more centers
363  for (int new_cluster = 1; new_cluster < k; new_cluster++)
364  {
365  while (true)
366  {
367  Scalar cutoff = (rand() / Scalar(RAND_MAX)) * total_cost;
368  Scalar cur_cost = 0;
369  for (i = 0; i < n_; i++)
370  {
371  cur_cost += dist_sq[i];
372  if (cur_cost >= cutoff) break;
373  }
374  if (i < n_) break;
375  }
376  memcpy(
377  centers + new_cluster * d_, points_ + point_indices_[i] * d_,
378  d_ * sizeof(Scalar));
379  total_cost =
380  SeedKmppUpdateAssignment(top_node_, new_cluster, centers, dist_sq);
381  }
382 
383  // Clean up and return
384  free(dist_sq);
385  return total_cost;
386 }
387 
388 // Helper functions for SeedKMeansPlusPlus
389 // =======================================
390 
391 // Sets kmpp_cluster_index to 0 for all nodes
392 void KmTree::SeedKmppSetClusterIndex(const Node* node, int value) const
393 {
394  node->kmpp_cluster_index = value;
395  if (node->lower_node != nullptr)
396  {
397  SeedKmppSetClusterIndex(node->lower_node, value);
398  SeedKmppSetClusterIndex(node->upper_node, value);
399  }
400 }
401 
403  const Node* node, int new_cluster, Scalar* centers, Scalar* dist_sq) const
404 {
405  // See if we can assign all points in this node to one cluster
406  if (node->kmpp_cluster_index >= 0)
407  {
408  if (ShouldBePruned(
409  node->median, node->radius, centers, node->kmpp_cluster_index,
410  new_cluster))
411  return GetNodeCost(node, centers + node->kmpp_cluster_index * d_);
412  if (ShouldBePruned(
413  node->median, node->radius, centers, new_cluster,
414  node->kmpp_cluster_index))
415  {
416  SeedKmppSetClusterIndex(node, new_cluster);
417  for (int i = node->first_point_index;
418  i < node->first_point_index + node->num_points; i++)
419  dist_sq[i] = PointDistSq(
420  points_ + point_indices_[i] * d_,
421  centers + new_cluster * d_, d_);
422  return GetNodeCost(node, centers + new_cluster * d_);
423  }
424 
425  // It may be that the a leaf-node point is equidistant from the new
426  // center or old
427  if (node->lower_node == nullptr)
428  return GetNodeCost(node, centers + node->kmpp_cluster_index * d_);
429  }
430 
431  // Recurse
433  node->lower_node, new_cluster, centers, dist_sq) +
435  node->upper_node, new_cluster, centers, dist_sq);
436  int i1 = node->lower_node->kmpp_cluster_index,
437  i2 = node->upper_node->kmpp_cluster_index;
438  if (i1 == i2 && i1 != -1)
439  node->kmpp_cluster_index = i1;
440  else
441  node->kmpp_cluster_index = -1;
442  return cost;
443 }
double Scalar
Definition: KmUtils.h:43
Scalar * median
Definition: KmTree.h:71
int kmpp_cluster_index
Definition: KmTree.h:76
KmTree(int n, int d, Scalar *points)
Definition: KmTree.cpp:19
Scalar * radius
Definition: KmTree.h:71
Scalar * PointAllocate(int d)
Definition: KmUtils.h:49
void PointCopy(Scalar *p1, const Scalar *p2, int d)
Definition: KmUtils.h:55
STL namespace.
bool ShouldBePruned(Scalar *box_median, Scalar *box_radius, Scalar *centers, int best_index, int test_index) const
Definition: KmTree.cpp:325
Scalar GetNodeCost(const Node *node, Scalar *center) const
Definition: KmTree.cpp:231
void PointAdd(Scalar *p1, const Scalar *p2, int d)
Definition: KmUtils.h:61
int n_
Definition: KmTree.h:99
Node * lower_node
Definition: KmTree.h:75
int * point_indices_
Definition: KmTree.h:103
void PointScale(Scalar *p, Scalar scale, int d)
Definition: KmUtils.h:66
Scalar opt_cost
Definition: KmTree.h:73
Scalar * sum
Definition: KmTree.h:72
Scalar DoKMeansStep(int k, Scalar *centers, int *assignment) const
Definition: KmTree.cpp:58
int num_points
Definition: KmTree.h:69
int d_
Definition: KmTree.h:99
int GetRandom(int n)
Definition: KmUtils.h:100
Scalar SeedKmppUpdateAssignment(const Node *node, int new_cluster, Scalar *centers, Scalar *dist_sq) const
Definition: KmTree.cpp:402
#define KM_ASSERT(expression)
Definition: KmUtils.h:86
Scalar DoKMeansStepAtNode(const Node *node, int k, int *candidates, Scalar *centers, Scalar *sums, int *counts, int *assignment) const
Definition: KmTree.cpp:252
Scalar SeedKMeansPlusPlus(int k, Scalar *centers) const
Definition: KmTree.cpp:346
~KmTree()
Definition: KmTree.cpp:52
Node * BuildNodes(Scalar *points, int first_index, int last_index, char **next_node_data)
Definition: KmTree.cpp:106
int first_point_index
Definition: KmTree.h:70
void PointFree(Scalar *p)
Definition: KmUtils.h:54
char * node_data_
Definition: KmTree.h:102
Scalar PointDistSq(const Scalar *p1, const Scalar *p2, int d)
Definition: KmUtils.h:71
void SeedKmppSetClusterIndex(const Node *node, int index) const
Definition: KmTree.cpp:392
Scalar * points_
Definition: KmTree.h:100
Node * top_node_
Definition: KmTree.h:101
void memcpy(void *dest, size_t destSize, const void *src, size_t copyCount) noexcept
An OS and compiler independent version of "memcpy".
Node * upper_node
Definition: KmTree.h:75



Page generated by Doxygen 1.8.14 for MRPT 2.0.1 Git: 0fef1a6d7 Fri Apr 3 23:00:21 2020 +0200 at vie abr 3 23:20:28 CEST 2020