MRPT  1.9.9
KmTree.h
Go to the documentation of this file.
1 /* +------------------------------------------------------------------------+
2  | Mobile Robot Programming Toolkit (MRPT) |
3  | http://www.mrpt.org/ |
4  | |
5  | Copyright (c) 2005-2018, Individual contributors, see AUTHORS file |
8  +------------------------------------------------------------------------+ */
9 // BEWARE: BETA VERSION
10 // --------------------
11 //
12 // A k-d tree that vastly speeds up an iteration of k-means (in any number of
13 // dimensions). The main
14 // idea for this data structure is from Kanungo/Mount. This is used internally
15 // by Kmeans.cpp, and
16 // will most likely not need to be used directly.
17 //
18 // The stucture works as follows:
19 // - All data points are placed into a tree where we choose child nodes by
20 // partitioning all data
21 // points along a plane parallel to the axis.
22 // - We maintain for each node, the bounding box of all data points stored at
23 // that node.
24 // - To do a k-means iteration, we need to assign points to clusters and
25 // calculate the sum and
26 // the number of points assigned to each cluster. For each node in the tree,
27 // we can rule out
28 // some cluster centers as being too far away from every single point in
29 // that bounding box.
30 // Once only one cluster is left, all points in the node can be assigned to
31 // that cluster in
32 // batch.
33 //
34 // Author: David Arthur (darthur@gmail.com), 2009
35
36 #ifndef KM_TREE_H__
37 #define KM_TREE_H__
38
39 // Includes
40 #include "KmUtils.h"
41
42 // KmTree class definition
43 class KmTree
44 {
45  public:
46  // Constructs a tree out of the given n data points living in R^d.
47  KmTree(int n, int d, Scalar* points);
48  ~KmTree();
49
50  // Given k cluster centers, this runs a full k-means iterations, choosing
51  // the next set of
52  // centers and returning the cost function for this set of centers. If
53  // assignment is not null,
54  // it should be an array of size n that will be filled with the index of the
55  // cluster (0 - k-1)
56  // that each data point is assigned to. The new center values will overwrite
57  // the old ones.
58  Scalar DoKMeansStep(int k, Scalar* centers, int* assignment) const;
59
60  // Choose k initial centers for k-means using the kmeans++ seeding
61  // procedure. The resulting
62  // centers are returned via the centers variable, which should be
63  // pre-allocated to size k*d.
64  // The cost of the initial clustering is returned.
65  Scalar SeedKMeansPlusPlus(int k, Scalar* centers) const;
66
67  private:
68  struct Node
69  {
70  int num_points; // Number of points stored in this node
71  int first_point_index; // The smallest point index stored in this node
72  Scalar *median, *radius; // Bounding box center and half side-lengths
73  Scalar* sum; // Sum of the points stored in this node
74  Scalar opt_cost; // Min cost for putting all points in this node in 1
75  // cluster
76  Node *lower_node, *upper_node; // Child nodes
77  mutable int kmpp_cluster_index; // The cluster these points are
78  // assigned to or -1 if variable
79  };
80
81  // Helper functions for constructor
83  Scalar* points, int first_index, int last_index, char** next_node_data);
84  Scalar GetNodeCost(const Node* node, Scalar* center) const;
85
86  // Helper functions for DoKMeans step
88  const Node* node, int k, int* candidates, Scalar* centers, Scalar* sums,
89  int* counts, int* assignment) const;
90  bool ShouldBePruned(
91  Scalar* box_median, Scalar* box_radius, Scalar* centers, int best_index,
92  int test_index) const;
93
94  // Helper functions for SeedKMeansPlusPlus
95  void SeedKmppSetClusterIndex(const Node* node, int index) const;
97  const Node* node, int new_cluster, Scalar* centers,
98  Scalar* dist_sq) const;
99
100  int n_, d_;
103  char* node_data_;
105 };
106
107 #endif
double Scalar
Definition: KmUtils.h:44
Scalar * median
Definition: KmTree.h:72
int kmpp_cluster_index
Definition: KmTree.h:77
GLenum GLsizei n
Definition: glext.h:5074
KmTree(int n, int d, Scalar *points)
Definition: KmTree.cpp:19
Definition: KmTree.h:72
bool ShouldBePruned(Scalar *box_median, Scalar *box_radius, Scalar *centers, int best_index, int test_index) const
Definition: KmTree.cpp:325
GLsizei const GLfloat * points
Definition: glext.h:5339
Scalar GetNodeCost(const Node *node, Scalar *center) const
Definition: KmTree.cpp:231
int n_
Definition: KmTree.h:100
GLuint index
Definition: glext.h:4054
Node * lower_node
Definition: KmTree.h:76
int * point_indices_
Definition: KmTree.h:104
Scalar opt_cost
Definition: KmTree.h:74
Scalar * sum
Definition: KmTree.h:73
Scalar DoKMeansStep(int k, Scalar *centers, int *assignment) const
Definition: KmTree.cpp:58
int num_points
Definition: KmTree.h:70
int d_
Definition: KmTree.h:100
Definition: KmTree.h:43
Scalar SeedKmppUpdateAssignment(const Node *node, int new_cluster, Scalar *centers, Scalar *dist_sq) const
Definition: KmTree.cpp:402
Scalar DoKMeansStepAtNode(const Node *node, int k, int *candidates, Scalar *centers, Scalar *sums, int *counts, int *assignment) const
Definition: KmTree.cpp:252
Scalar SeedKMeansPlusPlus(int k, Scalar *centers) const
Definition: KmTree.cpp:346
~KmTree()
Definition: KmTree.cpp:52
Node * BuildNodes(Scalar *points, int first_index, int last_index, char **next_node_data)
Definition: KmTree.cpp:106
int first_point_index
Definition: KmTree.h:71
char * node_data_
Definition: KmTree.h:103
void SeedKmppSetClusterIndex(const Node *node, int index) const
Definition: KmTree.cpp:392
Scalar * points_
Definition: KmTree.h:101
Node * top_node_
Definition: KmTree.h:102
Node * upper_node
Definition: KmTree.h:76

 Page generated by Doxygen 1.8.14 for MRPT 1.9.9 Git: 7d5e6d718 Fri Aug 24 01:51:28 2018 +0200 at lun nov 2 08:35:50 CET 2020