Main MRPT website > C++ reference for MRPT 1.5.6
KmTree.h
Go to the documentation of this file.
1 /* +---------------------------------------------------------------------------+
2  | Mobile Robot Programming Toolkit (MRPT) |
3  | http://www.mrpt.org/ |
4  | |
5  | Copyright (c) 2005-2017, Individual contributors, see AUTHORS file |
6  | See: http://www.mrpt.org/Authors - All rights reserved. |
7  | Released under BSD License. See details in http://www.mrpt.org/License |
8  +---------------------------------------------------------------------------+ */
9 // BEWARE: BETA VERSION
10 // --------------------
11 //
12 // A k-d tree that vastly speeds up an iteration of k-means (in any number of dimensions). The main
13 // idea for this data structure is from Kanungo/Mount. This is used internally by Kmeans.cpp, and
14 // will most likely not need to be used directly.
15 //
16 // The stucture works as follows:
17 // - All data points are placed into a tree where we choose child nodes by partitioning all data
18 // points along a plane parallel to the axis.
19 // - We maintain for each node, the bounding box of all data points stored at that node.
20 // - To do a k-means iteration, we need to assign points to clusters and calculate the sum and
21 // the number of points assigned to each cluster. For each node in the tree, we can rule out
22 // some cluster centers as being too far away from every single point in that bounding box.
23 // Once only one cluster is left, all points in the node can be assigned to that cluster in
24 // batch.
25 //
26 // Author: David Arthur (darthur@gmail.com), 2009
27 
28 #ifndef KM_TREE_H__
29 #define KM_TREE_H__
30 
31 // Includes
32 #include "KmUtils.h"
33 
34 // KmTree class definition
35 class KmTree {
36  public:
37  // Constructs a tree out of the given n data points living in R^d.
38  KmTree(int n, int d, Scalar *points);
39  ~KmTree();
40 
41  // Given k cluster centers, this runs a full k-means iterations, choosing the next set of
42  // centers and returning the cost function for this set of centers. If assignment is not null,
43  // it should be an array of size n that will be filled with the index of the cluster (0 - k-1)
44  // that each data point is assigned to. The new center values will overwrite the old ones.
45  Scalar DoKMeansStep(int k, Scalar *centers, int *assignment) const;
46 
47  // Choose k initial centers for k-means using the kmeans++ seeding procedure. The resulting
48  // centers are returned via the centers variable, which should be pre-allocated to size k*d.
49  // The cost of the initial clustering is returned.
50  Scalar SeedKMeansPlusPlus(int k, Scalar *centers) const;
51 
52  private:
53  struct Node {
54  int num_points; // Number of points stored in this node
55  int first_point_index; // The smallest point index stored in this node
56  Scalar *median, *radius; // Bounding box center and half side-lengths
57  Scalar *sum; // Sum of the points stored in this node
58  Scalar opt_cost; // Min cost for putting all points in this node in 1 cluster
59  Node *lower_node, *upper_node; // Child nodes
60  mutable int kmpp_cluster_index; // The cluster these points are assigned to or -1 if variable
61  };
62 
63  // Helper functions for constructor
64  Node *BuildNodes(Scalar *points, int first_index, int last_index, char **next_node_data);
65  Scalar GetNodeCost(const Node *node, Scalar *center) const;
66 
67  // Helper functions for DoKMeans step
68  Scalar DoKMeansStepAtNode(const Node *node, int k, int *candidates, Scalar *centers,
69  Scalar *sums, int *counts, int *assignment) const;
70  bool ShouldBePruned(Scalar *box_median, Scalar *box_radius, Scalar *centers, int best_index,
71  int test_index) const;
72 
73  // Helper functions for SeedKMeansPlusPlus
74  void SeedKmppSetClusterIndex(const Node *node, int index) const;
75  Scalar SeedKmppUpdateAssignment(const Node *node, int new_cluster, Scalar *centers,
76  Scalar *dist_sq) const;
77 
78  int n_, d_;
81  char *node_data_;
83 };
84 
85 #endif
bool ShouldBePruned(Scalar *box_median, Scalar *box_radius, Scalar *centers, int best_index, int test_index) const
Definition: KmTree.cpp:273
Scalar * median
Definition: KmTree.h:56
int kmpp_cluster_index
Definition: KmTree.h:60
Scalar GetNodeCost(const Node *node, Scalar *center) const
Definition: KmTree.cpp:202
Scalar DoKMeansStepAtNode(const Node *node, int k, int *candidates, Scalar *centers, Scalar *sums, int *counts, int *assignment) const
Definition: KmTree.cpp:218
Scalar DoKMeansStep(int k, Scalar *centers, int *assignment) const
Definition: KmTree.cpp:54
KmTree(int n, int d, Scalar *points)
Definition: KmTree.cpp:19
Scalar * radius
Definition: KmTree.h:56
Scalar SeedKMeansPlusPlus(int k, Scalar *centers) const
Definition: KmTree.cpp:292
GLuint GLenum GLsizei GLsizei GLboolean void * points
Definition: glew.h:7748
GLsizei n
Definition: glew.h:5051
int n_
Definition: KmTree.h:78
Node * lower_node
Definition: KmTree.h:59
int * point_indices_
Definition: KmTree.h:82
Scalar opt_cost
Definition: KmTree.h:58
Scalar * sum
Definition: KmTree.h:57
int num_points
Definition: KmTree.h:54
int d_
Definition: KmTree.h:78
Definition: KmTree.h:35
GLuint index
Definition: glew.h:1721
void SeedKmppSetClusterIndex(const Node *node, int index) const
Definition: KmTree.cpp:332
~KmTree()
Definition: KmTree.cpp:49
Node * BuildNodes(Scalar *points, int first_index, int last_index, char **next_node_data)
Definition: KmTree.cpp:96
int first_point_index
Definition: KmTree.h:55
double Scalar
Definition: KmUtils.h:41
char * node_data_
Definition: KmTree.h:81
Scalar SeedKmppUpdateAssignment(const Node *node, int new_cluster, Scalar *centers, Scalar *dist_sq) const
Definition: KmTree.cpp:340
Scalar * points_
Definition: KmTree.h:79
Node * top_node_
Definition: KmTree.h:80
Node * upper_node
Definition: KmTree.h:59



Page generated by Doxygen 1.8.6 for MRPT 1.5.6 Git: 4c65e84 Tue Apr 24 08:18:17 2018 +0200 at mar abr 24 08:26:17 CEST 2018