Main MRPT website > C++ reference for MRPT 1.9.9
kmeans++/KMeans.cpp
Go to the documentation of this file.
1 /* +------------------------------------------------------------------------+
2  | Mobile Robot Programming Toolkit (MRPT) |
3  | http://www.mrpt.org/ |
4  | |
5  | Copyright (c) 2005-2017, Individual contributors, see AUTHORS file |
6  | See: http://www.mrpt.org/Authors - All rights reserved. |
7  | Released under BSD License. See details in http://www.mrpt.org/License |
8  +------------------------------------------------------------------------+ */
9 // See KMeans.h
10 //
11 // Author: David Arthur (darthur@gmail.com), 2009
12 
13 // Includes
14 #include "KMeans.h"
15 #include "KmTree.h"
16 #include <mrpt/utils/mrpt_macros.h>
17 #include <sstream>
18 #include <time.h>
19 #include <vector>
20 using namespace std;
21 
22 // Logging
23 static vector<ostream*> gLogOutputs;
24 static vector<ostream*> gVerboseLogOutputs;
25 #define LOG(verbose, text) \
26  { \
27  vector<ostream*>& outputs = \
28  (verbose ? gVerboseLogOutputs : gLogOutputs); \
29  if (outputs.size() > 0) \
30  { \
31  ostringstream string_stream; \
32  string_stream << text; \
33  for (int i = 0; i < (int)outputs.size(); i++) \
34  *(outputs[i]) << string_stream.str(); \
35  } \
36  }
37 void AddKMeansLogging(std::ostream* out, bool verbose)
38 {
39  if (verbose) gVerboseLogOutputs.push_back(out);
40  gLogOutputs.push_back(out);
41 }
43 {
44  gLogOutputs.clear();
45  gVerboseLogOutputs.clear();
46 }
47 
48 // Returns the number of seconds since the program began execution.
49 static double GetSeconds() { return double(clock()) / CLOCKS_PER_SEC; }
50 // See KMeans.h
51 // Performs one full execution of k-means, logging any relevant information, and
52 // tracking meta
53 // statistics for the run. If min or max values are negative, they are treated
54 // as unset.
55 // best_centers and best_assignment can be 0, in which case they are not set.
56 static void RunKMeansOnce(
57  const KmTree& tree, int n, int k, int d, Scalar* points, Scalar* centers,
58  Scalar* min_cost, Scalar* max_cost, Scalar* total_cost, double start_time,
59  double* min_time, double* max_time, double* total_time,
60  Scalar* best_centers, int* best_assignment)
61 {
64  const Scalar kEpsilon =
65  Scalar(1e-8); // Used to determine when to terminate k-means
66 
67  // Do iterations of k-means until the cost stabilizes
68  Scalar old_cost = 0;
69  bool is_done = false;
70  for (int iteration = 0; !is_done; iteration++)
71  {
72  Scalar new_cost = tree.DoKMeansStep(k, centers, 0);
73  is_done = (iteration > 0 && new_cost >= (1 - kEpsilon) * old_cost);
74  old_cost = new_cost;
75  LOG(true, "Completed iteration #" << (iteration + 1) << ", cost="
76  << new_cost << "..." << endl);
77  }
78  double this_time = GetSeconds() - start_time;
79 
80  // Log the clustering we found
81  LOG(false, "Completed run: cost=" << old_cost << " (" << this_time
82  << " seconds)" << endl);
83 
84  // Handle a new min cost, updating best_centers and best_assignment as
85  // appropriate
86  if (*min_cost < 0 || old_cost < *min_cost)
87  {
88  *min_cost = old_cost;
89  if (best_assignment != 0)
90  tree.DoKMeansStep(k, centers, best_assignment);
91  if (best_centers != 0)
92  memcpy(best_centers, centers, sizeof(Scalar) * k * d);
93  }
94 
95  // Update all other aggregate stats
96  if (*max_cost < old_cost) *max_cost = old_cost;
97  *total_cost += old_cost;
98  if (*min_time < 0 || *min_time > this_time) *min_time = this_time;
99  if (*max_time < this_time) *max_time = this_time;
100  *total_time += this_time;
101 }
102 
103 // Outputs all meta-stats for a set of k-means or k-means++ runs.
105  Scalar min_cost, Scalar max_cost, Scalar total_cost, double min_time,
106  double max_time, double total_time, int num_attempts)
107 {
108  LOG(false, "Aggregate info over " << num_attempts << " runs:" << endl);
109  LOG(false, " Cost: min=" << min_cost
110  << " average=" << (total_cost / num_attempts)
111  << " max=" << max_cost << endl);
112  LOG(false, " Time: min=" << min_time
113  << " average=" << (total_time / num_attempts)
114  << " max=" << max_time << endl
115  << endl);
116 }
117 
118 // See KMeans.h
120  int n, int k, int d, Scalar* points, int attempts, Scalar* ret_centers,
121  int* ret_assignment)
122 {
123  KM_ASSERT(k >= 1);
124 
125  // Create the tree and log
126  LOG(false, "Running k-means..." << endl);
127  KmTree tree(n, d, points);
128  LOG(false, "Done preprocessing..." << endl);
129 
130  // Initialization
131  Scalar* centers = (Scalar*)malloc(sizeof(Scalar) * k * d);
132  int* unused_centers = (int*)malloc(sizeof(int) * n);
133  KM_ASSERT(centers != 0 && unused_centers != 0);
134  Scalar min_cost = -1, max_cost = -1, total_cost = 0;
135  double min_time = -1, max_time = -1, total_time = 0;
136 
137  // Handle k > n
138  if (k > n)
139  {
140  memset(centers + n * d, -1, (k - d) * sizeof(Scalar));
141  k = n;
142  }
143 
144  // Run all the attempts
145  for (int attempt = 0; attempt < attempts; attempt++)
146  {
147  double start_time = GetSeconds();
148 
149  // Choose centers uniformly at random
150  for (int i = 0; i < n; i++) unused_centers[i] = i;
151  int num_unused_centers = n;
152  for (int i = 0; i < k; i++)
153  {
154  int j = GetRandom(num_unused_centers--);
155  memcpy(
156  centers + i * d, points + unused_centers[j] * d,
157  d * sizeof(Scalar));
158  unused_centers[j] = unused_centers[num_unused_centers];
159  }
160 
161  // Run k-means
163  tree, n, k, d, points, centers, &min_cost, &max_cost, &total_cost,
164  start_time, &min_time, &max_time, &total_time, ret_centers,
165  ret_assignment);
166  }
167  LogMetaStats(
168  min_cost, max_cost, total_cost, min_time, max_time, total_time,
169  attempts);
170 
171  // Clean up and return
172  free(unused_centers);
173  free(centers);
174  return min_cost;
175 }
176 
177 // See KMeans.h
179  int n, int k, int d, Scalar* points, int attempts, Scalar* ret_centers,
180  int* ret_assignment)
181 {
182  KM_ASSERT(k >= 1);
183 
184  // Create the tree and log
185  LOG(false, "Running k-means++..." << endl);
186  KmTree tree(n, d, points);
187  LOG(false, "Done preprocessing..." << endl);
188 
189  // Initialization
190  Scalar* centers = (Scalar*)malloc(sizeof(Scalar) * k * d);
191  KM_ASSERT(centers != 0);
192  Scalar min_cost = -1, max_cost = -1, total_cost = 0;
193  double min_time = -1, max_time = -1, total_time = 0;
194 
195  // Run all the attempts
196  for (int attempt = 0; attempt < attempts; attempt++)
197  {
198  double start_time = GetSeconds();
199 
200  // Choose centers using k-means++ seeding
201  tree.SeedKMeansPlusPlus(k, centers);
202 
203  // Run k-means
205  tree, n, k, d, points, centers, &min_cost, &max_cost, &total_cost,
206  start_time, &min_time, &max_time, &total_time, ret_centers,
207  ret_assignment);
208  }
209  LogMetaStats(
210  min_cost, max_cost, total_cost, min_time, max_time, total_time,
211  attempts);
212 
213  // Clean up and return
214  free(centers);
215  return min_cost;
216 }
Scalar RunKMeans(int n, int k, int d, Scalar *points, int attempts, Scalar *ret_centers, int *ret_assignment)
GLenum GLsizei n
Definition: glext.h:5074
STL namespace.
GLsizei const GLfloat * points
Definition: glext.h:5339
#define MRPT_UNUSED_PARAM(a)
Can be used to avoid "not used parameters" warnings from the compiler.
#define LOG(verbose, text)
Scalar RunKMeansPlusPlus(int n, int k, int d, Scalar *points, int attempts, Scalar *ret_centers, int *ret_assignment)
Scalar DoKMeansStep(int k, Scalar *centers, int *assignment) const
Definition: KmTree.cpp:58
void ClearKMeansLogging()
int GetRandom(int n)
Definition: KmUtils.h:101
Definition: KmTree.h:43
static vector< ostream * > gVerboseLogOutputs
#define KM_ASSERT(expression)
Definition: KmUtils.h:87
Scalar SeedKMeansPlusPlus(int k, Scalar *centers) const
Definition: KmTree.cpp:346
void AddKMeansLogging(std::ostream *out, bool verbose)
static void RunKMeansOnce(const KmTree &tree, int n, int k, int d, Scalar *points, Scalar *centers, Scalar *min_cost, Scalar *max_cost, Scalar *total_cost, double start_time, double *min_time, double *max_time, double *total_time, Scalar *best_centers, int *best_assignment)
static double GetSeconds()
static vector< ostream * > gLogOutputs
double Scalar
Definition: KmUtils.h:44
void LogMetaStats(Scalar min_cost, Scalar max_cost, Scalar total_cost, double min_time, double max_time, double total_time, int num_attempts)
void memcpy(void *dest, size_t destSize, const void *src, size_t copyCount) noexcept
An OS and compiler independent version of "memcpy".
Definition: os.cpp:355



Page generated by Doxygen 1.8.14 for MRPT 1.9.9 Git: ae4571287 Thu Nov 23 00:06:53 2017 +0100 at dom oct 27 23:51:55 CET 2019