MRPT  1.9.9
kmeans++/KMeans.cpp
Go to the documentation of this file.
1 /* +------------------------------------------------------------------------+
2  | Mobile Robot Programming Toolkit (MRPT) |
3  | http://www.mrpt.org/ |
4  | |
5  | Copyright (c) 2005-2018, Individual contributors, see AUTHORS file |
6  | See: http://www.mrpt.org/Authors - All rights reserved. |
7  | Released under BSD License. See details in http://www.mrpt.org/License |
8  +------------------------------------------------------------------------+ */
9 // See KMeans.h
10 //
11 // Author: David Arthur (darthur@gmail.com), 2009
12 
13 // Includes
14 #include "KMeans.h"
15 #include "KmTree.h"
16 #include <sstream>
17 #include <time.h>
18 #include <vector>
19 using namespace std;
20 
21 // Logging
22 static vector<ostream*> gLogOutputs;
23 static vector<ostream*> gVerboseLogOutputs;
24 #define LOG(verbose, text) \
25  { \
26  vector<ostream*>& outputs = \
27  (verbose ? gVerboseLogOutputs : gLogOutputs); \
28  if (outputs.size() > 0) \
29  { \
30  ostringstream string_stream; \
31  string_stream << text; \
32  for (int i = 0; i < (int)outputs.size(); i++) \
33  *(outputs[i]) << string_stream.str(); \
34  } \
35  }
36 void AddKMeansLogging(std::ostream* out, bool verbose)
37 {
38  if (verbose) gVerboseLogOutputs.push_back(out);
39  gLogOutputs.push_back(out);
40 }
42 {
43  gLogOutputs.clear();
44  gVerboseLogOutputs.clear();
45 }
46 
47 // Returns the number of seconds since the program began execution.
48 static double GetSeconds() { return double(clock()) / CLOCKS_PER_SEC; }
49 // See KMeans.h
50 // Performs one full execution of k-means, logging any relevant information, and
51 // tracking meta
52 // statistics for the run. If min or max values are negative, they are treated
53 // as unset.
54 // best_centers and best_assignment can be 0, in which case they are not set.
55 static void RunKMeansOnce(
56  const KmTree& tree, int n, int k, int d, Scalar* points, Scalar* centers,
57  Scalar* min_cost, Scalar* max_cost, Scalar* total_cost, double start_time,
58  double* min_time, double* max_time, double* total_time,
59  Scalar* best_centers, int* best_assignment)
60 {
61  (void)(n);
62  (void)(points);
63  const Scalar kEpsilon =
64  Scalar(1e-8); // Used to determine when to terminate k-means
65 
66  // Do iterations of k-means until the cost stabilizes
67  Scalar old_cost = 0;
68  bool is_done = false;
69  for (int iteration = 0; !is_done; iteration++)
70  {
71  Scalar new_cost = tree.DoKMeansStep(k, centers, 0);
72  is_done = (iteration > 0 && new_cost >= (1 - kEpsilon) * old_cost);
73  old_cost = new_cost;
74  LOG(true, "Completed iteration #" << (iteration + 1) << ", cost="
75  << new_cost << "..." << endl);
76  }
77  double this_time = GetSeconds() - start_time;
78 
79  // Log the clustering we found
80  LOG(false, "Completed run: cost=" << old_cost << " (" << this_time
81  << " seconds)" << endl);
82 
83  // Handle a new min cost, updating best_centers and best_assignment as
84  // appropriate
85  if (*min_cost < 0 || old_cost < *min_cost)
86  {
87  *min_cost = old_cost;
88  if (best_assignment != 0)
89  tree.DoKMeansStep(k, centers, best_assignment);
90  if (best_centers != 0)
91  memcpy(best_centers, centers, sizeof(Scalar) * k * d);
92  }
93 
94  // Update all other aggregate stats
95  if (*max_cost < old_cost) *max_cost = old_cost;
96  *total_cost += old_cost;
97  if (*min_time < 0 || *min_time > this_time) *min_time = this_time;
98  if (*max_time < this_time) *max_time = this_time;
99  *total_time += this_time;
100 }
101 
102 // Outputs all meta-stats for a set of k-means or k-means++ runs.
104  Scalar min_cost, Scalar max_cost, Scalar total_cost, double min_time,
105  double max_time, double total_time, int num_attempts)
106 {
107  LOG(false, "Aggregate info over " << num_attempts << " runs:" << endl);
108  LOG(false, " Cost: min=" << min_cost
109  << " average=" << (total_cost / num_attempts)
110  << " max=" << max_cost << endl);
111  LOG(false, " Time: min=" << min_time
112  << " average=" << (total_time / num_attempts)
113  << " max=" << max_time << endl
114  << endl);
115 }
116 
117 // See KMeans.h
119  int n, int k, int d, Scalar* points, int attempts, Scalar* ret_centers,
120  int* ret_assignment)
121 {
122  KM_ASSERT(k >= 1);
123 
124  // Create the tree and log
125  LOG(false, "Running k-means..." << endl);
126  KmTree tree(n, d, points);
127  LOG(false, "Done preprocessing..." << endl);
128 
129  // Initialization
130  Scalar* centers = (Scalar*)malloc(sizeof(Scalar) * k * d);
131  int* unused_centers = (int*)malloc(sizeof(int) * n);
132  KM_ASSERT(centers != 0 && unused_centers != 0);
133  Scalar min_cost = -1, max_cost = -1, total_cost = 0;
134  double min_time = -1, max_time = -1, total_time = 0;
135 
136  // Handle k > n
137  if (k > n)
138  {
139  memset(centers + n * d, -1, (k - d) * sizeof(Scalar));
140  k = n;
141  }
142 
143  // Run all the attempts
144  for (int attempt = 0; attempt < attempts; attempt++)
145  {
146  double start_time = GetSeconds();
147 
148  // Choose centers uniformly at random
149  for (int i = 0; i < n; i++) unused_centers[i] = i;
150  int num_unused_centers = n;
151  for (int i = 0; i < k; i++)
152  {
153  int j = GetRandom(num_unused_centers--);
154  memcpy(
155  centers + i * d, points + unused_centers[j] * d,
156  d * sizeof(Scalar));
157  unused_centers[j] = unused_centers[num_unused_centers];
158  }
159 
160  // Run k-means
162  tree, n, k, d, points, centers, &min_cost, &max_cost, &total_cost,
163  start_time, &min_time, &max_time, &total_time, ret_centers,
164  ret_assignment);
165  }
166  LogMetaStats(
167  min_cost, max_cost, total_cost, min_time, max_time, total_time,
168  attempts);
169 
170  // Clean up and return
171  free(unused_centers);
172  free(centers);
173  return min_cost;
174 }
175 
176 // See KMeans.h
178  int n, int k, int d, Scalar* points, int attempts, Scalar* ret_centers,
179  int* ret_assignment)
180 {
181  KM_ASSERT(k >= 1);
182 
183  // Create the tree and log
184  LOG(false, "Running k-means++..." << endl);
185  KmTree tree(n, d, points);
186  LOG(false, "Done preprocessing..." << endl);
187 
188  // Initialization
189  Scalar* centers = (Scalar*)malloc(sizeof(Scalar) * k * d);
190  KM_ASSERT(centers != 0);
191  Scalar min_cost = -1, max_cost = -1, total_cost = 0;
192  double min_time = -1, max_time = -1, total_time = 0;
193 
194  // Run all the attempts
195  for (int attempt = 0; attempt < attempts; attempt++)
196  {
197  double start_time = GetSeconds();
198 
199  // Choose centers using k-means++ seeding
200  tree.SeedKMeansPlusPlus(k, centers);
201 
202  // Run k-means
204  tree, n, k, d, points, centers, &min_cost, &max_cost, &total_cost,
205  start_time, &min_time, &max_time, &total_time, ret_centers,
206  ret_assignment);
207  }
208  LogMetaStats(
209  min_cost, max_cost, total_cost, min_time, max_time, total_time,
210  attempts);
211 
212  // Clean up and return
213  free(centers);
214  return min_cost;
215 }
Scalar RunKMeans(int n, int k, int d, Scalar *points, int attempts, Scalar *ret_centers, int *ret_assignment)
double Scalar
Definition: KmUtils.h:44
GLenum GLsizei n
Definition: glext.h:5074
STL namespace.
GLsizei const GLfloat * points
Definition: glext.h:5339
#define LOG(verbose, text)
Scalar RunKMeansPlusPlus(int n, int k, int d, Scalar *points, int attempts, Scalar *ret_centers, int *ret_assignment)
Scalar DoKMeansStep(int k, Scalar *centers, int *assignment) const
Definition: KmTree.cpp:58
void ClearKMeansLogging()
int GetRandom(int n)
Definition: KmUtils.h:101
Definition: KmTree.h:43
static vector< ostream * > gVerboseLogOutputs
#define KM_ASSERT(expression)
Definition: KmUtils.h:87
Scalar SeedKMeansPlusPlus(int k, Scalar *centers) const
Definition: KmTree.cpp:346
typedef void(APIENTRYP PFNGLBLENDCOLORPROC)(GLclampf red
void AddKMeansLogging(std::ostream *out, bool verbose)
static void RunKMeansOnce(const KmTree &tree, int n, int k, int d, Scalar *points, Scalar *centers, Scalar *min_cost, Scalar *max_cost, Scalar *total_cost, double start_time, double *min_time, double *max_time, double *total_time, Scalar *best_centers, int *best_assignment)
static double GetSeconds()
static vector< ostream * > gLogOutputs
void LogMetaStats(Scalar min_cost, Scalar max_cost, Scalar total_cost, double min_time, double max_time, double total_time, int num_attempts)
void memcpy(void *dest, size_t destSize, const void *src, size_t copyCount) noexcept
An OS and compiler independent version of "memcpy".
Definition: os.cpp:356



Page generated by Doxygen 1.8.14 for MRPT 1.9.9 Git: 7d5e6d718 Fri Aug 24 01:51:28 2018 +0200 at lun nov 2 08:35:50 CET 2020