Main MRPT website > C++ reference for MRPT 1.9.9
faster_corner_9.cpp
Go to the documentation of this file.
1 /* +------------------------------------------------------------------------+
2  | Mobile Robot Programming Toolkit (MRPT) |
3  | http://www.mrpt.org/ |
4  | |
5  | Copyright (c) 2005-2017, Individual contributors, see AUTHORS file |
6  | See: http://www.mrpt.org/Authors - All rights reserved. |
7  | Released under BSD License. See details in http://www.mrpt.org/License |
8  +------------------------------------------------------------------------+ */
9 
10 // ---------------------------------------------------------------------------
11 // LICENSING: This file is a slightly-modified version of part of libcvd,
12 // released under LGPL 2.1 by Edward Rosten
13 // ---------------------------------------------------------------------------
14 
15 #include <mrpt/utils/utils_defs.h>
16 #include <mrpt/system/memory.h>
18 
19 #include <mrpt/utils/SSE_types.h>
20 #include <mrpt/system/memory.h>
22 #include "corner_9.h"
23 
24 using namespace std;
25 using namespace mrpt;
26 using namespace mrpt::utils;
27 
28 #if MRPT_HAS_SSE2 && MRPT_HAS_OPENCV
29 
30 template <bool Aligned>
32  const IplImage* I, mrpt::vision::TSimpleFeatureList& corners, int barrier,
33  uint8_t octave, std::vector<size_t>* out_feats_index_by_row)
34 {
35  corners.reserve(corners.size() + 500);
36  // corners.mark_kdtree_as_outdated();
37 
38  size_t* ptr_feat_index_by_row;
39  if (out_feats_index_by_row)
40  {
41  out_feats_index_by_row->resize(I->height);
42  ptr_feat_index_by_row = &(*out_feats_index_by_row)[0];
43  }
44  else
45  {
46  ptr_feat_index_by_row = nullptr;
47  }
48 
49  const int w = I->width;
50  const int stride = 3 * I->widthStep; // 3*w;
51 
52  // The compiler refuses to reserve a register for this
53  const __m128i barriers = _mm_set1_epi8((uint8_t)barrier);
54 
55  int xend = I->width - 3;
56  xend -= (I->width - 3) % 16;
57 
58  // 3 first rows have no features:
59  if (ptr_feat_index_by_row)
60  {
61  *ptr_feat_index_by_row++ = corners.size();
62  *ptr_feat_index_by_row++ = corners.size();
63  *ptr_feat_index_by_row++ = corners.size();
64  }
65 
66  for (int y = 3; y < I->height - 3; y++)
67  {
68  if (ptr_feat_index_by_row) // save index by row:
69  *ptr_feat_index_by_row++ = corners.size();
70 
71  for (int x = 3; x < 16; x++)
72  if (is_corner_9<Less>(
73  (const uint8_t*)I->imageData + I->widthStep * y + x,
74  I->widthStep, barrier) ||
75  is_corner_9<Greater>(
76  (const uint8_t*)I->imageData + I->widthStep * y + x,
77  I->widthStep, barrier))
78  corners.push_back_fast(x << octave, y << octave);
79 
80  for (int x = 16; x < xend; x += 16)
81  {
82  const uint8_t* p =
83  (const uint8_t*)I->imageData + I->widthStep * y +
84  x; //(const uint8_t*)I->imageData+I->widthStep*y+x;
85  __m128i lo, hi;
86  {
87  const __m128i here = load_si128<Aligned>((const __m128i*)(p));
88  lo = _mm_subs_epu8(here, barriers);
89  hi = _mm_adds_epu8(barriers, here);
90  }
91  unsigned int ans_0, ans_8, possible;
92  {
93  __m128i top = load_si128<Aligned>((const __m128i*)(p - stride));
94  __m128i bottom =
95  load_si128<Aligned>((const __m128i*)(p + stride));
96 
97  CHECK_BARRIER(lo, hi, top, ans_0);
98  CHECK_BARRIER(lo, hi, bottom, ans_8);
99  possible = ans_0 | ans_8;
100  if (!possible) continue;
101  }
102 
103  unsigned int ans_15, ans_1;
104  {
105  __m128i a = _mm_loadu_si128((const __m128i*)(p - 1 - stride));
106  __m128i c = _mm_insert_epi16(
107  _mm_srli_si128(a, 2),
108  *(const unsigned short*)(p + 15 - stride), 7);
109  CHECK_BARRIER(lo, hi, a, ans_15);
110  CHECK_BARRIER(lo, hi, c, ans_1);
111  possible &= ans_8 | (ans_15 & ans_1);
112  if (!possible) continue;
113  }
114 
115  unsigned int ans_9, ans_7;
116  {
117  __m128i d = _mm_loadu_si128((const __m128i*)(p - 1 + stride));
118  __m128i f = _mm_insert_epi16(
119  _mm_srli_si128(d, 2),
120  *(const unsigned short*)(p + 15 + stride), 7);
121  CHECK_BARRIER(lo, hi, d, ans_9);
122  CHECK_BARRIER(lo, hi, f, ans_7);
123  possible &= ans_9 | (ans_0 & ans_1);
124  possible &= ans_7 | (ans_15 & ans_0);
125  if (!possible) continue;
126  }
127 
128  unsigned int ans_12, ans_4;
129  {
130  __m128i left = _mm_loadu_si128((const __m128i*)(p - 3));
131  __m128i right = _mm_loadu_si128((const __m128i*)(p + 3));
132  CHECK_BARRIER(lo, hi, left, ans_12);
133  CHECK_BARRIER(lo, hi, right, ans_4);
134  possible &= ans_12 | (ans_4 & (ans_1 | ans_7));
135  possible &= ans_4 | (ans_12 & (ans_9 | ans_15));
136  if (!possible) continue;
137  }
138 
139  unsigned int ans_14, ans_6;
140  {
141  __m128i ul = _mm_loadu_si128((const __m128i*)(p - 2 - 2 * w));
142  __m128i lr = _mm_loadu_si128((const __m128i*)(p + 2 + 2 * w));
143  CHECK_BARRIER(lo, hi, ul, ans_14);
144  CHECK_BARRIER(lo, hi, lr, ans_6);
145  {
146  const unsigned int ans_6_7 = ans_6 & ans_7;
147  possible &= ans_14 | (ans_6_7 & (ans_4 | (ans_8 & ans_9)));
148  possible &= ans_1 | (ans_6_7) | ans_12;
149  }
150  {
151  const unsigned int ans_14_15 = ans_14 & ans_15;
152  possible &=
153  ans_6 | (ans_14_15 & (ans_12 | (ans_0 & ans_1)));
154  possible &= ans_9 | (ans_14_15) | ans_4;
155  }
156  if (!possible) continue;
157  }
158 
159  unsigned int ans_10, ans_2;
160  {
161  __m128i ll = _mm_loadu_si128((const __m128i*)(p - 2 + 2 * w));
162  __m128i ur = _mm_loadu_si128((const __m128i*)(p + 2 - 2 * w));
163  CHECK_BARRIER(lo, hi, ll, ans_10);
164  CHECK_BARRIER(lo, hi, ur, ans_2);
165  {
166  const unsigned int ans_1_2 = ans_1 & ans_2;
167  possible &= ans_10 | (ans_1_2 & ((ans_0 & ans_15) | ans_4));
168  possible &= ans_12 | (ans_1_2) | (ans_6 & ans_7);
169  }
170  {
171  const unsigned int ans_9_10 = ans_9 & ans_10;
172  possible &= ans_2 | (ans_9_10 & ((ans_7 & ans_8) | ans_12));
173  possible &= ans_4 | (ans_9_10) | (ans_14 & ans_15);
174  }
175  possible &= ans_8 | ans_14 | ans_2;
176  possible &= ans_0 | ans_10 | ans_6;
177  if (!possible) continue;
178  }
179 
180  unsigned int ans_13, ans_5;
181  {
182  __m128i g = _mm_loadu_si128((const __m128i*)(p - 3 - w));
183  __m128i l = _mm_loadu_si128((const __m128i*)(p + 3 + w));
184  CHECK_BARRIER(lo, hi, g, ans_13);
185  CHECK_BARRIER(lo, hi, l, ans_5);
186  const unsigned int ans_15_0 = ans_15 & ans_0;
187  const unsigned int ans_7_8 = ans_7 & ans_8;
188  {
189  const unsigned int ans_12_13 = ans_12 & ans_13;
190  possible &=
191  ans_5 | (ans_12_13 & ans_14 & ((ans_15_0) | ans_10));
192  possible &= ans_7 | (ans_1 & ans_2) | (ans_12_13);
193  possible &= ans_2 | (ans_12_13) | (ans_7_8);
194  }
195  {
196  const unsigned int ans_4_5 = ans_4 & ans_5;
197  const unsigned int ans_9_10 = ans_9 & ans_10;
198  possible &=
199  ans_13 | (ans_4_5 & ans_6 & ((ans_7_8) | ans_2));
200  possible &= ans_15 | (ans_4_5) | (ans_9_10);
201  possible &= ans_10 | (ans_4_5) | (ans_15_0);
202  possible &= ans_15 | (ans_9_10) | (ans_4_5);
203  }
204 
205  possible &= ans_8 | (ans_13 & ans_14) | ans_2;
206  possible &= ans_0 | (ans_5 & ans_6) | ans_10;
207  if (!possible) continue;
208  }
209 
210  unsigned int ans_11, ans_3;
211  {
212  __m128i ii = _mm_loadu_si128((const __m128i*)(p - 3 + w));
213  __m128i jj = _mm_loadu_si128((const __m128i*)(p + 3 - w));
214  CHECK_BARRIER(lo, hi, ii, ans_11);
215  CHECK_BARRIER(lo, hi, jj, ans_3);
216  {
217  const unsigned int ans_2_3 = ans_2 & ans_3;
218  possible &= ans_11 | (ans_2_3 & ans_4 &
219  ((ans_0 & ans_1) | (ans_5 & ans_6)));
220  possible &= ans_13 | (ans_7 & ans_8) | (ans_2_3);
221  possible &= ans_8 | (ans_2_3) | (ans_13 & ans_14);
222  }
223  {
224  const unsigned int ans_11_12 = ans_11 & ans_12;
225  possible &= ans_3 | (ans_10 & ans_11_12 &
226  ((ans_8 & ans_9) | (ans_13 & ans_14)));
227  possible &= ans_1 | (ans_11_12) | (ans_6 & ans_7);
228  possible &= ans_6 | (ans_0 & ans_1) | (ans_11_12);
229  }
230  {
231  const unsigned int ans_3_4 = ans_3 & ans_4;
232  possible &= ans_9 | (ans_3_4) | (ans_14 & ans_15);
233  possible &= ans_14 | (ans_8 & ans_9) | (ans_3_4);
234  }
235  {
236  const unsigned int ans_10_11 = ans_10 & ans_11;
237  possible &= ans_5 | (ans_15 & ans_0) | (ans_10_11);
238  possible &= ans_0 | (ans_10_11) | (ans_5 & ans_6);
239  }
240  if (!possible) continue;
241  }
242 
243  possible |= (possible >> 16);
244 
245  // if(possible & 0x0f) //Does this make it faster?
246  {
247  if (possible & (1 << 0))
248  corners.push_back_fast((x + 0) << octave, y << octave);
249  if (possible & (1 << 1))
250  corners.push_back_fast((x + 1) << octave, y << octave);
251  if (possible & (1 << 2))
252  corners.push_back_fast((x + 2) << octave, y << octave);
253  if (possible & (1 << 3))
254  corners.push_back_fast((x + 3) << octave, y << octave);
255  if (possible & (1 << 4))
256  corners.push_back_fast((x + 4) << octave, y << octave);
257  if (possible & (1 << 5))
258  corners.push_back_fast((x + 5) << octave, y << octave);
259  if (possible & (1 << 6))
260  corners.push_back_fast((x + 6) << octave, y << octave);
261  if (possible & (1 << 7))
262  corners.push_back_fast((x + 7) << octave, y << octave);
263  }
264  // if(possible & 0xf0) //Does this mak( , fast)r?
265  {
266  if (possible & (1 << 8))
267  corners.push_back_fast((x + 8) << octave, y << octave);
268  if (possible & (1 << 9))
269  corners.push_back_fast((x + 9) << octave, y << octave);
270  if (possible & (1 << 10))
271  corners.push_back_fast((x + 10) << octave, y << octave);
272  if (possible & (1 << 11))
273  corners.push_back_fast((x + 11) << octave, y << octave);
274  if (possible & (1 << 12))
275  corners.push_back_fast((x + 12) << octave, y << octave);
276  if (possible & (1 << 13))
277  corners.push_back_fast((x + 13) << octave, y << octave);
278  if (possible & (1 << 14))
279  corners.push_back_fast((x + 14) << octave, y << octave);
280  if (possible & (1 << 15))
281  corners.push_back_fast((x + 15) << octave, y << octave);
282  }
283  }
284 
285  for (int x = xend; x < I->width - 3; x++)
286  if (is_corner_9<Less>(
287  (const uint8_t*)I->imageData + I->widthStep * y + x,
288  I->widthStep, barrier) ||
289  is_corner_9<Greater>(
290  (const uint8_t*)I->imageData + I->widthStep * y + x,
291  I->widthStep, barrier))
292  corners.push_back_fast(x << octave, y << octave);
293  }
294 
295  // 3 last rows have no features:
296  if (ptr_feat_index_by_row)
297  {
298  *ptr_feat_index_by_row++ = corners.size();
299  *ptr_feat_index_by_row++ = corners.size();
300  *ptr_feat_index_by_row++ = corners.size();
301  }
302 }
303 
304 #endif // MRPT_HAS_SSE2 && MRPT_HAS_OPENCV
305 
306 #if MRPT_HAS_OPENCV
307 
309  const IplImage* I, mrpt::vision::TSimpleFeatureList& corners, int barrier,
310  uint8_t octave, std::vector<size_t>* out_feats_index_by_row)
311 {
312  if (I->width < 22)
313  {
315  I, corners, barrier, octave, out_feats_index_by_row);
316  return;
317  }
318  else if (I->width < 22 || I->height < 7)
319  return;
320 
321 #if MRPT_HAS_SSE2
322  if (mrpt::system::is_aligned<16>(I->imageData) &&
323  mrpt::system::is_aligned<16>(I->imageData + I->widthStep))
324  faster_corner_detect_9<true>(
325  I, corners, barrier, octave, out_feats_index_by_row);
326  else
327  faster_corner_detect_9<false>(
328  I, corners, barrier, octave, out_feats_index_by_row);
329 #else
331  I, corners, barrier, octave, out_feats_index_by_row);
332 #endif
333 }
334 
335 #endif
bool is_aligned< 16 >(const void *ptr)
Definition: memory.h:126
Classes for serialization, sockets, ini-file manipulation, streams, list of properties-values, timewatch, extensions to STL.
STL namespace.
void fast_corner_detect_9(const IplImage *I, mrpt::vision::TSimpleFeatureList &corners, int barrier, uint8_t octave, std::vector< size_t > *out_feats_index_by_row)
#define CHECK_BARRIER(lo, hi, other, flags)
GLubyte GLubyte GLubyte GLubyte w
Definition: glext.h:4178
unsigned char uint8_t
Definition: rptypes.h:41
void fast_corner_detect_plain_9(const IplImage *i, TSimpleFeatureList &corners, int b, uint8_t octave, std::vector< size_t > *out_feats_index_by_row)
const GLubyte * c
Definition: glext.h:6313
GLsizei stride
Definition: glext.h:3825
GLubyte g
Definition: glext.h:6279
void faster_corner_detect_9(const IplImage *I, mrpt::vision::TSimpleFeatureList &corners, int barrier, uint8_t octave, std::vector< size_t > *out_feats_index_by_row)
This is the global namespace for all Mobile Robot Programming Toolkit (MRPT) libraries.
GLenum GLint GLint y
Definition: glext.h:3538
GLenum GLint x
Definition: glext.h:3538
GLubyte GLubyte GLubyte a
Definition: glext.h:6279
GLfloat GLfloat p
Definition: glext.h:6305



Page generated by Doxygen 1.8.14 for MRPT 1.9.9 Git: ae4571287 Thu Nov 23 00:06:53 2017 +0100 at dom oct 27 23:51:55 CET 2019