MRPT  2.0.1
se2_l2.SSE2.cpp
Go to the documentation of this file.
1 /* +------------------------------------------------------------------------+
2  | Mobile Robot Programming Toolkit (MRPT) |
3  | https://www.mrpt.org/ |
4  | |
5  | Copyright (c) 2005-2020, Individual contributors, see AUTHORS file |
6  | See: https://www.mrpt.org/Authors - All rights reserved. |
7  | Released under BSD License. See: https://www.mrpt.org/License |
8  +------------------------------------------------------------------------+ */
9 
10 #include "tfest-precomp.h" // Precompiled headers
11 
12 #include <mrpt/config.h>
13 #include "se2_l2_internal.h"
14 
15 #if MRPT_ARCH_INTEL_COMPATIBLE
16 
17 #include <mrpt/core/SSE_types.h>
18 #include <mrpt/core/exceptions.h>
19 
20 using namespace mrpt::tfest;
21 
22 internal::se2_l2_impl_return_t<float> internal::se2_l2_impl_SSE2(
23  const TMatchingPairList& in_correspondences)
24 {
25  // SSE vectorized version:
26  const size_t N = in_correspondences.size();
27  ASSERT_(N >= 2);
28  const float N_inv = 1.0f / N; // For efficiency, keep this value.
29 
30  // Ensure correct types:
31  static_assert(sizeof(TMatchingPair::this_x) == sizeof(float));
32  static_assert(sizeof(TMatchingPair::other_x) == sizeof(float));
33 
35 
36  __m128 sum_a_xyz = _mm_setzero_ps(); // All 4 zeros (0.0f)
37  __m128 sum_b_xyz = _mm_setzero_ps(); // All 4 zeros (0.0f)
38 
39  // [ f0 f1 f2 f3 ]
40  // xa*xb ya*yb xa*yb xb*ya
41  __m128 sum_ab_xyz = _mm_setzero_ps(); // All 4 zeros (0.0f)
42 
43  for (const auto& in_correspondence : in_correspondences)
44  {
45  // Get the pair of points in the correspondence:
46  // a_xyyx = [ xa ay | xa ya ]
47  // b_xyyx = [ xb yb | yb xb ]
48  // (product)
49  // [ xa*xb ya*yb xa*yb xb*ya
50  // LO0 LO1 HI2 HI3
51  // Note: _MM_SHUFFLE(hi3,hi2,lo1,lo0)
52  const __m128 a_xyz =
53  _mm_loadu_ps(&in_correspondence.this_x); // *Unaligned* load
54  const __m128 b_xyz =
55  _mm_loadu_ps(&in_correspondence.other_x); // *Unaligned* load
56 
57  const auto a_xyxy =
58  _mm_shuffle_ps(a_xyz, a_xyz, _MM_SHUFFLE(1, 0, 1, 0));
59  const auto b_xyyx =
60  _mm_shuffle_ps(b_xyz, b_xyz, _MM_SHUFFLE(0, 1, 1, 0));
61 
62  // Compute the terms:
63  sum_a_xyz = _mm_add_ps(sum_a_xyz, a_xyz);
64  sum_b_xyz = _mm_add_ps(sum_b_xyz, b_xyz);
65 
66  // [ f0 f1 f2 f3 ]
67  // xa*xb ya*yb xa*yb xb*ya
68  sum_ab_xyz = _mm_add_ps(sum_ab_xyz, _mm_mul_ps(a_xyxy, b_xyyx));
69  }
70 
71  alignas(MRPT_MAX_STATIC_ALIGN_BYTES) float sums_a[4], sums_b[4];
72  _mm_store_ps(sums_a, sum_a_xyz);
73  _mm_store_ps(sums_b, sum_b_xyz);
74 
75  float SumXa = sums_a[0];
76  float SumYa = sums_a[1];
77  float SumXb = sums_b[0];
78  float SumYb = sums_b[1];
79 
80  // Compute all four means:
81  const __m128 Ninv_4val =
82  _mm_set1_ps(N_inv); // load 4 copies of the same value
83  sum_a_xyz = _mm_mul_ps(sum_a_xyz, Ninv_4val);
84  sum_b_xyz = _mm_mul_ps(sum_b_xyz, Ninv_4val);
85 
86  // means_a[0]: mean_x_a
87  // means_a[1]: mean_y_a
88  // means_b[0]: mean_x_b
89  // means_b[1]: mean_y_b
90  alignas(MRPT_MAX_STATIC_ALIGN_BYTES) float means_a[4], means_b[4];
91  _mm_store_ps(means_a, sum_a_xyz);
92  _mm_store_ps(means_b, sum_b_xyz);
93 
94  ret.mean_x_a = means_a[0];
95  ret.mean_y_a = means_a[1];
96  ret.mean_x_b = means_b[0];
97  ret.mean_y_b = means_b[1];
98 
99  // Sxx Syy Sxy Syx
100  // xa*xb ya*yb xa*yb xb*ya
101  alignas(MRPT_MAX_STATIC_ALIGN_BYTES) float cross_sums[4];
102  _mm_store_ps(cross_sums, sum_ab_xyz);
103 
104  float Sxx = cross_sums[0];
105  float Syy = cross_sums[1];
106  float Sxy = cross_sums[2];
107  float Syx = cross_sums[3];
108 
109  // Auxiliary variables Ax,Ay:
110  ret.Ax = N * (Sxx + Syy) - SumXa * SumXb - SumYa * SumYb;
111  ret.Ay = SumXa * SumYb + N * (Syx - Sxy) - SumXb * SumYa;
112 
113  return ret;
114 }
115 
116 #endif // MRPT_ARCH_INTEL_COMPATIBLE
#define ASSERT_(f)
Defines an assertion mechanism.
Definition: exceptions.h:120
A list of TMatchingPair.
Definition: TMatchingPair.h:70
Functions for estimating the optimal transformation between two frames of references given measurements...



Page generated by Doxygen 1.8.14 for MRPT 2.0.1 Git: 0fef1a6d7 Fri Apr 3 23:00:21 2020 +0200 at vie abr 3 23:20:28 CEST 2020