12 #include <mrpt/config.h> 15 #if MRPT_ARCH_INTEL_COMPATIBLE 26 const size_t N = in_correspondences.size();
28 const float N_inv = 1.0f / N;
36 __m128 sum_a_xyz = _mm_setzero_ps();
37 __m128 sum_b_xyz = _mm_setzero_ps();
41 __m128 sum_ab_xyz = _mm_setzero_ps();
43 for (
const auto& in_correspondence : in_correspondences)
53 _mm_loadu_ps(&in_correspondence.this_x);
55 _mm_loadu_ps(&in_correspondence.other_x);
58 _mm_shuffle_ps(a_xyz, a_xyz, _MM_SHUFFLE(1, 0, 1, 0));
60 _mm_shuffle_ps(b_xyz, b_xyz, _MM_SHUFFLE(0, 1, 1, 0));
63 sum_a_xyz = _mm_add_ps(sum_a_xyz, a_xyz);
64 sum_b_xyz = _mm_add_ps(sum_b_xyz, b_xyz);
68 sum_ab_xyz = _mm_add_ps(sum_ab_xyz, _mm_mul_ps(a_xyxy, b_xyyx));
71 alignas(MRPT_MAX_STATIC_ALIGN_BYTES)
float sums_a[4], sums_b[4];
72 _mm_store_ps(sums_a, sum_a_xyz);
73 _mm_store_ps(sums_b, sum_b_xyz);
75 float SumXa = sums_a[0];
76 float SumYa = sums_a[1];
77 float SumXb = sums_b[0];
78 float SumYb = sums_b[1];
81 const __m128 Ninv_4val =
83 sum_a_xyz = _mm_mul_ps(sum_a_xyz, Ninv_4val);
84 sum_b_xyz = _mm_mul_ps(sum_b_xyz, Ninv_4val);
90 alignas(MRPT_MAX_STATIC_ALIGN_BYTES)
float means_a[4], means_b[4];
91 _mm_store_ps(means_a, sum_a_xyz);
92 _mm_store_ps(means_b, sum_b_xyz);
101 alignas(MRPT_MAX_STATIC_ALIGN_BYTES)
float cross_sums[4];
102 _mm_store_ps(cross_sums, sum_ab_xyz);
104 float Sxx = cross_sums[0];
105 float Syy = cross_sums[1];
106 float Sxy = cross_sums[2];
107 float Syx = cross_sums[3];
110 ret.
Ax = N * (Sxx + Syy) - SumXa * SumXb - SumYa * SumYb;
111 ret.
Ay = SumXa * SumYb + N * (Syx - Sxy) - SumXb * SumYa;
116 #endif // MRPT_ARCH_INTEL_COMPATIBLE
#define ASSERT_(f)
Defines an assertion mechanism.
Functions for estimating the optimal transformation between two frames of reference given measurements...