39 MRPT_ALIGN16 const unsigned long long mask0[2] = { 0x0D0C080706020100ull, 0x808080808080800Eull };
40 MRPT_ALIGN16 const unsigned long long mask1[2] = { 0x8080808080808080ull, 0x0E0A090804030280ull };
41 MRPT_ALIGN16 const unsigned long long mask2[2] = { 0x0C0B0A0605040080ull, 0x8080808080808080ull };
42 MRPT_ALIGN16 const unsigned long long mask3[2] = { 0x808080808080800Full, 0x8080808080808080ull };
44 const __m128i m0 = _mm_load_si128((
const __m128i*)mask0);
45 const __m128i m1 = _mm_load_si128((
const __m128i*)mask1);
46 const __m128i m2 = _mm_load_si128((
const __m128i*)mask2);
47 const __m128i m3 = _mm_load_si128((
const __m128i*)mask3);
49 const int sw =
w >> 4;
50 const int sh = h >> 1;
54 for (
int i=0;
i<sh;
i++)
56 for (
int j=0; j<sw; j++)
59 __m128i d0 = _mm_load_si128((
const __m128i*)
in);
in += 16;
60 __m128i d1 = _mm_load_si128((
const __m128i*)
in);
in += 16;
63 __m128i shuf0 = _mm_shuffle_epi8(d0,m0);
64 __m128i shuf1 = _mm_shuffle_epi8(d1,m1);
66 __m128i res0 = _mm_or_si128(shuf0,shuf1);
69 _mm_storeu_si128((__m128i*)out,res0);
70 else _mm_store_si128 ((__m128i*)out,res0);
74 __m128i d2 = _mm_load_si128((
const __m128i*)
in);
in += 16;
78 _mm_or_si128(_mm_shuffle_epi8(d2,m2),_mm_shuffle_epi8(d1,m3))
89 template <
bool IS_RGB>
93 BUILD_128BIT_CONST(mask0, 80,00, 80,03, 80,06, 80,09, 80,0C, 80,0F, 80,80, 80,80)
94 BUILD_128BIT_CONST(mask1, 80,80, 80,80, 80,80, 80,80, 80,80, 80,80, 80,02, 80,05)
96 BUILD_128BIT_CONST(mask2, 80,01, 80,04, 80,07, 80,0A, 80,0D, 80,80, 80,80, 80,80)
97 BUILD_128BIT_CONST(mask3, 80,80, 80,80, 80,80, 80,80, 80,80, 80,00, 80,03, 80,06)
99 BUILD_128BIT_CONST(mask4, 80,02, 80,05, 80,08, 80,0B, 80,0E, 80,80, 80,80, 80,80)
100 BUILD_128BIT_CONST(mask5, 80,80, 80,80, 80,80, 80,80, 80,80, 80,01, 80,04, 80,07)
103 BUILD_128BIT_CONST(mask6, 80,08, 80,0B, 80,0E, 80,80, 80,80, 80,80, 80,80, 80,80)
104 BUILD_128BIT_CONST(mask7, 80,80, 80,80, 80,80, 80,01, 80,04, 80,07, 80,0A, 80,0D)
106 BUILD_128BIT_CONST(mask8, 80,09, 80,0C, 80,0F, 80,80, 80,80, 80,80, 80,80, 80,80)
107 BUILD_128BIT_CONST(mask9, 80,80, 80,80, 80,80, 80,02, 80,05, 80,08, 80,0B, 80,0E)
109 BUILD_128BIT_CONST(mask10,80,0A, 80,0D, 80,80, 80,80, 80,80, 80,80, 80,80, 80,80)
110 BUILD_128BIT_CONST(mask11,80,80, 80,80, 80,00, 80,03, 80,06, 80,09, 80,0C, 80,0F)
113 BUILD_128BIT_CONST(mask_to_low, 01,03,05,07,09,0B,0D,0F, 80,80,80,80,80,80,80,80)
117 BUILD_128BIT_CONST(val_red , 00,1D, 00,1D, 00,1D, 00,1D, 00,1D, 00,1D, 00,1D, 00,1D)
118 BUILD_128BIT_CONST(val_green , 00,96, 00,96, 00,96, 00,96, 00,96, 00,96, 00,96, 00,96)
119 BUILD_128BIT_CONST(val_blue , 00,4D, 00,4D, 00,4D, 00,4D, 00,4D, 00,4D, 00,4D, 00,4D)
121 const __m128i m0 = _mm_load_si128( IS_RGB ? (
const __m128i*)mask4 : (
const __m128i*)mask0);
122 const __m128i m1 = _mm_load_si128( IS_RGB ? (
const __m128i*)mask5 : (
const __m128i*)mask1);
123 const __m128i m2 = _mm_load_si128((
const __m128i*)mask2);
124 const __m128i m3 = _mm_load_si128((
const __m128i*)mask3);
125 const __m128i m4 = _mm_load_si128( IS_RGB ? (
const __m128i*)mask0 : (
const __m128i*)mask4);
126 const __m128i m5 = _mm_load_si128( IS_RGB ? (
const __m128i*)mask1 : (
const __m128i*)mask5);
128 const __m128i m6 = _mm_load_si128( IS_RGB ? (
const __m128i*)mask10 : (
const __m128i*)mask6);
129 const __m128i m7 = _mm_load_si128( IS_RGB ? (
const __m128i*)mask11 : (
const __m128i*)mask7);
130 const __m128i m8 = _mm_load_si128((
const __m128i*)mask8);
131 const __m128i m9 = _mm_load_si128((
const __m128i*)mask9);
132 const __m128i m10= _mm_load_si128( IS_RGB ? (
const __m128i*)mask6 : (
const __m128i*)mask10);
133 const __m128i m11= _mm_load_si128( IS_RGB ? (
const __m128i*)mask7 : (
const __m128i*)mask11);
135 const __m128i mask_low= _mm_load_si128((
const __m128i*)mask_to_low);
137 const __m128i VAL_R = _mm_load_si128((
const __m128i*)val_red);
138 const __m128i VAL_G = _mm_load_si128((
const __m128i*)val_green);
139 const __m128i VAL_B = _mm_load_si128((
const __m128i*)val_blue);
141 const int sw =
w >> 4;
144 for (
int i=0;
i<sh;
i++)
146 for (
int j=0; j<sw; j++)
149 const __m128i d0 = _mm_load_si128((
const __m128i*)
in);
in += 16;
150 const __m128i d1 = _mm_load_si128((
const __m128i*)
in);
in += 16;
151 const __m128i d2 = _mm_load_si128((
const __m128i*)
in);
in += 16;
155 const __m128i BLUES_0_7 = _mm_or_si128(_mm_shuffle_epi8(d0,m0),_mm_shuffle_epi8(d1,m1));
156 const __m128i GREENS_0_7 = _mm_or_si128(_mm_shuffle_epi8(d0,m2),_mm_shuffle_epi8(d1,m3));
157 const __m128i REDS_0_7 = _mm_or_si128(_mm_shuffle_epi8(d0,m4),_mm_shuffle_epi8(d1,m5));
165 const __m128i GRAYS_0_7 =
167 _mm_mulhi_epu16(REDS_0_7, VAL_R),
169 _mm_mulhi_epu16(GREENS_0_7, VAL_G),
170 _mm_mulhi_epu16(BLUES_0_7, VAL_B)
173 _mm_storel_epi64((__m128i*)out, _mm_shuffle_epi8(GRAYS_0_7,mask_low));
179 const __m128i BLUES_8_15 = _mm_or_si128(_mm_shuffle_epi8(d1,m6),_mm_shuffle_epi8(d2,m7));
180 const __m128i GREENS_8_15 = _mm_or_si128(_mm_shuffle_epi8(d1,m8),_mm_shuffle_epi8(d2,m9));
181 const __m128i REDS_8_15 = _mm_or_si128(_mm_shuffle_epi8(d1,m10),_mm_shuffle_epi8(d2,m11));
183 const __m128i GRAYS_8_15 =
185 _mm_mulhi_epu16(REDS_8_15, VAL_R),
187 _mm_mulhi_epu16(GREENS_8_15, VAL_G),
188 _mm_mulhi_epu16(BLUES_8_15, VAL_B)
191 _mm_storel_epi64((__m128i*)out, _mm_shuffle_epi8(GRAYS_8_15,mask_low));
210 private_image_SSSE3_rgb_or_bgr_to_gray_8u<false>(
in,out,
w,h);
223 private_image_SSSE3_rgb_or_bgr_to_gray_8u<true>(
in,out,
w,h);
229 #endif // end of MRPT_HAS_SSE3
void image_SSSE3_bgr_to_gray_8u(const uint8_t *in, uint8_t *out, int w, int h)
Convert a BGR image (3c8u) into a GRAYSCALE (1c8u) image, using Y=77*R+150*G+29*B.
GLubyte GLubyte GLubyte GLubyte w
void image_SSSE3_scale_half_3c8u(const uint8_t *in, uint8_t *out, int w, int h)
Subsample each 2x2 pixel block into 1x1 pixel, taking the first pixel & ignoring the other 3...
#define BUILD_128BIT_CONST(_name, B0, B1, B2, B3, B4, B5, B6, B7, B8, B9, B10, B11, B12, B13, B14, B15)
void image_SSSE3_rgb_to_gray_8u(const uint8_t *in, uint8_t *out, int w, int h)
Convert an RGB image (3c8u) into a GRAYSCALE (1c8u) image, using Y=77*R+150*G+29*B.
void private_image_SSSE3_rgb_or_bgr_to_gray_8u(const uint8_t *in, uint8_t *out, int w, int h)