Main MRPT website > C++ reference for MRPT 1.9.9
jidctfst.cpp
Go to the documentation of this file.
1 /* +------------------------------------------------------------------------+
2  | Mobile Robot Programming Toolkit (MRPT) |
3  | http://www.mrpt.org/ |
4  | |
5  | Copyright (c) 2005-2017, Individual contributors, see AUTHORS file |
6  | See: http://www.mrpt.org/Authors - All rights reserved. |
7  | Released under BSD License. See details in http://www.mrpt.org/License |
8  +------------------------------------------------------------------------+ */
9 
10 #define JPEG_INTERNALS
11 #include "jinclude.h"
12 #include "mrpt_jpeglib.h"
13 #include "jdct.h" /* Private declarations for DCT subsystem */
14 
15 #ifdef DCT_IFAST_SUPPORTED
16 
17 /*
18  * This module is specialized to the case DCTSIZE = 8.
19  */
20 
21 #if DCTSIZE != 8
22 Sorry, this code only copes with 8x8 DCTs./* deliberate syntax error: makes the build fail when DCTSIZE != 8 */
23 #endif
24 
25 /* Scaling decisions are generally the same as in the LL&M algorithm;
26  * see jidctint.c for more details. However, we choose to descale
27  * (right shift) multiplication products as soon as they are formed,
28  * rather than carrying additional fractional bits into subsequent additions.
29  * This compromises accuracy slightly, but it lets us save a few shifts.
30  * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
31  * everywhere except in the multiplications proper; this saves a good deal
32  * of work on 16-bit-int machines.
33  *
34  * The dequantized coefficients are not integers because the AA&N scaling
35  * factors have been incorporated. We represent them scaled up by PASS1_BITS,
36  * so that the first and second IDCT rounds have the same input scaling.
37  * For 8-bit JSAMPLEs, we choose IFAST_SCALE_BITS = PASS1_BITS so as to
38  * avoid a descaling shift; this compromises accuracy rather drastically
39  * for small quantization table entries, but it saves a lot of shifts.
40  * For 12-bit JSAMPLEs, there's no hope of using 16x16 multiplies anyway,
41  * so we use a much larger scaling factor to preserve accuracy.
42  *
43  * A final compromise is to represent the multiplicative constants to only
44  * 8 fractional bits, rather than 13. This saves some shifting work on some
45  * machines, and may also reduce the cost of multiplication (since there
46  * are fewer one-bits in the constants).
47  */
48 
49 #if BITS_IN_JSAMPLE == 8
50 #define CONST_BITS 8 /* fractional bits kept in the FIX_* multiplier constants */
51 #define PASS1_BITS 2 /* scale-up (in bits) carried by dequantized values; undone in pass 2 */
52 #else
53 #define CONST_BITS 8 /* same constant precision as the 8-bit case */
54 #define PASS1_BITS 1 /* lose a little precision to avoid overflow */
55 #endif
56 
57 /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
58  * causing a lot of useless floating-point operations at run time.
59  * To get around this we use the following pre-calculated constants.
60  * If you change CONST_BITS you may want to add appropriate values.
61  * (With a reasonable C compiler, you can just rely on the FIX() macro...)
62  */
63 
64 #if CONST_BITS == 8
65 #define FIX_1_082392200 ((INT32)277) /* FIX(1.082392200): 1.082392200 * 2^8 ~= 277.09 */
66 #define FIX_1_414213562 ((INT32)362) /* FIX(1.414213562): 1.414213562 * 2^8 ~= 362.04 */
67 #define FIX_1_847759065 ((INT32)473) /* FIX(1.847759065): 1.847759065 * 2^8 ~= 473.03 */
68 #define FIX_2_613125930 ((INT32)669) /* FIX(2.613125930): 2.613125930 * 2^8 ~= 668.96 */
69 #else
/* No pre-calculated table for other CONST_BITS values: fall back to the FIX() macro. */
70 #define FIX_1_082392200 FIX(1.082392200)
71 #define FIX_1_414213562 FIX(1.414213562)
72 #define FIX_1_847759065 FIX(1.847759065)
73 #define FIX_2_613125930 FIX(2.613125930)
74 #endif
75 
76 /* We can gain a little more speed, with a further compromise in accuracy,
77  * by omitting the addition in a descaling shift. This yields an incorrectly
78  * rounded result half the time...
79  */
80 
81 #ifndef USE_ACCURATE_ROUNDING
82 #undef DESCALE /* drop the DESCALE definition that is in effect at this point */
83 #define DESCALE(x, n) RIGHT_SHIFT(x, n) /* plain right shift: the rounding addition is omitted */
84 #endif
85 
86 /* Multiply a DCTELEM variable by an INT32 constant, and immediately
87  * descale to yield a DCTELEM result.
88  */
89 
90 #define MULTIPLY(var, const) ((DCTELEM)DESCALE((var) * (const), CONST_BITS)) /* product is descaled by CONST_BITS, then cast to DCTELEM */
91 
92 /* Dequantize a coefficient by multiplying it by the multiplier-table
93  * entry; produce a DCTELEM result. For 8-bit data a 16x16->16
94  * multiplication will do. For 12-bit data, the multiplier table is
95  * declared INT32, so a 32-bit multiply will be used.
96  */
97 
98 #if BITS_IN_JSAMPLE == 8
99 #define DEQUANTIZE(coef, quantval) (((IFAST_MULT_TYPE)(coef)) * (quantval)) /* plain product; no descaling shift in this branch */
100 #else
101 #define DEQUANTIZE(coef, quantval) \
102  DESCALE((coef) * (quantval), IFAST_SCALE_BITS - PASS1_BITS) /* product descaled by (IFAST_SCALE_BITS - PASS1_BITS) bits */
103 #endif
104 
105 /* Like DESCALE, but applies to a DCTELEM and produces an int.
106  * We assume that int right shift is unsigned if INT32 right shift is.
107  */
108 
109 #ifdef RIGHT_SHIFT_IS_UNSIGNED
/* Branch for platforms whose right shift is unsigned: the sign bits are re-inserted by hand. */
110 #define ISHIFT_TEMPS DCTELEM ishift_temp; /* scratch variable that IRIGHT_SHIFT assigns its argument to */
111 #if BITS_IN_JSAMPLE == 8
112 #define DCTELEMBITS 16 /* DCTELEM may be 16 or 32 bits */
113 #else
114 #define DCTELEMBITS 32 /* DCTELEM must be 32 bits */
115 #endif
/* For a negative argument, OR the shifted value with all-ones shifted left by
 * (DCTELEMBITS - shft); for a non-negative argument the plain shift is used.
 * The argument is evaluated once, into ishift_temp. */
116 #define IRIGHT_SHIFT(x, shft) \
117  ((ishift_temp = (x)) < 0 \
118  ? (ishift_temp >> (shft)) | \
119  ((~((DCTELEM)0)) << (DCTELEMBITS - (shft))) \
120  : (ishift_temp >> (shft)))
121 #else
122 #define ISHIFT_TEMPS /* expands to nothing: no scratch variable is needed */
123 #define IRIGHT_SHIFT(x, shft) ((x) >> (shft)) /* the native shift is used directly */
124 #endif
125 
126 #ifdef USE_ACCURATE_ROUNDING
127 #define IDESCALE(x, n) ((int)IRIGHT_SHIFT((x) + (1 << ((n)-1)), n)) /* adds 2^(n-1) before shifting, i.e. rounds to nearest */
128 #else
129 #define IDESCALE(x, n) ((int)IRIGHT_SHIFT(x, n)) /* shift only; no rounding term is added */
130 #endif
131 
132  /*
133  * Perform dequantization and inverse DCT on one block of coefficients.
 *
 * The work is done in two passes over the 8x8 block:
 *   pass 1 dequantizes each input column (DEQUANTIZE of inptr[] by
 *          quantptr[]), transforms it and stores it in the int workspace[];
 *   pass 2 transforms each workspace row, descales by PASS1_BITS + 3 bits
 *          (IDESCALE) and writes outptr[0..7] through the range_limit[]
 *          table, indexing it with the value masked by RANGE_MASK.
 * Columns (pass 1) and rows (pass 2) whose AC terms are all zero take a
 * shortcut that replicates the DC value.
 *
 * NOTE(review): this listing has lost source lines. Its embedded numbering
 * skips 137-138, 141-144, 146-147, 156, 255, 270 and 293, so the parameter
 * list, several declarations and a few statements are not visible below.
 * Restore them from the upstream file before trying to compile this text.
134  */
135 
136  GLOBAL(void) jpeg_idct_ifast(
 /* NOTE(review): the parameter list (listing lines 137-138) is missing here.
  * The page's cross-reference index names cinfo, compptr, coef_block,
  * output_buf and output_col as the parameters -- confirm against upstream. */
139 {
140  DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
 /* NOTE(review): listing lines 141-144 are missing. Per the cross-reference
  * index they declare tmp10..tmp13 and z5, z10..z13 (DCTELEM), inptr
  * (JCOEFPTR) and quantptr (IFAST_MULT_TYPE *). */
145  int* wsptr;
 /* NOTE(review): listing lines 146-147 are missing. Per the cross-reference
  * index they declare outptr (JSAMPROW) and range_limit (JSAMPLE *);
  * range_limit is presumably set from IDCT_range_limit(cinfo) -- confirm. */
148  int ctr;
149  int workspace[DCTSIZE2]; /* buffers data between passes */
150  SHIFT_TEMPS /* for DESCALE */
151  ISHIFT_TEMPS /* for IDESCALE */
152 
153  /* Pass 1: process columns from input, store into work array. */
154 
155  inptr = coef_block;
 /* NOTE(review): listing line 156 is missing. quantptr is read below but is
  * never assigned in the visible text, so this is presumably where it is
  * initialized -- confirm against upstream. */
157  wsptr = workspace;
 /* One iteration per column; inptr, quantptr and wsptr each advance by one
  * element at the end of every iteration. */
158  for (ctr = DCTSIZE; ctr > 0; ctr--)
159  {
160  /* Due to quantization, we will usually find that many of the input
161  * coefficients are zero, especially the AC terms. We can exploit this
162  * by short-circuiting the IDCT calculation for any column in which all
163  * the AC terms are zero. In that case each output is equal to the
164  * DC coefficient (with scale factor as needed).
165  * With typical images and quantization tables, half or more of the
166  * column DCT calculations can be simplified this way.
167  */
168 
169  if (inptr[DCTSIZE * 1] == 0 && inptr[DCTSIZE * 2] == 0 &&
170  inptr[DCTSIZE * 3] == 0 && inptr[DCTSIZE * 4] == 0 &&
171  inptr[DCTSIZE * 5] == 0 && inptr[DCTSIZE * 6] == 0 &&
172  inptr[DCTSIZE * 7] == 0)
173  {
174  /* AC terms all zero */
175  int dcval =
176  (int)DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
177 
 /* Replicate the dequantized DC value down the whole workspace column. */
178  wsptr[DCTSIZE * 0] = dcval;
179  wsptr[DCTSIZE * 1] = dcval;
180  wsptr[DCTSIZE * 2] = dcval;
181  wsptr[DCTSIZE * 3] = dcval;
182  wsptr[DCTSIZE * 4] = dcval;
183  wsptr[DCTSIZE * 5] = dcval;
184  wsptr[DCTSIZE * 6] = dcval;
185  wsptr[DCTSIZE * 7] = dcval;
186 
187  inptr++; /* advance pointers to next column */
188  quantptr++;
189  wsptr++;
190  continue;
191  }
192 
193  /* Even part */
194 
 /* Dequantize the even-indexed entries (rows 0, 2, 4, 6) of this column. */
195  tmp0 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
196  tmp1 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]);
197  tmp2 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
198  tmp3 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]);
199 
200  tmp10 = tmp0 + tmp2; /* phase 3 */
201  tmp11 = tmp0 - tmp2;
202 
203  tmp13 = tmp1 + tmp3; /* phases 5-3 */
204  tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */
205 
206  tmp0 = tmp10 + tmp13; /* phase 2 */
207  tmp3 = tmp10 - tmp13;
208  tmp1 = tmp11 + tmp12;
209  tmp2 = tmp11 - tmp12;
210 
211  /* Odd part */
212 
 /* Dequantize the odd-indexed entries (rows 1, 3, 5, 7) of this column. */
213  tmp4 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
214  tmp5 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]);
215  tmp6 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]);
216  tmp7 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]);
217 
218  z13 = tmp6 + tmp5; /* phase 6 */
219  z10 = tmp6 - tmp5;
220  z11 = tmp4 + tmp7;
221  z12 = tmp4 - tmp7;
222 
223  tmp7 = z11 + z13; /* phase 5 */
224  tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
225 
226  z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
227  tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */
228  tmp12 = MULTIPLY(z10, -FIX_2_613125930) + z5; /* -2*(c2+c6) */
229 
230  tmp6 = tmp12 - tmp7; /* phase 2 */
231  tmp5 = tmp11 - tmp6;
232  tmp4 = tmp10 + tmp5;
233 
 /* Combine the even and odd parts. Each pair of workspace rows (0/7, 1/6,
  * 2/5, 4/3) is the sum and the difference of the same two terms. */
234  wsptr[DCTSIZE * 0] = (int)(tmp0 + tmp7);
235  wsptr[DCTSIZE * 7] = (int)(tmp0 - tmp7);
236  wsptr[DCTSIZE * 1] = (int)(tmp1 + tmp6);
237  wsptr[DCTSIZE * 6] = (int)(tmp1 - tmp6);
238  wsptr[DCTSIZE * 2] = (int)(tmp2 + tmp5);
239  wsptr[DCTSIZE * 5] = (int)(tmp2 - tmp5);
240  wsptr[DCTSIZE * 4] = (int)(tmp3 + tmp4);
241  wsptr[DCTSIZE * 3] = (int)(tmp3 - tmp4);
242 
243  inptr++; /* advance pointers to next column */
244  quantptr++;
245  wsptr++;
246  }
247 
248  /* Pass 2: process rows from work array, store into output array. */
249  /* Note that we must descale the results by a factor of 8 == 2**3, */
250  /* and also undo the PASS1_BITS scaling. */
251 
252  wsptr = workspace;
 /* One iteration per row; wsptr advances by DCTSIZE at the end of each one. */
253  for (ctr = 0; ctr < DCTSIZE; ctr++)
254  {
 /* NOTE(review): listing line 255 is missing. outptr is written below but is
  * never assigned in the visible text, so this is presumably where it is
  * pointed at the output row for this iteration -- confirm against upstream. */
256 /* Rows of zeroes can be exploited in the same way as we did with columns.
257  * However, the column calculation has created many nonzero AC terms, so
258  * the simplification applies less often (typically 5% to 10% of the time).
259  * On machines with very fast multiplication, it's possible that the
260  * test takes more time than it's worth. In that case this section
261  * may be commented out.
262  */
263 
264 #ifndef NO_ZERO_ROW_TEST
265  if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
266  wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0)
267  {
268  /* AC terms all zero */
269  JSAMPLE dcval =
 /* NOTE(review): listing line 270 (the initializer of dcval) is missing.
  * Presumably it is the descaled, range-limited wsptr[0], in the same form as
  * the final output stage below -- confirm against upstream. */
271 
272  outptr[0] = dcval;
273  outptr[1] = dcval;
274  outptr[2] = dcval;
275  outptr[3] = dcval;
276  outptr[4] = dcval;
277  outptr[5] = dcval;
278  outptr[6] = dcval;
279  outptr[7] = dcval;
280 
281  wsptr += DCTSIZE; /* advance pointer to next row */
282  continue;
283  }
284 #endif
285 
286  /* Even part */
287 
288  tmp10 = ((DCTELEM)wsptr[0] + (DCTELEM)wsptr[4]);
289  tmp11 = ((DCTELEM)wsptr[0] - (DCTELEM)wsptr[4]);
290 
291  tmp13 = ((DCTELEM)wsptr[2] + (DCTELEM)wsptr[6]);
292  tmp12 =
 /* NOTE(review): listing line 293 (the first operand of this expression) is
  * missing. Pass 1 computes the matching term as
  * MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13, so presumably this is the
  * same product formed from wsptr[2] and wsptr[6] -- confirm against upstream. */
294  tmp13;
295 
296  tmp0 = tmp10 + tmp13;
297  tmp3 = tmp10 - tmp13;
298  tmp1 = tmp11 + tmp12;
299  tmp2 = tmp11 - tmp12;
300 
301  /* Odd part */
302 
303  z13 = (DCTELEM)wsptr[5] + (DCTELEM)wsptr[3];
304  z10 = (DCTELEM)wsptr[5] - (DCTELEM)wsptr[3];
305  z11 = (DCTELEM)wsptr[1] + (DCTELEM)wsptr[7];
306  z12 = (DCTELEM)wsptr[1] - (DCTELEM)wsptr[7];
307 
308  tmp7 = z11 + z13; /* phase 5 */
309  tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
310 
311  z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
312  tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */
313  tmp12 = MULTIPLY(z10, -FIX_2_613125930) + z5; /* -2*(c2+c6) */
314 
315  tmp6 = tmp12 - tmp7; /* phase 2 */
316  tmp5 = tmp11 - tmp6;
317  tmp4 = tmp10 + tmp5;
318 
319  /* Final output stage: scale down by a factor of 8 and range-limit */
 /* Each value is shifted right by PASS1_BITS + 3 bits, masked with RANGE_MASK
  * and used as an index into range_limit[]; the pairing of outputs mirrors
  * the workspace stores of pass 1 (0/7, 1/6, 2/5, 4/3). */
320 
321  outptr[0] =
322  range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS + 3) & RANGE_MASK];
323  outptr[7] =
324  range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS + 3) & RANGE_MASK];
325  outptr[1] =
326  range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS + 3) & RANGE_MASK];
327  outptr[6] =
328  range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS + 3) & RANGE_MASK];
329  outptr[2] =
330  range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS + 3) & RANGE_MASK];
331  outptr[5] =
332  range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS + 3) & RANGE_MASK];
333  outptr[4] =
334  range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS + 3) & RANGE_MASK];
335  outptr[3] =
336  range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS + 3) & RANGE_MASK];
337 
338  wsptr += DCTSIZE; /* advance pointer to next row */
339  }
340 }
341 
342 #endif /* DCT_IFAST_SUPPORTED */
JSAMPLE * range_limit
Definition: jidctfst.cpp:147
#define IDCT_range_limit(cinfo)
Definition: jdct.h:68
char JSAMPLE
Definition: jmorecfg.h:58
#define FIX_1_847759065
Definition: jidctfst.cpp:67
GLOBAL(void) jpeg_idct_ifast(j_decompress_ptr cinfo
jpeg_component_info * compptr
Definition: jidctfst.cpp:137
jpeg_component_info JCOEFPTR JSAMPARRAY JDIMENSION output_col
Definition: jidctfst.cpp:139
#define FIX_1_082392200
Definition: jidctfst.cpp:65
JSAMPROW outptr
Definition: jidctfst.cpp:146
DCTELEM z11
Definition: jidctfst.cpp:142
#define DCTSIZE
Definition: mrpt_jpeglib.h:36
#define ISHIFT_TEMPS
Definition: jidctfst.cpp:122
#define RANGE_MASK
Definition: jdct.h:70
INT32 DCTELEM
Definition: jdct.h:26
JSAMPLE FAR * JSAMPROW
Definition: mrpt_jpeglib.h:60
#define SHIFT_TEMPS
Definition: jpegint.h:301
DCTELEM tmp10
Definition: jidctfst.cpp:141
DCTELEM tmp11
Definition: jidctfst.cpp:141
DCTELEM z10
Definition: jidctfst.cpp:142
for(ctr=DCTSIZE;ctr > 0;ctr--)
Definition: jidctfst.cpp:158
DCTELEM z13
Definition: jidctfst.cpp:142
JSAMPROW * JSAMPARRAY
Definition: mrpt_jpeglib.h:61
jpeg_component_info JCOEFPTR JSAMPARRAY output_buf
Definition: jidctfst.cpp:137
JCOEF FAR * JCOEFPTR
Definition: mrpt_jpeglib.h:69
int workspace[DCTSIZE2]
Definition: jidctfst.cpp:149
#define DCTSIZE2
Definition: mrpt_jpeglib.h:37
INT32 IFAST_MULT_TYPE
Definition: jdct.h:54
IFAST_MULT_TYPE * quantptr
Definition: jidctfst.cpp:144
#define IDESCALE(x, n)
Definition: jidctfst.cpp:129
#define FIX_2_613125930
Definition: jidctfst.cpp:68
#define FIX_1_414213562
Definition: jidctfst.cpp:66
DCTELEM z5
Definition: jidctfst.cpp:142
Definition: inftrees.h:28
DCTELEM tmp13
Definition: jidctfst.cpp:141
jpeg_component_info JCOEFPTR coef_block
Definition: jidctfst.cpp:137
DCTELEM z12
Definition: jidctfst.cpp:142
unsigned int JDIMENSION
Definition: jmorecfg.h:161
#define MULTIPLY(var, const)
Definition: jidctfst.cpp:90
int * wsptr
Definition: jidctfst.cpp:145
int ctr
Definition: jidctfst.cpp:148
#define PASS1_BITS
Definition: jidctfst.cpp:51
JCOEFPTR inptr
Definition: jidctfst.cpp:143
#define DEQUANTIZE(coef, quantval)
Definition: jidctfst.cpp:99
DCTELEM tmp12
Definition: jidctfst.cpp:141



Page generated by Doxygen 1.8.14 for MRPT 1.9.9 Git: ae4571287 Thu Nov 23 00:06:53 2017 +0100 at dom oct 27 23:51:55 CET 2019