Main MRPT website > C++ reference for MRPT 1.5.9
jidctfst.cpp
Go to the documentation of this file.
1 /* +---------------------------------------------------------------------------+
2  | Mobile Robot Programming Toolkit (MRPT) |
3  | http://www.mrpt.org/ |
4  | |
5  | Copyright (c) 2005-2017, Individual contributors, see AUTHORS file |
6  | See: http://www.mrpt.org/Authors - All rights reserved. |
7  | Released under BSD License. See details in http://www.mrpt.org/License |
8  +---------------------------------------------------------------------------+ */
9 
10 #define JPEG_INTERNALS
11 #include "jinclude.h"
12 #include "mrpt_jpeglib.h"
13 #include "jdct.h" /* Private declarations for DCT subsystem */
14 
15 #ifdef DCT_IFAST_SUPPORTED
16 
17 
18 /*
19  * This module is specialized to the case DCTSIZE = 8.
20  */
21 
22 #if DCTSIZE != 8
23  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
24 #endif
25 
26 
27 /* Scaling decisions are generally the same as in the LL&M algorithm;
28  * see jidctint.c for more details. However, we choose to descale
29  * (right shift) multiplication products as soon as they are formed,
30  * rather than carrying additional fractional bits into subsequent additions.
31  * This compromises accuracy slightly, but it lets us save a few shifts.
32  * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
33  * everywhere except in the multiplications proper; this saves a good deal
34  * of work on 16-bit-int machines.
35  *
36  * The dequantized coefficients are not integers because the AA&N scaling
37  * factors have been incorporated. We represent them scaled up by PASS1_BITS,
38  * so that the first and second IDCT rounds have the same input scaling.
39  * For 8-bit JSAMPLEs, we choose IFAST_SCALE_BITS = PASS1_BITS so as to
40  * avoid a descaling shift; this compromises accuracy rather drastically
41  * for small quantization table entries, but it saves a lot of shifts.
42  * For 12-bit JSAMPLEs, there's no hope of using 16x16 multiplies anyway,
43  * so we use a much larger scaling factor to preserve accuracy.
44  *
45  * A final compromise is to represent the multiplicative constants to only
46  * 8 fractional bits, rather than 13. This saves some shifting work on some
47  * machines, and may also reduce the cost of multiplication (since there
48  * are fewer one-bits in the constants).
49  */
50 
/* Fixed-point configuration for this module:
 *   CONST_BITS  - fractional bits kept in the multiplicative constants.
 *   PASS1_BITS  - scale-up applied to the dequantized coefficients so that
 *                 both IDCT passes see the same input scaling.
 */
#if BITS_IN_JSAMPLE == 8
#define CONST_BITS  8
#define PASS1_BITS  2
#else
#define CONST_BITS  8
#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
#endif
58 
/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
 * causing a lot of useless floating-point operations at run time.
 * To get around this we use the following pre-calculated constants,
 * which are the FIX() values for CONST_BITS == 8.
 * If you change CONST_BITS you may want to add appropriate values.
 * (With a reasonable C compiler, you can just rely on the FIX() macro...)
 */

#if CONST_BITS == 8
#define FIX_1_082392200  ((INT32)  277)		/* FIX(1.082392200) */
#define FIX_1_414213562  ((INT32)  362)		/* FIX(1.414213562) */
#define FIX_1_847759065  ((INT32)  473)		/* FIX(1.847759065) */
#define FIX_2_613125930  ((INT32)  669)		/* FIX(2.613125930) */
#else
/* Any other precision: let FIX() compute the scaled constant. */
#define FIX_1_082392200  FIX(1.082392200)
#define FIX_1_414213562  FIX(1.414213562)
#define FIX_1_847759065  FIX(1.847759065)
#define FIX_2_613125930  FIX(2.613125930)
#endif
77 
78 
/* We can gain a little more speed, with a further compromise in accuracy,
 * by omitting the addition in a descaling shift.  This yields an incorrectly
 * rounded result half the time...
 *
 * Unless USE_ACCURATE_ROUNDING is defined, the DESCALE definition in effect
 * at this point is therefore replaced by a plain RIGHT_SHIFT.
 */

#ifndef USE_ACCURATE_ROUNDING
#undef DESCALE
#define DESCALE(x,n)  RIGHT_SHIFT(x, n)
#endif
88 
89 
/* Multiply a DCTELEM variable by an INT32 constant, and immediately
 * descale by CONST_BITS to yield a DCTELEM result.
 * (The second parameter is named "factor" rather than "const" so that the
 * macro definition does not reuse a language keyword as a parameter name.)
 */

#define MULTIPLY(var,factor)  ((DCTELEM) DESCALE((var) * (factor), CONST_BITS))
95 
96 
/* Dequantize a coefficient by multiplying it by the multiplier-table
 * entry; produce a DCTELEM result.  For 8-bit data a 16x16->16
 * multiplication will do.  For 12-bit data, the multiplier table is
 * declared INT32, so a 32-bit multiply will be used, and the product is
 * descaled by IFAST_SCALE_BITS-PASS1_BITS.
 */

#if BITS_IN_JSAMPLE == 8
#define DEQUANTIZE(coef,quantval)  (((IFAST_MULT_TYPE) (coef)) * (quantval))
#else
#define DEQUANTIZE(coef,quantval)  \
	DESCALE((coef)*(quantval), IFAST_SCALE_BITS-PASS1_BITS)
#endif
109 
110 
/* Like DESCALE, but applies to a DCTELEM and produces an int.
 * We assume that int right shift is unsigned if INT32 right shift is.
 *
 * When RIGHT_SHIFT_IS_UNSIGNED is defined, IRIGHT_SHIFT stores its operand
 * in the ishift_temp local (declared by ISHIFT_TEMPS) and, for negative
 * values, ORs ones into the high-order bits vacated by the shift.
 * Otherwise it is a plain >> and ISHIFT_TEMPS expands to nothing.
 */

#ifdef RIGHT_SHIFT_IS_UNSIGNED
#define ISHIFT_TEMPS	DCTELEM ishift_temp;
#if BITS_IN_JSAMPLE == 8
#define DCTELEMBITS  16		/* DCTELEM may be 16 or 32 bits */
#else
#define DCTELEMBITS  32		/* DCTELEM must be 32 bits */
#endif
#define IRIGHT_SHIFT(x,shft)  \
    ((ishift_temp = (x)) < 0 ? \
     (ishift_temp >> (shft)) | ((~((DCTELEM) 0)) << (DCTELEMBITS-(shft))) : \
     (ishift_temp >> (shft)))
#else
#define ISHIFT_TEMPS
#define IRIGHT_SHIFT(x,shft)	((x) >> (shft))
#endif
130 
/* IDESCALE: shift x right by n bits via IRIGHT_SHIFT and yield an int.
 * With USE_ACCURATE_ROUNDING, half of the divisor (1 << (n-1)) is added
 * first so the result is rounded; otherwise the shift alone is applied.
 */
#ifdef USE_ACCURATE_ROUNDING
#define IDESCALE(x,n)  ((int) IRIGHT_SHIFT((x) + (1 << ((n)-1)), n))
#else
#define IDESCALE(x,n)  ((int) IRIGHT_SHIFT(x, n))
#endif
136 
137 
138 /*
139  * Perform dequantization and inverse DCT on one block of coefficients.
140  */
141 
142 GLOBAL(void)
146 {
147  DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
148  DCTELEM tmp10, tmp11, tmp12, tmp13;
149  DCTELEM z5, z10, z11, z12, z13;
150  JCOEFPTR inptr;
151  IFAST_MULT_TYPE * quantptr;
152  int * wsptr;
153  JSAMPROW outptr;
154  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
155  int ctr;
156  int workspace[DCTSIZE2]; /* buffers data between passes */
157  SHIFT_TEMPS /* for DESCALE */
158  ISHIFT_TEMPS /* for IDESCALE */
159 
160  /* Pass 1: process columns from input, store into work array. */
161 
162  inptr = coef_block;
163  quantptr = (IFAST_MULT_TYPE *) compptr->dct_table;
164  wsptr = workspace;
165  for (ctr = DCTSIZE; ctr > 0; ctr--) {
166  /* Due to quantization, we will usually find that many of the input
167  * coefficients are zero, especially the AC terms. We can exploit this
168  * by short-circuiting the IDCT calculation for any column in which all
169  * the AC terms are zero. In that case each output is equal to the
170  * DC coefficient (with scale factor as needed).
171  * With typical images and quantization tables, half or more of the
172  * column DCT calculations can be simplified this way.
173  */
174 
175  if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
176  inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
177  inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
178  inptr[DCTSIZE*7] == 0) {
179  /* AC terms all zero */
180  int dcval = (int) DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
181 
182  wsptr[DCTSIZE*0] = dcval;
183  wsptr[DCTSIZE*1] = dcval;
184  wsptr[DCTSIZE*2] = dcval;
185  wsptr[DCTSIZE*3] = dcval;
186  wsptr[DCTSIZE*4] = dcval;
187  wsptr[DCTSIZE*5] = dcval;
188  wsptr[DCTSIZE*6] = dcval;
189  wsptr[DCTSIZE*7] = dcval;
190 
191  inptr++; /* advance pointers to next column */
192  quantptr++;
193  wsptr++;
194  continue;
195  }
196 
197  /* Even part */
198 
199  tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
200  tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
201  tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
202  tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
203 
204  tmp10 = tmp0 + tmp2; /* phase 3 */
205  tmp11 = tmp0 - tmp2;
206 
207  tmp13 = tmp1 + tmp3; /* phases 5-3 */
208  tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */
209 
210  tmp0 = tmp10 + tmp13; /* phase 2 */
211  tmp3 = tmp10 - tmp13;
212  tmp1 = tmp11 + tmp12;
213  tmp2 = tmp11 - tmp12;
214 
215  /* Odd part */
216 
217  tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
218  tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
219  tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
220  tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
221 
222  z13 = tmp6 + tmp5; /* phase 6 */
223  z10 = tmp6 - tmp5;
224  z11 = tmp4 + tmp7;
225  z12 = tmp4 - tmp7;
226 
227  tmp7 = z11 + z13; /* phase 5 */
228  tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
229 
230  z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
231  tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */
232  tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */
233 
234  tmp6 = tmp12 - tmp7; /* phase 2 */
235  tmp5 = tmp11 - tmp6;
236  tmp4 = tmp10 + tmp5;
237 
238  wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7);
239  wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7);
240  wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6);
241  wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6);
242  wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5);
243  wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5);
244  wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4);
245  wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4);
246 
247  inptr++; /* advance pointers to next column */
248  quantptr++;
249  wsptr++;
250  }
251 
252  /* Pass 2: process rows from work array, store into output array. */
253  /* Note that we must descale the results by a factor of 8 == 2**3, */
254  /* and also undo the PASS1_BITS scaling. */
255 
256  wsptr = workspace;
257  for (ctr = 0; ctr < DCTSIZE; ctr++) {
258  outptr = output_buf[ctr] + output_col;
259  /* Rows of zeroes can be exploited in the same way as we did with columns.
260  * However, the column calculation has created many nonzero AC terms, so
261  * the simplification applies less often (typically 5% to 10% of the time).
262  * On machines with very fast multiplication, it's possible that the
263  * test takes more time than it's worth. In that case this section
264  * may be commented out.
265  */
266 
267 #ifndef NO_ZERO_ROW_TEST
268  if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
269  wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
270  /* AC terms all zero */
271  JSAMPLE dcval = range_limit[IDESCALE(wsptr[0], PASS1_BITS+3)
272  & RANGE_MASK];
273 
274  outptr[0] = dcval;
275  outptr[1] = dcval;
276  outptr[2] = dcval;
277  outptr[3] = dcval;
278  outptr[4] = dcval;
279  outptr[5] = dcval;
280  outptr[6] = dcval;
281  outptr[7] = dcval;
282 
283  wsptr += DCTSIZE; /* advance pointer to next row */
284  continue;
285  }
286 #endif
287 
288  /* Even part */
289 
290  tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]);
291  tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]);
292 
293  tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]);
294  tmp12 = MULTIPLY((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6], FIX_1_414213562)
295  - tmp13;
296 
297  tmp0 = tmp10 + tmp13;
298  tmp3 = tmp10 - tmp13;
299  tmp1 = tmp11 + tmp12;
300  tmp2 = tmp11 - tmp12;
301 
302  /* Odd part */
303 
304  z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3];
305  z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3];
306  z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7];
307  z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7];
308 
309  tmp7 = z11 + z13; /* phase 5 */
310  tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
311 
312  z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
313  tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */
314  tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */
315 
316  tmp6 = tmp12 - tmp7; /* phase 2 */
317  tmp5 = tmp11 - tmp6;
318  tmp4 = tmp10 + tmp5;
319 
320  /* Final output stage: scale down by a factor of 8 and range-limit */
321 
322  outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3)
323  & RANGE_MASK];
324  outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3)
325  & RANGE_MASK];
326  outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3)
327  & RANGE_MASK];
328  outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3)
329  & RANGE_MASK];
330  outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3)
331  & RANGE_MASK];
332  outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3)
333  & RANGE_MASK];
334  outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3)
335  & RANGE_MASK];
336  outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3)
337  & RANGE_MASK];
338 
339  wsptr += DCTSIZE; /* advance pointer to next row */
340  }
341 }
342 
343 #endif /* DCT_IFAST_SUPPORTED */
#define IDCT_range_limit(cinfo)
Definition: jdct.h:71
char JSAMPLE
Definition: jmorecfg.h:61
#define FIX_1_847759065
Definition: jidctfst.cpp:69
#define FIX_1_082392200
Definition: jidctfst.cpp:67
#define DCTSIZE
Definition: mrpt_jpeglib.h:38
jpeg_component_info JCOEFPTR coef_block
Definition: jdct.h:97
#define ISHIFT_TEMPS
Definition: jidctfst.cpp:127
#define RANGE_MASK
Definition: jdct.h:73
INT32 DCTELEM
Definition: jdct.h:27
JSAMPLE FAR * JSAMPROW
Definition: mrpt_jpeglib.h:63
#define SHIFT_TEMPS
Definition: jpegint.h:286
jpeg_component_info * compptr
Definition: jdct.h:97
jpeg_component_info JCOEFPTR JSAMPARRAY JDIMENSION output_col
Definition: jdct.h:97
JSAMPROW * JSAMPARRAY
Definition: mrpt_jpeglib.h:64
JCOEF FAR * JCOEFPTR
Definition: mrpt_jpeglib.h:72
#define DCTSIZE2
Definition: mrpt_jpeglib.h:39
INT32 IFAST_MULT_TYPE
Definition: jdct.h:56
#define IDESCALE(x, n)
Definition: jidctfst.cpp:134
#define FIX_2_613125930
Definition: jidctfst.cpp:70
#define FIX_1_414213562
Definition: jidctfst.cpp:68
Definition: inftrees.h:28
JSAMPIMAGE output_buf
Definition: jdcoefct.cpp:59
#define GLOBAL(type)
Definition: jmorecfg.h:185
unsigned int JDIMENSION
Definition: jmorecfg.h:168
#define MULTIPLY(var, const)
Definition: jidctfst.cpp:94
jpeg_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr, JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)
Definition: jidctfst.cpp:143
#define PASS1_BITS
Definition: jidctfst.cpp:53
#define DEQUANTIZE(coef, quantval)
Definition: jidctfst.cpp:104



Page generated by Doxygen 1.8.14 for MRPT 1.5.9 Git: 690a4699f Wed Apr 15 19:29:53 2020 +0200 at mié abr 15 19:30:12 CEST 2020