Main MRPT website > C++ reference for MRPT 1.5.6
jidctint.cpp
Go to the documentation of this file.
1 /* +---------------------------------------------------------------------------+
2  | Mobile Robot Programming Toolkit (MRPT) |
3  | http://www.mrpt.org/ |
4  | |
5  | Copyright (c) 2005-2017, Individual contributors, see AUTHORS file |
6  | See: http://www.mrpt.org/Authors - All rights reserved. |
7  | Released under BSD License. See details in http://www.mrpt.org/License |
8  +---------------------------------------------------------------------------+ */
9 
10 #define JPEG_INTERNALS
11 #include "jinclude.h"
12 #include "mrpt_jpeglib.h"
13 #include "jdct.h" /* Private declarations for DCT subsystem */
14 
15 #ifdef DCT_ISLOW_SUPPORTED
16 
17 
18 /*
19  * This module is specialized to the case DCTSIZE = 8.
20  */
21 
22 #if DCTSIZE != 8
23  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
24 #endif
25 
26 
27 /*
28  * The poop on this scaling stuff is as follows:
29  *
30  * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
31  * larger than the true IDCT outputs. The final outputs are therefore
32  * a factor of N larger than desired; since N=8 this can be cured by
33  * a simple right shift at the end of the algorithm. The advantage of
34  * this arrangement is that we save two multiplications per 1-D IDCT,
35  * because the y0 and y4 inputs need not be divided by sqrt(N).
36  *
37  * We have to do addition and subtraction of the integer inputs, which
38  * is no problem, and multiplication by fractional constants, which is
39  * a problem to do in integer arithmetic. We multiply all the constants
40  * by CONST_SCALE and convert them to integer constants (thus retaining
41  * CONST_BITS bits of precision in the constants). After doing a
42  * multiplication we have to divide the product by CONST_SCALE, with proper
43  * rounding, to produce the correct output. This division can be done
44  * cheaply as a right shift of CONST_BITS bits. We postpone shifting
45  * as long as possible so that partial sums can be added together with
46  * full fractional precision.
47  *
48  * The outputs of the first pass are scaled up by PASS1_BITS bits so that
49  * they are represented to better-than-integral precision. These outputs
50  * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
51  * with the recommended scaling. (To scale up 12-bit sample data further, an
52  * intermediate INT32 array would be needed.)
53  *
54  * To avoid overflow of the 32-bit intermediate results in pass 2, we must
55  * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis
56  * shows that the values given below are the most effective.
57  */
58 
59 #if BITS_IN_JSAMPLE == 8
60 #define CONST_BITS 13
61 #define PASS1_BITS 2
62 #else
63 #define CONST_BITS 13
64 #define PASS1_BITS 1 /* lose a little precision to avoid overflow */
65 #endif
66 
67 /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
68  * causing a lot of useless floating-point operations at run time.
69  * To get around this we use the following pre-calculated constants.
70  * If you change CONST_BITS you may want to add appropriate values.
71  * (With a reasonable C compiler, you can just rely on the FIX() macro...)
72  */
73 
74 #if CONST_BITS == 13
75 #define FIX_0_298631336 ((INT32) 2446) /* FIX(0.298631336) */
76 #define FIX_0_390180644 ((INT32) 3196) /* FIX(0.390180644) */
77 #define FIX_0_541196100 ((INT32) 4433) /* FIX(0.541196100) */
78 #define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */
79 #define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */
80 #define FIX_1_175875602 ((INT32) 9633) /* FIX(1.175875602) */
81 #define FIX_1_501321110 ((INT32) 12299) /* FIX(1.501321110) */
82 #define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */
83 #define FIX_1_961570560 ((INT32) 16069) /* FIX(1.961570560) */
84 #define FIX_2_053119869 ((INT32) 16819) /* FIX(2.053119869) */
85 #define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */
86 #define FIX_3_072711026 ((INT32) 25172) /* FIX(3.072711026) */
87 #else
88 #define FIX_0_298631336 FIX(0.298631336)
89 #define FIX_0_390180644 FIX(0.390180644)
90 #define FIX_0_541196100 FIX(0.541196100)
91 #define FIX_0_765366865 FIX(0.765366865)
92 #define FIX_0_899976223 FIX(0.899976223)
93 #define FIX_1_175875602 FIX(1.175875602)
94 #define FIX_1_501321110 FIX(1.501321110)
95 #define FIX_1_847759065 FIX(1.847759065)
96 #define FIX_1_961570560 FIX(1.961570560)
97 #define FIX_2_053119869 FIX(2.053119869)
98 #define FIX_2_562915447 FIX(2.562915447)
99 #define FIX_3_072711026 FIX(3.072711026)
100 #endif
101 
102 
103 /* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
104  * For 8-bit samples with the recommended scaling, all the variable
105  * and constant values involved are no more than 16 bits wide, so a
106  * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
107  * For 12-bit samples, a full 32-bit multiplication will be needed.
108  */
109 
110 #if BITS_IN_JSAMPLE == 8
111 #define MULTIPLY(var,const) MULTIPLY16C16(var,const)
112 #else
113 #define MULTIPLY(var,const) ((var) * (const))
114 #endif
115 
116 
117 /* Dequantize a coefficient by multiplying it by the multiplier-table
118  * entry; produce an int result. In this module, both inputs and result
119  * are 16 bits or less, so either int or short multiply will work.
120  */
121 
122 #define DEQUANTIZE(coef,quantval) (((ISLOW_MULT_TYPE) (coef)) * (quantval))
123 
124 
125 /*
126  * Perform dequantization and inverse DCT on one block of coefficients.
127  */
128 
129 GLOBAL(void)
133 {
134  INT32 tmp0, tmp1, tmp2, tmp3;
135  INT32 tmp10, tmp11, tmp12, tmp13;
136  INT32 z1, z2, z3, z4, z5;
137  JCOEFPTR inptr;
138  ISLOW_MULT_TYPE * quantptr;
139  int * wsptr;
140  JSAMPROW outptr;
141  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
142  int ctr;
143  int workspace[DCTSIZE2]; /* buffers data between passes */
145 
146  /* Pass 1: process columns from input, store into work array. */
147  /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
148  /* furthermore, we scale the results by 2**PASS1_BITS. */
149 
150  inptr = coef_block;
151  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
152  wsptr = workspace;
153  for (ctr = DCTSIZE; ctr > 0; ctr--) {
154  /* Due to quantization, we will usually find that many of the input
155  * coefficients are zero, especially the AC terms. We can exploit this
156  * by short-circuiting the IDCT calculation for any column in which all
157  * the AC terms are zero. In that case each output is equal to the
158  * DC coefficient (with scale factor as needed).
159  * With typical images and quantization tables, half or more of the
160  * column DCT calculations can be simplified this way.
161  */
162 
163  if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
164  inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
165  inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
166  inptr[DCTSIZE*7] == 0) {
167  /* AC terms all zero */
168  int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
169 
170  wsptr[DCTSIZE*0] = dcval;
171  wsptr[DCTSIZE*1] = dcval;
172  wsptr[DCTSIZE*2] = dcval;
173  wsptr[DCTSIZE*3] = dcval;
174  wsptr[DCTSIZE*4] = dcval;
175  wsptr[DCTSIZE*5] = dcval;
176  wsptr[DCTSIZE*6] = dcval;
177  wsptr[DCTSIZE*7] = dcval;
178 
179  inptr++; /* advance pointers to next column */
180  quantptr++;
181  wsptr++;
182  continue;
183  }
184 
185  /* Even part: reverse the even part of the forward DCT. */
186  /* The rotator is sqrt(2)*c(-6). */
187 
188  z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
189  z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
190 
191  z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
192  tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
193  tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
194 
195  z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
196  z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
197 
198  tmp0 = (z2 + z3) << CONST_BITS;
199  tmp1 = (z2 - z3) << CONST_BITS;
200 
201  tmp10 = tmp0 + tmp3;
202  tmp13 = tmp0 - tmp3;
203  tmp11 = tmp1 + tmp2;
204  tmp12 = tmp1 - tmp2;
205 
206  /* Odd part per figure 8; the matrix is unitary and hence its
207  * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
208  */
209 
210  tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
211  tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
212  tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
213  tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
214 
215  z1 = tmp0 + tmp3;
216  z2 = tmp1 + tmp2;
217  z3 = tmp0 + tmp2;
218  z4 = tmp1 + tmp3;
219  z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
220 
221  tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
222  tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
223  tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
224  tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
225  z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
226  z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
227  z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
228  z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
229 
230  z3 += z5;
231  z4 += z5;
232 
233  tmp0 += z1 + z3;
234  tmp1 += z2 + z4;
235  tmp2 += z2 + z3;
236  tmp3 += z1 + z4;
237 
238  /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
239 
240  wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
241  wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
242  wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
243  wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
244  wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
245  wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
246  wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
247  wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
248 
249  inptr++; /* advance pointers to next column */
250  quantptr++;
251  wsptr++;
252  }
253 
254  /* Pass 2: process rows from work array, store into output array. */
255  /* Note that we must descale the results by a factor of 8 == 2**3, */
256  /* and also undo the PASS1_BITS scaling. */
257 
258  wsptr = workspace;
259  for (ctr = 0; ctr < DCTSIZE; ctr++) {
260  outptr = output_buf[ctr] + output_col;
261  /* Rows of zeroes can be exploited in the same way as we did with columns.
262  * However, the column calculation has created many nonzero AC terms, so
263  * the simplification applies less often (typically 5% to 10% of the time).
264  * On machines with very fast multiplication, it's possible that the
265  * test takes more time than it's worth. In that case this section
266  * may be commented out.
267  */
268 
269 #ifndef NO_ZERO_ROW_TEST
270  if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
271  wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
272  /* AC terms all zero */
273  JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3)
274  & RANGE_MASK];
275 
276  outptr[0] = dcval;
277  outptr[1] = dcval;
278  outptr[2] = dcval;
279  outptr[3] = dcval;
280  outptr[4] = dcval;
281  outptr[5] = dcval;
282  outptr[6] = dcval;
283  outptr[7] = dcval;
284 
285  wsptr += DCTSIZE; /* advance pointer to next row */
286  continue;
287  }
288 #endif
289 
290  /* Even part: reverse the even part of the forward DCT. */
291  /* The rotator is sqrt(2)*c(-6). */
292 
293  z2 = (INT32) wsptr[2];
294  z3 = (INT32) wsptr[6];
295 
296  z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
297  tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
298  tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
299 
300  tmp0 = ((INT32) wsptr[0] + (INT32) wsptr[4]) << CONST_BITS;
301  tmp1 = ((INT32) wsptr[0] - (INT32) wsptr[4]) << CONST_BITS;
302 
303  tmp10 = tmp0 + tmp3;
304  tmp13 = tmp0 - tmp3;
305  tmp11 = tmp1 + tmp2;
306  tmp12 = tmp1 - tmp2;
307 
308  /* Odd part per figure 8; the matrix is unitary and hence its
309  * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
310  */
311 
312  tmp0 = (INT32) wsptr[7];
313  tmp1 = (INT32) wsptr[5];
314  tmp2 = (INT32) wsptr[3];
315  tmp3 = (INT32) wsptr[1];
316 
317  z1 = tmp0 + tmp3;
318  z2 = tmp1 + tmp2;
319  z3 = tmp0 + tmp2;
320  z4 = tmp1 + tmp3;
321  z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
322 
323  tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
324  tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
325  tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
326  tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
327  z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
328  z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
329  z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
330  z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
331 
332  z3 += z5;
333  z4 += z5;
334 
335  tmp0 += z1 + z3;
336  tmp1 += z2 + z4;
337  tmp2 += z2 + z3;
338  tmp3 += z1 + z4;
339 
340  /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
341 
342  outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp3,
344  & RANGE_MASK];
345  outptr[7] = range_limit[(int) DESCALE(tmp10 - tmp3,
347  & RANGE_MASK];
348  outptr[1] = range_limit[(int) DESCALE(tmp11 + tmp2,
350  & RANGE_MASK];
351  outptr[6] = range_limit[(int) DESCALE(tmp11 - tmp2,
353  & RANGE_MASK];
354  outptr[2] = range_limit[(int) DESCALE(tmp12 + tmp1,
356  & RANGE_MASK];
357  outptr[5] = range_limit[(int) DESCALE(tmp12 - tmp1,
359  & RANGE_MASK];
360  outptr[3] = range_limit[(int) DESCALE(tmp13 + tmp0,
362  & RANGE_MASK];
363  outptr[4] = range_limit[(int) DESCALE(tmp13 - tmp0,
365  & RANGE_MASK];
366 
367  wsptr += DCTSIZE; /* advance pointer to next row */
368  }
369 }
370 
371 #endif /* DCT_ISLOW_SUPPORTED */
#define DESCALE(x, n)
Definition: jdct.h:141
#define FIX_0_298631336
Definition: jidctint.cpp:75
#define IDCT_range_limit(cinfo)
Definition: jdct.h:71
char JSAMPLE
Definition: jmorecfg.h:61
#define FIX_0_541196100
Definition: jidctint.cpp:77
#define FIX_3_072711026
Definition: jidctint.cpp:86
#define DCTSIZE
Definition: mrpt_jpeglib.h:38
jpeg_component_info JCOEFPTR coef_block
Definition: jdct.h:97
jpeg_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr, JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)
Definition: jidctint.cpp:130
#define RANGE_MASK
Definition: jdct.h:73
JSAMPLE FAR * JSAMPROW
Definition: mrpt_jpeglib.h:63
long INT32
Definition: jmorecfg.h:158
#define SHIFT_TEMPS
Definition: jpegint.h:286
jpeg_component_info * compptr
Definition: jdct.h:97
#define DEQUANTIZE(coef, quantval)
Definition: jidctint.cpp:122
jpeg_component_info JCOEFPTR JSAMPARRAY JDIMENSION output_col
Definition: jdct.h:97
#define CONST_BITS
Definition: jidctint.cpp:60
#define FIX_2_053119869
Definition: jidctint.cpp:84
#define FIX_0_390180644
Definition: jidctint.cpp:76
#define MULTIPLY(var, const)
Definition: jidctint.cpp:111
JSAMPROW * JSAMPARRAY
Definition: mrpt_jpeglib.h:64
JCOEF FAR * JCOEFPTR
Definition: mrpt_jpeglib.h:72
#define DCTSIZE2
Definition: mrpt_jpeglib.h:39
MULTIPLIER ISLOW_MULT_TYPE
Definition: jdct.h:51
#define PASS1_BITS
Definition: jidctint.cpp:61
#define FIX_1_847759065
Definition: jidctint.cpp:82
Definition: inftrees.h:28
#define FIX_1_501321110
Definition: jidctint.cpp:81
JSAMPIMAGE output_buf
Definition: jdcoefct.cpp:59
#define GLOBAL(type)
Definition: jmorecfg.h:185
#define FIX_1_961570560
Definition: jidctint.cpp:83
unsigned int JDIMENSION
Definition: jmorecfg.h:168
#define FIX_1_175875602
Definition: jidctint.cpp:80
#define FIX_2_562915447
Definition: jidctint.cpp:85
#define FIX_0_899976223
Definition: jidctint.cpp:79
#define FIX_0_765366865
Definition: jidctint.cpp:78



Page generated by Doxygen 1.8.14 for MRPT 1.5.6 Git: 4c65e8431 Tue Apr 24 08:18:17 2018 +0200 at lun oct 28 01:35:26 CET 2019