ReactOS  0.4.15-dev-3720-g4cf9b79
jidctint.c
Go to the documentation of this file.
/*
 * jidctint.c
 *
 * Copyright (C) 1991-1998, Thomas G. Lane.
 * Modification developed 2002-2018 by Guido Vollbeding.
 * This file is part of the Independent JPEG Group's software.
 * For conditions of distribution and use, see the accompanying README file.
 *
 * This file contains a slow-but-accurate integer implementation of the
 * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
 * must also perform dequantization of the input coefficients.
 *
 * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
 * on each row (or vice versa, but it's more convenient to emit a row at
 * a time).  Direct algorithms are also available, but they are much more
 * complex and seem not to be any faster when reduced to code.
 *
 * This implementation is based on an algorithm described in
 *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
 *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
 *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
 * The primary algorithm described there uses 11 multiplies and 29 adds.
 * We use their alternate method with 12 multiplies and 32 adds.
 * The advantage of this method is that no data path contains more than one
 * multiplication; this allows a very simple and accurate implementation in
 * scaled fixed-point arithmetic, with a minimal number of shifts.
 *
 * We also provide IDCT routines with various output sample block sizes for
 * direct resolution reduction or enlargement and for direct resolving the
 * common 2x1 and 1x2 subsampling cases without additional resampling: NxN
 * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 input DCT block.
 *
 * For N<8 we simply take the corresponding low-frequency coefficients of
 * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block
 * to yield the downscaled outputs.
 * This can be seen as direct low-pass downsampling from the DCT domain
 * point of view rather than the usual spatial domain point of view,
 * yielding significant computational savings and results at least
 * as good as common bilinear (averaging) spatial downsampling.
 *
 * For N>8 we apply a partial NxN IDCT on the 8 input coefficients as
 * lower frequencies and higher frequencies assumed to be zero.
 * It turns out that the computational effort is similar to the 8x8 IDCT
 * regarding the output size.
 * Furthermore, the scaling and descaling is the same for all IDCT sizes.
 *
 * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
 * since there would be too many additional constants to pre-calculate.
 */
50 
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
#include "jdct.h"		/* Private declarations for DCT subsystem */
55 
56 #ifdef DCT_ISLOW_SUPPORTED
57 
58 
59 /*
60  * This module is specialized to the case DCTSIZE = 8.
61  */
62 
63 #if DCTSIZE != 8
64  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
65 #endif
66 
67 
/*
 * The poop on this scaling stuff is as follows:
 *
 * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
 * larger than the true IDCT outputs.  The final outputs are therefore
 * a factor of N larger than desired; since N=8 this can be cured by
 * a simple right shift at the end of the algorithm.  The advantage of
 * this arrangement is that we save two multiplications per 1-D IDCT,
 * because the y0 and y4 inputs need not be divided by sqrt(N).
 *
 * We have to do addition and subtraction of the integer inputs, which
 * is no problem, and multiplication by fractional constants, which is
 * a problem to do in integer arithmetic.  We multiply all the constants
 * by CONST_SCALE and convert them to integer constants (thus retaining
 * CONST_BITS bits of precision in the constants).  After doing a
 * multiplication we have to divide the product by CONST_SCALE, with proper
 * rounding, to produce the correct output.  This division can be done
 * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
 * as long as possible so that partial sums can be added together with
 * full fractional precision.
 *
 * The outputs of the first pass are scaled up by PASS1_BITS bits so that
 * they are represented to better-than-integral precision.  These outputs
 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
 * with the recommended scaling.  (To scale up 12-bit sample data further, an
 * intermediate INT32 array would be needed.)
 *
 * To avoid overflow of the 32-bit intermediate results in pass 2, we must
 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
 * shows that the values given below are the most effective.
 */
99 
/*
 * Fixed-point scaling parameters.
 * CONST_BITS selects which set of FIX_* constants below is used;
 * PASS1_BITS is the extra scaling applied to the pass-1 results.
 */

#if BITS_IN_JSAMPLE == 8
#define CONST_BITS  13
#define PASS1_BITS  2
#else
#define CONST_BITS  13
#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
#endif

/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
 * causing a lot of useless floating-point operations at run time.
 * To get around this we use the following pre-calculated constants.
 * If you change CONST_BITS you may want to add appropriate values.
 * (With a reasonable C compiler, you can just rely on the FIX() macro...)
 */

#if CONST_BITS == 13
#define FIX_0_298631336  ((INT32)  2446)	/* FIX(0.298631336) */
#define FIX_0_390180644  ((INT32)  3196)	/* FIX(0.390180644) */
#define FIX_0_541196100  ((INT32)  4433)	/* FIX(0.541196100) */
#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
#define FIX_1_175875602  ((INT32)  9633)	/* FIX(1.175875602) */
#define FIX_1_501321110  ((INT32)  12299)	/* FIX(1.501321110) */
#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
#define FIX_1_961570560  ((INT32)  16069)	/* FIX(1.961570560) */
#define FIX_2_053119869  ((INT32)  16819)	/* FIX(2.053119869) */
#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
#define FIX_3_072711026  ((INT32)  25172)	/* FIX(3.072711026) */
#else
#define FIX_0_298631336  FIX(0.298631336)
#define FIX_0_390180644  FIX(0.390180644)
#define FIX_0_541196100  FIX(0.541196100)
#define FIX_0_765366865  FIX(0.765366865)
#define FIX_0_899976223  FIX(0.899976223)
#define FIX_1_175875602  FIX(1.175875602)
#define FIX_1_501321110  FIX(1.501321110)
#define FIX_1_847759065  FIX(1.847759065)
#define FIX_1_961570560  FIX(1.961570560)
#define FIX_2_053119869  FIX(2.053119869)
#define FIX_2_562915447  FIX(2.562915447)
#define FIX_3_072711026  FIX(3.072711026)
#endif
142 
143 
/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
 * For 8-bit samples with the recommended scaling, all the variable
 * and constant values involved are no more than 16 bits wide, so a
 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
 * For 12-bit samples, a full 32-bit multiplication will be needed.
 *
 * The second macro parameter is spelled "cnst" rather than "const" so that
 * a C keyword is not used as a parameter name.
 */

#if BITS_IN_JSAMPLE == 8
#define MULTIPLY(var,cnst)  MULTIPLY16C16(var,cnst)
#else
#define MULTIPLY(var,cnst)  ((var) * (cnst))
#endif
156 
157 
/* Dequantize a coefficient by multiplying it by the multiplier-table
 * entry; produce an int result.  In this module, both inputs and result
 * are 16 bits or less, so either int or short multiply will work.
 * The coefficient is converted to ISLOW_MULT_TYPE before the multiply.
 */

#define DEQUANTIZE(coef,quantval)  (((ISLOW_MULT_TYPE) (coef)) * (quantval))
164 
165 
166 /*
167  * Perform dequantization and inverse DCT on one block of coefficients.
168  *
169  * Optimized algorithm with 12 multiplications in the 1-D kernel.
170  * cK represents sqrt(2) * cos(K*pi/16).
171  */
172 
173 GLOBAL(void)
174 jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
177 {
178  INT32 tmp0, tmp1, tmp2, tmp3;
179  INT32 tmp10, tmp11, tmp12, tmp13;
180  INT32 z1, z2, z3;
181  JCOEFPTR inptr;
182  ISLOW_MULT_TYPE * quantptr;
183  int * wsptr;
184  JSAMPROW outptr;
185  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
186  int ctr;
187  int workspace[DCTSIZE2]; /* buffers data between passes */
189 
190  /* Pass 1: process columns from input, store into work array.
191  * Note results are scaled up by sqrt(8) compared to a true IDCT;
192  * furthermore, we scale the results by 2**PASS1_BITS.
193  */
194 
195  inptr = coef_block;
196  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
197  wsptr = workspace;
198  for (ctr = DCTSIZE; ctr > 0; ctr--) {
199  /* Due to quantization, we will usually find that many of the input
200  * coefficients are zero, especially the AC terms. We can exploit this
201  * by short-circuiting the IDCT calculation for any column in which all
202  * the AC terms are zero. In that case each output is equal to the
203  * DC coefficient (with scale factor as needed).
204  * With typical images and quantization tables, half or more of the
205  * column DCT calculations can be simplified this way.
206  */
207 
208  if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
209  inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
210  inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
211  inptr[DCTSIZE*7] == 0) {
212  /* AC terms all zero */
213  int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
214 
215  wsptr[DCTSIZE*0] = dcval;
216  wsptr[DCTSIZE*1] = dcval;
217  wsptr[DCTSIZE*2] = dcval;
218  wsptr[DCTSIZE*3] = dcval;
219  wsptr[DCTSIZE*4] = dcval;
220  wsptr[DCTSIZE*5] = dcval;
221  wsptr[DCTSIZE*6] = dcval;
222  wsptr[DCTSIZE*7] = dcval;
223 
224  inptr++; /* advance pointers to next column */
225  quantptr++;
226  wsptr++;
227  continue;
228  }
229 
230  /* Even part: reverse the even part of the forward DCT.
231  * The rotator is c(-6).
232  */
233 
234  z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
235  z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
236  z2 <<= CONST_BITS;
237  z3 <<= CONST_BITS;
238  /* Add fudge factor here for final descale. */
239  z2 += ONE << (CONST_BITS-PASS1_BITS-1);
240 
241  tmp0 = z2 + z3;
242  tmp1 = z2 - z3;
243 
244  z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
245  z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
246 
247  z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
248  tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
249  tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
250 
251  tmp10 = tmp0 + tmp2;
252  tmp13 = tmp0 - tmp2;
253  tmp11 = tmp1 + tmp3;
254  tmp12 = tmp1 - tmp3;
255 
256  /* Odd part per figure 8; the matrix is unitary and hence its
257  * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
258  */
259 
260  tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
261  tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
262  tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
263  tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
264 
265  z2 = tmp0 + tmp2;
266  z3 = tmp1 + tmp3;
267 
268  z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
269  z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
270  z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
271  z2 += z1;
272  z3 += z1;
273 
274  z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
275  tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
276  tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
277  tmp0 += z1 + z2;
278  tmp3 += z1 + z3;
279 
280  z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
281  tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
282  tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
283  tmp1 += z1 + z3;
284  tmp2 += z1 + z2;
285 
286  /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
287 
288  wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
289  wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
290  wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
291  wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
292  wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
293  wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
294  wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
295  wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
296 
297  inptr++; /* advance pointers to next column */
298  quantptr++;
299  wsptr++;
300  }
301 
302  /* Pass 2: process rows from work array, store into output array.
303  * Note that we must descale the results by a factor of 8 == 2**3,
304  * and also undo the PASS1_BITS scaling.
305  */
306 
307  wsptr = workspace;
308  for (ctr = 0; ctr < DCTSIZE; ctr++) {
309  outptr = output_buf[ctr] + output_col;
310 
311  /* Add range center and fudge factor for final descale and range-limit. */
312  z2 = (INT32) wsptr[0] +
313  ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
314  (ONE << (PASS1_BITS+2)));
315 
316  /* Rows of zeroes can be exploited in the same way as we did with columns.
317  * However, the column calculation has created many nonzero AC terms, so
318  * the simplification applies less often (typically 5% to 10% of the time).
319  * On machines with very fast multiplication, it's possible that the
320  * test takes more time than it's worth. In that case this section
321  * may be commented out.
322  */
323 
324 #ifndef NO_ZERO_ROW_TEST
325  if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
326  wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
327  /* AC terms all zero */
328  JSAMPLE dcval = range_limit[(int) RIGHT_SHIFT(z2, PASS1_BITS+3)
329  & RANGE_MASK];
330 
331  outptr[0] = dcval;
332  outptr[1] = dcval;
333  outptr[2] = dcval;
334  outptr[3] = dcval;
335  outptr[4] = dcval;
336  outptr[5] = dcval;
337  outptr[6] = dcval;
338  outptr[7] = dcval;
339 
340  wsptr += DCTSIZE; /* advance pointer to next row */
341  continue;
342  }
343 #endif
344 
345  /* Even part: reverse the even part of the forward DCT.
346  * The rotator is c(-6).
347  */
348 
349  z3 = (INT32) wsptr[4];
350 
351  tmp0 = (z2 + z3) << CONST_BITS;
352  tmp1 = (z2 - z3) << CONST_BITS;
353 
354  z2 = (INT32) wsptr[2];
355  z3 = (INT32) wsptr[6];
356 
357  z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
358  tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
359  tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
360 
361  tmp10 = tmp0 + tmp2;
362  tmp13 = tmp0 - tmp2;
363  tmp11 = tmp1 + tmp3;
364  tmp12 = tmp1 - tmp3;
365 
366  /* Odd part per figure 8; the matrix is unitary and hence its
367  * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
368  */
369 
370  tmp0 = (INT32) wsptr[7];
371  tmp1 = (INT32) wsptr[5];
372  tmp2 = (INT32) wsptr[3];
373  tmp3 = (INT32) wsptr[1];
374 
375  z2 = tmp0 + tmp2;
376  z3 = tmp1 + tmp3;
377 
378  z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
379  z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
380  z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
381  z2 += z1;
382  z3 += z1;
383 
384  z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
385  tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
386  tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
387  tmp0 += z1 + z2;
388  tmp3 += z1 + z3;
389 
390  z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
391  tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
392  tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
393  tmp1 += z1 + z3;
394  tmp2 += z1 + z2;
395 
396  /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
397 
398  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
399  CONST_BITS+PASS1_BITS+3)
400  & RANGE_MASK];
401  outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
402  CONST_BITS+PASS1_BITS+3)
403  & RANGE_MASK];
404  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
405  CONST_BITS+PASS1_BITS+3)
406  & RANGE_MASK];
407  outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
408  CONST_BITS+PASS1_BITS+3)
409  & RANGE_MASK];
410  outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
411  CONST_BITS+PASS1_BITS+3)
412  & RANGE_MASK];
413  outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
414  CONST_BITS+PASS1_BITS+3)
415  & RANGE_MASK];
416  outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
417  CONST_BITS+PASS1_BITS+3)
418  & RANGE_MASK];
419  outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
420  CONST_BITS+PASS1_BITS+3)
421  & RANGE_MASK];
422 
423  wsptr += DCTSIZE; /* advance pointer to next row */
424  }
425 }
426 
427 #ifdef IDCT_SCALING_SUPPORTED
428 
429 
430 /*
431  * Perform dequantization and inverse DCT on one block of coefficients,
432  * producing a reduced-size 7x7 output block.
433  *
434  * Optimized algorithm with 12 multiplications in the 1-D kernel.
435  * cK represents sqrt(2) * cos(K*pi/14).
436  */
437 
438 GLOBAL(void)
439 jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
442 {
443  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
444  INT32 z1, z2, z3;
445  JCOEFPTR inptr;
446  ISLOW_MULT_TYPE * quantptr;
447  int * wsptr;
448  JSAMPROW outptr;
449  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
450  int ctr;
451  int workspace[7*7]; /* buffers data between passes */
453 
454  /* Pass 1: process columns from input, store into work array. */
455 
456  inptr = coef_block;
457  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
458  wsptr = workspace;
459  for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
460  /* Even part */
461 
462  tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
463  tmp13 <<= CONST_BITS;
464  /* Add fudge factor here for final descale. */
465  tmp13 += ONE << (CONST_BITS-PASS1_BITS-1);
466 
467  z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
468  z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
469  z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
470 
471  tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
472  tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
473  tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
474  tmp0 = z1 + z3;
475  z2 -= tmp0;
476  tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
477  tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
478  tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
479  tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
480 
481  /* Odd part */
482 
483  z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
484  z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
485  z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
486 
487  tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
488  tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
489  tmp0 = tmp1 - tmp2;
490  tmp1 += tmp2;
491  tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
492  tmp1 += tmp2;
493  z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
494  tmp0 += z2;
495  tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
496 
497  /* Final output stage */
498 
499  wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
500  wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
501  wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
502  wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
503  wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
504  wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
505  wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS);
506  }
507 
508  /* Pass 2: process 7 rows from work array, store into output array. */
509 
510  wsptr = workspace;
511  for (ctr = 0; ctr < 7; ctr++) {
512  outptr = output_buf[ctr] + output_col;
513 
514  /* Even part */
515 
516  /* Add range center and fudge factor for final descale and range-limit. */
517  tmp13 = (INT32) wsptr[0] +
518  ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
519  (ONE << (PASS1_BITS+2)));
520  tmp13 <<= CONST_BITS;
521 
522  z1 = (INT32) wsptr[2];
523  z2 = (INT32) wsptr[4];
524  z3 = (INT32) wsptr[6];
525 
526  tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
527  tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
528  tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
529  tmp0 = z1 + z3;
530  z2 -= tmp0;
531  tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
532  tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
533  tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
534  tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
535 
536  /* Odd part */
537 
538  z1 = (INT32) wsptr[1];
539  z2 = (INT32) wsptr[3];
540  z3 = (INT32) wsptr[5];
541 
542  tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
543  tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
544  tmp0 = tmp1 - tmp2;
545  tmp1 += tmp2;
546  tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
547  tmp1 += tmp2;
548  z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
549  tmp0 += z2;
550  tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
551 
552  /* Final output stage */
553 
554  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
555  CONST_BITS+PASS1_BITS+3)
556  & RANGE_MASK];
557  outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
558  CONST_BITS+PASS1_BITS+3)
559  & RANGE_MASK];
560  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
561  CONST_BITS+PASS1_BITS+3)
562  & RANGE_MASK];
563  outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
564  CONST_BITS+PASS1_BITS+3)
565  & RANGE_MASK];
566  outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
567  CONST_BITS+PASS1_BITS+3)
568  & RANGE_MASK];
569  outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
570  CONST_BITS+PASS1_BITS+3)
571  & RANGE_MASK];
572  outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13,
573  CONST_BITS+PASS1_BITS+3)
574  & RANGE_MASK];
575 
576  wsptr += 7; /* advance pointer to next row */
577  }
578 }
579 
580 
581 /*
582  * Perform dequantization and inverse DCT on one block of coefficients,
583  * producing a reduced-size 6x6 output block.
584  *
585  * Optimized algorithm with 3 multiplications in the 1-D kernel.
586  * cK represents sqrt(2) * cos(K*pi/12).
587  */
588 
589 GLOBAL(void)
590 jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
593 {
594  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
595  INT32 z1, z2, z3;
596  JCOEFPTR inptr;
597  ISLOW_MULT_TYPE * quantptr;
598  int * wsptr;
599  JSAMPROW outptr;
600  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
601  int ctr;
602  int workspace[6*6]; /* buffers data between passes */
604 
605  /* Pass 1: process columns from input, store into work array. */
606 
607  inptr = coef_block;
608  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
609  wsptr = workspace;
610  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
611  /* Even part */
612 
613  tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
614  tmp0 <<= CONST_BITS;
615  /* Add fudge factor here for final descale. */
616  tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
617  tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
618  tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
619  tmp1 = tmp0 + tmp10;
620  tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
621  tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
622  tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
623  tmp10 = tmp1 + tmp0;
624  tmp12 = tmp1 - tmp0;
625 
626  /* Odd part */
627 
628  z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
629  z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
630  z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
631  tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
632  tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
633  tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
634  tmp1 = (z1 - z2 - z3) << PASS1_BITS;
635 
636  /* Final output stage */
637 
638  wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
639  wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
640  wsptr[6*1] = (int) (tmp11 + tmp1);
641  wsptr[6*4] = (int) (tmp11 - tmp1);
642  wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
643  wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
644  }
645 
646  /* Pass 2: process 6 rows from work array, store into output array. */
647 
648  wsptr = workspace;
649  for (ctr = 0; ctr < 6; ctr++) {
650  outptr = output_buf[ctr] + output_col;
651 
652  /* Even part */
653 
654  /* Add range center and fudge factor for final descale and range-limit. */
655  tmp0 = (INT32) wsptr[0] +
656  ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
657  (ONE << (PASS1_BITS+2)));
658  tmp0 <<= CONST_BITS;
659  tmp2 = (INT32) wsptr[4];
660  tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
661  tmp1 = tmp0 + tmp10;
662  tmp11 = tmp0 - tmp10 - tmp10;
663  tmp10 = (INT32) wsptr[2];
664  tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
665  tmp10 = tmp1 + tmp0;
666  tmp12 = tmp1 - tmp0;
667 
668  /* Odd part */
669 
670  z1 = (INT32) wsptr[1];
671  z2 = (INT32) wsptr[3];
672  z3 = (INT32) wsptr[5];
673  tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
674  tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
675  tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
676  tmp1 = (z1 - z2 - z3) << CONST_BITS;
677 
678  /* Final output stage */
679 
680  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
681  CONST_BITS+PASS1_BITS+3)
682  & RANGE_MASK];
683  outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
684  CONST_BITS+PASS1_BITS+3)
685  & RANGE_MASK];
686  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
687  CONST_BITS+PASS1_BITS+3)
688  & RANGE_MASK];
689  outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
690  CONST_BITS+PASS1_BITS+3)
691  & RANGE_MASK];
692  outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
693  CONST_BITS+PASS1_BITS+3)
694  & RANGE_MASK];
695  outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
696  CONST_BITS+PASS1_BITS+3)
697  & RANGE_MASK];
698 
699  wsptr += 6; /* advance pointer to next row */
700  }
701 }
702 
703 
704 /*
705  * Perform dequantization and inverse DCT on one block of coefficients,
706  * producing a reduced-size 5x5 output block.
707  *
708  * Optimized algorithm with 5 multiplications in the 1-D kernel.
709  * cK represents sqrt(2) * cos(K*pi/10).
710  */
711 
712 GLOBAL(void)
713 jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
716 {
717  INT32 tmp0, tmp1, tmp10, tmp11, tmp12;
718  INT32 z1, z2, z3;
719  JCOEFPTR inptr;
720  ISLOW_MULT_TYPE * quantptr;
721  int * wsptr;
722  JSAMPROW outptr;
723  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
724  int ctr;
725  int workspace[5*5]; /* buffers data between passes */
727 
728  /* Pass 1: process columns from input, store into work array. */
729 
730  inptr = coef_block;
731  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
732  wsptr = workspace;
733  for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
734  /* Even part */
735 
736  tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
737  tmp12 <<= CONST_BITS;
738  /* Add fudge factor here for final descale. */
739  tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
740  tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
741  tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
742  z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
743  z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
744  z3 = tmp12 + z2;
745  tmp10 = z3 + z1;
746  tmp11 = z3 - z1;
747  tmp12 -= z2 << 2;
748 
749  /* Odd part */
750 
751  z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
752  z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
753 
754  z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
755  tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
756  tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
757 
758  /* Final output stage */
759 
760  wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
761  wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
762  wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
763  wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
764  wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
765  }
766 
767  /* Pass 2: process 5 rows from work array, store into output array. */
768 
769  wsptr = workspace;
770  for (ctr = 0; ctr < 5; ctr++) {
771  outptr = output_buf[ctr] + output_col;
772 
773  /* Even part */
774 
775  /* Add range center and fudge factor for final descale and range-limit. */
776  tmp12 = (INT32) wsptr[0] +
777  ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
778  (ONE << (PASS1_BITS+2)));
779  tmp12 <<= CONST_BITS;
780  tmp0 = (INT32) wsptr[2];
781  tmp1 = (INT32) wsptr[4];
782  z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
783  z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
784  z3 = tmp12 + z2;
785  tmp10 = z3 + z1;
786  tmp11 = z3 - z1;
787  tmp12 -= z2 << 2;
788 
789  /* Odd part */
790 
791  z2 = (INT32) wsptr[1];
792  z3 = (INT32) wsptr[3];
793 
794  z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
795  tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
796  tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
797 
798  /* Final output stage */
799 
800  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
801  CONST_BITS+PASS1_BITS+3)
802  & RANGE_MASK];
803  outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
804  CONST_BITS+PASS1_BITS+3)
805  & RANGE_MASK];
806  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
807  CONST_BITS+PASS1_BITS+3)
808  & RANGE_MASK];
809  outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
810  CONST_BITS+PASS1_BITS+3)
811  & RANGE_MASK];
812  outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
813  CONST_BITS+PASS1_BITS+3)
814  & RANGE_MASK];
815 
816  wsptr += 5; /* advance pointer to next row */
817  }
818 }
819 
820 
821 /*
822  * Perform dequantization and inverse DCT on one block of coefficients,
823  * producing a reduced-size 4x4 output block.
824  *
825  * Optimized algorithm with 3 multiplications in the 1-D kernel.
826  * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
827  */
828 
829 GLOBAL(void)
830 jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
833 {
834  INT32 tmp0, tmp2, tmp10, tmp12;
835  INT32 z1, z2, z3;
836  JCOEFPTR inptr;
837  ISLOW_MULT_TYPE * quantptr;
838  int * wsptr;
839  JSAMPROW outptr;
840  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
841  int ctr;
842  int workspace[4*4]; /* buffers data between passes */
844 
845  /* Pass 1: process columns from input, store into work array. */
846 
847  inptr = coef_block;
848  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
849  wsptr = workspace;
850  for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
851  /* Even part */
852 
853  tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
854  tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
855 
856  tmp10 = (tmp0 + tmp2) << PASS1_BITS;
857  tmp12 = (tmp0 - tmp2) << PASS1_BITS;
858 
859  /* Odd part */
860  /* Same rotation as in the even part of the 8x8 LL&M IDCT */
861 
862  z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
863  z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
864 
865  z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
866  /* Add fudge factor here for final descale. */
867  z1 += ONE << (CONST_BITS-PASS1_BITS-1);
868  tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
869  CONST_BITS-PASS1_BITS);
870  tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
871  CONST_BITS-PASS1_BITS);
872 
873  /* Final output stage */
874 
875  wsptr[4*0] = (int) (tmp10 + tmp0);
876  wsptr[4*3] = (int) (tmp10 - tmp0);
877  wsptr[4*1] = (int) (tmp12 + tmp2);
878  wsptr[4*2] = (int) (tmp12 - tmp2);
879  }
880 
881  /* Pass 2: process 4 rows from work array, store into output array. */
882 
883  wsptr = workspace;
884  for (ctr = 0; ctr < 4; ctr++) {
885  outptr = output_buf[ctr] + output_col;
886 
887  /* Even part */
888 
889  /* Add range center and fudge factor for final descale and range-limit. */
890  tmp0 = (INT32) wsptr[0] +
891  ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
892  (ONE << (PASS1_BITS+2)));
893  tmp2 = (INT32) wsptr[2];
894 
895  tmp10 = (tmp0 + tmp2) << CONST_BITS;
896  tmp12 = (tmp0 - tmp2) << CONST_BITS;
897 
898  /* Odd part */
899  /* Same rotation as in the even part of the 8x8 LL&M IDCT */
900 
901  z2 = (INT32) wsptr[1];
902  z3 = (INT32) wsptr[3];
903 
904  z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
905  tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
906  tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
907 
908  /* Final output stage */
909 
910  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
911  CONST_BITS+PASS1_BITS+3)
912  & RANGE_MASK];
913  outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
914  CONST_BITS+PASS1_BITS+3)
915  & RANGE_MASK];
916  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
917  CONST_BITS+PASS1_BITS+3)
918  & RANGE_MASK];
919  outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
920  CONST_BITS+PASS1_BITS+3)
921  & RANGE_MASK];
922 
923  wsptr += 4; /* advance pointer to next row */
924  }
925 }
926 
927 
928 /*
929  * Perform dequantization and inverse DCT on one block of coefficients,
930  * producing a reduced-size 3x3 output block.
931  *
932  * Optimized algorithm with 2 multiplications in the 1-D kernel.
933  * cK represents sqrt(2) * cos(K*pi/6).
934  */
935 
936 GLOBAL(void)
937 jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
940 {
941  INT32 tmp0, tmp2, tmp10, tmp12;
942  JCOEFPTR inptr;
943  ISLOW_MULT_TYPE * quantptr;
944  int * wsptr;
945  JSAMPROW outptr;
946  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
947  int ctr;
948  int workspace[3*3]; /* buffers data between passes */
950 
951  /* Pass 1: process columns from input, store into work array. */
952 
953  inptr = coef_block;
954  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
955  wsptr = workspace;
956  for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
957  /* Even part */
958 
959  tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
960  tmp0 <<= CONST_BITS;
961  /* Add fudge factor here for final descale. */
962  tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
963  tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
964  tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
965  tmp10 = tmp0 + tmp12;
966  tmp2 = tmp0 - tmp12 - tmp12;
967 
968  /* Odd part */
969 
970  tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
971  tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
972 
973  /* Final output stage */
974 
975  wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
976  wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
977  wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
978  }
979 
980  /* Pass 2: process 3 rows from work array, store into output array. */
981 
982  wsptr = workspace;
983  for (ctr = 0; ctr < 3; ctr++) {
984  outptr = output_buf[ctr] + output_col;
985 
986  /* Even part */
987 
988  /* Add range center and fudge factor for final descale and range-limit. */
989  tmp0 = (INT32) wsptr[0] +
990  ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
991  (ONE << (PASS1_BITS+2)));
992  tmp0 <<= CONST_BITS;
993  tmp2 = (INT32) wsptr[2];
994  tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
995  tmp10 = tmp0 + tmp12;
996  tmp2 = tmp0 - tmp12 - tmp12;
997 
998  /* Odd part */
999 
1000  tmp12 = (INT32) wsptr[1];
1001  tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
1002 
1003  /* Final output stage */
1004 
1005  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
1006  CONST_BITS+PASS1_BITS+3)
1007  & RANGE_MASK];
1008  outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
1009  CONST_BITS+PASS1_BITS+3)
1010  & RANGE_MASK];
1011  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
1012  CONST_BITS+PASS1_BITS+3)
1013  & RANGE_MASK];
1014 
1015  wsptr += 3; /* advance pointer to next row */
1016  }
1017 }
1018 
1019 
1020 /*
1021  * Perform dequantization and inverse DCT on one block of coefficients,
1022  * producing a reduced-size 2x2 output block.
1023  *
1024  * Multiplication-less algorithm.
1025  */
1026 
1027 GLOBAL(void)
1028 jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1031 {
1032  DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
1033  ISLOW_MULT_TYPE * quantptr;
1034  JSAMPROW outptr;
1035  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1036  ISHIFT_TEMPS
1037 
1038  /* Pass 1: process columns from input. */
1039 
1040  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1041 
1042  /* Column 0 */
1043  tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
1044  tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
1045  /* Add range center and fudge factor for final descale and range-limit. */
1046  tmp4 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
1047 
1048  tmp0 = tmp4 + tmp5;
1049  tmp2 = tmp4 - tmp5;
1050 
1051  /* Column 1 */
1052  tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0+1], quantptr[DCTSIZE*0+1]);
1053  tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1+1], quantptr[DCTSIZE*1+1]);
1054 
1055  tmp1 = tmp4 + tmp5;
1056  tmp3 = tmp4 - tmp5;
1057 
1058  /* Pass 2: process 2 rows, store into output array. */
1059 
1060  /* Row 0 */
1061  outptr = output_buf[0] + output_col;
1062 
1063  outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
1064  outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
1065 
1066  /* Row 1 */
1067  outptr = output_buf[1] + output_col;
1068 
1069  outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp2 + tmp3, 3) & RANGE_MASK];
1070  outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp2 - tmp3, 3) & RANGE_MASK];
1071 }
1072 
1073 
1074 /*
1075  * Perform dequantization and inverse DCT on one block of coefficients,
1076  * producing a reduced-size 1x1 output block.
1077  *
1078  * We hardly need an inverse DCT routine for this: just take the
1079  * average pixel value, which is one-eighth of the DC coefficient.
1080  */
1081 
1082 GLOBAL(void)
1083 jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1086 {
1087  DCTELEM dcval;
1088  ISLOW_MULT_TYPE * quantptr;
1089  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1090  ISHIFT_TEMPS
1091 
1092  /* 1x1 is trivial: just take the DC coefficient divided by 8. */
1093 
1094  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1095 
1096  dcval = DEQUANTIZE(coef_block[0], quantptr[0]);
1097  /* Add range center and fudge factor for descale and range-limit. */
1098  dcval += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
1099 
1100  output_buf[0][output_col] =
1101  range_limit[(int) IRIGHT_SHIFT(dcval, 3) & RANGE_MASK];
1102 }
1103 
1104 
1105 /*
1106  * Perform dequantization and inverse DCT on one block of coefficients,
1107  * producing a 9x9 output block.
1108  *
1109  * Optimized algorithm with 10 multiplications in the 1-D kernel.
1110  * cK represents sqrt(2) * cos(K*pi/18).
1111  */
1112 
1113 GLOBAL(void)
1114 jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1117 {
1118  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
1119  INT32 z1, z2, z3, z4;
1120  JCOEFPTR inptr;
1121  ISLOW_MULT_TYPE * quantptr;
1122  int * wsptr;
1123  JSAMPROW outptr;
1124  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1125  int ctr;
1126  int workspace[8*9]; /* buffers data between passes */
1127  SHIFT_TEMPS
1128 
1129  /* Pass 1: process columns from input, store into work array. */
1130 
1131  inptr = coef_block;
1132  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1133  wsptr = workspace;
1134  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1135  /* Even part */
1136 
1137  tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1138  tmp0 <<= CONST_BITS;
1139  /* Add fudge factor here for final descale. */
1140  tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
1141 
1142  z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1143  z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1144  z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1145 
1146  tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */
1147  tmp1 = tmp0 + tmp3;
1148  tmp2 = tmp0 - tmp3 - tmp3;
1149 
1150  tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
1151  tmp11 = tmp2 + tmp0;
1152  tmp14 = tmp2 - tmp0 - tmp0;
1153 
1154  tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
1155  tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */
1156  tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */
1157 
1158  tmp10 = tmp1 + tmp0 - tmp3;
1159  tmp12 = tmp1 - tmp0 + tmp2;
1160  tmp13 = tmp1 - tmp2 + tmp3;
1161 
1162  /* Odd part */
1163 
1164  z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1165  z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1166  z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1167  z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1168 
1169  z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */
1170 
1171  tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */
1172  tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */
1173  tmp0 = tmp2 + tmp3 - z2;
1174  tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */
1175  tmp2 += z2 - tmp1;
1176  tmp3 += z2 + tmp1;
1177  tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
1178 
1179  /* Final output stage */
1180 
1181  wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
1182  wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
1183  wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
1184  wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
1185  wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
1186  wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
1187  wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS);
1188  wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS);
1189  wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS);
1190  }
1191 
1192  /* Pass 2: process 9 rows from work array, store into output array. */
1193 
1194  wsptr = workspace;
1195  for (ctr = 0; ctr < 9; ctr++) {
1196  outptr = output_buf[ctr] + output_col;
1197 
1198  /* Even part */
1199 
1200  /* Add range center and fudge factor for final descale and range-limit. */
1201  tmp0 = (INT32) wsptr[0] +
1202  ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1203  (ONE << (PASS1_BITS+2)));
1204  tmp0 <<= CONST_BITS;
1205 
1206  z1 = (INT32) wsptr[2];
1207  z2 = (INT32) wsptr[4];
1208  z3 = (INT32) wsptr[6];
1209 
1210  tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */
1211  tmp1 = tmp0 + tmp3;
1212  tmp2 = tmp0 - tmp3 - tmp3;
1213 
1214  tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
1215  tmp11 = tmp2 + tmp0;
1216  tmp14 = tmp2 - tmp0 - tmp0;
1217 
1218  tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
1219  tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */
1220  tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */
1221 
1222  tmp10 = tmp1 + tmp0 - tmp3;
1223  tmp12 = tmp1 - tmp0 + tmp2;
1224  tmp13 = tmp1 - tmp2 + tmp3;
1225 
1226  /* Odd part */
1227 
1228  z1 = (INT32) wsptr[1];
1229  z2 = (INT32) wsptr[3];
1230  z3 = (INT32) wsptr[5];
1231  z4 = (INT32) wsptr[7];
1232 
1233  z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */
1234 
1235  tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */
1236  tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */
1237  tmp0 = tmp2 + tmp3 - z2;
1238  tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */
1239  tmp2 += z2 - tmp1;
1240  tmp3 += z2 + tmp1;
1241  tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
1242 
1243  /* Final output stage */
1244 
1245  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
1246  CONST_BITS+PASS1_BITS+3)
1247  & RANGE_MASK];
1248  outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
1249  CONST_BITS+PASS1_BITS+3)
1250  & RANGE_MASK];
1251  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
1252  CONST_BITS+PASS1_BITS+3)
1253  & RANGE_MASK];
1254  outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
1255  CONST_BITS+PASS1_BITS+3)
1256  & RANGE_MASK];
1257  outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
1258  CONST_BITS+PASS1_BITS+3)
1259  & RANGE_MASK];
1260  outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
1261  CONST_BITS+PASS1_BITS+3)
1262  & RANGE_MASK];
1263  outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3,
1264  CONST_BITS+PASS1_BITS+3)
1265  & RANGE_MASK];
1266  outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3,
1267  CONST_BITS+PASS1_BITS+3)
1268  & RANGE_MASK];
1269  outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14,
1270  CONST_BITS+PASS1_BITS+3)
1271  & RANGE_MASK];
1272 
1273  wsptr += 8; /* advance pointer to next row */
1274  }
1275 }
1276 
1277 
1278 /*
1279  * Perform dequantization and inverse DCT on one block of coefficients,
1280  * producing a 10x10 output block.
1281  *
1282  * Optimized algorithm with 12 multiplications in the 1-D kernel.
1283  * cK represents sqrt(2) * cos(K*pi/20).
1284  */
1285 
1286 GLOBAL(void)
1287 jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1290 {
1291  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1292  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
1293  INT32 z1, z2, z3, z4, z5;
1294  JCOEFPTR inptr;
1295  ISLOW_MULT_TYPE * quantptr;
1296  int * wsptr;
1297  JSAMPROW outptr;
1298  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1299  int ctr;
1300  int workspace[8*10]; /* buffers data between passes */
1301  SHIFT_TEMPS
1302 
1303  /* Pass 1: process columns from input, store into work array. */
1304 
1305  inptr = coef_block;
1306  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1307  wsptr = workspace;
1308  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1309  /* Even part */
1310 
1311  z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1312  z3 <<= CONST_BITS;
1313  /* Add fudge factor here for final descale. */
1314  z3 += ONE << (CONST_BITS-PASS1_BITS-1);
1315  z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1316  z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
1317  z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
1318  tmp10 = z3 + z1;
1319  tmp11 = z3 - z2;
1320 
1321  tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */
1322  CONST_BITS-PASS1_BITS);
1323 
1324  z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1325  z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1326 
1327  z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
1328  tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
1329  tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
1330 
1331  tmp20 = tmp10 + tmp12;
1332  tmp24 = tmp10 - tmp12;
1333  tmp21 = tmp11 + tmp13;
1334  tmp23 = tmp11 - tmp13;
1335 
1336  /* Odd part */
1337 
1338  z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1339  z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1340  z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1341  z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1342 
1343  tmp11 = z2 + z4;
1344  tmp13 = z2 - z4;
1345 
1346  tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
1347  z5 = z3 << CONST_BITS;
1348 
1349  z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
1350  z4 = z5 + tmp12;
1351 
1352  tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
1353  tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
1354 
1355  z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
1356  z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
1357 
1358  tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
1359 
1360  tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
1361  tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
1362 
1363  /* Final output stage */
1364 
1365  wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1366  wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1367  wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1368  wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1369  wsptr[8*2] = (int) (tmp22 + tmp12);
1370  wsptr[8*7] = (int) (tmp22 - tmp12);
1371  wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1372  wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1373  wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1374  wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1375  }
1376 
1377  /* Pass 2: process 10 rows from work array, store into output array. */
1378 
1379  wsptr = workspace;
1380  for (ctr = 0; ctr < 10; ctr++) {
1381  outptr = output_buf[ctr] + output_col;
1382 
1383  /* Even part */
1384 
1385  /* Add range center and fudge factor for final descale and range-limit. */
1386  z3 = (INT32) wsptr[0] +
1387  ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1388  (ONE << (PASS1_BITS+2)));
1389  z3 <<= CONST_BITS;
1390  z4 = (INT32) wsptr[4];
1391  z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
1392  z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
1393  tmp10 = z3 + z1;
1394  tmp11 = z3 - z2;
1395 
1396  tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */
1397 
1398  z2 = (INT32) wsptr[2];
1399  z3 = (INT32) wsptr[6];
1400 
1401  z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
1402  tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
1403  tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
1404 
1405  tmp20 = tmp10 + tmp12;
1406  tmp24 = tmp10 - tmp12;
1407  tmp21 = tmp11 + tmp13;
1408  tmp23 = tmp11 - tmp13;
1409 
1410  /* Odd part */
1411 
1412  z1 = (INT32) wsptr[1];
1413  z2 = (INT32) wsptr[3];
1414  z3 = (INT32) wsptr[5];
1415  z3 <<= CONST_BITS;
1416  z4 = (INT32) wsptr[7];
1417 
1418  tmp11 = z2 + z4;
1419  tmp13 = z2 - z4;
1420 
1421  tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
1422 
1423  z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
1424  z4 = z3 + tmp12;
1425 
1426  tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
1427  tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
1428 
1429  z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
1430  z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
1431 
1432  tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
1433 
1434  tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
1435  tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
1436 
1437  /* Final output stage */
1438 
1439  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1440  CONST_BITS+PASS1_BITS+3)
1441  & RANGE_MASK];
1442  outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1443  CONST_BITS+PASS1_BITS+3)
1444  & RANGE_MASK];
1445  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1446  CONST_BITS+PASS1_BITS+3)
1447  & RANGE_MASK];
1448  outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1449  CONST_BITS+PASS1_BITS+3)
1450  & RANGE_MASK];
1451  outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1452  CONST_BITS+PASS1_BITS+3)
1453  & RANGE_MASK];
1454  outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1455  CONST_BITS+PASS1_BITS+3)
1456  & RANGE_MASK];
1457  outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1458  CONST_BITS+PASS1_BITS+3)
1459  & RANGE_MASK];
1460  outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1461  CONST_BITS+PASS1_BITS+3)
1462  & RANGE_MASK];
1463  outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1464  CONST_BITS+PASS1_BITS+3)
1465  & RANGE_MASK];
1466  outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1467  CONST_BITS+PASS1_BITS+3)
1468  & RANGE_MASK];
1469 
1470  wsptr += 8; /* advance pointer to next row */
1471  }
1472 }
1473 
1474 
1475 /*
1476  * Perform dequantization and inverse DCT on one block of coefficients,
1477  * producing an 11x11 output block.
1478  *
1479  * Optimized algorithm with 24 multiplications in the 1-D kernel.
1480  * cK represents sqrt(2) * cos(K*pi/22).
1481  */
1482 
1483 GLOBAL(void)
1484 jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1487 {
1488  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1489  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
1490  INT32 z1, z2, z3, z4;
1491  JCOEFPTR inptr;
1492  ISLOW_MULT_TYPE * quantptr;
1493  int * wsptr;
1494  JSAMPROW outptr;
1495  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1496  int ctr;
1497  int workspace[8*11]; /* buffers data between passes */
1498  SHIFT_TEMPS
1499 
1500  /* Pass 1: process columns from input, store into work array. */
1501 
1502  inptr = coef_block;
1503  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1504  wsptr = workspace;
1505  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1506  /* Even part */
1507 
1508  tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1509  tmp10 <<= CONST_BITS;
1510  /* Add fudge factor here for final descale. */
1511  tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
1512 
1513  z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1514  z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1515  z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1516 
1517  tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */
1518  tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */
1519  z4 = z1 + z3;
1520  tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */
1521  z4 -= z2;
1522  tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */
1523  tmp21 = tmp20 + tmp23 + tmp25 -
1524  MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */
1525  tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
1526  tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
1527  tmp24 += tmp25;
1528  tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */
1529  tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */
1530  MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */
1531  tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */
1532 
1533  /* Odd part */
1534 
1535  z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1536  z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1537  z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1538  z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1539 
1540  tmp11 = z1 + z2;
1541  tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
1542  tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */
1543  tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */
1544  tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
1545  tmp10 = tmp11 + tmp12 + tmp13 -
1546  MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */
1547  z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
1548  tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */
1549  tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */
1550  z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */
1551  tmp11 += z1;
1552  tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */
1553  tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */
1554  MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */
1555  MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */
1556 
1557  /* Final output stage */
1558 
1559  wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1560  wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1561  wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1562  wsptr[8*9] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1563  wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1564  wsptr[8*8] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1565  wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1566  wsptr[8*7] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1567  wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1568  wsptr[8*6] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1569  wsptr[8*5] = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS);
1570  }
1571 
1572  /* Pass 2: process 11 rows from work array, store into output array. */
1573 
1574  wsptr = workspace;
1575  for (ctr = 0; ctr < 11; ctr++) {
1576  outptr = output_buf[ctr] + output_col;
1577 
1578  /* Even part */
1579 
1580  /* Add range center and fudge factor for final descale and range-limit. */
1581  tmp10 = (INT32) wsptr[0] +
1582  ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1583  (ONE << (PASS1_BITS+2)));
1584  tmp10 <<= CONST_BITS;
1585 
1586  z1 = (INT32) wsptr[2];
1587  z2 = (INT32) wsptr[4];
1588  z3 = (INT32) wsptr[6];
1589 
1590  tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */
1591  tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */
1592  z4 = z1 + z3;
1593  tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */
1594  z4 -= z2;
1595  tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */
1596  tmp21 = tmp20 + tmp23 + tmp25 -
1597  MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */
1598  tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
1599  tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
1600  tmp24 += tmp25;
1601  tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */
1602  tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */
1603  MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */
1604  tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */
1605 
1606  /* Odd part */
1607 
1608  z1 = (INT32) wsptr[1];
1609  z2 = (INT32) wsptr[3];
1610  z3 = (INT32) wsptr[5];
1611  z4 = (INT32) wsptr[7];
1612 
1613  tmp11 = z1 + z2;
1614  tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
1615  tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */
1616  tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */
1617  tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
1618  tmp10 = tmp11 + tmp12 + tmp13 -
1619  MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */
1620  z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
1621  tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */
1622  tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */
1623  z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */
1624  tmp11 += z1;
1625  tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */
1626  tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */
1627  MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */
1628  MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */
1629 
1630  /* Final output stage */
1631 
1632  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1633  CONST_BITS+PASS1_BITS+3)
1634  & RANGE_MASK];
1635  outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1636  CONST_BITS+PASS1_BITS+3)
1637  & RANGE_MASK];
1638  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1639  CONST_BITS+PASS1_BITS+3)
1640  & RANGE_MASK];
1641  outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1642  CONST_BITS+PASS1_BITS+3)
1643  & RANGE_MASK];
1644  outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1645  CONST_BITS+PASS1_BITS+3)
1646  & RANGE_MASK];
1647  outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1648  CONST_BITS+PASS1_BITS+3)
1649  & RANGE_MASK];
1650  outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1651  CONST_BITS+PASS1_BITS+3)
1652  & RANGE_MASK];
1653  outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1654  CONST_BITS+PASS1_BITS+3)
1655  & RANGE_MASK];
1656  outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1657  CONST_BITS+PASS1_BITS+3)
1658  & RANGE_MASK];
1659  outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1660  CONST_BITS+PASS1_BITS+3)
1661  & RANGE_MASK];
1662  outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25,
1663  CONST_BITS+PASS1_BITS+3)
1664  & RANGE_MASK];
1665 
1666  wsptr += 8; /* advance pointer to next row */
1667  }
1668 }
1669 
1670 
1671 /*
1672  * Perform dequantization and inverse DCT on one block of coefficients,
1673  * producing a 12x12 output block.
1674  *
1675  * Optimized algorithm with 15 multiplications in the 1-D kernel.
1676  * cK represents sqrt(2) * cos(K*pi/24).
1677  */
1678 
1679 GLOBAL(void)
1680 jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1683 {
1684  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1685  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
1686  INT32 z1, z2, z3, z4;
1687  JCOEFPTR inptr;
1688  ISLOW_MULT_TYPE * quantptr;
1689  int * wsptr;
1690  JSAMPROW outptr;
1691  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1692  int ctr;
1693  int workspace[8*12]; /* buffers data between passes */
1694  SHIFT_TEMPS
1695 
1696  /* Pass 1: process columns from input, store into work array. */
1697 
1698  inptr = coef_block;
1699  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1700  wsptr = workspace;
1701  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1702  /* Even part */
1703 
1704  z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1705  z3 <<= CONST_BITS;
1706  /* Add fudge factor here for final descale. */
1707  z3 += ONE << (CONST_BITS-PASS1_BITS-1);
1708 
1709  z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1710  z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
1711 
1712  tmp10 = z3 + z4;
1713  tmp11 = z3 - z4;
1714 
1715  z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1716  z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
1717  z1 <<= CONST_BITS;
1718  z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1719  z2 <<= CONST_BITS;
1720 
1721  tmp12 = z1 - z2;
1722 
1723  tmp21 = z3 + tmp12;
1724  tmp24 = z3 - tmp12;
1725 
1726  tmp12 = z4 + z2;
1727 
1728  tmp20 = tmp10 + tmp12;
1729  tmp25 = tmp10 - tmp12;
1730 
1731  tmp12 = z4 - z1 - z2;
1732 
1733  tmp22 = tmp11 + tmp12;
1734  tmp23 = tmp11 - tmp12;
1735 
1736  /* Odd part */
1737 
1738  z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1739  z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1740  z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1741  z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1742 
1743  tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
1744  tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
1745 
1746  tmp10 = z1 + z3;
1747  tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
1748  tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
1749  tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
1750  tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
1751  tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
1752  tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
1753  tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
1754  MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
1755 
1756  z1 -= z4;
1757  z2 -= z3;
1758  z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
1759  tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
1760  tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
1761 
1762  /* Final output stage */
1763 
1764  wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1765  wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1766  wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1767  wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1768  wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1769  wsptr[8*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1770  wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1771  wsptr[8*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1772  wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1773  wsptr[8*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1774  wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
1775  wsptr[8*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
1776  }
1777 
1778  /* Pass 2: process 12 rows from work array, store into output array. */
1779 
1780  wsptr = workspace;
1781  for (ctr = 0; ctr < 12; ctr++) {
1782  outptr = output_buf[ctr] + output_col;
1783 
1784  /* Even part */
1785 
1786  /* Add range center and fudge factor for final descale and range-limit. */
1787  z3 = (INT32) wsptr[0] +
1788  ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1789  (ONE << (PASS1_BITS+2)));
1790  z3 <<= CONST_BITS;
1791 
1792  z4 = (INT32) wsptr[4];
1793  z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
1794 
1795  tmp10 = z3 + z4;
1796  tmp11 = z3 - z4;
1797 
1798  z1 = (INT32) wsptr[2];
1799  z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
1800  z1 <<= CONST_BITS;
1801  z2 = (INT32) wsptr[6];
1802  z2 <<= CONST_BITS;
1803 
1804  tmp12 = z1 - z2;
1805 
1806  tmp21 = z3 + tmp12;
1807  tmp24 = z3 - tmp12;
1808 
1809  tmp12 = z4 + z2;
1810 
1811  tmp20 = tmp10 + tmp12;
1812  tmp25 = tmp10 - tmp12;
1813 
1814  tmp12 = z4 - z1 - z2;
1815 
1816  tmp22 = tmp11 + tmp12;
1817  tmp23 = tmp11 - tmp12;
1818 
1819  /* Odd part */
1820 
1821  z1 = (INT32) wsptr[1];
1822  z2 = (INT32) wsptr[3];
1823  z3 = (INT32) wsptr[5];
1824  z4 = (INT32) wsptr[7];
1825 
1826  tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
1827  tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
1828 
1829  tmp10 = z1 + z3;
1830  tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
1831  tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
1832  tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
1833  tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
1834  tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
1835  tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
1836  tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
1837  MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
1838 
1839  z1 -= z4;
1840  z2 -= z3;
1841  z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
1842  tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
1843  tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
1844 
1845  /* Final output stage */
1846 
1847  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1848  CONST_BITS+PASS1_BITS+3)
1849  & RANGE_MASK];
1850  outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1851  CONST_BITS+PASS1_BITS+3)
1852  & RANGE_MASK];
1853  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1854  CONST_BITS+PASS1_BITS+3)
1855  & RANGE_MASK];
1856  outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1857  CONST_BITS+PASS1_BITS+3)
1858  & RANGE_MASK];
1859  outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1860  CONST_BITS+PASS1_BITS+3)
1861  & RANGE_MASK];
1862  outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1863  CONST_BITS+PASS1_BITS+3)
1864  & RANGE_MASK];
1865  outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1866  CONST_BITS+PASS1_BITS+3)
1867  & RANGE_MASK];
1868  outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1869  CONST_BITS+PASS1_BITS+3)
1870  & RANGE_MASK];
1871  outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1872  CONST_BITS+PASS1_BITS+3)
1873  & RANGE_MASK];
1874  outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1875  CONST_BITS+PASS1_BITS+3)
1876  & RANGE_MASK];
1877  outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
1878  CONST_BITS+PASS1_BITS+3)
1879  & RANGE_MASK];
1880  outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
1881  CONST_BITS+PASS1_BITS+3)
1882  & RANGE_MASK];
1883 
1884  wsptr += 8; /* advance pointer to next row */
1885  }
1886 }
1887 
1888 
1889 /*
1890  * Perform dequantization and inverse DCT on one block of coefficients,
1891  * producing a 13x13 output block.
1892  *
1893  * Optimized algorithm with 29 multiplications in the 1-D kernel.
1894  * cK represents sqrt(2) * cos(K*pi/26).
1895  */
1896 
1897 GLOBAL(void)
1898 jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1901 {
1902  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1903  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
1904  INT32 z1, z2, z3, z4;
1905  JCOEFPTR inptr;
1906  ISLOW_MULT_TYPE * quantptr;
1907  int * wsptr;
1908  JSAMPROW outptr;
1909  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1910  int ctr;
1911  int workspace[8*13]; /* buffers data between passes */
1912  SHIFT_TEMPS
1913 
1914  /* Pass 1: process columns from input, store into work array. */
1915 
1916  inptr = coef_block;
1917  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1918  wsptr = workspace;
1919  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1920  /* Even part */
1921 
1922  z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1923  z1 <<= CONST_BITS;
1924  /* Add fudge factor here for final descale. */
1925  z1 += ONE << (CONST_BITS-PASS1_BITS-1);
1926 
1927  z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1928  z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1929  z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1930 
1931  tmp10 = z3 + z4;
1932  tmp11 = z3 - z4;
1933 
1934  tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */
1935  tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */
1936 
1937  tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */
1938  tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */
1939 
1940  tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */
1941  tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */
1942 
1943  tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */
1944  tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
1945 
1946  tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */
1947  tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */
1948 
1949  tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
1950  tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
1951 
1952  tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */
1953 
1954  /* Odd part */
1955 
1956  z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1957  z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1958  z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1959  z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1960 
1961  tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */
1962  tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */
1963  tmp15 = z1 + z4;
1964  tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */
1965  tmp10 = tmp11 + tmp12 + tmp13 -
1966  MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */
1967  tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */
1968  tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
1969  tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
1970  tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */
1971  tmp11 += tmp14;
1972  tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
1973  tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */
1974  tmp12 += tmp14;
1975  tmp13 += tmp14;
1976  tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */
1977  tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
1978  MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */
1979  z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */
1980  tmp14 += z1;
1981  tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */
1982  MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */
1983 
1984  /* Final output stage */
1985 
1986  wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1987  wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1988  wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1989  wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1990  wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1991  wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1992  wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1993  wsptr[8*9] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1994  wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1995  wsptr[8*8] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1996  wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
1997  wsptr[8*7] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
1998  wsptr[8*6] = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS);
1999  }
2000 
2001  /* Pass 2: process 13 rows from work array, store into output array. */
2002 
2003  wsptr = workspace;
2004  for (ctr = 0; ctr < 13; ctr++) {
2005  outptr = output_buf[ctr] + output_col;
2006 
2007  /* Even part */
2008 
2009  /* Add range center and fudge factor for final descale and range-limit. */
2010  z1 = (INT32) wsptr[0] +
2011  ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2012  (ONE << (PASS1_BITS+2)));
2013  z1 <<= CONST_BITS;
2014 
2015  z2 = (INT32) wsptr[2];
2016  z3 = (INT32) wsptr[4];
2017  z4 = (INT32) wsptr[6];
2018 
2019  tmp10 = z3 + z4;
2020  tmp11 = z3 - z4;
2021 
2022  tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */
2023  tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */
2024 
2025  tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */
2026  tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */
2027 
2028  tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */
2029  tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */
2030 
2031  tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */
2032  tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
2033 
2034  tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */
2035  tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */
2036 
2037  tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
2038  tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
2039 
2040  tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */
2041 
2042  /* Odd part */
2043 
2044  z1 = (INT32) wsptr[1];
2045  z2 = (INT32) wsptr[3];
2046  z3 = (INT32) wsptr[5];
2047  z4 = (INT32) wsptr[7];
2048 
2049  tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */
2050  tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */
2051  tmp15 = z1 + z4;
2052  tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */
2053  tmp10 = tmp11 + tmp12 + tmp13 -
2054  MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */
2055  tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */
2056  tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
2057  tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
2058  tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */
2059  tmp11 += tmp14;
2060  tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
2061  tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */
2062  tmp12 += tmp14;
2063  tmp13 += tmp14;
2064  tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */
2065  tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
2066  MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */
2067  z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */
2068  tmp14 += z1;
2069  tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */
2070  MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */
2071 
2072  /* Final output stage */
2073 
2074  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2075  CONST_BITS+PASS1_BITS+3)
2076  & RANGE_MASK];
2077  outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2078  CONST_BITS+PASS1_BITS+3)
2079  & RANGE_MASK];
2080  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2081  CONST_BITS+PASS1_BITS+3)
2082  & RANGE_MASK];
2083  outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2084  CONST_BITS+PASS1_BITS+3)
2085  & RANGE_MASK];
2086  outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2087  CONST_BITS+PASS1_BITS+3)
2088  & RANGE_MASK];
2089  outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2090  CONST_BITS+PASS1_BITS+3)
2091  & RANGE_MASK];
2092  outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2093  CONST_BITS+PASS1_BITS+3)
2094  & RANGE_MASK];
2095  outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2096  CONST_BITS+PASS1_BITS+3)
2097  & RANGE_MASK];
2098  outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2099  CONST_BITS+PASS1_BITS+3)
2100  & RANGE_MASK];
2101  outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2102  CONST_BITS+PASS1_BITS+3)
2103  & RANGE_MASK];
2104  outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2105  CONST_BITS+PASS1_BITS+3)
2106  & RANGE_MASK];
2107  outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2108  CONST_BITS+PASS1_BITS+3)
2109  & RANGE_MASK];
2110  outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26,
2111  CONST_BITS+PASS1_BITS+3)
2112  & RANGE_MASK];
2113 
2114  wsptr += 8; /* advance pointer to next row */
2115  }
2116 }
2117 
2118 
2119 /*
2120  * Perform dequantization and inverse DCT on one block of coefficients,
2121  * producing a 14x14 output block.
2122  *
2123  * Optimized algorithm with 20 multiplications in the 1-D kernel.
2124  * cK represents sqrt(2) * cos(K*pi/28).
2125  */
2126 
2127 GLOBAL(void)
2128 jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2131 {
2132  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2133  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
2134  INT32 z1, z2, z3, z4;
2135  JCOEFPTR inptr;
2136  ISLOW_MULT_TYPE * quantptr;
2137  int * wsptr;
2138  JSAMPROW outptr;
2139  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2140  int ctr;
2141  int workspace[8*14]; /* buffers data between passes */
2142  SHIFT_TEMPS
2143 
2144  /* Pass 1: process columns from input, store into work array. */
2145 
2146  inptr = coef_block;
2147  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2148  wsptr = workspace;
2149  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2150  /* Even part */
2151 
2152  z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2153  z1 <<= CONST_BITS;
2154  /* Add fudge factor here for final descale. */
2155  z1 += ONE << (CONST_BITS-PASS1_BITS-1);
2156  z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2157  z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
2158  z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
2159  z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
2160 
2161  tmp10 = z1 + z2;
2162  tmp11 = z1 + z3;
2163  tmp12 = z1 - z4;
2164 
2165  tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
2166  CONST_BITS-PASS1_BITS);
2167 
2168  z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2169  z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2170 
2171  z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
2172 
2173  tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
2174  tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
2175  tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
2176  MULTIPLY(z2, FIX(1.378756276)); /* c2 */
2177 
2178  tmp20 = tmp10 + tmp13;
2179  tmp26 = tmp10 - tmp13;
2180  tmp21 = tmp11 + tmp14;
2181  tmp25 = tmp11 - tmp14;
2182  tmp22 = tmp12 + tmp15;
2183  tmp24 = tmp12 - tmp15;
2184 
2185  /* Odd part */
2186 
2187  z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2188  z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2189  z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2190  z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2191  tmp13 = z4 << CONST_BITS;
2192 
2193  tmp14 = z1 + z3;
2194  tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
2195  tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
2196  tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
2197  tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
2198  tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
2199  z1 -= z2;
2200  tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */
2201  tmp16 += tmp15;
2202  z1 += z4;
2203  z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
2204  tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
2205  tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
2206  z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
2207  tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
2208  tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
2209 
2210  tmp13 = (z1 - z3) << PASS1_BITS;
2211 
2212  /* Final output stage */
2213 
2214  wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
2215  wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
2216  wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
2217  wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
2218  wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
2219  wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2220  wsptr[8*3] = (int) (tmp23 + tmp13);
2221  wsptr[8*10] = (int) (tmp23 - tmp13);
2222  wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2223  wsptr[8*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2224  wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2225  wsptr[8*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2226  wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
2227  wsptr[8*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
2228  }
2229 
2230  /* Pass 2: process 14 rows from work array, store into output array. */
2231 
2232  wsptr = workspace;
2233  for (ctr = 0; ctr < 14; ctr++) {
2234  outptr = output_buf[ctr] + output_col;
2235 
2236  /* Even part */
2237 
2238  /* Add range center and fudge factor for final descale and range-limit. */
2239  z1 = (INT32) wsptr[0] +
2240  ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2241  (ONE << (PASS1_BITS+2)));
2242  z1 <<= CONST_BITS;
2243  z4 = (INT32) wsptr[4];
2244  z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
2245  z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
2246  z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
2247 
2248  tmp10 = z1 + z2;
2249  tmp11 = z1 + z3;
2250  tmp12 = z1 - z4;
2251 
2252  tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */
2253 
2254  z1 = (INT32) wsptr[2];
2255  z2 = (INT32) wsptr[6];
2256 
2257  z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
2258 
2259  tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
2260  tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
2261  tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
2262  MULTIPLY(z2, FIX(1.378756276)); /* c2 */
2263 
2264  tmp20 = tmp10 + tmp13;
2265  tmp26 = tmp10 - tmp13;
2266  tmp21 = tmp11 + tmp14;
2267  tmp25 = tmp11 - tmp14;
2268  tmp22 = tmp12 + tmp15;
2269  tmp24 = tmp12 - tmp15;
2270 
2271  /* Odd part */
2272 
2273  z1 = (INT32) wsptr[1];
2274  z2 = (INT32) wsptr[3];
2275  z3 = (INT32) wsptr[5];
2276  z4 = (INT32) wsptr[7];
2277  z4 <<= CONST_BITS;
2278 
2279  tmp14 = z1 + z3;
2280  tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
2281  tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
2282  tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
2283  tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
2284  tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
2285  z1 -= z2;
2286  tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */
2287  tmp16 += tmp15;
2288  tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */
2289  tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
2290  tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
2291  tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
2292  tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
2293  tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
2294 
2295  tmp13 = ((z1 - z3) << CONST_BITS) + z4;
2296 
2297  /* Final output stage */
2298 
2299  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2300  CONST_BITS+PASS1_BITS+3)
2301  & RANGE_MASK];
2302  outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2303  CONST_BITS+PASS1_BITS+3)
2304  & RANGE_MASK];
2305  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2306  CONST_BITS+PASS1_BITS+3)
2307  & RANGE_MASK];
2308  outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2309  CONST_BITS+PASS1_BITS+3)
2310  & RANGE_MASK];
2311  outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2312  CONST_BITS+PASS1_BITS+3)
2313  & RANGE_MASK];
2314  outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2315  CONST_BITS+PASS1_BITS+3)
2316  & RANGE_MASK];
2317  outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2318  CONST_BITS+PASS1_BITS+3)
2319  & RANGE_MASK];
2320  outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2321  CONST_BITS+PASS1_BITS+3)
2322  & RANGE_MASK];
2323  outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2324  CONST_BITS+PASS1_BITS+3)
2325  & RANGE_MASK];
2326  outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2327  CONST_BITS+PASS1_BITS+3)
2328  & RANGE_MASK];
2329  outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2330  CONST_BITS+PASS1_BITS+3)
2331  & RANGE_MASK];
2332  outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2333  CONST_BITS+PASS1_BITS+3)
2334  & RANGE_MASK];
2335  outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
2336  CONST_BITS+PASS1_BITS+3)
2337  & RANGE_MASK];
2338  outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
2339  CONST_BITS+PASS1_BITS+3)
2340  & RANGE_MASK];
2341 
2342  wsptr += 8; /* advance pointer to next row */
2343  }
2344 }
2345 
2346 
2347 /*
2348  * Perform dequantization and inverse DCT on one block of coefficients,
2349  * producing a 15x15 output block.
2350  *
2351  * Optimized algorithm with 22 multiplications in the 1-D kernel.
2352  * cK represents sqrt(2) * cos(K*pi/30).
2353  */
2354 
2355 GLOBAL(void)
2356 jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2359 {
2360  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2361  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2362  INT32 z1, z2, z3, z4;
2363  JCOEFPTR inptr;
2364  ISLOW_MULT_TYPE * quantptr;
2365  int * wsptr;
2366  JSAMPROW outptr;
2367  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2368  int ctr;
2369  int workspace[8*15]; /* buffers data between passes */
2370  SHIFT_TEMPS
2371 
2372  /* Pass 1: process columns from input, store into work array. */
2373 
2374  inptr = coef_block;
2375  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2376  wsptr = workspace;
2377  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2378  /* Even part */
2379 
2380  z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2381  z1 <<= CONST_BITS;
2382  /* Add fudge factor here for final descale. */
2383  z1 += ONE << (CONST_BITS-PASS1_BITS-1);
2384 
2385  z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2386  z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2387  z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2388 
2389  tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
2390  tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
2391 
2392  tmp12 = z1 - tmp10;
2393  tmp13 = z1 + tmp11;
2394  z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */
2395 
2396  z4 = z2 - z3;
2397  z3 += z2;
2398  tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
2399  tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
2400  z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */
2401 
2402  tmp20 = tmp13 + tmp10 + tmp11;
2403  tmp23 = tmp12 - tmp10 + tmp11 + z2;
2404 
2405  tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
2406  tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
2407 
2408  tmp25 = tmp13 - tmp10 - tmp11;
2409  tmp26 = tmp12 + tmp10 - tmp11 - z2;
2410 
2411  tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
2412  tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
2413 
2414  tmp21 = tmp12 + tmp10 + tmp11;
2415  tmp24 = tmp13 - tmp10 + tmp11;
2416  tmp11 += tmp11;
2417  tmp22 = z1 + tmp11; /* c10 = c6-c12 */
2418  tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */
2419 
2420  /* Odd part */
2421 
2422  z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2423  z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2424  z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2425  z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */
2426  z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2427 
2428  tmp13 = z2 - z4;
2429  tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */
2430  tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */
2431  tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */
2432 
2433  tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */
2434  tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */
2435  z2 = z1 - z4;
2436  tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */
2437 
2438  tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
2439  tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
2440  tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */
2441  z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */
2442  tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */
2443  tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */
2444 
2445  /* Final output stage */
2446 
2447  wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
2448  wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
2449  wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
2450  wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
2451  wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
2452  wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2453  wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
2454  wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
2455  wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2456  wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2457  wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2458  wsptr[8*9] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2459  wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
2460  wsptr[8*8] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
2461  wsptr[8*7] = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS);
2462  }
2463 
2464  /* Pass 2: process 15 rows from work array, store into output array. */
2465 
2466  wsptr = workspace;
2467  for (ctr = 0; ctr < 15; ctr++) {
2468  outptr = output_buf[ctr] + output_col;
2469 
2470  /* Even part */
2471 
2472  /* Add range center and fudge factor for final descale and range-limit. */
2473  z1 = (INT32) wsptr[0] +
2474  ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2475  (ONE << (PASS1_BITS+2)));
2476  z1 <<= CONST_BITS;
2477 
2478  z2 = (INT32) wsptr[2];
2479  z3 = (INT32) wsptr[4];
2480  z4 = (INT32) wsptr[6];
2481 
2482  tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
2483  tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
2484 
2485  tmp12 = z1 - tmp10;
2486  tmp13 = z1 + tmp11;
2487  z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */
2488 
2489  z4 = z2 - z3;
2490  z3 += z2;
2491  tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
2492  tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
2493  z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */
2494 
2495  tmp20 = tmp13 + tmp10 + tmp11;
2496  tmp23 = tmp12 - tmp10 + tmp11 + z2;
2497 
2498  tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
2499  tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
2500 
2501  tmp25 = tmp13 - tmp10 - tmp11;
2502  tmp26 = tmp12 + tmp10 - tmp11 - z2;
2503 
2504  tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
2505  tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
2506 
2507  tmp21 = tmp12 + tmp10 + tmp11;
2508  tmp24 = tmp13 - tmp10 + tmp11;
2509  tmp11 += tmp11;
2510  tmp22 = z1 + tmp11; /* c10 = c6-c12 */
2511  tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */
2512 
2513  /* Odd part */
2514 
2515  z1 = (INT32) wsptr[1];
2516  z2 = (INT32) wsptr[3];
2517  z4 = (INT32) wsptr[5];
2518  z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */
2519  z4 = (INT32) wsptr[7];
2520 
2521  tmp13 = z2 - z4;
2522  tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */
2523  tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */
2524  tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */
2525 
2526  tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */
2527  tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */
2528  z2 = z1 - z4;
2529  tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */
2530 
2531  tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
2532  tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
2533  tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */
2534  z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */
2535  tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */
2536  tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */
2537 
2538  /* Final output stage */
2539 
2540  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2541  CONST_BITS+PASS1_BITS+3)
2542  & RANGE_MASK];
2543  outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2544  CONST_BITS+PASS1_BITS+3)
2545  & RANGE_MASK];
2546  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2547  CONST_BITS+PASS1_BITS+3)
2548  & RANGE_MASK];
2549  outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2550  CONST_BITS+PASS1_BITS+3)
2551  & RANGE_MASK];
2552  outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2553  CONST_BITS+PASS1_BITS+3)
2554  & RANGE_MASK];
2555  outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2556  CONST_BITS+PASS1_BITS+3)
2557  & RANGE_MASK];
2558  outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2559  CONST_BITS+PASS1_BITS+3)
2560  & RANGE_MASK];
2561  outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2562  CONST_BITS+PASS1_BITS+3)
2563  & RANGE_MASK];
2564  outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2565  CONST_BITS+PASS1_BITS+3)
2566  & RANGE_MASK];
2567  outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2568  CONST_BITS+PASS1_BITS+3)
2569  & RANGE_MASK];
2570  outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2571  CONST_BITS+PASS1_BITS+3)
2572  & RANGE_MASK];
2573  outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2574  CONST_BITS+PASS1_BITS+3)
2575  & RANGE_MASK];
2576  outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
2577  CONST_BITS+PASS1_BITS+3)
2578  & RANGE_MASK];
2579  outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
2580  CONST_BITS+PASS1_BITS+3)
2581  & RANGE_MASK];
2582  outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27,
2583  CONST_BITS+PASS1_BITS+3)
2584  & RANGE_MASK];
2585 
2586  wsptr += 8; /* advance pointer to next row */
2587  }
2588 }
2589 
2590 
2591 /*
2592  * Perform dequantization and inverse DCT on one block of coefficients,
2593  * producing a 16x16 output block.
2594  *
2595  * Optimized algorithm with 28 multiplications in the 1-D kernel.
2596  * cK represents sqrt(2) * cos(K*pi/32).
2597  */
2598 
2599 GLOBAL(void)
2600 jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2603 {
2604  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
2605  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2606  INT32 z1, z2, z3, z4;
2607  JCOEFPTR inptr;
2608  ISLOW_MULT_TYPE * quantptr;
2609  int * wsptr;
2610  JSAMPROW outptr;
2611  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2612  int ctr;
2613  int workspace[8*16]; /* buffers data between passes */
2614  SHIFT_TEMPS
2615 
2616  /* Pass 1: process columns from input, store into work array. */
2617 
2618  inptr = coef_block;
2619  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2620  wsptr = workspace;
2621  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2622  /* Even part */
2623 
2624  tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2625  tmp0 <<= CONST_BITS;
2626  /* Add fudge factor here for final descale. */
2627  tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
2628 
2629  z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2630  tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
2631  tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
2632 
2633  tmp10 = tmp0 + tmp1;
2634  tmp11 = tmp0 - tmp1;
2635  tmp12 = tmp0 + tmp2;
2636  tmp13 = tmp0 - tmp2;
2637 
2638  z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2639  z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2640  z3 = z1 - z2;
2641  z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
2642  z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
2643 
2644  tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
2645  tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
2646  tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2647  tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2648 
2649  tmp20 = tmp10 + tmp0;
2650  tmp27 = tmp10 - tmp0;
2651  tmp21 = tmp12 + tmp1;
2652  tmp26 = tmp12 - tmp1;
2653  tmp22 = tmp13 + tmp2;
2654  tmp25 = tmp13 - tmp2;
2655  tmp23 = tmp11 + tmp3;
2656  tmp24 = tmp11 - tmp3;
2657 
2658  /* Odd part */
2659 
2660  z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2661  z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2662  z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2663  z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2664 
2665  tmp11 = z1 + z3;
2666 
2667  tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
2668  tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
2669  tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
2670  tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
2671  tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
2672  tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
2673  tmp0 = tmp1 + tmp2 + tmp3 -
2674  MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
2675  tmp13 = tmp10 + tmp11 + tmp12 -
2676  MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
2677  z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
2678  tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
2679  tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
2680  z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
2681  tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
2682  tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
2683  z2 += z4;
2684  z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
2685  tmp1 += z1;
2686  tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
2687  z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
2688  tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
2689  tmp12 += z2;
2690  z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
2691  tmp2 += z2;
2692  tmp3 += z2;
2693  z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
2694  tmp10 += z2;
2695  tmp11 += z2;
2696 
2697  /* Final output stage */
2698 
2699  wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS);
2700  wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS);
2701  wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS);
2702  wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS);
2703  wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS);
2704  wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS);
2705  wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS);
2706  wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS);
2707  wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
2708  wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
2709  wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
2710  wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
2711  wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
2712  wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
2713  wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
2714  wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
2715  }
2716 
2717  /* Pass 2: process 16 rows from work array, store into output array. */
2718 
2719  wsptr = workspace;
2720  for (ctr = 0; ctr < 16; ctr++) {
2721  outptr = output_buf[ctr] + output_col;
2722 
2723  /* Even part */
2724 
2725  /* Add range center and fudge factor for final descale and range-limit. */
2726  tmp0 = (INT32) wsptr[0] +
2727  ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2728  (ONE << (PASS1_BITS+2)));
2729  tmp0 <<= CONST_BITS;
2730 
2731  z1 = (INT32) wsptr[4];
2732  tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
2733  tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
2734 
2735  tmp10 = tmp0 + tmp1;
2736  tmp11 = tmp0 - tmp1;
2737  tmp12 = tmp0 + tmp2;
2738  tmp13 = tmp0 - tmp2;
2739 
2740  z1 = (INT32) wsptr[2];
2741  z2 = (INT32) wsptr[6];
2742  z3 = z1 - z2;
2743  z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
2744  z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
2745 
2746  tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
2747  tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
2748  tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2749  tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2750 
2751  tmp20 = tmp10 + tmp0;
2752  tmp27 = tmp10 - tmp0;
2753  tmp21 = tmp12 + tmp1;
2754  tmp26 = tmp12 - tmp1;
2755  tmp22 = tmp13 + tmp2;
2756  tmp25 = tmp13 - tmp2;
2757  tmp23 = tmp11 + tmp3;
2758  tmp24 = tmp11 - tmp3;
2759 
2760  /* Odd part */
2761 
2762  z1 = (INT32) wsptr[1];
2763  z2 = (INT32) wsptr[3];
2764  z3 = (INT32) wsptr[5];
2765  z4 = (INT32) wsptr[7];
2766 
2767  tmp11 = z1 + z3;
2768 
2769  tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
2770  tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
2771  tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
2772  tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
2773  tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
2774  tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
2775  tmp0 = tmp1 + tmp2 + tmp3 -
2776  MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
2777  tmp13 = tmp10 + tmp11 + tmp12 -
2778  MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
2779  z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
2780  tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
2781  tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
2782  z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
2783  tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
2784  tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
2785  z2 += z4;
2786  z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
2787  tmp1 += z1;
2788  tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
2789  z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
2790  tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
2791  tmp12 += z2;
2792  z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
2793  tmp2 += z2;
2794  tmp3 += z2;
2795  z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
2796  tmp10 += z2;
2797  tmp11 += z2;
2798 
2799  /* Final output stage */
2800 
2801  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
2802  CONST_BITS+PASS1_BITS+3)
2803  & RANGE_MASK];
2804  outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
2805  CONST_BITS+PASS1_BITS+3)
2806  & RANGE_MASK];
2807  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
2808  CONST_BITS+PASS1_BITS+3)
2809  & RANGE_MASK];
2810  outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
2811  CONST_BITS+PASS1_BITS+3)
2812  & RANGE_MASK];
2813  outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
2814  CONST_BITS+PASS1_BITS+3)
2815  & RANGE_MASK];
2816  outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
2817  CONST_BITS+PASS1_BITS+3)
2818  & RANGE_MASK];
2819  outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
2820  CONST_BITS+PASS1_BITS+3)
2821  & RANGE_MASK];
2822  outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
2823  CONST_BITS+PASS1_BITS+3)
2824  & RANGE_MASK];
2825  outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
2826  CONST_BITS+PASS1_BITS+3)
2827  & RANGE_MASK];
2828  outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
2829  CONST_BITS+PASS1_BITS+3)
2830  & RANGE_MASK];
2831  outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
2832  CONST_BITS+PASS1_BITS+3)
2833  & RANGE_MASK];
2834  outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
2835  CONST_BITS+PASS1_BITS+3)
2836  & RANGE_MASK];
2837  outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
2838  CONST_BITS+PASS1_BITS+3)
2839  & RANGE_MASK];
2840  outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
2841  CONST_BITS+PASS1_BITS+3)
2842  & RANGE_MASK];
2843  outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
2844  CONST_BITS+PASS1_BITS+3)
2845  & RANGE_MASK];
2846  outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
2847  CONST_BITS+PASS1_BITS+3)
2848  & RANGE_MASK];
2849 
2850  wsptr += 8; /* advance pointer to next row */
2851  }
2852 }
2853 
2854 
2855 /*
2856  * Perform dequantization and inverse DCT on one block of coefficients,
2857  * producing a 16x8 output block.
2858  *
2859  * 8-point IDCT in pass 1 (columns), 16-point in pass 2 (rows).
2860  */
2861 
2862 GLOBAL(void)
2863 jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2866 {
2867  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
2868  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2869  INT32 z1, z2, z3, z4;
2870  JCOEFPTR inptr;
2871  ISLOW_MULT_TYPE * quantptr;
2872  int * wsptr;
2873  JSAMPROW outptr;
2874  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2875  int ctr;
2876  int workspace[8*8]; /* buffers data between passes */
2877  SHIFT_TEMPS
2878 
2879  /* Pass 1: process columns from input, store into work array.
2880  * Note results are scaled up by sqrt(8) compared to a true IDCT;
2881  * furthermore, we scale the results by 2**PASS1_BITS.
2882  * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
2883  */
2884 
2885  inptr = coef_block;
2886  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2887  wsptr = workspace;
2888  for (ctr = DCTSIZE; ctr > 0; ctr--) {
2889  /* Due to quantization, we will usually find that many of the input
2890  * coefficients are zero, especially the AC terms. We can exploit this
2891  * by short-circuiting the IDCT calculation for any column in which all
2892  * the AC terms are zero. In that case each output is equal to the
2893  * DC coefficient (with scale factor as needed).
2894  * With typical images and quantization tables, half or more of the
2895  * column DCT calculations can be simplified this way.
2896  */
2897 
2898  if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
2899  inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
2900  inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
2901  inptr[DCTSIZE*7] == 0) {
2902  /* AC terms all zero */
2903  int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
2904 
2905  wsptr[DCTSIZE*0] = dcval;
2906  wsptr[DCTSIZE*1] = dcval;
2907  wsptr[DCTSIZE*2] = dcval;
2908  wsptr[DCTSIZE*3] = dcval;
2909  wsptr[DCTSIZE*4] = dcval;
2910  wsptr[DCTSIZE*5] = dcval;
2911  wsptr[DCTSIZE*6] = dcval;
2912  wsptr[DCTSIZE*7] = dcval;
2913 
2914  inptr++; /* advance pointers to next column */
2915  quantptr++;
2916  wsptr++;
2917  continue;
2918  }
2919 
2920  /* Even part: reverse the even part of the forward DCT.
2921  * The rotator is c(-6).
2922  */
2923 
2924  z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2925  z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2926  z2 <<= CONST_BITS;
2927  z3 <<= CONST_BITS;
2928  /* Add fudge factor here for final descale. */
2929  z2 += ONE << (CONST_BITS-PASS1_BITS-1);
2930 
2931  tmp0 = z2 + z3;
2932  tmp1 = z2 - z3;
2933 
2934  z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2935  z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2936 
2937  z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
2938  tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
2939  tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
2940 
2941  tmp10 = tmp0 + tmp2;
2942  tmp13 = tmp0 - tmp2;
2943  tmp11 = tmp1 + tmp3;
2944  tmp12 = tmp1 - tmp3;
2945 
2946  /* Odd part per figure 8; the matrix is unitary and hence its
2947  * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
2948  */
2949 
2950  tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2951  tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2952  tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2953  tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2954 
2955  z2 = tmp0 + tmp2;
2956  z3 = tmp1 + tmp3;
2957 
2958  z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
2959  z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
2960  z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
2961  z2 += z1;
2962  z3 += z1;
2963 
2964  z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
2965  tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
2966  tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
2967  tmp0 += z1 + z2;
2968  tmp3 += z1 + z3;
2969 
2970  z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
2971  tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
2972  tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
2973  tmp1 += z1 + z3;
2974  tmp2 += z1 + z2;
2975 
2976  /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
2977 
2978  wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
2979  wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
2980  wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
2981  wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
2982  wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
2983  wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
2984  wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
2985  wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
2986 
2987  inptr++; /* advance pointers to next column */
2988  quantptr++;
2989  wsptr++;
2990  }
2991 
2992  /* Pass 2: process 8 rows from work array, store into output array.
2993  * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
2994  */
2995 
2996  wsptr = workspace;
2997  for (ctr = 0; ctr < 8; ctr++) {
2998  outptr = output_buf[ctr] + output_col;
2999 
3000  /* Even part */
3001 
3002  /* Add range center and fudge factor for final descale and range-limit. */
3003  tmp0 = (INT32) wsptr[0] +
3004  ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3005  (ONE << (PASS1_BITS+2)));
3006  tmp0 <<= CONST_BITS;
3007 
3008  z1 = (INT32) wsptr[4];
3009  tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
3010  tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
3011 
3012  tmp10 = tmp0 + tmp1;
3013  tmp11 = tmp0 - tmp1;
3014  tmp12 = tmp0 + tmp2;
3015  tmp13 = tmp0 - tmp2;
3016 
3017  z1 = (INT32) wsptr[2];
3018  z2 = (INT32) wsptr[6];
3019  z3 = z1 - z2;
3020  z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
3021  z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
3022 
3023  tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
3024  tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
3025  tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
3026  tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
3027 
3028  tmp20 = tmp10 + tmp0;
3029  tmp27 = tmp10 - tmp0;
3030  tmp21 = tmp12 + tmp1;
3031  tmp26 = tmp12 - tmp1;
3032  tmp22 = tmp13 + tmp2;
3033  tmp25 = tmp13 - tmp2;
3034  tmp23 = tmp11 + tmp3;
3035  tmp24 = tmp11 - tmp3;
3036 
3037  /* Odd part */
3038 
3039  z1 = (INT32) wsptr[1];
3040  z2 = (INT32) wsptr[3];
3041  z3 = (INT32) wsptr[5];
3042  z4 = (INT32) wsptr[7];
3043 
3044  tmp11 = z1 + z3;
3045 
3046  tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
3047  tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
3048  tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
3049  tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
3050  tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
3051  tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
3052  tmp0 = tmp1 + tmp2 + tmp3 -
3053  MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
3054  tmp13 = tmp10 + tmp11 + tmp12 -
3055  MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
3056  z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
3057  tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
3058  tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
3059  z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
3060  tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
3061  tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
3062  z2 += z4;
3063  z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
3064  tmp1 += z1;
3065  tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
3066  z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
3067  tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
3068  tmp12 += z2;
3069  z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
3070  tmp2 += z2;
3071  tmp3 += z2;
3072  z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
3073  tmp10 += z2;
3074  tmp11 += z2;
3075 
3076  /* Final output stage */
3077 
3078  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
3079  CONST_BITS+PASS1_BITS+3)
3080  & RANGE_MASK];
3081  outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
3082  CONST_BITS+PASS1_BITS+3)
3083  & RANGE_MASK];
3084  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
3085  CONST_BITS+PASS1_BITS+3)
3086  & RANGE_MASK];
3087  outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
3088  CONST_BITS+PASS1_BITS+3)
3089  & RANGE_MASK];
3090  outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
3091  CONST_BITS+PASS1_BITS+3)
3092  & RANGE_MASK];
3093  outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
3094  CONST_BITS+PASS1_BITS+3)
3095  & RANGE_MASK];
3096  outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
3097  CONST_BITS+PASS1_BITS+3)
3098  & RANGE_MASK];
3099  outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
3100  CONST_BITS+PASS1_BITS+3)
3101  & RANGE_MASK];
3102  outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
3103  CONST_BITS+PASS1_BITS+3)
3104  & RANGE_MASK];
3105  outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
3106  CONST_BITS+PASS1_BITS+3)
3107  & RANGE_MASK];
3108  outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
3109  CONST_BITS+PASS1_BITS+3)
3110  & RANGE_MASK];
3111  outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
3112  CONST_BITS+PASS1_BITS+3)
3113  & RANGE_MASK];
3114  outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
3115  CONST_BITS+PASS1_BITS+3)
3116  & RANGE_MASK];
3117  outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
3118  CONST_BITS+PASS1_BITS+3)
3119  & RANGE_MASK];
3120  outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
3121  CONST_BITS+PASS1_BITS+3)
3122  & RANGE_MASK];
3123  outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
3124  CONST_BITS+PASS1_BITS+3)
3125  & RANGE_MASK];
3126 
3127  wsptr += 8; /* advance pointer to next row */
3128  }
3129 }
3130 
3131 
3132 /*
3133  * Perform dequantization and inverse DCT on one block of coefficients,
3134  * producing a 14x7 output block.
3135  *
3136  * 7-point IDCT in pass 1 (columns), 14-point in pass 2 (rows).
3137  */
3138 
3139 GLOBAL(void)
3140 jpeg_idct_14x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3143 {
3144  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
3145  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
3146  INT32 z1, z2, z3, z4;
3147  JCOEFPTR inptr;
3148  ISLOW_MULT_TYPE * quantptr;
3149  int * wsptr;
3150  JSAMPROW outptr;
3151  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3152  int ctr;
3153  int workspace[8*7]; /* buffers data between passes */
3154  SHIFT_TEMPS
3155 
3156  /* Pass 1: process columns from input, store into work array.
3157  * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
3158  */
3159 
3160  inptr = coef_block;
3161  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3162  wsptr = workspace;
3163  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3164  /* Even part */
3165 
3166  tmp23 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3167  tmp23 <<= CONST_BITS;
3168  /* Add fudge factor here for final descale. */
3169  tmp23 += ONE << (CONST_BITS-PASS1_BITS-1);
3170 
3171  z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3172  z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3173  z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
3174 
3175  tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
3176  tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
3177  tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
3178  tmp10 = z1 + z3;
3179  z2 -= tmp10;
3180  tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
3181  tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
3182  tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
3183  tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
3184 
3185  /* Odd part */
3186 
3187  z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3188  z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3189  z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
3190 
3191  tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
3192  tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
3193  tmp10 = tmp11 - tmp12;
3194  tmp11 += tmp12;
3195  tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
3196  tmp11 += tmp12;
3197  z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
3198  tmp10 += z2;
3199  tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
3200 
3201  /* Final output stage */
3202 
3203  wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
3204  wsptr[8*6] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
3205  wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
3206  wsptr[8*5] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
3207  wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
3208  wsptr[8*4] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
3209  wsptr[8*3] = (int) RIGHT_SHIFT(tmp23, CONST_BITS-PASS1_BITS);
3210  }
3211 
3212  /* Pass 2: process 7 rows from work array, store into output array.
3213  * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
3214  */
3215 
3216  wsptr = workspace;
3217  for (ctr = 0; ctr < 7; ctr++) {
3218  outptr = output_buf[ctr] + output_col;
3219 
3220  /* Even part */
3221 
3222  /* Add range center and fudge factor for final descale and range-limit. */
3223  z1 = (INT32) wsptr[0] +
3224  ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3225  (ONE << (PASS1_BITS+2)));
3226  z1 <<= CONST_BITS;
3227  z4 = (INT32) wsptr[4];
3228  z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
3229  z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
3230  z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
3231 
3232  tmp10 = z1 + z2;
3233  tmp11 = z1 + z3;
3234  tmp12 = z1 - z4;
3235 
3236  tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */
3237 
3238  z1 = (INT32) wsptr[2];
3239  z2 = (INT32) wsptr[6];
3240 
3241  z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
3242 
3243  tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
3244  tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
3245  tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
3246  MULTIPLY(z2, FIX(1.378756276)); /* c2 */
3247 
3248  tmp20 = tmp10 + tmp13;
3249  tmp26 = tmp10 - tmp13;
3250  tmp21 = tmp11 + tmp14;
3251  tmp25 = tmp11 - tmp14;
3252  tmp22 = tmp12 + tmp15;
3253  tmp24 = tmp12 - tmp15;
3254 
3255  /* Odd part */
3256 
3257  z1 = (INT32) wsptr[1];
3258  z2 = (INT32) wsptr[3];
3259  z3 = (INT32) wsptr[5];
3260  z4 = (INT32) wsptr[7];
3261  z4 <<= CONST_BITS;
3262 
3263  tmp14 = z1 + z3;
3264  tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
3265  tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
3266  tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
3267  tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
3268  tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
3269  z1 -= z2;
3270  tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */
3271  tmp16 += tmp15;
3272  tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */
3273  tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
3274  tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
3275  tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
3276  tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
3277  tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
3278 
3279  tmp13 = ((z1 - z3) << CONST_BITS) + z4;
3280 
3281  /* Final output stage */
3282 
3283  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3284  CONST_BITS+PASS1_BITS+3)
3285  & RANGE_MASK];
3286  outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3287  CONST_BITS+PASS1_BITS+3)
3288  & RANGE_MASK];
3289  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3290  CONST_BITS+PASS1_BITS+3)
3291  & RANGE_MASK];
3292  outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3293  CONST_BITS+PASS1_BITS+3)
3294  & RANGE_MASK];
3295  outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3296  CONST_BITS+PASS1_BITS+3)
3297  & RANGE_MASK];
3298  outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3299  CONST_BITS+PASS1_BITS+3)
3300  & RANGE_MASK];
3301  outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3302  CONST_BITS+PASS1_BITS+3)
3303  & RANGE_MASK];
3304  outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3305  CONST_BITS+PASS1_BITS+3)
3306  & RANGE_MASK];
3307  outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3308  CONST_BITS+PASS1_BITS+3)
3309  & RANGE_MASK];
3310  outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3311  CONST_BITS+PASS1_BITS+3)
3312  & RANGE_MASK];
3313  outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
3314  CONST_BITS+PASS1_BITS+3)
3315  & RANGE_MASK];
3316  outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
3317  CONST_BITS+PASS1_BITS+3)
3318  & RANGE_MASK];
3319  outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
3320  CONST_BITS+PASS1_BITS+3)
3321  & RANGE_MASK];
3322  outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
3323  CONST_BITS+PASS1_BITS+3)
3324  & RANGE_MASK];
3325 
3326  wsptr += 8; /* advance pointer to next row */
3327  }
3328 }
3329 
3330 
3331 /*
3332  * Perform dequantization and inverse DCT on one block of coefficients,
3333  * producing a 12x6 output block.
3334  *
3335  * 6-point IDCT in pass 1 (columns), 12-point in pass 2 (rows).
3336  */
3337 
3338 GLOBAL(void)
3339 jpeg_idct_12x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3342 {
3343  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
3344  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
3345  INT32 z1, z2, z3, z4;
3346  JCOEFPTR inptr;
3347  ISLOW_MULT_TYPE * quantptr;
3348  int * wsptr;
3349  JSAMPROW outptr;
3350  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3351  int ctr;
3352  int workspace[8*6]; /* buffers data between passes */
3353  SHIFT_TEMPS
3354 
3355  /* Pass 1: process columns from input, store into work array.
3356  * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3357  */
3358 
3359  inptr = coef_block;
3360  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3361  wsptr = workspace;
3362  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3363  /* Even part */
3364 
3365  tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3366  tmp10 <<= CONST_BITS;
3367  /* Add fudge factor here for final descale. */
3368  tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
3369  tmp12 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3370  tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */
3371  tmp11 = tmp10 + tmp20;
3372  tmp21 = RIGHT_SHIFT(tmp10 - tmp20 - tmp20, CONST_BITS-PASS1_BITS);
3373  tmp20 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3374  tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */
3375  tmp20 = tmp11 + tmp10;
3376  tmp22 = tmp11 - tmp10;
3377 
3378  /* Odd part */
3379 
3380  z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3381  z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3382  z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
3383  tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
3384  tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
3385  tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
3386  tmp11 = (z1 - z2 - z3) << PASS1_BITS;
3387 
3388  /* Final output stage */
3389 
3390  wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
3391  wsptr[8*5] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
3392  wsptr[8*1] = (int) (tmp21 + tmp11);
3393  wsptr[8*4] = (int) (tmp21 - tmp11);
3394  wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
3395  wsptr[8*3] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
3396  }
3397 
3398  /* Pass 2: process 6 rows from work array, store into output array.
3399  * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
3400  */
3401 
3402  wsptr = workspace;
3403  for (ctr = 0; ctr < 6; ctr++) {
3404  outptr = output_buf[ctr] + output_col;
3405 
3406  /* Even part */
3407 
3408  /* Add range center and fudge factor for final descale and range-limit. */
3409  z3 = (INT32) wsptr[0] +
3410  ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3411  (ONE << (PASS1_BITS+2)));
3412  z3 <<= CONST_BITS;
3413 
3414  z4 = (INT32) wsptr[4];
3415  z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
3416 
3417  tmp10 = z3 + z4;
3418  tmp11 = z3 - z4;
3419 
3420  z1 = (INT32) wsptr[2];
3421  z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
3422  z1 <<= CONST_BITS;
3423  z2 = (INT32) wsptr[6];
3424  z2 <<= CONST_BITS;
3425 
3426  tmp12 = z1 - z2;
3427 
3428  tmp21 = z3 + tmp12;
3429  tmp24 = z3 - tmp12;
3430 
3431  tmp12 = z4 + z2;
3432 
3433  tmp20 = tmp10 + tmp12;
3434  tmp25 = tmp10 - tmp12;
3435 
3436  tmp12 = z4 - z1 - z2;
3437 
3438  tmp22 = tmp11 + tmp12;
3439  tmp23 = tmp11 - tmp12;
3440 
3441  /* Odd part */
3442 
3443  z1 = (INT32) wsptr[1];
3444  z2 = (INT32) wsptr[3];
3445  z3 = (INT32) wsptr[5];
3446  z4 = (INT32) wsptr[7];
3447 
3448  tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
3449  tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
3450 
3451  tmp10 = z1 + z3;
3452  tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
3453  tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
3454  tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
3455  tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
3456  tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
3457  tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
3458  tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
3459  MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
3460 
3461  z1 -= z4;
3462  z2 -= z3;
3463  z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
3464  tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
3465  tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
3466 
3467  /* Final output stage */
3468 
3469  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3470  CONST_BITS+PASS1_BITS+3)
3471  & RANGE_MASK];
3472  outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3473  CONST_BITS+PASS1_BITS+3)
3474  & RANGE_MASK];
3475  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3476  CONST_BITS+PASS1_BITS+3)
3477  & RANGE_MASK];
3478  outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3479  CONST_BITS+PASS1_BITS+3)
3480  & RANGE_MASK];
3481  outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3482  CONST_BITS+PASS1_BITS+3)
3483  & RANGE_MASK];
3484  outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3485  CONST_BITS+PASS1_BITS+3)
3486  & RANGE_MASK];
3487  outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3488  CONST_BITS+PASS1_BITS+3)
3489  & RANGE_MASK];
3490  outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3491  CONST_BITS+PASS1_BITS+3)
3492  & RANGE_MASK];
3493  outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3494  CONST_BITS+PASS1_BITS+3)
3495  & RANGE_MASK];
3496  outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3497  CONST_BITS+PASS1_BITS+3)
3498  & RANGE_MASK];
3499  outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
3500  CONST_BITS+PASS1_BITS+3)
3501  & RANGE_MASK];
3502  outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
3503  CONST_BITS+PASS1_BITS+3)
3504  & RANGE_MASK];
3505 
3506  wsptr += 8; /* advance pointer to next row */
3507  }
3508 }
3509 
3510 
3511 /*
3512  * Perform dequantization and inverse DCT on one block of coefficients,
3513  * producing a 10x5 output block.
3514  *
3515  * 5-point IDCT in pass 1 (columns), 10-point in pass 2 (rows).
 *
 * Pass 1 walks the 8 input columns, dequantizes coefficient rows 0..4 of
 * each column and stores 5 results per column into workspace[8*5] (row
 * stride 8).  The DC term is shifted left by CONST_BITS and the sums are
 * shifted right by CONST_BITS-PASS1_BITS, so the workspace values stay
 * scaled up by PASS1_BITS bits.
 *
 * Pass 2 turns each of the 5 workspace rows into 10 samples written to
 * output_buf[ctr] + output_col.  Each result is shifted right by
 * CONST_BITS+PASS1_BITS+3, masked with RANGE_MASK and used as an index
 * into the range_limit table obtained from IDCT_range_limit(cinfo).
 *
 * NOTE(review): coef_block, output_buf and output_col are used in the body
 * but are not declared in the parameter list as it appears here; the
 * signature looks cut off after compptr -- confirm against upstream.
3516  */
3517 
3518 GLOBAL(void)
3519 jpeg_idct_10x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3522 {
3523  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
3524  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
3525  INT32 z1, z2, z3, z4;
3526  JCOEFPTR inptr;
3527  ISLOW_MULT_TYPE * quantptr;
3528  int * wsptr;
3529  JSAMPROW outptr;
3530  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3531  int ctr;
3532  int workspace[8*5]; /* buffers data between passes: 5 rows of 8 ints */
3533  SHIFT_TEMPS
3534 
3535  /* Pass 1: process columns from input, store into work array.
3536  * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
 * Only coefficient rows 0..4 of each column are read.
3537  */
3538 
3539  inptr = coef_block;
3540  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3541  wsptr = workspace;
3542  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { /* one input column per iteration */
3543  /* Even part */
3544 
3545  tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3546  tmp12 <<= CONST_BITS;
3547  /* Add fudge factor here for final descale. */
3548  tmp12 += ONE << (CONST_BITS-PASS1_BITS-1); /* half of the divisor applied by the final right shift */
3549  tmp13 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3550  tmp14 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3551  z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
3552  z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
3553  z3 = tmp12 + z2;
3554  tmp10 = z3 + z1;
3555  tmp11 = z3 - z1;
3556  tmp12 -= z2 << 2; /* tmp12 feeds workspace row 2, which gets no odd-part term */
3557 
3558  /* Odd part */
3559 
3560  z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3561  z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3562 
3563  z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
3564  tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
3565  tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
3566 
3567  /* Final output stage */
 /* Rows k and 4-k share an even term and differ in the sign of the odd term. */
3568 
3569  wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp13, CONST_BITS-PASS1_BITS);
3570  wsptr[8*4] = (int) RIGHT_SHIFT(tmp10 - tmp13, CONST_BITS-PASS1_BITS);
3571  wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp14, CONST_BITS-PASS1_BITS);
3572  wsptr[8*3] = (int) RIGHT_SHIFT(tmp11 - tmp14, CONST_BITS-PASS1_BITS);
3573  wsptr[8*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
3574  }
3575 
3576  /* Pass 2: process 5 rows from work array, store into output array.
3577  * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
 * Each row supplies 8 inputs (wsptr[0..7]) and yields 10 output samples.
3578  */
3579 
3580  wsptr = workspace;
3581  for (ctr = 0; ctr < 5; ctr++) { /* one output row per iteration */
3582  outptr = output_buf[ctr] + output_col;
3583 
3584  /* Even part */
3585 
3586  /* Add range center and fudge factor for final descale and range-limit. */
 /* After the shift by CONST_BITS below, ONE << (PASS1_BITS+2) equals half
  * of the divisor used by the final CONST_BITS+PASS1_BITS+3 right shift.
  */
3587  z3 = (INT32) wsptr[0] +
3588  ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3589  (ONE << (PASS1_BITS+2)));
3590  z3 <<= CONST_BITS;
3591  z4 = (INT32) wsptr[4];
3592  z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
3593  z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
3594  tmp10 = z3 + z1;
3595  tmp11 = z3 - z2;
3596 
3597  tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */
3598 
3599  z2 = (INT32) wsptr[2];
3600  z3 = (INT32) wsptr[6];
3601 
3602  z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
3603  tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
3604  tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
3605 
3606  tmp20 = tmp10 + tmp12;
3607  tmp24 = tmp10 - tmp12;
3608  tmp21 = tmp11 + tmp13;
3609  tmp23 = tmp11 - tmp13;
3610 
3611  /* Odd part */
 /* tmp10..tmp14 are reused here; the even results live on in tmp20..tmp24. */
3612 
3613  z1 = (INT32) wsptr[1];
3614  z2 = (INT32) wsptr[3];
3615  z3 = (INT32) wsptr[5];
3616  z3 <<= CONST_BITS;
3617  z4 = (INT32) wsptr[7];
3618 
3619  tmp11 = z2 + z4;
3620  tmp13 = z2 - z4;
3621 
3622  tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
3623 
3624  z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
3625  z4 = z3 + tmp12;
3626 
3627  tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
3628  tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
3629 
3630  z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
3631  z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
3632 
3633  tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
3634 
3635  tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
3636  tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
3637 
3638  /* Final output stage */
 /* Samples k and 9-k share an even term and differ in the sign of the odd term. */
3639 
3640  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3641  CONST_BITS+PASS1_BITS+3)
3642  & RANGE_MASK];
3643  outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3644  CONST_BITS+PASS1_BITS+3)
3645  & RANGE_MASK];
3646  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3647  CONST_BITS+PASS1_BITS+3)
3648  & RANGE_MASK];
3649  outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3650  CONST_BITS+PASS1_BITS+3)
3651  & RANGE_MASK];
3652  outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3653  CONST_BITS+PASS1_BITS+3)
3654  & RANGE_MASK];
3655  outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3656  CONST_BITS+PASS1_BITS+3)
3657  & RANGE_MASK];
3658  outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3659  CONST_BITS+PASS1_BITS+3)
3660  & RANGE_MASK];
3661  outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3662  CONST_BITS+PASS1_BITS+3)
3663  & RANGE_MASK];
3664  outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3665  CONST_BITS+PASS1_BITS+3)
3666  & RANGE_MASK];
3667  outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3668  CONST_BITS+PASS1_BITS+3)
3669  & RANGE_MASK];
3670 
3671  wsptr += 8; /* advance pointer to next row */
3672  }
3673 }
3674 
3675 
3676 /*
3677  * Perform dequantization and inverse DCT on one block of coefficients,
3678  * producing an 8x4 output block.
3679  *
3680  * 4-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
 *
 * Pass 1 walks the 8 input columns, dequantizes coefficient rows 0..3 of
 * each column and stores 4 results per column into workspace[8*4] (row
 * stride 8).  The even part is shifted left by PASS1_BITS directly; the
 * odd part is shifted right by CONST_BITS-PASS1_BITS after rounding, so
 * both halves reach the workspace scaled up by PASS1_BITS bits.
 *
 * Pass 2 turns each of the 4 workspace rows into 8 samples written to
 * output_buf[ctr] + output_col.  Each result is shifted right by
 * CONST_BITS+PASS1_BITS+3, masked with RANGE_MASK and used as an index
 * into the range_limit table obtained from IDCT_range_limit(cinfo).
 *
 * NOTE(review): coef_block, output_buf and output_col are used in the body
 * but are not declared in the parameter list as it appears here; the
 * signature looks cut off after compptr -- confirm against upstream.
3681  */
3682 
3683 GLOBAL(void)
3684 jpeg_idct_8x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3687 {
3688  INT32 tmp0, tmp1, tmp2, tmp3;
3689  INT32 tmp10, tmp11, tmp12, tmp13;
3690  INT32 z1, z2, z3;
3691  JCOEFPTR inptr;
3692  ISLOW_MULT_TYPE * quantptr;
3693  int * wsptr;
3694  JSAMPROW outptr;
3695  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3696  int ctr;
3697  int workspace[8*4]; /* buffers data between passes: 4 rows of 8 ints */
3698  SHIFT_TEMPS
3699 
3700  /* Pass 1: process columns from input, store into work array.
3701  * 4-point IDCT kernel,
3702  * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
 * Only coefficient rows 0..3 of each column are read.
3703  */
3704 
3705  inptr = coef_block;
3706  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3707  wsptr = workspace;
3708  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { /* one input column per iteration */
3709  /* Even part */
3710 
3711  tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3712  tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3713 
3714  tmp10 = (tmp0 + tmp2) << PASS1_BITS;
3715  tmp12 = (tmp0 - tmp2) << PASS1_BITS;
3716 
3717  /* Odd part */
3718  /* Same rotation as in the even part of the 8x8 LL&M IDCT */
3719 
3720  z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3721  z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3722 
3723  z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
3724  /* Add fudge factor here for final descale. */
3725  z1 += ONE << (CONST_BITS-PASS1_BITS-1); /* z1 enters both tmp0 and tmp2, so both get rounded */
3726  tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
3727  CONST_BITS-PASS1_BITS);
3728  tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
3729  CONST_BITS-PASS1_BITS);
3730 
3731  /* Final output stage */
 /* Rows k and 3-k share an even term and differ in the sign of the odd term. */
3732 
3733  wsptr[8*0] = (int) (tmp10 + tmp0);
3734  wsptr[8*3] = (int) (tmp10 - tmp0);
3735  wsptr[8*1] = (int) (tmp12 + tmp2);
3736  wsptr[8*2] = (int) (tmp12 - tmp2);
3737  }
3738 
3739  /* Pass 2: process rows from work array, store into output array.
3740  * Note that we must descale the results by a factor of 8 == 2**3,
3741  * and also undo the PASS1_BITS scaling.
3742  * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
3743  */
3744 
3745  wsptr = workspace;
3746  for (ctr = 0; ctr < 4; ctr++) { /* one output row per iteration */
3747  outptr = output_buf[ctr] + output_col;
3748 
3749  /* Even part: reverse the even part of the forward DCT.
3750  * The rotator is c(-6).
3751  */
3752 
3753  /* Add range center and fudge factor for final descale and range-limit. */
 /* After the shift by CONST_BITS below, ONE << (PASS1_BITS+2) equals half
  * of the divisor used by the final CONST_BITS+PASS1_BITS+3 right shift.
  */
3754  z2 = (INT32) wsptr[0] +
3755  ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3756  (ONE << (PASS1_BITS+2)));
3757  z3 = (INT32) wsptr[4];
3758 
3759  tmp0 = (z2 + z3) << CONST_BITS;
3760  tmp1 = (z2 - z3) << CONST_BITS;
3761 
3762  z2 = (INT32) wsptr[2];
3763  z3 = (INT32) wsptr[6];
3764 
3765  z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
3766  tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
3767  tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
3768 
3769  tmp10 = tmp0 + tmp2;
3770  tmp13 = tmp0 - tmp2;
3771  tmp11 = tmp1 + tmp3;
3772  tmp12 = tmp1 - tmp3;
3773 
3774  /* Odd part per figure 8; the matrix is unitary and hence its
3775  * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
 * tmp0..tmp3 are reused here; the even results live on in tmp10..tmp13.
3776  */
3777 
3778  tmp0 = (INT32) wsptr[7];
3779  tmp1 = (INT32) wsptr[5];
3780  tmp2 = (INT32) wsptr[3];
3781  tmp3 = (INT32) wsptr[1];
3782 
3783  z2 = tmp0 + tmp2;
3784  z3 = tmp1 + tmp3;
3785 
3786  z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
3787  z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
3788  z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
3789  z2 += z1;
3790  z3 += z1;
3791 
3792  z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
3793  tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
3794  tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
3795  tmp0 += z1 + z2;
3796  tmp3 += z1 + z3;
3797 
3798  z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
3799  tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
3800  tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
3801  tmp1 += z1 + z3;
3802  tmp2 += z1 + z2;
3803 
3804  /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
3805 
3806  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
3807  CONST_BITS+PASS1_BITS+3)
3808  & RANGE_MASK];
3809  outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
3810  CONST_BITS+PASS1_BITS+3)
3811  & RANGE_MASK];
3812  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
3813  CONST_BITS+PASS1_BITS+3)
3814  & RANGE_MASK];
3815  outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
3816  CONST_BITS+PASS1_BITS+3)
3817  & RANGE_MASK];
3818  outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
3819  CONST_BITS+PASS1_BITS+3)
3820  & RANGE_MASK];
3821  outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
3822  CONST_BITS+PASS1_BITS+3)
3823  & RANGE_MASK];
3824  outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
3825  CONST_BITS+PASS1_BITS+3)
3826  & RANGE_MASK];
3827  outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
3828  CONST_BITS+PASS1_BITS+3)
3829  & RANGE_MASK];
3830 
3831  wsptr += DCTSIZE; /* advance pointer to next row */
3832  }
3833 }
3834 
3835 
3836 /*
3837  * Perform dequantization and inverse DCT on one block of coefficients,
3838  * producing a 6x3 output block.
3839  *
3840  * 3-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
 *
 * Pass 1 walks the first 6 input columns, dequantizes coefficient rows
 * 0..2 of each column and stores 3 results per column into
 * workspace[6*3] (row stride 6).  The DC term is shifted left by
 * CONST_BITS and the sums are shifted right by CONST_BITS-PASS1_BITS, so
 * the workspace values stay scaled up by PASS1_BITS bits.
 *
 * Pass 2 turns each of the 3 workspace rows into 6 samples written to
 * output_buf[ctr] + output_col.  Each result is shifted right by
 * CONST_BITS+PASS1_BITS+3, masked with RANGE_MASK and used as an index
 * into the range_limit table obtained from IDCT_range_limit(cinfo).
 *
 * NOTE(review): coef_block, output_buf and output_col are used in the body
 * but are not declared in the parameter list as it appears here; the
 * signature looks cut off after compptr -- confirm against upstream.
3841  */
3842 
3843 GLOBAL(void)
3844 jpeg_idct_6x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3847 {
3848  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
3849  INT32 z1, z2, z3;
3850  JCOEFPTR inptr;
3851  ISLOW_MULT_TYPE * quantptr;
3852  int * wsptr;
3853  JSAMPROW outptr;
3854  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3855  int ctr;
3856  int workspace[6*3]; /* buffers data between passes: 3 rows of 6 ints */
3857  SHIFT_TEMPS
3858 
3859  /* Pass 1: process columns from input, store into work array.
3860  * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
 * Only coefficient rows 0..2 of the first 6 columns are read.
3861  */
3862 
3863  inptr = coef_block;
3864  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3865  wsptr = workspace;
3866  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) { /* one input column per iteration */
3867  /* Even part */
3868 
3869  tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3870  tmp0 <<= CONST_BITS;
3871  /* Add fudge factor here for final descale. */
3872  tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); /* half of the divisor applied by the final right shift */
3873  tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3874  tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
3875  tmp10 = tmp0 + tmp12;
3876  tmp2 = tmp0 - tmp12 - tmp12; /* feeds workspace row 1, which gets no odd-part term */
3877 
3878  /* Odd part */
3879 
3880  tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3881  tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
3882 
3883  /* Final output stage */
3884 
3885  wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
3886  wsptr[6*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
3887  wsptr[6*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
3888  }
3889 
3890  /* Pass 2: process 3 rows from work array, store into output array.
3891  * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
 * Each row supplies 6 inputs (wsptr[0..5]) and yields 6 output samples.
3892  */
3893 
3894  wsptr = workspace;
3895  for (ctr = 0; ctr < 3; ctr++) { /* one output row per iteration */
3896  outptr = output_buf[ctr] + output_col;
3897 
3898  /* Even part */
3899 
3900  /* Add range center and fudge factor for final descale and range-limit. */
 /* After the shift by CONST_BITS below, ONE << (PASS1_BITS+2) equals half
  * of the divisor used by the final CONST_BITS+PASS1_BITS+3 right shift.
  */
3901  tmp0 = (INT32) wsptr[0] +
3902  ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3903  (ONE << (PASS1_BITS+2)));
3904  tmp0 <<= CONST_BITS;
3905  tmp2 = (INT32) wsptr[4];
3906  tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
3907  tmp1 = tmp0 + tmp10;
3908  tmp11 = tmp0 - tmp10 - tmp10;
3909  tmp10 = (INT32) wsptr[2];
3910  tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
3911  tmp10 = tmp1 + tmp0;
3912  tmp12 = tmp1 - tmp0;
3913 
3914  /* Odd part */
 /* tmp0..tmp2 are reused here; the even results live on in tmp10..tmp12. */
3915 
3916  z1 = (INT32) wsptr[1];
3917  z2 = (INT32) wsptr[3];
3918  z3 = (INT32) wsptr[5];
3919  tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
3920  tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
3921  tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
3922  tmp1 = (z1 - z2 - z3) << CONST_BITS;
3923 
3924  /* Final output stage */
 /* Samples k and 5-k share an even term and differ in the sign of the odd term. */
3925 
3926  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
3927  CONST_BITS+PASS1_BITS+3)
3928  & RANGE_MASK];
3929  outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
3930  CONST_BITS+PASS1_BITS+3)
3931  & RANGE_MASK];
3932  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
3933  CONST_BITS+PASS1_BITS+3)
3934  & RANGE_MASK];
3935  outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
3936  CONST_BITS+PASS1_BITS+3)
3937  & RANGE_MASK];
3938  outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
3939  CONST_BITS+PASS1_BITS+3)
3940  & RANGE_MASK];
3941  outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
3942  CONST_BITS+PASS1_BITS+3)
3943  & RANGE_MASK];
3944 
3945  wsptr += 6; /* advance pointer to next row */
3946  }
3947 }
3948 
3949 
3950 /*
3951  * Perform dequantization and inverse DCT on one block of coefficients,
3952  * producing a 4x2 output block.
3953  *
3954  * 2-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
 *
 * Pass 1 walks the first 4 input columns, dequantizes coefficient rows 0
 * and 1 of each column and stores their sum and difference into
 * workspace[4*2] (row stride 4).  No PASS1_BITS scaling is applied in
 * this pass, which is why the workspace holds INT32 values and pass 2
 * descales by CONST_BITS+3 only.
 *
 * Pass 2 turns each of the 2 workspace rows into 4 samples written to
 * output_buf[ctr] + output_col.  Each result is masked with RANGE_MASK
 * and used as an index into the range_limit table obtained from
 * IDCT_range_limit(cinfo).
 *
 * NOTE(review): coef_block, output_buf and output_col are used in the body
 * but are not declared in the parameter list as it appears here; the
 * signature looks cut off after compptr -- confirm against upstream.
3955  */
3956 
3957 GLOBAL(void)
3958 jpeg_idct_4x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3961 {
3962  INT32 tmp0, tmp2, tmp10, tmp12;
3963  INT32 z1, z2, z3;
3964  JCOEFPTR inptr;
3965  ISLOW_MULT_TYPE * quantptr;
3966  INT32 * wsptr;
3967  JSAMPROW outptr;
3968  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3969  int ctr;
3970  INT32 workspace[4*2]; /* buffers data between passes: 2 rows of 4 values */
3971  SHIFT_TEMPS
3972 
3973  /* Pass 1: process columns from input, store into work array. */
3974 
3975  inptr = coef_block;
3976  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3977  wsptr = workspace;
3978  for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) { /* one input column per iteration */
3979  /* Even part */
3980 
3981  tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3982 
3983  /* Odd part */
3984 
3985  tmp0 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3986 
3987  /* Final output stage */
 /* 2-point butterfly: plain sum and difference, no multiplication or shift. */
3988 
3989  wsptr[4*0] = tmp10 + tmp0;
3990  wsptr[4*1] = tmp10 - tmp0;
3991  }
3992 
3993  /* Pass 2: process 2 rows from work array, store into output array.
3994  * 4-point IDCT kernel,
3995  * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
3996  */
3997 
3998  wsptr = workspace;
3999  for (ctr = 0; ctr < 2; ctr++) { /* one output row per iteration */
4000  outptr = output_buf[ctr] + output_col;
4001 
4002  /* Even part */
4003 
4004  /* Add range center and fudge factor for final descale and range-limit. */
 /* After the shift by CONST_BITS below, ONE << 2 equals half of the
  * divisor used by the final CONST_BITS+3 right shift.
  */
4005  tmp0 = wsptr[0] + ((((INT32) RANGE_CENTER) << 3) + (ONE << 2));
4006  tmp2 = wsptr[2];
4007 
4008  tmp10 = (tmp0 + tmp2) << CONST_BITS;
4009  tmp12 = (tmp0 - tmp2) << CONST_BITS;
4010 
4011  /* Odd part */
4012  /* Same rotation as in the even part of the 8x8 LL&M IDCT */
4013 
4014  z2 = wsptr[1];
4015  z3 = wsptr[3];
4016 
4017  z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4018  tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4019  tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4020 
4021  /* Final output stage */
 /* Samples k and 3-k share an even term and differ in the sign of the odd term. */
4022 
4023  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
4024  CONST_BITS+3)
4025  & RANGE_MASK];
4026  outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
4027  CONST_BITS+3)
4028  & RANGE_MASK];
4029  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
4030  CONST_BITS+3)
4031  & RANGE_MASK];
4032  outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
4033  CONST_BITS+3)
4034  & RANGE_MASK];
4035 
4036  wsptr += 4; /* advance pointer to next row */
4037  }
4038 }
4039 
4040 
4041 /*
4042  * Perform dequantization and inverse DCT on one block of coefficients,
4043  * producing a 2x1 output block.
4044  *
4045  * 1-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
 *
 * Only coef_block[0] and coef_block[1] are read.  They are dequantized
 * with quantptr[0] and quantptr[1], combined as sum and difference, and
 * the two results are written to output_buf[0] + output_col.  No
 * workspace is needed, and the arithmetic uses DCTELEM with the
 * IRIGHT_SHIFT/ISHIFT_TEMPS macros rather than the INT32 and RIGHT_SHIFT
 * forms used by the larger kernels in this file.
 *
 * NOTE(review): coef_block, output_buf and output_col are used in the body
 * but are not declared in the parameter list as it appears here; the
 * signature looks cut off after compptr -- confirm against upstream.
4046  */
4047 
4048 GLOBAL(void)
4049 jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4052 {
4053  DCTELEM tmp0, tmp1;
4054  ISLOW_MULT_TYPE * quantptr;
4055  JSAMPROW outptr;
4056  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4057  ISHIFT_TEMPS
4058 
4059  /* Pass 1: empty. */
4060 
4061  /* Pass 2: process 1 row from input, store into output array. */
4062 
4063  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4064  outptr = output_buf[0] + output_col;
4065 
4066  /* Even part */
4067 
4068  tmp0 = DEQUANTIZE(coef_block[0], quantptr[0]);
4069  /* Add range center and fudge factor for final descale and range-limit. */
4070  tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2); /* 1 << 2 is half of the divisor of the final shift by 3 */
4071 
4072  /* Odd part */
4073 
4074  tmp1 = DEQUANTIZE(coef_block[1], quantptr[1]);
4075 
4076  /* Final output stage */
 /* The result is masked with RANGE_MASK and used as an index into range_limit. */
4077 
4078  outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
4079  outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
4080 }
4081 
4082 
4083 /*
4084  * Perform dequantization and inverse DCT on one block of coefficients,
4085  * producing an 8x16 output block.
4086  *
4087  * 16-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
 *
 * Pass 1 walks the 8 input columns, dequantizes all 8 coefficient rows of
 * each column and stores 16 results per column into workspace[8*16] (row
 * stride 8).  The DC term is shifted left by CONST_BITS and the sums are
 * shifted right by CONST_BITS-PASS1_BITS, so the workspace values stay
 * scaled up by PASS1_BITS bits.
 *
 * Pass 2 turns each of the 16 workspace rows into 8 samples written to
 * output_buf[ctr] + output_col.  Each result is shifted right by
 * CONST_BITS+PASS1_BITS+3, masked with RANGE_MASK and used as an index
 * into the range_limit table obtained from IDCT_range_limit(cinfo).
 *
 * NOTE(review): coef_block, output_buf and output_col are used in the body
 * but are not declared in the parameter list as it appears here; the
 * signature looks cut off after compptr -- confirm against upstream.
4088  */
4089 
4090 GLOBAL(void)
4091 jpeg_idct_8x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4094 {
4095  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
4096  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
4097  INT32 z1, z2, z3, z4;
4098  JCOEFPTR inptr;
4099  ISLOW_MULT_TYPE * quantptr;
4100  int * wsptr;
4101  JSAMPROW outptr;
4102  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4103  int ctr;
4104  int workspace[8*16]; /* buffers data between passes: 16 rows of 8 ints */
4105  SHIFT_TEMPS
4106 
4107  /* Pass 1: process columns from input, store into work array.
4108  * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
 * All 8 coefficient rows of each column are read.
4109  */
4110 
4111  inptr = coef_block;
4112  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4113  wsptr = workspace;
4114  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { /* one input column per iteration */
4115  /* Even part */
4116 
4117  tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4118  tmp0 <<= CONST_BITS;
4119  /* Add fudge factor here for final descale. */
4120  tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); /* half of the divisor applied by the final right shift */
4121 
4122  z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4123  tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
4124  tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
4125 
4126  tmp10 = tmp0 + tmp1;
4127  tmp11 = tmp0 - tmp1;
4128  tmp12 = tmp0 + tmp2;
4129  tmp13 = tmp0 - tmp2;
4130 
4131  z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4132  z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4133  z3 = z1 - z2;
4134  z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
4135  z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
4136 
4137  tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
4138  tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
4139  tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
4140  tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
4141 
4142  tmp20 = tmp10 + tmp0;
4143  tmp27 = tmp10 - tmp0;
4144  tmp21 = tmp12 + tmp1;
4145  tmp26 = tmp12 - tmp1;
4146  tmp22 = tmp13 + tmp2;
4147  tmp25 = tmp13 - tmp2;
4148  tmp23 = tmp11 + tmp3;
4149  tmp24 = tmp11 - tmp3;
4150 
4151  /* Odd part */
 /* tmp0..tmp3 and tmp10..tmp13 are reused for the eight odd-part terms;
  * the even results live on in tmp20..tmp27.
  */
4152 
4153  z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4154  z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4155  z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4156  z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4157 
4158  tmp11 = z1 + z3;
4159 
4160  tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
4161  tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
4162  tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
4163  tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
4164  tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
4165  tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
4166  tmp0 = tmp1 + tmp2 + tmp3 -
4167  MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
4168  tmp13 = tmp10 + tmp11 + tmp12 -
4169  MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
4170  z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
4171  tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
4172  tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
4173  z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
4174  tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
4175  tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
4176  z2 += z4;
4177  z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
4178  tmp1 += z1;
4179  tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
4180  z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
4181  tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
4182  tmp12 += z2;
4183  z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
4184  tmp2 += z2;
4185  tmp3 += z2;
4186  z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
4187  tmp10 += z2;
4188  tmp11 += z2;
4189 
4190  /* Final output stage */
 /* Rows k and 15-k share an even term and differ in the sign of the odd term. */
4191 
4192  wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS);
4193  wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS);
4194  wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS);
4195  wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS);
4196  wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS);
4197  wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS);
4198  wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS);
4199  wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS);
4200  wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
4201  wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
4202  wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
4203  wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
4204  wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
4205  wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
4206  wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
4207  wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
4208  }
4209 
4210  /* Pass 2: process rows from work array, store into output array.
4211  * Note that we must descale the results by a factor of 8 == 2**3,
4212  * and also undo the PASS1_BITS scaling.
4213  * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
4214  */
4215 
4216  wsptr = workspace;
4217  for (ctr = 0; ctr < 16; ctr++) { /* one output row per iteration */
4218  outptr = output_buf[ctr] + output_col;
4219 
4220  /* Even part: reverse the even part of the forward DCT.
4221  * The rotator is c(-6).
4222  */
4223 
4224  /* Add range center and fudge factor for final descale and range-limit. */
 /* After the shift by CONST_BITS below, ONE << (PASS1_BITS+2) equals half
  * of the divisor used by the final CONST_BITS+PASS1_BITS+3 right shift.
  */
4225  z2 = (INT32) wsptr[0] +
4226  ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4227  (ONE << (PASS1_BITS+2)));
4228  z3 = (INT32) wsptr[4];
4229 
4230  tmp0 = (z2 + z3) << CONST_BITS;
4231  tmp1 = (z2 - z3) << CONST_BITS;
4232 
4233  z2 = (INT32) wsptr[2];
4234  z3 = (INT32) wsptr[6];
4235 
4236  z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4237  tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4238  tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4239 
4240  tmp10 = tmp0 + tmp2;
4241  tmp13 = tmp0 - tmp2;
4242  tmp11 = tmp1 + tmp3;
4243  tmp12 = tmp1 - tmp3;
4244 
4245  /* Odd part per figure 8; the matrix is unitary and hence its
4246  * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
 * tmp0..tmp3 are reused here; the even results live on in tmp10..tmp13.
4247  */
4248 
4249  tmp0 = (INT32) wsptr[7];
4250  tmp1 = (INT32) wsptr[5];
4251  tmp2 = (INT32) wsptr[3];
4252  tmp3 = (INT32) wsptr[1];
4253 
4254  z2 = tmp0 + tmp2;
4255  z3 = tmp1 + tmp3;
4256 
4257  z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
4258  z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
4259  z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
4260  z2 += z1;
4261  z3 += z1;
4262 
4263  z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
4264  tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
4265  tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
4266  tmp0 += z1 + z2;
4267  tmp3 += z1 + z3;
4268 
4269  z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
4270  tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
4271  tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
4272  tmp1 += z1 + z3;
4273  tmp2 += z1 + z2;
4274 
4275  /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
4276 
4277  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
4278  CONST_BITS+PASS1_BITS+3)
4279  & RANGE_MASK];
4280  outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
4281  CONST_BITS+PASS1_BITS+3)
4282  & RANGE_MASK];
4283  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
4284  CONST_BITS+PASS1_BITS+3)
4285  & RANGE_MASK];
4286  outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
4287  CONST_BITS+PASS1_BITS+3)
4288  & RANGE_MASK];
4289  outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
4290  CONST_BITS+PASS1_BITS+3)
4291  & RANGE_MASK];
4292  outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
4293  CONST_BITS+PASS1_BITS+3)
4294  & RANGE_MASK];
4295  outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
4296  CONST_BITS+PASS1_BITS+3)
4297  & RANGE_MASK];
4298  outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
4299  CONST_BITS+PASS1_BITS+3)
4300  & RANGE_MASK];
4301 
4302  wsptr += DCTSIZE; /* advance pointer to next row */
4303  }
4304 }
4305 
4306 
4307 /*
4308  * Perform dequantization and inverse DCT on one block of coefficients,
4309  * producing a 7x14 output block.
4310  *
4311  * 14-point IDCT in pass 1 (columns), 7-point in pass 2 (rows).
4312  */
4313 
4314 GLOBAL(void)
4315 jpeg_idct_7x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4318 {
4319  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
4320  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
4321  INT32 z1, z2, z3, z4;
4322  JCOEFPTR inptr;
4323  ISLOW_MULT_TYPE * quantptr;
4324  int * wsptr;
4325  JSAMPROW outptr;
4326  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4327  int ctr;
4328  int workspace[7*14]; /* buffers data between passes */
4329  SHIFT_TEMPS
4330 
4331  /* Pass 1: process columns from input, store into work array.
4332  * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
4333  */
4334 
4335  inptr = coef_block;
4336  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4337  wsptr = workspace;
4338  for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
4339  /* Even part */
4340 
4341  z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4342  z1 <<= CONST_BITS;
4343  /* Add fudge factor here for final descale. */
4344  z1 += ONE << (CONST_BITS-PASS1_BITS-1);
4345  z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4346  z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
4347  z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
4348  z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
4349 
4350  tmp10 = z1 + z2;
4351  tmp11 = z1 + z3;
4352  tmp12 = z1 - z4;
4353 
4354  tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
4355  CONST_BITS-PASS1_BITS);
4356 
4357  z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4358  z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4359 
4360  z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
4361 
4362  tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
4363  tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
4364  tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
4365  MULTIPLY(z2, FIX(1.378756276)); /* c2 */
4366 
4367  tmp20 = tmp10 + tmp13;
4368  tmp26 = tmp10 - tmp13;
4369  tmp21 = tmp11 + tmp14;
4370  tmp25 = tmp11 - tmp14;
4371  tmp22 = tmp12 + tmp15;
4372  tmp24 = tmp12 - tmp15;
4373 
4374  /* Odd part */
4375 
4376  z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4377  z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4378  z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4379  z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4380  tmp13 = z4 << CONST_BITS;
4381 
4382  tmp14 = z1 + z3;
4383  tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
4384  tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
4385  tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
4386  tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
4387  tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
4388  z1 -= z2;
4389  tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */
4390  tmp16 += tmp15;
4391  z1 += z4;
4392  z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
4393  tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
4394  tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
4395  z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
4396  tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
4397  tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
4398 
4399  tmp13 = (z1 - z3) << PASS1_BITS;
4400 
4401  /* Final output stage */
4402 
4403  wsptr[7*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4404  wsptr[7*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4405  wsptr[7*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4406  wsptr[7*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4407  wsptr[7*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
4408  wsptr[7*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
4409  wsptr[7*3] = (int) (tmp23 + tmp13);
4410  wsptr[7*10] = (int) (tmp23 - tmp13);
4411  wsptr[7*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4412  wsptr[7*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4413  wsptr[7*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
4414  wsptr[7*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
4415  wsptr[7*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
4416  wsptr[7*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
4417  }
4418 
4419  /* Pass 2: process 14 rows from work array, store into output array.
4420  * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
4421  */
4422 
4423  wsptr = workspace;
4424  for (ctr = 0; ctr < 14; ctr++) {
4425  outptr = output_buf[ctr] + output_col;
4426 
4427  /* Even part */
4428 
4429  /* Add range center and fudge factor for final descale and range-limit. */
4430  tmp23 = (INT32) wsptr[0] +
4431  ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4432  (ONE << (PASS1_BITS+2)));
4433  tmp23 <<= CONST_BITS;
4434 
4435  z1 = (INT32) wsptr[2];
4436  z2 = (INT32) wsptr[4];
4437  z3 = (INT32) wsptr[6];
4438 
4439  tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
4440  tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
4441  tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
4442  tmp10 = z1 + z3;
4443  z2 -= tmp10;
4444  tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
4445  tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
4446  tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
4447  tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
4448 
4449  /* Odd part */
4450 
4451  z1 = (INT32) wsptr[1];
4452  z2 = (INT32) wsptr[3];
4453  z3 = (INT32) wsptr[5];
4454 
4455  tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
4456  tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
4457  tmp10 = tmp11 - tmp12;
4458  tmp11 += tmp12;
4459  tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
4460  tmp11 += tmp12;
4461  z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
4462  tmp10 += z2;
4463  tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
4464 
4465  /* Final output stage */
4466 
4467  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
4468  CONST_BITS+PASS1_BITS+3)
4469  & RANGE_MASK];
4470  outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
4471  CONST_BITS+PASS1_BITS+3)
4472  & RANGE_MASK];
4473  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
4474  CONST_BITS+PASS1_BITS+3)
4475  & RANGE_MASK];
4476  outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
4477  CONST_BITS+PASS1_BITS+3)
4478  & RANGE_MASK];
4479  outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
4480  CONST_BITS+PASS1_BITS+3)
4481  & RANGE_MASK];
4482  outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
4483  CONST_BITS+PASS1_BITS+3)
4484  & RANGE_MASK];
4485  outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23,
4486  CONST_BITS+PASS1_BITS+3)
4487  & RANGE_MASK];
4488 
4489  wsptr += 7; /* advance pointer to next row */
4490  }
4491 }
4492 
4493 
4494 /*
4495  * Perform dequantization and inverse DCT on one block of coefficients,
4496  * producing a 6x12 output block.
4497  *
4498  * 12-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
4499  */
4500 
4501 GLOBAL(void)
4502 jpeg_idct_6x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4505 {
4506  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
4507  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
4508  INT32 z1, z2, z3, z4;
4509  JCOEFPTR inptr;
4510  ISLOW_MULT_TYPE * quantptr;
4511  int * wsptr;
4512  JSAMPROW outptr;
4513  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4514  int ctr;
4515  int workspace[6*12]; /* buffers data between passes */
4516  SHIFT_TEMPS
4517 
4518  /* Pass 1: process columns from input, store into work array.
4519  * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
4520  */
4521 
4522  inptr = coef_block;
4523  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4524  wsptr = workspace;
4525  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
4526  /* Even part */
4527 
4528  z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4529  z3 <<= CONST_BITS;
4530  /* Add fudge factor here for final descale. */
4531  z3 += ONE << (CONST_BITS-PASS1_BITS-1);
4532 
4533  z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4534  z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
4535 
4536  tmp10 = z3 + z4;
4537  tmp11 = z3 - z4;
4538 
4539  z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4540  z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
4541  z1 <<= CONST_BITS;
4542  z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4543  z2 <<= CONST_BITS;
4544 
4545  tmp12 = z1 - z2;
4546 
4547  tmp21 = z3 + tmp12;
4548  tmp24 = z3 - tmp12;
4549 
4550  tmp12 = z4 + z2;
4551 
4552  tmp20 = tmp10 + tmp12;
4553  tmp25 = tmp10 - tmp12;
4554 
4555  tmp12 = z4 - z1 - z2;
4556 
4557  tmp22 = tmp11 + tmp12;
4558  tmp23 = tmp11 - tmp12;
4559 
4560  /* Odd part */
4561 
4562  z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4563  z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4564  z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4565  z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4566 
4567  tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
4568  tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
4569 
4570  tmp10 = z1 + z3;
4571  tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
4572  tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
4573  tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
4574  tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
4575  tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
4576  tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
4577  tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
4578  MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
4579 
4580  z1 -= z4;
4581  z2 -= z3;
4582  z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
4583  tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
4584  tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
4585 
4586  /* Final output stage */
4587 
4588  wsptr[6*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4589  wsptr[6*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4590  wsptr[6*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4591  wsptr[6*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4592  wsptr[6*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
4593  wsptr[6*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
4594  wsptr[6*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
4595  wsptr[6*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
4596  wsptr[6*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4597  wsptr[6*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4598  wsptr[6*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
4599  wsptr[6*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
4600  }
4601 
4602  /* Pass 2: process 12 rows from work array, store into output array.
4603  * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
4604  */
4605 
4606  wsptr = workspace;
4607  for (ctr = 0; ctr < 12; ctr++) {
4608  outptr = output_buf[ctr] + output_col;
4609 
4610  /* Even part */
4611 
4612  /* Add range center and fudge factor for final descale and range-limit. */
4613  tmp10 = (INT32) wsptr[0] +
4614  ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4615  (ONE << (PASS1_BITS+2)));
4616  tmp10 <<= CONST_BITS;
4617  tmp12 = (INT32) wsptr[4];
4618  tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */
4619  tmp11 = tmp10 + tmp20;
4620  tmp21 = tmp10 - tmp20 - tmp20;
4621  tmp20 = (INT32) wsptr[2];
4622  tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */
4623  tmp20 = tmp11 + tmp10;
4624  tmp22 = tmp11 - tmp10;
4625 
4626  /* Odd part */
4627 
4628  z1 = (INT32) wsptr[1];
4629  z2 = (INT32) wsptr[3];
4630  z3 = (INT32) wsptr[5];
4631  tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
4632  tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
4633  tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
4634  tmp11 = (z1 - z2 - z3) << CONST_BITS;
4635 
4636  /* Final output stage */
4637 
4638  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
4639  CONST_BITS+PASS1_BITS+3)
4640  & RANGE_MASK];
4641  outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
4642  CONST_BITS+PASS1_BITS+3)
4643  & RANGE_MASK];
4644  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
4645  CONST_BITS+PASS1_BITS+3)
4646  & RANGE_MASK];
4647  outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
4648  CONST_BITS+PASS1_BITS+3)
4649  & RANGE_MASK];
4650  outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
4651  CONST_BITS+PASS1_BITS+3)
4652  & RANGE_MASK];
4653  outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
4654  CONST_BITS+PASS1_BITS+3)
4655  & RANGE_MASK];
4656 
4657  wsptr += 6; /* advance pointer to next row */
4658  }
4659 }
4660 
4661 
4662 /*
4663  * Perform dequantization and inverse DCT on one block of coefficients,
4664  * producing a 5x10 output block.
4665  *
4666  * 10-point IDCT in pass 1 (columns), 5-point in pass 2 (rows).
4667  */
4668 
4669 GLOBAL(void)
4670 jpeg_idct_5x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4673 {
4674  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
4675  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
4676  INT32 z1, z2, z3, z4, z5;
4677  JCOEFPTR inptr;
4678  ISLOW_MULT_TYPE * quantptr;
4679  int * wsptr;
4680  JSAMPROW outptr;
4681  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4682  int ctr;
4683  int workspace[5*10]; /* buffers data between passes */
4684  SHIFT_TEMPS
4685 
4686  /* Pass 1: process columns from input, store into work array.
4687  * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
4688  */
4689 
4690  inptr = coef_block;
4691  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4692  wsptr = workspace;
4693  for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
4694  /* Even part */
4695 
4696  z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4697  z3 <<= CONST_BITS;
4698  /* Add fudge factor here for final descale. */
4699  z3 += ONE << (CONST_BITS-PASS1_BITS-1);
4700  z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4701  z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
4702  z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
4703  tmp10 = z3 + z1;
4704  tmp11 = z3 - z2;
4705 
4706  tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */
4707  CONST_BITS-PASS1_BITS);
4708 
4709  z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4710  z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4711 
4712  z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
4713  tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
4714  tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
4715 
4716  tmp20 = tmp10 + tmp12;
4717  tmp24 = tmp10 - tmp12;
4718  tmp21 = tmp11 + tmp13;
4719  tmp23 = tmp11 - tmp13;
4720 
4721  /* Odd part */
4722 
4723  z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4724  z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4725  z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4726  z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4727 
4728  tmp11 = z2 + z4;
4729  tmp13 = z2 - z4;
4730 
4731  tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
4732  z5 = z3 << CONST_BITS;
4733 
4734  z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
4735  z4 = z5 + tmp12;
4736 
4737  tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
4738  tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
4739 
4740  z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
4741  z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
4742 
4743  tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
4744 
4745  tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
4746  tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
4747 
4748  /* Final output stage */
4749 
4750  wsptr[5*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4751  wsptr[5*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4752  wsptr[5*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4753  wsptr[5*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4754  wsptr[5*2] = (int) (tmp22 + tmp12);
4755  wsptr[5*7] = (int) (tmp22 - tmp12);
4756  wsptr[5*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
4757  wsptr[5*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
4758  wsptr[5*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4759  wsptr[5*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4760  }
4761 
4762  /* Pass 2: process 10 rows from work array, store into output array.
4763  * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
4764  */
4765 
4766  wsptr = workspace;
4767  for (ctr = 0; ctr < 10; ctr++) {
4768  outptr = output_buf[ctr] + output_col;
4769 
4770  /* Even part */
4771 
4772  /* Add range center and fudge factor for final descale and range-limit. */
4773  tmp12 = (INT32) wsptr[0] +
4774  ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4775  (ONE << (PASS1_BITS+2)));
4776  tmp12 <<= CONST_BITS;
4777  tmp13 = (INT32) wsptr[2];
4778  tmp14 = (INT32) wsptr[4];
4779  z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
4780  z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
4781  z3 = tmp12 + z2;
4782  tmp10 = z3 + z1;
4783  tmp11 = z3 - z1;
4784  tmp12 -= z2 << 2;
4785 
4786  /* Odd part */
4787 
4788  z2 = (INT32) wsptr[1];
4789  z3 = (INT32) wsptr[3];
4790 
4791  z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
4792  tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
4793  tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
4794 
4795  /* Final output stage */
4796 
4797  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp13,
4798  CONST_BITS+PASS1_BITS+3)
4799  & RANGE_MASK];
4800  outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp13,
4801  CONST_BITS+PASS1_BITS+3)
4802  & RANGE_MASK];
4803  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp14,
4804  CONST_BITS+PASS1_BITS+3)
4805  & RANGE_MASK];
4806  outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp14,
4807  CONST_BITS+PASS1_BITS+3)
4808  & RANGE_MASK];
4809  outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
4810  CONST_BITS+PASS1_BITS+3)
4811  & RANGE_MASK];
4812 
4813  wsptr += 5; /* advance pointer to next row */
4814  }
4815 }
4816 
4817 
4818 /*
4819  * Perform dequantization and inverse DCT on one block of coefficients,
4820  * producing a 4x8 output block.
4821  *
4822  * 8-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
4823  */
4824 
4825 GLOBAL(void)
4826 jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4829 {
4830  INT32 tmp0, tmp1, tmp2, tmp3;
4831  INT32 tmp10, tmp11, tmp12, tmp13;
4832  INT32 z1, z2, z3;
4833  JCOEFPTR inptr;
4834  ISLOW_MULT_TYPE * quantptr;
4835  int * wsptr;
4836  JSAMPROW outptr;
4837  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4838  int ctr;
4839  int workspace[4*8]; /* buffers data between passes */
4840  SHIFT_TEMPS
4841 
4842  /* Pass 1: process columns from input, store into work array.
4843  * Note results are scaled up by sqrt(8) compared to a true IDCT;
4844  * furthermore, we scale the results by 2**PASS1_BITS.
4845  * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
4846  */
4847 
4848  inptr = coef_block;
4849  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4850  wsptr = workspace;
4851  for (ctr = 4; ctr > 0; ctr--) {
4852  /* Due to quantization, we will usually find that many of the input
4853  * coefficients are zero, especially the AC terms. We can exploit this
4854  * by short-circuiting the IDCT calculation for any column in which all
4855  * the AC terms are zero. In that case each output is equal to the
4856  * DC coefficient (with scale factor as needed).
4857  * With typical images and quantization tables, half or more of the
4858  * column DCT calculations can be simplified this way.
4859  */
4860 
4861  if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
4862  inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
4863  inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
4864  inptr[DCTSIZE*7] == 0) {
4865  /* AC terms all zero */
4866  int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
4867 
4868  wsptr[4*0] = dcval;
4869  wsptr[4*1] = dcval;
4870  wsptr[4*2] = dcval;
4871  wsptr[4*3] = dcval;
4872  wsptr[4*4] = dcval;
4873  wsptr[4*5] = dcval;
4874  wsptr[4*6] = dcval;
4875  wsptr[4*7] = dcval;
4876 
4877  inptr++; /* advance pointers to next column */
4878  quantptr++;
4879  wsptr++;
4880  continue;
4881  }
4882 
4883  /* Even part: reverse the even part of the forward DCT.
4884  * The rotator is c(-6).
4885  */
4886 
4887  z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4888  z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4889  z2 <<= CONST_BITS;
4890  z3 <<= CONST_BITS;
4891  /* Add fudge factor here for final descale. */
4892  z2 += ONE << (CONST_BITS-PASS1_BITS-1);
4893 
4894  tmp0 = z2 + z3;
4895  tmp1 = z2 - z3;
4896 
4897  z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4898  z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4899 
4900  z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4901  tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4902  tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4903 
4904  tmp10 = tmp0 + tmp2;
4905  tmp13 = tmp0 - tmp2;
4906  tmp11 = tmp1 + tmp3;
4907  tmp12 = tmp1 - tmp3;
4908 
4909  /* Odd part per figure 8; the matrix is unitary and hence its
4910  * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
4911  */
4912 
4913  tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4914  tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4915  tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4916  tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4917 
4918  z2 = tmp0 + tmp2;
4919  z3 = tmp1 + tmp3;
4920 
4921  z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
4922  z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
4923  z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
4924  z2 += z1;
4925  z3 += z1;
4926 
4927  z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
4928  tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
4929  tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
4930  tmp0 += z1 + z2;
4931  tmp3 += z1 + z3;
4932 
4933  z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
4934  tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
4935  tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
4936  tmp1 += z1 + z3;
4937  tmp2 += z1 + z2;
4938 
4939  /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
4940 
4941  wsptr[4*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
4942  wsptr[4*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
4943  wsptr[4*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
4944  wsptr[4*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
4945  wsptr[4*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
4946  wsptr[4*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
4947  wsptr[4*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
4948  wsptr[4*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
4949 
4950  inptr++; /* advance pointers to next column */
4951  quantptr++;
4952  wsptr++;
4953  }
4954 
4955  /* Pass 2: process 8 rows from work array, store into output array.
4956  * 4-point IDCT kernel,
4957  * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
4958  */
4959 
4960  wsptr = workspace;
4961  for (ctr = 0; ctr < 8; ctr++) {
4962  outptr = output_buf[ctr] + output_col;
4963 
4964  /* Even part */
4965 
4966  /* Add range center and fudge factor for final descale and range-limit. */
4967  tmp0 = (INT32) wsptr[0] +
4968  ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4969  (ONE << (PASS1_BITS+2)));
4970  tmp2 = (INT32) wsptr[2];
4971 
4972  tmp10 = (tmp0 + tmp2) << CONST_BITS;
4973  tmp12 = (tmp0 - tmp2) << CONST_BITS;
4974 
4975  /* Odd part */
4976  /* Same rotation as in the even part of the 8x8 LL&M IDCT */
4977 
4978  z2 = (INT32) wsptr[1];
4979  z3 = (INT32) wsptr[3];
4980 
4981  z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4982  tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4983  tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4984 
4985  /* Final output stage */
4986 
4987  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
4988  CONST_BITS+PASS1_BITS+3)
4989  & RANGE_MASK];
4990  outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
4991  CONST_BITS+PASS1_BITS+3)
4992  & RANGE_MASK];
4993  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
4994  CONST_BITS+PASS1_BITS+3)
4995  & RANGE_MASK];
4996  outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
4997  CONST_BITS+PASS1_BITS+3)
4998  & RANGE_MASK];
4999 
5000  wsptr += 4; /* advance pointer to next row */
5001  }
5002 }
5003 
5004 
5005 /*
5006  * Perform dequantization and inverse DCT on one block of coefficients,
5007  * producing a 3x6 output block.
5008  *
5009  * 6-point IDCT in pass 1 (columns), 3-point in pass 2 (rows).
5010  */
5011 
5012 GLOBAL(void)
5013 jpeg_idct_3x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5016 {
5017  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
5018  INT32 z1, z2, z3;
5019  JCOEFPTR inptr;
5020  ISLOW_MULT_TYPE * quantptr;
5021  int * wsptr;
5022  JSAMPROW outptr;
5023  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5024  int ctr;
5025  int workspace[3*6]; /* buffers data between passes */
5026  SHIFT_TEMPS
5027 
5028  /* Pass 1: process columns from input, store into work array.
5029  * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
5030  */
5031 
5032  inptr = coef_block;
5033  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5034  wsptr = workspace;
5035  for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
5036  /* Even part */
5037 
5038  tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
5039  tmp0 <<= CONST_BITS;
5040  /* Add fudge factor here for final descale. */
5041  tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
5042  tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
5043  tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
5044  tmp1 = tmp0 + tmp10;
5045  tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
5046  tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
5047  tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
5048  tmp10 = tmp1 + tmp0;
5049  tmp12 = tmp1 - tmp0;
5050 
5051  /* Odd part */
5052 
5053  z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
5054  z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
5055  z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
5056  tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
5057  tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
5058  tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
5059  tmp1 = (z1 - z2 - z3) << PASS1_BITS;
5060 
5061  /* Final output stage */
5062 
5063  wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
5064  wsptr[3*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
5065  wsptr[3*1] = (int) (tmp11 + tmp1);
5066  wsptr[3*4] = (int) (tmp11 - tmp1);
5067  wsptr[3*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
5068  wsptr[3*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
5069  }
5070 
5071  /* Pass 2: process 6 rows from work array, store into output array.
5072  * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
5073  */
5074 
5075  wsptr = workspace;
5076  for (ctr = 0; ctr < 6; ctr++) {
5077  outptr = output_buf[ctr] + output_col;
5078 
5079  /* Even part */
5080 
5081  /* Add range center and fudge factor for final descale and range-limit. */
5082  tmp0 = (INT32) wsptr[0] +
5083  ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
5084  (ONE << (PASS1_BITS+2)));
5085  tmp0 <<= CONST_BITS;
5086  tmp2 = (INT32) wsptr[2];
5087  tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
5088  tmp10 = tmp0 + tmp12;
5089  tmp2 = tmp0 - tmp12 - tmp12;
5090 
5091  /* Odd part */
5092 
5093  tmp12 = (INT32) wsptr[1];
5094  tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
5095 
5096  /* Final output stage */
5097 
5098  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
5099  CONST_BITS+PASS1_BITS+3)
5100  & RANGE_MASK];
5101  outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
5102  CONST_BITS+PASS1_BITS+3)
5103  & RANGE_MASK];
5104  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
5105  CONST_BITS+PASS1_BITS+3)
5106  & RANGE_MASK];
5107 
5108  wsptr += 3; /* advance pointer to next row */
5109  }
5110 }
5111 
5112 
5113 /*
5114  * Perform dequantization and inverse DCT on one block of coefficients,
5115  * producing a 2x4 output block.
5116  *
5117  * 4-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
5118  */
5119 
5120 GLOBAL(void)
5121 jpeg_idct_2x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5124 {
5125  INT32 tmp0, tmp2, tmp10, tmp12;
5126  INT32 z1, z2, z3;
5127  JCOEFPTR inptr;
5128  ISLOW_MULT_TYPE * quantptr;
5129  INT32 * wsptr;
5130  JSAMPROW outptr;
5131  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5132  int ctr;
5133  INT32 workspace[2*4]; /* buffers data between passes */
5134  SHIFT_TEMPS
5135 
5136  /* Pass 1: process columns from input, store into work array.
5137  * 4-point IDCT kernel,
5138  * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
5139  */
5140 
5141  inptr = coef_block;
5142  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5143  wsptr = workspace;
5144  for (ctr = 0; ctr < 2; ctr++, inptr++, quantptr++, wsptr++) {
5145  /* Even part */
5146 
5147  tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
5148  tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
5149 
5150  tmp10 = (tmp0 + tmp2) << CONST_BITS;
5151  tmp12 = (tmp0 - tmp2) << CONST_BITS;
5152 
5153  /* Odd part */
5154  /* Same rotation as in the even part of the 8x8 LL&M IDCT */
5155 
5156  z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
5157  z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
5158 
5159  z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
5160  tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
5161  tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
5162 
5163  /* Final output stage */
5164 
5165  wsptr[2*0] = tmp10 + tmp0;
5166  wsptr[2*3] = tmp10 - tmp0;
5167  wsptr[2*1] = tmp12 + tmp2;
5168  wsptr[2*2] = tmp12 - tmp2;
5169  }
5170 
5171  /* Pass 2: process 4 rows from work array, store into output array. */
5172 
5173  wsptr = workspace;
5174  for (ctr = 0; ctr < 4; ctr++) {
5175  outptr = output_buf[ctr] + output_col;
5176 
5177  /* Even part */
5178 
5179  /* Add range center and fudge factor for final descale and range-limit. */
5180  tmp10 = wsptr[0] +
5181  ((((INT32) RANGE_CENTER) << (CONST_BITS+3)) +
5182  (ONE << (CONST_BITS+2)));
5183 
5184  /* Odd part */
5185 
5186  tmp0 = wsptr[1];
5187 
5188  /* Final output stage */
5189 
5190  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS+3)
5191  & RANGE_MASK];
5192  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS+3)
5193  & RANGE_MASK];
5194 
5195  wsptr += 2; /* advance pointer to next row */
5196  }
5197 }
5198 
5199 
5200 /*
5201  * Perform dequantization and inverse DCT on one block of coefficients,
5202  * producing a 1x2 output block.
5203  *
5204  * 2-point IDCT in pass 1 (columns), 1-point in pass 2 (rows).
5205  */
5206 
5207 GLOBAL(void)
5208 jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5211 {
5212  DCTELEM tmp0, tmp1;
5213  ISLOW_MULT_TYPE * quantptr;
5214  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5215  ISHIFT_TEMPS
5216 
5217  /* Process 1 column from input, store into output array. */
5218 
5219  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5220 
5221  /* Even part */
5222 
5223  tmp0 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
5224  /* Add range center and fudge factor for final descale and range-limit. */
5225  tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
5226 
5227  /* Odd part */
5228 
5229  tmp1 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
5230 
5231  /* Final output stage */
5232 
5233  output_buf[0][output_col] =
5234  range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
5235  output_buf[1][output_col] =
5236  range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
5237 }
5238 
5239 #endif /* IDCT_SCALING_SUPPORTED */
5240 #endif /* DCT_ISLOW_SUPPORTED */
#define IDCT_range_limit(cinfo)
Definition: jdct.h:90
char JSAMPLE
Definition: jmorecfg.h:74
JSAMPLE FAR * JSAMPROW
Definition: jpeglib.h:75
jpeg_component_info JCOEFPTR coef_block
Definition: jdct.h:238
#define RANGE_MASK
Definition: jdct.h:87
INT32 DCTELEM
Definition: jdct.h:38
#define ONE
Definition: jdct.h:351
#define SHIFT_TEMPS
Definition: jpegint.h:301
jpeg_component_info * compptr
Definition: jdct.h:238
jpeg_component_info JCOEFPTR JSAMPARRAY JDIMENSION output_col
Definition: jdct.h:238
static int blocks
Definition: mkdosfs.c:527
#define DCTSIZE2
Definition: jpeglib.h:51
MULTIPLIER ISLOW_MULT_TYPE
Definition: jdct.h:66
#define for
Definition: utility.h:88
JCOEF FAR * JCOEFPTR
Definition: jpeglib.h:84
#define RANGE_CENTER
Definition: jpegint.h:273
#define IRIGHT_SHIFT(x, shft)
Definition: jcarith.c:111
Definition: inflate.c:139
JSAMPROW * JSAMPARRAY
Definition: jpeglib.h:76
#define ISHIFT_TEMPS
Definition: jcarith.c:110
#define GLOBAL(type)
Definition: jmorecfg.h:291
#define FIX(x)
Definition: jccolor.c:74
#define RIGHT_SHIFT(x, shft)
Definition: jpegint.h:302
#define DCTSIZE
Definition: jpeglib.h:50
jpeg_component_info JCOEFPTR JSAMPARRAY output_buf
Definition: jdct.h:238
unsigned int JDIMENSION
Definition: jmorecfg.h:229
signed int INT32
Sorry
Definition: jdcolor.c:19
unsigned int(__cdecl typeof(jpeg_read_scanlines))(struct jpeg_decompress_struct *
Definition: typeof.h:31