/* Listing taken from ReactOS 0.4.16-dev-106-g10b08aa, file jidctint.c. */
/*
 * jidctint.c
 *
 * Copyright (C) 1991-1998, Thomas G. Lane.
 * Modification developed 2002-2018 by Guido Vollbeding.
 * This file is part of the Independent JPEG Group's software.
 * For conditions of distribution and use, see the accompanying README file.
 *
 * This file contains a slow-but-accurate integer implementation of the
 * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
 * must also perform dequantization of the input coefficients.
 *
 * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
 * on each row (or vice versa, but it's more convenient to emit a row at
 * a time).  Direct algorithms are also available, but they are much more
 * complex and seem not to be any faster when reduced to code.
 *
 * This implementation is based on an algorithm described in
 *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
 *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
 *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
 * The primary algorithm described there uses 11 multiplies and 29 adds.
 * We use their alternate method with 12 multiplies and 32 adds.
 * The advantage of this method is that no data path contains more than one
 * multiplication; this allows a very simple and accurate implementation in
 * scaled fixed-point arithmetic, with a minimal number of shifts.
 *
 * We also provide IDCT routines with various output sample block sizes for
 * direct resolution reduction or enlargement and for direct resolving the
 * common 2x1 and 1x2 subsampling cases without additional resampling: NxN
 * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 input DCT block.
 *
 * For N<8 we simply take the corresponding low-frequency coefficients of
 * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block
 * to yield the downscaled outputs.
 * This can be seen as direct low-pass downsampling from the DCT domain
 * point of view rather than the usual spatial domain point of view,
 * yielding significant computational savings and results at least
 * as good as common bilinear (averaging) spatial downsampling.
 *
 * For N>8 we apply a partial NxN IDCT on the 8 input coefficients as
 * lower frequencies and higher frequencies assumed to be zero.
 * It turns out that the computational effort is similar to the 8x8 IDCT
 * regarding the output size.
 * Furthermore, the scaling and descaling is the same for all IDCT sizes.
 *
 * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
 * since there would be too many additional constants to pre-calculate.
 */

51#define JPEG_INTERNALS
52#include "jinclude.h"
53#include "jpeglib.h"
54#include "jdct.h" /* Private declarations for DCT subsystem */
55
56#ifdef DCT_ISLOW_SUPPORTED
57
58
59/*
60 * This module is specialized to the case DCTSIZE = 8.
61 */
62
63#if DCTSIZE != 8
64 Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
65#endif
66
67
/*
 * The poop on this scaling stuff is as follows:
 *
 * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
 * larger than the true IDCT outputs.  The final outputs are therefore
 * a factor of N larger than desired; since N=8 this can be cured by
 * a simple right shift at the end of the algorithm.  The advantage of
 * this arrangement is that we save two multiplications per 1-D IDCT,
 * because the y0 and y4 inputs need not be divided by sqrt(N).
 *
 * We have to do addition and subtraction of the integer inputs, which
 * is no problem, and multiplication by fractional constants, which is
 * a problem to do in integer arithmetic.  We multiply all the constants
 * by CONST_SCALE and convert them to integer constants (thus retaining
 * CONST_BITS bits of precision in the constants).  After doing a
 * multiplication we have to divide the product by CONST_SCALE, with proper
 * rounding, to produce the correct output.  This division can be done
 * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
 * as long as possible so that partial sums can be added together with
 * full fractional precision.
 *
 * The outputs of the first pass are scaled up by PASS1_BITS bits so that
 * they are represented to better-than-integral precision.  These outputs
 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
 * with the recommended scaling.  (To scale up 12-bit sample data further, an
 * intermediate INT32 array would be needed.)
 *
 * To avoid overflow of the 32-bit intermediate results in pass 2, we must
 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
 * shows that the values given below are the most effective.
 */

#if BITS_IN_JSAMPLE == 8
#define CONST_BITS  13
#define PASS1_BITS  2
#else
#define CONST_BITS  13
#define PASS1_BITS  1           /* lose a little precision to avoid overflow */
#endif

/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
 * causing a lot of useless floating-point operations at run time.
 * To get around this we use the following pre-calculated constants.
 * If you change CONST_BITS you may want to add appropriate values.
 * (With a reasonable C compiler, you can just rely on the FIX() macro...)
 */

#if CONST_BITS == 13
#define FIX_0_298631336  ((INT32)  2446)        /* FIX(0.298631336) */
#define FIX_0_390180644  ((INT32)  3196)        /* FIX(0.390180644) */
#define FIX_0_541196100  ((INT32)  4433)        /* FIX(0.541196100) */
#define FIX_0_765366865  ((INT32)  6270)        /* FIX(0.765366865) */
#define FIX_0_899976223  ((INT32)  7373)        /* FIX(0.899976223) */
#define FIX_1_175875602  ((INT32)  9633)        /* FIX(1.175875602) */
#define FIX_1_501321110  ((INT32)  12299)       /* FIX(1.501321110) */
#define FIX_1_847759065  ((INT32)  15137)       /* FIX(1.847759065) */
#define FIX_1_961570560  ((INT32)  16069)       /* FIX(1.961570560) */
#define FIX_2_053119869  ((INT32)  16819)       /* FIX(2.053119869) */
#define FIX_2_562915447  ((INT32)  20995)       /* FIX(2.562915447) */
#define FIX_3_072711026  ((INT32)  25172)       /* FIX(3.072711026) */
#else
#define FIX_0_298631336  FIX(0.298631336)
#define FIX_0_390180644  FIX(0.390180644)
#define FIX_0_541196100  FIX(0.541196100)
#define FIX_0_765366865  FIX(0.765366865)
#define FIX_0_899976223  FIX(0.899976223)
#define FIX_1_175875602  FIX(1.175875602)
#define FIX_1_501321110  FIX(1.501321110)
#define FIX_1_847759065  FIX(1.847759065)
#define FIX_1_961570560  FIX(1.961570560)
#define FIX_2_053119869  FIX(2.053119869)
#define FIX_2_562915447  FIX(2.562915447)
#define FIX_3_072711026  FIX(3.072711026)
#endif


/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
 * For 8-bit samples with the recommended scaling, all the variable
 * and constant values involved are no more than 16 bits wide, so a
 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
 * For 12-bit samples, a full 32-bit multiplication will be needed.
 */

#if BITS_IN_JSAMPLE == 8
#define MULTIPLY(var,const)  MULTIPLY16C16(var,const)
#else
#define MULTIPLY(var,const)  ((var) * (const))
#endif


/* Dequantize a coefficient by multiplying it by the multiplier-table
 * entry; produce an int result.  In this module, both inputs and result
 * are 16 bits or less, so either int or short multiply will work.
 */

#define DEQUANTIZE(coef,quantval)  (((ISLOW_MULT_TYPE) (coef)) * (quantval))


166/*
167 * Perform dequantization and inverse DCT on one block of coefficients.
168 *
169 * Optimized algorithm with 12 multiplications in the 1-D kernel.
170 * cK represents sqrt(2) * cos(K*pi/16).
171 */
172
173GLOBAL(void)
174jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
177{
178 INT32 tmp0, tmp1, tmp2, tmp3;
179 INT32 tmp10, tmp11, tmp12, tmp13;
180 INT32 z1, z2, z3;
181 JCOEFPTR inptr;
182 ISLOW_MULT_TYPE * quantptr;
183 int * wsptr;
184 JSAMPROW outptr;
185 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
186 int ctr;
187 int workspace[DCTSIZE2]; /* buffers data between passes */
189
190 /* Pass 1: process columns from input, store into work array.
191 * Note results are scaled up by sqrt(8) compared to a true IDCT;
192 * furthermore, we scale the results by 2**PASS1_BITS.
193 */
194
195 inptr = coef_block;
196 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
197 wsptr = workspace;
198 for (ctr = DCTSIZE; ctr > 0; ctr--) {
199 /* Due to quantization, we will usually find that many of the input
200 * coefficients are zero, especially the AC terms. We can exploit this
201 * by short-circuiting the IDCT calculation for any column in which all
202 * the AC terms are zero. In that case each output is equal to the
203 * DC coefficient (with scale factor as needed).
204 * With typical images and quantization tables, half or more of the
205 * column DCT calculations can be simplified this way.
206 */
207
208 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
209 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
210 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
211 inptr[DCTSIZE*7] == 0) {
212 /* AC terms all zero */
213 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
214
215 wsptr[DCTSIZE*0] = dcval;
216 wsptr[DCTSIZE*1] = dcval;
217 wsptr[DCTSIZE*2] = dcval;
218 wsptr[DCTSIZE*3] = dcval;
219 wsptr[DCTSIZE*4] = dcval;
220 wsptr[DCTSIZE*5] = dcval;
221 wsptr[DCTSIZE*6] = dcval;
222 wsptr[DCTSIZE*7] = dcval;
223
224 inptr++; /* advance pointers to next column */
225 quantptr++;
226 wsptr++;
227 continue;
228 }
229
230 /* Even part: reverse the even part of the forward DCT.
231 * The rotator is c(-6).
232 */
233
234 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
235 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
236 z2 <<= CONST_BITS;
237 z3 <<= CONST_BITS;
238 /* Add fudge factor here for final descale. */
239 z2 += ONE << (CONST_BITS-PASS1_BITS-1);
240
241 tmp0 = z2 + z3;
242 tmp1 = z2 - z3;
243
244 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
245 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
246
247 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
248 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
249 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
250
251 tmp10 = tmp0 + tmp2;
252 tmp13 = tmp0 - tmp2;
253 tmp11 = tmp1 + tmp3;
254 tmp12 = tmp1 - tmp3;
255
256 /* Odd part per figure 8; the matrix is unitary and hence its
257 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
258 */
259
260 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
261 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
262 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
263 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
264
265 z2 = tmp0 + tmp2;
266 z3 = tmp1 + tmp3;
267
268 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
269 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
270 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
271 z2 += z1;
272 z3 += z1;
273
274 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
275 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
276 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
277 tmp0 += z1 + z2;
278 tmp3 += z1 + z3;
279
280 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
281 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
282 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
283 tmp1 += z1 + z3;
284 tmp2 += z1 + z2;
285
286 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
287
288 wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
289 wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
290 wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
291 wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
292 wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
293 wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
294 wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
295 wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
296
297 inptr++; /* advance pointers to next column */
298 quantptr++;
299 wsptr++;
300 }
301
302 /* Pass 2: process rows from work array, store into output array.
303 * Note that we must descale the results by a factor of 8 == 2**3,
304 * and also undo the PASS1_BITS scaling.
305 */
306
307 wsptr = workspace;
308 for (ctr = 0; ctr < DCTSIZE; ctr++) {
309 outptr = output_buf[ctr] + output_col;
310
311 /* Add range center and fudge factor for final descale and range-limit. */
312 z2 = (INT32) wsptr[0] +
313 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
314 (ONE << (PASS1_BITS+2)));
315
316 /* Rows of zeroes can be exploited in the same way as we did with columns.
317 * However, the column calculation has created many nonzero AC terms, so
318 * the simplification applies less often (typically 5% to 10% of the time).
319 * On machines with very fast multiplication, it's possible that the
320 * test takes more time than it's worth. In that case this section
321 * may be commented out.
322 */
323
324#ifndef NO_ZERO_ROW_TEST
325 if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
326 wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
327 /* AC terms all zero */
328 JSAMPLE dcval = range_limit[(int) RIGHT_SHIFT(z2, PASS1_BITS+3)
329 & RANGE_MASK];
330
331 outptr[0] = dcval;
332 outptr[1] = dcval;
333 outptr[2] = dcval;
334 outptr[3] = dcval;
335 outptr[4] = dcval;
336 outptr[5] = dcval;
337 outptr[6] = dcval;
338 outptr[7] = dcval;
339
340 wsptr += DCTSIZE; /* advance pointer to next row */
341 continue;
342 }
343#endif
344
345 /* Even part: reverse the even part of the forward DCT.
346 * The rotator is c(-6).
347 */
348
349 z3 = (INT32) wsptr[4];
350
351 tmp0 = (z2 + z3) << CONST_BITS;
352 tmp1 = (z2 - z3) << CONST_BITS;
353
354 z2 = (INT32) wsptr[2];
355 z3 = (INT32) wsptr[6];
356
357 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
358 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
359 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
360
361 tmp10 = tmp0 + tmp2;
362 tmp13 = tmp0 - tmp2;
363 tmp11 = tmp1 + tmp3;
364 tmp12 = tmp1 - tmp3;
365
366 /* Odd part per figure 8; the matrix is unitary and hence its
367 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
368 */
369
370 tmp0 = (INT32) wsptr[7];
371 tmp1 = (INT32) wsptr[5];
372 tmp2 = (INT32) wsptr[3];
373 tmp3 = (INT32) wsptr[1];
374
375 z2 = tmp0 + tmp2;
376 z3 = tmp1 + tmp3;
377
378 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
379 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
380 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
381 z2 += z1;
382 z3 += z1;
383
384 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
385 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
386 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
387 tmp0 += z1 + z2;
388 tmp3 += z1 + z3;
389
390 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
391 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
392 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
393 tmp1 += z1 + z3;
394 tmp2 += z1 + z2;
395
396 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
397
398 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
399 CONST_BITS+PASS1_BITS+3)
400 & RANGE_MASK];
401 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
402 CONST_BITS+PASS1_BITS+3)
403 & RANGE_MASK];
404 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
405 CONST_BITS+PASS1_BITS+3)
406 & RANGE_MASK];
407 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
408 CONST_BITS+PASS1_BITS+3)
409 & RANGE_MASK];
410 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
411 CONST_BITS+PASS1_BITS+3)
412 & RANGE_MASK];
413 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
414 CONST_BITS+PASS1_BITS+3)
415 & RANGE_MASK];
416 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
417 CONST_BITS+PASS1_BITS+3)
418 & RANGE_MASK];
419 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
420 CONST_BITS+PASS1_BITS+3)
421 & RANGE_MASK];
422
423 wsptr += DCTSIZE; /* advance pointer to next row */
424 }
425}
426
427#ifdef IDCT_SCALING_SUPPORTED
428
429
430/*
431 * Perform dequantization and inverse DCT on one block of coefficients,
432 * producing a reduced-size 7x7 output block.
433 *
434 * Optimized algorithm with 12 multiplications in the 1-D kernel.
435 * cK represents sqrt(2) * cos(K*pi/14).
436 */
437
438GLOBAL(void)
439jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
442{
443 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
444 INT32 z1, z2, z3;
445 JCOEFPTR inptr;
446 ISLOW_MULT_TYPE * quantptr;
447 int * wsptr;
448 JSAMPROW outptr;
449 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
450 int ctr;
451 int workspace[7*7]; /* buffers data between passes */
453
454 /* Pass 1: process columns from input, store into work array. */
455
456 inptr = coef_block;
457 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
458 wsptr = workspace;
459 for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
460 /* Even part */
461
462 tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
463 tmp13 <<= CONST_BITS;
464 /* Add fudge factor here for final descale. */
465 tmp13 += ONE << (CONST_BITS-PASS1_BITS-1);
466
467 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
468 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
469 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
470
471 tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
472 tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
473 tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
474 tmp0 = z1 + z3;
475 z2 -= tmp0;
476 tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
477 tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
478 tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
479 tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
480
481 /* Odd part */
482
483 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
484 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
485 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
486
487 tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
488 tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
489 tmp0 = tmp1 - tmp2;
490 tmp1 += tmp2;
491 tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
492 tmp1 += tmp2;
493 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
494 tmp0 += z2;
495 tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
496
497 /* Final output stage */
498
499 wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
500 wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
501 wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
502 wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
503 wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
504 wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
505 wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS);
506 }
507
508 /* Pass 2: process 7 rows from work array, store into output array. */
509
510 wsptr = workspace;
511 for (ctr = 0; ctr < 7; ctr++) {
512 outptr = output_buf[ctr] + output_col;
513
514 /* Even part */
515
516 /* Add range center and fudge factor for final descale and range-limit. */
517 tmp13 = (INT32) wsptr[0] +
518 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
519 (ONE << (PASS1_BITS+2)));
520 tmp13 <<= CONST_BITS;
521
522 z1 = (INT32) wsptr[2];
523 z2 = (INT32) wsptr[4];
524 z3 = (INT32) wsptr[6];
525
526 tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
527 tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
528 tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
529 tmp0 = z1 + z3;
530 z2 -= tmp0;
531 tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
532 tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
533 tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
534 tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
535
536 /* Odd part */
537
538 z1 = (INT32) wsptr[1];
539 z2 = (INT32) wsptr[3];
540 z3 = (INT32) wsptr[5];
541
542 tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
543 tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
544 tmp0 = tmp1 - tmp2;
545 tmp1 += tmp2;
546 tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
547 tmp1 += tmp2;
548 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
549 tmp0 += z2;
550 tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
551
552 /* Final output stage */
553
554 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
555 CONST_BITS+PASS1_BITS+3)
556 & RANGE_MASK];
557 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
558 CONST_BITS+PASS1_BITS+3)
559 & RANGE_MASK];
560 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
561 CONST_BITS+PASS1_BITS+3)
562 & RANGE_MASK];
563 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
564 CONST_BITS+PASS1_BITS+3)
565 & RANGE_MASK];
566 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
567 CONST_BITS+PASS1_BITS+3)
568 & RANGE_MASK];
569 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
570 CONST_BITS+PASS1_BITS+3)
571 & RANGE_MASK];
572 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13,
573 CONST_BITS+PASS1_BITS+3)
574 & RANGE_MASK];
575
576 wsptr += 7; /* advance pointer to next row */
577 }
578}
579
580
581/*
582 * Perform dequantization and inverse DCT on one block of coefficients,
583 * producing a reduced-size 6x6 output block.
584 *
585 * Optimized algorithm with 3 multiplications in the 1-D kernel.
586 * cK represents sqrt(2) * cos(K*pi/12).
587 */
588
589GLOBAL(void)
590jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
593{
594 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
595 INT32 z1, z2, z3;
596 JCOEFPTR inptr;
597 ISLOW_MULT_TYPE * quantptr;
598 int * wsptr;
599 JSAMPROW outptr;
600 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
601 int ctr;
602 int workspace[6*6]; /* buffers data between passes */
604
605 /* Pass 1: process columns from input, store into work array. */
606
607 inptr = coef_block;
608 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
609 wsptr = workspace;
610 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
611 /* Even part */
612
613 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
614 tmp0 <<= CONST_BITS;
615 /* Add fudge factor here for final descale. */
616 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
617 tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
618 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
619 tmp1 = tmp0 + tmp10;
620 tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
621 tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
622 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
623 tmp10 = tmp1 + tmp0;
624 tmp12 = tmp1 - tmp0;
625
626 /* Odd part */
627
628 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
629 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
630 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
631 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
632 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
633 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
634 tmp1 = (z1 - z2 - z3) << PASS1_BITS;
635
636 /* Final output stage */
637
638 wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
639 wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
640 wsptr[6*1] = (int) (tmp11 + tmp1);
641 wsptr[6*4] = (int) (tmp11 - tmp1);
642 wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
643 wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
644 }
645
646 /* Pass 2: process 6 rows from work array, store into output array. */
647
648 wsptr = workspace;
649 for (ctr = 0; ctr < 6; ctr++) {
650 outptr = output_buf[ctr] + output_col;
651
652 /* Even part */
653
654 /* Add range center and fudge factor for final descale and range-limit. */
655 tmp0 = (INT32) wsptr[0] +
656 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
657 (ONE << (PASS1_BITS+2)));
658 tmp0 <<= CONST_BITS;
659 tmp2 = (INT32) wsptr[4];
660 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
661 tmp1 = tmp0 + tmp10;
662 tmp11 = tmp0 - tmp10 - tmp10;
663 tmp10 = (INT32) wsptr[2];
664 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
665 tmp10 = tmp1 + tmp0;
666 tmp12 = tmp1 - tmp0;
667
668 /* Odd part */
669
670 z1 = (INT32) wsptr[1];
671 z2 = (INT32) wsptr[3];
672 z3 = (INT32) wsptr[5];
673 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
674 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
675 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
676 tmp1 = (z1 - z2 - z3) << CONST_BITS;
677
678 /* Final output stage */
679
680 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
681 CONST_BITS+PASS1_BITS+3)
682 & RANGE_MASK];
683 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
684 CONST_BITS+PASS1_BITS+3)
685 & RANGE_MASK];
686 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
687 CONST_BITS+PASS1_BITS+3)
688 & RANGE_MASK];
689 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
690 CONST_BITS+PASS1_BITS+3)
691 & RANGE_MASK];
692 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
693 CONST_BITS+PASS1_BITS+3)
694 & RANGE_MASK];
695 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
696 CONST_BITS+PASS1_BITS+3)
697 & RANGE_MASK];
698
699 wsptr += 6; /* advance pointer to next row */
700 }
701}
702
703
704/*
705 * Perform dequantization and inverse DCT on one block of coefficients,
706 * producing a reduced-size 5x5 output block.
707 *
708 * Optimized algorithm with 5 multiplications in the 1-D kernel.
709 * cK represents sqrt(2) * cos(K*pi/10).
710 */
711
712GLOBAL(void)
713jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
716{
717 INT32 tmp0, tmp1, tmp10, tmp11, tmp12;
718 INT32 z1, z2, z3;
719 JCOEFPTR inptr;
720 ISLOW_MULT_TYPE * quantptr;
721 int * wsptr;
722 JSAMPROW outptr;
723 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
724 int ctr;
725 int workspace[5*5]; /* buffers data between passes */
727
728 /* Pass 1: process columns from input, store into work array. */
729
730 inptr = coef_block;
731 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
732 wsptr = workspace;
733 for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
734 /* Even part */
735
736 tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
737 tmp12 <<= CONST_BITS;
738 /* Add fudge factor here for final descale. */
739 tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
740 tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
741 tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
742 z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
743 z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
744 z3 = tmp12 + z2;
745 tmp10 = z3 + z1;
746 tmp11 = z3 - z1;
747 tmp12 -= z2 << 2;
748
749 /* Odd part */
750
751 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
752 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
753
754 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
755 tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
756 tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
757
758 /* Final output stage */
759
760 wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
761 wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
762 wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
763 wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
764 wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
765 }
766
767 /* Pass 2: process 5 rows from work array, store into output array. */
768
769 wsptr = workspace;
770 for (ctr = 0; ctr < 5; ctr++) {
771 outptr = output_buf[ctr] + output_col;
772
773 /* Even part */
774
775 /* Add range center and fudge factor for final descale and range-limit. */
776 tmp12 = (INT32) wsptr[0] +
777 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
778 (ONE << (PASS1_BITS+2)));
779 tmp12 <<= CONST_BITS;
780 tmp0 = (INT32) wsptr[2];
781 tmp1 = (INT32) wsptr[4];
782 z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
783 z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
784 z3 = tmp12 + z2;
785 tmp10 = z3 + z1;
786 tmp11 = z3 - z1;
787 tmp12 -= z2 << 2;
788
789 /* Odd part */
790
791 z2 = (INT32) wsptr[1];
792 z3 = (INT32) wsptr[3];
793
794 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
795 tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
796 tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
797
798 /* Final output stage */
799
800 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
801 CONST_BITS+PASS1_BITS+3)
802 & RANGE_MASK];
803 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
804 CONST_BITS+PASS1_BITS+3)
805 & RANGE_MASK];
806 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
807 CONST_BITS+PASS1_BITS+3)
808 & RANGE_MASK];
809 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
810 CONST_BITS+PASS1_BITS+3)
811 & RANGE_MASK];
812 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
813 CONST_BITS+PASS1_BITS+3)
814 & RANGE_MASK];
815
816 wsptr += 5; /* advance pointer to next row */
817 }
818}
819
820
821/*
822 * Perform dequantization and inverse DCT on one block of coefficients,
823 * producing a reduced-size 4x4 output block.
824 *
825 * Optimized algorithm with 3 multiplications in the 1-D kernel.
826 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
827 */
828
829GLOBAL(void)
830jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
833{
834 INT32 tmp0, tmp2, tmp10, tmp12;
835 INT32 z1, z2, z3;
836 JCOEFPTR inptr;
837 ISLOW_MULT_TYPE * quantptr;
838 int * wsptr;
839 JSAMPROW outptr;
840 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
841 int ctr;
842 int workspace[4*4]; /* buffers data between passes */
844
845 /* Pass 1: process columns from input, store into work array. */
846
847 inptr = coef_block;
848 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
849 wsptr = workspace;
850 for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
851 /* Even part */
852
853 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
854 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
855
856 tmp10 = (tmp0 + tmp2) << PASS1_BITS;
857 tmp12 = (tmp0 - tmp2) << PASS1_BITS;
858
859 /* Odd part */
860 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
861
862 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
863 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
864
865 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
866 /* Add fudge factor here for final descale. */
867 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
868 tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
869 CONST_BITS-PASS1_BITS);
870 tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
871 CONST_BITS-PASS1_BITS);
872
873 /* Final output stage */
874
875 wsptr[4*0] = (int) (tmp10 + tmp0);
876 wsptr[4*3] = (int) (tmp10 - tmp0);
877 wsptr[4*1] = (int) (tmp12 + tmp2);
878 wsptr[4*2] = (int) (tmp12 - tmp2);
879 }
880
881 /* Pass 2: process 4 rows from work array, store into output array. */
882
883 wsptr = workspace;
884 for (ctr = 0; ctr < 4; ctr++) {
885 outptr = output_buf[ctr] + output_col;
886
887 /* Even part */
888
889 /* Add range center and fudge factor for final descale and range-limit. */
890 tmp0 = (INT32) wsptr[0] +
891 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
892 (ONE << (PASS1_BITS+2)));
893 tmp2 = (INT32) wsptr[2];
894
895 tmp10 = (tmp0 + tmp2) << CONST_BITS;
896 tmp12 = (tmp0 - tmp2) << CONST_BITS;
897
898 /* Odd part */
899 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
900
901 z2 = (INT32) wsptr[1];
902 z3 = (INT32) wsptr[3];
903
904 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
905 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
906 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
907
908 /* Final output stage */
909
910 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
911 CONST_BITS+PASS1_BITS+3)
912 & RANGE_MASK];
913 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
914 CONST_BITS+PASS1_BITS+3)
915 & RANGE_MASK];
916 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
917 CONST_BITS+PASS1_BITS+3)
918 & RANGE_MASK];
919 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
920 CONST_BITS+PASS1_BITS+3)
921 & RANGE_MASK];
922
923 wsptr += 4; /* advance pointer to next row */
924 }
925}
926
927
928/*
929 * Perform dequantization and inverse DCT on one block of coefficients,
930 * producing a reduced-size 3x3 output block.
931 *
932 * Optimized algorithm with 2 multiplications in the 1-D kernel.
933 * cK represents sqrt(2) * cos(K*pi/6).
934 */
935
/*
 * jpeg_idct_3x3: two-pass separable 3-point inverse DCT with dequantization.
 *
 * Pass 1 walks the first 3 columns of the coefficient block, reads rows
 * 0..2 of each column (stride DCTSIZE), dequantizes them against the
 * matching entries of compptr->dct_table, and stores a 3x3 intermediate
 * in workspace[] that is still scaled up by PASS1_BITS.
 * Pass 2 walks the 3 workspace rows and writes 3 range-limited samples
 * per row to output_buf[ctr] + output_col.
 *
 * NOTE(review): the embedded listing numbers skip 938-939 and 949, so the
 * rest of the parameter list and one line after the workspace declaration
 * are not visible here.  coef_block, output_buf and output_col are used
 * below and are presumably declared on the missing parameter lines --
 * confirm against the upstream file.
 */
936GLOBAL(void)
937jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
940{
941 INT32 tmp0, tmp2, tmp10, tmp12;
942 JCOEFPTR inptr;
943 ISLOW_MULT_TYPE * quantptr;
944 int * wsptr;
945 JSAMPROW outptr;
946 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
947 int ctr;
948 int workspace[3*3]; /* buffers data between passes */
950
951 /* Pass 1: process columns from input, store into work array. */
952
953 inptr = coef_block;
954 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
955 wsptr = workspace;
 /* One iteration per column; all three pointers step one column at a time. */
956 for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
957 /* Even part */
958
959 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
960 tmp0 <<= CONST_BITS;
961 /* Add fudge factor here for final descale. */
 /* The constant is half of the 1 << (CONST_BITS-PASS1_BITS) removed by */
 /* the shifts in the output stage, so those shifts round to nearest. */
962 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
963 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
964 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
965 tmp10 = tmp0 + tmp12;
 /* Middle output: DC term minus twice the c2 product. */
966 tmp2 = tmp0 - tmp12 - tmp12;
967
968 /* Odd part */
969
970 tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
971 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
972
973 /* Final output stage */
974
 /* Outer rows combine even and odd parts; the middle row is even-only. */
975 wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
976 wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
977 wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
978 }
979
980 /* Pass 2: process 3 rows from work array, store into output array. */
981
982 wsptr = workspace;
983 for (ctr = 0; ctr < 3; ctr++) {
984 outptr = output_buf[ctr] + output_col;
985
986 /* Even part */
987
988 /* Add range center and fudge factor for final descale and range-limit. */
 /* Both terms are folded into the DC value before it is scaled by */
 /* CONST_BITS, so every output of this row picks them up exactly once. */
989 tmp0 = (INT32) wsptr[0] +
990 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
991 (ONE << (PASS1_BITS+2)));
992 tmp0 <<= CONST_BITS;
993 tmp2 = (INT32) wsptr[2];
994 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
995 tmp10 = tmp0 + tmp12;
996 tmp2 = tmp0 - tmp12 - tmp12;
997
998 /* Odd part */
999
1000 tmp12 = (INT32) wsptr[1];
1001 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
1002
1003 /* Final output stage */
1004
 /* Each result is shifted down by CONST_BITS+PASS1_BITS+3, masked with */
 /* RANGE_MASK and used as an index into the range_limit table. */
1005 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
1006 CONST_BITS+PASS1_BITS+3)
1007 & RANGE_MASK];
1008 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
1009 CONST_BITS+PASS1_BITS+3)
1010 & RANGE_MASK];
1011 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
1012 CONST_BITS+PASS1_BITS+3)
1013 & RANGE_MASK];
1014
1015 wsptr += 3; /* advance pointer to next row */
1016 }
1017}
1018
1019
1020/*
1021 * Perform dequantization and inverse DCT on one block of coefficients,
1022 * producing a reduced-size 2x2 output block.
1023 *
1024 * Multiplication-less algorithm.
1025 */
1026
/*
 * jpeg_idct_2x2: 2x2 inverse DCT built from additions, subtractions and
 * shifts only.
 *
 * Only four coefficients are read: coef_block[0], coef_block[1],
 * coef_block[DCTSIZE] and coef_block[DCTSIZE+1], each dequantized against
 * the entry of compptr->dct_table at the same index.  No workspace array
 * is needed; the two column results are kept in tmp0..tmp3 and combined
 * directly into the two output rows.
 *
 * NOTE(review): the embedded listing numbers skip 1029-1030 and 1036, so
 * the rest of the parameter list and one line after the local declarations
 * are not visible here.  coef_block, output_buf and output_col are used
 * below and are presumably declared on the missing parameter lines --
 * confirm against the upstream file.
 */
1027GLOBAL(void)
1028jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1031{
1032 DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
1033 ISLOW_MULT_TYPE * quantptr;
1034 JSAMPROW outptr;
1035 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1037
1038 /* Pass 1: process columns from input. */
1039
1040 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1041
1042 /* Column 0 */
1043 tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
1044 tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
1045 /* Add range center and fudge factor for final descale and range-limit. */
 /* Added to column 0 only: tmp0 and tmp2 both derive from this tmp4, */
 /* and every output below contains exactly one of tmp0 or tmp2.  The */
 /* (1 << 2) term is half of the final shift by 3, i.e. rounding. */
1046 tmp4 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
1047
1048 tmp0 = tmp4 + tmp5;
1049 tmp2 = tmp4 - tmp5;
1050
1051 /* Column 1 */
1052 tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0+1], quantptr[DCTSIZE*0+1]);
1053 tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1+1], quantptr[DCTSIZE*1+1]);
1054
1055 tmp1 = tmp4 + tmp5;
1056 tmp3 = tmp4 - tmp5;
1057
1058 /* Pass 2: process 2 rows, store into output array. */
1059
1060 /* Row 0 */
 /* Row 0 uses the column sums (tmp0, tmp1). */
1061 outptr = output_buf[0] + output_col;
1062
1063 outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
1064 outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
1065
1066 /* Row 1 */
 /* Row 1 uses the column differences (tmp2, tmp3). */
1067 outptr = output_buf[1] + output_col;
1068
1069 outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp2 + tmp3, 3) & RANGE_MASK];
1070 outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp2 - tmp3, 3) & RANGE_MASK];
1071}
1072
1073
1074/*
1075 * Perform dequantization and inverse DCT on one block of coefficients,
1076 * producing a reduced-size 1x1 output block.
1077 *
1078 * We hardly need an inverse DCT routine for this: just take the
1079 * average pixel value, which is one-eighth of the DC coefficient.
1080 */
1081
/*
 * jpeg_idct_1x1: derive a single sample from the DC coefficient alone.
 *
 * Reads coef_block[0], dequantizes it against the first entry of
 * compptr->dct_table, adds the range centre and a rounding term, shifts
 * right by 3 (divide by 8) and looks the result up in range_limit.
 *
 * NOTE(review): the embedded listing numbers skip 1084-1085, 1090 and
 * 1100.  The rest of the parameter list is therefore not visible, and as
 * shown the final range_limit lookup is a bare expression whose value is
 * not stored anywhere.  Presumably the missing line 1100 assigns it to the
 * output sample -- confirm against the upstream file before relying on
 * this listing.
 */
1082GLOBAL(void)
1083jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1086{
1087 DCTELEM dcval;
1088 ISLOW_MULT_TYPE * quantptr;
1089 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1091
1092 /* 1x1 is trivial: just take the DC coefficient divided by 8. */
1093
1094 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1095
1096 dcval = DEQUANTIZE(coef_block[0], quantptr[0]);
1097 /* Add range center and fudge factor for descale and range-limit. */
 /* RANGE_CENTER is pre-scaled by 8 to survive the shift by 3 below; */
 /* the (1 << 2) term is half of that divisor, i.e. rounding. */
1098 dcval += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
1099
1101 range_limit[(int) IRIGHT_SHIFT(dcval, 3) & RANGE_MASK];
1102}
1103
1104
1105/*
1106 * Perform dequantization and inverse DCT on one block of coefficients,
1107 * producing a 9x9 output block.
1108 *
1109 * Optimized algorithm with 10 multiplications in the 1-D kernel.
1110 * cK represents sqrt(2) * cos(K*pi/18).
1111 */
1112
/*
 * jpeg_idct_9x9: two-pass separable inverse DCT producing 9x9 samples
 * from one coefficient block, with dequantization folded into pass 1.
 *
 * Pass 1 walks all 8 input columns.  For each column it reads rows 0..7
 * (stride DCTSIZE), dequantizes them against compptr->dct_table, and
 * stores 9 intermediate values at stride 8 in workspace[], still scaled
 * up by PASS1_BITS.
 * Pass 2 walks the 9 workspace rows (8 values each, wsptr[0..7]) and
 * writes 9 range-limited samples per row to output_buf[ctr] + output_col.
 * Both passes use the same 1-D kernel; only the input source and the final
 * scaling differ.
 *
 * NOTE(review): the embedded listing numbers skip 1115-1116 and 1127, so
 * the rest of the parameter list and one line after the workspace
 * declaration are not visible here.  coef_block, output_buf and output_col
 * are used below and are presumably declared on the missing parameter
 * lines -- confirm against the upstream file.
 */
1113GLOBAL(void)
1114jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1117{
1118 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
1119 INT32 z1, z2, z3, z4;
1120 JCOEFPTR inptr;
1121 ISLOW_MULT_TYPE * quantptr;
1122 int * wsptr;
1123 JSAMPROW outptr;
1124 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1125 int ctr;
1126 int workspace[8*9]; /* buffers data between passes */
1128
1129 /* Pass 1: process columns from input, store into work array. */
1130
1131 inptr = coef_block;
1132 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1133 wsptr = workspace;
 /* One iteration per input column; each produces 9 workspace entries. */
1134 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1135 /* Even part */
1136
1137 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1138 tmp0 <<= CONST_BITS;
1139 /* Add fudge factor here for final descale. */
 /* Half of the 1 << (CONST_BITS-PASS1_BITS) removed in the output */
 /* stage, so the right shifts there round to nearest. */
1140 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
1141
1142 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1143 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1144 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1145
1146 tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */
1147 tmp1 = tmp0 + tmp3;
1148 tmp2 = tmp0 - tmp3 - tmp3;
1149
 /* tmp0 is reused as scratch from here on; the DC term now lives in */
 /* tmp1 and tmp2. */
1150 tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
1151 tmp11 = tmp2 + tmp0;
1152 tmp14 = tmp2 - tmp0 - tmp0;
1153
1154 tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
1155 tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */
1156 tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */
1157
1158 tmp10 = tmp1 + tmp0 - tmp3;
1159 tmp12 = tmp1 - tmp0 + tmp2;
1160 tmp13 = tmp1 - tmp2 + tmp3;
1161
1162 /* Odd part */
1163
1164 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1165 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1166 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1167 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1168
1169 z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */
1170
1171 tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */
1172 tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */
1173 tmp0 = tmp2 + tmp3 - z2;
1174 tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */
1175 tmp2 += z2 - tmp1;
1176 tmp3 += z2 + tmp1;
1177 tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
1178
1179 /* Final output stage */
1180
 /* Rows k and 8-k are the sum and difference of the same even/odd */
 /* pair; the centre row 4 has no odd-part contribution. */
1181 wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
1182 wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
1183 wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
1184 wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
1185 wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
1186 wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
1187 wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS);
1188 wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS);
1189 wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS);
1190 }
1191
1192 /* Pass 2: process 9 rows from work array, store into output array. */
1193
1194 wsptr = workspace;
1195 for (ctr = 0; ctr < 9; ctr++) {
1196 outptr = output_buf[ctr] + output_col;
1197
1198 /* Even part */
1199
1200 /* Add range center and fudge factor for final descale and range-limit. */
 /* Folded into the DC term before scaling by CONST_BITS, so every */
 /* output of the row receives the offset and rounding exactly once. */
1201 tmp0 = (INT32) wsptr[0] +
1202 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1203 (ONE << (PASS1_BITS+2)));
1204 tmp0 <<= CONST_BITS;
1205
1206 z1 = (INT32) wsptr[2];
1207 z2 = (INT32) wsptr[4];
1208 z3 = (INT32) wsptr[6];
1209
1210 tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */
1211 tmp1 = tmp0 + tmp3;
1212 tmp2 = tmp0 - tmp3 - tmp3;
1213
1214 tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
1215 tmp11 = tmp2 + tmp0;
1216 tmp14 = tmp2 - tmp0 - tmp0;
1217
1218 tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
1219 tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */
1220 tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */
1221
1222 tmp10 = tmp1 + tmp0 - tmp3;
1223 tmp12 = tmp1 - tmp0 + tmp2;
1224 tmp13 = tmp1 - tmp2 + tmp3;
1225
1226 /* Odd part */
1227
1228 z1 = (INT32) wsptr[1];
1229 z2 = (INT32) wsptr[3];
1230 z3 = (INT32) wsptr[5];
1231 z4 = (INT32) wsptr[7];
1232
1233 z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */
1234
1235 tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */
1236 tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */
1237 tmp0 = tmp2 + tmp3 - z2;
1238 tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */
1239 tmp2 += z2 - tmp1;
1240 tmp3 += z2 + tmp1;
1241 tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
1242
1243 /* Final output stage */
1244
 /* Each result is shifted down by CONST_BITS+PASS1_BITS+3, masked */
 /* with RANGE_MASK and used as an index into the range_limit table. */
1245 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
1246 CONST_BITS+PASS1_BITS+3)
1247 & RANGE_MASK];
1248 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
1249 CONST_BITS+PASS1_BITS+3)
1250 & RANGE_MASK];
1251 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
1252 CONST_BITS+PASS1_BITS+3)
1253 & RANGE_MASK];
1254 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
1255 CONST_BITS+PASS1_BITS+3)
1256 & RANGE_MASK];
1257 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
1258 CONST_BITS+PASS1_BITS+3)
1259 & RANGE_MASK];
1260 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
1261 CONST_BITS+PASS1_BITS+3)
1262 & RANGE_MASK];
1263 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3,
1264 CONST_BITS+PASS1_BITS+3)
1265 & RANGE_MASK];
1266 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3,
1267 CONST_BITS+PASS1_BITS+3)
1268 & RANGE_MASK];
1269 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14,
1270 CONST_BITS+PASS1_BITS+3)
1271 & RANGE_MASK];
1272
1273 wsptr += 8; /* advance pointer to next row */
1274 }
1275}
1276
1277
1278/*
1279 * Perform dequantization and inverse DCT on one block of coefficients,
1280 * producing a 10x10 output block.
1281 *
1282 * Optimized algorithm with 12 multiplications in the 1-D kernel.
1283 * cK represents sqrt(2) * cos(K*pi/20).
1284 */
1285
/*
 * jpeg_idct_10x10: two-pass separable inverse DCT producing 10x10 samples
 * from one coefficient block, with dequantization folded into pass 1.
 *
 * Pass 1 walks all 8 input columns.  For each column it reads rows 0..7
 * (stride DCTSIZE), dequantizes them against compptr->dct_table, and
 * stores 10 intermediate values at stride 8 in workspace[], still scaled
 * up by PASS1_BITS.
 * Pass 2 walks the 10 workspace rows (8 values each, wsptr[0..7]) and
 * writes 10 range-limited samples per row to output_buf[ctr] + output_col.
 *
 * Outputs 2 and 7 are handled differently from the rest in pass 1: their
 * even and odd parts are brought to the PASS1_BITS scale separately, so
 * the final add/subtract needs no further shift.
 *
 * NOTE(review): the embedded listing numbers skip 1288-1289 and 1301, so
 * the rest of the parameter list and one line after the workspace
 * declaration are not visible here.  coef_block, output_buf and output_col
 * are used below and are presumably declared on the missing parameter
 * lines -- confirm against the upstream file.
 */
1286GLOBAL(void)
1287jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1290{
1291 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1292 INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
1293 INT32 z1, z2, z3, z4, z5;
1294 JCOEFPTR inptr;
1295 ISLOW_MULT_TYPE * quantptr;
1296 int * wsptr;
1297 JSAMPROW outptr;
1298 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1299 int ctr;
1300 int workspace[8*10]; /* buffers data between passes */
1302
1303 /* Pass 1: process columns from input, store into work array. */
1304
1305 inptr = coef_block;
1306 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1307 wsptr = workspace;
 /* One iteration per input column; each produces 10 workspace entries. */
1308 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1309 /* Even part */
1310
1311 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1312 z3 <<= CONST_BITS;
1313 /* Add fudge factor here for final descale. */
1314 z3 += ONE << (CONST_BITS-PASS1_BITS-1);
1315 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1316 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
1317 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
1318 tmp10 = z3 + z1;
1319 tmp11 = z3 - z2;
1320
 /* tmp22 is descaled here (z3 already carries the rounding term), */
 /* unlike tmp20/21/23/24, which are shifted in the output stage. */
1321 tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */
1322 CONST_BITS-PASS1_BITS);
1323
1324 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1325 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1326
1327 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
1328 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
1329 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
1330
1331 tmp20 = tmp10 + tmp12;
1332 tmp24 = tmp10 - tmp12;
1333 tmp21 = tmp11 + tmp13;
1334 tmp23 = tmp11 - tmp13;
1335
1336 /* Odd part */
1337
1338 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1339 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1340 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1341 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1342
1343 tmp11 = z2 + z4;
1344 tmp13 = z2 - z4;
1345
1346 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
 /* z5 is the row-5 coefficient shifted up by CONST_BITS so it can be */
 /* added to the MULTIPLY products below; z3 keeps the unscaled value. */
1347 z5 = z3 << CONST_BITS;
1348
1349 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
1350 z4 = z5 + tmp12;
1351
1352 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
1353 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
1354
1355 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
1356 z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
1357
 /* Odd part for outputs 2 and 7: no constant multiply, so it is */
 /* shifted straight to the PASS1_BITS scale to match tmp22. */
1358 tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
1359
1360 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
1361 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
1362
1363 /* Final output stage */
1364
 /* Rows k and 9-k are the sum and difference of the same even/odd */
 /* pair.  Rows 2 and 7 are stored without a shift (see above). */
1365 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1366 wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1367 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1368 wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1369 wsptr[8*2] = (int) (tmp22 + tmp12);
1370 wsptr[8*7] = (int) (tmp22 - tmp12);
1371 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1372 wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1373 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1374 wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1375 }
1376
1377 /* Pass 2: process 10 rows from work array, store into output array. */
1378
1379 wsptr = workspace;
1380 for (ctr = 0; ctr < 10; ctr++) {
1381 outptr = output_buf[ctr] + output_col;
1382
1383 /* Even part */
1384
1385 /* Add range center and fudge factor for final descale and range-limit. */
1386 z3 = (INT32) wsptr[0] +
1387 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1388 (ONE << (PASS1_BITS+2)));
1389 z3 <<= CONST_BITS;
1390 z4 = (INT32) wsptr[4];
1391 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
1392 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
1393 tmp10 = z3 + z1;
1394 tmp11 = z3 - z2;
1395
 /* Unlike pass 1, tmp22 stays at full scale here; all ten outputs */
 /* share the same final shift below. */
1396 tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */
1397
1398 z2 = (INT32) wsptr[2];
1399 z3 = (INT32) wsptr[6];
1400
1401 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
1402 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
1403 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
1404
1405 tmp20 = tmp10 + tmp12;
1406 tmp24 = tmp10 - tmp12;
1407 tmp21 = tmp11 + tmp13;
1408 tmp23 = tmp11 - tmp13;
1409
1410 /* Odd part */
1411
1412 z1 = (INT32) wsptr[1];
1413 z2 = (INT32) wsptr[3];
1414 z3 = (INT32) wsptr[5];
 /* Here z3 itself is shifted up by CONST_BITS (pass 1 used a separate */
 /* z5), so every later use of z3 in this pass is the scaled value. */
1415 z3 <<= CONST_BITS;
1416 z4 = (INT32) wsptr[7];
1417
1418 tmp11 = z2 + z4;
1419 tmp13 = z2 - z4;
1420
1421 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
1422
1423 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
1424 z4 = z3 + tmp12;
1425
1426 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
1427 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
1428
1429 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
1430 z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
1431
 /* Same combination as pass 1's tmp12 (z1 - tmp13 - row 5), but at the */
 /* CONST_BITS scale; z3 was already scaled above. */
1432 tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
1433
1434 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
1435 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
1436
1437 /* Final output stage */
1438
 /* Each result is shifted down by CONST_BITS+PASS1_BITS+3, masked */
 /* with RANGE_MASK and used as an index into the range_limit table. */
1439 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1440 CONST_BITS+PASS1_BITS+3)
1441 & RANGE_MASK];
1442 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1443 CONST_BITS+PASS1_BITS+3)
1444 & RANGE_MASK];
1445 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1446 CONST_BITS+PASS1_BITS+3)
1447 & RANGE_MASK];
1448 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1449 CONST_BITS+PASS1_BITS+3)
1450 & RANGE_MASK];
1451 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1452 CONST_BITS+PASS1_BITS+3)
1453 & RANGE_MASK];
1454 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1455 CONST_BITS+PASS1_BITS+3)
1456 & RANGE_MASK];
1457 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1458 CONST_BITS+PASS1_BITS+3)
1459 & RANGE_MASK];
1460 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1461 CONST_BITS+PASS1_BITS+3)
1462 & RANGE_MASK];
1463 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1464 CONST_BITS+PASS1_BITS+3)
1465 & RANGE_MASK];
1466 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1467 CONST_BITS+PASS1_BITS+3)
1468 & RANGE_MASK];
1469
1470 wsptr += 8; /* advance pointer to next row */
1471 }
1472}
1473
1474
1475/*
1476 * Perform dequantization and inverse DCT on one block of coefficients,
1477 * producing an 11x11 output block.
1478 *
1479 * Optimized algorithm with 24 multiplications in the 1-D kernel.
1480 * cK represents sqrt(2) * cos(K*pi/22).
1481 */
1482
/*
 * jpeg_idct_11x11: two-pass separable inverse DCT producing 11x11 samples
 * from one coefficient block, with dequantization folded into pass 1.
 *
 * Pass 1 walks all 8 input columns.  For each column it reads rows 0..7
 * (stride DCTSIZE), dequantizes them against compptr->dct_table, and
 * stores 11 intermediate values at stride 8 in workspace[], still scaled
 * up by PASS1_BITS.
 * Pass 2 walks the 11 workspace rows (8 values each, wsptr[0..7]) and
 * writes 11 range-limited samples per row to output_buf[ctr] + output_col.
 * Both passes use the same 1-D kernel; only the input source and the final
 * scaling differ.
 *
 * NOTE(review): the embedded listing numbers skip 1485-1486 and 1498, so
 * the rest of the parameter list and one line after the workspace
 * declaration are not visible here.  coef_block, output_buf and output_col
 * are used below and are presumably declared on the missing parameter
 * lines -- confirm against the upstream file.
 */
1483GLOBAL(void)
1484jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1487{
1488 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1489 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
1490 INT32 z1, z2, z3, z4;
1491 JCOEFPTR inptr;
1492 ISLOW_MULT_TYPE * quantptr;
1493 int * wsptr;
1494 JSAMPROW outptr;
1495 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1496 int ctr;
1497 int workspace[8*11]; /* buffers data between passes */
1499
1500 /* Pass 1: process columns from input, store into work array. */
1501
1502 inptr = coef_block;
1503 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1504 wsptr = workspace;
 /* One iteration per input column; each produces 11 workspace entries. */
1505 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1506 /* Even part */
1507
1508 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1509 tmp10 <<= CONST_BITS;
1510 /* Add fudge factor here for final descale. */
 /* Half of the 1 << (CONST_BITS-PASS1_BITS) removed in the output */
 /* stage, so the right shifts there round to nearest. */
1511 tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
1512
1513 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1514 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1515 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1516
1517 tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */
1518 tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */
 /* z4 is scratch in the even part: first z1+z3, then z1+z3-z2. */
1519 z4 = z1 + z3;
1520 tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */
1521 z4 -= z2;
 /* tmp25 temporarily holds the term shared by tmp20..tmp24; it is */
 /* overwritten with the centre-row value at the end of the even part. */
1522 tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */
1523 tmp21 = tmp20 + tmp23 + tmp25 -
1524 MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */
1525 tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
1526 tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
1527 tmp24 += tmp25;
1528 tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */
1529 tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */
1530 MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */
1531 tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */
1532
1533 /* Odd part */
1534
1535 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1536 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1537 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1538 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1539
 /* tmp10 (the DC term above) is reused below as an odd-part result; */
 /* the even part no longer needs it. */
1540 tmp11 = z1 + z2;
1541 tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
1542 tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */
1543 tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */
1544 tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
1545 tmp10 = tmp11 + tmp12 + tmp13 -
1546 MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */
 /* From here z1 no longer holds the row-1 coefficient; it is scratch. */
1547 z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
1548 tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */
1549 tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */
1550 z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */
1551 tmp11 += z1;
1552 tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */
1553 tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */
1554 MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */
1555 MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */
1556
1557 /* Final output stage */
1558
 /* Rows k and 10-k are the sum and difference of the same even/odd */
 /* pair; the centre row 5 has no odd-part contribution. */
1559 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1560 wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1561 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1562 wsptr[8*9] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1563 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1564 wsptr[8*8] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1565 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1566 wsptr[8*7] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1567 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1568 wsptr[8*6] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1569 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS);
1570 }
1571
1572 /* Pass 2: process 11 rows from work array, store into output array. */
1573
1574 wsptr = workspace;
1575 for (ctr = 0; ctr < 11; ctr++) {
1576 outptr = output_buf[ctr] + output_col;
1577
1578 /* Even part */
1579
1580 /* Add range center and fudge factor for final descale and range-limit. */
 /* Folded into the DC term before scaling by CONST_BITS, so every */
 /* output of the row receives the offset and rounding exactly once. */
1581 tmp10 = (INT32) wsptr[0] +
1582 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1583 (ONE << (PASS1_BITS+2)));
1584 tmp10 <<= CONST_BITS;
1585
1586 z1 = (INT32) wsptr[2];
1587 z2 = (INT32) wsptr[4];
1588 z3 = (INT32) wsptr[6];
1589
1590 tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */
1591 tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */
1592 z4 = z1 + z3;
1593 tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */
1594 z4 -= z2;
1595 tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */
1596 tmp21 = tmp20 + tmp23 + tmp25 -
1597 MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */
1598 tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
1599 tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
1600 tmp24 += tmp25;
1601 tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */
1602 tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */
1603 MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */
1604 tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */
1605
1606 /* Odd part */
1607
1608 z1 = (INT32) wsptr[1];
1609 z2 = (INT32) wsptr[3];
1610 z3 = (INT32) wsptr[5];
1611 z4 = (INT32) wsptr[7];
1612
1613 tmp11 = z1 + z2;
1614 tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
1615 tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */
1616 tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */
1617 tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
1618 tmp10 = tmp11 + tmp12 + tmp13 -
1619 MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */
1620 z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
1621 tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */
1622 tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */
1623 z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */
1624 tmp11 += z1;
1625 tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */
1626 tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */
1627 MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */
1628 MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */
1629
1630 /* Final output stage */
1631
 /* Each result is shifted down by CONST_BITS+PASS1_BITS+3, masked */
 /* with RANGE_MASK and used as an index into the range_limit table. */
1632 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1633 CONST_BITS+PASS1_BITS+3)
1634 & RANGE_MASK];
1635 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1636 CONST_BITS+PASS1_BITS+3)
1637 & RANGE_MASK];
1638 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1639 CONST_BITS+PASS1_BITS+3)
1640 & RANGE_MASK];
1641 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1642 CONST_BITS+PASS1_BITS+3)
1643 & RANGE_MASK];
1644 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1645 CONST_BITS+PASS1_BITS+3)
1646 & RANGE_MASK];
1647 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1648 CONST_BITS+PASS1_BITS+3)
1649 & RANGE_MASK];
1650 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1651 CONST_BITS+PASS1_BITS+3)
1652 & RANGE_MASK];
1653 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1654 CONST_BITS+PASS1_BITS+3)
1655 & RANGE_MASK];
1656 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1657 CONST_BITS+PASS1_BITS+3)
1658 & RANGE_MASK];
1659 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1660 CONST_BITS+PASS1_BITS+3)
1661 & RANGE_MASK];
1662 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25,
1663 CONST_BITS+PASS1_BITS+3)
1664 & RANGE_MASK];
1665
1666 wsptr += 8; /* advance pointer to next row */
1667 }
1668}
1669
1670
1671/*
1672 * Perform dequantization and inverse DCT on one block of coefficients,
1673 * producing a 12x12 output block.
1674 *
1675 * Optimized algorithm with 15 multiplications in the 1-D kernel.
1676 * cK represents sqrt(2) * cos(K*pi/24).
1677 */
1678
1679GLOBAL(void)
1680jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1683{
1684 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1685 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
1686 INT32 z1, z2, z3, z4;
1687 JCOEFPTR inptr;
1688 ISLOW_MULT_TYPE * quantptr;
1689 int * wsptr;
1690 JSAMPROW outptr;
1691 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1692 int ctr;
1693 int workspace[8*12]; /* buffers data between passes */
1695
1696 /* Pass 1: process columns from input, store into work array. */
1697
1698 inptr = coef_block;
1699 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1700 wsptr = workspace;
1701 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1702 /* Even part */
1703
1704 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1705 z3 <<= CONST_BITS;
1706 /* Add fudge factor here for final descale. */
1707 z3 += ONE << (CONST_BITS-PASS1_BITS-1);
1708
1709 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1710 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
1711
1712 tmp10 = z3 + z4;
1713 tmp11 = z3 - z4;
1714
1715 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1716 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
1717 z1 <<= CONST_BITS;
1718 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1719 z2 <<= CONST_BITS;
1720
1721 tmp12 = z1 - z2;
1722
1723 tmp21 = z3 + tmp12;
1724 tmp24 = z3 - tmp12;
1725
1726 tmp12 = z4 + z2;
1727
1728 tmp20 = tmp10 + tmp12;
1729 tmp25 = tmp10 - tmp12;
1730
1731 tmp12 = z4 - z1 - z2;
1732
1733 tmp22 = tmp11 + tmp12;
1734 tmp23 = tmp11 - tmp12;
1735
1736 /* Odd part */
1737
1738 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1739 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1740 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1741 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1742
1743 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
1744 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
1745
1746 tmp10 = z1 + z3;
1747 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
1748 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
1749 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
1750 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
1751 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
1752 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
1753 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
1754 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
1755
1756 z1 -= z4;
1757 z2 -= z3;
1758 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
1759 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
1760 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
1761
1762 /* Final output stage */
1763
1764 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1765 wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1766 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1767 wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1768 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1769 wsptr[8*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1770 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1771 wsptr[8*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1772 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1773 wsptr[8*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1774 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
1775 wsptr[8*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
1776 }
1777
1778 /* Pass 2: process 12 rows from work array, store into output array. */
1779
1780 wsptr = workspace;
1781 for (ctr = 0; ctr < 12; ctr++) {
1782 outptr = output_buf[ctr] + output_col;
1783
1784 /* Even part */
1785
1786 /* Add range center and fudge factor for final descale and range-limit. */
1787 z3 = (INT32) wsptr[0] +
1788 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1789 (ONE << (PASS1_BITS+2)));
1790 z3 <<= CONST_BITS;
1791
1792 z4 = (INT32) wsptr[4];
1793 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
1794
1795 tmp10 = z3 + z4;
1796 tmp11 = z3 - z4;
1797
1798 z1 = (INT32) wsptr[2];
1799 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
1800 z1 <<= CONST_BITS;
1801 z2 = (INT32) wsptr[6];
1802 z2 <<= CONST_BITS;
1803
1804 tmp12 = z1 - z2;
1805
1806 tmp21 = z3 + tmp12;
1807 tmp24 = z3 - tmp12;
1808
1809 tmp12 = z4 + z2;
1810
1811 tmp20 = tmp10 + tmp12;
1812 tmp25 = tmp10 - tmp12;
1813
1814 tmp12 = z4 - z1 - z2;
1815
1816 tmp22 = tmp11 + tmp12;
1817 tmp23 = tmp11 - tmp12;
1818
1819 /* Odd part */
1820
1821 z1 = (INT32) wsptr[1];
1822 z2 = (INT32) wsptr[3];
1823 z3 = (INT32) wsptr[5];
1824 z4 = (INT32) wsptr[7];
1825
1826 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
1827 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
1828
1829 tmp10 = z1 + z3;
1830 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
1831 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
1832 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
1833 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
1834 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
1835 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
1836 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
1837 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
1838
1839 z1 -= z4;
1840 z2 -= z3;
1841 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
1842 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
1843 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
1844
1845 /* Final output stage */
1846
1847 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1848 CONST_BITS+PASS1_BITS+3)
1849 & RANGE_MASK];
1850 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1851 CONST_BITS+PASS1_BITS+3)
1852 & RANGE_MASK];
1853 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1854 CONST_BITS+PASS1_BITS+3)
1855 & RANGE_MASK];
1856 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1857 CONST_BITS+PASS1_BITS+3)
1858 & RANGE_MASK];
1859 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1860 CONST_BITS+PASS1_BITS+3)
1861 & RANGE_MASK];
1862 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1863 CONST_BITS+PASS1_BITS+3)
1864 & RANGE_MASK];
1865 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1866 CONST_BITS+PASS1_BITS+3)
1867 & RANGE_MASK];
1868 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1869 CONST_BITS+PASS1_BITS+3)
1870 & RANGE_MASK];
1871 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1872 CONST_BITS+PASS1_BITS+3)
1873 & RANGE_MASK];
1874 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1875 CONST_BITS+PASS1_BITS+3)
1876 & RANGE_MASK];
1877 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
1878 CONST_BITS+PASS1_BITS+3)
1879 & RANGE_MASK];
1880 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
1881 CONST_BITS+PASS1_BITS+3)
1882 & RANGE_MASK];
1883
1884 wsptr += 8; /* advance pointer to next row */
1885 }
1886}
1887
1888
1889/*
1890 * Perform dequantization and inverse DCT on one block of coefficients,
1891 * producing a 13x13 output block.
1892 *
1893 * Optimized algorithm with 29 multiplications in the 1-D kernel.
1894 * cK represents sqrt(2) * cos(K*pi/26).
1895 */
1896
1897GLOBAL(void)
1898jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1901{
1902 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1903 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
1904 INT32 z1, z2, z3, z4;
1905 JCOEFPTR inptr;
1906 ISLOW_MULT_TYPE * quantptr;
1907 int * wsptr;
1908 JSAMPROW outptr;
1909 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1910 int ctr;
1911 int workspace[8*13]; /* buffers data between passes */
1913
1914 /* Pass 1: process columns from input, store into work array. */
1915
1916 inptr = coef_block;
1917 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1918 wsptr = workspace;
1919 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1920 /* Even part */
1921
1922 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1923 z1 <<= CONST_BITS;
1924 /* Add fudge factor here for final descale. */
1925 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
1926
1927 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1928 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1929 z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1930
1931 tmp10 = z3 + z4;
1932 tmp11 = z3 - z4;
1933
1934 tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */
1935 tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */
1936
1937 tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */
1938 tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */
1939
1940 tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */
1941 tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */
1942
1943 tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */
1944 tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
1945
1946 tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */
1947 tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */
1948
1949 tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
1950 tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
1951
1952 tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */
1953
1954 /* Odd part */
1955
1956 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1957 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1958 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1959 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1960
1961 tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */
1962 tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */
1963 tmp15 = z1 + z4;
1964 tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */
1965 tmp10 = tmp11 + tmp12 + tmp13 -
1966 MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */
1967 tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */
1968 tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
1969 tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
1970 tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */
1971 tmp11 += tmp14;
1972 tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
1973 tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */
1974 tmp12 += tmp14;
1975 tmp13 += tmp14;
1976 tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */
1977 tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
1978 MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */
1979 z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */
1980 tmp14 += z1;
1981 tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */
1982 MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */
1983
1984 /* Final output stage */
1985
1986 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1987 wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1988 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1989 wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1990 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1991 wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1992 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1993 wsptr[8*9] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1994 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1995 wsptr[8*8] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1996 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
1997 wsptr[8*7] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
1998 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS);
1999 }
2000
2001 /* Pass 2: process 13 rows from work array, store into output array. */
2002
2003 wsptr = workspace;
2004 for (ctr = 0; ctr < 13; ctr++) {
2005 outptr = output_buf[ctr] + output_col;
2006
2007 /* Even part */
2008
2009 /* Add range center and fudge factor for final descale and range-limit. */
2010 z1 = (INT32) wsptr[0] +
2011 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2012 (ONE << (PASS1_BITS+2)));
2013 z1 <<= CONST_BITS;
2014
2015 z2 = (INT32) wsptr[2];
2016 z3 = (INT32) wsptr[4];
2017 z4 = (INT32) wsptr[6];
2018
2019 tmp10 = z3 + z4;
2020 tmp11 = z3 - z4;
2021
2022 tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */
2023 tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */
2024
2025 tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */
2026 tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */
2027
2028 tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */
2029 tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */
2030
2031 tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */
2032 tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
2033
2034 tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */
2035 tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */
2036
2037 tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
2038 tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
2039
2040 tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */
2041
2042 /* Odd part */
2043
2044 z1 = (INT32) wsptr[1];
2045 z2 = (INT32) wsptr[3];
2046 z3 = (INT32) wsptr[5];
2047 z4 = (INT32) wsptr[7];
2048
2049 tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */
2050 tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */
2051 tmp15 = z1 + z4;
2052 tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */
2053 tmp10 = tmp11 + tmp12 + tmp13 -
2054 MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */
2055 tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */
2056 tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
2057 tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
2058 tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */
2059 tmp11 += tmp14;
2060 tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
2061 tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */
2062 tmp12 += tmp14;
2063 tmp13 += tmp14;
2064 tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */
2065 tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
2066 MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */
2067 z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */
2068 tmp14 += z1;
2069 tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */
2070 MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */
2071
2072 /* Final output stage */
2073
2074 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2075 CONST_BITS+PASS1_BITS+3)
2076 & RANGE_MASK];
2077 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2078 CONST_BITS+PASS1_BITS+3)
2079 & RANGE_MASK];
2080 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2081 CONST_BITS+PASS1_BITS+3)
2082 & RANGE_MASK];
2083 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2084 CONST_BITS+PASS1_BITS+3)
2085 & RANGE_MASK];
2086 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2087 CONST_BITS+PASS1_BITS+3)
2088 & RANGE_MASK];
2089 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2090 CONST_BITS+PASS1_BITS+3)
2091 & RANGE_MASK];
2092 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2093 CONST_BITS+PASS1_BITS+3)
2094 & RANGE_MASK];
2095 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2096 CONST_BITS+PASS1_BITS+3)
2097 & RANGE_MASK];
2098 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2099 CONST_BITS+PASS1_BITS+3)
2100 & RANGE_MASK];
2101 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2102 CONST_BITS+PASS1_BITS+3)
2103 & RANGE_MASK];
2104 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2105 CONST_BITS+PASS1_BITS+3)
2106 & RANGE_MASK];
2107 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2108 CONST_BITS+PASS1_BITS+3)
2109 & RANGE_MASK];
2110 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26,
2111 CONST_BITS+PASS1_BITS+3)
2112 & RANGE_MASK];
2113
2114 wsptr += 8; /* advance pointer to next row */
2115 }
2116}
2117
2118
2119/*
2120 * Perform dequantization and inverse DCT on one block of coefficients,
2121 * producing a 14x14 output block.
2122 *
2123 * Optimized algorithm with 20 multiplications in the 1-D kernel.
2124 * cK represents sqrt(2) * cos(K*pi/28).
2125 */
2126
2127GLOBAL(void)
2128jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2131{
2132 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2133 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
2134 INT32 z1, z2, z3, z4;
2135 JCOEFPTR inptr;
2136 ISLOW_MULT_TYPE * quantptr;
2137 int * wsptr;
2138 JSAMPROW outptr;
2139 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2140 int ctr;
2141 int workspace[8*14]; /* buffers data between passes */
2143
2144 /* Pass 1: process columns from input, store into work array. */
2145
2146 inptr = coef_block;
2147 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2148 wsptr = workspace;
2149 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2150 /* Even part */
2151
2152 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2153 z1 <<= CONST_BITS;
2154 /* Add fudge factor here for final descale. */
2155 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
2156 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2157 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
2158 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
2159 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
2160
2161 tmp10 = z1 + z2;
2162 tmp11 = z1 + z3;
2163 tmp12 = z1 - z4;
2164
2165 tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
2166 CONST_BITS-PASS1_BITS);
2167
2168 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2169 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2170
2171 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
2172
2173 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
2174 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
2175 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
2176 MULTIPLY(z2, FIX(1.378756276)); /* c2 */
2177
2178 tmp20 = tmp10 + tmp13;
2179 tmp26 = tmp10 - tmp13;
2180 tmp21 = tmp11 + tmp14;
2181 tmp25 = tmp11 - tmp14;
2182 tmp22 = tmp12 + tmp15;
2183 tmp24 = tmp12 - tmp15;
2184
2185 /* Odd part */
2186
2187 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2188 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2189 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2190 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2191 tmp13 = z4 << CONST_BITS;
2192
2193 tmp14 = z1 + z3;
2194 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
2195 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
2196 tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
2197 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
2198 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
2199 z1 -= z2;
2200 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */
2201 tmp16 += tmp15;
2202 z1 += z4;
2203 z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
2204 tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
2205 tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
2206 z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
2207 tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
2208 tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
2209
2210 tmp13 = (z1 - z3) << PASS1_BITS;
2211
2212 /* Final output stage */
2213
2214 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
2215 wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
2216 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
2217 wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
2218 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
2219 wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2220 wsptr[8*3] = (int) (tmp23 + tmp13);
2221 wsptr[8*10] = (int) (tmp23 - tmp13);
2222 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2223 wsptr[8*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2224 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2225 wsptr[8*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2226 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
2227 wsptr[8*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
2228 }
2229
2230 /* Pass 2: process 14 rows from work array, store into output array. */
2231
2232 wsptr = workspace;
2233 for (ctr = 0; ctr < 14; ctr++) {
2234 outptr = output_buf[ctr] + output_col;
2235
2236 /* Even part */
2237
2238 /* Add range center and fudge factor for final descale and range-limit. */
2239 z1 = (INT32) wsptr[0] +
2240 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2241 (ONE << (PASS1_BITS+2)));
2242 z1 <<= CONST_BITS;
2243 z4 = (INT32) wsptr[4];
2244 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
2245 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
2246 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
2247
2248 tmp10 = z1 + z2;
2249 tmp11 = z1 + z3;
2250 tmp12 = z1 - z4;
2251
2252 tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */
2253
2254 z1 = (INT32) wsptr[2];
2255 z2 = (INT32) wsptr[6];
2256
2257 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
2258
2259 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
2260 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
2261 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
2262 MULTIPLY(z2, FIX(1.378756276)); /* c2 */
2263
2264 tmp20 = tmp10 + tmp13;
2265 tmp26 = tmp10 - tmp13;
2266 tmp21 = tmp11 + tmp14;
2267 tmp25 = tmp11 - tmp14;
2268 tmp22 = tmp12 + tmp15;
2269 tmp24 = tmp12 - tmp15;
2270
2271 /* Odd part */
2272
2273 z1 = (INT32) wsptr[1];
2274 z2 = (INT32) wsptr[3];
2275 z3 = (INT32) wsptr[5];
2276 z4 = (INT32) wsptr[7];
2277 z4 <<= CONST_BITS;
2278
2279 tmp14 = z1 + z3;
2280 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
2281 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
2282 tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
2283 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
2284 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
2285 z1 -= z2;
2286 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */
2287 tmp16 += tmp15;
2288 tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */
2289 tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
2290 tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
2291 tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
2292 tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
2293 tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
2294
2295 tmp13 = ((z1 - z3) << CONST_BITS) + z4;
2296
2297 /* Final output stage */
2298
2299 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2300 CONST_BITS+PASS1_BITS+3)
2301 & RANGE_MASK];
2302 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2303 CONST_BITS+PASS1_BITS+3)
2304 & RANGE_MASK];
2305 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2306 CONST_BITS+PASS1_BITS+3)
2307 & RANGE_MASK];
2308 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2309 CONST_BITS+PASS1_BITS+3)
2310 & RANGE_MASK];
2311 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2312 CONST_BITS+PASS1_BITS+3)
2313 & RANGE_MASK];
2314 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2315 CONST_BITS+PASS1_BITS+3)
2316 & RANGE_MASK];
2317 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2318 CONST_BITS+PASS1_BITS+3)
2319 & RANGE_MASK];
2320 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2321 CONST_BITS+PASS1_BITS+3)
2322 & RANGE_MASK];
2323 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2324 CONST_BITS+PASS1_BITS+3)
2325 & RANGE_MASK];
2326 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2327 CONST_BITS+PASS1_BITS+3)
2328 & RANGE_MASK];
2329 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2330 CONST_BITS+PASS1_BITS+3)
2331 & RANGE_MASK];
2332 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2333 CONST_BITS+PASS1_BITS+3)
2334 & RANGE_MASK];
2335 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
2336 CONST_BITS+PASS1_BITS+3)
2337 & RANGE_MASK];
2338 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
2339 CONST_BITS+PASS1_BITS+3)
2340 & RANGE_MASK];
2341
2342 wsptr += 8; /* advance pointer to next row */
2343 }
2344}
2345
2346
2347/*
2348 * Perform dequantization and inverse DCT on one block of coefficients,
2349 * producing a 15x15 output block.
2350 *
2351 * Optimized algorithm with 22 multiplications in the 1-D kernel.
2352 * cK represents sqrt(2) * cos(K*pi/30).
2353 */
2354
2355GLOBAL(void)
2356jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2359{
2360 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2361 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2362 INT32 z1, z2, z3, z4;
2363 JCOEFPTR inptr;
2364 ISLOW_MULT_TYPE * quantptr;
2365 int * wsptr;
2366 JSAMPROW outptr;
2367 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2368 int ctr;
2369 int workspace[8*15]; /* buffers data between passes */
2371
2372 /* Pass 1: process columns from input, store into work array. */
2373
2374 inptr = coef_block;
2375 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2376 wsptr = workspace;
2377 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2378 /* Even part */
2379
2380 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2381 z1 <<= CONST_BITS;
2382 /* Add fudge factor here for final descale. */
2383 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
2384
2385 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2386 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2387 z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2388
2389 tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
2390 tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
2391
2392 tmp12 = z1 - tmp10;
2393 tmp13 = z1 + tmp11;
2394 z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */
2395
2396 z4 = z2 - z3;
2397 z3 += z2;
2398 tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
2399 tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
2400 z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */
2401
2402 tmp20 = tmp13 + tmp10 + tmp11;
2403 tmp23 = tmp12 - tmp10 + tmp11 + z2;
2404
2405 tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
2406 tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
2407
2408 tmp25 = tmp13 - tmp10 - tmp11;
2409 tmp26 = tmp12 + tmp10 - tmp11 - z2;
2410
2411 tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
2412 tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
2413
2414 tmp21 = tmp12 + tmp10 + tmp11;
2415 tmp24 = tmp13 - tmp10 + tmp11;
2416 tmp11 += tmp11;
2417 tmp22 = z1 + tmp11; /* c10 = c6-c12 */
2418 tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */
2419
2420 /* Odd part */
2421
2422 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2423 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2424 z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2425 z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */
2426 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2427
2428 tmp13 = z2 - z4;
2429 tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */
2430 tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */
2431 tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */
2432
2433 tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */
2434 tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */
2435 z2 = z1 - z4;
2436 tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */
2437
2438 tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
2439 tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
2440 tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */
2441 z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */
2442 tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */
2443 tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */
2444
2445 /* Final output stage */
2446
2447 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
2448 wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
2449 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
2450 wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
2451 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
2452 wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2453 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
2454 wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
2455 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2456 wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2457 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2458 wsptr[8*9] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2459 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
2460 wsptr[8*8] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
2461 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS);
2462 }
2463
2464 /* Pass 2: process 15 rows from work array, store into output array. */
2465
2466 wsptr = workspace;
2467 for (ctr = 0; ctr < 15; ctr++) {
2468 outptr = output_buf[ctr] + output_col;
2469
2470 /* Even part */
2471
2472 /* Add range center and fudge factor for final descale and range-limit. */
2473 z1 = (INT32) wsptr[0] +
2474 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2475 (ONE << (PASS1_BITS+2)));
2476 z1 <<= CONST_BITS;
2477
2478 z2 = (INT32) wsptr[2];
2479 z3 = (INT32) wsptr[4];
2480 z4 = (INT32) wsptr[6];
2481
2482 tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
2483 tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
2484
2485 tmp12 = z1 - tmp10;
2486 tmp13 = z1 + tmp11;
2487 z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */
2488
2489 z4 = z2 - z3;
2490 z3 += z2;
2491 tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
2492 tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
2493 z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */
2494
2495 tmp20 = tmp13 + tmp10 + tmp11;
2496 tmp23 = tmp12 - tmp10 + tmp11 + z2;
2497
2498 tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
2499 tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
2500
2501 tmp25 = tmp13 - tmp10 - tmp11;
2502 tmp26 = tmp12 + tmp10 - tmp11 - z2;
2503
2504 tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
2505 tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
2506
2507 tmp21 = tmp12 + tmp10 + tmp11;
2508 tmp24 = tmp13 - tmp10 + tmp11;
2509 tmp11 += tmp11;
2510 tmp22 = z1 + tmp11; /* c10 = c6-c12 */
2511 tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */
2512
2513 /* Odd part */
2514
2515 z1 = (INT32) wsptr[1];
2516 z2 = (INT32) wsptr[3];
2517 z4 = (INT32) wsptr[5];
2518 z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */
2519 z4 = (INT32) wsptr[7];
2520
2521 tmp13 = z2 - z4;
2522 tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */
2523 tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */
2524 tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */
2525
2526 tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */
2527 tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */
2528 z2 = z1 - z4;
2529 tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */
2530
2531 tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
2532 tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
2533 tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */
2534 z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */
2535 tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */
2536 tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */
2537
2538 /* Final output stage */
2539
2540 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2541 CONST_BITS+PASS1_BITS+3)
2542 & RANGE_MASK];
2543 outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2544 CONST_BITS+PASS1_BITS+3)
2545 & RANGE_MASK];
2546 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2547 CONST_BITS+PASS1_BITS+3)
2548 & RANGE_MASK];
2549 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2550 CONST_BITS+PASS1_BITS+3)
2551 & RANGE_MASK];
2552 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2553 CONST_BITS+PASS1_BITS+3)
2554 & RANGE_MASK];
2555 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2556 CONST_BITS+PASS1_BITS+3)
2557 & RANGE_MASK];
2558 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2559 CONST_BITS+PASS1_BITS+3)
2560 & RANGE_MASK];
2561 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2562 CONST_BITS+PASS1_BITS+3)
2563 & RANGE_MASK];
2564 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2565 CONST_BITS+PASS1_BITS+3)
2566 & RANGE_MASK];
2567 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2568 CONST_BITS+PASS1_BITS+3)
2569 & RANGE_MASK];
2570 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2571 CONST_BITS+PASS1_BITS+3)
2572 & RANGE_MASK];
2573 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2574 CONST_BITS+PASS1_BITS+3)
2575 & RANGE_MASK];
2576 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
2577 CONST_BITS+PASS1_BITS+3)
2578 & RANGE_MASK];
2579 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
2580 CONST_BITS+PASS1_BITS+3)
2581 & RANGE_MASK];
2582 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27,
2583 CONST_BITS+PASS1_BITS+3)
2584 & RANGE_MASK];
2585
2586 wsptr += 8; /* advance pointer to next row */
2587 }
2588}
2589
2590
/*
 * Perform dequantization and inverse DCT on one block of coefficients,
 * producing a 16x16 output block.
 *
 * Optimized algorithm with 28 multiplications in the 1-D kernel.
 * cK represents sqrt(2) * cos(K*pi/32).
 */
2598
2599GLOBAL(void)
2600jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2603{
2604 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
2605 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2606 INT32 z1, z2, z3, z4;
2607 JCOEFPTR inptr;
2608 ISLOW_MULT_TYPE * quantptr;
2609 int * wsptr;
2610 JSAMPROW outptr;
2611 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2612 int ctr;
2613 int workspace[8*16]; /* buffers data between passes */
2615
2616 /* Pass 1: process columns from input, store into work array. */
2617
2618 inptr = coef_block;
2619 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2620 wsptr = workspace;
2621 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2622 /* Even part */
2623
2624 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2625 tmp0 <<= CONST_BITS;
2626 /* Add fudge factor here for final descale. */
2627 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
2628
2629 z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2630 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
2631 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
2632
2633 tmp10 = tmp0 + tmp1;
2634 tmp11 = tmp0 - tmp1;
2635 tmp12 = tmp0 + tmp2;
2636 tmp13 = tmp0 - tmp2;
2637
2638 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2639 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2640 z3 = z1 - z2;
2641 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
2642 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
2643
2644 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
2645 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
2646 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2647 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2648
2649 tmp20 = tmp10 + tmp0;
2650 tmp27 = tmp10 - tmp0;
2651 tmp21 = tmp12 + tmp1;
2652 tmp26 = tmp12 - tmp1;
2653 tmp22 = tmp13 + tmp2;
2654 tmp25 = tmp13 - tmp2;
2655 tmp23 = tmp11 + tmp3;
2656 tmp24 = tmp11 - tmp3;
2657
2658 /* Odd part */
2659
2660 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2661 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2662 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2663 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2664
2665 tmp11 = z1 + z3;
2666
2667 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
2668 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
2669 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
2670 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
2671 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
2672 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
2673 tmp0 = tmp1 + tmp2 + tmp3 -
2674 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
2675 tmp13 = tmp10 + tmp11 + tmp12 -
2676 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
2677 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
2678 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
2679 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
2680 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
2681 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
2682 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
2683 z2 += z4;
2684 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
2685 tmp1 += z1;
2686 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
2687 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
2688 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
2689 tmp12 += z2;
2690 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
2691 tmp2 += z2;
2692 tmp3 += z2;
2693 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
2694 tmp10 += z2;
2695 tmp11 += z2;
2696
2697 /* Final output stage */
2698
2699 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS);
2700 wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS);
2701 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS);
2702 wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS);
2703 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS);
2704 wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS);
2705 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS);
2706 wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS);
2707 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
2708 wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
2709 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
2710 wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
2711 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
2712 wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
2713 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
2714 wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
2715 }
2716
2717 /* Pass 2: process 16 rows from work array, store into output array. */
2718
2719 wsptr = workspace;
2720 for (ctr = 0; ctr < 16; ctr++) {
2721 outptr = output_buf[ctr] + output_col;
2722
2723 /* Even part */
2724
2725 /* Add range center and fudge factor for final descale and range-limit. */
2726 tmp0 = (INT32) wsptr[0] +
2727 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2728 (ONE << (PASS1_BITS+2)));
2729 tmp0 <<= CONST_BITS;
2730
2731 z1 = (INT32) wsptr[4];
2732 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
2733 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
2734
2735 tmp10 = tmp0 + tmp1;
2736 tmp11 = tmp0 - tmp1;
2737 tmp12 = tmp0 + tmp2;
2738 tmp13 = tmp0 - tmp2;
2739
2740 z1 = (INT32) wsptr[2];
2741 z2 = (INT32) wsptr[6];
2742 z3 = z1 - z2;
2743 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
2744 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
2745
2746 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
2747 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
2748 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2749 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2750
2751 tmp20 = tmp10 + tmp0;
2752 tmp27 = tmp10 - tmp0;
2753 tmp21 = tmp12 + tmp1;
2754 tmp26 = tmp12 - tmp1;
2755 tmp22 = tmp13 + tmp2;
2756 tmp25 = tmp13 - tmp2;
2757 tmp23 = tmp11 + tmp3;
2758 tmp24 = tmp11 - tmp3;
2759
2760 /* Odd part */
2761
2762 z1 = (INT32) wsptr[1];
2763 z2 = (INT32) wsptr[3];
2764 z3 = (INT32) wsptr[5];
2765 z4 = (INT32) wsptr[7];
2766
2767 tmp11 = z1 + z3;
2768
2769 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
2770 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
2771 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
2772 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
2773 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
2774 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
2775 tmp0 = tmp1 + tmp2 + tmp3 -
2776 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
2777 tmp13 = tmp10 + tmp11 + tmp12 -
2778 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
2779 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
2780 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
2781 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
2782 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
2783 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
2784 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
2785 z2 += z4;
2786 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
2787 tmp1 += z1;
2788 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
2789 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
2790 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
2791 tmp12 += z2;
2792 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
2793 tmp2 += z2;
2794 tmp3 += z2;
2795 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
2796 tmp10 += z2;
2797 tmp11 += z2;
2798
2799 /* Final output stage */
2800
2801 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
2802 CONST_BITS+PASS1_BITS+3)
2803 & RANGE_MASK];
2804 outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
2805 CONST_BITS+PASS1_BITS+3)
2806 & RANGE_MASK];
2807 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
2808 CONST_BITS+PASS1_BITS+3)
2809 & RANGE_MASK];
2810 outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
2811 CONST_BITS+PASS1_BITS+3)
2812 & RANGE_MASK];
2813 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
2814 CONST_BITS+PASS1_BITS+3)
2815 & RANGE_MASK];
2816 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
2817 CONST_BITS+PASS1_BITS+3)
2818 & RANGE_MASK];
2819 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
2820 CONST_BITS+PASS1_BITS+3)
2821 & RANGE_MASK];
2822 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
2823 CONST_BITS+PASS1_BITS+3)
2824 & RANGE_MASK];
2825 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
2826 CONST_BITS+PASS1_BITS+3)
2827 & RANGE_MASK];
2828 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
2829 CONST_BITS+PASS1_BITS+3)
2830 & RANGE_MASK];
2831 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
2832 CONST_BITS+PASS1_BITS+3)
2833 & RANGE_MASK];
2834 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
2835 CONST_BITS+PASS1_BITS+3)
2836 & RANGE_MASK];
2837 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
2838 CONST_BITS+PASS1_BITS+3)
2839 & RANGE_MASK];
2840 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
2841 CONST_BITS+PASS1_BITS+3)
2842 & RANGE_MASK];
2843 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
2844 CONST_BITS+PASS1_BITS+3)
2845 & RANGE_MASK];
2846 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
2847 CONST_BITS+PASS1_BITS+3)
2848 & RANGE_MASK];
2849
2850 wsptr += 8; /* advance pointer to next row */
2851 }
2852}
2853
2854
2855/*
2856 * Perform dequantization and inverse DCT on one block of coefficients,
2857 * producing a 16x8 output block.
2858 *
2859 * 8-point IDCT in pass 1 (columns), 16-point in pass 2 (rows).
2860 */
2861
2862GLOBAL(void)
2863jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2866{
2867 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
2868 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2869 INT32 z1, z2, z3, z4;
2870 JCOEFPTR inptr;
2871 ISLOW_MULT_TYPE * quantptr;
2872 int * wsptr;
2873 JSAMPROW outptr;
2874 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2875 int ctr;
2876 int workspace[8*8]; /* buffers data between passes */
2878
2879 /* Pass 1: process columns from input, store into work array.
2880 * Note results are scaled up by sqrt(8) compared to a true IDCT;
2881 * furthermore, we scale the results by 2**PASS1_BITS.
2882 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
2883 */
2884
2885 inptr = coef_block;
2886 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2887 wsptr = workspace;
2888 for (ctr = DCTSIZE; ctr > 0; ctr--) {
2889 /* Due to quantization, we will usually find that many of the input
2890 * coefficients are zero, especially the AC terms. We can exploit this
2891 * by short-circuiting the IDCT calculation for any column in which all
2892 * the AC terms are zero. In that case each output is equal to the
2893 * DC coefficient (with scale factor as needed).
2894 * With typical images and quantization tables, half or more of the
2895 * column DCT calculations can be simplified this way.
2896 */
2897
2898 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
2899 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
2900 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
2901 inptr[DCTSIZE*7] == 0) {
2902 /* AC terms all zero */
2903 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
2904
2905 wsptr[DCTSIZE*0] = dcval;
2906 wsptr[DCTSIZE*1] = dcval;
2907 wsptr[DCTSIZE*2] = dcval;
2908 wsptr[DCTSIZE*3] = dcval;
2909 wsptr[DCTSIZE*4] = dcval;
2910 wsptr[DCTSIZE*5] = dcval;
2911 wsptr[DCTSIZE*6] = dcval;
2912 wsptr[DCTSIZE*7] = dcval;
2913
2914 inptr++; /* advance pointers to next column */
2915 quantptr++;
2916 wsptr++;
2917 continue;
2918 }
2919
2920 /* Even part: reverse the even part of the forward DCT.
2921 * The rotator is c(-6).
2922 */
2923
2924 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2925 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2926 z2 <<= CONST_BITS;
2927 z3 <<= CONST_BITS;
2928 /* Add fudge factor here for final descale. */
2929 z2 += ONE << (CONST_BITS-PASS1_BITS-1);
2930
2931 tmp0 = z2 + z3;
2932 tmp1 = z2 - z3;
2933
2934 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2935 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2936
2937 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
2938 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
2939 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
2940
2941 tmp10 = tmp0 + tmp2;
2942 tmp13 = tmp0 - tmp2;
2943 tmp11 = tmp1 + tmp3;
2944 tmp12 = tmp1 - tmp3;
2945
2946 /* Odd part per figure 8; the matrix is unitary and hence its
2947 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
2948 */
2949
2950 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2951 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2952 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2953 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2954
2955 z2 = tmp0 + tmp2;
2956 z3 = tmp1 + tmp3;
2957
2958 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
2959 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
2960 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
2961 z2 += z1;
2962 z3 += z1;
2963
2964 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
2965 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
2966 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
2967 tmp0 += z1 + z2;
2968 tmp3 += z1 + z3;
2969
2970 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
2971 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
2972 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
2973 tmp1 += z1 + z3;
2974 tmp2 += z1 + z2;
2975
2976 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
2977
2978 wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
2979 wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
2980 wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
2981 wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
2982 wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
2983 wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
2984 wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
2985 wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
2986
2987 inptr++; /* advance pointers to next column */
2988 quantptr++;
2989 wsptr++;
2990 }
2991
2992 /* Pass 2: process 8 rows from work array, store into output array.
2993 * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
2994 */
2995
2996 wsptr = workspace;
2997 for (ctr = 0; ctr < 8; ctr++) {
2998 outptr = output_buf[ctr] + output_col;
2999
3000 /* Even part */
3001
3002 /* Add range center and fudge factor for final descale and range-limit. */
3003 tmp0 = (INT32) wsptr[0] +
3004 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3005 (ONE << (PASS1_BITS+2)));
3006 tmp0 <<= CONST_BITS;
3007
3008 z1 = (INT32) wsptr[4];
3009 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
3010 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
3011
3012 tmp10 = tmp0 + tmp1;
3013 tmp11 = tmp0 - tmp1;
3014 tmp12 = tmp0 + tmp2;
3015 tmp13 = tmp0 - tmp2;
3016
3017 z1 = (INT32) wsptr[2];
3018 z2 = (INT32) wsptr[6];
3019 z3 = z1 - z2;
3020 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
3021 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
3022
3023 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
3024 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
3025 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
3026 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
3027
3028 tmp20 = tmp10 + tmp0;
3029 tmp27 = tmp10 - tmp0;
3030 tmp21 = tmp12 + tmp1;
3031 tmp26 = tmp12 - tmp1;
3032 tmp22 = tmp13 + tmp2;
3033 tmp25 = tmp13 - tmp2;
3034 tmp23 = tmp11 + tmp3;
3035 tmp24 = tmp11 - tmp3;
3036
3037 /* Odd part */
3038
3039 z1 = (INT32) wsptr[1];
3040 z2 = (INT32) wsptr[3];
3041 z3 = (INT32) wsptr[5];
3042 z4 = (INT32) wsptr[7];
3043
3044 tmp11 = z1 + z3;
3045
3046 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
3047 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
3048 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
3049 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
3050 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
3051 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
3052 tmp0 = tmp1 + tmp2 + tmp3 -
3053 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
3054 tmp13 = tmp10 + tmp11 + tmp12 -
3055 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
3056 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
3057 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
3058 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
3059 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
3060 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
3061 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
3062 z2 += z4;
3063 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
3064 tmp1 += z1;
3065 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
3066 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
3067 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
3068 tmp12 += z2;
3069 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
3070 tmp2 += z2;
3071 tmp3 += z2;
3072 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
3073 tmp10 += z2;
3074 tmp11 += z2;
3075
3076 /* Final output stage */
3077
3078 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
3079 CONST_BITS+PASS1_BITS+3)
3080 & RANGE_MASK];
3081 outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
3082 CONST_BITS+PASS1_BITS+3)
3083 & RANGE_MASK];
3084 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
3085 CONST_BITS+PASS1_BITS+3)
3086 & RANGE_MASK];
3087 outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
3088 CONST_BITS+PASS1_BITS+3)
3089 & RANGE_MASK];
3090 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
3091 CONST_BITS+PASS1_BITS+3)
3092 & RANGE_MASK];
3093 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
3094 CONST_BITS+PASS1_BITS+3)
3095 & RANGE_MASK];
3096 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
3097 CONST_BITS+PASS1_BITS+3)
3098 & RANGE_MASK];
3099 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
3100 CONST_BITS+PASS1_BITS+3)
3101 & RANGE_MASK];
3102 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
3103 CONST_BITS+PASS1_BITS+3)
3104 & RANGE_MASK];
3105 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
3106 CONST_BITS+PASS1_BITS+3)
3107 & RANGE_MASK];
3108 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
3109 CONST_BITS+PASS1_BITS+3)
3110 & RANGE_MASK];
3111 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
3112 CONST_BITS+PASS1_BITS+3)
3113 & RANGE_MASK];
3114 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
3115 CONST_BITS+PASS1_BITS+3)
3116 & RANGE_MASK];
3117 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
3118 CONST_BITS+PASS1_BITS+3)
3119 & RANGE_MASK];
3120 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
3121 CONST_BITS+PASS1_BITS+3)
3122 & RANGE_MASK];
3123 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
3124 CONST_BITS+PASS1_BITS+3)
3125 & RANGE_MASK];
3126
3127 wsptr += 8; /* advance pointer to next row */
3128 }
3129}
3130
3131
3132/*
3133 * Perform dequantization and inverse DCT on one block of coefficients,
3134 * producing a 14x7 output block.
3135 *
3136 * 7-point IDCT in pass 1 (columns), 14-point in pass 2 (rows).
3137 */
3138
3139GLOBAL(void)
3140jpeg_idct_14x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3143{
3144 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
3145 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
3146 INT32 z1, z2, z3, z4;
3147 JCOEFPTR inptr;
3148 ISLOW_MULT_TYPE * quantptr;
3149 int * wsptr;
3150 JSAMPROW outptr;
3151 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3152 int ctr;
3153 int workspace[8*7]; /* buffers data between passes */
3155
3156 /* Pass 1: process columns from input, store into work array.
3157 * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
3158 */
3159
3160 inptr = coef_block;
3161 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3162 wsptr = workspace;
3163 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3164 /* Even part */
3165
3166 tmp23 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3167 tmp23 <<= CONST_BITS;
3168 /* Add fudge factor here for final descale. */
3169 tmp23 += ONE << (CONST_BITS-PASS1_BITS-1);
3170
3171 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3172 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3173 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
3174
3175 tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
3176 tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
3177 tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
3178 tmp10 = z1 + z3;
3179 z2 -= tmp10;
3180 tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
3181 tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
3182 tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
3183 tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
3184
3185 /* Odd part */
3186
3187 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3188 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3189 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
3190
3191 tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
3192 tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
3193 tmp10 = tmp11 - tmp12;
3194 tmp11 += tmp12;
3195 tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
3196 tmp11 += tmp12;
3197 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
3198 tmp10 += z2;
3199 tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
3200
3201 /* Final output stage */
3202
3203 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
3204 wsptr[8*6] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
3205 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
3206 wsptr[8*5] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
3207 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
3208 wsptr[8*4] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
3209 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23, CONST_BITS-PASS1_BITS);
3210 }
3211
3212 /* Pass 2: process 7 rows from work array, store into output array.
3213 * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
3214 */
3215
3216 wsptr = workspace;
3217 for (ctr = 0; ctr < 7; ctr++) {
3218 outptr = output_buf[ctr] + output_col;
3219
3220 /* Even part */
3221
3222 /* Add range center and fudge factor for final descale and range-limit. */
3223 z1 = (INT32) wsptr[0] +
3224 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3225 (ONE << (PASS1_BITS+2)));
3226 z1 <<= CONST_BITS;
3227 z4 = (INT32) wsptr[4];
3228 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
3229 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
3230 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
3231
3232 tmp10 = z1 + z2;
3233 tmp11 = z1 + z3;
3234 tmp12 = z1 - z4;
3235
3236 tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */
3237
3238 z1 = (INT32) wsptr[2];
3239 z2 = (INT32) wsptr[6];
3240
3241 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
3242
3243 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
3244 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
3245 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
3246 MULTIPLY(z2, FIX(1.378756276)); /* c2 */
3247
3248 tmp20 = tmp10 + tmp13;
3249 tmp26 = tmp10 - tmp13;
3250 tmp21 = tmp11 + tmp14;
3251 tmp25 = tmp11 - tmp14;
3252 tmp22 = tmp12 + tmp15;
3253 tmp24 = tmp12 - tmp15;
3254
3255 /* Odd part */
3256
3257 z1 = (INT32) wsptr[1];
3258 z2 = (INT32) wsptr[3];
3259 z3 = (INT32) wsptr[5];
3260 z4 = (INT32) wsptr[7];
3261 z4 <<= CONST_BITS;
3262
3263 tmp14 = z1 + z3;
3264 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
3265 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
3266 tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
3267 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
3268 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
3269 z1 -= z2;
3270 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */
3271 tmp16 += tmp15;
3272 tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */
3273 tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
3274 tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
3275 tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
3276 tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
3277 tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
3278
3279 tmp13 = ((z1 - z3) << CONST_BITS) + z4;
3280
3281 /* Final output stage */
3282
3283 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3284 CONST_BITS+PASS1_BITS+3)
3285 & RANGE_MASK];
3286 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3287 CONST_BITS+PASS1_BITS+3)
3288 & RANGE_MASK];
3289 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3290 CONST_BITS+PASS1_BITS+3)
3291 & RANGE_MASK];
3292 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3293 CONST_BITS+PASS1_BITS+3)
3294 & RANGE_MASK];
3295 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3296 CONST_BITS+PASS1_BITS+3)
3297 & RANGE_MASK];
3298 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3299 CONST_BITS+PASS1_BITS+3)
3300 & RANGE_MASK];
3301 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3302 CONST_BITS+PASS1_BITS+3)
3303 & RANGE_MASK];
3304 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3305 CONST_BITS+PASS1_BITS+3)
3306 & RANGE_MASK];
3307 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3308 CONST_BITS+PASS1_BITS+3)
3309 & RANGE_MASK];
3310 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3311 CONST_BITS+PASS1_BITS+3)
3312 & RANGE_MASK];
3313 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
3314 CONST_BITS+PASS1_BITS+3)
3315 & RANGE_MASK];
3316 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
3317 CONST_BITS+PASS1_BITS+3)
3318 & RANGE_MASK];
3319 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
3320 CONST_BITS+PASS1_BITS+3)
3321 & RANGE_MASK];
3322 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
3323 CONST_BITS+PASS1_BITS+3)
3324 & RANGE_MASK];
3325
3326 wsptr += 8; /* advance pointer to next row */
3327 }
3328}
3329
3330
3331/*
3332 * Perform dequantization and inverse DCT on one block of coefficients,
3333 * producing a 12x6 output block.
3334 *
3335 * 6-point IDCT in pass 1 (columns), 12-point in pass 2 (rows).
3336 */
3337
3338GLOBAL(void)
3339jpeg_idct_12x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3342{
3343 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
3344 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
3345 INT32 z1, z2, z3, z4;
3346 JCOEFPTR inptr;
3347 ISLOW_MULT_TYPE * quantptr;
3348 int * wsptr;
3349 JSAMPROW outptr;
3350 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3351 int ctr;
3352 int workspace[8*6]; /* buffers data between passes */
3354
3355 /* Pass 1: process columns from input, store into work array.
3356 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3357 */
3358
3359 inptr = coef_block;
3360 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3361 wsptr = workspace;
3362 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3363 /* Even part */
3364
3365 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3366 tmp10 <<= CONST_BITS;
3367 /* Add fudge factor here for final descale. */
3368 tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
3369 tmp12 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3370 tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */
3371 tmp11 = tmp10 + tmp20;
3372 tmp21 = RIGHT_SHIFT(tmp10 - tmp20 - tmp20, CONST_BITS-PASS1_BITS);
3373 tmp20 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3374 tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */
3375 tmp20 = tmp11 + tmp10;
3376 tmp22 = tmp11 - tmp10;
3377
3378 /* Odd part */
3379
3380 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3381 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3382 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
3383 tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
3384 tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
3385 tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
3386 tmp11 = (z1 - z2 - z3) << PASS1_BITS;
3387
3388 /* Final output stage */
3389
3390 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
3391 wsptr[8*5] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
3392 wsptr[8*1] = (int) (tmp21 + tmp11);
3393 wsptr[8*4] = (int) (tmp21 - tmp11);
3394 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
3395 wsptr[8*3] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
3396 }
3397
3398 /* Pass 2: process 6 rows from work array, store into output array.
3399 * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
3400 */
3401
3402 wsptr = workspace;
3403 for (ctr = 0; ctr < 6; ctr++) {
3404 outptr = output_buf[ctr] + output_col;
3405
3406 /* Even part */
3407
3408 /* Add range center and fudge factor for final descale and range-limit. */
3409 z3 = (INT32) wsptr[0] +
3410 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3411 (ONE << (PASS1_BITS+2)));
3412 z3 <<= CONST_BITS;
3413
3414 z4 = (INT32) wsptr[4];
3415 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
3416
3417 tmp10 = z3 + z4;
3418 tmp11 = z3 - z4;
3419
3420 z1 = (INT32) wsptr[2];
3421 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
3422 z1 <<= CONST_BITS;
3423 z2 = (INT32) wsptr[6];
3424 z2 <<= CONST_BITS;
3425
3426 tmp12 = z1 - z2;
3427
3428 tmp21 = z3 + tmp12;
3429 tmp24 = z3 - tmp12;
3430
3431 tmp12 = z4 + z2;
3432
3433 tmp20 = tmp10 + tmp12;
3434 tmp25 = tmp10 - tmp12;
3435
3436 tmp12 = z4 - z1 - z2;
3437
3438 tmp22 = tmp11 + tmp12;
3439 tmp23 = tmp11 - tmp12;
3440
3441 /* Odd part */
3442
3443 z1 = (INT32) wsptr[1];
3444 z2 = (INT32) wsptr[3];
3445 z3 = (INT32) wsptr[5];
3446 z4 = (INT32) wsptr[7];
3447
3448 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
3449 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
3450
3451 tmp10 = z1 + z3;
3452 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
3453 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
3454 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
3455 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
3456 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
3457 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
3458 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
3459 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
3460
3461 z1 -= z4;
3462 z2 -= z3;
3463 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
3464 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
3465 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
3466
3467 /* Final output stage */
3468
3469 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3470 CONST_BITS+PASS1_BITS+3)
3471 & RANGE_MASK];
3472 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3473 CONST_BITS+PASS1_BITS+3)
3474 & RANGE_MASK];
3475 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3476 CONST_BITS+PASS1_BITS+3)
3477 & RANGE_MASK];
3478 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3479 CONST_BITS+PASS1_BITS+3)
3480 & RANGE_MASK];
3481 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3482 CONST_BITS+PASS1_BITS+3)
3483 & RANGE_MASK];
3484 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3485 CONST_BITS+PASS1_BITS+3)
3486 & RANGE_MASK];
3487 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3488 CONST_BITS+PASS1_BITS+3)
3489 & RANGE_MASK];
3490 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3491 CONST_BITS+PASS1_BITS+3)
3492 & RANGE_MASK];
3493 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3494 CONST_BITS+PASS1_BITS+3)
3495 & RANGE_MASK];
3496 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3497 CONST_BITS+PASS1_BITS+3)
3498 & RANGE_MASK];
3499 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
3500 CONST_BITS+PASS1_BITS+3)
3501 & RANGE_MASK];
3502 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
3503 CONST_BITS+PASS1_BITS+3)
3504 & RANGE_MASK];
3505
3506 wsptr += 8; /* advance pointer to next row */
3507 }
3508}
3509
3510
3511/*
3512 * Perform dequantization and inverse DCT on one block of coefficients,
3513 * producing a 10x5 output block.
3514 *
3515 * 5-point IDCT in pass 1 (columns), 10-point in pass 2 (rows).
     *
     * Pass 1 reads, for each of the 8 input columns, only the coefficients
     * at offsets DCTSIZE*0 .. DCTSIZE*4 and writes an intermediate array of
     * 5 rows with 8 entries each.  Pass 2 turns every intermediate row into
     * 10 samples stored at output_buf[row] + output_col.
     *
     * NOTE(review): in this listing the parameter list breaks off after
     * compptr; the body additionally uses coef_block, output_buf and
     * output_col, which are presumably the remaining parameters.
3516 */
3517
3518GLOBAL(void)
3519jpeg_idct_10x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3522{
3523 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
3524 INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
3525 INT32 z1, z2, z3, z4;
3526 JCOEFPTR inptr;
3527 ISLOW_MULT_TYPE * quantptr;
3528 int * wsptr;
3529 JSAMPROW outptr;
3530 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3531 int ctr;
3532 int workspace[8*5]; /* buffers data between passes */
3534
3535 /* Pass 1: process columns from input, store into work array.
3536 * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
3537 */
3538
3539 inptr = coef_block;
3540 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3541 wsptr = workspace;
3542 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3543 /* Even part */
3544
3545 tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3546 tmp12 <<= CONST_BITS;
3547 /* Add fudge factor here for final descale. */
3548 tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
3549 tmp13 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3550 tmp14 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3551 z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
3552 z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
3553 z3 = tmp12 + z2;
3554 tmp10 = z3 + z1;
3555 tmp11 = z3 - z1;
     /* From here on tmp12 feeds only the middle row (wsptr[8*2]), which
      * receives no odd-part term in the output stage below.
      */
3556 tmp12 -= z2 << 2;
3557
3558 /* Odd part */
3559
3560 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3561 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3562
3563 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
3564 tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
3565 tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
3566
3567 /* Final output stage */
     /* Rows are written in mirrored pairs (k and 4-k): even part plus,
      * respectively minus, the odd part.
      */
3568
3569 wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp13, CONST_BITS-PASS1_BITS);
3570 wsptr[8*4] = (int) RIGHT_SHIFT(tmp10 - tmp13, CONST_BITS-PASS1_BITS);
3571 wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp14, CONST_BITS-PASS1_BITS);
3572 wsptr[8*3] = (int) RIGHT_SHIFT(tmp11 - tmp14, CONST_BITS-PASS1_BITS);
3573 wsptr[8*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
3574 }
3575
3576 /* Pass 2: process 5 rows from work array, store into output array.
3577 * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
3578 */
3579
3580 wsptr = workspace;
3581 for (ctr = 0; ctr < 5; ctr++) {
3582 outptr = output_buf[ctr] + output_col;
3583
3584 /* Even part */
3585
3586 /* Add range center and fudge factor for final descale and range-limit. */
3587 z3 = (INT32) wsptr[0] +
3588 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3589 (ONE << (PASS1_BITS+2)));
3590 z3 <<= CONST_BITS;
3591 z4 = (INT32) wsptr[4];
3592 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
3593 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
3594 tmp10 = z3 + z1;
3595 tmp11 = z3 - z2;
3596
3597 tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */
3598
3599 z2 = (INT32) wsptr[2];
3600 z3 = (INT32) wsptr[6];
3601
3602 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
3603 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
3604 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
3605
3606 tmp20 = tmp10 + tmp12;
3607 tmp24 = tmp10 - tmp12;
3608 tmp21 = tmp11 + tmp13;
3609 tmp23 = tmp11 - tmp13;
3610
3611 /* Odd part */
     /* Note that tmp10..tmp14 are reused here for the odd-part results;
      * the even-part results live in tmp20..tmp24.
      */
3612
3613 z1 = (INT32) wsptr[1];
3614 z2 = (INT32) wsptr[3];
3615 z3 = (INT32) wsptr[5];
     /* wsptr[5] is not multiplied by a constant; it is only shifted up by
      * CONST_BITS so it can be combined with the products below.
      */
3616 z3 <<= CONST_BITS;
3617 z4 = (INT32) wsptr[7];
3618
3619 tmp11 = z2 + z4;
3620 tmp13 = z2 - z4;
3621
3622 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
3623
3624 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
3625 z4 = z3 + tmp12;
3626
3627 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
3628 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
3629
3630 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
3631 z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
3632
3633 tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
3634
3635 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
3636 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
3637
3638 /* Final output stage */
     /* Samples are written in mirrored pairs (k and 9-k).  Each sum is
      * shifted down by CONST_BITS+PASS1_BITS+3, masked with RANGE_MASK and
      * used as an index into range_limit.
      */
3639
3640 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3641 CONST_BITS+PASS1_BITS+3)
3642 & RANGE_MASK];
3643 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3644 CONST_BITS+PASS1_BITS+3)
3645 & RANGE_MASK];
3646 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3647 CONST_BITS+PASS1_BITS+3)
3648 & RANGE_MASK];
3649 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3650 CONST_BITS+PASS1_BITS+3)
3651 & RANGE_MASK];
3652 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3653 CONST_BITS+PASS1_BITS+3)
3654 & RANGE_MASK];
3655 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3656 CONST_BITS+PASS1_BITS+3)
3657 & RANGE_MASK];
3658 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3659 CONST_BITS+PASS1_BITS+3)
3660 & RANGE_MASK];
3661 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3662 CONST_BITS+PASS1_BITS+3)
3663 & RANGE_MASK];
3664 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3665 CONST_BITS+PASS1_BITS+3)
3666 & RANGE_MASK];
3667 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3668 CONST_BITS+PASS1_BITS+3)
3669 & RANGE_MASK];
3670
3671 wsptr += 8; /* advance pointer to next row */
3672 }
3673}
3674
3675
3676/*
3677 * Perform dequantization and inverse DCT on one block of coefficients,
3678 * producing an 8x4 output block.
3679 *
3680 * 4-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
     *
     * Pass 1 reads, for each of the 8 input columns, only the coefficients
     * at offsets DCTSIZE*0 .. DCTSIZE*3 and writes an intermediate array of
     * 4 rows with 8 entries each.  Pass 2 turns every intermediate row into
     * 8 samples stored at output_buf[row] + output_col.
     *
     * NOTE(review): pass 1 indexes the work array with a literal stride of
     * 8 while pass 2 advances by DCTSIZE; the two agree only if DCTSIZE is
     * 8 -- confirm against the definition of DCTSIZE.
     *
     * NOTE(review): in this listing the parameter list breaks off after
     * compptr; the body additionally uses coef_block, output_buf and
     * output_col, which are presumably the remaining parameters.
3681 */
3682
3683GLOBAL(void)
3684jpeg_idct_8x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3687{
3688 INT32 tmp0, tmp1, tmp2, tmp3;
3689 INT32 tmp10, tmp11, tmp12, tmp13;
3690 INT32 z1, z2, z3;
3691 JCOEFPTR inptr;
3692 ISLOW_MULT_TYPE * quantptr;
3693 int * wsptr;
3694 JSAMPROW outptr;
3695 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3696 int ctr;
3697 int workspace[8*4]; /* buffers data between passes */
3699
3700 /* Pass 1: process columns from input, store into work array.
3701 * 4-point IDCT kernel,
3702 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
3703 */
3704
3705 inptr = coef_block;
3706 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3707 wsptr = workspace;
3708 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3709 /* Even part */
     /* No multiplication is needed here; the sums are only shifted up by
      * PASS1_BITS so they match the already descaled odd part below.
      */
3710
3711 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3712 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3713
3714 tmp10 = (tmp0 + tmp2) << PASS1_BITS;
3715 tmp12 = (tmp0 - tmp2) << PASS1_BITS;
3716
3717 /* Odd part */
3718 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
     /* tmp0 and tmp2 are reused below for the descaled odd-part results. */
3719
3720 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3721 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3722
3723 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
3724 /* Add fudge factor here for final descale. */
3725 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
3726 tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
3727 CONST_BITS-PASS1_BITS);
3728 tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
3729 CONST_BITS-PASS1_BITS);
3730
3731 /* Final output stage */
     /* Both operands are already at PASS1_BITS scale, so no shift here. */
3732
3733 wsptr[8*0] = (int) (tmp10 + tmp0);
3734 wsptr[8*3] = (int) (tmp10 - tmp0);
3735 wsptr[8*1] = (int) (tmp12 + tmp2);
3736 wsptr[8*2] = (int) (tmp12 - tmp2);
3737 }
3738
3739 /* Pass 2: process rows from work array, store into output array.
3740 * Note that we must descale the results by a factor of 8 == 2**3,
3741 * and also undo the PASS1_BITS scaling.
3742 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
3743 */
3744
3745 wsptr = workspace;
3746 for (ctr = 0; ctr < 4; ctr++) {
3747 outptr = output_buf[ctr] + output_col;
3748
3749 /* Even part: reverse the even part of the forward DCT.
3750 * The rotator is c(-6).
3751 */
3752
3753 /* Add range center and fudge factor for final descale and range-limit. */
3754 z2 = (INT32) wsptr[0] +
3755 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3756 (ONE << (PASS1_BITS+2)));
3757 z3 = (INT32) wsptr[4];
3758
3759 tmp0 = (z2 + z3) << CONST_BITS;
3760 tmp1 = (z2 - z3) << CONST_BITS;
3761
3762 z2 = (INT32) wsptr[2];
3763 z3 = (INT32) wsptr[6];
3764
3765 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
3766 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
3767 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
3768
3769 tmp10 = tmp0 + tmp2;
3770 tmp13 = tmp0 - tmp2;
3771 tmp11 = tmp1 + tmp3;
3772 tmp12 = tmp1 - tmp3;
3773
3774 /* Odd part per figure 8; the matrix is unitary and hence its
3775 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
3776 */
     /* tmp0..tmp3 are reused for the odd part; the even-part results are
      * kept in tmp10..tmp13.
      */
3777
3778 tmp0 = (INT32) wsptr[7];
3779 tmp1 = (INT32) wsptr[5];
3780 tmp2 = (INT32) wsptr[3];
3781 tmp3 = (INT32) wsptr[1];
3782
3783 z2 = tmp0 + tmp2;
3784 z3 = tmp1 + tmp3;
3785
3786 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
3787 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
3788 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
3789 z2 += z1;
3790 z3 += z1;
3791
3792 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
3793 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
3794 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
3795 tmp0 += z1 + z2;
3796 tmp3 += z1 + z3;
3797
3798 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
3799 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
3800 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
3801 tmp1 += z1 + z3;
3802 tmp2 += z1 + z2;
3803
3804 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
     /* Samples are written in mirrored pairs (k and 7-k).  Each sum is
      * shifted down by CONST_BITS+PASS1_BITS+3, masked with RANGE_MASK and
      * used as an index into range_limit.
      */
3805
3806 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
3807 CONST_BITS+PASS1_BITS+3)
3808 & RANGE_MASK];
3809 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
3810 CONST_BITS+PASS1_BITS+3)
3811 & RANGE_MASK];
3812 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
3813 CONST_BITS+PASS1_BITS+3)
3814 & RANGE_MASK];
3815 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
3816 CONST_BITS+PASS1_BITS+3)
3817 & RANGE_MASK];
3818 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
3819 CONST_BITS+PASS1_BITS+3)
3820 & RANGE_MASK];
3821 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
3822 CONST_BITS+PASS1_BITS+3)
3823 & RANGE_MASK];
3824 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
3825 CONST_BITS+PASS1_BITS+3)
3826 & RANGE_MASK];
3827 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
3828 CONST_BITS+PASS1_BITS+3)
3829 & RANGE_MASK];
3830
3831 wsptr += DCTSIZE; /* advance pointer to next row */
3832 }
3833}
3834
3835
3836/*
3837 * Perform dequantization and inverse DCT on one block of coefficients,
3838 * producing a 6x3 output block.
3839 *
3840 * 3-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
     *
     * Pass 1 visits only the first 6 input columns and, in each of them,
     * only the coefficients at offsets DCTSIZE*0 .. DCTSIZE*2; it writes an
     * intermediate array of 3 rows with 6 entries each.  Pass 2 turns every
     * intermediate row into 6 samples stored at output_buf[row] +
     * output_col.
     *
     * NOTE(review): in this listing the parameter list breaks off after
     * compptr; the body additionally uses coef_block, output_buf and
     * output_col, which are presumably the remaining parameters.
3841 */
3842
3843GLOBAL(void)
3844jpeg_idct_6x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3847{
3848 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
3849 INT32 z1, z2, z3;
3850 JCOEFPTR inptr;
3851 ISLOW_MULT_TYPE * quantptr;
3852 int * wsptr;
3853 JSAMPROW outptr;
3854 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3855 int ctr;
3856 int workspace[6*3]; /* buffers data between passes */
3858
3859 /* Pass 1: process columns from input, store into work array.
3860 * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
3861 */
3862
3863 inptr = coef_block;
3864 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3865 wsptr = workspace;
3866 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
3867 /* Even part */
3868
3869 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3870 tmp0 <<= CONST_BITS;
3871 /* Add fudge factor here for final descale. */
3872 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
3873 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3874 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
3875 tmp10 = tmp0 + tmp12;
     /* tmp2 is reused: it now holds the value of the middle row
      * (wsptr[6*1]), which receives no odd-part term below.
      */
3876 tmp2 = tmp0 - tmp12 - tmp12;
3877
3878 /* Odd part */
     /* tmp12 and tmp0 are reused for the odd input and its product. */
3879
3880 tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3881 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
3882
3883 /* Final output stage */
3884
3885 wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
3886 wsptr[6*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
3887 wsptr[6*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
3888 }
3889
3890 /* Pass 2: process 3 rows from work array, store into output array.
3891 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3892 */
3893
3894 wsptr = workspace;
3895 for (ctr = 0; ctr < 3; ctr++) {
3896 outptr = output_buf[ctr] + output_col;
3897
3898 /* Even part */
     /* The even part ends up in tmp10, tmp11 and tmp12; tmp0, tmp1 and
      * tmp10 serve as scratch on the way and are overwritten.
      */
3899
3900 /* Add range center and fudge factor for final descale and range-limit. */
3901 tmp0 = (INT32) wsptr[0] +
3902 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3903 (ONE << (PASS1_BITS+2)));
3904 tmp0 <<= CONST_BITS;
3905 tmp2 = (INT32) wsptr[4];
3906 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
3907 tmp1 = tmp0 + tmp10;
3908 tmp11 = tmp0 - tmp10 - tmp10;
3909 tmp10 = (INT32) wsptr[2];
3910 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
3911 tmp10 = tmp1 + tmp0;
3912 tmp12 = tmp1 - tmp0;
3913
3914 /* Odd part */
     /* The odd part ends up in tmp0, tmp1 and tmp2.  Only the c5 term is a
      * product; the remaining terms are plain sums shifted up by CONST_BITS.
      */
3915
3916 z1 = (INT32) wsptr[1];
3917 z2 = (INT32) wsptr[3];
3918 z3 = (INT32) wsptr[5];
3919 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
3920 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
3921 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
3922 tmp1 = (z1 - z2 - z3) << CONST_BITS;
3923
3924 /* Final output stage */
     /* Samples are written in mirrored pairs (k and 5-k).  Each sum is
      * shifted down by CONST_BITS+PASS1_BITS+3, masked with RANGE_MASK and
      * used as an index into range_limit.
      */
3925
3926 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
3927 CONST_BITS+PASS1_BITS+3)
3928 & RANGE_MASK];
3929 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
3930 CONST_BITS+PASS1_BITS+3)
3931 & RANGE_MASK];
3932 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
3933 CONST_BITS+PASS1_BITS+3)
3934 & RANGE_MASK];
3935 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
3936 CONST_BITS+PASS1_BITS+3)
3937 & RANGE_MASK];
3938 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
3939 CONST_BITS+PASS1_BITS+3)
3940 & RANGE_MASK];
3941 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
3942 CONST_BITS+PASS1_BITS+3)
3943 & RANGE_MASK];
3944
3945 wsptr += 6; /* advance pointer to next row */
3946 }
3947}
3948
3949
3950/*
3951 * Perform dequantization and inverse DCT on one block of coefficients,
3952 * producing a 4x2 output block.
3953 *
3954 * 2-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
     *
     * Pass 1 visits only the first 4 input columns and, in each of them,
     * only the coefficients at offsets DCTSIZE*0 and DCTSIZE*1; it writes
     * an intermediate array of 2 rows with 4 entries each.  Pass 2 turns
     * every intermediate row into 4 samples stored at output_buf[row] +
     * output_col.
     *
     * Unlike the larger kernels in this file section, pass 1 applies no
     * PASS1_BITS scaling, so pass 2 uses shifts of 3 and CONST_BITS+3
     * without a PASS1_BITS term, and the work array holds INT32 values.
     *
     * NOTE(review): in this listing the parameter list breaks off after
     * compptr; the body additionally uses coef_block, output_buf and
     * output_col, which are presumably the remaining parameters.
3955 */
3956
3957GLOBAL(void)
3958jpeg_idct_4x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3961{
3962 INT32 tmp0, tmp2, tmp10, tmp12;
3963 INT32 z1, z2, z3;
3964 JCOEFPTR inptr;
3965 ISLOW_MULT_TYPE * quantptr;
3966 INT32 * wsptr;
3967 JSAMPROW outptr;
3968 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3969 int ctr;
3970 INT32 workspace[4*2]; /* buffers data between passes */
3972
3973 /* Pass 1: process columns from input, store into work array. */
     /* The 2-point kernel is a plain sum and difference of the two
      * dequantized inputs; nothing is multiplied or shifted here.
      */
3974
3975 inptr = coef_block;
3976 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3977 wsptr = workspace;
3978 for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
3979 /* Even part */
3980
3981 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3982
3983 /* Odd part */
3984
3985 tmp0 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3986
3987 /* Final output stage */
3988
3989 wsptr[4*0] = tmp10 + tmp0;
3990 wsptr[4*1] = tmp10 - tmp0;
3991 }
3992
3993 /* Pass 2: process 2 rows from work array, store into output array.
3994 * 4-point IDCT kernel,
3995 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
3996 */
3997
3998 wsptr = workspace;
3999 for (ctr = 0; ctr < 2; ctr++) {
4000 outptr = output_buf[ctr] + output_col;
4001
4002 /* Even part */
4003
4004 /* Add range center and fudge factor for final descale and range-limit. */
4005 tmp0 = wsptr[0] + ((((INT32) RANGE_CENTER) << 3) + (ONE << 2));
4006 tmp2 = wsptr[2];
4007
4008 tmp10 = (tmp0 + tmp2) << CONST_BITS;
4009 tmp12 = (tmp0 - tmp2) << CONST_BITS;
4010
4011 /* Odd part */
4012 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
     /* tmp0 and tmp2 are reused below for the odd-part results. */
4013
4014 z2 = wsptr[1];
4015 z3 = wsptr[3];
4016
4017 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4018 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4019 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4020
4021 /* Final output stage */
     /* Samples are written in mirrored pairs (k and 3-k).  Each sum is
      * shifted down by CONST_BITS+3, masked with RANGE_MASK and used as an
      * index into range_limit.
      */
4022
4023 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
4024 CONST_BITS+3)
4025 & RANGE_MASK];
4026 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
4027 CONST_BITS+3)
4028 & RANGE_MASK];
4029 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
4030 CONST_BITS+3)
4031 & RANGE_MASK];
4032 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
4033 CONST_BITS+3)
4034 & RANGE_MASK];
4035
4036 wsptr += 4; /* advance pointer to next row */
4037 }
4038}
4039
4040
4041/*
4042 * Perform dequantization and inverse DCT on one block of coefficients,
4043 * producing a 2x1 output block.
4044 *
4045 * 1-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
     *
     * Only coef_block[0] and coef_block[1] are read.  No work array is
     * needed; the two samples are written directly to output_buf[0] +
     * output_col.
     *
     * NOTE(review): in this listing the parameter list breaks off after
     * compptr; the body additionally uses coef_block, output_buf and
     * output_col, which are presumably the remaining parameters.
4046 */
4047
4048GLOBAL(void)
4049jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4052{
4053 DCTELEM tmp0, tmp1;
4054 ISLOW_MULT_TYPE * quantptr;
4055 JSAMPROW outptr;
4056 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4058
4059 /* Pass 1: empty. */
4060
4061 /* Pass 2: process 1 row from input, store into output array. */
4062
4063 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4064 outptr = output_buf[0] + output_col;
4065
4066 /* Even part */
4067
4068 tmp0 = DEQUANTIZE(coef_block[0], quantptr[0]);
4069 /* Add range center and fudge factor for final descale and range-limit. */
4070 tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
4071
4072 /* Odd part */
4073
4074 tmp1 = DEQUANTIZE(coef_block[1], quantptr[1]);
4075
4076 /* Final output stage */
     /* Sum and difference are shifted down by 3, masked with RANGE_MASK and
      * used as an index into range_limit.
      */
4077
4078 outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
4079 outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
4080}
4081
4082
4083/*
4084 * Perform dequantization and inverse DCT on one block of coefficients,
4085 * producing an 8x16 output block.
4086 *
4087 * 16-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
     *
     * Pass 1 reads the coefficients at offsets DCTSIZE*0 .. DCTSIZE*7 of
     * each of the 8 input columns and writes an intermediate array of 16
     * rows with 8 entries each.  Pass 2 turns every intermediate row into
     * 8 samples stored at output_buf[row] + output_col.
     *
     * NOTE(review): pass 1 indexes the work array with a literal stride of
     * 8 while pass 2 advances by DCTSIZE; the two agree only if DCTSIZE is
     * 8 -- confirm against the definition of DCTSIZE.
     *
     * NOTE(review): in this listing the parameter list breaks off after
     * compptr; the body additionally uses coef_block, output_buf and
     * output_col, which are presumably the remaining parameters.
4088 */
4089
4090GLOBAL(void)
4091jpeg_idct_8x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4094{
4095 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
4096 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
4097 INT32 z1, z2, z3, z4;
4098 JCOEFPTR inptr;
4099 ISLOW_MULT_TYPE * quantptr;
4100 int * wsptr;
4101 JSAMPROW outptr;
4102 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4103 int ctr;
4104 int workspace[8*16]; /* buffers data between passes */
4106
4107 /* Pass 1: process columns from input, store into work array.
4108 * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
4109 */
4110
4111 inptr = coef_block;
4112 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4113 wsptr = workspace;
4114 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
4115 /* Even part */
     /* The even part ends up in tmp20..tmp27; tmp0..tmp3 and tmp10..tmp13
      * are intermediates that the odd part overwrites afterwards.
      */
4116
4117 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4118 tmp0 <<= CONST_BITS;
4119 /* Add fudge factor here for final descale. */
4120 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
4121
4122 z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4123 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
4124 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
4125
4126 tmp10 = tmp0 + tmp1;
4127 tmp11 = tmp0 - tmp1;
4128 tmp12 = tmp0 + tmp2;
4129 tmp13 = tmp0 - tmp2;
4130
4131 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4132 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4133 z3 = z1 - z2;
4134 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
4135 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
4136
4137 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
4138 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
4139 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
4140 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
4141
4142 tmp20 = tmp10 + tmp0;
4143 tmp27 = tmp10 - tmp0;
4144 tmp21 = tmp12 + tmp1;
4145 tmp26 = tmp12 - tmp1;
4146 tmp22 = tmp13 + tmp2;
4147 tmp25 = tmp13 - tmp2;
4148 tmp23 = tmp11 + tmp3;
4149 tmp24 = tmp11 - tmp3;
4150
4151 /* Odd part */
     /* The odd part ends up in tmp0..tmp3 and tmp10..tmp13, built up
      * incrementally from shared products.
      */
4152
4153 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4154 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4155 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4156 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4157
4158 tmp11 = z1 + z3;
4159
4160 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
4161 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
4162 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
4163 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
4164 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
4165 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
4166 tmp0 = tmp1 + tmp2 + tmp3 -
4167 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
4168 tmp13 = tmp10 + tmp11 + tmp12 -
4169 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
     /* From here on z1 is reused as a scratch product; the dequantized
      * input at DCTSIZE*1 is not needed any more.
      */
4170 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
4171 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
4172 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
4173 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
4174 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
4175 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
     /* z2 becomes the sum of the inputs at DCTSIZE*3 and DCTSIZE*7 and is
      * then reused as a scratch product as well.
      */
4176 z2 += z4;
4177 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
4178 tmp1 += z1;
4179 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
4180 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
4181 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
4182 tmp12 += z2;
4183 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
4184 tmp2 += z2;
4185 tmp3 += z2;
4186 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
4187 tmp10 += z2;
4188 tmp11 += z2;
4189
4190 /* Final output stage */
     /* Rows are written in mirrored pairs (k and 15-k): even part plus,
      * respectively minus, the odd part.
      */
4191
4192 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS);
4193 wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS);
4194 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS);
4195 wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS);
4196 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS);
4197 wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS);
4198 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS);
4199 wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS);
4200 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
4201 wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
4202 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
4203 wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
4204 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
4205 wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
4206 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
4207 wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
4208 }
4209
4210 /* Pass 2: process rows from work array, store into output array.
4211 * Note that we must descale the results by a factor of 8 == 2**3,
4212 * and also undo the PASS1_BITS scaling.
4213 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
4214 */
4215
4216 wsptr = workspace;
4217 for (ctr = 0; ctr < 16; ctr++) {
4218 outptr = output_buf[ctr] + output_col;
4219
4220 /* Even part: reverse the even part of the forward DCT.
4221 * The rotator is c(-6).
4222 */
4223
4224 /* Add range center and fudge factor for final descale and range-limit. */
4225 z2 = (INT32) wsptr[0] +
4226 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4227 (ONE << (PASS1_BITS+2)));
4228 z3 = (INT32) wsptr[4];
4229
4230 tmp0 = (z2 + z3) << CONST_BITS;
4231 tmp1 = (z2 - z3) << CONST_BITS;
4232
4233 z2 = (INT32) wsptr[2];
4234 z3 = (INT32) wsptr[6];
4235
4236 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4237 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4238 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4239
4240 tmp10 = tmp0 + tmp2;
4241 tmp13 = tmp0 - tmp2;
4242 tmp11 = tmp1 + tmp3;
4243 tmp12 = tmp1 - tmp3;
4244
4245 /* Odd part per figure 8; the matrix is unitary and hence its
4246 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
4247 */
     /* tmp0..tmp3 are reused for the odd part; the even-part results are
      * kept in tmp10..tmp13.
      */
4248
4249 tmp0 = (INT32) wsptr[7];
4250 tmp1 = (INT32) wsptr[5];
4251 tmp2 = (INT32) wsptr[3];
4252 tmp3 = (INT32) wsptr[1];
4253
4254 z2 = tmp0 + tmp2;
4255 z3 = tmp1 + tmp3;
4256
4257 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
4258 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
4259 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
4260 z2 += z1;
4261 z3 += z1;
4262
4263 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
4264 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
4265 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
4266 tmp0 += z1 + z2;
4267 tmp3 += z1 + z3;
4268
4269 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
4270 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
4271 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
4272 tmp1 += z1 + z3;
4273 tmp2 += z1 + z2;
4274
4275 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
     /* Samples are written in mirrored pairs (k and 7-k).  Each sum is
      * shifted down by CONST_BITS+PASS1_BITS+3, masked with RANGE_MASK and
      * used as an index into range_limit.
      */
4276
4277 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
4278 CONST_BITS+PASS1_BITS+3)
4279 & RANGE_MASK];
4280 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
4281 CONST_BITS+PASS1_BITS+3)
4282 & RANGE_MASK];
4283 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
4284 CONST_BITS+PASS1_BITS+3)
4285 & RANGE_MASK];
4286 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
4287 CONST_BITS+PASS1_BITS+3)
4288 & RANGE_MASK];
4289 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
4290 CONST_BITS+PASS1_BITS+3)
4291 & RANGE_MASK];
4292 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
4293 CONST_BITS+PASS1_BITS+3)
4294 & RANGE_MASK];
4295 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
4296 CONST_BITS+PASS1_BITS+3)
4297 & RANGE_MASK];
4298 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
4299 CONST_BITS+PASS1_BITS+3)
4300 & RANGE_MASK];
4301
4302 wsptr += DCTSIZE; /* advance pointer to next row */
4303 }
4304}
4305
4306
4307/*
4308 * Perform dequantization and inverse DCT on one block of coefficients,
4309 * producing a 7x14 output block.
4310 *
4311 * 14-point IDCT in pass 1 (columns), 7-point in pass 2 (rows).
     *
     * Pass 1 visits only the first 7 input columns, reading the
     * coefficients at offsets DCTSIZE*0 .. DCTSIZE*7 of each, and writes an
     * intermediate array of 14 rows with 7 entries each.  Pass 2 turns
     * every intermediate row into 7 samples stored at output_buf[row] +
     * output_col.
     *
     * NOTE(review): in this listing the parameter list breaks off after
     * compptr; the body additionally uses coef_block, output_buf and
     * output_col, which are presumably the remaining parameters.
4312 */
4313
4314GLOBAL(void)
4315jpeg_idct_7x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4318{
4319 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
4320 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
4321 INT32 z1, z2, z3, z4;
4322 JCOEFPTR inptr;
4323 ISLOW_MULT_TYPE * quantptr;
4324 int * wsptr;
4325 JSAMPROW outptr;
4326 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4327 int ctr;
4328 int workspace[7*14]; /* buffers data between passes */
4330
4331 /* Pass 1: process columns from input, store into work array.
4332 * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
4333 */
4334
4335 inptr = coef_block;
4336 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4337 wsptr = workspace;
4338 for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
4339 /* Even part */
     /* The even part ends up in tmp20..tmp26; tmp10..tmp15 are
      * intermediates that the odd part overwrites afterwards.
      */
4340
4341 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4342 z1 <<= CONST_BITS;
4343 /* Add fudge factor here for final descale. */
4344 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
4345 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4346 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
4347 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
4348 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
4349
4350 tmp10 = z1 + z2;
4351 tmp11 = z1 + z3;
4352 tmp12 = z1 - z4;
4353
     /* Unlike the other even terms, tmp23 is descaled right here: it is
      * later combined with tmp13, which is built directly at PASS1_BITS
      * scale, so rows 3 and 10 need no shift in the output stage.
      */
4354 tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
4355 CONST_BITS-PASS1_BITS);
4356
4357 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4358 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4359
4360 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
4361
4362 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
4363 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
4364 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
4365 MULTIPLY(z2, FIX(1.378756276)); /* c2 */
4366
4367 tmp20 = tmp10 + tmp13;
4368 tmp26 = tmp10 - tmp13;
4369 tmp21 = tmp11 + tmp14;
4370 tmp25 = tmp11 - tmp14;
4371 tmp22 = tmp12 + tmp15;
4372 tmp24 = tmp12 - tmp15;
4373
4374 /* Odd part */
     /* The odd part ends up in tmp10..tmp16.  tmp13 first holds the input
      * at DCTSIZE*7 shifted up by CONST_BITS and is recomputed at the end.
      */
4375
4376 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4377 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4378 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4379 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4380 tmp13 = z4 << CONST_BITS;
4381
4382 tmp14 = z1 + z3;
4383 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
4384 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
4385 tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
4386 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
4387 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
4388 z1 -= z2;
4389 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */
4390 tmp16 += tmp15;
     /* z4 still holds the input at DCTSIZE*7 here; it becomes a scratch
      * product on the next line.
      */
4391 z1 += z4;
4392 z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
4393 tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
4394 tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
4395 z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
4396 tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
4397 tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
4398
     /* After the two updates above z1 is in1 - in3 + in7 (inK being the
      * dequantized input at DCTSIZE*K), so this is
      * (in1 - in3 - in5 + in7) scaled up by PASS1_BITS only.
      */
4399 tmp13 = (z1 - z3) << PASS1_BITS;
4400
4401 /* Final output stage */
     /* Rows are written in mirrored pairs (k and 13-k).  Rows 3 and 10 use
      * the pre-scaled tmp23/tmp13 and therefore skip the right shift.
      */
4402
4403 wsptr[7*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4404 wsptr[7*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4405 wsptr[7*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4406 wsptr[7*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4407 wsptr[7*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
4408 wsptr[7*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
4409 wsptr[7*3] = (int) (tmp23 + tmp13);
4410 wsptr[7*10] = (int) (tmp23 - tmp13);
4411 wsptr[7*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4412 wsptr[7*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4413 wsptr[7*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
4414 wsptr[7*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
4415 wsptr[7*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
4416 wsptr[7*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
4417 }
4418
4419 /* Pass 2: process 14 rows from work array, store into output array.
4420 * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
4421 */
4422
4423 wsptr = workspace;
4424 for (ctr = 0; ctr < 14; ctr++) {
4425 outptr = output_buf[ctr] + output_col;
4426
4427 /* Even part */
     /* The even part ends up in tmp20..tmp23; tmp10 and z2 are used as
      * scratch on the way.
      */
4428
4429 /* Add range center and fudge factor for final descale and range-limit. */
4430 tmp23 = (INT32) wsptr[0] +
4431 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4432 (ONE << (PASS1_BITS+2)));
4433 tmp23 <<= CONST_BITS;
4434
4435 z1 = (INT32) wsptr[2];
4436 z2 = (INT32) wsptr[4];
4437 z3 = (INT32) wsptr[6];
4438
4439 tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
4440 tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
4441 tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
4442 tmp10 = z1 + z3;
4443 z2 -= tmp10;
4444 tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
4445 tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
4446 tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
4447 tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
4448
4449 /* Odd part */
     /* The odd part ends up in tmp10..tmp12. */
4450
4451 z1 = (INT32) wsptr[1];
4452 z2 = (INT32) wsptr[3];
4453 z3 = (INT32) wsptr[5];
4454
4455 tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
4456 tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
4457 tmp10 = tmp11 - tmp12;
4458 tmp11 += tmp12;
4459 tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
4460 tmp11 += tmp12;
4461 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
4462 tmp10 += z2;
4463 tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
4464
4465 /* Final output stage */
     /* Samples are written in mirrored pairs (k and 6-k); the centre
      * sample outptr[3] uses tmp23 alone, with no odd-part term.  Each
      * value is shifted down by CONST_BITS+PASS1_BITS+3, masked with
      * RANGE_MASK and used as an index into range_limit.
      */
4466
4467 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
4468 CONST_BITS+PASS1_BITS+3)
4469 & RANGE_MASK];
4470 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
4471 CONST_BITS+PASS1_BITS+3)
4472 & RANGE_MASK];
4473 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
4474 CONST_BITS+PASS1_BITS+3)
4475 & RANGE_MASK];
4476 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
4477 CONST_BITS+PASS1_BITS+3)
4478 & RANGE_MASK];
4479 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
4480 CONST_BITS+PASS1_BITS+3)
4481 & RANGE_MASK];
4482 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
4483 CONST_BITS+PASS1_BITS+3)
4484 & RANGE_MASK];
4485 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23,
4486 CONST_BITS+PASS1_BITS+3)
4487 & RANGE_MASK];
4488
4489 wsptr += 7; /* advance pointer to next row */
4490 }
4491}
4492
4493
4494/*
4495 * Perform dequantization and inverse DCT on one block of coefficients,
4496 * producing a 6x12 output block.
4497 *
4498 * 12-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
4499 */
4500
4501GLOBAL(void)
4502jpeg_idct_6x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4505{
4506 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
4507 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
4508 INT32 z1, z2, z3, z4;
4509 JCOEFPTR inptr;
4510 ISLOW_MULT_TYPE * quantptr;
4511 int * wsptr;
4512 JSAMPROW outptr;
4513 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4514 int ctr;
4515 int workspace[6*12]; /* buffers data between passes */
4517
4518 /* Pass 1: process columns from input, store into work array.
4519 * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
4520 */
4521
4522 inptr = coef_block;
4523 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4524 wsptr = workspace;
4525 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
4526 /* Even part */
4527
4528 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4529 z3 <<= CONST_BITS;
4530 /* Add fudge factor here for final descale. */
4531 z3 += ONE << (CONST_BITS-PASS1_BITS-1);
4532
4533 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4534 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
4535
4536 tmp10 = z3 + z4;
4537 tmp11 = z3 - z4;
4538
4539 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4540 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
4541 z1 <<= CONST_BITS;
4542 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4543 z2 <<= CONST_BITS;
4544
4545 tmp12 = z1 - z2;
4546
4547 tmp21 = z3 + tmp12;
4548 tmp24 = z3 - tmp12;
4549
4550 tmp12 = z4 + z2;
4551
4552 tmp20 = tmp10 + tmp12;
4553 tmp25 = tmp10 - tmp12;
4554
4555 tmp12 = z4 - z1 - z2;
4556
4557 tmp22 = tmp11 + tmp12;
4558 tmp23 = tmp11 - tmp12;
4559
4560 /* Odd part */
4561
4562 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4563 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4564 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4565 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4566
4567 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
4568 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
4569
4570 tmp10 = z1 + z3;
4571 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
4572 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
4573 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
4574 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
4575 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
4576 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
4577 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
4578 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
4579
4580 z1 -= z4;
4581 z2 -= z3;
4582 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
4583 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
4584 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
4585
4586 /* Final output stage */
4587
4588 wsptr[6*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4589 wsptr[6*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4590 wsptr[6*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4591 wsptr[6*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4592 wsptr[6*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
4593 wsptr[6*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
4594 wsptr[6*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
4595 wsptr[6*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
4596 wsptr[6*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4597 wsptr[6*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4598 wsptr[6*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
4599 wsptr[6*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
4600 }
4601
4602 /* Pass 2: process 12 rows from work array, store into output array.
4603 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
4604 */
4605
4606 wsptr = workspace;
4607 for (ctr = 0; ctr < 12; ctr++) {
4608 outptr = output_buf[ctr] + output_col;
4609
4610 /* Even part */
4611
4612 /* Add range center and fudge factor for final descale and range-limit. */
4613 tmp10 = (INT32) wsptr[0] +
4614 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4615 (ONE << (PASS1_BITS+2)));
4616 tmp10 <<= CONST_BITS;
4617 tmp12 = (INT32) wsptr[4];
4618 tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */
4619 tmp11 = tmp10 + tmp20;
4620 tmp21 = tmp10 - tmp20 - tmp20;
4621 tmp20 = (INT32) wsptr[2];
4622 tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */
4623 tmp20 = tmp11 + tmp10;
4624 tmp22 = tmp11 - tmp10;
4625
4626 /* Odd part */
4627
4628 z1 = (INT32) wsptr[1];
4629 z2 = (INT32) wsptr[3];
4630 z3 = (INT32) wsptr[5];
4631 tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
4632 tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
4633 tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
4634 tmp11 = (z1 - z2 - z3) << CONST_BITS;
4635
4636 /* Final output stage */
4637
4638 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
4639 CONST_BITS+PASS1_BITS+3)
4640 & RANGE_MASK];
4641 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
4642 CONST_BITS+PASS1_BITS+3)
4643 & RANGE_MASK];
4644 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
4645 CONST_BITS+PASS1_BITS+3)
4646 & RANGE_MASK];
4647 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
4648 CONST_BITS+PASS1_BITS+3)
4649 & RANGE_MASK];
4650 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
4651 CONST_BITS+PASS1_BITS+3)
4652 & RANGE_MASK];
4653 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
4654 CONST_BITS+PASS1_BITS+3)
4655 & RANGE_MASK];
4656
4657 wsptr += 6; /* advance pointer to next row */
4658 }
4659}
4660
4661
4662/*
4663 * Perform dequantization and inverse DCT on one block of coefficients,
4664 * producing a 5x10 output block.
4665 *
4666 * 10-point IDCT in pass 1 (columns), 5-point in pass 2 (rows).
4667 */
4668
4669GLOBAL(void)
4670jpeg_idct_5x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4673{
4674 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
4675 INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
4676 INT32 z1, z2, z3, z4, z5;
4677 JCOEFPTR inptr;
4678 ISLOW_MULT_TYPE * quantptr;
4679 int * wsptr;
4680 JSAMPROW outptr;
4681 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4682 int ctr;
4683 int workspace[5*10]; /* buffers data between passes */
4685
4686 /* Pass 1: process columns from input, store into work array.
4687 * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
4688 */
4689
4690 inptr = coef_block;
4691 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4692 wsptr = workspace;
4693 for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
4694 /* Even part */
4695
4696 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4697 z3 <<= CONST_BITS;
4698 /* Add fudge factor here for final descale. */
4699 z3 += ONE << (CONST_BITS-PASS1_BITS-1);
4700 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4701 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
4702 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
4703 tmp10 = z3 + z1;
4704 tmp11 = z3 - z2;
4705
4706 tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */
4707 CONST_BITS-PASS1_BITS);
4708
4709 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4710 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4711
4712 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
4713 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
4714 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
4715
4716 tmp20 = tmp10 + tmp12;
4717 tmp24 = tmp10 - tmp12;
4718 tmp21 = tmp11 + tmp13;
4719 tmp23 = tmp11 - tmp13;
4720
4721 /* Odd part */
4722
4723 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4724 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4725 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4726 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4727
4728 tmp11 = z2 + z4;
4729 tmp13 = z2 - z4;
4730
4731 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
4732 z5 = z3 << CONST_BITS;
4733
4734 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
4735 z4 = z5 + tmp12;
4736
4737 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
4738 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
4739
4740 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
4741 z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
4742
4743 tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
4744
4745 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
4746 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
4747
4748 /* Final output stage */
4749
4750 wsptr[5*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4751 wsptr[5*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4752 wsptr[5*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4753 wsptr[5*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4754 wsptr[5*2] = (int) (tmp22 + tmp12);
4755 wsptr[5*7] = (int) (tmp22 - tmp12);
4756 wsptr[5*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
4757 wsptr[5*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
4758 wsptr[5*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4759 wsptr[5*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4760 }
4761
4762 /* Pass 2: process 10 rows from work array, store into output array.
4763 * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
4764 */
4765
4766 wsptr = workspace;
4767 for (ctr = 0; ctr < 10; ctr++) {
4768 outptr = output_buf[ctr] + output_col;
4769
4770 /* Even part */
4771
4772 /* Add range center and fudge factor for final descale and range-limit. */
4773 tmp12 = (INT32) wsptr[0] +
4774 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4775 (ONE << (PASS1_BITS+2)));
4776 tmp12 <<= CONST_BITS;
4777 tmp13 = (INT32) wsptr[2];
4778 tmp14 = (INT32) wsptr[4];
4779 z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
4780 z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
4781 z3 = tmp12 + z2;
4782 tmp10 = z3 + z1;
4783 tmp11 = z3 - z1;
4784 tmp12 -= z2 << 2;
4785
4786 /* Odd part */
4787
4788 z2 = (INT32) wsptr[1];
4789 z3 = (INT32) wsptr[3];
4790
4791 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
4792 tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
4793 tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
4794
4795 /* Final output stage */
4796
4797 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp13,
4798 CONST_BITS+PASS1_BITS+3)
4799 & RANGE_MASK];
4800 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp13,
4801 CONST_BITS+PASS1_BITS+3)
4802 & RANGE_MASK];
4803 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp14,
4804 CONST_BITS+PASS1_BITS+3)
4805 & RANGE_MASK];
4806 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp14,
4807 CONST_BITS+PASS1_BITS+3)
4808 & RANGE_MASK];
4809 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
4810 CONST_BITS+PASS1_BITS+3)
4811 & RANGE_MASK];
4812
4813 wsptr += 5; /* advance pointer to next row */
4814 }
4815}
4816
4817
4818/*
4819 * Perform dequantization and inverse DCT on one block of coefficients,
4820 * producing a 4x8 output block.
4821 *
4822 * 8-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
4823 */
4824
4825GLOBAL(void)
4826jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4829{
4830 INT32 tmp0, tmp1, tmp2, tmp3;
4831 INT32 tmp10, tmp11, tmp12, tmp13;
4832 INT32 z1, z2, z3;
4833 JCOEFPTR inptr;
4834 ISLOW_MULT_TYPE * quantptr;
4835 int * wsptr;
4836 JSAMPROW outptr;
4837 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4838 int ctr;
4839 int workspace[4*8]; /* buffers data between passes */
4841
4842 /* Pass 1: process columns from input, store into work array.
4843 * Note results are scaled up by sqrt(8) compared to a true IDCT;
4844 * furthermore, we scale the results by 2**PASS1_BITS.
4845 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
4846 */
4847
4848 inptr = coef_block;
4849 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4850 wsptr = workspace;
4851 for (ctr = 4; ctr > 0; ctr--) {
4852 /* Due to quantization, we will usually find that many of the input
4853 * coefficients are zero, especially the AC terms. We can exploit this
4854 * by short-circuiting the IDCT calculation for any column in which all
4855 * the AC terms are zero. In that case each output is equal to the
4856 * DC coefficient (with scale factor as needed).
4857 * With typical images and quantization tables, half or more of the
4858 * column DCT calculations can be simplified this way.
4859 */
4860
4861 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
4862 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
4863 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
4864 inptr[DCTSIZE*7] == 0) {
4865 /* AC terms all zero */
4866 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
4867
4868 wsptr[4*0] = dcval;
4869 wsptr[4*1] = dcval;
4870 wsptr[4*2] = dcval;
4871 wsptr[4*3] = dcval;
4872 wsptr[4*4] = dcval;
4873 wsptr[4*5] = dcval;
4874 wsptr[4*6] = dcval;
4875 wsptr[4*7] = dcval;
4876
4877 inptr++; /* advance pointers to next column */
4878 quantptr++;
4879 wsptr++;
4880 continue;
4881 }
4882
4883 /* Even part: reverse the even part of the forward DCT.
4884 * The rotator is c(-6).
4885 */
4886
4887 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4888 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4889 z2 <<= CONST_BITS;
4890 z3 <<= CONST_BITS;
4891 /* Add fudge factor here for final descale. */
4892 z2 += ONE << (CONST_BITS-PASS1_BITS-1);
4893
4894 tmp0 = z2 + z3;
4895 tmp1 = z2 - z3;
4896
4897 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4898 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4899
4900 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4901 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4902 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4903
4904 tmp10 = tmp0 + tmp2;
4905 tmp13 = tmp0 - tmp2;
4906 tmp11 = tmp1 + tmp3;
4907 tmp12 = tmp1 - tmp3;
4908
4909 /* Odd part per figure 8; the matrix is unitary and hence its
4910 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
4911 */
4912
4913 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4914 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4915 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4916 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4917
4918 z2 = tmp0 + tmp2;
4919 z3 = tmp1 + tmp3;
4920
4921 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
4922 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
4923 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
4924 z2 += z1;
4925 z3 += z1;
4926
4927 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
4928 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
4929 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
4930 tmp0 += z1 + z2;
4931 tmp3 += z1 + z3;
4932
4933 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
4934 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
4935 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
4936 tmp1 += z1 + z3;
4937 tmp2 += z1 + z2;
4938
4939 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
4940
4941 wsptr[4*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
4942 wsptr[4*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
4943 wsptr[4*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
4944 wsptr[4*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
4945 wsptr[4*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
4946 wsptr[4*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
4947 wsptr[4*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
4948 wsptr[4*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
4949
4950 inptr++; /* advance pointers to next column */
4951 quantptr++;
4952 wsptr++;
4953 }
4954
4955 /* Pass 2: process 8 rows from work array, store into output array.
4956 * 4-point IDCT kernel,
4957 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
4958 */
4959
4960 wsptr = workspace;
4961 for (ctr = 0; ctr < 8; ctr++) {
4962 outptr = output_buf[ctr] + output_col;
4963
4964 /* Even part */
4965
4966 /* Add range center and fudge factor for final descale and range-limit. */
4967 tmp0 = (INT32) wsptr[0] +
4968 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4969 (ONE << (PASS1_BITS+2)));
4970 tmp2 = (INT32) wsptr[2];
4971
4972 tmp10 = (tmp0 + tmp2) << CONST_BITS;
4973 tmp12 = (tmp0 - tmp2) << CONST_BITS;
4974
4975 /* Odd part */
4976 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
4977
4978 z2 = (INT32) wsptr[1];
4979 z3 = (INT32) wsptr[3];
4980
4981 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4982 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4983 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4984
4985 /* Final output stage */
4986
4987 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
4988 CONST_BITS+PASS1_BITS+3)
4989 & RANGE_MASK];
4990 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
4991 CONST_BITS+PASS1_BITS+3)
4992 & RANGE_MASK];
4993 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
4994 CONST_BITS+PASS1_BITS+3)
4995 & RANGE_MASK];
4996 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
4997 CONST_BITS+PASS1_BITS+3)
4998 & RANGE_MASK];
4999
5000 wsptr += 4; /* advance pointer to next row */
5001 }
5002}
5003
5004
5005/*
5006 * Perform dequantization and inverse DCT on one block of coefficients,
5007 * producing a 3x6 output block.
5008 *
5009 * 6-point IDCT in pass 1 (columns), 3-point in pass 2 (rows).
5010 */
5011
5012GLOBAL(void)
5013jpeg_idct_3x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5016{
5017 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
5018 INT32 z1, z2, z3;
5019 JCOEFPTR inptr;
5020 ISLOW_MULT_TYPE * quantptr;
5021 int * wsptr;
5022 JSAMPROW outptr;
5023 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5024 int ctr;
5025 int workspace[3*6]; /* buffers data between passes */
5027
5028 /* Pass 1: process columns from input, store into work array.
5029 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
5030 */
5031
5032 inptr = coef_block;
5033 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5034 wsptr = workspace;
5035 for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
5036 /* Even part */
5037
5038 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
5039 tmp0 <<= CONST_BITS;
5040 /* Add fudge factor here for final descale. */
5041 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
5042 tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
5043 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
5044 tmp1 = tmp0 + tmp10;
5045 tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
5046 tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
5047 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
5048 tmp10 = tmp1 + tmp0;
5049 tmp12 = tmp1 - tmp0;
5050
5051 /* Odd part */
5052
5053 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
5054 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
5055 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
5056 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
5057 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
5058 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
5059 tmp1 = (z1 - z2 - z3) << PASS1_BITS;
5060
5061 /* Final output stage */
5062
5063 wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
5064 wsptr[3*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
5065 wsptr[3*1] = (int) (tmp11 + tmp1);
5066 wsptr[3*4] = (int) (tmp11 - tmp1);
5067 wsptr[3*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
5068 wsptr[3*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
5069 }
5070
5071 /* Pass 2: process 6 rows from work array, store into output array.
5072 * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
5073 */
5074
5075 wsptr = workspace;
5076 for (ctr = 0; ctr < 6; ctr++) {
5077 outptr = output_buf[ctr] + output_col;
5078
5079 /* Even part */
5080
5081 /* Add range center and fudge factor for final descale and range-limit. */
5082 tmp0 = (INT32) wsptr[0] +
5083 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
5084 (ONE << (PASS1_BITS+2)));
5085 tmp0 <<= CONST_BITS;
5086 tmp2 = (INT32) wsptr[2];
5087 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
5088 tmp10 = tmp0 + tmp12;
5089 tmp2 = tmp0 - tmp12 - tmp12;
5090
5091 /* Odd part */
5092
5093 tmp12 = (INT32) wsptr[1];
5094 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
5095
5096 /* Final output stage */
5097
5098 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
5099 CONST_BITS+PASS1_BITS+3)
5100 & RANGE_MASK];
5101 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
5102 CONST_BITS+PASS1_BITS+3)
5103 & RANGE_MASK];
5104 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
5105 CONST_BITS+PASS1_BITS+3)
5106 & RANGE_MASK];
5107
5108 wsptr += 3; /* advance pointer to next row */
5109 }
5110}
5111
5112
5113/*
5114 * Perform dequantization and inverse DCT on one block of coefficients,
5115 * producing a 2x4 output block.
5116 *
5117 * 4-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
5118 */
5119
5120GLOBAL(void)
5121jpeg_idct_2x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5124{
5125 INT32 tmp0, tmp2, tmp10, tmp12;
5126 INT32 z1, z2, z3;
5127 JCOEFPTR inptr;
5128 ISLOW_MULT_TYPE * quantptr;
5129 INT32 * wsptr;
5130 JSAMPROW outptr;
5131 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5132 int ctr;
5133 INT32 workspace[2*4]; /* buffers data between passes */
5135
5136 /* Pass 1: process columns from input, store into work array.
5137 * 4-point IDCT kernel,
5138 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
5139 */
5140
5141 inptr = coef_block;
5142 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5143 wsptr = workspace;
5144 for (ctr = 0; ctr < 2; ctr++, inptr++, quantptr++, wsptr++) {
5145 /* Even part */
5146
5147 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
5148 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
5149
5150 tmp10 = (tmp0 + tmp2) << CONST_BITS;
5151 tmp12 = (tmp0 - tmp2) << CONST_BITS;
5152
5153 /* Odd part */
5154 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
5155
5156 z2 = DEQUANTIZE(inptr[