ReactOS 0.4.16-dev-2617-g01a0906
jidctint.c
Go to the documentation of this file.
1/*
2 * jidctint.c
3 *
4 * Copyright (C) 1991-1998, Thomas G. Lane.
5 * Modification developed 2002-2026 by Guido Vollbeding.
6 * This file is part of the Independent JPEG Group's software.
7 * For conditions of distribution and use, see the accompanying README file.
8 *
9 * This file contains a slow-but-accurate integer implementation of the
10 * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine
11 * must also perform dequantization of the input coefficients.
12 *
13 * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
14 * on each row (or vice versa, but it's more convenient to emit a row at
15 * a time). Direct algorithms are also available, but they are much more
16 * complex and seem not to be any faster when reduced to code.
17 *
18 * This implementation is based on an algorithm described in
19 * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
20 * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
21 * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
22 * The primary algorithm described there uses 11 multiplies and 29 adds.
23 * We use their alternate method with 12 multiplies and 32 adds.
24 * The advantage of this method is that no data path contains more than one
25 * multiplication; this allows a very simple and accurate implementation in
26 * scaled fixed-point arithmetic, with a minimal number of shifts.
27 *
28 * We also provide IDCT routines with various output sample block sizes for
29 * direct resolution reduction or enlargement and for direct resolving the
30 * common 2x1 and 1x2 subsampling cases without additional resampling: NxN
31 * (N=1...16), 2NxN, and Nx2N (N=1...8) samples for one 8x8 input DCT block.
32 *
33 * For N<8 we simply take the corresponding low-frequency coefficients of
34 * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block
35 * to yield the downscaled outputs.
36 * This can be seen as direct low-pass downsampling from the DCT domain
37 * point of view rather than the usual spatial domain point of view,
38 * yielding significant computational savings and results at least
39 * as good as common bilinear (averaging) spatial downsampling.
40 *
41 * For N>8 we apply a partial NxN IDCT on the 8 input coefficients as
42 * lower frequencies and higher frequencies assumed to be zero.
43 * It turns out that the computational effort is similar to the 8x8 IDCT
44 * regarding the output size.
45 * Furthermore, the scaling and descaling is the same for all IDCT sizes.
46 *
47 * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
48 * since there would be too many additional constants to pre-calculate.
49 */
50
51#define JPEG_INTERNALS
52#include "jinclude.h"
53#include "jpeglib.h"
54#include "jdct.h" /* Private declarations for DCT subsystem */
55
56#ifdef DCT_ISLOW_SUPPORTED
57
58
59/*
60 * This module is specialized to the case DCTSIZE = 8.
61 */
62
63#if DCTSIZE != 8
64 Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
65#endif
66
67
68/*
69 * The poop on this scaling stuff is as follows:
70 *
71 * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
72 * larger than the true IDCT outputs. The final outputs are therefore
73 * a factor of N larger than desired; since N=8 this can be cured by
74 * a simple right shift at the end of the algorithm. The advantage of
75 * this arrangement is that we save two multiplications per 1-D IDCT,
76 * because the y0 and y4 inputs need not be divided by sqrt(N).
77 *
78 * We have to do addition and subtraction of the integer inputs, which
79 * is no problem, and multiplication by fractional constants, which is
80 * a problem to do in integer arithmetic. We multiply all the constants
81 * by CONST_SCALE and convert them to integer constants (thus retaining
82 * CONST_BITS bits of precision in the constants). After doing a
83 * multiplication we have to divide the product by CONST_SCALE, with
84 * proper rounding, to produce the correct output. This division can
85 * be done cheaply as a right shift of CONST_BITS bits. We postpone
86 * shifting as long as possible so that partial sums can be added
87 * together with full fractional precision.
88 *
89 * The outputs of the first pass are scaled up by PASS1_BITS bits so that
90 * they are represented to better-than-integral precision. These outputs
91 * require JPEG_DATA_PRECISION + PASS1_BITS + 3 bits; this fits in a
92 * 16-bit word with the recommended scaling. (To scale up higher bit
93 * depths further, an intermediate INT32 array would be needed.)
94 *
95 * To avoid overflow of the 32-bit intermediate results in pass 2, we
96 * must have JPEG_DATA_PRECISION + CONST_BITS + PASS1_BITS <= 26. Error
97 * analysis shows that the values given below are the most effective.
98 */
99
/* Select the fixed-point scaling for the configured sample precision.
 *   CONST_BITS  fractional bits kept in the multiplier constants.
 *   PASS1_BITS  extra scaling carried by the pass 1 results.
 *   PASS2_BITS  descale applied to the pass 2 results, in addition to
 *               CONST_BITS, before range-limiting.
 * A precision that matches neither case must stop the build right here;
 * otherwise the three macros stay undefined and the failure only shows up
 * later as unrelated-looking errors.
 */
#if JPEG_DATA_PRECISION <= 10 && BITS_IN_JSAMPLE <= 13
#define CONST_BITS  13
#define PASS1_BITS  (10 - JPEG_DATA_PRECISION)
#define PASS2_BITS  (13 - BITS_IN_JSAMPLE)
#else
#if JPEG_DATA_PRECISION <= 13 && BITS_IN_JSAMPLE <= 16
#define CONST_BITS  13
#define PASS1_BITS  (13 - JPEG_DATA_PRECISION)
#define PASS2_BITS  (16 - BITS_IN_JSAMPLE)
#else
  Sorry, this code has no scaling defined for this sample precision. /* deliberate syntax err */
#endif
#endif
111
/* Fixed-point multiplier constants for the 8-point kernel.
 *
 * Not every C compiler folds "FIX(constant)" at compile time, which would
 * leave floating-point work to be done at run time.  For the usual
 * CONST_BITS value of 13 the integers are therefore spelled out; any other
 * CONST_BITS falls back to the FIX() macro.  If you change CONST_BITS you
 * may want to add a matching pre-calculated table.
 */

#if CONST_BITS == 13
#define FIX_0_298631336  ((INT32)  2446)	/* FIX(0.298631336) */
#define FIX_0_390180644  ((INT32)  3196)	/* FIX(0.390180644) */
#define FIX_0_541196100  ((INT32)  4433)	/* FIX(0.541196100) */
#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
#define FIX_1_175875602  ((INT32)  9633)	/* FIX(1.175875602) */
#define FIX_1_501321110  ((INT32)  12299)	/* FIX(1.501321110) */
#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
#define FIX_1_961570560  ((INT32)  16069)	/* FIX(1.961570560) */
#define FIX_2_053119869  ((INT32)  16819)	/* FIX(2.053119869) */
#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
#define FIX_3_072711026  ((INT32)  25172)	/* FIX(3.072711026) */
#else
#define FIX_0_298631336  FIX(0.298631336)
#define FIX_0_390180644  FIX(0.390180644)
#define FIX_0_541196100  FIX(0.541196100)
#define FIX_0_765366865  FIX(0.765366865)
#define FIX_0_899976223  FIX(0.899976223)
#define FIX_1_175875602  FIX(1.175875602)
#define FIX_1_501321110  FIX(1.501321110)
#define FIX_1_847759065  FIX(1.847759065)
#define FIX_1_961570560  FIX(1.961570560)
#define FIX_2_053119869  FIX(2.053119869)
#define FIX_2_562915447  FIX(2.562915447)
#define FIX_3_072711026  FIX(3.072711026)
#endif
146
147
/* MULTIPLY: INT32 variable times INT32 constant, giving an INT32 result.
 * With the recommended scaling and data of up to 10 bits, neither operand
 * exceeds 16 bits, so the 16x16->32 bit multiply macro is sufficient.
 * Higher bit depths need the full 32-bit product.
 */

#if JPEG_DATA_PRECISION <= 10 && BITS_IN_JSAMPLE <= 13
#define MULTIPLY(value,coeff)  MULTIPLY16C16(value,coeff)
#else
#define MULTIPLY(value,coeff)  ((value) * (coeff))
#endif


/* DEQUANTIZE: scale one coefficient by its multiplier-table entry and
 * produce an int result.  Both inputs and the result are 16 bits or less
 * in this module, so either an int or a short multiply will do.
 */

#define DEQUANTIZE(coefval,multval)  (((ISLOW_MULT_TYPE) (coefval)) * (multval))
168
169
/* Pass 2 range center and fudge factor for final descale and range-limit.
 *
 * The value is added to the DC term of each work-array row before pass 2.
 * When PASS2_BITS is nonzero it also carries the rounding half-unit for
 * the PASS2_BITS part of the final right shift.  When PASS2_BITS is zero
 * only the range center is added here, and the callers add their own
 * rounding term after scaling up by CONST_BITS.
 *
 * Every variant is fully parenthesized so that the macro can be used
 * inside any larger expression.
 */

#if PASS2_BITS > 1
#define PASS2_OFFSET \
  ((((INT32) RANGE_CENTER) << PASS2_BITS) + (ONE << (PASS2_BITS-1)))
#else
#if PASS2_BITS > 0
#define PASS2_OFFSET  ((((INT32) RANGE_CENTER) << 1) + ONE)
#else
#define PASS2_OFFSET  ((INT32) RANGE_CENTER)
#endif
#endif
182
183
184/*
185 * Perform dequantization and inverse DCT on one block of coefficients.
186 *
187 * Optimized algorithm with 12 multiplications in the 1-D kernel.
188 * cK represents sqrt(2) * cos(K*pi/16).
189 */
190
191GLOBAL(void)
192jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
195{
196 INT32 tmp0, tmp1, tmp2, tmp3;
197 INT32 tmp10, tmp11, tmp12, tmp13;
198 INT32 z1, z2, z3;
199 JCOEFPTR inptr;
200 ISLOW_MULT_TYPE * quantptr;
201 int * wsptr;
202 JSAMPROW outptr;
203 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
204 int ctr;
205 int workspace[DCTSIZE2]; /* buffers data between passes */
207
208 /* Pass 1: process columns from input, store into work array.
209 * Note results are scaled up by sqrt(8) compared to a true IDCT;
210 * furthermore, we scale the results by 2**PASS1_BITS.
211 */
212
213 inptr = coef_block;
214 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
215 wsptr = workspace;
216 for (ctr = DCTSIZE; ctr > 0; ctr--) {
217 /* Due to quantization, we will usually find that many of the input
218 * coefficients are zero, especially the AC terms. We can exploit this
219 * by short-circuiting the IDCT calculation for any column in which all
220 * the AC terms are zero. In that case each output is equal to the
221 * DC coefficient (with scale factor as needed).
222 * With typical images and quantization tables, half or more of the
223 * column DCT calculations can be simplified this way.
224 */
225
226 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
227 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
228 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
229 inptr[DCTSIZE*7] == 0) {
230 /* AC terms all zero */
231#if PASS1_BITS > 0
232 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
233#else
234 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
235#endif
236
237 wsptr[DCTSIZE*0] = dcval;
238 wsptr[DCTSIZE*1] = dcval;
239 wsptr[DCTSIZE*2] = dcval;
240 wsptr[DCTSIZE*3] = dcval;
241 wsptr[DCTSIZE*4] = dcval;
242 wsptr[DCTSIZE*5] = dcval;
243 wsptr[DCTSIZE*6] = dcval;
244 wsptr[DCTSIZE*7] = dcval;
245
246 inptr++; /* advance pointers to next column */
247 quantptr++;
248 wsptr++;
249 continue;
250 }
251
252 /* Even part: reverse the even part of the forward DCT.
253 * The rotator is c(-6).
254 */
255
256 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
257 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
258 z2 <<= CONST_BITS;
259 z3 <<= CONST_BITS;
260 /* Add fudge factor here for final descale. */
261 z2 += ONE << (CONST_BITS-PASS1_BITS-1);
262
263 tmp0 = z2 + z3;
264 tmp1 = z2 - z3;
265
266 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
267 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
268
269 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
270 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
271 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
272
273 tmp10 = tmp0 + tmp2;
274 tmp13 = tmp0 - tmp2;
275 tmp11 = tmp1 + tmp3;
276 tmp12 = tmp1 - tmp3;
277
278 /* Odd part per figure 8; the matrix is unitary and hence its
279 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
280 */
281
282 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
283 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
284 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
285 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
286
287 z2 = tmp0 + tmp2;
288 z3 = tmp1 + tmp3;
289
290 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
291 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
292 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
293 z2 += z1;
294 z3 += z1;
295
296 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
297 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
298 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
299 tmp0 += z1 + z2;
300 tmp3 += z1 + z3;
301
302 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
303 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
304 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
305 tmp1 += z1 + z3;
306 tmp2 += z1 + z2;
307
308 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
309
310 wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
311 wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
312 wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
313 wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
314 wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
315 wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
316 wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
317 wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
318
319 inptr++; /* advance pointers to next column */
320 quantptr++;
321 wsptr++;
322 }
323
324 /* Pass 2: process rows from work array, store into output array.
325 * Note that we must descale the results by a factor of 8 == 2**3,
326 * which is folded into the PASS2_BITS value.
327 */
328
329 wsptr = workspace;
330 for (ctr = 0; ctr < DCTSIZE; ctr++) {
331 outptr = output_buf[ctr] + output_col;
332
333 /* Add range center and fudge factor for final descale and range-limit. */
334 z2 = (INT32) wsptr[0] + PASS2_OFFSET;
335
336 /* Rows of zeroes can be exploited in the same way as we did with columns.
337 * However, the column calculation has created many nonzero AC terms, so
338 * the simplification applies less often (typically 5% to 10% of the time).
339 * On machines with very fast multiplication, it's possible that the
340 * test takes more time than it's worth. In that case this section
341 * may be commented out.
342 */
343
344#ifndef NO_ZERO_ROW_TEST
345 if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
346 wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
347 /* AC terms all zero */
348#if PASS2_BITS > 0
349 JSAMPLE dcval = range_limit[(int) RIGHT_SHIFT(z2, PASS2_BITS)
350 & RANGE_MASK];
351#else
352 JSAMPLE dcval = range_limit[(int) z2 & RANGE_MASK];
353#endif
354
355 outptr[0] = dcval;
356 outptr[1] = dcval;
357 outptr[2] = dcval;
358 outptr[3] = dcval;
359 outptr[4] = dcval;
360 outptr[5] = dcval;
361 outptr[6] = dcval;
362 outptr[7] = dcval;
363
364 wsptr += DCTSIZE; /* advance pointer to next row */
365 continue;
366 }
367#endif
368
369 /* Even part: reverse the even part of the forward DCT.
370 * The rotator is c(-6).
371 */
372
373 z3 = (INT32) wsptr[4];
374 z2 <<= CONST_BITS;
375 z3 <<= CONST_BITS;
376#if PASS2_BITS == 0
377 /* Add fudge factor here for final descale. */
378 z2 += ONE << (CONST_BITS-1);
379#endif
380
381 tmp0 = z2 + z3;
382 tmp1 = z2 - z3;
383
384 z2 = (INT32) wsptr[2];
385 z3 = (INT32) wsptr[6];
386
387 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
388 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
389 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
390
391 tmp10 = tmp0 + tmp2;
392 tmp13 = tmp0 - tmp2;
393 tmp11 = tmp1 + tmp3;
394 tmp12 = tmp1 - tmp3;
395
396 /* Odd part per figure 8; the matrix is unitary and hence its
397 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
398 */
399
400 tmp0 = (INT32) wsptr[7];
401 tmp1 = (INT32) wsptr[5];
402 tmp2 = (INT32) wsptr[3];
403 tmp3 = (INT32) wsptr[1];
404
405 z2 = tmp0 + tmp2;
406 z3 = tmp1 + tmp3;
407
408 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
409 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
410 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
411 z2 += z1;
412 z3 += z1;
413
414 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
415 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
416 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
417 tmp0 += z1 + z2;
418 tmp3 += z1 + z3;
419
420 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
421 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
422 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
423 tmp1 += z1 + z3;
424 tmp2 += z1 + z2;
425
426 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
427
428 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
429 CONST_BITS+PASS2_BITS)
430 & RANGE_MASK];
431 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
432 CONST_BITS+PASS2_BITS)
433 & RANGE_MASK];
434 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
435 CONST_BITS+PASS2_BITS)
436 & RANGE_MASK];
437 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
438 CONST_BITS+PASS2_BITS)
439 & RANGE_MASK];
440 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
441 CONST_BITS+PASS2_BITS)
442 & RANGE_MASK];
443 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
444 CONST_BITS+PASS2_BITS)
445 & RANGE_MASK];
446 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
447 CONST_BITS+PASS2_BITS)
448 & RANGE_MASK];
449 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
450 CONST_BITS+PASS2_BITS)
451 & RANGE_MASK];
452
453 wsptr += DCTSIZE; /* advance pointer to next row */
454 }
455}
456
457#ifdef IDCT_SCALING_SUPPORTED
458
459
460/*
461 * Perform dequantization and inverse DCT on one block of coefficients,
462 * producing a reduced-size 7x7 output block.
463 *
464 * Optimized algorithm with 12 multiplications in the 1-D kernel.
465 * cK represents sqrt(2) * cos(K*pi/14).
466 */
467
468GLOBAL(void)
469jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
472{
473 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
474 INT32 z1, z2, z3;
475 JCOEFPTR inptr;
476 ISLOW_MULT_TYPE * quantptr;
477 int * wsptr;
478 JSAMPROW outptr;
479 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
480 int ctr;
481 int workspace[7*7]; /* buffers data between passes */
483
484 /* Pass 1: process columns from input, store into work array. */
485
486 inptr = coef_block;
487 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
488 wsptr = workspace;
489 for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
490 /* Even part */
491
492 tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
493 tmp13 <<= CONST_BITS;
494 /* Add fudge factor here for final descale. */
495 tmp13 += ONE << (CONST_BITS-PASS1_BITS-1);
496
497 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
498 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
499 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
500
501 tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
502 tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
503 tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
504 tmp0 = z1 + z3;
505 z2 -= tmp0;
506 tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
507 tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
508 tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
509 tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
510
511 /* Odd part */
512
513 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
514 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
515 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
516
517 tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
518 tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
519 tmp0 = tmp1 - tmp2;
520 tmp1 += tmp2;
521 tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
522 tmp1 += tmp2;
523 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
524 tmp0 += z2;
525 tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
526
527 /* Final output stage */
528
529 wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
530 wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
531 wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
532 wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
533 wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
534 wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
535 wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS);
536 }
537
538 /* Pass 2: process 7 rows from work array, store into output array. */
539
540 wsptr = workspace;
541 for (ctr = 0; ctr < 7; ctr++) {
542 outptr = output_buf[ctr] + output_col;
543
544 /* Even part */
545
546 /* Add range center and fudge factor for final descale and range-limit. */
547 tmp13 = (INT32) wsptr[0] + PASS2_OFFSET;
548 tmp13 <<= CONST_BITS;
549#if PASS2_BITS == 0
550 tmp13 += ONE << (CONST_BITS-1);
551#endif
552
553 z1 = (INT32) wsptr[2];
554 z2 = (INT32) wsptr[4];
555 z3 = (INT32) wsptr[6];
556
557 tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
558 tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
559 tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
560 tmp0 = z1 + z3;
561 z2 -= tmp0;
562 tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
563 tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
564 tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
565 tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
566
567 /* Odd part */
568
569 z1 = (INT32) wsptr[1];
570 z2 = (INT32) wsptr[3];
571 z3 = (INT32) wsptr[5];
572
573 tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
574 tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
575 tmp0 = tmp1 - tmp2;
576 tmp1 += tmp2;
577 tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
578 tmp1 += tmp2;
579 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
580 tmp0 += z2;
581 tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
582
583 /* Final output stage */
584
585 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
586 CONST_BITS+PASS2_BITS)
587 & RANGE_MASK];
588 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
589 CONST_BITS+PASS2_BITS)
590 & RANGE_MASK];
591 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
592 CONST_BITS+PASS2_BITS)
593 & RANGE_MASK];
594 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
595 CONST_BITS+PASS2_BITS)
596 & RANGE_MASK];
597 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
598 CONST_BITS+PASS2_BITS)
599 & RANGE_MASK];
600 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
601 CONST_BITS+PASS2_BITS)
602 & RANGE_MASK];
603 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13,
604 CONST_BITS+PASS2_BITS)
605 & RANGE_MASK];
606
607 wsptr += 7; /* advance pointer to next row */
608 }
609}
610
611
612/*
613 * Perform dequantization and inverse DCT on one block of coefficients,
614 * producing a reduced-size 6x6 output block.
615 *
616 * Optimized algorithm with 3 multiplications in the 1-D kernel.
617 * cK represents sqrt(2) * cos(K*pi/12).
618 */
619
620GLOBAL(void)
621jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
624{
625 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
626 INT32 z1, z2, z3;
627 JCOEFPTR inptr;
628 ISLOW_MULT_TYPE * quantptr;
629 int * wsptr;
630 JSAMPROW outptr;
631 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
632 int ctr;
633 int workspace[6*6]; /* buffers data between passes */
635
636 /* Pass 1: process columns from input, store into work array. */
637
638 inptr = coef_block;
639 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
640 wsptr = workspace;
641 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
642 /* Even part */
643
644 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
645 tmp0 <<= CONST_BITS;
646 /* Add fudge factor here for final descale. */
647 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
648 tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
649 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
650 tmp1 = tmp0 + tmp10;
651 tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
652 tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
653 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
654 tmp10 = tmp1 + tmp0;
655 tmp12 = tmp1 - tmp0;
656
657 /* Odd part */
658
659 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
660 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
661 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
662 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
663 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
664 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
665#if PASS1_BITS > 0
666 tmp1 = (z1 - z2 - z3) << PASS1_BITS;
667#else
668 tmp1 = z1 - z2 - z3;
669#endif
670
671 /* Final output stage */
672
673 wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
674 wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
675 wsptr[6*1] = (int) (tmp11 + tmp1);
676 wsptr[6*4] = (int) (tmp11 - tmp1);
677 wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
678 wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
679 }
680
681 /* Pass 2: process 6 rows from work array, store into output array. */
682
683 wsptr = workspace;
684 for (ctr = 0; ctr < 6; ctr++) {
685 outptr = output_buf[ctr] + output_col;
686
687 /* Even part */
688
689 /* Add range center and fudge factor for final descale and range-limit. */
690 tmp0 = (INT32) wsptr[0] + PASS2_OFFSET;
691 tmp0 <<= CONST_BITS;
692#if PASS2_BITS == 0
693 tmp0 += ONE << (CONST_BITS-1);
694#endif
695 tmp2 = (INT32) wsptr[4];
696 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
697 tmp1 = tmp0 + tmp10;
698 tmp11 = tmp0 - tmp10 - tmp10;
699 tmp10 = (INT32) wsptr[2];
700 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
701 tmp10 = tmp1 + tmp0;
702 tmp12 = tmp1 - tmp0;
703
704 /* Odd part */
705
706 z1 = (INT32) wsptr[1];
707 z2 = (INT32) wsptr[3];
708 z3 = (INT32) wsptr[5];
709 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
710 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
711 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
712 tmp1 = (z1 - z2 - z3) << CONST_BITS;
713
714 /* Final output stage */
715
716 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
717 CONST_BITS+PASS2_BITS)
718 & RANGE_MASK];
719 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
720 CONST_BITS+PASS2_BITS)
721 & RANGE_MASK];
722 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
723 CONST_BITS+PASS2_BITS)
724 & RANGE_MASK];
725 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
726 CONST_BITS+PASS2_BITS)
727 & RANGE_MASK];
728 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
729 CONST_BITS+PASS2_BITS)
730 & RANGE_MASK];
731 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
732 CONST_BITS+PASS2_BITS)
733 & RANGE_MASK];
734
735 wsptr += 6; /* advance pointer to next row */
736 }
737}
738
739
740/*
741 * Perform dequantization and inverse DCT on one block of coefficients,
742 * producing a reduced-size 5x5 output block.
743 *
744 * Optimized algorithm with 5 multiplications in the 1-D kernel.
745 * cK represents sqrt(2) * cos(K*pi/10).
746 */
747
748GLOBAL(void)
749jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
752{
753 INT32 tmp0, tmp1, tmp10, tmp11, tmp12;
754 INT32 z1, z2, z3;
755 JCOEFPTR inptr;
756 ISLOW_MULT_TYPE * quantptr;
757 int * wsptr;
758 JSAMPROW outptr;
759 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
760 int ctr;
761 int workspace[5*5]; /* buffers data between passes */
763
764 /* Pass 1: process columns from input, store into work array. */
765
766 inptr = coef_block;
767 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
768 wsptr = workspace;
769 for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
770 /* Even part */
771
772 tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
773 tmp12 <<= CONST_BITS;
774 /* Add fudge factor here for final descale. */
775 tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
776 tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
777 tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
778 z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
779 z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
780 z3 = tmp12 + z2;
781 tmp10 = z3 + z1;
782 tmp11 = z3 - z1;
783 tmp12 -= z2 << 2;
784
785 /* Odd part */
786
787 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
788 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
789
790 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
791 tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
792 tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
793
794 /* Final output stage */
795
796 wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
797 wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
798 wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
799 wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
800 wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
801 }
802
803 /* Pass 2: process 5 rows from work array, store into output array. */
804
805 wsptr = workspace;
806 for (ctr = 0; ctr < 5; ctr++) {
807 outptr = output_buf[ctr] + output_col;
808
809 /* Even part */
810
811 /* Add range center and fudge factor for final descale and range-limit. */
812 tmp12 = (INT32) wsptr[0] + PASS2_OFFSET;
813 tmp12 <<= CONST_BITS;
814#if PASS2_BITS == 0
815 tmp12 += ONE << (CONST_BITS-1);
816#endif
817 tmp0 = (INT32) wsptr[2];
818 tmp1 = (INT32) wsptr[4];
819 z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
820 z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
821 z3 = tmp12 + z2;
822 tmp10 = z3 + z1;
823 tmp11 = z3 - z1;
824 tmp12 -= z2 << 2;
825
826 /* Odd part */
827
828 z2 = (INT32) wsptr[1];
829 z3 = (INT32) wsptr[3];
830
831 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
832 tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
833 tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
834
835 /* Final output stage */
836
837 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
838 CONST_BITS+PASS2_BITS)
839 & RANGE_MASK];
840 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
841 CONST_BITS+PASS2_BITS)
842 & RANGE_MASK];
843 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
844 CONST_BITS+PASS2_BITS)
845 & RANGE_MASK];
846 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
847 CONST_BITS+PASS2_BITS)
848 & RANGE_MASK];
849 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
850 CONST_BITS+PASS2_BITS)
851 & RANGE_MASK];
852
853 wsptr += 5; /* advance pointer to next row */
854 }
855}
856
857
858/*
859 * Perform dequantization and inverse DCT on one block of coefficients,
860 * producing a reduced-size 4x4 output block.
861 *
862 * Optimized algorithm with 3 multiplications in the 1-D kernel.
863 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
864 */
865
866GLOBAL(void)
867jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
870{
871 INT32 tmp0, tmp2, tmp10, tmp12;
872 INT32 z1, z2, z3;
873 JCOEFPTR inptr;
874 ISLOW_MULT_TYPE * quantptr;
875 int * wsptr;
876 JSAMPROW outptr;
877 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
878 int ctr;
879 int workspace[4*4]; /* buffers data between passes */
881
882 /* Pass 1: process columns from input, store into work array. */
883
884 inptr = coef_block;
885 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
886 wsptr = workspace;
887 for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
888 /* Even part */
889
890 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
891 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
892
893#if PASS1_BITS > 0
894 tmp10 = (tmp0 + tmp2) << PASS1_BITS;
895 tmp12 = (tmp0 - tmp2) << PASS1_BITS;
896#else
897 tmp10 = tmp0 + tmp2;
898 tmp12 = tmp0 - tmp2;
899#endif
900
901 /* Odd part */
902 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
903
904 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
905 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
906
907 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
908 /* Add fudge factor here for final descale. */
909 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
910 tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
911 CONST_BITS-PASS1_BITS);
912 tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
913 CONST_BITS-PASS1_BITS);
914
915 /* Final output stage */
916
917 wsptr[4*0] = (int) (tmp10 + tmp0);
918 wsptr[4*3] = (int) (tmp10 - tmp0);
919 wsptr[4*1] = (int) (tmp12 + tmp2);
920 wsptr[4*2] = (int) (tmp12 - tmp2);
921 }
922
923 /* Pass 2: process 4 rows from work array, store into output array. */
924
925 wsptr = workspace;
926 for (ctr = 0; ctr < 4; ctr++) {
927 outptr = output_buf[ctr] + output_col;
928
929 /* Even part */
930
931 /* Add range center and fudge factor for final descale and range-limit. */
932 tmp0 = (INT32) wsptr[0] + PASS2_OFFSET;
933 tmp2 = (INT32) wsptr[2];
934 tmp0 <<= CONST_BITS;
935 tmp2 <<= CONST_BITS;
936#if PASS2_BITS == 0
937 tmp0 += ONE << (CONST_BITS-1);
938#endif
939
940 tmp10 = tmp0 + tmp2;
941 tmp12 = tmp0 - tmp2;
942
943 /* Odd part */
944 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
945
946 z2 = (INT32) wsptr[1];
947 z3 = (INT32) wsptr[3];
948
949 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
950 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
951 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
952
953 /* Final output stage */
954
955 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
956 CONST_BITS+PASS2_BITS)
957 & RANGE_MASK];
958 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
959 CONST_BITS+PASS2_BITS)
960 & RANGE_MASK];
961 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
962 CONST_BITS+PASS2_BITS)
963 & RANGE_MASK];
964 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
965 CONST_BITS+PASS2_BITS)
966 & RANGE_MASK];
967
968 wsptr += 4; /* advance pointer to next row */
969 }
970}
971
972
973/*
974 * Perform dequantization and inverse DCT on one block of coefficients,
975 * producing a reduced-size 3x3 output block.
976 *
977 * Optimized algorithm with 2 multiplications in the 1-D kernel.
978 * cK represents sqrt(2) * cos(K*pi/6).
979 */
980
981GLOBAL(void)
982jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
985{
986 INT32 tmp0, tmp2, tmp10, tmp12;
987 JCOEFPTR inptr;
988 ISLOW_MULT_TYPE * quantptr;
989 int * wsptr;
990 JSAMPROW outptr;
991 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
992 int ctr;
993 int workspace[3*3]; /* buffers data between passes */
995
996 /* Pass 1: process columns from input, store into work array. */
997
998 inptr = coef_block;
999 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1000 wsptr = workspace;
1001 for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
1002 /* Even part */
1003
1004 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1005 tmp0 <<= CONST_BITS;
1006 /* Add fudge factor here for final descale. */
1007 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
1008 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1009 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
1010 tmp10 = tmp0 + tmp12;
1011 tmp2 = tmp0 - tmp12 - tmp12;
1012
1013 /* Odd part */
1014
1015 tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1016 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
1017
1018 /* Final output stage */
1019
1020 wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
1021 wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
1022 wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
1023 }
1024
1025 /* Pass 2: process 3 rows from work array, store into output array. */
1026
1027 wsptr = workspace;
1028 for (ctr = 0; ctr < 3; ctr++) {
1029 outptr = output_buf[ctr] + output_col;
1030
1031 /* Even part */
1032
1033 /* Add range center and fudge factor for final descale and range-limit. */
1034 tmp0 = (INT32) wsptr[0] + PASS2_OFFSET;
1035 tmp0 <<= CONST_BITS;
1036#if PASS2_BITS == 0
1037 tmp0 += ONE << (CONST_BITS-1);
1038#endif
1039 tmp2 = (INT32) wsptr[2];
1040 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
1041 tmp10 = tmp0 + tmp12;
1042 tmp2 = tmp0 - tmp12 - tmp12;
1043
1044 /* Odd part */
1045
1046 tmp12 = (INT32) wsptr[1];
1047 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
1048
1049 /* Final output stage */
1050
1051 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
1052 CONST_BITS+PASS2_BITS)
1053 & RANGE_MASK];
1054 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
1055 CONST_BITS+PASS2_BITS)
1056 & RANGE_MASK];
1057 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
1058 CONST_BITS+PASS2_BITS)
1059 & RANGE_MASK];
1060
1061 wsptr += 3; /* advance pointer to next row */
1062 }
1063}
1064
1065
1066/*
1067 * Perform dequantization and inverse DCT on one block of coefficients,
1068 * producing a reduced-size 2x2 output block.
1069 *
1070 * Multiplication-less algorithm.
1071 */
1072
1073GLOBAL(void)
1074jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1077{
1078 DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
1079 ISLOW_MULT_TYPE * quantptr;
1080 JSAMPROW outptr;
1081 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1083
1084 /* Pass 1: process columns from input. */
1085
1086 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1087
1088 /* Column 0 */
1089 tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
1090 tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
1091
1092#if PASS2_BITS > PASS1_BITS
1093 /* Add range center and fudge factor for final downscale and range-limit. */
1094#if PASS2_BITS > PASS1_BITS + 1
1095 tmp4 += (((DCTELEM) RANGE_CENTER) << (PASS2_BITS-PASS1_BITS)) +
1096 (1 << (PASS2_BITS-PASS1_BITS-1));
1097#else
1098 tmp4 += (((DCTELEM) RANGE_CENTER) << 1) + 1;
1099#endif
1100
1101 tmp0 = tmp4 + tmp5;
1102 tmp2 = tmp4 - tmp5;
1103
1104 /* Column 1 */
1105 tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0+1], quantptr[DCTSIZE*0+1]);
1106 tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1+1], quantptr[DCTSIZE*1+1]);
1107
1108 tmp1 = tmp4 + tmp5;
1109 tmp3 = tmp4 - tmp5;
1110
1111 /* Pass 2: process 2 rows, store into output array. */
1112
1113 /* Row 0 */
1114 outptr = output_buf[0] + output_col;
1115
1116 outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1,
1117 PASS2_BITS-PASS1_BITS)
1118 & RANGE_MASK];
1119 outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1,
1120 PASS2_BITS-PASS1_BITS)
1121 & RANGE_MASK];
1122
1123 /* Row 1 */
1124 outptr = output_buf[1] + output_col;
1125
1126 outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp2 + tmp3,
1127 PASS2_BITS-PASS1_BITS)
1128 & RANGE_MASK];
1129 outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp2 - tmp3,
1130 PASS2_BITS-PASS1_BITS)
1131 & RANGE_MASK];
1132#else
1133#if PASS2_BITS == PASS1_BITS
1134 tmp4 += (DCTELEM) RANGE_CENTER; /* add range center for final range-limit */
1135
1136 tmp0 = tmp4 + tmp5;
1137 tmp2 = tmp4 - tmp5;
1138
1139 /* Column 1 */
1140 tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0+1], quantptr[DCTSIZE*0+1]);
1141 tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1+1], quantptr[DCTSIZE*1+1]);
1142#else
1143 tmp4 <<= (PASS1_BITS-PASS2_BITS); /* upscale */
1144 tmp5 <<= (PASS1_BITS-PASS2_BITS); /* upscale */
1145
1146 tmp4 += (DCTELEM) RANGE_CENTER; /* add range center for final range-limit */
1147
1148 tmp0 = tmp4 + tmp5;
1149 tmp2 = tmp4 - tmp5;
1150
1151 /* Column 1 */
1152 tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0+1], quantptr[DCTSIZE*0+1]);
1153 tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1+1], quantptr[DCTSIZE*1+1]);
1154
1155 tmp4 <<= (PASS1_BITS-PASS2_BITS); /* upscale */
1156 tmp5 <<= (PASS1_BITS-PASS2_BITS); /* upscale */
1157#endif
1158
1159 tmp1 = tmp4 + tmp5;
1160 tmp3 = tmp4 - tmp5;
1161
1162 /* Pass 2: process 2 rows, store into output array. */
1163
1164 /* Row 0 */
1165 outptr = output_buf[0] + output_col;
1166
1167 outptr[0] = range_limit[(int) (tmp0 + tmp1) & RANGE_MASK];
1168 outptr[1] = range_limit[(int) (tmp0 - tmp1) & RANGE_MASK];
1169
1170 /* Row 1 */
1171 outptr = output_buf[1] + output_col;
1172
1173 outptr[0] = range_limit[(int) (tmp2 + tmp3) & RANGE_MASK];
1174 outptr[1] = range_limit[(int) (tmp2 - tmp3) & RANGE_MASK];
1175#endif
1176}
1177
1178
1179/*
1180 * Perform dequantization and inverse DCT on one block of coefficients,
1181 * producing a reduced-size 1x1 output block.
1182 *
1183 * This is just a rescale of the DC coefficient.
1184 */
1185
1186GLOBAL(void)
1187jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1190{
1191 DCTELEM dcval;
1192 ISLOW_MULT_TYPE * quantptr;
1193 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1195
1196 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1197
1198 dcval = DEQUANTIZE(coef_block[0], quantptr[0]);
1199
1200#if PASS2_BITS > PASS1_BITS
1201 /* Add range center and fudge factor for downscale and range-limit. */
1202#if PASS2_BITS > PASS1_BITS + 1
1203 dcval += (((DCTELEM) RANGE_CENTER) << (PASS2_BITS-PASS1_BITS)) +
1204 (1 << (PASS2_BITS-PASS1_BITS-1));
1205#else
1206 dcval += (((DCTELEM) RANGE_CENTER) << 1) + 1;
1207#endif
1208
1210 range_limit[(int) IRIGHT_SHIFT(dcval, PASS2_BITS-PASS1_BITS) & RANGE_MASK];
1211#else
1212#if PASS2_BITS < PASS1_BITS
1213 dcval <<= (PASS1_BITS-PASS2_BITS); /* upscale */
1214#endif
1215
1217 range_limit[((int) dcval + RANGE_CENTER) & RANGE_MASK];
1218#endif
1219}
1220
1221
1222/*
1223 * Perform dequantization and inverse DCT on one block of coefficients,
1224 * producing a 9x9 output block.
1225 *
1226 * Optimized algorithm with 10 multiplications in the 1-D kernel.
1227 * cK represents sqrt(2) * cos(K*pi/18).
1228 */
1229
1230GLOBAL(void)
1231jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1234{
1235 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
1236 INT32 z1, z2, z3, z4;
1237 JCOEFPTR inptr;
1238 ISLOW_MULT_TYPE * quantptr;
1239 int * wsptr;
1240 JSAMPROW outptr;
1241 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1242 int ctr;
1243 int workspace[8*9]; /* buffers data between passes */
1245
1246 /* Pass 1: process columns from input, store into work array. */
1247
1248 inptr = coef_block;
1249 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1250 wsptr = workspace;
1251 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1252 /* Even part */
1253
1254 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1255 tmp0 <<= CONST_BITS;
1256 /* Add fudge factor here for final descale. */
1257 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
1258
1259 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1260 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1261 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1262
1263 tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */
1264 tmp1 = tmp0 + tmp3;
1265 tmp2 = tmp0 - tmp3 - tmp3;
1266
1267 tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
1268 tmp11 = tmp2 + tmp0;
1269 tmp14 = tmp2 - tmp0 - tmp0;
1270
1271 tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
1272 tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */
1273 tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */
1274
1275 tmp10 = tmp1 + tmp0 - tmp3;
1276 tmp12 = tmp1 - tmp0 + tmp2;
1277 tmp13 = tmp1 - tmp2 + tmp3;
1278
1279 /* Odd part */
1280
1281 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1282 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1283 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1284 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1285
1286 z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */
1287
1288 tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */
1289 tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */
1290 tmp0 = tmp2 + tmp3 - z2;
1291 tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */
1292 tmp2 += z2 - tmp1;
1293 tmp3 += z2 + tmp1;
1294 tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
1295
1296 /* Final output stage */
1297
1298 wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
1299 wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
1300 wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
1301 wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
1302 wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
1303 wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
1304 wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS);
1305 wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS);
1306 wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS);
1307 }
1308
1309 /* Pass 2: process 9 rows from work array, store into output array. */
1310
1311 wsptr = workspace;
1312 for (ctr = 0; ctr < 9; ctr++) {
1313 outptr = output_buf[ctr] + output_col;
1314
1315 /* Even part */
1316
1317 /* Add range center and fudge factor for final descale and range-limit. */
1318 tmp0 = (INT32) wsptr[0] + PASS2_OFFSET;
1319 tmp0 <<= CONST_BITS;
1320#if PASS2_BITS == 0
1321 tmp0 += ONE << (CONST_BITS-1);
1322#endif
1323
1324 z1 = (INT32) wsptr[2];
1325 z2 = (INT32) wsptr[4];
1326 z3 = (INT32) wsptr[6];
1327
1328 tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */
1329 tmp1 = tmp0 + tmp3;
1330 tmp2 = tmp0 - tmp3 - tmp3;
1331
1332 tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
1333 tmp11 = tmp2 + tmp0;
1334 tmp14 = tmp2 - tmp0 - tmp0;
1335
1336 tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
1337 tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */
1338 tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */
1339
1340 tmp10 = tmp1 + tmp0 - tmp3;
1341 tmp12 = tmp1 - tmp0 + tmp2;
1342 tmp13 = tmp1 - tmp2 + tmp3;
1343
1344 /* Odd part */
1345
1346 z1 = (INT32) wsptr[1];
1347 z2 = (INT32) wsptr[3];
1348 z3 = (INT32) wsptr[5];
1349 z4 = (INT32) wsptr[7];
1350
1351 z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */
1352
1353 tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */
1354 tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */
1355 tmp0 = tmp2 + tmp3 - z2;
1356 tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */
1357 tmp2 += z2 - tmp1;
1358 tmp3 += z2 + tmp1;
1359 tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
1360
1361 /* Final output stage */
1362
1363 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
1364 CONST_BITS+PASS2_BITS)
1365 & RANGE_MASK];
1366 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
1367 CONST_BITS+PASS2_BITS)
1368 & RANGE_MASK];
1369 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
1370 CONST_BITS+PASS2_BITS)
1371 & RANGE_MASK];
1372 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
1373 CONST_BITS+PASS2_BITS)
1374 & RANGE_MASK];
1375 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
1376 CONST_BITS+PASS2_BITS)
1377 & RANGE_MASK];
1378 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
1379 CONST_BITS+PASS2_BITS)
1380 & RANGE_MASK];
1381 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3,
1382 CONST_BITS+PASS2_BITS)
1383 & RANGE_MASK];
1384 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3,
1385 CONST_BITS+PASS2_BITS)
1386 & RANGE_MASK];
1387 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14,
1388 CONST_BITS+PASS2_BITS)
1389 & RANGE_MASK];
1390
1391 wsptr += 8; /* advance pointer to next row */
1392 }
1393}
1394
1395
1396/*
1397 * Perform dequantization and inverse DCT on one block of coefficients,
1398 * producing a 10x10 output block.
1399 *
1400 * Optimized algorithm with 12 multiplications in the 1-D kernel.
1401 * cK represents sqrt(2) * cos(K*pi/20).
1402 */
1403
1404GLOBAL(void)
1405jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1408{
1409 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1410 INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
1411 INT32 z1, z2, z3, z4, z5;
1412 JCOEFPTR inptr;
1413 ISLOW_MULT_TYPE * quantptr;
1414 int * wsptr;
1415 JSAMPROW outptr;
1416 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1417 int ctr;
1418 int workspace[8*10]; /* buffers data between passes */
1420
1421 /* Pass 1: process columns from input, store into work array. */
1422
1423 inptr = coef_block;
1424 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1425 wsptr = workspace;
1426 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1427 /* Even part */
1428
1429 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1430 z3 <<= CONST_BITS;
1431 /* Add fudge factor here for final descale. */
1432 z3 += ONE << (CONST_BITS-PASS1_BITS-1);
1433 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1434 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
1435 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
1436 tmp10 = z3 + z1;
1437 tmp11 = z3 - z2;
1438
1439 tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */
1440 CONST_BITS-PASS1_BITS);
1441
1442 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1443 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1444
1445 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
1446 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
1447 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
1448
1449 tmp20 = tmp10 + tmp12;
1450 tmp24 = tmp10 - tmp12;
1451 tmp21 = tmp11 + tmp13;
1452 tmp23 = tmp11 - tmp13;
1453
1454 /* Odd part */
1455
1456 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1457 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1458 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1459 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1460
1461 tmp11 = z2 + z4;
1462 tmp13 = z2 - z4;
1463
1464 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
1465 z5 = z3 << CONST_BITS;
1466
1467 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
1468 z4 = z5 + tmp12;
1469
1470 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
1471 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
1472
1473 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
1474 z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
1475
1476#if PASS1_BITS > 0
1477 tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
1478#else
1479 tmp12 = z1 - tmp13 - z3;
1480#endif
1481
1482 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
1483 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
1484
1485 /* Final output stage */
1486
1487 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1488 wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1489 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1490 wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1491 wsptr[8*2] = (int) (tmp22 + tmp12);
1492 wsptr[8*7] = (int) (tmp22 - tmp12);
1493 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1494 wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1495 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1496 wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1497 }
1498
1499 /* Pass 2: process 10 rows from work array, store into output array. */
1500
1501 wsptr = workspace;
1502 for (ctr = 0; ctr < 10; ctr++) {
1503 outptr = output_buf[ctr] + output_col;
1504
1505 /* Even part */
1506
1507 /* Add range center and fudge factor for final descale and range-limit. */
1508 z3 = (INT32) wsptr[0] + PASS2_OFFSET;
1509 z3 <<= CONST_BITS;
1510#if PASS2_BITS == 0
1511 z3 += ONE << (CONST_BITS-1);
1512#endif
1513 z4 = (INT32) wsptr[4];
1514 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
1515 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
1516 tmp10 = z3 + z1;
1517 tmp11 = z3 - z2;
1518
1519 tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */
1520
1521 z2 = (INT32) wsptr[2];
1522 z3 = (INT32) wsptr[6];
1523
1524 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
1525 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
1526 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
1527
1528 tmp20 = tmp10 + tmp12;
1529 tmp24 = tmp10 - tmp12;
1530 tmp21 = tmp11 + tmp13;
1531 tmp23 = tmp11 - tmp13;
1532
1533 /* Odd part */
1534
1535 z1 = (INT32) wsptr[1];
1536 z2 = (INT32) wsptr[3];
1537 z3 = (INT32) wsptr[5];
1538 z3 <<= CONST_BITS;
1539 z4 = (INT32) wsptr[7];
1540
1541 tmp11 = z2 + z4;
1542 tmp13 = z2 - z4;
1543
1544 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
1545
1546 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
1547 z4 = z3 + tmp12;
1548
1549 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
1550 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
1551
1552 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
1553 z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
1554
1555 tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
1556
1557 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
1558 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
1559
1560 /* Final output stage */
1561
1562 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1563 CONST_BITS+PASS2_BITS)
1564 & RANGE_MASK];
1565 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1566 CONST_BITS+PASS2_BITS)
1567 & RANGE_MASK];
1568 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1569 CONST_BITS+PASS2_BITS)
1570 & RANGE_MASK];
1571 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1572 CONST_BITS+PASS2_BITS)
1573 & RANGE_MASK];
1574 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1575 CONST_BITS+PASS2_BITS)
1576 & RANGE_MASK];
1577 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1578 CONST_BITS+PASS2_BITS)
1579 & RANGE_MASK];
1580 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1581 CONST_BITS+PASS2_BITS)
1582 & RANGE_MASK];
1583 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1584 CONST_BITS+PASS2_BITS)
1585 & RANGE_MASK];
1586 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1587 CONST_BITS+PASS2_BITS)
1588 & RANGE_MASK];
1589 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1590 CONST_BITS+PASS2_BITS)
1591 & RANGE_MASK];
1592
1593 wsptr += 8; /* advance pointer to next row */
1594 }
1595}
1596
1597
1598/*
1599 * Perform dequantization and inverse DCT on one block of coefficients,
1600 * producing an 11x11 output block.
1601 *
1602 * Optimized algorithm with 24 multiplications in the 1-D kernel.
1603 * cK represents sqrt(2) * cos(K*pi/22).
1604 */
1605
1606GLOBAL(void)
1607jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1610{
1611 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1612 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
1613 INT32 z1, z2, z3, z4;
1614 JCOEFPTR inptr;
1615 ISLOW_MULT_TYPE * quantptr;
1616 int * wsptr;
1617 JSAMPROW outptr;
1618 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1619 int ctr;
1620 int workspace[8*11]; /* buffers data between passes */
1622
1623 /* Pass 1: process columns from input, store into work array. */
1624
1625 inptr = coef_block;
1626 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1627 wsptr = workspace;
1628 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1629 /* Even part */
1630
1631 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1632 tmp10 <<= CONST_BITS;
1633 /* Add fudge factor here for final descale. */
1634 tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
1635
1636 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1637 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1638 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1639
1640 tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */
1641 tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */
1642 z4 = z1 + z3;
1643 tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */
1644 z4 -= z2;
1645 tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */
1646 tmp21 = tmp20 + tmp23 + tmp25 -
1647 MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */
1648 tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
1649 tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
1650 tmp24 += tmp25;
1651 tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */
1652 tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */
1653 MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */
1654 tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */
1655
1656 /* Odd part */
1657
1658 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1659 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1660 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1661 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1662
1663 tmp11 = z1 + z2;
1664 tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
1665 tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */
1666 tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */
1667 tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
1668 tmp10 = tmp11 + tmp12 + tmp13 -
1669 MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */
1670 z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
1671 tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */
1672 tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */
1673 z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */
1674 tmp11 += z1;
1675 tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */
1676 tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */
1677 MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */
1678 MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */
1679
1680 /* Final output stage */
1681
1682 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1683 wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1684 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1685 wsptr[8*9] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1686 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1687 wsptr[8*8] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1688 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1689 wsptr[8*7] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1690 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1691 wsptr[8*6] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1692 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS);
1693 }
1694
1695 /* Pass 2: process 11 rows from work array, store into output array. */
1696
1697 wsptr = workspace;
1698 for (ctr = 0; ctr < 11; ctr++) {
1699 outptr = output_buf[ctr] + output_col;
1700
1701 /* Even part */
1702
1703 /* Add range center and fudge factor for final descale and range-limit. */
1704 tmp10 = (INT32) wsptr[0] + PASS2_OFFSET;
1705 tmp10 <<= CONST_BITS;
1706#if PASS2_BITS == 0
1707 tmp10 += ONE << (CONST_BITS-1);
1708#endif
1709
1710 z1 = (INT32) wsptr[2];
1711 z2 = (INT32) wsptr[4];
1712 z3 = (INT32) wsptr[6];
1713
1714 tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */
1715 tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */
1716 z4 = z1 + z3;
1717 tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */
1718 z4 -= z2;
1719 tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */
1720 tmp21 = tmp20 + tmp23 + tmp25 -
1721 MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */
1722 tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
1723 tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
1724 tmp24 += tmp25;
1725 tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */
1726 tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */
1727 MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */
1728 tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */
1729
1730 /* Odd part */
1731
1732 z1 = (INT32) wsptr[1];
1733 z2 = (INT32) wsptr[3];
1734 z3 = (INT32) wsptr[5];
1735 z4 = (INT32) wsptr[7];
1736
1737 tmp11 = z1 + z2;
1738 tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
1739 tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */
1740 tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */
1741 tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
1742 tmp10 = tmp11 + tmp12 + tmp13 -
1743 MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */
1744 z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
1745 tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */
1746 tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */
1747 z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */
1748 tmp11 += z1;
1749 tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */
1750 tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */
1751 MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */
1752 MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */
1753
1754 /* Final output stage */
1755
1756 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1757 CONST_BITS+PASS2_BITS)
1758 & RANGE_MASK];
1759 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1760 CONST_BITS+PASS2_BITS)
1761 & RANGE_MASK];
1762 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1763 CONST_BITS+PASS2_BITS)
1764 & RANGE_MASK];
1765 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1766 CONST_BITS+PASS2_BITS)
1767 & RANGE_MASK];
1768 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1769 CONST_BITS+PASS2_BITS)
1770 & RANGE_MASK];
1771 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1772 CONST_BITS+PASS2_BITS)
1773 & RANGE_MASK];
1774 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1775 CONST_BITS+PASS2_BITS)
1776 & RANGE_MASK];
1777 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1778 CONST_BITS+PASS2_BITS)
1779 & RANGE_MASK];
1780 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1781 CONST_BITS+PASS2_BITS)
1782 & RANGE_MASK];
1783 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1784 CONST_BITS+PASS2_BITS)
1785 & RANGE_MASK];
1786 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25,
1787 CONST_BITS+PASS2_BITS)
1788 & RANGE_MASK];
1789
1790 wsptr += 8; /* advance pointer to next row */
1791 }
1792}
1793
1794
1795/*
1796 * Perform dequantization and inverse DCT on one block of coefficients,
1797 * producing a 12x12 output block.
1798 *
1799 * Optimized algorithm with 15 multiplications in the 1-D kernel.
1800 * cK represents sqrt(2) * cos(K*pi/24).
1801 */
1802
1803GLOBAL(void)
1804jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1807{
1808 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1809 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
1810 INT32 z1, z2, z3, z4;
1811 JCOEFPTR inptr;
1812 ISLOW_MULT_TYPE * quantptr;
1813 int * wsptr;
1814 JSAMPROW outptr;
1815 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1816 int ctr;
1817 int workspace[8*12]; /* buffers data between passes */
1819
1820 /* Pass 1: process columns from input, store into work array. */
1821
1822 inptr = coef_block;
1823 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1824 wsptr = workspace;
1825 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1826 /* Even part */
1827
1828 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1829 z3 <<= CONST_BITS;
1830 /* Add fudge factor here for final descale. */
1831 z3 += ONE << (CONST_BITS-PASS1_BITS-1);
1832
1833 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1834 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
1835
1836 tmp10 = z3 + z4;
1837 tmp11 = z3 - z4;
1838
1839 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1840 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
1841 z1 <<= CONST_BITS;
1842 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1843 z2 <<= CONST_BITS;
1844
1845 tmp12 = z1 - z2;
1846
1847 tmp21 = z3 + tmp12;
1848 tmp24 = z3 - tmp12;
1849
1850 tmp12 = z4 + z2;
1851
1852 tmp20 = tmp10 + tmp12;
1853 tmp25 = tmp10 - tmp12;
1854
1855 tmp12 = z4 - z1 - z2;
1856
1857 tmp22 = tmp11 + tmp12;
1858 tmp23 = tmp11 - tmp12;
1859
1860 /* Odd part */
1861
1862 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1863 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1864 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1865 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1866
1867 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
1868 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
1869
1870 tmp10 = z1 + z3;
1871 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
1872 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
1873 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
1874 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
1875 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
1876 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
1877 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
1878 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
1879
1880 z1 -= z4;
1881 z2 -= z3;
1882 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
1883 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
1884 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
1885
1886 /* Final output stage */
1887
1888 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1889 wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1890 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1891 wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1892 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1893 wsptr[8*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1894 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1895 wsptr[8*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1896 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1897 wsptr[8*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1898 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
1899 wsptr[8*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
1900 }
1901
1902 /* Pass 2: process 12 rows from work array, store into output array. */
1903
1904 wsptr = workspace;
1905 for (ctr = 0; ctr < 12; ctr++) {
1906 outptr = output_buf[ctr] + output_col;
1907
1908 /* Even part */
1909
1910 /* Add range center and fudge factor for final descale and range-limit. */
1911 z3 = (INT32) wsptr[0] + PASS2_OFFSET;
1912 z3 <<= CONST_BITS;
1913#if PASS2_BITS == 0
1914 z3 += ONE << (CONST_BITS-1);
1915#endif
1916
1917 z4 = (INT32) wsptr[4];
1918 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
1919
1920 tmp10 = z3 + z4;
1921 tmp11 = z3 - z4;
1922
1923 z1 = (INT32) wsptr[2];
1924 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
1925 z1 <<= CONST_BITS;
1926 z2 = (INT32) wsptr[6];
1927 z2 <<= CONST_BITS;
1928
1929 tmp12 = z1 - z2;
1930
1931 tmp21 = z3 + tmp12;
1932 tmp24 = z3 - tmp12;
1933
1934 tmp12 = z4 + z2;
1935
1936 tmp20 = tmp10 + tmp12;
1937 tmp25 = tmp10 - tmp12;
1938
1939 tmp12 = z4 - z1 - z2;
1940
1941 tmp22 = tmp11 + tmp12;
1942 tmp23 = tmp11 - tmp12;
1943
1944 /* Odd part */
1945
1946 z1 = (INT32) wsptr[1];
1947 z2 = (INT32) wsptr[3];
1948 z3 = (INT32) wsptr[5];
1949 z4 = (INT32) wsptr[7];
1950
1951 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
1952 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
1953
1954 tmp10 = z1 + z3;
1955 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
1956 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
1957 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
1958 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
1959 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
1960 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
1961 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
1962 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
1963
1964 z1 -= z4;
1965 z2 -= z3;
1966 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
1967 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
1968 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
1969
1970 /* Final output stage */
1971
1972 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1973 CONST_BITS+PASS2_BITS)
1974 & RANGE_MASK];
1975 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1976 CONST_BITS+PASS2_BITS)
1977 & RANGE_MASK];
1978 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1979 CONST_BITS+PASS2_BITS)
1980 & RANGE_MASK];
1981 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1982 CONST_BITS+PASS2_BITS)
1983 & RANGE_MASK];
1984 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1985 CONST_BITS+PASS2_BITS)
1986 & RANGE_MASK];
1987 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1988 CONST_BITS+PASS2_BITS)
1989 & RANGE_MASK];
1990 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1991 CONST_BITS+PASS2_BITS)
1992 & RANGE_MASK];
1993 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1994 CONST_BITS+PASS2_BITS)
1995 & RANGE_MASK];
1996 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1997 CONST_BITS+PASS2_BITS)
1998 & RANGE_MASK];
1999 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2000 CONST_BITS+PASS2_BITS)
2001 & RANGE_MASK];
2002 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2003 CONST_BITS+PASS2_BITS)
2004 & RANGE_MASK];
2005 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2006 CONST_BITS+PASS2_BITS)
2007 & RANGE_MASK];
2008
2009 wsptr += 8; /* advance pointer to next row */
2010 }
2011}
2012
2013
2014/*
2015 * Perform dequantization and inverse DCT on one block of coefficients,
2016 * producing a 13x13 output block.
2017 *
2018 * Optimized algorithm with 29 multiplications in the 1-D kernel.
2019 * cK represents sqrt(2) * cos(K*pi/26).
2020 */
2021
2022GLOBAL(void)
2023jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2026{
2027 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
2028 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
2029 INT32 z1, z2, z3, z4;
2030 JCOEFPTR inptr;
2031 ISLOW_MULT_TYPE * quantptr;
2032 int * wsptr;
2033 JSAMPROW outptr;
2034 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2035 int ctr;
2036 int workspace[8*13]; /* buffers data between passes */
2038
2039 /* Pass 1: process columns from input, store into work array. */
2040
2041 inptr = coef_block;
2042 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2043 wsptr = workspace;
2044 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2045 /* Even part */
2046
2047 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2048 z1 <<= CONST_BITS;
2049 /* Add fudge factor here for final descale. */
2050 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
2051
2052 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2053 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2054 z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2055
2056 tmp10 = z3 + z4;
2057 tmp11 = z3 - z4;
2058
2059 tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */
2060 tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */
2061
2062 tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */
2063 tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */
2064
2065 tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */
2066 tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */
2067
2068 tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */
2069 tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
2070
2071 tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */
2072 tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */
2073
2074 tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
2075 tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
2076
2077 tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */
2078
2079 /* Odd part */
2080
2081 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2082 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2083 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2084 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2085
2086 tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */
2087 tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */
2088 tmp15 = z1 + z4;
2089 tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */
2090 tmp10 = tmp11 + tmp12 + tmp13 -
2091 MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */
2092 tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */
2093 tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
2094 tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
2095 tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */
2096 tmp11 += tmp14;
2097 tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
2098 tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */
2099 tmp12 += tmp14;
2100 tmp13 += tmp14;
2101 tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */
2102 tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
2103 MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */
2104 z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */
2105 tmp14 += z1;
2106 tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */
2107 MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */
2108
2109 /* Final output stage */
2110
2111 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
2112 wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
2113 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
2114 wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
2115 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
2116 wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2117 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
2118 wsptr[8*9] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
2119 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2120 wsptr[8*8] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2121 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2122 wsptr[8*7] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2123 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS);
2124 }
2125
2126 /* Pass 2: process 13 rows from work array, store into output array. */
2127
2128 wsptr = workspace;
2129 for (ctr = 0; ctr < 13; ctr++) {
2130 outptr = output_buf[ctr] + output_col;
2131
2132 /* Even part */
2133
2134 /* Add range center and fudge factor for final descale and range-limit. */
2135 z1 = (INT32) wsptr[0] + PASS2_OFFSET;
2136 z1 <<= CONST_BITS;
2137#if PASS2_BITS == 0
2138 z1 += ONE << (CONST_BITS-1);
2139#endif
2140
2141 z2 = (INT32) wsptr[2];
2142 z3 = (INT32) wsptr[4];
2143 z4 = (INT32) wsptr[6];
2144
2145 tmp10 = z3 + z4;
2146 tmp11 = z3 - z4;
2147
2148 tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */
2149 tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */
2150
2151 tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */
2152 tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */
2153
2154 tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */
2155 tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */
2156
2157 tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */
2158 tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
2159
2160 tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */
2161 tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */
2162
2163 tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
2164 tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
2165
2166 tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */
2167
2168 /* Odd part */
2169
2170 z1 = (INT32) wsptr[1];
2171 z2 = (INT32) wsptr[3];
2172 z3 = (INT32) wsptr[5];
2173 z4 = (INT32) wsptr[7];
2174
2175 tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */
2176 tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */
2177 tmp15 = z1 + z4;
2178 tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */
2179 tmp10 = tmp11 + tmp12 + tmp13 -
2180 MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */
2181 tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */
2182 tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
2183 tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
2184 tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */
2185 tmp11 += tmp14;
2186 tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
2187 tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */
2188 tmp12 += tmp14;
2189 tmp13 += tmp14;
2190 tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */
2191 tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
2192 MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */
2193 z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */
2194 tmp14 += z1;
2195 tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */
2196 MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */
2197
2198 /* Final output stage */
2199
2200 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2201 CONST_BITS+PASS2_BITS)
2202 & RANGE_MASK];
2203 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2204 CONST_BITS+PASS2_BITS)
2205 & RANGE_MASK];
2206 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2207 CONST_BITS+PASS2_BITS)
2208 & RANGE_MASK];
2209 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2210 CONST_BITS+PASS2_BITS)
2211 & RANGE_MASK];
2212 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2213 CONST_BITS+PASS2_BITS)
2214 & RANGE_MASK];
2215 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2216 CONST_BITS+PASS2_BITS)
2217 & RANGE_MASK];
2218 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2219 CONST_BITS+PASS2_BITS)
2220 & RANGE_MASK];
2221 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2222 CONST_BITS+PASS2_BITS)
2223 & RANGE_MASK];
2224 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2225 CONST_BITS+PASS2_BITS)
2226 & RANGE_MASK];
2227 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2228 CONST_BITS+PASS2_BITS)
2229 & RANGE_MASK];
2230 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2231 CONST_BITS+PASS2_BITS)
2232 & RANGE_MASK];
2233 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2234 CONST_BITS+PASS2_BITS)
2235 & RANGE_MASK];
2236 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26,
2237 CONST_BITS+PASS2_BITS)
2238 & RANGE_MASK];
2239
2240 wsptr += 8; /* advance pointer to next row */
2241 }
2242}
2243
2244
2245/*
2246 * Perform dequantization and inverse DCT on one block of coefficients,
2247 * producing a 14x14 output block.
2248 *
2249 * Optimized algorithm with 20 multiplications in the 1-D kernel.
2250 * cK represents sqrt(2) * cos(K*pi/28).
2251 */
2252
2253GLOBAL(void)
2254jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2257{
2258 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2259 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
2260 INT32 z1, z2, z3, z4;
2261 JCOEFPTR inptr;
2262 ISLOW_MULT_TYPE * quantptr;
2263 int * wsptr;
2264 JSAMPROW outptr;
2265 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2266 int ctr;
2267 int workspace[8*14]; /* buffers data between passes */
2269
2270 /* Pass 1: process columns from input, store into work array. */
2271
2272 inptr = coef_block;
2273 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2274 wsptr = workspace;
2275 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2276 /* Even part */
2277
2278 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2279 z1 <<= CONST_BITS;
2280 /* Add fudge factor here for final descale. */
2281 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
2282 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2283 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
2284 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
2285 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
2286
2287 tmp10 = z1 + z2;
2288 tmp11 = z1 + z3;
2289 tmp12 = z1 - z4;
2290
2291 tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
2292 CONST_BITS-PASS1_BITS);
2293
2294 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2295 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2296
2297 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
2298
2299 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
2300 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
2301 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
2302 MULTIPLY(z2, FIX(1.378756276)); /* c2 */
2303
2304 tmp20 = tmp10 + tmp13;
2305 tmp26 = tmp10 - tmp13;
2306 tmp21 = tmp11 + tmp14;
2307 tmp25 = tmp11 - tmp14;
2308 tmp22 = tmp12 + tmp15;
2309 tmp24 = tmp12 - tmp15;
2310
2311 /* Odd part */
2312
2313 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2314 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2315 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2316 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2317 tmp13 = z4 << CONST_BITS;
2318
2319 tmp14 = z1 + z3;
2320 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
2321 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
2322 tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
2323 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
2324 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
2325 z1 -= z2;
2326 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */
2327 tmp16 += tmp15;
2328 z1 += z4;
2329 z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
2330 tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
2331 tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
2332 z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
2333 tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
2334 tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
2335
2336#if PASS1_BITS > 0
2337 tmp13 = (z1 - z3) << PASS1_BITS;
2338#else
2339 tmp13 = z1 - z3;
2340#endif
2341
2342 /* Final output stage */
2343
2344 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
2345 wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
2346 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
2347 wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
2348 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
2349 wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2350 wsptr[8*3] = (int) (tmp23 + tmp13);
2351 wsptr[8*10] = (int) (tmp23 - tmp13);
2352 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2353 wsptr[8*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2354 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2355 wsptr[8*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2356 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
2357 wsptr[8*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
2358 }
2359
2360 /* Pass 2: process 14 rows from work array, store into output array. */
2361
2362 wsptr = workspace;
2363 for (ctr = 0; ctr < 14; ctr++) {
2364 outptr = output_buf[ctr] + output_col;
2365
2366 /* Even part */
2367
2368 /* Add range center and fudge factor for final descale and range-limit. */
2369 z1 = (INT32) wsptr[0] + PASS2_OFFSET;
2370 z1 <<= CONST_BITS;
2371#if PASS2_BITS == 0
2372 z1 += ONE << (CONST_BITS-1);
2373#endif
2374 z4 = (INT32) wsptr[4];
2375 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
2376 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
2377 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
2378
2379 tmp10 = z1 + z2;
2380 tmp11 = z1 + z3;
2381 tmp12 = z1 - z4;
2382
2383 tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */
2384
2385 z1 = (INT32) wsptr[2];
2386 z2 = (INT32) wsptr[6];
2387
2388 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
2389
2390 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
2391 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
2392 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
2393 MULTIPLY(z2, FIX(1.378756276)); /* c2 */
2394
2395 tmp20 = tmp10 + tmp13;
2396 tmp26 = tmp10 - tmp13;
2397 tmp21 = tmp11 + tmp14;
2398 tmp25 = tmp11 - tmp14;
2399 tmp22 = tmp12 + tmp15;
2400 tmp24 = tmp12 - tmp15;
2401
2402 /* Odd part */
2403
2404 z1 = (INT32) wsptr[1];
2405 z2 = (INT32) wsptr[3];
2406 z3 = (INT32) wsptr[5];
2407 z4 = (INT32) wsptr[7];
2408 z4 <<= CONST_BITS;
2409
2410 tmp14 = z1 + z3;
2411 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
2412 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
2413 tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
2414 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
2415 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
2416 z1 -= z2;
2417 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */
2418 tmp16 += tmp15;
2419 tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */
2420 tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
2421 tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
2422 tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
2423 tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
2424 tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
2425
2426 tmp13 = ((z1 - z3) << CONST_BITS) + z4;
2427
2428 /* Final output stage */
2429
2430 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2431 CONST_BITS+PASS2_BITS)
2432 & RANGE_MASK];
2433 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2434 CONST_BITS+PASS2_BITS)
2435 & RANGE_MASK];
2436 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2437 CONST_BITS+PASS2_BITS)
2438 & RANGE_MASK];
2439 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2440 CONST_BITS+PASS2_BITS)
2441 & RANGE_MASK];
2442 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2443 CONST_BITS+PASS2_BITS)
2444 & RANGE_MASK];
2445 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2446 CONST_BITS+PASS2_BITS)
2447 & RANGE_MASK];
2448 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2449 CONST_BITS+PASS2_BITS)
2450 & RANGE_MASK];
2451 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2452 CONST_BITS+PASS2_BITS)
2453 & RANGE_MASK];
2454 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2455 CONST_BITS+PASS2_BITS)
2456 & RANGE_MASK];
2457 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2458 CONST_BITS+PASS2_BITS)
2459 & RANGE_MASK];
2460 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2461 CONST_BITS+PASS2_BITS)
2462 & RANGE_MASK];
2463 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2464 CONST_BITS+PASS2_BITS)
2465 & RANGE_MASK];
2466 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
2467 CONST_BITS+PASS2_BITS)
2468 & RANGE_MASK];
2469 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
2470 CONST_BITS+PASS2_BITS)
2471 & RANGE_MASK];
2472
2473 wsptr += 8; /* advance pointer to next row */
2474 }
2475}
2476
2477
2478/*
2479 * Perform dequantization and inverse DCT on one block of coefficients,
2480 * producing a 15x15 output block.
2481 *
2482 * Optimized algorithm with 22 multiplications in the 1-D kernel.
2483 * cK represents sqrt(2) * cos(K*pi/30).
2484 */
2485
2486GLOBAL(void)
2487jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2490{
2491 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2492 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2493 INT32 z1, z2, z3, z4;
2494 JCOEFPTR inptr;
2495 ISLOW_MULT_TYPE * quantptr;
2496 int * wsptr;
2497 JSAMPROW outptr;
2498 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2499 int ctr;
2500 int workspace[8*15]; /* buffers data between passes */
2502
2503 /* Pass 1: process columns from input, store into work array. */
2504
2505 inptr = coef_block;
2506 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2507 wsptr = workspace;
2508 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2509 /* Even part */
2510
2511 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2512 z1 <<= CONST_BITS;
2513 /* Add fudge factor here for final descale. */
2514 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
2515
2516 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2517 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2518 z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2519
2520 tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
2521 tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
2522
2523 tmp12 = z1 - tmp10;
2524 tmp13 = z1 + tmp11;
2525 z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */
2526
2527 z4 = z2 - z3;
2528 z3 += z2;
2529 tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
2530 tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
2531 z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */
2532
2533 tmp20 = tmp13 + tmp10 + tmp11;
2534 tmp23 = tmp12 - tmp10 + tmp11 + z2;
2535
2536 tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
2537 tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
2538
2539 tmp25 = tmp13 - tmp10 - tmp11;
2540 tmp26 = tmp12 + tmp10 - tmp11 - z2;
2541
2542 tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
2543 tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
2544
2545 tmp21 = tmp12 + tmp10 + tmp11;
2546 tmp24 = tmp13 - tmp10 + tmp11;
2547 tmp11 += tmp11;
2548 tmp22 = z1 + tmp11; /* c10 = c6-c12 */
2549 tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */
2550
2551 /* Odd part */
2552
2553 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2554 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2555 z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2556 z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */
2557 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2558
2559 tmp13 = z2 - z4;
2560 tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */
2561 tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */
2562 tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */
2563
2564 tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */
2565 tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */
2566 z2 = z1 - z4;
2567 tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */
2568
2569 tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
2570 tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
2571 tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */
2572 z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */
2573 tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */
2574 tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */
2575
2576 /* Final output stage */
2577
2578 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
2579 wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
2580 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
2581 wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
2582 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
2583 wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2584 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
2585 wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
2586 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2587 wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2588 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2589 wsptr[8*9] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2590 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
2591 wsptr[8*8] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
2592 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS);
2593 }
2594
2595 /* Pass 2: process 15 rows from work array, store into output array. */
2596
2597 wsptr = workspace;
2598 for (ctr = 0; ctr < 15; ctr++) {
2599 outptr = output_buf[ctr] + output_col;
2600
2601 /* Even part */
2602
2603 /* Add range center and fudge factor for final descale and range-limit. */
2604 z1 = (INT32) wsptr[0] + PASS2_OFFSET;
2605 z1 <<= CONST_BITS;
2606#if PASS2_BITS == 0
2607 z1 += ONE << (CONST_BITS-1);
2608#endif
2609
2610 z2 = (INT32) wsptr[2];
2611 z3 = (INT32) wsptr[4];
2612 z4 = (INT32) wsptr[6];
2613
2614 tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
2615 tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
2616
2617 tmp12 = z1 - tmp10;
2618 tmp13 = z1 + tmp11;
2619 z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */
2620
2621 z4 = z2 - z3;
2622 z3 += z2;
2623 tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
2624 tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
2625 z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */
2626
2627 tmp20 = tmp13 + tmp10 + tmp11;
2628 tmp23 = tmp12 - tmp10 + tmp11 + z2;
2629
2630 tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
2631 tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
2632
2633 tmp25 = tmp13 - tmp10 - tmp11;
2634 tmp26 = tmp12 + tmp10 - tmp11 - z2;
2635
2636 tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
2637 tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
2638
2639 tmp21 = tmp12 + tmp10 + tmp11;
2640 tmp24 = tmp13 - tmp10 + tmp11;
2641 tmp11 += tmp11;
2642 tmp22 = z1 + tmp11; /* c10 = c6-c12 */
2643 tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */
2644
2645 /* Odd part */
2646
2647 z1 = (INT32) wsptr[1];
2648 z2 = (INT32) wsptr[3];
2649 z4 = (INT32) wsptr[5];
2650 z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */
2651 z4 = (INT32) wsptr[7];
2652
2653 tmp13 = z2 - z4;
2654 tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */
2655 tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */
2656 tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */
2657
2658 tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */
2659 tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */
2660 z2 = z1 - z4;
2661 tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */
2662
2663 tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
2664 tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
2665 tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */
2666 z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */
2667 tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */
2668 tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */
2669
2670 /* Final output stage */
2671
2672 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2673 CONST_BITS+PASS2_BITS)
2674 & RANGE_MASK];
2675 outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2676 CONST_BITS+PASS2_BITS)
2677 & RANGE_MASK];
2678 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2679 CONST_BITS+PASS2_BITS)
2680 & RANGE_MASK];
2681 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2682 CONST_BITS+PASS2_BITS)
2683 & RANGE_MASK];
2684 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2685 CONST_BITS+PASS2_BITS)
2686 & RANGE_MASK];
2687 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2688 CONST_BITS+PASS2_BITS)
2689 & RANGE_MASK];
2690 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2691 CONST_BITS+PASS2_BITS)
2692 & RANGE_MASK];
2693 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2694 CONST_BITS+PASS2_BITS)
2695 & RANGE_MASK];
2696 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2697 CONST_BITS+PASS2_BITS)
2698 & RANGE_MASK];
2699 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2700 CONST_BITS+PASS2_BITS)
2701 & RANGE_MASK];
2702 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2703 CONST_BITS+PASS2_BITS)
2704 & RANGE_MASK];
2705 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2706 CONST_BITS+PASS2_BITS)
2707 & RANGE_MASK];
2708 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
2709 CONST_BITS+PASS2_BITS)
2710 & RANGE_MASK];
2711 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
2712 CONST_BITS+PASS2_BITS)
2713 & RANGE_MASK];
2714 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27,
2715 CONST_BITS+PASS2_BITS)
2716 & RANGE_MASK];
2717
2718 wsptr += 8; /* advance pointer to next row */
2719 }
2720}
2721
2722
2723/*
2724 * Perform dequantization and inverse DCT on one block of coefficients,
2725 * producing a 16x16 output block.
2726 *
2727 * Optimized algorithm with 28 multiplications in the 1-D kernel.
2728 * cK represents sqrt(2) * cos(K*pi/32).
 *
 * The work is split into two passes over an int workspace of 8*16 entries.
 * Pass 1 walks the 8 input columns, dequantizes the coefficients through
 * DEQUANTIZE(inptr[...], quantptr[...]) (quantptr comes from
 * compptr->dct_table) and stores 16 intermediate values per column.
 * Pass 2 walks the 16 workspace rows, reads 8 values per row and writes
 * 16 samples to output_buf[ctr] + output_col, each one looked up in the
 * table returned by IDCT_range_limit(cinfo).
 *
 * NOTE(review): coef_block, output_buf and output_col are used below but
 * their declarations are not visible in this listing; confirm the full
 * parameter list against the original source.
2729 */
2730
2731GLOBAL(void)
2732jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2735{
2736 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
2737 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2738 INT32 z1, z2, z3, z4;
2739 JCOEFPTR inptr;
2740 ISLOW_MULT_TYPE * quantptr;
2741 int * wsptr;
2742 JSAMPROW outptr;
2743 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2744 int ctr;
2745 int workspace[8*16]; /* buffers data between passes */
2747
2748 /* Pass 1: process columns from input, store into work array. */
2749
2750 inptr = coef_block;
2751 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2752 wsptr = workspace;
2753 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2754 /* Even part */
 /* Uses input rows 0, 4, 2 and 6 of the current column. */
2755
2756 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2757 tmp0 <<= CONST_BITS;
2758 /* Add fudge factor here for final descale. */
2759 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
2760
2761 z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2762 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
2763 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
2764
2765 tmp10 = tmp0 + tmp1;
2766 tmp11 = tmp0 - tmp1;
2767 tmp12 = tmp0 + tmp2;
2768 tmp13 = tmp0 - tmp2;
2769
2770 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2771 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2772 z3 = z1 - z2;
2773 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
2774 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
2775
2776 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
2777 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
2778 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2779 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2780
2781 tmp20 = tmp10 + tmp0;
2782 tmp27 = tmp10 - tmp0;
2783 tmp21 = tmp12 + tmp1;
2784 tmp26 = tmp12 - tmp1;
2785 tmp22 = tmp13 + tmp2;
2786 tmp25 = tmp13 - tmp2;
2787 tmp23 = tmp11 + tmp3;
2788 tmp24 = tmp11 - tmp3;
2789
2790 /* Odd part */
 /* The even results now live in tmp20..tmp27, so tmp0..tmp3 and
  * tmp10..tmp13 are reused below for the eight odd-part terms.
  */
2791
2792 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2793 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2794 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2795 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2796
 /* tmp11 temporarily holds z1 + z3; it feeds both the c5 and the c11
  * product before being overwritten by the latter.
  */
2797 tmp11 = z1 + z3;
2798
2799 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
2800 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
2801 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
2802 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
2803 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
2804 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
2805 tmp0 = tmp1 + tmp2 + tmp3 -
2806 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
2807 tmp13 = tmp10 + tmp11 + tmp12 -
2808 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
 /* z1 is no longer needed as an input from here on and becomes scratch. */
2809 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
2810 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
2811 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
2812 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
2813 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
2814 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
2815 z2 += z4; /* z2 now holds z2 + z4 for the -c11 and -c5 products */
2816 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
2817 tmp1 += z1;
2818 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
2819 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
2820 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
2821 tmp12 += z2;
2822 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
2823 tmp2 += z2;
2824 tmp3 += z2;
2825 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
2826 tmp10 += z2;
2827 tmp11 += z2;
2828
2829 /* Final output stage */
 /* Workspace row k gets even+odd, its mirror row 15-k gets even-odd.
  * Rows are 8 ints apart because one entry is stored per input column.
  */
2830
2831 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS);
2832 wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS);
2833 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS);
2834 wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS);
2835 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS);
2836 wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS);
2837 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS);
2838 wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS);
2839 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
2840 wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
2841 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
2842 wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
2843 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
2844 wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
2845 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
2846 wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
2847 }
2848
2849 /* Pass 2: process 16 rows from work array, store into output array. */
 /* Same 16-point arithmetic as pass 1, but the inputs are the eight
  * workspace values of one row instead of dequantized coefficients.
  */
2850
2851 wsptr = workspace;
2852 for (ctr = 0; ctr < 16; ctr++) {
2853 outptr = output_buf[ctr] + output_col;
2854
2855 /* Even part */
2856
2857 /* Add range center and fudge factor for final descale and range-limit. */
2858 tmp0 = (INT32) wsptr[0] + PASS2_OFFSET;
2859 tmp0 <<= CONST_BITS;
 /* The extra rounding term is added only when PASS2_BITS is 0.
  * NOTE(review): presumably PASS2_OFFSET already carries the rounding
  * in the other configuration - confirm against its definition.
  */
2860#if PASS2_BITS == 0
2861 tmp0 += ONE << (CONST_BITS-1);
2862#endif
2863
2864 z1 = (INT32) wsptr[4];
2865 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
2866 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
2867
2868 tmp10 = tmp0 + tmp1;
2869 tmp11 = tmp0 - tmp1;
2870 tmp12 = tmp0 + tmp2;
2871 tmp13 = tmp0 - tmp2;
2872
2873 z1 = (INT32) wsptr[2];
2874 z2 = (INT32) wsptr[6];
2875 z3 = z1 - z2;
2876 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
2877 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
2878
2879 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
2880 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
2881 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2882 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2883
2884 tmp20 = tmp10 + tmp0;
2885 tmp27 = tmp10 - tmp0;
2886 tmp21 = tmp12 + tmp1;
2887 tmp26 = tmp12 - tmp1;
2888 tmp22 = tmp13 + tmp2;
2889 tmp25 = tmp13 - tmp2;
2890 tmp23 = tmp11 + tmp3;
2891 tmp24 = tmp11 - tmp3;
2892
2893 /* Odd part */
2894
2895 z1 = (INT32) wsptr[1];
2896 z2 = (INT32) wsptr[3];
2897 z3 = (INT32) wsptr[5];
2898 z4 = (INT32) wsptr[7];
2899
2900 tmp11 = z1 + z3;
2901
2902 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
2903 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
2904 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
2905 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
2906 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
2907 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
2908 tmp0 = tmp1 + tmp2 + tmp3 -
2909 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
2910 tmp13 = tmp10 + tmp11 + tmp12 -
2911 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
2912 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
2913 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
2914 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
2915 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
2916 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
2917 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
2918 z2 += z4; /* z2 now holds z2 + z4 for the -c11 and -c5 products */
2919 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
2920 tmp1 += z1;
2921 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
2922 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
2923 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
2924 tmp12 += z2;
2925 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
2926 tmp2 += z2;
2927 tmp3 += z2;
2928 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
2929 tmp10 += z2;
2930 tmp11 += z2;
2931
2932 /* Final output stage */
 /* Sample k gets even+odd, its mirror 15-k gets even-odd. Each sum is
  * descaled, masked with RANGE_MASK and used as an index into range_limit.
  */
2933
2934 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
2935 CONST_BITS+PASS2_BITS)
2936 & RANGE_MASK];
2937 outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
2938 CONST_BITS+PASS2_BITS)
2939 & RANGE_MASK];
2940 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
2941 CONST_BITS+PASS2_BITS)
2942 & RANGE_MASK];
2943 outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
2944 CONST_BITS+PASS2_BITS)
2945 & RANGE_MASK];
2946 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
2947 CONST_BITS+PASS2_BITS)
2948 & RANGE_MASK];
2949 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
2950 CONST_BITS+PASS2_BITS)
2951 & RANGE_MASK];
2952 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
2953 CONST_BITS+PASS2_BITS)
2954 & RANGE_MASK];
2955 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
2956 CONST_BITS+PASS2_BITS)
2957 & RANGE_MASK];
2958 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
2959 CONST_BITS+PASS2_BITS)
2960 & RANGE_MASK];
2961 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
2962 CONST_BITS+PASS2_BITS)
2963 & RANGE_MASK];
2964 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
2965 CONST_BITS+PASS2_BITS)
2966 & RANGE_MASK];
2967 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
2968 CONST_BITS+PASS2_BITS)
2969 & RANGE_MASK];
2970 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
2971 CONST_BITS+PASS2_BITS)
2972 & RANGE_MASK];
2973 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
2974 CONST_BITS+PASS2_BITS)
2975 & RANGE_MASK];
2976 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
2977 CONST_BITS+PASS2_BITS)
2978 & RANGE_MASK];
2979 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
2980 CONST_BITS+PASS2_BITS)
2981 & RANGE_MASK];
2982
2983 wsptr += 8; /* advance pointer to next row */
2984 }
2985}
2986
2987
2988/*
2989 * Perform dequantization and inverse DCT on one block of coefficients,
2990 * producing a 16x8 output block.
2991 *
2992 * 8-point IDCT in pass 1 (columns), 16-point in pass 2 (rows).
 *
 * Pass 1 fills an int workspace of 8*8 entries column by column and takes
 * a DC-only shortcut for columns whose seven AC coefficients are all zero.
 * Pass 2 expands each of the 8 workspace rows into 16 samples written to
 * output_buf[ctr] + output_col, each one looked up in the table returned
 * by IDCT_range_limit(cinfo).
 *
 * NOTE(review): coef_block, output_buf and output_col are used below but
 * their declarations are not visible in this listing; confirm the full
 * parameter list against the original source.
2993 */
2994
2995GLOBAL(void)
2996jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2999{
3000 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
3001 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
3002 INT32 z1, z2, z3, z4;
3003 JCOEFPTR inptr;
3004 ISLOW_MULT_TYPE * quantptr;
3005 int * wsptr;
3006 JSAMPROW outptr;
3007 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3008 int ctr;
3009 int workspace[8*8]; /* buffers data between passes */
3011
3012 /* Pass 1: process columns from input, store into work array.
3013 * Note results are scaled up by sqrt(8) compared to a true IDCT;
3014 * furthermore, we scale the results by 2**PASS1_BITS.
3015 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
3016 */
3017
3018 inptr = coef_block;
3019 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3020 wsptr = workspace;
 /* The loop header only counts ctr down; inptr, quantptr and wsptr are
  * advanced by hand at the end of both the shortcut and the full path.
  */
3021 for (ctr = DCTSIZE; ctr > 0; ctr--) {
3022 /* Due to quantization, we will usually find that many of the input
3023 * coefficients are zero, especially the AC terms. We can exploit this
3024 * by short-circuiting the IDCT calculation for any column in which all
3025 * the AC terms are zero. In that case each output is equal to the
3026 * DC coefficient (with scale factor as needed).
3027 * With typical images and quantization tables, half or more of the
3028 * column DCT calculations can be simplified this way.
3029 */
3030
3031 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
3032 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
3033 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
3034 inptr[DCTSIZE*7] == 0) {
3035 /* AC terms all zero */
 /* dcval is scaled up by PASS1_BITS (when that is nonzero) so that it
  * matches the scaling of the values stored by the full path below.
  */
3036#if PASS1_BITS > 0
3037 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
3038#else
3039 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3040#endif
3041
3042 wsptr[DCTSIZE*0] = dcval;
3043 wsptr[DCTSIZE*1] = dcval;
3044 wsptr[DCTSIZE*2] = dcval;
3045 wsptr[DCTSIZE*3] = dcval;
3046 wsptr[DCTSIZE*4] = dcval;
3047 wsptr[DCTSIZE*5] = dcval;
3048 wsptr[DCTSIZE*6] = dcval;
3049 wsptr[DCTSIZE*7] = dcval;
3050
3051 inptr++; /* advance pointers to next column */
3052 quantptr++;
3053 wsptr++;
3054 continue;
3055 }
3056
3057 /* Even part: reverse the even part of the forward DCT.
3058 * The rotator is c(-6).
3059 */
3060
3061 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3062 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3063 z2 <<= CONST_BITS;
3064 z3 <<= CONST_BITS;
3065 /* Add fudge factor here for final descale. */
3066 z2 += ONE << (CONST_BITS-PASS1_BITS-1);
3067
3068 tmp0 = z2 + z3;
3069 tmp1 = z2 - z3;
3070
3071 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3072 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
3073
3074 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
3075 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
3076 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
3077
3078 tmp10 = tmp0 + tmp2;
3079 tmp13 = tmp0 - tmp2;
3080 tmp11 = tmp1 + tmp3;
3081 tmp12 = tmp1 - tmp3;
3082
3083 /* Odd part per figure 8; the matrix is unitary and hence its
3084 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
3085 */
 /* The even results are now in tmp10..tmp13, so tmp0..tmp3 are reused
  * to hold the four odd inputs and then the four odd-part results.
  */
3086
3087 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
3088 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
3089 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3090 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3091
3092 z2 = tmp0 + tmp2;
3093 z3 = tmp1 + tmp3;
3094
3095 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
3096 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
3097 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
3098 z2 += z1;
3099 z3 += z1;
3100
3101 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
3102 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
3103 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
3104 tmp0 += z1 + z2;
3105 tmp3 += z1 + z3;
3106
3107 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
3108 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
3109 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
3110 tmp1 += z1 + z3;
3111 tmp2 += z1 + z2;
3112
3113 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
3114
3115 wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
3116 wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
3117 wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
3118 wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
3119 wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
3120 wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
3121 wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
3122 wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
3123
3124 inptr++; /* advance pointers to next column */
3125 quantptr++;
3126 wsptr++;
3127 }
3128
3129 /* Pass 2: process 8 rows from work array, store into output array.
3130 * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
3131 */
3132
3133 wsptr = workspace;
3134 for (ctr = 0; ctr < 8; ctr++) {
3135 outptr = output_buf[ctr] + output_col;
3136
3137 /* Even part */
3138
3139 /* Add range center and fudge factor for final descale and range-limit. */
3140 tmp0 = (INT32) wsptr[0] + PASS2_OFFSET;
3141 tmp0 <<= CONST_BITS;
 /* The extra rounding term is added only when PASS2_BITS is 0.
  * NOTE(review): presumably PASS2_OFFSET already carries the rounding
  * in the other configuration - confirm against its definition.
  */
3142#if PASS2_BITS == 0
3143 tmp0 += ONE << (CONST_BITS-1);
3144#endif
3145
3146 z1 = (INT32) wsptr[4];
3147 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
3148 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
3149
3150 tmp10 = tmp0 + tmp1;
3151 tmp11 = tmp0 - tmp1;
3152 tmp12 = tmp0 + tmp2;
3153 tmp13 = tmp0 - tmp2;
3154
3155 z1 = (INT32) wsptr[2];
3156 z2 = (INT32) wsptr[6];
3157 z3 = z1 - z2;
3158 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
3159 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
3160
3161 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
3162 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
3163 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
3164 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
3165
3166 tmp20 = tmp10 + tmp0;
3167 tmp27 = tmp10 - tmp0;
3168 tmp21 = tmp12 + tmp1;
3169 tmp26 = tmp12 - tmp1;
3170 tmp22 = tmp13 + tmp2;
3171 tmp25 = tmp13 - tmp2;
3172 tmp23 = tmp11 + tmp3;
3173 tmp24 = tmp11 - tmp3;
3174
3175 /* Odd part */
 /* The even results now live in tmp20..tmp27, so tmp0..tmp3 and
  * tmp10..tmp13 are reused below for the eight odd-part terms.
  */
3176
3177 z1 = (INT32) wsptr[1];
3178 z2 = (INT32) wsptr[3];
3179 z3 = (INT32) wsptr[5];
3180 z4 = (INT32) wsptr[7];
3181
3182 tmp11 = z1 + z3;
3183
3184 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
3185 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
3186 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
3187 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
3188 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
3189 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
3190 tmp0 = tmp1 + tmp2 + tmp3 -
3191 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
3192 tmp13 = tmp10 + tmp11 + tmp12 -
3193 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
3194 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
3195 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
3196 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
3197 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
3198 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
3199 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
3200 z2 += z4; /* z2 now holds z2 + z4 for the -c11 and -c5 products */
3201 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
3202 tmp1 += z1;
3203 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
3204 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
3205 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
3206 tmp12 += z2;
3207 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
3208 tmp2 += z2;
3209 tmp3 += z2;
3210 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
3211 tmp10 += z2;
3212 tmp11 += z2;
3213
3214 /* Final output stage */
 /* Sample k gets even+odd, its mirror 15-k gets even-odd. Each sum is
  * descaled, masked with RANGE_MASK and used as an index into range_limit.
  */
3215
3216 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
3217 CONST_BITS+PASS2_BITS)
3218 & RANGE_MASK];
3219 outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
3220 CONST_BITS+PASS2_BITS)
3221 & RANGE_MASK];
3222 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
3223 CONST_BITS+PASS2_BITS)
3224 & RANGE_MASK];
3225 outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
3226 CONST_BITS+PASS2_BITS)
3227 & RANGE_MASK];
3228 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
3229 CONST_BITS+PASS2_BITS)
3230 & RANGE_MASK];
3231 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
3232 CONST_BITS+PASS2_BITS)
3233 & RANGE_MASK];
3234 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
3235 CONST_BITS+PASS2_BITS)
3236 & RANGE_MASK];
3237 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
3238 CONST_BITS+PASS2_BITS)
3239 & RANGE_MASK];
3240 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
3241 CONST_BITS+PASS2_BITS)
3242 & RANGE_MASK];
3243 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
3244 CONST_BITS+PASS2_BITS)
3245 & RANGE_MASK];
3246 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
3247 CONST_BITS+PASS2_BITS)
3248 & RANGE_MASK];
3249 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
3250 CONST_BITS+PASS2_BITS)
3251 & RANGE_MASK];
3252 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
3253 CONST_BITS+PASS2_BITS)
3254 & RANGE_MASK];
3255 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
3256 CONST_BITS+PASS2_BITS)
3257 & RANGE_MASK];
3258 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
3259 CONST_BITS+PASS2_BITS)
3260 & RANGE_MASK];
3261 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
3262 CONST_BITS+PASS2_BITS)
3263 & RANGE_MASK];
3264
3265 wsptr += 8; /* advance pointer to next row */
3266 }
3267}
3268
3269
3270/*
3271 * Perform dequantization and inverse DCT on one block of coefficients,
3272 * producing a 14x7 output block.
3273 *
3274 * 7-point IDCT in pass 1 (columns), 14-point in pass 2 (rows).
 *
 * Pass 1 walks the 8 input columns and stores 7 intermediate values per
 * column in an int workspace of 8*7 entries. Pass 2 expands each of the
 * 7 workspace rows into 14 samples written to output_buf[ctr] + output_col,
 * each one looked up in the table returned by IDCT_range_limit(cinfo).
 *
 * NOTE(review): coef_block, output_buf and output_col are used below but
 * their declarations are not visible in this listing; confirm the full
 * parameter list against the original source.
3275 */
3276
3277GLOBAL(void)
3278jpeg_idct_14x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3281{
3282 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
3283 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
3284 INT32 z1, z2, z3, z4;
3285 JCOEFPTR inptr;
3286 ISLOW_MULT_TYPE * quantptr;
3287 int * wsptr;
3288 JSAMPROW outptr;
3289 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3290 int ctr;
3291 int workspace[8*7]; /* buffers data between passes */
3293
3294 /* Pass 1: process columns from input, store into work array.
3295 * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
3296 */
 /* Only input rows 0..6 of each column are read; row 7 is not used. */
3297
3298 inptr = coef_block;
3299 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3300 wsptr = workspace;
3301 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3302 /* Even part */
3303
3304 tmp23 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3305 tmp23 <<= CONST_BITS;
3306 /* Add fudge factor here for final descale. */
3307 tmp23 += ONE << (CONST_BITS-PASS1_BITS-1);
3308
3309 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3310 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3311 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
3312
3313 tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
3314 tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
3315 tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
3316 tmp10 = z1 + z3;
3317 z2 -= tmp10; /* z2 = z2 - (z1 + z3), consumed by the c0 term below */
3318 tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
3319 tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
3320 tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
3321 tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
3322
3323 /* Odd part */
 /* tmp10..tmp12 are free again here and hold the three odd-part terms. */
3324
3325 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3326 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3327 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
3328
3329 tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
3330 tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
3331 tmp10 = tmp11 - tmp12;
3332 tmp11 += tmp12;
3333 tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
3334 tmp11 += tmp12;
3335 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
3336 tmp10 += z2;
3337 tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
3338
3339 /* Final output stage */
 /* Workspace row k gets even+odd, its mirror row 6-k gets even-odd;
  * the centre row 3 is written from the even term tmp23 alone.
  */
3340
3341 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
3342 wsptr[8*6] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
3343 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
3344 wsptr[8*5] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
3345 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
3346 wsptr[8*4] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
3347 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23, CONST_BITS-PASS1_BITS);
3348 }
3349
3350 /* Pass 2: process 7 rows from work array, store into output array.
3351 * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
3352 */
3353
3354 wsptr = workspace;
3355 for (ctr = 0; ctr < 7; ctr++) {
3356 outptr = output_buf[ctr] + output_col;
3357
3358 /* Even part */
3359
3360 /* Add range center and fudge factor for final descale and range-limit. */
3361 z1 = (INT32) wsptr[0] + PASS2_OFFSET;
3362 z1 <<= CONST_BITS;
 /* The extra rounding term is added only when PASS2_BITS is 0.
  * NOTE(review): presumably PASS2_OFFSET already carries the rounding
  * in the other configuration - confirm against its definition.
  */
3363#if PASS2_BITS == 0
3364 z1 += ONE << (CONST_BITS-1);
3365#endif
3366 z4 = (INT32) wsptr[4];
3367 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
3368 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
3369 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
3370
3371 tmp10 = z1 + z2;
3372 tmp11 = z1 + z3;
3373 tmp12 = z1 - z4;
3374
3375 tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */
3376
3377 z1 = (INT32) wsptr[2];
3378 z2 = (INT32) wsptr[6];
3379
3380 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
3381
3382 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
3383 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
3384 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
3385 MULTIPLY(z2, FIX(1.378756276)); /* c2 */
3386
3387 tmp20 = tmp10 + tmp13;
3388 tmp26 = tmp10 - tmp13;
3389 tmp21 = tmp11 + tmp14;
3390 tmp25 = tmp11 - tmp14;
3391 tmp22 = tmp12 + tmp15;
3392 tmp24 = tmp12 - tmp15;
3393
3394 /* Odd part */
 /* The even results are in tmp20..tmp26 (tmp23 set above), so
  * tmp10..tmp16 are reused for the seven odd-part terms.
  */
3395
3396 z1 = (INT32) wsptr[1];
3397 z2 = (INT32) wsptr[3];
3398 z3 = (INT32) wsptr[5];
3399 z4 = (INT32) wsptr[7];
3400 z4 <<= CONST_BITS; /* z4 is only added/subtracted below, never multiplied */
3401
3402 tmp14 = z1 + z3;
3403 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
3404 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
3405 tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
3406 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
3407 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
3408 z1 -= z2; /* z1 holds (input 1 - input 3) from here on */
3409 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */
3410 tmp16 += tmp15;
3411 tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */
3412 tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
3413 tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
3414 tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
3415 tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
3416 tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
3417
 /* z1 was reduced by z2 above, so this is (in1 - in3 - in5) scaled by
  * CONST_BITS, plus the already scaled z4.
  */
3418 tmp13 = ((z1 - z3) << CONST_BITS) + z4;
3419
3420 /* Final output stage */
 /* Sample k gets even+odd, its mirror 13-k gets even-odd. Each sum is
  * descaled, masked with RANGE_MASK and used as an index into range_limit.
  */
3421
3422 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3423 CONST_BITS+PASS2_BITS)
3424 & RANGE_MASK];
3425 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3426 CONST_BITS+PASS2_BITS)
3427 & RANGE_MASK];
3428 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3429 CONST_BITS+PASS2_BITS)
3430 & RANGE_MASK];
3431 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3432 CONST_BITS+PASS2_BITS)
3433 & RANGE_MASK];
3434 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3435 CONST_BITS+PASS2_BITS)
3436 & RANGE_MASK];
3437 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3438 CONST_BITS+PASS2_BITS)
3439 & RANGE_MASK];
3440 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3441 CONST_BITS+PASS2_BITS)
3442 & RANGE_MASK];
3443 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3444 CONST_BITS+PASS2_BITS)
3445 & RANGE_MASK];
3446 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3447 CONST_BITS+PASS2_BITS)
3448 & RANGE_MASK];
3449 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3450 CONST_BITS+PASS2_BITS)
3451 & RANGE_MASK];
3452 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
3453 CONST_BITS+PASS2_BITS)
3454 & RANGE_MASK];
3455 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
3456 CONST_BITS+PASS2_BITS)
3457 & RANGE_MASK];
3458 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
3459 CONST_BITS+PASS2_BITS)
3460 & RANGE_MASK];
3461 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
3462 CONST_BITS+PASS2_BITS)
3463 & RANGE_MASK];
3464
3465 wsptr += 8; /* advance pointer to next row */
3466 }
3467}
3468
3469
3470/*
3471 * Perform dequantization and inverse DCT on one block of coefficients,
3472 * producing a 12x6 output block.
3473 *
3474 * 6-point IDCT in pass 1 (columns), 12-point in pass 2 (rows).
 *
 * Pass 1 walks the 8 input columns and stores 6 intermediate values per
 * column in an int workspace of 8*6 entries. Pass 2 expands each of the
 * 6 workspace rows into 12 samples written to output_buf[ctr] + output_col,
 * each one looked up in the table returned by IDCT_range_limit(cinfo).
 *
 * NOTE(review): coef_block, output_buf and output_col are used below but
 * their declarations are not visible in this listing; confirm the full
 * parameter list against the original source.
3475 */
3476
3477GLOBAL(void)
3478jpeg_idct_12x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3481{
3482 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
3483 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
3484 INT32 z1, z2, z3, z4;
3485 JCOEFPTR inptr;
3486 ISLOW_MULT_TYPE * quantptr;
3487 int * wsptr;
3488 JSAMPROW outptr;
3489 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3490 int ctr;
3491 int workspace[8*6]; /* buffers data between passes */
3493
3494 /* Pass 1: process columns from input, store into work array.
3495 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3496 */
 /* Only input rows 0..5 of each column are read; rows 6 and 7 are not used. */
3497
3498 inptr = coef_block;
3499 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3500 wsptr = workspace;
3501 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3502 /* Even part */
3503
3504 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3505 tmp10 <<= CONST_BITS;
3506 /* Add fudge factor here for final descale. */
3507 tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
3508 tmp12 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3509 tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */
3510 tmp11 = tmp10 + tmp20;
 /* tmp21 is descaled right away: it is later combined with tmp11, which
  * is scaled by PASS1_BITS only, and stored without a further shift.
  */
3511 tmp21 = RIGHT_SHIFT(tmp10 - tmp20 - tmp20, CONST_BITS-PASS1_BITS);
3512 tmp20 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3513 tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */
3514 tmp20 = tmp11 + tmp10;
3515 tmp22 = tmp11 - tmp10;
3516
3517 /* Odd part */
 /* tmp10..tmp12 are free again here and hold the three odd-part terms. */
3518
3519 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3520 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3521 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
3522 tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
3523 tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
3524 tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
3525#if PASS1_BITS > 0
3526 tmp11 = (z1 - z2 - z3) << PASS1_BITS;
3527#else
3528 tmp11 = z1 - z2 - z3;
3529#endif
3530
3531 /* Final output stage */
 /* Workspace row k gets even+odd, its mirror row 5-k gets even-odd.
  * Rows 1 and 4 are stored unshifted because tmp21 was descaled above
  * and tmp11 carries only the PASS1_BITS scaling.
  */
3532
3533 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
3534 wsptr[8*5] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
3535 wsptr[8*1] = (int) (tmp21 + tmp11);
3536 wsptr[8*4] = (int) (tmp21 - tmp11);
3537 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
3538 wsptr[8*3] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
3539 }
3540
3541 /* Pass 2: process 6 rows from work array, store into output array.
3542 * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
3543 */
3544
3545 wsptr = workspace;
3546 for (ctr = 0; ctr < 6; ctr++) {
3547 outptr = output_buf[ctr] + output_col;
3548
3549 /* Even part */
3550
3551 /* Add range center and fudge factor for final descale and range-limit. */
3552 z3 = (INT32) wsptr[0] + PASS2_OFFSET;
3553 z3 <<= CONST_BITS;
 /* The extra rounding term is added only when PASS2_BITS is 0.
  * NOTE(review): presumably PASS2_OFFSET already carries the rounding
  * in the other configuration - confirm against its definition.
  */
3554#if PASS2_BITS == 0
3555 z3 += ONE << (CONST_BITS-1);
3556#endif
3557
3558 z4 = (INT32) wsptr[4];
3559 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
3560
3561 tmp10 = z3 + z4;
3562 tmp11 = z3 - z4;
3563
 /* The c2 product is taken from the unshifted z1 before z1 itself is
  * scaled up by CONST_BITS on the following line.
  */
3564 z1 = (INT32) wsptr[2];
3565 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
3566 z1 <<= CONST_BITS;
3567 z2 = (INT32) wsptr[6];
3568 z2 <<= CONST_BITS;
3569
 /* tmp12 is a scratch value that is recomputed three times below. */
3570 tmp12 = z1 - z2;
3571
3572 tmp21 = z3 + tmp12;
3573 tmp24 = z3 - tmp12;
3574
3575 tmp12 = z4 + z2;
3576
3577 tmp20 = tmp10 + tmp12;
3578 tmp25 = tmp10 - tmp12;
3579
3580 tmp12 = z4 - z1 - z2;
3581
3582 tmp22 = tmp11 + tmp12;
3583 tmp23 = tmp11 - tmp12;
3584
3585 /* Odd part */
 /* The even results are in tmp20..tmp25, so tmp10..tmp15 are reused
  * for the six odd-part terms.
  */
3586
3587 z1 = (INT32) wsptr[1];
3588 z2 = (INT32) wsptr[3];
3589 z3 = (INT32) wsptr[5];
3590 z4 = (INT32) wsptr[7];
3591
3592 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
3593 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
3594
3595 tmp10 = z1 + z3;
3596 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
3597 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
3598 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
3599 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
3600 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
3601 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
3602 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
3603 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
3604
 /* tmp10, tmp12, tmp13 and tmp15 are final here. The inputs are now
  * folded in place to compute the remaining terms, which overwrite the
  * provisional tmp11 and tmp14 values used above.
  */
3605 z1 -= z4;
3606 z2 -= z3;
3607 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
3608 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
3609 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
3610
3611 /* Final output stage */
 /* Sample k gets even+odd, its mirror 11-k gets even-odd. Each sum is
  * descaled, masked with RANGE_MASK and used as an index into range_limit.
  */
3612
3613 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3614 CONST_BITS+PASS2_BITS)
3615 & RANGE_MASK];
3616 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3617 CONST_BITS+PASS2_BITS)
3618 & RANGE_MASK];
3619 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3620 CONST_BITS+PASS2_BITS)
3621 & RANGE_MASK];
3622 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3623 CONST_BITS+PASS2_BITS)
3624 & RANGE_MASK];
3625 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3626 CONST_BITS+PASS2_BITS)
3627 & RANGE_MASK];
3628 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3629 CONST_BITS+PASS2_BITS)
3630 & RANGE_MASK];
3631 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3632 CONST_BITS+PASS2_BITS)
3633 & RANGE_MASK];
3634 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3635 CONST_BITS+PASS2_BITS)
3636 & RANGE_MASK];
3637 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3638 CONST_BITS+PASS2_BITS)
3639 & RANGE_MASK];
3640 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3641 CONST_BITS+PASS2_BITS)
3642 & RANGE_MASK];
3643 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
3644 CONST_BITS+PASS2_BITS)
3645 & RANGE_MASK];
3646 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
3647 CONST_BITS+PASS2_BITS)
3648 & RANGE_MASK];
3649
3650 wsptr += 8; /* advance pointer to next row */
3651 }
3652}
3653
3654
3655/*
3656 * Perform dequantization and inverse DCT on one block of coefficients,
3657 * producing a 10x5 output block.
3658 *
3659 * 5-point IDCT in pass 1 (columns), 10-point in pass 2 (rows).
3660 */
3661
3662GLOBAL(void)
3663jpeg_idct_10x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3666{
3667 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
3668 INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
3669 INT32 z1, z2, z3, z4;
3670 JCOEFPTR inptr;
3671 ISLOW_MULT_TYPE * quantptr;
3672 int * wsptr;
3673 JSAMPROW outptr;
3674 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3675 int ctr;
3676 int workspace[8*5]; /* buffers data between passes */
3678
3679 /* Pass 1: process columns from input, store into work array.
3680 * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
3681 */
3682
3683 inptr = coef_block;
3684 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3685 wsptr = workspace;
3686 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3687 /* Even part */
3688
3689 tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3690 tmp12 <<= CONST_BITS;
3691 /* Add fudge factor here for final descale. */
3692 tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
3693 tmp13 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3694 tmp14 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3695 z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
3696 z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
3697 z3 = tmp12 + z2;
3698 tmp10 = z3 + z1;
3699 tmp11 = z3 - z1;
3700 tmp12 -= z2 << 2;
3701
3702 /* Odd part */
3703
3704 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3705 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3706
3707 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
3708 tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
3709 tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
3710
3711 /* Final output stage */
3712
3713 wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp13, CONST_BITS-PASS1_BITS);
3714 wsptr[8*4] = (int) RIGHT_SHIFT(tmp10 - tmp13, CONST_BITS-PASS1_BITS);
3715 wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp14, CONST_BITS-PASS1_BITS);
3716 wsptr[8*3] = (int) RIGHT_SHIFT(tmp11 - tmp14, CONST_BITS-PASS1_BITS);
3717 wsptr[8*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
3718 }
3719
3720 /* Pass 2: process 5 rows from work array, store into output array.
3721 * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
3722 */
3723
3724 wsptr = workspace;
3725 for (ctr = 0; ctr < 5; ctr++) {
3726 outptr = output_buf[ctr] + output_col;
3727
3728 /* Even part */
3729
3730 /* Add range center and fudge factor for final descale and range-limit. */
3731 z3 = (INT32) wsptr[0] + PASS2_OFFSET;
3732 z3 <<= CONST_BITS;
3733#if PASS2_BITS == 0
3734 z3 += ONE << (CONST_BITS-1);
3735#endif
3736 z4 = (INT32) wsptr[4];
3737 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
3738 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
3739 tmp10 = z3 + z1;
3740 tmp11 = z3 - z2;
3741
3742 tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */
3743
3744 z2 = (INT32) wsptr[2];
3745 z3 = (INT32) wsptr[6];
3746
3747 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
3748 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
3749 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
3750
3751 tmp20 = tmp10 + tmp12;
3752 tmp24 = tmp10 - tmp12;
3753 tmp21 = tmp11 + tmp13;
3754 tmp23 = tmp11 - tmp13;
3755
3756 /* Odd part */
3757
3758 z1 = (INT32) wsptr[1];
3759 z2 = (INT32) wsptr[3];
3760 z3 = (INT32) wsptr[5];
3761 z3 <<= CONST_BITS;
3762 z4 = (INT32) wsptr[7];
3763
3764 tmp11 = z2 + z4;
3765 tmp13 = z2 - z4;
3766
3767 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
3768
3769 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
3770 z4 = z3 + tmp12;
3771
3772 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
3773 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
3774
3775 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
3776 z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
3777
3778 tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
3779
3780 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
3781 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
3782
3783 /* Final output stage */
3784
3785 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3786 CONST_BITS+PASS2_BITS)
3787 & RANGE_MASK];
3788 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3789 CONST_BITS+PASS2_BITS)
3790 & RANGE_MASK];
3791 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3792 CONST_BITS+PASS2_BITS)
3793 & RANGE_MASK];
3794 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3795 CONST_BITS+PASS2_BITS)
3796 & RANGE_MASK];
3797 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3798 CONST_BITS+PASS2_BITS)
3799 & RANGE_MASK];
3800 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3801 CONST_BITS+PASS2_BITS)
3802 & RANGE_MASK];
3803 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3804 CONST_BITS+PASS2_BITS)
3805 & RANGE_MASK];
3806 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3807 CONST_BITS+PASS2_BITS)
3808 & RANGE_MASK];
3809 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3810 CONST_BITS+PASS2_BITS)
3811 & RANGE_MASK];
3812 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3813 CONST_BITS+PASS2_BITS)
3814 & RANGE_MASK];
3815
3816 wsptr += 8; /* advance pointer to next row */
3817 }
3818}
3819
3820
3821/*
3822 * Perform dequantization and inverse DCT on one block of coefficients,
3823 * producing an 8x4 output block.
3824 *
3825 * 4-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
3826 */
3827
3828GLOBAL(void)
3829jpeg_idct_8x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3832{
3833 INT32 tmp0, tmp1, tmp2, tmp3;
3834 INT32 tmp10, tmp11, tmp12, tmp13;
3835 INT32 z1, z2, z3;
3836 JCOEFPTR inptr;
3837 ISLOW_MULT_TYPE * quantptr;
3838 int * wsptr;
3839 JSAMPROW outptr;
3840 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3841 int ctr;
3842 int workspace[8*4]; /* buffers data between passes */
3844
3845 /* Pass 1: process columns from input, store into work array.
3846 * 4-point IDCT kernel,
3847 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
3848 */
3849
3850 inptr = coef_block;
3851 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3852 wsptr = workspace;
3853 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3854 /* Even part */
3855
3856 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3857 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3858
3859#if PASS1_BITS > 0
3860 tmp10 = (tmp0 + tmp2) << PASS1_BITS;
3861 tmp12 = (tmp0 - tmp2) << PASS1_BITS;
3862#else
3863 tmp10 = tmp0 + tmp2;
3864 tmp12 = tmp0 - tmp2;
3865#endif
3866
3867 /* Odd part */
3868 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
3869
3870 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3871 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3872
3873 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
3874 /* Add fudge factor here for final descale. */
3875 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
3876 tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
3877 CONST_BITS-PASS1_BITS);
3878 tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
3879 CONST_BITS-PASS1_BITS);
3880
3881 /* Final output stage */
3882
3883 wsptr[8*0] = (int) (tmp10 + tmp0);
3884 wsptr[8*3] = (int) (tmp10 - tmp0);
3885 wsptr[8*1] = (int) (tmp12 + tmp2);
3886 wsptr[8*2] = (int) (tmp12 - tmp2);
3887 }
3888
3889 /* Pass 2: process rows from work array, store into output array.
3890 * Note that we must descale the results by a factor of 8 == 2**3,
3891 * which is folded into the PASS2_BITS value.
3892 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
3893 */
3894
3895 wsptr = workspace;
3896 for (ctr = 0; ctr < 4; ctr++) {
3897 outptr = output_buf[ctr] + output_col;
3898
3899 /* Even part: reverse the even part of the forward DCT.
3900 * The rotator is c(-6).
3901 */
3902
3903 /* Add range center and fudge factor for final descale and range-limit. */
3904 z2 = (INT32) wsptr[0] + PASS2_OFFSET;
3905 z3 = (INT32) wsptr[4];
3906 z2 <<= CONST_BITS;
3907 z3 <<= CONST_BITS;
3908#if PASS2_BITS == 0
3909 /* Add fudge factor here for final descale. */
3910 z2 += ONE << (CONST_BITS-1);
3911#endif
3912
3913 tmp0 = z2 + z3;
3914 tmp1 = z2 - z3;
3915
3916 z2 = (INT32) wsptr[2];
3917 z3 = (INT32) wsptr[6];
3918
3919 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
3920 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
3921 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
3922
3923 tmp10 = tmp0 + tmp2;
3924 tmp13 = tmp0 - tmp2;
3925 tmp11 = tmp1 + tmp3;
3926 tmp12 = tmp1 - tmp3;
3927
3928 /* Odd part per figure 8; the matrix is unitary and hence its
3929 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
3930 */
3931
3932 tmp0 = (INT32) wsptr[7];
3933 tmp1 = (INT32) wsptr[5];
3934 tmp2 = (INT32) wsptr[3];
3935 tmp3 = (INT32) wsptr[1];
3936
3937 z2 = tmp0 + tmp2;
3938 z3 = tmp1 + tmp3;
3939
3940 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
3941 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
3942 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
3943 z2 += z1;
3944 z3 += z1;
3945
3946 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
3947 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
3948 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
3949 tmp0 += z1 + z2;
3950 tmp3 += z1 + z3;
3951
3952 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
3953 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
3954 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
3955 tmp1 += z1 + z3;
3956 tmp2 += z1 + z2;
3957
3958 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
3959
3960 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
3961 CONST_BITS+PASS2_BITS)
3962 & RANGE_MASK];
3963 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
3964 CONST_BITS+PASS2_BITS)
3965 & RANGE_MASK];
3966 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
3967 CONST_BITS+PASS2_BITS)
3968 & RANGE_MASK];
3969 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
3970 CONST_BITS+PASS2_BITS)
3971 & RANGE_MASK];
3972 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
3973 CONST_BITS+PASS2_BITS)
3974 & RANGE_MASK];
3975 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
3976 CONST_BITS+PASS2_BITS)
3977 & RANGE_MASK];
3978 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
3979 CONST_BITS+PASS2_BITS)
3980 & RANGE_MASK];
3981 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
3982 CONST_BITS+PASS2_BITS)
3983 & RANGE_MASK];
3984
3985 wsptr += DCTSIZE; /* advance pointer to next row */
3986 }
3987}
3988
3989
3990/*
3991 * Perform dequantization and inverse DCT on one block of coefficients,
3992 * producing a 6x3 output block.
3993 *
3994 * 3-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
3995 */
3996
3997GLOBAL(void)
3998jpeg_idct_6x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4001{
4002 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
4003 INT32 z1, z2, z3;
4004 JCOEFPTR inptr;
4005 ISLOW_MULT_TYPE * quantptr;
4006 int * wsptr;
4007 JSAMPROW outptr;
4008 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4009 int ctr;
4010 int workspace[6*3]; /* buffers data between passes */
4012
4013 /* Pass 1: process columns from input, store into work array.
4014 * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
4015 */
4016
4017 inptr = coef_block;
4018 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4019 wsptr = workspace;
4020 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
4021 /* Even part */
4022
4023 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4024 tmp0 <<= CONST_BITS;
4025 /* Add fudge factor here for final descale. */
4026 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
4027 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4028 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
4029 tmp10 = tmp0 + tmp12;
4030 tmp2 = tmp0 - tmp12 - tmp12;
4031
4032 /* Odd part */
4033
4034 tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4035 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
4036
4037 /* Final output stage */
4038
4039 wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
4040 wsptr[6*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
4041 wsptr[6*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
4042 }
4043
4044 /* Pass 2: process 3 rows from work array, store into output array.
4045 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
4046 */
4047
4048 wsptr = workspace;
4049 for (ctr = 0; ctr < 3; ctr++) {
4050 outptr = output_buf[ctr] + output_col;
4051
4052 /* Even part */
4053
4054 /* Add range center and fudge factor for final descale and range-limit. */
4055 tmp0 = (INT32) wsptr[0] + PASS2_OFFSET;
4056 tmp0 <<= CONST_BITS;
4057#if PASS2_BITS == 0
4058 tmp0 += ONE << (CONST_BITS-1);
4059#endif
4060 tmp2 = (INT32) wsptr[4];
4061 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
4062 tmp1 = tmp0 + tmp10;
4063 tmp11 = tmp0 - tmp10 - tmp10;
4064 tmp10 = (INT32) wsptr[2];
4065 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
4066 tmp10 = tmp1 + tmp0;
4067 tmp12 = tmp1 - tmp0;
4068
4069 /* Odd part */
4070
4071 z1 = (INT32) wsptr[1];
4072 z2 = (INT32) wsptr[3];
4073 z3 = (INT32) wsptr[5];
4074 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
4075 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
4076 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
4077 tmp1 = (z1 - z2 - z3) << CONST_BITS;
4078
4079 /* Final output stage */
4080
4081 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
4082 CONST_BITS+PASS2_BITS)
4083 & RANGE_MASK];
4084 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
4085 CONST_BITS+PASS2_BITS)
4086 & RANGE_MASK];
4087 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
4088 CONST_BITS+PASS2_BITS)
4089 & RANGE_MASK];
4090 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
4091 CONST_BITS+PASS2_BITS)
4092 & RANGE_MASK];
4093 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
4094 CONST_BITS+PASS2_BITS)
4095 & RANGE_MASK];
4096 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
4097 CONST_BITS+PASS2_BITS)
4098 & RANGE_MASK];
4099
4100 wsptr += 6; /* advance pointer to next row */
4101 }
4102}
4103
4104
4105/*
4106 * Perform dequantization and inverse DCT on one block of coefficients,
4107 * producing a 4x2 output block.
4108 *
4109 * 2-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
4110 */
4111
4112GLOBAL(void)
4113jpeg_idct_4x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4116{
4117 INT32 tmp0, tmp2, tmp10, tmp12;
4118 INT32 z1, z2, z3;
4119 JCOEFPTR inptr;
4120 ISLOW_MULT_TYPE * quantptr;
4121 INT32 * wsptr;
4122 JSAMPROW outptr;
4123 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4124 int ctr;
4125 INT32 workspace[4*2]; /* buffers data between passes */
4127
4128 /* Pass 1: process columns from input, store into work array. */
4129
4130 inptr = coef_block;
4131 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4132 wsptr = workspace;
4133 for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
4134 /* Even part */
4135
4136 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4137
4138 /* Odd part */
4139
4140 tmp0 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4141
4142 /* Final output stage */
4143
4144 wsptr[4*0] = tmp10 + tmp0;
4145 wsptr[4*1] = tmp10 - tmp0;
4146 }
4147
4148 /* Pass 2: process 2 rows from work array, store into output array.
4149 * 4-point IDCT kernel,
4150 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
4151 */
4152
4153 wsptr = workspace;
4154 for (ctr = 0; ctr < 2; ctr++) {
4155 outptr = output_buf[ctr] + output_col;
4156
4157 /* Even part */
4158
4159 tmp0 = wsptr[0];
4160 tmp2 = wsptr[2];
4161
4162 /* Add range center and fudge factor for final descale and range-limit. */
4163#if PASS2_BITS > PASS1_BITS
4164#if PASS2_BITS > PASS1_BITS + 1
4165 tmp0 += (((INT32) RANGE_CENTER) << (PASS2_BITS-PASS1_BITS)) +
4166 (ONE << (PASS2_BITS-PASS1_BITS-1));
4167#else
4168 tmp0 += (((INT32) RANGE_CENTER) << 1) + ONE;
4169#endif
4170 tmp0 <<= CONST_BITS;
4171#else
4172#if PASS2_BITS == PASS1_BITS
4173 tmp0 += (INT32) RANGE_CENTER;
4174 tmp0 <<= CONST_BITS;
4175 tmp0 += ONE << (CONST_BITS-1);
4176#else
4177 tmp0 <<= CONST_BITS;
4178 tmp0 += (((INT32) RANGE_CENTER) << (CONST_BITS+PASS2_BITS-PASS1_BITS)) +
4179 (ONE << (CONST_BITS+PASS2_BITS-PASS1_BITS-1));
4180#endif
4181#endif
4182
4183 tmp2 <<= CONST_BITS;
4184
4185 tmp10 = tmp0 + tmp2;
4186 tmp12 = tmp0 - tmp2;
4187
4188 /* Odd part */
4189 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
4190
4191 z2 = wsptr[1];
4192 z3 = wsptr[3];
4193
4194 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4195 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4196 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4197
4198 /* Final output stage */
4199
4200 outptr[0] =
4201 range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
4202 CONST_BITS+PASS2_BITS-PASS1_BITS)
4203 & RANGE_MASK];
4204 outptr[3] =
4205 range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
4206 CONST_BITS+PASS2_BITS-PASS1_BITS)
4207 & RANGE_MASK];
4208 outptr[1] =
4209 range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
4210 CONST_BITS+PASS2_BITS-PASS1_BITS)
4211 & RANGE_MASK];
4212 outptr[2] =
4213 range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
4214 CONST_BITS+PASS2_BITS-PASS1_BITS)
4215 & RANGE_MASK];
4216
4217 wsptr += 4; /* advance pointer to next row */
4218 }
4219}
4220
4221
4222/*
4223 * Perform dequantization and inverse DCT on one block of coefficients,
4224 * producing a 2x1 output block.
4225 *
4226 * 1-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
4227 */
4228
4229GLOBAL(void)
4230jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4233{
4234 DCTELEM tmp0, tmp1;
4235 ISLOW_MULT_TYPE * quantptr;
4236 JSAMPROW outptr;
4237 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4239
4240 /* Pass 1: empty. */
4241
4242 /* Pass 2: process 1 row from input, store into output array. */
4243
4244 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4245 outptr = output_buf[0] + output_col;
4246
4247 /* Even part */
4248
4249 tmp0 = DEQUANTIZE(coef_block[0], quantptr[0]);
4250
4251 /* Odd part */
4252
4253 tmp1 = DEQUANTIZE(coef_block[1], quantptr[1]);
4254
4255 /* Final output stage */
4256
4257#if PASS2_BITS > PASS1_BITS
4258 /* Add range center and fudge factor for downscale and range-limit. */
4259#if PASS2_BITS > PASS1_BITS + 1
4260 tmp0 += (((DCTELEM) RANGE_CENTER) << (PASS2_BITS-PASS1_BITS)) +
4261 (1 << (PASS2_BITS-PASS1_BITS-1));
4262#else
4263 tmp0 += (((DCTELEM) RANGE_CENTER) << 1) + 1;
4264#endif
4265
4266 outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1,
4267 PASS2_BITS-PASS1_BITS)
4268 & RANGE_MASK];
4269 outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1,
4270 PASS2_BITS-PASS1_BITS)
4271 & RANGE_MASK];
4272#else
4273#if PASS2_BITS < PASS1_BITS
4274 tmp0 <<= (PASS1_BITS-PASS2_BITS); /* upscale */
4275 tmp1 <<= (PASS1_BITS-PASS2_BITS); /* upscale */
4276#endif
4277
4278 tmp0 += (DCTELEM) RANGE_CENTER; /* add range center for range-limit */
4279
4280 outptr[0] = range_limit[(int) (tmp0 + tmp1) & RANGE_MASK];
4281 outptr[1] = range_limit[(int) (tmp0 - tmp1) & RANGE_MASK];
4282#endif
4283}
4284
4285
4286/*
4287 * Perform dequantization and inverse DCT on one block of coefficients,
4288 * producing an 8x16 output block.
4289 *
4290 * 16-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
4291 */
4292
4293GLOBAL(void)
4294jpeg_idct_8x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4297{
4298 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
4299 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
4300 INT32 z1, z2, z3, z4;
4301 JCOEFPTR inptr;
4302 ISLOW_MULT_TYPE * quantptr;
4303 int * wsptr;
4304 JSAMPROW outptr;
4305 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4306 int ctr;
4307 int workspace[8*16]; /* buffers data between passes */
4309
4310 /* Pass 1: process columns from input, store into work array.
4311 * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
4312 */
4313
4314 inptr = coef_block;
4315 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4316 wsptr = workspace;
4317 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
4318 /* Even part */
4319
4320 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4321 tmp0 <<= CONST_BITS;
4322 /* Add fudge factor here for final descale. */
4323 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
4324
4325 z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4326 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
4327 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
4328
4329 tmp10 = tmp0 + tmp1;
4330 tmp11 = tmp0 - tmp1;
4331 tmp12 = tmp0 + tmp2;
4332 tmp13 = tmp0 - tmp2;
4333
4334 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4335 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4336 z3 = z1 - z2;
4337 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
4338 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
4339
4340 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
4341 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
4342 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
4343 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
4344
4345 tmp20 = tmp10 + tmp0;
4346 tmp27 = tmp10 - tmp0;
4347 tmp21 = tmp12 + tmp1;
4348 tmp26 = tmp12 - tmp1;
4349 tmp22 = tmp13 + tmp2;
4350 tmp25 = tmp13 - tmp2;
4351 tmp23 = tmp11 + tmp3;
4352 tmp24 = tmp11 - tmp3;
4353
4354 /* Odd part */
4355
4356 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4357 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4358 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4359 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4360
4361 tmp11 = z1 + z3;
4362
4363 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
4364 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
4365 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
4366 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
4367 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
4368 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
4369 tmp0 = tmp1 + tmp2 + tmp3 -
4370 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
4371 tmp13 = tmp10 + tmp11 + tmp12 -
4372 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
4373 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
4374 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
4375 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
4376 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
4377 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
4378 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
4379 z2 += z4;
4380 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
4381 tmp1 += z1;
4382 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
4383 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
4384 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
4385 tmp12 += z2;
4386 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
4387 tmp2 += z2;
4388 tmp3 += z2;
4389 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
4390 tmp10 += z2;
4391 tmp11 += z2;
4392
4393 /* Final output stage */
4394
4395 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS);
4396 wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS);
4397 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS);
4398 wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS);
4399 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS);
4400 wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS);
4401 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS);
4402 wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS);
4403 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
4404 wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
4405 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
4406 wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
4407 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
4408 wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
4409 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
4410 wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
4411 }
4412
4413 /* Pass 2: process rows from work array, store into output array.
4414 * Note that we must descale the results by a factor of 8 == 2**3,
4415 * which is folded into the PASS2_BITS value.
4416 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
4417 */
4418
4419 wsptr = workspace;
4420 for (ctr = 0; ctr < 16; ctr++) {
4421 outptr = output_buf[ctr] + output_col;
4422
4423 /* Even part: reverse the even part of the forward DCT.
4424 * The rotator is c(-6).
4425 */
4426
4427 /* Add range center and fudge factor for final descale and range-limit. */
4428 z2 = (INT32) wsptr[0] + PASS2_OFFSET;
4429 z3 = (INT32) wsptr[4];
4430 z2 <<= CONST_BITS;
4431 z3 <<= CONST_BITS;
4432#if PASS2_BITS == 0
4433 /* Add fudge factor here for final descale. */
4434 z2 += ONE << (CONST_BITS-1);
4435#endif
4436
4437 tmp0 = z2 + z3;
4438 tmp1 = z2 - z3;
4439
4440 z2 = (INT32) wsptr[2];
4441 z3 = (INT32) wsptr[6];
4442
4443 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4444 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4445 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4446
4447 tmp10 = tmp0 + tmp2;
4448 tmp13 = tmp0 - tmp2;
4449 tmp11 = tmp1 + tmp3;
4450 tmp12 = tmp1 - tmp3;
4451
4452 /* Odd part per figure 8; the matrix is unitary and hence its
4453 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
4454 */
4455
4456 tmp0 = (INT32) wsptr[7];
4457 tmp1 = (INT32) wsptr[5];
4458 tmp2 = (INT32) wsptr[3];
4459 tmp3 = (INT32) wsptr[1];
4460
4461 z2 = tmp0 + tmp2;
4462 z3 = tmp1 + tmp3;
4463
4464 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
4465 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
4466 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
4467 z2 += z1;
4468 z3 += z1;
4469
4470 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
4471 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
4472 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
4473 tmp0 += z1 + z2;
4474 tmp3 += z1 + z3;
4475
4476 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
4477 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
4478 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
4479 tmp1 += z1 + z3;
4480 tmp2 += z1 + z2;
4481
4482 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
4483
4484 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
4485 CONST_BITS+PASS2_BITS)
4486 & RANGE_MASK];
4487 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
4488 CONST_BITS+PASS2_BITS)
4489 & RANGE_MASK];
4490 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
4491 CONST_BITS+PASS2_BITS)
4492 & RANGE_MASK];
4493 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
4494 CONST_BITS+PASS2_BITS)
4495 & RANGE_MASK];
4496 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
4497 CONST_BITS+PASS2_BITS)
4498 & RANGE_MASK];
4499 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
4500 CONST_BITS+PASS2_BITS)
4501 & RANGE_MASK];
4502 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
4503 CONST_BITS+PASS2_BITS)
4504 & RANGE_MASK];
4505 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
4506 CONST_BITS+PASS2_BITS)
4507 & RANGE_MASK];
4508
4509 wsptr += DCTSIZE; /* advance pointer to next row */
4510 }
4511}
4512
4513
4514/*
4515 * Perform dequantization and inverse DCT on one block of coefficients,
4516 * producing a 7x14 output block.
4517 *
4518 * 14-point IDCT in pass 1 (columns), 7-point in pass 2 (rows).
4519 */
4520
4521GLOBAL(void)
4522jpeg_idct_7x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4525{
4526 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
4527 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
4528 INT32 z1, z2, z3, z4;
4529 JCOEFPTR inptr;
4530 ISLOW_MULT_TYPE * quantptr;
4531 int * wsptr;
4532 JSAMPROW outptr;
4533 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4534 int ctr;
4535 int workspace[7*14]; /* buffers data between passes */
4537
4538 /* Pass 1: process columns from input, store into work array.
4539 * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
4540 */
4541
4542 inptr = coef_block;
4543 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4544 wsptr = workspace;
4545 for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
4546 /* Even part */
4547
4548 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4549 z1 <<= CONST_BITS;
4550 /* Add fudge factor here for final descale. */
4551 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
4552 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4553 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
4554 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
4555 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
4556
4557 tmp10 = z1 + z2;
4558 tmp11 = z1 + z3;
4559 tmp12 = z1 - z4;
4560
4561 tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
4562 CONST_BITS-PASS1_BITS);
4563
4564 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4565 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4566
4567 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
4568
4569 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
4570 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
4571 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
4572 MULTIPLY(z2, FIX(1.378756276)); /* c2 */
4573
4574 tmp20 = tmp10 + tmp13;
4575 tmp26 = tmp10 - tmp13;
4576 tmp21 = tmp11 + tmp14;
4577 tmp25 = tmp11 - tmp14;
4578 tmp22 = tmp12 + tmp15;
4579 tmp24 = tmp12 - tmp15;
4580
4581 /* Odd part */
4582
4583 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4584 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4585 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4586 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4587 tmp13 = z4 << CONST_BITS;
4588
4589 tmp14 = z1 + z3;
4590 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
4591 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
4592 tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
4593 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
4594 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
4595 z1 -= z2;
4596 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */
4597 tmp16 += tmp15;
4598 z1 += z4;
4599 z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
4600 tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
4601 tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
4602 z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
4603 tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
4604 tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
4605
4606#if PASS1_BITS > 0
4607 tmp13 = (z1 - z3) << PASS1_BITS;
4608#else
4609 tmp13 = z1 - z3;
4610#endif
4611
4612 /* Final output stage */
4613
4614 wsptr[7*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4615 wsptr[7*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4616 wsptr[7*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4617 wsptr[7*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4618 wsptr[7*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
4619 wsptr[7*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
4620 wsptr[7*3] = (int) (tmp23 + tmp13);
4621 wsptr[7*10] = (int) (tmp23 - tmp13);
4622 wsptr[7*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4623 wsptr[7*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4624 wsptr[7*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
4625 wsptr[7*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
4626 wsptr[7*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
4627 wsptr[7*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
4628 }
4629
4630 /* Pass 2: process 14 rows from work array, store into output array.
4631 * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
4632 */
4633
4634 wsptr = workspace;
4635 for (ctr = 0; ctr < 14; ctr++) {
4636 outptr = output_buf[ctr] + output_col;
4637
4638 /* Even part */
4639
4640 /* Add range center and fudge factor for final descale and range-limit. */
4641 tmp23 = (INT32) wsptr[0] + PASS2_OFFSET;
4642 tmp23 <<= CONST_BITS;
4643#if PASS2_BITS == 0
4644 tmp23 += ONE << (CONST_BITS-1);
4645#endif
4646
4647 z1 = (INT32) wsptr[2];
4648 z2 = (INT32) wsptr[4];
4649 z3 = (INT32) wsptr[6];
4650
4651 tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
4652 tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
4653 tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
4654 tmp10 = z1 + z3;
4655 z2 -= tmp10;
4656 tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
4657 tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
4658 tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
4659 tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
4660
4661 /* Odd part */
4662
4663 z1 = (INT32) wsptr[1];
4664 z2 = (INT32) wsptr[3];
4665 z3 = (INT32) wsptr[5];
4666
4667 tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
4668 tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
4669 tmp10 = tmp11 - tmp12;
4670 tmp11 += tmp12;
4671 tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
4672 tmp11 += tmp12;
4673 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
4674 tmp10 += z2;
4675 tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
4676
4677 /* Final output stage */
4678
4679 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
4680 CONST_BITS+PASS2_BITS)
4681 & RANGE_MASK];
4682 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
4683 CONST_BITS+PASS2_BITS)
4684 & RANGE_MASK];
4685 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
4686 CONST_BITS+PASS2_BITS)
4687 & RANGE_MASK];
4688 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
4689 CONST_BITS+PASS2_BITS)
4690 & RANGE_MASK];
4691 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
4692 CONST_BITS+PASS2_BITS)
4693 & RANGE_MASK];
4694 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
4695 CONST_BITS+PASS2_BITS)
4696 & RANGE_MASK];
4697 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23,
4698 CONST_BITS+PASS2_BITS)
4699 & RANGE_MASK];
4700
4701 wsptr += 7; /* advance pointer to next row */
4702 }
4703}
4704
4705
4706/*
4707 * Perform dequantization and inverse DCT on one block of coefficients,
4708 * producing a 6x12 output block.
4709 *
4710 * 12-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
4711 */
4712
4713GLOBAL(void)
4714jpeg_idct_6x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4717{
4718 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
4719 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
4720 INT32 z1, z2, z3, z4;
4721 JCOEFPTR inptr;
4722 ISLOW_MULT_TYPE * quantptr;
4723 int * wsptr;
4724 JSAMPROW outptr;
4725 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4726 int ctr;
4727 int workspace[6*12]; /* buffers data between passes */
4729
4730 /* Pass 1: process columns from input, store into work array.
4731 * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
4732 */
4733
4734 inptr = coef_block;
4735 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4736 wsptr = workspace;
4737 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
4738 /* Even part */
4739
4740 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4741 z3 <<= CONST_BITS;
4742 /* Add fudge factor here for final descale. */
4743 z3 += ONE << (CONST_BITS-PASS1_BITS-1);
4744
4745 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4746 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
4747
4748 tmp10 = z3 + z4;
4749 tmp11 = z3 - z4;
4750
4751 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4752 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
4753 z1 <<= CONST_BITS;
4754 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4755 z2 <<= CONST_BITS;
4756
4757 tmp12 = z1 - z2;
4758
4759 tmp21 = z3 + tmp12;
4760 tmp24 = z3 - tmp12;
4761
4762 tmp12 = z4 + z2;
4763
4764 tmp20 = tmp10 + tmp12;
4765 tmp25 = tmp10 - tmp12;
4766
4767 tmp12 = z4 - z1 - z2;
4768
4769 tmp22 = tmp11 + tmp12;
4770 tmp23 = tmp11 - tmp12;
4771
4772 /* Odd part */
4773
4774 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4775 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4776 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4777 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4778
4779 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
4780 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
4781
4782 tmp10 = z1 + z3;
4783 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
4784 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
4785 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
4786 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
4787 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
4788 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
4789 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
4790 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
4791
4792 z1 -= z4;
4793 z2 -= z3;
4794 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
4795 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
4796 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
4797
4798 /* Final output stage */
4799
4800 wsptr[6*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4801 wsptr[6*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4802 wsptr[6*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4803 wsptr[6*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4804 wsptr[6*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
4805 wsptr[6*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
4806 wsptr[6*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
4807 wsptr[6*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
4808 wsptr[6*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4809 wsptr[6*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4810 wsptr[6*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
4811 wsptr[6*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
4812 }
4813
4814 /* Pass 2: process 12 rows from work array, store into output array.
4815 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
4816 */
4817
4818 wsptr = workspace;
4819 for (ctr = 0; ctr < 12; ctr++) {
4820 outptr = output_buf[ctr] + output_col;
4821
4822 /* Even part */
4823
4824 /* Add range center and fudge factor for final descale and range-limit. */
4825 tmp10 = (INT32) wsptr[0] + PASS2_OFFSET;
4826 tmp10 <<= CONST_BITS;
4827#if PASS2_BITS == 0
4828 tmp10 += ONE << (CONST_BITS-1);
4829#endif
4830 tmp12 = (INT32) wsptr[4];
4831 tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */
4832 tmp11 = tmp10 + tmp20;
4833 tmp21 = tmp10 - tmp20 - tmp20;
4834 tmp20 = (INT32) wsptr[2];
4835 tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */
4836 tmp20 = tmp11 + tmp10;
4837 tmp22 = tmp11 - tmp10;
4838
4839 /* Odd part */
4840
4841 z1 = (INT32) wsptr[1];
4842 z2 = (INT32) wsptr[3];
4843 z3 = (INT32) wsptr[5];
4844 tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
4845 tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
4846 tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
4847 tmp11 = (z1 - z2 - z3) << CONST_BITS;
4848
4849 /* Final output stage */
4850
4851 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
4852 CONST_BITS+PASS2_BITS)
4853 & RANGE_MASK];
4854 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
4855 CONST_BITS+PASS2_BITS)
4856 & RANGE_MASK];
4857 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
4858 CONST_BITS+PASS2_BITS)
4859 & RANGE_MASK];
4860 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
4861 CONST_BITS+PASS2_BITS)
4862 & RANGE_MASK];
4863 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
4864 CONST_BITS+PASS2_BITS)
4865 & RANGE_MASK];
4866 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
4867 CONST_BITS+PASS2_BITS)
4868 & RANGE_MASK];
4869
4870 wsptr += 6; /* advance pointer to next row */
4871 }
4872}
4873
4874
4875/*
4876 * Perform dequantization and inverse DCT on one block of coefficients,
4877 * producing a 5x10 output block.
4878 *
4879 * 10-point IDCT in pass 1 (columns), 5-point in pass 2 (rows).
4880 */
4881
4882GLOBAL(void)
4883jpeg_idct_5x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4886{
4887 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
4888 INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
4889 INT32 z1, z2, z3, z4, z5;
4890 JCOEFPTR inptr;
4891 ISLOW_MULT_TYPE * quantptr;
4892 int * wsptr;
4893 JSAMPROW outptr;
4894 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4895 int ctr;
4896 int workspace[5*10]; /* buffers data between passes */
4898
4899 /* Pass 1: process columns from input, store into work array.
4900 * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
4901 */
4902
4903 inptr = coef_block;
4904 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4905 wsptr = workspace;
4906 for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
4907 /* Even part */
4908
4909 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4910 z3 <<= CONST_BITS;
4911 /* Add fudge factor here for final descale. */
4912 z3 += ONE << (CONST_BITS-PASS1_BITS-1);
4913 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4914 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
4915 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
4916 tmp10 = z3 + z1;
4917 tmp11 = z3 - z2;
4918
4919 tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */
4920 CONST_BITS-PASS1_BITS);
4921
4922 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4923 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4924
4925 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
4926 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
4927 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
4928
4929 tmp20 = tmp10 + tmp12;
4930 tmp24 = tmp10 - tmp12;
4931 tmp21 = tmp11 + tmp13;
4932 tmp23 = tmp11 - tmp13;
4933
4934 /* Odd part */
4935
4936 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4937 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4938 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4939 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4940
4941 tmp11 = z2 + z4;
4942 tmp13 = z2 - z4;
4943
4944 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
4945 z5 = z3 << CONST_BITS;
4946
4947 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
4948 z4 = z5 + tmp12;
4949
4950 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
4951 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
4952
4953 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
4954 z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
4955
4956#if PASS1_BITS > 0
4957 tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
4958#else
4959 tmp12 = z1 - tmp13 - z3;
4960#endif
4961
4962 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
4963 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
4964
4965 /* Final output stage */
4966
4967 wsptr[5*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4968 wsptr[5*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4969 wsptr[5*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4970 wsptr[5*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4971 wsptr[5*2] = (int) (tmp22 + tmp12);
4972 wsptr[5*7] = (int) (tmp22 - tmp12);
4973 wsptr[5*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
4974 wsptr[5*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
4975 wsptr[5*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4976 wsptr[5*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4977 }
4978
4979 /* Pass 2: process 10 rows from work array, store into output array.
4980 * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
4981 */
4982
4983 wsptr = workspace;
4984 for (ctr = 0; ctr < 10; ctr++) {
4985 outptr = output_buf[ctr] + output_col;
4986
4987 /* Even part */
4988
4989 /* Add range center and fudge factor for final descale and range-limit. */
4990 tmp12 = (INT32) wsptr[0] + PASS2_OFFSET;
4991 tmp12 <<= CONST_BITS;
4992#if PASS2_BITS == 0
4993 tmp12 += ONE << (CONST_BITS-1);
4994#endif
4995 tmp13 = (INT32) wsptr[2];
4996 tmp14 = (INT32) wsptr[4];
4997 z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
4998 z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
4999 z3 = tmp12 + z2;
5000 tmp10 = z3 + z1;
5001 tmp11 = z3 - z1;
5002 tmp12 -= z2 << 2;
5003
5004 /* Odd part */
5005
5006 z2 = (INT32) wsptr[1];
5007 z3 = (INT32) wsptr[3];
5008
5009 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
5010 tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
5011 tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
5012
5013 /* Final output stage */
5014
5015 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp13,
5016 CONST_BITS+PASS2_BITS)
5017 & RANGE_MASK];
5018 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp13,
5019 CONST_BITS+PASS2_BITS)
5020 & RANGE_MASK];
5021 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp14,
5022 CONST_BITS+PASS2_BITS)
5023 & RANGE_MASK];
5024 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp14,
5025 CONST_BITS+PASS2_BITS)
5026 & RANGE_MASK];
5027 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
5028 CONST_BITS+PASS2_BITS)
5029 & RANGE_MASK];
5030
5031 wsptr += 5; /* advance pointer to next row */
5032 }
5033}
5034
5035
5036/*
5037 * Perform dequantization and inverse DCT on one block of coefficients,
5038 * producing a 4x8 output block.
5039 *
5040 * 8-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
5041 */
5042
5043GLOBAL(void)
5044jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5047{
5048 INT32 tmp0, tmp1, tmp2, tmp3;
5049 INT32 tmp10, tmp11, tmp12, tmp13;
5050 INT32 z1, z2, z3;
5051 JCOEFPTR inptr;
5052 ISLOW_MULT_TYPE * quantptr;
5053 int * wsptr;
5054 JSAMPROW outptr;
5055 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5056 int ctr;
5057 int workspace[4*8]; /* buffers data between passes */
5059
5060 /* Pass 1: process columns from input, store into work array.
5061 * Note results are scaled up by sqrt(8) compared to a true IDCT;
5062 * furthermore, we scale the results by 2**PASS1_BITS.
5063 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
5064 */
5065
5066 inptr = coef_block;
5067 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5068 wsptr = workspace;
5069 for (ctr = 4; ctr > 0; ctr--) {
5070 /* Due to quantization, we will usually find that many of the input
5071 * coefficients are zero, especially the AC terms. We can exploit this
5072 * by short-circuiting the IDCT calculation for any column in which all
5073 * the AC terms are zero. In that case each output is equal to the
5074 * DC coefficient (with scale factor as needed).
5075 * With typical images and quantization tables, half or more of the
5076 * column DCT calculations can be simplified this way.
5077 */
5078
5079 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
5080 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
5081 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
5082 inptr[DCTSIZE*7] == 0) {
5083 /* AC terms all zero */
5084#if PASS1_BITS > 0
5085 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
5086#else
5087 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
5088#endif
5089
5090 wsptr[4*0] = dcval;
5091 wsptr[4*1] = dcval;
5092 wsptr[4*2] = dcval;
5093 wsptr[4*3] = dcval;
5094 wsptr[4*4] = dcval;
5095 wsptr[4*5] = dcval;
5096 wsptr[4*6] = dcval;
5097 wsptr[4*7] = dcval;
5098
5099 inptr++; /* advance pointers to next column */
5100 quantptr++;
5101 wsptr++;
5102 continue;
5103 }
5104
5105 /* Even part: reverse the even part of the forward DCT.
5106 * The rotator is c(-6).
5107 */
5108
5109 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
5110 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
5111 z2 <<= CONST_BITS;
5112 z3 <<= CONST_BITS;
5113 /* Add fudge factor here for final descale. */
5114 z2 += ONE << (CONST_BITS-PASS1_BITS-1);
5115
5116 tmp0 = z2 + z3;
5117 tmp1 = z2 - z3;
5118
5119 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
5120 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
5121
5122 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
5123 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
5124 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
5125
5126 tmp10 = tmp0 + tmp2;
5127 tmp13 = tmp0 - tmp2;
5128 tmp11 = tmp1 + tmp3;
5129 tmp12 = tmp1 - tmp3;
5130
5131 /* Odd part per figure 8; the matrix is unitary and hence its
5132 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
5133 */
5134
5135 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
5136 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
5137 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
5138 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
5139
5140 z2 = tmp0 + tmp2;
5141 z3 = tmp1 + tmp3;
5142
5143 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
5144 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
5145 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
5146 z2 += z1;
5147 z3 += z1;
5148
5149 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
5150 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
5151 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
5152 tmp0 += z1 + z2;
5153 tmp3 += z1 + z3;
5154
5155 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
5156 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
5157 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
5158 tmp1 += z1 + z3;
5159 tmp2 += z1 + z2;
5160
5161 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
5162
5163 wsptr[4*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
5164 wsptr[4*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
5165 wsptr[4*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
5166 wsptr[4*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
5167 wsptr[4*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
5168 wsptr[4*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
5169 wsptr[4*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
5170 wsptr[4*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
5171
5172 inptr++; /* advance pointers to next column */
5173 quantptr++;
5174 wsptr++;
5175 }
5176
5177 /* Pass 2: process 8 rows from work array, store into output array.
5178 * 4-point IDCT kernel,
5179 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
5180 */
5181
5182 wsptr = workspace;
5183 for (ctr = 0; ctr < 8; ctr++) {
5184 outptr = output_buf[ctr] + output_col;
5185
5186 /* Even part */
5187
5188 /* Add range center and fudge factor for final descale and range-limit. */
5189 tmp0 = (INT32) wsptr[0] + PASS2_OFFSET;
5190 tmp2 = (INT32) wsptr[2];
5191 tmp0 <<= CONST_BITS;
5192 tmp2 <<= CONST_BITS;
5193#if PASS2_BITS == 0
5194 tmp0 += ONE << (CONST_BITS-1);
5195#endif
5196
5197 tmp10 = tmp0 + tmp2;
5198 tmp12 = tmp0 - tmp2;
5199
5200 /* Odd part */
5201 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
5202
5203 z2 = (INT32) wsptr[1];
5204 z3 = (INT32) wsptr[3];
5205
5206 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
5207 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
5208 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
5209
5210 /* Final output stage */
5211
5212 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
5213 CONST_BITS+PASS2_BITS)
5214 & RANGE_MASK];
5215 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
5216 CONST_BITS+PASS2_BITS)
5217 & RANGE_MASK];
5218 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
5219 CONST_BITS+PASS2_BITS)
5220 & RANGE_MASK];
5221 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
5222 CONST_BITS+PASS2_BITS)
5223 & RANGE_MASK];
5224
5225 wsptr += 4; /* advance pointer to next row */
5226 }
5227}
5228
5229
5230/*
5231 * Perform dequantization and inverse DCT on one block of coefficients,
5232 * producing a 3x6 output block.
5233 *
5234 * 6-point IDCT in pass 1 (columns), 3-point in pass 2 (rows).
5235 */
5236
5237GLOBAL(void)
5238jpeg_idct_3x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5241{
5242 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
5243 INT32 z1, z2, z3;
5244 JCOEFPTR inptr;
5245 ISLOW_MULT_TYPE * quantptr;
5246 int * wsptr;
5247 JSAMPROW outptr;
5248 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5249 int ctr;
5250 int workspace[3*6]; /* buffers data between passes */
5252
5253 /* Pass 1: process columns from input, store into work array.
5254 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
5255 */
5256
5257 inptr = coef_block;
5258 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5259 wsptr = workspace;
5260 for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
5261 /* Even part */
5262
5263 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
5264 tmp0 <<= CONST_BITS;
5265 /* Add fudge factor here for final descale. */
5266 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
5267 tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
5268 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
5269 tmp1 = tmp0 + tmp10;
5270 tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
5271 tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
5272 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
5273 tmp10 = tmp1 + tmp0;
5274 tmp12 = tmp1 - tmp0;
5275
5276 /* Odd part */
5277
5278 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
5279 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
5280 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
5281 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
5282 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
5283 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
5284#if PASS1_BITS > 0
5285 tmp1 = (z1 - z2 - z3) << PASS1_BITS;
5286#else
5287 tmp1 = z1 - z2 - z3;
5288#endif
5289
5290 /* Final output stage */
5291
5292 wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
5293 wsptr[3*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
5294 wsptr[3*1] = (int) (tmp11 + tmp1);
5295 wsptr[3*4] = (int) (tmp11 - tmp1);
5296 wsptr[3*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
5297 wsptr[3*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
5298 }
5299
5300 /* Pass 2: process 6 rows from work array, store into output array.
5301 * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
5302 */
5303
5304 wsptr = workspace;
5305 for (ctr = 0; ctr < 6; ctr++) {
5306 outptr = output_buf[ctr] + output_col;
5307
5308 /* Even part */
5309
5310 /* Add range center and fudge factor for final descale and range-limit. */
5311 tmp0 = (INT32) wsptr[0] + PASS2_OFFSET;
5312 tmp0 <<= CONST_BITS;
5313#if PASS2_BITS == 0
5314 tmp0 += ONE << (CONST_BITS-1);
5315#endif
5316 tmp2 = (INT32) wsptr[2];
5317 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
5318 tmp10 = tmp0 + tmp12;
5319 tmp2 = tmp0 - tmp12 - tmp12;
5320
5321 /* Odd part */
5322
5323 tmp12 = (INT32) wsptr[1];
5324 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
5325
5326 /* Final output stage */
5327
5328 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
5329 CONST_BITS+PASS2_BITS)
5330 & RANGE_MASK];
5331 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
5332 CONST_BITS+PASS2_BITS)
5333 & RANGE_MASK];
5334 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
5335 CONST_BITS+PASS2_BITS)
5336 & RANGE_MASK];
5337
5338 wsptr += 3; /* advance pointer to next row */
5339 }
5340}
5341
5342
5343/*
5344 * Perform dequantization and inverse DCT on one block of coefficients,
5345 * producing a 2x4 output block.
5346 *
5347 * 4-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
5348 */
5349
5350GLOBAL(void)
5351jpeg_idct_2x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5354{
5355 INT32 tmp0, tmp2, tmp10, tmp12;
5356 INT32 z1, z2, z3;
5357 JCOEFPTR inptr;
5358 ISLOW_MULT_TYPE * quantptr;
5359 INT32 * wsptr;
5360 JSAMPROW outptr;
5361 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5362 int ctr;
5363 INT32 workspace[2*4]; /* buffers data between passes */
5365
5366 /* Pass 1: process columns from input, store into work array.
5367 * 4-point IDCT kernel,
5368 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
5369 */
5370
5371 inptr = coef_block;
5372 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5373 wsptr = workspace;
5374 for (ctr = 0; ctr < 2; ctr++, inptr++, quantptr++, wsptr++) {
5375 /* Even part */
5376
5377 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
5378 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
5379
5380 tmp10 = (tmp0 + tmp2) << CONST_BITS;
5381 tmp12 = (tmp0 - tmp2) << CONST_BITS;
5382
5383 /* Odd part */
5384 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
5385
5386 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
5387 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
5388
5389 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
5390 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
5391 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
5392
5393 /* Final output stage */
5394
5395 wsptr[2*0] = tmp10 + tmp0;
5396 wsptr[2*3] = tmp10 - tmp0;
5397 wsptr[2*1] = tmp12 + tmp2;
5398 wsptr[2*2] = tmp12 - tmp2;
5399 }
5400
5401 /* Pass 2: process 4 rows from work array, store into output array. */
5402
5403 wsptr = workspace;
5404 for (ctr = 0; ctr < 4; ctr++) {
5405 outptr = output_buf[ctr] + output_col;
5406
5407 /* Even part */
5408
5409 /* Add range center and fudge factor for final descale and range-limit. */
5410 tmp10 = wsptr[0] +
5411 ((((INT32) RANGE_CENTER) << (CONST_BITS+PASS2_BITS-PASS1_BITS)) +
5412 (ONE << (CONST_BITS+PASS2_BITS-PASS1_BITS-1)));
5413
5414 /* Odd part */
5415
5416 tmp0 = wsptr[1];
5417
5418 /* Final output stage */
5419
5420 outptr[0] =
5421 range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
5422 CONST_BITS+PASS2_BITS-PASS1_BITS)
5423 & RANGE_MASK];
5424 outptr[1] =
5425 range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
5426 CONST_BITS+PASS2_BITS-PASS1_BITS)
5427 & RANGE_MASK];
5428
5429 wsptr += 2; /* advance pointer to next row */
5430 }
5431}
5432
5433
5434/*
5435 * Perform dequantization and inverse DCT on one block of coefficients,
5436 * producing a 1x2 output block.
5437 *
5438 * 2-point IDCT in pass 1 (columns), 1-point in pass 2 (rows).
5439 */
5440
5441GLOBAL(void)
5442jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5445{
5446 DCTELEM tmp0, tmp1;
5447 ISLOW_MULT_TYPE * quantptr;
5448 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5450
5451 /* Process 1 column from input, store into output array. */
5452
5453 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5454
5455 /* Even part */
5456
5457 tmp0 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
5458
5459 /* Odd part */
5460
5461 tmp1 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
5462
5463 /* Final output stage */
5464
5465#if PASS2_BITS > PASS1_BITS
5466 /* Add range center and fudge factor for downscale and range-limit. */
5467#if PASS2_BITS > PASS1_BITS + 1
5468 tmp0 += (((DCTELEM) RANGE_CENTER) << (PASS2_BITS-PASS1_BITS)) +
5469 (1 << (PASS2_BITS-PASS1_BITS-1));
5470#else
5471 tmp0 += (((DCTELEM) RANGE_CENTER) << 1) + 1;
5472#endif
5473
5475 range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, PASS2_BITS-PASS1_BITS)
5476 & RANGE_MASK];
5478 range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, PASS2_BITS-PASS1_BITS)
5479 & RANGE_MASK];
5480#else
5481#if PASS2_BITS < PASS1_BITS
5482 tmp0 <<= (PASS1_BITS-PASS2_BITS); /* upscale */
5483 tmp1 <<= (PASS1_BITS-PASS2_BITS); /* upscale */
5484#endif
5485
5486 tmp0 += (DCTELEM) RANGE_CENTER; /* add range center for range-limit */
5487
5489 range_limit[(int) (tmp0 + tmp1) & RANGE_MASK];
5491 range_limit[(int) (tmp0 - tmp1) & RANGE_MASK];
5492#endif
5493}
5494
5495#endif /* IDCT_SCALING_SUPPORTED */
5496#endif /* DCT_ISLOW_SUPPORTED */
unsigned int(__cdecl typeof(jpeg_read_scanlines))(struct jpeg_decompress_struct *
Definition: typeof.h:31
for(i=0;i< ARRAY_SIZE(offsets);i++)
#define ISHIFT_TEMPS
Definition: jcarith.c:110
#define IRIGHT_SHIFT(x, shft)
Definition: jcarith.c:111
#define FIX(x)
Definition: jccolor.c:74
Sorry
Definition: jdcolor.c:19
MULTIPLIER ISLOW_MULT_TYPE
Definition: jdct.h:79
jpeg_component_info JCOEFPTR coef_block
Definition: jdct.h:253
#define ONE
Definition: jdct.h:365
jpeg_component_info JCOEFPTR JSAMPARRAY JDIMENSION output_col
Definition: jdct.h:253
#define RANGE_MASK
Definition: jdct.h:101
jpeg_component_info * compptr
Definition: jdct.h:252
int DCTELEM
Definition: jdct.h:49
#define IDCT_range_limit(cinfo)
Definition: jdct.h:104
jpeg_component_info JCOEFPTR JSAMPARRAY output_buf
Definition: jdct.h:253
unsigned int JDIMENSION
Definition: jmorecfg.h:265
char JSAMPLE
Definition: jmorecfg.h:110
#define GLOBAL(type)
Definition: jmorecfg.h:327
#define SHIFT_TEMPS
Definition: jpegint.h:300
#define RANGE_CENTER
Definition: jpegint.h:272
#define RIGHT_SHIFT(x, shft)
Definition: jpegint.h:301
#define DCTSIZE
Definition: jpeglib.h:50
JCOEF FAR * JCOEFPTR
Definition: jpeglib.h:84
JSAMPROW * JSAMPARRAY
Definition: jpeglib.h:76
JSAMPLE FAR * JSAMPROW
Definition: jpeglib.h:75
#define DCTSIZE2
Definition: jpeglib.h:51
static int blocks
Definition: mkdosfs.c:527
static double float double int float double float int float double float int double int float z5
Definition: server.c:81
static double float double int float double float int float double float z4
Definition: server.c:81
static double float double int float z1
Definition: server.c:81
static double float double int float double float int float z3
Definition: server.c:81
static double float double int float double float z2
Definition: server.c:81
Definition: inflate.c:139
int32_t INT32
Definition: typedefs.h:58