ReactOS  0.4.15-dev-5137-g826bd41
jidctflt.c
Go to the documentation of this file.
1 /*
2  * jidctflt.c
3  *
4  * Copyright (C) 1994-1998, Thomas G. Lane.
5  * Modified 2010-2017 by Guido Vollbeding.
6  * This file is part of the Independent JPEG Group's software.
7  * For conditions of distribution and use, see the accompanying README file.
8  *
9  * This file contains a floating-point implementation of the
10  * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine
11  * must also perform dequantization of the input coefficients.
12  *
13  * This implementation should be more accurate than either of the integer
14  * IDCT implementations. However, it may not give the same results on all
15  * machines because of differences in roundoff behavior. Speed will depend
16  * on the hardware's floating point capacity.
17  *
18  * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
19  * on each row (or vice versa, but it's more convenient to emit a row at
20  * a time). Direct algorithms are also available, but they are much more
21  * complex and seem not to be any faster when reduced to code.
22  *
23  * This implementation is based on Arai, Agui, and Nakajima's algorithm for
24  * scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in
25  * Japanese, but the algorithm is described in the Pennebaker & Mitchell
26  * JPEG textbook (see REFERENCES section in file README). The following code
27  * is based directly on figure 4-8 in P&M.
28  * While an 8-point DCT cannot be done in less than 11 multiplies, it is
29  * possible to arrange the computation so that many of the multiplies are
30  * simple scalings of the final outputs. These multiplies can then be
31  * folded into the multiplications or divisions by the JPEG quantization
32  * table entries. The AA&N method leaves only 5 multiplies and 29 adds
33  * to be done in the DCT itself.
34  * The primary disadvantage of this method is that with a fixed-point
35  * implementation, accuracy is lost due to imprecise representation of the
36  * scaled quantization values. However, that problem does not arise if
37  * we use floating point arithmetic.
38  */
39 
40 #define JPEG_INTERNALS
41 #include "jinclude.h"
42 #include "jpeglib.h"
43 #include "jdct.h" /* Private declarations for DCT subsystem */
44 
45 #ifdef DCT_FLOAT_SUPPORTED
46 
47 
48 /*
49  * This module is specialized to the case DCTSIZE = 8.
50  */
51 
52 #if DCTSIZE != 8
53  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
54 #endif
55 
56 
57 /* Dequantize a coefficient by multiplying it by the multiplier-table
58  * entry; produce a float result.
59  */
60 
61 #define DEQUANTIZE(coef,quantval) (((FAST_FLOAT) (coef)) * (quantval))
62 
63 
64 /*
65  * Perform dequantization and inverse DCT on one block of coefficients.
66  *
67  * cK represents cos(K*pi/16).
68  */
69 
70 GLOBAL(void)
71 jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
74 {
75  FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
76  FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
77  FAST_FLOAT z5, z10, z11, z12, z13;
78  JCOEFPTR inptr;
79  FLOAT_MULT_TYPE * quantptr;
80  FAST_FLOAT * wsptr;
81  JSAMPROW outptr;
82  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
83  int ctr;
84  FAST_FLOAT workspace[DCTSIZE2]; /* buffers data between passes */
85 
86  /* Pass 1: process columns from input, store into work array. */
87 
88  inptr = coef_block;
89  quantptr = (FLOAT_MULT_TYPE *) compptr->dct_table;
90  wsptr = workspace;
91  for (ctr = DCTSIZE; ctr > 0; ctr--) {
92  /* Due to quantization, we will usually find that many of the input
93  * coefficients are zero, especially the AC terms. We can exploit this
94  * by short-circuiting the IDCT calculation for any column in which all
95  * the AC terms are zero. In that case each output is equal to the
96  * DC coefficient (with scale factor as needed).
97  * With typical images and quantization tables, half or more of the
98  * column DCT calculations can be simplified this way.
99  */
100 
101  if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
102  inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
103  inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
104  inptr[DCTSIZE*7] == 0) {
105  /* AC terms all zero */
106  FAST_FLOAT dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
107 
108  wsptr[DCTSIZE*0] = dcval;
109  wsptr[DCTSIZE*1] = dcval;
110  wsptr[DCTSIZE*2] = dcval;
111  wsptr[DCTSIZE*3] = dcval;
112  wsptr[DCTSIZE*4] = dcval;
113  wsptr[DCTSIZE*5] = dcval;
114  wsptr[DCTSIZE*6] = dcval;
115  wsptr[DCTSIZE*7] = dcval;
116 
117  inptr++; /* advance pointers to next column */
118  quantptr++;
119  wsptr++;
120  continue;
121  }
122 
123  /* Even part */
124 
125  tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
126  tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
127  tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
128  tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
129 
130  tmp10 = tmp0 + tmp2; /* phase 3 */
131  tmp11 = tmp0 - tmp2;
132 
133  tmp13 = tmp1 + tmp3; /* phases 5-3 */
134  tmp12 = (tmp1 - tmp3) * ((FAST_FLOAT) 1.414213562) - tmp13; /* 2*c4 */
135 
136  tmp0 = tmp10 + tmp13; /* phase 2 */
137  tmp3 = tmp10 - tmp13;
138  tmp1 = tmp11 + tmp12;
139  tmp2 = tmp11 - tmp12;
140 
141  /* Odd part */
142 
143  tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
144  tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
145  tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
146  tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
147 
148  z13 = tmp6 + tmp5; /* phase 6 */
149  z10 = tmp6 - tmp5;
150  z11 = tmp4 + tmp7;
151  z12 = tmp4 - tmp7;
152 
153  tmp7 = z11 + z13; /* phase 5 */
154  tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */
155 
156  z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
157  tmp10 = z5 - z12 * ((FAST_FLOAT) 1.082392200); /* 2*(c2-c6) */
158  tmp12 = z5 - z10 * ((FAST_FLOAT) 2.613125930); /* 2*(c2+c6) */
159 
160  tmp6 = tmp12 - tmp7; /* phase 2 */
161  tmp5 = tmp11 - tmp6;
162  tmp4 = tmp10 - tmp5;
163 
164  wsptr[DCTSIZE*0] = tmp0 + tmp7;
165  wsptr[DCTSIZE*7] = tmp0 - tmp7;
166  wsptr[DCTSIZE*1] = tmp1 + tmp6;
167  wsptr[DCTSIZE*6] = tmp1 - tmp6;
168  wsptr[DCTSIZE*2] = tmp2 + tmp5;
169  wsptr[DCTSIZE*5] = tmp2 - tmp5;
170  wsptr[DCTSIZE*3] = tmp3 + tmp4;
171  wsptr[DCTSIZE*4] = tmp3 - tmp4;
172 
173  inptr++; /* advance pointers to next column */
174  quantptr++;
175  wsptr++;
176  }
177 
178  /* Pass 2: process rows from work array, store into output array. */
179 
180  wsptr = workspace;
181  for (ctr = 0; ctr < DCTSIZE; ctr++) {
182  outptr = output_buf[ctr] + output_col;
183  /* Rows of zeroes can be exploited in the same way as we did with columns.
184  * However, the column calculation has created many nonzero AC terms, so
185  * the simplification applies less often (typically 5% to 10% of the time).
186  * And testing floats for zero is relatively expensive, so we don't bother.
187  */
188 
189  /* Even part */
190 
191  /* Prepare range-limit and float->int conversion */
192  z5 = wsptr[0] + (((FAST_FLOAT) RANGE_CENTER) + ((FAST_FLOAT) 0.5));
193  tmp10 = z5 + wsptr[4];
194  tmp11 = z5 - wsptr[4];
195 
196  tmp13 = wsptr[2] + wsptr[6];
197  tmp12 = (wsptr[2] - wsptr[6]) *
198  ((FAST_FLOAT) 1.414213562) - tmp13; /* 2*c4 */
199 
200  tmp0 = tmp10 + tmp13;
201  tmp3 = tmp10 - tmp13;
202  tmp1 = tmp11 + tmp12;
203  tmp2 = tmp11 - tmp12;
204 
205  /* Odd part */
206 
207  z13 = wsptr[5] + wsptr[3];
208  z10 = wsptr[5] - wsptr[3];
209  z11 = wsptr[1] + wsptr[7];
210  z12 = wsptr[1] - wsptr[7];
211 
212  tmp7 = z11 + z13; /* phase 5 */
213  tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */
214 
215  z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
216  tmp10 = z5 - z12 * ((FAST_FLOAT) 1.082392200); /* 2*(c2-c6) */
217  tmp12 = z5 - z10 * ((FAST_FLOAT) 2.613125930); /* 2*(c2+c6) */
218 
219  tmp6 = tmp12 - tmp7; /* phase 2 */
220  tmp5 = tmp11 - tmp6;
221  tmp4 = tmp10 - tmp5;
222 
223  /* Final output stage: float->int conversion and range-limit */
224 
225  outptr[0] = range_limit[(int) (tmp0 + tmp7) & RANGE_MASK];
226  outptr[7] = range_limit[(int) (tmp0 - tmp7) & RANGE_MASK];
227  outptr[1] = range_limit[(int) (tmp1 + tmp6) & RANGE_MASK];
228  outptr[6] = range_limit[(int) (tmp1 - tmp6) & RANGE_MASK];
229  outptr[2] = range_limit[(int) (tmp2 + tmp5) & RANGE_MASK];
230  outptr[5] = range_limit[(int) (tmp2 - tmp5) & RANGE_MASK];
231  outptr[3] = range_limit[(int) (tmp3 + tmp4) & RANGE_MASK];
232  outptr[4] = range_limit[(int) (tmp3 - tmp4) & RANGE_MASK];
233 
234  wsptr += DCTSIZE; /* advance pointer to next row */
235  }
236 }
237 
238 #endif /* DCT_FLOAT_SUPPORTED */
#define IDCT_range_limit(cinfo)
Definition: jdct.h:90
char JSAMPLE
Definition: jmorecfg.h:74
JSAMPLE FAR * JSAMPROW
Definition: jpeglib.h:75
jpeg_component_info JCOEFPTR coef_block
Definition: jdct.h:238
#define RANGE_MASK
Definition: jdct.h:87
jpeg_component_info * compptr
Definition: jdct.h:238
jpeg_component_info JCOEFPTR JSAMPARRAY JDIMENSION output_col
Definition: jdct.h:238
static int blocks
Definition: mkdosfs.c:527
#define DCTSIZE2
Definition: jpeglib.h:51
#define for
Definition: utility.h:88
JCOEF FAR * JCOEFPTR
Definition: jpeglib.h:84
#define RANGE_CENTER
Definition: jpegint.h:273
FAST_FLOAT FLOAT_MULT_TYPE
Definition: jdct.h:74
Definition: inflate.c:139
JSAMPROW * JSAMPARRAY
Definition: jpeglib.h:76
#define GLOBAL(type)
Definition: jmorecfg.h:291
#define DCTSIZE
Definition: jpeglib.h:50
jpeg_component_info JCOEFPTR JSAMPARRAY output_buf
Definition: jdct.h:238
unsigned int JDIMENSION
Definition: jmorecfg.h:229
Sorry
Definition: jdcolor.c:19
unsigned int(__cdecl typeof(jpeg_read_scanlines))(struct jpeg_decompress_struct *
Definition: typeof.h:31