Home | Info | Community | Development | myReactOS | Contact Us
ReactOS Development > Doxygen > jfdctint.c
Go to the documentation of this file.
00001 /* 00002 * jfdctint.c 00003 * 00004 * Copyright (C) 1991-1996, Thomas G. Lane. 00005 * Modification developed 2003-2009 by Guido Vollbeding. 00006 * This file is part of the Independent JPEG Group's software. 00007 * For conditions of distribution and use, see the accompanying README file. 00008 * 00009 * This file contains a slow-but-accurate integer implementation of the 00010 * forward DCT (Discrete Cosine Transform). 00011 * 00012 * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT 00013 * on each column. Direct algorithms are also available, but they are 00014 * much more complex and seem not to be any faster when reduced to code. 00015 * 00016 * This implementation is based on an algorithm described in 00017 * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT 00018 * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, 00019 * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. 00020 * The primary algorithm described there uses 11 multiplies and 29 adds. 00021 * We use their alternate method with 12 multiplies and 32 adds. 00022 * The advantage of this method is that no data path contains more than one 00023 * multiplication; this allows a very simple and accurate implementation in 00024 * scaled fixed-point arithmetic, with a minimal number of shifts. 00025 * 00026 * We also provide FDCT routines with various input sample block sizes for 00027 * direct resolution reduction or enlargement and for direct resolving the 00028 * common 2x1 and 1x2 subsampling cases without additional resampling: NxN 00029 * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 output DCT block. 00030 * 00031 * For N<8 we fill the remaining block coefficients with zero. 00032 * For N>8 we apply a partial N-point FDCT on the input samples, computing 00033 * just the lower 8 frequency coefficients and discarding the rest. 
00034 * 00035 * We must scale the output coefficients of the N-point FDCT appropriately 00036 * to the standard 8-point FDCT level by 8/N per 1-D pass. This scaling 00037 * is folded into the constant multipliers (pass 2) and/or final/initial 00038 * shifting. 00039 * 00040 * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases 00041 * since there would be too many additional constants to pre-calculate. 00042 */ 00043 00044 #define JPEG_INTERNALS 00045 #include "jinclude.h" 00046 #include "jpeglib.h" 00047 #include "jdct.h" /* Private declarations for DCT subsystem */ 00048 00049 #ifdef DCT_ISLOW_SUPPORTED 00050 00051 00052 /* 00053 * This module is specialized to the case DCTSIZE = 8. 00054 */ 00055 00056 #if DCTSIZE != 8 00057 Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */ 00058 #endif 00059 00060 00061 /* 00062 * The poop on this scaling stuff is as follows: 00063 * 00064 * Each 1-D DCT step produces outputs which are a factor of sqrt(N) 00065 * larger than the true DCT outputs. The final outputs are therefore 00066 * a factor of N larger than desired; since N=8 this can be cured by 00067 * a simple right shift at the end of the algorithm. The advantage of 00068 * this arrangement is that we save two multiplications per 1-D DCT, 00069 * because the y0 and y4 outputs need not be divided by sqrt(N). 00070 * In the IJG code, this factor of 8 is removed by the quantization step 00071 * (in jcdctmgr.c), NOT in this module. 00072 * 00073 * We have to do addition and subtraction of the integer inputs, which 00074 * is no problem, and multiplication by fractional constants, which is 00075 * a problem to do in integer arithmetic. We multiply all the constants 00076 * by CONST_SCALE and convert them to integer constants (thus retaining 00077 * CONST_BITS bits of precision in the constants). 
After doing a 00078 * multiplication we have to divide the product by CONST_SCALE, with proper 00079 * rounding, to produce the correct output. This division can be done 00080 * cheaply as a right shift of CONST_BITS bits. We postpone shifting 00081 * as long as possible so that partial sums can be added together with 00082 * full fractional precision. 00083 * 00084 * The outputs of the first pass are scaled up by PASS1_BITS bits so that 00085 * they are represented to better-than-integral precision. These outputs 00086 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word 00087 * with the recommended scaling. (For 12-bit sample data, the intermediate 00088 * array is INT32 anyway.) 00089 * 00090 * To avoid overflow of the 32-bit intermediate results in pass 2, we must 00091 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis 00092 * shows that the values given below are the most effective. 00093 */ 00094 00095 #if BITS_IN_JSAMPLE == 8 00096 #define CONST_BITS 13 00097 #define PASS1_BITS 2 00098 #else 00099 #define CONST_BITS 13 00100 #define PASS1_BITS 1 /* lose a little precision to avoid overflow */ 00101 #endif 00102 00103 /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus 00104 * causing a lot of useless floating-point operations at run time. 00105 * To get around this we use the following pre-calculated constants. 00106 * If you change CONST_BITS you may want to add appropriate values. 00107 * (With a reasonable C compiler, you can just rely on the FIX() macro...) 
00108 */ 00109 00110 #if CONST_BITS == 13 00111 #define FIX_0_298631336 ((INT32) 2446) /* FIX(0.298631336) */ 00112 #define FIX_0_390180644 ((INT32) 3196) /* FIX(0.390180644) */ 00113 #define FIX_0_541196100 ((INT32) 4433) /* FIX(0.541196100) */ 00114 #define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */ 00115 #define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */ 00116 #define FIX_1_175875602 ((INT32) 9633) /* FIX(1.175875602) */ 00117 #define FIX_1_501321110 ((INT32) 12299) /* FIX(1.501321110) */ 00118 #define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */ 00119 #define FIX_1_961570560 ((INT32) 16069) /* FIX(1.961570560) */ 00120 #define FIX_2_053119869 ((INT32) 16819) /* FIX(2.053119869) */ 00121 #define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */ 00122 #define FIX_3_072711026 ((INT32) 25172) /* FIX(3.072711026) */ 00123 #else 00124 #define FIX_0_298631336 FIX(0.298631336) 00125 #define FIX_0_390180644 FIX(0.390180644) 00126 #define FIX_0_541196100 FIX(0.541196100) 00127 #define FIX_0_765366865 FIX(0.765366865) 00128 #define FIX_0_899976223 FIX(0.899976223) 00129 #define FIX_1_175875602 FIX(1.175875602) 00130 #define FIX_1_501321110 FIX(1.501321110) 00131 #define FIX_1_847759065 FIX(1.847759065) 00132 #define FIX_1_961570560 FIX(1.961570560) 00133 #define FIX_2_053119869 FIX(2.053119869) 00134 #define FIX_2_562915447 FIX(2.562915447) 00135 #define FIX_3_072711026 FIX(3.072711026) 00136 #endif 00137 00138 00139 /* Multiply an INT32 variable by an INT32 constant to yield an INT32 result. 00140 * For 8-bit samples with the recommended scaling, all the variable 00141 * and constant values involved are no more than 16 bits wide, so a 00142 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply. 00143 * For 12-bit samples, a full 32-bit multiplication will be needed. 
00144 */ 00145 00146 #if BITS_IN_JSAMPLE == 8 00147 #define MULTIPLY(var,const) MULTIPLY16C16(var,const) 00148 #else 00149 #define MULTIPLY(var,const) ((var) * (const)) 00150 #endif 00151 00152 00153 /* 00154 * Perform the forward DCT on one block of samples. 00155 */ 00156 00157 GLOBAL(void) 00158 jpeg_fdct_islow (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 00159 { 00160 INT32 tmp0, tmp1, tmp2, tmp3; 00161 INT32 tmp10, tmp11, tmp12, tmp13; 00162 INT32 z1; 00163 DCTELEM *dataptr; 00164 JSAMPROW elemptr; 00165 int ctr; 00166 SHIFT_TEMPS 00167 00168 /* Pass 1: process rows. */ 00169 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 00170 /* furthermore, we scale the results by 2**PASS1_BITS. */ 00171 00172 dataptr = data; 00173 for (ctr = 0; ctr < DCTSIZE; ctr++) { 00174 elemptr = sample_data[ctr] + start_col; 00175 00176 /* Even part per LL&M figure 1 --- note that published figure is faulty; 00177 * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". 00178 */ 00179 00180 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]); 00181 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]); 00182 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]); 00183 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]); 00184 00185 tmp10 = tmp0 + tmp3; 00186 tmp12 = tmp0 - tmp3; 00187 tmp11 = tmp1 + tmp2; 00188 tmp13 = tmp1 - tmp2; 00189 00190 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]); 00191 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]); 00192 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]); 00193 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]); 00194 00195 /* Apply unsigned->signed conversion */ 00196 dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS); 00197 dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS); 00198 00199 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); 00200 /* Add fudge factor here for final descale. 
*/ 00201 z1 += ONE << (CONST_BITS-PASS1_BITS-1); 00202 dataptr[2] = (DCTELEM) RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), 00203 CONST_BITS-PASS1_BITS); 00204 dataptr[6] = (DCTELEM) RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), 00205 CONST_BITS-PASS1_BITS); 00206 00207 /* Odd part per figure 8 --- note paper omits factor of sqrt(2). 00208 * cK represents sqrt(2) * cos(K*pi/16). 00209 * i0..i3 in the paper are tmp0..tmp3 here. 00210 */ 00211 00212 tmp10 = tmp0 + tmp3; 00213 tmp11 = tmp1 + tmp2; 00214 tmp12 = tmp0 + tmp2; 00215 tmp13 = tmp1 + tmp3; 00216 z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ 00217 /* Add fudge factor here for final descale. */ 00218 z1 += ONE << (CONST_BITS-PASS1_BITS-1); 00219 00220 tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ 00221 tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ 00222 tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ 00223 tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ 00224 tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ 00225 tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ 00226 tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ 00227 tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ 00228 00229 tmp12 += z1; 00230 tmp13 += z1; 00231 00232 dataptr[1] = (DCTELEM) 00233 RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS); 00234 dataptr[3] = (DCTELEM) 00235 RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS); 00236 dataptr[5] = (DCTELEM) 00237 RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS); 00238 dataptr[7] = (DCTELEM) 00239 RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS); 00240 00241 dataptr += DCTSIZE; /* advance pointer to next row */ 00242 } 00243 00244 /* Pass 2: process columns. 00245 * We remove the PASS1_BITS scaling, but leave the results scaled up 00246 * by an overall factor of 8. 
00247 */ 00248 00249 dataptr = data; 00250 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { 00251 /* Even part per LL&M figure 1 --- note that published figure is faulty; 00252 * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". 00253 */ 00254 00255 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; 00256 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; 00257 tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; 00258 tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; 00259 00260 /* Add fudge factor here for final descale. */ 00261 tmp10 = tmp0 + tmp3 + (ONE << (PASS1_BITS-1)); 00262 tmp12 = tmp0 - tmp3; 00263 tmp11 = tmp1 + tmp2; 00264 tmp13 = tmp1 - tmp2; 00265 00266 tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; 00267 tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; 00268 tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; 00269 tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; 00270 00271 dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS); 00272 dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS); 00273 00274 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); 00275 /* Add fudge factor here for final descale. */ 00276 z1 += ONE << (CONST_BITS+PASS1_BITS-1); 00277 dataptr[DCTSIZE*2] = (DCTELEM) 00278 RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), CONST_BITS+PASS1_BITS); 00279 dataptr[DCTSIZE*6] = (DCTELEM) 00280 RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), CONST_BITS+PASS1_BITS); 00281 00282 /* Odd part per figure 8 --- note paper omits factor of sqrt(2). 00283 * cK represents sqrt(2) * cos(K*pi/16). 00284 * i0..i3 in the paper are tmp0..tmp3 here. 00285 */ 00286 00287 tmp10 = tmp0 + tmp3; 00288 tmp11 = tmp1 + tmp2; 00289 tmp12 = tmp0 + tmp2; 00290 tmp13 = tmp1 + tmp3; 00291 z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ 00292 /* Add fudge factor here for final descale. 
*/ 00293 z1 += ONE << (CONST_BITS+PASS1_BITS-1); 00294 00295 tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ 00296 tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ 00297 tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ 00298 tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ 00299 tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ 00300 tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ 00301 tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ 00302 tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ 00303 00304 tmp12 += z1; 00305 tmp13 += z1; 00306 00307 dataptr[DCTSIZE*1] = (DCTELEM) 00308 RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS+PASS1_BITS); 00309 dataptr[DCTSIZE*3] = (DCTELEM) 00310 RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS+PASS1_BITS); 00311 dataptr[DCTSIZE*5] = (DCTELEM) 00312 RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS+PASS1_BITS); 00313 dataptr[DCTSIZE*7] = (DCTELEM) 00314 RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS+PASS1_BITS); 00315 00316 dataptr++; /* advance pointer to next column */ 00317 } 00318 } 00319 00320 #ifdef DCT_SCALING_SUPPORTED 00321 00322 00323 /* 00324 * Perform the forward DCT on a 7x7 sample block. 00325 */ 00326 00327 GLOBAL(void) 00328 jpeg_fdct_7x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 00329 { 00330 INT32 tmp0, tmp1, tmp2, tmp3; 00331 INT32 tmp10, tmp11, tmp12; 00332 INT32 z1, z2, z3; 00333 DCTELEM *dataptr; 00334 JSAMPROW elemptr; 00335 int ctr; 00336 SHIFT_TEMPS 00337 00338 /* Pre-zero output coefficient block. */ 00339 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 00340 00341 /* Pass 1: process rows. */ 00342 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 00343 /* furthermore, we scale the results by 2**PASS1_BITS. */ 00344 /* cK represents sqrt(2) * cos(K*pi/14). 
*/ 00345 00346 dataptr = data; 00347 for (ctr = 0; ctr < 7; ctr++) { 00348 elemptr = sample_data[ctr] + start_col; 00349 00350 /* Even part */ 00351 00352 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]); 00353 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]); 00354 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]); 00355 tmp3 = GETJSAMPLE(elemptr[3]); 00356 00357 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]); 00358 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]); 00359 tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]); 00360 00361 z1 = tmp0 + tmp2; 00362 /* Apply unsigned->signed conversion */ 00363 dataptr[0] = (DCTELEM) 00364 ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS); 00365 tmp3 += tmp3; 00366 z1 -= tmp3; 00367 z1 -= tmp3; 00368 z1 = MULTIPLY(z1, FIX(0.353553391)); /* (c2+c6-c4)/2 */ 00369 z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002)); /* (c2+c4-c6)/2 */ 00370 z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123)); /* c6 */ 00371 dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS); 00372 z1 -= z2; 00373 z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734)); /* c4 */ 00374 dataptr[4] = (DCTELEM) 00375 DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */ 00376 CONST_BITS-PASS1_BITS); 00377 dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS); 00378 00379 /* Odd part */ 00380 00381 tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347)); /* (c3+c1-c5)/2 */ 00382 tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339)); /* (c3+c5-c1)/2 */ 00383 tmp0 = tmp1 - tmp2; 00384 tmp1 += tmp2; 00385 tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.378756276)); /* -c1 */ 00386 tmp1 += tmp2; 00387 tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.613604268)); /* c5 */ 00388 tmp0 += tmp3; 00389 tmp2 += tmp3 + MULTIPLY(tmp12, FIX(1.870828693)); /* c3+c1-c5 */ 00390 00391 dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS); 00392 dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS); 00393 dataptr[5] = (DCTELEM) 
DESCALE(tmp2, CONST_BITS-PASS1_BITS); 00394 00395 dataptr += DCTSIZE; /* advance pointer to next row */ 00396 } 00397 00398 /* Pass 2: process columns. 00399 * We remove the PASS1_BITS scaling, but leave the results scaled up 00400 * by an overall factor of 8. 00401 * We must also scale the output by (8/7)**2 = 64/49, which we fold 00402 * into the constant multipliers: 00403 * cK now represents sqrt(2) * cos(K*pi/14) * 64/49. 00404 */ 00405 00406 dataptr = data; 00407 for (ctr = 0; ctr < 7; ctr++) { 00408 /* Even part */ 00409 00410 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*6]; 00411 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*5]; 00412 tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*4]; 00413 tmp3 = dataptr[DCTSIZE*3]; 00414 00415 tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*6]; 00416 tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*5]; 00417 tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*4]; 00418 00419 z1 = tmp0 + tmp2; 00420 dataptr[DCTSIZE*0] = (DCTELEM) 00421 DESCALE(MULTIPLY(z1 + tmp1 + tmp3, FIX(1.306122449)), /* 64/49 */ 00422 CONST_BITS+PASS1_BITS); 00423 tmp3 += tmp3; 00424 z1 -= tmp3; 00425 z1 -= tmp3; 00426 z1 = MULTIPLY(z1, FIX(0.461784020)); /* (c2+c6-c4)/2 */ 00427 z2 = MULTIPLY(tmp0 - tmp2, FIX(1.202428084)); /* (c2+c4-c6)/2 */ 00428 z3 = MULTIPLY(tmp1 - tmp2, FIX(0.411026446)); /* c6 */ 00429 dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS+PASS1_BITS); 00430 z1 -= z2; 00431 z2 = MULTIPLY(tmp0 - tmp1, FIX(1.151670509)); /* c4 */ 00432 dataptr[DCTSIZE*4] = (DCTELEM) 00433 DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.923568041)), /* c2+c6-c4 */ 00434 CONST_BITS+PASS1_BITS); 00435 dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+PASS1_BITS); 00436 00437 /* Odd part */ 00438 00439 tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.221765677)); /* (c3+c1-c5)/2 */ 00440 tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.222383464)); /* (c3+c5-c1)/2 */ 00441 tmp0 = tmp1 - tmp2; 00442 tmp1 += tmp2; 00443 tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.800824523)); /* -c1 
*/ 00444 tmp1 += tmp2; 00445 tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.801442310)); /* c5 */ 00446 tmp0 += tmp3; 00447 tmp2 += tmp3 + MULTIPLY(tmp12, FIX(2.443531355)); /* c3+c1-c5 */ 00448 00449 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS); 00450 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS); 00451 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS); 00452 00453 dataptr++; /* advance pointer to next column */ 00454 } 00455 } 00456 00457 00458 /* 00459 * Perform the forward DCT on a 6x6 sample block. 00460 */ 00461 00462 GLOBAL(void) 00463 jpeg_fdct_6x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 00464 { 00465 INT32 tmp0, tmp1, tmp2; 00466 INT32 tmp10, tmp11, tmp12; 00467 DCTELEM *dataptr; 00468 JSAMPROW elemptr; 00469 int ctr; 00470 SHIFT_TEMPS 00471 00472 /* Pre-zero output coefficient block. */ 00473 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 00474 00475 /* Pass 1: process rows. */ 00476 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 00477 /* furthermore, we scale the results by 2**PASS1_BITS. */ 00478 /* cK represents sqrt(2) * cos(K*pi/12). 
*/ 00479 00480 dataptr = data; 00481 for (ctr = 0; ctr < 6; ctr++) { 00482 elemptr = sample_data[ctr] + start_col; 00483 00484 /* Even part */ 00485 00486 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]); 00487 tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]); 00488 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]); 00489 00490 tmp10 = tmp0 + tmp2; 00491 tmp12 = tmp0 - tmp2; 00492 00493 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]); 00494 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]); 00495 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]); 00496 00497 /* Apply unsigned->signed conversion */ 00498 dataptr[0] = (DCTELEM) 00499 ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS); 00500 dataptr[2] = (DCTELEM) 00501 DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */ 00502 CONST_BITS-PASS1_BITS); 00503 dataptr[4] = (DCTELEM) 00504 DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */ 00505 CONST_BITS-PASS1_BITS); 00506 00507 /* Odd part */ 00508 00509 tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)), /* c5 */ 00510 CONST_BITS-PASS1_BITS); 00511 00512 dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS)); 00513 dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS); 00514 dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS)); 00515 00516 dataptr += DCTSIZE; /* advance pointer to next row */ 00517 } 00518 00519 /* Pass 2: process columns. 00520 * We remove the PASS1_BITS scaling, but leave the results scaled up 00521 * by an overall factor of 8. 00522 * We must also scale the output by (8/6)**2 = 16/9, which we fold 00523 * into the constant multipliers: 00524 * cK now represents sqrt(2) * cos(K*pi/12) * 16/9. 
00525 */ 00526 00527 dataptr = data; 00528 for (ctr = 0; ctr < 6; ctr++) { 00529 /* Even part */ 00530 00531 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5]; 00532 tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4]; 00533 tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3]; 00534 00535 tmp10 = tmp0 + tmp2; 00536 tmp12 = tmp0 - tmp2; 00537 00538 tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5]; 00539 tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4]; 00540 tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3]; 00541 00542 dataptr[DCTSIZE*0] = (DCTELEM) 00543 DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)), /* 16/9 */ 00544 CONST_BITS+PASS1_BITS); 00545 dataptr[DCTSIZE*2] = (DCTELEM) 00546 DESCALE(MULTIPLY(tmp12, FIX(2.177324216)), /* c2 */ 00547 CONST_BITS+PASS1_BITS); 00548 dataptr[DCTSIZE*4] = (DCTELEM) 00549 DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */ 00550 CONST_BITS+PASS1_BITS); 00551 00552 /* Odd part */ 00553 00554 tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829)); /* c5 */ 00555 00556 dataptr[DCTSIZE*1] = (DCTELEM) 00557 DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */ 00558 CONST_BITS+PASS1_BITS); 00559 dataptr[DCTSIZE*3] = (DCTELEM) 00560 DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)), /* 16/9 */ 00561 CONST_BITS+PASS1_BITS); 00562 dataptr[DCTSIZE*5] = (DCTELEM) 00563 DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)), /* 16/9 */ 00564 CONST_BITS+PASS1_BITS); 00565 00566 dataptr++; /* advance pointer to next column */ 00567 } 00568 } 00569 00570 00571 /* 00572 * Perform the forward DCT on a 5x5 sample block. 00573 */ 00574 00575 GLOBAL(void) 00576 jpeg_fdct_5x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 00577 { 00578 INT32 tmp0, tmp1, tmp2; 00579 INT32 tmp10, tmp11; 00580 DCTELEM *dataptr; 00581 JSAMPROW elemptr; 00582 int ctr; 00583 SHIFT_TEMPS 00584 00585 /* Pre-zero output coefficient block. */ 00586 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 00587 00588 /* Pass 1: process rows. 
*/ 00589 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 00590 /* furthermore, we scale the results by 2**PASS1_BITS. */ 00591 /* We scale the results further by 2 as part of output adaption */ 00592 /* scaling for different DCT size. */ 00593 /* cK represents sqrt(2) * cos(K*pi/10). */ 00594 00595 dataptr = data; 00596 for (ctr = 0; ctr < 5; ctr++) { 00597 elemptr = sample_data[ctr] + start_col; 00598 00599 /* Even part */ 00600 00601 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]); 00602 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]); 00603 tmp2 = GETJSAMPLE(elemptr[2]); 00604 00605 tmp10 = tmp0 + tmp1; 00606 tmp11 = tmp0 - tmp1; 00607 00608 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]); 00609 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]); 00610 00611 /* Apply unsigned->signed conversion */ 00612 dataptr[0] = (DCTELEM) 00613 ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << (PASS1_BITS+1)); 00614 tmp11 = MULTIPLY(tmp11, FIX(0.790569415)); /* (c2+c4)/2 */ 00615 tmp10 -= tmp2 << 2; 00616 tmp10 = MULTIPLY(tmp10, FIX(0.353553391)); /* (c2-c4)/2 */ 00617 dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS-1); 00618 dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS-1); 00619 00620 /* Odd part */ 00621 00622 tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876)); /* c3 */ 00623 00624 dataptr[1] = (DCTELEM) 00625 DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */ 00626 CONST_BITS-PASS1_BITS-1); 00627 dataptr[3] = (DCTELEM) 00628 DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */ 00629 CONST_BITS-PASS1_BITS-1); 00630 00631 dataptr += DCTSIZE; /* advance pointer to next row */ 00632 } 00633 00634 /* Pass 2: process columns. 00635 * We remove the PASS1_BITS scaling, but leave the results scaled up 00636 * by an overall factor of 8. 
00637 * We must also scale the output by (8/5)**2 = 64/25, which we partially 00638 * fold into the constant multipliers (other part was done in pass 1): 00639 * cK now represents sqrt(2) * cos(K*pi/10) * 32/25. 00640 */ 00641 00642 dataptr = data; 00643 for (ctr = 0; ctr < 5; ctr++) { 00644 /* Even part */ 00645 00646 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*4]; 00647 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*3]; 00648 tmp2 = dataptr[DCTSIZE*2]; 00649 00650 tmp10 = tmp0 + tmp1; 00651 tmp11 = tmp0 - tmp1; 00652 00653 tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*4]; 00654 tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*3]; 00655 00656 dataptr[DCTSIZE*0] = (DCTELEM) 00657 DESCALE(MULTIPLY(tmp10 + tmp2, FIX(1.28)), /* 32/25 */ 00658 CONST_BITS+PASS1_BITS); 00659 tmp11 = MULTIPLY(tmp11, FIX(1.011928851)); /* (c2+c4)/2 */ 00660 tmp10 -= tmp2 << 2; 00661 tmp10 = MULTIPLY(tmp10, FIX(0.452548340)); /* (c2-c4)/2 */ 00662 dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS+PASS1_BITS); 00663 dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS+PASS1_BITS); 00664 00665 /* Odd part */ 00666 00667 tmp10 = MULTIPLY(tmp0 + tmp1, FIX(1.064004961)); /* c3 */ 00668 00669 dataptr[DCTSIZE*1] = (DCTELEM) 00670 DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.657591230)), /* c1-c3 */ 00671 CONST_BITS+PASS1_BITS); 00672 dataptr[DCTSIZE*3] = (DCTELEM) 00673 DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.785601151)), /* c1+c3 */ 00674 CONST_BITS+PASS1_BITS); 00675 00676 dataptr++; /* advance pointer to next column */ 00677 } 00678 } 00679 00680 00681 /* 00682 * Perform the forward DCT on a 4x4 sample block. 00683 */ 00684 00685 GLOBAL(void) 00686 jpeg_fdct_4x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 00687 { 00688 INT32 tmp0, tmp1; 00689 INT32 tmp10, tmp11; 00690 DCTELEM *dataptr; 00691 JSAMPROW elemptr; 00692 int ctr; 00693 SHIFT_TEMPS 00694 00695 /* Pre-zero output coefficient block. 
*/ 00696 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 00697 00698 /* Pass 1: process rows. */ 00699 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 00700 /* furthermore, we scale the results by 2**PASS1_BITS. */ 00701 /* We must also scale the output by (8/4)**2 = 2**2, which we add here. */ 00702 /* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. */ 00703 00704 dataptr = data; 00705 for (ctr = 0; ctr < 4; ctr++) { 00706 elemptr = sample_data[ctr] + start_col; 00707 00708 /* Even part */ 00709 00710 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]); 00711 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]); 00712 00713 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]); 00714 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]); 00715 00716 /* Apply unsigned->signed conversion */ 00717 dataptr[0] = (DCTELEM) 00718 ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+2)); 00719 dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+2)); 00720 00721 /* Odd part */ 00722 00723 tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */ 00724 /* Add fudge factor here for final descale. */ 00725 tmp0 += ONE << (CONST_BITS-PASS1_BITS-3); 00726 00727 dataptr[1] = (DCTELEM) 00728 RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */ 00729 CONST_BITS-PASS1_BITS-2); 00730 dataptr[3] = (DCTELEM) 00731 RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */ 00732 CONST_BITS-PASS1_BITS-2); 00733 00734 dataptr += DCTSIZE; /* advance pointer to next row */ 00735 } 00736 00737 /* Pass 2: process columns. 00738 * We remove the PASS1_BITS scaling, but leave the results scaled up 00739 * by an overall factor of 8. 00740 */ 00741 00742 dataptr = data; 00743 for (ctr = 0; ctr < 4; ctr++) { 00744 /* Even part */ 00745 00746 /* Add fudge factor here for final descale. 
*/ 00747 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS1_BITS-1)); 00748 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2]; 00749 00750 tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3]; 00751 tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2]; 00752 00753 dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS); 00754 dataptr[DCTSIZE*2] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS); 00755 00756 /* Odd part */ 00757 00758 tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */ 00759 /* Add fudge factor here for final descale. */ 00760 tmp0 += ONE << (CONST_BITS+PASS1_BITS-1); 00761 00762 dataptr[DCTSIZE*1] = (DCTELEM) 00763 RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */ 00764 CONST_BITS+PASS1_BITS); 00765 dataptr[DCTSIZE*3] = (DCTELEM) 00766 RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */ 00767 CONST_BITS+PASS1_BITS); 00768 00769 dataptr++; /* advance pointer to next column */ 00770 } 00771 } 00772 00773 00774 /* 00775 * Perform the forward DCT on a 3x3 sample block. 00776 */ 00777 00778 GLOBAL(void) 00779 jpeg_fdct_3x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 00780 { 00781 INT32 tmp0, tmp1, tmp2; 00782 DCTELEM *dataptr; 00783 JSAMPROW elemptr; 00784 int ctr; 00785 SHIFT_TEMPS 00786 00787 /* Pre-zero output coefficient block. */ 00788 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 00789 00790 /* Pass 1: process rows. */ 00791 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 00792 /* furthermore, we scale the results by 2**PASS1_BITS. */ 00793 /* We scale the results further by 2**2 as part of output adaption */ 00794 /* scaling for different DCT size. */ 00795 /* cK represents sqrt(2) * cos(K*pi/6). 
*/ 00796 00797 dataptr = data; 00798 for (ctr = 0; ctr < 3; ctr++) { 00799 elemptr = sample_data[ctr] + start_col; 00800 00801 /* Even part */ 00802 00803 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]); 00804 tmp1 = GETJSAMPLE(elemptr[1]); 00805 00806 tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]); 00807 00808 /* Apply unsigned->signed conversion */ 00809 dataptr[0] = (DCTELEM) 00810 ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+2)); 00811 dataptr[2] = (DCTELEM) 00812 DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */ 00813 CONST_BITS-PASS1_BITS-2); 00814 00815 /* Odd part */ 00816 00817 dataptr[1] = (DCTELEM) 00818 DESCALE(MULTIPLY(tmp2, FIX(1.224744871)), /* c1 */ 00819 CONST_BITS-PASS1_BITS-2); 00820 00821 dataptr += DCTSIZE; /* advance pointer to next row */ 00822 } 00823 00824 /* Pass 2: process columns. 00825 * We remove the PASS1_BITS scaling, but leave the results scaled up 00826 * by an overall factor of 8. 00827 * We must also scale the output by (8/3)**2 = 64/9, which we partially 00828 * fold into the constant multipliers (other part was done in pass 1): 00829 * cK now represents sqrt(2) * cos(K*pi/6) * 16/9. 
00830 */ 00831 00832 dataptr = data; 00833 for (ctr = 0; ctr < 3; ctr++) { 00834 /* Even part */ 00835 00836 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*2]; 00837 tmp1 = dataptr[DCTSIZE*1]; 00838 00839 tmp2 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*2]; 00840 00841 dataptr[DCTSIZE*0] = (DCTELEM) 00842 DESCALE(MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */ 00843 CONST_BITS+PASS1_BITS); 00844 dataptr[DCTSIZE*2] = (DCTELEM) 00845 DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(1.257078722)), /* c2 */ 00846 CONST_BITS+PASS1_BITS); 00847 00848 /* Odd part */ 00849 00850 dataptr[DCTSIZE*1] = (DCTELEM) 00851 DESCALE(MULTIPLY(tmp2, FIX(2.177324216)), /* c1 */ 00852 CONST_BITS+PASS1_BITS); 00853 00854 dataptr++; /* advance pointer to next column */ 00855 } 00856 } 00857 00858 00859 /* 00860 * Perform the forward DCT on a 2x2 sample block. 00861 */ 00862 00863 GLOBAL(void) 00864 jpeg_fdct_2x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 00865 { 00866 INT32 tmp0, tmp1, tmp2, tmp3; 00867 JSAMPROW elemptr; 00868 00869 /* Pre-zero output coefficient block. */ 00870 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 00871 00872 /* Pass 1: process rows. */ 00873 /* Note results are scaled up by sqrt(8) compared to a true DCT. */ 00874 00875 /* Row 0 */ 00876 elemptr = sample_data[0] + start_col; 00877 00878 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]); 00879 tmp1 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]); 00880 00881 /* Row 1 */ 00882 elemptr = sample_data[1] + start_col; 00883 00884 tmp2 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]); 00885 tmp3 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]); 00886 00887 /* Pass 2: process columns. 00888 * We leave the results scaled up by an overall factor of 8. 00889 * We must also scale the output by (8/2)**2 = 2**4. 
00890 */ 00891 00892 /* Column 0 */ 00893 /* Apply unsigned->signed conversion */ 00894 data[DCTSIZE*0] = (DCTELEM) ((tmp0 + tmp2 - 4 * CENTERJSAMPLE) << 4); 00895 data[DCTSIZE*1] = (DCTELEM) ((tmp0 - tmp2) << 4); 00896 00897 /* Column 1 */ 00898 data[DCTSIZE*0+1] = (DCTELEM) ((tmp1 + tmp3) << 4); 00899 data[DCTSIZE*1+1] = (DCTELEM) ((tmp1 - tmp3) << 4); 00900 } 00901 00902 00903 /* 00904 * Perform the forward DCT on a 1x1 sample block. 00905 */ 00906 00907 GLOBAL(void) 00908 jpeg_fdct_1x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 00909 { 00910 /* Pre-zero output coefficient block. */ 00911 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 00912 00913 /* We leave the result scaled up by an overall factor of 8. */ 00914 /* We must also scale the output by (8/1)**2 = 2**6. */ 00915 /* Apply unsigned->signed conversion */ 00916 data[0] = (DCTELEM) 00917 ((GETJSAMPLE(sample_data[0][start_col]) - CENTERJSAMPLE) << 6); 00918 } 00919 00920 00921 /* 00922 * Perform the forward DCT on a 9x9 sample block. 00923 */ 00924 00925 GLOBAL(void) 00926 jpeg_fdct_9x9 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 00927 { 00928 INT32 tmp0, tmp1, tmp2, tmp3, tmp4; 00929 INT32 tmp10, tmp11, tmp12, tmp13; 00930 INT32 z1, z2; 00931 DCTELEM workspace[8]; 00932 DCTELEM *dataptr; 00933 DCTELEM *wsptr; 00934 JSAMPROW elemptr; 00935 int ctr; 00936 SHIFT_TEMPS 00937 00938 /* Pass 1: process rows. */ 00939 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 00940 /* we scale the results further by 2 as part of output adaption */ 00941 /* scaling for different DCT size. */ 00942 /* cK represents sqrt(2) * cos(K*pi/18). 
*/ 00943 00944 dataptr = data; 00945 ctr = 0; 00946 for (;;) { 00947 elemptr = sample_data[ctr] + start_col; 00948 00949 /* Even part */ 00950 00951 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[8]); 00952 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[7]); 00953 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[6]); 00954 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[5]); 00955 tmp4 = GETJSAMPLE(elemptr[4]); 00956 00957 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[8]); 00958 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[7]); 00959 tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[6]); 00960 tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[5]); 00961 00962 z1 = tmp0 + tmp2 + tmp3; 00963 z2 = tmp1 + tmp4; 00964 /* Apply unsigned->signed conversion */ 00965 dataptr[0] = (DCTELEM) ((z1 + z2 - 9 * CENTERJSAMPLE) << 1); 00966 dataptr[6] = (DCTELEM) 00967 DESCALE(MULTIPLY(z1 - z2 - z2, FIX(0.707106781)), /* c6 */ 00968 CONST_BITS-1); 00969 z1 = MULTIPLY(tmp0 - tmp2, FIX(1.328926049)); /* c2 */ 00970 z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(0.707106781)); /* c6 */ 00971 dataptr[2] = (DCTELEM) 00972 DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.083350441)) /* c4 */ 00973 + z1 + z2, CONST_BITS-1); 00974 dataptr[4] = (DCTELEM) 00975 DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.245575608)) /* c8 */ 00976 + z1 - z2, CONST_BITS-1); 00977 00978 /* Odd part */ 00979 00980 dataptr[3] = (DCTELEM) 00981 DESCALE(MULTIPLY(tmp10 - tmp12 - tmp13, FIX(1.224744871)), /* c3 */ 00982 CONST_BITS-1); 00983 00984 tmp11 = MULTIPLY(tmp11, FIX(1.224744871)); /* c3 */ 00985 tmp0 = MULTIPLY(tmp10 + tmp12, FIX(0.909038955)); /* c5 */ 00986 tmp1 = MULTIPLY(tmp10 + tmp13, FIX(0.483689525)); /* c7 */ 00987 00988 dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp0 + tmp1, CONST_BITS-1); 00989 00990 tmp2 = MULTIPLY(tmp12 - tmp13, FIX(1.392728481)); /* c1 */ 00991 00992 dataptr[5] = (DCTELEM) DESCALE(tmp0 - tmp11 - tmp2, CONST_BITS-1); 00993 dataptr[7] = (DCTELEM) DESCALE(tmp1 - tmp11 + tmp2, 
CONST_BITS-1); 00994 00995 ctr++; 00996 00997 if (ctr != DCTSIZE) { 00998 if (ctr == 9) 00999 break; /* Done. */ 01000 dataptr += DCTSIZE; /* advance pointer to next row */ 01001 } else 01002 dataptr = workspace; /* switch pointer to extended workspace */ 01003 } 01004 01005 /* Pass 2: process columns. 01006 * We leave the results scaled up by an overall factor of 8. 01007 * We must also scale the output by (8/9)**2 = 64/81, which we partially 01008 * fold into the constant multipliers and final/initial shifting: 01009 * cK now represents sqrt(2) * cos(K*pi/18) * 128/81. 01010 */ 01011 01012 dataptr = data; 01013 wsptr = workspace; 01014 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { 01015 /* Even part */ 01016 01017 tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*0]; 01018 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*7]; 01019 tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*6]; 01020 tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*5]; 01021 tmp4 = dataptr[DCTSIZE*4]; 01022 01023 tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*0]; 01024 tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*7]; 01025 tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*6]; 01026 tmp13 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*5]; 01027 01028 z1 = tmp0 + tmp2 + tmp3; 01029 z2 = tmp1 + tmp4; 01030 dataptr[DCTSIZE*0] = (DCTELEM) 01031 DESCALE(MULTIPLY(z1 + z2, FIX(1.580246914)), /* 128/81 */ 01032 CONST_BITS+2); 01033 dataptr[DCTSIZE*6] = (DCTELEM) 01034 DESCALE(MULTIPLY(z1 - z2 - z2, FIX(1.117403309)), /* c6 */ 01035 CONST_BITS+2); 01036 z1 = MULTIPLY(tmp0 - tmp2, FIX(2.100031287)); /* c2 */ 01037 z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(1.117403309)); /* c6 */ 01038 dataptr[DCTSIZE*2] = (DCTELEM) 01039 DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.711961190)) /* c4 */ 01040 + z1 + z2, CONST_BITS+2); 01041 dataptr[DCTSIZE*4] = (DCTELEM) 01042 DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.388070096)) /* c8 */ 01043 + z1 - z2, CONST_BITS+2); 01044 01045 /* Odd part */ 01046 01047 dataptr[DCTSIZE*3] = (DCTELEM) 01048 DESCALE(MULTIPLY(tmp10 - tmp12 - 
tmp13, FIX(1.935399303)), /* c3 */ 01049 CONST_BITS+2); 01050 01051 tmp11 = MULTIPLY(tmp11, FIX(1.935399303)); /* c3 */ 01052 tmp0 = MULTIPLY(tmp10 + tmp12, FIX(1.436506004)); /* c5 */ 01053 tmp1 = MULTIPLY(tmp10 + tmp13, FIX(0.764348879)); /* c7 */ 01054 01055 dataptr[DCTSIZE*1] = (DCTELEM) 01056 DESCALE(tmp11 + tmp0 + tmp1, CONST_BITS+2); 01057 01058 tmp2 = MULTIPLY(tmp12 - tmp13, FIX(2.200854883)); /* c1 */ 01059 01060 dataptr[DCTSIZE*5] = (DCTELEM) 01061 DESCALE(tmp0 - tmp11 - tmp2, CONST_BITS+2); 01062 dataptr[DCTSIZE*7] = (DCTELEM) 01063 DESCALE(tmp1 - tmp11 + tmp2, CONST_BITS+2); 01064 01065 dataptr++; /* advance pointer to next column */ 01066 wsptr++; /* advance pointer to next column */ 01067 } 01068 } 01069 01070 01071 /* 01072 * Perform the forward DCT on a 10x10 sample block. 01073 */ 01074 01075 GLOBAL(void) 01076 jpeg_fdct_10x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 01077 { 01078 INT32 tmp0, tmp1, tmp2, tmp3, tmp4; 01079 INT32 tmp10, tmp11, tmp12, tmp13, tmp14; 01080 DCTELEM workspace[8*2]; 01081 DCTELEM *dataptr; 01082 DCTELEM *wsptr; 01083 JSAMPROW elemptr; 01084 int ctr; 01085 SHIFT_TEMPS 01086 01087 /* Pass 1: process rows. */ 01088 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 01089 /* we scale the results further by 2 as part of output adaption */ 01090 /* scaling for different DCT size. */ 01091 /* cK represents sqrt(2) * cos(K*pi/20). 
*/ 01092 01093 dataptr = data; 01094 ctr = 0; 01095 for (;;) { 01096 elemptr = sample_data[ctr] + start_col; 01097 01098 /* Even part */ 01099 01100 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]); 01101 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]); 01102 tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]); 01103 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]); 01104 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]); 01105 01106 tmp10 = tmp0 + tmp4; 01107 tmp13 = tmp0 - tmp4; 01108 tmp11 = tmp1 + tmp3; 01109 tmp14 = tmp1 - tmp3; 01110 01111 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]); 01112 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]); 01113 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]); 01114 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]); 01115 tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]); 01116 01117 /* Apply unsigned->signed conversion */ 01118 dataptr[0] = (DCTELEM) 01119 ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << 1); 01120 tmp12 += tmp12; 01121 dataptr[4] = (DCTELEM) 01122 DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */ 01123 MULTIPLY(tmp11 - tmp12, FIX(0.437016024)), /* c8 */ 01124 CONST_BITS-1); 01125 tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876)); /* c6 */ 01126 dataptr[2] = (DCTELEM) 01127 DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)), /* c2-c6 */ 01128 CONST_BITS-1); 01129 dataptr[6] = (DCTELEM) 01130 DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)), /* c2+c6 */ 01131 CONST_BITS-1); 01132 01133 /* Odd part */ 01134 01135 tmp10 = tmp0 + tmp4; 01136 tmp11 = tmp1 - tmp3; 01137 dataptr[5] = (DCTELEM) ((tmp10 - tmp11 - tmp2) << 1); 01138 tmp2 <<= CONST_BITS; 01139 dataptr[1] = (DCTELEM) 01140 DESCALE(MULTIPLY(tmp0, FIX(1.396802247)) + /* c1 */ 01141 MULTIPLY(tmp1, FIX(1.260073511)) + tmp2 + /* c3 */ 01142 MULTIPLY(tmp3, FIX(0.642039522)) + /* c7 */ 01143 MULTIPLY(tmp4, FIX(0.221231742)), /* c9 */ 01144 CONST_BITS-1); 01145 tmp12 = MULTIPLY(tmp0 - tmp4, 
FIX(0.951056516)) - /* (c3+c7)/2 */ 01146 MULTIPLY(tmp1 + tmp3, FIX(0.587785252)); /* (c1-c9)/2 */ 01147 tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.309016994)) + /* (c3-c7)/2 */ 01148 (tmp11 << (CONST_BITS - 1)) - tmp2; 01149 dataptr[3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS-1); 01150 dataptr[7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS-1); 01151 01152 ctr++; 01153 01154 if (ctr != DCTSIZE) { 01155 if (ctr == 10) 01156 break; /* Done. */ 01157 dataptr += DCTSIZE; /* advance pointer to next row */ 01158 } else 01159 dataptr = workspace; /* switch pointer to extended workspace */ 01160 } 01161 01162 /* Pass 2: process columns. 01163 * We leave the results scaled up by an overall factor of 8. 01164 * We must also scale the output by (8/10)**2 = 16/25, which we partially 01165 * fold into the constant multipliers and final/initial shifting: 01166 * cK now represents sqrt(2) * cos(K*pi/20) * 32/25. 01167 */ 01168 01169 dataptr = data; 01170 wsptr = workspace; 01171 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { 01172 /* Even part */ 01173 01174 tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*1]; 01175 tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*0]; 01176 tmp12 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*7]; 01177 tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*6]; 01178 tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5]; 01179 01180 tmp10 = tmp0 + tmp4; 01181 tmp13 = tmp0 - tmp4; 01182 tmp11 = tmp1 + tmp3; 01183 tmp14 = tmp1 - tmp3; 01184 01185 tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*1]; 01186 tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*0]; 01187 tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*7]; 01188 tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*6]; 01189 tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5]; 01190 01191 dataptr[DCTSIZE*0] = (DCTELEM) 01192 DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(1.28)), /* 32/25 */ 01193 CONST_BITS+2); 01194 tmp12 += tmp12; 01195 dataptr[DCTSIZE*4] = (DCTELEM) 01196 DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.464477191)) - /* c4 */ 01197 MULTIPLY(tmp11 - tmp12, 
FIX(0.559380511)), /* c8 */ 01198 CONST_BITS+2); 01199 tmp10 = MULTIPLY(tmp13 + tmp14, FIX(1.064004961)); /* c6 */ 01200 dataptr[DCTSIZE*2] = (DCTELEM) 01201 DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.657591230)), /* c2-c6 */ 01202 CONST_BITS+2); 01203 dataptr[DCTSIZE*6] = (DCTELEM) 01204 DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.785601151)), /* c2+c6 */ 01205 CONST_BITS+2); 01206 01207 /* Odd part */ 01208 01209 tmp10 = tmp0 + tmp4; 01210 tmp11 = tmp1 - tmp3; 01211 dataptr[DCTSIZE*5] = (DCTELEM) 01212 DESCALE(MULTIPLY(tmp10 - tmp11 - tmp2, FIX(1.28)), /* 32/25 */ 01213 CONST_BITS+2); 01214 tmp2 = MULTIPLY(tmp2, FIX(1.28)); /* 32/25 */ 01215 dataptr[DCTSIZE*1] = (DCTELEM) 01216 DESCALE(MULTIPLY(tmp0, FIX(1.787906876)) + /* c1 */ 01217 MULTIPLY(tmp1, FIX(1.612894094)) + tmp2 + /* c3 */ 01218 MULTIPLY(tmp3, FIX(0.821810588)) + /* c7 */ 01219 MULTIPLY(tmp4, FIX(0.283176630)), /* c9 */ 01220 CONST_BITS+2); 01221 tmp12 = MULTIPLY(tmp0 - tmp4, FIX(1.217352341)) - /* (c3+c7)/2 */ 01222 MULTIPLY(tmp1 + tmp3, FIX(0.752365123)); /* (c1-c9)/2 */ 01223 tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.395541753)) + /* (c3-c7)/2 */ 01224 MULTIPLY(tmp11, FIX(0.64)) - tmp2; /* 16/25 */ 01225 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS+2); 01226 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS+2); 01227 01228 dataptr++; /* advance pointer to next column */ 01229 wsptr++; /* advance pointer to next column */ 01230 } 01231 } 01232 01233 01234 /* 01235 * Perform the forward DCT on an 11x11 sample block. 01236 */ 01237 01238 GLOBAL(void) 01239 jpeg_fdct_11x11 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 01240 { 01241 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; 01242 INT32 tmp10, tmp11, tmp12, tmp13, tmp14; 01243 INT32 z1, z2, z3; 01244 DCTELEM workspace[8*3]; 01245 DCTELEM *dataptr; 01246 DCTELEM *wsptr; 01247 JSAMPROW elemptr; 01248 int ctr; 01249 SHIFT_TEMPS 01250 01251 /* Pass 1: process rows. 
*/ 01252 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 01253 /* we scale the results further by 2 as part of output adaption */ 01254 /* scaling for different DCT size. */ 01255 /* cK represents sqrt(2) * cos(K*pi/22). */ 01256 01257 dataptr = data; 01258 ctr = 0; 01259 for (;;) { 01260 elemptr = sample_data[ctr] + start_col; 01261 01262 /* Even part */ 01263 01264 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[10]); 01265 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[9]); 01266 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[8]); 01267 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[7]); 01268 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[6]); 01269 tmp5 = GETJSAMPLE(elemptr[5]); 01270 01271 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[10]); 01272 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[9]); 01273 tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[8]); 01274 tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[7]); 01275 tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[6]); 01276 01277 /* Apply unsigned->signed conversion */ 01278 dataptr[0] = (DCTELEM) 01279 ((tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 - 11 * CENTERJSAMPLE) << 1); 01280 tmp5 += tmp5; 01281 tmp0 -= tmp5; 01282 tmp1 -= tmp5; 01283 tmp2 -= tmp5; 01284 tmp3 -= tmp5; 01285 tmp4 -= tmp5; 01286 z1 = MULTIPLY(tmp0 + tmp3, FIX(1.356927976)) + /* c2 */ 01287 MULTIPLY(tmp2 + tmp4, FIX(0.201263574)); /* c10 */ 01288 z2 = MULTIPLY(tmp1 - tmp3, FIX(0.926112931)); /* c6 */ 01289 z3 = MULTIPLY(tmp0 - tmp1, FIX(1.189712156)); /* c4 */ 01290 dataptr[2] = (DCTELEM) 01291 DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.018300590)) /* c2+c8-c6 */ 01292 - MULTIPLY(tmp4, FIX(1.390975730)), /* c4+c10 */ 01293 CONST_BITS-1); 01294 dataptr[4] = (DCTELEM) 01295 DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.062335650)) /* c4-c6-c10 */ 01296 - MULTIPLY(tmp2, FIX(1.356927976)) /* c2 */ 01297 + MULTIPLY(tmp4, FIX(0.587485545)), /* c8 */ 01298 CONST_BITS-1); 01299 dataptr[6] = 
(DCTELEM) 01300 DESCALE(z1 + z3 - MULTIPLY(tmp0, FIX(1.620527200)) /* c2+c4-c6 */ 01301 - MULTIPLY(tmp2, FIX(0.788749120)), /* c8+c10 */ 01302 CONST_BITS-1); 01303 01304 /* Odd part */ 01305 01306 tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.286413905)); /* c3 */ 01307 tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.068791298)); /* c5 */ 01308 tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.764581576)); /* c7 */ 01309 tmp0 = tmp1 + tmp2 + tmp3 - MULTIPLY(tmp10, FIX(1.719967871)) /* c7+c5+c3-c1 */ 01310 + MULTIPLY(tmp14, FIX(0.398430003)); /* c9 */ 01311 tmp4 = MULTIPLY(tmp11 + tmp12, - FIX(0.764581576)); /* -c7 */ 01312 tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.399818907)); /* -c1 */ 01313 tmp1 += tmp4 + tmp5 + MULTIPLY(tmp11, FIX(1.276416582)) /* c9+c7+c1-c3 */ 01314 - MULTIPLY(tmp14, FIX(1.068791298)); /* c5 */ 01315 tmp10 = MULTIPLY(tmp12 + tmp13, FIX(0.398430003)); /* c9 */ 01316 tmp2 += tmp4 + tmp10 - MULTIPLY(tmp12, FIX(1.989053629)) /* c9+c5+c3-c7 */ 01317 + MULTIPLY(tmp14, FIX(1.399818907)); /* c1 */ 01318 tmp3 += tmp5 + tmp10 + MULTIPLY(tmp13, FIX(1.305598626)) /* c1+c5-c9-c7 */ 01319 - MULTIPLY(tmp14, FIX(1.286413905)); /* c3 */ 01320 01321 dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-1); 01322 dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-1); 01323 dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-1); 01324 dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS-1); 01325 01326 ctr++; 01327 01328 if (ctr != DCTSIZE) { 01329 if (ctr == 11) 01330 break; /* Done. */ 01331 dataptr += DCTSIZE; /* advance pointer to next row */ 01332 } else 01333 dataptr = workspace; /* switch pointer to extended workspace */ 01334 } 01335 01336 /* Pass 2: process columns. 01337 * We leave the results scaled up by an overall factor of 8. 01338 * We must also scale the output by (8/11)**2 = 64/121, which we partially 01339 * fold into the constant multipliers and final/initial shifting: 01340 * cK now represents sqrt(2) * cos(K*pi/22) * 128/121. 
01341 */ 01342 01343 dataptr = data; 01344 wsptr = workspace; 01345 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { 01346 /* Even part */ 01347 01348 tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*2]; 01349 tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*1]; 01350 tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*0]; 01351 tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*7]; 01352 tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*6]; 01353 tmp5 = dataptr[DCTSIZE*5]; 01354 01355 tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*2]; 01356 tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*1]; 01357 tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*0]; 01358 tmp13 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*7]; 01359 tmp14 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*6]; 01360 01361 dataptr[DCTSIZE*0] = (DCTELEM) 01362 DESCALE(MULTIPLY(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5, 01363 FIX(1.057851240)), /* 128/121 */ 01364 CONST_BITS+2); 01365 tmp5 += tmp5; 01366 tmp0 -= tmp5; 01367 tmp1 -= tmp5; 01368 tmp2 -= tmp5; 01369 tmp3 -= tmp5; 01370 tmp4 -= tmp5; 01371 z1 = MULTIPLY(tmp0 + tmp3, FIX(1.435427942)) + /* c2 */ 01372 MULTIPLY(tmp2 + tmp4, FIX(0.212906922)); /* c10 */ 01373 z2 = MULTIPLY(tmp1 - tmp3, FIX(0.979689713)); /* c6 */ 01374 z3 = MULTIPLY(tmp0 - tmp1, FIX(1.258538479)); /* c4 */ 01375 dataptr[DCTSIZE*2] = (DCTELEM) 01376 DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.077210542)) /* c2+c8-c6 */ 01377 - MULTIPLY(tmp4, FIX(1.471445400)), /* c4+c10 */ 01378 CONST_BITS+2); 01379 dataptr[DCTSIZE*4] = (DCTELEM) 01380 DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.065941844)) /* c4-c6-c10 */ 01381 - MULTIPLY(tmp2, FIX(1.435427942)) /* c2 */ 01382 + MULTIPLY(tmp4, FIX(0.621472312)), /* c8 */ 01383 CONST_BITS+2); 01384 dataptr[DCTSIZE*6] = (DCTELEM) 01385 DESCALE(z1 + z3 - MULTIPLY(tmp0, FIX(1.714276708)) /* c2+c4-c6 */ 01386 - MULTIPLY(tmp2, FIX(0.834379234)), /* c8+c10 */ 01387 CONST_BITS+2); 01388 01389 /* Odd part */ 01390 01391 tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.360834544)); /* c3 */ 01392 tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.130622199)); /* c5 
*/ 01393 tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.808813568)); /* c7 */ 01394 tmp0 = tmp1 + tmp2 + tmp3 - MULTIPLY(tmp10, FIX(1.819470145)) /* c7+c5+c3-c1 */ 01395 + MULTIPLY(tmp14, FIX(0.421479672)); /* c9 */ 01396 tmp4 = MULTIPLY(tmp11 + tmp12, - FIX(0.808813568)); /* -c7 */ 01397 tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.480800167)); /* -c1 */ 01398 tmp1 += tmp4 + tmp5 + MULTIPLY(tmp11, FIX(1.350258864)) /* c9+c7+c1-c3 */ 01399 - MULTIPLY(tmp14, FIX(1.130622199)); /* c5 */ 01400 tmp10 = MULTIPLY(tmp12 + tmp13, FIX(0.421479672)); /* c9 */ 01401 tmp2 += tmp4 + tmp10 - MULTIPLY(tmp12, FIX(2.104122847)) /* c9+c5+c3-c7 */ 01402 + MULTIPLY(tmp14, FIX(1.480800167)); /* c1 */ 01403 tmp3 += tmp5 + tmp10 + MULTIPLY(tmp13, FIX(1.381129125)) /* c1+c5-c9-c7 */ 01404 - MULTIPLY(tmp14, FIX(1.360834544)); /* c3 */ 01405 01406 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+2); 01407 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+2); 01408 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+2); 01409 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+2); 01410 01411 dataptr++; /* advance pointer to next column */ 01412 wsptr++; /* advance pointer to next column */ 01413 } 01414 } 01415 01416 01417 /* 01418 * Perform the forward DCT on a 12x12 sample block. 01419 */ 01420 01421 GLOBAL(void) 01422 jpeg_fdct_12x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 01423 { 01424 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; 01425 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; 01426 DCTELEM workspace[8*4]; 01427 DCTELEM *dataptr; 01428 DCTELEM *wsptr; 01429 JSAMPROW elemptr; 01430 int ctr; 01431 SHIFT_TEMPS 01432 01433 /* Pass 1: process rows. */ 01434 /* Note results are scaled up by sqrt(8) compared to a true DCT. */ 01435 /* cK represents sqrt(2) * cos(K*pi/24). 
*/ 01436 01437 dataptr = data; 01438 ctr = 0; 01439 for (;;) { 01440 elemptr = sample_data[ctr] + start_col; 01441 01442 /* Even part */ 01443 01444 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]); 01445 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]); 01446 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]); 01447 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]); 01448 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]); 01449 tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]); 01450 01451 tmp10 = tmp0 + tmp5; 01452 tmp13 = tmp0 - tmp5; 01453 tmp11 = tmp1 + tmp4; 01454 tmp14 = tmp1 - tmp4; 01455 tmp12 = tmp2 + tmp3; 01456 tmp15 = tmp2 - tmp3; 01457 01458 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]); 01459 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]); 01460 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]); 01461 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]); 01462 tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]); 01463 tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]); 01464 01465 /* Apply unsigned->signed conversion */ 01466 dataptr[0] = (DCTELEM) (tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE); 01467 dataptr[6] = (DCTELEM) (tmp13 - tmp14 - tmp15); 01468 dataptr[4] = (DCTELEM) 01469 DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */ 01470 CONST_BITS); 01471 dataptr[2] = (DCTELEM) 01472 DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */ 01473 CONST_BITS); 01474 01475 /* Odd part */ 01476 01477 tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100); /* c9 */ 01478 tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865); /* c3-c9 */ 01479 tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065); /* c3+c9 */ 01480 tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054)); /* c5 */ 01481 tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669)); /* c7 */ 01482 tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */ 01483 + MULTIPLY(tmp5, FIX(0.184591911)); /* c11 */ 01484 tmp11 = 
MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */ 01485 tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.339493912)) /* c1+c5-c11 */ 01486 + MULTIPLY(tmp5, FIX(0.860918669)); /* c7 */ 01487 tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.725788011)) /* c1+c11-c7 */ 01488 - MULTIPLY(tmp5, FIX(1.121971054)); /* c5 */ 01489 tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.306562965)) /* c3 */ 01490 - MULTIPLY(tmp2 + tmp5, FIX_0_541196100); /* c9 */ 01491 01492 dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS); 01493 dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS); 01494 dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS); 01495 dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS); 01496 01497 ctr++; 01498 01499 if (ctr != DCTSIZE) { 01500 if (ctr == 12) 01501 break; /* Done. */ 01502 dataptr += DCTSIZE; /* advance pointer to next row */ 01503 } else 01504 dataptr = workspace; /* switch pointer to extended workspace */ 01505 } 01506 01507 /* Pass 2: process columns. 01508 * We leave the results scaled up by an overall factor of 8. 01509 * We must also scale the output by (8/12)**2 = 4/9, which we partially 01510 * fold into the constant multipliers and final shifting: 01511 * cK now represents sqrt(2) * cos(K*pi/24) * 8/9. 
01512 */ 01513 01514 dataptr = data; 01515 wsptr = workspace; 01516 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { 01517 /* Even part */ 01518 01519 tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*3]; 01520 tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*2]; 01521 tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*1]; 01522 tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*0]; 01523 tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*7]; 01524 tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*6]; 01525 01526 tmp10 = tmp0 + tmp5; 01527 tmp13 = tmp0 - tmp5; 01528 tmp11 = tmp1 + tmp4; 01529 tmp14 = tmp1 - tmp4; 01530 tmp12 = tmp2 + tmp3; 01531 tmp15 = tmp2 - tmp3; 01532 01533 tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*3]; 01534 tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*2]; 01535 tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*1]; 01536 tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*0]; 01537 tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*7]; 01538 tmp5 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*6]; 01539 01540 dataptr[DCTSIZE*0] = (DCTELEM) 01541 DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(0.888888889)), /* 8/9 */ 01542 CONST_BITS+1); 01543 dataptr[DCTSIZE*6] = (DCTELEM) 01544 DESCALE(MULTIPLY(tmp13 - tmp14 - tmp15, FIX(0.888888889)), /* 8/9 */ 01545 CONST_BITS+1); 01546 dataptr[DCTSIZE*4] = (DCTELEM) 01547 DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.088662108)), /* c4 */ 01548 CONST_BITS+1); 01549 dataptr[DCTSIZE*2] = (DCTELEM) 01550 DESCALE(MULTIPLY(tmp14 - tmp15, FIX(0.888888889)) + /* 8/9 */ 01551 MULTIPLY(tmp13 + tmp15, FIX(1.214244803)), /* c2 */ 01552 CONST_BITS+1); 01553 01554 /* Odd part */ 01555 01556 tmp10 = MULTIPLY(tmp1 + tmp4, FIX(0.481063200)); /* c9 */ 01557 tmp14 = tmp10 + MULTIPLY(tmp1, FIX(0.680326102)); /* c3-c9 */ 01558 tmp15 = tmp10 - MULTIPLY(tmp4, FIX(1.642452502)); /* c3+c9 */ 01559 tmp12 = MULTIPLY(tmp0 + tmp2, FIX(0.997307603)); /* c5 */ 01560 tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.765261039)); /* c7 */ 01561 tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.516244403)) /* c5+c7-c1 */ 01562 + MULTIPLY(tmp5, 
FIX(0.164081699)); /* c11 */ 01563 tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.164081699)); /* -c11 */ 01564 tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.079550144)) /* c1+c5-c11 */ 01565 + MULTIPLY(tmp5, FIX(0.765261039)); /* c7 */ 01566 tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.645144899)) /* c1+c11-c7 */ 01567 - MULTIPLY(tmp5, FIX(0.997307603)); /* c5 */ 01568 tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.161389302)) /* c3 */ 01569 - MULTIPLY(tmp2 + tmp5, FIX(0.481063200)); /* c9 */ 01570 01571 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+1); 01572 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+1); 01573 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+1); 01574 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+1); 01575 01576 dataptr++; /* advance pointer to next column */ 01577 wsptr++; /* advance pointer to next column */ 01578 } 01579 } 01580 01581 01582 /* 01583 * Perform the forward DCT on a 13x13 sample block. 01584 */ 01585 01586 GLOBAL(void) 01587 jpeg_fdct_13x13 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 01588 { 01589 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; 01590 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; 01591 INT32 z1, z2; 01592 DCTELEM workspace[8*5]; 01593 DCTELEM *dataptr; 01594 DCTELEM *wsptr; 01595 JSAMPROW elemptr; 01596 int ctr; 01597 SHIFT_TEMPS 01598 01599 /* Pass 1: process rows. */ 01600 /* Note results are scaled up by sqrt(8) compared to a true DCT. */ 01601 /* cK represents sqrt(2) * cos(K*pi/26). 
*/ 01602 01603 dataptr = data; 01604 ctr = 0; 01605 for (;;) { 01606 elemptr = sample_data[ctr] + start_col; 01607 01608 /* Even part */ 01609 01610 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[12]); 01611 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[11]); 01612 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[10]); 01613 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[9]); 01614 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[8]); 01615 tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[7]); 01616 tmp6 = GETJSAMPLE(elemptr[6]); 01617 01618 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[12]); 01619 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[11]); 01620 tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[10]); 01621 tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[9]); 01622 tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[8]); 01623 tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[7]); 01624 01625 /* Apply unsigned->signed conversion */ 01626 dataptr[0] = (DCTELEM) 01627 (tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6 - 13 * CENTERJSAMPLE); 01628 tmp6 += tmp6; 01629 tmp0 -= tmp6; 01630 tmp1 -= tmp6; 01631 tmp2 -= tmp6; 01632 tmp3 -= tmp6; 01633 tmp4 -= tmp6; 01634 tmp5 -= tmp6; 01635 dataptr[2] = (DCTELEM) 01636 DESCALE(MULTIPLY(tmp0, FIX(1.373119086)) + /* c2 */ 01637 MULTIPLY(tmp1, FIX(1.058554052)) + /* c6 */ 01638 MULTIPLY(tmp2, FIX(0.501487041)) - /* c10 */ 01639 MULTIPLY(tmp3, FIX(0.170464608)) - /* c12 */ 01640 MULTIPLY(tmp4, FIX(0.803364869)) - /* c8 */ 01641 MULTIPLY(tmp5, FIX(1.252223920)), /* c4 */ 01642 CONST_BITS); 01643 z1 = MULTIPLY(tmp0 - tmp2, FIX(1.155388986)) - /* (c4+c6)/2 */ 01644 MULTIPLY(tmp3 - tmp4, FIX(0.435816023)) - /* (c2-c10)/2 */ 01645 MULTIPLY(tmp1 - tmp5, FIX(0.316450131)); /* (c8-c12)/2 */ 01646 z2 = MULTIPLY(tmp0 + tmp2, FIX(0.096834934)) - /* (c4-c6)/2 */ 01647 MULTIPLY(tmp3 + tmp4, FIX(0.937303064)) + /* (c2+c10)/2 */ 01648 MULTIPLY(tmp1 + tmp5, FIX(0.486914739)); /* (c8+c12)/2 */ 01649 
01650 dataptr[4] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS); 01651 dataptr[6] = (DCTELEM) DESCALE(z1 - z2, CONST_BITS); 01652 01653 /* Odd part */ 01654 01655 tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.322312651)); /* c3 */ 01656 tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.163874945)); /* c5 */ 01657 tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.937797057)) + /* c7 */ 01658 MULTIPLY(tmp14 + tmp15, FIX(0.338443458)); /* c11 */ 01659 tmp0 = tmp1 + tmp2 + tmp3 - 01660 MULTIPLY(tmp10, FIX(2.020082300)) + /* c3+c5+c7-c1 */ 01661 MULTIPLY(tmp14, FIX(0.318774355)); /* c9-c11 */ 01662 tmp4 = MULTIPLY(tmp14 - tmp15, FIX(0.937797057)) - /* c7 */ 01663 MULTIPLY(tmp11 + tmp12, FIX(0.338443458)); /* c11 */ 01664 tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.163874945)); /* -c5 */ 01665 tmp1 += tmp4 + tmp5 + 01666 MULTIPLY(tmp11, FIX(0.837223564)) - /* c5+c9+c11-c3 */ 01667 MULTIPLY(tmp14, FIX(2.341699410)); /* c1+c7 */ 01668 tmp6 = MULTIPLY(tmp12 + tmp13, - FIX(0.657217813)); /* -c9 */ 01669 tmp2 += tmp4 + tmp6 - 01670 MULTIPLY(tmp12, FIX(1.572116027)) + /* c1+c5-c9-c11 */ 01671 MULTIPLY(tmp15, FIX(2.260109708)); /* c3+c7 */ 01672 tmp3 += tmp5 + tmp6 + 01673 MULTIPLY(tmp13, FIX(2.205608352)) - /* c3+c5+c9-c7 */ 01674 MULTIPLY(tmp15, FIX(1.742345811)); /* c1+c11 */ 01675 01676 dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS); 01677 dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS); 01678 dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS); 01679 dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS); 01680 01681 ctr++; 01682 01683 if (ctr != DCTSIZE) { 01684 if (ctr == 13) 01685 break; /* Done. */ 01686 dataptr += DCTSIZE; /* advance pointer to next row */ 01687 } else 01688 dataptr = workspace; /* switch pointer to extended workspace */ 01689 } 01690 01691 /* Pass 2: process columns. 01692 * We leave the results scaled up by an overall factor of 8. 
01693 * We must also scale the output by (8/13)**2 = 64/169, which we partially 01694 * fold into the constant multipliers and final shifting: 01695 * cK now represents sqrt(2) * cos(K*pi/26) * 128/169. 01696 */ 01697 01698 dataptr = data; 01699 wsptr = workspace; 01700 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { 01701 /* Even part */ 01702 01703 tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*4]; 01704 tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*3]; 01705 tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*2]; 01706 tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*1]; 01707 tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*0]; 01708 tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*7]; 01709 tmp6 = dataptr[DCTSIZE*6]; 01710 01711 tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*4]; 01712 tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*3]; 01713 tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*2]; 01714 tmp13 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*1]; 01715 tmp14 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*0]; 01716 tmp15 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*7]; 01717 01718 dataptr[DCTSIZE*0] = (DCTELEM) 01719 DESCALE(MULTIPLY(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6, 01720 FIX(0.757396450)), /* 128/169 */ 01721 CONST_BITS+1); 01722 tmp6 += tmp6; 01723 tmp0 -= tmp6; 01724 tmp1 -= tmp6; 01725 tmp2 -= tmp6; 01726 tmp3 -= tmp6; 01727 tmp4 -= tmp6; 01728 tmp5 -= tmp6; 01729 dataptr[DCTSIZE*2] = (DCTELEM) 01730 DESCALE(MULTIPLY(tmp0, FIX(1.039995521)) + /* c2 */ 01731 MULTIPLY(tmp1, FIX(0.801745081)) + /* c6 */ 01732 MULTIPLY(tmp2, FIX(0.379824504)) - /* c10 */ 01733 MULTIPLY(tmp3, FIX(0.129109289)) - /* c12 */ 01734 MULTIPLY(tmp4, FIX(0.608465700)) - /* c8 */ 01735 MULTIPLY(tmp5, FIX(0.948429952)), /* c4 */ 01736 CONST_BITS+1); 01737 z1 = MULTIPLY(tmp0 - tmp2, FIX(0.875087516)) - /* (c4+c6)/2 */ 01738 MULTIPLY(tmp3 - tmp4, FIX(0.330085509)) - /* (c2-c10)/2 */ 01739 MULTIPLY(tmp1 - tmp5, FIX(0.239678205)); /* (c8-c12)/2 */ 01740 z2 = MULTIPLY(tmp0 + tmp2, FIX(0.073342435)) - /* (c4-c6)/2 */ 01741 MULTIPLY(tmp3 + tmp4, 
FIX(0.709910013)) + /* (c2+c10)/2 */ 01742 MULTIPLY(tmp1 + tmp5, FIX(0.368787494)); /* (c8+c12)/2 */ 01743 01744 dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+1); 01745 dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 - z2, CONST_BITS+1); 01746 01747 /* Odd part */ 01748 01749 tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.001514908)); /* c3 */ 01750 tmp2 = MULTIPLY(tmp10 + tmp12, FIX(0.881514751)); /* c5 */ 01751 tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.710284161)) + /* c7 */ 01752 MULTIPLY(tmp14 + tmp15, FIX(0.256335874)); /* c11 */ 01753 tmp0 = tmp1 + tmp2 + tmp3 - 01754 MULTIPLY(tmp10, FIX(1.530003162)) + /* c3+c5+c7-c1 */ 01755 MULTIPLY(tmp14, FIX(0.241438564)); /* c9-c11 */ 01756 tmp4 = MULTIPLY(tmp14 - tmp15, FIX(0.710284161)) - /* c7 */ 01757 MULTIPLY(tmp11 + tmp12, FIX(0.256335874)); /* c11 */ 01758 tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(0.881514751)); /* -c5 */ 01759 tmp1 += tmp4 + tmp5 + 01760 MULTIPLY(tmp11, FIX(0.634110155)) - /* c5+c9+c11-c3 */ 01761 MULTIPLY(tmp14, FIX(1.773594819)); /* c1+c7 */ 01762 tmp6 = MULTIPLY(tmp12 + tmp13, - FIX(0.497774438)); /* -c9 */ 01763 tmp2 += tmp4 + tmp6 - 01764 MULTIPLY(tmp12, FIX(1.190715098)) + /* c1+c5-c9-c11 */ 01765 MULTIPLY(tmp15, FIX(1.711799069)); /* c3+c7 */ 01766 tmp3 += tmp5 + tmp6 + 01767 MULTIPLY(tmp13, FIX(1.670519935)) - /* c3+c5+c9-c7 */ 01768 MULTIPLY(tmp15, FIX(1.319646532)); /* c1+c11 */ 01769 01770 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+1); 01771 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+1); 01772 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+1); 01773 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+1); 01774 01775 dataptr++; /* advance pointer to next column */ 01776 wsptr++; /* advance pointer to next column */ 01777 } 01778 } 01779 01780 01781 /* 01782 * Perform the forward DCT on a 14x14 sample block. 
01783 */ /* jpeg_fdct_14x14 -- Reads sample rows sample_data[0..13], columns start_col..start_col+13, and writes coefficients 0..7 of each direction into data (row stride DCTSIZE). Pass 1 transforms the 14 rows: results for the first DCTSIZE rows go to data, the remaining 6 rows go to the local workspace. Pass 2 transforms DCTSIZE columns, reading from both buffers and overwriting data in place. */ 01784 01785 GLOBAL(void) 01786 jpeg_fdct_14x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 01787 { 01788 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; 01789 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; 01790 DCTELEM workspace[8*6]; 01791 DCTELEM *dataptr; 01792 DCTELEM *wsptr; 01793 JSAMPROW elemptr; 01794 int ctr; 01795 SHIFT_TEMPS 01796 01797 /* Pass 1: process rows. */ 01798 /* Note results are scaled up by sqrt(8) compared to a true DCT. */ 01799 /* cK represents sqrt(2) * cos(K*pi/28). */ 01800 01801 dataptr = data; 01802 ctr = 0; 01803 for (;;) { 01804 elemptr = sample_data[ctr] + start_col; 01805 01806 /* Even part */ 01807 01808 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]); 01809 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]); 01810 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]); 01811 tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]); 01812 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]); 01813 tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]); 01814 tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]); 01815 01816 tmp10 = tmp0 + tmp6; 01817 tmp14 = tmp0 - tmp6; 01818 tmp11 = tmp1 + tmp5; 01819 tmp15 = tmp1 - tmp5; 01820 tmp12 = tmp2 + tmp4; 01821 tmp16 = tmp2 - tmp4; 01822 01823 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]); 01824 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]); 01825 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]); 01826 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]); 01827 tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]); 01828 tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]); 01829 tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]); 01830 01831 /* Apply unsigned->signed conversion */ 01832 dataptr[0] = (DCTELEM) 01833 (tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE); 01834 /* tmp13 (sample 3 + sample 10) is doubled so that each of the three terms below subtracts 2*tmp13. */ tmp13 += tmp13; 01835 dataptr[4] = (DCTELEM) 01836 DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */ 01837 
MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */ 01838 MULTIPLY(tmp12 - tmp13, FIX(0.881747734)), /* c8 */ 01839 CONST_BITS); 01840 01841 tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686)); /* c6 */ 01842 01843 dataptr[2] = (DCTELEM) 01844 DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590)) /* c2-c6 */ 01845 + MULTIPLY(tmp16, FIX(0.613604268)), /* c10 */ 01846 CONST_BITS); 01847 dataptr[6] = (DCTELEM) 01848 DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954)) /* c6+c10 */ 01849 - MULTIPLY(tmp16, FIX(1.378756276)), /* c2 */ 01850 CONST_BITS); 01851 01852 /* Odd part */ 01853 01854 tmp10 = tmp1 + tmp2; 01855 tmp11 = tmp5 - tmp4; 01856 dataptr[7] = (DCTELEM) (tmp0 - tmp10 + tmp3 - tmp11 - tmp6); 01857 /* tmp3 is scaled up by 2**CONST_BITS because it is summed with MULTIPLY() products below before DESCALE. NOTE(review): tmp3 is a sample difference and may be negative; left-shifting a negative value is undefined in ISO C -- confirm the supported compilers treat it as a multiplication. */ tmp3 <<= CONST_BITS; 01858 tmp10 = MULTIPLY(tmp10, - FIX(0.158341681)); /* -c13 */ 01859 tmp11 = MULTIPLY(tmp11, FIX(1.405321284)); /* c1 */ 01860 tmp10 += tmp11 - tmp3; 01861 tmp11 = MULTIPLY(tmp0 + tmp2, FIX(1.197448846)) + /* c5 */ 01862 MULTIPLY(tmp4 + tmp6, FIX(0.752406978)); /* c9 */ 01863 dataptr[5] = (DCTELEM) 01864 DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(2.373959773)) /* c3+c5-c13 */ 01865 + MULTIPLY(tmp4, FIX(1.119999435)), /* c1+c11-c9 */ 01866 CONST_BITS); 01867 tmp12 = MULTIPLY(tmp0 + tmp1, FIX(1.334852607)) + /* c3 */ 01868 MULTIPLY(tmp5 - tmp6, FIX(0.467085129)); /* c11 */ 01869 dataptr[3] = (DCTELEM) 01870 DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.424103948)) /* c3-c9-c13 */ 01871 - MULTIPLY(tmp5, FIX(3.069855259)), /* c1+c5+c11 */ 01872 CONST_BITS); 01873 dataptr[1] = (DCTELEM) 01874 DESCALE(tmp11 + tmp12 + tmp3 + tmp6 - 01875 MULTIPLY(tmp0 + tmp6, FIX(1.126980169)), /* c3+c5-c1 */ 01876 CONST_BITS); 01877 01878 ctr++; 01879 01880 /* Output routing: rows 0..DCTSIZE-1 are written to data; when ctr reaches DCTSIZE the output pointer switches to workspace, and the loop exits once row 13 is done. */ if (ctr != DCTSIZE) { 01881 if (ctr == 14) 01882 break; /* Done. */ 01883 dataptr += DCTSIZE; /* advance pointer to next row */ 01884 } else 01885 dataptr = workspace; /* switch pointer to extended workspace */ 01886 } 01887 01888 /* Pass 2: process columns. 01889 * We leave the results scaled up by an overall factor of 8. 
01890 * We must also scale the output by (8/14)**2 = 16/49, which we partially 01891 * fold into the constant multipliers and final shifting: 01892 * cK now represents sqrt(2) * cos(K*pi/28) * 32/49. 01893 */ 01894 01895 dataptr = data; 01896 wsptr = workspace; 01897 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { 01898 /* Even part */ 01899 01900 /* Pass-1 rows 0..7 are read from data; rows 8..13 are read from workspace, where wsptr[DCTSIZE*k] holds row 8+k. */ tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*5]; 01901 tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*4]; 01902 tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*3]; 01903 tmp13 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*2]; 01904 tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*1]; 01905 tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*0]; 01906 tmp6 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7]; 01907 01908 tmp10 = tmp0 + tmp6; 01909 tmp14 = tmp0 - tmp6; 01910 tmp11 = tmp1 + tmp5; 01911 tmp15 = tmp1 - tmp5; 01912 tmp12 = tmp2 + tmp4; 01913 tmp16 = tmp2 - tmp4; 01914 01915 tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*5]; 01916 tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*4]; 01917 tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*3]; 01918 tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*2]; 01919 tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*1]; 01920 tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*0]; 01921 tmp6 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7]; 01922 01923 dataptr[DCTSIZE*0] = (DCTELEM) 01924 DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12 + tmp13, 01925 FIX(0.653061224)), /* 32/49 */ 01926 CONST_BITS+1); 01927 tmp13 += tmp13; 01928 dataptr[DCTSIZE*4] = (DCTELEM) 01929 DESCALE(MULTIPLY(tmp10 - tmp13, FIX(0.832106052)) + /* c4 */ 01930 MULTIPLY(tmp11 - tmp13, FIX(0.205513223)) - /* c12 */ 01931 MULTIPLY(tmp12 - tmp13, FIX(0.575835255)), /* c8 */ 01932 CONST_BITS+1); 01933 01934 tmp10 = MULTIPLY(tmp14 + tmp15, FIX(0.722074570)); /* c6 */ 01935 01936 dataptr[DCTSIZE*2] = (DCTELEM) 01937 DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.178337691)) /* c2-c6 */ 01938 + MULTIPLY(tmp16, FIX(0.400721155)), /* c10 */ 01939 CONST_BITS+1); 01940 dataptr[DCTSIZE*6] = (DCTELEM) 01941 DESCALE(tmp10 - MULTIPLY(tmp15, 
FIX(1.122795725)) /* c6+c10 */ 01942 - MULTIPLY(tmp16, FIX(0.900412262)), /* c2 */ 01943 CONST_BITS+1); 01944 01945 /* Odd part */ 01946 01947 tmp10 = tmp1 + tmp2; 01948 tmp11 = tmp5 - tmp4; 01949 dataptr[DCTSIZE*7] = (DCTELEM) 01950 DESCALE(MULTIPLY(tmp0 - tmp10 + tmp3 - tmp11 - tmp6, 01951 FIX(0.653061224)), /* 32/49 */ 01952 CONST_BITS+1); 01953 tmp3 = MULTIPLY(tmp3 , FIX(0.653061224)); /* 32/49 */ 01954 tmp10 = MULTIPLY(tmp10, - FIX(0.103406812)); /* -c13 */ 01955 tmp11 = MULTIPLY(tmp11, FIX(0.917760839)); /* c1 */ 01956 tmp10 += tmp11 - tmp3; 01957 tmp11 = MULTIPLY(tmp0 + tmp2, FIX(0.782007410)) + /* c5 */ 01958 MULTIPLY(tmp4 + tmp6, FIX(0.491367823)); /* c9 */ 01959 dataptr[DCTSIZE*5] = (DCTELEM) 01960 DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(1.550341076)) /* c3+c5-c13 */ 01961 + MULTIPLY(tmp4, FIX(0.731428202)), /* c1+c11-c9 */ 01962 CONST_BITS+1); 01963 tmp12 = MULTIPLY(tmp0 + tmp1, FIX(0.871740478)) + /* c3 */ 01964 MULTIPLY(tmp5 - tmp6, FIX(0.305035186)); /* c11 */ 01965 dataptr[DCTSIZE*3] = (DCTELEM) 01966 DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.276965844)) /* c3-c9-c13 */ 01967 - MULTIPLY(tmp5, FIX(2.004803435)), /* c1+c5+c11 */ 01968 CONST_BITS+1); 01969 dataptr[DCTSIZE*1] = (DCTELEM) 01970 DESCALE(tmp11 + tmp12 + tmp3 01971 - MULTIPLY(tmp0, FIX(0.735987049)) /* c3+c5-c1 */ 01972 - MULTIPLY(tmp6, FIX(0.082925825)), /* c9-c11-c13 */ 01973 CONST_BITS+1); 01974 01975 dataptr++; /* advance pointer to next column */ 01976 wsptr++; /* advance pointer to next column */ 01977 } 01978 } 01979 01980 01981 /* 01982 * Perform the forward DCT on a 15x15 sample block. 
01983 */ /* jpeg_fdct_15x15 -- Reads sample rows sample_data[0..14], columns start_col..start_col+14, and writes coefficients 0..7 of each direction into data (row stride DCTSIZE). Pass 1 transforms the 15 rows: results for the first DCTSIZE rows go to data, the remaining 7 rows go to the local workspace. Pass 2 transforms DCTSIZE columns, reading from both buffers and overwriting data in place. */ 01984 01985 GLOBAL(void) 01986 jpeg_fdct_15x15 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 01987 { 01988 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; 01989 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; 01990 INT32 z1, z2, z3; 01991 DCTELEM workspace[8*7]; 01992 DCTELEM *dataptr; 01993 DCTELEM *wsptr; 01994 JSAMPROW elemptr; 01995 int ctr; 01996 SHIFT_TEMPS 01997 01998 /* Pass 1: process rows. */ 01999 /* Note results are scaled up by sqrt(8) compared to a true DCT. */ 02000 /* cK represents sqrt(2) * cos(K*pi/30). */ 02001 02002 dataptr = data; 02003 ctr = 0; 02004 for (;;) { 02005 elemptr = sample_data[ctr] + start_col; 02006 02007 /* Even part */ 02008 02009 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[14]); 02010 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[13]); 02011 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[12]); 02012 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[11]); 02013 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[10]); 02014 tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[9]); 02015 tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[8]); 02016 tmp7 = GETJSAMPLE(elemptr[7]); 02017 02018 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[14]); 02019 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[13]); 02020 tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[12]); 02021 tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[11]); 02022 tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[10]); 02023 tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[9]); 02024 tmp16 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[8]); 02025 02026 z1 = tmp0 + tmp4 + tmp5; 02027 z2 = tmp1 + tmp3 + tmp6; 02028 z3 = tmp2 + tmp7; 02029 /* Apply unsigned->signed conversion */ 02030 dataptr[0] = (DCTELEM) (z1 + z2 + z3 - 15 * CENTERJSAMPLE); 02031 /* z3 is doubled so that both terms below subtract 2*z3. */ z3 += z3; 02032 dataptr[6] = (DCTELEM) 02033 DESCALE(MULTIPLY(z1 - z3, FIX(1.144122806)) - /* c6 */ 02034 MULTIPLY(z2 - z3, FIX(0.437016024)), /* c12 */ 
02035 CONST_BITS); 02036 tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7; 02037 z1 = MULTIPLY(tmp3 - tmp2, FIX(1.531135173)) - /* c2+c14 */ 02038 MULTIPLY(tmp6 - tmp2, FIX(2.238241955)); /* c4+c8 */ 02039 z2 = MULTIPLY(tmp5 - tmp2, FIX(0.798468008)) - /* c8-c14 */ 02040 MULTIPLY(tmp0 - tmp2, FIX(0.091361227)); /* c2-c4 */ 02041 z3 = MULTIPLY(tmp0 - tmp3, FIX(1.383309603)) + /* c2 */ 02042 MULTIPLY(tmp6 - tmp5, FIX(0.946293579)) + /* c8 */ 02043 MULTIPLY(tmp1 - tmp4, FIX(0.790569415)); /* (c6+c12)/2 */ 02044 02045 dataptr[2] = (DCTELEM) DESCALE(z1 + z3, CONST_BITS); 02046 dataptr[4] = (DCTELEM) DESCALE(z2 + z3, CONST_BITS); 02047 02048 /* Odd part */ 02049 02050 tmp2 = MULTIPLY(tmp10 - tmp12 - tmp13 + tmp15 + tmp16, 02051 FIX(1.224744871)); /* c5 */ 02052 tmp1 = MULTIPLY(tmp10 - tmp14 - tmp15, FIX(1.344997024)) + /* c3 */ 02053 MULTIPLY(tmp11 - tmp13 - tmp16, FIX(0.831253876)); /* c9 */ 02054 tmp12 = MULTIPLY(tmp12, FIX(1.224744871)); /* c5 */ 02055 tmp4 = MULTIPLY(tmp10 - tmp16, FIX(1.406466353)) + /* c1 */ 02056 MULTIPLY(tmp11 + tmp14, FIX(1.344997024)) + /* c3 */ 02057 MULTIPLY(tmp13 + tmp15, FIX(0.575212477)); /* c11 */ 02058 tmp0 = MULTIPLY(tmp13, FIX(0.475753014)) - /* c7-c11 */ 02059 MULTIPLY(tmp14, FIX(0.513743148)) + /* c3-c9 */ 02060 MULTIPLY(tmp16, FIX(1.700497885)) + tmp4 + tmp12; /* c1+c13 */ 02061 tmp3 = MULTIPLY(tmp10, - FIX(0.355500862)) - /* -(c1-c7) */ 02062 MULTIPLY(tmp11, FIX(2.176250899)) - /* c3+c9 */ 02063 MULTIPLY(tmp15, FIX(0.869244010)) + tmp4 - tmp12; /* c11+c13 */ 02064 02065 dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS); 02066 dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS); 02067 dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS); 02068 dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS); 02069 02070 ctr++; 02071 02072 /* Output routing: rows 0..DCTSIZE-1 are written to data; when ctr reaches DCTSIZE the output pointer switches to workspace, and the loop exits once row 14 is done. */ if (ctr != DCTSIZE) { 02073 if (ctr == 15) 02074 break; /* Done. 
*/ 02075 dataptr += DCTSIZE; /* advance pointer to next row */ 02076 } else 02077 dataptr = workspace; /* switch pointer to extended workspace */ 02078 } 02079 02080 /* Pass 2: process columns. 02081 * We leave the results scaled up by an overall factor of 8. 02082 * We must also scale the output by (8/15)**2 = 64/225, which we partially 02083 * fold into the constant multipliers and final shifting: 02084 * cK now represents sqrt(2) * cos(K*pi/30) * 256/225. 02085 */ 02086 02087 dataptr = data; 02088 wsptr = workspace; 02089 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { 02090 /* Even part */ 02091 02092 /* Pass-1 rows 0..7 are read from data; rows 8..14 are read from workspace, where wsptr[DCTSIZE*k] holds row 8+k. */ tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*6]; 02093 tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*5]; 02094 tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*4]; 02095 tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*3]; 02096 tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*2]; 02097 tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*1]; 02098 tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*0]; 02099 tmp7 = dataptr[DCTSIZE*7]; 02100 02101 tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*6]; 02102 tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*5]; 02103 tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*4]; 02104 tmp13 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*3]; 02105 tmp14 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*2]; 02106 tmp15 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*1]; 02107 tmp16 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*0]; 02108 02109 z1 = tmp0 + tmp4 + tmp5; 02110 z2 = tmp1 + tmp3 + tmp6; 02111 z3 = tmp2 + tmp7; 02112 dataptr[DCTSIZE*0] = (DCTELEM) 02113 DESCALE(MULTIPLY(z1 + z2 + z3, FIX(1.137777778)), /* 256/225 */ 02114 CONST_BITS+2); 02115 z3 += z3; 02116 dataptr[DCTSIZE*6] = (DCTELEM) 02117 DESCALE(MULTIPLY(z1 - z3, FIX(1.301757503)) - /* c6 */ 02118 MULTIPLY(z2 - z3, FIX(0.497227121)), /* c12 */ 02119 CONST_BITS+2); 02120 /* NOTE(review): tmp1 + tmp4 is a sum of pass-1 results and may be negative if DCTELEM is signed; right-shifting a negative value is implementation-defined in ISO C -- confirm. */ tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7; 02121 z1 = MULTIPLY(tmp3 - tmp2, FIX(1.742091575)) - /* c2+c14 */ 02122 MULTIPLY(tmp6 - tmp2, FIX(2.546621957)); /* c4+c8 */ 02123 z2 = MULTIPLY(tmp5 - tmp2, 
FIX(0.908479156)) - /* c8-c14 */ 02124 MULTIPLY(tmp0 - tmp2, FIX(0.103948774)); /* c2-c4 */ 02125 z3 = MULTIPLY(tmp0 - tmp3, FIX(1.573898926)) + /* c2 */ 02126 MULTIPLY(tmp6 - tmp5, FIX(1.076671805)) + /* c8 */ 02127 MULTIPLY(tmp1 - tmp4, FIX(0.899492312)); /* (c6+c12)/2 */ 02128 02129 dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z3, CONST_BITS+2); 02130 dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(z2 + z3, CONST_BITS+2); 02131 02132 /* Odd part */ 02133 02134 tmp2 = MULTIPLY(tmp10 - tmp12 - tmp13 + tmp15 + tmp16, 02135 FIX(1.393487498)); /* c5 */ 02136 tmp1 = MULTIPLY(tmp10 - tmp14 - tmp15, FIX(1.530307725)) + /* c3 */ 02137 MULTIPLY(tmp11 - tmp13 - tmp16, FIX(0.945782187)); /* c9 */ 02138 tmp12 = MULTIPLY(tmp12, FIX(1.393487498)); /* c5 */ 02139 tmp4 = MULTIPLY(tmp10 - tmp16, FIX(1.600246161)) + /* c1 */ 02140 MULTIPLY(tmp11 + tmp14, FIX(1.530307725)) + /* c3 */ 02141 MULTIPLY(tmp13 + tmp15, FIX(0.654463974)); /* c11 */ 02142 tmp0 = MULTIPLY(tmp13, FIX(0.541301207)) - /* c7-c11 */ 02143 MULTIPLY(tmp14, FIX(0.584525538)) + /* c3-c9 */ 02144 MULTIPLY(tmp16, FIX(1.934788705)) + tmp4 + tmp12; /* c1+c13 */ 02145 tmp3 = MULTIPLY(tmp10, - FIX(0.404480980)) - /* -(c1-c7) */ 02146 MULTIPLY(tmp11, FIX(2.476089912)) - /* c3+c9 */ 02147 MULTIPLY(tmp15, FIX(0.989006518)) + tmp4 - tmp12; /* c11+c13 */ 02148 02149 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+2); 02150 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+2); 02151 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+2); 02152 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+2); 02153 02154 dataptr++; /* advance pointer to next column */ 02155 wsptr++; /* advance pointer to next column */ 02156 } 02157 } 02158 02159 02160 /* 02161 * Perform the forward DCT on a 16x16 sample block. 
02162 */ /* jpeg_fdct_16x16 -- Reads sample rows sample_data[0..DCTSIZE*2-1], columns start_col..start_col+15, and writes coefficients 0..7 of each direction into data (row stride DCTSIZE). Pass 1 transforms the rows: results for the first DCTSIZE rows go to data, the following DCTSIZE rows go to the local workspace. Pass 2 transforms DCTSIZE columns, reading from both buffers and overwriting data in place. */ 02163 02164 GLOBAL(void) 02165 jpeg_fdct_16x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 02166 { 02167 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; 02168 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17; 02169 DCTELEM workspace[DCTSIZE2]; 02170 DCTELEM *dataptr; 02171 DCTELEM *wsptr; 02172 JSAMPROW elemptr; 02173 int ctr; 02174 SHIFT_TEMPS 02175 02176 /* Pass 1: process rows. */ 02177 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 02178 /* furthermore, we scale the results by 2**PASS1_BITS. */ 02179 /* cK represents sqrt(2) * cos(K*pi/32). */ 02180 02181 dataptr = data; 02182 ctr = 0; 02183 for (;;) { 02184 elemptr = sample_data[ctr] + start_col; 02185 02186 /* Even part */ 02187 02188 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]); 02189 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]); 02190 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]); 02191 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]); 02192 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]); 02193 tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]); 02194 tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]); 02195 tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]); 02196 02197 tmp10 = tmp0 + tmp7; 02198 tmp14 = tmp0 - tmp7; 02199 tmp11 = tmp1 + tmp6; 02200 tmp15 = tmp1 - tmp6; 02201 tmp12 = tmp2 + tmp5; 02202 tmp16 = tmp2 - tmp5; 02203 tmp13 = tmp3 + tmp4; 02204 tmp17 = tmp3 - tmp4; 02205 02206 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]); 02207 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]); 02208 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]); 02209 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]); 02210 tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]); 02211 tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]); 02212 tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]); 02213 tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]); 02214 
02215 /* Apply unsigned->signed conversion */ 02216 /* NOTE(review): the value shifted below is negative whenever the 16 samples sum to less than 16*CENTERJSAMPLE; left-shifting a negative value is undefined in ISO C -- confirm the supported compilers treat it as a multiplication by 2**PASS1_BITS. */ dataptr[0] = (DCTELEM) 02217 ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) << PASS1_BITS); 02218 dataptr[4] = (DCTELEM) 02219 DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */ 02220 MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */ 02221 CONST_BITS-PASS1_BITS); 02222 02223 tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */ 02224 MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */ 02225 02226 dataptr[2] = (DCTELEM) 02227 DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */ 02228 + MULTIPLY(tmp16, FIX(2.172734804)), /* c2+c10 */ 02229 CONST_BITS-PASS1_BITS); 02230 dataptr[6] = (DCTELEM) 02231 DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */ 02232 - MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */ 02233 CONST_BITS-PASS1_BITS); 02234 02235 /* Odd part */ 02236 02237 tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) + /* c3 */ 02238 MULTIPLY(tmp6 - tmp7, FIX(0.410524528)); /* c13 */ 02239 tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) + /* c5 */ 02240 MULTIPLY(tmp5 + tmp7, FIX(0.666655658)); /* c11 */ 02241 tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) + /* c7 */ 02242 MULTIPLY(tmp4 - tmp7, FIX(0.897167586)); /* c9 */ 02243 tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) + /* c15 */ 02244 MULTIPLY(tmp6 - tmp5, FIX(1.407403738)); /* c1 */ 02245 tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) + /* -c11 */ 02246 MULTIPLY(tmp4 + tmp6, - FIX(1.247225013)); /* -c5 */ 02247 tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) + /* -c3 */ 02248 MULTIPLY(tmp5 - tmp4, FIX(0.410524528)); /* c13 */ 02249 tmp10 = tmp11 + tmp12 + tmp13 - 02250 MULTIPLY(tmp0, FIX(2.286341144)) + /* c7+c5+c3-c1 */ 02251 MULTIPLY(tmp7, FIX(0.779653625)); /* c15+c13-c11+c9 */ 02252 tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */ 02253 - MULTIPLY(tmp6, FIX(1.663905119)); /* c7+c13+c1-c5 */ 02254 tmp12 += tmp14 + tmp16 - 
MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */ 02255 + MULTIPLY(tmp5, FIX(1.227391138)); /* c9-c11+c1-c13 */ 02256 tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */ 02257 + MULTIPLY(tmp4, FIX(2.167985692)); /* c1+c13+c5-c9 */ 02258 02259 dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS); 02260 dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS); 02261 dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS); 02262 dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS); 02263 02264 ctr++; 02265 02266 /* Output routing: rows 0..DCTSIZE-1 are written to data; when ctr reaches DCTSIZE the output pointer switches to workspace, and the loop exits once ctr reaches DCTSIZE * 2. */ if (ctr != DCTSIZE) { 02267 if (ctr == DCTSIZE * 2) 02268 break; /* Done. */ 02269 dataptr += DCTSIZE; /* advance pointer to next row */ 02270 } else 02271 dataptr = workspace; /* switch pointer to extended workspace */ 02272 } 02273 02274 /* Pass 2: process columns. 02275 * We remove the PASS1_BITS scaling, but leave the results scaled up 02276 * by an overall factor of 8. 02277 * We must also scale the output by (8/16)**2 = 1/2**2. 
02278 */ 02279 02280 dataptr = data; 02281 wsptr = workspace; 02282 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { 02283 /* Even part */ 02284 02285 /* Pass-1 rows 0..7 are read from data; rows 8..15 are read from workspace, where wsptr[DCTSIZE*k] holds row 8+k. */ tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7]; 02286 tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6]; 02287 tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5]; 02288 tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4]; 02289 tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3]; 02290 tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2]; 02291 tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1]; 02292 tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0]; 02293 02294 tmp10 = tmp0 + tmp7; 02295 tmp14 = tmp0 - tmp7; 02296 tmp11 = tmp1 + tmp6; 02297 tmp15 = tmp1 - tmp6; 02298 tmp12 = tmp2 + tmp5; 02299 tmp16 = tmp2 - tmp5; 02300 tmp13 = tmp3 + tmp4; 02301 tmp17 = tmp3 - tmp4; 02302 02303 tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*7]; 02304 tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*6]; 02305 tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*5]; 02306 tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*4]; 02307 tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*3]; 02308 tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*2]; 02309 tmp6 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*1]; 02310 tmp7 = dataptr[DCTSIZE*7] - wsptr[DCTSIZE*0]; 02311 02312 dataptr[DCTSIZE*0] = (DCTELEM) 02313 DESCALE(tmp10 + tmp11 + tmp12 + tmp13, PASS1_BITS+2); 02314 dataptr[DCTSIZE*4] = (DCTELEM) 02315 DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */ 02316 MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */ 02317 CONST_BITS+PASS1_BITS+2); 02318 02319 tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */ 02320 MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */ 02321 02322 dataptr[DCTSIZE*2] = (DCTELEM) 02323 DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */ 02324 + MULTIPLY(tmp16, FIX(2.172734804)), /* c2+c10 */ 02325 CONST_BITS+PASS1_BITS+2); 02326 dataptr[DCTSIZE*6] = (DCTELEM) 02327 DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */ 02328 - 
MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */ 02329 CONST_BITS+PASS1_BITS+2); 02330 02331 /* Odd part */ 02332 02333 tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) + /* c3 */ 02334 MULTIPLY(tmp6 - tmp7, FIX(0.410524528)); /* c13 */ 02335 tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) + /* c5 */ 02336 MULTIPLY(tmp5 + tmp7, FIX(0.666655658)); /* c11 */ 02337 tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) + /* c7 */ 02338 MULTIPLY(tmp4 - tmp7, FIX(0.897167586)); /* c9 */ 02339 tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) + /* c15 */ 02340 MULTIPLY(tmp6 - tmp5, FIX(1.407403738)); /* c1 */ 02341 tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) + /* -c11 */ 02342 MULTIPLY(tmp4 + tmp6, - FIX(1.247225013)); /* -c5 */ 02343 tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) + /* -c3 */ 02344 MULTIPLY(tmp5 - tmp4, FIX(0.410524528)); /* c13 */ 02345 tmp10 = tmp11 + tmp12 + tmp13 - 02346 MULTIPLY(tmp0, FIX(2.286341144)) + /* c7+c5+c3-c1 */ 02347 MULTIPLY(tmp7, FIX(0.779653625)); /* c15+c13-c11+c9 */ 02348 tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */ 02349 - MULTIPLY(tmp6, FIX(1.663905119)); /* c7+c13+c1-c5 */ 02350 tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */ 02351 + MULTIPLY(tmp5, FIX(1.227391138)); /* c9-c11+c1-c13 */ 02352 tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */ 02353 + MULTIPLY(tmp4, FIX(2.167985692)); /* c1+c13+c5-c9 */ 02354 02355 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS+2); 02356 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS+2); 02357 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS+2); 02358 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS+2); 02359 02360 dataptr++; /* advance pointer to next column */ 02361 wsptr++; /* advance pointer to next column */ 02362 } 02363 } 02364 02365 02366 /* 02367 * Perform the forward DCT on a 16x8 sample block. 
02368 * 02369 * 16-point FDCT in pass 1 (rows), 8-point in pass 2 (columns). 02370 */ 02371 02372 GLOBAL(void) 02373 jpeg_fdct_16x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 02374 { 02375 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; 02376 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17; 02377 INT32 z1; 02378 DCTELEM *dataptr; 02379 JSAMPROW elemptr; 02380 int ctr; 02381 SHIFT_TEMPS 02382 02383 /* Pass 1: process rows. */ 02384 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 02385 /* furthermore, we scale the results by 2**PASS1_BITS. */ 02386 /* 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32). */ 02387 02388 dataptr = data; 02389 ctr = 0; 02390 for (ctr = 0; ctr < DCTSIZE; ctr++) { 02391 elemptr = sample_data[ctr] + start_col; 02392 02393 /* Even part */ 02394 02395 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]); 02396 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]); 02397 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]); 02398 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]); 02399 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]); 02400 tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]); 02401 tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]); 02402 tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]); 02403 02404 tmp10 = tmp0 + tmp7; 02405 tmp14 = tmp0 - tmp7; 02406 tmp11 = tmp1 + tmp6; 02407 tmp15 = tmp1 - tmp6; 02408 tmp12 = tmp2 + tmp5; 02409 tmp16 = tmp2 - tmp5; 02410 tmp13 = tmp3 + tmp4; 02411 tmp17 = tmp3 - tmp4; 02412 02413 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]); 02414 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]); 02415 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]); 02416 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]); 02417 tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]); 02418 tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]); 02419 tmp6 = GETJSAMPLE(elemptr[6]) - 
GETJSAMPLE(elemptr[9]); 02420 tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]); 02421 02422 /* Apply unsigned->signed conversion */ 02423 dataptr[0] = (DCTELEM) 02424 ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) << PASS1_BITS); 02425 dataptr[4] = (DCTELEM) 02426 DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */ 02427 MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */ 02428 CONST_BITS-PASS1_BITS); 02429 02430 tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */ 02431 MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */ 02432 02433 dataptr[2] = (DCTELEM) 02434 DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */ 02435 + MULTIPLY(tmp16, FIX(2.172734804)), /* c2+c10 */ 02436 CONST_BITS-PASS1_BITS); 02437 dataptr[6] = (DCTELEM) 02438 DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */ 02439 - MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */ 02440 CONST_BITS-PASS1_BITS); 02441 02442 /* Odd part */ 02443 02444 tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) + /* c3 */ 02445 MULTIPLY(tmp6 - tmp7, FIX(0.410524528)); /* c13 */ 02446 tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) + /* c5 */ 02447 MULTIPLY(tmp5 + tmp7, FIX(0.666655658)); /* c11 */ 02448 tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) + /* c7 */ 02449 MULTIPLY(tmp4 - tmp7, FIX(0.897167586)); /* c9 */ 02450 tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) + /* c15 */ 02451 MULTIPLY(tmp6 - tmp5, FIX(1.407403738)); /* c1 */ 02452 tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) + /* -c11 */ 02453 MULTIPLY(tmp4 + tmp6, - FIX(1.247225013)); /* -c5 */ 02454 tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) + /* -c3 */ 02455 MULTIPLY(tmp5 - tmp4, FIX(0.410524528)); /* c13 */ 02456 tmp10 = tmp11 + tmp12 + tmp13 - 02457 MULTIPLY(tmp0, FIX(2.286341144)) + /* c7+c5+c3-c1 */ 02458 MULTIPLY(tmp7, FIX(0.779653625)); /* c15+c13-c11+c9 */ 02459 tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */ 
02460 - MULTIPLY(tmp6, FIX(1.663905119)); /* c7+c13+c1-c5 */ 02461 tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */ 02462 + MULTIPLY(tmp5, FIX(1.227391138)); /* c9-c11+c1-c13 */ 02463 tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */ 02464 + MULTIPLY(tmp4, FIX(2.167985692)); /* c1+c13+c5-c9 */ 02465 02466 dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS); 02467 dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS); 02468 dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS); 02469 dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS); 02470 02471 dataptr += DCTSIZE; /* advance pointer to next row */ 02472 } 02473 02474 /* Pass 2: process columns. 02475 * We remove the PASS1_BITS scaling, but leave the results scaled up 02476 * by an overall factor of 8. 02477 * We must also scale the output by 8/16 = 1/2. 02478 */ 02479 02480 dataptr = data; 02481 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { 02482 /* Even part per LL&M figure 1 --- note that published figure is faulty; 02483 * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". 
02484 */ 02485 02486 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; 02487 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; 02488 tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; 02489 tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; 02490 02491 tmp10 = tmp0 + tmp3; 02492 tmp12 = tmp0 - tmp3; 02493 tmp11 = tmp1 + tmp2; 02494 tmp13 = tmp1 - tmp2; 02495 02496 tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; 02497 tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; 02498 tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; 02499 tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; 02500 02501 dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS+1); 02502 dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS+1); 02503 02504 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); 02505 dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), 02506 CONST_BITS+PASS1_BITS+1); 02507 dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), 02508 CONST_BITS+PASS1_BITS+1); 02509 02510 /* Odd part per figure 8 --- note paper omits factor of sqrt(2). 02511 * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). 02512 * i0..i3 in the paper are tmp0..tmp3 here. 
02513 */ 02514 02515 tmp10 = tmp0 + tmp3; 02516 tmp11 = tmp1 + tmp2; 02517 tmp12 = tmp0 + tmp2; 02518 tmp13 = tmp1 + tmp3; 02519 z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ 02520 02521 tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ 02522 tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ 02523 tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ 02524 tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ 02525 tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ 02526 tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ 02527 tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ 02528 tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ 02529 02530 tmp12 += z1; 02531 tmp13 += z1; 02532 02533 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12, 02534 CONST_BITS+PASS1_BITS+1); 02535 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13, 02536 CONST_BITS+PASS1_BITS+1); 02537 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12, 02538 CONST_BITS+PASS1_BITS+1); 02539 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13, 02540 CONST_BITS+PASS1_BITS+1); 02541 02542 dataptr++; /* advance pointer to next column */ 02543 } 02544 }


/*
 * Perform the forward DCT on a 14x7 sample block.
 *
 * Pass 1 runs a 14-point FDCT over each of the 7 sample rows and keeps the
 * 8 lowest-frequency terms; pass 2 runs a 7-point FDCT down each of the 8
 * resulting columns.  Row 7 of the output block is left at zero.
 *
 * data        - output: DCTSIZE x DCTSIZE coefficient block, row-major
 * sample_data - input sample rows; rows 0..6 are read
 * start_col   - index of the first of the 14 samples read in each row
 */

GLOBAL(void)
jpeg_fdct_14x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
{
  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
  INT32 z1, z2, z3;
  DCTELEM *dataptr;
  JSAMPROW elemptr;
  int ctr;
  SHIFT_TEMPS

  /* The 7-point column pass never writes row 7, so clear it up front. */
  MEMZERO(&data[DCTSIZE*7], SIZEOF(DCTELEM) * DCTSIZE);

  /* Pass 1: process rows.
   * Results are scaled up by sqrt(8) compared to a true DCT and by a
   * further factor of 2**PASS1_BITS.
   * 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
   */

  for (dataptr = data, ctr = 0; ctr < 7; ctr++, dataptr += DCTSIZE) {
    elemptr = sample_data[ctr] + start_col;

    /* Even part: sums of samples mirrored about the row centre. */

    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]);
    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]);
    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]);
    tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]);
    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]);
    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]);
    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]);

    tmp10 = tmp0 + tmp6;
    tmp14 = tmp0 - tmp6;
    tmp11 = tmp1 + tmp5;
    tmp15 = tmp1 - tmp5;
    tmp12 = tmp2 + tmp4;
    tmp16 = tmp2 - tmp4;

    /* tmp0..tmp6 are reused for the mirrored differences (odd part input). */
    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]);
    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]);
    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]);
    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]);
    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]);
    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]);
    tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]);

    /* Apply unsigned->signed conversion */
    dataptr[0] = (DCTELEM)
      ((tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE) << PASS1_BITS);
    tmp13 += tmp13;
    dataptr[4] = (DCTELEM)
      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) +   /* c4 */
              MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) -   /* c12 */
              MULTIPLY(tmp12 - tmp13, FIX(0.881747734)),    /* c8 */
              CONST_BITS-PASS1_BITS);

    tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686));      /* c6 */

    dataptr[2] = (DCTELEM)
      DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590))     /* c2-c6 */
              + MULTIPLY(tmp16, FIX(0.613604268)),          /* c10 */
              CONST_BITS-PASS1_BITS);
    dataptr[6] = (DCTELEM)
      DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954))     /* c6+c10 */
              - MULTIPLY(tmp16, FIX(1.378756276)),          /* c2 */
              CONST_BITS-PASS1_BITS);

    /* Odd part */

    tmp10 = tmp1 + tmp2;
    tmp11 = tmp5 - tmp4;
    dataptr[7] = (DCTELEM) ((tmp0 - tmp10 + tmp3 - tmp11 - tmp6) << PASS1_BITS);
    /* c7 is exactly 1, so tmp3 enters the sums as a plain scaled value. */
    tmp3 <<= CONST_BITS;
    tmp10 = MULTIPLY(tmp10, - FIX(0.158341681));            /* -c13 */
    tmp11 = MULTIPLY(tmp11, FIX(1.405321284));              /* c1 */
    tmp10 += tmp11 - tmp3;
    tmp11 = MULTIPLY(tmp0 + tmp2, FIX(1.197448846)) +       /* c5 */
            MULTIPLY(tmp4 + tmp6, FIX(0.752406978));        /* c9 */
    dataptr[5] = (DCTELEM)
      DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(2.373959773)) /* c3+c5-c13 */
              + MULTIPLY(tmp4, FIX(1.119999435)),           /* c1+c11-c9 */
              CONST_BITS-PASS1_BITS);
    tmp12 = MULTIPLY(tmp0 + tmp1, FIX(1.334852607)) +       /* c3 */
            MULTIPLY(tmp5 - tmp6, FIX(0.467085129));        /* c11 */
    dataptr[3] = (DCTELEM)
      DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.424103948)) /* c3-c9-c13 */
              - MULTIPLY(tmp5, FIX(3.069855259)),           /* c1+c5+c11 */
              CONST_BITS-PASS1_BITS);
    /* Output 1 must weight tmp6 by c13.  tmp11 and tmp12 already contribute
     * c9 and -c11 for it, so the correction is -(c9-c11-c13), applied as a
     * scaled product like every other term.  (Adding tmp6 unscaled, as was
     * done before, left the weight wrong by exactly 1.)
     */
    dataptr[1] = (DCTELEM)
      DESCALE(tmp11 + tmp12 + tmp3
              - MULTIPLY(tmp0, FIX(1.126980169))            /* c3+c5-c1 */
              - MULTIPLY(tmp6, FIX(0.126980169)),           /* c9-c11-c13 */
              CONST_BITS-PASS1_BITS);
  }

  /* Pass 2: process columns.
   * We remove the PASS1_BITS scaling, but leave the results scaled up
   * by an overall factor of 8.
   * We must also scale the output by (8/14)*(8/7) = 32/49, which we
   * partially fold into the constant multipliers and final shifting:
   * 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14) * 64/49.
   */

  for (dataptr = data, ctr = 0; ctr < DCTSIZE; ctr++, dataptr++) {
    /* Even part */

    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*6];
    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*5];
    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*4];
    tmp3 = dataptr[DCTSIZE*3];

    /* Differences are taken now, before the column is overwritten. */
    tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*6];
    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*5];
    tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*4];

    z1 = tmp0 + tmp2;
    dataptr[DCTSIZE*0] = (DCTELEM)
      DESCALE(MULTIPLY(z1 + tmp1 + tmp3, FIX(1.306122449)), /* 64/49 */
              CONST_BITS+PASS1_BITS+1);
    tmp3 += tmp3;
    z1 -= tmp3;
    z1 -= tmp3;
    z1 = MULTIPLY(z1, FIX(0.461784020));                /* (c2+c6-c4)/2 */
    z2 = MULTIPLY(tmp0 - tmp2, FIX(1.202428084));       /* (c2+c4-c6)/2 */
    z3 = MULTIPLY(tmp1 - tmp2, FIX(0.411026446));       /* c6 */
    dataptr[DCTSIZE*2] = (DCTELEM)
      DESCALE(z1 + z2 + z3, CONST_BITS+PASS1_BITS+1);
    z1 -= z2;
    z2 = MULTIPLY(tmp0 - tmp1, FIX(1.151670509));       /* c4 */
    dataptr[DCTSIZE*4] = (DCTELEM)
      DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.923568041)), /* c2+c6-c4 */
              CONST_BITS+PASS1_BITS+1);
    dataptr[DCTSIZE*6] = (DCTELEM)
      DESCALE(z1 + z2, CONST_BITS+PASS1_BITS+1);

    /* Odd part */

    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.221765677));   /* (c3+c1-c5)/2 */
    tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.222383464));   /* (c3+c5-c1)/2 */
    tmp0 = tmp1 - tmp2;
    tmp1 += tmp2;
    tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.800824523)); /* -c1 */
    tmp1 += tmp2;
    tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.801442310));   /* c5 */
    tmp0 += tmp3;
    tmp2 += tmp3 + MULTIPLY(tmp12, FIX(2.443531355));   /* c3+c1-c5 */

    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS+1);
    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS+1);
    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS+1);
  }
}


/*
 * Perform the forward DCT on a 12x6 sample block.
 *
 * Pass 1 runs a 12-point FDCT over each of the 6 sample rows and keeps the
 * 8 lowest-frequency terms; pass 2 runs a 6-point FDCT down each of the 8
 * resulting columns.  Rows 6 and 7 of the output block are left at zero.
 *
 * data        - output: DCTSIZE x DCTSIZE coefficient block, row-major
 * sample_data - input sample rows; rows 0..5 are read
 * start_col   - index of the first of the 12 samples read in each row
 */

GLOBAL(void)
jpeg_fdct_12x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
{
  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
  DCTELEM *dataptr;
  JSAMPROW elemptr;
  int ctr;
  SHIFT_TEMPS

  /* The 6-point column pass never writes rows 6 and 7; clear them. */
  MEMZERO(&data[DCTSIZE*6], SIZEOF(DCTELEM) * DCTSIZE * 2);

  /* Pass 1: process rows.
   * Results are scaled up by sqrt(8) compared to a true DCT and by a
   * further factor of 2**PASS1_BITS.
   * 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
   */

  for (dataptr = data, ctr = 0; ctr < 6; ctr++, dataptr += DCTSIZE) {
    elemptr = sample_data[ctr] + start_col;

    /* Even part: sums of samples mirrored about the row centre. */

    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]);
    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]);
    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]);
    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]);
    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]);
    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]);

    tmp10 = tmp0 + tmp5;
    tmp13 = tmp0 - tmp5;
    tmp11 = tmp1 + tmp4;
    tmp14 = tmp1 - tmp4;
    tmp12 = tmp2 + tmp3;
    tmp15 = tmp2 - tmp3;

    /* tmp0..tmp5 are reused for the mirrored differences (odd part input). */
    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]);
    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]);
    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]);
    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]);
    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]);
    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]);

    /* Apply unsigned->signed conversion */
    dataptr[0] = (DCTELEM)
      ((tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE) << PASS1_BITS);
    dataptr[6] = (DCTELEM) ((tmp13 - tmp14 - tmp15) << PASS1_BITS);
    dataptr[4] = (DCTELEM)
      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)),    /* c4 */
              CONST_BITS-PASS1_BITS);
    /* Output 2 is c2*tmp13 + c6*tmp14 + c10*tmp15 with c6 = 1 and
     * c10 = c2 - 1.  The unit-weight term tmp14 - tmp15 has to be raised to
     * the 2**CONST_BITS scale of the product before the two are summed;
     * added unscaled, as was done before, it vanished in the descale.
     */
    dataptr[2] = (DCTELEM)
      DESCALE((tmp14 - tmp15) * (ONE << CONST_BITS) +
              MULTIPLY(tmp13 + tmp15, FIX(1.366025404)),    /* c2 */
              CONST_BITS-PASS1_BITS);

    /* Odd part */

    tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100);         /* c9 */
    tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865);        /* c3-c9 */
    tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065);        /* c3+c9 */
    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054));        /* c5 */
    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669));        /* c7 */
    tmp10 = tmp12 + tmp13 + tmp14
            - MULTIPLY(tmp0, FIX(0.580774953))              /* c5+c7-c1 */
            + MULTIPLY(tmp5, FIX(0.184591911));             /* c11 */
    tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911));      /* -c11 */
    tmp12 += tmp11 - tmp15
             - MULTIPLY(tmp2, FIX(2.339493912))             /* c1+c5-c11 */
             + MULTIPLY(tmp5, FIX(0.860918669));            /* c7 */
    tmp13 += tmp11 - tmp14
             + MULTIPLY(tmp3, FIX(0.725788011))             /* c1+c11-c7 */
             - MULTIPLY(tmp5, FIX(1.121971054));            /* c5 */
    /* tmp11 is recycled only now that tmp12/tmp13 have consumed it. */
    tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.306562965)) /* c3 */
            - MULTIPLY(tmp2 + tmp5, FIX_0_541196100);       /* c9 */

    dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
    dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
    dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
    dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
  }

  /* Pass 2: process columns.
   * We remove the PASS1_BITS scaling, but leave the results scaled up
   * by an overall factor of 8.
   * We must also scale the output by (8/12)*(8/6) = 8/9, which we
   * partially fold into the constant multipliers and final shifting:
   * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12) * 16/9.
   */

  for (dataptr = data, ctr = 0; ctr < DCTSIZE; ctr++, dataptr++) {
    /* Even part */

    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
    tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];

    tmp10 = tmp0 + tmp2;
    tmp12 = tmp0 - tmp2;

    /* Differences are taken now, before the column is overwritten. */
    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];

    dataptr[DCTSIZE*0] = (DCTELEM)
      DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)),         /* 16/9 */
              CONST_BITS+PASS1_BITS+1);
    dataptr[DCTSIZE*2] = (DCTELEM)
      DESCALE(MULTIPLY(tmp12, FIX(2.177324216)),                 /* c2 */
              CONST_BITS+PASS1_BITS+1);
    dataptr[DCTSIZE*4] = (DCTELEM)
      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
              CONST_BITS+PASS1_BITS+1);

    /* Odd part */

    tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829));             /* c5 */

    dataptr[DCTSIZE*1] = (DCTELEM)
      DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),   /* 16/9 */
              CONST_BITS+PASS1_BITS+1);
    dataptr[DCTSIZE*3] = (DCTELEM)
      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)),    /* 16/9 */
              CONST_BITS+PASS1_BITS+1);
    dataptr[DCTSIZE*5] = (DCTELEM)
      DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)),   /* 16/9 */
              CONST_BITS+PASS1_BITS+1);
  }
}


/*
 * Perform the forward DCT on a 10x5 sample block.
 *
 * 10-point FDCT in pass 1 (rows), 5-point in pass 2 (columns).
02854 */ 02855 02856 GLOBAL(void) 02857 jpeg_fdct_10x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 02858 { 02859 INT32 tmp0, tmp1, tmp2, tmp3, tmp4; 02860 INT32 tmp10, tmp11, tmp12, tmp13, tmp14; 02861 DCTELEM *dataptr; 02862 JSAMPROW elemptr; 02863 int ctr; 02864 SHIFT_TEMPS 02865 02866 /* Zero 3 bottom rows of output coefficient block. */ 02867 MEMZERO(&data[DCTSIZE*5], SIZEOF(DCTELEM) * DCTSIZE * 3); 02868 02869 /* Pass 1: process rows. */ 02870 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 02871 /* furthermore, we scale the results by 2**PASS1_BITS. */ 02872 /* 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20). */ 02873 02874 dataptr = data; 02875 for (ctr = 0; ctr < 5; ctr++) { 02876 elemptr = sample_data[ctr] + start_col; 02877 02878 /* Even part */ 02879 02880 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]); 02881 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]); 02882 tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]); 02883 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]); 02884 tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]); 02885 02886 tmp10 = tmp0 + tmp4; 02887 tmp13 = tmp0 - tmp4; 02888 tmp11 = tmp1 + tmp3; 02889 tmp14 = tmp1 - tmp3; 02890 02891 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]); 02892 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]); 02893 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]); 02894 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]); 02895 tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]); 02896 02897 /* Apply unsigned->signed conversion */ 02898 dataptr[0] = (DCTELEM) 02899 ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << PASS1_BITS); 02900 tmp12 += tmp12; 02901 dataptr[4] = (DCTELEM) 02902 DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */ 02903 MULTIPLY(tmp11 - tmp12, FIX(0.437016024)), /* c8 */ 02904 CONST_BITS-PASS1_BITS); 02905 tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876)); /* c6 */ 02906 
dataptr[2] = (DCTELEM) 02907 DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)), /* c2-c6 */ 02908 CONST_BITS-PASS1_BITS); 02909 dataptr[6] = (DCTELEM) 02910 DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)), /* c2+c6 */ 02911 CONST_BITS-PASS1_BITS); 02912 02913 /* Odd part */ 02914 02915 tmp10 = tmp0 + tmp4; 02916 tmp11 = tmp1 - tmp3; 02917 dataptr[5] = (DCTELEM) ((tmp10 - tmp11 - tmp2) << PASS1_BITS); 02918 tmp2 <<= CONST_BITS; 02919 dataptr[1] = (DCTELEM) 02920 DESCALE(MULTIPLY(tmp0, FIX(1.396802247)) + /* c1 */ 02921 MULTIPLY(tmp1, FIX(1.260073511)) + tmp2 + /* c3 */ 02922 MULTIPLY(tmp3, FIX(0.642039522)) + /* c7 */ 02923 MULTIPLY(tmp4, FIX(0.221231742)), /* c9 */ 02924 CONST_BITS-PASS1_BITS); 02925 tmp12 = MULTIPLY(tmp0 - tmp4, FIX(0.951056516)) - /* (c3+c7)/2 */ 02926 MULTIPLY(tmp1 + tmp3, FIX(0.587785252)); /* (c1-c9)/2 */ 02927 tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.309016994)) + /* (c3-c7)/2 */ 02928 (tmp11 << (CONST_BITS - 1)) - tmp2; 02929 dataptr[3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS-PASS1_BITS); 02930 dataptr[7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS-PASS1_BITS); 02931 02932 dataptr += DCTSIZE; /* advance pointer to next row */ 02933 } 02934 02935 /* Pass 2: process columns. 02936 * We remove the PASS1_BITS scaling, but leave the results scaled up 02937 * by an overall factor of 8. 02938 * We must also scale the output by (8/10)*(8/5) = 32/25, which we 02939 * fold into the constant multipliers: 02940 * 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10) * 32/25. 
02941 */ 02942 02943 dataptr = data; 02944 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { 02945 /* Even part */ 02946 02947 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*4]; 02948 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*3]; 02949 tmp2 = dataptr[DCTSIZE*2]; 02950 02951 tmp10 = tmp0 + tmp1; 02952 tmp11 = tmp0 - tmp1; 02953 02954 tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*4]; 02955 tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*3]; 02956 02957 dataptr[DCTSIZE*0] = (DCTELEM) 02958 DESCALE(MULTIPLY(tmp10 + tmp2, FIX(1.28)), /* 32/25 */ 02959 CONST_BITS+PASS1_BITS); 02960 tmp11 = MULTIPLY(tmp11, FIX(1.011928851)); /* (c2+c4)/2 */ 02961 tmp10 -= tmp2 << 2; 02962 tmp10 = MULTIPLY(tmp10, FIX(0.452548340)); /* (c2-c4)/2 */ 02963 dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS+PASS1_BITS); 02964 dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS+PASS1_BITS); 02965 02966 /* Odd part */ 02967 02968 tmp10 = MULTIPLY(tmp0 + tmp1, FIX(1.064004961)); /* c3 */ 02969 02970 dataptr[DCTSIZE*1] = (DCTELEM) 02971 DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.657591230)), /* c1-c3 */ 02972 CONST_BITS+PASS1_BITS); 02973 dataptr[DCTSIZE*3] = (DCTELEM) 02974 DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.785601151)), /* c1+c3 */ 02975 CONST_BITS+PASS1_BITS); 02976 02977 dataptr++; /* advance pointer to next column */ 02978 } 02979 } 02980 02981 02982 /* 02983 * Perform the forward DCT on an 8x4 sample block. 02984 * 02985 * 8-point FDCT in pass 1 (rows), 4-point in pass 2 (columns). 02986 */ 02987 02988 GLOBAL(void) 02989 jpeg_fdct_8x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 02990 { 02991 INT32 tmp0, tmp1, tmp2, tmp3; 02992 INT32 tmp10, tmp11, tmp12, tmp13; 02993 INT32 z1; 02994 DCTELEM *dataptr; 02995 JSAMPROW elemptr; 02996 int ctr; 02997 SHIFT_TEMPS 02998 02999 /* Zero 4 bottom rows of output coefficient block. */ 03000 MEMZERO(&data[DCTSIZE*4], SIZEOF(DCTELEM) * DCTSIZE * 4); 03001 03002 /* Pass 1: process rows. 
*/ 03003 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 03004 /* furthermore, we scale the results by 2**PASS1_BITS. */ 03005 /* We must also scale the output by 8/4 = 2, which we add here. */ 03006 03007 dataptr = data; 03008 for (ctr = 0; ctr < 4; ctr++) { 03009 elemptr = sample_data[ctr] + start_col; 03010 03011 /* Even part per LL&M figure 1 --- note that published figure is faulty; 03012 * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". 03013 */ 03014 03015 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]); 03016 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]); 03017 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]); 03018 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]); 03019 03020 tmp10 = tmp0 + tmp3; 03021 tmp12 = tmp0 - tmp3; 03022 tmp11 = tmp1 + tmp2; 03023 tmp13 = tmp1 - tmp2; 03024 03025 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]); 03026 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]); 03027 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]); 03028 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]); 03029 03030 /* Apply unsigned->signed conversion */ 03031 dataptr[0] = (DCTELEM) 03032 ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << (PASS1_BITS+1)); 03033 dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << (PASS1_BITS+1)); 03034 03035 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); 03036 /* Add fudge factor here for final descale. */ 03037 z1 += ONE << (CONST_BITS-PASS1_BITS-2); 03038 dataptr[2] = (DCTELEM) RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), 03039 CONST_BITS-PASS1_BITS-1); 03040 dataptr[6] = (DCTELEM) RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), 03041 CONST_BITS-PASS1_BITS-1); 03042 03043 /* Odd part per figure 8 --- note paper omits factor of sqrt(2). 03044 * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). 03045 * i0..i3 in the paper are tmp0..tmp3 here. 
03046 */ 03047 03048 tmp10 = tmp0 + tmp3; 03049 tmp11 = tmp1 + tmp2; 03050 tmp12 = tmp0 + tmp2; 03051 tmp13 = tmp1 + tmp3; 03052 z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ 03053 /* Add fudge factor here for final descale. */ 03054 z1 += ONE << (CONST_BITS-PASS1_BITS-2); 03055 03056 tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ 03057 tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ 03058 tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ 03059 tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ 03060 tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ 03061 tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ 03062 tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ 03063 tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ 03064 03065 tmp12 += z1; 03066 tmp13 += z1; 03067 03068 dataptr[1] = (DCTELEM) 03069 RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS-1); 03070 dataptr[3] = (DCTELEM) 03071 RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS-1); 03072 dataptr[5] = (DCTELEM) 03073 RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS-1); 03074 dataptr[7] = (DCTELEM) 03075 RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS-1); 03076 03077 dataptr += DCTSIZE; /* advance pointer to next row */ 03078 } 03079 03080 /* Pass 2: process columns. 03081 * We remove the PASS1_BITS scaling, but leave the results scaled up 03082 * by an overall factor of 8. 03083 * 4-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). 03084 */ 03085 03086 dataptr = data; 03087 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { 03088 /* Even part */ 03089 03090 /* Add fudge factor here for final descale. 
*/ 03091 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS1_BITS-1)); 03092 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2]; 03093 03094 tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3]; 03095 tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2]; 03096 03097 dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS); 03098 dataptr[DCTSIZE*2] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS); 03099 03100 /* Odd part */ 03101 03102 tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */ 03103 /* Add fudge factor here for final descale. */ 03104 tmp0 += ONE << (CONST_BITS+PASS1_BITS-1); 03105 03106 dataptr[DCTSIZE*1] = (DCTELEM) 03107 RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */ 03108 CONST_BITS+PASS1_BITS); 03109 dataptr[DCTSIZE*3] = (DCTELEM) 03110 RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */ 03111 CONST_BITS+PASS1_BITS); 03112 03113 dataptr++; /* advance pointer to next column */ 03114 } 03115 } 03116 03117 03118 /* 03119 * Perform the forward DCT on a 6x3 sample block. 03120 * 03121 * 6-point FDCT in pass 1 (rows), 3-point in pass 2 (columns). 03122 */ 03123 03124 GLOBAL(void) 03125 jpeg_fdct_6x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 03126 { 03127 INT32 tmp0, tmp1, tmp2; 03128 INT32 tmp10, tmp11, tmp12; 03129 DCTELEM *dataptr; 03130 JSAMPROW elemptr; 03131 int ctr; 03132 SHIFT_TEMPS 03133 03134 /* Pre-zero output coefficient block. */ 03135 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 03136 03137 /* Pass 1: process rows. */ 03138 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 03139 /* furthermore, we scale the results by 2**PASS1_BITS. */ 03140 /* We scale the results further by 2 as part of output adaption */ 03141 /* scaling for different DCT size. */ 03142 /* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12). 
*/ 03143 03144 dataptr = data; 03145 for (ctr = 0; ctr < 3; ctr++) { 03146 elemptr = sample_data[ctr] + start_col; 03147 03148 /* Even part */ 03149 03150 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]); 03151 tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]); 03152 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]); 03153 03154 tmp10 = tmp0 + tmp2; 03155 tmp12 = tmp0 - tmp2; 03156 03157 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]); 03158 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]); 03159 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]); 03160 03161 /* Apply unsigned->signed conversion */ 03162 dataptr[0] = (DCTELEM) 03163 ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << (PASS1_BITS+1)); 03164 dataptr[2] = (DCTELEM) 03165 DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */ 03166 CONST_BITS-PASS1_BITS-1); 03167 dataptr[4] = (DCTELEM) 03168 DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */ 03169 CONST_BITS-PASS1_BITS-1); 03170 03171 /* Odd part */ 03172 03173 tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)), /* c5 */ 03174 CONST_BITS-PASS1_BITS-1); 03175 03176 dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << (PASS1_BITS+1))); 03177 dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << (PASS1_BITS+1)); 03178 dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << (PASS1_BITS+1))); 03179 03180 dataptr += DCTSIZE; /* advance pointer to next row */ 03181 } 03182 03183 /* Pass 2: process columns. 03184 * We remove the PASS1_BITS scaling, but leave the results scaled up 03185 * by an overall factor of 8. 03186 * We must also scale the output by (8/6)*(8/3) = 32/9, which we partially 03187 * fold into the constant multipliers (other part was done in pass 1): 03188 * 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6) * 16/9. 
03189 */ 03190 03191 dataptr = data; 03192 for (ctr = 0; ctr < 6; ctr++) { 03193 /* Even part */ 03194 03195 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*2]; 03196 tmp1 = dataptr[DCTSIZE*1]; 03197 03198 tmp2 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*2]; 03199 03200 dataptr[DCTSIZE*0] = (DCTELEM) 03201 DESCALE(MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */ 03202 CONST_BITS+PASS1_BITS); 03203 dataptr[DCTSIZE*2] = (DCTELEM) 03204 DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(1.257078722)), /* c2 */ 03205 CONST_BITS+PASS1_BITS); 03206 03207 /* Odd part */ 03208 03209 dataptr[DCTSIZE*1] = (DCTELEM) 03210 DESCALE(MULTIPLY(tmp2, FIX(2.177324216)), /* c1 */ 03211 CONST_BITS+PASS1_BITS); 03212 03213 dataptr++; /* advance pointer to next column */ 03214 } 03215 } 03216 03217 03218 /* 03219 * Perform the forward DCT on a 4x2 sample block. 03220 * 03221 * 4-point FDCT in pass 1 (rows), 2-point in pass 2 (columns). 03222 */ 03223 03224 GLOBAL(void) 03225 jpeg_fdct_4x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 03226 { 03227 INT32 tmp0, tmp1; 03228 INT32 tmp10, tmp11; 03229 DCTELEM *dataptr; 03230 JSAMPROW elemptr; 03231 int ctr; 03232 SHIFT_TEMPS 03233 03234 /* Pre-zero output coefficient block. */ 03235 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 03236 03237 /* Pass 1: process rows. */ 03238 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 03239 /* furthermore, we scale the results by 2**PASS1_BITS. */ 03240 /* We must also scale the output by (8/4)*(8/2) = 2**3, which we add here. */ 03241 /* 4-point FDCT kernel, */ 03242 /* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. 
*/ 03243 03244 dataptr = data; 03245 for (ctr = 0; ctr < 2; ctr++) { 03246 elemptr = sample_data[ctr] + start_col; 03247 03248 /* Even part */ 03249 03250 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]); 03251 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]); 03252 03253 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]); 03254 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]); 03255 03256 /* Apply unsigned->signed conversion */ 03257 dataptr[0] = (DCTELEM) 03258 ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+3)); 03259 dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+3)); 03260 03261 /* Odd part */ 03262 03263 tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */ 03264 /* Add fudge factor here for final descale. */ 03265 tmp0 += ONE << (CONST_BITS-PASS1_BITS-4); 03266 03267 dataptr[1] = (DCTELEM) 03268 RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */ 03269 CONST_BITS-PASS1_BITS-3); 03270 dataptr[3] = (DCTELEM) 03271 RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */ 03272 CONST_BITS-PASS1_BITS-3); 03273 03274 dataptr += DCTSIZE; /* advance pointer to next row */ 03275 } 03276 03277 /* Pass 2: process columns. 03278 * We remove the PASS1_BITS scaling, but leave the results scaled up 03279 * by an overall factor of 8. 03280 */ 03281 03282 dataptr = data; 03283 for (ctr = 0; ctr < 4; ctr++) { 03284 /* Even part */ 03285 03286 /* Add fudge factor here for final descale. */ 03287 tmp0 = dataptr[DCTSIZE*0] + (ONE << (PASS1_BITS-1)); 03288 tmp1 = dataptr[DCTSIZE*1]; 03289 03290 dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS); 03291 03292 /* Odd part */ 03293 03294 dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS); 03295 03296 dataptr++; /* advance pointer to next column */ 03297 } 03298 } 03299 03300 03301 /* 03302 * Perform the forward DCT on a 2x1 sample block. 03303 * 03304 * 2-point FDCT in pass 1 (rows), 1-point in pass 2 (columns). 
03305 */ 03306 03307 GLOBAL(void) 03308 jpeg_fdct_2x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 03309 { 03310 INT32 tmp0, tmp1; 03311 JSAMPROW elemptr; 03312 03313 /* Pre-zero output coefficient block. */ 03314 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 03315 03316 elemptr = sample_data[0] + start_col; 03317 03318 tmp0 = GETJSAMPLE(elemptr[0]); 03319 tmp1 = GETJSAMPLE(elemptr[1]); 03320 03321 /* We leave the results scaled up by an overall factor of 8. 03322 * We must also scale the output by (8/2)*(8/1) = 2**5. 03323 */ 03324 03325 /* Even part */ 03326 /* Apply unsigned->signed conversion */ 03327 data[0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5); 03328 03329 /* Odd part */ 03330 data[1] = (DCTELEM) ((tmp0 - tmp1) << 5); 03331 } 03332 03333 03334 /* 03335 * Perform the forward DCT on an 8x16 sample block. 03336 * 03337 * 8-point FDCT in pass 1 (rows), 16-point in pass 2 (columns). 03338 */ 03339 03340 GLOBAL(void) 03341 jpeg_fdct_8x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 03342 { 03343 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; 03344 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17; 03345 INT32 z1; 03346 DCTELEM workspace[DCTSIZE2]; 03347 DCTELEM *dataptr; 03348 DCTELEM *wsptr; 03349 JSAMPROW elemptr; 03350 int ctr; 03351 SHIFT_TEMPS 03352 03353 /* Pass 1: process rows. */ 03354 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 03355 /* furthermore, we scale the results by 2**PASS1_BITS. */ 03356 03357 dataptr = data; 03358 ctr = 0; 03359 for (;;) { 03360 elemptr = sample_data[ctr] + start_col; 03361 03362 /* Even part per LL&M figure 1 --- note that published figure is faulty; 03363 * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". 
03364 */ 03365 03366 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]); 03367 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]); 03368 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]); 03369 tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]); 03370 03371 tmp10 = tmp0 + tmp3; 03372 tmp12 = tmp0 - tmp3; 03373 tmp11 = tmp1 + tmp2; 03374 tmp13 = tmp1 - tmp2; 03375 03376 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]); 03377 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]); 03378 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]); 03379 tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]); 03380 03381 /* Apply unsigned->signed conversion */ 03382 dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS); 03383 dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS); 03384 03385 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); 03386 dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), 03387 CONST_BITS-PASS1_BITS); 03388 dataptr[6] = (DCTELEM) DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), 03389 CONST_BITS-PASS1_BITS); 03390 03391 /* Odd part per figure 8 --- note paper omits factor of sqrt(2). 03392 * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). 03393 * i0..i3 in the paper are tmp0..tmp3 here. 
03394 */ 03395 03396 tmp10 = tmp0 + tmp3; 03397 tmp11 = tmp1 + tmp2; 03398 tmp12 = tmp0 + tmp2; 03399 tmp13 = tmp1 + tmp3; 03400 z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ 03401 03402 tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ 03403 tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ 03404 tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ 03405 tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ 03406 tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ 03407 tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ 03408 tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ 03409 tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ 03410 03411 tmp12 += z1; 03412 tmp13 += z1; 03413 03414 dataptr[1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS); 03415 dataptr[3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS); 03416 dataptr[5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS); 03417 dataptr[7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS); 03418 03419 ctr++; 03420 03421 if (ctr != DCTSIZE) { 03422 if (ctr == DCTSIZE * 2) 03423 break; /* Done. */ 03424 dataptr += DCTSIZE; /* advance pointer to next row */ 03425 } else 03426 dataptr = workspace; /* switch pointer to extended workspace */ 03427 } 03428 03429 /* Pass 2: process columns. 03430 * We remove the PASS1_BITS scaling, but leave the results scaled up 03431 * by an overall factor of 8. 03432 * We must also scale the output by 8/16 = 1/2. 03433 * 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32). 
03434 */ 03435 03436 dataptr = data; 03437 wsptr = workspace; 03438 for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { 03439 /* Even part */ 03440 03441 tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7]; 03442 tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6]; 03443 tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5]; 03444 tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4]; 03445 tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3]; 03446 tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2]; 03447 tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1]; 03448 tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0]; 03449 03450 tmp10 = tmp0 + tmp7; 03451 tmp14 = tmp0 - tmp7; 03452 tmp11 = tmp1 + tmp6; 03453 tmp15 = tmp1 - tmp6; 03454 tmp12 = tmp2 + tmp5; 03455 tmp16 = tmp2 - tmp5; 03456 tmp13 = tmp3 + tmp4; 03457 tmp17 = tmp3 - tmp4; 03458 03459 tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*7]; 03460 tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*6]; 03461 tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*5]; 03462 tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*4]; 03463 tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*3]; 03464 tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*2]; 03465 tmp6 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*1]; 03466 tmp7 = dataptr[DCTSIZE*7] - wsptr[DCTSIZE*0]; 03467 03468 dataptr[DCTSIZE*0] = (DCTELEM) 03469 DESCALE(tmp10 + tmp11 + tmp12 + tmp13, PASS1_BITS+1); 03470 dataptr[DCTSIZE*4] = (DCTELEM) 03471 DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */ 03472 MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */ 03473 CONST_BITS+PASS1_BITS+1); 03474 03475 tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */ 03476 MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */ 03477 03478 dataptr[DCTSIZE*2] = (DCTELEM) 03479 DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */ 03480 + MULTIPLY(tmp16, FIX(2.172734804)), /* c2+c10 */ 03481 CONST_BITS+PASS1_BITS+1); 03482 dataptr[DCTSIZE*6] = (DCTELEM) 03483 DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */ 03484 - 
MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */ 03485 CONST_BITS+PASS1_BITS+1); 03486 03487 /* Odd part */ 03488 03489 tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) + /* c3 */ 03490 MULTIPLY(tmp6 - tmp7, FIX(0.410524528)); /* c13 */ 03491 tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) + /* c5 */ 03492 MULTIPLY(tmp5 + tmp7, FIX(0.666655658)); /* c11 */ 03493 tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) + /* c7 */ 03494 MULTIPLY(tmp4 - tmp7, FIX(0.897167586)); /* c9 */ 03495 tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) + /* c15 */ 03496 MULTIPLY(tmp6 - tmp5, FIX(1.407403738)); /* c1 */ 03497 tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) + /* -c11 */ 03498 MULTIPLY(tmp4 + tmp6, - FIX(1.247225013)); /* -c5 */ 03499 tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) + /* -c3 */ 03500 MULTIPLY(tmp5 - tmp4, FIX(0.410524528)); /* c13 */ 03501 tmp10 = tmp11 + tmp12 + tmp13 - 03502 MULTIPLY(tmp0, FIX(2.286341144)) + /* c7+c5+c3-c1 */ 03503 MULTIPLY(tmp7, FIX(0.779653625)); /* c15+c13-c11+c9 */ 03504 tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */ 03505 - MULTIPLY(tmp6, FIX(1.663905119)); /* c7+c13+c1-c5 */ 03506 tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */ 03507 + MULTIPLY(tmp5, FIX(1.227391138)); /* c9-c11+c1-c13 */ 03508 tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */ 03509 + MULTIPLY(tmp4, FIX(2.167985692)); /* c1+c13+c5-c9 */ 03510 03511 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS+1); 03512 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS+1); 03513 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS+1); 03514 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS+1); 03515 03516 dataptr++; /* advance pointer to next column */ 03517 wsptr++; /* advance pointer to next column */ 03518 } 03519 } 03520 03521 03522 /* 03523 * Perform the forward DCT on a 7x14 sample block. 
03524 * 03525 * 7-point FDCT in pass 1 (rows), 14-point in pass 2 (columns). 03526 */ 03527 03528 GLOBAL(void) 03529 jpeg_fdct_7x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 03530 { 03531 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; 03532 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; 03533 INT32 z1, z2, z3; 03534 DCTELEM workspace[8*6]; 03535 DCTELEM *dataptr; 03536 DCTELEM *wsptr; 03537 JSAMPROW elemptr; 03538 int ctr; 03539 SHIFT_TEMPS 03540 03541 /* Pre-zero output coefficient block. */ 03542 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 03543 03544 /* Pass 1: process rows. */ 03545 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 03546 /* furthermore, we scale the results by 2**PASS1_BITS. */ 03547 /* 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14). */ 03548 03549 dataptr = data; 03550 ctr = 0; 03551 for (;;) { 03552 elemptr = sample_data[ctr] + start_col; 03553 03554 /* Even part */ 03555 03556 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]); 03557 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]); 03558 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]); 03559 tmp3 = GETJSAMPLE(elemptr[3]); 03560 03561 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]); 03562 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]); 03563 tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]); 03564 03565 z1 = tmp0 + tmp2; 03566 /* Apply unsigned->signed conversion */ 03567 dataptr[0] = (DCTELEM) 03568 ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS); 03569 tmp3 += tmp3; 03570 z1 -= tmp3; 03571 z1 -= tmp3; 03572 z1 = MULTIPLY(z1, FIX(0.353553391)); /* (c2+c6-c4)/2 */ 03573 z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002)); /* (c2+c4-c6)/2 */ 03574 z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123)); /* c6 */ 03575 dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS); 03576 z1 -= z2; 03577 z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734)); /* c4 */ 03578 dataptr[4] = (DCTELEM) 03579 DESCALE(z2 
+ z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */ 03580 CONST_BITS-PASS1_BITS); 03581 dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS); 03582 03583 /* Odd part */ 03584 03585 tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347)); /* (c3+c1-c5)/2 */ 03586 tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339)); /* (c3+c5-c1)/2 */ 03587 tmp0 = tmp1 - tmp2; 03588 tmp1 += tmp2; 03589 tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.378756276)); /* -c1 */ 03590 tmp1 += tmp2; 03591 tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.613604268)); /* c5 */ 03592 tmp0 += tmp3; 03593 tmp2 += tmp3 + MULTIPLY(tmp12, FIX(1.870828693)); /* c3+c1-c5 */ 03594 03595 dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS); 03596 dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS); 03597 dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS); 03598 03599 ctr++; 03600 03601 if (ctr != DCTSIZE) { 03602 if (ctr == 14) 03603 break; /* Done. */ 03604 dataptr += DCTSIZE; /* advance pointer to next row */ 03605 } else 03606 dataptr = workspace; /* switch pointer to extended workspace */ 03607 } 03608 03609 /* Pass 2: process columns. 03610 * We remove the PASS1_BITS scaling, but leave the results scaled up 03611 * by an overall factor of 8. 03612 * We must also scale the output by (8/7)*(8/14) = 32/49, which we 03613 * fold into the constant multipliers: 03614 * 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28) * 32/49. 
03615 */ 03616 03617 dataptr = data; 03618 wsptr = workspace; 03619 for (ctr = 0; ctr < 7; ctr++) { 03620 /* Even part */ 03621 03622 tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*5]; 03623 tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*4]; 03624 tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*3]; 03625 tmp13 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*2]; 03626 tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*1]; 03627 tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*0]; 03628 tmp6 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7]; 03629 03630 tmp10 = tmp0 + tmp6; 03631 tmp14 = tmp0 - tmp6; 03632 tmp11 = tmp1 + tmp5; 03633 tmp15 = tmp1 - tmp5; 03634 tmp12 = tmp2 + tmp4; 03635 tmp16 = tmp2 - tmp4; 03636 03637 tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*5]; 03638 tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*4]; 03639 tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*3]; 03640 tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*2]; 03641 tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*1]; 03642 tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*0]; 03643 tmp6 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7]; 03644 03645 dataptr[DCTSIZE*0] = (DCTELEM) 03646 DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12 + tmp13, 03647 FIX(0.653061224)), /* 32/49 */ 03648 CONST_BITS+PASS1_BITS); 03649 tmp13 += tmp13; 03650 dataptr[DCTSIZE*4] = (DCTELEM) 03651 DESCALE(MULTIPLY(tmp10 - tmp13, FIX(0.832106052)) + /* c4 */ 03652 MULTIPLY(tmp11 - tmp13, FIX(0.205513223)) - /* c12 */ 03653 MULTIPLY(tmp12 - tmp13, FIX(0.575835255)), /* c8 */ 03654 CONST_BITS+PASS1_BITS); 03655 03656 tmp10 = MULTIPLY(tmp14 + tmp15, FIX(0.722074570)); /* c6 */ 03657 03658 dataptr[DCTSIZE*2] = (DCTELEM) 03659 DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.178337691)) /* c2-c6 */ 03660 + MULTIPLY(tmp16, FIX(0.400721155)), /* c10 */ 03661 CONST_BITS+PASS1_BITS); 03662 dataptr[DCTSIZE*6] = (DCTELEM) 03663 DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.122795725)) /* c6+c10 */ 03664 - MULTIPLY(tmp16, FIX(0.900412262)), /* c2 */ 03665 CONST_BITS+PASS1_BITS); 03666 03667 /* Odd part */ 03668 03669 tmp10 = tmp1 + tmp2; 03670 tmp11 
= tmp5 - tmp4; 03671 dataptr[DCTSIZE*7] = (DCTELEM) 03672 DESCALE(MULTIPLY(tmp0 - tmp10 + tmp3 - tmp11 - tmp6, 03673 FIX(0.653061224)), /* 32/49 */ 03674 CONST_BITS+PASS1_BITS); 03675 tmp3 = MULTIPLY(tmp3 , FIX(0.653061224)); /* 32/49 */ 03676 tmp10 = MULTIPLY(tmp10, - FIX(0.103406812)); /* -c13 */ 03677 tmp11 = MULTIPLY(tmp11, FIX(0.917760839)); /* c1 */ 03678 tmp10 += tmp11 - tmp3; 03679 tmp11 = MULTIPLY(tmp0 + tmp2, FIX(0.782007410)) + /* c5 */ 03680 MULTIPLY(tmp4 + tmp6, FIX(0.491367823)); /* c9 */ 03681 dataptr[DCTSIZE*5] = (DCTELEM) 03682 DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(1.550341076)) /* c3+c5-c13 */ 03683 + MULTIPLY(tmp4, FIX(0.731428202)), /* c1+c11-c9 */ 03684 CONST_BITS+PASS1_BITS); 03685 tmp12 = MULTIPLY(tmp0 + tmp1, FIX(0.871740478)) + /* c3 */ 03686 MULTIPLY(tmp5 - tmp6, FIX(0.305035186)); /* c11 */ 03687 dataptr[DCTSIZE*3] = (DCTELEM) 03688 DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.276965844)) /* c3-c9-c13 */ 03689 - MULTIPLY(tmp5, FIX(2.004803435)), /* c1+c5+c11 */ 03690 CONST_BITS+PASS1_BITS); 03691 dataptr[DCTSIZE*1] = (DCTELEM) 03692 DESCALE(tmp11 + tmp12 + tmp3 03693 - MULTIPLY(tmp0, FIX(0.735987049)) /* c3+c5-c1 */ 03694 - MULTIPLY(tmp6, FIX(0.082925825)), /* c9-c11-c13 */ 03695 CONST_BITS+PASS1_BITS); 03696 03697 dataptr++; /* advance pointer to next column */ 03698 wsptr++; /* advance pointer to next column */ 03699 } 03700 } 03701 03702 03703 /* 03704 * Perform the forward DCT on a 6x12 sample block. 03705 * 03706 * 6-point FDCT in pass 1 (rows), 12-point in pass 2 (columns). 03707 */ 03708 03709 GLOBAL(void) 03710 jpeg_fdct_6x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 03711 { 03712 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; 03713 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; 03714 DCTELEM workspace[8*4]; 03715 DCTELEM *dataptr; 03716 DCTELEM *wsptr; 03717 JSAMPROW elemptr; 03718 int ctr; 03719 SHIFT_TEMPS 03720 03721 /* Pre-zero output coefficient block. 
*/ 03722 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 03723 03724 /* Pass 1: process rows. */ 03725 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 03726 /* furthermore, we scale the results by 2**PASS1_BITS. */ 03727 /* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12). */ 03728 03729 dataptr = data; 03730 ctr = 0; 03731 for (;;) { 03732 elemptr = sample_data[ctr] + start_col; 03733 03734 /* Even part */ 03735 03736 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]); 03737 tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]); 03738 tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]); 03739 03740 tmp10 = tmp0 + tmp2; 03741 tmp12 = tmp0 - tmp2; 03742 03743 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]); 03744 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]); 03745 tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]); 03746 03747 /* Apply unsigned->signed conversion */ 03748 dataptr[0] = (DCTELEM) 03749 ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS); 03750 dataptr[2] = (DCTELEM) 03751 DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */ 03752 CONST_BITS-PASS1_BITS); 03753 dataptr[4] = (DCTELEM) 03754 DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */ 03755 CONST_BITS-PASS1_BITS); 03756 03757 /* Odd part */ 03758 03759 tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)), /* c5 */ 03760 CONST_BITS-PASS1_BITS); 03761 03762 dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS)); 03763 dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS); 03764 dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS)); 03765 03766 ctr++; 03767 03768 if (ctr != DCTSIZE) { 03769 if (ctr == 12) 03770 break; /* Done. */ 03771 dataptr += DCTSIZE; /* advance pointer to next row */ 03772 } else 03773 dataptr = workspace; /* switch pointer to extended workspace */ 03774 } 03775 03776 /* Pass 2: process columns. 
03777 * We remove the PASS1_BITS scaling, but leave the results scaled up 03778 * by an overall factor of 8. 03779 * We must also scale the output by (8/6)*(8/12) = 8/9, which we 03780 * fold into the constant multipliers: 03781 * 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24) * 8/9. 03782 */ 03783 03784 dataptr = data; 03785 wsptr = workspace; 03786 for (ctr = 0; ctr < 6; ctr++) { 03787 /* Even part */ 03788 03789 tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*3]; 03790 tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*2]; 03791 tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*1]; 03792 tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*0]; 03793 tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*7]; 03794 tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*6]; 03795 03796 tmp10 = tmp0 + tmp5; 03797 tmp13 = tmp0 - tmp5; 03798 tmp11 = tmp1 + tmp4; 03799 tmp14 = tmp1 - tmp4; 03800 tmp12 = tmp2 + tmp3; 03801 tmp15 = tmp2 - tmp3; 03802 03803 tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*3]; 03804 tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*2]; 03805 tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*1]; 03806 tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*0]; 03807 tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*7]; 03808 tmp5 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*6]; 03809 03810 dataptr[DCTSIZE*0] = (DCTELEM) 03811 DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(0.888888889)), /* 8/9 */ 03812 CONST_BITS+PASS1_BITS); 03813 dataptr[DCTSIZE*6] = (DCTELEM) 03814 DESCALE(MULTIPLY(tmp13 - tmp14 - tmp15, FIX(0.888888889)), /* 8/9 */ 03815 CONST_BITS+PASS1_BITS); 03816 dataptr[DCTSIZE*4] = (DCTELEM) 03817 DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.088662108)), /* c4 */ 03818 CONST_BITS+PASS1_BITS); 03819 dataptr[DCTSIZE*2] = (DCTELEM) 03820 DESCALE(MULTIPLY(tmp14 - tmp15, FIX(0.888888889)) + /* 8/9 */ 03821 MULTIPLY(tmp13 + tmp15, FIX(1.214244803)), /* c2 */ 03822 CONST_BITS+PASS1_BITS); 03823 03824 /* Odd part */ 03825 03826 tmp10 = MULTIPLY(tmp1 + tmp4, FIX(0.481063200)); /* c9 */ 03827 tmp14 = tmp10 + MULTIPLY(tmp1, FIX(0.680326102)); 
/* c3-c9 */ 03828 tmp15 = tmp10 - MULTIPLY(tmp4, FIX(1.642452502)); /* c3+c9 */ 03829 tmp12 = MULTIPLY(tmp0 + tmp2, FIX(0.997307603)); /* c5 */ 03830 tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.765261039)); /* c7 */ 03831 tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.516244403)) /* c5+c7-c1 */ 03832 + MULTIPLY(tmp5, FIX(0.164081699)); /* c11 */ 03833 tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.164081699)); /* -c11 */ 03834 tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.079550144)) /* c1+c5-c11 */ 03835 + MULTIPLY(tmp5, FIX(0.765261039)); /* c7 */ 03836 tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.645144899)) /* c1+c11-c7 */ 03837 - MULTIPLY(tmp5, FIX(0.997307603)); /* c5 */ 03838 tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.161389302)) /* c3 */ 03839 - MULTIPLY(tmp2 + tmp5, FIX(0.481063200)); /* c9 */ 03840 03841 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS); 03842 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS); 03843 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS); 03844 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS); 03845 03846 dataptr++; /* advance pointer to next column */ 03847 wsptr++; /* advance pointer to next column */ 03848 } 03849 } 03850 03851 03852 /* 03853 * Perform the forward DCT on a 5x10 sample block. 03854 * 03855 * 5-point FDCT in pass 1 (rows), 10-point in pass 2 (columns). 03856 */ 03857 03858 GLOBAL(void) 03859 jpeg_fdct_5x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 03860 { 03861 INT32 tmp0, tmp1, tmp2, tmp3, tmp4; 03862 INT32 tmp10, tmp11, tmp12, tmp13, tmp14; 03863 DCTELEM workspace[8*2]; 03864 DCTELEM *dataptr; 03865 DCTELEM *wsptr; 03866 JSAMPROW elemptr; 03867 int ctr; 03868 SHIFT_TEMPS 03869 03870 /* Pre-zero output coefficient block. */ 03871 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 03872 03873 /* Pass 1: process rows. 
*/ 03874 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 03875 /* furthermore, we scale the results by 2**PASS1_BITS. */ 03876 /* 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10). */ 03877 03878 dataptr = data; 03879 ctr = 0; 03880 for (;;) { 03881 elemptr = sample_data[ctr] + start_col; 03882 03883 /* Even part */ 03884 03885 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]); 03886 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]); 03887 tmp2 = GETJSAMPLE(elemptr[2]); 03888 03889 tmp10 = tmp0 + tmp1; 03890 tmp11 = tmp0 - tmp1; 03891 03892 tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]); 03893 tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]); 03894 03895 /* Apply unsigned->signed conversion */ 03896 dataptr[0] = (DCTELEM) 03897 ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << PASS1_BITS); 03898 tmp11 = MULTIPLY(tmp11, FIX(0.790569415)); /* (c2+c4)/2 */ 03899 tmp10 -= tmp2 << 2; 03900 tmp10 = MULTIPLY(tmp10, FIX(0.353553391)); /* (c2-c4)/2 */ 03901 dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS); 03902 dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS); 03903 03904 /* Odd part */ 03905 03906 tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876)); /* c3 */ 03907 03908 dataptr[1] = (DCTELEM) 03909 DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */ 03910 CONST_BITS-PASS1_BITS); 03911 dataptr[3] = (DCTELEM) 03912 DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */ 03913 CONST_BITS-PASS1_BITS); 03914 03915 ctr++; 03916 03917 if (ctr != DCTSIZE) { 03918 if (ctr == 10) 03919 break; /* Done. */ 03920 dataptr += DCTSIZE; /* advance pointer to next row */ 03921 } else 03922 dataptr = workspace; /* switch pointer to extended workspace */ 03923 } 03924 03925 /* Pass 2: process columns. 03926 * We remove the PASS1_BITS scaling, but leave the results scaled up 03927 * by an overall factor of 8. 
03928 * We must also scale the output by (8/5)*(8/10) = 32/25, which we 03929 * fold into the constant multipliers: 03930 * 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20) * 32/25. 03931 */ 03932 03933 dataptr = data; 03934 wsptr = workspace; 03935 for (ctr = 0; ctr < 5; ctr++) { 03936 /* Even part */ 03937 03938 tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*1]; 03939 tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*0]; 03940 tmp12 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*7]; 03941 tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*6]; 03942 tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5]; 03943 03944 tmp10 = tmp0 + tmp4; 03945 tmp13 = tmp0 - tmp4; 03946 tmp11 = tmp1 + tmp3; 03947 tmp14 = tmp1 - tmp3; 03948 03949 tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*1]; 03950 tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*0]; 03951 tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*7]; 03952 tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*6]; 03953 tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5]; 03954 03955 dataptr[DCTSIZE*0] = (DCTELEM) 03956 DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(1.28)), /* 32/25 */ 03957 CONST_BITS+PASS1_BITS); 03958 tmp12 += tmp12; 03959 dataptr[DCTSIZE*4] = (DCTELEM) 03960 DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.464477191)) - /* c4 */ 03961 MULTIPLY(tmp11 - tmp12, FIX(0.559380511)), /* c8 */ 03962 CONST_BITS+PASS1_BITS); 03963 tmp10 = MULTIPLY(tmp13 + tmp14, FIX(1.064004961)); /* c6 */ 03964 dataptr[DCTSIZE*2] = (DCTELEM) 03965 DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.657591230)), /* c2-c6 */ 03966 CONST_BITS+PASS1_BITS); 03967 dataptr[DCTSIZE*6] = (DCTELEM) 03968 DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.785601151)), /* c2+c6 */ 03969 CONST_BITS+PASS1_BITS); 03970 03971 /* Odd part */ 03972 03973 tmp10 = tmp0 + tmp4; 03974 tmp11 = tmp1 - tmp3; 03975 dataptr[DCTSIZE*5] = (DCTELEM) 03976 DESCALE(MULTIPLY(tmp10 - tmp11 - tmp2, FIX(1.28)), /* 32/25 */ 03977 CONST_BITS+PASS1_BITS); 03978 tmp2 = MULTIPLY(tmp2, FIX(1.28)); /* 32/25 */ 03979 dataptr[DCTSIZE*1] = (DCTELEM) 03980 
DESCALE(MULTIPLY(tmp0, FIX(1.787906876)) + /* c1 */ 03981 MULTIPLY(tmp1, FIX(1.612894094)) + tmp2 + /* c3 */ 03982 MULTIPLY(tmp3, FIX(0.821810588)) + /* c7 */ 03983 MULTIPLY(tmp4, FIX(0.283176630)), /* c9 */ 03984 CONST_BITS+PASS1_BITS); 03985 tmp12 = MULTIPLY(tmp0 - tmp4, FIX(1.217352341)) - /* (c3+c7)/2 */ 03986 MULTIPLY(tmp1 + tmp3, FIX(0.752365123)); /* (c1-c9)/2 */ 03987 tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.395541753)) + /* (c3-c7)/2 */ 03988 MULTIPLY(tmp11, FIX(0.64)) - tmp2; /* 16/25 */ 03989 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS+PASS1_BITS); 03990 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS+PASS1_BITS); 03991 03992 dataptr++; /* advance pointer to next column */ 03993 wsptr++; /* advance pointer to next column */ 03994 } 03995 } 03996 03997 03998 /* 03999 * Perform the forward DCT on a 4x8 sample block. 04000 * 04001 * 4-point FDCT in pass 1 (rows), 8-point in pass 2 (columns). 04002 */ 04003 04004 GLOBAL(void) 04005 jpeg_fdct_4x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 04006 { 04007 INT32 tmp0, tmp1, tmp2, tmp3; 04008 INT32 tmp10, tmp11, tmp12, tmp13; 04009 INT32 z1; 04010 DCTELEM *dataptr; 04011 JSAMPROW elemptr; 04012 int ctr; 04013 SHIFT_TEMPS 04014 04015 /* Pre-zero output coefficient block. */ 04016 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 04017 04018 /* Pass 1: process rows. */ 04019 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 04020 /* furthermore, we scale the results by 2**PASS1_BITS. */ 04021 /* We must also scale the output by 8/4 = 2, which we add here. */ 04022 /* 4-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). 
*/ 04023 04024 dataptr = data; 04025 for (ctr = 0; ctr < DCTSIZE; ctr++) { 04026 elemptr = sample_data[ctr] + start_col; 04027 04028 /* Even part */ 04029 04030 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]); 04031 tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]); 04032 04033 tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]); 04034 tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]); 04035 04036 /* Apply unsigned->signed conversion */ 04037 dataptr[0] = (DCTELEM) 04038 ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+1)); 04039 dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+1)); 04040 04041 /* Odd part */ 04042 04043 tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */ 04044 /* Add fudge factor here for final descale. */ 04045 tmp0 += ONE << (CONST_BITS-PASS1_BITS-2); 04046 04047 dataptr[1] = (DCTELEM) 04048 RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */ 04049 CONST_BITS-PASS1_BITS-1); 04050 dataptr[3] = (DCTELEM) 04051 RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */ 04052 CONST_BITS-PASS1_BITS-1); 04053 04054 dataptr += DCTSIZE; /* advance pointer to next row */ 04055 } 04056 04057 /* Pass 2: process columns. 04058 * We remove the PASS1_BITS scaling, but leave the results scaled up 04059 * by an overall factor of 8. 04060 */ 04061 04062 dataptr = data; 04063 for (ctr = 0; ctr < 4; ctr++) { 04064 /* Even part per LL&M figure 1 --- note that published figure is faulty; 04065 * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". 04066 */ 04067 04068 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; 04069 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; 04070 tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; 04071 tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; 04072 04073 /* Add fudge factor here for final descale. 
*/ 04074 tmp10 = tmp0 + tmp3 + (ONE << (PASS1_BITS-1)); 04075 tmp12 = tmp0 - tmp3; 04076 tmp11 = tmp1 + tmp2; 04077 tmp13 = tmp1 - tmp2; 04078 04079 tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; 04080 tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; 04081 tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; 04082 tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; 04083 04084 dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS); 04085 dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS); 04086 04087 z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); 04088 /* Add fudge factor here for final descale. */ 04089 z1 += ONE << (CONST_BITS+PASS1_BITS-1); 04090 dataptr[DCTSIZE*2] = (DCTELEM) 04091 RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), CONST_BITS+PASS1_BITS); 04092 dataptr[DCTSIZE*6] = (DCTELEM) 04093 RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), CONST_BITS+PASS1_BITS); 04094 04095 /* Odd part per figure 8 --- note paper omits factor of sqrt(2). 04096 * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). 04097 * i0..i3 in the paper are tmp0..tmp3 here. 04098 */ 04099 04100 tmp10 = tmp0 + tmp3; 04101 tmp11 = tmp1 + tmp2; 04102 tmp12 = tmp0 + tmp2; 04103 tmp13 = tmp1 + tmp3; 04104 z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ 04105 /* Add fudge factor here for final descale. 
*/ 04106 z1 += ONE << (CONST_BITS+PASS1_BITS-1); 04107 04108 tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ 04109 tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ 04110 tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ 04111 tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ 04112 tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ 04113 tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ 04114 tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ 04115 tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ 04116 04117 tmp12 += z1; 04118 tmp13 += z1; 04119 04120 dataptr[DCTSIZE*1] = (DCTELEM) 04121 RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS+PASS1_BITS); 04122 dataptr[DCTSIZE*3] = (DCTELEM) 04123 RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS+PASS1_BITS); 04124 dataptr[DCTSIZE*5] = (DCTELEM) 04125 RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS+PASS1_BITS); 04126 dataptr[DCTSIZE*7] = (DCTELEM) 04127 RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS+PASS1_BITS); 04128 04129 dataptr++; /* advance pointer to next column */ 04130 } 04131 } 04132 04133 04134 /* 04135 * Perform the forward DCT on a 3x6 sample block. 04136 * 04137 * 3-point FDCT in pass 1 (rows), 6-point in pass 2 (columns). 04138 */ 04139 04140 GLOBAL(void) 04141 jpeg_fdct_3x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 04142 { 04143 INT32 tmp0, tmp1, tmp2; 04144 INT32 tmp10, tmp11, tmp12; 04145 DCTELEM *dataptr; 04146 JSAMPROW elemptr; 04147 int ctr; 04148 SHIFT_TEMPS 04149 04150 /* Pre-zero output coefficient block. */ 04151 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 04152 04153 /* Pass 1: process rows. */ 04154 /* Note results are scaled up by sqrt(8) compared to a true DCT; */ 04155 /* furthermore, we scale the results by 2**PASS1_BITS. */ 04156 /* We scale the results further by 2 as part of output adaption */ 04157 /* scaling for different DCT size. */ 04158 /* 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6). 
*/ 04159 04160 dataptr = data; 04161 for (ctr = 0; ctr < 6; ctr++) { 04162 elemptr = sample_data[ctr] + start_col; 04163 04164 /* Even part */ 04165 04166 tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]); 04167 tmp1 = GETJSAMPLE(elemptr[1]); 04168 04169 tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]); 04170 04171 /* Apply unsigned->signed conversion */ 04172 dataptr[0] = (DCTELEM) 04173 ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+1)); 04174 dataptr[2] = (DCTELEM) 04175 DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */ 04176 CONST_BITS-PASS1_BITS-1); 04177 04178 /* Odd part */ 04179 04180 dataptr[1] = (DCTELEM) 04181 DESCALE(MULTIPLY(tmp2, FIX(1.224744871)), /* c1 */ 04182 CONST_BITS-PASS1_BITS-1); 04183 04184 dataptr += DCTSIZE; /* advance pointer to next row */ 04185 } 04186 04187 /* Pass 2: process columns. 04188 * We remove the PASS1_BITS scaling, but leave the results scaled up 04189 * by an overall factor of 8. 04190 * We must also scale the output by (8/6)*(8/3) = 32/9, which we partially 04191 * fold into the constant multipliers (other part was done in pass 1): 04192 * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12) * 16/9. 
04193 */ 04194 04195 dataptr = data; 04196 for (ctr = 0; ctr < 3; ctr++) { 04197 /* Even part */ 04198 04199 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5]; 04200 tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4]; 04201 tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3]; 04202 04203 tmp10 = tmp0 + tmp2; 04204 tmp12 = tmp0 - tmp2; 04205 04206 tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5]; 04207 tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4]; 04208 tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3]; 04209 04210 dataptr[DCTSIZE*0] = (DCTELEM) 04211 DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)), /* 16/9 */ 04212 CONST_BITS+PASS1_BITS); 04213 dataptr[DCTSIZE*2] = (DCTELEM) 04214 DESCALE(MULTIPLY(tmp12, FIX(2.177324216)), /* c2 */ 04215 CONST_BITS+PASS1_BITS); 04216 dataptr[DCTSIZE*4] = (DCTELEM) 04217 DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */ 04218 CONST_BITS+PASS1_BITS); 04219 04220 /* Odd part */ 04221 04222 tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829)); /* c5 */ 04223 04224 dataptr[DCTSIZE*1] = (DCTELEM) 04225 DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */ 04226 CONST_BITS+PASS1_BITS); 04227 dataptr[DCTSIZE*3] = (DCTELEM) 04228 DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)), /* 16/9 */ 04229 CONST_BITS+PASS1_BITS); 04230 dataptr[DCTSIZE*5] = (DCTELEM) 04231 DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)), /* 16/9 */ 04232 CONST_BITS+PASS1_BITS); 04233 04234 dataptr++; /* advance pointer to next column */ 04235 } 04236 } 04237 04238 04239 /* 04240 * Perform the forward DCT on a 2x4 sample block. 04241 * 04242 * 2-point FDCT in pass 1 (rows), 4-point in pass 2 (columns). 04243 */ 04244 04245 GLOBAL(void) 04246 jpeg_fdct_2x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 04247 { 04248 INT32 tmp0, tmp1; 04249 INT32 tmp10, tmp11; 04250 DCTELEM *dataptr; 04251 JSAMPROW elemptr; 04252 int ctr; 04253 SHIFT_TEMPS 04254 04255 /* Pre-zero output coefficient block. 
*/ 04256 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 04257 04258 /* Pass 1: process rows. */ 04259 /* Note results are scaled up by sqrt(8) compared to a true DCT. */ 04260 /* We must also scale the output by (8/2)*(8/4) = 2**3, which we add here. */ 04261 04262 dataptr = data; 04263 for (ctr = 0; ctr < 4; ctr++) { 04264 elemptr = sample_data[ctr] + start_col; 04265 04266 /* Even part */ 04267 04268 tmp0 = GETJSAMPLE(elemptr[0]); 04269 tmp1 = GETJSAMPLE(elemptr[1]); 04270 04271 /* Apply unsigned->signed conversion */ 04272 dataptr[0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 3); 04273 04274 /* Odd part */ 04275 04276 dataptr[1] = (DCTELEM) ((tmp0 - tmp1) << 3); 04277 04278 dataptr += DCTSIZE; /* advance pointer to next row */ 04279 } 04280 04281 /* Pass 2: process columns. 04282 * We leave the results scaled up by an overall factor of 8. 04283 * 4-point FDCT kernel, 04284 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. 04285 */ 04286 04287 dataptr = data; 04288 for (ctr = 0; ctr < 2; ctr++) { 04289 /* Even part */ 04290 04291 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3]; 04292 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2]; 04293 04294 tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3]; 04295 tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2]; 04296 04297 dataptr[DCTSIZE*0] = (DCTELEM) (tmp0 + tmp1); 04298 dataptr[DCTSIZE*2] = (DCTELEM) (tmp0 - tmp1); 04299 04300 /* Odd part */ 04301 04302 tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */ 04303 /* Add fudge factor here for final descale. */ 04304 tmp0 += ONE << (CONST_BITS-1); 04305 04306 dataptr[DCTSIZE*1] = (DCTELEM) 04307 RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */ 04308 CONST_BITS); 04309 dataptr[DCTSIZE*3] = (DCTELEM) 04310 RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */ 04311 CONST_BITS); 04312 04313 dataptr++; /* advance pointer to next column */ 04314 } 04315 } 04316 04317 04318 /* 04319 * Perform the forward DCT on a 1x2 sample block. 
04320 * 04321 * 1-point FDCT in pass 1 (rows), 2-point in pass 2 (columns). 04322 */ 04323 04324 GLOBAL(void) 04325 jpeg_fdct_1x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) 04326 { 04327 INT32 tmp0, tmp1; 04328 04329 /* Pre-zero output coefficient block. */ 04330 MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); 04331 04332 tmp0 = GETJSAMPLE(sample_data[0][start_col]); 04333 tmp1 = GETJSAMPLE(sample_data[1][start_col]); 04334 04335 /* We leave the results scaled up by an overall factor of 8. 04336 * We must also scale the output by (8/1)*(8/2) = 2**5. 04337 */ 04338 04339 /* Even part */ 04340 /* Apply unsigned->signed conversion */ 04341 data[DCTSIZE*0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5); 04342 04343 /* Odd part */ 04344 data[DCTSIZE*1] = (DCTELEM) ((tmp0 - tmp1) << 5); 04345 } 04346 04347 #endif /* DCT_SCALING_SUPPORTED */ 04348 #endif /* DCT_ISLOW_SUPPORTED */ Generated on Sun May 27 2012 04:19:26 for ReactOS by
1.7.6.1
|