ReactOS Fundraising Campaign 2012
 
€ 4,410 / € 30,000

Information | Donate

Home | Info | Community | Development | myReactOS | Contact Us

  1. Home
  2. Community
  3. Development
  4. myReactOS
  5. Fundraiser 2012

  1. Main Page
  2. Alphabetical List
  3. Data Structures
  4. Directories
  5. File List
  6. Data Fields
  7. Globals
  8. Related Pages

ReactOS Development > Doxygen

jfdctfst.c
Go to the documentation of this file.
00001 /*
00002  * jfdctfst.c
00003  *
00004  * Copyright (C) 1994-1996, Thomas G. Lane.
00005  * Modified 2003-2009 by Guido Vollbeding.
00006  * This file is part of the Independent JPEG Group's software.
00007  * For conditions of distribution and use, see the accompanying README file.
00008  *
00009  * This file contains a fast, not so accurate integer implementation of the
00010  * forward DCT (Discrete Cosine Transform).
00011  *
00012  * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
00013  * on each column.  Direct algorithms are also available, but they are
00014  * much more complex and seem not to be any faster when reduced to code.
00015  *
00016  * This implementation is based on Arai, Agui, and Nakajima's algorithm for
00017  * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
00018  * Japanese, but the algorithm is described in the Pennebaker & Mitchell
00019  * JPEG textbook (see REFERENCES section in file README).  The following code
00020  * is based directly on figure 4-8 in P&M.
00021  * While an 8-point DCT cannot be done in less than 11 multiplies, it is
00022  * possible to arrange the computation so that many of the multiplies are
00023  * simple scalings of the final outputs.  These multiplies can then be
00024  * folded into the multiplications or divisions by the JPEG quantization
00025  * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
00026  * to be done in the DCT itself.
00027  * The primary disadvantage of this method is that with fixed-point math,
00028  * accuracy is lost due to imprecise representation of the scaled
00029  * quantization values.  The smaller the quantization table entry, the less
00030  * precise the scaled value, so this implementation does worse with high-
00031  * quality-setting files than with low-quality ones.
00032  */
00033 
00034 #define JPEG_INTERNALS
00035 #include "jinclude.h"
00036 #include "jpeglib.h"
00037 #include "jdct.h"       /* Private declarations for DCT subsystem */
00038 
00039 #ifdef DCT_IFAST_SUPPORTED
00040 
00041 
00042 /*
00043  * This module is specialized to the case DCTSIZE = 8.
00044  */
00045 
00046 #if DCTSIZE != 8
00047   Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err: this line is not valid C, so the build stops here whenever DCTSIZE is not 8 */
00048 #endif
00049 
00050 
00051 /* Scaling decisions are generally the same as in the LL&M algorithm;
00052  * see jfdctint.c for more details.  However, we choose to descale
00053  * (right shift) multiplication products as soon as they are formed,
00054  * rather than carrying additional fractional bits into subsequent additions.
00055  * This compromises accuracy slightly, but it lets us save a few shifts.
00056  * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
00057  * everywhere except in the multiplications proper; this saves a good deal
00058  * of work on 16-bit-int machines.
00059  *
00060  * Again to save a few shifts, the intermediate results between pass 1 and
00061  * pass 2 are not upscaled, but are represented only to integral precision.
00062  *
00063  * A final compromise is to represent the multiplicative constants to only
00064  * 8 fractional bits, rather than 13.  This saves some shifting work on some
00065  * machines, and may also reduce the cost of multiplication (since there
00066  * are fewer one-bits in the constants).
00067  */
00068 
00069 #define CONST_BITS  8   /* number of fractional bits kept in the FIX_* multipliers below */
00070 
00071 
00072 /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
00073  * causing a lot of useless floating-point operations at run time.
00074  * To get around this we use the following pre-calculated constants.
00075  * If you change CONST_BITS you may want to add appropriate values.
00076  * (With a reasonable C compiler, you can just rely on the FIX() macro...)
       *
       * For CONST_BITS == 8 each literal is the real constant multiplied by
       * 2^8 = 256 and rounded to the nearest integer, e.g.
       * 0.382683433 * 256 = 97.97 -> 98 and 1.306562965 * 256 = 334.48 -> 334.
       * For any other CONST_BITS the values fall back to the FIX() macro, which
       * is not defined in this file (presumably supplied by jdct.h).
00077  */
00078 
00079 #if CONST_BITS == 8
00080 #define FIX_0_382683433  ((INT32)   98)     /* FIX(0.382683433) */
00081 #define FIX_0_541196100  ((INT32)  139)     /* FIX(0.541196100) */
00082 #define FIX_0_707106781  ((INT32)  181)     /* FIX(0.707106781) */
00083 #define FIX_1_306562965  ((INT32)  334)     /* FIX(1.306562965) */
00084 #else
00085 #define FIX_0_382683433  FIX(0.382683433)
00086 #define FIX_0_541196100  FIX(0.541196100)
00087 #define FIX_0_707106781  FIX(0.707106781)
00088 #define FIX_1_306562965  FIX(1.306562965)
00089 #endif
00090 
00091 
00092 /* We can gain a little more speed, with a further compromise in accuracy,
00093  * by omitting the addition in a descaling shift.  This yields an incorrectly
00094  * rounded result half the time...
       *
       * Unless USE_ACCURATE_ROUNDING is defined, the DESCALE definition inherited
       * from the included headers is dropped and replaced by a bare RIGHT_SHIFT,
       * i.e. the shift is performed without first adding a rounding term.
00095  */
00096 
00097 #ifndef USE_ACCURATE_ROUNDING
00098 #undef DESCALE
00099 #define DESCALE(x,n)  RIGHT_SHIFT(x, n)
00100 #endif
00101 
00102 
00103 /* Multiply a DCTELEM variable by an INT32 constant, and immediately
00104  * descale to yield a DCTELEM result.
       *
       * The product is shifted right by CONST_BITS (via DESCALE) and then cast
       * back to DCTELEM.  Both macro arguments are parenthesized in the expansion
       * and each appears exactly once, so an argument expression is evaluated a
       * single time.  Note that "const" is used here only as a macro parameter
       * name.
00105  */
00106 
00107 #define MULTIPLY(var,const)  ((DCTELEM) DESCALE((var) * (const), CONST_BITS))
00108 
00109 
00110 /*
00111  * Perform the forward DCT on one block of samples.
       *
       * Parameters:
       *   data        - output workspace.  DCTSIZE*DCTSIZE entries are written;
       *                 pass 1 stores row r at data[r*DCTSIZE .. r*DCTSIZE+7] and
       *                 pass 2 then transforms the same storage in place, column
       *                 by column.
       *   sample_data - array of sample-row pointers; rows 0 .. DCTSIZE-1 are read.
       *   start_col   - offset of the first sample to read within each row; the
       *                 8 samples elemptr[0] .. elemptr[7] from that offset are used.
       *
       * Nothing is returned, and this function performs no validation of its
       * arguments.  As explained in the file header, the results are the scaled
       * AA&N outputs: the remaining per-coefficient scale factors are not applied
       * here and are meant to be folded into the quantization step.
00112  */
00113 
00114 GLOBAL(void)
00115 jpeg_fdct_ifast (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
00116 {
00117   DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
00118   DCTELEM tmp10, tmp11, tmp12, tmp13;
00119   DCTELEM z1, z2, z3, z4, z5, z11, z13;
00120   DCTELEM *dataptr;
00121   JSAMPROW elemptr;
00122   int ctr;
00123   SHIFT_TEMPS   /* defined outside this file; presumably declares a temporary for RIGHT_SHIFT -- confirm in the headers */
00124 
00125   /* Pass 1: process rows. */
        /* Each iteration reads one row of 8 input samples and writes one row of
         * 8 intermediate values into data.
         */
00126 
00127   dataptr = data;
00128   for (ctr = 0; ctr < DCTSIZE; ctr++) {
00129     elemptr = sample_data[ctr] + start_col;
00130 
00131     /* Load data into workspace */
          /* tmp0..tmp3 hold the sums and tmp7..tmp4 the differences of the
           * mirrored sample pairs (0,7), (1,6), (2,5) and (3,4).
           */
00132     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
00133     tmp7 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
00134     tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
00135     tmp6 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
00136     tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
00137     tmp5 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
00138     tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
00139     tmp4 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
00140 
00141     /* Even part */
00142 
00143     tmp10 = tmp0 + tmp3;    /* phase 2 */
00144     tmp13 = tmp0 - tmp3;
00145     tmp11 = tmp1 + tmp2;
00146     tmp12 = tmp1 - tmp2;
00147 
00148     /* Apply unsigned->signed conversion */
          /* Only the sum of all 8 samples needs the 8 * CENTERJSAMPLE correction:
           * every other output of this row is built from differences in which a
           * constant offset on the samples cancels out.
           */
00149     dataptr[0] = tmp10 + tmp11 - 8 * CENTERJSAMPLE; /* phase 3 */
00150     dataptr[4] = tmp10 - tmp11;
00151 
00152     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
00153     dataptr[2] = tmp13 + z1;    /* phase 5 */
00154     dataptr[6] = tmp13 - z1;
00155 
00156     /* Odd part */
          /* tmp10..tmp12 are reused from here on with new meanings; the even-part
           * values they held are no longer needed.
           */
00157 
00158     tmp10 = tmp4 + tmp5;    /* phase 2 */
00159     tmp11 = tmp5 + tmp6;
00160     tmp12 = tmp6 + tmp7;
00161 
00162     /* The rotator is modified from fig 4-8 to avoid extra negations. */
          /* z5 is the term shared by both rotator outputs z2 and z4. */
00163     z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
00164     z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
00165     z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
00166     z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
00167 
00168     z11 = tmp7 + z3;        /* phase 5 */
00169     z13 = tmp7 - z3;
00170 
00171     dataptr[5] = z13 + z2;  /* phase 6 */
00172     dataptr[3] = z13 - z2;
00173     dataptr[1] = z11 + z4;
00174     dataptr[7] = z11 - z4;
00175 
00176     dataptr += DCTSIZE;     /* advance pointer to next row */
00177   }
00178 
00179   /* Pass 2: process columns. */
        /* Same butterfly as pass 1, applied in place to the intermediate values
         * with a stride of DCTSIZE between successive elements of a column.
         * ctr merely counts the DCTSIZE iterations (downwards); dataptr walks the
         * columns in increasing order.  No CENTERJSAMPLE correction appears here;
         * it was already applied in pass 1.
         */
00180 
00181   dataptr = data;
00182   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
00183     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
00184     tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
00185     tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
00186     tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
00187     tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
00188     tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
00189     tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
00190     tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
00191 
00192     /* Even part */
00193 
00194     tmp10 = tmp0 + tmp3;    /* phase 2 */
00195     tmp13 = tmp0 - tmp3;
00196     tmp11 = tmp1 + tmp2;
00197     tmp12 = tmp1 - tmp2;
00198 
00199     dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
00200     dataptr[DCTSIZE*4] = tmp10 - tmp11;
00201 
00202     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
00203     dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
00204     dataptr[DCTSIZE*6] = tmp13 - z1;
00205 
00206     /* Odd part */
          /* As in pass 1, tmp10..tmp12 are reused with new meanings below. */
00207 
00208     tmp10 = tmp4 + tmp5;    /* phase 2 */
00209     tmp11 = tmp5 + tmp6;
00210     tmp12 = tmp6 + tmp7;
00211 
00212     /* The rotator is modified from fig 4-8 to avoid extra negations. */
00213     z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
00214     z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
00215     z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
00216     z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
00217 
00218     z11 = tmp7 + z3;        /* phase 5 */
00219     z13 = tmp7 - z3;
00220 
00221     dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
00222     dataptr[DCTSIZE*3] = z13 - z2;
00223     dataptr[DCTSIZE*1] = z11 + z4;
00224     dataptr[DCTSIZE*7] = z11 - z4;
00225 
00226     dataptr++;          /* advance pointer to next column */
00227   }
00228 }
00229 
00230 #endif /* DCT_IFAST_SUPPORTED */

Generated on Sun May 27 2012 04:19:25 for ReactOS by doxygen 1.7.6.1

ReactOS is a registered trademark or a trademark of ReactOS Foundation in the United States and other countries.