Home | Info | Community | Development | myReactOS | Contact Us
ReactOS Development > Doxygen > dct64_i386.c
Go to the documentation of this file.
00001 /* 00002 dct64_i386.c: DCT64, a C variant for i386 00003 00004 copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1 00005 see COPYING and AUTHORS files in distribution or http://mpg123.org 00006 initially written by Michael Hipp 00007 */ 00008 00009 /* 00010 * Discrete Cosine Tansform (DCT) for subband synthesis 00011 * optimized for machines with no auto-increment. 00012 * The performance is highly compiler dependend. Maybe 00013 * the dct64.c version for 'normal' processor may be faster 00014 * even for Intel processors. 00015 */ 00016 00017 #include "mpg123lib_intern.h" 00018 00019 static void dct64_1(real *out0,real *out1,real *b1,real *b2,real *samples) 00020 { 00021 { 00022 register real *costab = pnts[0]; 00023 00024 b1[0x00] = samples[0x00] + samples[0x1F]; 00025 b1[0x01] = samples[0x01] + samples[0x1E]; 00026 b1[0x1F] = REAL_MUL(samples[0x00] - samples[0x1F], costab[0x0]); 00027 b1[0x1E] = REAL_MUL(samples[0x01] - samples[0x1E], costab[0x1]); 00028 00029 b1[0x02] = samples[0x02] + samples[0x1D]; 00030 b1[0x03] = samples[0x03] + samples[0x1C]; 00031 b1[0x1D] = REAL_MUL(samples[0x02] - samples[0x1D], costab[0x2]); 00032 b1[0x1C] = REAL_MUL(samples[0x03] - samples[0x1C], costab[0x3]); 00033 00034 b1[0x04] = samples[0x04] + samples[0x1B]; 00035 b1[0x05] = samples[0x05] + samples[0x1A]; 00036 b1[0x1B] = REAL_MUL(samples[0x04] - samples[0x1B], costab[0x4]); 00037 b1[0x1A] = REAL_MUL(samples[0x05] - samples[0x1A], costab[0x5]); 00038 00039 b1[0x06] = samples[0x06] + samples[0x19]; 00040 b1[0x07] = samples[0x07] + samples[0x18]; 00041 b1[0x19] = REAL_MUL(samples[0x06] - samples[0x19], costab[0x6]); 00042 b1[0x18] = REAL_MUL(samples[0x07] - samples[0x18], costab[0x7]); 00043 00044 b1[0x08] = samples[0x08] + samples[0x17]; 00045 b1[0x09] = samples[0x09] + samples[0x16]; 00046 b1[0x17] = REAL_MUL(samples[0x08] - samples[0x17], costab[0x8]); 00047 b1[0x16] = REAL_MUL(samples[0x09] - samples[0x16], costab[0x9]); 00048 00049 
b1[0x0A] = samples[0x0A] + samples[0x15]; 00050 b1[0x0B] = samples[0x0B] + samples[0x14]; 00051 b1[0x15] = REAL_MUL(samples[0x0A] - samples[0x15], costab[0xA]); 00052 b1[0x14] = REAL_MUL(samples[0x0B] - samples[0x14], costab[0xB]); 00053 00054 b1[0x0C] = samples[0x0C] + samples[0x13]; 00055 b1[0x0D] = samples[0x0D] + samples[0x12]; 00056 b1[0x13] = REAL_MUL(samples[0x0C] - samples[0x13], costab[0xC]); 00057 b1[0x12] = REAL_MUL(samples[0x0D] - samples[0x12], costab[0xD]); 00058 00059 b1[0x0E] = samples[0x0E] + samples[0x11]; 00060 b1[0x0F] = samples[0x0F] + samples[0x10]; 00061 b1[0x11] = REAL_MUL(samples[0x0E] - samples[0x11], costab[0xE]); 00062 b1[0x10] = REAL_MUL(samples[0x0F] - samples[0x10], costab[0xF]); 00063 00064 } 00065 00066 00067 { 00068 register real *costab = pnts[1]; 00069 00070 b2[0x00] = b1[0x00] + b1[0x0F]; 00071 b2[0x01] = b1[0x01] + b1[0x0E]; 00072 b2[0x0F] = REAL_MUL(b1[0x00] - b1[0x0F], costab[0]); 00073 b2[0x0E] = REAL_MUL(b1[0x01] - b1[0x0E], costab[1]); 00074 00075 b2[0x02] = b1[0x02] + b1[0x0D]; 00076 b2[0x03] = b1[0x03] + b1[0x0C]; 00077 b2[0x0D] = REAL_MUL(b1[0x02] - b1[0x0D], costab[2]); 00078 b2[0x0C] = REAL_MUL(b1[0x03] - b1[0x0C], costab[3]); 00079 00080 b2[0x04] = b1[0x04] + b1[0x0B]; 00081 b2[0x05] = b1[0x05] + b1[0x0A]; 00082 b2[0x0B] = REAL_MUL(b1[0x04] - b1[0x0B], costab[4]); 00083 b2[0x0A] = REAL_MUL(b1[0x05] - b1[0x0A], costab[5]); 00084 00085 b2[0x06] = b1[0x06] + b1[0x09]; 00086 b2[0x07] = b1[0x07] + b1[0x08]; 00087 b2[0x09] = REAL_MUL(b1[0x06] - b1[0x09], costab[6]); 00088 b2[0x08] = REAL_MUL(b1[0x07] - b1[0x08], costab[7]); 00089 00090 /* */ 00091 00092 b2[0x10] = b1[0x10] + b1[0x1F]; 00093 b2[0x11] = b1[0x11] + b1[0x1E]; 00094 b2[0x1F] = REAL_MUL(b1[0x1F] - b1[0x10], costab[0]); 00095 b2[0x1E] = REAL_MUL(b1[0x1E] - b1[0x11], costab[1]); 00096 00097 b2[0x12] = b1[0x12] + b1[0x1D]; 00098 b2[0x13] = b1[0x13] + b1[0x1C]; 00099 b2[0x1D] = REAL_MUL(b1[0x1D] - b1[0x12], costab[2]); 00100 b2[0x1C] = REAL_MUL(b1[0x1C] - b1[0x13], 
costab[3]); 00101 00102 b2[0x14] = b1[0x14] + b1[0x1B]; 00103 b2[0x15] = b1[0x15] + b1[0x1A]; 00104 b2[0x1B] = REAL_MUL(b1[0x1B] - b1[0x14], costab[4]); 00105 b2[0x1A] = REAL_MUL(b1[0x1A] - b1[0x15], costab[5]); 00106 00107 b2[0x16] = b1[0x16] + b1[0x19]; 00108 b2[0x17] = b1[0x17] + b1[0x18]; 00109 b2[0x19] = REAL_MUL(b1[0x19] - b1[0x16], costab[6]); 00110 b2[0x18] = REAL_MUL(b1[0x18] - b1[0x17], costab[7]); 00111 } 00112 00113 { 00114 register real *costab = pnts[2]; 00115 00116 b1[0x00] = b2[0x00] + b2[0x07]; 00117 b1[0x07] = REAL_MUL(b2[0x00] - b2[0x07], costab[0]); 00118 b1[0x01] = b2[0x01] + b2[0x06]; 00119 b1[0x06] = REAL_MUL(b2[0x01] - b2[0x06], costab[1]); 00120 b1[0x02] = b2[0x02] + b2[0x05]; 00121 b1[0x05] = REAL_MUL(b2[0x02] - b2[0x05], costab[2]); 00122 b1[0x03] = b2[0x03] + b2[0x04]; 00123 b1[0x04] = REAL_MUL(b2[0x03] - b2[0x04], costab[3]); 00124 00125 b1[0x08] = b2[0x08] + b2[0x0F]; 00126 b1[0x0F] = REAL_MUL(b2[0x0F] - b2[0x08], costab[0]); 00127 b1[0x09] = b2[0x09] + b2[0x0E]; 00128 b1[0x0E] = REAL_MUL(b2[0x0E] - b2[0x09], costab[1]); 00129 b1[0x0A] = b2[0x0A] + b2[0x0D]; 00130 b1[0x0D] = REAL_MUL(b2[0x0D] - b2[0x0A], costab[2]); 00131 b1[0x0B] = b2[0x0B] + b2[0x0C]; 00132 b1[0x0C] = REAL_MUL(b2[0x0C] - b2[0x0B], costab[3]); 00133 00134 b1[0x10] = b2[0x10] + b2[0x17]; 00135 b1[0x17] = REAL_MUL(b2[0x10] - b2[0x17], costab[0]); 00136 b1[0x11] = b2[0x11] + b2[0x16]; 00137 b1[0x16] = REAL_MUL(b2[0x11] - b2[0x16], costab[1]); 00138 b1[0x12] = b2[0x12] + b2[0x15]; 00139 b1[0x15] = REAL_MUL(b2[0x12] - b2[0x15], costab[2]); 00140 b1[0x13] = b2[0x13] + b2[0x14]; 00141 b1[0x14] = REAL_MUL(b2[0x13] - b2[0x14], costab[3]); 00142 00143 b1[0x18] = b2[0x18] + b2[0x1F]; 00144 b1[0x1F] = REAL_MUL(b2[0x1F] - b2[0x18], costab[0]); 00145 b1[0x19] = b2[0x19] + b2[0x1E]; 00146 b1[0x1E] = REAL_MUL(b2[0x1E] - b2[0x19], costab[1]); 00147 b1[0x1A] = b2[0x1A] + b2[0x1D]; 00148 b1[0x1D] = REAL_MUL(b2[0x1D] - b2[0x1A], costab[2]); 00149 b1[0x1B] = b2[0x1B] + b2[0x1C]; 00150 
b1[0x1C] = REAL_MUL(b2[0x1C] - b2[0x1B], costab[3]); 00151 } 00152 00153 { 00154 register real const cos0 = pnts[3][0]; 00155 register real const cos1 = pnts[3][1]; 00156 00157 b2[0x00] = b1[0x00] + b1[0x03]; 00158 b2[0x03] = REAL_MUL(b1[0x00] - b1[0x03], cos0); 00159 b2[0x01] = b1[0x01] + b1[0x02]; 00160 b2[0x02] = REAL_MUL(b1[0x01] - b1[0x02], cos1); 00161 00162 b2[0x04] = b1[0x04] + b1[0x07]; 00163 b2[0x07] = REAL_MUL(b1[0x07] - b1[0x04], cos0); 00164 b2[0x05] = b1[0x05] + b1[0x06]; 00165 b2[0x06] = REAL_MUL(b1[0x06] - b1[0x05], cos1); 00166 00167 b2[0x08] = b1[0x08] + b1[0x0B]; 00168 b2[0x0B] = REAL_MUL(b1[0x08] - b1[0x0B], cos0); 00169 b2[0x09] = b1[0x09] + b1[0x0A]; 00170 b2[0x0A] = REAL_MUL(b1[0x09] - b1[0x0A], cos1); 00171 00172 b2[0x0C] = b1[0x0C] + b1[0x0F]; 00173 b2[0x0F] = REAL_MUL(b1[0x0F] - b1[0x0C], cos0); 00174 b2[0x0D] = b1[0x0D] + b1[0x0E]; 00175 b2[0x0E] = REAL_MUL(b1[0x0E] - b1[0x0D], cos1); 00176 00177 b2[0x10] = b1[0x10] + b1[0x13]; 00178 b2[0x13] = REAL_MUL(b1[0x10] - b1[0x13], cos0); 00179 b2[0x11] = b1[0x11] + b1[0x12]; 00180 b2[0x12] = REAL_MUL(b1[0x11] - b1[0x12], cos1); 00181 00182 b2[0x14] = b1[0x14] + b1[0x17]; 00183 b2[0x17] = REAL_MUL(b1[0x17] - b1[0x14], cos0); 00184 b2[0x15] = b1[0x15] + b1[0x16]; 00185 b2[0x16] = REAL_MUL(b1[0x16] - b1[0x15], cos1); 00186 00187 b2[0x18] = b1[0x18] + b1[0x1B]; 00188 b2[0x1B] = REAL_MUL(b1[0x18] - b1[0x1B], cos0); 00189 b2[0x19] = b1[0x19] + b1[0x1A]; 00190 b2[0x1A] = REAL_MUL(b1[0x19] - b1[0x1A], cos1); 00191 00192 b2[0x1C] = b1[0x1C] + b1[0x1F]; 00193 b2[0x1F] = REAL_MUL(b1[0x1F] - b1[0x1C], cos0); 00194 b2[0x1D] = b1[0x1D] + b1[0x1E]; 00195 b2[0x1E] = REAL_MUL(b1[0x1E] - b1[0x1D], cos1); 00196 } 00197 00198 { 00199 register real const cos0 = pnts[4][0]; 00200 00201 b1[0x00] = b2[0x00] + b2[0x01]; 00202 b1[0x01] = REAL_MUL(b2[0x00] - b2[0x01], cos0); 00203 b1[0x02] = b2[0x02] + b2[0x03]; 00204 b1[0x03] = REAL_MUL(b2[0x03] - b2[0x02], cos0); 00205 b1[0x02] += b1[0x03]; 00206 00207 b1[0x04] = 
b2[0x04] + b2[0x05]; 00208 b1[0x05] = REAL_MUL(b2[0x04] - b2[0x05], cos0); 00209 b1[0x06] = b2[0x06] + b2[0x07]; 00210 b1[0x07] = REAL_MUL(b2[0x07] - b2[0x06], cos0); 00211 b1[0x06] += b1[0x07]; 00212 b1[0x04] += b1[0x06]; 00213 b1[0x06] += b1[0x05]; 00214 b1[0x05] += b1[0x07]; 00215 00216 b1[0x08] = b2[0x08] + b2[0x09]; 00217 b1[0x09] = REAL_MUL(b2[0x08] - b2[0x09], cos0); 00218 b1[0x0A] = b2[0x0A] + b2[0x0B]; 00219 b1[0x0B] = REAL_MUL(b2[0x0B] - b2[0x0A], cos0); 00220 b1[0x0A] += b1[0x0B]; 00221 00222 b1[0x0C] = b2[0x0C] + b2[0x0D]; 00223 b1[0x0D] = REAL_MUL(b2[0x0C] - b2[0x0D], cos0); 00224 b1[0x0E] = b2[0x0E] + b2[0x0F]; 00225 b1[0x0F] = REAL_MUL(b2[0x0F] - b2[0x0E], cos0); 00226 b1[0x0E] += b1[0x0F]; 00227 b1[0x0C] += b1[0x0E]; 00228 b1[0x0E] += b1[0x0D]; 00229 b1[0x0D] += b1[0x0F]; 00230 00231 b1[0x10] = b2[0x10] + b2[0x11]; 00232 b1[0x11] = REAL_MUL(b2[0x10] - b2[0x11], cos0); 00233 b1[0x12] = b2[0x12] + b2[0x13]; 00234 b1[0x13] = REAL_MUL(b2[0x13] - b2[0x12], cos0); 00235 b1[0x12] += b1[0x13]; 00236 00237 b1[0x14] = b2[0x14] + b2[0x15]; 00238 b1[0x15] = REAL_MUL(b2[0x14] - b2[0x15], cos0); 00239 b1[0x16] = b2[0x16] + b2[0x17]; 00240 b1[0x17] = REAL_MUL(b2[0x17] - b2[0x16], cos0); 00241 b1[0x16] += b1[0x17]; 00242 b1[0x14] += b1[0x16]; 00243 b1[0x16] += b1[0x15]; 00244 b1[0x15] += b1[0x17]; 00245 00246 b1[0x18] = b2[0x18] + b2[0x19]; 00247 b1[0x19] = REAL_MUL(b2[0x18] - b2[0x19], cos0); 00248 b1[0x1A] = b2[0x1A] + b2[0x1B]; 00249 b1[0x1B] = REAL_MUL(b2[0x1B] - b2[0x1A], cos0); 00250 b1[0x1A] += b1[0x1B]; 00251 00252 b1[0x1C] = b2[0x1C] + b2[0x1D]; 00253 b1[0x1D] = REAL_MUL(b2[0x1C] - b2[0x1D], cos0); 00254 b1[0x1E] = b2[0x1E] + b2[0x1F]; 00255 b1[0x1F] = REAL_MUL(b2[0x1F] - b2[0x1E], cos0); 00256 b1[0x1E] += b1[0x1F]; 00257 b1[0x1C] += b1[0x1E]; 00258 b1[0x1E] += b1[0x1D]; 00259 b1[0x1D] += b1[0x1F]; 00260 } 00261 00262 out0[0x10*16] = REAL_SCALE_DCT64(b1[0x00]); 00263 out0[0x10*12] = REAL_SCALE_DCT64(b1[0x04]); 00264 out0[0x10* 8] = 
REAL_SCALE_DCT64(b1[0x02]); 00265 out0[0x10* 4] = REAL_SCALE_DCT64(b1[0x06]); 00266 out0[0x10* 0] = REAL_SCALE_DCT64(b1[0x01]); 00267 out1[0x10* 0] = REAL_SCALE_DCT64(b1[0x01]); 00268 out1[0x10* 4] = REAL_SCALE_DCT64(b1[0x05]); 00269 out1[0x10* 8] = REAL_SCALE_DCT64(b1[0x03]); 00270 out1[0x10*12] = REAL_SCALE_DCT64(b1[0x07]); 00271 00272 #if 1 00273 out0[0x10*14] = REAL_SCALE_DCT64(b1[0x08] + b1[0x0C]); 00274 out0[0x10*10] = REAL_SCALE_DCT64(b1[0x0C] + b1[0x0a]); 00275 out0[0x10* 6] = REAL_SCALE_DCT64(b1[0x0A] + b1[0x0E]); 00276 out0[0x10* 2] = REAL_SCALE_DCT64(b1[0x0E] + b1[0x09]); 00277 out1[0x10* 2] = REAL_SCALE_DCT64(b1[0x09] + b1[0x0D]); 00278 out1[0x10* 6] = REAL_SCALE_DCT64(b1[0x0D] + b1[0x0B]); 00279 out1[0x10*10] = REAL_SCALE_DCT64(b1[0x0B] + b1[0x0F]); 00280 out1[0x10*14] = REAL_SCALE_DCT64(b1[0x0F]); 00281 #else 00282 b1[0x08] += b1[0x0C]; 00283 out0[0x10*14] = REAL_SCALE_DCT64(b1[0x08]); 00284 b1[0x0C] += b1[0x0a]; 00285 out0[0x10*10] = REAL_SCALE_DCT64(b1[0x0C]); 00286 b1[0x0A] += b1[0x0E]; 00287 out0[0x10* 6] = REAL_SCALE_DCT64(b1[0x0A]); 00288 b1[0x0E] += b1[0x09]; 00289 out0[0x10* 2] = REAL_SCALE_DCT64(b1[0x0E]); 00290 b1[0x09] += b1[0x0D]; 00291 out1[0x10* 2] = REAL_SCALE_DCT64(b1[0x09]); 00292 b1[0x0D] += b1[0x0B]; 00293 out1[0x10* 6] = REAL_SCALE_DCT64(b1[0x0D]); 00294 b1[0x0B] += b1[0x0F]; 00295 out1[0x10*10] = REAL_SCALE_DCT64(b1[0x0B]); 00296 out1[0x10*14] = REAL_SCALE_DCT64(b1[0x0F]); 00297 #endif 00298 00299 { 00300 real tmp; 00301 tmp = b1[0x18] + b1[0x1C]; 00302 out0[0x10*15] = REAL_SCALE_DCT64(tmp + b1[0x10]); 00303 out0[0x10*13] = REAL_SCALE_DCT64(tmp + b1[0x14]); 00304 tmp = b1[0x1C] + b1[0x1A]; 00305 out0[0x10*11] = REAL_SCALE_DCT64(tmp + b1[0x14]); 00306 out0[0x10* 9] = REAL_SCALE_DCT64(tmp + b1[0x12]); 00307 tmp = b1[0x1A] + b1[0x1E]; 00308 out0[0x10* 7] = REAL_SCALE_DCT64(tmp + b1[0x12]); 00309 out0[0x10* 5] = REAL_SCALE_DCT64(tmp + b1[0x16]); 00310 tmp = b1[0x1E] + b1[0x19]; 00311 out0[0x10* 3] = REAL_SCALE_DCT64(tmp + b1[0x16]); 
00312 out0[0x10* 1] = REAL_SCALE_DCT64(tmp + b1[0x11]); 00313 tmp = b1[0x19] + b1[0x1D]; 00314 out1[0x10* 1] = REAL_SCALE_DCT64(tmp + b1[0x11]); 00315 out1[0x10* 3] = REAL_SCALE_DCT64(tmp + b1[0x15]); 00316 tmp = b1[0x1D] + b1[0x1B]; 00317 out1[0x10* 5] = REAL_SCALE_DCT64(tmp + b1[0x15]); 00318 out1[0x10* 7] = REAL_SCALE_DCT64(tmp + b1[0x13]); 00319 tmp = b1[0x1B] + b1[0x1F]; 00320 out1[0x10* 9] = REAL_SCALE_DCT64(tmp + b1[0x13]); 00321 out1[0x10*11] = REAL_SCALE_DCT64(tmp + b1[0x17]); 00322 out1[0x10*13] = REAL_SCALE_DCT64(b1[0x17] + b1[0x1F]); 00323 out1[0x10*15] = REAL_SCALE_DCT64(b1[0x1F]); 00324 } 00325 } 00326 00327 /* 00328 * the call via dct64 is a trick to force GCC to use 00329 * (new) registers for the b1,b2 pointer to the bufs[xx] field 00330 */ 00331 void dct64_i386(real *a,real *b,real *c) 00332 { 00333 real bufs[0x40]; 00334 dct64_1(a,b,bufs,bufs+0x20,c); 00335 } 00336 Generated on Sat May 26 2012 04:33:00 for ReactOS by
1.7.6.1
|