Home | Info | Community | Development | myReactOS | Contact Us
ReactOS Development > Doxygen > synth_altivec.c
Go to the documentation of this file.
00001 /* 00002 decode.c: decoding samples... 00003 00004 copyright 1995-2009 by the mpg123 project - free software under the terms of the LGPL 2.1 00005 see COPYING and AUTHORS files in distribution or http://mpg123.org 00006 initially written by Michael Hipp 00007 altivec optimization by tmkk 00008 */ 00009 00010 #include "mpg123lib_intern.h" 00011 00012 #ifndef __APPLE__ 00013 #include <altivec.h> 00014 #endif 00015 00016 /* A macro for normal synth functions */ 00017 #define SYNTH_ALTIVEC(B0STEP) \ 00018 v1 = vec_ld(0,window); \ 00019 v2 = vec_ld(16,window); \ 00020 v3 = vec_ld(32,window); \ 00021 v4 = vec_ld(48,window); \ 00022 v5 = vec_ld(64,window); \ 00023 v1 = vec_perm(v1,v2,vperm1); \ 00024 v6 = vec_ld(0,b0); \ 00025 v2 = vec_perm(v2,v3,vperm1); \ 00026 v7 = vec_ld(16,b0); \ 00027 v3 = vec_perm(v3,v4,vperm1); \ 00028 v8 = vec_ld(32,b0); \ 00029 v4 = vec_perm(v4,v5,vperm1); \ 00030 v9 = vec_ld(48,b0); \ 00031 \ 00032 vsum = vec_madd(v1,v6,vzero); \ 00033 vsum = vec_madd(v2,v7,vsum); \ 00034 vsum = vec_madd(v3,v8,vsum); \ 00035 vsum = vec_madd(v4,v9,vsum); \ 00036 \ 00037 window += 32; \ 00038 b0 += B0STEP; \ 00039 \ 00040 v1 = vec_ld(0,window); \ 00041 v2 = vec_ld(16,window); \ 00042 v3 = vec_ld(32,window); \ 00043 v4 = vec_ld(48,window); \ 00044 v5 = vec_ld(64,window); \ 00045 v1 = vec_perm(v1,v2,vperm1); \ 00046 v6 = vec_ld(0,b0); \ 00047 v2 = vec_perm(v2,v3,vperm1); \ 00048 v7 = vec_ld(16,b0); \ 00049 v3 = vec_perm(v3,v4,vperm1); \ 00050 v8 = vec_ld(32,b0); \ 00051 v4 = vec_perm(v4,v5,vperm1); \ 00052 v9 = vec_ld(48,b0); \ 00053 \ 00054 vsum2 = vec_madd(v1,v6,vzero); \ 00055 vsum2 = vec_madd(v2,v7,vsum2); \ 00056 vsum2 = vec_madd(v3,v8,vsum2); \ 00057 vsum2 = vec_madd(v4,v9,vsum2); \ 00058 \ 00059 window += 32; \ 00060 b0 += B0STEP; \ 00061 \ 00062 v1 = vec_ld(0,window); \ 00063 v2 = vec_ld(16,window); \ 00064 v3 = vec_ld(32,window); \ 00065 v4 = vec_ld(48,window); \ 00066 v5 = vec_ld(64,window); \ 00067 v1 = vec_perm(v1,v2,vperm1); \ 00068 v6 = 
vec_ld(0,b0); \ 00069 v2 = vec_perm(v2,v3,vperm1); \ 00070 v7 = vec_ld(16,b0); \ 00071 v3 = vec_perm(v3,v4,vperm1); \ 00072 v8 = vec_ld(32,b0); \ 00073 v4 = vec_perm(v4,v5,vperm1); \ 00074 v9 = vec_ld(48,b0); \ 00075 \ 00076 vsum3 = vec_madd(v1,v6,vzero); \ 00077 vsum3 = vec_madd(v2,v7,vsum3); \ 00078 vsum3 = vec_madd(v3,v8,vsum3); \ 00079 vsum3 = vec_madd(v4,v9,vsum3); \ 00080 \ 00081 window += 32; \ 00082 b0 += B0STEP; \ 00083 \ 00084 v1 = vec_ld(0,window); \ 00085 v2 = vec_ld(16,window); \ 00086 v3 = vec_ld(32,window); \ 00087 v4 = vec_ld(48,window); \ 00088 v5 = vec_ld(64,window); \ 00089 v1 = vec_perm(v1,v2,vperm1); \ 00090 v6 = vec_ld(0,b0); \ 00091 v2 = vec_perm(v2,v3,vperm1); \ 00092 v7 = vec_ld(16,b0); \ 00093 v3 = vec_perm(v3,v4,vperm1); \ 00094 v8 = vec_ld(32,b0); \ 00095 v4 = vec_perm(v4,v5,vperm1); \ 00096 v9 = vec_ld(48,b0); \ 00097 \ 00098 vsum4 = vec_madd(v1,v6,vzero); \ 00099 vsum4 = vec_madd(v2,v7,vsum4); \ 00100 vsum4 = vec_madd(v3,v8,vsum4); \ 00101 vsum4 = vec_madd(v4,v9,vsum4); \ 00102 \ 00103 window += 32; \ 00104 b0 += B0STEP; \ 00105 \ 00106 v1 = vec_mergeh(vsum,vsum3); \ 00107 v2 = vec_mergeh(vsum2,vsum4); \ 00108 v3 = vec_mergel(vsum,vsum3); \ 00109 v4 = vec_mergel(vsum2,vsum4); \ 00110 v5 = vec_mergeh(v1,v2); \ 00111 v6 = vec_mergel(v1,v2); \ 00112 v7 = vec_mergeh(v3,v4); \ 00113 v8 = vec_mergel(v3,v4); 00114 00115 /* A macro for stereo synth functions */ 00116 #define SYNTH_STEREO_ALTIVEC(B0STEP) \ 00117 v1 = vec_ld(0,window); \ 00118 v2 = vec_ld(16,window); \ 00119 v3 = vec_ld(32,window); \ 00120 v4 = vec_ld(48,window); \ 00121 v5 = vec_ld(64,window); \ 00122 v1 = vec_perm(v1,v2,vperm1); \ 00123 v6 = vec_ld(0,b0l); \ 00124 v10 = vec_ld(0,b0r); \ 00125 v2 = vec_perm(v2,v3,vperm1); \ 00126 v7 = vec_ld(16,b0l); \ 00127 v11 = vec_ld(16,b0r); \ 00128 v3 = vec_perm(v3,v4,vperm1); \ 00129 v8 = vec_ld(32,b0l); \ 00130 v12 = vec_ld(32,b0r); \ 00131 v4 = vec_perm(v4,v5,vperm1); \ 00132 v9 = vec_ld(48,b0l); \ 00133 v13 = vec_ld(48,b0r); \ 00134 \ 
00135 vsum = vec_madd(v1,v6,vzero); \ 00136 vsum5 = vec_madd(v1,v10,vzero); \ 00137 vsum = vec_madd(v2,v7,vsum); \ 00138 vsum5 = vec_madd(v2,v11,vsum5); \ 00139 vsum = vec_madd(v3,v8,vsum); \ 00140 vsum5 = vec_madd(v3,v12,vsum5); \ 00141 vsum = vec_madd(v4,v9,vsum); \ 00142 vsum5 = vec_madd(v4,v13,vsum5); \ 00143 \ 00144 window += 32; \ 00145 b0l += B0STEP; \ 00146 b0r += B0STEP; \ 00147 \ 00148 v1 = vec_ld(0,window); \ 00149 v2 = vec_ld(16,window); \ 00150 v3 = vec_ld(32,window); \ 00151 v4 = vec_ld(48,window); \ 00152 v5 = vec_ld(64,window); \ 00153 v1 = vec_perm(v1,v2,vperm1); \ 00154 v6 = vec_ld(0,b0l); \ 00155 v10 = vec_ld(0,b0r); \ 00156 v2 = vec_perm(v2,v3,vperm1); \ 00157 v7 = vec_ld(16,b0l); \ 00158 v11 = vec_ld(16,b0r); \ 00159 v3 = vec_perm(v3,v4,vperm1); \ 00160 v8 = vec_ld(32,b0l); \ 00161 v12 = vec_ld(32,b0r); \ 00162 v4 = vec_perm(v4,v5,vperm1); \ 00163 v9 = vec_ld(48,b0l); \ 00164 v13 = vec_ld(48,b0r); \ 00165 \ 00166 vsum2 = vec_madd(v1,v6,vzero); \ 00167 vsum6 = vec_madd(v1,v10,vzero); \ 00168 vsum2 = vec_madd(v2,v7,vsum2); \ 00169 vsum6 = vec_madd(v2,v11,vsum6); \ 00170 vsum2 = vec_madd(v3,v8,vsum2); \ 00171 vsum6 = vec_madd(v3,v12,vsum6); \ 00172 vsum2 = vec_madd(v4,v9,vsum2); \ 00173 vsum6 = vec_madd(v4,v13,vsum6); \ 00174 \ 00175 window += 32; \ 00176 b0l += B0STEP; \ 00177 b0r += B0STEP; \ 00178 \ 00179 v1 = vec_ld(0,window); \ 00180 v2 = vec_ld(16,window); \ 00181 v3 = vec_ld(32,window); \ 00182 v4 = vec_ld(48,window); \ 00183 v5 = vec_ld(64,window); \ 00184 v1 = vec_perm(v1,v2,vperm1); \ 00185 v6 = vec_ld(0,b0l); \ 00186 v10 = vec_ld(0,b0r); \ 00187 v2 = vec_perm(v2,v3,vperm1); \ 00188 v7 = vec_ld(16,b0l); \ 00189 v11 = vec_ld(16,b0r); \ 00190 v3 = vec_perm(v3,v4,vperm1); \ 00191 v8 = vec_ld(32,b0l); \ 00192 v12 = vec_ld(32,b0r); \ 00193 v4 = vec_perm(v4,v5,vperm1); \ 00194 v9 = vec_ld(48,b0l); \ 00195 v13 = vec_ld(48,b0r); \ 00196 \ 00197 vsum3 = vec_madd(v1,v6,vzero); \ 00198 vsum7 = vec_madd(v1,v10,vzero); \ 00199 vsum3 = 
vec_madd(v2,v7,vsum3); \ 00200 vsum7 = vec_madd(v2,v11,vsum7); \ 00201 vsum3 = vec_madd(v3,v8,vsum3); \ 00202 vsum7 = vec_madd(v3,v12,vsum7); \ 00203 vsum3 = vec_madd(v4,v9,vsum3); \ 00204 vsum7 = vec_madd(v4,v13,vsum7); \ 00205 \ 00206 window += 32; \ 00207 b0l += B0STEP; \ 00208 b0r += B0STEP; \ 00209 \ 00210 v1 = vec_ld(0,window); \ 00211 v2 = vec_ld(16,window); \ 00212 v3 = vec_ld(32,window); \ 00213 v4 = vec_ld(48,window); \ 00214 v5 = vec_ld(64,window); \ 00215 v1 = vec_perm(v1,v2,vperm1); \ 00216 v6 = vec_ld(0,b0l); \ 00217 v10 = vec_ld(0,b0r); \ 00218 v2 = vec_perm(v2,v3,vperm1); \ 00219 v7 = vec_ld(16,b0l); \ 00220 v11 = vec_ld(16,b0r); \ 00221 v3 = vec_perm(v3,v4,vperm1); \ 00222 v8 = vec_ld(32,b0l); \ 00223 v12 = vec_ld(32,b0r); \ 00224 v4 = vec_perm(v4,v5,vperm1); \ 00225 v9 = vec_ld(48,b0l); \ 00226 v13 = vec_ld(48,b0r); \ 00227 \ 00228 vsum4 = vec_madd(v1,v6,vzero); \ 00229 vsum8 = vec_madd(v1,v10,vzero); \ 00230 vsum4 = vec_madd(v2,v7,vsum4); \ 00231 vsum8 = vec_madd(v2,v11,vsum8); \ 00232 vsum4 = vec_madd(v3,v8,vsum4); \ 00233 vsum8 = vec_madd(v3,v12,vsum8); \ 00234 vsum4 = vec_madd(v4,v9,vsum4); \ 00235 vsum8 = vec_madd(v4,v13,vsum8); \ 00236 \ 00237 window += 32; \ 00238 b0l += B0STEP; \ 00239 b0r += B0STEP; \ 00240 \ 00241 v1 = vec_mergeh(vsum,vsum3); \ 00242 v5 = vec_mergeh(vsum5,vsum7); \ 00243 v2 = vec_mergeh(vsum2,vsum4); \ 00244 v6 = vec_mergeh(vsum6,vsum8); \ 00245 v3 = vec_mergel(vsum,vsum3); \ 00246 v7 = vec_mergel(vsum5,vsum7); \ 00247 v4 = vec_mergel(vsum2,vsum4); \ 00248 v8 = vec_mergel(vsum6,vsum8); \ 00249 vsum = vec_mergeh(v1,v2); \ 00250 vsum5 = vec_mergeh(v5,v6); \ 00251 vsum2 = vec_mergel(v1,v2); \ 00252 vsum6 = vec_mergel(v5,v6); \ 00253 vsum3 = vec_mergeh(v3,v4); \ 00254 vsum7 = vec_mergeh(v7,v8); \ 00255 vsum4 = vec_mergel(v3,v4); \ 00256 vsum8 = vec_mergel(v7,v8); 00257 00258 int synth_1to1_altivec(real *bandPtr,int channel,mpg123_handle *fr, int final) 00259 { 00260 short *samples = (short *) 
(fr->buffer.data+fr->buffer.fill); 00261 00262 real *b0, **buf; 00263 int clip; 00264 int bo1; 00265 00266 if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer); 00267 00268 if(!channel) 00269 { 00270 fr->bo--; 00271 fr->bo &= 0xf; 00272 buf = fr->real_buffs[0]; 00273 } 00274 else 00275 { 00276 samples++; 00277 buf = fr->real_buffs[1]; 00278 } 00279 00280 if(fr->bo & 0x1) 00281 { 00282 b0 = buf[0]; 00283 bo1 = fr->bo; 00284 dct64_altivec(buf[1]+((fr->bo+1)&0xf),buf[0]+fr->bo,bandPtr); 00285 } 00286 else 00287 { 00288 b0 = buf[1]; 00289 bo1 = fr->bo+1; 00290 dct64_altivec(buf[0]+fr->bo,buf[1]+fr->bo+1,bandPtr); 00291 } 00292 00293 00294 { 00295 register int j; 00296 real *window = fr->decwin + 16 - bo1; 00297 00298 ALIGNED(16) int clip_tmp[4]; 00299 vector float v1,v2,v3,v4,v5,v6,v7,v8,v9; 00300 vector unsigned char vperm1,vperm2,vperm3,vperm4; 00301 vector float vsum,vsum2,vsum3,vsum4,vmin,vmax,vzero; 00302 vector signed int vclip; 00303 vector signed short vsample1,vsample2; 00304 vector unsigned int vshift; 00305 vclip = vec_xor(vclip,vclip); 00306 vzero = vec_xor(vzero,vzero); 00307 vshift = vec_splat_u32(-1); /* 31 */ 00308 #ifdef __APPLE__ 00309 vmax = (vector float)(32767.0f); 00310 vmin = (vector float)(-32768.0f); 00311 vperm4 = (vector unsigned char)(0,1,18,19,2,3,22,23,4,5,26,27,6,7,30,31); 00312 #else 00313 vmax = (vector float){32767.0f,32767.0f,32767.0f,32767.0f}; 00314 vmin = (vector float){-32768.0f,-32768.0f,-32768.0f,-32768.0f}; 00315 vperm4 = (vector unsigned char){0,1,18,19,2,3,22,23,4,5,26,27,6,7,30,31}; 00316 #endif 00317 00318 vperm1 = vec_lvsl(0,window); 00319 vperm2 = vec_lvsl(0,samples); 00320 vperm3 = vec_lvsr(0,samples); 00321 for (j=4;j;j--) 00322 { 00323 SYNTH_ALTIVEC(16); 00324 00325 vsum = vec_sub(v5,v6); 00326 v9 = vec_sub(v7,v8); 00327 vsum = vec_add(vsum,v9); 00328 00329 v3 = vec_round(vsum); 00330 v3 = (vector float)vec_cts(v3,0); 00331 v1 = (vector float)vec_cmpgt(vsum,vmax); 00332 v2 = (vector 
float)vec_cmplt(vsum,vmin); 00333 vsample1 = vec_ld(0,samples); 00334 vsample2 = vec_ld(15,samples); 00335 v3 = (vector float)vec_packs((vector signed int)v3,(vector signed int)v3); 00336 v4 = (vector float)vec_perm(vsample1,vsample2,vperm2); 00337 v5 = (vector float)vec_perm(v3,v4,vperm4); 00338 v6 = (vector float)vec_perm(vsample2,vsample1,vperm2); 00339 v7 = (vector float)vec_perm(v5,v6,vperm3); 00340 v8 = (vector float)vec_perm(v6,v5,vperm3); 00341 vec_st((vector signed short)v7,15,samples); 00342 vec_st((vector signed short)v8,0,samples); 00343 samples += 8; 00344 00345 v1 = (vector float)vec_sr((vector unsigned int)v1, vshift); 00346 v2 = (vector float)vec_sr((vector unsigned int)v2, vshift); 00347 v1 = (vector float)vec_add((vector unsigned int)v1,(vector unsigned int)v2); 00348 vclip = vec_sums((vector signed int)v1,vclip); 00349 } 00350 00351 for (j=4;j;j--) 00352 { 00353 SYNTH_ALTIVEC(-16); 00354 00355 vsum = vec_add(v5,v6); 00356 v9 = vec_add(v7,v8); 00357 vsum = vec_add(vsum,v9); 00358 00359 v3 = vec_round(vsum); 00360 v3 = (vector float)vec_cts(v3,0); 00361 v1 = (vector float)vec_cmpgt(vsum,vmax); 00362 v2 = (vector float)vec_cmplt(vsum,vmin); 00363 vsample1 = vec_ld(0,samples); 00364 vsample2 = vec_ld(15,samples); 00365 v3 = (vector float)vec_packs((vector signed int)v3,(vector signed int)v3); 00366 v4 = (vector float)vec_perm(vsample1,vsample2,vperm2); 00367 v5 = (vector float)vec_perm(v3,v4,vperm4); 00368 v6 = (vector float)vec_perm(vsample2,vsample1,vperm2); 00369 v7 = (vector float)vec_perm(v5,v6,vperm3); 00370 v8 = (vector float)vec_perm(v6,v5,vperm3); 00371 vec_st((vector signed short)v7,15,samples); 00372 vec_st((vector signed short)v8,0,samples); 00373 samples += 8; 00374 00375 v1 = (vector float)vec_sr((vector unsigned int)v1, vshift); 00376 v2 = (vector float)vec_sr((vector unsigned int)v2, vshift); 00377 v1 = (vector float)vec_add((vector unsigned int)v1,(vector unsigned int)v2); 00378 vclip = vec_sums((vector signed int)v1,vclip); 00379 } 
00380 00381 vec_st(vclip,0,clip_tmp); 00382 clip = clip_tmp[3]; 00383 } 00384 if(final) fr->buffer.fill += 128; 00385 00386 return clip; 00387 } 00388 00389 int synth_1to1_stereo_altivec(real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr) 00390 { 00391 short *samples = (short *) (fr->buffer.data+fr->buffer.fill); 00392 00393 real *b0l, *b0r, **bufl, **bufr; 00394 int clip; 00395 int bo1; 00396 00397 if(fr->have_eq_settings) 00398 { 00399 do_equalizer(bandPtr_l,0,fr->equalizer); 00400 do_equalizer(bandPtr_r,1,fr->equalizer); 00401 } 00402 00403 fr->bo--; 00404 fr->bo &= 0xf; 00405 bufl = fr->real_buffs[0]; 00406 bufr = fr->real_buffs[1]; 00407 00408 if(fr->bo & 0x1) 00409 { 00410 b0l = bufl[0]; 00411 b0r = bufr[0]; 00412 bo1 = fr->bo; 00413 dct64_altivec(bufl[1]+((fr->bo+1)&0xf),bufl[0]+fr->bo,bandPtr_l); 00414 dct64_altivec(bufr[1]+((fr->bo+1)&0xf),bufr[0]+fr->bo,bandPtr_r); 00415 } 00416 else 00417 { 00418 b0l = bufl[1]; 00419 b0r = bufr[1]; 00420 bo1 = fr->bo+1; 00421 dct64_altivec(bufl[0]+fr->bo,bufl[1]+fr->bo+1,bandPtr_l); 00422 dct64_altivec(bufr[0]+fr->bo,bufr[1]+fr->bo+1,bandPtr_r); 00423 } 00424 00425 00426 { 00427 register int j; 00428 real *window = fr->decwin + 16 - bo1; 00429 00430 ALIGNED(16) int clip_tmp[4]; 00431 vector float v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13; 00432 vector unsigned char vperm1,vperm2; 00433 vector float vsum,vsum2,vsum3,vsum4,vsum5,vsum6,vsum7,vsum8,vmin,vmax,vzero; 00434 vector signed int vclip; 00435 vector unsigned int vshift; 00436 vector signed short vprev; 00437 vclip = vec_xor(vclip,vclip); 00438 vzero = vec_xor(vzero,vzero); 00439 vshift = vec_splat_u32(-1); /* 31 */ 00440 #ifdef __APPLE__ 00441 vmax = (vector float)(32767.0f); 00442 vmin = (vector float)(-32768.0f); 00443 #else 00444 vmax = (vector float){32767.0f,32767.0f,32767.0f,32767.0f}; 00445 vmin = (vector float){-32768.0f,-32768.0f,-32768.0f,-32768.0f}; 00446 #endif 00447 00448 vperm1 = vec_lvsl(0,window); 00449 vperm2 = vec_lvsr(0,samples); 00450 vprev = 
vec_perm(vec_ld(0,samples),vec_ld(0,samples),vec_lvsl(0,samples)); 00451 for (j=4;j;j--) 00452 { 00453 SYNTH_STEREO_ALTIVEC(16); 00454 00455 vsum = vec_sub(vsum,vsum2); 00456 vsum2 = vec_sub(vsum5,vsum6); 00457 vsum3 = vec_sub(vsum3,vsum4); 00458 vsum4 = vec_sub(vsum7,vsum8); 00459 vsum = vec_add(vsum,vsum3); 00460 vsum2 = vec_add(vsum2,vsum4); 00461 00462 v1 = vec_round(vsum); 00463 v2 = vec_round(vsum2); 00464 v1 = (vector float)vec_cts(v1,0); 00465 v2 = (vector float)vec_cts(v2,0); 00466 v3 = vec_mergeh(v1, v2); 00467 v4 = vec_mergel(v1, v2); 00468 v5 = (vector float)vec_packs((vector signed int)v3,(vector signed int)v4); 00469 v6 = (vector float)vec_perm(vprev,(vector signed short)v5,vperm2); 00470 vprev = (vector signed short)v5; 00471 v1 = (vector float)vec_cmpgt(vsum,vmax); 00472 v2 = (vector float)vec_cmplt(vsum,vmin); 00473 v3 = (vector float)vec_cmpgt(vsum2,vmax); 00474 v4 = (vector float)vec_cmplt(vsum2,vmin); 00475 vec_st((vector signed short)v6,0,samples); 00476 samples += 8; 00477 00478 v1 = (vector float)vec_sr((vector unsigned int)v1, vshift); 00479 v2 = (vector float)vec_sr((vector unsigned int)v2, vshift); 00480 v3 = (vector float)vec_sr((vector unsigned int)v3, vshift); 00481 v4 = (vector float)vec_sr((vector unsigned int)v4, vshift); 00482 v1 = (vector float)vec_add((vector unsigned int)v1,(vector unsigned int)v2); 00483 v2 = (vector float)vec_add((vector unsigned int)v3,(vector unsigned int)v4); 00484 vclip = vec_sums((vector signed int)v1,vclip); 00485 vclip = vec_sums((vector signed int)v2,vclip); 00486 } 00487 00488 for (j=4;j;j--) 00489 { 00490 SYNTH_STEREO_ALTIVEC(-16); 00491 00492 vsum = vec_add(vsum,vsum2); 00493 vsum2 = vec_add(vsum5,vsum6); 00494 vsum3 = vec_add(vsum3,vsum4); 00495 vsum4 = vec_add(vsum7,vsum8); 00496 vsum = vec_add(vsum,vsum3); 00497 vsum2 = vec_add(vsum2,vsum4); 00498 00499 v1 = vec_round(vsum); 00500 v2 = vec_round(vsum2); 00501 v1 = (vector float)vec_cts(v1,0); 00502 v2 = (vector float)vec_cts(v2,0); 00503 v3 = 
vec_mergeh(v1, v2); 00504 v4 = vec_mergel(v1, v2); 00505 v5 = (vector float)vec_packs((vector signed int)v3,(vector signed int)v4); 00506 v6 = (vector float)vec_perm(vprev,(vector signed short)v5,vperm2); 00507 vprev = (vector signed short)v5; 00508 v1 = (vector float)vec_cmpgt(vsum,vmax); 00509 v2 = (vector float)vec_cmplt(vsum,vmin); 00510 v3 = (vector float)vec_cmpgt(vsum2,vmax); 00511 v4 = (vector float)vec_cmplt(vsum2,vmin); 00512 vec_st((vector signed short)v6,0,samples); 00513 samples += 8; 00514 00515 v1 = (vector float)vec_sr((vector unsigned int)v1, vshift); 00516 v2 = (vector float)vec_sr((vector unsigned int)v2, vshift); 00517 v3 = (vector float)vec_sr((vector unsigned int)v3, vshift); 00518 v4 = (vector float)vec_sr((vector unsigned int)v4, vshift); 00519 v1 = (vector float)vec_add((vector unsigned int)v1,(vector unsigned int)v2); 00520 v2 = (vector float)vec_add((vector unsigned int)v3,(vector unsigned int)v4); 00521 vclip = vec_sums((vector signed int)v1,vclip); 00522 vclip = vec_sums((vector signed int)v2,vclip); 00523 } 00524 00525 if((size_t)samples & 0xf) 00526 { 00527 v1 = (vector float)vec_perm(vec_ld(0,samples),vec_ld(0,samples),vec_lvsl(0,samples)); 00528 v2 = (vector float)vec_perm(vprev,(vector signed short)v1,vperm2); 00529 vec_st((vector signed short)v2,0,samples); 00530 } 00531 00532 vec_st(vclip,0,clip_tmp); 00533 clip = clip_tmp[3]; 00534 } 00535 fr->buffer.fill += 128; 00536 00537 return clip; 00538 } 00539 00540 int synth_1to1_real_altivec(real *bandPtr,int channel,mpg123_handle *fr, int final) 00541 { 00542 real *samples = (real *) (fr->buffer.data+fr->buffer.fill); 00543 00544 real *b0, **buf; 00545 int bo1; 00546 00547 if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer); 00548 00549 if(!channel) 00550 { 00551 fr->bo--; 00552 fr->bo &= 0xf; 00553 buf = fr->real_buffs[0]; 00554 } 00555 else 00556 { 00557 samples++; 00558 buf = fr->real_buffs[1]; 00559 } 00560 00561 if(fr->bo & 0x1) 00562 { 00563 b0 = buf[0]; 00564 
bo1 = fr->bo; 00565 dct64_altivec(buf[1]+((fr->bo+1)&0xf),buf[0]+fr->bo,bandPtr); 00566 } 00567 else 00568 { 00569 b0 = buf[1]; 00570 bo1 = fr->bo+1; 00571 dct64_altivec(buf[0]+fr->bo,buf[1]+fr->bo+1,bandPtr); 00572 } 00573 00574 00575 { 00576 register int j; 00577 real *window = fr->decwin + 16 - bo1; 00578 00579 vector float v1,v2,v3,v4,v5,v6,v7,v8,v9; 00580 vector unsigned char vperm1,vperm2,vperm3,vperm4, vperm5; 00581 vector float vsum,vsum2,vsum3,vsum4,vscale,vzero; 00582 vector float vsample1,vsample2,vsample3; 00583 vzero = vec_xor(vzero, vzero); 00584 #ifdef __APPLE__ 00585 vscale = (vector float)(1.0f/32768.0f); 00586 vperm4 = (vector unsigned char)(0,1,2,3,20,21,22,23,4,5,6,7,28,29,30,31); 00587 vperm5 = (vector unsigned char)(8,9,10,11,20,21,22,23,12,13,14,15,28,29,30,31); 00588 #else 00589 vscale = (vector float){1.0f/32768.0f,1.0f/32768.0f,1.0f/32768.0f,1.0f/32768.0f}; 00590 vperm4 = (vector unsigned char){0,1,2,3,20,21,22,23,4,5,6,7,28,29,30,31}; 00591 vperm5 = (vector unsigned char){8,9,10,11,20,21,22,23,12,13,14,15,28,29,30,31}; 00592 #endif 00593 00594 vperm1 = vec_lvsl(0,window); 00595 vperm2 = vec_lvsl(0,samples); 00596 vperm3 = vec_lvsr(0,samples); 00597 for (j=4;j;j--) 00598 { 00599 SYNTH_ALTIVEC(16); 00600 00601 vsum = vec_sub(v5,v6); 00602 v9 = vec_sub(v7,v8); 00603 vsum = vec_add(vsum,v9); 00604 vsum = vec_madd(vsum, vscale, vzero); 00605 00606 vsample1 = vec_ld(0,samples); 00607 vsample2 = vec_ld(16,samples); 00608 vsample3 = vec_ld(31,samples); 00609 v1 = vec_perm(vsample1, vsample2, vperm2); 00610 v2 = vec_perm(vsample2, vsample3, vperm2); 00611 v1 = vec_perm(vsum, v1, vperm4); 00612 v2 = vec_perm(vsum, v2, vperm5); 00613 v3 = vec_perm(vsample3, vsample2, vperm2); 00614 v4 = vec_perm(vsample2, vsample1, vperm2); 00615 v5 = vec_perm(v2, v3, vperm3); 00616 v6 = vec_perm(v1, v2, vperm3); 00617 v7 = vec_perm(v4, v1, vperm3); 00618 vec_st(v5,31,samples); 00619 vec_st(v6,16,samples); 00620 vec_st(v7,0,samples); 00621 samples += 8; 00622 } 
00623 00624 for (j=4;j;j--) 00625 { 00626 SYNTH_ALTIVEC(-16); 00627 00628 vsum = vec_add(v5,v6); 00629 v9 = vec_add(v7,v8); 00630 vsum = vec_add(vsum,v9); 00631 vsum = vec_madd(vsum, vscale, vzero); 00632 00633 vsample1 = vec_ld(0,samples); 00634 vsample2 = vec_ld(16,samples); 00635 vsample3 = vec_ld(31,samples); 00636 v1 = vec_perm(vsample1, vsample2, vperm2); 00637 v2 = vec_perm(vsample2, vsample3, vperm2); 00638 v1 = vec_perm(vsum, v1, vperm4); 00639 v2 = vec_perm(vsum, v2, vperm5); 00640 v3 = vec_perm(vsample3, vsample2, vperm2); 00641 v4 = vec_perm(vsample2, vsample1, vperm2); 00642 v5 = vec_perm(v2, v3, vperm3); 00643 v6 = vec_perm(v1, v2, vperm3); 00644 v7 = vec_perm(v4, v1, vperm3); 00645 vec_st(v5,31,samples); 00646 vec_st(v6,16,samples); 00647 vec_st(v7,0,samples); 00648 samples += 8; 00649 } 00650 } 00651 if(final) fr->buffer.fill += 256; 00652 00653 return 0; 00654 } 00655 00656 int synth_1to1_real_stereo_altivec(real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr) 00657 { 00658 real *samples = (real *) (fr->buffer.data+fr->buffer.fill); 00659 00660 real *b0l, *b0r, **bufl, **bufr; 00661 int bo1; 00662 00663 if(fr->have_eq_settings) 00664 { 00665 do_equalizer(bandPtr_l,0,fr->equalizer); 00666 do_equalizer(bandPtr_r,1,fr->equalizer); 00667 } 00668 00669 fr->bo--; 00670 fr->bo &= 0xf; 00671 bufl = fr->real_buffs[0]; 00672 bufr = fr->real_buffs[1]; 00673 00674 if(fr->bo & 0x1) 00675 { 00676 b0l = bufl[0]; 00677 b0r = bufr[0]; 00678 bo1 = fr->bo; 00679 dct64_altivec(bufl[1]+((fr->bo+1)&0xf),bufl[0]+fr->bo,bandPtr_l); 00680 dct64_altivec(bufr[1]+((fr->bo+1)&0xf),bufr[0]+fr->bo,bandPtr_r); 00681 } 00682 else 00683 { 00684 b0l = bufl[1]; 00685 b0r = bufr[1]; 00686 bo1 = fr->bo+1; 00687 dct64_altivec(bufl[0]+fr->bo,bufl[1]+fr->bo+1,bandPtr_l); 00688 dct64_altivec(bufr[0]+fr->bo,bufr[1]+fr->bo+1,bandPtr_r); 00689 } 00690 00691 00692 { 00693 register int j; 00694 real *window = fr->decwin + 16 - bo1; 00695 00696 vector float 
v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13; 00697 vector unsigned char vperm1,vperm2; 00698 vector float vsum,vsum2,vsum3,vsum4,vsum5,vsum6,vsum7,vsum8,vscale,vzero; 00699 vector float vprev; 00700 vzero = vec_xor(vzero,vzero); 00701 #ifdef __APPLE__ 00702 vscale = (vector float)(1.0f/32768.0f); 00703 #else 00704 vscale = (vector float){1.0f/32768.0f,1.0f/32768.0f,1.0f/32768.0f,1.0f/32768.0f}; 00705 #endif 00706 00707 vperm1 = vec_lvsl(0,window); 00708 vperm2 = vec_lvsr(0,samples); 00709 vprev = vec_perm(vec_ld(0,samples),vec_ld(0,samples),vec_lvsl(0,samples)); 00710 for (j=4;j;j--) 00711 { 00712 SYNTH_STEREO_ALTIVEC(16); 00713 00714 vsum = vec_sub(vsum,vsum2); 00715 vsum2 = vec_sub(vsum5,vsum6); 00716 vsum3 = vec_sub(vsum3,vsum4); 00717 vsum4 = vec_sub(vsum7,vsum8); 00718 vsum = vec_add(vsum,vsum3); 00719 vsum2 = vec_add(vsum2,vsum4); 00720 vsum = vec_madd(vsum, vscale, vzero); 00721 vsum2 = vec_madd(vsum2, vscale, vzero); 00722 00723 v1 = vec_mergeh(vsum, vsum2); 00724 v2 = vec_mergel(vsum, vsum2); 00725 v3 = vec_perm(vprev,v1,vperm2); 00726 v4 = vec_perm(v1,v2,vperm2); 00727 vprev = v2; 00728 vec_st(v3,0,samples); 00729 vec_st(v4,16,samples); 00730 samples += 8; 00731 } 00732 00733 for (j=4;j;j--) 00734 { 00735 SYNTH_STEREO_ALTIVEC(-16); 00736 00737 vsum = vec_add(vsum,vsum2); 00738 vsum2 = vec_add(vsum5,vsum6); 00739 vsum3 = vec_add(vsum3,vsum4); 00740 vsum4 = vec_add(vsum7,vsum8); 00741 vsum = vec_add(vsum,vsum3); 00742 vsum2 = vec_add(vsum2,vsum4); 00743 vsum = vec_madd(vsum, vscale, vzero); 00744 vsum2 = vec_madd(vsum2, vscale, vzero); 00745 00746 v1 = vec_mergeh(vsum, vsum2); 00747 v2 = vec_mergel(vsum, vsum2); 00748 v3 = vec_perm(vprev,v1,vperm2); 00749 v4 = vec_perm(v1,v2,vperm2); 00750 vprev = v2; 00751 vec_st(v3,0,samples); 00752 vec_st(v4,16,samples); 00753 samples += 8; 00754 } 00755 00756 if((size_t)samples & 0xf) 00757 { 00758 v1 = (vector float)vec_perm(vec_ld(0,samples),vec_ld(0,samples),vec_lvsl(0,samples)); 00759 v2 = (vector 
float)vec_perm(vprev,v1,vperm2); 00760 vec_st(v2,0,samples); 00761 } 00762 } 00763 fr->buffer.fill += 256; 00764 00765 return 0; 00766 } 00767 00768 int synth_1to1_s32_altivec(real *bandPtr,int channel,mpg123_handle *fr, int final) 00769 { 00770 int32_t *samples = (int32_t *) (fr->buffer.data+fr->buffer.fill); 00771 00772 real *b0, **buf; 00773 int clip; 00774 int bo1; 00775 00776 if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer); 00777 00778 if(!channel) 00779 { 00780 fr->bo--; 00781 fr->bo &= 0xf; 00782 buf = fr->real_buffs[0]; 00783 } 00784 else 00785 { 00786 samples++; 00787 buf = fr->real_buffs[1]; 00788 } 00789 00790 if(fr->bo & 0x1) 00791 { 00792 b0 = buf[0]; 00793 bo1 = fr->bo; 00794 dct64_altivec(buf[1]+((fr->bo+1)&0xf),buf[0]+fr->bo,bandPtr); 00795 } 00796 else 00797 { 00798 b0 = buf[1]; 00799 bo1 = fr->bo+1; 00800 dct64_altivec(buf[0]+fr->bo,buf[1]+fr->bo+1,bandPtr); 00801 } 00802 00803 00804 { 00805 register int j; 00806 real *window = fr->decwin + 16 - bo1; 00807 00808 ALIGNED(16) int clip_tmp[4]; 00809 vector float v1,v2,v3,v4,v5,v6,v7,v8,v9; 00810 vector unsigned char vperm1,vperm2,vperm3,vperm4,vperm5; 00811 vector float vsum,vsum2,vsum3,vsum4,vmax,vmin,vzero; 00812 vector signed int vsample1,vsample2,vsample3; 00813 vector unsigned int vshift; 00814 vector signed int vclip; 00815 vzero = vec_xor(vzero, vzero); 00816 vclip = vec_xor(vclip, vclip); 00817 vshift = vec_splat_u32(-1); /* 31 */ 00818 #ifdef __APPLE__ 00819 vmax = (vector float)(32767.999f); 00820 vmin = (vector float)(-32768.0f); 00821 vperm4 = (vector unsigned char)(0,1,2,3,20,21,22,23,4,5,6,7,28,29,30,31); 00822 vperm5 = (vector unsigned char)(8,9,10,11,20,21,22,23,12,13,14,15,28,29,30,31); 00823 #else 00824 vmax = (vector float){32767.999f,32767.999f,32767.999f,32767.999f}; 00825 vmin = (vector float){-32768.0f,-32768.0f,-32768.0f,-32768.0f}; 00826 vperm4 = (vector unsigned char){0,1,2,3,20,21,22,23,4,5,6,7,28,29,30,31}; 00827 vperm5 = (vector unsigned 
char){8,9,10,11,20,21,22,23,12,13,14,15,28,29,30,31}; 00828 #endif 00829 00830 vperm1 = vec_lvsl(0,window); 00831 vperm2 = vec_lvsl(0,samples); 00832 vperm3 = vec_lvsr(0,samples); 00833 for (j=4;j;j--) 00834 { 00835 SYNTH_ALTIVEC(16); 00836 00837 vsum = vec_sub(v5,v6); 00838 v9 = vec_sub(v7,v8); 00839 v1 = vec_add(vsum,v9); 00840 vsum = (vector float)vec_cts(v1,16); 00841 v8 = (vector float)vec_cmpgt(v1,vmax); 00842 v9 = (vector float)vec_cmplt(v1,vmin); 00843 00844 vsample1 = vec_ld(0,samples); 00845 vsample2 = vec_ld(16,samples); 00846 vsample3 = vec_ld(31,samples); 00847 v1 = (vector float)vec_perm(vsample1, vsample2, vperm2); 00848 v2 = (vector float)vec_perm(vsample2, vsample3, vperm2); 00849 v1 = vec_perm(vsum, v1, vperm4); 00850 v2 = vec_perm(vsum, v2, vperm5); 00851 v3 = (vector float)vec_perm(vsample3, vsample2, vperm2); 00852 v4 = (vector float)vec_perm(vsample2, vsample1, vperm2); 00853 v5 = vec_perm(v2, v3, vperm3); 00854 v6 = vec_perm(v1, v2, vperm3); 00855 v7 = vec_perm(v4, v1, vperm3); 00856 vec_st((vector signed int)v5,31,samples); 00857 vec_st((vector signed int)v6,16,samples); 00858 vec_st((vector signed int)v7,0,samples); 00859 samples += 8; 00860 00861 v1 = (vector float)vec_sr((vector unsigned int)v8, vshift); 00862 v2 = (vector float)vec_sr((vector unsigned int)v9, vshift); 00863 v1 = (vector float)vec_add((vector unsigned int)v1,(vector unsigned int)v2); 00864 vclip = vec_sums((vector signed int)v1,vclip); 00865 } 00866 00867 for (j=4;j;j--) 00868 { 00869 SYNTH_ALTIVEC(-16); 00870 00871 vsum = vec_add(v5,v6); 00872 v9 = vec_add(v7,v8); 00873 v1 = vec_add(vsum,v9); 00874 vsum = (vector float)vec_cts(v1,16); 00875 v8 = (vector float)vec_cmpgt(v1,vmax); 00876 v9 = (vector float)vec_cmplt(v1,vmin); 00877 00878 vsample1 = vec_ld(0,samples); 00879 vsample2 = vec_ld(16,samples); 00880 vsample3 = vec_ld(31,samples); 00881 v1 = (vector float)vec_perm(vsample1, vsample2, vperm2); 00882 v2 = (vector float)vec_perm(vsample2, vsample3, vperm2); 00883 v1 = 
vec_perm(vsum, v1, vperm4); 00884 v2 = vec_perm(vsum, v2, vperm5); 00885 v3 = (vector float)vec_perm(vsample3, vsample2, vperm2); 00886 v4 = (vector float)vec_perm(vsample2, vsample1, vperm2); 00887 v5 = vec_perm(v2, v3, vperm3); 00888 v6 = vec_perm(v1, v2, vperm3); 00889 v7 = vec_perm(v4, v1, vperm3); 00890 vec_st((vector signed int)v5,31,samples); 00891 vec_st((vector signed int)v6,16,samples); 00892 vec_st((vector signed int)v7,0,samples); 00893 samples += 8; 00894 00895 v1 = (vector float)vec_sr((vector unsigned int)v8, vshift); 00896 v2 = (vector float)vec_sr((vector unsigned int)v9, vshift); 00897 v1 = (vector float)vec_add((vector unsigned int)v1,(vector unsigned int)v2); 00898 vclip = vec_sums((vector signed int)v1,vclip); 00899 } 00900 00901 vec_st(vclip,0,clip_tmp); 00902 clip = clip_tmp[3]; 00903 } 00904 if(final) fr->buffer.fill += 256; 00905 00906 return clip; 00907 } 00908 00909 00910 int synth_1to1_s32_stereo_altivec(real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr) 00911 { 00912 int32_t *samples = (int32_t *) (fr->buffer.data+fr->buffer.fill); 00913 00914 real *b0l, *b0r, **bufl, **bufr; 00915 int clip; 00916 int bo1; 00917 00918 if(fr->have_eq_settings) 00919 { 00920 do_equalizer(bandPtr_l,0,fr->equalizer); 00921 do_equalizer(bandPtr_r,1,fr->equalizer); 00922 } 00923 00924 fr->bo--; 00925 fr->bo &= 0xf; 00926 bufl = fr->real_buffs[0]; 00927 bufr = fr->real_buffs[1]; 00928 00929 if(fr->bo & 0x1) 00930 { 00931 b0l = bufl[0]; 00932 b0r = bufr[0]; 00933 bo1 = fr->bo; 00934 dct64_altivec(bufl[1]+((fr->bo+1)&0xf),bufl[0]+fr->bo,bandPtr_l); 00935 dct64_altivec(bufr[1]+((fr->bo+1)&0xf),bufr[0]+fr->bo,bandPtr_r); 00936 } 00937 else 00938 { 00939 b0l = bufl[1]; 00940 b0r = bufr[1]; 00941 bo1 = fr->bo+1; 00942 dct64_altivec(bufl[0]+fr->bo,bufl[1]+fr->bo+1,bandPtr_l); 00943 dct64_altivec(bufr[0]+fr->bo,bufr[1]+fr->bo+1,bandPtr_r); 00944 } 00945 00946 00947 { 00948 register int j; 00949 real *window = fr->decwin + 16 - bo1; 00950 00951 ALIGNED(16) int 
clip_tmp[4]; 00952 vector float v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13; 00953 vector unsigned char vperm1,vperm2; 00954 vector float vsum,vsum2,vsum3,vsum4,vsum5,vsum6,vsum7,vsum8,vmax,vmin,vzero; 00955 vector float vprev; 00956 vector unsigned int vshift; 00957 vector signed int vclip; 00958 vzero = vec_xor(vzero, vzero); 00959 vclip = vec_xor(vclip, vclip); 00960 vshift = vec_splat_u32(-1); /* 31 */ 00961 #ifdef __APPLE__ 00962 vmax = (vector float)(32767.999f); 00963 vmin = (vector float)(-32768.0f); 00964 #else 00965 vmax = (vector float){32767.999f,32767.999f,32767.999f,32767.999f}; 00966 vmin = (vector float){-32768.0f,-32768.0f,-32768.0f,-32768.0f}; 00967 #endif 00968 00969 vperm1 = vec_lvsl(0,window); 00970 vperm2 = vec_lvsr(0,samples); 00971 vprev = (vector float)vec_perm(vec_ld(0,samples),vec_ld(0,samples),vec_lvsl(0,samples)); 00972 for (j=4;j;j--) 00973 { 00974 SYNTH_STEREO_ALTIVEC(16); 00975 00976 vsum = vec_sub(vsum,vsum2); 00977 vsum2 = vec_sub(vsum5,vsum6); 00978 vsum3 = vec_sub(vsum3,vsum4); 00979 vsum4 = vec_sub(vsum7,vsum8); 00980 v1 = vec_add(vsum,vsum3); 00981 v2 = vec_add(vsum2,vsum4); 00982 vsum = (vector float)vec_cts(v1,16); 00983 vsum2 = (vector float)vec_cts(v2,16); 00984 v5 = (vector float)vec_cmpgt(v1,vmax); 00985 v6 = (vector float)vec_cmplt(v1,vmin); 00986 v7 = (vector float)vec_cmpgt(v2,vmax); 00987 v8 = (vector float)vec_cmplt(v2,vmin); 00988 00989 v1 = vec_mergeh(vsum, vsum2); 00990 v2 = vec_mergel(vsum, vsum2); 00991 v3 = vec_perm(vprev,v1,vperm2); 00992 v4 = vec_perm(v1,v2,vperm2); 00993 vprev = v2; 00994 vec_st((vector signed int)v3,0,samples); 00995 vec_st((vector signed int)v4,16,samples); 00996 samples += 8; 00997 00998 v1 = (vector float)vec_sr((vector unsigned int)v5, vshift); 00999 v2 = (vector float)vec_sr((vector unsigned int)v6, vshift); 01000 v3 = (vector float)vec_sr((vector unsigned int)v7, vshift); 01001 v4 = (vector float)vec_sr((vector unsigned int)v8, vshift); 01002 v1 = (vector float)vec_add((vector unsigned 
int)v1,(vector unsigned int)v2); 01003 v2 = (vector float)vec_add((vector unsigned int)v3,(vector unsigned int)v4); 01004 vclip = vec_sums((vector signed int)v1,vclip); 01005 vclip = vec_sums((vector signed int)v2,vclip); 01006 } 01007 01008 for (j=4;j;j--) 01009 { 01010 SYNTH_STEREO_ALTIVEC(-16); 01011 01012 vsum = vec_add(vsum,vsum2); 01013 vsum2 = vec_add(vsum5,vsum6); 01014 vsum3 = vec_add(vsum3,vsum4); 01015 vsum4 = vec_add(vsum7,vsum8); 01016 v1 = vec_add(vsum,vsum3); 01017 v2 = vec_add(vsum2,vsum4); 01018 vsum = (vector float)vec_cts(v1,16); 01019 vsum2 = (vector float)vec_cts(v2,16); 01020 v5 = (vector float)vec_cmpgt(v1,vmax); 01021 v6 = (vector float)vec_cmplt(v1,vmin); 01022 v7 = (vector float)vec_cmpgt(v2,vmax); 01023 v8 = (vector float)vec_cmplt(v2,vmin); 01024 01025 v1 = vec_mergeh(vsum, vsum2); 01026 v2 = vec_mergel(vsum, vsum2); 01027 v3 = vec_perm(vprev,v1,vperm2); 01028 v4 = vec_perm(v1,v2,vperm2); 01029 vprev = v2; 01030 vec_st((vector signed int)v3,0,samples); 01031 vec_st((vector signed int)v4,16,samples); 01032 samples += 8; 01033 01034 v1 = (vector float)vec_sr((vector unsigned int)v5, vshift); 01035 v2 = (vector float)vec_sr((vector unsigned int)v6, vshift); 01036 v3 = (vector float)vec_sr((vector unsigned int)v7, vshift); 01037 v4 = (vector float)vec_sr((vector unsigned int)v8, vshift); 01038 v1 = (vector float)vec_add((vector unsigned int)v1,(vector unsigned int)v2); 01039 v2 = (vector float)vec_add((vector unsigned int)v3,(vector unsigned int)v4); 01040 vclip = vec_sums((vector signed int)v1,vclip); 01041 vclip = vec_sums((vector signed int)v2,vclip); 01042 } 01043 01044 if((size_t)samples & 0xf) 01045 { 01046 v1 = (vector float)vec_perm(vec_ld(0,samples),vec_ld(0,samples),vec_lvsl(0,samples)); 01047 v2 = (vector float)vec_perm(vprev,v1,vperm2); 01048 vec_st((vector signed int)v2,0,samples); 01049 } 01050 01051 vec_st(vclip,0,clip_tmp); 01052 clip = clip_tmp[3]; 01053 } 01054 fr->buffer.fill += 256; 01055 01056 return clip; 01057 } 
Generated on Sat May 26 2012 04:33:02 for ReactOS by Doxygen 1.7.6.1
|