ReactOS 0.4.16-dev-319-g6cf4263
synth_altivec.c File Reference
#include "mpg123lib_intern.h"
#include <altivec.h>
Include dependency graph for synth_altivec.c:

Go to the source code of this file.

Macros

#define SYNTH_ALTIVEC(B0STEP)
 
#define SYNTH_STEREO_ALTIVEC(B0STEP)
 

Functions

int synth_1to1_altivec (real *bandPtr, int channel, mpg123_handle *fr, int final)
 
int synth_1to1_stereo_altivec (real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr)
 
int synth_1to1_real_altivec (real *bandPtr, int channel, mpg123_handle *fr, int final)
 
int synth_1to1_fltst_altivec (real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr)
 
int synth_1to1_s32_altivec (real *bandPtr, int channel, mpg123_handle *fr, int final)
 
int synth_1to1_s32_stereo_altivec (real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr)
 

Macro Definition Documentation

◆ SYNTH_ALTIVEC

#define SYNTH_ALTIVEC(B0STEP)

Definition at line 17 of file synth_altivec.c.
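
The macro body is not expanded on this page. From its use in the listings below (it consumes window and b0, leaves partial sums in v5 through v8, and is instantiated with B0STEP of 16 or -16) it vectorizes the inner windowing step of the 1:1 synthesis filter. A minimal scalar sketch of that step, modeled on mpg123's generic synth loop; the grouping into four partial-sum vectors and the macro's exact pointer bookkeeping are assumptions, not the verbatim macro body:

#include "mpg123lib_intern.h"

/* Hedged scalar model of one SYNTH_ALTIVEC output sample: a 16-tap
 * dot product of the decode window against the dct64 output. The
 * AltiVec macro computes four such samples at once and leaves the
 * partial sums in v5..v8 for the caller to combine with the +/-
 * sign pattern of its filter half (vec_sub in the first loop,
 * vec_add in the second). */
static real synth_window_tap16(const real *window, const real *b0)
{
    real sum = 0;
    int i;
    for(i = 0; i < 16; i++)
        sum += window[i] * b0[i];
    /* between samples, window advances to the next phase and b0
     * steps by B0STEP (+16 walks forward, -16 backward) */
    return sum;
}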

◆ SYNTH_STEREO_ALTIVEC

#define SYNTH_STEREO_ALTIVEC(B0STEP)

Definition at line 116 of file synth_altivec.c.
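
SYNTH_STEREO_ALTIVEC is the two-channel analogue: per the stereo listings below it leaves eight partial sums (vsum through vsum8) covering the left and right channels, which the callers then fold with vec_sub/vec_add. An assumption-level scalar counterpart of one step:

#include "mpg123lib_intern.h"

/* The same 16-tap window is applied to both channels' dct64
 * buffers; the vector macro produces these two dot products for
 * four sample positions at once (sketch, not the macro body). */
static void synth_window_tap16_stereo(const real *window,
                                      const real *b0l, const real *b0r,
                                      real *sum_l, real *sum_r)
{
    int i;
    *sum_l = 0;
    *sum_r = 0;
    for(i = 0; i < 16; i++)
    {
        *sum_l += window[i] * b0l[i];
        *sum_r += window[i] * b0r[i];
    }
}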

Function Documentation

◆ synth_1to1_altivec()

int synth_1to1_altivec ( real *  bandPtr,
int  channel,
mpg123_handle *  fr,
int  final 
)

Definition at line 258 of file synth_altivec.c.

259{
260 short *samples = (short *) (fr->buffer.data+fr->buffer.fill);
261
262 real *b0, **buf;
263 int clip;
264 int bo1;
265#ifndef NO_EQUALIZER
266 if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer);
267#endif
268 if(!channel)
269 {
270 fr->bo--;
271 fr->bo &= 0xf;
272 buf = fr->real_buffs[0];
273 }
274 else
275 {
276 samples++;
277 buf = fr->real_buffs[1];
278 }
279
280 if(fr->bo & 0x1)
281 {
282 b0 = buf[0];
283 bo1 = fr->bo;
284 dct64_altivec(buf[1]+((fr->bo+1)&0xf),buf[0]+fr->bo,bandPtr);
285 }
286 else
287 {
288 b0 = buf[1];
289 bo1 = fr->bo+1;
290 dct64_altivec(buf[0]+fr->bo,buf[1]+fr->bo+1,bandPtr);
291 }
292
293
294 {
295 register int j;
296 real *window = fr->decwin + 16 - bo1;
297
298 ALIGNED(16) int clip_tmp[4];
299 vector float v1,v2,v3,v4,v5,v6,v7,v8,v9;
300 vector unsigned char vperm1,vperm2,vperm3,vperm4;
301 vector float vsum,vsum2,vsum3,vsum4,vmin,vmax,vzero;
302 vector signed int vclip;
303 vector signed short vsample1,vsample2;
304 vector unsigned int vshift;
305 vclip = vec_xor(vclip,vclip);
306 vzero = vec_xor(vzero,vzero);
307 vshift = vec_splat_u32(-1); /* 31 */
308#ifdef __APPLE__
309 vmax = (vector float)(32767.0f);
310 vmin = (vector float)(-32768.0f);
311 vperm4 = (vector unsigned char)(0,1,18,19,2,3,22,23,4,5,26,27,6,7,30,31);
312#else
313 vmax = (vector float){32767.0f,32767.0f,32767.0f,32767.0f};
314 vmin = (vector float){-32768.0f,-32768.0f,-32768.0f,-32768.0f};
315 vperm4 = (vector unsigned char){0,1,18,19,2,3,22,23,4,5,26,27,6,7,30,31};
316#endif
317
318 vperm1 = vec_lvsl(0,window);
319 vperm2 = vec_lvsl(0,samples);
320 vperm3 = vec_lvsr(0,samples);
321 for (j=4;j;j--)
322 {
323 SYNTH_ALTIVEC(16);
324
325 vsum = vec_sub(v5,v6);
326 v9 = vec_sub(v7,v8);
327 vsum = vec_add(vsum,v9);
328
329 v3 = vec_round(vsum);
330 v3 = (vector float)vec_cts(v3,0);
331 v1 = (vector float)vec_cmpgt(vsum,vmax);
332 v2 = (vector float)vec_cmplt(vsum,vmin);
333 vsample1 = vec_ld(0,samples);
334 vsample2 = vec_ld(15,samples);
335 v3 = (vector float)vec_packs((vector signed int)v3,(vector signed int)v3);
336 v4 = (vector float)vec_perm(vsample1,vsample2,vperm2);
337 v5 = (vector float)vec_perm(v3,v4,vperm4);
338 v6 = (vector float)vec_perm(vsample2,vsample1,vperm2);
339 v7 = (vector float)vec_perm(v5,v6,vperm3);
340 v8 = (vector float)vec_perm(v6,v5,vperm3);
341 vec_st((vector signed short)v7,15,samples);
342 vec_st((vector signed short)v8,0,samples);
343 samples += 8;
344
345 v1 = (vector float)vec_sr((vector unsigned int)v1, vshift);
346 v2 = (vector float)vec_sr((vector unsigned int)v2, vshift);
347 v1 = (vector float)vec_add((vector unsigned int)v1,(vector unsigned int)v2);
348 vclip = vec_sums((vector signed int)v1,vclip);
349 }
350
351 for (j=4;j;j--)
352 {
353 SYNTH_ALTIVEC(-16);
354
355 vsum = vec_add(v5,v6);
356 v9 = vec_add(v7,v8);
357 vsum = vec_add(vsum,v9);
358
359 v3 = vec_round(vsum);
360 v3 = (vector float)vec_cts(v3,0);
361 v1 = (vector float)vec_cmpgt(vsum,vmax);
362 v2 = (vector float)vec_cmplt(vsum,vmin);
363 vsample1 = vec_ld(0,samples);
364 vsample2 = vec_ld(15,samples);
365 v3 = (vector float)vec_packs((vector signed int)v3,(vector signed int)v3);
366 v4 = (vector float)vec_perm(vsample1,vsample2,vperm2);
367 v5 = (vector float)vec_perm(v3,v4,vperm4);
368 v6 = (vector float)vec_perm(vsample2,vsample1,vperm2);
369 v7 = (vector float)vec_perm(v5,v6,vperm3);
370 v8 = (vector float)vec_perm(v6,v5,vperm3);
371 vec_st((vector signed short)v7,15,samples);
372 vec_st((vector signed short)v8,0,samples);
373 samples += 8;
374
375 v1 = (vector float)vec_sr((vector unsigned int)v1, vshift);
376 v2 = (vector float)vec_sr((vector unsigned int)v2, vshift);
377 v1 = (vector float)vec_add((vector unsigned int)v1,(vector unsigned int)v2);
378 vclip = vec_sums((vector signed int)v1,vclip);
379 }
380
381 vec_st(vclip,0,clip_tmp);
382 clip = clip_tmp[3];
383 }
384 if(final) fr->buffer.fill += 128;
385
386 return clip;
387}
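
A note on the clip accounting in this listing: vec_cmpgt/vec_cmplt produce all-ones lane masks for out-of-range sums, vec_sr by 31 (vshift) reduces each mask to 0 or 1, and vec_sums accumulates those flags into the last element of vclip, which is what clip_tmp[3] reads back. The scalar equivalent:

/* Per-sample clip counting as done by the mask/shift/sum sequence
 * above: one count per sample outside the 16-bit output range. */
static int count_clip16(float sum, int clip)
{
    if(sum > 32767.0f || sum < -32768.0f)
        clip++;
    return clip;
}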

◆ synth_1to1_fltst_altivec()

int synth_1to1_fltst_altivec ( real *  bandPtr_l,
real *  bandPtr_r,
mpg123_handle *  fr 
)

Definition at line 656 of file synth_altivec.c.

657{
658 real *samples = (real *) (fr->buffer.data+fr->buffer.fill);
659
660 real *b0l, *b0r, **bufl, **bufr;
661 int bo1;
662#ifndef NO_EQUALIZER
663 if(fr->have_eq_settings)
664 {
665 do_equalizer(bandPtr_l,0,fr->equalizer);
666 do_equalizer(bandPtr_r,1,fr->equalizer);
667 }
668#endif
669 fr->bo--;
670 fr->bo &= 0xf;
671 bufl = fr->real_buffs[0];
672 bufr = fr->real_buffs[1];
673
674 if(fr->bo & 0x1)
675 {
676 b0l = bufl[0];
677 b0r = bufr[0];
678 bo1 = fr->bo;
679 dct64_altivec(bufl[1]+((fr->bo+1)&0xf),bufl[0]+fr->bo,bandPtr_l);
680 dct64_altivec(bufr[1]+((fr->bo+1)&0xf),bufr[0]+fr->bo,bandPtr_r);
681 }
682 else
683 {
684 b0l = bufl[1];
685 b0r = bufr[1];
686 bo1 = fr->bo+1;
687 dct64_altivec(bufl[0]+fr->bo,bufl[1]+fr->bo+1,bandPtr_l);
688 dct64_altivec(bufr[0]+fr->bo,bufr[1]+fr->bo+1,bandPtr_r);
689 }
690
691
692 {
693 register int j;
694 real *window = fr->decwin + 16 - bo1;
695
696 vector float v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13;
697 vector unsigned char vperm1,vperm2;
698 vector float vsum,vsum2,vsum3,vsum4,vsum5,vsum6,vsum7,vsum8,vscale,vzero;
699 vector float vprev;
700 vzero = vec_xor(vzero,vzero);
701#ifdef __APPLE__
702 vscale = (vector float)(1.0f/32768.0f);
703#else
704 vscale = (vector float){1.0f/32768.0f,1.0f/32768.0f,1.0f/32768.0f,1.0f/32768.0f};
705#endif
706
707 vperm1 = vec_lvsl(0,window);
708 vperm2 = vec_lvsr(0,samples);
709 vprev = vec_perm(vec_ld(0,samples),vec_ld(0,samples),vec_lvsl(0,samples));
710 for (j=4;j;j--)
711 {
712 SYNTH_STEREO_ALTIVEC(16);
713
714 vsum = vec_sub(vsum,vsum2);
715 vsum2 = vec_sub(vsum5,vsum6);
716 vsum3 = vec_sub(vsum3,vsum4);
717 vsum4 = vec_sub(vsum7,vsum8);
718 vsum = vec_add(vsum,vsum3);
719 vsum2 = vec_add(vsum2,vsum4);
720 vsum = vec_madd(vsum, vscale, vzero);
721 vsum2 = vec_madd(vsum2, vscale, vzero);
722
723 v1 = vec_mergeh(vsum, vsum2);
724 v2 = vec_mergel(vsum, vsum2);
725 v3 = vec_perm(vprev,v1,vperm2);
726 v4 = vec_perm(v1,v2,vperm2);
727 vprev = v2;
728 vec_st(v3,0,samples);
729 vec_st(v4,16,samples);
730 samples += 8;
731 }
732
733 for (j=4;j;j--)
734 {
735 SYNTH_STEREO_ALTIVEC(-16);
736
737 vsum = vec_add(vsum,vsum2);
738 vsum2 = vec_add(vsum5,vsum6);
739 vsum3 = vec_add(vsum3,vsum4);
740 vsum4 = vec_add(vsum7,vsum8);
741 vsum = vec_add(vsum,vsum3);
742 vsum2 = vec_add(vsum2,vsum4);
743 vsum = vec_madd(vsum, vscale, vzero);
744 vsum2 = vec_madd(vsum2, vscale, vzero);
745
746 v1 = vec_mergeh(vsum, vsum2);
747 v2 = vec_mergel(vsum, vsum2);
748 v3 = vec_perm(vprev,v1,vperm2);
749 v4 = vec_perm(v1,v2,vperm2);
750 vprev = v2;
751 vec_st(v3,0,samples);
752 vec_st(v4,16,samples);
753 samples += 8;
754 }
755
756 if((size_t)samples & 0xf)
757 {
758 v1 = (vector float)vec_perm(vec_ld(0,samples),vec_ld(0,samples),vec_lvsl(0,samples));
759 v2 = (vector float)vec_perm(vprev,v1,vperm2);
760 vec_st(v2,0,samples);
761 }
762 }
763 fr->buffer.fill += 256;
764
765 return 0;
766}
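
The floating-point variants differ from the 16-bit path mainly in the final step: instead of rounding and packing, vec_madd with vscale = 1/32768 normalizes the filter output into the [-1.0, 1.0) range used by the float output format, so no clipping is counted and the function always returns 0. Scalar equivalent of the scaling:

/* What vec_madd(vsum, vscale, vzero) does per lane. */
static float normalize_sample(float sum)
{
    return sum * (1.0f / 32768.0f);
}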

◆ synth_1to1_real_altivec()

int synth_1to1_real_altivec ( real *  bandPtr,
int  channel,
mpg123_handle *  fr,
int  final 
)

Definition at line 540 of file synth_altivec.c.

541{
542 real *samples = (real *) (fr->buffer.data+fr->buffer.fill);
543
544 real *b0, **buf;
545 int bo1;
546#ifndef NO_EQUALIZER
547 if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer);
548#endif
549 if(!channel)
550 {
551 fr->bo--;
552 fr->bo &= 0xf;
553 buf = fr->real_buffs[0];
554 }
555 else
556 {
557 samples++;
558 buf = fr->real_buffs[1];
559 }
560
561 if(fr->bo & 0x1)
562 {
563 b0 = buf[0];
564 bo1 = fr->bo;
565 dct64_altivec(buf[1]+((fr->bo+1)&0xf),buf[0]+fr->bo,bandPtr);
566 }
567 else
568 {
569 b0 = buf[1];
570 bo1 = fr->bo+1;
571 dct64_altivec(buf[0]+fr->bo,buf[1]+fr->bo+1,bandPtr);
572 }
573
574
575 {
576 register int j;
577 real *window = fr->decwin + 16 - bo1;
578
579 vector float v1,v2,v3,v4,v5,v6,v7,v8,v9;
580 vector unsigned char vperm1,vperm2,vperm3,vperm4, vperm5;
581 vector float vsum,vsum2,vsum3,vsum4,vscale,vzero;
582 vector float vsample1,vsample2,vsample3;
583 vzero = vec_xor(vzero, vzero);
584#ifdef __APPLE__
585 vscale = (vector float)(1.0f/32768.0f);
586 vperm4 = (vector unsigned char)(0,1,2,3,20,21,22,23,4,5,6,7,28,29,30,31);
587 vperm5 = (vector unsigned char)(8,9,10,11,20,21,22,23,12,13,14,15,28,29,30,31);
588#else
589 vscale = (vector float){1.0f/32768.0f,1.0f/32768.0f,1.0f/32768.0f,1.0f/32768.0f};
590 vperm4 = (vector unsigned char){0,1,2,3,20,21,22,23,4,5,6,7,28,29,30,31};
591 vperm5 = (vector unsigned char){8,9,10,11,20,21,22,23,12,13,14,15,28,29,30,31};
592#endif
593
594 vperm1 = vec_lvsl(0,window);
595 vperm2 = vec_lvsl(0,samples);
596 vperm3 = vec_lvsr(0,samples);
597 for (j=4;j;j--)
598 {
599 SYNTH_ALTIVEC(16);
600
601 vsum = vec_sub(v5,v6);
602 v9 = vec_sub(v7,v8);
603 vsum = vec_add(vsum,v9);
604 vsum = vec_madd(vsum, vscale, vzero);
605
606 vsample1 = vec_ld(0,samples);
607 vsample2 = vec_ld(16,samples);
608 vsample3 = vec_ld(31,samples);
609 v1 = vec_perm(vsample1, vsample2, vperm2);
610 v2 = vec_perm(vsample2, vsample3, vperm2);
611 v1 = vec_perm(vsum, v1, vperm4);
612 v2 = vec_perm(vsum, v2, vperm5);
613 v3 = vec_perm(vsample3, vsample2, vperm2);
614 v4 = vec_perm(vsample2, vsample1, vperm2);
615 v5 = vec_perm(v2, v3, vperm3);
616 v6 = vec_perm(v1, v2, vperm3);
617 v7 = vec_perm(v4, v1, vperm3);
618 vec_st(v5,31,samples);
619 vec_st(v6,16,samples);
620 vec_st(v7,0,samples);
621 samples += 8;
622 }
623
624 for (j=4;j;j--)
625 {
626 SYNTH_ALTIVEC(-16);
627
628 vsum = vec_add(v5,v6);
629 v9 = vec_add(v7,v8);
630 vsum = vec_add(vsum,v9);
631 vsum = vec_madd(vsum, vscale, vzero);
632
633 vsample1 = vec_ld(0,samples);
634 vsample2 = vec_ld(16,samples);
635 vsample3 = vec_ld(31,samples);
636 v1 = vec_perm(vsample1, vsample2, vperm2);
637 v2 = vec_perm(vsample2, vsample3, vperm2);
638 v1 = vec_perm(vsum, v1, vperm4);
639 v2 = vec_perm(vsum, v2, vperm5);
640 v3 = vec_perm(vsample3, vsample2, vperm2);
641 v4 = vec_perm(vsample2, vsample1, vperm2);
642 v5 = vec_perm(v2, v3, vperm3);
643 v6 = vec_perm(v1, v2, vperm3);
644 v7 = vec_perm(v4, v1, vperm3);
645 vec_st(v5,31,samples);
646 vec_st(v6,16,samples);
647 vec_st(v7,0,samples);
648 samples += 8;
649 }
650 }
651 if(final) fr->buffer.fill += 256;
652
653 return 0;
654}
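
Note the store pattern of the single-channel variants: samples is advanced by one slot before the loops when channel is 1, and each group of four results lands at stride 2 (samples advances by 8 reals per four outputs), leaving the other channel's slots untouched. The vperm4/vperm5 merges emulate exactly this interleaved scalar store:

#include "mpg123lib_intern.h"

/* Scalar picture of the permute-based interleaved store: write one
 * channel's four samples at stride 2, preserving the other slots. */
static void store_channel4(real *samples, const real out[4])
{
    int i;
    for(i = 0; i < 4; i++)
        samples[2*i] = out[i];
}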

◆ synth_1to1_s32_altivec()

int synth_1to1_s32_altivec ( real *  bandPtr,
int  channel,
mpg123_handle *  fr,
int  final 
)

Definition at line 768 of file synth_altivec.c.

769{
770 int32_t *samples = (int32_t *) (fr->buffer.data+fr->buffer.fill);
771
772 real *b0, **buf;
773 int clip;
774 int bo1;
775#ifndef NO_EQUALIZER
776 if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer);
777#endif
778 if(!channel)
779 {
780 fr->bo--;
781 fr->bo &= 0xf;
782 buf = fr->real_buffs[0];
783 }
784 else
785 {
786 samples++;
787 buf = fr->real_buffs[1];
788 }
789
790 if(fr->bo & 0x1)
791 {
792 b0 = buf[0];
793 bo1 = fr->bo;
794 dct64_altivec(buf[1]+((fr->bo+1)&0xf),buf[0]+fr->bo,bandPtr);
795 }
796 else
797 {
798 b0 = buf[1];
799 bo1 = fr->bo+1;
800 dct64_altivec(buf[0]+fr->bo,buf[1]+fr->bo+1,bandPtr);
801 }
802
803
804 {
805 register int j;
806 real *window = fr->decwin + 16 - bo1;
807
808 ALIGNED(16) int clip_tmp[4];
809 vector float v1,v2,v3,v4,v5,v6,v7,v8,v9;
810 vector unsigned char vperm1,vperm2,vperm3,vperm4,vperm5;
811 vector float vsum,vsum2,vsum3,vsum4,vmax,vmin,vzero;
812 vector signed int vsample1,vsample2,vsample3;
813 vector unsigned int vshift;
814 vector signed int vclip;
815 vzero = vec_xor(vzero, vzero);
816 vclip = vec_xor(vclip, vclip);
817 vshift = vec_splat_u32(-1); /* 31 */
818#ifdef __APPLE__
819 vmax = (vector float)(32767.999f);
820 vmin = (vector float)(-32768.0f);
821 vperm4 = (vector unsigned char)(0,1,2,3,20,21,22,23,4,5,6,7,28,29,30,31);
822 vperm5 = (vector unsigned char)(8,9,10,11,20,21,22,23,12,13,14,15,28,29,30,31);
823#else
824 vmax = (vector float){32767.999f,32767.999f,32767.999f,32767.999f};
825 vmin = (vector float){-32768.0f,-32768.0f,-32768.0f,-32768.0f};
826 vperm4 = (vector unsigned char){0,1,2,3,20,21,22,23,4,5,6,7,28,29,30,31};
827 vperm5 = (vector unsigned char){8,9,10,11,20,21,22,23,12,13,14,15,28,29,30,31};
828#endif
829
830 vperm1 = vec_lvsl(0,window);
831 vperm2 = vec_lvsl(0,samples);
832 vperm3 = vec_lvsr(0,samples);
833 for (j=4;j;j--)
834 {
835 SYNTH_ALTIVEC(16);
836
837 vsum = vec_sub(v5,v6);
838 v9 = vec_sub(v7,v8);
839 v1 = vec_add(vsum,v9);
840 vsum = (vector float)vec_cts(v1,16);
841 v8 = (vector float)vec_cmpgt(v1,vmax);
842 v9 = (vector float)vec_cmplt(v1,vmin);
843
844 vsample1 = vec_ld(0,samples);
845 vsample2 = vec_ld(16,samples);
846 vsample3 = vec_ld(31,samples);
847 v1 = (vector float)vec_perm(vsample1, vsample2, vperm2);
848 v2 = (vector float)vec_perm(vsample2, vsample3, vperm2);
849 v1 = vec_perm(vsum, v1, vperm4);
850 v2 = vec_perm(vsum, v2, vperm5);
851 v3 = (vector float)vec_perm(vsample3, vsample2, vperm2);
852 v4 = (vector float)vec_perm(vsample2, vsample1, vperm2);
853 v5 = vec_perm(v2, v3, vperm3);
854 v6 = vec_perm(v1, v2, vperm3);
855 v7 = vec_perm(v4, v1, vperm3);
856 vec_st((vector signed int)v5,31,samples);
857 vec_st((vector signed int)v6,16,samples);
858 vec_st((vector signed int)v7,0,samples);
859 samples += 8;
860
861 v1 = (vector float)vec_sr((vector unsigned int)v8, vshift);
862 v2 = (vector float)vec_sr((vector unsigned int)v9, vshift);
863 v1 = (vector float)vec_add((vector unsigned int)v1,(vector unsigned int)v2);
864 vclip = vec_sums((vector signed int)v1,vclip);
865 }
866
867 for (j=4;j;j--)
868 {
869 SYNTH_ALTIVEC(-16);
870
871 vsum = vec_add(v5,v6);
872 v9 = vec_add(v7,v8);
873 v1 = vec_add(vsum,v9);
874 vsum = (vector float)vec_cts(v1,16);
875 v8 = (vector float)vec_cmpgt(v1,vmax);
876 v9 = (vector float)vec_cmplt(v1,vmin);
877
878 vsample1 = vec_ld(0,samples);
879 vsample2 = vec_ld(16,samples);
880 vsample3 = vec_ld(31,samples);
881 v1 = (vector float)vec_perm(vsample1, vsample2, vperm2);
882 v2 = (vector float)vec_perm(vsample2, vsample3, vperm2);
883 v1 = vec_perm(vsum, v1, vperm4);
884 v2 = vec_perm(vsum, v2, vperm5);
885 v3 = (vector float)vec_perm(vsample3, vsample2, vperm2);
886 v4 = (vector float)vec_perm(vsample2, vsample1, vperm2);
887 v5 = vec_perm(v2, v3, vperm3);
888 v6 = vec_perm(v1, v2, vperm3);
889 v7 = vec_perm(v4, v1, vperm3);
890 vec_st((vector signed int)v5,31,samples);
891 vec_st((vector signed int)v6,16,samples);
892 vec_st((vector signed int)v7,0,samples);
893 samples += 8;
894
895 v1 = (vector float)vec_sr((vector unsigned int)v8, vshift);
896 v2 = (vector float)vec_sr((vector unsigned int)v9, vshift);
897 v1 = (vector float)vec_add((vector unsigned int)v1,(vector unsigned int)v2);
898 vclip = vec_sums((vector signed int)v1,vclip);
899 }
900
901 vec_st(vclip,0,clip_tmp);
902 clip = clip_tmp[3];
903 }
904 if(final) fr->buffer.fill += 256;
905
906 return clip;
907}
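
Here vec_cts(v1,16) converts to fixed point with 16 fraction bits, i.e. it scales by 2^16 before truncating, stretching the 16-bit-range float onto the full signed 32-bit range; vmax is 32767.999f because that is roughly the largest input that still fits after scaling, and vec_cts saturates beyond it. An assumption-level scalar sketch of the convert-and-count step:

#include <stdint.h>

/* Scale into the s32 range, saturating and counting clips as the
 * vector code does with vec_cts/vec_cmpgt/vec_cmplt. */
static int32_t to_s32(float sum, int *clip)
{
    if(sum > 32767.999f)  { (*clip)++; return INT32_MAX; }
    if(sum < -32768.0f)   { (*clip)++; return INT32_MIN; }
    return (int32_t)(sum * 65536.0f);
}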

◆ synth_1to1_s32_stereo_altivec()

int synth_1to1_s32_stereo_altivec ( real *  bandPtr_l,
real *  bandPtr_r,
mpg123_handle *  fr 
)

Definition at line 910 of file synth_altivec.c.

911{
912 int32_t *samples = (int32_t *) (fr->buffer.data+fr->buffer.fill);
913
914 real *b0l, *b0r, **bufl, **bufr;
915 int clip;
916 int bo1;
917#ifndef NO_EQUALIZER
918 if(fr->have_eq_settings)
919 {
920 do_equalizer(bandPtr_l,0,fr->equalizer);
921 do_equalizer(bandPtr_r,1,fr->equalizer);
922 }
923#endif
924 fr->bo--;
925 fr->bo &= 0xf;
926 bufl = fr->real_buffs[0];
927 bufr = fr->real_buffs[1];
928
929 if(fr->bo & 0x1)
930 {
931 b0l = bufl[0];
932 b0r = bufr[0];
933 bo1 = fr->bo;
934 dct64_altivec(bufl[1]+((fr->bo+1)&0xf),bufl[0]+fr->bo,bandPtr_l);
935 dct64_altivec(bufr[1]+((fr->bo+1)&0xf),bufr[0]+fr->bo,bandPtr_r);
936 }
937 else
938 {
939 b0l = bufl[1];
940 b0r = bufr[1];
941 bo1 = fr->bo+1;
942 dct64_altivec(bufl[0]+fr->bo,bufl[1]+fr->bo+1,bandPtr_l);
943 dct64_altivec(bufr[0]+fr->bo,bufr[1]+fr->bo+1,bandPtr_r);
944 }
945
946
947 {
948 register int j;
949 real *window = fr->decwin + 16 - bo1;
950
951 ALIGNED(16) int clip_tmp[4];
952 vector float v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13;
953 vector unsigned char vperm1,vperm2;
954 vector float vsum,vsum2,vsum3,vsum4,vsum5,vsum6,vsum7,vsum8,vmax,vmin,vzero;
955 vector float vprev;
956 vector unsigned int vshift;
957 vector signed int vclip;
958 vzero = vec_xor(vzero, vzero);
959 vclip = vec_xor(vclip, vclip);
960 vshift = vec_splat_u32(-1); /* 31 */
961#ifdef __APPLE__
962 vmax = (vector float)(32767.999f);
963 vmin = (vector float)(-32768.0f);
964#else
965 vmax = (vector float){32767.999f,32767.999f,32767.999f,32767.999f};
966 vmin = (vector float){-32768.0f,-32768.0f,-32768.0f,-32768.0f};
967#endif
968
969 vperm1 = vec_lvsl(0,window);
970 vperm2 = vec_lvsr(0,samples);
971 vprev = (vector float)vec_perm(vec_ld(0,samples),vec_ld(0,samples),vec_lvsl(0,samples));
972 for (j=4;j;j--)
973 {
974 SYNTH_STEREO_ALTIVEC(16);
975
976 vsum = vec_sub(vsum,vsum2);
977 vsum2 = vec_sub(vsum5,vsum6);
978 vsum3 = vec_sub(vsum3,vsum4);
979 vsum4 = vec_sub(vsum7,vsum8);
980 v1 = vec_add(vsum,vsum3);
981 v2 = vec_add(vsum2,vsum4);
982 vsum = (vector float)vec_cts(v1,16);
983 vsum2 = (vector float)vec_cts(v2,16);
984 v5 = (vector float)vec_cmpgt(v1,vmax);
985 v6 = (vector float)vec_cmplt(v1,vmin);
986 v7 = (vector float)vec_cmpgt(v2,vmax);
987 v8 = (vector float)vec_cmplt(v2,vmin);
988
989 v1 = vec_mergeh(vsum, vsum2);
990 v2 = vec_mergel(vsum, vsum2);
991 v3 = vec_perm(vprev,v1,vperm2);
992 v4 = vec_perm(v1,v2,vperm2);
993 vprev = v2;
994 vec_st((vector signed int)v3,0,samples);
995 vec_st((vector signed int)v4,16,samples);
996 samples += 8;
997
998 v1 = (vector float)vec_sr((vector unsigned int)v5, vshift);
999 v2 = (vector float)vec_sr((vector unsigned int)v6, vshift);
1000 v3 = (vector float)vec_sr((vector unsigned int)v7, vshift);
1001 v4 = (vector float)vec_sr((vector unsigned int)v8, vshift);
1002 v1 = (vector float)vec_add((vector unsigned int)v1,(vector unsigned int)v2);
1003 v2 = (vector float)vec_add((vector unsigned int)v3,(vector unsigned int)v4);
1004 vclip = vec_sums((vector signed int)v1,vclip);
1005 vclip = vec_sums((vector signed int)v2,vclip);
1006 }
1007
1008 for (j=4;j;j--)
1009 {
1010 SYNTH_STEREO_ALTIVEC(-16);
1011
1012 vsum = vec_add(vsum,vsum2);
1013 vsum2 = vec_add(vsum5,vsum6);
1014 vsum3 = vec_add(vsum3,vsum4);
1015 vsum4 = vec_add(vsum7,vsum8);
1016 v1 = vec_add(vsum,vsum3);
1017 v2 = vec_add(vsum2,vsum4);
1018 vsum = (vector float)vec_cts(v1,16);
1019 vsum2 = (vector float)vec_cts(v2,16);
1020 v5 = (vector float)vec_cmpgt(v1,vmax);
1021 v6 = (vector float)vec_cmplt(v1,vmin);
1022 v7 = (vector float)vec_cmpgt(v2,vmax);
1023 v8 = (vector float)vec_cmplt(v2,vmin);
1024
1025 v1 = vec_mergeh(vsum, vsum2);
1026 v2 = vec_mergel(vsum, vsum2);
1027 v3 = vec_perm(vprev,v1,vperm2);
1028 v4 = vec_perm(v1,v2,vperm2);
1029 vprev = v2;
1030 vec_st((vector signed int)v3,0,samples);
1031 vec_st((vector signed int)v4,16,samples);
1032 samples += 8;
1033
1034 v1 = (vector float)vec_sr((vector unsigned int)v5, vshift);
1035 v2 = (vector float)vec_sr((vector unsigned int)v6, vshift);
1036 v3 = (vector float)vec_sr((vector unsigned int)v7, vshift);
1037 v4 = (vector float)vec_sr((vector unsigned int)v8, vshift);
1038 v1 = (vector float)vec_add((vector unsigned int)v1,(vector unsigned int)v2);
1039 v2 = (vector float)vec_add((vector unsigned int)v3,(vector unsigned int)v4);
1040 vclip = vec_sums((vector signed int)v1,vclip);
1041 vclip = vec_sums((vector signed int)v2,vclip);
1042 }
1043
1044 if((size_t)samples & 0xf)
1045 {
1046 v1 = (vector float)vec_perm(vec_ld(0,samples),vec_ld(0,samples),vec_lvsl(0,samples));
1047 v2 = (vector float)vec_perm(vprev,v1,vperm2);
1048 vec_st((vector signed int)v2,0,samples);
1049 }
1050
1051 vec_st(vclip,0,clip_tmp);
1052 clip = clip_tmp[3];
1053 }
1054 fr->buffer.fill += 256;
1055
1056 return clip;
1057}
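
For orientation: each 1:1 synth call consumes 32 subband samples per channel and emits 32 PCM frames, which matches the buffer advances in these listings (the 32-frames-per-call figure is standard mpg123 behaviour, inferred here from the pointer math). A small check of that arithmetic:

#include <assert.h>
#include <stdint.h>

static void check_fill_math(void)
{
    /* 32 frames, two interleaved channels per call */
    assert(32 * 2 * sizeof(short)   == 128); /* 16-bit paths: fill += 128 */
    assert(32 * 2 * sizeof(int32_t) == 256); /* s32/float paths: fill += 256 */
}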

◆ synth_1to1_stereo_altivec()

int synth_1to1_stereo_altivec ( real *  bandPtr_l,
real *  bandPtr_r,
mpg123_handle *  fr 
)

Definition at line 389 of file synth_altivec.c.

390{
391 short *samples = (short *) (fr->buffer.data+fr->buffer.fill);
392
393 real *b0l, *b0r, **bufl, **bufr;
394 int clip;
395 int bo1;
396#ifndef NO_EQUALIZER
397 if(fr->have_eq_settings)
398 {
399 do_equalizer(bandPtr_l,0,fr->equalizer);
400 do_equalizer(bandPtr_r,1,fr->equalizer);
401 }
402#endif
403 fr->bo--;
404 fr->bo &= 0xf;
405 bufl = fr->real_buffs[0];
406 bufr = fr->real_buffs[1];
407
408 if(fr->bo & 0x1)
409 {
410 b0l = bufl[0];
411 b0r = bufr[0];
412 bo1 = fr->bo;
413 dct64_altivec(bufl[1]+((fr->bo+1)&0xf),bufl[0]+fr->bo,bandPtr_l);
414 dct64_altivec(bufr[1]+((fr->bo+1)&0xf),bufr[0]+fr->bo,bandPtr_r);
415 }
416 else
417 {
418 b0l = bufl[1];
419 b0r = bufr[1];
420 bo1 = fr->bo+1;
421 dct64_altivec(bufl[0]+fr->bo,bufl[1]+fr->bo+1,bandPtr_l);
422 dct64_altivec(bufr[0]+fr->bo,bufr[1]+fr->bo+1,bandPtr_r);
423 }
424
425
426 {
427 register int j;
428 real *window = fr->decwin + 16 - bo1;
429
430 ALIGNED(16) int clip_tmp[4];
431 vector float v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13;
432 vector unsigned char vperm1,vperm2;
433 vector float vsum,vsum2,vsum3,vsum4,vsum5,vsum6,vsum7,vsum8,vmin,vmax,vzero;
434 vector signed int vclip;
435 vector unsigned int vshift;
436 vector signed short vprev;
437 vclip = vec_xor(vclip,vclip);
438 vzero = vec_xor(vzero,vzero);
439 vshift = vec_splat_u32(-1); /* 31 */
440#ifdef __APPLE__
441 vmax = (vector float)(32767.0f);
442 vmin = (vector float)(-32768.0f);
443#else
444 vmax = (vector float){32767.0f,32767.0f,32767.0f,32767.0f};
445 vmin = (vector float){-32768.0f,-32768.0f,-32768.0f,-32768.0f};
446#endif
447
448 vperm1 = vec_lvsl(0,window);
449 vperm2 = vec_lvsr(0,samples);
450 vprev = vec_perm(vec_ld(0,samples),vec_ld(0,samples),vec_lvsl(0,samples));
451 for (j=4;j;j--)
452 {
453 SYNTH_STEREO_ALTIVEC(16);
454
455 vsum = vec_sub(vsum,vsum2);
456 vsum2 = vec_sub(vsum5,vsum6);
457 vsum3 = vec_sub(vsum3,vsum4);
458 vsum4 = vec_sub(vsum7,vsum8);
459 vsum = vec_add(vsum,vsum3);
460 vsum2 = vec_add(vsum2,vsum4);
461
462 v1 = vec_round(vsum);
463 v2 = vec_round(vsum2);
464 v1 = (vector float)vec_cts(v1,0);
465 v2 = (vector float)vec_cts(v2,0);
466 v3 = vec_mergeh(v1, v2);
467 v4 = vec_mergel(v1, v2);
468 v5 = (vector float)vec_packs((vector signed int)v3,(vector signed int)v4);
469 v6 = (vector float)vec_perm(vprev,(vector signed short)v5,vperm2);
470 vprev = (vector signed short)v5;
471 v1 = (vector float)vec_cmpgt(vsum,vmax);
472 v2 = (vector float)vec_cmplt(vsum,vmin);
473 v3 = (vector float)vec_cmpgt(vsum2,vmax);
474 v4 = (vector float)vec_cmplt(vsum2,vmin);
475 vec_st((vector signed short)v6,0,samples);
476 samples += 8;
477
478 v1 = (vector float)vec_sr((vector unsigned int)v1, vshift);
479 v2 = (vector float)vec_sr((vector unsigned int)v2, vshift);
480 v3 = (vector float)vec_sr((vector unsigned int)v3, vshift);
481 v4 = (vector float)vec_sr((vector unsigned int)v4, vshift);
482 v1 = (vector float)vec_add((vector unsigned int)v1,(vector unsigned int)v2);
483 v2 = (vector float)vec_add((vector unsigned int)v3,(vector unsigned int)v4);
484 vclip = vec_sums((vector signed int)v1,vclip);
485 vclip = vec_sums((vector signed int)v2,vclip);
486 }
487
488 for (j=4;j;j--)
489 {
490 SYNTH_STEREO_ALTIVEC(-16);
491
492 vsum = vec_add(vsum,vsum2);
493 vsum2 = vec_add(vsum5,vsum6);
494 vsum3 = vec_add(vsum3,vsum4);
495 vsum4 = vec_add(vsum7,vsum8);
496 vsum = vec_add(vsum,vsum3);
497 vsum2 = vec_add(vsum2,vsum4);
498
499 v1 = vec_round(vsum);
500 v2 = vec_round(vsum2);
501 v1 = (vector float)vec_cts(v1,0);
502 v2 = (vector float)vec_cts(v2,0);
503 v3 = vec_mergeh(v1, v2);
504 v4 = vec_mergel(v1, v2);
505 v5 = (vector float)vec_packs((vector signed int)v3,(vector signed int)v4);
506 v6 = (vector float)vec_perm(vprev,(vector signed short)v5,vperm2);
507 vprev = (vector signed short)v5;
508 v1 = (vector float)vec_cmpgt(vsum,vmax);
509 v2 = (vector float)vec_cmplt(vsum,vmin);
510 v3 = (vector float)vec_cmpgt(vsum2,vmax);
511 v4 = (vector float)vec_cmplt(vsum2,vmin);
512 vec_st((vector signed short)v6,0,samples);
513 samples += 8;
514
515 v1 = (vector float)vec_sr((vector unsigned int)v1, vshift);
516 v2 = (vector float)vec_sr((vector unsigned int)v2, vshift);
517 v3 = (vector float)vec_sr((vector unsigned int)v3, vshift);
518 v4 = (vector float)vec_sr((vector unsigned int)v4, vshift);
519 v1 = (vector float)vec_add((vector unsigned int)v1,(vector unsigned int)v2);
520 v2 = (vector float)vec_add((vector unsigned int)v3,(vector unsigned int)v4);
521 vclip = vec_sums((vector signed int)v1,vclip);
522 vclip = vec_sums((vector signed int)v2,vclip);
523 }
524
525 if((size_t)samples & 0xf)
526 {
527 v1 = (vector float)vec_perm(vec_ld(0,samples),vec_ld(0,samples),vec_lvsl(0,samples));
528 v2 = (vector float)vec_perm(vprev,(vector signed short)v1,vperm2);
529 vec_st((vector signed short)v2,0,samples);
530 }
531
532 vec_st(vclip,0,clip_tmp);
533 clip = clip_tmp[3];
534 }
535 fr->buffer.fill += 128;
536
537 return clip;
538}
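
The mergeh/mergel plus vec_packs sequence at the heart of this loop rounds both channel sums, saturates them to the signed 16-bit range, and interleaves left/right pairs in the output. A scalar sketch of one output pair:

#include <math.h>

/* Round to nearest (vec_round + vec_cts), saturate to 16 bit
 * (vec_packs), interleave L/R (mergeh/mergel). Sketch only. */
static void pack_pair(short *out, float sum_l, float sum_r)
{
    long l = lrintf(sum_l);
    long r = lrintf(sum_r);
    out[0] = (short)(l > 32767 ? 32767 : (l < -32768 ? -32768 : l));
    out[1] = (short)(r > 32767 ? 32767 : (r < -32768 ? -32768 : r));
}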