ReactOS Fundraising Campaign 2012
 
€ 4,410 / € 30,000

Information | Donate

Home | Info | Community | Development | myReactOS | Contact Us

  1. Home
  2. Community
  3. Development
  4. myReactOS
  5. Fundraiser 2012

  1. Main Page
  2. Alphabetical List
  3. Data Structures
  4. Directories
  5. File List
  6. Data Fields
  7. Globals
  8. Related Pages

ReactOS Development > Doxygen

synth_sse3d.h
Go to the documentation of this file.
00001 /*
00002     decode_sse3d: Synth for SSE and extended 3DNow (yeah, the name is a relic)
00003 
00004     copyright 2006-2007 by Zuxy Meng/the mpg123 project - free software under the terms of the LGPL 2.1
00005     see COPYING and AUTHORS files in distribution or http://mpg123.org
00006     initially written by the mysterious higway for MMX (apparently)
00007     then developed into SSE opt by Zuxy Meng, also building on Romain Dolbeau's AltiVec
00008     Both have agreed to distribution under LGPL 2.1 .
00009 
00010     Transformed back into standalone asm, with help of
00011     gcc -S -DHAVE_CONFIG_H -I.  -march=pentium -O3 -Wall -pedantic -fno-strict-aliasing -DREAL_IS_FLOAT -c -o decode_mmxsse.{S,c}
00012 
00013     The difference between SSE and 3DNowExt is the dct64 function and the synth function name.
00014     This template here uses the SYNTH_NAME and MPL_DCT64 macros for this - see decode_sse.S and decode_3dnowext.S...
00015     That's not memory efficient since there's doubled code, but it's easier than giving another function pointer.
00016     Maybe I'll change it in future, but now I need something that works.
00017 
00018     Original comment from MPlayer source follows:
00019 */
00020 
00021 /*
00022  * this code comes under GPL
00023  * This code was taken from http://www.mpg123.org
00024  * See ChangeLog of mpg123-0.59s-pre.1 for detail
00025  * Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
00026  *
00027  * Local ChangeLog:
00028  * - Partial loops unrolling and removing MOVW insn from loops
00029 */
00030 
00031 #include "mangle.h"
00032 
00033     .data /* two 8-byte MMX masks used by the synth loops when merging new samples into the output */
00034     ALIGN8 /* alignment macro, not defined in this file (presumably from mangle.h -- confirm) */
00035 one_null: /* 0xFFFF0000 in both dwords: pand with this keeps the upper 16 bits of each dword */
00036     .long   -65536
00037     .long   -65536
00038     ALIGN8
00039 null_one: /* 0x0000FFFF in both dwords: pand with this keeps the lower 16 bits of each dword */
00040     .long   65535
00041     .long   65535
00042 
00043     .text
00044     ALIGN16
00045     /* void SYNTH_NAME(real *bandPtr, int channel, short *samples, short *buffs, int *bo, float *decwins) */
00046 .globl SYNTH_NAME /* 32-bit x86, AT&T syntax, all arguments on the stack; uses MMX registers and finishes with emms */
00047 SYNTH_NAME: /* calls MPL_DCT64 once, then writes 16-bit samples at a 4-byte stride starting at &samples[channel] */
00048     pushl   %ebp /* save the caller's frame pointer */
00049 /* stack (offsets from %ebp): 0=saved ebp 4=return address 8=bandPtr 12=channel 16=samples 20=buffs 24=bo 28=decwins */
00050     movl    %esp, %ebp
00051 /* Now the old stack addresses are preserved via %ebp. */
00052     subl  $4,%esp /* Reserve one 4-byte local: what has been called temp before (TEMP below). */
00053     pushl   %edi /* %edi, %esi and %ebx are saved here and restored in the epilogue */
00054     pushl   %esi
00055     pushl   %ebx
00056 #define TEMP 12(%esp)
00057 /* APP -- TEMP is 12(%esp) because three 4-byte registers were pushed after the local was reserved; it is only valid while no further pushes are outstanding */
00058     movl 12(%ebp),%ecx /* ecx = channel */
00059     movl 16(%ebp),%edi /* edi = samples */
00060     movl $15,%ebx /* ebx = 15, the wrap mask applied to the *bo index */
00061     movl 24(%ebp),%edx /* edx = bo */
00062     leal (%edi,%ecx,2),%edi /* edi = samples + 2*channel bytes, i.e. &samples[channel] */
00063     decl %ecx /* ecx = channel - 1 */
00064     movl 20(%ebp),%esi /* esi = buffs */
00065     movl (%edx),%eax /* eax = *bo */
00066     jecxz .L01 /* taken only when channel == 1: *bo and esi are left as loaded */
00067     decl %eax
00068     andl %ebx,%eax /* eax = (*bo - 1) & 15 */
00069     leal 1088(%esi),%esi /* esi = buffs + 1088 bytes */
00070     movl %eax,(%edx) /* *bo = eax (written back on this path only) */
00071     .L01:
00072     leal (%esi,%eax,2),%edx /* edx = esi + 2*eax */
00073     movl %eax,TEMP /* TEMP = eax */
00074     incl %eax
00075     andl %ebx,%eax /* eax = (eax + 1) & 15 */
00076     leal 544(%esi,%eax,2),%ecx /* ecx = esi + 544 + 2*eax */
00077     incl %ebx /* ebx = 16 */
00078     testl $1, %eax
00079     jnz .L02 /* eax odd: keep edx, ecx, esi and TEMP as computed */
00080     xchgl %edx,%ecx /* eax even: swap the two pointers handed to MPL_DCT64, */
00081     incl TEMP /* add one to TEMP, */
00082     leal 544(%esi),%esi /* and move esi forward by 544 bytes */
00083     .L02:
00084     pushl 8(%ebp) /* third stack argument: bandPtr */
00085     pushl %edx /* second stack argument */
00086     pushl %ecx /* first stack argument (pushed last) */
00087     call MPL_DCT64 /* MPL_DCT64(ecx, edx, bandPtr); the macro/function is defined outside this file */
00088     addl $12, %esp /* drop the three arguments; TEMP is addressable again */
00089     leal 1(%ebx), %ecx /* ecx = ebx + 1 (17 if MPL_DCT64 left ebx == 16 intact -- presumably it does; confirm) */
00090     subl TEMP,%ebx /* ebx = ebx - TEMP */
00091     pushl %ecx /* keep the sample count on the stack; TEMP must not be used until it is popped */
00092     /* leal ASM_NAME(decwins)(%ebx,%ebx,1), %edx */
00093     movl 28(%ebp),%ecx /* ecx = decwins */
00094     leal (%ecx,%ebx,2), %edx /* edx = decwins + 2*ebx bytes. NOTE(review): the signature comment says float *decwins, but the data is read as packed 16-bit words below -- confirm */
00095     movl (%esp),%ecx /* restore, but leave value on stack */
00096     shrl $1, %ecx /* ecx = count / 2 = iterations of the two-samples-per-pass loop */
00097     ALIGN16
00098     .L03:
00099     movq  (%edx),%mm0 /* sample A: window words at edx+0 .. edx+31 */
00100     movq  64(%edx),%mm4 /* sample B: window words at edx+64 .. edx+95 */
00101     pmaddwd (%esi),%mm0 /* A uses buffer words at esi+0 .. esi+31 */
00102     pmaddwd 32(%esi),%mm4 /* B uses buffer words at esi+32 .. esi+63; pmaddwd sums adjacent signed 16x16 products into dwords */
00103     movq  8(%edx),%mm1
00104     movq  72(%edx),%mm5
00105     pmaddwd 8(%esi),%mm1
00106     pmaddwd 40(%esi),%mm5
00107     movq  16(%edx),%mm2
00108     movq  80(%edx),%mm6
00109     pmaddwd 16(%esi),%mm2
00110     pmaddwd 48(%esi),%mm6
00111     movq  24(%edx),%mm3
00112     movq  88(%edx),%mm7
00113     pmaddwd 24(%esi),%mm3
00114     pmaddwd 56(%esi),%mm7
00115     paddd %mm1,%mm0 /* accumulate the four partial results of A into mm0 ... */
00116     paddd %mm5,%mm4 /* ... and of B into mm4 */
00117     paddd %mm2,%mm0
00118     paddd %mm6,%mm4
00119     paddd %mm3,%mm0
00120     paddd %mm7,%mm4
00121     movq  %mm0,%mm1
00122     movq  %mm4,%mm5
00123     psrlq $32,%mm1 /* bring the high dword down ... */
00124     psrlq $32,%mm5
00125     paddd %mm1,%mm0 /* ... so the low dword holds the full 16-term sum */
00126     paddd %mm5,%mm4
00127     psrad $13,%mm0 /* arithmetic shift right by 13 bits */
00128     psrad $13,%mm4
00129     packssdw %mm0,%mm0 /* saturate to signed 16 bit */
00130     packssdw %mm4,%mm4
00131     movq    (%edi), %mm1 /* load the 8 output bytes currently in memory */
00132     punpckldq %mm4, %mm0 /* mm0 = { low dword of A, low dword of B } */
00133     pand   one_null, %mm1 /* keep the upper 16 bits of each existing dword (presumably the other channel's interleaved samples -- confirm against caller) */
00134     pand   null_one, %mm0 /* keep only the new sample in the lower 16 bits of each dword */
00135     por    %mm0, %mm1
00136     movq   %mm1,(%edi) /* store two new samples, leaving the other 16-bit halves unchanged */
00137     leal 64(%esi),%esi /* advance the buffer pointer by 64 bytes, */
00138     leal 128(%edx),%edx /* the window pointer by 128 bytes, */
00139     leal 8(%edi),%edi /* and the output pointer by 8 bytes */
00140     decl %ecx
00141     jnz  .L03
00142     popl %ecx /* ecx = sample count pushed before the loop; TEMP offset is valid again */
00143     andl $1, %ecx
00144     jecxz .next_loop /* even count: no leftover sample */
00145     movq  (%edx),%mm0 /* leftover single sample: same 16-term sum as sample A above */
00146     pmaddwd (%esi),%mm0
00147     movq  8(%edx),%mm1
00148     pmaddwd 8(%esi),%mm1
00149     movq  16(%edx),%mm2
00150     pmaddwd 16(%esi),%mm2
00151     movq  24(%edx),%mm3
00152     pmaddwd 24(%esi),%mm3
00153     paddd %mm1,%mm0
00154     paddd %mm2,%mm0
00155     paddd %mm3,%mm0
00156     movq  %mm0,%mm1
00157     psrlq $32,%mm1
00158     paddd %mm1,%mm0 /* low dword = full sum */
00159     psrad $13,%mm0
00160     packssdw %mm0,%mm0 /* saturate to signed 16 bit */
00161     movd %mm0,%eax
00162     movw %ax, (%edi) /* store only the 16-bit sample; the following 16 bits are not touched */
00163     leal 32(%esi),%esi /* advance the buffer pointer by 32 bytes, */
00164     leal 64(%edx),%edx /* the window pointer by 64 bytes, */
00165     leal 4(%edi),%edi /* and the output pointer by 4 bytes */
00166     .next_loop:
00167     subl $64,%esi /* step the buffer pointer back by 64 bytes */
00168     movl $7,%ecx /* 7 iterations, two samples each */
00169     ALIGN16
00170     .L04:
00171     movq  (%edx),%mm0 /* sample A: window words at edx+0 .. edx+31 with buffer words at esi+0 .. esi+31 */
00172     movq  64(%edx),%mm4 /* sample B: window words at edx+64 .. edx+95 with buffer words at esi-32 .. esi-1 */
00173     pmaddwd (%esi),%mm0
00174     pmaddwd -32(%esi),%mm4
00175     movq  8(%edx),%mm1
00176     movq  72(%edx),%mm5
00177     pmaddwd 8(%esi),%mm1
00178     pmaddwd -24(%esi),%mm5
00179     movq  16(%edx),%mm2
00180     movq  80(%edx),%mm6
00181     pmaddwd 16(%esi),%mm2
00182     pmaddwd -16(%esi),%mm6
00183     movq  24(%edx),%mm3
00184     movq  88(%edx),%mm7
00185     pmaddwd 24(%esi),%mm3
00186     pmaddwd -8(%esi),%mm7
00187     paddd %mm1,%mm0
00188     paddd %mm5,%mm4
00189     paddd %mm2,%mm0
00190     paddd %mm6,%mm4
00191     paddd %mm3,%mm0
00192     paddd %mm7,%mm4
00193     movq  %mm0,%mm1
00194     movq  %mm4,%mm5
00195     psrlq $32,%mm1
00196     psrlq $32,%mm5
00197     paddd %mm0,%mm1 /* here the full sums land in mm1/mm5 (low dword); mm0/mm4 become scratch */
00198     paddd %mm4,%mm5
00199     psrad $13,%mm1 /* arithmetic shift right by 13 bits */
00200     psrad $13,%mm5
00201     packssdw %mm1,%mm1 /* saturate to signed 16 bit */
00202     packssdw %mm5,%mm5
00203     psubd %mm0,%mm0 /* mm0 = 0 */
00204     psubd %mm4,%mm4 /* mm4 = 0 */
00205     psubsw %mm1,%mm0 /* mm0 = 0 - sample A, with signed saturation */
00206     psubsw %mm5,%mm4 /* mm4 = 0 - sample B, with signed saturation */
00207     movq    (%edi), %mm1 /* load the 8 output bytes currently in memory */
00208     punpckldq %mm4, %mm0 /* mm0 = { low dword of A, low dword of B } */
00209     pand   one_null, %mm1 /* keep the upper 16 bits of each existing dword */
00210     pand   null_one, %mm0 /* keep only the new sample in the lower 16 bits of each dword */
00211     por    %mm0, %mm1
00212     movq   %mm1,(%edi) /* store two negated samples, leaving the other 16-bit halves unchanged */
00213     subl $64,%esi /* the buffer pointer walks backwards by 64 bytes, */
00214     addl $128,%edx /* the window pointer forwards by 128 bytes, */
00215     leal 8(%edi),%edi /* and the output pointer forwards by 8 bytes */
00216     decl %ecx
00217     jnz  .L04
00218     movq  (%edx),%mm0 /* final single sample: 16-term sum, negated like the loop above */
00219     pmaddwd (%esi),%mm0
00220     movq  8(%edx),%mm1
00221     pmaddwd 8(%esi),%mm1
00222     movq  16(%edx),%mm2
00223     pmaddwd 16(%esi),%mm2
00224     movq  24(%edx),%mm3
00225     pmaddwd 24(%esi),%mm3
00226     paddd %mm1,%mm0
00227     paddd %mm2,%mm0
00228     paddd %mm3,%mm0
00229     movq  %mm0,%mm1
00230     psrlq $32,%mm1
00231     paddd %mm0,%mm1 /* low dword of mm1 = full sum */
00232     psrad $13,%mm1
00233     packssdw %mm1,%mm1 /* saturate to signed 16 bit */
00234     psubd %mm0,%mm0 /* mm0 = 0 */
00235     psubsw %mm1,%mm0 /* mm0 = 0 - sample, with signed saturation */
00236     movd %mm0,%eax
00237     movw %ax,(%edi) /* store only the 16-bit sample */
00238     emms /* leave MMX state so that x87 floating-point code can run again */
00239 
00240 /* NO_APP */
00241     popl    %ebx /* restore the registers saved in the prologue, in reverse order */
00242     popl    %esi
00243     popl    %edi
00244     addl $4,%esp /* release the TEMP slot */
00245     popl    %ebp /* restore the caller's frame pointer */
00246     ret

Generated on Sun May 27 2012 04:33:13 for ReactOS by doxygen 1.7.6.1

ReactOS is a registered trademark or a trademark of ReactOS Foundation in the United States and other countries.