Home | Info | Community | Development | myReactOS | Contact Us
ReactOS Development > Doxygensynth_sse3d.h
Go to the documentation of this file.
/*
	decode_sse3d: Synth for SSE and extended 3DNow (yeah, the name is a relic)

	copyright 2006-2007 by Zuxy Meng/the mpg123 project - free software under the terms of the LGPL 2.1
	see COPYING and AUTHORS files in distribution or http://mpg123.org
	initially written by the mysterious higway for MMX (apparently)
	then developed into SSE opt by Zuxy Meng, also building on Romain Dolbeau's AltiVec
	Both have agreed to distribution under LGPL 2.1 .

	Transformed back into standalone asm, with help of
	gcc -S -DHAVE_CONFIG_H -I. -march=pentium -O3 -Wall -pedantic -fno-strict-aliasing -DREAL_IS_FLOAT -c -o decode_mmxsse.{S,c}

	The difference between SSE and 3DNowExt is the dct64 function and the synth function name.
	This template here uses the SYNTH_NAME and MPL_DCT64 macros for this - see decode_sse.S and decode_3dnowext.S...
	That's not memory efficient since there's doubled code, but it's easier than giving another function pointer.
	Maybe I'll change it in future, but now I need something that works.

	Original comment from MPlayer source follows:
*/

/*
 * this code comes under GPL
 * This code was taken from http://www.mpg123.org
 * See ChangeLog of mpg123-0.59s-pre.1 for detail
 * Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
 *
 * Local ChangeLog:
 * - Partial loops unrolling and removing MOVW insn from loops
 */

#include "mangle.h"

/*
 * Syntax: GAS / AT&T, 32-bit x86, run through the C preprocessor.
 * SYNTH_NAME, MPL_DCT64, ALIGN8 and ALIGN16 are not defined in this file;
 * presumably they come from mangle.h and the including .S file - confirm there.
 */

/*
 * Merge masks for the 64-bit read-modify-write of the output buffer.
 *   one_null = 0xFFFF0000 in both dwords: keeps the HIGH word of each dword.
 *   null_one = 0x0000FFFF in both dwords: keeps the LOW  word of each dword.
 */
	.data
	ALIGN8
one_null:
	.long	-65536
	.long	-65536
	ALIGN8
null_one:
	.long	65535
	.long	65535

	.text
	ALIGN16
/*
 * void SYNTH_NAME(real *bandPtr, int channel, short *samples,
 *                 short *buffs, int *bo, float *decwins)
 *
 * Arguments are read from the stack relative to %ebp:
 *    8(%ebp) bandPtr    12(%ebp) channel    16(%ebp) samples
 *   20(%ebp) buffs      24(%ebp) bo         28(%ebp) decwins
 *
 * Saves and restores %ebp, %edi, %esi and %ebx.
 * Overwrites %eax, %ecx, %edx, the flags and %mm0-%mm7; executes emms
 * before returning.
 *
 * One 4-byte stack local (TEMP) holds an offset derived from *bo.
 *
 * NOTE(review): the prototype above declares decwins as float *, but every
 * access to it below is a pmaddwd, i.e. packed signed 16-bit multiplies -
 * confirm the table layout with the caller.
 */
.globl SYNTH_NAME
SYNTH_NAME:
	pushl	%ebp
	movl	%esp, %ebp		/* arguments stay reachable through %ebp */
	subl	$4, %esp		/* room for the TEMP local */
	pushl	%edi
	pushl	%esi
	pushl	%ebx
/* With three registers pushed below it, the local sits at 12(%esp).       */
/* TEMP is only referenced while %esp is at exactly this depth.            */
#define TEMP 12(%esp)

	/* ---- Fetch arguments, pick output slot and buffer position ---- */
	movl	12(%ebp), %ecx		/* ecx = channel                          */
	movl	16(%ebp), %edi		/* edi = samples                          */
	movl	$15, %ebx		/* ebx = 15, used as a wrap mask          */
	movl	24(%ebp), %edx		/* edx = bo                               */
	leal	(%edi,%ecx,2), %edi	/* edi = samples + 2*channel bytes        */
	decl	%ecx			/* ecx = channel - 1                      */
	movl	20(%ebp), %esi		/* esi = buffs                            */
	movl	(%edx), %eax		/* eax = *bo                              */
	jecxz	.Lbo_ready		/* channel == 1: leave *bo and esi alone  */
	decl	%eax
	andl	%ebx, %eax		/* eax = (*bo - 1) & 15                   */
	leal	1088(%esi), %esi	/* esi = buffs + 1088 bytes               */
	movl	%eax, (%edx)		/* write the stepped value back to *bo    */
.Lbo_ready:
	leal	(%esi,%eax,2), %edx	/* edx = esi + 2*eax                      */
	movl	%eax, TEMP		/* TEMP = eax                             */
	incl	%eax
	andl	%ebx, %eax		/* eax = (eax + 1) & 15                   */
	leal	544(%esi,%eax,2), %ecx	/* ecx = esi + 544 + 2*eax                */
	incl	%ebx			/* ebx = 16                               */
	testl	$1, %eax
	jnz	.Lcall_dct		/* odd eax: keep edx/ecx/esi/TEMP as is   */
	xchgl	%edx, %ecx		/* even eax: swap the two pointers,       */
	incl	TEMP			/*   bump TEMP by one,                    */
	leal	544(%esi), %esi		/*   and move esi forward 544 bytes       */
.Lcall_dct:
	/* MPL_DCT64(ecx, edx, bandPtr): three stack args, caller cleans up. */
	pushl	8(%ebp)
	pushl	%edx
	pushl	%ecx
	call	MPL_DCT64
	addl	$12, %esp
	/* %ebx is read right after the call, so this code relies on       */
	/* MPL_DCT64 preserving it.                                        */
	leal	1(%ebx), %ecx		/* ecx = ebx + 1 (17 if ebx survived)     */
	subl	TEMP, %ebx		/* ebx = ebx - TEMP                       */
	pushl	%ecx			/* keep the count on the stack for later  */
	/* leal ASM_NAME(decwins)(%ebx,%ebx,1), %edx */
	movl	28(%ebp), %ecx		/* ecx = decwins                          */
	leal	(%ecx,%ebx,2), %edx	/* edx = decwins + 2*ebx bytes            */
	movl	(%esp), %ecx		/* reload count, leave it on the stack    */
	shrl	$1, %ecx		/* ecx = count / 2 = paired iterations    */

	/*
	 * ---- First half: two outputs per iteration ----
	 * Each output is a 16-term dot product of words: 32 bytes at %edx
	 * against 32 bytes at %esi (second output: 64(%edx) against 32(%esi)).
	 * The two dword lanes are added, shifted right arithmetically by 13,
	 * and packed to a word with signed saturation.
	 * Per iteration: esi += 64, edx += 128, edi += 8.
	 */
	ALIGN16
.Lfirst_pairs:
	movq	(%edx), %mm0
	movq	64(%edx), %mm4
	pmaddwd	(%esi), %mm0
	pmaddwd	32(%esi), %mm4
	movq	8(%edx), %mm1
	movq	72(%edx), %mm5
	pmaddwd	8(%esi), %mm1
	pmaddwd	40(%esi), %mm5
	movq	16(%edx), %mm2
	movq	80(%edx), %mm6
	pmaddwd	16(%esi), %mm2
	pmaddwd	48(%esi), %mm6
	movq	24(%edx), %mm3
	movq	88(%edx), %mm7
	pmaddwd	24(%esi), %mm3
	pmaddwd	56(%esi), %mm7
	paddd	%mm1, %mm0		/* accumulate the four partial products   */
	paddd	%mm5, %mm4
	paddd	%mm2, %mm0
	paddd	%mm6, %mm4
	paddd	%mm3, %mm0
	paddd	%mm7, %mm4
	movq	%mm0, %mm1
	movq	%mm4, %mm5
	psrlq	$32, %mm1		/* bring the upper dword lane down        */
	psrlq	$32, %mm5
	paddd	%mm1, %mm0		/* low dword = sum of both lanes          */
	paddd	%mm5, %mm4
	psrad	$13, %mm0		/* scale down                             */
	psrad	$13, %mm4
	packssdw %mm0, %mm0		/* saturate to signed 16 bit              */
	packssdw %mm4, %mm4
	/* Merge: new values go into the low word of each dword at (%edi); */
	/* the high word of each dword already in memory is kept.          */
	movq	(%edi), %mm1
	punpckldq %mm4, %mm0		/* mm0 = { first result, second result }  */
	pand	one_null, %mm1
	pand	null_one, %mm0
	por	%mm0, %mm1
	movq	%mm1, (%edi)
	leal	64(%esi), %esi
	leal	128(%edx), %edx
	leal	8(%edi), %edi
	decl	%ecx
	jnz	.Lfirst_pairs

	/* ---- Leftover single output when the saved count is odd ---- */
	popl	%ecx			/* the count pushed before the loop       */
	andl	$1, %ecx
	jecxz	.Lsecond_half
	movq	(%edx), %mm0
	pmaddwd	(%esi), %mm0
	movq	8(%edx), %mm1
	pmaddwd	8(%esi), %mm1
	movq	16(%edx), %mm2
	pmaddwd	16(%esi), %mm2
	movq	24(%edx), %mm3
	pmaddwd	24(%esi), %mm3
	paddd	%mm1, %mm0
	paddd	%mm2, %mm0
	paddd	%mm3, %mm0
	movq	%mm0, %mm1
	psrlq	$32, %mm1
	paddd	%mm1, %mm0
	psrad	$13, %mm0
	packssdw %mm0, %mm0
	movd	%mm0, %eax
	movw	%ax, (%edi)		/* one 16-bit store                       */
	leal	32(%esi), %esi
	leal	64(%edx), %edx
	leal	4(%edi), %edi

	/*
	 * ---- Second half: 7 iterations, two outputs each ----
	 * %esi now walks backwards (64 bytes per iteration; the second output
	 * of a pair reads from -32(%esi)), while %edx keeps advancing by 128.
	 * The scaled, saturated sum is negated with a saturating subtract
	 * from zero before it is merged into the output.
	 */
.Lsecond_half:
	subl	$64, %esi
	movl	$7, %ecx
	ALIGN16
.Lsecond_pairs:
	movq	(%edx), %mm0
	movq	64(%edx), %mm4
	pmaddwd	(%esi), %mm0
	pmaddwd	-32(%esi), %mm4
	movq	8(%edx), %mm1
	movq	72(%edx), %mm5
	pmaddwd	8(%esi), %mm1
	pmaddwd	-24(%esi), %mm5
	movq	16(%edx), %mm2
	movq	80(%edx), %mm6
	pmaddwd	16(%esi), %mm2
	pmaddwd	-16(%esi), %mm6
	movq	24(%edx), %mm3
	movq	88(%edx), %mm7
	pmaddwd	24(%esi), %mm3
	pmaddwd	-8(%esi), %mm7
	paddd	%mm1, %mm0
	paddd	%mm5, %mm4
	paddd	%mm2, %mm0
	paddd	%mm6, %mm4
	paddd	%mm3, %mm0
	paddd	%mm7, %mm4
	movq	%mm0, %mm1
	movq	%mm4, %mm5
	psrlq	$32, %mm1
	psrlq	$32, %mm5
	paddd	%mm0, %mm1		/* here the lane sum lands in mm1 / mm5   */
	paddd	%mm4, %mm5
	psrad	$13, %mm1
	psrad	$13, %mm5
	packssdw %mm1, %mm1
	packssdw %mm5, %mm5
	psubd	%mm0, %mm0		/* mm0 = 0                                */
	psubd	%mm4, %mm4		/* mm4 = 0                                */
	psubsw	%mm1, %mm0		/* mm0 = 0 - sum, saturating              */
	psubsw	%mm5, %mm4
	movq	(%edi), %mm1
	punpckldq %mm4, %mm0
	pand	one_null, %mm1
	pand	null_one, %mm0
	por	%mm0, %mm1
	movq	%mm1, (%edi)
	subl	$64, %esi
	addl	$128, %edx
	leal	8(%edi), %edi
	decl	%ecx
	jnz	.Lsecond_pairs

	/* ---- Final single negated output ---- */
	movq	(%edx), %mm0
	pmaddwd	(%esi), %mm0
	movq	8(%edx), %mm1
	pmaddwd	8(%esi), %mm1
	movq	16(%edx), %mm2
	pmaddwd	16(%esi), %mm2
	movq	24(%edx), %mm3
	pmaddwd	24(%esi), %mm3
	paddd	%mm1, %mm0
	paddd	%mm2, %mm0
	paddd	%mm3, %mm0
	movq	%mm0, %mm1
	psrlq	$32, %mm1
	paddd	%mm0, %mm1
	psrad	$13, %mm1
	packssdw %mm1, %mm1
	psubd	%mm0, %mm0
	psubsw	%mm1, %mm0
	movd	%mm0, %eax
	movw	%ax, (%edi)
	emms				/* leave MMX state before returning       */

	/* ---- Epilogue: undo the prologue in reverse order ---- */
	popl	%ebx
	popl	%esi
	popl	%edi
	addl	$4, %esp		/* drop the TEMP local                    */
	popl	%ebp
	ret

/* Doxygen page footer carried over from the extracted page: */
/* Generated on Sun May 27 2012 04:33:13 for ReactOS by */
1.7.6.1
|