ReactOS  0.4.13-dev-982-g9853eab
dct64_i486.c
Go to the documentation of this file.
1 /*
2  dct64_i486.c: DCT64, a plain C variant for i486
3 
4  copyright 1998-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
5  see COPYING and AUTHORS files in distribution or http://mpg123.org
6  initially written by Fabrice Bellard
7 */
8 
9 /* Discrete Cosine Tansform (DCT) for subband synthesis.
10  *
11  * This code is optimized for 80486. It should be compiled with gcc
12  * 2.7.2 or higher.
13  *
14  * Note: This code does not give the necessary accuracy. Moreover, no
15  * overflow test are done.
16  *
17  * (c) 1998 Fabrice Bellard.
18  */
19 
20 #include "mpg123lib_intern.h"
21 
22 #define COS_0_0 16403
23 #define COS_0_1 16563
24 #define COS_0_2 16890
25 #define COS_0_3 17401
26 #define COS_0_4 18124
27 #define COS_0_5 19101
28 #define COS_0_6 20398
29 #define COS_0_7 22112
30 #define COS_0_8 24396
31 #define COS_0_9 27503
32 #define COS_0_10 31869
33 #define COS_0_11 38320
34 #define COS_0_12 48633
35 #define COS_0_13 67429
36 #define COS_0_14 111660
37 #define COS_0_15 333906
38 #define COS_1_0 16463
39 #define COS_1_1 17121
40 #define COS_1_2 18577
41 #define COS_1_3 21195
42 #define COS_1_4 25826
43 #define COS_1_5 34756
44 #define COS_1_6 56441
45 #define COS_1_7 167154
46 #define COS_2_0 16704
47 #define COS_2_1 19704
48 #define COS_2_2 29490
49 #define COS_2_3 83981
50 #define COS_3_0 17733
51 #define COS_3_1 42813
52 #define COS_4_0 23170
53 
54 #define SETOUT(out,n,expr) out[FIR_BUFFER_SIZE*(n)]=(expr)
55 #define MULL(a,b) (((long long)(a)*(long long)(b)) >> 15)
56 #define MUL(a,b) \
57 (\
58  ((!(b & 0x3F)) ? (((a)*(b >> 6)) >> 9) :\
59  ((!(b & 0x1F)) ? (((a)*(b >> 5)) >> 10) :\
60  ((!(b & 0x0F)) ? (((a)*(b >> 4)) >> 11) :\
61  ((!(b & 0x07)) ? (((a)*(b >> 3)) >> 12) :\
62  ((!(b & 0x03)) ? (((a)*(b >> 2)) >> 13) :\
63  ((!(b & 0x01)) ? (((a)*(b >> 1)) >> 14) :\
64  (((a)*(b )) >> 15))))))))
65 
66 
67 void dct64_1_486(int *out0,int *out1,int *b1,int *b2)
68 {
69  b1[0x00] = b2[0x00] + b2[0x1F];
70  b1[0x1F] = MUL((b2[0x00] - b2[0x1F]),COS_0_0);
71 
72  b1[0x01] = b2[0x01] + b2[0x1E];
73  b1[0x1E] = MUL((b2[0x01] - b2[0x1E]),COS_0_1);
74 
75  b1[0x02] = b2[0x02] + b2[0x1D];
76  b1[0x1D] = MUL((b2[0x02] - b2[0x1D]),COS_0_2);
77 
78  b1[0x03] = b2[0x03] + b2[0x1C];
79  b1[0x1C] = MUL((b2[0x03] - b2[0x1C]),COS_0_3);
80 
81  b1[0x04] = b2[0x04] + b2[0x1B];
82  b1[0x1B] = MUL((b2[0x04] - b2[0x1B]),COS_0_4);
83 
84  b1[0x05] = b2[0x05] + b2[0x1A];
85  b1[0x1A] = MUL((b2[0x05] - b2[0x1A]),COS_0_5);
86 
87  b1[0x06] = b2[0x06] + b2[0x19];
88  b1[0x19] = MUL((b2[0x06] - b2[0x19]),COS_0_6);
89 
90  b1[0x07] = b2[0x07] + b2[0x18];
91  b1[0x18] = MUL((b2[0x07] - b2[0x18]),COS_0_7);
92 
93  b1[0x08] = b2[0x08] + b2[0x17];
94  b1[0x17] = MUL((b2[0x08] - b2[0x17]),COS_0_8);
95 
96  b1[0x09] = b2[0x09] + b2[0x16];
97  b1[0x16] = MUL((b2[0x09] - b2[0x16]),COS_0_9);
98 
99  b1[0x0A] = b2[0x0A] + b2[0x15];
100  b1[0x15] = MUL((b2[0x0A] - b2[0x15]),COS_0_10);
101 
102  b1[0x0B] = b2[0x0B] + b2[0x14];
103  b1[0x14] = MUL((b2[0x0B] - b2[0x14]),COS_0_11);
104 
105  b1[0x0C] = b2[0x0C] + b2[0x13];
106  b1[0x13] = MUL((b2[0x0C] - b2[0x13]),COS_0_12);
107 
108  b1[0x0D] = b2[0x0D] + b2[0x12];
109  b1[0x12] = MULL((b2[0x0D] - b2[0x12]),COS_0_13);
110 
111  b1[0x0E] = b2[0x0E] + b2[0x11];
112  b1[0x11] = MULL((b2[0x0E] - b2[0x11]),COS_0_14);
113 
114  b1[0x0F] = b2[0x0F] + b2[0x10];
115  b1[0x10] = MULL((b2[0x0F] - b2[0x10]),COS_0_15);
116 
117 
118  b2[0x00] = b1[0x00] + b1[0x0F];
119  b2[0x0F] = MUL((b1[0x00] - b1[0x0F]),COS_1_0);
120  b2[0x01] = b1[0x01] + b1[0x0E];
121  b2[0x0E] = MUL((b1[0x01] - b1[0x0E]),COS_1_1);
122  b2[0x02] = b1[0x02] + b1[0x0D];
123  b2[0x0D] = MUL((b1[0x02] - b1[0x0D]),COS_1_2);
124  b2[0x03] = b1[0x03] + b1[0x0C];
125  b2[0x0C] = MUL((b1[0x03] - b1[0x0C]),COS_1_3);
126  b2[0x04] = b1[0x04] + b1[0x0B];
127  b2[0x0B] = MUL((b1[0x04] - b1[0x0B]),COS_1_4);
128  b2[0x05] = b1[0x05] + b1[0x0A];
129  b2[0x0A] = MUL((b1[0x05] - b1[0x0A]),COS_1_5);
130  b2[0x06] = b1[0x06] + b1[0x09];
131  b2[0x09] = MUL((b1[0x06] - b1[0x09]),COS_1_6);
132  b2[0x07] = b1[0x07] + b1[0x08];
133  b2[0x08] = MULL((b1[0x07] - b1[0x08]),COS_1_7);
134 
135  b2[0x10] = b1[0x10] + b1[0x1F];
136  b2[0x1F] = MUL((b1[0x1F] - b1[0x10]),COS_1_0);
137  b2[0x11] = b1[0x11] + b1[0x1E];
138  b2[0x1E] = MUL((b1[0x1E] - b1[0x11]),COS_1_1);
139  b2[0x12] = b1[0x12] + b1[0x1D];
140  b2[0x1D] = MUL((b1[0x1D] - b1[0x12]),COS_1_2);
141  b2[0x13] = b1[0x13] + b1[0x1C];
142  b2[0x1C] = MUL((b1[0x1C] - b1[0x13]),COS_1_3);
143  b2[0x14] = b1[0x14] + b1[0x1B];
144  b2[0x1B] = MUL((b1[0x1B] - b1[0x14]),COS_1_4);
145  b2[0x15] = b1[0x15] + b1[0x1A];
146  b2[0x1A] = MUL((b1[0x1A] - b1[0x15]),COS_1_5);
147  b2[0x16] = b1[0x16] + b1[0x19];
148  b2[0x19] = MUL((b1[0x19] - b1[0x16]),COS_1_6);
149  b2[0x17] = b1[0x17] + b1[0x18];
150  b2[0x18] = MULL((b1[0x18] - b1[0x17]),COS_1_7);
151 
152 
153  b1[0x00] = b2[0x00] + b2[0x07];
154  b1[0x07] = MUL((b2[0x00] - b2[0x07]),COS_2_0);
155  b1[0x01] = b2[0x01] + b2[0x06];
156  b1[0x06] = MUL((b2[0x01] - b2[0x06]),COS_2_1);
157  b1[0x02] = b2[0x02] + b2[0x05];
158  b1[0x05] = MUL((b2[0x02] - b2[0x05]),COS_2_2);
159  b1[0x03] = b2[0x03] + b2[0x04];
160  b1[0x04] = MULL((b2[0x03] - b2[0x04]),COS_2_3);
161 
162  b1[0x08] = b2[0x08] + b2[0x0F];
163  b1[0x0F] = MUL((b2[0x0F] - b2[0x08]),COS_2_0);
164  b1[0x09] = b2[0x09] + b2[0x0E];
165  b1[0x0E] = MUL((b2[0x0E] - b2[0x09]),COS_2_1);
166  b1[0x0A] = b2[0x0A] + b2[0x0D];
167  b1[0x0D] = MUL((b2[0x0D] - b2[0x0A]),COS_2_2);
168  b1[0x0B] = b2[0x0B] + b2[0x0C];
169  b1[0x0C] = MULL((b2[0x0C] - b2[0x0B]),COS_2_3);
170 
171  b1[0x10] = b2[0x10] + b2[0x17];
172  b1[0x17] = MUL((b2[0x10] - b2[0x17]),COS_2_0);
173  b1[0x11] = b2[0x11] + b2[0x16];
174  b1[0x16] = MUL((b2[0x11] - b2[0x16]),COS_2_1);
175  b1[0x12] = b2[0x12] + b2[0x15];
176  b1[0x15] = MUL((b2[0x12] - b2[0x15]),COS_2_2);
177  b1[0x13] = b2[0x13] + b2[0x14];
178  b1[0x14] = MULL((b2[0x13] - b2[0x14]),COS_2_3);
179 
180  b1[0x18] = b2[0x18] + b2[0x1F];
181  b1[0x1F] = MUL((b2[0x1F] - b2[0x18]),COS_2_0);
182  b1[0x19] = b2[0x19] + b2[0x1E];
183  b1[0x1E] = MUL((b2[0x1E] - b2[0x19]),COS_2_1);
184  b1[0x1A] = b2[0x1A] + b2[0x1D];
185  b1[0x1D] = MUL((b2[0x1D] - b2[0x1A]),COS_2_2);
186  b1[0x1B] = b2[0x1B] + b2[0x1C];
187  b1[0x1C] = MULL((b2[0x1C] - b2[0x1B]),COS_2_3);
188 
189 
190  b2[0x00] = b1[0x00] + b1[0x03];
191  b2[0x03] = MUL((b1[0x00] - b1[0x03]),COS_3_0);
192  b2[0x01] = b1[0x01] + b1[0x02];
193  b2[0x02] = MUL((b1[0x01] - b1[0x02]),COS_3_1);
194 
195  b2[0x04] = b1[0x04] + b1[0x07];
196  b2[0x07] = MUL((b1[0x07] - b1[0x04]),COS_3_0);
197  b2[0x05] = b1[0x05] + b1[0x06];
198  b2[0x06] = MUL((b1[0x06] - b1[0x05]),COS_3_1);
199 
200  b2[0x08] = b1[0x08] + b1[0x0B];
201  b2[0x0B] = MUL((b1[0x08] - b1[0x0B]),COS_3_0);
202  b2[0x09] = b1[0x09] + b1[0x0A];
203  b2[0x0A] = MUL((b1[0x09] - b1[0x0A]),COS_3_1);
204 
205  b2[0x0C] = b1[0x0C] + b1[0x0F];
206  b2[0x0F] = MUL((b1[0x0F] - b1[0x0C]),COS_3_0);
207  b2[0x0D] = b1[0x0D] + b1[0x0E];
208  b2[0x0E] = MUL((b1[0x0E] - b1[0x0D]),COS_3_1);
209 
210  b2[0x10] = b1[0x10] + b1[0x13];
211  b2[0x13] = MUL((b1[0x10] - b1[0x13]),COS_3_0);
212  b2[0x11] = b1[0x11] + b1[0x12];
213  b2[0x12] = MUL((b1[0x11] - b1[0x12]),COS_3_1);
214 
215  b2[0x14] = b1[0x14] + b1[0x17];
216  b2[0x17] = MUL((b1[0x17] - b1[0x14]),COS_3_0);
217  b2[0x15] = b1[0x15] + b1[0x16];
218  b2[0x16] = MUL((b1[0x16] - b1[0x15]),COS_3_1);
219 
220  b2[0x18] = b1[0x18] + b1[0x1B];
221  b2[0x1B] = MUL((b1[0x18] - b1[0x1B]),COS_3_0);
222  b2[0x19] = b1[0x19] + b1[0x1A];
223  b2[0x1A] = MUL((b1[0x19] - b1[0x1A]),COS_3_1);
224 
225  b2[0x1C] = b1[0x1C] + b1[0x1F];
226  b2[0x1F] = MUL((b1[0x1F] - b1[0x1C]),COS_3_0);
227  b2[0x1D] = b1[0x1D] + b1[0x1E];
228  b2[0x1E] = MUL((b1[0x1E] - b1[0x1D]),COS_3_1);
229 
230  {
231  int i;
232  for(i=0;i<32;i+=4) {
233  b1[i+0x00] = b2[i+0x00] + b2[i+0x01];
234  b1[i+0x01] = MUL((b2[i+0x00] - b2[i+0x01]),COS_4_0);
235  b1[i+0x02] = b2[i+0x02] + b2[i+0x03];
236  b1[i+0x03] = MUL((b2[i+0x03] - b2[i+0x02]),COS_4_0);
237  }
238  }
239 
240  b1[0x02] += b1[0x03];
241  b1[0x06] += b1[0x07];
242  b1[0x04] += b1[0x06];
243  b1[0x06] += b1[0x05];
244  b1[0x05] += b1[0x07];
245 
246  b1[0x0A] += b1[0x0B];
247  b1[0x0E] += b1[0x0F];
248  b1[0x0C] += b1[0x0E];
249  b1[0x0E] += b1[0x0D];
250  b1[0x0D] += b1[0x0F];
251 
252  b1[0x12] += b1[0x13];
253  b1[0x16] += b1[0x17];
254  b1[0x14] += b1[0x16];
255  b1[0x16] += b1[0x15];
256  b1[0x15] += b1[0x17];
257 
258  b1[0x1A] += b1[0x1B];
259  b1[0x1E] += b1[0x1F];
260  b1[0x1C] += b1[0x1E];
261  b1[0x1E] += b1[0x1D];
262  b1[0x1D] += b1[0x1F];
263 
264  SETOUT(out0,16,b1[0x00]);
265  SETOUT(out0,12,b1[0x04]);
266  SETOUT(out0, 8,b1[0x02]);
267  SETOUT(out0, 4,b1[0x06]);
268  SETOUT(out0, 0,b1[0x01]);
269  SETOUT(out1, 0,b1[0x01]);
270  SETOUT(out1, 4,b1[0x05]);
271  SETOUT(out1, 8,b1[0x03]);
272  SETOUT(out1,12,b1[0x07]);
273 
274  b1[0x08] += b1[0x0C];
275  SETOUT(out0,14,b1[0x08]);
276  b1[0x0C] += b1[0x0a];
277  SETOUT(out0,10,b1[0x0C]);
278  b1[0x0A] += b1[0x0E];
279  SETOUT(out0, 6,b1[0x0A]);
280  b1[0x0E] += b1[0x09];
281  SETOUT(out0, 2,b1[0x0E]);
282  b1[0x09] += b1[0x0D];
283  SETOUT(out1, 2,b1[0x09]);
284  b1[0x0D] += b1[0x0B];
285  SETOUT(out1, 6,b1[0x0D]);
286  b1[0x0B] += b1[0x0F];
287  SETOUT(out1,10,b1[0x0B]);
288  SETOUT(out1,14,b1[0x0F]);
289 
290  b1[0x18] += b1[0x1C];
291  SETOUT(out0,15,b1[0x10] + b1[0x18]);
292  SETOUT(out0,13,b1[0x18] + b1[0x14]);
293  b1[0x1C] += b1[0x1a];
294  SETOUT(out0,11,b1[0x14] + b1[0x1C]);
295  SETOUT(out0, 9,b1[0x1C] + b1[0x12]);
296  b1[0x1A] += b1[0x1E];
297  SETOUT(out0, 7,b1[0x12] + b1[0x1A]);
298  SETOUT(out0, 5,b1[0x1A] + b1[0x16]);
299  b1[0x1E] += b1[0x19];
300  SETOUT(out0, 3,b1[0x16] + b1[0x1E]);
301  SETOUT(out0, 1,b1[0x1E] + b1[0x11]);
302  b1[0x19] += b1[0x1D];
303  SETOUT(out1, 1,b1[0x11] + b1[0x19]);
304  SETOUT(out1, 3,b1[0x19] + b1[0x15]);
305  b1[0x1D] += b1[0x1B];
306  SETOUT(out1, 5,b1[0x15] + b1[0x1D]);
307  SETOUT(out1, 7,b1[0x1D] + b1[0x13]);
308  b1[0x1B] += b1[0x1F];
309  SETOUT(out1, 9,b1[0x13] + b1[0x1B]);
310  SETOUT(out1,11,b1[0x1B] + b1[0x17]);
311  SETOUT(out1,13,b1[0x17] + b1[0x1F]);
312  SETOUT(out1,15,b1[0x1F]);
313 }
314 
315 
316 /*
317  * the call via dct64 is a trick to force GCC to use
318  * (new) registers for the b1,b2 pointer to the bufs[xx] field
319  */
320 void dct64_i486(int *a,int *b,real *samples)
321 {
322  int bufs[64];
323  int i;
324 
325 #ifdef REAL_IS_FIXED
326 #define TOINT(a) ((a) * 32768 / (int)REAL_FACTOR)
327 
328  for(i=0;i<32;i++) {
329  bufs[i]=TOINT(samples[i]);
330  }
331 #else
332  int *p = bufs;
333  register double const scale = ((65536.0 * 32) + 1) * 65536.0;
334 
335  for(i=0;i<32;i++) {
336  *((double *) (p++)) = scale + *samples++; /* beware on bufs overrun: 8B store from x87 */
337  }
338 #endif
339 
340  dct64_1_486(a,b,bufs+32,bufs);
341 }
342 
GLsizei samples
Definition: glext.h:7006
#define COS_0_7
Definition: dct64_i486.c:29
#define COS_0_0
Definition: dct64_i486.c:22
const GLenum * bufs
Definition: glext.h:6026
#define COS_0_14
Definition: dct64_i486.c:36
GLenum GLenum GLenum GLenum GLenum scale
Definition: glext.h:9032
#define COS_3_1
Definition: dct64_i486.c:51
#define COS_2_3
Definition: dct64_i486.c:49
_Tp _STLP_CALL real(const complex< _Tp > &__z)
Definition: _complex.h:727
#define COS_0_2
Definition: dct64_i486.c:24
#define COS_1_1
Definition: dct64_i486.c:39
#define SETOUT(out, n, expr)
Definition: dct64_i486.c:54
#define COS_0_10
Definition: dct64_i486.c:32
#define COS_0_11
Definition: dct64_i486.c:33
#define COS_0_12
Definition: dct64_i486.c:34
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
#define COS_1_5
Definition: dct64_i486.c:43
static CRYPT_DATA_BLOB b1[]
Definition: msg.c:573
#define COS_0_9
Definition: dct64_i486.c:31
#define COS_1_3
Definition: dct64_i486.c:41
void dct64_1_486(int *out0, int *out1, int *b1, int *b2)
Definition: dct64_i486.c:67
#define COS_1_0
Definition: dct64_i486.c:38
#define COS_0_13
Definition: dct64_i486.c:35
#define COS_0_3
Definition: dct64_i486.c:25
#define COS_0_15
Definition: dct64_i486.c:37
GLboolean GLboolean GLboolean b
Definition: glext.h:6204
#define MUL(a, b)
Definition: dct64_i486.c:56
#define COS_1_7
Definition: dct64_i486.c:45
#define MULL(a, b)
Definition: dct64_i486.c:55
#define COS_0_4
Definition: dct64_i486.c:26
#define COS_1_6
Definition: dct64_i486.c:44
#define COS_0_8
Definition: dct64_i486.c:30
#define COS_0_6
Definition: dct64_i486.c:28
#define COS_1_2
Definition: dct64_i486.c:40
void dct64_i486(int *a, int *b, real *samples)
Definition: dct64_i486.c:320
#define COS_0_1
Definition: dct64_i486.c:23
#define COS_0_5
Definition: dct64_i486.c:27
#define COS_2_1
Definition: dct64_i486.c:47
#define COS_4_0
Definition: dct64_i486.c:52
#define COS_2_0
Definition: dct64_i486.c:46
static CRYPT_DATA_BLOB b2[]
Definition: msg.c:582
GLboolean GLboolean GLboolean GLboolean a
Definition: glext.h:6204
GLfloat GLfloat p
Definition: glext.h:8902
#define COS_3_0
Definition: dct64_i486.c:50
#define COS_2_2
Definition: dct64_i486.c:48
#define COS_1_4
Definition: dct64_i486.c:42