ReactOS  0.4.13-dev-92-gf251225
dct64_i386.c
Go to the documentation of this file.
1 /*
2  dct64_i386.c: DCT64, a C variant for i386
3 
4  copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
5  see COPYING and AUTHORS files in distribution or http://mpg123.org
6  initially written by Michael Hipp
7 */
8 
9 /*
10  * Discrete Cosine Tansform (DCT) for subband synthesis
11  * optimized for machines with no auto-increment.
12  * The performance is highly compiler dependend. Maybe
13  * the dct64.c version for 'normal' processor may be faster
14  * even for Intel processors.
15  */
16 
17 #include "mpg123lib_intern.h"
18 
19 static void dct64_1(real *out0,real *out1,real *b1,real *b2,real *samples)
20 {
21  {
22  register real *costab = pnts[0];
23 
24  b1[0x00] = samples[0x00] + samples[0x1F];
25  b1[0x01] = samples[0x01] + samples[0x1E];
26  b1[0x1F] = REAL_MUL(samples[0x00] - samples[0x1F], costab[0x0]);
27  b1[0x1E] = REAL_MUL(samples[0x01] - samples[0x1E], costab[0x1]);
28 
29  b1[0x02] = samples[0x02] + samples[0x1D];
30  b1[0x03] = samples[0x03] + samples[0x1C];
31  b1[0x1D] = REAL_MUL(samples[0x02] - samples[0x1D], costab[0x2]);
32  b1[0x1C] = REAL_MUL(samples[0x03] - samples[0x1C], costab[0x3]);
33 
34  b1[0x04] = samples[0x04] + samples[0x1B];
35  b1[0x05] = samples[0x05] + samples[0x1A];
36  b1[0x1B] = REAL_MUL(samples[0x04] - samples[0x1B], costab[0x4]);
37  b1[0x1A] = REAL_MUL(samples[0x05] - samples[0x1A], costab[0x5]);
38 
39  b1[0x06] = samples[0x06] + samples[0x19];
40  b1[0x07] = samples[0x07] + samples[0x18];
41  b1[0x19] = REAL_MUL(samples[0x06] - samples[0x19], costab[0x6]);
42  b1[0x18] = REAL_MUL(samples[0x07] - samples[0x18], costab[0x7]);
43 
44  b1[0x08] = samples[0x08] + samples[0x17];
45  b1[0x09] = samples[0x09] + samples[0x16];
46  b1[0x17] = REAL_MUL(samples[0x08] - samples[0x17], costab[0x8]);
47  b1[0x16] = REAL_MUL(samples[0x09] - samples[0x16], costab[0x9]);
48 
49  b1[0x0A] = samples[0x0A] + samples[0x15];
50  b1[0x0B] = samples[0x0B] + samples[0x14];
51  b1[0x15] = REAL_MUL(samples[0x0A] - samples[0x15], costab[0xA]);
52  b1[0x14] = REAL_MUL(samples[0x0B] - samples[0x14], costab[0xB]);
53 
54  b1[0x0C] = samples[0x0C] + samples[0x13];
55  b1[0x0D] = samples[0x0D] + samples[0x12];
56  b1[0x13] = REAL_MUL(samples[0x0C] - samples[0x13], costab[0xC]);
57  b1[0x12] = REAL_MUL(samples[0x0D] - samples[0x12], costab[0xD]);
58 
59  b1[0x0E] = samples[0x0E] + samples[0x11];
60  b1[0x0F] = samples[0x0F] + samples[0x10];
61  b1[0x11] = REAL_MUL(samples[0x0E] - samples[0x11], costab[0xE]);
62  b1[0x10] = REAL_MUL(samples[0x0F] - samples[0x10], costab[0xF]);
63 
64  }
65 
66 
67  {
68  register real *costab = pnts[1];
69 
70  b2[0x00] = b1[0x00] + b1[0x0F];
71  b2[0x01] = b1[0x01] + b1[0x0E];
72  b2[0x0F] = REAL_MUL(b1[0x00] - b1[0x0F], costab[0]);
73  b2[0x0E] = REAL_MUL(b1[0x01] - b1[0x0E], costab[1]);
74 
75  b2[0x02] = b1[0x02] + b1[0x0D];
76  b2[0x03] = b1[0x03] + b1[0x0C];
77  b2[0x0D] = REAL_MUL(b1[0x02] - b1[0x0D], costab[2]);
78  b2[0x0C] = REAL_MUL(b1[0x03] - b1[0x0C], costab[3]);
79 
80  b2[0x04] = b1[0x04] + b1[0x0B];
81  b2[0x05] = b1[0x05] + b1[0x0A];
82  b2[0x0B] = REAL_MUL(b1[0x04] - b1[0x0B], costab[4]);
83  b2[0x0A] = REAL_MUL(b1[0x05] - b1[0x0A], costab[5]);
84 
85  b2[0x06] = b1[0x06] + b1[0x09];
86  b2[0x07] = b1[0x07] + b1[0x08];
87  b2[0x09] = REAL_MUL(b1[0x06] - b1[0x09], costab[6]);
88  b2[0x08] = REAL_MUL(b1[0x07] - b1[0x08], costab[7]);
89 
90  /* */
91 
92  b2[0x10] = b1[0x10] + b1[0x1F];
93  b2[0x11] = b1[0x11] + b1[0x1E];
94  b2[0x1F] = REAL_MUL(b1[0x1F] - b1[0x10], costab[0]);
95  b2[0x1E] = REAL_MUL(b1[0x1E] - b1[0x11], costab[1]);
96 
97  b2[0x12] = b1[0x12] + b1[0x1D];
98  b2[0x13] = b1[0x13] + b1[0x1C];
99  b2[0x1D] = REAL_MUL(b1[0x1D] - b1[0x12], costab[2]);
100  b2[0x1C] = REAL_MUL(b1[0x1C] - b1[0x13], costab[3]);
101 
102  b2[0x14] = b1[0x14] + b1[0x1B];
103  b2[0x15] = b1[0x15] + b1[0x1A];
104  b2[0x1B] = REAL_MUL(b1[0x1B] - b1[0x14], costab[4]);
105  b2[0x1A] = REAL_MUL(b1[0x1A] - b1[0x15], costab[5]);
106 
107  b2[0x16] = b1[0x16] + b1[0x19];
108  b2[0x17] = b1[0x17] + b1[0x18];
109  b2[0x19] = REAL_MUL(b1[0x19] - b1[0x16], costab[6]);
110  b2[0x18] = REAL_MUL(b1[0x18] - b1[0x17], costab[7]);
111  }
112 
113  {
114  register real *costab = pnts[2];
115 
116  b1[0x00] = b2[0x00] + b2[0x07];
117  b1[0x07] = REAL_MUL(b2[0x00] - b2[0x07], costab[0]);
118  b1[0x01] = b2[0x01] + b2[0x06];
119  b1[0x06] = REAL_MUL(b2[0x01] - b2[0x06], costab[1]);
120  b1[0x02] = b2[0x02] + b2[0x05];
121  b1[0x05] = REAL_MUL(b2[0x02] - b2[0x05], costab[2]);
122  b1[0x03] = b2[0x03] + b2[0x04];
123  b1[0x04] = REAL_MUL(b2[0x03] - b2[0x04], costab[3]);
124 
125  b1[0x08] = b2[0x08] + b2[0x0F];
126  b1[0x0F] = REAL_MUL(b2[0x0F] - b2[0x08], costab[0]);
127  b1[0x09] = b2[0x09] + b2[0x0E];
128  b1[0x0E] = REAL_MUL(b2[0x0E] - b2[0x09], costab[1]);
129  b1[0x0A] = b2[0x0A] + b2[0x0D];
130  b1[0x0D] = REAL_MUL(b2[0x0D] - b2[0x0A], costab[2]);
131  b1[0x0B] = b2[0x0B] + b2[0x0C];
132  b1[0x0C] = REAL_MUL(b2[0x0C] - b2[0x0B], costab[3]);
133 
134  b1[0x10] = b2[0x10] + b2[0x17];
135  b1[0x17] = REAL_MUL(b2[0x10] - b2[0x17], costab[0]);
136  b1[0x11] = b2[0x11] + b2[0x16];
137  b1[0x16] = REAL_MUL(b2[0x11] - b2[0x16], costab[1]);
138  b1[0x12] = b2[0x12] + b2[0x15];
139  b1[0x15] = REAL_MUL(b2[0x12] - b2[0x15], costab[2]);
140  b1[0x13] = b2[0x13] + b2[0x14];
141  b1[0x14] = REAL_MUL(b2[0x13] - b2[0x14], costab[3]);
142 
143  b1[0x18] = b2[0x18] + b2[0x1F];
144  b1[0x1F] = REAL_MUL(b2[0x1F] - b2[0x18], costab[0]);
145  b1[0x19] = b2[0x19] + b2[0x1E];
146  b1[0x1E] = REAL_MUL(b2[0x1E] - b2[0x19], costab[1]);
147  b1[0x1A] = b2[0x1A] + b2[0x1D];
148  b1[0x1D] = REAL_MUL(b2[0x1D] - b2[0x1A], costab[2]);
149  b1[0x1B] = b2[0x1B] + b2[0x1C];
150  b1[0x1C] = REAL_MUL(b2[0x1C] - b2[0x1B], costab[3]);
151  }
152 
153  {
154  register real const cos0 = pnts[3][0];
155  register real const cos1 = pnts[3][1];
156 
157  b2[0x00] = b1[0x00] + b1[0x03];
158  b2[0x03] = REAL_MUL(b1[0x00] - b1[0x03], cos0);
159  b2[0x01] = b1[0x01] + b1[0x02];
160  b2[0x02] = REAL_MUL(b1[0x01] - b1[0x02], cos1);
161 
162  b2[0x04] = b1[0x04] + b1[0x07];
163  b2[0x07] = REAL_MUL(b1[0x07] - b1[0x04], cos0);
164  b2[0x05] = b1[0x05] + b1[0x06];
165  b2[0x06] = REAL_MUL(b1[0x06] - b1[0x05], cos1);
166 
167  b2[0x08] = b1[0x08] + b1[0x0B];
168  b2[0x0B] = REAL_MUL(b1[0x08] - b1[0x0B], cos0);
169  b2[0x09] = b1[0x09] + b1[0x0A];
170  b2[0x0A] = REAL_MUL(b1[0x09] - b1[0x0A], cos1);
171 
172  b2[0x0C] = b1[0x0C] + b1[0x0F];
173  b2[0x0F] = REAL_MUL(b1[0x0F] - b1[0x0C], cos0);
174  b2[0x0D] = b1[0x0D] + b1[0x0E];
175  b2[0x0E] = REAL_MUL(b1[0x0E] - b1[0x0D], cos1);
176 
177  b2[0x10] = b1[0x10] + b1[0x13];
178  b2[0x13] = REAL_MUL(b1[0x10] - b1[0x13], cos0);
179  b2[0x11] = b1[0x11] + b1[0x12];
180  b2[0x12] = REAL_MUL(b1[0x11] - b1[0x12], cos1);
181 
182  b2[0x14] = b1[0x14] + b1[0x17];
183  b2[0x17] = REAL_MUL(b1[0x17] - b1[0x14], cos0);
184  b2[0x15] = b1[0x15] + b1[0x16];
185  b2[0x16] = REAL_MUL(b1[0x16] - b1[0x15], cos1);
186 
187  b2[0x18] = b1[0x18] + b1[0x1B];
188  b2[0x1B] = REAL_MUL(b1[0x18] - b1[0x1B], cos0);
189  b2[0x19] = b1[0x19] + b1[0x1A];
190  b2[0x1A] = REAL_MUL(b1[0x19] - b1[0x1A], cos1);
191 
192  b2[0x1C] = b1[0x1C] + b1[0x1F];
193  b2[0x1F] = REAL_MUL(b1[0x1F] - b1[0x1C], cos0);
194  b2[0x1D] = b1[0x1D] + b1[0x1E];
195  b2[0x1E] = REAL_MUL(b1[0x1E] - b1[0x1D], cos1);
196  }
197 
198  {
199  register real const cos0 = pnts[4][0];
200 
201  b1[0x00] = b2[0x00] + b2[0x01];
202  b1[0x01] = REAL_MUL(b2[0x00] - b2[0x01], cos0);
203  b1[0x02] = b2[0x02] + b2[0x03];
204  b1[0x03] = REAL_MUL(b2[0x03] - b2[0x02], cos0);
205  b1[0x02] += b1[0x03];
206 
207  b1[0x04] = b2[0x04] + b2[0x05];
208  b1[0x05] = REAL_MUL(b2[0x04] - b2[0x05], cos0);
209  b1[0x06] = b2[0x06] + b2[0x07];
210  b1[0x07] = REAL_MUL(b2[0x07] - b2[0x06], cos0);
211  b1[0x06] += b1[0x07];
212  b1[0x04] += b1[0x06];
213  b1[0x06] += b1[0x05];
214  b1[0x05] += b1[0x07];
215 
216  b1[0x08] = b2[0x08] + b2[0x09];
217  b1[0x09] = REAL_MUL(b2[0x08] - b2[0x09], cos0);
218  b1[0x0A] = b2[0x0A] + b2[0x0B];
219  b1[0x0B] = REAL_MUL(b2[0x0B] - b2[0x0A], cos0);
220  b1[0x0A] += b1[0x0B];
221 
222  b1[0x0C] = b2[0x0C] + b2[0x0D];
223  b1[0x0D] = REAL_MUL(b2[0x0C] - b2[0x0D], cos0);
224  b1[0x0E] = b2[0x0E] + b2[0x0F];
225  b1[0x0F] = REAL_MUL(b2[0x0F] - b2[0x0E], cos0);
226  b1[0x0E] += b1[0x0F];
227  b1[0x0C] += b1[0x0E];
228  b1[0x0E] += b1[0x0D];
229  b1[0x0D] += b1[0x0F];
230 
231  b1[0x10] = b2[0x10] + b2[0x11];
232  b1[0x11] = REAL_MUL(b2[0x10] - b2[0x11], cos0);
233  b1[0x12] = b2[0x12] + b2[0x13];
234  b1[0x13] = REAL_MUL(b2[0x13] - b2[0x12], cos0);
235  b1[0x12] += b1[0x13];
236 
237  b1[0x14] = b2[0x14] + b2[0x15];
238  b1[0x15] = REAL_MUL(b2[0x14] - b2[0x15], cos0);
239  b1[0x16] = b2[0x16] + b2[0x17];
240  b1[0x17] = REAL_MUL(b2[0x17] - b2[0x16], cos0);
241  b1[0x16] += b1[0x17];
242  b1[0x14] += b1[0x16];
243  b1[0x16] += b1[0x15];
244  b1[0x15] += b1[0x17];
245 
246  b1[0x18] = b2[0x18] + b2[0x19];
247  b1[0x19] = REAL_MUL(b2[0x18] - b2[0x19], cos0);
248  b1[0x1A] = b2[0x1A] + b2[0x1B];
249  b1[0x1B] = REAL_MUL(b2[0x1B] - b2[0x1A], cos0);
250  b1[0x1A] += b1[0x1B];
251 
252  b1[0x1C] = b2[0x1C] + b2[0x1D];
253  b1[0x1D] = REAL_MUL(b2[0x1C] - b2[0x1D], cos0);
254  b1[0x1E] = b2[0x1E] + b2[0x1F];
255  b1[0x1F] = REAL_MUL(b2[0x1F] - b2[0x1E], cos0);
256  b1[0x1E] += b1[0x1F];
257  b1[0x1C] += b1[0x1E];
258  b1[0x1E] += b1[0x1D];
259  b1[0x1D] += b1[0x1F];
260  }
261 
262  out0[0x10*16] = REAL_SCALE_DCT64(b1[0x00]);
263  out0[0x10*12] = REAL_SCALE_DCT64(b1[0x04]);
264  out0[0x10* 8] = REAL_SCALE_DCT64(b1[0x02]);
265  out0[0x10* 4] = REAL_SCALE_DCT64(b1[0x06]);
266  out0[0x10* 0] = REAL_SCALE_DCT64(b1[0x01]);
267  out1[0x10* 0] = REAL_SCALE_DCT64(b1[0x01]);
268  out1[0x10* 4] = REAL_SCALE_DCT64(b1[0x05]);
269  out1[0x10* 8] = REAL_SCALE_DCT64(b1[0x03]);
270  out1[0x10*12] = REAL_SCALE_DCT64(b1[0x07]);
271 
272 #if 1
273  out0[0x10*14] = REAL_SCALE_DCT64(b1[0x08] + b1[0x0C]);
274  out0[0x10*10] = REAL_SCALE_DCT64(b1[0x0C] + b1[0x0a]);
275  out0[0x10* 6] = REAL_SCALE_DCT64(b1[0x0A] + b1[0x0E]);
276  out0[0x10* 2] = REAL_SCALE_DCT64(b1[0x0E] + b1[0x09]);
277  out1[0x10* 2] = REAL_SCALE_DCT64(b1[0x09] + b1[0x0D]);
278  out1[0x10* 6] = REAL_SCALE_DCT64(b1[0x0D] + b1[0x0B]);
279  out1[0x10*10] = REAL_SCALE_DCT64(b1[0x0B] + b1[0x0F]);
280  out1[0x10*14] = REAL_SCALE_DCT64(b1[0x0F]);
281 #else
282  b1[0x08] += b1[0x0C];
283  out0[0x10*14] = REAL_SCALE_DCT64(b1[0x08]);
284  b1[0x0C] += b1[0x0a];
285  out0[0x10*10] = REAL_SCALE_DCT64(b1[0x0C]);
286  b1[0x0A] += b1[0x0E];
287  out0[0x10* 6] = REAL_SCALE_DCT64(b1[0x0A]);
288  b1[0x0E] += b1[0x09];
289  out0[0x10* 2] = REAL_SCALE_DCT64(b1[0x0E]);
290  b1[0x09] += b1[0x0D];
291  out1[0x10* 2] = REAL_SCALE_DCT64(b1[0x09]);
292  b1[0x0D] += b1[0x0B];
293  out1[0x10* 6] = REAL_SCALE_DCT64(b1[0x0D]);
294  b1[0x0B] += b1[0x0F];
295  out1[0x10*10] = REAL_SCALE_DCT64(b1[0x0B]);
296  out1[0x10*14] = REAL_SCALE_DCT64(b1[0x0F]);
297 #endif
298 
299  {
300  real tmp;
301  tmp = b1[0x18] + b1[0x1C];
302  out0[0x10*15] = REAL_SCALE_DCT64(tmp + b1[0x10]);
303  out0[0x10*13] = REAL_SCALE_DCT64(tmp + b1[0x14]);
304  tmp = b1[0x1C] + b1[0x1A];
305  out0[0x10*11] = REAL_SCALE_DCT64(tmp + b1[0x14]);
306  out0[0x10* 9] = REAL_SCALE_DCT64(tmp + b1[0x12]);
307  tmp = b1[0x1A] + b1[0x1E];
308  out0[0x10* 7] = REAL_SCALE_DCT64(tmp + b1[0x12]);
309  out0[0x10* 5] = REAL_SCALE_DCT64(tmp + b1[0x16]);
310  tmp = b1[0x1E] + b1[0x19];
311  out0[0x10* 3] = REAL_SCALE_DCT64(tmp + b1[0x16]);
312  out0[0x10* 1] = REAL_SCALE_DCT64(tmp + b1[0x11]);
313  tmp = b1[0x19] + b1[0x1D];
314  out1[0x10* 1] = REAL_SCALE_DCT64(tmp + b1[0x11]);
315  out1[0x10* 3] = REAL_SCALE_DCT64(tmp + b1[0x15]);
316  tmp = b1[0x1D] + b1[0x1B];
317  out1[0x10* 5] = REAL_SCALE_DCT64(tmp + b1[0x15]);
318  out1[0x10* 7] = REAL_SCALE_DCT64(tmp + b1[0x13]);
319  tmp = b1[0x1B] + b1[0x1F];
320  out1[0x10* 9] = REAL_SCALE_DCT64(tmp + b1[0x13]);
321  out1[0x10*11] = REAL_SCALE_DCT64(tmp + b1[0x17]);
322  out1[0x10*13] = REAL_SCALE_DCT64(b1[0x17] + b1[0x1F]);
323  out1[0x10*15] = REAL_SCALE_DCT64(b1[0x1F]);
324  }
325 }
326 
327 /*
328  * the call via dct64 is a trick to force GCC to use
329  * (new) registers for the b1,b2 pointer to the bufs[xx] field
330  */
332 {
333  real bufs[0x40];
334  dct64_1(a,b,bufs,bufs+0x20,c);
335 }
336 
#define REAL_SCALE_DCT64(x)
GLsizei samples
Definition: glext.h:7006
static void dct64_1(real *out0, real *out1, real *b1, real *b2, real *samples)
Definition: dct64_i386.c:19
#define REAL_MUL(x, y)
GLint x0
Definition: linetemp.h:95
const GLenum * bufs
Definition: glext.h:6026
_In_ CLIPOBJ _In_ BRUSHOBJ _In_ LONG x1
Definition: winddi.h:3706
_Tp _STLP_CALL real(const complex< _Tp > &__z)
Definition: _complex.h:727
#define pnts
Definition: intsym.h:8
static CRYPT_DATA_BLOB b1[]
Definition: msg.c:573
GLboolean GLboolean GLboolean b
Definition: glext.h:6204
const GLubyte * c
Definition: glext.h:8905
void dct64_i386(real *a, real *b, real *c)
Definition: dct64_i386.c:331
_In_ CLIPOBJ _In_ BRUSHOBJ _In_ LONG _In_ LONG _In_ LONG x2
Definition: winddi.h:3706
static CRYPT_DATA_BLOB b2[]
Definition: msg.c:582
GLboolean GLboolean GLboolean GLboolean a
Definition: glext.h:6204