ReactOS 0.4.16-dev-300-g2aadf2e
optimize.c
Go to the documentation of this file.
1/*
2 optimize: get a grip on the different optimizations
3
4 copyright 2006-9 by the mpg123 project - free software under the terms of the LGPL 2.1
5 see COPYING and AUTHORS files in distribution or http://mpg123.org
6 initially written by Thomas Orgis, inspired by 3DNow stuff in mpg123.[hc]
7
8 Currently, this file contains the struct and function to choose an optimization variant and works only when OPT_MULTI is in effect.
9*/
10
11#define I_AM_OPTIMIZE
12#include "mpg123lib_intern.h" /* includes optimize.h */
13#include "debug.h"
14
/* CPU feature state used by the decoder selection code in this file.
   When OPT_MULTI is combined with an x86, x86-64, NEON or NEON64 build, a
   file-local struct cpuflags is kept. In every other configuration the
   cpu_*() tests are macros that ignore their argument and always yield 1. */
15#if ((defined OPT_X86) || (defined OPT_X86_64) || (defined OPT_NEON) || (defined OPT_NEON64)) && (defined OPT_MULTI)
16#include "getcpuflags.h"
17static struct cpuflags cpu_flags;
18#else
19/* Faking stuff for non-multi builds. The same code for synth function choice is used.
20 Just no runtime dependency of result... */
/* cpu_flags becomes the bare token "nothing"; the cpu_*() macros below
   never expand their argument, so that token is not evaluated by them. */
21#define cpu_flags nothing
22#define cpu_i586(s) 1
23#define cpu_fpu(s) 1
24#define cpu_mmx(s) 1
25#define cpu_3dnow(s) 1
26#define cpu_3dnowext(s) 1
27#define cpu_sse(s) 1
28#define cpu_sse2(s) 1
29#define cpu_sse3(s) 1
30#define cpu_avx(s) 1
31#define cpu_neon(s) 1
32#endif
33
34/* Ugly macros to build conditional synth function array values. */
/* OUT_SYNTHS(synth_16, synth_8, synth_real, synth_32) expands to a brace
   initializer listing the synths in that order, leaving out every format
   that is disabled by NO_16BIT, NO_8BIT, NO_REAL, NO_32BIT or NO_SYNTH32. */
35
/* IF8 emits its argument followed by a comma, or nothing with NO_8BIT. */
36#ifndef NO_8BIT
37#define IF8(synth) synth,
38#else
39#define IF8(synth)
40#endif
41
/* NO_SYNTH32 disables both the real and the 32 bit entries at once. */
42#ifndef NO_SYNTH32
43
/* IFREAL emits its argument followed by a comma, or nothing with NO_REAL. */
44#ifndef NO_REAL
45#define IFREAL(synth) synth,
46#else
47#define IFREAL(synth)
48#endif
49
/* IF32 is the last element in OUT_SYNTHS and therefore emits no comma. */
50#ifndef NO_32BIT
51#define IF32(synth) synth
52#else
53#define IF32(synth)
54#endif
55
56#else
57
58#define IFREAL(synth)
59#define IF32(synth)
60
61#endif
62
/* With NO_16BIT the leading synth_16 entry is dropped from the initializer. */
63#ifndef NO_16BIT
64# define OUT_SYNTHS(synth_16, synth_8, synth_real, synth_32) { synth_16, IF8(synth_8) IFREAL(synth_real) IF32(synth_32) }
65#else
66# define OUT_SYNTHS(synth_16, synth_8, synth_real, synth_32) { IF8(synth_8) IFREAL(synth_real) IF32(synth_32) }
67#endif
68
69/* The call of left and right plain synth, wrapped.
70 This may be replaced by a direct stereo optimized synth. */
71static int synth_stereo_wrap(real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr)
72{
73 int clip;
74 clip = (fr->synth)(bandPtr_l, 0, fr, 0);
75 clip += (fr->synth)(bandPtr_r, 1, fr, 1);
76 return clip;
77}
78
/* Baseline synth table. It has four groups (plain, stereo, mono2stereo,
   mono); frame_cpu_opt() copies the whole struct into fr->synths before it
   applies decoder-specific overrides.
   NOTE(review): the embedded listing numbers skip lines inside every group
   (e.g. 81 -> 83), so the initializer rows themselves are not present in
   this copy -- restore them from the upstream file before building. */
79static const struct synth_s synth_base =
80{
81 { /* plain */
83# ifndef NO_DOWNSAMPLE
86# endif
87# ifndef NO_NTOM
89# endif
90 },
91 { /* stereo, by default only wrappers over plain synth */
93# ifndef NO_DOWNSAMPLE
96# endif
97# ifndef NO_NTOM
99# endif
100 },
101 { /* mono2stereo */
103# ifndef NO_DOWNSAMPLE
106# endif
107# ifndef NO_NTOM
109# endif
110 },
111 { /* mono */
113# ifndef NO_DOWNSAMPLE
116# endif
117# ifndef NO_NTOM
119#endif
120 }
121};
122
123#ifdef OPT_X86
124/* More plain synths for i386 */
/* Indexed as [resample mode][output format]. frame_cpu_opt() substitutes
   these for entries that still equal synth_base.plain once an x86 decoder
   was chosen, and find_dectype() reports idrei when the active synth is
   found in this table.
   NOTE(review): the embedded listing numbers skip lines here (126 -> 128),
   so the initializer rows are not present in this copy. */
125const func_synth plain_i386[r_limit][f_limit] =
126{ /* plain */
128# ifndef NO_DOWNSAMPLE
131# endif
132# ifndef NO_NTOM
134# endif
135};
136#endif
137
138
139enum optdec defdec(void){ return defopt; }
140
/* Map a decoder type to its decoder class: the types compared below yield
   mmxsse, every other type yields normal.
   NOTE(review): the embedded listing numbers jump from 148 to 150, so one
   "|| type == ..." line may be missing from this copy -- confirm against
   the upstream file. */
141enum optcla decclass(const enum optdec type)
142{
143 return
144 (
145 type == mmx
146 || type == sse
147 || type == sse_vintage
148 || type == dreidnowext
150 || type == x86_64
151 || type == neon
152 || type == neon64
153 || type == avx
154 ) ? mmxsse : normal;
155}
156
157static int find_synth(func_synth synth, const func_synth synths[r_limit][f_limit])
158{
159 enum synth_resample ri;
160 enum synth_format fi;
161 for(ri=0; ri<r_limit; ++ri)
162 for(fi=0; fi<f_limit; ++fi)
163 if(synth == synths[ri][fi])
164 return TRUE;
165
166 return FALSE;
167}
168
169
170#if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE)
171/* After knowing that it is either vintage or current SSE,
172 this separates the two. In case of non-OPT_MULTI, only one
173 of OPT_SSE and OPT_SSE_VINTAGE is active. */
/* In OPT_MULTI builds the decision is made by checking whether the handle's
   dct36 routine is dct36_sse; if so the result is sse.
   NOTE(review): the embedded listing numbers jump from 176 to 178. As shown
   here, type is returned without ever being assigned when the sse branch is
   not taken -- presumably the missing line sets the default; confirm against
   the upstream file. */
174static enum optdec sse_or_vintage(mpg123_handle *fr)
175{
176 enum optdec type;
178# ifdef OPT_SSE
179# ifdef OPT_MULTI
180 if(fr->cpu_opts.the_dct36 == dct36_sse)
181# endif
182 type = sse;
183# endif
184 return type;
185}
186#endif
187
188/* Determine what kind of decoder is actually active
189 This depends on runtime choices which may cause fallback to i386 or generic code. */
/* The active synth (fr->synth) is compared against the known 1to1 synth
   functions of each compiled-in decoder, then against the plain_i386 and
   synth_base.plain tables. On a match, fr->cpu_opts.type and
   fr->cpu_opts.class are updated and MPG123_OK is returned; otherwise
   MPG123_ERR is returned.
   NOTE(review): the function's signature line and several body lines are
   absent from this copy (the embedded listing numbers skip 190, 208, 213,
   231, 271 and 357) -- restore them from the upstream file. */
191{
192 enum optdec type = nodec;
193 /* Direct and indirect usage, 1to1 stereo decoding.
194 Concentrating on the plain stereo synth should be fine, mono stuff is derived. */
195 func_synth basic_synth = fr->synth;
196#ifndef NO_8BIT
197#ifndef NO_16BIT
 /* The 8 bit wrapper hides a 16 bit synth; classify by that one instead. */
198 if(basic_synth == synth_1to1_8bit_wrap)
199 basic_synth = fr->synths.plain[r_1to1][f_16]; /* That is what's really below the surface. */
200#endif
201#endif
202
203 if(FALSE) ; /* Just to initialize the else if ladder. */
204#ifndef NO_16BIT
205#if defined(OPT_3DNOWEXT) || defined(OPT_3DNOWEXT_VINTAGE)
206 else if(basic_synth == synth_1to1_3dnowext)
207 {
209# ifdef OPT_3DNOWEXT_VINTAGE
210# ifdef OPT_MULTI
211 if(fr->cpu_opts.the_dct36 == dct36_3dnowext)
212# endif
214# endif
215 }
216#endif
217#if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE)
218 else if(basic_synth == synth_1to1_sse)
219 {
220 type = sse_or_vintage(fr);
221 }
222#endif
223#if defined(OPT_3DNOW) || defined(OPT_3DNOW_VINTAGE)
224 else if(basic_synth == synth_1to1_3dnow)
225 {
226 type = dreidnow;
227# ifdef OPT_3DNOW_VINTAGE
228# ifdef OPT_MULTI
229 if(fr->cpu_opts.the_dct36 == dct36_3dnow)
230# endif
232# endif
233 }
234#endif
235#ifdef OPT_MMX
236 else if(basic_synth == synth_1to1_mmx) type = mmx;
237#endif
238#ifdef OPT_I586_DITHER
239 else if(basic_synth == synth_1to1_i586_dither) type = ifuenf_dither;
240#endif
241#ifdef OPT_I586
242 else if(basic_synth == synth_1to1_i586) type = ifuenf;
243#endif
244#ifdef OPT_ALTIVEC
245 else if(basic_synth == synth_1to1_altivec) type = altivec;
246#endif
247#ifdef OPT_X86_64
248 else if(basic_synth == synth_1to1_x86_64) type = x86_64;
249#endif
250#ifdef OPT_AVX
251 else if(basic_synth == synth_1to1_avx) type = avx;
252#endif
253#ifdef OPT_ARM
254 else if(basic_synth == synth_1to1_arm) type = arm;
255#endif
256#ifdef OPT_NEON
257 else if(basic_synth == synth_1to1_neon) type = neon;
258#endif
259#ifdef OPT_NEON64
260 else if(basic_synth == synth_1to1_neon64) type = neon64;
261#endif
262#ifdef OPT_GENERIC_DITHER
263 else if(basic_synth == synth_1to1_dither) type = generic_dither;
264#endif
265#ifdef OPT_DITHER /* either i586 or generic! */
266#ifndef NO_DOWNSAMPLE
267 else if
268 (
269 basic_synth == synth_2to1_dither
270 || basic_synth == synth_4to1_dither
272#endif
273#endif
274#endif /* 16bit */
275
276#ifndef NO_SYNTH32
277
278#ifndef NO_REAL
279#if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE)
280 else if(basic_synth == synth_1to1_real_sse)
281 {
282 type = sse_or_vintage(fr);
283 }
284#endif
285#ifdef OPT_X86_64
286 else if(basic_synth == synth_1to1_real_x86_64) type = x86_64;
287#endif
288#ifdef OPT_AVX
289 else if(basic_synth == synth_1to1_real_avx) type = avx;
290#endif
291#ifdef OPT_ALTIVEC
292 else if(basic_synth == synth_1to1_real_altivec) type = altivec;
293#endif
294#ifdef OPT_NEON
295 else if(basic_synth == synth_1to1_real_neon) type = neon;
296#endif
297#ifdef OPT_NEON64
298 else if(basic_synth == synth_1to1_real_neon64) type = neon64;
299#endif
300
301#endif /* real */
302
303#ifndef NO_32BIT
304#if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE)
305 else if(basic_synth == synth_1to1_s32_sse)
306 {
307 type = sse_or_vintage(fr);
308 }
309#endif
310#ifdef OPT_X86_64
311 else if(basic_synth == synth_1to1_s32_x86_64) type = x86_64;
312#endif
313#ifdef OPT_AVX
314 else if(basic_synth == synth_1to1_s32_avx) type = avx;
315#endif
316#ifdef OPT_ALTIVEC
317 else if(basic_synth == synth_1to1_s32_altivec) type = altivec;
318#endif
319#ifdef OPT_NEON
320 else if(basic_synth == synth_1to1_s32_neon) type = neon;
321#endif
322#ifdef OPT_NEON64
323 else if(basic_synth == synth_1to1_s32_neon64) type = neon64;
324#endif
325#endif /* 32bit */
326
327#endif /* any 32 bit synth */
328
 /* No specific 1to1 synth matched: fall back to table lookups. */
329#ifdef OPT_X86
330 else if(find_synth(basic_synth, plain_i386))
331 type = idrei;
332#endif
333
334 else if(find_synth(basic_synth, synth_base.plain))
335 type = generic;
336
337
338
339#ifdef OPT_I486
340 /* i486 is special ... the specific code is in use for 16bit 1to1 stereo
341 otherwise we have i386 active... but still, the distinction doesn't matter*/
 /* This assignment is not part of the ladder above: with OPT_I486 it
    always replaces whatever type was determined before. */
342 type = ivier;
343#endif
344
345 if(type != nodec)
346 {
347 fr->cpu_opts.type = type;
348 fr->cpu_opts.class = decclass(type);
349
350 debug3("determined active decoder type %i (%s) of class %i", type, decname[type], fr->cpu_opts.class);
351 return MPG123_OK;
352 }
353 else
354 {
355 if(NOQUIET) error("Unable to determine active decoder type -- this is SERIOUS b0rkage!");
356
358 return MPG123_ERR;
359 }
360}
361
362/* set synth functions for current frame, optimizations handled by opt_* macros */
/* Steps, in order:
    1. derive the basic output format from fr->af.dec_enc (16 bit is tested
       first, then 8 bit, float, and 32/24 bit);
    2. derive the resampling variant from fr->down_sample;
    3. pick fr->synth, fr->synth_stereo and fr->synth_mono from fr->synths;
    4. run find_dectype(), frame_buffers() and, for 8 bit output,
       make_conv16to8_table();
    5. call fr->make_decode_tables(fr).
   Returns 0 on success; failures return -1 or MPG123_ERR.
   NOTE(review): the signature line and several body lines are absent from
   this copy (the embedded listing numbers skip 363, 426, 432, 471, 474,
   476, 482, 485 and 487), including the contents of both table-setup
   branches near the end -- restore them from the upstream file. */
364{
365 enum synth_resample resample = r_none;
366 enum synth_format basic_format = f_none; /* Default is always 16bit, or whatever. */
367
368 /* Select the basic output format, different from 16bit: 8bit, real. */
369 if(FALSE){}
370#ifndef NO_16BIT
371 else if(fr->af.dec_enc & MPG123_ENC_16)
372 basic_format = f_16;
373#endif
374#ifndef NO_8BIT
375 else if(fr->af.dec_enc & MPG123_ENC_8)
376 basic_format = f_8;
377#endif
378#ifndef NO_REAL
379 else if(fr->af.dec_enc & MPG123_ENC_FLOAT)
380 basic_format = f_real;
381#endif
382#ifndef NO_32BIT
383 /* 24 bit integer means decoding to 32 bit first. */
384 else if(fr->af.dec_enc & MPG123_ENC_32 || fr->af.dec_enc & MPG123_ENC_24)
385 basic_format = f_32;
386#endif
387
388 /* Make sure the chosen format is compiled into this lib. */
389 if(basic_format == f_none)
390 {
391 if(NOQUIET) error("set_synth_functions: This output format is disabled in this build!");
392
393 return -1;
394 }
395
396 /* Be explicit about downsampling variant. */
 /* Values without a case (or compiled out) leave resample at r_none,
    which is rejected right after the switch. */
397 switch(fr->down_sample)
398 {
399 case 0: resample = r_1to1; break;
400#ifndef NO_DOWNSAMPLE
401 case 1: resample = r_2to1; break;
402 case 2: resample = r_4to1; break;
403#endif
404#ifndef NO_NTOM
405 case 3: resample = r_ntom; break;
406#endif
407 }
408
409 if(resample == r_none)
410 {
411 if(NOQUIET) error("set_synth_functions: This resampling mode is not supported in this build!");
412
413 return -1;
414 }
415
416 debug2("selecting synth: resample=%i format=%i", resample, basic_format);
417 /* Finally selecting the synth functions for stereo / mono. */
418 fr->synth = fr->synths.plain[resample][basic_format];
419 fr->synth_stereo = fr->synths.stereo[resample][basic_format];
420 fr->synth_mono = fr->af.channels==2
421 ? fr->synths.mono2stereo[resample][basic_format] /* Mono MPEG file decoded to stereo. */
422 : fr->synths.mono[resample][basic_format]; /* Mono MPEG file decoded to mono. */
423
424 if(find_dectype(fr) != MPG123_OK) /* Actually determine the currently active decoder breed. */
425 {
427 return MPG123_ERR;
428 }
429
430 if(frame_buffers(fr) != 0)
431 {
433 if(NOQUIET) error("Failed to set up decoder buffers!");
434
435 return MPG123_ERR;
436 }
437
438#ifndef NO_8BIT
439 if(basic_format == f_8)
440 {
441 if(make_conv16to8_table(fr) != 0)
442 {
443 if(NOQUIET) error("Failed to set up conv16to8 table!");
444 /* it's a bit more work to get proper error propagation up */
445 return -1;
446 }
447 }
448#endif
449
450#ifdef OPT_MMXORSSE
451 /* Special treatment for MMX, SSE and 3DNowExt stuff.
452 The real-decoding SSE for x86-64 uses normal tables! */
 /* The first branch is taken only for class mmxsse with a format other
    than real/32 bit; with ACCURATE_ROUNDING the listed types are
    excluded as well. Everything else takes the else branch. */
453 if(fr->cpu_opts.class == mmxsse
454# ifndef NO_REAL
455 && basic_format != f_real
456# endif
457# ifndef NO_32BIT
458 && basic_format != f_32
459# endif
460# ifdef ACCURATE_ROUNDING
461 && fr->cpu_opts.type != sse
462 && fr->cpu_opts.type != sse_vintage
463 && fr->cpu_opts.type != x86_64
464 && fr->cpu_opts.type != neon
465 && fr->cpu_opts.type != neon64
466 && fr->cpu_opts.type != avx
467# endif
468 )
469 {
470#ifndef NO_LAYER3
472#endif
473#ifndef NO_LAYER12
475#endif
477 }
478 else
479#endif
480 {
481#ifndef NO_LAYER3
483#endif
484#ifndef NO_LAYER12
486#endif
488 }
489
490 /* We allocated the table buffers just now, so (re)create the tables. */
491 fr->make_decode_tables(fr);
492
493 return 0;
494}
495
/* Choose the decoder (optimization variant) for a handle.
   cpu is a decoder name that is translated with dectype(); when that yields
   autodec, or in builds without OPT_MULTI, the first usable decoder in the
   order tested below is taken. fr->synths is first reset to synth_base and
   then selectively overridden with the chosen decoder's synth functions;
   fr->cpu_opts.type and fr->cpu_opts.class are set accordingly.
   Returns 1 when a decoder was set up, 0 when none could be chosen or the
   dither setup failed.
   NOTE(review): many lines are absent from this copy (the embedded listing
   numbers skip e.g. 531, 538, 550, 554-555, 566, 589-590, 596, 622, 679,
   721-722 and 909-910), among them parts of if-conditions and synth
   assignments -- restore them from the upstream file before building. */
496int frame_cpu_opt(mpg123_handle *fr, const char* cpu)
497{
498 const char* chosen = ""; /* the chosen decoder opt as string */
499 enum optdec want_dec = nodec;
500 int done = 0;
501 int auto_choose = 0;
502#ifdef OPT_DITHER
503 int dithered = FALSE; /* If some dithered decoder is chosen. */
504#endif
505
506 want_dec = dectype(cpu);
507 auto_choose = want_dec == autodec;
508 /* Fill whole array of synth functions with generic code first. */
509 fr->synths = synth_base;
510
511#ifndef OPT_MULTI
512 {
 /* A request for anything but the built-in decoder is only reported;
    selection then proceeds as if automatic choice had been asked for. */
513 if(!auto_choose && want_dec != defopt)
514 {
515 if(NOQUIET) error2("you wanted decoder type %i, I only have %i", want_dec, defopt);
516 }
517 auto_choose = TRUE; /* There will be only one choice anyway. */
518 }
519#endif
520
521 fr->cpu_opts.type = nodec;
522#ifdef OPT_MULTI
523#ifndef NO_LAYER3
524#if (defined OPT_3DNOW_VINTAGE || defined OPT_3DNOWEXT_VINTAGE || defined OPT_SSE || defined OPT_X86_64 || defined OPT_AVX || defined OPT_NEON || defined OPT_NEON64)
 /* Start from the plain dct36; specific decoders below may replace it. */
525 fr->cpu_opts.the_dct36 = dct36;
526#endif
527#endif
528#endif
529 /* covers any i386+ cpu; they actually differ only in the synth_1to1 function, mostly... */
530#ifdef OPT_X86
532 {
533# ifdef OPT_MULTI
534 debug2("standard flags: 0x%08x\textended flags: 0x%08x", cpu_flags.std, cpu_flags.ext);
535# endif
536# ifdef OPT_SSE
537 if( !done && (auto_choose || want_dec == sse)
539 {
540 chosen = dn_sse;
541 fr->cpu_opts.type = sse;
542#ifdef OPT_MULTI
543# ifndef NO_LAYER3
544 /* if(cpu_fast_sse(cpu_flags)) */ fr->cpu_opts.the_dct36 = dct36_sse;
545# endif
546#endif
547# ifndef NO_16BIT
548 fr->synths.plain[r_1to1][f_16] = synth_1to1_sse;
549# ifdef ACCURATE_ROUNDING
551# endif
552# endif
553# ifndef NO_REAL
556# endif
557# ifndef NO_32BIT
558 fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_sse;
560# endif
561 done = 1;
562 }
563# endif
564# ifdef OPT_SSE_VINTAGE
565 if( !done && (auto_choose || want_dec == sse_vintage)
567 {
568 chosen = dn_sse_vintage;
569 fr->cpu_opts.type = sse_vintage;
570# ifndef NO_16BIT
571 fr->synths.plain[r_1to1][f_16] = synth_1to1_sse;
572# ifdef ACCURATE_ROUNDING
574# endif
575# endif
576# ifndef NO_REAL
579# endif
580# ifndef NO_32BIT
581 fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_sse;
583# endif
584 done = 1;
585 }
586# endif
587# ifdef OPT_3DNOWEXT
588 if( !done && (auto_choose || want_dec == dreidnowext)
591 && cpu_mmx(cpu_flags) )
592 {
593 chosen = dn_dreidnowext;
594 fr->cpu_opts.type = dreidnowext;
595# ifndef NO_16BIT
597# endif
598 done = 1;
599 }
600# endif
601# ifdef OPT_3DNOWEXT_VINTAGE
602 if( !done && (auto_choose || want_dec == dreidnowext_vintage)
605 && cpu_mmx(cpu_flags) )
606 {
607 chosen = dn_dreidnowext_vintage;
608 fr->cpu_opts.type = dreidnowext_vintage;
609#ifdef OPT_MULTI
610# ifndef NO_LAYER3
611 fr->cpu_opts.the_dct36 = dct36_3dnowext;
612# endif
613#endif
614# ifndef NO_16BIT
616# endif
617 done = 1;
618 }
619# endif
620# ifdef OPT_3DNOW
621 if( !done && (auto_choose || want_dec == dreidnow)
623 {
624 chosen = dn_dreidnow;
625 fr->cpu_opts.type = dreidnow;
626# ifndef NO_16BIT
627 fr->synths.plain[r_1to1][f_16] = synth_1to1_3dnow;
628# endif
629 done = 1;
630 }
631# endif
632# ifdef OPT_3DNOW_VINTAGE
633 if( !done && (auto_choose || want_dec == dreidnow_vintage)
635 {
636 chosen = dn_dreidnow_vintage;
637 fr->cpu_opts.type = dreidnow_vintage;
638#ifdef OPT_MULTI
639# ifndef NO_LAYER3
640 fr->cpu_opts.the_dct36 = dct36_3dnow;
641# endif
642#endif
643# ifndef NO_16BIT
644 fr->synths.plain[r_1to1][f_16] = synth_1to1_3dnow;
645# endif
646 done = 1;
647 }
648# endif
649 #ifdef OPT_MMX
650 if( !done && (auto_choose || want_dec == mmx)
651 && cpu_mmx(cpu_flags) )
652 {
653 chosen = dn_mmx;
654 fr->cpu_opts.type = mmx;
655# ifndef NO_16BIT
656 fr->synths.plain[r_1to1][f_16] = synth_1to1_mmx;
657# endif
658 done = 1;
659 }
660 #endif
661 #ifdef OPT_I586
662 if(!done && (auto_choose || want_dec == ifuenf))
663 {
664 chosen = "i586/pentium";
665 fr->cpu_opts.type = ifuenf;
666# ifndef NO_16BIT
667 fr->synths.plain[r_1to1][f_16] = synth_1to1_i586;
668# endif
669 done = 1;
670 }
671 #endif
672 #ifdef OPT_I586_DITHER
673 if(!done && (auto_choose || want_dec == ifuenf_dither))
674 {
675 chosen = "dithered i586/pentium";
676 fr->cpu_opts.type = ifuenf_dither;
677 dithered = TRUE;
678# ifndef NO_16BIT
680# ifndef NO_DOWNSAMPLE
681 fr->synths.plain[r_2to1][f_16] = synth_2to1_dither;
682 fr->synths.plain[r_4to1][f_16] = synth_4to1_dither;
683# endif
684# endif
685 done = 1;
686 }
687 #endif
688 }
689 #ifdef OPT_I486
690 /* That won't cooperate in multi opt mode - forcing i486 in layer3.c
691 But still... here it is... maybe for real use in future. */
692 if(!done && (auto_choose || want_dec == ivier))
693 {
694 chosen = dn_ivier;
695 fr->cpu_opts.type = ivier;
696 done = 1;
697 }
698 #endif
699 #ifdef OPT_I386
700 if(!done && (auto_choose || want_dec == idrei))
701 {
702 chosen = dn_idrei;
703 fr->cpu_opts.type = idrei;
704 done = 1;
705 }
706 #endif
707
708 if(done)
709 {
710 /*
711 We have chosen some x86 decoder... fill up some i386 stuff.
712 There is an open question about using dithered synth_1to1 for 8bit wrappers.
713 For quality it won't make sense, but a wrapped i586_dither may still be faster...
714 */
715 enum synth_resample ri;
716 enum synth_format fi;
717# ifndef NO_8BIT
718# ifndef NO_16BIT /* possibility to use a 16->8 wrapper... */
719 if(fr->synths.plain[r_1to1][f_16] != synth_base.plain[r_1to1][f_16])
720 {
723 fr->synths.mono2stereo[r_1to1][f_8] = synth_1to1_8bit_wrap_m2s;
724 }
725# endif
726# endif
 /* Every plain entry still holding the generic synth gets the i386
    variant from plain_i386 instead. */
727 for(ri=0; ri<r_limit; ++ri)
728 for(fi=0; fi<f_limit; ++fi)
729 {
730 if(fr->synths.plain[ri][fi] == synth_base.plain[ri][fi])
731 fr->synths.plain[ri][fi] = plain_i386[ri][fi];
732 }
733 }
734
735#endif /* OPT_X86 */
736
737#ifdef OPT_AVX
738 if(!done && (auto_choose || want_dec == avx) && cpu_avx(cpu_flags))
739 {
740 chosen = "x86-64 (AVX)";
741 fr->cpu_opts.type = avx;
742#ifdef OPT_MULTI
743# ifndef NO_LAYER3
744 fr->cpu_opts.the_dct36 = dct36_avx;
745# endif
746#endif
747# ifndef NO_16BIT
748 fr->synths.plain[r_1to1][f_16] = synth_1to1_avx;
750# endif
751# ifndef NO_REAL
754# endif
755# ifndef NO_32BIT
756 fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_avx;
758# endif
759 done = 1;
760 }
761#endif
762
763#ifdef OPT_X86_64
764 if(!done && (auto_choose || want_dec == x86_64))
765 {
766 chosen = "x86-64 (SSE)";
767 fr->cpu_opts.type = x86_64;
768#ifdef OPT_MULTI
769# ifndef NO_LAYER3
770 fr->cpu_opts.the_dct36 = dct36_x86_64;
771# endif
772#endif
773# ifndef NO_16BIT
774 fr->synths.plain[r_1to1][f_16] = synth_1to1_x86_64;
776# endif
777# ifndef NO_REAL
780# endif
781# ifndef NO_32BIT
784# endif
785 done = 1;
786 }
787#endif
788
789# ifdef OPT_ALTIVEC
790 if(!done && (auto_choose || want_dec == altivec))
791 {
792 chosen = dn_altivec;
793 fr->cpu_opts.type = altivec;
794# ifndef NO_16BIT
795 fr->synths.plain[r_1to1][f_16] = synth_1to1_altivec;
797# endif
798# ifndef NO_REAL
801# endif
802# ifndef NO_32BIT
805# endif
806 done = 1;
807 }
808# endif
809
810# ifdef OPT_NEON
811 if(!done && (auto_choose || want_dec == neon) && cpu_neon(cpu_flags))
812 {
813 chosen = dn_neon;
814 fr->cpu_opts.type = neon;
815#ifdef OPT_MULTI
816# ifndef NO_LAYER3
817 fr->cpu_opts.the_dct36 = dct36_neon;
818# endif
819#endif
820# ifndef NO_16BIT
821 fr->synths.plain[r_1to1][f_16] = synth_1to1_neon;
823# endif
824# ifndef NO_REAL
827# endif
828# ifndef NO_32BIT
831# endif
832 done = 1;
833 }
834# endif
835
836# ifdef OPT_ARM
837 if(!done && (auto_choose || want_dec == arm))
838 {
839 chosen = dn_arm;
840 fr->cpu_opts.type = arm;
841# ifndef NO_16BIT
842 fr->synths.plain[r_1to1][f_16] = synth_1to1_arm;
843# endif
844 done = 1;
845 }
846# endif
847
848# ifdef OPT_NEON64
849 if(!done && (auto_choose || want_dec == neon64) && cpu_neon(cpu_flags))
850 {
851 chosen = dn_neon64;
852 fr->cpu_opts.type = neon64;
853#ifdef OPT_MULTI
854# ifndef NO_LAYER3
855 fr->cpu_opts.the_dct36 = dct36_neon64;
856# endif
857#endif
858# ifndef NO_16BIT
859 fr->synths.plain[r_1to1][f_16] = synth_1to1_neon64;
861# endif
862# ifndef NO_REAL
865# endif
866# ifndef NO_32BIT
869# endif
870 done = 1;
871 }
872# endif
873
874# ifdef OPT_GENERIC
875 if(!done && (auto_choose || want_dec == generic))
876 {
877 chosen = dn_generic;
878 fr->cpu_opts.type = generic;
879 done = 1;
880 }
881# endif
882
883#ifdef OPT_GENERIC_DITHER
884 if(!done && (auto_choose || want_dec == generic_dither))
885 {
886 chosen = "dithered generic";
887 fr->cpu_opts.type = generic_dither;
888 dithered = TRUE;
889# ifndef NO_16BIT
890 fr->synths.plain[r_1to1][f_16] = synth_1to1_dither;
891# ifndef NO_DOWNSAMPLE
892 fr->synths.plain[r_2to1][f_16] = synth_2to1_dither;
893 fr->synths.plain[r_4to1][f_16] = synth_4to1_dither;
894# endif
895# endif
896 done = 1;
897 }
898#endif
899
900 fr->cpu_opts.class = decclass(fr->cpu_opts.type);
901
902# ifndef NO_8BIT
903# ifndef NO_16BIT /* possibility to use a 16->8 wrapper... */
904 /* Last chance to use some optimized routine via generic wrappers (for 8bit). */
 /* Dithered decoders are excluded from the 8 bit wrapping here. */
905 if( fr->cpu_opts.type != ifuenf_dither
906 && fr->cpu_opts.type != generic_dither
907 && fr->synths.plain[r_1to1][f_16] != synth_base.plain[r_1to1][f_16] )
908 {
911 fr->synths.mono2stereo[r_1to1][f_8] = synth_1to1_8bit_wrap_m2s;
912 }
913# endif
914# endif
915
916#ifdef OPT_DITHER
917 if(done && dithered)
918 {
919 /* run-time dither noise table generation */
920 if(!frame_dither_init(fr))
921 {
922 if(NOQUIET) error("Dither noise setup failed!");
923 return 0;
924 }
925 }
926#endif
927
928 if(done)
929 {
930 if(VERBOSE) fprintf(stderr, "Decoder: %s\n", chosen);
931 return 1;
932 }
933 else
934 {
935 if(NOQUIET) error("Could not set optimization!");
936 return 0;
937 }
938}
939
940enum optdec dectype(const char* decoder)
941{
942 enum optdec dt;
943 if( (decoder == NULL)
944 || (decoder[0] == 0) )
945 return autodec;
946
947 for(dt=autodec; dt<nodec; ++dt)
948 if(!strcasecmp(decoder, decname[dt])) return dt;
949
950 return nodec; /* If we found nothing... */
951}
952
953#ifdef OPT_MULTI
954
955/* same number of entries as full list, but empty at beginning */
/* One NULL slot per compiled-in decoder plus a final NULL terminator.
   The slots are written at runtime by the routine that walks this array
   with "*(d++) = dn_..." further down in this file, and the array is what
   the OPT_MULTI variant of the supported-decoders getter returns.
   NOTE(review): unlike mpg123_decoder_list, this table also reserves a slot
   for OPT_GENERIC_FLOAT. */
956static const char *mpg123_supported_decoder_list[] =
957{
958 #ifdef OPT_SSE
959 NULL,
960 #endif
961 #ifdef OPT_SSE_VINTAGE
962 NULL,
963 #endif
964 #ifdef OPT_3DNOWEXT
965 NULL,
966 #endif
967 #ifdef OPT_3DNOWEXT_VINTAGE
968 NULL,
969 #endif
970 #ifdef OPT_3DNOW
971 NULL,
972 #endif
973 #ifdef OPT_3DNOW_VINTAGE
974 NULL,
975 #endif
976 #ifdef OPT_MMX
977 NULL,
978 #endif
979 #ifdef OPT_I586
980 NULL,
981 #endif
982 #ifdef OPT_I586_DITHER
983 NULL,
984 #endif
985 #ifdef OPT_I486
986 NULL,
987 #endif
988 #ifdef OPT_I386
989 NULL,
990 #endif
991 #ifdef OPT_ALTIVEC
992 NULL,
993 #endif
994 #ifdef OPT_AVX
995 NULL,
996 #endif
997 #ifdef OPT_X86_64
998 NULL,
999 #endif
1000 #ifdef OPT_ARM
1001 NULL,
1002 #endif
1003 #ifdef OPT_NEON
1004 NULL,
1005 #endif
1006 #ifdef OPT_NEON64
1007 NULL,
1008 #endif
1009 #ifdef OPT_GENERIC_FLOAT
1010 NULL,
1011 #endif
1012# ifdef OPT_GENERIC
1013 NULL,
1014# endif
1015# ifdef OPT_GENERIC_DITHER
1016 NULL,
1017# endif
1018 NULL
1019};
1020#endif
1021
/* NULL-terminated list of the names (dn_* strings) of all decoders compiled
   into this build, one entry per enabled OPT_* switch. Builds without
   OPT_MULTI return this list from the supported-decoders getter as well. */
1022static const char *mpg123_decoder_list[] =
1023{
1024 #ifdef OPT_SSE
1025 dn_sse,
1026 #endif
1027 #ifdef OPT_SSE_VINTAGE
1028 dn_sse_vintage,
1029 #endif
1030 #ifdef OPT_3DNOWEXT
1031 dn_dreidnowext,
1032 #endif
1033 #ifdef OPT_3DNOWEXT_VINTAGE
1034 dn_dreidnowext_vintage,
1035 #endif
1036 #ifdef OPT_3DNOW
1037 dn_dreidnow,
1038 #endif
1039 #ifdef OPT_3DNOW_VINTAGE
1040 dn_dreidnow_vintage,
1041 #endif
1042 #ifdef OPT_MMX
1043 dn_mmx,
1044 #endif
1045 #ifdef OPT_I586
1046 dn_ifuenf,
1047 #endif
1048 #ifdef OPT_I586_DITHER
1049 dn_ifuenf_dither,
1050 #endif
1051 #ifdef OPT_I486
1052 dn_ivier,
1053 #endif
1054 #ifdef OPT_I386
1055 dn_idrei,
1056 #endif
1057 #ifdef OPT_ALTIVEC
1058 dn_altivec,
1059 #endif
1060 #ifdef OPT_AVX
1061 dn_avx,
1062 #endif
1063 #ifdef OPT_X86_64
1064 dn_x86_64,
1065 #endif
1066 #ifdef OPT_ARM
1067 dn_arm,
1068 #endif
1069 #ifdef OPT_NEON
1070 dn_neon,
1071 #endif
1072 #ifdef OPT_NEON64
1073 dn_neon64,
1074 #endif
1075 #ifdef OPT_GENERIC
1076 dn_generic,
1077 #endif
1078 #ifdef OPT_GENERIC_DITHER
1079 dn_generic_dither,
1080 #endif
1081 NULL
1082};
1083
/* Fill mpg123_supported_decoder_list, front to back, with the names of the
   decoders usable at runtime: entries guarded by a cpu_*() test are added
   only when that test on cpu_flags succeeds, the others unconditionally.
   Without OPT_MULTI the function returns immediately.
   NOTE(review): the signature line (listing line 1084) and listing line
   1092 are absent from this copy -- restore them from the upstream file. */
1085{
1086#ifndef OPT_MULTI
1087 /* In non-multi mode, only the full list (one entry) is used. */
1088 return;
1089#else
 /* d is the write cursor into the supported list. */
1090 const char **d = mpg123_supported_decoder_list;
1091#if (defined OPT_X86) || (defined OPT_X86_64) || (defined OPT_NEON) || (defined OPT_NEON64)
1093#endif
1094#ifdef OPT_X86
1095 if(cpu_i586(cpu_flags))
1096 {
1097 /* not yet: if(cpu_sse2(cpu_flags)) printf(" SSE2");
1098 if(cpu_sse3(cpu_flags)) printf(" SSE3"); */
1099#ifdef OPT_SSE
1100 if(cpu_sse(cpu_flags)) *(d++) = dn_sse;
1101#endif
1102#ifdef OPT_SSE_VINTAGE
1103 if(cpu_sse(cpu_flags)) *(d++) = dn_sse_vintage;
1104#endif
1105#ifdef OPT_3DNOWEXT
1106 if(cpu_3dnowext(cpu_flags)) *(d++) = dn_dreidnowext;
1107#endif
1108#ifdef OPT_3DNOWEXT_VINTAGE
1109 if(cpu_3dnowext(cpu_flags)) *(d++) = dn_dreidnowext_vintage;
1110#endif
1111#ifdef OPT_3DNOW
1112 if(cpu_3dnow(cpu_flags)) *(d++) = dn_dreidnow;
1113#endif
1114#ifdef OPT_3DNOW_VINTAGE
1115 if(cpu_3dnow(cpu_flags)) *(d++) = dn_dreidnow_vintage;
1116#endif
1117#ifdef OPT_MMX
1118 if(cpu_mmx(cpu_flags)) *(d++) = dn_mmx;
1119#endif
1120#ifdef OPT_I586
1121 *(d++) = dn_ifuenf;
1122#endif
1123#ifdef OPT_I586_DITHER
1124 *(d++) = dn_ifuenf_dither;
1125#endif
1126 }
1127#endif
1128/* just assume that the i486 build is run on an i486 cpu... */
1129#ifdef OPT_I486
1130 *(d++) = dn_ivier;
1131#endif
1132#ifdef OPT_ALTIVEC
1133 *(d++) = dn_altivec;
1134#endif
1135/* every supported x86 can do i386, any cpu can do generic */
1136#ifdef OPT_I386
1137 *(d++) = dn_idrei;
1138#endif
1139#ifdef OPT_AVX
1140 if(cpu_avx(cpu_flags)) *(d++) = dn_avx;
1141#endif
1142#ifdef OPT_X86_64
1143 *(d++) = dn_x86_64;
1144#endif
1145#ifdef OPT_ARM
1146 *(d++) = dn_arm;
1147#endif
1148#ifdef OPT_NEON
1149 if(cpu_neon(cpu_flags)) *(d++) = dn_neon;
1150#endif
1151#ifdef OPT_NEON64
1152 if(cpu_neon(cpu_flags)) *(d++) = dn_neon64;
1153#endif
1154#ifdef OPT_GENERIC
1155 *(d++) = dn_generic;
1156#endif
1157#ifdef OPT_GENERIC_DITHER
1158 *(d++) = dn_generic_dither;
1159#endif
1160#endif /* ndef OPT_MULTI */
1161}
1162
/* Return the name (decname[] entry) of the decoder type stored in the
   handle's cpu_opts, or NULL when no handle is given.
   NOTE(review): the signature line (listing line 1163) is absent from this
   copy -- restore it from the upstream file. */
1164{
1165 if(mh == NULL) return NULL;
1166
1167 return decname[mh->cpu_opts.type];
1168}
1169
/* Return the NULL-terminated list of decoder names considered supported:
   the runtime-filled mpg123_supported_decoder_list in OPT_MULTI builds,
   otherwise the static mpg123_decoder_list.
   NOTE(review): the signature line (listing line 1171) is absent from this
   copy -- restore it from the upstream file. */
1172{
1173#ifdef OPT_MULTI
1174 return mpg123_supported_decoder_list;
1175#else
1176 return mpg123_decoder_list;
1177#endif
1178}
#define attribute_align_arg
Definition: abi_align.h:30
#define NULL
Definition: types.h:112
#define TRUE
Definition: types.h:120
#define FALSE
Definition: types.h:117
#define strcasecmp
Definition: fake.h:9
GLuint GLuint GLsizei GLenum type
Definition: gl.h:1545
const char *attribute_align_arg mpg123_current_decoder(mpg123_handle *mh)
Definition: optimize.c:1163
const char attribute_align_arg ** mpg123_decoders(void)
Definition: optimize.c:1170
const char attribute_align_arg ** mpg123_supported_decoders(void)
Definition: optimize.c:1171
@ MPG123_ENC_32
Definition: fmt123.h:52
@ MPG123_ENC_8
Definition: fmt123.h:46
@ MPG123_ENC_24
Definition: fmt123.h:50
@ MPG123_ENC_16
Definition: fmt123.h:48
@ MPG123_ENC_FLOAT
Definition: fmt123.h:56
@ MPG123_ERR
Definition: mpg123.h:382
@ MPG123_NO_BUFFERS
Definition: mpg123.h:394
@ MPG123_BAD_DECODER_SETUP
Definition: mpg123.h:420
@ MPG123_OK
Definition: mpg123.h:383
#define stderr
Definition: stdio.h:100
_Check_return_opt_ _CRTIMP int __cdecl fprintf(_Inout_ FILE *_File, _In_z_ _Printf_format_string_ const char *_Format,...)
#define synth_1to1_8bit_mono
Definition: intsym.h:73
#define synth_2to1_real_mono
Definition: intsym.h:106
#define synth_1to1_real_sse
Definition: intsym.h:90
#define synth_1to1_s32_stereo_x86_64
Definition: intsym.h:120
#define synth_2to1_real_i386
Definition: intsym.h:105
#define synth_1to1_8bit_m2s
Definition: intsym.h:74
#define synth_1to1_s32_avx
Definition: intsym.h:121
#define synth_1to1_8bit_wrap_mono
Definition: intsym.h:75
#define getcpuflags
Definition: intsym.h:206
#define dct36_3dnowext
Definition: intsym.h:148
#define init_layer3_stuff
Definition: intsym.h:162
#define synth_2to1_8bit
Definition: intsym.h:77
#define synth_1to1_s32_i386
Definition: intsym.h:116
#define synth_4to1_real_mono
Definition: intsym.h:110
#define init_layer3_gainpow2_mmx
Definition: intsym.h:169
#define synth_2to1_m2s
Definition: intsym.h:61
#define synth_1to1_x86_64
Definition: intsym.h:45
#define synth_1to1_8bit
Definition: intsym.h:70
#define synth_2to1
Definition: intsym.h:57
#define synth_ntom_s32_m2s
Definition: intsym.h:141
#define synth_1to1_s32_x86_64
Definition: intsym.h:119
#define synth_4to1_8bit_m2s
Definition: intsym.h:84
#define synth_4to1_real_m2s
Definition: intsym.h:111
#define synth_1to1_real_stereo_x86_64
Definition: intsym.h:93
#define synth_4to1_s32_m2s
Definition: intsym.h:138
#define synth_ntom_m2s
Definition: intsym.h:69
#define synth_1to1_real_i386
Definition: intsym.h:89
#define synth_1to1_stereo_neon64
Definition: intsym.h:53
#define synth_ntom
Definition: intsym.h:67
#define init_layer12_table
Definition: intsym.h:164
#define synth_1to1_stereo_x86_64
Definition: intsym.h:46
#define synth_1to1_stereo_altivec
Definition: intsym.h:44
#define synth_1to1_real_altivec
Definition: intsym.h:96
#define synth_1to1_mono
Definition: intsym.h:55
#define synth_1to1_stereo_sse
Definition: intsym.h:41
#define synth_2to1_real_m2s
Definition: intsym.h:107
#define synth_2to1_i386
Definition: intsym.h:59
#define synth_ntom_real
Definition: intsym.h:112
#define make_conv16to8_table
Definition: intsym.h:171
#define synth_1to1_i586_dither
Definition: intsym.h:37
#define synth_1to1_i586
Definition: intsym.h:36
#define synth_1to1_mmx
Definition: intsym.h:38
#define synth_4to1_8bit_mono
Definition: intsym.h:83
#define synth_1to1_m2s
Definition: intsym.h:56
#define synth_1to1_real_stereo_sse
Definition: intsym.h:91
#define synth_1to1_s32_stereo_neon
Definition: intsym.h:126
#define dct36_3dnow
Definition: intsym.h:147
#define dct36
Definition: intsym.h:146
#define synth_1to1_sse
Definition: intsym.h:40
#define dct36_avx
Definition: intsym.h:151
#define synth_2to1_s32_i386
Definition: intsym.h:132
#define synth_4to1_mono
Definition: intsym.h:65
#define synth_1to1_s32_sse
Definition: intsym.h:117
#define synth_1to1_avx
Definition: intsym.h:47
#define dct36_sse
Definition: intsym.h:150
#define dct36_x86_64
Definition: intsym.h:149
#define synth_2to1_s32_m2s
Definition: intsym.h:134
#define synth_1to1_fltst_avx
Definition: intsym.h:95
#define decclass
Definition: intsym.h:235
#define synth_1to1_real_mono
Definition: intsym.h:102
#define synth_1to1_s32_stereo_avx
Definition: intsym.h:122
#define synth_1to1_s32_mono
Definition: intsym.h:129
#define synth_1to1_8bit_wrap_m2s
Definition: intsym.h:76
#define synth_ntom_mono
Definition: intsym.h:68
#define set_synth_functions
Definition: intsym.h:232
#define synth_1to1_real_avx
Definition: intsym.h:94
#define synth_1to1_dither
Definition: intsym.h:34
#define synth_4to1_real_i386
Definition: intsym.h:109
#define dectype
Definition: intsym.h:233
#define synth_1to1_s32_stereo_sse
Definition: intsym.h:118
#define frame_cpu_opt
Definition: intsym.h:231
#define synth_ntom_8bit
Definition: intsym.h:85
#define synth_1to1_s32_m2s
Definition: intsym.h:130
#define synth_1to1_real_neon64
Definition: intsym.h:100
#define synth_ntom_s32_mono
Definition: intsym.h:140
#define synth_ntom_s32
Definition: intsym.h:139
#define synth_1to1_real_x86_64
Definition: intsym.h:92
#define synth_2to1_s32_mono
Definition: intsym.h:133
#define synth_4to1_s32_i386
Definition: intsym.h:136
#define make_decode_tables
Definition: intsym.h:167
#define synth_1to1_fltst_neon64
Definition: intsym.h:101
#define synth_1to1_i386
Definition: intsym.h:35
#define synth_4to1_8bit
Definition: intsym.h:81
#define synth_1to1_8bit_i386
Definition: intsym.h:71
#define synth_ntom_8bit_m2s
Definition: intsym.h:87
#define init_layer3_gainpow2
Definition: intsym.h:161
#define synth_1to1_s32_neon64
Definition: intsym.h:127
#define frame_buffers
Definition: intsym.h:183
#define init_layer12_table_mmx
Definition: intsym.h:170
#define dct36_neon64
Definition: intsym.h:153
#define synth_1to1_real_m2s
Definition: intsym.h:103
#define synth_1to1_fltst_altivec
Definition: intsym.h:97
#define synth_2to1_8bit_mono
Definition: intsym.h:79
#define synth_1to1_arm
Definition: intsym.h:49
#define synth_1to1_s32st_neon64
Definition: intsym.h:128
#define synth_4to1_s32
Definition: intsym.h:135
#define synth_1to1_real
Definition: intsym.h:88
#define synth_2to1_dither
Definition: intsym.h:58
#define synth_2to1_real
Definition: intsym.h:104
#define synth_1to1_real_stereo_neon
Definition: intsym.h:99
#define synth_4to1_real
Definition: intsym.h:108
#define synth_1to1_altivec
Definition: intsym.h:43
#define synth_4to1_dither
Definition: intsym.h:63
#define synth_1to1_stereo_neon
Definition: intsym.h:51
#define init_layer12_stuff
Definition: intsym.h:165
#define synth_2to1_8bit_i386
Definition: intsym.h:78
#define synth_1to1_neon
Definition: intsym.h:50
#define synth_1to1_8bit_wrap
Definition: intsym.h:72
#define synth_1to1_3dnowext
Definition: intsym.h:42
#define synth_4to1
Definition: intsym.h:62
#define dct36_neon
Definition: intsym.h:152
#define synth_4to1_i386
Definition: intsym.h:64
#define synth_1to1
Definition: intsym.h:33
#define synth_1to1_neon64
Definition: intsym.h:52
#define synth_1to1_s32
Definition: intsym.h:115
#define synth_1to1_s32_altivec
Definition: intsym.h:123
#define synth_1to1_s32_neon
Definition: intsym.h:125
#define synth_ntom_real_mono
Definition: intsym.h:113
#define frame_dither_init
Definition: intsym.h:177
#define synth_2to1_mono
Definition: intsym.h:60
#define synth_1to1_s32_stereo_altivec
Definition: intsym.h:124
#define synth_1to1_stereo_avx
Definition: intsym.h:48
#define synth_ntom_real_m2s
Definition: intsym.h:114
#define synth_2to1_s32
Definition: intsym.h:131
#define synth_1to1_real_neon
Definition: intsym.h:98
#define make_decode_tables_mmx
Definition: intsym.h:168
#define synth_4to1_m2s
Definition: intsym.h:66
#define synth_ntom_8bit_mono
Definition: intsym.h:86
#define synth_4to1_8bit_i386
Definition: intsym.h:82
#define synth_4to1_s32_mono
Definition: intsym.h:137
#define synth_1to1_3dnow
Definition: intsym.h:39
#define synth_2to1_8bit_m2s
Definition: intsym.h:80
#define d
Definition: ke_i.h:81
#define error(str)
Definition: mkdosfs.c:1605
#define VERBOSE
#define real
#define NOQUIET
#define cpu_3dnow(s)
Definition: optimize.c:25
static int find_dectype(mpg123_handle *fr)
Definition: optimize.c:190
#define cpu_avx(s)
Definition: optimize.c:30
void check_decoders(void)
Definition: optimize.c:1084
static int synth_stereo_wrap(real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr)
Definition: optimize.c:71
#define cpu_flags
Definition: optimize.c:21
#define cpu_mmx(s)
Definition: optimize.c:24
#define cpu_3dnowext(s)
Definition: optimize.c:26
static int find_synth(func_synth synth, const func_synth synths[r_limit][f_limit])
Definition: optimize.c:157
#define cpu_i586(s)
Definition: optimize.c:22
enum optdec defdec(void)
Definition: optimize.c:139
#define cpu_neon(s)
Definition: optimize.c:31
#define cpu_sse(s)
Definition: optimize.c:27
#define OUT_SYNTHS(synth_16, synth_8, synth_real, synth_32)
Definition: optimize.c:64
static const struct synth_s synth_base
Definition: optimize.c:79
static const char * mpg123_decoder_list[]
Definition: optimize.c:1022
optcla
Definition: optimize.h:166
@ mmxsse
Definition: optimize.h:166
@ normal
Definition: optimize.h:166
optdec
Definition: optimize.h:95
@ avx
Definition: optimize.h:112
@ ivier
Definition: optimize.h:100
@ x86_64
Definition: optimize.h:108
@ neon64
Definition: optimize.h:111
@ dreidnowext_vintage
Definition: optimize.h:114
@ dreidnow
Definition: optimize.h:104
@ nodec
Definition: optimize.h:116
@ sse_vintage
Definition: optimize.h:115
@ generic_dither
Definition: optimize.h:98
@ ifuenf
Definition: optimize.h:101
@ altivec
Definition: optimize.h:106
@ autodec
Definition: optimize.h:96
@ arm
Definition: optimize.h:109
@ dreidnowext
Definition: optimize.h:105
@ sse
Definition: optimize.h:107
@ ifuenf_dither
Definition: optimize.h:102
@ mmx
Definition: optimize.h:103
@ generic
Definition: optimize.h:97
@ neon
Definition: optimize.h:110
@ dreidnow_vintage
Definition: optimize.h:113
@ idrei
Definition: optimize.h:99
#define ACCURATE_ROUNDING
Definition: config.h:9
#define debug2(s, a, b)
Definition: debug.h:62
#define debug3(s, a, b, c)
Definition: debug.h:63
#define error2(s, a, b)
Definition: debug.h:126
func_synth_mono synth_mono
Definition: frame.h:182
struct synth_s synths
Definition: frame.h:160
func_synth_stereo synth_stereo
Definition: frame.h:181
struct audioformat af
Definition: frame.h:268
func_synth synth
Definition: frame.h:180
struct mpg123_handle_struct::@3459 cpu_opts
void(* make_decode_tables)(mpg123_handle *fr)
Definition: frame.h:184
Definition: synths.h:45
synth_format
Definition: synths.h:28
@ f_real
Definition: synths.h:37
@ f_none
Definition: synths.h:29
@ f_8
Definition: synths.h:34
@ f_limit
Definition: synths.h:42
@ f_32
Definition: synths.h:40
@ f_16
Definition: synths.h:31
int(* func_synth)(real *, int, mpg123_handle *, int)
Definition: synths.h:10
synth_resample
Definition: synths.h:15
@ r_2to1
Definition: synths.h:19
@ r_4to1
Definition: synths.h:20
@ r_1to1
Definition: synths.h:17
@ r_limit
Definition: synths.h:25
@ r_none
Definition: synths.h:16
@ r_ntom
Definition: synths.h:23