26#ifndef LIBM_INLINES_AMD_H_INCLUDED
27#define LIBM_INLINES_AMD_H_INCLUDED 1
32#if defined(USE_VAL_WITH_FLAGS) || defined(USE_VALF_WITH_FLAGS) || \
33 defined(USE_ZERO_WITH_FLAGS) || defined(USE_ZEROF_WITH_FLAGS) || \
34 defined(USE_NAN_WITH_FLAGS) || defined(USE_NANF_WITH_FLAGS) || \
35 defined(USE_INDEFINITE_WITH_FLAGS) || defined(USE_INDEFINITEF_WITH_FLAGS) || \
36 defined(USE_INFINITY_WITH_FLAGS) || defined(USE_INFINITYF_WITH_FLAGS) || \
37 defined(USE_SQRT_AMD_INLINE) || defined(USE_SQRTF_AMD_INLINE) || \
38 (defined(USE_HANDLE_ERROR) || defined(USE_HANDLE_ERRORF))
39#undef USE_RAISE_FPSW_FLAGS
40#define USE_RAISE_FPSW_FLAGS 1
43#if defined(USE_SPLITDOUBLE)
47static inline void splitDouble(
double x,
int *
e,
double *
m)
49 unsigned long long ux, uy;
62#if defined(USE_SPLITDOUBLE_2)
67static inline void splitDouble_2(
double x,
int *
e,
double *
m)
69 unsigned long long ux, vx;
94#if defined(USE_SPLITFLOAT)
98static inline void splitFloat(
float x,
int *
e,
float *
m)
113#if defined(USE_SCALEDOUBLE_1)
116static inline double scaleDouble_1(
double x,
int n)
126#if defined(USE_SCALEDOUBLE_2)
129static inline double scaleDouble_2(
double x,
int n)
143#if defined(USE_SCALEDOUBLE_3)
146static inline double scaleDouble_3(
double x,
int n)
157 return ((
x*t1)*t2)*t3;
162#if defined(USE_SCALEFLOAT_1)
165static inline float scaleFloat_1(
float x,
int n)
175#if defined(USE_SCALEFLOAT_2)
178static inline float scaleFloat_2(
float x,
int n)
192#if defined(USE_SCALEFLOAT_3)
195static inline float scaleFloat_3(
float x,
int n)
206 return ((
x*t1)*t2)*t3;
210#if defined(USE_SETPRECISIONDOUBLE)
211unsigned int setPrecisionDouble(
void)
213 unsigned int cw, cwold = 0;
219#if defined(USE_RESTOREPRECISION)
220void restorePrecision(
unsigned int cwold)
230#if defined(USE_RAISE_FPSW_FLAGS)
243#define AMD_ISW_INVALID 0x0001
244#define AMD_ISW_DENORMAL 0x0002
245#define AMD_ISW_ZERODIVIDE 0x0004
246#define AMD_ISW_OVERFLOW 0x0008
247#define AMD_ISW_UNDERFLOW 0x0010
248#define AMD_ISW_INEXACT 0x0020
253static inline void raise_fpsw_flags(
int flags)
269#if defined(USE_GET_FPSW_INLINE)
271static inline unsigned int get_fpsw_inline(
void)
277#if defined(USE_SET_FPSW_INLINE)
279static inline void set_fpsw_inline(
unsigned int sw)
287#if defined(USE_VAL_WITH_FLAGS)
291static inline double val_with_flags(
double val,
int flags)
293 raise_fpsw_flags(
flags);
298#if defined(USE_VALF_WITH_FLAGS)
302static inline float valf_with_flags(
float val,
int flags)
304 raise_fpsw_flags(
flags);
310#if defined(USE_ZERO_WITH_FLAGS)
314static inline double zero_with_flags(
int flags)
316 raise_fpsw_flags(
flags);
322#if defined(USE_ZEROF_WITH_FLAGS)
326static inline float zerof_with_flags(
int flags)
328 raise_fpsw_flags(
flags);
334#if defined(USE_NAN_WITH_FLAGS)
338static inline double nan_with_flags(
int flags)
341 raise_fpsw_flags(
flags);
347#if defined(USE_NANF_WITH_FLAGS)
351static inline float nanf_with_flags(
int flags)
354 raise_fpsw_flags(
flags);
361#if defined(USE_INDEFINITE_WITH_FLAGS)
365static inline double indefinite_with_flags(
int flags)
368 raise_fpsw_flags(
flags);
374#if defined(USE_INDEFINITEF_WITH_FLAGS)
378static inline float indefinitef_with_flags(
int flags)
381 raise_fpsw_flags(
flags);
388#ifdef USE_INFINITY_WITH_FLAGS
392static inline double infinity_with_flags(
int flags)
395 raise_fpsw_flags(
flags);
401#ifdef USE_INFINITYF_WITH_FLAGS
405static inline float infinityf_with_flags(
int flags)
408 raise_fpsw_flags(
flags);
414#if defined(USE_HANDLE_ERROR) || defined(USE_HANDLE_ERRORF)
422 unsigned long long value,
433 unsigned long long value,
442#if defined(USE_SPLITEXP)
449static inline void splitexp(
double x,
double logbase,
450 double thirtytwo_by_logbaseof2,
451 double logbaseof2_by_32_lead,
452 double logbaseof2_by_32_trail,
453 int *
m,
double *z1,
double *z2)
464 static const double two_to_jby32_lead_table[32] = {
465 1.00000000000000000000e+00,
466 1.02189713716506958008e+00,
467 1.04427373409271240234e+00,
468 1.06714040040969848633e+00,
469 1.09050768613815307617e+00,
470 1.11438673734664916992e+00,
471 1.13878858089447021484e+00,
472 1.16372483968734741211e+00,
473 1.18920707702636718750e+00,
474 1.21524733304977416992e+00,
475 1.24185776710510253906e+00,
476 1.26905095577239990234e+00,
477 1.29683953523635864258e+00,
478 1.32523661851882934570e+00,
479 1.35425549745559692383e+00,
480 1.38390988111495971680e+00,
481 1.41421353816986083984e+00,
482 1.44518077373504638672e+00,
483 1.47682613134384155273e+00,
484 1.50916439294815063477e+00,
485 1.54221081733703613281e+00,
486 1.57598084211349487305e+00,
487 1.61049032211303710938e+00,
488 1.64575546979904174805e+00,
489 1.68179279565811157227e+00,
490 1.71861928701400756836e+00,
491 1.75625211000442504883e+00,
492 1.79470902681350708008e+00,
493 1.83400803804397583008e+00,
494 1.87416762113571166992e+00,
495 1.91520655155181884766e+00,
496 1.95714408159255981445e+00};
498 static const double two_to_jby32_trail_table[32] = {
499 0.00000000000000000000e+00,
500 1.14890470981563546737e-08,
501 4.83347014379782142328e-08,
502 2.67125131841396124714e-10,
503 4.65271045830351350190e-08,
504 5.24924336638693782574e-09,
505 5.38622214388600821910e-08,
506 1.90902301017041969782e-08,
507 3.79763538792174980894e-08,
508 2.69306947081946450986e-08,
509 4.49683815095311756138e-08,
510 1.41933332021066904914e-09,
511 1.94146510233556266402e-08,
512 2.46409119489264118569e-08,
513 4.94812958044698886494e-08,
514 8.48872238075784476136e-10,
515 2.42032342089579394887e-08,
516 3.32420002333182569170e-08,
517 1.45956577586525322754e-08,
518 3.46452721050003920866e-08,
519 8.07090469079979051284e-09,
520 2.99439161340839520436e-09,
521 9.83621719880452147153e-09,
522 8.35492309647188080486e-09,
523 3.48493175137966283582e-08,
524 1.11084703472699692902e-08,
525 5.03688744342840346564e-08,
526 4.81896001063495806249e-08,
527 4.83653666334089557746e-08,
528 1.29745882314081237628e-08,
529 9.84532844621636118964e-09,
530 4.25828404545651943883e-08};
549 r =
x * thirtytwo_by_logbaseof2;
557 r1 =
x -
n * logbaseof2_by_32_lead;
558 r2 = -
n * logbaseof2_by_32_trail;
565 f1 = two_to_jby32_lead_table[
j];
566 f2 = two_to_jby32_trail_table[
j];
573 r1 *= logbase;
r2 *= logbase;
577 r*
r*( 5.00000000000000008883e-01 +
578 r*( 1.66666666665260878863e-01 +
579 r*( 4.16666666662260795726e-02 +
580 r*( 8.33336798434219616221e-03 +
581 r*( 1.38889490863777199667e-03 ))))));
594#if defined(USE_SPLITEXPF)
601static inline void splitexpf(
float x,
float logbase,
602 float thirtytwo_by_logbaseof2,
603 float logbaseof2_by_32_lead,
604 float logbaseof2_by_32_trail,
605 int *
m,
float *z1,
float *z2)
616 static const float two_to_jby32_lead_table[32] = {
650 static const float two_to_jby32_trail_table[32] = {
701 r =
x * thirtytwo_by_logbaseof2;
709 r1 =
x -
n * logbaseof2_by_32_lead;
710 r2 = -
n * logbaseof2_by_32_trail;
717 f1 = two_to_jby32_lead_table[
j];
718 f2 = two_to_jby32_trail_table[
j];
725 r1 *= logbase;
r2 *= logbase;
729 r*
r*( 5.00000000000000008883e-01F +
730 r*( 1.66666666665260878863e-01F )));
743#if defined(USE_SCALEUPDOUBLE1024)
747static inline void scaleUpDouble1024(
unsigned long long ux,
unsigned long long *ur)
749 unsigned long long uy;
764 uy = ux + 0x4000000000000000;
773#if defined(USE_SCALEDOWNDOUBLE)
776static inline void scaleDownDouble(
unsigned long long ux,
int k,
777 unsigned long long *ur)
779 unsigned long long uy,
uk,
ax, xsign;
782 ax = ux & ~SIGNBIT_DP64;
787 uy = (
ax & ~EXPBITS_DP64) |
uk;
791 uy = (
ax & ~EXPBITS_DP64) | 0x0010000000000000;
800 uy = (uy >> 1) + (uy & 1);
809#if defined(USE_SCALEUPFLOAT128)
813static inline void scaleUpFloat128(
unsigned int ux,
unsigned int *ur)
831 uy = ux + 0x40000000;
837#if defined(USE_SCALEDOWNFLOAT)
840static inline void scaleDownFloat(
unsigned int ux,
int k,
843 unsigned int uy,
uk,
ax, xsign;
847 ax = ux & ~SIGNBIT_SP32;
852 uy = (
ax & ~EXPBITS_SP32) |
uk;
856 uy = (
ax & ~EXPBITS_SP32) | 0x00800000;
865 uy = (uy >> 1) + (uy & 1);
873#if defined(USE_SQRT_AMD_INLINE)
874static inline double sqrt_amd_inline(
double x)
901 unsigned long long ux,
ax,
u;
902 double r1,
r2,
c,
y,
p,
q,
r, twop,
z, rtc, rtc_lead, rtc_trail;
911 static const double rt_jby32_lead_table_dbl[97] = {
912 1.00000000000000000000e+00,
913 1.01550388336181640625e+00,
914 1.03077602386474609375e+00,
915 1.04582500457763671875e+00,
916 1.06065940856933593750e+00,
917 1.07528972625732421875e+00,
918 1.08972454071044921875e+00,
919 1.10396957397460937500e+00,
920 1.11803340911865234375e+00,
921 1.13192272186279296875e+00,
922 1.14564323425292968750e+00,
923 1.15920162200927734375e+00,
924 1.17260360717773437500e+00,
925 1.18585395812988281250e+00,
926 1.19895744323730468750e+00,
927 1.21191978454589843750e+00,
928 1.22474479675292968750e+00,
929 1.23743629455566406250e+00,
930 1.25000000000000000000e+00,
931 1.26243782043457031250e+00,
932 1.27475452423095703125e+00,
933 1.28695297241210937500e+00,
934 1.29903793334960937500e+00,
935 1.31101036071777343750e+00,
936 1.32287502288818359375e+00,
937 1.33463478088378906250e+00,
938 1.34629058837890625000e+00,
939 1.35784721374511718750e+00,
940 1.36930561065673828125e+00,
941 1.38066959381103515625e+00,
942 1.39194107055664062500e+00,
943 1.40312099456787109375e+00,
944 1.41421318054199218750e+00,
945 1.42521858215332031250e+00,
946 1.43614006042480468750e+00,
947 1.44697952270507812500e+00,
948 1.45773792266845703125e+00,
949 1.46841716766357421875e+00,
950 1.47901916503906250000e+00,
951 1.48954677581787109375e+00,
952 1.50000000000000000000e+00,
953 1.51038074493408203125e+00,
954 1.52068996429443359375e+00,
955 1.53093051910400390625e+00,
956 1.54110336303710937500e+00,
957 1.55120849609375000000e+00,
958 1.56124877929687500000e+00,
959 1.57122516632080078125e+00,
960 1.58113861083984375000e+00,
961 1.59099006652832031250e+00,
962 1.60078048706054687500e+00,
963 1.61051177978515625000e+00,
964 1.62018489837646484375e+00,
965 1.62979984283447265625e+00,
966 1.63935947418212890625e+00,
967 1.64886283874511718750e+00,
968 1.65831184387207031250e+00,
969 1.66770744323730468750e+00,
970 1.67705059051513671875e+00,
971 1.68634128570556640625e+00,
972 1.69558238983154296875e+00,
973 1.70477199554443359375e+00,
974 1.71391296386718750000e+00,
975 1.72300529479980468750e+00,
976 1.73204994201660156250e+00,
977 1.74104785919189453125e+00,
978 1.75000000000000000000e+00,
979 1.75890541076660156250e+00,
980 1.76776695251464843750e+00,
981 1.77658367156982421875e+00,
982 1.78535652160644531250e+00,
983 1.79408740997314453125e+00,
984 1.80277538299560546875e+00,
985 1.81142139434814453125e+00,
986 1.82002735137939453125e+00,
987 1.82859230041503906250e+00,
988 1.83711719512939453125e+00,
989 1.84560203552246093750e+00,
990 1.85404872894287109375e+00,
991 1.86245727539062500000e+00,
992 1.87082862854003906250e+00,
993 1.87916183471679687500e+00,
994 1.88745784759521484375e+00,
995 1.89571857452392578125e+00,
996 1.90394306182861328125e+00,
997 1.91213226318359375000e+00,
998 1.92028617858886718750e+00,
999 1.92840576171875000000e+00,
1000 1.93649101257324218750e+00,
1001 1.94454288482666015625e+00,
1002 1.95256233215332031250e+00,
1003 1.96054744720458984375e+00,
1004 1.96850109100341796875e+00,
1005 1.97642326354980468750e+00,
1006 1.98431301116943359375e+00,
1007 1.99217128753662109375e+00,
1008 2.00000000000000000000e+00};
1010 static const double rt_jby32_trail_table_dbl[97] = {
1011 0.00000000000000000000e+00,
1012 9.17217678638807524014e-07,
1013 3.82539669043705364790e-07,
1014 2.85899577162227138140e-08,
1015 7.63210485349101216659e-07,
1016 9.32123004127716212874e-07,
1017 1.95174719169309219157e-07,
1018 5.34316371481845492427e-07,
1019 5.79631242504454563052e-07,
1020 4.20404384109571705948e-07,
1021 6.89486030314147010716e-07,
1022 6.89927685625314560328e-07,
1023 3.32778123013641425828e-07,
1024 1.64433259436999584387e-07,
1025 4.37590875197899335723e-07,
1026 1.79808183816018617413e-07,
1027 7.46386593615986477624e-08,
1028 5.72520794105201454728e-07,
1029 0.00000000000000000000e+00,
1030 2.96860689431670420344e-07,
1031 3.54167239176257065345e-07,
1032 7.95211265664474710063e-07,
1033 1.72327048595145565621e-07,
1034 6.99494915996239297020e-07,
1035 6.32644111701500844315e-07,
1036 6.20124838851440463844e-10,
1037 6.13404719757812629969e-07,
1038 3.47654909777986407387e-07,
1039 7.83106177002392475763e-07,
1040 5.33337372440526357008e-07,
1041 2.01508648555298681765e-08,
1042 5.25472356925843939587e-07,
1043 3.81831102861301692797e-07,
1044 6.99220602161420018738e-07,
1045 6.01209702477462624811e-07,
1046 9.01437000591944740554e-08,
1047 5.10428680864685379950e-08,
1048 3.47895267104621031421e-07,
1049 7.80735841510641848628e-07,
1050 1.35158752025506517690e-07,
1051 0.00000000000000000000e+00,
1052 1.76523947728535489812e-09,
1053 6.68280121328499932183e-07,
1054 5.70135482405123276616e-07,
1055 1.37705134737562525897e-07,
1056 7.09655107074516613672e-07,
1057 7.20302724551461693011e-07,
1058 4.69926266058212796694e-07,
1059 2.19244345915999437026e-07,
1060 1.91141411617401877927e-07,
1061 5.72297665296622053774e-07,
1062 5.61055484436830560103e-07,
1063 2.76225500213991506100e-07,
1064 7.58466189522395692908e-07,
1065 1.56893371256836029827e-07,
1066 4.06038997708867066507e-07,
1067 5.51305629612057435809e-07,
1068 5.64778487026561123207e-07,
1069 3.92609705553556897517e-07,
1070 9.09698438776943827802e-07,
1071 1.05949774066016139743e-07,
1072 7.16578798392844784244e-07,
1073 6.86233073531233972561e-07,
1074 7.99211473033494452908e-07,
1075 8.65552275731027456121e-07,
1076 6.75456120386058448618e-07,
1077 0.00000000000000000000e+00,
1078 4.99167184520462138743e-07,
1079 4.51720373502110930296e-10,
1080 1.28874162718371367439e-07,
1081 5.85529267186999798656e-07,
1082 1.01827770937125531924e-07,
1083 2.54736389177809626508e-07,
1084 6.98925535290464831294e-07,
1085 1.20940735036524314513e-07,
1086 5.43759351196479689657e-08,
1087 1.11957989042397958409e-07,
1088 8.47006714134442661218e-07,
1089 8.92831044643427836228e-07,
1090 7.77828292464916501663e-07,
1091 6.48469316302918797451e-08,
1092 2.12579816658859849140e-07,
1093 7.61222472580559138435e-07,
1094 2.86488961857314189607e-07,
1095 2.14637363790165363515e-07,
1096 5.44137005612605847831e-08,
1097 2.58378284856442408413e-07,
1098 3.15848939061134843091e-07,
1099 6.60530466255089632309e-07,
1100 7.63436345535852301127e-07,
1101 8.68233432860324345268e-08,
1102 9.45465175398023087082e-07,
1103 8.77499534786171267246e-07,
1104 2.74055432394999316135e-07,
1105 4.72129009349126213532e-07,
1106 8.93777032327078947306e-07,
1107 0.00000000000000000000e+00};
1113 ax = ux & (~SIGNBIT_DP64);
1115 if(
ax >= 0x7ff0000000000000)
1137 else if (ux <= 0x000fffffffffffff)
1150 static const double corr = 2.5653355008114851558350183e-290;
1193 rtc_lead = rt_jby32_lead_table_dbl[
index-32];
1194 rtc_trail = rt_jby32_trail_table_dbl[
index-32];
1203 p =
r*0.5 -
r*
r*(0.1250079870 -
r*(0.6250522999E-01));
1205 q =
p - (
p*
p + (twop -
r))/(twop + 2.0);
1209 rtc = rtc_lead + rtc_trail;
1211 z = rtc_lead + (rtc*
q+rtc_trail);
1231#if defined(USE_SQRTF_AMD_INLINE)
1233static inline float sqrtf_amd_inline(
float x)
1260 unsigned int ux,
ax,
u;
1261 float r1,
r2,
c,
y,
p,
q,
r, twop,
z, rtc, rtc_lead, rtc_trail;
1262 int e, denorm = 0,
index;
1270static const float rt_jby32_lead_table_float[97] = {
1271 1.00000000000000000000e+00F,
1272 1.01538085937500000000e+00F,
1273 1.03076171875000000000e+00F,
1274 1.04565429687500000000e+00F,
1275 1.06054687500000000000e+00F,
1276 1.07519531250000000000e+00F,
1277 1.08959960937500000000e+00F,
1278 1.10375976562500000000e+00F,
1279 1.11791992187500000000e+00F,
1280 1.13183593750000000000e+00F,
1281 1.14550781250000000000e+00F,
1282 1.15917968750000000000e+00F,
1283 1.17236328125000000000e+00F,
1284 1.18579101562500000000e+00F,
1285 1.19873046875000000000e+00F,
1286 1.21191406250000000000e+00F,
1287 1.22460937500000000000e+00F,
1288 1.23730468750000000000e+00F,
1289 1.25000000000000000000e+00F,
1290 1.26220703125000000000e+00F,
1291 1.27465820312500000000e+00F,
1292 1.28686523437500000000e+00F,
1293 1.29882812500000000000e+00F,
1294 1.31079101562500000000e+00F,
1295 1.32275390625000000000e+00F,
1296 1.33447265625000000000e+00F,
1297 1.34619140625000000000e+00F,
1298 1.35766601562500000000e+00F,
1299 1.36914062500000000000e+00F,
1300 1.38061523437500000000e+00F,
1301 1.39184570312500000000e+00F,
1302 1.40307617187500000000e+00F,
1303 1.41406250000000000000e+00F,
1304 1.42504882812500000000e+00F,
1305 1.43603515625000000000e+00F,
1306 1.44677734375000000000e+00F,
1307 1.45751953125000000000e+00F,
1308 1.46826171875000000000e+00F,
1309 1.47900390625000000000e+00F,
1310 1.48950195312500000000e+00F,
1311 1.50000000000000000000e+00F,
1312 1.51025390625000000000e+00F,
1313 1.52050781250000000000e+00F,
1314 1.53076171875000000000e+00F,
1315 1.54101562500000000000e+00F,
1316 1.55102539062500000000e+00F,
1317 1.56103515625000000000e+00F,
1318 1.57104492187500000000e+00F,
1319 1.58105468750000000000e+00F,
1320 1.59082031250000000000e+00F,
1321 1.60058593750000000000e+00F,
1322 1.61035156250000000000e+00F,
1323 1.62011718750000000000e+00F,
1324 1.62963867187500000000e+00F,
1325 1.63916015625000000000e+00F,
1326 1.64868164062500000000e+00F,
1327 1.65820312500000000000e+00F,
1328 1.66748046875000000000e+00F,
1329 1.67700195312500000000e+00F,
1330 1.68627929687500000000e+00F,
1331 1.69555664062500000000e+00F,
1332 1.70458984375000000000e+00F,
1333 1.71386718750000000000e+00F,
1334 1.72290039062500000000e+00F,
1335 1.73193359375000000000e+00F,
1336 1.74096679687500000000e+00F,
1337 1.75000000000000000000e+00F,
1338 1.75878906250000000000e+00F,
1339 1.76757812500000000000e+00F,
1340 1.77636718750000000000e+00F,
1341 1.78515625000000000000e+00F,
1342 1.79394531250000000000e+00F,
1343 1.80273437500000000000e+00F,
1344 1.81127929687500000000e+00F,
1345 1.81982421875000000000e+00F,
1346 1.82836914062500000000e+00F,
1347 1.83691406250000000000e+00F,
1348 1.84545898437500000000e+00F,
1349 1.85400390625000000000e+00F,
1350 1.86230468750000000000e+00F,
1351 1.87060546875000000000e+00F,
1352 1.87915039062500000000e+00F,
1353 1.88745117187500000000e+00F,
1354 1.89550781250000000000e+00F,
1355 1.90380859375000000000e+00F,
1356 1.91210937500000000000e+00F,
1357 1.92016601562500000000e+00F,
1358 1.92822265625000000000e+00F,
1359 1.93627929687500000000e+00F,
1360 1.94433593750000000000e+00F,
1361 1.95239257812500000000e+00F,
1362 1.96044921875000000000e+00F,
1363 1.96826171875000000000e+00F,
1364 1.97631835937500000000e+00F,
1365 1.98413085937500000000e+00F,
1366 1.99194335937500000000e+00F,
1367 2.00000000000000000000e+00F};
1369static const float rt_jby32_trail_table_float[97] = {
1370 0.00000000000000000000e+00F,
1371 1.23941208585165441036e-04F,
1372 1.46876545841223560274e-05F,
1373 1.70736297150142490864e-04F,
1374 1.13296780909877270460e-04F,
1375 9.53458802541717886925e-05F,
1376 1.25126505736261606216e-04F,
1377 2.10342666832730174065e-04F,
1378 1.14066875539720058441e-04F,
1379 8.72047676239162683487e-05F,
1380 1.36111237225122749805e-04F,
1381 2.26244374061934649944e-05F,
1382 2.40658700931817293167e-04F,
1383 6.31069415248930454254e-05F,
1384 2.27412077947519719601e-04F,
1385 5.90185391047270968556e-06F,
1386 1.35496389702893793583e-04F,
1387 1.32179571664892137051e-04F,
1388 0.00000000000000000000e+00F,
1389 2.31086043640971183777e-04F,
1390 9.66752704698592424393e-05F,
1391 8.85332483449019491673e-05F,
1392 2.09980673389509320259e-04F,
1393 2.20044588786549866199e-04F,
1394 1.21749282698146998882e-04F,
1395 1.62125259521417319775e-04F,
1396 9.97955357888713479042e-05F,
1397 1.81545779923908412457e-04F,
1398 1.65768768056295812130e-04F,
1399 5.48927710042335093021e-05F,
1400 9.53875860432162880898e-05F,
1401 4.53481625299900770187e-05F,
1402 1.51062369695864617825e-04F,
1403 1.70453247847035527229e-04F,
1404 1.05505387182347476482e-04F,
1405 2.02269104192964732647e-04F,
1406 2.18442466575652360916e-04F,
1407 1.55796806211583316326e-04F,
1408 1.60395247803535312414e-05F,
1409 4.49578510597348213196e-05F,
1410 0.00000000000000000000e+00F,
1411 1.26840444863773882389e-04F,
1412 1.82820076588541269302e-04F,
1413 1.69370483490638434887e-04F,
1414 8.78757418831810355186e-05F,
1415 1.83815121999941766262e-04F,
1416 2.14343352126888930798e-04F,
1417 1.80714370799250900745e-04F,
1418 8.41425862745381891727e-05F,
1419 1.69945167726837098598e-04F,
1420 1.95121858268976211548e-04F,
1421 1.60778334247879683971e-04F,
1422 6.79871009197086095810e-05F,
1423 1.61929419846273958683e-04F,
1424 1.99474830878898501396e-04F,
1425 1.81604162207804620266e-04F,
1426 1.09270178654696792364e-04F,
1427 2.27539261686615645885e-04F,
1428 4.90300008095800876617e-05F,
1429 6.28985289949923753738e-05F,
1430 2.58551553997676819563e-05F,
1431 1.82868374395184218884e-04F,
1432 4.64625991298817098141e-05F,
1433 1.05703387816902250051e-04F,
1434 1.17213814519345760345e-04F,
1435 8.17377731436863541603e-05F,
1436 0.00000000000000000000e+00F,
1437 1.16847433673683553934e-04F,
1438 1.88827965757809579372e-04F,
1439 2.16612941585481166840e-04F,
1440 2.00857131858356297016e-04F,
1441 1.42199307447299361229e-04F,
1442 4.12627305195201188326e-05F,
1443 1.42796401632949709892e-04F,
1444 2.03253570361994206905e-04F,
1445 2.23214170546270906925e-04F,
1446 2.03244591830298304558e-04F,
1447 1.43898156238719820976e-04F,
1448 4.57155256299301981926e-05F,
1449 1.53365719597786664963e-04F,
1450 2.23224633373320102692e-04F,
1451 1.16566716314991936088e-05F,
1452 7.43694272387074306607e-06F,
1453 2.11048507480882108212e-04F,
1454 1.34682719362899661064e-04F,
1455 2.29425968427676707506e-05F,
1456 1.20421340398024767637e-04F,
1457 1.83421318070031702518e-04F,
1458 2.12376224226318299770e-04F,
1459 2.07710763788782060146e-04F,
1460 1.69840845046564936638e-04F,
1461 9.91739216260612010956e-05F,
1462 2.40249748458154499531e-04F,
1463 1.05178231024183332920e-04F,
1464 1.82623916771262884140e-04F,
1465 2.28821940254420042038e-04F,
1466 0.00000000000000000000e+00F};
1472 ax = ux & (~SIGNBIT_SP32);
1474 if(
ax >= 0x7f800000)
1495 else if (ux <= 0x007fffff)
1508 static const float corr = 7.888609052210118054e-31F;
1550 rtc_lead = rt_jby32_lead_table_float[
index-32];
1551 rtc_trail = rt_jby32_trail_table_float[
index-32];
1560 p =
r*0.5F -
r*
r*(0.1250079870F -
r*(0.6250522999e-01F));
1562 q =
p - (
p*
p + (twop -
r))/(twop + 2.0);
1566 rtc = rtc_lead + rtc_trail;
1568 z = rtc_lead + (rtc*
q+rtc_trail);
1588#ifdef USE_LOG_KERNEL_AMD
1589static inline void log_kernel_amd64(
double x,
unsigned long long ux,
int *xexp,
double *
r1,
double *
r2)
1593 double r, z1, z2, correction,
f,
f1,
f2,
q,
u,
v, poly;
1611 static const double ln_lead_table[65] = {
1612 0.00000000000000000000e+00,
1613 1.55041813850402832031e-02,
1614 3.07716131210327148438e-02,
1615 4.58095073699951171875e-02,
1616 6.06245994567871093750e-02,
1617 7.52233862876892089844e-02,
1618 8.96121263504028320312e-02,
1619 1.03796780109405517578e-01,
1620 1.17783010005950927734e-01,
1621 1.31576299667358398438e-01,
1622 1.45181953907012939453e-01,
1623 1.58604979515075683594e-01,
1624 1.71850204467773437500e-01,
1625 1.84922337532043457031e-01,
1626 1.97825729846954345703e-01,
1627 2.10564732551574707031e-01,
1628 2.23143517971038818359e-01,
1629 2.35566020011901855469e-01,
1630 2.47836112976074218750e-01,
1631 2.59957492351531982422e-01,
1632 2.71933674812316894531e-01,
1633 2.83768117427825927734e-01,
1634 2.95464158058166503906e-01,
1635 3.07025015354156494141e-01,
1636 3.18453729152679443359e-01,
1637 3.29753279685974121094e-01,
1638 3.40926527976989746094e-01,
1639 3.51976394653320312500e-01,
1640 3.62905442714691162109e-01,
1641 3.73716354370117187500e-01,
1642 3.84411692619323730469e-01,
1643 3.94993782043457031250e-01,
1644 4.05465066432952880859e-01,
1645 4.15827870368957519531e-01,
1646 4.26084339618682861328e-01,
1647 4.36236739158630371094e-01,
1648 4.46287095546722412109e-01,
1649 4.56237375736236572266e-01,
1650 4.66089725494384765625e-01,
1651 4.75845873355865478516e-01,
1652 4.85507786273956298828e-01,
1653 4.95077252388000488281e-01,
1654 5.04556000232696533203e-01,
1655 5.13945698738098144531e-01,
1656 5.23248136043548583984e-01,
1657 5.32464742660522460938e-01,
1658 5.41597247123718261719e-01,
1659 5.50647079944610595703e-01,
1660 5.59615731239318847656e-01,
1661 5.68504691123962402344e-01,
1662 5.77315330505371093750e-01,
1663 5.86049020290374755859e-01,
1664 5.94707071781158447266e-01,
1665 6.03290796279907226562e-01,
1666 6.11801505088806152344e-01,
1667 6.20240390300750732422e-01,
1668 6.28608644008636474609e-01,
1669 6.36907458305358886719e-01,
1670 6.45137906074523925781e-01,
1671 6.53301239013671875000e-01,
1672 6.61398470401763916016e-01,
1673 6.69430613517761230469e-01,
1674 6.77398800849914550781e-01,
1675 6.85303986072540283203e-01,
1676 6.93147122859954833984e-01};
1678 static const double ln_tail_table[65] = {
1679 0.00000000000000000000e+00,
1680 5.15092497094772879206e-09,
1681 4.55457209735272790188e-08,
1682 2.86612990859791781788e-08,
1683 2.23596477332056055352e-08,
1684 3.49498983167142274770e-08,
1685 3.23392843005887000414e-08,
1686 1.35722380472479366661e-08,
1687 2.56504325268044191098e-08,
1688 5.81213608741512136843e-08,
1689 5.59374849578288093334e-08,
1690 5.06615629004996189970e-08,
1691 5.24588857848400955725e-08,
1692 9.61968535632653505972e-10,
1693 1.34829655346594463137e-08,
1694 3.65557749306383026498e-08,
1695 3.33431709374069198903e-08,
1696 5.13008650536088382197e-08,
1697 5.09285070380306053751e-08,
1698 3.20853940845502057341e-08,
1699 4.06713248643004200446e-08,
1700 5.57028186706125221168e-08,
1701 5.48356693724804282546e-08,
1702 1.99407553679345001938e-08,
1703 1.96585517245087232086e-09,
1704 6.68649386072067321503e-09,
1705 5.89936034642113390002e-08,
1706 2.85038578721554472484e-08,
1707 5.09746772910284482606e-08,
1708 5.54234668933210171467e-08,
1709 6.29100830926604004874e-09,
1710 2.61974119468563937716e-08,
1711 4.16752115011186398935e-08,
1712 2.47747534460820790327e-08,
1713 5.56922172017964209793e-08,
1714 2.76162876992552906035e-08,
1715 7.08169709942321478061e-09,
1716 5.77453510221151779025e-08,
1717 4.43021445893361960146e-09,
1718 3.15140984357495864573e-08,
1719 2.95077445089736670973e-08,
1720 1.44098510263167149349e-08,
1721 1.05196987538551827693e-08,
1722 5.23641361722697546261e-08,
1723 7.72099925253243069458e-09,
1724 5.62089493829364197156e-08,
1725 3.53090261098577946927e-08,
1726 3.80080516835568242269e-08,
1727 5.66961038386146408282e-08,
1728 4.42287063097349852717e-08,
1729 3.45294525105681104660e-08,
1730 2.47132034530447431509e-08,
1731 3.59655343422487209774e-08,
1732 5.51581770357780862071e-08,
1733 3.60171867511861372793e-08,
1734 1.94511067964296180547e-08,
1735 1.54137376631349347838e-08,
1736 3.93171034490174464173e-09,
1737 5.52990607758839766440e-08,
1738 3.29990737637586136511e-08,
1739 1.18436010922446096216e-08,
1740 4.04248680368301346709e-08,
1741 2.27418915900284316293e-08,
1742 1.70263791333409206020e-08,
1743 5.76999904754328540596e-08};
1747 ca_1 = 8.33333333333317923934e-02,
1748 ca_2 = 1.25000000037717509602e-02,
1749 ca_3 = 2.23213998791944806202e-03,
1750 ca_4 = 4.34887777707614552256e-04;
1754 cb_1 = 8.33333333333333593622e-02,
1755 cb_2 = 1.24999999978138668903e-02,
1756 cb_3 = 2.23219810758559851206e-03;
1758 static const unsigned long long
1759 log_thresh1 = 0x3fee0faa00000000,
1760 log_thresh2 = 0x3ff1082c00000000;
1764 if (ux >= log_thresh1 && ux <= log_thresh2)
1798 z2 = (
u *
v * (ca_1 +
v * (ca_2 +
v * (ca_3 +
v * ca_4))) - correction);
1834 static const double corr = 2.5653355008114851558350183e-290;
1836 ux |= 0x03d0000000000000;
1856 z1 = ln_lead_table[
index-64];
1857 q = ln_tail_table[
index-64];
1871 poly = (
v * (cb_1 +
v * (cb_2 +
v * cb_3)));
1872 z2 =
q + (
u +
u * poly);
1880#if defined(USE_REMAINDER_PIBY2F_INLINE)
1882#define DEBUGGING_PRINT
1883#undef DEBUGGING_PRINT
1886#ifdef DEBUGGING_PRINT
1890 static char buff[200];
1918static inline void __remainder_piby2f_inline(
unsigned long long ux,
double *
r,
int *region)
1928 unsigned long long res[10];
1929 unsigned long long u, carry,
mask, mant, nextbits;
1930 int first,
last,
i, rexp, xexp, resexp, ltb, determ,
bc;
1933 piby2 = 1.57079632679489655800e+00;
1934 static unsigned long long pibits[] =
1937 5215LL, 13000023176LL, 11362338026LL, 67174558139LL,
1938 34819822259LL, 10612056195LL, 67816420731LL, 57840157550LL,
1939 19558516809LL, 50025467026LL, 25186875954LL, 18152700886LL
1960#ifdef DEBUGGING_PRINT
1969 u = pibits[
i] * ux + carry;
1978 u = pibits[
last] * ux;
1981 u = pibits[
last - 1] * ux + carry;
1984 u = pibits[
last - 2] * ux + carry;
1987 u = pibits[
first] * ux + carry;
1991#ifdef DEBUGGING_PRINT
1992 printf(
"resexp = %d\n", resexp);
1993 printf(
"Significant part of x * 2/pi with binary"
1994 " point in correct place:\n");
1997 if (
i > 0 &&
i % 5 == 0)
2009 >> (
bitsper - 1 - resexp)) & 7);
2014#ifdef DEBUGGING_PRINT
2015 printf(
"ltb = %d (last two bits before binary point"
2016 " and first bit after)\n", ltb);
2017 printf(
"determ = %d (1 means need to negate because the fractional\n"
2018 " part of x * 2/pi is greater than 0.5)\n", determ);
2026 *region = ((ltb >> 1) + 1) & 3;
2028 mant = ~(
res[1]) & ((mant << (
bitsper - resexp)) - 1);
2029 while (mant < 0x0000000000010000)
2038 *region = (ltb >> 1);
2040 mant =
res[1] & ((mant << (
bitsper - resexp)) - 1);
2041 while (mant < 0x0000000000010000)
2046 nextbits =
res[
i+1];
2049#ifdef DEBUGGING_PRINT
2050 printf(
"First bits of mant = %s\n", d2b(mant,
bitsper, -1));
2056 while (mant < 0x0000400000000000)
2061 while (mant < 0x0010000000000000)
2070#ifdef DEBUGGING_PRINT
2071 printf(
"Normalised mantissa = 0x%016lx\n", mant);
2072 printf(
"Exponent to be inserted on mantissa = rexp = %d\n", rexp);
2083#ifdef DEBUGGING_PRINT
2084 printf(
"(x*2/pi) = %25.20e = %s\n",
dx, double2hex(&
dx));
2092#ifdef DEBUGGING_PRINT
2093 printf(
" r = frac(x*2/pi) * pi/2:\n");
2094 printf(
" r = %25.20e = %s\n", *
r, double2hex(
r));
2095 printf(
"region = (number of pi/2 subtracted from x) mod 4 = %d\n",
__ATTRIBUTE_SSE__ void _set_statfp(uintptr_t mask)
unsigned int(__cdecl typeof(jpeg_read_scanlines))(struct jpeg_decompress_struct *
static unsigned char buff[32768]
GLint GLint GLint GLint GLint x
GLuint GLuint GLsizei GLenum type
GLint GLint GLint GLint GLint GLint y
GLdouble GLdouble GLdouble r
GLdouble GLdouble GLdouble GLdouble q
GLuint GLuint GLuint GLuint arg1
GLuint GLuint GLuint GLuint GLuint GLuint GLuint arg2
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble * u
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint GLint GLint j
float _handle_errorf(char *fname, int opcode, unsigned long long value, int type, int flags, int error, float arg1, float arg2, int nargs)
double _handle_error(char *fname, int opcode, unsigned long long value, int type, int flags, int error, double arg1, double arg2, int nargs)
Handles an error condition.
#define GET_BITS_SP32(x, ux)
#define EXPSHIFTBITS_DP64
#define GET_BITS_DP64(x, ux)
#define PUT_BITS_SP32(ux, x)
#define EXPSHIFTBITS_SP32
#define PUT_BITS_DP64(ux, x)
ecx edi movl ebx edx edi decl ecx esi eax jecxz decl eax andl eax esi movl edx movl TEMP incl eax andl eax ecx incl ebx testl eax jnz xchgl ecx incl TEMP esp ecx subl ebx pushl ecx ecx edx ecx shrl ecx mm0 mm4 mm0 mm4 mm1 mm5 mm1 mm5 mm2 mm6 mm2 mm6 mm3 mm7 mm3 mm7 paddd mm0 paddd mm4 paddd mm0 paddd mm4 paddd mm0 paddd mm4 movq mm1 movq mm5 psrlq mm1 psrlq mm5 paddd mm0 paddd mm4 psrad mm0 psrad mm4 packssdw mm0 packssdw mm4 mm1 punpckldq mm0 pand mm1 pand mm0 por mm1 movq edi esi edx edi decl ecx jnz popl ecx andl ecx jecxz mm0 mm0 mm1 mm1 mm2 mm2 mm3 mm3 paddd mm0 paddd mm0 paddd mm0 movq mm1 psrlq mm1 paddd mm0 psrad mm0 packssdw mm0 movd eax movw ax
void _mm_setcsr(unsigned int a)
unsigned int _mm_getcsr(void)