d3/dbf/libm__inlines_8h_source.html

/***********************************************************************************/

/***********************************************************************************/


#ifndef LIBM_INLINES_AMD_H_INCLUDED

#define LIBM_INLINES_AMD_H_INCLUDED 1


#include "libm_util.h"


/* Set defines for inline functions calling other inlines */

#if defined(USE_VAL_WITH_FLAGS) || defined(USE_VALF_WITH_FLAGS) || \

    defined(USE_ZERO_WITH_FLAGS) || defined(USE_ZEROF_WITH_FLAGS) || \

    defined(USE_NAN_WITH_FLAGS) || defined(USE_NANF_WITH_FLAGS) || \

    defined(USE_INDEFINITE_WITH_FLAGS) || defined(USE_INDEFINITEF_WITH_FLAGS) || \

    defined(USE_INFINITY_WITH_FLAGS) || defined(USE_INFINITYF_WITH_FLAGS) || \

    defined(USE_SQRT_AMD_INLINE) || defined(USE_SQRTF_AMD_INLINE) || \

    (defined(USE_HANDLE_ERROR) || defined(USE_HANDLE_ERRORF))

#undef USE_RAISE_FPSW_FLAGS

#define USE_RAISE_FPSW_FLAGS 1

#endif


#if defined(USE_SPLITDOUBLE)

/* Splits double x into exponent e and mantissa m, where 0.5 <= abs(m) < 1.0.

   Assumes that x is not zero, denormal, infinity or NaN, but these conditions

   are not checked */

static inline void splitDouble(double x, int *e, double *m)

{

  unsigned long long ux, uy;

  GET_BITS_DP64(x, ux);

  uy = ux;

  ux &= EXPBITS_DP64;

  ux >>= EXPSHIFTBITS_DP64;

  *e = (int)ux - EXPBIAS_DP64 + 1;

  uy = (uy & (SIGNBIT_DP64 | MANTBITS_DP64)) | HALFEXPBITS_DP64;

  PUT_BITS_DP64(uy, x);

  *m = x;

}

#endif /* USE_SPLITDOUBLE */


#if defined(USE_SPLITDOUBLE_2)

/* Splits double x into exponent e and mantissa m, where 1.0 <= abs(m) < 4.0.

   Assumes that x is not zero, denormal, infinity or NaN, but these conditions

   are not checked. Also assumes EXPBIAS_DP is odd. With this

   assumption, e will be even on exit. */

static inline void splitDouble_2(double x, int *e, double *m)

{

  unsigned long long ux, vx;

  GET_BITS_DP64(x, ux);

  vx = ux;

  ux &= EXPBITS_DP64;

  ux >>= EXPSHIFTBITS_DP64;

  if (ux & 1)

    {

      /* The exponent is odd */

      vx = (vx & (SIGNBIT_DP64 | MANTBITS_DP64)) | ONEEXPBITS_DP64;

      PUT_BITS_DP64(vx, x);

      *m = x;

      *e = ux - EXPBIAS_DP64;

    }

  else

    {

      /* The exponent is even */

      vx = (vx & (SIGNBIT_DP64 | MANTBITS_DP64)) | TWOEXPBITS_DP64;

      PUT_BITS_DP64(vx, x);

      *m = x;

      *e = ux - EXPBIAS_DP64 - 1;

    }

}

#endif /* USE_SPLITDOUBLE_2 */


#if defined(USE_SPLITFLOAT)

/* Splits float x into exponent e and mantissa m, where 0.5 <= abs(m) < 1.0.

   Assumes that x is not zero, denormal, infinity or NaN, but these conditions

   are not checked */

static inline void splitFloat(float x, int *e, float *m)

{

  unsigned int ux, uy;

  GET_BITS_SP32(x, ux);

  uy = ux;

  ux &= EXPBITS_SP32;

  ux >>= EXPSHIFTBITS_SP32;

  *e = (int)ux - EXPBIAS_SP32 + 1;

  uy = (uy & (SIGNBIT_SP32 | MANTBITS_SP32)) | HALFEXPBITS_SP32;

  PUT_BITS_SP32(uy, x);

  *m = x;

}

#endif /* USE_SPLITFLOAT */


#if defined(USE_SCALEDOUBLE_1)

/* Scales the double x by 2.0**n.

   Assumes EMIN <= n <= EMAX, though this condition is not checked. */

static inline double scaleDouble_1(double x, int n)

{

  double t;

  /* Construct the number t = 2.0**n */

  PUT_BITS_DP64(((long long)n + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, t);

  return x*t;

}

#endif /* USE_SCALEDOUBLE_1 */


#if defined(USE_SCALEDOUBLE_2)

/* Scales the double x by 2.0**n.

   Assumes 2*EMIN <= n <= 2*EMAX, though this condition is not checked. */

static inline double scaleDouble_2(double x, int n)

{

  double t1, t2;

  int n1, n2;

  n1 = n / 2;

  n2 = n - n1;

  /* Construct the numbers t1 = 2.0**n1 and t2 = 2.0**n2 */

  PUT_BITS_DP64(((long long)n1 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, t1);

  PUT_BITS_DP64(((long long)n2 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, t2);

  return (x*t1)*t2;

}

#endif /* USE_SCALEDOUBLE_2 */


#if defined(USE_SCALEDOUBLE_3)

/* Scales the double x by 2.0**n.

   Assumes 3*EMIN <= n <= 3*EMAX, though this condition is not checked. */

static inline double scaleDouble_3(double x, int n)

{

  double t1, t2, t3;

  int n1, n2, n3;

  n1 = n / 3;

  n2 = (n - n1) / 2;

  n3 = n - n1 - n2;

  /* Construct the numbers t1 = 2.0**n1, t2 = 2.0**n2 and t3 = 2.0**n3 */

  PUT_BITS_DP64(((long long)n1 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, t1);

  PUT_BITS_DP64(((long long)n2 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, t2);

  PUT_BITS_DP64(((long long)n3 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, t3);

  return ((x*t1)*t2)*t3;

}

#endif /* USE_SCALEDOUBLE_3 */


#if defined(USE_SCALEFLOAT_1)

/* Scales the float x by 2.0**n.

   Assumes EMIN <= n <= EMAX, though this condition is not checked. */

static inline float scaleFloat_1(float x, int n)

{

  float t;

  /* Construct the number t = 2.0**n */

  PUT_BITS_SP32((n + EXPBIAS_SP32) << EXPSHIFTBITS_SP32, t);

  return x*t;

}

#endif /* USE_SCALEFLOAT_1 */


#if defined(USE_SCALEFLOAT_2)

/* Scales the float x by 2.0**n.

   Assumes 2*EMIN <= n <= 2*EMAX, though this condition is not checked. */

static inline float scaleFloat_2(float x, int n)

{

  float t1, t2;

  int n1, n2;

  n1 = n / 2;

  n2 = n - n1;

  /* Construct the numbers t1 = 2.0**n1 and t2 = 2.0**n2 */

  PUT_BITS_SP32((n1 + EXPBIAS_SP32) << EXPSHIFTBITS_SP32, t1);

  PUT_BITS_SP32((n2 + EXPBIAS_SP32) << EXPSHIFTBITS_SP32, t2);

  return (x*t1)*t2;

}

#endif /* USE_SCALEFLOAT_2 */


#if defined(USE_SCALEFLOAT_3)

/* Scales the float x by 2.0**n.

   Assumes 3*EMIN <= n <= 3*EMAX, though this condition is not checked. */

static inline float scaleFloat_3(float x, int n)

{

  float t1, t2, t3;

  int n1, n2, n3;

  n1 = n / 3;

  n2 = (n - n1) / 2;

  n3 = n - n1 - n2;

  /* Construct the numbers t1 = 2.0**n1, t2 = 2.0**n2 and t3 = 2.0**n3 */

  PUT_BITS_SP32((n1 + EXPBIAS_SP32) << EXPSHIFTBITS_SP32, t1);

  PUT_BITS_SP32((n2 + EXPBIAS_SP32) << EXPSHIFTBITS_SP32, t2);

  PUT_BITS_SP32((n3 + EXPBIAS_SP32) << EXPSHIFTBITS_SP32, t3);

  return ((x*t1)*t2)*t3;

}

#endif /* USE_SCALEFLOAT_3 */


#if defined(USE_SETPRECISIONDOUBLE)

unsigned int setPrecisionDouble(void)

{

  unsigned int cw, cwold = 0;

  /* There is no precision control on Hammer */

  return cwold;

}

#endif /* USE_SETPRECISIONDOUBLE */


#if defined(USE_RESTOREPRECISION)

void restorePrecision(unsigned int cwold)

{

  /* There is no precision control on Hammer */

  return;

}

#endif /* USE_RESTOREPRECISION */


#if defined(USE_RAISE_FPSW_FLAGS)

/* Raises floating-point status flags. The argument should be

   the bitwise or of the flags to be raised, from the

   list above, e.g.

     raise_fpsw_flags(AMD_F_INEXACT | AMD_F_INVALID);

 */


/* ISSUE - wat - 08182010

 * These AMD_ISW_* flags are duplicated from trans.h

 * this is not clean; Mark S. did it for targeted fix of 855457

 * Eliminate all redundant flags in the next overhaul

 */


#define AMD_ISW_INVALID     0x0001

#define AMD_ISW_DENORMAL    0x0002

#define AMD_ISW_ZERODIVIDE  0x0004

#define AMD_ISW_OVERFLOW    0x0008

#define AMD_ISW_UNDERFLOW   0x0010

#define AMD_ISW_INEXACT     0x0020


/* use this function from fpctrl.c */

void _set_statfp(uintptr_t);


static inline void raise_fpsw_flags(int flags)

{

    unsigned int f = 0;


    if (flags & AMD_F_OVERFLOW)     { f |= AMD_ISW_OVERFLOW; }

    if (flags & AMD_F_UNDERFLOW)    { f |= AMD_ISW_UNDERFLOW; }

    if (flags & AMD_F_DIVBYZERO)    { f |= AMD_ISW_ZERODIVIDE; }

    if (flags & AMD_F_INVALID)      { f |= AMD_ISW_INVALID; }

    if (flags & AMD_F_INEXACT)      { f |= AMD_ISW_INEXACT; }


    _set_statfp(f);

}


#endif /* USE_RAISE_FPSW_FLAGS */


#if defined(USE_GET_FPSW_INLINE)

/* Return the current floating-point status word */

static inline unsigned int get_fpsw_inline(void)

{

  return _mm_getcsr();

}

#endif /* USE_GET_FPSW_INLINE */


#if defined(USE_SET_FPSW_INLINE)

/* Set the floating-point status word */

static inline void set_fpsw_inline(unsigned int sw)

{

  _mm_setcsr(sw);

}

#endif /* USE_SET_FPSW_INLINE */


#if defined(USE_VAL_WITH_FLAGS)

/* Returns a double value after raising the given flags,

  e.g.  val_with_flags(AMD_F_INEXACT);

 */

static inline double val_with_flags(double val, int flags)

{

  raise_fpsw_flags(flags);

  return val;

}

#endif /* USE_VAL_WITH_FLAGS */


#if defined(USE_VALF_WITH_FLAGS)

/* Returns a float value after raising the given flags,

  e.g.  valf_with_flags(AMD_F_INEXACT);

 */

static inline float valf_with_flags(float val, int flags)

{

  raise_fpsw_flags(flags);

  return val;

}

#endif /* USE_VALF_WITH_FLAGS */


#if defined(USE_ZERO_WITH_FLAGS)

/* Returns a double +zero after raising the given flags,

  e.g.  zero_with_flags(AMD_F_INEXACT | AMD_F_INVALID);

 */

static inline double zero_with_flags(int flags)

{

  raise_fpsw_flags(flags);

  return 0.0;

}

#endif /* USE_ZERO_WITH_FLAGS */


#if defined(USE_ZEROF_WITH_FLAGS)

/* Returns a float +zero after raising the given flags,

  e.g.  zerof_with_flags(AMD_F_INEXACT | AMD_F_INVALID);

 */

static inline float zerof_with_flags(int flags)

{

  raise_fpsw_flags(flags);

  return 0.0F;

}

#endif /* USE_ZEROF_WITH_FLAGS */


#if defined(USE_NAN_WITH_FLAGS)

/* Returns a double quiet +nan after raising the given flags,

   e.g.  nan_with_flags(AMD_F_INVALID);

*/

static inline double nan_with_flags(int flags)

{

  double z;

  raise_fpsw_flags(flags);

  PUT_BITS_DP64(0x7ff8000000000000, z);

  return z;

}

#endif /* USE_NAN_WITH_FLAGS */


#if defined(USE_NANF_WITH_FLAGS)

/* Returns a float quiet +nan after raising the given flags,

   e.g.  nanf_with_flags(AMD_F_INVALID);

*/

static inline float nanf_with_flags(int flags)

{

  float z;

  raise_fpsw_flags(flags);

  PUT_BITS_SP32(0x7fc00000, z);

  return z;

}

#endif /* USE_NANF_WITH_FLAGS */


#if defined(USE_INDEFINITE_WITH_FLAGS)

/* Returns a double indefinite after raising the given flags,

   e.g.  indefinite_with_flags(AMD_F_INVALID);

*/

static inline double indefinite_with_flags(int flags)

{

  double z;

  raise_fpsw_flags(flags);

  PUT_BITS_DP64(0xfff8000000000000, z);

  return z;

}

#endif /* USE_INDEFINITE_WITH_FLAGS */


#if defined(USE_INDEFINITEF_WITH_FLAGS)

/* Returns a float quiet +indefinite after raising the given flags,

   e.g.  indefinitef_with_flags(AMD_F_INVALID);

*/

static inline float indefinitef_with_flags(int flags)

{

  float z;

  raise_fpsw_flags(flags);

  PUT_BITS_SP32(0xffc00000, z);

  return z;

}

#endif /* USE_INDEFINITEF_WITH_FLAGS */


#ifdef USE_INFINITY_WITH_FLAGS

/* Returns a positive double infinity after raising the given flags,

   e.g.  infinity_with_flags(AMD_F_OVERFLOW);

*/

static inline double infinity_with_flags(int flags)

{

  double z;

  raise_fpsw_flags(flags);

  PUT_BITS_DP64((unsigned long long)(BIASEDEMAX_DP64 + 1) << EXPSHIFTBITS_DP64, z);

  return z;

}

#endif /* USE_INFINITY_WITH_FLAGS */


#ifdef USE_INFINITYF_WITH_FLAGS

/* Returns a positive float infinity after raising the given flags,

   e.g.  infinityf_with_flags(AMD_F_OVERFLOW);

*/

static inline float infinityf_with_flags(int flags)

{

  float z;

  raise_fpsw_flags(flags);

  PUT_BITS_SP32((BIASEDEMAX_SP32 + 1) << EXPSHIFTBITS_SP32, z);

  return z;

}

#endif /* USE_INFINITYF_WITH_FLAGS */


#if defined(USE_HANDLE_ERROR) || defined(USE_HANDLE_ERRORF)

#include <errno.h>

#endif


/* define the Microsoft specific error handling routine */

double _handle_error(

        char *fname,

        int opcode,

        unsigned long long value,

        int type,

        int flags,

        int error,

        double arg1,

        double arg2,

        int nargs

        );

float _handle_errorf(

        char *fname,

        int opcode,

        unsigned long long value,

        int type,

        int flags,

        int error,

        float arg1,

        float arg2,

        int nargs

        );


#if defined(USE_SPLITEXP)

/* Compute the values m, z1, and z2 such that base**x = 2**m * (z1 + z2).

   Small arguments abs(x) < 1/(16*ln(base)) and extreme arguments

   abs(x) > large/(ln(base)) (where large is the largest representable

   floating point number) should be handled separately instead of calling

   this function. This function is called by exp, exp2, exp10,

   cosh and sinh. */

static inline void splitexp(double x, double logbase,

                            double thirtytwo_by_logbaseof2,

                            double logbaseof2_by_32_lead,

                            double logbaseof2_by_32_trail,

                            int *m, double *z1, double *z2)

{

  double q, r, r1, r2, f1, f2;

  int n, j;


/* Arrays two_to_jby32_lead_table and two_to_jby32_trail_table contain

   leading and trailing parts respectively of precomputed

   values of pow(2.0,j/32.0), for j = 0, 1, ..., 31.

   two_to_jby32_lead_table contains the first 25 bits of precision,

   and two_to_jby32_trail_table contains a further 53 bits precision. */


  static const double two_to_jby32_lead_table[32] = {

    1.00000000000000000000e+00,   /* 0x3ff0000000000000 */

    1.02189713716506958008e+00,   /* 0x3ff059b0d0000000 */

    1.04427373409271240234e+00,   /* 0x3ff0b55860000000 */

    1.06714040040969848633e+00,   /* 0x3ff11301d0000000 */

    1.09050768613815307617e+00,   /* 0x3ff172b830000000 */

    1.11438673734664916992e+00,   /* 0x3ff1d48730000000 */

    1.13878858089447021484e+00,   /* 0x3ff2387a60000000 */

    1.16372483968734741211e+00,   /* 0x3ff29e9df0000000 */

    1.18920707702636718750e+00,   /* 0x3ff306fe00000000 */

    1.21524733304977416992e+00,   /* 0x3ff371a730000000 */

    1.24185776710510253906e+00,   /* 0x3ff3dea640000000 */

    1.26905095577239990234e+00,   /* 0x3ff44e0860000000 */

    1.29683953523635864258e+00,   /* 0x3ff4bfdad0000000 */

    1.32523661851882934570e+00,   /* 0x3ff5342b50000000 */

    1.35425549745559692383e+00,   /* 0x3ff5ab07d0000000 */

    1.38390988111495971680e+00,   /* 0x3ff6247eb0000000 */

    1.41421353816986083984e+00,   /* 0x3ff6a09e60000000 */

    1.44518077373504638672e+00,   /* 0x3ff71f75e0000000 */

    1.47682613134384155273e+00,   /* 0x3ff7a11470000000 */

    1.50916439294815063477e+00,   /* 0x3ff8258990000000 */

    1.54221081733703613281e+00,   /* 0x3ff8ace540000000 */

    1.57598084211349487305e+00,   /* 0x3ff93737b0000000 */

    1.61049032211303710938e+00,   /* 0x3ff9c49180000000 */

    1.64575546979904174805e+00,   /* 0x3ffa5503b0000000 */

    1.68179279565811157227e+00,   /* 0x3ffae89f90000000 */

    1.71861928701400756836e+00,   /* 0x3ffb7f76f0000000 */

    1.75625211000442504883e+00,   /* 0x3ffc199bd0000000 */

    1.79470902681350708008e+00,   /* 0x3ffcb720d0000000 */

    1.83400803804397583008e+00,   /* 0x3ffd5818d0000000 */

    1.87416762113571166992e+00,   /* 0x3ffdfc9730000000 */

    1.91520655155181884766e+00,   /* 0x3ffea4afa0000000 */

    1.95714408159255981445e+00};  /* 0x3fff507650000000 */


  static const double two_to_jby32_trail_table[32] = {

    0.00000000000000000000e+00,   /* 0x0000000000000000 */

    1.14890470981563546737e-08,   /* 0x3e48ac2ba1d73e2a */

    4.83347014379782142328e-08,   /* 0x3e69f3121ec53172 */

    2.67125131841396124714e-10,   /* 0x3df25b50a4ebbf1b */

    4.65271045830351350190e-08,   /* 0x3e68faa2f5b9bef9 */

    5.24924336638693782574e-09,   /* 0x3e368b9aa7805b80 */

    5.38622214388600821910e-08,   /* 0x3e6ceac470cd83f6 */

    1.90902301017041969782e-08,   /* 0x3e547f7b84b09745 */

    3.79763538792174980894e-08,   /* 0x3e64636e2a5bd1ab */

    2.69306947081946450986e-08,   /* 0x3e5ceaa72a9c5154 */

    4.49683815095311756138e-08,   /* 0x3e682468446b6824 */

    1.41933332021066904914e-09,   /* 0x3e18624b40c4dbd0 */

    1.94146510233556266402e-08,   /* 0x3e54d8a89c750e5e */

    2.46409119489264118569e-08,   /* 0x3e5a753e077c2a0f */

    4.94812958044698886494e-08,   /* 0x3e6a90a852b19260 */

    8.48872238075784476136e-10,   /* 0x3e0d2ac258f87d03 */

    2.42032342089579394887e-08,   /* 0x3e59fcef32422cbf */

    3.32420002333182569170e-08,   /* 0x3e61d8bee7ba46e2 */

    1.45956577586525322754e-08,   /* 0x3e4f580c36bea881 */

    3.46452721050003920866e-08,   /* 0x3e62999c25159f11 */

    8.07090469079979051284e-09,   /* 0x3e415506dadd3e2a */

    2.99439161340839520436e-09,   /* 0x3e29b8bc9e8a0388 */

    9.83621719880452147153e-09,   /* 0x3e451f8480e3e236 */

    8.35492309647188080486e-09,   /* 0x3e41f12ae45a1224 */

    3.48493175137966283582e-08,   /* 0x3e62b5a75abd0e6a */

    1.11084703472699692902e-08,   /* 0x3e47daf237553d84 */

    5.03688744342840346564e-08,   /* 0x3e6b0aa538444196 */

    4.81896001063495806249e-08,   /* 0x3e69df20d22a0798 */

    4.83653666334089557746e-08,   /* 0x3e69f7490e4bb40b */

    1.29745882314081237628e-08,   /* 0x3e4bdcdaf5cb4656 */

    9.84532844621636118964e-09,   /* 0x3e452486cc2c7b9d */

    4.25828404545651943883e-08};  /* 0x3e66dc8a80ce9f09 */


    /*

      Step 1. Reduce the argument.


      To perform argument reduction, we find the integer n such that

      x = n * logbaseof2/32 + remainder, |remainder| <= logbaseof2/64.

      n is defined by round-to-nearest-integer( x*32/logbaseof2 ) and

      remainder by x - n*logbaseof2/32. The calculation of n is

      straightforward whereas the computation of x - n*logbaseof2/32

      must be carried out carefully.

      logbaseof2/32 is so represented in two pieces that

      (1) logbaseof2/32 is known to extra precision, (2) the product

      of n and the leading piece is a model number and is hence

      calculated without error, and (3) the subtraction of the value

      obtained in (2) from x is a model number and is hence again

      obtained without error.

    */


    r = x * thirtytwo_by_logbaseof2;

    /* Set n = nearest integer to r */

    /* This is faster on Hammer */

    if (r > 0)

      n = (int)(r + 0.5);

    else

      n = (int)(r - 0.5);


    r1 = x - n * logbaseof2_by_32_lead;

    r2 =   - n * logbaseof2_by_32_trail;


    /* Set j = n mod 32:   5 mod 32 = 5,   -5 mod 32 = 27,  etc. */

    /* j = n % 32;

       if (j < 0) j += 32; */

    j = n & 0x0000001f;


    f1 = two_to_jby32_lead_table[j];

    f2 = two_to_jby32_trail_table[j];


    *m = (n - j) / 32;


    /* Step 2. The following is the core approximation. We approximate

       exp(r1+r2)-1 by a polynomial. */


    r1 *= logbase; r2 *= logbase;


    r = r1 + r2;

    q = r1 + (r2 +

              r*r*( 5.00000000000000008883e-01 +

                      r*( 1.66666666665260878863e-01 +

                      r*( 4.16666666662260795726e-02 +

                      r*( 8.33336798434219616221e-03 +

                      r*( 1.38889490863777199667e-03 ))))));


    /* Step 3. Function value reconstruction.

       We now reconstruct the exponential of the input argument

       so that exp(x) = 2**m * (z1 + z2).

       The order of the computation below must be strictly observed. */


    *z1 = f1;

    *z2 = f2 + ((f1 + f2) * q);

}

#endif /* USE_SPLITEXP */


#if defined(USE_SPLITEXPF)

/* Compute the values m, z1, and z2 such that base**x = 2**m * (z1 + z2).

   Small arguments abs(x) < 1/(16*ln(base)) and extreme arguments

   abs(x) > large/(ln(base)) (where large is the largest representable

   floating point number) should be handled separately instead of calling

   this function. This function is called by exp, exp2, exp10,

   cosh and sinh. */

static inline void splitexpf(float x, float logbase,

                             float thirtytwo_by_logbaseof2,

                             float logbaseof2_by_32_lead,

                             float logbaseof2_by_32_trail,

                             int *m, float *z1, float *z2)

{

  float q, r, r1, r2, f1, f2;

  int n, j;


/* Arrays two_to_jby32_lead_table and two_to_jby32_trail_table contain

   leading and trailing parts respectively of precomputed

   values of pow(2.0,j/32.0), for j = 0, 1, ..., 31.

   two_to_jby32_lead_table contains the first 10 bits of precision,

   and two_to_jby32_trail_table contains a further 24 bits precision. */


  static const float two_to_jby32_lead_table[32] = {

    1.0000000000E+00F,  /* 0x3F800000 */

    1.0214843750E+00F,  /* 0x3F82C000 */

    1.0429687500E+00F,  /* 0x3F858000 */

    1.0664062500E+00F,  /* 0x3F888000 */

    1.0898437500E+00F,  /* 0x3F8B8000 */

    1.1132812500E+00F,  /* 0x3F8E8000 */

    1.1386718750E+00F,  /* 0x3F91C000 */

    1.1621093750E+00F,  /* 0x3F94C000 */

    1.1875000000E+00F,  /* 0x3F980000 */

    1.2148437500E+00F,  /* 0x3F9B8000 */

    1.2402343750E+00F,  /* 0x3F9EC000 */

    1.2675781250E+00F,  /* 0x3FA24000 */

    1.2949218750E+00F,  /* 0x3FA5C000 */

    1.3242187500E+00F,  /* 0x3FA98000 */

    1.3535156250E+00F,  /* 0x3FAD4000 */

    1.3828125000E+00F,  /* 0x3FB10000 */

    1.4140625000E+00F,  /* 0x3FB50000 */

    1.4433593750E+00F,  /* 0x3FB8C000 */

    1.4765625000E+00F,  /* 0x3FBD0000 */

    1.5078125000E+00F,  /* 0x3FC10000 */

    1.5410156250E+00F,  /* 0x3FC54000 */

    1.5742187500E+00F,  /* 0x3FC98000 */

    1.6093750000E+00F,  /* 0x3FCE0000 */

    1.6445312500E+00F,  /* 0x3FD28000 */

    1.6816406250E+00F,  /* 0x3FD74000 */

    1.7167968750E+00F,  /* 0x3FDBC000 */

    1.7558593750E+00F,  /* 0x3FE0C000 */

    1.7929687500E+00F,  /* 0x3FE58000 */

    1.8339843750E+00F,  /* 0x3FEAC000 */

    1.8730468750E+00F,  /* 0x3FEFC000 */

    1.9140625000E+00F,  /* 0x3FF50000 */

    1.9570312500E+00F}; /* 0x3FFA8000 */


  static const float two_to_jby32_trail_table[32] = {

    0.0000000000E+00F,  /* 0x00000000 */

    4.1277357377E-04F,  /* 0x39D86988 */

    1.3050324051E-03F,  /* 0x3AAB0D9F */

    7.3415064253E-04F,  /* 0x3A407404 */

    6.6398258787E-04F,  /* 0x3A2E0F1E */

    1.1054925853E-03F,  /* 0x3A90E62D */

    1.1675967835E-04F,  /* 0x38F4DCE0 */

    1.6154836630E-03F,  /* 0x3AD3BEA3 */

    1.7071149778E-03F,  /* 0x3ADFC146 */

    4.0360994171E-04F,  /* 0x39D39B9C */

    1.6234370414E-03F,  /* 0x3AD4C982 */

    1.4728321694E-03F,  /* 0x3AC10C0C */

    1.9176795613E-03F,  /* 0x3AFB5AA6 */

    1.0178930825E-03F,  /* 0x3A856AD3 */

    7.3992193211E-04F,  /* 0x3A41F752 */

    1.0973819299E-03F,  /* 0x3A8FD607 */

    1.5106226783E-04F,  /* 0x391E6678 */

    1.8214319134E-03F,  /* 0x3AEEBD1D */

    2.6364589576E-04F,  /* 0x398A39F4 */

    1.3519275235E-03F,  /* 0x3AB13329 */

    1.1952003697E-03F,  /* 0x3A9CA845 */

    1.7620950239E-03F,  /* 0x3AE6F619 */

    1.1153318919E-03F,  /* 0x3A923054 */

    1.2242280645E-03F,  /* 0x3AA07647 */

    1.5220546629E-04F,  /* 0x391F9958 */

    1.8224230735E-03F,  /* 0x3AEEDE5F */

    3.9278529584E-04F,  /* 0x39CDEEC0 */

    1.7403248930E-03F,  /* 0x3AE41B9D */

    2.3711356334E-05F,  /* 0x37C6E7C0 */

    1.1207590578E-03F,  /* 0x3A92E66F */

    1.1440613307E-03F,  /* 0x3A95F454 */

    1.1287408415E-04F}; /* 0x38ECB6D0 */


    /*

      Step 1. Reduce the argument.


      To perform argument reduction, we find the integer n such that

      x = n * logbaseof2/32 + remainder, |remainder| <= logbaseof2/64.

      n is defined by round-to-nearest-integer( x*32/logbaseof2 ) and

      remainder by x - n*logbaseof2/32. The calculation of n is

      straightforward whereas the computation of x - n*logbaseof2/32

      must be carried out carefully.

      logbaseof2/32 is so represented in two pieces that

      (1) logbaseof2/32 is known to extra precision, (2) the product

      of n and the leading piece is a model number and is hence

      calculated without error, and (3) the subtraction of the value

      obtained in (2) from x is a model number and is hence again

      obtained without error.

    */


    r = x * thirtytwo_by_logbaseof2;

    /* Set n = nearest integer to r */

    /* This is faster on Hammer */

    if (r > 0)

      n = (int)(r + 0.5F);

    else

      n = (int)(r - 0.5F);


    r1 = x - n * logbaseof2_by_32_lead;

    r2 =   - n * logbaseof2_by_32_trail;


    /* Set j = n mod 32:   5 mod 32 = 5,   -5 mod 32 = 27,  etc. */

    /* j = n % 32;

       if (j < 0) j += 32; */

    j = n & 0x0000001f;


    f1 = two_to_jby32_lead_table[j];

    f2 = two_to_jby32_trail_table[j];


    *m = (n - j) / 32;


    /* Step 2. The following is the core approximation. We approximate

       exp(r1+r2)-1 by a polynomial. */


    r1 *= logbase; r2 *= logbase;


    r = r1 + r2;

    q = r1 + (r2 +

              r*r*( 5.00000000000000008883e-01F +

                      r*( 1.66666666665260878863e-01F )));


    /* Step 3. Function value reconstruction.

       We now reconstruct the exponential of the input argument

       so that exp(x) = 2**m * (z1 + z2).

       The order of the computation below must be strictly observed. */


    *z1 = f1;

    *z2 = f2 + ((f1 + f2) * q);

}

#endif /* SPLITEXPF */


#if defined(USE_SCALEUPDOUBLE1024)

/* Scales up a double (normal or denormal) whose bit pattern is given

   as ux by 2**1024. There are no checks that the input number is

   scalable by that amount. */

static inline void scaleUpDouble1024(unsigned long long ux, unsigned long long *ur)

{

  unsigned long long uy;

  double y;


  if ((ux & EXPBITS_DP64) == 0)

    {

      /* ux is denormalised */

      PUT_BITS_DP64(ux | 0x4010000000000000, y);

      if (ux & SIGNBIT_DP64)

        y += 4.0;

      else

        y -= 4.0;

      GET_BITS_DP64(y, uy);

    }

  else

    /* ux is normal */

    uy = ux + 0x4000000000000000;


  *ur = uy;

  return;

}


#endif /* SCALEUPDOUBLE1024 */


#if defined(USE_SCALEDOWNDOUBLE)

/* Scales down a double whose bit pattern is given as ux by 2**k.

   There are no checks that the input number is scalable by that amount. */

static inline void scaleDownDouble(unsigned long long ux, int k,

                                   unsigned long long *ur)

{

  unsigned long long uy, uk, ax, xsign;

  int n, shift;

  xsign = ux & SIGNBIT_DP64;

  ax = ux & ~SIGNBIT_DP64;

  n = (int)((ax & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - k;

  if (n > 0)

    {

      uk = (unsigned long long)n << EXPSHIFTBITS_DP64;

      uy = (ax & ~EXPBITS_DP64) | uk;

    }

  else

    {

      uy = (ax & ~EXPBITS_DP64) | 0x0010000000000000;

      shift = (1 - n);

      if (shift > MANTLENGTH_DP64 + 1)

        /* Sigh. Shifting works mod 64 so be careful not to shift too much */

        uy = 0;

      else

        {

          /* Make sure we round the result */

          uy >>= shift - 1;

          uy = (uy >> 1) + (uy & 1);

        }

    }

  *ur = uy | xsign;

}


#endif /* SCALEDOWNDOUBLE */


#if defined(USE_SCALEUPFLOAT128)

/* Scales up a float (normal or denormal) whose bit pattern is given

   as ux by 2**128. There are no checks that the input number is

   scalable by that amount. */

static inline void scaleUpFloat128(unsigned int ux, unsigned int *ur)

{

  unsigned int uy;

  float y;


  if ((ux & EXPBITS_SP32) == 0)

    {

      /* ux is denormalised */

      PUT_BITS_SP32(ux | 0x40800000, y);

      /* Compensate for the implicit bit just added */

      if (ux & SIGNBIT_SP32)

        y += 4.0F;

      else

        y -= 4.0F;

      GET_BITS_SP32(y, uy);

    }

  else

    /* ux is normal */

    uy = ux + 0x40000000;

  *ur = uy;

}

#endif /* SCALEUPFLOAT128 */


#if defined(USE_SCALEDOWNFLOAT)

/* Scales down a float whose bit pattern is given as ux by 2**k.

   There are no checks that the input number is scalable by that amount. */

static inline void scaleDownFloat(unsigned int ux, int k,

                                  unsigned int *ur)

{

  unsigned int uy, uk, ax, xsign;

  int n, shift;


  xsign = ux & SIGNBIT_SP32;

  ax = ux & ~SIGNBIT_SP32;

  n = ((ax & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - k;

  if (n > 0)

    {

      uk = (unsigned int)n << EXPSHIFTBITS_SP32;

      uy = (ax & ~EXPBITS_SP32) | uk;

    }

  else

    {

      uy = (ax & ~EXPBITS_SP32) | 0x00800000;

      shift = (1 - n);

      if (shift > MANTLENGTH_SP32 + 1)

        /* Sigh. Shifting works mod 32 so be careful not to shift too much */

        uy = 0;

      else

        {

          /* Make sure we round the result */

          uy >>= shift - 1;

          uy = (uy >> 1) + (uy & 1);

        }

    }

  *ur = uy | xsign;

}

#endif /* SCALEDOWNFLOAT */


#if defined(USE_SQRT_AMD_INLINE)

static inline double sqrt_amd_inline(double x)

{

  /*

     Computes the square root of x.


     The calculation is carried out in three steps.


     Step 1. Reduction.

     The input argument is scaled to the interval [1, 4) by

     computing

               x = 2^e * y, where y in [1,4).

     Furthermore y is decomposed as y = c + t where

               c = 1 + j/32, j = 0,1,..,96; and |t| <= 1/64.


     Step 2. Approximation.

     An approximation q = sqrt(1 + (t/c)) - 1  is obtained

     from a basic series expansion using precomputed values

     stored in rt_jby32_lead_table_dbl and rt_jby32_trail_table_dbl.


     Step 3. Reconstruction.

     The value of sqrt(x) is reconstructed via

       sqrt(x) = 2^(e/2) * sqrt(y)

               = 2^(e/2) * sqrt(c) * sqrt(y/c)

               = 2^(e/2) * sqrt(c) * sqrt(1 + t/c)

               = 2^(e/2) * [ sqrt(c) + sqrt(c)*q ]

    */


  unsigned long long ux, ax, u;

  double r1, r2, c, y, p, q, r, twop, z, rtc, rtc_lead, rtc_trail;

  int e, denorm = 0, index;


/* Arrays rt_jby32_lead_table_dbl and rt_jby32_trail_table_dbl contain

   leading and trailing parts respectively of precomputed

   values of sqrt(j/32), for j = 32, 33, ..., 128.

   rt_jby32_lead_table_dbl contains the first 21 bits of precision,

   and rt_jby32_trail_table_dbl contains a further 53 bits precision. */


  static const double rt_jby32_lead_table_dbl[97] = {

    1.00000000000000000000e+00,   /* 0x3ff0000000000000 */

    1.01550388336181640625e+00,   /* 0x3ff03f8100000000 */

    1.03077602386474609375e+00,   /* 0x3ff07e0f00000000 */

    1.04582500457763671875e+00,   /* 0x3ff0bbb300000000 */

    1.06065940856933593750e+00,   /* 0x3ff0f87600000000 */

    1.07528972625732421875e+00,   /* 0x3ff1346300000000 */

    1.08972454071044921875e+00,   /* 0x3ff16f8300000000 */

    1.10396957397460937500e+00,   /* 0x3ff1a9dc00000000 */

    1.11803340911865234375e+00,   /* 0x3ff1e37700000000 */

    1.13192272186279296875e+00,   /* 0x3ff21c5b00000000 */

    1.14564323425292968750e+00,   /* 0x3ff2548e00000000 */

    1.15920162200927734375e+00,   /* 0x3ff28c1700000000 */

    1.17260360717773437500e+00,   /* 0x3ff2c2fc00000000 */

    1.18585395812988281250e+00,   /* 0x3ff2f94200000000 */

    1.19895744323730468750e+00,   /* 0x3ff32eee00000000 */

    1.21191978454589843750e+00,   /* 0x3ff3640600000000 */

    1.22474479675292968750e+00,   /* 0x3ff3988e00000000 */

    1.23743629455566406250e+00,   /* 0x3ff3cc8a00000000 */

    1.25000000000000000000e+00,   /* 0x3ff4000000000000 */

    1.26243782043457031250e+00,   /* 0x3ff432f200000000 */

    1.27475452423095703125e+00,   /* 0x3ff4656500000000 */

    1.28695297241210937500e+00,   /* 0x3ff4975c00000000 */

    1.29903793334960937500e+00,   /* 0x3ff4c8dc00000000 */

    1.31101036071777343750e+00,   /* 0x3ff4f9e600000000 */

    1.32287502288818359375e+00,   /* 0x3ff52a7f00000000 */

    1.33463478088378906250e+00,   /* 0x3ff55aaa00000000 */

    1.34629058837890625000e+00,   /* 0x3ff58a6800000000 */

    1.35784721374511718750e+00,   /* 0x3ff5b9be00000000 */

    1.36930561065673828125e+00,   /* 0x3ff5e8ad00000000 */

    1.38066959381103515625e+00,   /* 0x3ff6173900000000 */

    1.39194107055664062500e+00,   /* 0x3ff6456400000000 */

    1.40312099456787109375e+00,   /* 0x3ff6732f00000000 */

    1.41421318054199218750e+00,   /* 0x3ff6a09e00000000 */

    1.42521858215332031250e+00,   /* 0x3ff6cdb200000000 */

    1.43614006042480468750e+00,   /* 0x3ff6fa6e00000000 */

    1.44697952270507812500e+00,   /* 0x3ff726d400000000 */

    1.45773792266845703125e+00,   /* 0x3ff752e500000000 */

    1.46841716766357421875e+00,   /* 0x3ff77ea300000000 */

    1.47901916503906250000e+00,   /* 0x3ff7aa1000000000 */

    1.48954677581787109375e+00,   /* 0x3ff7d52f00000000 */

    1.50000000000000000000e+00,   /* 0x3ff8000000000000 */

    1.51038074493408203125e+00,   /* 0x3ff82a8500000000 */

    1.52068996429443359375e+00,   /* 0x3ff854bf00000000 */

    1.53093051910400390625e+00,   /* 0x3ff87eb100000000 */

    1.54110336303710937500e+00,   /* 0x3ff8a85c00000000 */

    1.55120849609375000000e+00,   /* 0x3ff8d1c000000000 */

    1.56124877929687500000e+00,   /* 0x3ff8fae000000000 */

    1.57122516632080078125e+00,   /* 0x3ff923bd00000000 */

    1.58113861083984375000e+00,   /* 0x3ff94c5800000000 */

    1.59099006652832031250e+00,   /* 0x3ff974b200000000 */

    1.60078048706054687500e+00,   /* 0x3ff99ccc00000000 */

    1.61051177978515625000e+00,   /* 0x3ff9c4a800000000 */

    1.62018489837646484375e+00,   /* 0x3ff9ec4700000000 */

    1.62979984283447265625e+00,   /* 0x3ffa13a900000000 */

    1.63935947418212890625e+00,   /* 0x3ffa3ad100000000 */

    1.64886283874511718750e+00,   /* 0x3ffa61be00000000 */

    1.65831184387207031250e+00,   /* 0x3ffa887200000000 */

    1.66770744323730468750e+00,   /* 0x3ffaaeee00000000 */

    1.67705059051513671875e+00,   /* 0x3ffad53300000000 */

    1.68634128570556640625e+00,   /* 0x3ffafb4100000000 */

    1.69558238983154296875e+00,   /* 0x3ffb211b00000000 */

    1.70477199554443359375e+00,   /* 0x3ffb46bf00000000 */

    1.71391296386718750000e+00,   /* 0x3ffb6c3000000000 */

    1.72300529479980468750e+00,   /* 0x3ffb916e00000000 */

    1.73204994201660156250e+00,   /* 0x3ffbb67a00000000 */

    1.74104785919189453125e+00,   /* 0x3ffbdb5500000000 */

    1.75000000000000000000e+00,   /* 0x3ffc000000000000 */

    1.75890541076660156250e+00,   /* 0x3ffc247a00000000 */

    1.76776695251464843750e+00,   /* 0x3ffc48c600000000 */

    1.77658367156982421875e+00,   /* 0x3ffc6ce300000000 */

    1.78535652160644531250e+00,   /* 0x3ffc90d200000000 */

    1.79408740997314453125e+00,   /* 0x3ffcb49500000000 */

    1.80277538299560546875e+00,   /* 0x3ffcd82b00000000 */

    1.81142139434814453125e+00,   /* 0x3ffcfb9500000000 */

    1.82002735137939453125e+00,   /* 0x3ffd1ed500000000 */

    1.82859230041503906250e+00,   /* 0x3ffd41ea00000000 */

    1.83711719512939453125e+00,   /* 0x3ffd64d500000000 */

    1.84560203552246093750e+00,   /* 0x3ffd879600000000 */

    1.85404872894287109375e+00,   /* 0x3ffdaa2f00000000 */

    1.86245727539062500000e+00,   /* 0x3ffdcca000000000 */

    1.87082862854003906250e+00,   /* 0x3ffdeeea00000000 */

    1.87916183471679687500e+00,   /* 0x3ffe110c00000000 */

    1.88745784759521484375e+00,   /* 0x3ffe330700000000 */

    1.89571857452392578125e+00,   /* 0x3ffe54dd00000000 */

    1.90394306182861328125e+00,   /* 0x3ffe768d00000000 */

    1.91213226318359375000e+00,   /* 0x3ffe981800000000 */

    1.92028617858886718750e+00,   /* 0x3ffeb97e00000000 */

    1.92840576171875000000e+00,   /* 0x3ffedac000000000 */

    1.93649101257324218750e+00,   /* 0x3ffefbde00000000 */

    1.94454288482666015625e+00,   /* 0x3fff1cd900000000 */

    1.95256233215332031250e+00,   /* 0x3fff3db200000000 */

    1.96054744720458984375e+00,   /* 0x3fff5e6700000000 */

    1.96850109100341796875e+00,   /* 0x3fff7efb00000000 */

    1.97642326354980468750e+00,   /* 0x3fff9f6e00000000 */

    1.98431301116943359375e+00,   /* 0x3fffbfbf00000000 */

    1.99217128753662109375e+00,   /* 0x3fffdfef00000000 */

    2.00000000000000000000e+00};  /* 0x4000000000000000 */


  static const double rt_jby32_trail_table_dbl[97] = {

    0.00000000000000000000e+00,   /* 0x0000000000000000 */

    9.17217678638807524014e-07,   /* 0x3eaec6d70177881c */

    3.82539669043705364790e-07,   /* 0x3e99abfb41bd6b24 */

    2.85899577162227138140e-08,   /* 0x3e5eb2bf6bab55a2 */

    7.63210485349101216659e-07,   /* 0x3ea99bed9b2d8d0c */

    9.32123004127716212874e-07,   /* 0x3eaf46e029c1b296 */

    1.95174719169309219157e-07,   /* 0x3e8a3226fc42f30c */

    5.34316371481845492427e-07,   /* 0x3ea1edbe20701d73 */

    5.79631242504454563052e-07,   /* 0x3ea372fe94f82be7 */

    4.20404384109571705948e-07,   /* 0x3e9c367e08e7bb06 */

    6.89486030314147010716e-07,   /* 0x3ea722a3d0a66608 */

    6.89927685625314560328e-07,   /* 0x3ea7266f067ca1d6 */

    3.32778123013641425828e-07,   /* 0x3e965515a9b34850 */

    1.64433259436999584387e-07,   /* 0x3e8611e23ef6c1bd */

    4.37590875197899335723e-07,   /* 0x3e9d5dc1059ed8e7 */

    1.79808183816018617413e-07,   /* 0x3e88222982d0e4f4 */

    7.46386593615986477624e-08,   /* 0x3e7409212e7d0322 */

    5.72520794105201454728e-07,   /* 0x3ea335ea8a5fcf39 */

    0.00000000000000000000e+00,   /* 0x0000000000000000 */

    2.96860689431670420344e-07,   /* 0x3e93ec071e938bfe */

    3.54167239176257065345e-07,   /* 0x3e97c48bfd9862c6 */

    7.95211265664474710063e-07,   /* 0x3eaaaed010f74671 */

    1.72327048595145565621e-07,   /* 0x3e87211cbfeb62e0 */

    6.99494915996239297020e-07,   /* 0x3ea7789d9660e72d */

    6.32644111701500844315e-07,   /* 0x3ea53a5f1d36f1cf */

    6.20124838851440463844e-10,   /* 0x3e054eacff2057dc */

    6.13404719757812629969e-07,   /* 0x3ea4951b3e6a83cc */

    3.47654909777986407387e-07,   /* 0x3e9754aa76884c66 */

    7.83106177002392475763e-07,   /* 0x3eaa46d4b1de1074 */

    5.33337372440526357008e-07,   /* 0x3ea1e55548f92635 */

    2.01508648555298681765e-08,   /* 0x3e55a3070dd17788 */

    5.25472356925843939587e-07,   /* 0x3ea1a1c5eedb0801 */

    3.81831102861301692797e-07,   /* 0x3e999fcef32422cc */

    6.99220602161420018738e-07,   /* 0x3ea776425d6b0199 */

    6.01209702477462624811e-07,   /* 0x3ea42c5a1e0191a2 */

    9.01437000591944740554e-08,   /* 0x3e7832a0bdff1327 */

    5.10428680864685379950e-08,   /* 0x3e6b674743636676 */

    3.47895267104621031421e-07,   /* 0x3e9758cb90d2f714 */

    7.80735841510641848628e-07,   /* 0x3eaa3278459cde25 */

    1.35158752025506517690e-07,   /* 0x3e822404f4a103ee */

    0.00000000000000000000e+00,   /* 0x0000000000000000 */

    1.76523947728535489812e-09,   /* 0x3e1e539af6892ac5 */

    6.68280121328499932183e-07,   /* 0x3ea66c7b872c9cd0 */

    5.70135482405123276616e-07,   /* 0x3ea3216d2f43887d */

    1.37705134737562525897e-07,   /* 0x3e827b832cbedc0e */

    7.09655107074516613672e-07,   /* 0x3ea7cfe41579091d */

    7.20302724551461693011e-07,   /* 0x3ea82b5a713c490a */

    4.69926266058212796694e-07,   /* 0x3e9f8945932d872e */

    2.19244345915999437026e-07,   /* 0x3e8d6d2da9490251 */

    1.91141411617401877927e-07,   /* 0x3e89a791a3114e4a */

    5.72297665296622053774e-07,   /* 0x3ea333ffe005988d */

    5.61055484436830560103e-07,   /* 0x3ea2d36e0ed49ab1 */

    2.76225500213991506100e-07,   /* 0x3e92898498f55f9e */

    7.58466189522395692908e-07,   /* 0x3ea9732cca1032a3 */

    1.56893371256836029827e-07,   /* 0x3e850ed0b02a22d2 */

    4.06038997708867066507e-07,   /* 0x3e9b3fb265b1e40a */

    5.51305629612057435809e-07,   /* 0x3ea27fade682d1de */

    5.64778487026561123207e-07,   /* 0x3ea2f36906f707ba */

    3.92609705553556897517e-07,   /* 0x3e9a58fbbee883b6 */

    9.09698438776943827802e-07,   /* 0x3eae864005bca6d7 */

    1.05949774066016139743e-07,   /* 0x3e7c70d02300f263 */

    7.16578798392844784244e-07,   /* 0x3ea80b5d712d8e3e */

    6.86233073531233972561e-07,   /* 0x3ea706b27cc7d390 */

    7.99211473033494452908e-07,   /* 0x3eaad12c9d849a97 */

    8.65552275731027456121e-07,   /* 0x3ead0b09954e764b */

    6.75456120386058448618e-07,   /* 0x3ea6aa1fb7826cbd */

    0.00000000000000000000e+00,   /* 0x0000000000000000 */

    4.99167184520462138743e-07,   /* 0x3ea0bfd03f46763c */

    4.51720373502110930296e-10,   /* 0x3dff0abfb4adfb9e */

    1.28874162718371367439e-07,   /* 0x3e814c151f991b2e */

    5.85529267186999798656e-07,   /* 0x3ea3a5a879b09292 */

    1.01827770937125531924e-07,   /* 0x3e7b558d173f9796 */

    2.54736389177809626508e-07,   /* 0x3e9118567cd83fb8 */

    6.98925535290464831294e-07,   /* 0x3ea773b981896751 */

    1.20940735036524314513e-07,   /* 0x3e803b7df49f48a8 */

    5.43759351196479689657e-08,   /* 0x3e6d315f22491900 */

    1.11957989042397958409e-07,   /* 0x3e7e0db1c5bb84b2 */

    8.47006714134442661218e-07,   /* 0x3eac6bbb7644ff76 */

    8.92831044643427836228e-07,   /* 0x3eadf55c3afec01f */

    7.77828292464916501663e-07,   /* 0x3eaa197e81034da3 */

    6.48469316302918797451e-08,   /* 0x3e71683f4920555d */

    2.12579816658859849140e-07,   /* 0x3e8c882fd78bb0b0 */

    7.61222472580559138435e-07,   /* 0x3ea98ad9eb7b83ec */

    2.86488961857314189607e-07,   /* 0x3e9339d7c7777273 */

    2.14637363790165363515e-07,   /* 0x3e8ccee237cae6fe */

    5.44137005612605847831e-08,   /* 0x3e6d368fe324a146 */

    2.58378284856442408413e-07,   /* 0x3e9156e7b6d99b45 */

    3.15848939061134843091e-07,   /* 0x3e95323e5310b5c1 */

    6.60530466255089632309e-07,   /* 0x3ea629e9db362f5d */

    7.63436345535852301127e-07,   /* 0x3ea99dde4728d7ec */

    8.68233432860324345268e-08,   /* 0x3e774e746878544d */

    9.45465175398023087082e-07,   /* 0x3eafb97be873a87d */

    8.77499534786171267246e-07,   /* 0x3ead71a9e23c2f63 */

    2.74055432394999316135e-07,   /* 0x3e92643c89cda173 */

    4.72129009349126213532e-07,   /* 0x3e9faf1d57a4d56c */

    8.93777032327078947306e-07,   /* 0x3eadfd7c7ab7b282 */

    0.00000000000000000000e+00};  /* 0x0000000000000000 */


  /* Handle special arguments first */


  GET_BITS_DP64(x, ux);

  ax = ux & (~SIGNBIT_DP64);


  if(ax >= 0x7ff0000000000000)

    {

      /* x is either NaN or infinity */

      if (ux & MANTBITS_DP64)

        /* x is NaN */

        return x + x; /* Raise invalid if it is a signalling NaN */

      else if (ux & SIGNBIT_DP64)

        /* x is negative infinity */

        return nan_with_flags(AMD_F_INVALID);

      else

        /* x is positive infinity */

        return x;

    }

  else if (ux & SIGNBIT_DP64)

    {

      /* x is negative. */

      if (ux == SIGNBIT_DP64)

        /* Handle negative zero first */

        return x;

      else

        return nan_with_flags(AMD_F_INVALID);

    }

  else if (ux <= 0x000fffffffffffff)

    {

      /* x is denormalised or zero */

      if (ux == 0)

        /* x is zero */

        return x;

      else

        {

          /* x is denormalised; scale it up */

          /* Normalize x by increasing the exponent by 60

             and subtracting a correction to account for the implicit

             bit. This replaces a slow denormalized

             multiplication by a fast normal subtraction. */

          static const double corr = 2.5653355008114851558350183e-290; /* 0x03d0000000000000 */

          denorm = 1;

          GET_BITS_DP64(x, ux);

          PUT_BITS_DP64(ux | 0x03d0000000000000, x);

          x -= corr;

          GET_BITS_DP64(x, ux);

        }

    }


  /* Main algorithm */


  /*

     Find y and e such that x = 2^e * y, where y in [1,4).

     This is done using an in-lined variant of splitDouble,

     which also ensures that e is even.

   */

  y = x;

  ux &= EXPBITS_DP64;

  ux >>= EXPSHIFTBITS_DP64;

  if (ux & 1)

    {

      GET_BITS_DP64(y, u);

      u &= (SIGNBIT_DP64 | MANTBITS_DP64);

      u |= ONEEXPBITS_DP64;

      PUT_BITS_DP64(u, y);

      e = ux - EXPBIAS_DP64;

    }

  else

    {

      GET_BITS_DP64(y, u);

      u &= (SIGNBIT_DP64 | MANTBITS_DP64);

      u |= TWOEXPBITS_DP64;

      PUT_BITS_DP64(u, y);

      e = ux - EXPBIAS_DP64 - 1;

    }


  /* Find the index of the sub-interval of [1,4) in which y lies. */


  index = (int)(32.0*y+0.5);


  /* Look up the table values and compute c and r = c/t */


  rtc_lead = rt_jby32_lead_table_dbl[index-32];

  rtc_trail = rt_jby32_trail_table_dbl[index-32];

  c = 0.03125*index;

  r = (y - c)/c;


  /*

    Find q = sqrt(1+r) - 1.

    From one step of Newton on (q+1)^2 = 1+r

  */


  p = r*0.5 - r*r*(0.1250079870 - r*(0.6250522999E-01));

  twop = p + p;

  q = p - (p*p + (twop - r))/(twop + 2.0);


  /* Reconstruction */


  rtc = rtc_lead + rtc_trail;

  e >>= 1; /* e = e/2 */

  z = rtc_lead + (rtc*q+rtc_trail);


  if (denorm)

    {

      /* Scale by 2**(e-30) */

      PUT_BITS_DP64(((long long)(e - 30) + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, r);

      z *= r;

    }

  else

    {

      /* Scale by 2**e */

      PUT_BITS_DP64(((long long)e + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, r);

      z *= r;

    }


  return z;


}

#endif /* SQRT_AMD_INLINE */


#if defined(USE_SQRTF_AMD_INLINE)


static inline float sqrtf_amd_inline(float x)

{

  /*

     Computes the square root of x.


     The calculation is carried out in three steps.


     Step 1. Reduction.

     The input argument is scaled to the interval [1, 4) by

     computing

               x = 2^e * y, where y in [1,4).

     Furthermore y is decomposed as y = c + t where

               c = 1 + j/32, j = 0,1,..,96; and |t| <= 1/64.


     Step 2. Approximation.

     An approximation q = sqrt(1 + (t/c)) - 1  is obtained

     from a basic series expansion using precomputed values

     stored in rt_jby32_lead_table_float and rt_jby32_trail_table_float.


     Step 3. Reconstruction.

     The value of sqrt(x) is reconstructed via

       sqrt(x) = 2^(e/2) * sqrt(y)

               = 2^(e/2) * sqrt(c) * sqrt(y/c)

               = 2^(e/2) * sqrt(c) * sqrt(1 + t/c)

               = 2^(e/2) * [ sqrt(c) + sqrt(c)*q ]

    */


  unsigned int ux, ax, u;

  float r1, r2, c, y, p, q, r, twop, z, rtc, rtc_lead, rtc_trail;

  int e, denorm = 0, index;


/* Arrays rt_jby32_lead_table_float and rt_jby32_trail_table_float contain

   leading and trailing parts respectively of precomputed

   values of sqrt(j/32), for j = 32, 33, ..., 128.

   rt_jby32_lead_table_float contains the first 13 bits of precision,

   and rt_jby32_trail_table_float contains a further 24 bits precision. */


static const float rt_jby32_lead_table_float[97] = {

    1.00000000000000000000e+00F,   /* 0x3f800000 */

    1.01538085937500000000e+00F,   /* 0x3f81f800 */

    1.03076171875000000000e+00F,   /* 0x3f83f000 */

    1.04565429687500000000e+00F,   /* 0x3f85d800 */

    1.06054687500000000000e+00F,   /* 0x3f87c000 */

    1.07519531250000000000e+00F,   /* 0x3f89a000 */

    1.08959960937500000000e+00F,   /* 0x3f8b7800 */

    1.10375976562500000000e+00F,   /* 0x3f8d4800 */

    1.11791992187500000000e+00F,   /* 0x3f8f1800 */

    1.13183593750000000000e+00F,   /* 0x3f90e000 */

    1.14550781250000000000e+00F,   /* 0x3f92a000 */

    1.15917968750000000000e+00F,   /* 0x3f946000 */

    1.17236328125000000000e+00F,   /* 0x3f961000 */

    1.18579101562500000000e+00F,   /* 0x3f97c800 */

    1.19873046875000000000e+00F,   /* 0x3f997000 */

    1.21191406250000000000e+00F,   /* 0x3f9b2000 */

    1.22460937500000000000e+00F,   /* 0x3f9cc000 */

    1.23730468750000000000e+00F,   /* 0x3f9e6000 */

    1.25000000000000000000e+00F,   /* 0x3fa00000 */

    1.26220703125000000000e+00F,   /* 0x3fa19000 */

    1.27465820312500000000e+00F,   /* 0x3fa32800 */

    1.28686523437500000000e+00F,   /* 0x3fa4b800 */

    1.29882812500000000000e+00F,   /* 0x3fa64000 */

    1.31079101562500000000e+00F,   /* 0x3fa7c800 */

    1.32275390625000000000e+00F,   /* 0x3fa95000 */

    1.33447265625000000000e+00F,   /* 0x3faad000 */

    1.34619140625000000000e+00F,   /* 0x3fac5000 */

    1.35766601562500000000e+00F,   /* 0x3fadc800 */

    1.36914062500000000000e+00F,   /* 0x3faf4000 */

    1.38061523437500000000e+00F,   /* 0x3fb0b800 */

    1.39184570312500000000e+00F,   /* 0x3fb22800 */

    1.40307617187500000000e+00F,   /* 0x3fb39800 */

    1.41406250000000000000e+00F,   /* 0x3fb50000 */

    1.42504882812500000000e+00F,   /* 0x3fb66800 */

    1.43603515625000000000e+00F,   /* 0x3fb7d000 */

    1.44677734375000000000e+00F,   /* 0x3fb93000 */

    1.45751953125000000000e+00F,   /* 0x3fba9000 */

    1.46826171875000000000e+00F,   /* 0x3fbbf000 */

    1.47900390625000000000e+00F,   /* 0x3fbd5000 */

    1.48950195312500000000e+00F,   /* 0x3fbea800 */

    1.50000000000000000000e+00F,   /* 0x3fc00000 */

    1.51025390625000000000e+00F,   /* 0x3fc15000 */

    1.52050781250000000000e+00F,   /* 0x3fc2a000 */

    1.53076171875000000000e+00F,   /* 0x3fc3f000 */

    1.54101562500000000000e+00F,   /* 0x3fc54000 */

    1.55102539062500000000e+00F,   /* 0x3fc68800 */

    1.56103515625000000000e+00F,   /* 0x3fc7d000 */

    1.57104492187500000000e+00F,   /* 0x3fc91800 */

    1.58105468750000000000e+00F,   /* 0x3fca6000 */

    1.59082031250000000000e+00F,   /* 0x3fcba000 */

    1.60058593750000000000e+00F,   /* 0x3fcce000 */

    1.61035156250000000000e+00F,   /* 0x3fce2000 */

    1.62011718750000000000e+00F,   /* 0x3fcf6000 */

    1.62963867187500000000e+00F,   /* 0x3fd09800 */

    1.63916015625000000000e+00F,   /* 0x3fd1d000 */

    1.64868164062500000000e+00F,   /* 0x3fd30800 */

    1.65820312500000000000e+00F,   /* 0x3fd44000 */

    1.66748046875000000000e+00F,   /* 0x3fd57000 */

    1.67700195312500000000e+00F,   /* 0x3fd6a800 */

    1.68627929687500000000e+00F,   /* 0x3fd7d800 */

    1.69555664062500000000e+00F,   /* 0x3fd90800 */

    1.70458984375000000000e+00F,   /* 0x3fda3000 */

    1.71386718750000000000e+00F,   /* 0x3fdb6000 */

    1.72290039062500000000e+00F,   /* 0x3fdc8800 */

    1.73193359375000000000e+00F,   /* 0x3fddb000 */

    1.74096679687500000000e+00F,   /* 0x3fded800 */

    1.75000000000000000000e+00F,   /* 0x3fe00000 */

    1.75878906250000000000e+00F,   /* 0x3fe12000 */

    1.76757812500000000000e+00F,   /* 0x3fe24000 */

    1.77636718750000000000e+00F,   /* 0x3fe36000 */

    1.78515625000000000000e+00F,   /* 0x3fe48000 */

    1.79394531250000000000e+00F,   /* 0x3fe5a000 */

    1.80273437500000000000e+00F,   /* 0x3fe6c000 */

    1.81127929687500000000e+00F,   /* 0x3fe7d800 */

    1.81982421875000000000e+00F,   /* 0x3fe8f000 */

    1.82836914062500000000e+00F,   /* 0x3fea0800 */

    1.83691406250000000000e+00F,   /* 0x3feb2000 */

    1.84545898437500000000e+00F,   /* 0x3fec3800 */

    1.85400390625000000000e+00F,   /* 0x3fed5000 */

    1.86230468750000000000e+00F,   /* 0x3fee6000 */

    1.87060546875000000000e+00F,   /* 0x3fef7000 */

    1.87915039062500000000e+00F,   /* 0x3ff08800 */

    1.88745117187500000000e+00F,   /* 0x3ff19800 */

    1.89550781250000000000e+00F,   /* 0x3ff2a000 */

    1.90380859375000000000e+00F,   /* 0x3ff3b000 */

    1.91210937500000000000e+00F,   /* 0x3ff4c000 */

    1.92016601562500000000e+00F,   /* 0x3ff5c800 */

    1.92822265625000000000e+00F,   /* 0x3ff6d000 */

    1.93627929687500000000e+00F,   /* 0x3ff7d800 */

    1.94433593750000000000e+00F,   /* 0x3ff8e000 */

    1.95239257812500000000e+00F,   /* 0x3ff9e800 */

    1.96044921875000000000e+00F,   /* 0x3ffaf000 */

    1.96826171875000000000e+00F,   /* 0x3ffbf000 */

    1.97631835937500000000e+00F,   /* 0x3ffcf800 */

    1.98413085937500000000e+00F,   /* 0x3ffdf800 */

    1.99194335937500000000e+00F,   /* 0x3ffef800 */

    2.00000000000000000000e+00F};  /* 0x40000000 */


static const float rt_jby32_trail_table_float[97] = {

    0.00000000000000000000e+00F,   /* 0x00000000 */

    1.23941208585165441036e-04F,   /* 0x3901f637 */

    1.46876545841223560274e-05F,   /* 0x37766aff */

    1.70736297150142490864e-04F,   /* 0x393307ad */

    1.13296780909877270460e-04F,   /* 0x38ed99bf */

    9.53458802541717886925e-05F,   /* 0x38c7f46e */

    1.25126505736261606216e-04F,   /* 0x39033464 */

    2.10342666832730174065e-04F,   /* 0x395c8f6e */

    1.14066875539720058441e-04F,   /* 0x38ef3730 */

    8.72047676239162683487e-05F,   /* 0x38b6e1b4 */

    1.36111237225122749805e-04F,   /* 0x390eb915 */

    2.26244374061934649944e-05F,   /* 0x37bdc99c */

    2.40658700931817293167e-04F,   /* 0x397c5954 */

    6.31069415248930454254e-05F,   /* 0x38845848 */

    2.27412077947519719601e-04F,   /* 0x396e7577 */

    5.90185391047270968556e-06F,   /* 0x36c6088a */

    1.35496389702893793583e-04F,   /* 0x390e1409 */

    1.32179571664892137051e-04F,   /* 0x390a99af */

    0.00000000000000000000e+00F,   /* 0x00000000 */

    2.31086043640971183777e-04F,   /* 0x39724fb0 */

    9.66752704698592424393e-05F,   /* 0x38cabe24 */

    8.85332483449019491673e-05F,   /* 0x38b9aaed */

    2.09980673389509320259e-04F,   /* 0x395c2e42 */

    2.20044588786549866199e-04F,   /* 0x3966bbc5 */

    1.21749282698146998882e-04F,   /* 0x38ff53a6 */

    1.62125259521417319775e-04F,   /* 0x392a002b */

    9.97955357888713479042e-05F,   /* 0x38d14952 */

    1.81545779923908412457e-04F,   /* 0x393e5d53 */

    1.65768768056295812130e-04F,   /* 0x392dd237 */

    5.48927710042335093021e-05F,   /* 0x38663caa */

    9.53875860432162880898e-05F,   /* 0x38c80ad2 */

    4.53481625299900770187e-05F,   /* 0x383e3438 */

    1.51062369695864617825e-04F,   /* 0x391e667f */

    1.70453247847035527229e-04F,   /* 0x3932bbb2 */

    1.05505387182347476482e-04F,   /* 0x38dd42c6 */

    2.02269104192964732647e-04F,   /* 0x39541833 */

    2.18442466575652360916e-04F,   /* 0x39650db4 */

    1.55796806211583316326e-04F,   /* 0x39235d63 */

    1.60395247803535312414e-05F,   /* 0x37868c9e */

    4.49578510597348213196e-05F,   /* 0x383c9120 */

    0.00000000000000000000e+00F,   /* 0x00000000 */

    1.26840444863773882389e-04F,   /* 0x39050079 */

    1.82820076588541269302e-04F,   /* 0x393fb364 */

    1.69370483490638434887e-04F,   /* 0x3931990b */

    8.78757418831810355186e-05F,   /* 0x38b849ee */

    1.83815121999941766262e-04F,   /* 0x3940be7f */

    2.14343352126888930798e-04F,   /* 0x3960c15b */

    1.80714370799250900745e-04F,   /* 0x393d7e25 */

    8.41425862745381891727e-05F,   /* 0x38b075b5 */

    1.69945167726837098598e-04F,   /* 0x3932334f */

    1.95121858268976211548e-04F,   /* 0x394c99a0 */

    1.60778334247879683971e-04F,   /* 0x3928969b */

    6.79871009197086095810e-05F,   /* 0x388e944c */

    1.61929419846273958683e-04F,   /* 0x3929cb99 */

    1.99474830878898501396e-04F,   /* 0x39512a1e */

    1.81604162207804620266e-04F,   /* 0x393e6cff */

    1.09270178654696792364e-04F,   /* 0x38e527fb */

    2.27539261686615645885e-04F,   /* 0x396e979b */

    4.90300008095800876617e-05F,   /* 0x384da590 */

    6.28985289949923753738e-05F,   /* 0x3883e864 */

    2.58551553997676819563e-05F,   /* 0x37d8e386 */

    1.82868374395184218884e-04F,   /* 0x393fc05b */

    4.64625991298817098141e-05F,   /* 0x3842e0d6 */

    1.05703387816902250051e-04F,   /* 0x38ddad13 */

    1.17213814519345760345e-04F,   /* 0x38f5d0b0 */

    8.17377731436863541603e-05F,   /* 0x38ab6aa2 */

    0.00000000000000000000e+00F,   /* 0x00000000 */

    1.16847433673683553934e-04F,   /* 0x38f50bfd */

    1.88827965757809579372e-04F,   /* 0x3946001f */

    2.16612941585481166840e-04F,   /* 0x39632298 */

    2.00857131858356297016e-04F,   /* 0x39529d2d */

    1.42199307447299361229e-04F,   /* 0x39151b56 */

    4.12627305195201188326e-05F,   /* 0x382d1185 */

    1.42796401632949709892e-04F,   /* 0x3915bb9e */

    2.03253570361994206905e-04F,   /* 0x39552077 */

    2.23214170546270906925e-04F,   /* 0x396a0e99 */

    2.03244591830298304558e-04F,   /* 0x39551e0e */

    1.43898156238719820976e-04F,   /* 0x3916e35e */

    4.57155256299301981926e-05F,   /* 0x383fbeac */

    1.53365719597786664963e-04F,   /* 0x3920d0cc */

    2.23224633373320102692e-04F,   /* 0x396a1168 */

    1.16566716314991936088e-05F,   /* 0x37439106 */

    7.43694272387074306607e-06F,   /* 0x36f98ada */

    2.11048507480882108212e-04F,   /* 0x395d4ce7 */

    1.34682719362899661064e-04F,   /* 0x390d399e */

    2.29425968427676707506e-05F,   /* 0x37c074da */

    1.20421340398024767637e-04F,   /* 0x38fc8ab7 */

    1.83421318070031702518e-04F,   /* 0x394054c9 */

    2.12376224226318299770e-04F,   /* 0x395eb14f */

    2.07710763788782060146e-04F,   /* 0x3959ccef */

    1.69840845046564936638e-04F,   /* 0x3932174e */

    9.91739216260612010956e-05F,   /* 0x38cffb98 */

    2.40249748458154499531e-04F,   /* 0x397beb8d */

    1.05178231024183332920e-04F,   /* 0x38dc9322 */

    1.82623916771262884140e-04F,   /* 0x393f7ebc */

    2.28821940254420042038e-04F,   /* 0x396fefec */

    0.00000000000000000000e+00F};  /* 0x00000000 */


/* Handle special arguments first */


  GET_BITS_SP32(x, ux);

  ax = ux & (~SIGNBIT_SP32);


  if(ax >= 0x7f800000)

    {

      /* x is either NaN or infinity */

      if (ux & MANTBITS_SP32)

        /* x is NaN */

        return x + x; /* Raise invalid if it is a signalling NaN */

      else if (ux & SIGNBIT_SP32)

        return nanf_with_flags(AMD_F_INVALID);

      else

        /* x is positive infinity */

        return x;

    }

  else if (ux & SIGNBIT_SP32)

    {

      /* x is negative. */

      if (x == 0.0F)

        /* Handle negative zero first */

        return x;

      else

        return nanf_with_flags(AMD_F_INVALID);

    }

  else if (ux <= 0x007fffff)

    {

      /* x is denormalised or zero */

      if (ux == 0)

        /* x is zero */

        return x;

      else

        {

          /* x is denormalised; scale it up */

          /* Normalize x by increasing the exponent by 26

             and subtracting a correction to account for the implicit

             bit. This replaces a slow denormalized

             multiplication by a fast normal subtraction. */

          static const float corr = 7.888609052210118054e-31F; /* 0x0d800000 */

          denorm = 1;

          GET_BITS_SP32(x, ux);

          PUT_BITS_SP32(ux | 0x0d800000, x);

          x -= corr;

          GET_BITS_SP32(x, ux);

        }

    }


  /* Main algorithm */


  /*

     Find y and e such that x = 2^e * y, where y in [1,4).

     This is done using an in-lined variant of splitFloat,

     which also ensures that e is even.

   */

  y = x;

  ux &= EXPBITS_SP32;

  ux >>= EXPSHIFTBITS_SP32;

  if (ux & 1)

    {

      GET_BITS_SP32(y, u);

      u &= (SIGNBIT_SP32 | MANTBITS_SP32);

      u |= ONEEXPBITS_SP32;

      PUT_BITS_SP32(u, y);

      e = ux - EXPBIAS_SP32;

    }

  else

    {

      GET_BITS_SP32(y, u);

      u &= (SIGNBIT_SP32 | MANTBITS_SP32);

      u |= TWOEXPBITS_SP32;

      PUT_BITS_SP32(u, y);

      e = ux - EXPBIAS_SP32 - 1;

    }


  /* Find the index of the sub-interval of [1,4) in which y lies. */


  index = (int)(32.0F*y+0.5);


  /* Look up the table values and compute c and r = c/t */


  rtc_lead = rt_jby32_lead_table_float[index-32];

  rtc_trail = rt_jby32_trail_table_float[index-32];

  c = 0.03125F*index;

  r = (y - c)/c;


  /*

  Find q = sqrt(1+r) - 1.

  From one step of Newton on (q+1)^2 = 1+r

  */


  p = r*0.5F - r*r*(0.1250079870F - r*(0.6250522999e-01F));

  twop = p + p;

  q = p - (p*p + (twop - r))/(twop + 2.0);


  /* Reconstruction */


  rtc = rtc_lead + rtc_trail;

  e >>= 1; /* e = e/2 */

  z = rtc_lead + (rtc*q+rtc_trail);


  if (denorm)

    {

      /* Scale by 2**(e-13) */

      PUT_BITS_SP32(((e - 13) + EXPBIAS_SP32) << EXPSHIFTBITS_SP32, r);

      z *= r;

    }

  else

    {

      /* Scale by 2**e */

      PUT_BITS_SP32((e + EXPBIAS_SP32) << EXPSHIFTBITS_SP32, r);

      z *= r;

    }


  return z;


}

#endif /* SQRTF_AMD_INLINE */


#ifdef USE_LOG_KERNEL_AMD

static inline void log_kernel_amd64(double x, unsigned long long ux, int *xexp, double *r1, double *r2)

{


  int expadjust;

  double r, z1, z2, correction, f, f1, f2, q, u, v, poly;

  int index;


  /*

    Computes natural log(x). Algorithm based on:

    Ping-Tak Peter Tang

    "Table-driven implementation of the logarithm function in IEEE

    floating-point arithmetic"

    ACM Transactions on Mathematical Software (TOMS)

    Volume 16, Issue 4 (December 1990)

  */


/* Arrays ln_lead_table and ln_tail_table contain

   leading and trailing parts respectively of precomputed

   values of natural log(1+i/64), for i = 0, 1, ..., 64.

   ln_lead_table contains the first 24 bits of precision,

   and ln_tail_table contains a further 53 bits precision. */


  static const double ln_lead_table[65] = {

    0.00000000000000000000e+00,   /* 0x0000000000000000 */

    1.55041813850402832031e-02,   /* 0x3f8fc0a800000000 */

    3.07716131210327148438e-02,   /* 0x3f9f829800000000 */

    4.58095073699951171875e-02,   /* 0x3fa7745800000000 */

    6.06245994567871093750e-02,   /* 0x3faf0a3000000000 */

    7.52233862876892089844e-02,   /* 0x3fb341d700000000 */

    8.96121263504028320312e-02,   /* 0x3fb6f0d200000000 */

    1.03796780109405517578e-01,   /* 0x3fba926d00000000 */

    1.17783010005950927734e-01,   /* 0x3fbe270700000000 */

    1.31576299667358398438e-01,   /* 0x3fc0d77e00000000 */

    1.45181953907012939453e-01,   /* 0x3fc2955280000000 */

    1.58604979515075683594e-01,   /* 0x3fc44d2b00000000 */

    1.71850204467773437500e-01,   /* 0x3fc5ff3000000000 */

    1.84922337532043457031e-01,   /* 0x3fc7ab8900000000 */

    1.97825729846954345703e-01,   /* 0x3fc9525a80000000 */

    2.10564732551574707031e-01,   /* 0x3fcaf3c900000000 */

    2.23143517971038818359e-01,   /* 0x3fcc8ff780000000 */

    2.35566020011901855469e-01,   /* 0x3fce270700000000 */

    2.47836112976074218750e-01,   /* 0x3fcfb91800000000 */

    2.59957492351531982422e-01,   /* 0x3fd0a324c0000000 */

    2.71933674812316894531e-01,   /* 0x3fd1675c80000000 */

    2.83768117427825927734e-01,   /* 0x3fd22941c0000000 */

    2.95464158058166503906e-01,   /* 0x3fd2e8e280000000 */

    3.07025015354156494141e-01,   /* 0x3fd3a64c40000000 */

    3.18453729152679443359e-01,   /* 0x3fd4618bc0000000 */

    3.29753279685974121094e-01,   /* 0x3fd51aad80000000 */

    3.40926527976989746094e-01,   /* 0x3fd5d1bd80000000 */

    3.51976394653320312500e-01,   /* 0x3fd686c800000000 */

    3.62905442714691162109e-01,   /* 0x3fd739d7c0000000 */

    3.73716354370117187500e-01,   /* 0x3fd7eaf800000000 */

    3.84411692619323730469e-01,   /* 0x3fd89a3380000000 */

    3.94993782043457031250e-01,   /* 0x3fd9479400000000 */

    4.05465066432952880859e-01,   /* 0x3fd9f323c0000000 */

    4.15827870368957519531e-01,   /* 0x3fda9cec80000000 */

    4.26084339618682861328e-01,   /* 0x3fdb44f740000000 */

    4.36236739158630371094e-01,   /* 0x3fdbeb4d80000000 */

    4.46287095546722412109e-01,   /* 0x3fdc8ff7c0000000 */

    4.56237375736236572266e-01,   /* 0x3fdd32fe40000000 */

    4.66089725494384765625e-01,   /* 0x3fddd46a00000000 */

    4.75845873355865478516e-01,   /* 0x3fde744240000000 */

    4.85507786273956298828e-01,   /* 0x3fdf128f40000000 */

    4.95077252388000488281e-01,   /* 0x3fdfaf5880000000 */

    5.04556000232696533203e-01,   /* 0x3fe02552a0000000 */

    5.13945698738098144531e-01,   /* 0x3fe0723e40000000 */

    5.23248136043548583984e-01,   /* 0x3fe0be72e0000000 */

    5.32464742660522460938e-01,   /* 0x3fe109f380000000 */

    5.41597247123718261719e-01,   /* 0x3fe154c3c0000000 */

    5.50647079944610595703e-01,   /* 0x3fe19ee6a0000000 */

    5.59615731239318847656e-01,   /* 0x3fe1e85f40000000 */

    5.68504691123962402344e-01,   /* 0x3fe23130c0000000 */

    5.77315330505371093750e-01,   /* 0x3fe2795e00000000 */

    5.86049020290374755859e-01,   /* 0x3fe2c0e9e0000000 */

    5.94707071781158447266e-01,   /* 0x3fe307d720000000 */

    6.03290796279907226562e-01,   /* 0x3fe34e2880000000 */

    6.11801505088806152344e-01,   /* 0x3fe393e0c0000000 */

    6.20240390300750732422e-01,   /* 0x3fe3d90260000000 */

    6.28608644008636474609e-01,   /* 0x3fe41d8fe0000000 */

    6.36907458305358886719e-01,   /* 0x3fe4618bc0000000 */

    6.45137906074523925781e-01,   /* 0x3fe4a4f840000000 */

    6.53301239013671875000e-01,   /* 0x3fe4e7d800000000 */

    6.61398470401763916016e-01,   /* 0x3fe52a2d20000000 */

    6.69430613517761230469e-01,   /* 0x3fe56bf9c0000000 */

    6.77398800849914550781e-01,   /* 0x3fe5ad4040000000 */

    6.85303986072540283203e-01,   /* 0x3fe5ee02a0000000 */

    6.93147122859954833984e-01};  /* 0x3fe62e42e0000000 */


  static const double ln_tail_table[65] = {

    0.00000000000000000000e+00,   /* 0x0000000000000000 */

    5.15092497094772879206e-09,   /* 0x3e361f807c79f3db */

    4.55457209735272790188e-08,   /* 0x3e6873c1980267c8 */

    2.86612990859791781788e-08,   /* 0x3e5ec65b9f88c69e */

    2.23596477332056055352e-08,   /* 0x3e58022c54cc2f99 */

    3.49498983167142274770e-08,   /* 0x3e62c37a3a125330 */

    3.23392843005887000414e-08,   /* 0x3e615cad69737c93 */

    1.35722380472479366661e-08,   /* 0x3e4d256ab1b285e9 */

    2.56504325268044191098e-08,   /* 0x3e5b8abcb97a7aa2 */

    5.81213608741512136843e-08,   /* 0x3e6f34239659a5dc */

    5.59374849578288093334e-08,   /* 0x3e6e07fd48d30177 */

    5.06615629004996189970e-08,   /* 0x3e6b32df4799f4f6 */

    5.24588857848400955725e-08,   /* 0x3e6c29e4f4f21cf8 */

    9.61968535632653505972e-10,   /* 0x3e1086c848df1b59 */

    1.34829655346594463137e-08,   /* 0x3e4cf456b4764130 */

    3.65557749306383026498e-08,   /* 0x3e63a02ffcb63398 */

    3.33431709374069198903e-08,   /* 0x3e61e6a6886b0976 */

    5.13008650536088382197e-08,   /* 0x3e6b8abcb97a7aa2 */

    5.09285070380306053751e-08,   /* 0x3e6b578f8aa35552 */

    3.20853940845502057341e-08,   /* 0x3e6139c871afb9fc */

    4.06713248643004200446e-08,   /* 0x3e65d5d30701ce64 */

    5.57028186706125221168e-08,   /* 0x3e6de7bcb2d12142 */

    5.48356693724804282546e-08,   /* 0x3e6d708e984e1664 */

    1.99407553679345001938e-08,   /* 0x3e556945e9c72f36 */

    1.96585517245087232086e-09,   /* 0x3e20e2f613e85bda */

    6.68649386072067321503e-09,   /* 0x3e3cb7e0b42724f6 */

    5.89936034642113390002e-08,   /* 0x3e6fac04e52846c7 */

    2.85038578721554472484e-08,   /* 0x3e5e9b14aec442be */

    5.09746772910284482606e-08,   /* 0x3e6b5de8034e7126 */

    5.54234668933210171467e-08,   /* 0x3e6dc157e1b259d3 */

    6.29100830926604004874e-09,   /* 0x3e3b05096ad69c62 */

    2.61974119468563937716e-08,   /* 0x3e5c2116faba4cdd */

    4.16752115011186398935e-08,   /* 0x3e665fcc25f95b47 */

    2.47747534460820790327e-08,   /* 0x3e5a9a08498d4850 */

    5.56922172017964209793e-08,   /* 0x3e6de647b1465f77 */

    2.76162876992552906035e-08,   /* 0x3e5da71b7bf7861d */

    7.08169709942321478061e-09,   /* 0x3e3e6a6886b09760 */

    5.77453510221151779025e-08,   /* 0x3e6f0075eab0ef64 */

    4.43021445893361960146e-09,   /* 0x3e33071282fb989b */

    3.15140984357495864573e-08,   /* 0x3e60eb43c3f1bed2 */

    2.95077445089736670973e-08,   /* 0x3e5faf06ecb35c84 */

    1.44098510263167149349e-08,   /* 0x3e4ef1e63db35f68 */

    1.05196987538551827693e-08,   /* 0x3e469743fb1a71a5 */

    5.23641361722697546261e-08,   /* 0x3e6c1cdf404e5796 */

    7.72099925253243069458e-09,   /* 0x3e4094aa0ada625e */

    5.62089493829364197156e-08,   /* 0x3e6e2d4c96fde3ec */

    3.53090261098577946927e-08,   /* 0x3e62f4d5e9a98f34 */

    3.80080516835568242269e-08,   /* 0x3e6467c96ecc5cbe */

    5.66961038386146408282e-08,   /* 0x3e6e7040d03dec5a */

    4.42287063097349852717e-08,   /* 0x3e67bebf4282de36 */

    3.45294525105681104660e-08,   /* 0x3e6289b11aeb783f */

    2.47132034530447431509e-08,   /* 0x3e5a891d1772f538 */

    3.59655343422487209774e-08,   /* 0x3e634f10be1fb591 */

    5.51581770357780862071e-08,   /* 0x3e6d9ce1d316eb93 */

    3.60171867511861372793e-08,   /* 0x3e63562a19a9c442 */

    1.94511067964296180547e-08,   /* 0x3e54e2adf548084c */

    1.54137376631349347838e-08,   /* 0x3e508ce55cc8c97a */

    3.93171034490174464173e-09,   /* 0x3e30e2f613e85bda */

    5.52990607758839766440e-08,   /* 0x3e6db03ebb0227bf */

    3.29990737637586136511e-08,   /* 0x3e61b75bb09cb098 */

    1.18436010922446096216e-08,   /* 0x3e496f16abb9df22 */

    4.04248680368301346709e-08,   /* 0x3e65b3f399411c62 */

    2.27418915900284316293e-08,   /* 0x3e586b3e59f65355 */

    1.70263791333409206020e-08,   /* 0x3e52482ceae1ac12 */

    5.76999904754328540596e-08};  /* 0x3e6efa39ef35793c */


  /* Approximating polynomial coefficients for x near 1.0 */

  static const double

    ca_1 = 8.33333333333317923934e-02,  /* 0x3fb55555555554e6 */

    ca_2 = 1.25000000037717509602e-02,  /* 0x3f89999999bac6d4 */

    ca_3 = 2.23213998791944806202e-03,  /* 0x3f62492307f1519f */

    ca_4 = 4.34887777707614552256e-04;  /* 0x3f3c8034c85dfff0 */


  /* Approximating polynomial coefficients for other x */

  static const double

    cb_1 = 8.33333333333333593622e-02,  /* 0x3fb5555555555557 */

    cb_2 = 1.24999999978138668903e-02,  /* 0x3f89999999865ede */

    cb_3 = 2.23219810758559851206e-03;  /* 0x3f6249423bd94741 */


  static const unsigned long long

    log_thresh1 = 0x3fee0faa00000000,

    log_thresh2 = 0x3ff1082c00000000;


  /* log_thresh1 = 9.39412117004394531250e-1 = 0x3fee0faa00000000

     log_thresh2 = 1.06449508666992187500 = 0x3ff1082c00000000 */

  if (ux >= log_thresh1 && ux <= log_thresh2)

    {

      /* Arguments close to 1.0 are handled separately to maintain

         accuracy.


         The approximation in this region exploits the identity

             log( 1 + r ) = log( 1 + u/2 )  /  log( 1 - u/2 ), where

             u  = 2r / (2+r).

         Note that the right hand side has an odd Taylor series expansion

         which converges much faster than the Taylor series expansion of

         log( 1 + r ) in r. Thus, we approximate log( 1 + r ) by

             u + A1 * u^3 + A2 * u^5 + ... + An * u^(2n+1).


         One subtlety is that since u cannot be calculated from

         r exactly, the rounding error in the first u should be

         avoided if possible. To accomplish this, we observe that

                       u  =  r  -  r*r/(2+r).

         Since x (=1+r) is the input argument, and thus presumed exact,

         the formula above approximates u accurately because

                       u  =  r  -  correction,

         and the magnitude of "correction" (of the order of r*r)

         is small.

         With these observations, we will approximate log( 1 + r ) by

            r + (  (A1*u^3 + ... + An*u^(2n+1)) - correction ).


         We approximate log(1+r) by an odd polynomial in u, where

                  u = 2r/(2+r) = r - r*r/(2+r).

      */

      r = x - 1.0;

      u = r / (2.0 + r);

      correction = r * u;

      u = u + u;

      v = u * u;

      z1 = r;

      z2 = (u * v * (ca_1 + v * (ca_2 + v * (ca_3 + v * ca_4))) - correction);

      *r1 = z1;

      *r2 = z2;

      *xexp = 0;

    }

  else

    {

      /*

        First, we decompose the argument x to the form

        x  =  2**M  *  (F1  +  F2),

        where  1 <= F1+F2 < 2, M has the value of an integer,

        F1 = 1 + j/64, j ranges from 0 to 64, and |F2| <= 1/128.


        Second, we approximate log( 1 + F2/F1 ) by an odd polynomial

        in U, where U  =  2 F2 / (2 F2 + F1).

        Note that log( 1 + F2/F1 ) = log( 1 + U/2 ) - log( 1 - U/2 ).

        The core approximation calculates

        Poly = [log( 1 + U/2 ) - log( 1 - U/2 )]/U   -   1.

        Note that  log(1 + U/2) - log(1 - U/2) = 2 arctanh ( U/2 ),

        thus, Poly =  2 arctanh( U/2 ) / U  -  1.


        It is not hard to see that

          log(x) = M*log(2) + log(F1) + log( 1 + F2/F1 ).

        Hence, we return Z1 = log(F1), and  Z2 = log( 1 + F2/F1).

        The values of log(F1) are calculated beforehand and stored

        in the program.

      */


      f = x;

      if (ux < IMPBIT_DP64)

        {

          /* The input argument x is denormalized */

          /* Normalize f by increasing the exponent by 60

             and subtracting a correction to account for the implicit

             bit. This replaces a slow denormalized

             multiplication by a fast normal subtraction. */

          static const double corr = 2.5653355008114851558350183e-290; /* 0x03d0000000000000 */

          GET_BITS_DP64(f, ux);

          ux |= 0x03d0000000000000;

          PUT_BITS_DP64(ux, f);

          f -= corr;

          GET_BITS_DP64(f, ux);

          expadjust = 60;

        }

      else

        expadjust = 0;


      /* Store the exponent of x in xexp and put

         f into the range [0.5,1) */

      *xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64 - expadjust;

      PUT_BITS_DP64((ux & MANTBITS_DP64) | HALFEXPBITS_DP64, f);


      /* Now  x = 2**xexp  * f,  1/2 <= f < 1. */


      /* Set index to be the nearest integer to 128*f */

      r = 128.0 * f;

      index = (int)(r + 0.5);


      z1 = ln_lead_table[index-64];

      q = ln_tail_table[index-64];

      f1 = index * 0.0078125; /* 0.0078125 = 1/128 */

      f2 = f - f1;

      /* At this point, x = 2**xexp * ( f1  +  f2 ) where

         f1 = j/128, j = 64, 65, ..., 128 and |f2| <= 1/256. */


      /* Calculate u = 2 f2 / ( 2 f1 + f2 ) = f2 / ( f1 + 0.5*f2 ) */

      /* u = f2 / (f1 + 0.5 * f2); */

      u = f2 / (f1 + 0.5 * f2);


      /* Here, |u| <= 2(exp(1/16)-1) / (exp(1/16)+1).

         The core approximation calculates

         poly = [log(1 + u/2) - log(1 - u/2)]/u  -  1  */

      v = u * u;

      poly = (v * (cb_1 + v * (cb_2 + v * cb_3)));

      z2 = q + (u + u * poly);

      *r1 = z1;

      *r2 = z2;

    }

  return;

}

#endif /* USE_LOG_KERNEL_AMD */


#if defined(USE_REMAINDER_PIBY2F_INLINE)

/* Define this to get debugging print statements activated */

#define DEBUGGING_PRINT

#undef DEBUGGING_PRINT


#ifdef DEBUGGING_PRINT

#include <stdio.h>

char *d2b(long long d, int bitsper, int point)

{

  static char buff[200];

  int i, j;

  j = bitsper;

  if (point >= 0 && point <= bitsper)

    j++;

  buff[j] = '\0';

  for (i = bitsper - 1; i >= 0; i--)

    {

      j--;

      if (d % 2 == 1)

        buff[j] = '1';

      else

        buff[j] = '0';

      if (i == point)

        {

          j--;

          buff[j] = '.';

        }

      d /= 2;

    }

  return buff;

}

#endif


/* Given positive argument x, reduce it to the range [-pi/4,pi/4] using

   extra precision, and return the result in r.

   Return value "region" tells how many lots of pi/2 were subtracted

   from x to put it in the range [-pi/4,pi/4], mod 4. */

static inline void __remainder_piby2f_inline(unsigned long long ux, double *r, int *region)

{


      /* This method simulates multi-precision floating-point

         arithmetic and is accurate for all 1 <= x < infinity */

#if 0

      const int bitsper = 36;

#else

#define bitsper 36

#endif

      unsigned long long res[10];

      unsigned long long u, carry, mask, mant, nextbits;

      int first, last, i, rexp, xexp, resexp, ltb, determ, bc;

      double dx;

      static const double

        piby2 = 1.57079632679489655800e+00; /* 0x3ff921fb54442d18 */

      static unsigned long long pibits[] =

      {

        0LL,

        5215LL, 13000023176LL, 11362338026LL, 67174558139LL,

        34819822259LL, 10612056195LL, 67816420731LL, 57840157550LL,

        19558516809LL, 50025467026LL, 25186875954LL, 18152700886LL

      };


      xexp = (int)(((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64);

      ux = ((ux & MANTBITS_DP64) | IMPBIT_DP64) >> 29;


      /* Now ux is the mantissa bit pattern of x as a long long integer */

      mask = 1;

      mask = (mask << bitsper) - 1;


      /* Set first and last to the positions of the first

         and last chunks of 2/pi that we need */

      first = xexp / bitsper;

      resexp = xexp - first * bitsper;

      /* 120 is the theoretical maximum number of bits (actually

         115 for IEEE single precision) that we need to extract

         from the middle of 2/pi to compute the reduced argument

         accurately enough for our purposes */

      last = first + 120 / bitsper;


#ifdef DEBUGGING_PRINT

      printf("first = %d, last = %d\n", first, last);

#endif


      /* Do a long multiplication of the bits of 2/pi by the

         integer mantissa */

#if 0

      for (i = last; i >= first; i--)

        {

          u = pibits[i] * ux + carry;

          res[i - first] = u & mask;

          carry = u >> bitsper;

        }

      res[last - first + 1] = 0;

#else

      /* Unroll the loop. This is only correct because we know

         that bitsper is fixed as 36. */

      res[4] = 0;

      u = pibits[last] * ux;

      res[3] = u & mask;

      carry = u >> bitsper;

      u = pibits[last - 1] * ux + carry;

      res[2] = u & mask;

      carry = u >> bitsper;

      u = pibits[last - 2] * ux + carry;

      res[1] = u & mask;

      carry = u >> bitsper;

      u = pibits[first] * ux + carry;

      res[0] = u & mask;

#endif


#ifdef DEBUGGING_PRINT

      printf("resexp = %d\n", resexp);

      printf("Significant part of x * 2/pi with binary"

             " point in correct place:\n");

      for (i = 0; i <= last - first; i++)

        {

          if (i > 0 && i % 5 == 0)

            printf("\n ");

          if (i == 1)

            printf("%s ", d2b(res[i], bitsper, resexp));

          else

            printf("%s ", d2b(res[i], bitsper, -1));

        }

      printf("\n");

#endif


      /* Reconstruct the result */

      ltb = (int)((((res[0] << bitsper) | res[1])

                   >> (bitsper - 1 - resexp)) & 7);


      /* determ says whether the fractional part is >= 0.5 */

      determ = ltb & 1;


#ifdef DEBUGGING_PRINT

      printf("ltb = %d (last two bits before binary point"

             " and first bit after)\n", ltb);

      printf("determ = %d (1 means need to negate because the fractional\n"

             "            part of x * 2/pi is greater than 0.5)\n", determ);

#endif


      i = 1;

      if (determ)

        {

          /* The mantissa is >= 0.5. We want to subtract it

             from 1.0 by negating all the bits */

          *region = ((ltb >> 1) + 1) & 3;

          mant = 1;

          mant = ~(res[1]) & ((mant << (bitsper - resexp)) - 1);

          while (mant < 0x0000000000010000)

            {

              i++;

              mant = (mant << bitsper) | (~(res[i]) & mask);

            }

          nextbits = (~(res[i+1]) & mask);

        }

      else

        {

          *region = (ltb >> 1);

          mant = 1;

          mant = res[1] & ((mant << (bitsper - resexp)) - 1);

          while (mant < 0x0000000000010000)

            {

              i++;

              mant = (mant << bitsper) | res[i];

            }

          nextbits = res[i+1];

        }


#ifdef DEBUGGING_PRINT

      printf("First bits of mant = %s\n", d2b(mant, bitsper, -1));

#endif


      /* Normalize the mantissa. The shift value 6 here, determined by

         trial and error, seems to give optimal speed. */

      bc = 0;

      while (mant < 0x0000400000000000)

        {

          bc += 6;

          mant <<= 6;

        }

      while (mant < 0x0010000000000000)

        {

          bc++;

          mant <<= 1;

        }

      mant |= nextbits >> (bitsper - bc);


      rexp = 52 + resexp - bc - i * bitsper;


#ifdef DEBUGGING_PRINT

      printf("Normalised mantissa = 0x%016lx\n", mant);

      printf("Exponent to be inserted on mantissa = rexp = %d\n", rexp);

#endif


      /* Put the result exponent rexp onto the mantissa pattern */

      u = ((unsigned long long)rexp + EXPBIAS_DP64) << EXPSHIFTBITS_DP64;

      ux = (mant & MANTBITS_DP64) | u;

      if (determ)

        /* If we negated the mantissa we negate x too */

        ux |= SIGNBIT_DP64;

      PUT_BITS_DP64(ux, dx);


#ifdef DEBUGGING_PRINT

      printf("(x*2/pi) = %25.20e = %s\n", dx, double2hex(&dx));

#endif


      /* x is a double precision version of the fractional part of

         x * 2 / pi. Multiply x by pi/2 in double precision

         to get the reduced argument r. */

      *r = dx * piby2;


#ifdef DEBUGGING_PRINT

      printf(" r = frac(x*2/pi) * pi/2:\n");

      printf(" r = %25.20e = %s\n", *r, double2hex(r));

      printf("region = (number of pi/2 subtracted from x) mod 4 = %d\n",

             *region);

#endif

}

#endif /* USE_REMAINDER_PIBY2F_INLINE */


#endif /* LIBM_INLINES_AMD_H_INCLUDED */

_set_statfp
__ATTRIBUTE_SSE__ void _set_statfp(uintptr_t mask)
Definition: _set_statfp.c:11

index
#define index(s, c)
Definition: various.h:29

bc
PBATCH_CONTEXT bc
Definition: batch.c:67

int
unsigned int(__cdecl typeof(jpeg_read_scanlines))(struct jpeg_decompress_struct *
Definition: typeof.h:31

point
POINTL point
Definition: edittest.c:50

buff
static unsigned char buff[32768]
Definition: fatten.c:17

printf
#define printf
Definition: freeldr.h:97

x
GLint GLint GLint GLint GLint x
Definition: gl.h:1548

v
const GLdouble * v
Definition: gl.h:2040

type
GLuint GLuint GLsizei GLenum type
Definition: gl.h:1545

y
GLint GLint GLint GLint GLint GLint y
Definition: gl.h:1548

r
GLdouble GLdouble GLdouble r
Definition: gl.h:2055

t
GLdouble GLdouble t
Definition: gl.h:2047

q
GLdouble GLdouble GLdouble GLdouble q
Definition: gl.h:2063

n
GLdouble n
Definition: glext.h:7729

res
GLuint res
Definition: glext.h:9613

arg1
GLuint GLuint GLuint GLuint arg1
Definition: glext.h:9513

c
const GLubyte * c
Definition: glext.h:8905

index
GLuint index
Definition: glext.h:6031

mask
GLenum GLint GLuint mask
Definition: glext.h:6028

arg2
GLuint GLuint GLuint GLuint GLuint GLuint GLuint arg2
Definition: glext.h:9514

f
GLfloat f
Definition: glext.h:7540

flags
GLbitfield flags
Definition: glext.h:7161

first
const GLint * first
Definition: glext.h:5794

val
GLuint GLfloat * val
Definition: glext.h:7180

p
GLfloat GLfloat p
Definition: glext.h:8902

z
GLdouble GLdouble z
Definition: glext.h:5874

m
const GLfloat * m
Definition: glext.h:10848

i
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248

u
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble * u
Definition: glfuncs.h:240

j
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint GLint GLint j
Definition: glfuncs.h:250

d
#define d
Definition: ke_i.h:81

e
#define e
Definition: ke_i.h:82

f
#define f
Definition: ke_i.h:83

c
#define c
Definition: ke_i.h:80

_handle_errorf
float _handle_errorf(char *fname, int opcode, unsigned long long value, int type, int flags, int error, float arg1, float arg2, int nargs)
Definition: _handle_error.c:56

_handle_error
double _handle_error(char *fname, int opcode, unsigned long long value, int type, int flags, int error, double arg1, double arg2, int nargs)
Handles an error condition.
Definition: _handle_error.c:34

AMD_F_DIVBYZERO
#define AMD_F_DIVBYZERO
Definition: libm_new.h:85

AMD_F_INEXACT
#define AMD_F_INEXACT
Definition: libm_new.h:82

AMD_F_UNDERFLOW
#define AMD_F_UNDERFLOW
Definition: libm_new.h:84

AMD_F_INVALID
#define AMD_F_INVALID
Definition: libm_new.h:86

AMD_F_OVERFLOW
#define AMD_F_OVERFLOW
Definition: libm_new.h:83

libm_util.h

GET_BITS_SP32
#define GET_BITS_SP32(x, ux)
Definition: libm_util.h:105

IMPBIT_DP64
#define IMPBIT_DP64
Definition: libm_util.h:50

EXPBITS_SP32
#define EXPBITS_SP32
Definition: libm_util.h:69

EXPSHIFTBITS_DP64
#define EXPSHIFTBITS_DP64
Definition: libm_util.h:56

MANTLENGTH_DP64
#define MANTLENGTH_DP64
Definition: libm_util.h:62

SIGNBIT_SP32
#define SIGNBIT_SP32
Definition: libm_util.h:68

MANTLENGTH_SP32
#define MANTLENGTH_SP32
Definition: libm_util.h:86

EXPBIAS_SP32
#define EXPBIAS_SP32
Definition: libm_util.h:79

GET_BITS_DP64
#define GET_BITS_DP64(x, ux)
Definition: libm_util.h:118

HALFEXPBITS_DP64
#define HALFEXPBITS_DP64
Definition: libm_util.h:49

TWOEXPBITS_SP32
#define TWOEXPBITS_SP32
Definition: libm_util.h:72

PUT_BITS_SP32
#define PUT_BITS_SP32(ux, x)
Definition: libm_util.h:111

BIASEDEMAX_DP64
#define BIASEDEMAX_DP64
Definition: libm_util.h:59

SIGNBIT_DP64
#define SIGNBIT_DP64
Definition: libm_util.h:44

EXPBITS_DP64
#define EXPBITS_DP64
Definition: libm_util.h:45

TWOEXPBITS_DP64
#define TWOEXPBITS_DP64
Definition: libm_util.h:48

HALFEXPBITS_SP32
#define HALFEXPBITS_SP32
Definition: libm_util.h:73

EXPSHIFTBITS_SP32
#define EXPSHIFTBITS_SP32
Definition: libm_util.h:80

EXPBIAS_DP64
#define EXPBIAS_DP64
Definition: libm_util.h:55

MANTBITS_DP64
#define MANTBITS_DP64
Definition: libm_util.h:46

MANTBITS_SP32
#define MANTBITS_SP32
Definition: libm_util.h:70

ONEEXPBITS_DP64
#define ONEEXPBITS_DP64
Definition: libm_util.h:47

PUT_BITS_DP64
#define PUT_BITS_DP64(ux, x)
Definition: libm_util.h:124

BIASEDEMAX_SP32
#define BIASEDEMAX_SP32
Definition: libm_util.h:83

ONEEXPBITS_SP32
#define ONEEXPBITS_SP32
Definition: libm_util.h:71

dx
GLint dx
Definition: linetemp.h:97

uintptr_t
unsigned int uintptr_t
Definition: intrin.h:47

error
#define error(str)
Definition: mkdosfs.c:1605

r1
static DNS_RECORDW r1
Definition: record.c:37

r2
static DNS_RECORDW r2
Definition: record.c:38

last
static UINT UINT last
Definition: font.c:45

uk
IUnknown uk
Definition: typelib.c:6156

shift
#define shift
Definition: input.c:1755

k
int k
Definition: mpi.c:3369

long
#define long
Definition: qsort.c:33

bitsper
#define bitsper

n2
int n2
Definition: dwarfget.c:147

n1
int n1
Definition: dwarfget.c:147

n3
int n3
Definition: dwarfget.c:147

f2
#define f2(x, y, z)
Definition: sha1.c:31

f1
#define f1(x, y, z)
Definition: sha1.c:30

ax
ecx edi movl ebx edx edi decl ecx esi eax jecxz decl eax andl eax esi movl edx movl TEMP incl eax andl eax ecx incl ebx testl eax jnz xchgl ecx incl TEMP esp ecx subl ebx pushl ecx ecx edx ecx shrl ecx mm0 mm4 mm0 mm4 mm1 mm5 mm1 mm5 mm2 mm6 mm2 mm6 mm3 mm7 mm3 mm7 paddd mm0 paddd mm4 paddd mm0 paddd mm4 paddd mm0 paddd mm4 movq mm1 movq mm5 psrlq mm1 psrlq mm5 paddd mm0 paddd mm4 psrad mm0 psrad mm4 packssdw mm0 packssdw mm4 mm1 punpckldq mm0 pand mm1 pand mm0 por mm1 movq edi esi edx edi decl ecx jnz popl ecx andl ecx jecxz mm0 mm0 mm1 mm1 mm2 mm2 mm3 mm3 paddd mm0 paddd mm0 paddd mm0 movq mm1 psrlq mm1 paddd mm0 psrad mm0 packssdw mm0 movd eax movw ax
Definition: synth_sse3d.h:180

LL
#define LL
Definition: tui.h:166

value
Definition: pdh_main.c:96

_mm_setcsr
void _mm_setcsr(unsigned int a)
Definition: xmmintrin.h:542

_mm_getcsr
unsigned int _mm_getcsr(void)
Definition: xmmintrin.h:535