ReactOS 0.4.16-dev-136-g52192f1
sqrt.c
Go to the documentation of this file.
1
2#include <intrin.h>
3
4double
6 double x)
7{
8 register union
9 {
10 __m128d x128d;
11 __m128i x128i;
12 } u ;
13 register union
14 {
15 unsigned long long ullx;
16 double dbl;
17 } u2;
18
19 /* Set the lower double-precision value of u to x.
20 All that we want, is that the compiler understands that we have the
21 function parameter in a register that we can address as an __m128.
22 Sadly there is no obvious way to do that. If we use the union, VS will
23 generate code to store xmm0 in memory and the read it into a GPR.
24 We avoid memory access by using a direct move. But even here we won't
25 get a simple MOVSD. We can either do:
26 a) _mm_set_sd: move x into the lower part of an xmm register and zero
27 out the upper part (XORPD+MOVSD)
28 b) _mm_set1_pd: move x into the lower and higher part of an xmm register
29 (MOVSD+UNPCKLPD)
30 c) _mm_set_pd, which either generates a memory access, when we try to
31 tell it to keep the upper 64 bits, or generate 2 MOVAPS + UNPCKLPD
32 We choose a, which is probably the fastest.
33 */
34 u.x128d = _mm_set_sd(x);
35
36 /* Move the contents of the lower 64 bit into a 64 bit GPR using MOVD */
37 u2.ullx = _mm_cvtsi128_si64(u.x128i);
38
39 /* Check for negative values */
40 if (u2.ullx & 0x8000000000000000ULL)
41 {
42 /* Check if this is *really* negative and not just -0.0 */
43 if (u2.ullx != 0x8000000000000000ULL)
44 {
45 /* Return -1.#IND00 */
46 u2.ullx = 0xfff8000000000000ULL;
47 }
48
49 /* Return what we have */
50 return u2.dbl;
51 }
52
53 /* Check if this is a NaN (bits 52-62 are 1, bit 0-61 are not all 0) or
54 negative (bit 63 is 1) */
55 if (u2.ullx > 0x7FF0000000000000ULL)
56 {
57 /* Set this bit. That's what MS function does. */
58 u2.ullx |= 0x8000000000000ULL;
59 return u2.dbl;
60 }
61
62 /* Calculate the square root. */
63#ifdef _MSC_VER
64 /* Another YAY for the MS compiler. There are 2 instructions we could use:
65 SQRTPD (computes sqrt for 2 double values) or SQRTSD (computes sqrt for
66 only the lower 64 bit double value). Obviously we only need 1. And on
67 Some architectures SQRTPD is twice as slow as SQRTSD. On the other hand
68 the MS compiler is stupid and always generates an additional MOVAPS
69 instruction when SQRTSD is used. We choose to use SQRTPD here since on
70 modern hardware it's as fast as SQRTSD. */
71 u.x128d = _mm_sqrt_pd(u.x128d); // SQRTPD
72#else
73 u.x128d = _mm_sqrt_sd(u.x128d, u.x128d); // SQRTSD
74#endif
75
76 return u.x128d.m128d_f64[0];
77}
double sqrt(double x)
Definition: sqrt.c:5
__m128d _mm_set_sd(double w)
Definition: emmintrin.h:1028
__m128d _mm_sqrt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:611
__INTRIN_INLINE_SSE2 long long _mm_cvtsi128_si64(__m128i a)
Definition: emmintrin.h:1528
__m128d _mm_sqrt_pd(__m128d a)
Definition: emmintrin.h:617
GLint GLint GLint GLint GLint x
Definition: gl.h:1548
GLdouble GLdouble u2
Definition: glext.h:8308
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble * u
Definition: glfuncs.h:240