ReactOS 0.4.16-dev-311-g9382aa2
fvec.h
Go to the documentation of this file.
1
6#ifndef _FVEC_H_INCLUDED
7#define _FVEC_H_INCLUDED
8
9#ifndef RC_INVOKED
10#ifndef __cplusplus
11#error ERROR: This file is only supported in C++ compilations!
12#endif
13
14#include <xmmintrin.h>
15#include <assert.h>
16#include <ivec.h>
17#include <corecrt.h>
18
19#if defined(_ENABLE_VEC_DEBUG)
20#include <iostream>
21#endif
22
23#pragma pack(push,_CRT_PACKING)
24#pragma pack(push,16)
25
26#define EXPLICIT explicit
27
/**
 * F32vec4 - four packed single-precision floats stored in one __m128.
 *
 * A thin value wrapper over the packed-float intrinsics of <xmmintrin.h>.
 * Element 0 is the lowest float of the register; the four-float constructor
 * takes its arguments highest element first, exactly like _mm_set_ps.
 * The default constructor leaves the register uninitialized.
 */
class F32vec4 {
protected:
  __m128 vec;
public:
  F32vec4() {}
  F32vec4(__m128 m) { vec = m; }
  F32vec4(float f3,float f2,float f1,float f0) { vec = _mm_set_ps(f3,f2,f1,f0); }

  /* Broadcast constructors: all four elements receive the same value. */
  explicit F32vec4(float f) { vec = _mm_set_ps1(f); }
  explicit F32vec4(double d) { vec = _mm_set_ps1(static_cast<float>(d)); }

  /* Broadcast assignment from a scalar. */
  F32vec4& operator =(float f) { vec = _mm_set_ps1(f); return *this; }
  F32vec4& operator =(double d) { vec = _mm_set_ps1(static_cast<float>(d)); return *this; }

  operator __m128() const { return vec; }

  /* Bitwise and arithmetic operators, applied to all four elements. */
  friend F32vec4 operator &(const F32vec4 &a,const F32vec4 &b) { return _mm_and_ps(a,b); }
  friend F32vec4 operator |(const F32vec4 &a,const F32vec4 &b) { return _mm_or_ps(a,b); }
  friend F32vec4 operator ^(const F32vec4 &a,const F32vec4 &b) { return _mm_xor_ps(a,b); }
  friend F32vec4 operator +(const F32vec4 &a,const F32vec4 &b) { return _mm_add_ps(a,b); }
  friend F32vec4 operator -(const F32vec4 &a,const F32vec4 &b) { return _mm_sub_ps(a,b); }
  friend F32vec4 operator *(const F32vec4 &a,const F32vec4 &b) { return _mm_mul_ps(a,b); }
  friend F32vec4 operator /(const F32vec4 &a,const F32vec4 &b) { return _mm_div_ps(a,b); }

  F32vec4& operator =(const F32vec4 &a) { vec = a.vec; return *this; }
  F32vec4& operator =(const __m128 &avec) { vec = avec; return *this; }

  /* Compound assignment. The right-hand side is taken by const reference so
     that temporaries and const vectors are accepted as operands. */
  F32vec4& operator +=(const F32vec4 &a) { return *this = _mm_add_ps(vec,a); }
  F32vec4& operator -=(const F32vec4 &a) { return *this = _mm_sub_ps(vec,a); }
  F32vec4& operator *=(const F32vec4 &a) { return *this = _mm_mul_ps(vec,a); }
  F32vec4& operator /=(const F32vec4 &a) { return *this = _mm_div_ps(vec,a); }
  F32vec4& operator &=(const F32vec4 &a) { return *this = _mm_and_ps(vec,a); }
  F32vec4& operator |=(const F32vec4 &a) { return *this = _mm_or_ps(vec,a); }
  F32vec4& operator ^=(const F32vec4 &a) { return *this = _mm_xor_ps(vec,a); }

  /* Sum of the four elements, evaluated as a0 + (a1 + (a2 + a3)). */
  friend float add_horizontal(const F32vec4 &a) {
    const __m128 v = a.vec;
    const F32vec4 ftemp = _mm_add_ss(v,_mm_add_ss(_mm_shuffle_ps(v,v,1),_mm_add_ss(_mm_shuffle_ps(v,v,2),_mm_shuffle_ps(v,v,3))));
    return ftemp[0];
  }

  friend F32vec4 sqrt(const F32vec4 &a) { return _mm_sqrt_ps(a); }
  friend F32vec4 rcp(const F32vec4 &a) { return _mm_rcp_ps(a); }
  friend F32vec4 rsqrt(const F32vec4 &a) { return _mm_rsqrt_ps(a); }

  /* Reciprocal estimate refined once: 2*x0 - (x0*a)*x0 (Newton-Raphson step). */
  friend F32vec4 rcp_nr(const F32vec4 &a) {
    const __m128 x0 = _mm_rcp_ps(a);
    return _mm_sub_ps(_mm_add_ps(x0,x0),_mm_mul_ps(_mm_mul_ps(x0,a),x0));
  }

  /* Reciprocal square-root estimate refined once: (0.5*x0) * (3 - (a*x0)*x0). */
  friend F32vec4 rsqrt_nr(const F32vec4 &a) {
    const __m128 half = _mm_set_ps1(0.5f);
    const __m128 three = _mm_set_ps1(3.0f);
    const __m128 x0 = _mm_rsqrt_ps(a);
    return _mm_mul_ps(_mm_mul_ps(half,x0),_mm_sub_ps(three,_mm_mul_ps(_mm_mul_ps(a,x0),x0)));
  }

  /* cmp<op>(a,b): element-wise comparison returning the mask produced by
     the matching _mm_cmp<op>_ps intrinsic. */
#define Fvec32s4_COMP(op) friend F32vec4 cmp##op (const F32vec4 &a,const F32vec4 &b) { return _mm_cmp##op##_ps(a,b); }
  Fvec32s4_COMP(eq)
  Fvec32s4_COMP(lt)
  Fvec32s4_COMP(le)
  Fvec32s4_COMP(gt)
  Fvec32s4_COMP(ge)
  Fvec32s4_COMP(neq)
  Fvec32s4_COMP(nlt)
  Fvec32s4_COMP(nle)
  Fvec32s4_COMP(ngt)
  Fvec32s4_COMP(nge)
#undef Fvec32s4_COMP

  friend F32vec4 simd_min(const F32vec4 &a,const F32vec4 &b) { return _mm_min_ps(a,b); }
  friend F32vec4 simd_max(const F32vec4 &a,const F32vec4 &b) { return _mm_max_ps(a,b); }

#if defined(_ENABLE_VEC_DEBUG)
  /* Debug print, highest element first. */
  friend std::ostream & operator<<(std::ostream & os,const F32vec4 &a) {
    const float *fp = reinterpret_cast<const float*>(&a);
    os << "[3]:" << *(fp+3)
      << " [2]:" << *(fp+2)
      << " [1]:" << *(fp+1)
      << " [0]:" << *fp;
    return os;
  }
#endif

  /* Element access; i must be in [0,3] (checked with assert only). */
  const float& operator[](int i) const {
    assert((0 <= i) && (i <= 3));
    const float *fp = reinterpret_cast<const float*>(&vec);
    return *(fp+i);
  }
  float& operator[](int i) {
    assert((0 <= i) && (i <= 3));
    float *fp = reinterpret_cast<float*>(&vec);
    return *(fp+i);
  }
};
111
112inline F32vec4 unpack_low(const F32vec4 &a,const F32vec4 &b) { return _mm_unpacklo_ps(a,b); }
113inline F32vec4 unpack_high(const F32vec4 &a,const F32vec4 &b) { return _mm_unpackhi_ps(a,b); }
114inline int move_mask(const F32vec4 &a) { return _mm_movemask_ps(a); }
115inline void loadu(F32vec4 &a,float *p) { a = _mm_loadu_ps(p); }
116inline void storeu(float *p,const F32vec4 &a) { _mm_storeu_ps(p,a); }
117inline void store_nta(float *p,F32vec4 &a) { _mm_stream_ps(p,a); }
118
119#define Fvec32s4_SELECT(op) inline F32vec4 select_##op (const F32vec4 &a,const F32vec4 &b,const F32vec4 &c,const F32vec4 &d) { F32vec4 mask = _mm_cmp##op##_ps(a,b); return((mask & c) | F32vec4((_mm_andnot_ps(mask,d)))); }
130#undef Fvec32s4_SELECT
131
132inline Is16vec4 simd_max(const Is16vec4 &a,const Is16vec4 &b) { return _m_pmaxsw(a,b); }
133inline Is16vec4 simd_min(const Is16vec4 &a,const Is16vec4 &b) { return _m_pminsw(a,b); }
134inline Iu8vec8 simd_max(const Iu8vec8 &a,const Iu8vec8 &b) { return _m_pmaxub(a,b); }
135inline Iu8vec8 simd_min(const Iu8vec8 &a,const Iu8vec8 &b) { return _m_pminub(a,b); }
136inline Iu16vec4 simd_avg(const Iu16vec4 &a,const Iu16vec4 &b) { return _m_pavgw(a,b); }
137inline Iu8vec8 simd_avg(const Iu8vec8 &a,const Iu8vec8 &b) { return _m_pavgb(a,b); }
138inline int move_mask(const I8vec8 &a) { return _m_pmovmskb(a); }
139inline Iu16vec4 mul_high(const Iu16vec4 &a,const Iu16vec4 &b) { return _m_pmulhuw(a,b); }
140inline void mask_move(const I8vec8 &a,const I8vec8 &b,char *addr) { _m_maskmovq(a,b,addr); }
141inline void store_nta(__m64 *p,M64 &a) { _mm_stream_pi(p,a); }
142inline int F32vec4ToInt(const F32vec4 &a) { return _mm_cvtt_ss2si(a); }
143inline Is32vec2 F32vec4ToIs32vec2 (const F32vec4 &a) {
144 __m64 result;
145 result = _mm_cvtt_ps2pi(a);
146 return Is32vec2(result);
147}
148
149inline F32vec4 IntToF32vec4(const F32vec4 &a,int i) {
150 __m128 result;
152 return F32vec4(result);
153}
154
155inline F32vec4 Is32vec2ToF32vec4(const F32vec4 &a,const Is32vec2 &b) {
156 __m128 result;
157 result = _mm_cvt_pi2ps(a,b);
158 return F32vec4(result);
159}
160
/**
 * F32vec1 - one single-precision float held in the lowest element of a __m128.
 *
 * Arithmetic and comparisons use the scalar (_ss) intrinsics, which operate on
 * the lowest element only; the bitwise operators use the packed (_ps) forms.
 * The default constructor leaves the register uninitialized.
 */
class F32vec1 {
protected:
  __m128 vec;
public:
  F32vec1() {}
  /* Converts i to float in the lowest element. The conversion starts from a
     zeroed register so the not-yet-initialized member is never read. */
  F32vec1(int i) { vec = _mm_cvt_si2ss(_mm_setzero_ps(),i); }
  explicit F32vec1(float f) { vec = _mm_set_ss(f); }
  explicit F32vec1(double d) { vec = _mm_set_ss(static_cast<float>(d)); }
  F32vec1(__m128 m) { vec = m; }

  operator __m128() const { return vec; }

  friend F32vec1 operator &(const F32vec1 &a,const F32vec1 &b) { return _mm_and_ps(a,b); }
  friend F32vec1 operator |(const F32vec1 &a,const F32vec1 &b) { return _mm_or_ps(a,b); }
  friend F32vec1 operator ^(const F32vec1 &a,const F32vec1 &b) { return _mm_xor_ps(a,b); }
  friend F32vec1 operator +(const F32vec1 &a,const F32vec1 &b) { return _mm_add_ss(a,b); }
  friend F32vec1 operator -(const F32vec1 &a,const F32vec1 &b) { return _mm_sub_ss(a,b); }
  friend F32vec1 operator *(const F32vec1 &a,const F32vec1 &b) { return _mm_mul_ss(a,b); }
  friend F32vec1 operator /(const F32vec1 &a,const F32vec1 &b) { return _mm_div_ss(a,b); }

  /* Compound assignment. The right-hand side is taken by const reference so
     that temporaries and const values are accepted as operands. */
  F32vec1& operator +=(const F32vec1 &a) { return *this = _mm_add_ss(vec,a); }
  F32vec1& operator -=(const F32vec1 &a) { return *this = _mm_sub_ss(vec,a); }
  F32vec1& operator *=(const F32vec1 &a) { return *this = _mm_mul_ss(vec,a); }
  F32vec1& operator /=(const F32vec1 &a) { return *this = _mm_div_ss(vec,a); }
  F32vec1& operator &=(const F32vec1 &a) { return *this = _mm_and_ps(vec,a); }
  F32vec1& operator |=(const F32vec1 &a) { return *this = _mm_or_ps(vec,a); }
  F32vec1& operator ^=(const F32vec1 &a) { return *this = _mm_xor_ps(vec,a); }

  friend F32vec1 sqrt(const F32vec1 &a) { return _mm_sqrt_ss(a); }
  friend F32vec1 rcp(const F32vec1 &a) { return _mm_rcp_ss(a); }
  friend F32vec1 rsqrt(const F32vec1 &a) { return _mm_rsqrt_ss(a); }

  /* Reciprocal estimate refined once: 2*x0 - (x0*a)*x0 (Newton-Raphson step). */
  friend F32vec1 rcp_nr(const F32vec1 &a) {
    const __m128 x0 = _mm_rcp_ss(a);
    return _mm_sub_ss(_mm_add_ss(x0,x0),_mm_mul_ss(_mm_mul_ss(x0,a),x0));
  }

  /* Reciprocal square-root estimate refined once: (0.5*x0) * (3 - (a*x0)*x0). */
  friend F32vec1 rsqrt_nr(const F32vec1 &a) {
    const __m128 half = _mm_set_ss(0.5f);
    const __m128 three = _mm_set_ss(3.0f);
    const __m128 x0 = _mm_rsqrt_ss(a);
    return _mm_mul_ss(_mm_mul_ss(half,x0),_mm_sub_ss(three,_mm_mul_ss(_mm_mul_ss(a,x0),x0)));
  }

  /* cmp<op>(a,b): scalar comparison returning the mask produced by the
     matching _mm_cmp<op>_ss intrinsic. */
#define Fvec32s1_COMP(op) friend F32vec1 cmp##op (const F32vec1 &a,const F32vec1 &b) { return _mm_cmp##op##_ss(a,b); }
  Fvec32s1_COMP(eq)
  Fvec32s1_COMP(lt)
  Fvec32s1_COMP(le)
  Fvec32s1_COMP(gt)
  Fvec32s1_COMP(ge)
  Fvec32s1_COMP(neq)
  Fvec32s1_COMP(nlt)
  Fvec32s1_COMP(nle)
  Fvec32s1_COMP(ngt)
  Fvec32s1_COMP(nge)
#undef Fvec32s1_COMP

  friend F32vec1 simd_min(const F32vec1 &a,const F32vec1 &b) { return _mm_min_ss(a,b); }
  friend F32vec1 simd_max(const F32vec1 &a,const F32vec1 &b) { return _mm_max_ss(a,b); }

#if defined(_ENABLE_VEC_DEBUG)
  /* Debug print of the lowest element. */
  friend std::ostream & operator<<(std::ostream & os,const F32vec1 &a) {
    const float *fp = reinterpret_cast<const float*>(&a);
    os << "float:" << *fp;
    return os;
  }
#endif
};
222
223#define Fvec32s1_SELECT(op) inline F32vec1 select_##op (const F32vec1 &a,const F32vec1 &b,const F32vec1 &c,const F32vec1 &d) { F32vec1 mask = _mm_cmp##op##_ss(a,b); return((mask & c) | F32vec1((_mm_andnot_ps(mask,d)))); }
234#undef Fvec32s1_SELECT
235
236inline int F32vec1ToInt(const F32vec1 &a)
237{
238 return _mm_cvtt_ss2si(a);
239}
240
241#pragma pack(pop)
242#pragma pack(pop)
243#endif
244#endif
basic_ostream< _CharT, _Traits > &_STLP_CALL operator<<(basic_ostream< _CharT, _Traits > &__os, const bitset< _Nb > &__x)
Definition: _bitset.c:219
Definition: fvec.h:161
friend F32vec1 rsqrt_nr(const F32vec1 &a)
Definition: fvec.h:192
F32vec1 & operator-=(F32vec1 &a)
Definition: fvec.h:179
friend F32vec1 rcp(const F32vec1 &a)
Definition: fvec.h:186
Fvec32s1_COMP(eq) Fvec32s1_COMP(lt) Fvec32s1_COMP(le) Fvec32s1_COMP(gt) Fvec32s1_COMP(ge) Fvec32s1_COMP(neq) Fvec32s1_COMP(nlt) Fvec32s1_COMP(nle) Fvec32s1_COMP(ngt) Fvec32s1_COMP(nge) friend F32vec1 simd_min(const F32vec1 &a
friend F32vec1 operator^(const F32vec1 &a, const F32vec1 &b)
Definition: fvec.h:173
F32vec1 & operator^=(F32vec1 &a)
Definition: fvec.h:184
F32vec1 & operator+=(F32vec1 &a)
Definition: fvec.h:178
F32vec1 & operator/=(F32vec1 &a)
Definition: fvec.h:181
F32vec1 & operator*=(F32vec1 &a)
Definition: fvec.h:180
friend F32vec1 operator&(const F32vec1 &a, const F32vec1 &b)
Definition: fvec.h:171
friend F32vec1 operator-(const F32vec1 &a, const F32vec1 &b)
Definition: fvec.h:175
friend F32vec1 operator*(const F32vec1 &a, const F32vec1 &b)
Definition: fvec.h:176
__m128 vec
Definition: fvec.h:163
F32vec1(int i)
Definition: fvec.h:166
F32vec1 & operator&=(F32vec1 &a)
Definition: fvec.h:182
friend F32vec1 sqrt(const F32vec1 &a)
Definition: fvec.h:185
friend F32vec1 rcp_nr(const F32vec1 &a)
Definition: fvec.h:188
F32vec1 & operator|=(F32vec1 &a)
Definition: fvec.h:183
friend F32vec1 operator/(const F32vec1 &a, const F32vec1 &b)
Definition: fvec.h:177
friend F32vec1 operator|(const F32vec1 &a, const F32vec1 &b)
Definition: fvec.h:172
friend F32vec1 rsqrt(const F32vec1 &a)
Definition: fvec.h:187
F32vec1(__m128 m)
Definition: fvec.h:169
EXPLICIT F32vec1(float f)
Definition: fvec.h:167
F32vec1()
Definition: fvec.h:165
friend F32vec1 simd_max(const F32vec1 &a, const F32vec1 &b)
Definition: fvec.h:212
friend F32vec1 operator+(const F32vec1 &a, const F32vec1 &b)
Definition: fvec.h:174
EXPLICIT F32vec1(double d)
Definition: fvec.h:168
Definition: fvec.h:28
friend F32vec4 simd_max(const F32vec4 &a, const F32vec4 &b)
Definition: fvec.h:88
const float & operator[](int i) const
Definition: fvec.h:100
friend float add_horizontal(F32vec4 &a)
Definition: fvec.h:56
friend F32vec4 rsqrt_nr(const F32vec4 &a)
Definition: fvec.h:67
float & operator[](int i)
Definition: fvec.h:105
F32vec4 & operator&=(F32vec4 &a)
Definition: fvec.h:53
F32vec4 & operator+=(F32vec4 &a)
Definition: fvec.h:49
F32vec4(__m128 m)
Definition: fvec.h:33
friend F32vec4 operator|(const F32vec4 &a, const F32vec4 &b)
Definition: fvec.h:41
friend F32vec4 rcp(const F32vec4 &a)
Definition: fvec.h:61
EXPLICIT F32vec4(double d)
Definition: fvec.h:36
friend F32vec4 operator-(const F32vec4 &a, const F32vec4 &b)
Definition: fvec.h:44
F32vec4 & operator=(float f)
Definition: fvec.h:37
F32vec4 & operator-=(F32vec4 &a)
Definition: fvec.h:50
friend F32vec4 rsqrt(const F32vec4 &a)
Definition: fvec.h:62
friend F32vec4 operator&(const F32vec4 &a, const F32vec4 &b)
Definition: fvec.h:40
__m128 vec
Definition: fvec.h:30
F32vec4 & operator^=(F32vec4 &a)
Definition: fvec.h:55
Fvec32s4_COMP(eq) Fvec32s4_COMP(lt) Fvec32s4_COMP(le) Fvec32s4_COMP(gt) Fvec32s4_COMP(ge) Fvec32s4_COMP(neq) Fvec32s4_COMP(nlt) Fvec32s4_COMP(nle) Fvec32s4_COMP(ngt) Fvec32s4_COMP(nge) friend F32vec4 simd_min(const F32vec4 &a
friend F32vec4 operator*(const F32vec4 &a, const F32vec4 &b)
Definition: fvec.h:45
F32vec4 & operator/=(F32vec4 &a)
Definition: fvec.h:52
friend F32vec4 operator+(const F32vec4 &a, const F32vec4 &b)
Definition: fvec.h:43
F32vec4 & operator|=(F32vec4 &a)
Definition: fvec.h:54
F32vec4 & operator*=(F32vec4 &a)
Definition: fvec.h:51
friend F32vec4 operator/(const F32vec4 &a, const F32vec4 &b)
Definition: fvec.h:46
friend F32vec4 rcp_nr(const F32vec4 &a)
Definition: fvec.h:63
friend F32vec4 sqrt(const F32vec4 &a)
Definition: fvec.h:60
EXPLICIT F32vec4(float f)
Definition: fvec.h:35
friend F32vec4 operator^(const F32vec4 &a, const F32vec4 &b)
Definition: fvec.h:42
F32vec4()
Definition: fvec.h:32
F32vec4(float f3, float f2, float f1, float f0)
Definition: fvec.h:34
#define assert(x)
Definition: debug.h:53
#define Fvec32s4_COMP(op)
Definition: fvec.h:74
#define Fvec32s4_SELECT(op)
Definition: fvec.h:119
F32vec4 unpack_low(const F32vec4 &a, const F32vec4 &b)
Definition: fvec.h:112
#define Fvec32s1_COMP(op)
Definition: fvec.h:198
F32vec4 IntToF32vec4(const F32vec4 &a, int i)
Definition: fvec.h:149
int F32vec4ToInt(const F32vec4 &a)
Definition: fvec.h:142
#define EXPLICIT
Definition: fvec.h:26
Iu16vec4 simd_avg(const Iu16vec4 &a, const Iu16vec4 &b)
Definition: fvec.h:136
void loadu(F32vec4 &a, float *p)
Definition: fvec.h:115
void store_nta(float *p, F32vec4 &a)
Definition: fvec.h:117
Is32vec2 F32vec4ToIs32vec2(const F32vec4 &a)
Definition: fvec.h:143
#define Fvec32s1_SELECT(op)
Definition: fvec.h:223
Iu16vec4 mul_high(const Iu16vec4 &a, const Iu16vec4 &b)
Definition: fvec.h:139
Is16vec4 simd_min(const Is16vec4 &a, const Is16vec4 &b)
Definition: fvec.h:133
F32vec4 Is32vec2ToF32vec4(const F32vec4 &a, const Is32vec2 &b)
Definition: fvec.h:155
Iu8vec8 simd_max(const Iu8vec8 &a, const Iu8vec8 &b)
Definition: fvec.h:134
int move_mask(const F32vec4 &a)
Definition: fvec.h:114
void storeu(float *p, const F32vec4 &a)
Definition: fvec.h:116
F32vec4 unpack_high(const F32vec4 &a, const F32vec4 &b)
Definition: fvec.h:113
void mask_move(const I8vec8 &a, const I8vec8 &b, char *addr)
Definition: fvec.h:140
GLfloat f
Definition: glext.h:7540
GLboolean GLboolean GLboolean b
Definition: glext.h:6204
GLenum const GLvoid * addr
Definition: glext.h:9621
GLfloat GLfloat p
Definition: glext.h:8902
GLboolean GLboolean GLboolean GLboolean a
Definition: glext.h:6204
GLuint64EXT * result
Definition: glext.h:11304
const GLfloat * m
Definition: glext.h:10848
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
#define d
Definition: ke_i.h:81
#define eq(received, expected, label, type)
Definition: locale.c:144
#define f2(x, y, z)
Definition: sha1.c:31
#define f1(x, y, z)
Definition: sha1.c:30
#define f3(x, y, z)
Definition: sha1.c:32
__INTRIN_INLINE_SSE void _mm_stream_pi(__m64 *__p, __m64 __a)
Definition: xmmintrin.h:1121
__m128 _mm_add_ss(__m128 a, __m128 b)
Definition: xmmintrin.h:548
__m128 _mm_rsqrt_ps(__m128 a)
Definition: xmmintrin.h:617
void _mm_storeu_ps(float *p, __m128 a)
Definition: xmmintrin.h:1057
__m128 _mm_loadu_ps(float const *p)
Definition: xmmintrin.h:972
void _mm_stream_ps(float *p, __m128 a)
Definition: xmmintrin.h:1130
__m128 _mm_div_ss(__m128 a, __m128 b)
Definition: xmmintrin.h:581
__m128 _mm_max_ss(__m128 a, __m128 b)
Definition: xmmintrin.h:632
__m128 _mm_set_ps(float e3, float e2, float e1, float e0)
Definition: xmmintrin.h:1011
__m128 _mm_rcp_ps(__m128 a)
Definition: xmmintrin.h:607
__m128 _mm_mul_ss(__m128 a, __m128 b)
Definition: xmmintrin.h:570
__m128 _mm_rsqrt_ss(__m128 a)
Definition: xmmintrin.h:612
__m128 _mm_shuffle_ps(__m128 a, __m128 b, unsigned int imm8)
__m128 _mm_set_ss(float a)
Definition: xmmintrin.h:1000
__m128 _mm_cvt_si2ss(__m128 a, int b)
__m128 _mm_and_ps(__m128 a, __m128 b)
Definition: xmmintrin.h:642
__m128 _mm_xor_ps(__m128 a, __m128 b)
Definition: xmmintrin.h:657
__m128 _mm_set_ps1(float a)
__m128 _mm_div_ps(__m128 a, __m128 b)
Definition: xmmintrin.h:587
__m128 _mm_max_ps(__m128 a, __m128 b)
Definition: xmmintrin.h:637
__m128 _mm_sub_ps(__m128 a, __m128 b)
Definition: xmmintrin.h:565
__m128 _mm_unpackhi_ps(__m128 a, __m128 b)
__m128 _mm_min_ss(__m128 a, __m128 b)
Definition: xmmintrin.h:622
__m128 _mm_add_ps(__m128 a, __m128 b)
Definition: xmmintrin.h:554
__m128 _mm_min_ps(__m128 a, __m128 b)
Definition: xmmintrin.h:627
int _mm_cvtt_ss2si(__m128 a)
__m128 _mm_sqrt_ss(__m128 a)
Definition: xmmintrin.h:592
__m128 _mm_or_ps(__m128 a, __m128 b)
Definition: xmmintrin.h:652
__m128 _mm_sqrt_ps(__m128 a)
Definition: xmmintrin.h:597
__m128 _mm_sub_ss(__m128 a, __m128 b)
Definition: xmmintrin.h:559
__m128 _mm_rcp_ss(__m128 a)
Definition: xmmintrin.h:602
__m128 _mm_mul_ps(__m128 a, __m128 b)
Definition: xmmintrin.h:576
__m128 _mm_unpacklo_ps(__m128 a, __m128 b)
int _mm_movemask_ps(__m128 a)