ReactOS  0.4.14-dev-337-gf981a68
dvec.h
Go to the documentation of this file.
1 
6 #ifndef _DVEC_H_INCLUDED
7 #define _DVEC_H_INCLUDED
8 #ifndef RC_INVOKED
9 
10 #if !defined __cplusplus
11 #error This file is only supported in C++ compilations!
12 #endif
13 
14 #include <emmintrin.h>
15 #include <assert.h>
16 #include <fvec.h>
17 #include <crtdefs.h>
18 
19 #pragma pack(push,_CRT_PACKING)
20 
21 #if defined(_ENABLE_VEC_DEBUG)
22 #include <iostream>
23 #endif
24 
25 #pragma pack(push,16)
26 
27 #define EXPLICIT explicit
28 
29 class I8vec16;
30 class Is8vec16;
31 class Iu8vec16;
32 class I16vec8;
33 class Is16vec8;
34 class Iu16vec8;
35 class I32vec4;
36 class Is32vec4;
37 class Iu32vec4;
38 class I64vec2;
39 class I128vec1;
40 
// Element accessors. Each macro yields an lvalue for element number
// `element` of `vector`, viewing the storage of `vector` as an array of the
// scalar type named in the cast.
//
// The operators are written without `##`: pasting `&` or `+` onto the macro
// argument does not form a single valid preprocessing token, which conforming
// preprocessors reject. Both arguments are parenthesised so that expressions
// such as `i + 1` can be passed safely.
#define _MM_16UB(element,vector) (*((unsigned char*)&(vector) + (element)))
#define _MM_16B(element,vector) (*((signed char*)&(vector) + (element)))

#define _MM_8UW(element,vector) (*((unsigned short*)&(vector) + (element)))
#define _MM_8W(element,vector) (*((short*)&(vector) + (element)))

#define _MM_4UDW(element,vector) (*((unsigned int*)&(vector) + (element)))
#define _MM_4DW(element,vector) (*((int*)&(vector) + (element)))

#define _MM_2QW(element,vector) (*((__int64*)&(vector) + (element)))
51 
// Returns a 128-bit value with every bit set. The cmpneq() helpers in this
// file combine it with _mm_andnot_si128 to invert an equality mask.
//
// The value is produced with _mm_set1_epi32(-1), which needs neither the
// MSVC-specific `i64` literal suffix nor an MMX (__m64) intermediate.
inline const __m128i get_mask128()
{
    return _mm_set1_epi32(-1);
}
57 
// Thin wrapper around a raw 128-bit integer value (__m128i).
// It is the common base of the integer vector classes in this file and
// supplies the bitwise compound assignments they share.
class M128
{
protected:
    // Underlying 128-bit value; every member below reads or writes it.
    __m128i vec;

public:
    // Default construction leaves `vec` unset.
    M128() { }
    // Wraps an existing raw value.
    M128(__m128i mm) { vec = mm; }

    // Implicit conversion back to the raw type, so an M128 can be handed
    // directly to the _mm_* intrinsics.
    operator __m128i() const { return vec; }

    // Bitwise compound assignments over all 128 bits.
    M128& operator&=(const M128 &a) { vec = _mm_and_si128(vec,a); return *this; }
    M128& operator|=(const M128 &a) { vec = _mm_or_si128(vec,a); return *this; }
    M128& operator^=(const M128 &a) { vec = _mm_xor_si128(vec,a); return *this; }

};
74 
75 inline M128 operator&(const M128 &a,const M128 &b) { return _mm_and_si128(a,b); }
76 inline M128 operator|(const M128 &a,const M128 &b) { return _mm_or_si128(a,b); }
77 inline M128 operator^(const M128 &a,const M128 &b) { return _mm_xor_si128(a,b); }
78 inline M128 andnot(const M128 &a,const M128 &b) { return _mm_andnot_si128(a,b); }
79 
80 class I128vec1 : public M128
81 {
82 public:
83  I128vec1() { }
84  I128vec1(__m128i mm) : M128(mm) { }
85 
86  I128vec1& operator= (const M128 &a) { return *this = (I128vec1) a; }
87  I128vec1& operator&=(const M128 &a) { return *this = (I128vec1) _mm_and_si128(vec,a); }
88  I128vec1& operator|=(const M128 &a) { return *this = (I128vec1) _mm_or_si128(vec,a); }
89  I128vec1& operator^=(const M128 &a) { return *this = (I128vec1) _mm_xor_si128(vec,a); }
90 
91 };
92 
93 class I64vec2 : public M128
94 {
95 public:
96  I64vec2() { }
97  I64vec2(__m128i mm) : M128(mm) { }
98 
99  I64vec2(__m64 q1,__m64 q0)
100  {
101  _MM_2QW(0,vec) = *(__int64*)&q0;
102  _MM_2QW(1,vec) = *(__int64*)&q1;
103  }
104 
105  I64vec2& operator= (const M128 &a) { return *this = (I64vec2) a; }
106 
107  I64vec2& operator&=(const M128 &a) { return *this = (I64vec2) _mm_and_si128(vec,a); }
108  I64vec2& operator|=(const M128 &a) { return *this = (I64vec2) _mm_or_si128(vec,a); }
109  I64vec2& operator^=(const M128 &a) { return *this = (I64vec2) _mm_xor_si128(vec,a); }
110 
111  I64vec2& operator +=(const I64vec2 &a) { return *this = (I64vec2) _mm_add_epi64(vec,a); }
112  I64vec2& operator -=(const I64vec2 &a) { return *this = (I64vec2) _mm_sub_epi64(vec,a); }
113 
114  I64vec2 operator<<(const I64vec2 &a) { return _mm_sll_epi64(vec,a); }
115  I64vec2 operator<<(int count) { return _mm_slli_epi64(vec,count); }
116  I64vec2& operator<<=(const I64vec2 &a) { return *this = (I64vec2) _mm_sll_epi64(vec,a); }
117  I64vec2& operator<<=(int count) { return *this = (I64vec2) _mm_slli_epi64(vec,count); }
118  I64vec2 operator>>(const I64vec2 &a) { return _mm_srl_epi64(vec,a); }
119  I64vec2 operator>>(int count) { return _mm_srli_epi64(vec,count); }
120  I64vec2& operator>>=(const I64vec2 &a) { return *this = (I64vec2) _mm_srl_epi64(vec,a); }
121  I64vec2& operator>>=(int count) { return *this = (I64vec2) _mm_srli_epi64(vec,count); }
122 
123  const __int64& operator[](int i)const
124  {
125  assert(static_cast<unsigned int>(i) < 2);
126  return _MM_2QW(i,vec);
127  }
128 
130  {
131  assert(static_cast<unsigned int>(i) < 2);
132  return _MM_2QW(i,vec);
133  }
134 
135 };
136 
137 inline I64vec2 unpack_low(const I64vec2 &a,const I64vec2 &b) {return _mm_unpacklo_epi64(a,b); }
138 inline I64vec2 unpack_high(const I64vec2 &a,const I64vec2 &b) {return _mm_unpackhi_epi64(a,b); }
139 
140 class I32vec4 : public M128
141 {
142 public:
143  I32vec4() { }
144  I32vec4(__m128i mm) : M128(mm) { }
145 
146  I32vec4& operator= (const M128 &a) { return *this = (I32vec4) a; }
147 
148  I32vec4& operator&=(const M128 &a) { return *this = (I32vec4) _mm_and_si128(vec,a); }
149  I32vec4& operator|=(const M128 &a) { return *this = (I32vec4) _mm_or_si128(vec,a); }
150  I32vec4& operator^=(const M128 &a) { return *this = (I32vec4) _mm_xor_si128(vec,a); }
151 
152  I32vec4& operator +=(const I32vec4 &a) { return *this = (I32vec4)_mm_add_epi32(vec,a); }
153  I32vec4& operator -=(const I32vec4 &a) { return *this = (I32vec4)_mm_sub_epi32(vec,a); }
154 
155  I32vec4 operator<<(const I32vec4 &a) { return _mm_sll_epi32(vec,a); }
156  I32vec4 operator<<(int count) { return _mm_slli_epi32(vec,count); }
157  I32vec4& operator<<=(const I32vec4 &a) { return *this = (I32vec4)_mm_sll_epi32(vec,a); }
158  I32vec4& operator<<=(int count) { return *this = (I32vec4)_mm_slli_epi32(vec,count); }
159 
160 };
161 
162 inline I32vec4 cmpeq(const I32vec4 &a,const I32vec4 &b) { return _mm_cmpeq_epi32(a,b); }
163 inline I32vec4 cmpneq(const I32vec4 &a,const I32vec4 &b) { return _mm_andnot_si128(_mm_cmpeq_epi32(a,b),get_mask128()); }
164 
165 inline I32vec4 unpack_low(const I32vec4 &a,const I32vec4 &b) { return _mm_unpacklo_epi32(a,b); }
166 inline I32vec4 unpack_high(const I32vec4 &a,const I32vec4 &b) { return _mm_unpackhi_epi32(a,b); }
167 
168 class Is32vec4 : public I32vec4
169 {
170 public:
171  Is32vec4() { }
172  Is32vec4(__m128i mm) : I32vec4(mm) { }
173  Is32vec4(int i3,int i2,int i1,int i0)
174  {
175  _MM_4DW(0,vec) = i0;
176  _MM_4DW(1,vec) = i1;
177  _MM_4DW(2,vec) = i2;
178  _MM_4DW(3,vec) = i3;
179  }
180 
181  Is32vec4& operator= (const M128 &a) { return *this = (Is32vec4) a; }
182 
183  Is32vec4& operator&=(const M128 &a) { return *this = (Is32vec4) _mm_and_si128(vec,a); }
184  Is32vec4& operator|=(const M128 &a) { return *this = (Is32vec4) _mm_or_si128(vec,a); }
185  Is32vec4& operator^=(const M128 &a) { return *this = (Is32vec4) _mm_xor_si128(vec,a); }
186 
187  Is32vec4& operator +=(const I32vec4 &a) { return *this = (Is32vec4)_mm_add_epi32(vec,a); }
188  Is32vec4& operator -=(const I32vec4 &a) { return *this = (Is32vec4)_mm_sub_epi32(vec,a); }
189 
190  Is32vec4 operator<<(const M128 &a) { return _mm_sll_epi32(vec,a); }
191  Is32vec4 operator<<(int count) { return _mm_slli_epi32(vec,count); }
192  Is32vec4& operator<<=(const M128 &a) { return *this = (Is32vec4)_mm_sll_epi32(vec,a); }
193  Is32vec4& operator<<=(int count) { return *this = (Is32vec4)_mm_slli_epi32(vec,count); }
194 
195  Is32vec4 operator>>(const M128 &a) { return _mm_sra_epi32(vec,a); }
196  Is32vec4 operator>>(int count) { return _mm_srai_epi32(vec,count); }
197  Is32vec4& operator>>=(const M128 &a) { return *this = (Is32vec4) _mm_sra_epi32(vec,a); }
198  Is32vec4& operator>>=(int count) { return *this = (Is32vec4) _mm_srai_epi32(vec,count); }
199 
200 #if defined(_ENABLE_VEC_DEBUG)
201 
202  friend std::ostream& operator<< (std::ostream &os,const Is32vec4 &a)
203  {
204  os << "[3]:" << _MM_4DW(3,a)
205  << " [2]:" << _MM_4DW(2,a)
206  << " [1]:" << _MM_4DW(1,a)
207  << " [0]:" << _MM_4DW(0,a);
208  return os;
209  }
210 #endif
211 
212  const int& operator[](int i)const
213  {
214  assert(static_cast<unsigned int>(i) < 4);
215  return _MM_4DW(i,vec);
216  }
217 
218  int& operator[](int i)
219  {
220  assert(static_cast<unsigned int>(i) < 4);
221  return _MM_4DW(i,vec);
222  }
223 };
224 
225 inline Is32vec4 cmpeq(const Is32vec4 &a,const Is32vec4 &b) { return _mm_cmpeq_epi32(a,b); }
226 inline Is32vec4 cmpneq(const Is32vec4 &a,const Is32vec4 &b) { return _mm_andnot_si128(_mm_cmpeq_epi32(a,b),get_mask128()); }
227 inline Is32vec4 cmpgt(const Is32vec4 &a,const Is32vec4 &b) { return _mm_cmpgt_epi32(a,b); }
228 inline Is32vec4 cmplt(const Is32vec4 &a,const Is32vec4 &b) { return _mm_cmpgt_epi32(b,a); }
229 
230 inline Is32vec4 unpack_low(const Is32vec4 &a,const Is32vec4 &b) { return _mm_unpacklo_epi32(a,b); }
231 inline Is32vec4 unpack_high(const Is32vec4 &a,const Is32vec4 &b) { return _mm_unpackhi_epi32(a,b); }
232 
233 class Iu32vec4 : public I32vec4
234 {
235 public:
236  Iu32vec4() { }
237  Iu32vec4(__m128i mm) : I32vec4(mm) { }
238  Iu32vec4(unsigned int ui3,unsigned int ui2,unsigned int ui1,unsigned int ui0)
239  {
240  _MM_4UDW(0,vec) = ui0;
241  _MM_4UDW(1,vec) = ui1;
242  _MM_4UDW(2,vec) = ui2;
243  _MM_4UDW(3,vec) = ui3;
244  }
245 
246  Iu32vec4& operator= (const M128 &a) { return *this = (Iu32vec4) a; }
247 
248  Iu32vec4& operator&=(const M128 &a) { return *this = (Iu32vec4) _mm_and_si128(vec,a); }
249  Iu32vec4& operator|=(const M128 &a) { return *this = (Iu32vec4) _mm_or_si128(vec,a); }
250  Iu32vec4& operator^=(const M128 &a) { return *this = (Iu32vec4) _mm_xor_si128(vec,a); }
251 
252  Iu32vec4& operator +=(const I32vec4 &a) { return *this = (Iu32vec4)_mm_add_epi32(vec,a); }
253  Iu32vec4& operator -=(const I32vec4 &a) { return *this = (Iu32vec4)_mm_sub_epi32(vec,a); }
254 
255  Iu32vec4 operator<<(const M128 &a) { return _mm_sll_epi32(vec,a); }
256  Iu32vec4 operator<<(int count) { return _mm_slli_epi32(vec,count); }
257  Iu32vec4& operator<<=(const M128 &a) { return *this = (Iu32vec4)_mm_sll_epi32(vec,a); }
258  Iu32vec4& operator<<=(int count) { return *this = (Iu32vec4)_mm_slli_epi32(vec,count); }
259  Iu32vec4 operator>>(const M128 &a) { return _mm_srl_epi32(vec,a); }
260  Iu32vec4 operator>>(int count) { return _mm_srli_epi32(vec,count); }
261  Iu32vec4& operator>>=(const M128 &a) { return *this = (Iu32vec4) _mm_srl_epi32(vec,a); }
262  Iu32vec4& operator>>=(int count) { return *this = (Iu32vec4) _mm_srli_epi32(vec,count); }
263 
264 #if defined(_ENABLE_VEC_DEBUG)
265 
266  friend std::ostream& operator<< (std::ostream &os,const Iu32vec4 &a)
267  {
268  os << "[3]:" << _MM_4UDW(3,a)
269  << " [2]:" << _MM_4UDW(2,a)
270  << " [1]:" << _MM_4UDW(1,a)
271  << " [0]:" << _MM_4UDW(0,a);
272  return os;
273  }
274 #endif
275 
276  const unsigned int& operator[](int i)const
277  {
278  assert(static_cast<unsigned int>(i) < 4);
279  return _MM_4UDW(i,vec);
280  }
281 
282  unsigned int& operator[](int i)
283  {
284  assert(static_cast<unsigned int>(i) < 4);
285  return _MM_4UDW(i,vec);
286  }
287 };
288 
289 inline I64vec2 operator*(const Iu32vec4 &a,const Iu32vec4 &b) { return _mm_mul_epu32(a,b); }
290 inline Iu32vec4 cmpeq(const Iu32vec4 &a,const Iu32vec4 &b) { return _mm_cmpeq_epi32(a,b); }
291 inline Iu32vec4 cmpneq(const Iu32vec4 &a,const Iu32vec4 &b) { return _mm_andnot_si128(_mm_cmpeq_epi32(a,b),get_mask128()); }
292 
293 inline Iu32vec4 unpack_low(const Iu32vec4 &a,const Iu32vec4 &b) { return _mm_unpacklo_epi32(a,b); }
294 inline Iu32vec4 unpack_high(const Iu32vec4 &a,const Iu32vec4 &b) { return _mm_unpackhi_epi32(a,b); }
295 
296 class I16vec8 : public M128
297 {
298 public:
299  I16vec8() { }
300  I16vec8(__m128i mm) : M128(mm) { }
301 
302  I16vec8& operator= (const M128 &a) { return *this = (I16vec8) a; }
303 
304  I16vec8& operator&=(const M128 &a) { return *this = (I16vec8) _mm_and_si128(vec,a); }
305  I16vec8& operator|=(const M128 &a) { return *this = (I16vec8) _mm_or_si128(vec,a); }
306  I16vec8& operator^=(const M128 &a) { return *this = (I16vec8) _mm_xor_si128(vec,a); }
307 
308  I16vec8& operator +=(const I16vec8 &a) { return *this = (I16vec8) _mm_add_epi16(vec,a); }
309  I16vec8& operator -=(const I16vec8 &a) { return *this = (I16vec8) _mm_sub_epi16(vec,a); }
310  I16vec8& operator *=(const I16vec8 &a) { return *this = (I16vec8) _mm_mullo_epi16(vec,a); }
311 
312  I16vec8 operator<<(const M128 &a) { return _mm_sll_epi16(vec,a); }
313  I16vec8 operator<<(int count) { return _mm_slli_epi16(vec,count); }
314  I16vec8& operator<<=(const M128 &a) { return *this = (I16vec8)_mm_sll_epi16(vec,a); }
315  I16vec8& operator<<=(int count) { return *this = (I16vec8)_mm_slli_epi16(vec,count); }
316 
317 };
318 
319 inline I16vec8 operator*(const I16vec8 &a,const I16vec8 &b) { return _mm_mullo_epi16(a,b); }
320 
321 inline I16vec8 cmpeq(const I16vec8 &a,const I16vec8 &b) { return _mm_cmpeq_epi16(a,b); }
322 inline I16vec8 cmpneq(const I16vec8 &a,const I16vec8 &b) { return _mm_andnot_si128(_mm_cmpeq_epi16(a,b),get_mask128()); }
323 
324 inline I16vec8 unpack_low(const I16vec8 &a,const I16vec8 &b) { return _mm_unpacklo_epi16(a,b); }
325 inline I16vec8 unpack_high(const I16vec8 &a,const I16vec8 &b) { return _mm_unpackhi_epi16(a,b); }
326 
327 class Is16vec8 : public I16vec8
328 {
329 public:
330  Is16vec8() { }
331  Is16vec8(__m128i mm) : I16vec8(mm) { }
332  Is16vec8(signed short s7,signed short s6,signed short s5,signed short s4,signed short s3,signed short s2,signed short s1,signed short s0)
333  {
334  _MM_8W(0,vec) = s0;
335  _MM_8W(1,vec) = s1;
336  _MM_8W(2,vec) = s2;
337  _MM_8W(3,vec) = s3;
338  _MM_8W(4,vec) = s4;
339  _MM_8W(5,vec) = s5;
340  _MM_8W(6,vec) = s6;
341  _MM_8W(7,vec) = s7;
342  }
343 
344  Is16vec8& operator= (const M128 &a) { return *this = (Is16vec8) a; }
345 
346  Is16vec8& operator&=(const M128 &a) { return *this = (Is16vec8) _mm_and_si128(vec,a); }
347  Is16vec8& operator|=(const M128 &a) { return *this = (Is16vec8) _mm_or_si128(vec,a); }
348  Is16vec8& operator^=(const M128 &a) { return *this = (Is16vec8) _mm_xor_si128(vec,a); }
349 
350  Is16vec8& operator +=(const I16vec8 &a) { return *this = (Is16vec8) _mm_add_epi16(vec,a); }
351  Is16vec8& operator -=(const I16vec8 &a) { return *this = (Is16vec8) _mm_sub_epi16(vec,a); }
352  Is16vec8& operator *=(const I16vec8 &a) { return *this = (Is16vec8) _mm_mullo_epi16(vec,a); }
353 
354  Is16vec8 operator<<(const M128 &a) { return _mm_sll_epi16(vec,a); }
355  Is16vec8 operator<<(int count) { return _mm_slli_epi16(vec,count); }
356  Is16vec8& operator<<=(const M128 &a) { return *this = (Is16vec8)_mm_sll_epi16(vec,a); }
357  Is16vec8& operator<<=(int count) { return *this = (Is16vec8)_mm_slli_epi16(vec,count); }
358 
359  Is16vec8 operator>>(const M128 &a) { return _mm_sra_epi16(vec,a); }
360  Is16vec8 operator>>(int count) { return _mm_srai_epi16(vec,count); }
361  Is16vec8& operator>>=(const M128 &a) { return *this = (Is16vec8)_mm_sra_epi16(vec,a); }
362  Is16vec8& operator>>=(int count) { return *this = (Is16vec8)_mm_srai_epi16(vec,count); }
363 
364 #if defined(_ENABLE_VEC_DEBUG)
365 
366  friend std::ostream& operator<< (std::ostream &os,const Is16vec8 &a)
367  {
368  os << "[7]:" << _MM_8W(7,a)
369  << " [6]:" << _MM_8W(6,a)
370  << " [5]:" << _MM_8W(5,a)
371  << " [4]:" << _MM_8W(4,a)
372  << " [3]:" << _MM_8W(3,a)
373  << " [2]:" << _MM_8W(2,a)
374  << " [1]:" << _MM_8W(1,a)
375  << " [0]:" << _MM_8W(0,a);
376  return os;
377  }
378 #endif
379 
380  const signed short& operator[](int i)const
381  {
382  assert(static_cast<unsigned int>(i) < 8);
383  return _MM_8W(i,vec);
384  }
385 
386  signed short& operator[](int i)
387  {
388  assert(static_cast<unsigned int>(i) < 8);
389  return _MM_8W(i,vec);
390  }
391 };
392 
393 inline Is16vec8 operator*(const Is16vec8 &a,const Is16vec8 &b) { return _mm_mullo_epi16(a,b); }
394 
395 inline Is16vec8 cmpeq(const Is16vec8 &a,const Is16vec8 &b) { return _mm_cmpeq_epi16(a,b); }
396 inline Is16vec8 cmpneq(const Is16vec8 &a,const Is16vec8 &b) { return _mm_andnot_si128(_mm_cmpeq_epi16(a,b),get_mask128()); }
397 inline Is16vec8 cmpgt(const Is16vec8 &a,const Is16vec8 &b) { return _mm_cmpgt_epi16(a,b); }
398 inline Is16vec8 cmplt(const Is16vec8 &a,const Is16vec8 &b) { return _mm_cmpgt_epi16(b,a); }
399 
400 inline Is16vec8 unpack_low(const Is16vec8 &a,const Is16vec8 &b) { return _mm_unpacklo_epi16(a,b); }
401 inline Is16vec8 unpack_high(const Is16vec8 &a,const Is16vec8 &b) { return _mm_unpackhi_epi16(a,b); }
402 
403 inline Is16vec8 mul_high(const Is16vec8 &a,const Is16vec8 &b) { return _mm_mulhi_epi16(a,b); }
404 inline Is32vec4 mul_add(const Is16vec8 &a,const Is16vec8 &b) { return _mm_madd_epi16(a,b);}
405 
406 inline Is16vec8 sat_add(const Is16vec8 &a,const Is16vec8 &b) { return _mm_adds_epi16(a,b); }
407 inline Is16vec8 sat_sub(const Is16vec8 &a,const Is16vec8 &b) { return _mm_subs_epi16(a,b); }
408 
409 inline Is16vec8 simd_max(const Is16vec8 &a,const Is16vec8 &b) { return _mm_max_epi16(a,b); }
410 inline Is16vec8 simd_min(const Is16vec8 &a,const Is16vec8 &b) { return _mm_min_epi16(a,b); }
411 
412 class Iu16vec8 : public I16vec8
413 {
414 public:
415  Iu16vec8() { }
416  Iu16vec8(__m128i mm) : I16vec8(mm) { }
417  Iu16vec8(unsigned short s7,unsigned short s6,unsigned short s5,unsigned short s4,unsigned short s3,unsigned short s2,unsigned short s1,unsigned short s0)
418  {
419  _MM_8UW(0,vec) = s0;
420  _MM_8UW(1,vec) = s1;
421  _MM_8UW(2,vec) = s2;
422  _MM_8UW(3,vec) = s3;
423  _MM_8UW(4,vec) = s4;
424  _MM_8UW(5,vec) = s5;
425  _MM_8UW(6,vec) = s6;
426  _MM_8UW(7,vec) = s7;
427  }
428 
429  Iu16vec8& operator= (const M128 &a) { return *this = (Iu16vec8) a; }
430 
431  Iu16vec8& operator&=(const M128 &a) { return *this = (Iu16vec8) _mm_and_si128(vec,a); }
432  Iu16vec8& operator|=(const M128 &a) { return *this = (Iu16vec8) _mm_or_si128(vec,a); }
433  Iu16vec8& operator^=(const M128 &a) { return *this = (Iu16vec8) _mm_xor_si128(vec,a); }
434 
435  Iu16vec8& operator +=(const I16vec8 &a) { return *this = (Iu16vec8) _mm_add_epi16(vec,a); }
436  Iu16vec8& operator -=(const I16vec8 &a) { return *this = (Iu16vec8) _mm_sub_epi16(vec,a); }
437  Iu16vec8& operator *=(const I16vec8 &a) { return *this = (Iu16vec8) _mm_mullo_epi16(vec,a); }
438 
439  Iu16vec8 operator<<(const M128 &a) { return _mm_sll_epi16(vec,a); }
440  Iu16vec8 operator<<(int count) { return _mm_slli_epi16(vec,count); }
441  Iu16vec8& operator<<=(const M128 &a) { return *this = (Iu16vec8)_mm_sll_epi16(vec,a); }
442  Iu16vec8& operator<<=(int count) { return *this = (Iu16vec8)_mm_slli_epi16(vec,count); }
443  Iu16vec8 operator>>(const M128 &a) { return _mm_srl_epi16(vec,a); }
444  Iu16vec8 operator>>(int count) { return _mm_srli_epi16(vec,count); }
445  Iu16vec8& operator>>=(const M128 &a) { return *this = (Iu16vec8) _mm_srl_epi16(vec,a); }
446  Iu16vec8& operator>>=(int count) { return *this = (Iu16vec8) _mm_srli_epi16(vec,count); }
447 
448 #if defined(_ENABLE_VEC_DEBUG)
449 
450  friend std::ostream& operator << (std::ostream &os,const Iu16vec8 &a)
451  {
452  os << "[7]:" << unsigned short(_MM_8UW(7,a))
453  << " [6]:" << unsigned short(_MM_8UW(6,a))
454  << " [5]:" << unsigned short(_MM_8UW(5,a))
455  << " [4]:" << unsigned short(_MM_8UW(4,a))
456  << " [3]:" << unsigned short(_MM_8UW(3,a))
457  << " [2]:" << unsigned short(_MM_8UW(2,a))
458  << " [1]:" << unsigned short(_MM_8UW(1,a))
459  << " [0]:" << unsigned short(_MM_8UW(0,a));
460  return os;
461  }
462 #endif
463 
464  const unsigned short& operator[](int i)const
465  {
466  assert(static_cast<unsigned int>(i) < 8);
467  return _MM_8UW(i,vec);
468  }
469 
470  unsigned short& operator[](int i)
471  {
472  assert(static_cast<unsigned int>(i) < 8);
473  return _MM_8UW(i,vec);
474  }
475 };
476 
477 inline Iu16vec8 operator*(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_mullo_epi16(a,b); }
478 
479 inline Iu16vec8 cmpeq(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_cmpeq_epi16(a,b); }
480 inline Iu16vec8 cmpneq(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_andnot_si128(_mm_cmpeq_epi16(a,b),get_mask128()); }
481 
482 inline Iu16vec8 unpack_low(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_unpacklo_epi16(a,b); }
483 inline Iu16vec8 unpack_high(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_unpackhi_epi16(a,b); }
484 
485 inline Iu16vec8 sat_add(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_adds_epu16(a,b); }
486 inline Iu16vec8 sat_sub(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_subs_epu16(a,b); }
487 
488 inline Iu16vec8 simd_avg(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_avg_epu16(a,b); }
489 inline I16vec8 mul_high(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_mulhi_epu16(a,b); }
490 
491 class I8vec16 : public M128
492 {
493 public:
494  I8vec16() { }
495  I8vec16(__m128i mm) : M128(mm) { }
496 
497  I8vec16& operator= (const M128 &a) { return *this = (I8vec16) a; }
498 
499  I8vec16& operator&=(const M128 &a) { return *this = (I8vec16) _mm_and_si128(vec,a); }
500  I8vec16& operator|=(const M128 &a) { return *this = (I8vec16) _mm_or_si128(vec,a); }
501  I8vec16& operator^=(const M128 &a) { return *this = (I8vec16) _mm_xor_si128(vec,a); }
502 
503  I8vec16& operator +=(const I8vec16 &a) { return *this = (I8vec16) _mm_add_epi8(vec,a); }
504  I8vec16& operator -=(const I8vec16 &a) { return *this = (I8vec16) _mm_sub_epi8(vec,a); }
505 
506 };
507 
508 inline I8vec16 cmpeq(const I8vec16 &a,const I8vec16 &b) { return _mm_cmpeq_epi8(a,b); }
509 inline I8vec16 cmpneq(const I8vec16 &a,const I8vec16 &b) { return _mm_andnot_si128(_mm_cmpeq_epi8(a,b),get_mask128()); }
510 
511 inline I8vec16 unpack_low(const I8vec16 &a,const I8vec16 &b) { return _mm_unpacklo_epi8(a,b); }
512 inline I8vec16 unpack_high(const I8vec16 &a,const I8vec16 &b) { return _mm_unpackhi_epi8(a,b); }
513 
514 class Is8vec16 : public I8vec16
515 {
516 public:
517  Is8vec16() { }
518  Is8vec16(__m128i mm) : I8vec16(mm) { }
519 
520  Is8vec16& operator= (const M128 &a) { return *this = (Is8vec16) a; }
521 
522  Is8vec16& operator&=(const M128 &a) { return *this = (Is8vec16) _mm_and_si128(vec,a); }
523  Is8vec16& operator|=(const M128 &a) { return *this = (Is8vec16) _mm_or_si128(vec,a); }
524  Is8vec16& operator^=(const M128 &a) { return *this = (Is8vec16) _mm_xor_si128(vec,a); }
525 
526  Is8vec16& operator +=(const I8vec16 &a) { return *this = (Is8vec16) _mm_add_epi8(vec,a); }
527  Is8vec16& operator -=(const I8vec16 &a) { return *this = (Is8vec16) _mm_sub_epi8(vec,a); }
528 
529 #if defined(_ENABLE_VEC_DEBUG)
530 
531  friend std::ostream& operator << (std::ostream &os,const Is8vec16 &a)
532  {
533  os << "[15]:" << short(_MM_16B(15,a))
534  << " [14]:" << short(_MM_16B(14,a))
535  << " [13]:" << short(_MM_16B(13,a))
536  << " [12]:" << short(_MM_16B(12,a))
537  << " [11]:" << short(_MM_16B(11,a))
538  << " [10]:" << short(_MM_16B(10,a))
539  << " [9]:" << short(_MM_16B(9,a))
540  << " [8]:" << short(_MM_16B(8,a))
541  << " [7]:" << short(_MM_16B(7,a))
542  << " [6]:" << short(_MM_16B(6,a))
543  << " [5]:" << short(_MM_16B(5,a))
544  << " [4]:" << short(_MM_16B(4,a))
545  << " [3]:" << short(_MM_16B(3,a))
546  << " [2]:" << short(_MM_16B(2,a))
547  << " [1]:" << short(_MM_16B(1,a))
548  << " [0]:" << short(_MM_16B(0,a));
549  return os;
550  }
551 #endif
552 
553  const signed char& operator[](int i)const
554  {
555  assert(static_cast<unsigned int>(i) < 16);
556  return _MM_16B(i,vec);
557  }
558 
559  signed char& operator[](int i)
560  {
561  assert(static_cast<unsigned int>(i) < 16);
562  return _MM_16B(i,vec);
563  }
564 
565 };
566 
567 inline Is8vec16 cmpeq(const Is8vec16 &a,const Is8vec16 &b) { return _mm_cmpeq_epi8(a,b); }
568 inline Is8vec16 cmpneq(const Is8vec16 &a,const Is8vec16 &b) { return _mm_andnot_si128(_mm_cmpeq_epi8(a,b),get_mask128()); }
569 inline Is8vec16 cmpgt(const Is8vec16 &a,const Is8vec16 &b) { return _mm_cmpgt_epi8(a,b); }
570 inline Is8vec16 cmplt(const Is8vec16 &a,const Is8vec16 &b) { return _mm_cmplt_epi8(a,b); }
571 
572 inline Is8vec16 unpack_low(const Is8vec16 &a,const Is8vec16 &b) { return _mm_unpacklo_epi8(a,b); }
573 inline Is8vec16 unpack_high(const Is8vec16 &a,const Is8vec16 &b) { return _mm_unpackhi_epi8(a,b); }
574 
575 inline Is8vec16 sat_add(const Is8vec16 &a,const Is8vec16 &b) { return _mm_adds_epi8(a,b); }
576 inline Is8vec16 sat_sub(const Is8vec16 &a,const Is8vec16 &b) { return _mm_subs_epi8(a,b); }
577 
578 class Iu8vec16 : public I8vec16
579 {
580 public:
581  Iu8vec16() { }
582  Iu8vec16(__m128i mm) : I8vec16(mm) { }
583 
584  Iu8vec16& operator= (const M128 &a) { return *this = (Iu8vec16) a; }
585 
586  Iu8vec16& operator&=(const M128 &a) { return *this = (Iu8vec16) _mm_and_si128(vec,a); }
587  Iu8vec16& operator|=(const M128 &a) { return *this = (Iu8vec16) _mm_or_si128(vec,a); }
588  Iu8vec16& operator^=(const M128 &a) { return *this = (Iu8vec16) _mm_xor_si128(vec,a); }
589 
590  Iu8vec16& operator +=(const I8vec16 &a) { return *this = (Iu8vec16) _mm_add_epi8(vec,a); }
591  Iu8vec16& operator -=(const I8vec16 &a) { return *this = (Iu8vec16) _mm_sub_epi8(vec,a); }
592 
593 #if defined(_ENABLE_VEC_DEBUG)
594 
595  friend std::ostream& operator << (std::ostream &os,const Iu8vec16 &a)
596  {
597  os << "[15]:" << unsigned short(_MM_16UB(15,a))
598  << " [14]:" << unsigned short(_MM_16UB(14,a))
599  << " [13]:" << unsigned short(_MM_16UB(13,a))
600  << " [12]:" << unsigned short(_MM_16UB(12,a))
601  << " [11]:" << unsigned short(_MM_16UB(11,a))
602  << " [10]:" << unsigned short(_MM_16UB(10,a))
603  << " [9]:" << unsigned short(_MM_16UB(9,a))
604  << " [8]:" << unsigned short(_MM_16UB(8,a))
605  << " [7]:" << unsigned short(_MM_16UB(7,a))
606  << " [6]:" << unsigned short(_MM_16UB(6,a))
607  << " [5]:" << unsigned short(_MM_16UB(5,a))
608  << " [4]:" << unsigned short(_MM_16UB(4,a))
609  << " [3]:" << unsigned short(_MM_16UB(3,a))
610  << " [2]:" << unsigned short(_MM_16UB(2,a))
611  << " [1]:" << unsigned short(_MM_16UB(1,a))
612  << " [0]:" << unsigned short(_MM_16UB(0,a));
613  return os;
614  }
615 #endif
616 
617  const unsigned char& operator[](int i)const
618  {
619  assert(static_cast<unsigned int>(i) < 16);
620  return _MM_16UB(i,vec);
621  }
622 
623  unsigned char& operator[](int i)
624  {
625  assert(static_cast<unsigned int>(i) < 16);
626  return _MM_16UB(i,vec);
627  }
628 
629 };
630 
631 inline Iu8vec16 cmpeq(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_cmpeq_epi8(a,b); }
632 inline Iu8vec16 cmpneq(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_andnot_si128(_mm_cmpeq_epi8(a,b),get_mask128()); }
633 
634 inline Iu8vec16 unpack_low(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_unpacklo_epi8(a,b); }
635 inline Iu8vec16 unpack_high(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_unpackhi_epi8(a,b); }
636 
637 inline Iu8vec16 sat_add(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_adds_epu8(a,b); }
638 inline Iu8vec16 sat_sub(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_subs_epu8(a,b); }
639 
640 inline I64vec2 sum_abs(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_sad_epu8(a,b); }
641 
642 inline Iu8vec16 simd_avg(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_avg_epu8(a,b); }
643 inline Iu8vec16 simd_max(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_max_epu8(a,b); }
644 inline Iu8vec16 simd_min(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_min_epu8(a,b); }
645 
646 inline Is16vec8 pack_sat(const Is32vec4 &a,const Is32vec4 &b) { return _mm_packs_epi32(a,b); }
647 inline Is8vec16 pack_sat(const Is16vec8 &a,const Is16vec8 &b) { return _mm_packs_epi16(a,b); }
648 inline Iu8vec16 packu_sat(const Is16vec8 &a,const Is16vec8 &b) { return _mm_packus_epi16(a,b);}
649 
650 #define IVEC128_LOGICALS(vect,element) inline I##vect##vec##element operator& (const I##vect##vec##element &a,const I##vect##vec##element &b) { return _mm_and_si128(a,b); } inline I##vect##vec##element operator| (const I##vect##vec##element &a,const I##vect##vec##element &b) { return _mm_or_si128(a,b); } inline I##vect##vec##element operator^ (const I##vect##vec##element &a,const I##vect##vec##element &b) { return _mm_xor_si128(a,b); } inline I##vect##vec##element andnot (const I##vect##vec##element &a,const I##vect##vec##element &b) { return _mm_andnot_si128(a,b); }
651 
652 IVEC128_LOGICALS(8,16)
655 IVEC128_LOGICALS(16,8)
658 IVEC128_LOGICALS(32,4)
661 IVEC128_LOGICALS(64,2)
662 IVEC128_LOGICALS(128,1)
663 #undef IVEC128_LOGICALS
664 
665 #define IVEC128_ADD_SUB(vect,element,opsize) inline I##vect##vec##element operator+ (const I##vect##vec##element &a,const I##vect##vec##element &b) { return _mm_add_##opsize(a,b); } inline I##vect##vec##element operator- (const I##vect##vec##element &a,const I##vect##vec##element &b) { return _mm_sub_##opsize(a,b); }
666 
667 IVEC128_ADD_SUB(8,16,epi8)
668 IVEC128_ADD_SUB(u8,16,epi8)
669 IVEC128_ADD_SUB(s8,16,epi8)
670 IVEC128_ADD_SUB(16,8,epi16)
671 IVEC128_ADD_SUB(u16,8,epi16)
672 IVEC128_ADD_SUB(s16,8,epi16)
673 IVEC128_ADD_SUB(32,4,epi32)
674 IVEC128_ADD_SUB(u32,4,epi32)
675 IVEC128_ADD_SUB(s32,4,epi32)
676 IVEC128_ADD_SUB(64,2,epi64)
677 #undef IVEC128_ADD_SUB
678 
679 #define IVEC128_SELECT(vect12,vect34,element,selop,arg1,arg2) inline I##vect34##vec##element select_##selop (const I##vect12##vec##element &a,const I##vect12##vec##element &b,const I##vect34##vec##element &c,const I##vect34##vec##element &d) { I##vect12##vec##element mask = cmp##selop(a,b); return(I##vect34##vec##element ((mask & arg1) | I##vect12##vec##element ((_mm_andnot_si128(mask,arg2))))); }
680 IVEC128_SELECT(8,s8,16,eq,c,d)
681 IVEC128_SELECT(8,u8,16,eq,c,d)
682 IVEC128_SELECT(8,8,16,eq,c,d)
683 IVEC128_SELECT(8,s8,16,neq,c,d)
684 IVEC128_SELECT(8,u8,16,neq,c,d)
685 IVEC128_SELECT(8,8,16,neq,c,d)
686 
687 IVEC128_SELECT(16,s16,8,eq,c,d)
688 IVEC128_SELECT(16,u16,8,eq,c,d)
689 IVEC128_SELECT(16,16,8,eq,c,d)
690 IVEC128_SELECT(16,s16,8,neq,c,d)
691 IVEC128_SELECT(16,u16,8,neq,c,d)
692 IVEC128_SELECT(16,16,8,neq,c,d)
693 
694 IVEC128_SELECT(32,s32,4,eq,c,d)
695 IVEC128_SELECT(32,u32,4,eq,c,d)
696 IVEC128_SELECT(32,32,4,eq,c,d)
697 IVEC128_SELECT(32,s32,4,neq,c,d)
698 IVEC128_SELECT(32,u32,4,neq,c,d)
699 IVEC128_SELECT(32,32,4,neq,c,d)
700 
701 IVEC128_SELECT(s8,s8,16,gt,c,d)
702 IVEC128_SELECT(s8,u8,16,gt,c,d)
703 IVEC128_SELECT(s8,8,16,gt,c,d)
704 IVEC128_SELECT(s8,s8,16,lt,c,d)
705 IVEC128_SELECT(s8,u8,16,lt,c,d)
706 IVEC128_SELECT(s8,8,16,lt,c,d)
707 
708 IVEC128_SELECT(s16,s16,8,gt,c,d)
709 IVEC128_SELECT(s16,u16,8,gt,c,d)
710 IVEC128_SELECT(s16,16,8,gt,c,d)
711 IVEC128_SELECT(s16,s16,8,lt,c,d)
712 IVEC128_SELECT(s16,u16,8,lt,c,d)
713 IVEC128_SELECT(s16,16,8,lt,c,d)
714 
715 #undef IVEC128_SELECT
716 
// Two double-precision floating-point values packed in a 128-bit vector
// (__m128d), with arithmetic, bitwise, comparison and element-access helpers.
class F64vec2
{
protected:
    // Underlying 128-bit value; every member below reads or writes it.
    __m128d vec;
public:

    // Default construction leaves `vec` unset.
    F64vec2() {}

    // Wraps a raw value. This implicit conversion is what lets the operators
    // below return the result of an intrinsic directly.
    F64vec2(__m128d m) { vec = m; }

    // Element-wise construction: d0 is element 0, d1 is element 1.
    F64vec2(double d1,double d0) { vec= _mm_set_pd(d1,d0); }

    // Broadcasts d to both elements. Spelled `explicit` directly; the file's
    // EXPLICIT macro expands to the same keyword.
    explicit F64vec2(double d) { vec = _mm_set1_pd(d); }

    // Implicit conversion back to the raw type for use with intrinsics.
    operator __m128d() const { return vec; }

    friend F64vec2 operator &(const F64vec2 &a,const F64vec2 &b) { return _mm_and_pd(a,b); }
    friend F64vec2 operator |(const F64vec2 &a,const F64vec2 &b) { return _mm_or_pd(a,b); }
    friend F64vec2 operator ^(const F64vec2 &a,const F64vec2 &b) { return _mm_xor_pd(a,b); }

    friend F64vec2 operator +(const F64vec2 &a,const F64vec2 &b) { return _mm_add_pd(a,b); }
    friend F64vec2 operator -(const F64vec2 &a,const F64vec2 &b) { return _mm_sub_pd(a,b); }
    friend F64vec2 operator *(const F64vec2 &a,const F64vec2 &b) { return _mm_mul_pd(a,b); }
    friend F64vec2 operator /(const F64vec2 &a,const F64vec2 &b) { return _mm_div_pd(a,b); }

    // Compound assignments. The right-hand operand is only read, so it is
    // taken by const reference; const vectors and temporaries are accepted.
    F64vec2& operator +=(const F64vec2 &a) { return *this = _mm_add_pd(vec,a); }
    F64vec2& operator -=(const F64vec2 &a) { return *this = _mm_sub_pd(vec,a); }
    F64vec2& operator *=(const F64vec2 &a) { return *this = _mm_mul_pd(vec,a); }
    F64vec2& operator /=(const F64vec2 &a) { return *this = _mm_div_pd(vec,a); }
    F64vec2& operator &=(const F64vec2 &a) { return *this = _mm_and_pd(vec,a); }
    F64vec2& operator |=(const F64vec2 &a) { return *this = _mm_or_pd(vec,a); }
    F64vec2& operator ^=(const F64vec2 &a) { return *this = _mm_xor_pd(vec,a); }

    // Returns the sum of the two elements of `a`: the elements are swapped
    // with a shuffle and the low elements are added.
    friend double add_horizontal(const F64vec2 &a)
    {
        F64vec2 ftemp = _mm_add_sd(a,_mm_shuffle_pd(a,a,1));
        return ftemp[0];
    }

    // Forwards to _mm_andnot_pd(a,b): the complement of `a` ANDed with `b`.
    friend F64vec2 andnot(const F64vec2 &a,const F64vec2 &b) { return _mm_andnot_pd(a,b); }

    // Element-wise square root.
    friend F64vec2 sqrt(const F64vec2 &a) { return _mm_sqrt_pd(a); }

    // cmp<op>(a,b): element-wise comparison returning a mask vector.
    // NOTE(review): the `eq` instantiation of each macro list in this class
    // was absent from the listing this file was taken from (its embedded
    // line numbering skips one line after each #define); restored here.
    // Confirm against the upstream header.
#define F64vec2_COMP(op) friend F64vec2 cmp##op (const F64vec2 &a,const F64vec2 &b) { return _mm_cmp##op##_pd(a,b); }
    F64vec2_COMP(eq)
    F64vec2_COMP(lt)
    F64vec2_COMP(le)
    F64vec2_COMP(gt)
    F64vec2_COMP(ge)
    F64vec2_COMP(ngt)
    F64vec2_COMP(nge)
    F64vec2_COMP(neq)
    F64vec2_COMP(nlt)
    F64vec2_COMP(nle)
#undef F64vec2_COMP

    // Element-wise minimum and maximum.
    friend F64vec2 simd_min(const F64vec2 &a,const F64vec2 &b) { return _mm_min_pd(a,b); }
    friend F64vec2 simd_max(const F64vec2 &a,const F64vec2 &b) { return _mm_max_pd(a,b); }

    // comi<op>(a,b): compares the low elements only (_mm_comi*_sd) and
    // returns the result as an int.
#define F64vec2_COMI(op) friend int comi##op (const F64vec2 &a,const F64vec2 &b) { return _mm_comi##op##_sd(a,b); }
    F64vec2_COMI(eq)
    F64vec2_COMI(lt)
    F64vec2_COMI(le)
    F64vec2_COMI(gt)
    F64vec2_COMI(ge)
    F64vec2_COMI(neq)
#undef F64vec2_COMI

    // ucomi<op>(a,b): same as comi<op> but using the _mm_ucomi*_sd forms.
#define F64vec2_UCOMI(op) friend int ucomi##op (const F64vec2 &a,const F64vec2 &b) { return _mm_ucomi##op##_sd(a,b); }
    F64vec2_UCOMI(eq)
    F64vec2_UCOMI(lt)
    F64vec2_UCOMI(le)
    F64vec2_UCOMI(gt)
    F64vec2_UCOMI(ge)
    F64vec2_UCOMI(neq)
#undef F64vec2_UCOMI

#if defined(_ENABLE_VEC_DEBUG)

    // Debug output: prints element 1, then element 0.
    friend std::ostream & operator<<(std::ostream & os,const F64vec2 &a) {
        const double *dp = reinterpret_cast<const double*>(&a);
        os << " [1]:" << *(dp+1)
           << " [0]:" << *dp;
        return os;
    }
#endif

    // Read-only element access; the index is checked by assert() only.
    const double &operator[](int i) const {
        assert((0 <= i) && (i <= 1));
        const double *dp = reinterpret_cast<const double*>(&vec);
        return *(dp+i);
    }

    // Writable element access; the index is checked by assert() only.
    double &operator[](int i) {
        assert((0 <= i) && (i <= 1));
        double *dp = reinterpret_cast<double*>(&vec);
        return *(dp+i);
    }
};
816 
817 inline F64vec2 unpack_low(const F64vec2 &a,const F64vec2 &b) { return _mm_unpacklo_pd(a,b); }
818 inline F64vec2 unpack_high(const F64vec2 &a,const F64vec2 &b) { return _mm_unpackhi_pd(a,b); }
819 inline int move_mask(const F64vec2 &a) { return _mm_movemask_pd(a); }
820 inline void loadu(F64vec2 &a,double *p) { a = _mm_loadu_pd(p); }
821 inline void storeu(double *p,const F64vec2 &a) { _mm_storeu_pd(p,a); }
822 inline void store_nta(double *p,F64vec2 &a) { _mm_stream_pd(p,a); }
823 
824 #define F64vec2_SELECT(op) inline F64vec2 select_##op (const F64vec2 &a,const F64vec2 &b,const F64vec2 &c,const F64vec2 &d) { F64vec2 mask = _mm_cmp##op##_pd(a,b); return((mask & c) | F64vec2((_mm_andnot_pd(mask,d)))); }
826 F64vec2_SELECT(lt)
827 F64vec2_SELECT(le)
828 F64vec2_SELECT(gt)
829 F64vec2_SELECT(ge)
830 F64vec2_SELECT(neq)
831 F64vec2_SELECT(nlt)
832 F64vec2_SELECT(nle)
833 #undef F64vec2_SELECT
834 
835 inline int F64vec2ToInt(const F64vec2 &a) { return _mm_cvttsd_si32(a); }
836 inline F64vec2 F32vec4ToF64vec2(const F32vec4 &a) { return _mm_cvtps_pd(a); }
837 inline F32vec4 F64vec2ToF32vec4(const F64vec2 &a) { return _mm_cvtpd_ps(a); }
838 inline F64vec2 IntToF64vec2(const F64vec2 &a,int b) { return _mm_cvtsi32_sd(a,b); }
839 
840 #pragma pack(pop)
841 #pragma pack(pop)
842 #endif
843 #endif
friend F64vec2 operator ^(const F64vec2 &a, const F64vec2 &b)
Definition: dvec.h:735
Definition: fvec.h:28
F64vec2 F32vec4ToF64vec2(const F32vec4 &a)
Definition: dvec.h:836
I16vec8 operator<<(int count)
Definition: dvec.h:313
I64vec2(__m128i mm)
Definition: dvec.h:97
I16vec8 & operator<<=(const M128 &a)
Definition: dvec.h:314
Is16vec8 & operator+=(const I16vec8 &a)
Definition: dvec.h:350
Definition: dvec.h:233
I32vec4 & operator|=(const M128 &a)
Definition: dvec.h:149
Is8vec16 & operator|=(const M128 &a)
Definition: dvec.h:523
I128vec1(__m128i mm)
Definition: dvec.h:84
const int & operator[](int i) const
Definition: dvec.h:212
const signed char & operator[](int i) const
Definition: dvec.h:553
Iu16vec8 & operator *=(const I16vec8 &a)
Definition: dvec.h:437
I16vec8 & operator<<=(int count)
Definition: dvec.h:315
Iu8vec16 & operator -=(const I8vec16 &a)
Definition: dvec.h:591
struct S2 s2
Iu32vec4 operator>>(int count)
Definition: dvec.h:260
signed short & operator[](int i)
Definition: dvec.h:386
Is32vec4 operator<<(const M128 &a)
Definition: dvec.h:190
unsigned char & operator[](int i)
Definition: dvec.h:623
I64vec2 operator<<(int count)
Definition: dvec.h:115
const __int64 & operator[](int i) const
Definition: dvec.h:123
friend F64vec2 operator &(const F64vec2 &a, const F64vec2 &b)
Definition: dvec.h:733
Iu32vec4 operator>>(const M128 &a)
Definition: dvec.h:259
Iu16vec8 & operator&=(const M128 &a)
Definition: dvec.h:431
EXPLICIT F64vec2(double d)
Definition: dvec.h:729
basic_ostream< char, char_traits< char > > ostream
Definition: _iosfwd.h:121
I64vec2 & operator|=(const M128 &a)
Definition: dvec.h:108
const unsigned short & operator[](int i) const
Definition: dvec.h:464
Is32vec4 & operator=(const M128 &a)
Definition: dvec.h:181
#define _MM_2QW(element, vector)
Definition: dvec.h:50
Iu8vec16 & operator^=(const M128 &a)
Definition: dvec.h:588
I16vec8 & operator+=(const I16vec8 &a)
Definition: dvec.h:308
Iu32vec4 & operator=(const M128 &a)
Definition: dvec.h:246
I16vec8()
Definition: dvec.h:299
I32vec4 & operator=(const M128 &a)
Definition: dvec.h:146
I16vec8 & operator&=(const M128 &a)
Definition: dvec.h:304
M128 operator|(const M128 &a, const M128 &b)
Definition: dvec.h:76
GLuint GLuint GLsizei count
Definition: gl.h:1545
Iu16vec8 & operator<<=(int count)
Definition: dvec.h:442
Is16vec8()
Definition: dvec.h:330
std::ostream & operator<<(std::ostream &out, const XMLError &err)
Definition: xmlstorage.cpp:673
I16vec8 & operator *=(const I16vec8 &a)
Definition: dvec.h:310
Definition: dvec.h:578
const __m128i get_mask128()
Definition: dvec.h:52
__m128d
Definition: emmintrin.h:30
Is16vec8 & operator|=(const M128 &a)
Definition: dvec.h:347
I32vec4 & operator+=(const I32vec4 &a)
Definition: dvec.h:152
#define IVEC128_SELECT(vect12, vect34, element, selop, arg1, arg2)
Definition: dvec.h:679
F64vec2_COMI(eq) F64vec2_COMI(lt) F64vec2_COMI(le) F64vec2_COMI(gt) F64vec2_COMI(ge) F64vec2_COMI(neq) F64vec2_UCOMI(eq) F64vec2_UCOMI(lt) F64vec2_UCOMI(le) F64vec2_UCOMI(gt) F64vec2_UCOMI(ge) F64vec2_UCOMI(neq) const double &operator[](int i) const
Definition: dvec.h:777
Iu32vec4 & operator>>=(const M128 &a)
Definition: dvec.h:261
F64vec2 & operator+=(F64vec2 &a)
Definition: dvec.h:742
#define assert(x)
Definition: debug.h:53
M128 & operator&=(const M128 &a)
Definition: dvec.h:69
I16vec8 operator<<(const M128 &a)
Definition: dvec.h:312
Is16vec8 simd_max(const Is16vec8 &a, const Is16vec8 &b)
Definition: dvec.h:409
__int64 & operator[](int i)
Definition: dvec.h:129
Is16vec8(signed short s7, signed short s6, signed short s5, signed short s4, signed short s3, signed short s2, signed short s1, signed short s0)
Definition: dvec.h:332
Iu16vec8 & operator<<=(const M128 &a)
Definition: dvec.h:441
void store_nta(double *p, F64vec2 &a)
Definition: dvec.h:822
I64vec2()
Definition: dvec.h:96
Is16vec8 pack_sat(const Is32vec4 &a, const Is32vec4 &b)
Definition: dvec.h:646
I128vec1 & operator|=(const M128 &a)
Definition: dvec.h:88
Iu16vec8 & operator=(const M128 &a)
Definition: dvec.h:429
M128 operator &(const M128 &a, const M128 &b)
Definition: dvec.h:75
Iu32vec4 & operator<<=(const M128 &a)
Definition: dvec.h:257
Definition: dvec.h:296
#define F64vec2_UCOMI(op)
Is8vec16(__m128i mm)
Definition: dvec.h:518
#define _MM_8UW(element, vector)
Definition: dvec.h:44
#define _MM_8W(element, vector)
Definition: dvec.h:45
Is16vec8 operator<<(const M128 &a)
Definition: dvec.h:354
F32vec4 F64vec2ToF32vec4(const F64vec2 &a)
Definition: dvec.h:837
Iu8vec16 & operator+=(const I8vec16 &a)
Definition: dvec.h:590
#define eq(received, expected, label, type)
Definition: locale.c:144
I8vec16(__m128i mm)
Definition: dvec.h:495
ULONG32 u32
Definition: btrfs.h:14
F64vec2 & operator/=(F64vec2 &a)
Definition: dvec.h:745
Is32vec4 & operator|=(const M128 &a)
Definition: dvec.h:184
Is32vec4 & operator&=(const M128 &a)
Definition: dvec.h:183
I32vec4 & operator<<=(int count)
Definition: dvec.h:158
Is8vec16 & operator+=(const I8vec16 &a)
Definition: dvec.h:526
const GLfloat * m
Definition: glext.h:10848
M128 andnot(const M128 &a, const M128 &b)
Definition: dvec.h:78
Iu8vec16 & operator&=(const M128 &a)
Definition: dvec.h:586
Is32vec4 & operator+=(const I32vec4 &a)
Definition: dvec.h:187
I32vec4 & operator&=(const M128 &a)
Definition: dvec.h:148
Is8vec16 & operator -=(const I8vec16 &a)
Definition: dvec.h:527
friend F64vec2 sqrt(const F64vec2 &a)
Definition: dvec.h:758
F64vec2()
Definition: dvec.h:723
#define _MM_16UB(element, vector)
Definition: dvec.h:41
Is16vec8(__m128i mm)
Definition: dvec.h:331
Definition: dvec.h:168
unsigned short(__cdecl typeof(TIFFCurrentDirectory))(struct tiff *)
Definition: typeof.h:93
Is32vec4 cmpgt(const Is32vec4 &a, const Is32vec4 &b)
Definition: dvec.h:227
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
Iu32vec4()
Definition: dvec.h:236
int & operator[](int i)
Definition: dvec.h:218
I8vec16 & operator=(const M128 &a)
Definition: dvec.h:497
Is16vec8 sat_sub(const Is16vec8 &a, const Is16vec8 &b)
Definition: dvec.h:407
void loadu(F64vec2 &a, double *p)
Definition: dvec.h:820
I64vec2 & operator=(const M128 &a)
Definition: dvec.h:105
F64vec2(__m128d m)
Definition: dvec.h:725
Is16vec8 operator<<(int count)
Definition: dvec.h:355
Definition: dvec.h:80
I8vec16 & operator -=(const I8vec16 &a)
Definition: dvec.h:504
I64vec2 operator<<(const I64vec2 &a)
Definition: dvec.h:114
Is32vec4 & operator<<=(int count)
Definition: dvec.h:193
unsigned short & operator[](int i)
Definition: dvec.h:470
F64vec2 IntToF64vec2(const F64vec2 &a, int b)
Definition: dvec.h:838
I8vec16 & operator|=(const M128 &a)
Definition: dvec.h:500
F64vec2_COMP(eq) F64vec2_COMP(lt) F64vec2_COMP(le) F64vec2_COMP(gt) F64vec2_COMP(ge) F64vec2_COMP(ngt) F64vec2_COMP(nge) F64vec2_COMP(neq) F64vec2_COMP(nlt) F64vec2_COMP(nle) friend F64vec2 simd_min(const F64vec2 &a
I8vec16 & operator^=(const M128 &a)
Definition: dvec.h:501
I16vec8 & operator -=(const I16vec8 &a)
Definition: dvec.h:309
I64vec2 sum_abs(const Iu8vec16 &a, const Iu8vec16 &b)
Definition: dvec.h:640
I16vec8(__m128i mm)
Definition: dvec.h:300
Iu16vec8 operator>>(int count)
Definition: dvec.h:444
I64vec2 & operator&=(const M128 &a)
Definition: dvec.h:107
Iu32vec4 & operator+=(const I32vec4 &a)
Definition: dvec.h:252
Iu16vec8 & operator -=(const I16vec8 &a)
Definition: dvec.h:436
Is8vec16 & operator^=(const M128 &a)
Definition: dvec.h:524
#define F64vec2_SELECT(op)
Definition: dvec.h:824
I128vec1 & operator&=(const M128 &a)
Definition: dvec.h:87
void storeu(double *p, const F64vec2 &a)
Definition: dvec.h:821
Is8vec16 & operator=(const M128 &a)
Definition: dvec.h:520
Iu8vec16 & operator=(const M128 &a)
Definition: dvec.h:584
friend F64vec2 operator *(const F64vec2 &a, const F64vec2 &b)
Definition: dvec.h:739
Iu8vec16(__m128i mm)
Definition: dvec.h:582
I64vec2 & operator^=(const M128 &a)
Definition: dvec.h:109
const unsigned char & operator[](int i) const
Definition: dvec.h:617
I64vec2(__m64 q1, __m64 q0)
Definition: dvec.h:99
Is16vec8 & operator>>=(int count)
Definition: dvec.h:362
GLboolean GLboolean GLboolean b
Definition: glext.h:6204
I64vec2 & operator>>=(const I64vec2 &a)
Definition: dvec.h:120
M128 & operator^=(const M128 &a)
Definition: dvec.h:71
I32vec4 & operator<<=(const I32vec4 &a)
Definition: dvec.h:157
Is16vec8 simd_min(const Is16vec8 &a, const Is16vec8 &b)
Definition: dvec.h:410
#define d
Definition: ke_i.h:81
Is16vec8 & operator -=(const I16vec8 &a)
Definition: dvec.h:351
M128 operator^(const M128 &a, const M128 &b)
Definition: dvec.h:77
Is32vec4 mul_add(const Is16vec8 &a, const Is16vec8 &b)
Definition: dvec.h:404
__s8 s8
Definition: types.h:30
I8vec16()
Definition: dvec.h:494
struct S1 s1
const unsigned int & operator[](int i) const
Definition: dvec.h:276
friend double add_horizontal(F64vec2 &a)
Definition: dvec.h:750
Is32vec4 & operator<<=(const M128 &a)
Definition: dvec.h:192
const GLubyte * c
Definition: glext.h:8905
I32vec4()
Definition: dvec.h:143
Iu16vec8(__m128i mm)
Definition: dvec.h:416
Is32vec4 & operator^=(const M128 &a)
Definition: dvec.h:185
__m128i
Definition: emmintrin.h:24
Is16vec8 & operator=(const M128 &a)
Definition: dvec.h:344
UCHAR u8
Definition: btrfs.h:12
M128()
Definition: dvec.h:64
Iu16vec8 & operator>>=(const M128 &a)
Definition: dvec.h:445
Is32vec4 & operator>>=(const M128 &a)
Definition: dvec.h:197
I32vec4(__m128i mm)
Definition: dvec.h:144
Definition: dvec.h:514
Is16vec8 & operator<<=(const M128 &a)
Definition: dvec.h:356
Is16vec8 operator>>(int count)
Definition: dvec.h:360
#define _MM_16B(element, vector)
Definition: dvec.h:42
Iu8vec16()
Definition: dvec.h:581
I64vec2 operator *(const Iu32vec4 &a, const Iu32vec4 &b)
Definition: dvec.h:289
I64vec2 & operator<<=(int count)
Definition: dvec.h:117
Iu16vec8 operator>>(const M128 &a)
Definition: dvec.h:443
I64vec2 & operator>>=(int count)
Definition: dvec.h:121
Definition: dvec.h:412
#define IVEC128_ADD_SUB(vect, element, opsize)
Definition: dvec.h:665
I32vec4 & operator^=(const M128 &a)
Definition: dvec.h:150
__m128d vec
Definition: dvec.h:720
__s16 s16
Definition: types.h:32
Definition: dvec.h:327
I32vec4 operator<<(const I32vec4 &a)
Definition: dvec.h:155
Iu16vec8()
Definition: dvec.h:415
Definition: dvec.h:140
I64vec2 & operator<<=(const I64vec2 &a)
Definition: dvec.h:116
friend F64vec2 simd_max(const F64vec2 &a, const F64vec2 &b)
Definition: dvec.h:774
Definition: dvec.h:93
Iu16vec8 & operator|=(const M128 &a)
Definition: dvec.h:432
Iu8vec16 & operator|=(const M128 &a)
Definition: dvec.h:587
F64vec2(double d1, double d0)
Definition: dvec.h:727
#define _MM_4DW(element, vector)
Definition: dvec.h:48
Is16vec8 & operator&=(const M128 &a)
Definition: dvec.h:346
Is32vec4()
Definition: dvec.h:171
M128(__m128i mm)
Definition: dvec.h:65
const signed short & operator[](int i) const
Definition: dvec.h:380
friend F64vec2 operator/(const F64vec2 &a, const F64vec2 &b)
Definition: dvec.h:740
Iu32vec4 operator<<(int count)
Definition: dvec.h:256
double & operator[](int i)
Definition: dvec.h:810
I64vec2 unpack_high(const I64vec2 &a, const I64vec2 &b)
Definition: dvec.h:138
__s32 s32
Definition: types.h:34
I16vec8 & operator^=(const M128 &a)
Definition: dvec.h:306
__m128i vec
Definition: dvec.h:61
I64vec2 operator>>(const I64vec2 &a)
Definition: dvec.h:118
Is8vec16 & operator&=(const M128 &a)
Definition: dvec.h:522
Iu32vec4 & operator<<=(int count)
Definition: dvec.h:258
F64vec2 & operator -=(F64vec2 &a)
Definition: dvec.h:743
Definition: dvec.h:717
friend F64vec2 operator -(const F64vec2 &a, const F64vec2 &b)
Definition: dvec.h:738
Iu32vec4 & operator&=(const M128 &a)
Definition: dvec.h:248
Iu32vec4 & operator>>=(int count)
Definition: dvec.h:262
Iu16vec8 & operator^=(const M128 &a)
Definition: dvec.h:433
I64vec2 operator>>(int count)
Definition: dvec.h:119
F64vec2 & operator|=(F64vec2 &a)
Definition: dvec.h:747
I128vec1 & operator=(const M128 &a)
Definition: dvec.h:86
I128vec1 & operator^=(const M128 &a)
Definition: dvec.h:89
Iu32vec4 & operator -=(const I32vec4 &a)
Definition: dvec.h:253
Is16vec8 mul_high(const Is16vec8 &a, const Is16vec8 &b)
Definition: dvec.h:403
Iu16vec8 & operator>>=(int count)
Definition: dvec.h:446
#define EXPLICIT
Definition: dvec.h:27
Iu32vec4 & operator^=(const M128 &a)
Definition: dvec.h:250
I64vec2 & operator+=(const I64vec2 &a)
Definition: dvec.h:111
#define IVEC128_LOGICALS(vect, element)
Definition: dvec.h:650
Is32vec4 & operator>>=(int count)
Definition: dvec.h:198
F64vec2 & operator &=(F64vec2 &a)
Definition: dvec.h:746
Is32vec4 operator>>(int count)
Definition: dvec.h:196
Is16vec8 & operator>>=(const M128 &a)
Definition: dvec.h:361
Iu32vec4 & operator|=(const M128 &a)
Definition: dvec.h:249
Is16vec8 & operator^=(const M128 &a)
Definition: dvec.h:348
friend F64vec2 operator+(const F64vec2 &a, const F64vec2 &b)
Definition: dvec.h:737
I32vec4 operator<<(int count)
Definition: dvec.h:156
F64vec2 & operator ^=(F64vec2 &a)
Definition: dvec.h:748
int move_mask(const F64vec2 &a)
Definition: dvec.h:819
I32vec4 cmpeq(const I32vec4 &a, const I32vec4 &b)
Definition: dvec.h:162
friend F64vec2 andnot(const F64vec2 &a, const F64vec2 &b)
Definition: dvec.h:756
I64vec2 & operator -=(const I64vec2 &a)
Definition: dvec.h:112
Iu32vec4(__m128i mm)
Definition: dvec.h:237
Definition: dvec.h:491
GLboolean GLboolean GLboolean GLboolean a
Definition: glext.h:6204
unsigned int & operator[](int i)
Definition: dvec.h:282
I8vec16 & operator+=(const I8vec16 &a)
Definition: dvec.h:503
Is32vec4 & operator -=(const I32vec4 &a)
Definition: dvec.h:188
GLfloat GLfloat p
Definition: glext.h:8902
Iu8vec16 packu_sat(const Is16vec8 &a, const Is16vec8 &b)
Definition: dvec.h:648
I128vec1()
Definition: dvec.h:83
Iu32vec4(unsigned int ui3, unsigned int ui2, unsigned int ui1, unsigned int ui0)
Definition: dvec.h:238
Is16vec8 & operator *=(const I16vec8 &a)
Definition: dvec.h:352
I32vec4 cmpneq(const I32vec4 &a, const I32vec4 &b)
Definition: dvec.h:163
Iu16vec8 & operator+=(const I16vec8 &a)
Definition: dvec.h:435
Is8vec16()
Definition: dvec.h:517
Is32vec4 operator<<(int count)
Definition: dvec.h:191
#define _MM_4UDW(element, vector)
Definition: dvec.h:47
USHORT u16
Definition: btrfs.h:13
I8vec16 & operator&=(const M128 &a)
Definition: dvec.h:499
Is32vec4(__m128i mm)
Definition: dvec.h:172
Iu16vec8 operator<<(int count)
Definition: dvec.h:440
M128 & operator|=(const M128 &a)
Definition: dvec.h:70
signed char & operator[](int i)
Definition: dvec.h:559
Is32vec4 cmplt(const Is32vec4 &a, const Is32vec4 &b)
Definition: dvec.h:228
Is32vec4 operator>>(const M128 &a)
Definition: dvec.h:195
Is16vec8 & operator<<=(int count)
Definition: dvec.h:357
Is16vec8 sat_add(const Is16vec8 &a, const Is16vec8 &b)
Definition: dvec.h:406
Iu16vec8(unsigned short s7, unsigned short s6, unsigned short s5, unsigned short s4, unsigned short s3, unsigned short s2, unsigned short s1, unsigned short s0)
Definition: dvec.h:417
Is32vec4(int i3, int i2, int i1, int i0)
Definition: dvec.h:173
Is16vec8 operator>>(const M128 &a)
Definition: dvec.h:359
#define __int64
Definition: basetyps.h:16
I32vec4 & operator -=(const I32vec4 &a)
Definition: dvec.h:153
friend F64vec2 operator|(const F64vec2 &a, const F64vec2 &b)
Definition: dvec.h:734
I64vec2 unpack_low(const I64vec2 &a, const I64vec2 &b)
Definition: dvec.h:137
Iu32vec4 operator<<(const M128 &a)
Definition: dvec.h:255
F64vec2 & operator *=(F64vec2 &a)
Definition: dvec.h:744
I16vec8 & operator=(const M128 &a)
Definition: dvec.h:302
Iu16vec8 simd_avg(const Iu16vec8 &a, const Iu16vec8 &b)
Definition: dvec.h:488
Definition: dvec.h:58
I16vec8 & operator|=(const M128 &a)
Definition: dvec.h:305
Iu16vec8 operator<<(const M128 &a)
Definition: dvec.h:439