ReactOS 0.4.16-dev-1-gcf26321
dvec.h
Go to the documentation of this file.
1
6#ifndef _DVEC_H_INCLUDED
7#define _DVEC_H_INCLUDED
8#ifndef RC_INVOKED
9
10#if !defined __cplusplus
11#error This file is only supported in C++ compilations!
12#endif
13
14#include <emmintrin.h>
15#include <assert.h>
16#include <fvec.h>
17#include <crtdefs.h>
18
19#pragma pack(push,_CRT_PACKING)
20
21#if defined(_ENABLE_VEC_DEBUG)
22#include <iostream>
23#endif
24
25#pragma pack(push,16)
26
27#define EXPLICIT explicit
28
29class I8vec16;
30class Is8vec16;
31class Iu8vec16;
32class I16vec8;
33class Is16vec8;
34class Iu16vec8;
35class I32vec4;
36class Is32vec4;
37class Iu32vec4;
38class I64vec2;
39class I128vec1;
40
/* Element accessors: treat the storage of `vector` as an array of the named
 * element type and yield an lvalue for element number `element`.
 * No bounds check is done here; callers assert the index themselves.
 * The arguments are substituted directly rather than pasted with `##`:
 * `&` or `+` followed by an identifier is two tokens, and pasting them does
 * not form a valid preprocessing token. Both arguments are parenthesized so
 * that expression arguments keep their meaning. */
#define _MM_16UB(element,vector) (*((unsigned char*)&(vector) + (element)))
#define _MM_16B(element,vector) (*((signed char*)&(vector) + (element)))

#define _MM_8UW(element,vector) (*((unsigned short*)&(vector) + (element)))
#define _MM_8W(element,vector) (*((short*)&(vector) + (element)))

#define _MM_4UDW(element,vector) (*((unsigned int*)&(vector) + (element)))
#define _MM_4DW(element,vector) (*((int*)&(vector) + (element)))

#define _MM_2QW(element,vector) (*((__int64*)&(vector) + (element)))
51
/* Returns a 128-bit value with every bit set.
 * Built with _mm_set1_epi32(-1), so no MMX (__m64) temporary and no
 * compiler-specific i64 literal suffix is needed. */
inline const __m128i get_mask128()
{
  return _mm_set1_epi32(-1);
}
57
/* Base wrapper around one 128-bit integer value (__m128i).
 * Offers conversion to and from the raw type and the bitwise compound
 * assignments; the element-typed classes derive from it. */
class M128
{
protected:
  __m128i vec; /* the wrapped value */

public:
  /* Leaves vec uninitialized. */
  M128() { }
  /* Wraps an existing raw value. */
  M128(__m128i mm) : vec(mm) { }

  /* Yields the raw value so the object can be handed to intrinsics. */
  operator __m128i() const { return vec; }

  /* Bitwise AND / OR / XOR with a; the result replaces this value. */
  M128& operator&=(const M128 &a)
  {
    vec = _mm_and_si128(vec, a);
    return *this;
  }
  M128& operator|=(const M128 &a)
  {
    vec = _mm_or_si128(vec, a);
    return *this;
  }
  M128& operator^=(const M128 &a)
  {
    vec = _mm_xor_si128(vec, a);
    return *this;
  }

};
74
75inline M128 operator&(const M128 &a,const M128 &b) { return _mm_and_si128(a,b); }
76inline M128 operator|(const M128 &a,const M128 &b) { return _mm_or_si128(a,b); }
77inline M128 operator^(const M128 &a,const M128 &b) { return _mm_xor_si128(a,b); }
78inline M128 andnot(const M128 &a,const M128 &b) { return _mm_andnot_si128(a,b); }
79
80class I128vec1 : public M128
81{
82public:
84 I128vec1(__m128i mm) : M128(mm) { }
85
86 I128vec1& operator= (const M128 &a) { return *this = (I128vec1) a; }
87 I128vec1& operator&=(const M128 &a) { return *this = (I128vec1) _mm_and_si128(vec,a); }
88 I128vec1& operator|=(const M128 &a) { return *this = (I128vec1) _mm_or_si128(vec,a); }
89 I128vec1& operator^=(const M128 &a) { return *this = (I128vec1) _mm_xor_si128(vec,a); }
90
91};
92
93class I64vec2 : public M128
94{
95public:
96 I64vec2() { }
97 I64vec2(__m128i mm) : M128(mm) { }
98
99 I64vec2(__m64 q1,__m64 q0)
100 {
101 _MM_2QW(0,vec) = *(__int64*)&q0;
102 _MM_2QW(1,vec) = *(__int64*)&q1;
103 }
104
105 I64vec2& operator= (const M128 &a) { return *this = (I64vec2) a; }
106
107 I64vec2& operator&=(const M128 &a) { return *this = (I64vec2) _mm_and_si128(vec,a); }
108 I64vec2& operator|=(const M128 &a) { return *this = (I64vec2) _mm_or_si128(vec,a); }
109 I64vec2& operator^=(const M128 &a) { return *this = (I64vec2) _mm_xor_si128(vec,a); }
110
111 I64vec2& operator +=(const I64vec2 &a) { return *this = (I64vec2) _mm_add_epi64(vec,a); }
112 I64vec2& operator -=(const I64vec2 &a) { return *this = (I64vec2) _mm_sub_epi64(vec,a); }
113
116 I64vec2& operator<<=(const I64vec2 &a) { return *this = (I64vec2) _mm_sll_epi64(vec,a); }
120 I64vec2& operator>>=(const I64vec2 &a) { return *this = (I64vec2) _mm_srl_epi64(vec,a); }
122
123 const __int64& operator[](int i)const
124 {
125 assert(static_cast<unsigned int>(i) < 2);
126 return _MM_2QW(i,vec);
127 }
128
130 {
131 assert(static_cast<unsigned int>(i) < 2);
132 return _MM_2QW(i,vec);
133 }
134
135};
136
137inline I64vec2 unpack_low(const I64vec2 &a,const I64vec2 &b) {return _mm_unpacklo_epi64(a,b); }
138inline I64vec2 unpack_high(const I64vec2 &a,const I64vec2 &b) {return _mm_unpackhi_epi64(a,b); }
139
140class I32vec4 : public M128
141{
142public:
144 I32vec4(__m128i mm) : M128(mm) { }
145
146 I32vec4& operator= (const M128 &a) { return *this = (I32vec4) a; }
147
148 I32vec4& operator&=(const M128 &a) { return *this = (I32vec4) _mm_and_si128(vec,a); }
149 I32vec4& operator|=(const M128 &a) { return *this = (I32vec4) _mm_or_si128(vec,a); }
150 I32vec4& operator^=(const M128 &a) { return *this = (I32vec4) _mm_xor_si128(vec,a); }
151
152 I32vec4& operator +=(const I32vec4 &a) { return *this = (I32vec4)_mm_add_epi32(vec,a); }
153 I32vec4& operator -=(const I32vec4 &a) { return *this = (I32vec4)_mm_sub_epi32(vec,a); }
154
157 I32vec4& operator<<=(const I32vec4 &a) { return *this = (I32vec4)_mm_sll_epi32(vec,a); }
159
160};
161
162inline I32vec4 cmpeq(const I32vec4 &a,const I32vec4 &b) { return _mm_cmpeq_epi32(a,b); }
164
165inline I32vec4 unpack_low(const I32vec4 &a,const I32vec4 &b) { return _mm_unpacklo_epi32(a,b); }
166inline I32vec4 unpack_high(const I32vec4 &a,const I32vec4 &b) { return _mm_unpackhi_epi32(a,b); }
167
168class Is32vec4 : public I32vec4
169{
170public:
172 Is32vec4(__m128i mm) : I32vec4(mm) { }
173 Is32vec4(int i3,int i2,int i1,int i0)
174 {
175 _MM_4DW(0,vec) = i0;
176 _MM_4DW(1,vec) = i1;
177 _MM_4DW(2,vec) = i2;
178 _MM_4DW(3,vec) = i3;
179 }
180
181 Is32vec4& operator= (const M128 &a) { return *this = (Is32vec4) a; }
182
183 Is32vec4& operator&=(const M128 &a) { return *this = (Is32vec4) _mm_and_si128(vec,a); }
184 Is32vec4& operator|=(const M128 &a) { return *this = (Is32vec4) _mm_or_si128(vec,a); }
185 Is32vec4& operator^=(const M128 &a) { return *this = (Is32vec4) _mm_xor_si128(vec,a); }
186
187 Is32vec4& operator +=(const I32vec4 &a) { return *this = (Is32vec4)_mm_add_epi32(vec,a); }
188 Is32vec4& operator -=(const I32vec4 &a) { return *this = (Is32vec4)_mm_sub_epi32(vec,a); }
189
192 Is32vec4& operator<<=(const M128 &a) { return *this = (Is32vec4)_mm_sll_epi32(vec,a); }
194
197 Is32vec4& operator>>=(const M128 &a) { return *this = (Is32vec4) _mm_sra_epi32(vec,a); }
199
200#if defined(_ENABLE_VEC_DEBUG)
201
202 friend std::ostream& operator<< (std::ostream &os,const Is32vec4 &a)
203 {
204 os << "[3]:" << _MM_4DW(3,a)
205 << " [2]:" << _MM_4DW(2,a)
206 << " [1]:" << _MM_4DW(1,a)
207 << " [0]:" << _MM_4DW(0,a);
208 return os;
209 }
210#endif
211
212 const int& operator[](int i)const
213 {
214 assert(static_cast<unsigned int>(i) < 4);
215 return _MM_4DW(i,vec);
216 }
217
218 int& operator[](int i)
219 {
220 assert(static_cast<unsigned int>(i) < 4);
221 return _MM_4DW(i,vec);
222 }
223};
224
225inline Is32vec4 cmpeq(const Is32vec4 &a,const Is32vec4 &b) { return _mm_cmpeq_epi32(a,b); }
227inline Is32vec4 cmpgt(const Is32vec4 &a,const Is32vec4 &b) { return _mm_cmpgt_epi32(a,b); }
228inline Is32vec4 cmplt(const Is32vec4 &a,const Is32vec4 &b) { return _mm_cmpgt_epi32(b,a); }
229
230inline Is32vec4 unpack_low(const Is32vec4 &a,const Is32vec4 &b) { return _mm_unpacklo_epi32(a,b); }
231inline Is32vec4 unpack_high(const Is32vec4 &a,const Is32vec4 &b) { return _mm_unpackhi_epi32(a,b); }
232
233class Iu32vec4 : public I32vec4
234{
235public:
237 Iu32vec4(__m128i mm) : I32vec4(mm) { }
238 Iu32vec4(unsigned int ui3,unsigned int ui2,unsigned int ui1,unsigned int ui0)
239 {
240 _MM_4UDW(0,vec) = ui0;
241 _MM_4UDW(1,vec) = ui1;
242 _MM_4UDW(2,vec) = ui2;
243 _MM_4UDW(3,vec) = ui3;
244 }
245
246 Iu32vec4& operator= (const M128 &a) { return *this = (Iu32vec4) a; }
247
248 Iu32vec4& operator&=(const M128 &a) { return *this = (Iu32vec4) _mm_and_si128(vec,a); }
249 Iu32vec4& operator|=(const M128 &a) { return *this = (Iu32vec4) _mm_or_si128(vec,a); }
250 Iu32vec4& operator^=(const M128 &a) { return *this = (Iu32vec4) _mm_xor_si128(vec,a); }
251
252 Iu32vec4& operator +=(const I32vec4 &a) { return *this = (Iu32vec4)_mm_add_epi32(vec,a); }
253 Iu32vec4& operator -=(const I32vec4 &a) { return *this = (Iu32vec4)_mm_sub_epi32(vec,a); }
254
257 Iu32vec4& operator<<=(const M128 &a) { return *this = (Iu32vec4)_mm_sll_epi32(vec,a); }
261 Iu32vec4& operator>>=(const M128 &a) { return *this = (Iu32vec4) _mm_srl_epi32(vec,a); }
263
264#if defined(_ENABLE_VEC_DEBUG)
265
266 friend std::ostream& operator<< (std::ostream &os,const Iu32vec4 &a)
267 {
268 os << "[3]:" << _MM_4UDW(3,a)
269 << " [2]:" << _MM_4UDW(2,a)
270 << " [1]:" << _MM_4UDW(1,a)
271 << " [0]:" << _MM_4UDW(0,a);
272 return os;
273 }
274#endif
275
276 const unsigned int& operator[](int i)const
277 {
278 assert(static_cast<unsigned int>(i) < 4);
279 return _MM_4UDW(i,vec);
280 }
281
282 unsigned int& operator[](int i)
283 {
284 assert(static_cast<unsigned int>(i) < 4);
285 return _MM_4UDW(i,vec);
286 }
287};
288
289inline I64vec2 operator*(const Iu32vec4 &a,const Iu32vec4 &b) { return _mm_mul_epu32(a,b); }
290inline Iu32vec4 cmpeq(const Iu32vec4 &a,const Iu32vec4 &b) { return _mm_cmpeq_epi32(a,b); }
292
293inline Iu32vec4 unpack_low(const Iu32vec4 &a,const Iu32vec4 &b) { return _mm_unpacklo_epi32(a,b); }
294inline Iu32vec4 unpack_high(const Iu32vec4 &a,const Iu32vec4 &b) { return _mm_unpackhi_epi32(a,b); }
295
296class I16vec8 : public M128
297{
298public:
300 I16vec8(__m128i mm) : M128(mm) { }
301
302 I16vec8& operator= (const M128 &a) { return *this = (I16vec8) a; }
303
304 I16vec8& operator&=(const M128 &a) { return *this = (I16vec8) _mm_and_si128(vec,a); }
305 I16vec8& operator|=(const M128 &a) { return *this = (I16vec8) _mm_or_si128(vec,a); }
306 I16vec8& operator^=(const M128 &a) { return *this = (I16vec8) _mm_xor_si128(vec,a); }
307
308 I16vec8& operator +=(const I16vec8 &a) { return *this = (I16vec8) _mm_add_epi16(vec,a); }
309 I16vec8& operator -=(const I16vec8 &a) { return *this = (I16vec8) _mm_sub_epi16(vec,a); }
310 I16vec8& operator *=(const I16vec8 &a) { return *this = (I16vec8) _mm_mullo_epi16(vec,a); }
311
314 I16vec8& operator<<=(const M128 &a) { return *this = (I16vec8)_mm_sll_epi16(vec,a); }
316
317};
318
319inline I16vec8 operator*(const I16vec8 &a,const I16vec8 &b) { return _mm_mullo_epi16(a,b); }
320
321inline I16vec8 cmpeq(const I16vec8 &a,const I16vec8 &b) { return _mm_cmpeq_epi16(a,b); }
323
324inline I16vec8 unpack_low(const I16vec8 &a,const I16vec8 &b) { return _mm_unpacklo_epi16(a,b); }
325inline I16vec8 unpack_high(const I16vec8 &a,const I16vec8 &b) { return _mm_unpackhi_epi16(a,b); }
326
327class Is16vec8 : public I16vec8
328{
329public:
331 Is16vec8(__m128i mm) : I16vec8(mm) { }
332 Is16vec8(signed short s7,signed short s6,signed short s5,signed short s4,signed short s3,signed short s2,signed short s1,signed short s0)
333 {
334 _MM_8W(0,vec) = s0;
335 _MM_8W(1,vec) = s1;
336 _MM_8W(2,vec) = s2;
337 _MM_8W(3,vec) = s3;
338 _MM_8W(4,vec) = s4;
339 _MM_8W(5,vec) = s5;
340 _MM_8W(6,vec) = s6;
341 _MM_8W(7,vec) = s7;
342 }
343
344 Is16vec8& operator= (const M128 &a) { return *this = (Is16vec8) a; }
345
346 Is16vec8& operator&=(const M128 &a) { return *this = (Is16vec8) _mm_and_si128(vec,a); }
347 Is16vec8& operator|=(const M128 &a) { return *this = (Is16vec8) _mm_or_si128(vec,a); }
348 Is16vec8& operator^=(const M128 &a) { return *this = (Is16vec8) _mm_xor_si128(vec,a); }
349
350 Is16vec8& operator +=(const I16vec8 &a) { return *this = (Is16vec8) _mm_add_epi16(vec,a); }
351 Is16vec8& operator -=(const I16vec8 &a) { return *this = (Is16vec8) _mm_sub_epi16(vec,a); }
352 Is16vec8& operator *=(const I16vec8 &a) { return *this = (Is16vec8) _mm_mullo_epi16(vec,a); }
353
356 Is16vec8& operator<<=(const M128 &a) { return *this = (Is16vec8)_mm_sll_epi16(vec,a); }
358
361 Is16vec8& operator>>=(const M128 &a) { return *this = (Is16vec8)_mm_sra_epi16(vec,a); }
363
364#if defined(_ENABLE_VEC_DEBUG)
365
366 friend std::ostream& operator<< (std::ostream &os,const Is16vec8 &a)
367 {
368 os << "[7]:" << _MM_8W(7,a)
369 << " [6]:" << _MM_8W(6,a)
370 << " [5]:" << _MM_8W(5,a)
371 << " [4]:" << _MM_8W(4,a)
372 << " [3]:" << _MM_8W(3,a)
373 << " [2]:" << _MM_8W(2,a)
374 << " [1]:" << _MM_8W(1,a)
375 << " [0]:" << _MM_8W(0,a);
376 return os;
377 }
378#endif
379
380 const signed short& operator[](int i)const
381 {
382 assert(static_cast<unsigned int>(i) < 8);
383 return _MM_8W(i,vec);
384 }
385
386 signed short& operator[](int i)
387 {
388 assert(static_cast<unsigned int>(i) < 8);
389 return _MM_8W(i,vec);
390 }
391};
392
393inline Is16vec8 operator*(const Is16vec8 &a,const Is16vec8 &b) { return _mm_mullo_epi16(a,b); }
394
395inline Is16vec8 cmpeq(const Is16vec8 &a,const Is16vec8 &b) { return _mm_cmpeq_epi16(a,b); }
397inline Is16vec8 cmpgt(const Is16vec8 &a,const Is16vec8 &b) { return _mm_cmpgt_epi16(a,b); }
398inline Is16vec8 cmplt(const Is16vec8 &a,const Is16vec8 &b) { return _mm_cmpgt_epi16(b,a); }
399
400inline Is16vec8 unpack_low(const Is16vec8 &a,const Is16vec8 &b) { return _mm_unpacklo_epi16(a,b); }
401inline Is16vec8 unpack_high(const Is16vec8 &a,const Is16vec8 &b) { return _mm_unpackhi_epi16(a,b); }
402
403inline Is16vec8 mul_high(const Is16vec8 &a,const Is16vec8 &b) { return _mm_mulhi_epi16(a,b); }
404inline Is32vec4 mul_add(const Is16vec8 &a,const Is16vec8 &b) { return _mm_madd_epi16(a,b);}
405
406inline Is16vec8 sat_add(const Is16vec8 &a,const Is16vec8 &b) { return _mm_adds_epi16(a,b); }
407inline Is16vec8 sat_sub(const Is16vec8 &a,const Is16vec8 &b) { return _mm_subs_epi16(a,b); }
408
409inline Is16vec8 simd_max(const Is16vec8 &a,const Is16vec8 &b) { return _mm_max_epi16(a,b); }
410inline Is16vec8 simd_min(const Is16vec8 &a,const Is16vec8 &b) { return _mm_min_epi16(a,b); }
411
412class Iu16vec8 : public I16vec8
413{
414public:
416 Iu16vec8(__m128i mm) : I16vec8(mm) { }
417 Iu16vec8(unsigned short s7,unsigned short s6,unsigned short s5,unsigned short s4,unsigned short s3,unsigned short s2,unsigned short s1,unsigned short s0)
418 {
419 _MM_8UW(0,vec) = s0;
420 _MM_8UW(1,vec) = s1;
421 _MM_8UW(2,vec) = s2;
422 _MM_8UW(3,vec) = s3;
423 _MM_8UW(4,vec) = s4;
424 _MM_8UW(5,vec) = s5;
425 _MM_8UW(6,vec) = s6;
426 _MM_8UW(7,vec) = s7;
427 }
428
429 Iu16vec8& operator= (const M128 &a) { return *this = (Iu16vec8) a; }
430
431 Iu16vec8& operator&=(const M128 &a) { return *this = (Iu16vec8) _mm_and_si128(vec,a); }
432 Iu16vec8& operator|=(const M128 &a) { return *this = (Iu16vec8) _mm_or_si128(vec,a); }
433 Iu16vec8& operator^=(const M128 &a) { return *this = (Iu16vec8) _mm_xor_si128(vec,a); }
434
435 Iu16vec8& operator +=(const I16vec8 &a) { return *this = (Iu16vec8) _mm_add_epi16(vec,a); }
436 Iu16vec8& operator -=(const I16vec8 &a) { return *this = (Iu16vec8) _mm_sub_epi16(vec,a); }
437 Iu16vec8& operator *=(const I16vec8 &a) { return *this = (Iu16vec8) _mm_mullo_epi16(vec,a); }
438
441 Iu16vec8& operator<<=(const M128 &a) { return *this = (Iu16vec8)_mm_sll_epi16(vec,a); }
445 Iu16vec8& operator>>=(const M128 &a) { return *this = (Iu16vec8) _mm_srl_epi16(vec,a); }
447
448#if defined(_ENABLE_VEC_DEBUG)
449
450 friend std::ostream& operator << (std::ostream &os,const Iu16vec8 &a)
451 {
452 os << "[7]:" << unsigned short(_MM_8UW(7,a))
453 << " [6]:" << unsigned short(_MM_8UW(6,a))
454 << " [5]:" << unsigned short(_MM_8UW(5,a))
455 << " [4]:" << unsigned short(_MM_8UW(4,a))
456 << " [3]:" << unsigned short(_MM_8UW(3,a))
457 << " [2]:" << unsigned short(_MM_8UW(2,a))
458 << " [1]:" << unsigned short(_MM_8UW(1,a))
459 << " [0]:" << unsigned short(_MM_8UW(0,a));
460 return os;
461 }
462#endif
463
464 const unsigned short& operator[](int i)const
465 {
466 assert(static_cast<unsigned int>(i) < 8);
467 return _MM_8UW(i,vec);
468 }
469
470 unsigned short& operator[](int i)
471 {
472 assert(static_cast<unsigned int>(i) < 8);
473 return _MM_8UW(i,vec);
474 }
475};
476
477inline Iu16vec8 operator*(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_mullo_epi16(a,b); }
478
479inline Iu16vec8 cmpeq(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_cmpeq_epi16(a,b); }
481
482inline Iu16vec8 unpack_low(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_unpacklo_epi16(a,b); }
483inline Iu16vec8 unpack_high(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_unpackhi_epi16(a,b); }
484
485inline Iu16vec8 sat_add(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_adds_epu16(a,b); }
486inline Iu16vec8 sat_sub(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_subs_epu16(a,b); }
487
488inline Iu16vec8 simd_avg(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_avg_epu16(a,b); }
489inline I16vec8 mul_high(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_mulhi_epu16(a,b); }
490
491class I8vec16 : public M128
492{
493public:
495 I8vec16(__m128i mm) : M128(mm) { }
496
497 I8vec16& operator= (const M128 &a) { return *this = (I8vec16) a; }
498
499 I8vec16& operator&=(const M128 &a) { return *this = (I8vec16) _mm_and_si128(vec,a); }
500 I8vec16& operator|=(const M128 &a) { return *this = (I8vec16) _mm_or_si128(vec,a); }
501 I8vec16& operator^=(const M128 &a) { return *this = (I8vec16) _mm_xor_si128(vec,a); }
502
503 I8vec16& operator +=(const I8vec16 &a) { return *this = (I8vec16) _mm_add_epi8(vec,a); }
504 I8vec16& operator -=(const I8vec16 &a) { return *this = (I8vec16) _mm_sub_epi8(vec,a); }
505
506};
507
508inline I8vec16 cmpeq(const I8vec16 &a,const I8vec16 &b) { return _mm_cmpeq_epi8(a,b); }
510
511inline I8vec16 unpack_low(const I8vec16 &a,const I8vec16 &b) { return _mm_unpacklo_epi8(a,b); }
512inline I8vec16 unpack_high(const I8vec16 &a,const I8vec16 &b) { return _mm_unpackhi_epi8(a,b); }
513
514class Is8vec16 : public I8vec16
515{
516public:
518 Is8vec16(__m128i mm) : I8vec16(mm) { }
519
520 Is8vec16& operator= (const M128 &a) { return *this = (Is8vec16) a; }
521
522 Is8vec16& operator&=(const M128 &a) { return *this = (Is8vec16) _mm_and_si128(vec,a); }
523 Is8vec16& operator|=(const M128 &a) { return *this = (Is8vec16) _mm_or_si128(vec,a); }
524 Is8vec16& operator^=(const M128 &a) { return *this = (Is8vec16) _mm_xor_si128(vec,a); }
525
526 Is8vec16& operator +=(const I8vec16 &a) { return *this = (Is8vec16) _mm_add_epi8(vec,a); }
527 Is8vec16& operator -=(const I8vec16 &a) { return *this = (Is8vec16) _mm_sub_epi8(vec,a); }
528
529#if defined(_ENABLE_VEC_DEBUG)
530
531 friend std::ostream& operator << (std::ostream &os,const Is8vec16 &a)
532 {
533 os << "[15]:" << short(_MM_16B(15,a))
534 << " [14]:" << short(_MM_16B(14,a))
535 << " [13]:" << short(_MM_16B(13,a))
536 << " [12]:" << short(_MM_16B(12,a))
537 << " [11]:" << short(_MM_16B(11,a))
538 << " [10]:" << short(_MM_16B(10,a))
539 << " [9]:" << short(_MM_16B(9,a))
540 << " [8]:" << short(_MM_16B(8,a))
541 << " [7]:" << short(_MM_16B(7,a))
542 << " [6]:" << short(_MM_16B(6,a))
543 << " [5]:" << short(_MM_16B(5,a))
544 << " [4]:" << short(_MM_16B(4,a))
545 << " [3]:" << short(_MM_16B(3,a))
546 << " [2]:" << short(_MM_16B(2,a))
547 << " [1]:" << short(_MM_16B(1,a))
548 << " [0]:" << short(_MM_16B(0,a));
549 return os;
550 }
551#endif
552
553 const signed char& operator[](int i)const
554 {
555 assert(static_cast<unsigned int>(i) < 16);
556 return _MM_16B(i,vec);
557 }
558
559 signed char& operator[](int i)
560 {
561 assert(static_cast<unsigned int>(i) < 16);
562 return _MM_16B(i,vec);
563 }
564
565};
566
567inline Is8vec16 cmpeq(const Is8vec16 &a,const Is8vec16 &b) { return _mm_cmpeq_epi8(a,b); }
569inline Is8vec16 cmpgt(const Is8vec16 &a,const Is8vec16 &b) { return _mm_cmpgt_epi8(a,b); }
570inline Is8vec16 cmplt(const Is8vec16 &a,const Is8vec16 &b) { return _mm_cmplt_epi8(a,b); }
571
572inline Is8vec16 unpack_low(const Is8vec16 &a,const Is8vec16 &b) { return _mm_unpacklo_epi8(a,b); }
573inline Is8vec16 unpack_high(const Is8vec16 &a,const Is8vec16 &b) { return _mm_unpackhi_epi8(a,b); }
574
575inline Is8vec16 sat_add(const Is8vec16 &a,const Is8vec16 &b) { return _mm_adds_epi8(a,b); }
576inline Is8vec16 sat_sub(const Is8vec16 &a,const Is8vec16 &b) { return _mm_subs_epi8(a,b); }
577
578class Iu8vec16 : public I8vec16
579{
580public:
582 Iu8vec16(__m128i mm) : I8vec16(mm) { }
583
584 Iu8vec16& operator= (const M128 &a) { return *this = (Iu8vec16) a; }
585
586 Iu8vec16& operator&=(const M128 &a) { return *this = (Iu8vec16) _mm_and_si128(vec,a); }
587 Iu8vec16& operator|=(const M128 &a) { return *this = (Iu8vec16) _mm_or_si128(vec,a); }
588 Iu8vec16& operator^=(const M128 &a) { return *this = (Iu8vec16) _mm_xor_si128(vec,a); }
589
590 Iu8vec16& operator +=(const I8vec16 &a) { return *this = (Iu8vec16) _mm_add_epi8(vec,a); }
591 Iu8vec16& operator -=(const I8vec16 &a) { return *this = (Iu8vec16) _mm_sub_epi8(vec,a); }
592
593#if defined(_ENABLE_VEC_DEBUG)
594
595 friend std::ostream& operator << (std::ostream &os,const Iu8vec16 &a)
596 {
597 os << "[15]:" << unsigned short(_MM_16UB(15,a))
598 << " [14]:" << unsigned short(_MM_16UB(14,a))
599 << " [13]:" << unsigned short(_MM_16UB(13,a))
600 << " [12]:" << unsigned short(_MM_16UB(12,a))
601 << " [11]:" << unsigned short(_MM_16UB(11,a))
602 << " [10]:" << unsigned short(_MM_16UB(10,a))
603 << " [9]:" << unsigned short(_MM_16UB(9,a))
604 << " [8]:" << unsigned short(_MM_16UB(8,a))
605 << " [7]:" << unsigned short(_MM_16UB(7,a))
606 << " [6]:" << unsigned short(_MM_16UB(6,a))
607 << " [5]:" << unsigned short(_MM_16UB(5,a))
608 << " [4]:" << unsigned short(_MM_16UB(4,a))
609 << " [3]:" << unsigned short(_MM_16UB(3,a))
610 << " [2]:" << unsigned short(_MM_16UB(2,a))
611 << " [1]:" << unsigned short(_MM_16UB(1,a))
612 << " [0]:" << unsigned short(_MM_16UB(0,a));
613 return os;
614 }
615#endif
616
617 const unsigned char& operator[](int i)const
618 {
619 assert(static_cast<unsigned int>(i) < 16);
620 return _MM_16UB(i,vec);
621 }
622
623 unsigned char& operator[](int i)
624 {
625 assert(static_cast<unsigned int>(i) < 16);
626 return _MM_16UB(i,vec);
627 }
628
629};
630
631inline Iu8vec16 cmpeq(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_cmpeq_epi8(a,b); }
633
634inline Iu8vec16 unpack_low(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_unpacklo_epi8(a,b); }
635inline Iu8vec16 unpack_high(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_unpackhi_epi8(a,b); }
636
637inline Iu8vec16 sat_add(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_adds_epu8(a,b); }
638inline Iu8vec16 sat_sub(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_subs_epu8(a,b); }
639
640inline I64vec2 sum_abs(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_sad_epu8(a,b); }
641
642inline Iu8vec16 simd_avg(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_avg_epu8(a,b); }
643inline Iu8vec16 simd_max(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_max_epu8(a,b); }
644inline Iu8vec16 simd_min(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_min_epu8(a,b); }
645
646inline Is16vec8 pack_sat(const Is32vec4 &a,const Is32vec4 &b) { return _mm_packs_epi32(a,b); }
647inline Is8vec16 pack_sat(const Is16vec8 &a,const Is16vec8 &b) { return _mm_packs_epi16(a,b); }
648inline Iu8vec16 packu_sat(const Is16vec8 &a,const Is16vec8 &b) { return _mm_packus_epi16(a,b);}
649
650#define IVEC128_LOGICALS(vect,element) inline I##vect##vec##element operator& (const I##vect##vec##element &a,const I##vect##vec##element &b) { return _mm_and_si128(a,b); } inline I##vect##vec##element operator| (const I##vect##vec##element &a,const I##vect##vec##element &b) { return _mm_or_si128(a,b); } inline I##vect##vec##element operator^ (const I##vect##vec##element &a,const I##vect##vec##element &b) { return _mm_xor_si128(a,b); } inline I##vect##vec##element andnot (const I##vect##vec##element &a,const I##vect##vec##element &b) { return _mm_andnot_si128(a,b); }
651
662IVEC128_LOGICALS(128,1)
663#undef IVEC128_LOGICALS
664
665#define IVEC128_ADD_SUB(vect,element,opsize) inline I##vect##vec##element operator+ (const I##vect##vec##element &a,const I##vect##vec##element &b) { return _mm_add_##opsize(a,b); } inline I##vect##vec##element operator- (const I##vect##vec##element &a,const I##vect##vec##element &b) { return _mm_sub_##opsize(a,b); }
666
667IVEC128_ADD_SUB(8,16,epi8)
668IVEC128_ADD_SUB(u8,16,epi8)
669IVEC128_ADD_SUB(s8,16,epi8)
670IVEC128_ADD_SUB(16,8,epi16)
671IVEC128_ADD_SUB(u16,8,epi16)
672IVEC128_ADD_SUB(s16,8,epi16)
673IVEC128_ADD_SUB(32,4,epi32)
674IVEC128_ADD_SUB(u32,4,epi32)
675IVEC128_ADD_SUB(s32,4,epi32)
676IVEC128_ADD_SUB(64,2,epi64)
677#undef IVEC128_ADD_SUB
678
679#define IVEC128_SELECT(vect12,vect34,element,selop,arg1,arg2) inline I##vect34##vec##element select_##selop (const I##vect12##vec##element &a,const I##vect12##vec##element &b,const I##vect34##vec##element &c,const I##vect34##vec##element &d) { I##vect12##vec##element mask = cmp##selop(a,b); return(I##vect34##vec##element ((mask & arg1) | I##vect12##vec##element ((_mm_andnot_si128(mask,arg2))))); }
680IVEC128_SELECT(8,s8,16,eq,c,d)
681IVEC128_SELECT(8,u8,16,eq,c,d)
682IVEC128_SELECT(8,8,16,eq,c,d)
683IVEC128_SELECT(8,s8,16,neq,c,d)
684IVEC128_SELECT(8,u8,16,neq,c,d)
685IVEC128_SELECT(8,8,16,neq,c,d)
686
687IVEC128_SELECT(16,s16,8,eq,c,d)
688IVEC128_SELECT(16,u16,8,eq,c,d)
689IVEC128_SELECT(16,16,8,eq,c,d)
690IVEC128_SELECT(16,s16,8,neq,c,d)
691IVEC128_SELECT(16,u16,8,neq,c,d)
692IVEC128_SELECT(16,16,8,neq,c,d)
693
694IVEC128_SELECT(32,s32,4,eq,c,d)
695IVEC128_SELECT(32,u32,4,eq,c,d)
696IVEC128_SELECT(32,32,4,eq,c,d)
697IVEC128_SELECT(32,s32,4,neq,c,d)
698IVEC128_SELECT(32,u32,4,neq,c,d)
699IVEC128_SELECT(32,32,4,neq,c,d)
700
701IVEC128_SELECT(s8,s8,16,gt,c,d)
702IVEC128_SELECT(s8,u8,16,gt,c,d)
703IVEC128_SELECT(s8,8,16,gt,c,d)
704IVEC128_SELECT(s8,s8,16,lt,c,d)
705IVEC128_SELECT(s8,u8,16,lt,c,d)
706IVEC128_SELECT(s8,8,16,lt,c,d)
707
710IVEC128_SELECT(s16,16,8,gt,c,d)
713IVEC128_SELECT(s16,16,8,lt,c,d)
714
715#undef IVEC128_SELECT
716
/* Two double-precision floating-point elements held in one __m128d value. */
class F64vec2
{
protected:
  __m128d vec; /* the wrapped value */
public:

  /* Default constructor; the value is left uninitialized. */
  F64vec2() { }

  /* Wraps an existing raw value. */
  F64vec2(__m128d m) { vec = m;}

  /* Element-wise constructor; arguments are forwarded to _mm_set_pd(d1,d0),
   * so d0 ends up as element 0 and d1 as element 1. */
  F64vec2(double d1,double d0) { vec= _mm_set_pd(d1,d0); }

  /* Broadcast constructor: both elements are set to d. Explicit so that a
   * plain double is never silently converted to a vector. */
  explicit F64vec2(double d) { vec = _mm_set1_pd(d); }

  /* Yields the raw value so the object can be handed to intrinsics. */
  operator __m128d() const { return vec; }

  /* Bitwise operations on the raw bit patterns. */
  friend F64vec2 operator &(const F64vec2 &a,const F64vec2 &b) { return _mm_and_pd(a,b); }
  friend F64vec2 operator |(const F64vec2 &a,const F64vec2 &b) { return _mm_or_pd(a,b); }
  friend F64vec2 operator ^(const F64vec2 &a,const F64vec2 &b) { return _mm_xor_pd(a,b); }

  /* Element-wise arithmetic. */
  friend F64vec2 operator +(const F64vec2 &a,const F64vec2 &b) { return _mm_add_pd(a,b); }
  friend F64vec2 operator -(const F64vec2 &a,const F64vec2 &b) { return _mm_sub_pd(a,b); }
  friend F64vec2 operator *(const F64vec2 &a,const F64vec2 &b) { return _mm_mul_pd(a,b); }
  friend F64vec2 operator /(const F64vec2 &a,const F64vec2 &b) { return _mm_div_pd(a,b); }

  /* Compound assignments. The operand is only read, so it is taken by const
   * reference; this also lets temporaries and const objects be used. */
  F64vec2& operator +=(const F64vec2 &a) { return *this = _mm_add_pd(vec,a); }
  F64vec2& operator -=(const F64vec2 &a) { return *this = _mm_sub_pd(vec,a); }
  F64vec2& operator *=(const F64vec2 &a) { return *this = _mm_mul_pd(vec,a); }
  F64vec2& operator /=(const F64vec2 &a) { return *this = _mm_div_pd(vec,a); }
  F64vec2& operator &=(const F64vec2 &a) { return *this = _mm_and_pd(vec,a); }
  F64vec2& operator |=(const F64vec2 &a) { return *this = _mm_or_pd(vec,a); }
  F64vec2& operator ^=(const F64vec2 &a) { return *this = _mm_xor_pd(vec,a); }

  /* Returns the sum of the two elements of a: the elements are swapped with
   * a shuffle, added in the low lane, and element 0 is read back. */
  friend double add_horizontal(const F64vec2 &a)
  {
    F64vec2 ftemp = _mm_add_sd(a,_mm_shuffle_pd(a,a,1));
    return ftemp[0];
  }

  /* Forwards to _mm_andnot_pd(a,b): the complement of a ANDed with b. */
  friend F64vec2 andnot(const F64vec2 &a,const F64vec2 &b) { return _mm_andnot_pd(a,b); }

  /* Element-wise square root. */
  friend F64vec2 sqrt(const F64vec2 &a) { return _mm_sqrt_pd(a); }

  /* cmp<op>(a,b): element-wise comparison mask from _mm_cmp<op>_pd. */
#define F64vec2_COMP(op) friend F64vec2 cmp##op (const F64vec2 &a,const F64vec2 &b) { return _mm_cmp##op##_pd(a,b); }
  F64vec2_COMP(eq)
  F64vec2_COMP(lt)
  F64vec2_COMP(le)
  F64vec2_COMP(gt)
  F64vec2_COMP(ge)
  F64vec2_COMP(ngt)
  F64vec2_COMP(nge)
  F64vec2_COMP(neq)
  F64vec2_COMP(nlt)
  F64vec2_COMP(nle)
#undef F64vec2_COMP

  /* Element-wise minimum / maximum. */
  friend F64vec2 simd_min(const F64vec2 &a,const F64vec2 &b) { return _mm_min_pd(a,b); }
  friend F64vec2 simd_max(const F64vec2 &a,const F64vec2 &b) { return _mm_max_pd(a,b); }

  /* comi<op>(a,b): scalar comparison of the low elements, int result from
   * _mm_comi<op>_sd. */
#define F64vec2_COMI(op) friend int comi##op (const F64vec2 &a,const F64vec2 &b) { return _mm_comi##op##_sd(a,b); }
  F64vec2_COMI(eq)
  F64vec2_COMI(lt)
  F64vec2_COMI(le)
  F64vec2_COMI(gt)
  F64vec2_COMI(ge)
  F64vec2_COMI(neq)
#undef F64vec2_COMI

  /* ucomi<op>(a,b): same as comi<op> but using _mm_ucomi<op>_sd. */
#define F64vec2_UCOMI(op) friend int ucomi##op (const F64vec2 &a,const F64vec2 &b) { return _mm_ucomi##op##_sd(a,b); }
  F64vec2_UCOMI(eq)
  F64vec2_UCOMI(lt)
  F64vec2_UCOMI(le)
  F64vec2_UCOMI(gt)
  F64vec2_UCOMI(ge)
  F64vec2_UCOMI(neq)
#undef F64vec2_UCOMI

#if defined(_ENABLE_VEC_DEBUG)

  /* Debug output: prints element 1 followed by element 0. */
  friend std::ostream & operator<<(std::ostream & os,const F64vec2 &a) {
    double *dp = (double*)&a;
    os << " [1]:" << *(dp+1)
      << " [0]:" << *dp;
    return os;
  }
#endif

  /* Read-only access to element i; the assert requires i to be 0 or 1. */
  const double &operator[](int i) const {
    assert((0 <= i) && (i <= 1));
    double *dp = (double*)&vec;
    return *(dp+i);
  }

  /* Writable access to element i; same index check. */
  double &operator[](int i) {
    assert((0 <= i) && (i <= 1));
    double *dp = (double*)&vec;
    return *(dp+i);
  }
};
816
817inline F64vec2 unpack_low(const F64vec2 &a,const F64vec2 &b) { return _mm_unpacklo_pd(a,b); }
818inline F64vec2 unpack_high(const F64vec2 &a,const F64vec2 &b) { return _mm_unpackhi_pd(a,b); }
819inline int move_mask(const F64vec2 &a) { return _mm_movemask_pd(a); }
820inline void loadu(F64vec2 &a,double *p) { a = _mm_loadu_pd(p); }
821inline void storeu(double *p,const F64vec2 &a) { _mm_storeu_pd(p,a); }
822inline void store_nta(double *p,F64vec2 &a) { _mm_stream_pd(p,a); }
823
824#define F64vec2_SELECT(op) inline F64vec2 select_##op (const F64vec2 &a,const F64vec2 &b,const F64vec2 &c,const F64vec2 &d) { F64vec2 mask = _mm_cmp##op##_pd(a,b); return((mask & c) | F64vec2((_mm_andnot_pd(mask,d)))); }
833#undef F64vec2_SELECT
834
835inline int F64vec2ToInt(const F64vec2 &a) { return _mm_cvttsd_si32(a); }
836inline F64vec2 F32vec4ToF64vec2(const F32vec4 &a) { return _mm_cvtps_pd(a); }
837inline F32vec4 F64vec2ToF32vec4(const F64vec2 &a) { return _mm_cvtpd_ps(a); }
838inline F64vec2 IntToF64vec2(const F64vec2 &a,int b) { return _mm_cvtsi32_sd(a,b); }
839
840#pragma pack(pop)
841#pragma pack(pop)
842#endif
843#endif
basic_ostream< _CharT, _Traits > &_STLP_CALL operator<<(basic_ostream< _CharT, _Traits > &__os, const bitset< _Nb > &__x)
Definition: _bitset.c:219
#define __int64
Definition: basetyps.h:16
ULONG32 u32
Definition: btrfs.h:14
UCHAR u8
Definition: btrfs.h:12
USHORT u16
Definition: btrfs.h:13
Definition: fvec.h:28
Definition: dvec.h:718
friend F64vec2 operator*(const F64vec2 &a, const F64vec2 &b)
Definition: dvec.h:739
F64vec2 & operator-=(F64vec2 &a)
Definition: dvec.h:743
F64vec2_COMI(eq) F64vec2_COMI(lt) F64vec2_COMI(le) F64vec2_COMI(gt) F64vec2_COMI(ge) F64vec2_COMI(neq) F64vec2_UCOMI(eq) F64vec2_UCOMI(lt) F64vec2_UCOMI(le) F64vec2_UCOMI(gt) F64vec2_UCOMI(ge) F64vec2_UCOMI(neq) const double &operator[](int i) const
Definition: dvec.h:777
EXPLICIT F64vec2(double d)
Definition: dvec.h:729
double & operator[](int i)
Definition: dvec.h:810
friend F64vec2 operator/(const F64vec2 &a, const F64vec2 &b)
Definition: dvec.h:740
friend F64vec2 operator-(const F64vec2 &a, const F64vec2 &b)
Definition: dvec.h:738
F64vec2 & operator/=(F64vec2 &a)
Definition: dvec.h:745
friend F64vec2 operator&(const F64vec2 &a, const F64vec2 &b)
Definition: dvec.h:733
friend F64vec2 simd_max(const F64vec2 &a, const F64vec2 &b)
Definition: dvec.h:774
friend F64vec2 sqrt(const F64vec2 &a)
Definition: dvec.h:758
friend double add_horizontal(F64vec2 &a)
Definition: dvec.h:750
friend F64vec2 operator+(const F64vec2 &a, const F64vec2 &b)
Definition: dvec.h:737
F64vec2 & operator^=(F64vec2 &a)
Definition: dvec.h:748
friend F64vec2 andnot(const F64vec2 &a, const F64vec2 &b)
Definition: dvec.h:756
F64vec2_COMP(eq) F64vec2_COMP(lt) F64vec2_COMP(le) F64vec2_COMP(gt) F64vec2_COMP(ge) F64vec2_COMP(ngt) F64vec2_COMP(nge) F64vec2_COMP(neq) F64vec2_COMP(nlt) F64vec2_COMP(nle) friend F64vec2 simd_min(const F64vec2 &a
F64vec2 & operator+=(F64vec2 &a)
Definition: dvec.h:742
friend F64vec2 operator|(const F64vec2 &a, const F64vec2 &b)
Definition: dvec.h:734
F64vec2()
Definition: dvec.h:723
F64vec2 & operator|=(F64vec2 &a)
Definition: dvec.h:747
__m128d vec
Definition: dvec.h:720
friend F64vec2 operator^(const F64vec2 &a, const F64vec2 &b)
Definition: dvec.h:735
F64vec2 & operator*=(F64vec2 &a)
Definition: dvec.h:744
F64vec2(double d1, double d0)
Definition: dvec.h:727
F64vec2 & operator&=(F64vec2 &a)
Definition: dvec.h:746
F64vec2(__m128d m)
Definition: dvec.h:725
Definition: dvec.h:81
I128vec1 & operator=(const M128 &a)
Definition: dvec.h:86
I128vec1 & operator^=(const M128 &a)
Definition: dvec.h:89
I128vec1 & operator&=(const M128 &a)
Definition: dvec.h:87
I128vec1(__m128i mm)
Definition: dvec.h:84
I128vec1 & operator|=(const M128 &a)
Definition: dvec.h:88
I128vec1()
Definition: dvec.h:83
Definition: dvec.h:297
I16vec8 & operator*=(const I16vec8 &a)
Definition: dvec.h:310
I16vec8 & operator=(const M128 &a)
Definition: dvec.h:302
I16vec8 operator<<(const M128 &a)
Definition: dvec.h:312
I16vec8 & operator&=(const M128 &a)
Definition: dvec.h:304
I16vec8 & operator^=(const M128 &a)
Definition: dvec.h:306
I16vec8 & operator+=(const I16vec8 &a)
Definition: dvec.h:308
I16vec8 & operator|=(const M128 &a)
Definition: dvec.h:305
I16vec8 & operator<<=(const M128 &a)
Definition: dvec.h:314
I16vec8(__m128i mm)
Definition: dvec.h:300
I16vec8 & operator-=(const I16vec8 &a)
Definition: dvec.h:309
I16vec8 & operator<<=(int count)
Definition: dvec.h:315
I16vec8()
Definition: dvec.h:299
I16vec8 operator<<(int count)
Definition: dvec.h:313
Definition: dvec.h:141
I32vec4 & operator+=(const I32vec4 &a)
Definition: dvec.h:152
I32vec4 & operator=(const M128 &a)
Definition: dvec.h:146
I32vec4()
Definition: dvec.h:143
I32vec4 & operator&=(const M128 &a)
Definition: dvec.h:148
I32vec4 & operator<<=(int count)
Definition: dvec.h:158
I32vec4 & operator<<=(const I32vec4 &a)
Definition: dvec.h:157
I32vec4 operator<<(int count)
Definition: dvec.h:156
I32vec4(__m128i mm)
Definition: dvec.h:144
I32vec4 & operator-=(const I32vec4 &a)
Definition: dvec.h:153
I32vec4 & operator|=(const M128 &a)
Definition: dvec.h:149
I32vec4 & operator^=(const M128 &a)
Definition: dvec.h:150
I32vec4 operator<<(const I32vec4 &a)
Definition: dvec.h:155
Definition: dvec.h:94
I64vec2 & operator+=(const I64vec2 &a)
Definition: dvec.h:111
const __int64 & operator[](int i) const
Definition: dvec.h:123
I64vec2(__m128i mm)
Definition: dvec.h:97
I64vec2 operator>>(const I64vec2 &a)
Definition: dvec.h:118
I64vec2 operator>>(int count)
Definition: dvec.h:119
I64vec2 & operator&=(const M128 &a)
Definition: dvec.h:107
I64vec2 & operator=(const M128 &a)
Definition: dvec.h:105
I64vec2 & operator^=(const M128 &a)
Definition: dvec.h:109
I64vec2 & operator|=(const M128 &a)
Definition: dvec.h:108
__int64 & operator[](int i)
Definition: dvec.h:129
I64vec2 & operator<<=(const I64vec2 &a)
Definition: dvec.h:116
I64vec2 & operator>>=(int count)
Definition: dvec.h:121
I64vec2 & operator>>=(const I64vec2 &a)
Definition: dvec.h:120
I64vec2 & operator-=(const I64vec2 &a)
Definition: dvec.h:112
I64vec2 operator<<(const I64vec2 &a)
Definition: dvec.h:114
I64vec2 & operator<<=(int count)
Definition: dvec.h:117
I64vec2()
Definition: dvec.h:96
I64vec2(__m64 q1, __m64 q0)
Definition: dvec.h:99
I64vec2 operator<<(int count)
Definition: dvec.h:115
Definition: dvec.h:492
I8vec16 & operator+=(const I8vec16 &a)
Definition: dvec.h:503
I8vec16 & operator^=(const M128 &a)
Definition: dvec.h:501
I8vec16()
Definition: dvec.h:494
I8vec16 & operator=(const M128 &a)
Definition: dvec.h:497
I8vec16 & operator|=(const M128 &a)
Definition: dvec.h:500
I8vec16(__m128i mm)
Definition: dvec.h:495
I8vec16 & operator&=(const M128 &a)
Definition: dvec.h:499
I8vec16 & operator-=(const I8vec16 &a)
Definition: dvec.h:504
Definition: dvec.h:328
Is16vec8 & operator*=(const I16vec8 &a)
Definition: dvec.h:352
Is16vec8(signed short s7, signed short s6, signed short s5, signed short s4, signed short s3, signed short s2, signed short s1, signed short s0)
Definition: dvec.h:332
Is16vec8 operator>>(const M128 &a)
Definition: dvec.h:359
Is16vec8 & operator>>=(const M128 &a)
Definition: dvec.h:361
const signed short & operator[](int i) const
Definition: dvec.h:380
Is16vec8 & operator<<=(const M128 &a)
Definition: dvec.h:356
Is16vec8 & operator&=(const M128 &a)
Definition: dvec.h:346
Is16vec8 & operator-=(const I16vec8 &a)
Definition: dvec.h:351
Is16vec8 & operator|=(const M128 &a)
Definition: dvec.h:347
Is16vec8 & operator>>=(int count)
Definition: dvec.h:362
Is16vec8 & operator=(const M128 &a)
Definition: dvec.h:344
signed short & operator[](int i)
Definition: dvec.h:386
Is16vec8 & operator+=(const I16vec8 &a)
Definition: dvec.h:350
Is16vec8()
Definition: dvec.h:330
Is16vec8 operator>>(int count)
Definition: dvec.h:360
Is16vec8 operator<<(int count)
Definition: dvec.h:355
Is16vec8 & operator^=(const M128 &a)
Definition: dvec.h:348
Is16vec8 & operator<<=(int count)
Definition: dvec.h:357
Is16vec8 operator<<(const M128 &a)
Definition: dvec.h:354
Is16vec8(__m128i mm)
Definition: dvec.h:331
Definition: dvec.h:169
Is32vec4 & operator=(const M128 &a)
Definition: dvec.h:181
Is32vec4 operator>>(int count)
Definition: dvec.h:196
Is32vec4 & operator|=(const M128 &a)
Definition: dvec.h:184
Is32vec4 operator<<(int count)
Definition: dvec.h:191
Is32vec4 & operator-=(const I32vec4 &a)
Definition: dvec.h:188
Is32vec4 & operator>>=(int count)
Definition: dvec.h:198
int & operator[](int i)
Definition: dvec.h:218
Is32vec4 & operator>>=(const M128 &a)
Definition: dvec.h:197
Is32vec4 operator<<(const M128 &a)
Definition: dvec.h:190
Is32vec4 operator>>(const M128 &a)
Definition: dvec.h:195
Is32vec4 & operator<<=(const M128 &a)
Definition: dvec.h:192
Is32vec4 & operator<<=(int count)
Definition: dvec.h:193
Is32vec4()
Definition: dvec.h:171
Is32vec4 & operator+=(const I32vec4 &a)
Definition: dvec.h:187
Is32vec4(__m128i mm)
Definition: dvec.h:172
Is32vec4 & operator&=(const M128 &a)
Definition: dvec.h:183
const int & operator[](int i) const
Definition: dvec.h:212
Is32vec4 & operator^=(const M128 &a)
Definition: dvec.h:185
Is32vec4(int i3, int i2, int i1, int i0)
Definition: dvec.h:173
Definition: dvec.h:515
signed char & operator[](int i)
Definition: dvec.h:559
Is8vec16()
Definition: dvec.h:517
Is8vec16 & operator&=(const M128 &a)
Definition: dvec.h:522
Is8vec16 & operator-=(const I8vec16 &a)
Definition: dvec.h:527
Is8vec16(__m128i mm)
Definition: dvec.h:518
Is8vec16 & operator=(const M128 &a)
Definition: dvec.h:520
Is8vec16 & operator^=(const M128 &a)
Definition: dvec.h:524
Is8vec16 & operator|=(const M128 &a)
Definition: dvec.h:523
Is8vec16 & operator+=(const I8vec16 &a)
Definition: dvec.h:526
const signed char & operator[](int i) const
Definition: dvec.h:553
Definition: dvec.h:413
unsigned short & operator[](int i)
Definition: dvec.h:470
Iu16vec8 & operator&=(const M128 &a)
Definition: dvec.h:431
Iu16vec8 & operator|=(const M128 &a)
Definition: dvec.h:432
Iu16vec8()
Definition: dvec.h:415
Iu16vec8 & operator<<=(int count)
Definition: dvec.h:442
Iu16vec8 & operator-=(const I16vec8 &a)
Definition: dvec.h:436
Iu16vec8 & operator^=(const M128 &a)
Definition: dvec.h:433
Iu16vec8 & operator>>=(int count)
Definition: dvec.h:446
Iu16vec8(__m128i mm)
Definition: dvec.h:416
Iu16vec8 & operator*=(const I16vec8 &a)
Definition: dvec.h:437
Iu16vec8 & operator=(const M128 &a)
Definition: dvec.h:429
Iu16vec8 & operator<<=(const M128 &a)
Definition: dvec.h:441
Iu16vec8 & operator+=(const I16vec8 &a)
Definition: dvec.h:435
Iu16vec8 operator>>(const M128 &a)
Definition: dvec.h:443
Iu16vec8 & operator>>=(const M128 &a)
Definition: dvec.h:445
const unsigned short & operator[](int i) const
Definition: dvec.h:464
Iu16vec8 operator>>(int count)
Definition: dvec.h:444
Iu16vec8 operator<<(const M128 &a)
Definition: dvec.h:439
Iu16vec8(unsigned short s7, unsigned short s6, unsigned short s5, unsigned short s4, unsigned short s3, unsigned short s2, unsigned short s1, unsigned short s0)
Definition: dvec.h:417
Iu16vec8 operator<<(int count)
Definition: dvec.h:440
Definition: dvec.h:234
Iu32vec4 & operator-=(const I32vec4 &a)
Definition: dvec.h:253
Iu32vec4 & operator=(const M128 &a)
Definition: dvec.h:246
Iu32vec4 & operator<<=(int count)
Definition: dvec.h:258
Iu32vec4(unsigned int ui3, unsigned int ui2, unsigned int ui1, unsigned int ui0)
Definition: dvec.h:238
Iu32vec4 & operator>>=(int count)
Definition: dvec.h:262
Iu32vec4 & operator^=(const M128 &a)
Definition: dvec.h:250
Iu32vec4()
Definition: dvec.h:236
Iu32vec4 & operator&=(const M128 &a)
Definition: dvec.h:248
Iu32vec4 & operator+=(const I32vec4 &a)
Definition: dvec.h:252
Iu32vec4 operator<<(int count)
Definition: dvec.h:256
Iu32vec4 operator<<(const M128 &a)
Definition: dvec.h:255
Iu32vec4 & operator<<=(const M128 &a)
Definition: dvec.h:257
unsigned int & operator[](int i)
Definition: dvec.h:282
Iu32vec4 & operator|=(const M128 &a)
Definition: dvec.h:249
Iu32vec4 operator>>(int count)
Definition: dvec.h:260
Iu32vec4 & operator>>=(const M128 &a)
Definition: dvec.h:261
Iu32vec4(__m128i mm)
Definition: dvec.h:237
Iu32vec4 operator>>(const M128 &a)
Definition: dvec.h:259
const unsigned int & operator[](int i) const
Definition: dvec.h:276
Definition: dvec.h:579
Iu8vec16 & operator^=(const M128 &a)
Definition: dvec.h:588
Iu8vec16()
Definition: dvec.h:581
const unsigned char & operator[](int i) const
Definition: dvec.h:617
Iu8vec16(__m128i mm)
Definition: dvec.h:582
Iu8vec16 & operator+=(const I8vec16 &a)
Definition: dvec.h:590
unsigned char & operator[](int i)
Definition: dvec.h:623
Iu8vec16 & operator&=(const M128 &a)
Definition: dvec.h:586
Iu8vec16 & operator=(const M128 &a)
Definition: dvec.h:584
Iu8vec16 & operator|=(const M128 &a)
Definition: dvec.h:587
Iu8vec16 & operator-=(const I8vec16 &a)
Definition: dvec.h:591
Definition: dvec.h:59
__m128i vec
Definition: dvec.h:61
M128 & operator|=(const M128 &a)
Definition: dvec.h:70
M128 & operator^=(const M128 &a)
Definition: dvec.h:71
M128()
Definition: dvec.h:64
M128 & operator&=(const M128 &a)
Definition: dvec.h:69
M128(__m128i mm)
Definition: dvec.h:65
unsigned short(__cdecl typeof(TIFFCurrentDirectory))(struct tiff *)
Definition: typeof.h:94
#define assert(x)
Definition: debug.h:53
signed int s32
Definition: linux.h:57
signed char s8
Definition: linux.h:51
signed short s16
Definition: linux.h:54
Is16vec8 pack_sat(const Is32vec4 &a, const Is32vec4 &b)
Definition: dvec.h:646
I64vec2 sum_abs(const Iu8vec16 &a, const Iu8vec16 &b)
Definition: dvec.h:640
#define F64vec2_COMI(op)
Definition: dvec.h:776
#define _MM_8UW(element, vector)
Definition: dvec.h:44
#define F64vec2_COMP(op)
Definition: dvec.h:760
#define EXPLICIT
Definition: dvec.h:27
Is32vec4 mul_add(const Is16vec8 &a, const Is16vec8 &b)
Definition: dvec.h:404
#define F64vec2_UCOMI(op)
Is32vec4 cmpgt(const Is32vec4 &a, const Is32vec4 &b)
Definition: dvec.h:227
I32vec4 cmpeq(const I32vec4 &a, const I32vec4 &b)
Definition: dvec.h:162
#define _MM_4DW(element, vector)
Definition: dvec.h:48
#define _MM_2QW(element, vector)
Definition: dvec.h:50
F64vec2 IntToF64vec2(const F64vec2 &a, int b)
Definition: dvec.h:838
#define _MM_8W(element, vector)
Definition: dvec.h:45
M128 andnot(const M128 &a, const M128 &b)
Definition: dvec.h:78
I32vec4 cmpneq(const I32vec4 &a, const I32vec4 &b)
Definition: dvec.h:163
void loadu(F64vec2 &a, double *p)
Definition: dvec.h:820
#define IVEC128_ADD_SUB(vect, element, opsize)
Definition: dvec.h:665
Is16vec8 simd_max(const Is16vec8 &a, const Is16vec8 &b)
Definition: dvec.h:409
#define F64vec2_SELECT(op)
Definition: dvec.h:824
M128 operator&(const M128 &a, const M128 &b)
Definition: dvec.h:75
I64vec2 unpack_low(const I64vec2 &a, const I64vec2 &b)
Definition: dvec.h:137
Is32vec4 cmplt(const Is32vec4 &a, const Is32vec4 &b)
Definition: dvec.h:228
I64vec2 unpack_high(const I64vec2 &a, const I64vec2 &b)
Definition: dvec.h:138
F64vec2 F32vec4ToF64vec2(const F32vec4 &a)
Definition: dvec.h:836
I64vec2 operator*(const Iu32vec4 &a, const Iu32vec4 &b)
Definition: dvec.h:289
Iu16vec8 simd_avg(const Iu16vec8 &a, const Iu16vec8 &b)
Definition: dvec.h:488
#define _MM_4UDW(element, vector)
Definition: dvec.h:47
Is16vec8 mul_high(const Is16vec8 &a, const Is16vec8 &b)
Definition: dvec.h:403
#define _MM_16UB(element, vector)
Definition: dvec.h:41
M128 operator^(const M128 &a, const M128 &b)
Definition: dvec.h:77
M128 operator|(const M128 &a, const M128 &b)
Definition: dvec.h:76
F32vec4 F64vec2ToF32vec4(const F64vec2 &a)
Definition: dvec.h:837
void storeu(double *p, const F64vec2 &a)
Definition: dvec.h:821
Iu8vec16 packu_sat(const Is16vec8 &a, const Is16vec8 &b)
Definition: dvec.h:648
#define IVEC128_LOGICALS(vect, element)
Definition: dvec.h:650
Is16vec8 sat_sub(const Is16vec8 &a, const Is16vec8 &b)
Definition: dvec.h:407
Is16vec8 simd_min(const Is16vec8 &a, const Is16vec8 &b)
Definition: dvec.h:410
const __m128i get_mask128()
Definition: dvec.h:52
#define _MM_16B(element, vector)
Definition: dvec.h:42
void store_nta(double *p, F64vec2 &a)
Definition: dvec.h:822
#define IVEC128_SELECT(vect12, vect34, element, selop, arg1, arg2)
Definition: dvec.h:679
int move_mask(const F64vec2 &a)
Definition: dvec.h:819
Is16vec8 sat_add(const Is16vec8 &a, const Is16vec8 &b)
Definition: dvec.h:406
__m128 _mm_cvtpd_ps(__m128d a)
Definition: emmintrin.h:846
void _mm_storeu_pd(double *dp, __m128d a)
Definition: emmintrin.h:1083
__m128d _mm_add_sd(__m128d a, __m128d b)
Definition: emmintrin.h:567
__m128i _mm_cmpgt_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1442
__m128i _mm_adds_epu16(__m128i a, __m128i b)
Definition: emmintrin.h:1170
__m128i _mm_slli_epi64(__m128i a, int count)
Definition: emmintrin.h:1364
__m128i _mm_srl_epi64(__m128i a, __m128i count)
Definition: emmintrin.h:1422
__m128d _mm_add_pd(__m128d a, __m128d b)
Definition: emmintrin.h:573
__m128i _mm_xor_si128(__m128i a, __m128i b)
Definition: emmintrin.h:1336
__m128i _mm_srli_epi64(__m128i a, int count)
Definition: emmintrin.h:1417
__m128i _mm_set1_epi64(__m64 q)
Definition: emmintrin.h:1603
__m128i _mm_packs_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1747
__m128i _mm_sad_epu8(__m128i a, __m128i b)
Definition: emmintrin.h:1255
__m128i _mm_packus_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1752
__m128i _mm_adds_epu8(__m128i a, __m128i b)
Definition: emmintrin.h:1161
__m128i _mm_sub_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1270
__m128i _mm_add_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1123
__m128i _mm_mulhi_epu16(__m128i a, __m128i b)
Definition: emmintrin.h:1235
__m128i _mm_slli_epi32(__m128i a, int count)
Definition: emmintrin.h:1354
__m128d _mm_and_pd(__m128d a, __m128d b)
Definition: emmintrin.h:642
__m128i _mm_mul_epu32(__m128i a, __m128i b)
Definition: emmintrin.h:1250
__m128d _mm_sub_pd(__m128d a, __m128d b)
Definition: emmintrin.h:584
__m128i _mm_cmpeq_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1427
__m128d _mm_min_pd(__m128d a, __m128d b)
Definition: emmintrin.h:627
__m128i _mm_sll_epi32(__m128i a, __m128i count)
Definition: emmintrin.h:1359
__m128i _mm_unpackhi_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1790
__m128i _mm_subs_epu8(__m128i a, __m128i b)
Definition: emmintrin.h:1303
__m128i _mm_srl_epi32(__m128i a, __m128i count)
Definition: emmintrin.h:1412
__m128i _mm_mulhi_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1230
#define _mm_shuffle_pd(a, b, i)
Definition: emmintrin.h:1901
__m128d _mm_or_pd(__m128d a, __m128d b)
Definition: emmintrin.h:652
__m128i _mm_cmpeq_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1432
__m128i _mm_sra_epi32(__m128i a, __m128i count)
Definition: emmintrin.h:1389
__m128i _mm_srai_epi32(__m128i a, int count)
Definition: emmintrin.h:1384
__m128i _mm_slli_epi16(__m128i a, int count)
Definition: emmintrin.h:1344
__m128i _mm_subs_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1285
int _mm_movemask_pd(__m128d a)
Definition: emmintrin.h:1896
__m128i _mm_sub_epi64(__m128i a, __m128i b)
Definition: emmintrin.h:1280
__m128i _mm_min_epu8(__m128i a, __m128i b)
Definition: emmintrin.h:1221
__m128d _mm_unpackhi_pd(__m128d a, __m128d b)
Definition: emmintrin.h:1878
int _mm_cvttsd_si32(__m128d a)
Definition: emmintrin.h:902
__m128i _mm_unpackhi_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1800
__m128i _mm_add_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1118
__m128d _mm_loadu_pd(double const *dp)
Definition: emmintrin.h:956
__m128i _mm_avg_epu16(__m128i a, __m128i b)
Definition: emmintrin.h:1184
__m128i _mm_sll_epi64(__m128i a, __m128i count)
Definition: emmintrin.h:1369
__m128d _mm_cvtsi32_sd(__m128d a, int b)
Definition: emmintrin.h:884
__m128i _mm_or_si128(__m128i a, __m128i b)
Definition: emmintrin.h:1331
__m128i _mm_subs_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1294
__m128i _mm_packs_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1742
__m128i _mm_srli_epi16(__m128i a, int count)
Definition: emmintrin.h:1397
__m128i _mm_sub_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1260
__m128d _mm_mul_pd(__m128d a, __m128d b)
Definition: emmintrin.h:595
__m128i _mm_sll_epi16(__m128i a, __m128i count)
Definition: emmintrin.h:1349
__m128d _mm_sqrt_pd(__m128d a)
Definition: emmintrin.h:617
__m128i _mm_mullo_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1240
__m128i _mm_sra_epi16(__m128i a, __m128i count)
Definition: emmintrin.h:1379
__m128i _mm_madd_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1189
__m128i _mm_cmpeq_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1437
__m128i _mm_srai_epi16(__m128i a, int count)
Definition: emmintrin.h:1374
__m128i _mm_unpackhi_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1779
__m128d _mm_andnot_pd(__m128d a, __m128d b)
Definition: emmintrin.h:647
__m128i _mm_min_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1212
__m128i _mm_andnot_si128(__m128i a, __m128i b)
Definition: emmintrin.h:1326
__m128i _mm_and_si128(__m128i a, __m128i b)
Definition: emmintrin.h:1321
void _mm_stream_pd(double *p, __m128d a)
Definition: emmintrin.h:1706
__m128i _mm_avg_epu8(__m128i a, __m128i b)
Definition: emmintrin.h:1179
__m128i _mm_adds_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1152
__m128i _mm_unpackhi_epi64(__m128i a, __m128i b)
Definition: emmintrin.h:1810
__m128i _mm_adds_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1143
__m128i _mm_unpacklo_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1830
__m128i _mm_cmplt_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1459
__m128i _mm_max_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1194
__m128i _mm_unpacklo_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1819
__m128d _mm_xor_pd(__m128d a, __m128d b)
Definition: emmintrin.h:657
__m128d _mm_cvtps_pd(__m128 a)
Definition: emmintrin.h:851
__m128d _mm_set_pd(double w, double x)
Definition: emmintrin.h:1038
__m128i _mm_srli_epi32(__m128i a, int count)
Definition: emmintrin.h:1407
__m128i _mm_cmpgt_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1449
__m128i _mm_add_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1128
__m128i _mm_sub_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1265
__m128i _mm_add_epi64(__m128i a, __m128i b)
Definition: emmintrin.h:1138
__m128i _mm_cmpgt_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1454
__m128d _mm_set1_pd(double w)
Definition: emmintrin.h:1033
__m128i _mm_unpacklo_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1840
__m128d _mm_div_pd(__m128d a, __m128d b)
Definition: emmintrin.h:606
__m128i _mm_srl_epi16(__m128i a, __m128i count)
Definition: emmintrin.h:1402
__m128d _mm_max_pd(__m128d a, __m128d b)
Definition: emmintrin.h:637
__m128d _mm_unpacklo_pd(__m128d a, __m128d b)
Definition: emmintrin.h:1887
__m128i _mm_subs_epu16(__m128i a, __m128i b)
Definition: emmintrin.h:1312
__m128i _mm_unpacklo_epi64(__m128i a, __m128i b)
Definition: emmintrin.h:1850
__m128i _mm_max_epu8(__m128i a, __m128i b)
Definition: emmintrin.h:1203
GLuint GLuint GLsizei count
Definition: gl.h:1545
const GLubyte * c
Definition: glext.h:8905
GLboolean GLboolean GLboolean b
Definition: glext.h:6204
GLfloat GLfloat p
Definition: glext.h:8902
GLboolean GLboolean GLboolean GLboolean a
Definition: glext.h:6204
const GLfloat * m
Definition: glext.h:10848
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
#define d
Definition: ke_i.h:81
struct S1 s1
struct S2 s2
#define eq(received, expected, label, type)
Definition: locale.c:144