ReactOS Fundraising Campaign 2012
 
€ 4,410 / € 30,000

Information | Donate

Home | Info | Community | Development | myReactOS | Contact Us

  1. Home
  2. Community
  3. Development
  4. myReactOS
  5. Fundraiser 2012

  1. Main Page
  2. Alphabetical List
  3. Data Structures
  4. Directories
  5. File List
  6. Data Fields
  7. Globals
  8. Related Pages

ReactOS Development > Doxygen

dvec.h
Go to the documentation of this file.
00001 
00006 #ifndef _DVEC_H_INCLUDED
00007 #define _DVEC_H_INCLUDED
00008 #ifndef RC_INVOKED
00009 
00010 #if !defined __cplusplus
00011 #error This file is only supported in C++ compilations!
00012 #endif
00013 
00014 #include <emmintrin.h>
00015 #include <assert.h>
00016 #include <fvec.h>
00017 #include <crtdefs.h>
00018 
00019 #pragma pack(push,_CRT_PACKING)
00020 
00021 #if defined(_ENABLE_VEC_DEBUG)
00022 #include <iostream>
00023 #endif
00024 
00025 #pragma pack(push,16)
00026 
00027 #define EXPLICIT explicit
00028 
// Forward declarations of the 128-bit integer vector classes defined later
// in this header.
00029 class I8vec16;
00030 class Is8vec16;
00031 class Iu8vec16;
00032 class I16vec8;
00033 class Is16vec8;
00034 class Iu16vec8;
00035 class I32vec4;
00036 class Is32vec4;
00037 class Iu32vec4;
00038 class I64vec2;
00039 class I128vec1;
00040 
// Element accessors. Each macro treats the storage of `vector` as an array of
// the named element type and yields element number `element` as an lvalue:
//   _MM_16UB / _MM_16B   unsigned char  / signed char
//   _MM_8UW  / _MM_8W    unsigned short / short
//   _MM_4UDW / _MM_4DW   unsigned int   / int
//   _MM_2QW              __int64
// Both arguments are parenthesized rather than joined with `##`: pasting `&`
// or `+` onto an identifier does not form a single valid preprocessing token,
// so conforming preprocessors reject that form.
00041 #define _MM_16UB(element,vector) (*((unsigned char*)&(vector) + (element)))
00042 #define _MM_16B(element,vector) (*((signed char*)&(vector) + (element)))
00043 
00044 #define _MM_8UW(element,vector) (*((unsigned short*)&(vector) + (element)))
00045 #define _MM_8W(element,vector) (*((short*)&(vector) + (element)))
00046 
00047 #define _MM_4UDW(element,vector) (*((unsigned int*)&(vector) + (element)))
00048 #define _MM_4DW(element,vector) (*((int*)&(vector) + (element)))
00049 
00050 #define _MM_2QW(element,vector) (*((__int64*)&(vector) + (element)))
00051 
// Returns a 128-bit value with every bit set. The cmpneq() helpers in this
// header pass it as the second operand of _mm_andnot_si128 to invert an
// equality mask.
// The constant is built with _mm_set1_epi32 from <emmintrin.h>, so it needs
// neither an __m64 temporary nor a compiler-specific 64-bit literal suffix.
00052 inline const __m128i get_mask128()
00053 {
00054   static const __m128i mask128 = _mm_set1_epi32(-1);
00055   return mask128;
00056 }
00057 
// M128: thin wrapper around a single __m128i value. It is the common base of
// the integer vector classes in this header and supplies the bitwise
// compound-assignment operators.
00058 class M128
00059 {
00060 protected:
00061   __m128i vec;
00062 
00063 public:
// The default constructor leaves vec uninitialized.
00064   M128() { }
00065   M128(__m128i mm) { vec = mm; }
00066 
// Implicit conversion to the raw type, so an M128 can be handed directly to
// the _mm_* intrinsics.
00067   operator __m128i() const { return vec; }
00068 
// Bitwise AND / OR / XOR with another M128, stored back into this object.
00069   M128& operator&=(const M128 &a) { return *this = (M128) _mm_and_si128(vec,a); }
00070   M128& operator|=(const M128 &a) { return *this = (M128) _mm_or_si128(vec,a); }
00071   M128& operator^=(const M128 &a) { return *this = (M128) _mm_xor_si128(vec,a); }
00072 
00073 };
00074 
// Binary bitwise operators on M128; each forwards to the matching
// _mm_*_si128 intrinsic. andnot(a,b) passes its arguments to
// _mm_andnot_si128 in the same order.
00075 inline M128 operator&(const M128 &a,const M128 &b) { return _mm_and_si128(a,b); }
00076 inline M128 operator|(const M128 &a,const M128 &b) { return _mm_or_si128(a,b); }
00077 inline M128 operator^(const M128 &a,const M128 &b) { return _mm_xor_si128(a,b); }
00078 inline M128 andnot(const M128 &a,const M128 &b) { return _mm_andnot_si128(a,b); }
00079 
// I128vec1: M128 treated as one 128-bit value. Adds nothing beyond M128
// except operators whose result type is I128vec1.
00080 class I128vec1 : public M128
00081 {
00082 public:
00083   I128vec1() { }
00084   I128vec1(__m128i mm) : M128(mm) { }
00085 
// Assignment from any M128; the argument is first re-typed as I128vec1.
00086   I128vec1& operator= (const M128 &a) { return *this = (I128vec1) a; }
// Bitwise compound assignments, redeclared so they return I128vec1&.
00087   I128vec1& operator&=(const M128 &a) { return *this = (I128vec1) _mm_and_si128(vec,a); }
00088   I128vec1& operator|=(const M128 &a) { return *this = (I128vec1) _mm_or_si128(vec,a); }
00089   I128vec1& operator^=(const M128 &a) { return *this = (I128vec1) _mm_xor_si128(vec,a); }
00090 
00091 };
00092 
// I64vec2: M128 viewed as two 64-bit integer elements (indices 0 and 1).
00093 class I64vec2 : public M128
00094 {
00095 public:
00096   I64vec2() { }
00097   I64vec2(__m128i mm) : M128(mm) { }
00098 
// Builds the vector from two __m64 values: q0 becomes element 0 and q1
// becomes element 1 (arguments are given highest element first).
00099   I64vec2(__m64 q1,__m64 q0)
00100   {
00101     _MM_2QW(0,vec) = *(__int64*)&q0;
00102     _MM_2QW(1,vec) = *(__int64*)&q1;
00103   }
00104 
// Assignment from any M128; the argument is first re-typed as I64vec2.
00105   I64vec2& operator= (const M128 &a) { return *this = (I64vec2) a; }
00106 
// Bitwise compound assignments returning I64vec2&.
00107   I64vec2& operator&=(const M128 &a) { return *this = (I64vec2) _mm_and_si128(vec,a); }
00108   I64vec2& operator|=(const M128 &a) { return *this = (I64vec2) _mm_or_si128(vec,a); }
00109   I64vec2& operator^=(const M128 &a) { return *this = (I64vec2) _mm_xor_si128(vec,a); }
00110 
// Element-wise 64-bit add / subtract via _mm_add_epi64 / _mm_sub_epi64.
00111   I64vec2& operator +=(const I64vec2 &a) { return *this = (I64vec2) _mm_add_epi64(vec,a); }
00112   I64vec2& operator -=(const I64vec2 &a) { return *this = (I64vec2) _mm_sub_epi64(vec,a); }
00113 
// Shifts. The vector-argument overloads forward to _mm_sll_epi64 /
// _mm_srl_epi64 and the int overloads to _mm_slli_epi64 / _mm_srli_epi64.
00114   I64vec2 operator<<(const I64vec2 &a) { return _mm_sll_epi64(vec,a); }
00115   I64vec2 operator<<(int count) { return _mm_slli_epi64(vec,count); }
00116   I64vec2& operator<<=(const I64vec2 &a) { return *this = (I64vec2) _mm_sll_epi64(vec,a); }
00117   I64vec2& operator<<=(int count) { return *this = (I64vec2) _mm_slli_epi64(vec,count); }
00118   I64vec2 operator>>(const I64vec2 &a) { return _mm_srl_epi64(vec,a); }
00119   I64vec2 operator>>(int count) { return _mm_srli_epi64(vec,count); }
00120   I64vec2& operator>>=(const I64vec2 &a) { return *this = (I64vec2) _mm_srl_epi64(vec,a); }
00121   I64vec2& operator>>=(int count) { return *this = (I64vec2) _mm_srli_epi64(vec,count); }
00122 
// Read-only element access. The index is cast to unsigned before the range
// check, so a negative index fails the same assert as one that is too large.
00123   const __int64& operator[](int i)const
00124   {
00125     assert(static_cast<unsigned int>(i) < 2);
00126     return _MM_2QW(i,vec);
00127   }
00128 
// Writable element access with the same range assert.
00129   __int64& operator[](int i)
00130   {
00131     assert(static_cast<unsigned int>(i) < 2);
00132     return _MM_2QW(i,vec);
00133   }
00134 
00135 };
00136 
// Interleave helpers for I64vec2; forward to _mm_unpacklo_epi64 /
// _mm_unpackhi_epi64.
00137 inline I64vec2 unpack_low(const I64vec2 &a,const I64vec2 &b) {return _mm_unpacklo_epi64(a,b); }
00138 inline I64vec2 unpack_high(const I64vec2 &a,const I64vec2 &b) {return _mm_unpackhi_epi64(a,b); }
00139 
// I32vec4: M128 viewed as four 32-bit integer elements with no signedness
// attached. Base class of Is32vec4 and Iu32vec4. It offers left shifts only;
// right shifts are declared in the signed and unsigned subclasses.
00140 class I32vec4 : public M128
00141 {
00142 public:
00143   I32vec4() { }
00144   I32vec4(__m128i mm) : M128(mm) { }
00145 
// Assignment from any M128; the argument is first re-typed as I32vec4.
00146   I32vec4& operator= (const M128 &a) { return *this = (I32vec4) a; }
00147 
// Bitwise compound assignments returning I32vec4&.
00148   I32vec4& operator&=(const M128 &a) { return *this = (I32vec4) _mm_and_si128(vec,a); }
00149   I32vec4& operator|=(const M128 &a) { return *this = (I32vec4) _mm_or_si128(vec,a); }
00150   I32vec4& operator^=(const M128 &a) { return *this = (I32vec4) _mm_xor_si128(vec,a); }
00151 
// Element-wise 32-bit add / subtract via _mm_add_epi32 / _mm_sub_epi32.
00152   I32vec4& operator +=(const I32vec4 &a) { return *this = (I32vec4)_mm_add_epi32(vec,a); }
00153   I32vec4& operator -=(const I32vec4 &a) { return *this = (I32vec4)_mm_sub_epi32(vec,a); }
00154 
// Left shifts via _mm_sll_epi32 (vector count) and _mm_slli_epi32 (int count).
00155   I32vec4 operator<<(const I32vec4 &a) { return _mm_sll_epi32(vec,a); }
00156   I32vec4 operator<<(int count) { return _mm_slli_epi32(vec,count); }
00157   I32vec4& operator<<=(const I32vec4 &a) { return *this = (I32vec4)_mm_sll_epi32(vec,a); }
00158   I32vec4& operator<<=(int count) { return *this = (I32vec4)_mm_slli_epi32(vec,count); }
00159 
00160 };
00161 
// Equality comparisons for I32vec4. cmpeq forwards to _mm_cmpeq_epi32; cmpneq
// inverts that mask by and-not-ing it with the all-ones value from
// get_mask128().
00162 inline I32vec4 cmpeq(const I32vec4 &a,const I32vec4 &b) { return _mm_cmpeq_epi32(a,b); }
00163 inline I32vec4 cmpneq(const I32vec4 &a,const I32vec4 &b) { return _mm_andnot_si128(_mm_cmpeq_epi32(a,b),get_mask128()); }
00164 
// Interleave helpers; forward to _mm_unpacklo_epi32 / _mm_unpackhi_epi32.
00165 inline I32vec4 unpack_low(const I32vec4 &a,const I32vec4 &b) { return _mm_unpacklo_epi32(a,b); }
00166 inline I32vec4 unpack_high(const I32vec4 &a,const I32vec4 &b) { return _mm_unpackhi_epi32(a,b); }
00167 
// Is32vec4: I32vec4 whose four elements are accessed as signed int.
00168 class Is32vec4 : public I32vec4
00169 {
00170 public:
00171   Is32vec4() { }
00172   Is32vec4(__m128i mm) : I32vec4(mm) { }
// Element-wise constructor. Arguments are listed highest element first:
// i0 is stored in element 0 and i3 in element 3.
00173   Is32vec4(int i3,int i2,int i1,int i0)
00174   {
00175     _MM_4DW(0,vec) = i0;
00176     _MM_4DW(1,vec) = i1;
00177     _MM_4DW(2,vec) = i2;
00178     _MM_4DW(3,vec) = i3;
00179   }
00180 
// Assignment from any M128; the argument is first re-typed as Is32vec4.
00181   Is32vec4& operator= (const M128 &a) { return *this = (Is32vec4) a; }
00182 
// Bitwise compound assignments returning Is32vec4&.
00183   Is32vec4& operator&=(const M128 &a) { return *this = (Is32vec4) _mm_and_si128(vec,a); }
00184   Is32vec4& operator|=(const M128 &a) { return *this = (Is32vec4) _mm_or_si128(vec,a); }
00185   Is32vec4& operator^=(const M128 &a) { return *this = (Is32vec4) _mm_xor_si128(vec,a); }
00186 
// Element-wise 32-bit add / subtract via _mm_add_epi32 / _mm_sub_epi32.
00187   Is32vec4& operator +=(const I32vec4 &a) { return *this = (Is32vec4)_mm_add_epi32(vec,a); }
00188   Is32vec4& operator -=(const I32vec4 &a) { return *this = (Is32vec4)_mm_sub_epi32(vec,a); }
00189 
// Left shifts via _mm_sll_epi32 / _mm_slli_epi32.
00190   Is32vec4 operator<<(const M128 &a) { return _mm_sll_epi32(vec,a); }
00191   Is32vec4 operator<<(int count) { return _mm_slli_epi32(vec,count); }
00192   Is32vec4& operator<<=(const M128 &a) { return *this = (Is32vec4)_mm_sll_epi32(vec,a); }
00193   Is32vec4& operator<<=(int count) { return *this = (Is32vec4)_mm_slli_epi32(vec,count); }
00194 
// Right shifts use the _mm_sra_epi32 / _mm_srai_epi32 intrinsics (the
// arithmetic-shift family, going by the intrinsic names).
00195   Is32vec4 operator>>(const M128 &a) { return _mm_sra_epi32(vec,a); }
00196   Is32vec4 operator>>(int count) { return _mm_srai_epi32(vec,count); }
00197   Is32vec4& operator>>=(const M128 &a) { return *this = (Is32vec4) _mm_sra_epi32(vec,a); }
00198   Is32vec4& operator>>=(int count) { return *this = (Is32vec4) _mm_srai_epi32(vec,count); }
00199 
00200 #if defined(_ENABLE_VEC_DEBUG)
00201 
// Debug-only stream output (compiled when _ENABLE_VEC_DEBUG is defined).
// Prints the four elements from index 3 down to index 0.
00202   friend std::ostream& operator<< (std::ostream &os,const Is32vec4 &a)
00203   {
00204     os << "[3]:" << _MM_4DW(3,a)
00205       << " [2]:" << _MM_4DW(2,a)
00206       << " [1]:" << _MM_4DW(1,a)
00207       << " [0]:" << _MM_4DW(0,a);
00208     return os;
00209   }
00210 #endif
00211 
// Read-only element access. The index is cast to unsigned before the range
// check, so a negative index fails the same assert as one that is too large.
00212   const int& operator[](int i)const
00213   {
00214     assert(static_cast<unsigned int>(i) < 4);
00215     return _MM_4DW(i,vec);
00216   }
00217 
// Writable element access with the same range assert.
00218   int& operator[](int i)
00219   {
00220     assert(static_cast<unsigned int>(i) < 4);
00221     return _MM_4DW(i,vec);
00222   }
00223 };
00224 
// Comparisons for Is32vec4. cmpneq inverts the cmpeq mask using
// get_mask128(); cmplt is expressed as cmpgt with the operands swapped.
00225 inline Is32vec4 cmpeq(const Is32vec4 &a,const Is32vec4 &b) { return _mm_cmpeq_epi32(a,b); }
00226 inline Is32vec4 cmpneq(const Is32vec4 &a,const Is32vec4 &b) { return _mm_andnot_si128(_mm_cmpeq_epi32(a,b),get_mask128()); }
00227 inline Is32vec4 cmpgt(const Is32vec4 &a,const Is32vec4 &b) { return _mm_cmpgt_epi32(a,b); }
00228 inline Is32vec4 cmplt(const Is32vec4 &a,const Is32vec4 &b) { return _mm_cmpgt_epi32(b,a); }
00229 
// Interleave helpers; forward to _mm_unpacklo_epi32 / _mm_unpackhi_epi32.
00230 inline Is32vec4 unpack_low(const Is32vec4 &a,const Is32vec4 &b) { return _mm_unpacklo_epi32(a,b); }
00231 inline Is32vec4 unpack_high(const Is32vec4 &a,const Is32vec4 &b) { return _mm_unpackhi_epi32(a,b); }
00232 
// Iu32vec4: I32vec4 whose four elements are accessed as unsigned int.
00233 class Iu32vec4 : public I32vec4
00234 {
00235 public:
00236   Iu32vec4() { }
00237   Iu32vec4(__m128i mm) : I32vec4(mm) { }
// Element-wise constructor. Arguments are listed highest element first:
// ui0 is stored in element 0 and ui3 in element 3.
00238   Iu32vec4(unsigned int ui3,unsigned int ui2,unsigned int ui1,unsigned int ui0)
00239   {
00240     _MM_4UDW(0,vec) = ui0;
00241     _MM_4UDW(1,vec) = ui1;
00242     _MM_4UDW(2,vec) = ui2;
00243     _MM_4UDW(3,vec) = ui3;
00244   }
00245 
// Assignment from any M128; the argument is first re-typed as Iu32vec4.
00246   Iu32vec4& operator= (const M128 &a) { return *this = (Iu32vec4) a; }
00247 
// Bitwise compound assignments returning Iu32vec4&.
00248   Iu32vec4& operator&=(const M128 &a) { return *this = (Iu32vec4) _mm_and_si128(vec,a); }
00249   Iu32vec4& operator|=(const M128 &a) { return *this = (Iu32vec4) _mm_or_si128(vec,a); }
00250   Iu32vec4& operator^=(const M128 &a) { return *this = (Iu32vec4) _mm_xor_si128(vec,a); }
00251 
// Element-wise 32-bit add / subtract via _mm_add_epi32 / _mm_sub_epi32.
00252   Iu32vec4& operator +=(const I32vec4 &a) { return *this = (Iu32vec4)_mm_add_epi32(vec,a); }
00253   Iu32vec4& operator -=(const I32vec4 &a) { return *this = (Iu32vec4)_mm_sub_epi32(vec,a); }
00254 
// Shifts. Left shifts use _mm_sll_epi32 / _mm_slli_epi32; right shifts use
// _mm_srl_epi32 / _mm_srli_epi32 (the logical-shift family, going by the
// intrinsic names).
00255   Iu32vec4 operator<<(const M128 &a) { return _mm_sll_epi32(vec,a); }
00256   Iu32vec4 operator<<(int count) { return _mm_slli_epi32(vec,count); }
00257   Iu32vec4& operator<<=(const M128 &a) { return *this = (Iu32vec4)_mm_sll_epi32(vec,a); }
00258   Iu32vec4& operator<<=(int count) { return *this = (Iu32vec4)_mm_slli_epi32(vec,count); }
00259   Iu32vec4 operator>>(const M128 &a) { return _mm_srl_epi32(vec,a); }
00260   Iu32vec4 operator>>(int count) { return _mm_srli_epi32(vec,count); }
00261   Iu32vec4& operator>>=(const M128 &a) { return *this = (Iu32vec4) _mm_srl_epi32(vec,a); }
00262   Iu32vec4& operator>>=(int count) { return *this = (Iu32vec4) _mm_srli_epi32(vec,count); }
00263 
00264 #if defined(_ENABLE_VEC_DEBUG)
00265 
// Debug-only stream output (compiled when _ENABLE_VEC_DEBUG is defined).
// Prints the four elements from index 3 down to index 0.
00266   friend std::ostream& operator<< (std::ostream &os,const Iu32vec4 &a)
00267   {
00268     os << "[3]:" << _MM_4UDW(3,a)
00269       << " [2]:" << _MM_4UDW(2,a)
00270       << " [1]:" << _MM_4UDW(1,a)
00271       << " [0]:" << _MM_4UDW(0,a);
00272     return os;
00273   }
00274 #endif
00275 
// Read-only element access. The index is cast to unsigned before the range
// check, so a negative index fails the same assert as one that is too large.
00276   const unsigned int& operator[](int i)const
00277   {
00278     assert(static_cast<unsigned int>(i) < 4);
00279     return _MM_4UDW(i,vec);
00280   }
00281 
// Writable element access with the same range assert.
00282   unsigned int& operator[](int i)
00283   {
00284     assert(static_cast<unsigned int>(i) < 4);
00285     return _MM_4UDW(i,vec);
00286   }
00287 };
00288 
// operator* on Iu32vec4 forwards to _mm_mul_epu32 and returns its result as
// an I64vec2, not as an Iu32vec4.
00289 inline I64vec2 operator*(const Iu32vec4 &a,const Iu32vec4 &b) { return _mm_mul_epu32(a,b); }
// Equality comparisons; cmpneq inverts the cmpeq mask using get_mask128().
00290 inline Iu32vec4 cmpeq(const Iu32vec4 &a,const Iu32vec4 &b) { return _mm_cmpeq_epi32(a,b); }
00291 inline Iu32vec4 cmpneq(const Iu32vec4 &a,const Iu32vec4 &b) { return _mm_andnot_si128(_mm_cmpeq_epi32(a,b),get_mask128()); }
00292 
// Interleave helpers; forward to _mm_unpacklo_epi32 / _mm_unpackhi_epi32.
00293 inline Iu32vec4 unpack_low(const Iu32vec4 &a,const Iu32vec4 &b) { return _mm_unpacklo_epi32(a,b); }
00294 inline Iu32vec4 unpack_high(const Iu32vec4 &a,const Iu32vec4 &b) { return _mm_unpackhi_epi32(a,b); }
00295 
// I16vec8: M128 viewed as eight 16-bit integer elements with no signedness
// attached. Base class of Is16vec8 and Iu16vec8. It offers left shifts only;
// right shifts are declared in the signed and unsigned subclasses.
00296 class I16vec8 : public M128
00297 {
00298 public:
00299   I16vec8() { }
00300   I16vec8(__m128i mm) : M128(mm) { }
00301 
// Assignment from any M128; the argument is first re-typed as I16vec8.
00302   I16vec8& operator= (const M128 &a) { return *this = (I16vec8) a; }
00303 
// Bitwise compound assignments returning I16vec8&.
00304   I16vec8& operator&=(const M128 &a) { return *this = (I16vec8) _mm_and_si128(vec,a); }
00305   I16vec8& operator|=(const M128 &a) { return *this = (I16vec8) _mm_or_si128(vec,a); }
00306   I16vec8& operator^=(const M128 &a) { return *this = (I16vec8) _mm_xor_si128(vec,a); }
00307 
// Element-wise 16-bit add / subtract / multiply via _mm_add_epi16,
// _mm_sub_epi16 and _mm_mullo_epi16.
00308   I16vec8& operator +=(const I16vec8 &a) { return *this = (I16vec8) _mm_add_epi16(vec,a); }
00309   I16vec8& operator -=(const I16vec8 &a) { return *this = (I16vec8) _mm_sub_epi16(vec,a); }
00310   I16vec8& operator *=(const I16vec8 &a) { return *this = (I16vec8) _mm_mullo_epi16(vec,a); }
00311 
// Left shifts via _mm_sll_epi16 (vector count) and _mm_slli_epi16 (int count).
00312   I16vec8 operator<<(const M128 &a) { return _mm_sll_epi16(vec,a); }
00313   I16vec8 operator<<(int count) { return _mm_slli_epi16(vec,count); }
00314   I16vec8& operator<<=(const M128 &a) { return *this = (I16vec8)_mm_sll_epi16(vec,a); }
00315   I16vec8& operator<<=(int count) { return *this = (I16vec8)_mm_slli_epi16(vec,count); }
00316 
00317 };
00318 
// Element-wise 16-bit multiply via _mm_mullo_epi16.
00319 inline I16vec8 operator*(const I16vec8 &a,const I16vec8 &b) { return _mm_mullo_epi16(a,b); }
00320 
// Equality comparisons; cmpneq inverts the cmpeq mask using get_mask128().
00321 inline I16vec8 cmpeq(const I16vec8 &a,const I16vec8 &b) { return _mm_cmpeq_epi16(a,b); }
00322 inline I16vec8 cmpneq(const I16vec8 &a,const I16vec8 &b) { return _mm_andnot_si128(_mm_cmpeq_epi16(a,b),get_mask128()); }
00323 
// Interleave helpers; forward to _mm_unpacklo_epi16 / _mm_unpackhi_epi16.
00324 inline I16vec8 unpack_low(const I16vec8 &a,const I16vec8 &b) { return _mm_unpacklo_epi16(a,b); }
00325 inline I16vec8 unpack_high(const I16vec8 &a,const I16vec8 &b) { return _mm_unpackhi_epi16(a,b); }
00326 
// Is16vec8: I16vec8 whose eight elements are accessed as signed short.
00327 class Is16vec8 : public I16vec8
00328 {
00329 public:
00330   Is16vec8() { }
00331   Is16vec8(__m128i mm) : I16vec8(mm) { }
// Element-wise constructor. Arguments are listed highest element first:
// s0 is stored in element 0 and s7 in element 7.
00332   Is16vec8(signed short s7,signed short s6,signed short s5,signed short s4,signed short s3,signed short s2,signed short s1,signed short s0)
00333   {
00334     _MM_8W(0,vec) = s0;
00335     _MM_8W(1,vec) = s1;
00336     _MM_8W(2,vec) = s2;
00337     _MM_8W(3,vec) = s3;
00338     _MM_8W(4,vec) = s4;
00339     _MM_8W(5,vec) = s5;
00340     _MM_8W(6,vec) = s6;
00341     _MM_8W(7,vec) = s7;
00342   }
00343 
// Assignment from any M128; the argument is first re-typed as Is16vec8.
00344   Is16vec8& operator= (const M128 &a) { return *this = (Is16vec8) a; }
00345 
// Bitwise compound assignments returning Is16vec8&.
00346   Is16vec8& operator&=(const M128 &a) { return *this = (Is16vec8) _mm_and_si128(vec,a); }
00347   Is16vec8& operator|=(const M128 &a) { return *this = (Is16vec8) _mm_or_si128(vec,a); }
00348   Is16vec8& operator^=(const M128 &a) { return *this = (Is16vec8) _mm_xor_si128(vec,a); }
00349 
// Element-wise 16-bit add / subtract / multiply via _mm_add_epi16,
// _mm_sub_epi16 and _mm_mullo_epi16.
00350   Is16vec8& operator +=(const I16vec8 &a) { return *this = (Is16vec8) _mm_add_epi16(vec,a); }
00351   Is16vec8& operator -=(const I16vec8 &a) { return *this = (Is16vec8) _mm_sub_epi16(vec,a); }
00352   Is16vec8& operator *=(const I16vec8 &a) { return *this = (Is16vec8) _mm_mullo_epi16(vec,a); }
00353 
// Left shifts via _mm_sll_epi16 / _mm_slli_epi16.
00354   Is16vec8 operator<<(const M128 &a) { return _mm_sll_epi16(vec,a); }
00355   Is16vec8 operator<<(int count) { return _mm_slli_epi16(vec,count); }
00356   Is16vec8& operator<<=(const M128 &a) { return *this = (Is16vec8)_mm_sll_epi16(vec,a); }
00357   Is16vec8& operator<<=(int count) { return *this = (Is16vec8)_mm_slli_epi16(vec,count); }
00358 
// Right shifts use the _mm_sra_epi16 / _mm_srai_epi16 intrinsics (the
// arithmetic-shift family, going by the intrinsic names).
00359   Is16vec8 operator>>(const M128 &a) { return _mm_sra_epi16(vec,a); }
00360   Is16vec8 operator>>(int count) { return _mm_srai_epi16(vec,count); }
00361   Is16vec8& operator>>=(const M128 &a) { return *this = (Is16vec8)_mm_sra_epi16(vec,a); }
00362   Is16vec8& operator>>=(int count) { return *this = (Is16vec8)_mm_srai_epi16(vec,count); }
00363 
00364 #if defined(_ENABLE_VEC_DEBUG)
00365 
// Debug-only stream output (compiled when _ENABLE_VEC_DEBUG is defined).
// Prints the eight elements from index 7 down to index 0.
00366   friend std::ostream& operator<< (std::ostream &os,const Is16vec8 &a)
00367   {
00368     os << "[7]:" << _MM_8W(7,a)
00369       << " [6]:" << _MM_8W(6,a)
00370       << " [5]:" << _MM_8W(5,a)
00371       << " [4]:" << _MM_8W(4,a)
00372       << " [3]:" << _MM_8W(3,a)
00373       << " [2]:" << _MM_8W(2,a)
00374       << " [1]:" << _MM_8W(1,a)
00375       << " [0]:" << _MM_8W(0,a);
00376     return os;
00377   }
00378 #endif
00379 
// Read-only element access. The index is cast to unsigned before the range
// check, so a negative index fails the same assert as one that is too large.
00380   const signed short& operator[](int i)const
00381   {
00382     assert(static_cast<unsigned int>(i) < 8);
00383     return _MM_8W(i,vec);
00384   }
00385 
// Writable element access with the same range assert.
00386   signed short& operator[](int i)
00387   {
00388     assert(static_cast<unsigned int>(i) < 8);
00389     return _MM_8W(i,vec);
00390   }
00391 };
00392 
// Element-wise 16-bit multiply via _mm_mullo_epi16.
00393 inline Is16vec8 operator*(const Is16vec8 &a,const Is16vec8 &b) { return _mm_mullo_epi16(a,b); }
00394 
// Comparisons. cmpneq inverts the cmpeq mask using get_mask128(); cmplt is
// expressed as cmpgt with the operands swapped.
00395 inline Is16vec8 cmpeq(const Is16vec8 &a,const Is16vec8 &b) { return _mm_cmpeq_epi16(a,b); }
00396 inline Is16vec8 cmpneq(const Is16vec8 &a,const Is16vec8 &b) { return _mm_andnot_si128(_mm_cmpeq_epi16(a,b),get_mask128()); }
00397 inline Is16vec8 cmpgt(const Is16vec8 &a,const Is16vec8 &b) { return _mm_cmpgt_epi16(a,b); }
00398 inline Is16vec8 cmplt(const Is16vec8 &a,const Is16vec8 &b) { return _mm_cmpgt_epi16(b,a); }
00399 
// Interleave helpers; forward to _mm_unpacklo_epi16 / _mm_unpackhi_epi16.
00400 inline Is16vec8 unpack_low(const Is16vec8 &a,const Is16vec8 &b) { return _mm_unpacklo_epi16(a,b); }
00401 inline Is16vec8 unpack_high(const Is16vec8 &a,const Is16vec8 &b) { return _mm_unpackhi_epi16(a,b); }
00402 
// mul_high forwards to _mm_mulhi_epi16. mul_add forwards to _mm_madd_epi16
// and returns its result as an Is32vec4.
00403 inline Is16vec8 mul_high(const Is16vec8 &a,const Is16vec8 &b) { return _mm_mulhi_epi16(a,b); }
00404 inline Is32vec4 mul_add(const Is16vec8 &a,const Is16vec8 &b) { return _mm_madd_epi16(a,b);}
00405 
// Saturating add / subtract via _mm_adds_epi16 / _mm_subs_epi16.
00406 inline Is16vec8 sat_add(const Is16vec8 &a,const Is16vec8 &b) { return _mm_adds_epi16(a,b); }
00407 inline Is16vec8 sat_sub(const Is16vec8 &a,const Is16vec8 &b) { return _mm_subs_epi16(a,b); }
00408 
// Element-wise maximum / minimum via _mm_max_epi16 / _mm_min_epi16.
00409 inline Is16vec8 simd_max(const Is16vec8 &a,const Is16vec8 &b) { return _mm_max_epi16(a,b); }
00410 inline Is16vec8 simd_min(const Is16vec8 &a,const Is16vec8 &b) { return _mm_min_epi16(a,b); }
00411 
// Iu16vec8: I16vec8 whose eight elements are accessed as unsigned short.
00412 class Iu16vec8 : public I16vec8
00413 {
00414 public:
00415   Iu16vec8() { }
00416   Iu16vec8(__m128i mm) : I16vec8(mm) { }
// Element-wise constructor. Arguments are listed highest element first:
// s0 is stored in element 0 and s7 in element 7.
00417   Iu16vec8(unsigned short s7,unsigned short s6,unsigned short s5,unsigned short s4,unsigned short s3,unsigned short s2,unsigned short s1,unsigned short s0)
00418   {
00419     _MM_8UW(0,vec) = s0;
00420     _MM_8UW(1,vec) = s1;
00421     _MM_8UW(2,vec) = s2;
00422     _MM_8UW(3,vec) = s3;
00423     _MM_8UW(4,vec) = s4;
00424     _MM_8UW(5,vec) = s5;
00425     _MM_8UW(6,vec) = s6;
00426     _MM_8UW(7,vec) = s7;
00427   }
00428 
// Assignment from any M128; the argument is first re-typed as Iu16vec8.
00429   Iu16vec8& operator= (const M128 &a) { return *this = (Iu16vec8) a; }
00430 
// Bitwise compound assignments returning Iu16vec8&.
00431   Iu16vec8& operator&=(const M128 &a) { return *this = (Iu16vec8) _mm_and_si128(vec,a); }
00432   Iu16vec8& operator|=(const M128 &a) { return *this = (Iu16vec8) _mm_or_si128(vec,a); }
00433   Iu16vec8& operator^=(const M128 &a) { return *this = (Iu16vec8) _mm_xor_si128(vec,a); }
00434 
// Element-wise 16-bit add / subtract / multiply via _mm_add_epi16,
// _mm_sub_epi16 and _mm_mullo_epi16.
00435   Iu16vec8& operator +=(const I16vec8 &a) { return *this = (Iu16vec8) _mm_add_epi16(vec,a); }
00436   Iu16vec8& operator -=(const I16vec8 &a) { return *this = (Iu16vec8) _mm_sub_epi16(vec,a); }
00437   Iu16vec8& operator *=(const I16vec8 &a) { return *this = (Iu16vec8) _mm_mullo_epi16(vec,a); }
00438 
// Shifts. Left shifts use _mm_sll_epi16 / _mm_slli_epi16; right shifts use
// _mm_srl_epi16 / _mm_srli_epi16 (the logical-shift family, going by the
// intrinsic names).
00439   Iu16vec8 operator<<(const M128 &a) { return _mm_sll_epi16(vec,a); }
00440   Iu16vec8 operator<<(int count) { return _mm_slli_epi16(vec,count); }
00441   Iu16vec8& operator<<=(const M128 &a) { return *this = (Iu16vec8)_mm_sll_epi16(vec,a); }
00442   Iu16vec8& operator<<=(int count) { return *this = (Iu16vec8)_mm_slli_epi16(vec,count); }
00443   Iu16vec8 operator>>(const M128 &a) { return _mm_srl_epi16(vec,a); }
00444   Iu16vec8 operator>>(int count) { return _mm_srli_epi16(vec,count); }
00445   Iu16vec8& operator>>=(const M128 &a) { return *this = (Iu16vec8) _mm_srl_epi16(vec,a); }
00446   Iu16vec8& operator>>=(int count) { return *this = (Iu16vec8) _mm_srli_epi16(vec,count); }
00447 
00448 #if defined(_ENABLE_VEC_DEBUG)
00449 
// Debug-only stream output (compiled when _ENABLE_VEC_DEBUG is defined).
// Prints the eight elements from index 7 down to index 0.
// static_cast is used because a functional cast spelled with a two-word type
// name, `unsigned short(x)`, is not valid standard C++.
00450   friend std::ostream& operator << (std::ostream &os,const Iu16vec8 &a)
00451   {
00452     os << "[7]:" << static_cast<unsigned short>(_MM_8UW(7,a))
00453       << " [6]:" << static_cast<unsigned short>(_MM_8UW(6,a))
00454       << " [5]:" << static_cast<unsigned short>(_MM_8UW(5,a))
00455       << " [4]:" << static_cast<unsigned short>(_MM_8UW(4,a))
00456       << " [3]:" << static_cast<unsigned short>(_MM_8UW(3,a))
00457       << " [2]:" << static_cast<unsigned short>(_MM_8UW(2,a))
00458       << " [1]:" << static_cast<unsigned short>(_MM_8UW(1,a))
00459       << " [0]:" << static_cast<unsigned short>(_MM_8UW(0,a));
00460     return os;
00461   }
00462 #endif
00463 
// Read-only element access. The index is cast to unsigned before the range
// check, so a negative index fails the same assert as one that is too large.
00464   const unsigned short& operator[](int i)const
00465   {
00466     assert(static_cast<unsigned int>(i) < 8);
00467     return _MM_8UW(i,vec);
00468   }
00469 
// Writable element access with the same range assert.
00470   unsigned short& operator[](int i)
00471   {
00472     assert(static_cast<unsigned int>(i) < 8);
00473     return _MM_8UW(i,vec);
00474   }
00475 };
00476 
// Element-wise 16-bit multiply via _mm_mullo_epi16.
00477 inline Iu16vec8 operator*(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_mullo_epi16(a,b); }
00478 
// Equality comparisons; cmpneq inverts the cmpeq mask using get_mask128().
00479 inline Iu16vec8 cmpeq(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_cmpeq_epi16(a,b); }
00480 inline Iu16vec8 cmpneq(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_andnot_si128(_mm_cmpeq_epi16(a,b),get_mask128()); }
00481 
// Interleave helpers; forward to _mm_unpacklo_epi16 / _mm_unpackhi_epi16.
00482 inline Iu16vec8 unpack_low(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_unpacklo_epi16(a,b); }
00483 inline Iu16vec8 unpack_high(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_unpackhi_epi16(a,b); }
00484 
// Saturating add / subtract via the unsigned _mm_adds_epu16 / _mm_subs_epu16.
00485 inline Iu16vec8 sat_add(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_adds_epu16(a,b); }
00486 inline Iu16vec8 sat_sub(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_subs_epu16(a,b); }
00487 
// simd_avg forwards to _mm_avg_epu16. mul_high forwards to _mm_mulhi_epu16;
// note that its declared return type is I16vec8 rather than Iu16vec8.
00488 inline Iu16vec8 simd_avg(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_avg_epu16(a,b); }
00489 inline I16vec8 mul_high(const Iu16vec8 &a,const Iu16vec8 &b) { return _mm_mulhi_epu16(a,b); }
00490 
// I8vec16: M128 viewed as sixteen 8-bit integer elements with no signedness
// attached. Base class of Is8vec16 and Iu8vec16.
00491 class I8vec16 : public M128
00492 {
00493 public:
00494   I8vec16() { }
00495   I8vec16(__m128i mm) : M128(mm) { }
00496 
// Assignment from any M128; the argument is first re-typed as I8vec16.
00497   I8vec16& operator= (const M128 &a) { return *this = (I8vec16) a; }
00498 
// Bitwise compound assignments returning I8vec16&.
00499   I8vec16& operator&=(const M128 &a) { return *this = (I8vec16) _mm_and_si128(vec,a); }
00500   I8vec16& operator|=(const M128 &a) { return *this = (I8vec16) _mm_or_si128(vec,a); }
00501   I8vec16& operator^=(const M128 &a) { return *this = (I8vec16) _mm_xor_si128(vec,a); }
00502 
// Element-wise 8-bit add / subtract via _mm_add_epi8 / _mm_sub_epi8.
00503   I8vec16& operator +=(const I8vec16 &a) { return *this = (I8vec16) _mm_add_epi8(vec,a); }
00504   I8vec16& operator -=(const I8vec16 &a) { return *this = (I8vec16) _mm_sub_epi8(vec,a); }
00505 
00506 };
00507 
// Equality comparisons; cmpneq inverts the cmpeq mask using get_mask128().
00508 inline I8vec16 cmpeq(const I8vec16 &a,const I8vec16 &b) { return _mm_cmpeq_epi8(a,b); }
00509 inline I8vec16 cmpneq(const I8vec16 &a,const I8vec16 &b) { return _mm_andnot_si128(_mm_cmpeq_epi8(a,b),get_mask128()); }
00510 
// Interleave helpers; forward to _mm_unpacklo_epi8 / _mm_unpackhi_epi8.
00511 inline I8vec16 unpack_low(const I8vec16 &a,const I8vec16 &b) { return _mm_unpacklo_epi8(a,b); }
00512 inline I8vec16 unpack_high(const I8vec16 &a,const I8vec16 &b) { return _mm_unpackhi_epi8(a,b); }
00513 
// Is8vec16: I8vec16 whose sixteen elements are accessed as signed char.
00514 class Is8vec16 : public I8vec16
00515 {
00516 public:
00517   Is8vec16() { }
00518   Is8vec16(__m128i mm) : I8vec16(mm) { }
00519 
// Assignment from any M128; the argument is first re-typed as Is8vec16.
00520   Is8vec16& operator= (const M128 &a) { return *this = (Is8vec16) a; }
00521 
// Bitwise compound assignments returning Is8vec16&.
00522   Is8vec16& operator&=(const M128 &a) { return *this = (Is8vec16) _mm_and_si128(vec,a); }
00523   Is8vec16& operator|=(const M128 &a) { return *this = (Is8vec16) _mm_or_si128(vec,a); }
00524   Is8vec16& operator^=(const M128 &a) { return *this = (Is8vec16) _mm_xor_si128(vec,a); }
00525 
// Element-wise 8-bit add / subtract via _mm_add_epi8 / _mm_sub_epi8.
00526   Is8vec16& operator +=(const I8vec16 &a) { return *this = (Is8vec16) _mm_add_epi8(vec,a); }
00527   Is8vec16& operator -=(const I8vec16 &a) { return *this = (Is8vec16) _mm_sub_epi8(vec,a); }
00528 
00529 #if defined(_ENABLE_VEC_DEBUG)
00530 
// Debug-only stream output (compiled when _ENABLE_VEC_DEBUG is defined).
// Prints the sixteen elements from index 15 down to index 0. Each element is
// converted to short before insertion so it is written as a number rather
// than as a character.
00531   friend std::ostream& operator << (std::ostream &os,const Is8vec16 &a)
00532   {
00533     os << "[15]:" << short(_MM_16B(15,a))
00534       << " [14]:" << short(_MM_16B(14,a))
00535       << " [13]:" << short(_MM_16B(13,a))
00536       << " [12]:" << short(_MM_16B(12,a))
00537       << " [11]:" << short(_MM_16B(11,a))
00538       << " [10]:" << short(_MM_16B(10,a))
00539       << " [9]:" << short(_MM_16B(9,a))
00540       << " [8]:" << short(_MM_16B(8,a))
00541       << " [7]:" << short(_MM_16B(7,a))
00542       << " [6]:" << short(_MM_16B(6,a))
00543       << " [5]:" << short(_MM_16B(5,a))
00544       << " [4]:" << short(_MM_16B(4,a))
00545       << " [3]:" << short(_MM_16B(3,a))
00546       << " [2]:" << short(_MM_16B(2,a))
00547       << " [1]:" << short(_MM_16B(1,a))
00548       << " [0]:" << short(_MM_16B(0,a));
00549     return os;
00550   }
00551 #endif
00552 
// Read-only element access. The index is cast to unsigned before the range
// check, so a negative index fails the same assert as one that is too large.
00553   const signed char& operator[](int i)const
00554   {
00555     assert(static_cast<unsigned int>(i) < 16);
00556     return _MM_16B(i,vec);
00557   }
00558 
// Writable element access with the same range assert.
00559   signed char& operator[](int i)
00560   {
00561     assert(static_cast<unsigned int>(i) < 16);
00562     return _MM_16B(i,vec);
00563   }
00564 
00565 };
00566 
// Comparisons. cmpneq inverts the cmpeq mask using get_mask128(). Unlike the
// 16- and 32-bit signed classes, cmplt here calls _mm_cmplt_epi8 directly
// instead of swapping the operands of the cmpgt intrinsic.
00567 inline Is8vec16 cmpeq(const Is8vec16 &a,const Is8vec16 &b) { return _mm_cmpeq_epi8(a,b); }
00568 inline Is8vec16 cmpneq(const Is8vec16 &a,const Is8vec16 &b) { return _mm_andnot_si128(_mm_cmpeq_epi8(a,b),get_mask128()); }
00569 inline Is8vec16 cmpgt(const Is8vec16 &a,const Is8vec16 &b) { return _mm_cmpgt_epi8(a,b); }
00570 inline Is8vec16 cmplt(const Is8vec16 &a,const Is8vec16 &b) { return _mm_cmplt_epi8(a,b); }
00571 
// Interleave helpers; forward to _mm_unpacklo_epi8 / _mm_unpackhi_epi8.
00572 inline Is8vec16 unpack_low(const Is8vec16 &a,const Is8vec16 &b) { return _mm_unpacklo_epi8(a,b); }
00573 inline Is8vec16 unpack_high(const Is8vec16 &a,const Is8vec16 &b) { return _mm_unpackhi_epi8(a,b); }
00574 
// Saturating add / subtract via _mm_adds_epi8 / _mm_subs_epi8.
00575 inline Is8vec16 sat_add(const Is8vec16 &a,const Is8vec16 &b) { return _mm_adds_epi8(a,b); }
00576 inline Is8vec16 sat_sub(const Is8vec16 &a,const Is8vec16 &b) { return _mm_subs_epi8(a,b); }
00577 
// Iu8vec16: I8vec16 whose sixteen elements are accessed as unsigned char.
00578 class Iu8vec16 : public I8vec16
00579 {
00580 public:
00581   Iu8vec16() { }
00582   Iu8vec16(__m128i mm) : I8vec16(mm) { }
00583 
// Assignment from any M128; the argument is first re-typed as Iu8vec16.
00584   Iu8vec16& operator= (const M128 &a) { return *this = (Iu8vec16) a; }
00585 
// Bitwise compound assignments returning Iu8vec16&.
00586   Iu8vec16& operator&=(const M128 &a) { return *this = (Iu8vec16) _mm_and_si128(vec,a); }
00587   Iu8vec16& operator|=(const M128 &a) { return *this = (Iu8vec16) _mm_or_si128(vec,a); }
00588   Iu8vec16& operator^=(const M128 &a) { return *this = (Iu8vec16) _mm_xor_si128(vec,a); }
00589 
// Element-wise 8-bit add / subtract via _mm_add_epi8 / _mm_sub_epi8.
00590   Iu8vec16& operator +=(const I8vec16 &a) { return *this = (Iu8vec16) _mm_add_epi8(vec,a); }
00591   Iu8vec16& operator -=(const I8vec16 &a) { return *this = (Iu8vec16) _mm_sub_epi8(vec,a); }
00592 
00593 #if defined(_ENABLE_VEC_DEBUG)
00594 
// Debug-only stream output (compiled when _ENABLE_VEC_DEBUG is defined).
// Prints the sixteen elements from index 15 down to index 0. Each element is
// converted to unsigned short before insertion so it is written as a number
// rather than as a character. static_cast is used because a functional cast
// spelled with a two-word type name, `unsigned short(x)`, is not valid
// standard C++.
00595   friend std::ostream& operator << (std::ostream &os,const Iu8vec16 &a)
00596   {
00597     os << "[15]:" << static_cast<unsigned short>(_MM_16UB(15,a))
00598       << " [14]:" << static_cast<unsigned short>(_MM_16UB(14,a))
00599       << " [13]:" << static_cast<unsigned short>(_MM_16UB(13,a))
00600       << " [12]:" << static_cast<unsigned short>(_MM_16UB(12,a))
00601       << " [11]:" << static_cast<unsigned short>(_MM_16UB(11,a))
00602       << " [10]:" << static_cast<unsigned short>(_MM_16UB(10,a))
00603       << " [9]:" << static_cast<unsigned short>(_MM_16UB(9,a))
00604       << " [8]:" << static_cast<unsigned short>(_MM_16UB(8,a))
00605       << " [7]:" << static_cast<unsigned short>(_MM_16UB(7,a))
00606       << " [6]:" << static_cast<unsigned short>(_MM_16UB(6,a))
00607       << " [5]:" << static_cast<unsigned short>(_MM_16UB(5,a))
00608       << " [4]:" << static_cast<unsigned short>(_MM_16UB(4,a))
00609       << " [3]:" << static_cast<unsigned short>(_MM_16UB(3,a))
00610       << " [2]:" << static_cast<unsigned short>(_MM_16UB(2,a))
00611       << " [1]:" << static_cast<unsigned short>(_MM_16UB(1,a))
00612       << " [0]:" << static_cast<unsigned short>(_MM_16UB(0,a));
00613     return os;
00614   }
00615 #endif
00616 
// Read-only element access. The index is cast to unsigned before the range
// check, so a negative index fails the same assert as one that is too large.
00617   const unsigned char& operator[](int i)const
00618   {
00619     assert(static_cast<unsigned int>(i) < 16);
00620     return _MM_16UB(i,vec);
00621   }
00622 
// Writable element access with the same range assert.
00623   unsigned char& operator[](int i)
00624   {
00625     assert(static_cast<unsigned int>(i) < 16);
00626     return _MM_16UB(i,vec);
00627   }
00628 
00629 };
00630 
// Equality comparisons; cmpneq inverts the cmpeq mask using get_mask128().
00631 inline Iu8vec16 cmpeq(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_cmpeq_epi8(a,b); }
00632 inline Iu8vec16 cmpneq(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_andnot_si128(_mm_cmpeq_epi8(a,b),get_mask128()); }
00633 
// Interleave helpers; forward to _mm_unpacklo_epi8 / _mm_unpackhi_epi8.
00634 inline Iu8vec16 unpack_low(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_unpacklo_epi8(a,b); }
00635 inline Iu8vec16 unpack_high(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_unpackhi_epi8(a,b); }
00636 
// Saturating add / subtract via the unsigned _mm_adds_epu8 / _mm_subs_epu8.
00637 inline Iu8vec16 sat_add(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_adds_epu8(a,b); }
00638 inline Iu8vec16 sat_sub(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_subs_epu8(a,b); }
00639 
// sum_abs forwards to _mm_sad_epu8 and returns its result as an I64vec2.
00640 inline I64vec2 sum_abs(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_sad_epu8(a,b); }
00641 
// Element-wise average / maximum / minimum via _mm_avg_epu8, _mm_max_epu8
// and _mm_min_epu8.
00642 inline Iu8vec16 simd_avg(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_avg_epu8(a,b); }
00643 inline Iu8vec16 simd_max(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_max_epu8(a,b); }
00644 inline Iu8vec16 simd_min(const Iu8vec16 &a,const Iu8vec16 &b) { return _mm_min_epu8(a,b); }
00645 
// Narrowing packs. Each takes two vectors of the wider element type and
// returns one vector of the narrower type:
//   pack_sat(Is32vec4)  -> Is16vec8 via _mm_packs_epi32
//   pack_sat(Is16vec8)  -> Is8vec16 via _mm_packs_epi16
//   packu_sat(Is16vec8) -> Iu8vec16 via _mm_packus_epi16
00646 inline Is16vec8 pack_sat(const Is32vec4 &a,const Is32vec4 &b) { return _mm_packs_epi32(a,b); }
00647 inline Is8vec16 pack_sat(const Is16vec8 &a,const Is16vec8 &b) { return _mm_packs_epi16(a,b); }
00648 inline Iu8vec16 packu_sat(const Is16vec8 &a,const Is16vec8 &b) { return _mm_packus_epi16(a,b);}
00649 
// IVEC128_LOGICALS(vect,element) defines operator&, operator|, operator^ and
// andnot() for the class named I<vect>vec<element>, each forwarding to the
// matching _mm_*_si128 intrinsic and returning that same class. It is
// instantiated once per integer vector class and then undefined.
00650 #define IVEC128_LOGICALS(vect,element) inline I##vect##vec##element operator& (const I##vect##vec##element &a,const I##vect##vec##element &b) { return _mm_and_si128(a,b); } inline I##vect##vec##element operator| (const I##vect##vec##element &a,const I##vect##vec##element &b) { return _mm_or_si128(a,b); } inline I##vect##vec##element operator^ (const I##vect##vec##element &a,const I##vect##vec##element &b) { return _mm_xor_si128(a,b); } inline I##vect##vec##element andnot (const I##vect##vec##element &a,const I##vect##vec##element &b) { return _mm_andnot_si128(a,b); }
00651 
00652 IVEC128_LOGICALS(8,16)
00653 IVEC128_LOGICALS(u8,16)
00654 IVEC128_LOGICALS(s8,16)
00655 IVEC128_LOGICALS(16,8)
00656 IVEC128_LOGICALS(u16,8)
00657 IVEC128_LOGICALS(s16,8)
00658 IVEC128_LOGICALS(32,4)
00659 IVEC128_LOGICALS(u32,4)
00660 IVEC128_LOGICALS(s32,4)
00661 IVEC128_LOGICALS(64,2)
00662 IVEC128_LOGICALS(128,1)
00663 #undef IVEC128_LOGICALS
00664 
// IVEC128_ADD_SUB(vect,element,opsize) defines operator+ and operator- for
// the class I<vect>vec<element>, forwarding to _mm_add_<opsize> and
// _mm_sub_<opsize>. It is instantiated for the 8-, 16-, 32- and 64-bit
// element classes (not for I128vec1) and then undefined.
00665 #define IVEC128_ADD_SUB(vect,element,opsize) inline I##vect##vec##element operator+ (const I##vect##vec##element &a,const I##vect##vec##element &b) { return _mm_add_##opsize(a,b); } inline I##vect##vec##element operator- (const I##vect##vec##element &a,const I##vect##vec##element &b) { return _mm_sub_##opsize(a,b); }
00666 
00667 IVEC128_ADD_SUB(8,16,epi8)
00668 IVEC128_ADD_SUB(u8,16,epi8)
00669 IVEC128_ADD_SUB(s8,16,epi8)
00670 IVEC128_ADD_SUB(16,8,epi16)
00671 IVEC128_ADD_SUB(u16,8,epi16)
00672 IVEC128_ADD_SUB(s16,8,epi16)
00673 IVEC128_ADD_SUB(32,4,epi32)
00674 IVEC128_ADD_SUB(u32,4,epi32)
00675 IVEC128_ADD_SUB(s32,4,epi32)
00676 IVEC128_ADD_SUB(64,2,epi64)
00677 #undef IVEC128_ADD_SUB
00678 
// IVEC128_SELECT(vect12,vect34,element,selop,arg1,arg2) defines
// select_<selop>(a,b,c,d). a and b have type I<vect12>vec<element>; c, d and
// the result have type I<vect34>vec<element>. The generated function computes
// mask = cmp<selop>(a,b) and returns (mask & arg1) | andnot(mask, arg2).
// Every instantiation below passes c as arg1 and d as arg2, so the result
// takes bits from c where the mask is set and from d where it is clear.
// The eq / neq forms compare through the sign-agnostic base classes; the
// gt / lt forms compare through the signed classes and exist only for 8- and
// 16-bit elements. The macro is undefined after the last instantiation.
00679 #define IVEC128_SELECT(vect12,vect34,element,selop,arg1,arg2) inline I##vect34##vec##element select_##selop (const I##vect12##vec##element &a,const I##vect12##vec##element &b,const I##vect34##vec##element &c,const I##vect34##vec##element &d) { I##vect12##vec##element mask = cmp##selop(a,b); return(I##vect34##vec##element ((mask & arg1) | I##vect12##vec##element ((_mm_andnot_si128(mask,arg2))))); }
00680 IVEC128_SELECT(8,s8,16,eq,c,d)
00681 IVEC128_SELECT(8,u8,16,eq,c,d)
00682 IVEC128_SELECT(8,8,16,eq,c,d)
00683 IVEC128_SELECT(8,s8,16,neq,c,d)
00684 IVEC128_SELECT(8,u8,16,neq,c,d)
00685 IVEC128_SELECT(8,8,16,neq,c,d)
00686 
00687 IVEC128_SELECT(16,s16,8,eq,c,d)
00688 IVEC128_SELECT(16,u16,8,eq,c,d)
00689 IVEC128_SELECT(16,16,8,eq,c,d)
00690 IVEC128_SELECT(16,s16,8,neq,c,d)
00691 IVEC128_SELECT(16,u16,8,neq,c,d)
00692 IVEC128_SELECT(16,16,8,neq,c,d)
00693 
00694 IVEC128_SELECT(32,s32,4,eq,c,d)
00695 IVEC128_SELECT(32,u32,4,eq,c,d)
00696 IVEC128_SELECT(32,32,4,eq,c,d)
00697 IVEC128_SELECT(32,s32,4,neq,c,d)
00698 IVEC128_SELECT(32,u32,4,neq,c,d)
00699 IVEC128_SELECT(32,32,4,neq,c,d)
00700 
00701 IVEC128_SELECT(s8,s8,16,gt,c,d)
00702 IVEC128_SELECT(s8,u8,16,gt,c,d)
00703 IVEC128_SELECT(s8,8,16,gt,c,d)
00704 IVEC128_SELECT(s8,s8,16,lt,c,d)
00705 IVEC128_SELECT(s8,u8,16,lt,c,d)
00706 IVEC128_SELECT(s8,8,16,lt,c,d)
00707 
00708 IVEC128_SELECT(s16,s16,8,gt,c,d)
00709 IVEC128_SELECT(s16,u16,8,gt,c,d)
00710 IVEC128_SELECT(s16,16,8,gt,c,d)
00711 IVEC128_SELECT(s16,s16,8,lt,c,d)
00712 IVEC128_SELECT(s16,u16,8,lt,c,d)
00713 IVEC128_SELECT(s16,16,8,lt,c,d)
00714 
00715 #undef IVEC128_SELECT
00716 
00717 class F64vec2
00718 {
00719 protected:
00720   __m128d vec;
00721 public:
00722 
00723   F64vec2() {}
00724 
00725   F64vec2(__m128d m) { vec = m;}
00726 
00727   F64vec2(double d1,double d0) { vec= _mm_set_pd(d1,d0); }
00728 
00729   EXPLICIT F64vec2(double d) { vec = _mm_set1_pd(d); }
00730 
00731   operator __m128d() const { return vec; }
00732 
00733   friend F64vec2 operator &(const F64vec2 &a,const F64vec2 &b) { return _mm_and_pd(a,b); }
00734   friend F64vec2 operator |(const F64vec2 &a,const F64vec2 &b) { return _mm_or_pd(a,b); }
00735   friend F64vec2 operator ^(const F64vec2 &a,const F64vec2 &b) { return _mm_xor_pd(a,b); }
00736 
00737   friend F64vec2 operator +(const F64vec2 &a,const F64vec2 &b) { return _mm_add_pd(a,b); }
00738   friend F64vec2 operator -(const F64vec2 &a,const F64vec2 &b) { return _mm_sub_pd(a,b); }
00739   friend F64vec2 operator *(const F64vec2 &a,const F64vec2 &b) { return _mm_mul_pd(a,b); }
00740   friend F64vec2 operator /(const F64vec2 &a,const F64vec2 &b) { return _mm_div_pd(a,b); }
00741 
00742   F64vec2& operator +=(F64vec2 &a) { return *this = _mm_add_pd(vec,a); }
00743   F64vec2& operator -=(F64vec2 &a) { return *this = _mm_sub_pd(vec,a); }
00744   F64vec2& operator *=(F64vec2 &a) { return *this = _mm_mul_pd(vec,a); }
00745   F64vec2& operator /=(F64vec2 &a) { return *this = _mm_div_pd(vec,a); }
00746   F64vec2& operator &=(F64vec2 &a) { return *this = _mm_and_pd(vec,a); }
00747   F64vec2& operator |=(F64vec2 &a) { return *this = _mm_or_pd(vec,a); }
00748   F64vec2& operator ^=(F64vec2 &a) { return *this = _mm_xor_pd(vec,a); }
00749 
00750   friend double add_horizontal(F64vec2 &a)
00751   {
00752     F64vec2 ftemp = _mm_add_sd(a,_mm_shuffle_pd(a,a,1));
00753     return ftemp[0];
00754   }
00755 
00756   friend F64vec2 andnot(const F64vec2 &a,const F64vec2 &b) { return _mm_andnot_pd(a,b); }
00757 
00758   friend F64vec2 sqrt(const F64vec2 &a) { return _mm_sqrt_pd(a); }
00759 
00760 #define F64vec2_COMP(op) friend F64vec2 cmp##op (const F64vec2 &a,const F64vec2 &b) { return _mm_cmp##op##_pd(a,b); }
00761   F64vec2_COMP(eq)
00762     F64vec2_COMP(lt)
00763     F64vec2_COMP(le)
00764     F64vec2_COMP(gt)
00765     F64vec2_COMP(ge)
00766     F64vec2_COMP(ngt)
00767     F64vec2_COMP(nge)
00768     F64vec2_COMP(neq)
00769     F64vec2_COMP(nlt)
00770     F64vec2_COMP(nle)
00771 #undef F64vec2_COMP
00772 
00773     friend F64vec2 simd_min(const F64vec2 &a,const F64vec2 &b) { return _mm_min_pd(a,b); }
00774   friend F64vec2 simd_max(const F64vec2 &a,const F64vec2 &b) { return _mm_max_pd(a,b); }
00775 
00776 #define F64vec2_COMI(op) friend int comi##op (const F64vec2 &a,const F64vec2 &b) { return _mm_comi##op##_sd(a,b); }
00777   F64vec2_COMI(eq)
00778     F64vec2_COMI(lt)
00779     F64vec2_COMI(le)
00780     F64vec2_COMI(gt)
00781     F64vec2_COMI(ge)
00782     F64vec2_COMI(neq)
00783 #undef F64vec2_COMI
00784 
00785 #define F64vec2_UCOMI(op) friend int ucomi##op (const F64vec2 &a,const F64vec2 &b) { return _mm_ucomi##op##_sd(a,b); }
00786     F64vec2_UCOMI(eq)
00787     F64vec2_UCOMI(lt)
00788     F64vec2_UCOMI(le)
00789     F64vec2_UCOMI(gt)
00790     F64vec2_UCOMI(ge)
00791     F64vec2_UCOMI(neq)
00792 #undef F64vec2_UCOMI
00793 
00794 #if defined(_ENABLE_VEC_DEBUG)
00795 
00796   friend std::ostream & operator<<(std::ostream & os,const F64vec2 &a) {
00797     double *dp = (double*)&a;
00798     os << " [1]:" << *(dp+1)
00799       << " [0]:" << *dp;
00800     return os;
00801   }
00802 #endif
00803 
00804   const double &operator[](int i) const {
00805     assert((0 <= i) && (i <= 1));
00806     double *dp = (double*)&vec;
00807     return *(dp+i);
00808   }
00809 
00810   double &operator[](int i) {
00811     assert((0 <= i) && (i <= 1));
00812     double *dp = (double*)&vec;
00813     return *(dp+i);
00814   }
00815 };
00816 
00817 inline F64vec2 unpack_low(const F64vec2 &a,const F64vec2 &b) { return _mm_unpacklo_pd(a,b); }
00818 inline F64vec2 unpack_high(const F64vec2 &a,const F64vec2 &b) { return _mm_unpackhi_pd(a,b); }
00819 inline int move_mask(const F64vec2 &a) { return _mm_movemask_pd(a); }
00820 inline void loadu(F64vec2 &a,double *p) { a = _mm_loadu_pd(p); }
00821 inline void storeu(double *p,const F64vec2 &a) { _mm_storeu_pd(p,a); }
00822 inline void store_nta(double *p,F64vec2 &a) { _mm_stream_pd(p,a); }
00823 
00824 #define F64vec2_SELECT(op) inline F64vec2 select_##op (const F64vec2 &a,const F64vec2 &b,const F64vec2 &c,const F64vec2 &d) { F64vec2 mask = _mm_cmp##op##_pd(a,b); return((mask & c) | F64vec2((_mm_andnot_pd(mask,d)))); }
00825 F64vec2_SELECT(eq)
00826 F64vec2_SELECT(lt)
00827 F64vec2_SELECT(le)
00828 F64vec2_SELECT(gt)
00829 F64vec2_SELECT(ge)
00830 F64vec2_SELECT(neq)
00831 F64vec2_SELECT(nlt)
00832 F64vec2_SELECT(nle)
00833 #undef F64vec2_SELECT
00834 
00835 inline int F64vec2ToInt(const F64vec2 &a) { return _mm_cvttsd_si32(a); }
00836 inline F64vec2 F32vec4ToF64vec2(const F32vec4 &a) { return _mm_cvtps_pd(a); }
00837 inline F32vec4 F64vec2ToF32vec4(const F64vec2 &a) { return _mm_cvtpd_ps(a); }
00838 inline F64vec2 IntToF64vec2(const F64vec2 &a,int b) { return _mm_cvtsi32_sd(a,b); }
00839 
00840 #pragma pack(pop)
00841 #pragma pack(pop)
00842 #endif
00843 #endif

Generated on Sun May 27 2012 04:29:51 for ReactOS by doxygen 1.7.6.1

ReactOS is a registered trademark or a trademark of ReactOS Foundation in the United States and other countries.