ReactOS Fundraising Campaign 2012
 
€ 4,410 / € 30,000

Information | Donate

Home | Info | Community | Development | myReactOS | Contact Us

  1. Home
  2. Community
  3. Development
  4. myReactOS
  5. Fundraiser 2012

  1. Main Page
  2. Alphabetical List
  3. Data Structures
  4. Directories
  5. File List
  6. Data Fields
  7. Globals
  8. Related Pages

ReactOS Development > Doxygen

fvec.h
Go to the documentation of this file.
00001 
00006 #ifndef _FVEC_H_INCLUDED
00007 #define _FVEC_H_INCLUDED
00008 
00009 #ifndef RC_INVOKED
00010 #ifndef __cplusplus
00011 #error ERROR: This file is only supported in C++ compilations!
00012 #endif
00013 
00014 #include <xmmintrin.h>
00015 #include <assert.h>
00016 #include <ivec.h>
00017 #include <crtdefs.h>
00018 
00019 #if defined(_ENABLE_VEC_DEBUG)
00020 #include <iostream>
00021 #endif
00022 
00023 #pragma pack(push,_CRT_PACKING)
00024 #pragma pack(push,16)
00025 
00026 #define EXPLICIT explicit
00027 
// Four packed single-precision floats stored in one SSE register (__m128).
//
// Lane 0 is the lowest element. The four-argument constructor takes the
// lanes highest-first (f3,f2,f1,f0), in the order _mm_set_ps expects.
// All arithmetic, logical and comparison helpers operate on all four lanes.
class F32vec4 {
protected:
  __m128 vec;
public:
  // Leaves all four lanes uninitialised.
  F32vec4() {}
  // Wraps an existing register value; implicit so intrinsic results convert.
  F32vec4(__m128 m) : vec(m) {}
  F32vec4(float f3, float f2, float f1, float f0) : vec(_mm_set_ps(f3, f2, f1, f0)) {}
  // Broadcast a scalar to all four lanes. `explicit` is spelled out here
  // (the header's EXPLICIT macro expands to exactly this keyword).
  explicit F32vec4(float f) : vec(_mm_set_ps1(f)) {}
  explicit F32vec4(double d) : vec(_mm_set_ps1(static_cast<float>(d))) {}

  // Scalar assignment broadcasts the value to every lane.
  F32vec4& operator =(float f) { vec = _mm_set_ps1(f); return *this; }
  F32vec4& operator =(double d) { vec = _mm_set_ps1(static_cast<float>(d)); return *this; }
  F32vec4& operator =(const F32vec4 &a) { vec = a.vec; return *this; }
  F32vec4& operator =(const __m128 &avec) { vec = avec; return *this; }

  operator __m128() const { return vec; }

  // Bitwise operations on the raw lane bits.
  friend F32vec4 operator &(const F32vec4 &a, const F32vec4 &b) { return _mm_and_ps(a.vec, b.vec); }
  friend F32vec4 operator |(const F32vec4 &a, const F32vec4 &b) { return _mm_or_ps(a.vec, b.vec); }
  friend F32vec4 operator ^(const F32vec4 &a, const F32vec4 &b) { return _mm_xor_ps(a.vec, b.vec); }

  // Lane-wise arithmetic.
  friend F32vec4 operator +(const F32vec4 &a, const F32vec4 &b) { return _mm_add_ps(a.vec, b.vec); }
  friend F32vec4 operator -(const F32vec4 &a, const F32vec4 &b) { return _mm_sub_ps(a.vec, b.vec); }
  friend F32vec4 operator *(const F32vec4 &a, const F32vec4 &b) { return _mm_mul_ps(a.vec, b.vec); }
  friend F32vec4 operator /(const F32vec4 &a, const F32vec4 &b) { return _mm_div_ps(a.vec, b.vec); }

  // Compound assignment. The right-hand side is only read, so it is taken by
  // const reference; this also lets const objects and temporaries
  // (e.g. `x += y * z`) be used as operands.
  F32vec4& operator +=(const F32vec4 &a) { vec = _mm_add_ps(vec, a.vec); return *this; }
  F32vec4& operator -=(const F32vec4 &a) { vec = _mm_sub_ps(vec, a.vec); return *this; }
  F32vec4& operator *=(const F32vec4 &a) { vec = _mm_mul_ps(vec, a.vec); return *this; }
  F32vec4& operator /=(const F32vec4 &a) { vec = _mm_div_ps(vec, a.vec); return *this; }
  F32vec4& operator &=(const F32vec4 &a) { vec = _mm_and_ps(vec, a.vec); return *this; }
  F32vec4& operator |=(const F32vec4 &a) { vec = _mm_or_ps(vec, a.vec); return *this; }
  F32vec4& operator ^=(const F32vec4 &a) { vec = _mm_xor_ps(vec, a.vec); return *this; }

  // Returns the sum of the four lanes, accumulated as
  // a[0] + (a[1] + (a[2] + a[3])). The argument is not modified.
  friend float add_horizontal(const F32vec4 &a) {
    const __m128 v = a.vec;
    // Each shuffle moves lane 1, 2 or 3 into lane 0 so _mm_add_ss can add it.
    const __m128 upper = _mm_add_ss(_mm_shuffle_ps(v, v, 2), _mm_shuffle_ps(v, v, 3));
    const __m128 tail = _mm_add_ss(_mm_shuffle_ps(v, v, 1), upper);
    const F32vec4 total = _mm_add_ss(v, tail);
    return total[0];
  }

  friend F32vec4 sqrt(const F32vec4 &a) { return _mm_sqrt_ps(a.vec); }
  // Hardware estimates of 1/a and 1/sqrt(a).
  friend F32vec4 rcp(const F32vec4 &a) { return _mm_rcp_ps(a.vec); }
  friend F32vec4 rsqrt(const F32vec4 &a) { return _mm_rsqrt_ps(a.vec); }

  // Reciprocal estimate refined by one Newton-Raphson step: 2*r - (r*a)*r.
  friend F32vec4 rcp_nr(const F32vec4 &a) {
    const __m128 r = _mm_rcp_ps(a.vec);
    return _mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, a.vec), r));
  }

  // Reciprocal-square-root estimate refined by one Newton-Raphson step:
  // (0.5*r) * (3 - (a*r)*r).
  friend F32vec4 rsqrt_nr(const F32vec4 &a) {
    static const F32vec4 fvecf0pt5(0.5f);
    static const F32vec4 fvecf3pt0(3.0f);
    const F32vec4 r = _mm_rsqrt_ps(a.vec);
    return (fvecf0pt5 * r) * (fvecf3pt0 - (a * r) * r);
  }

  // Lane-wise comparisons cmpeq, cmplt, ...; each forwards to the matching
  // _mm_cmp<op>_ps intrinsic and returns its mask.
#define Fvec32s4_COMP(op) friend F32vec4 cmp##op (const F32vec4 &a,const F32vec4 &b) { return _mm_cmp##op##_ps(a,b); }
  Fvec32s4_COMP(eq)
  Fvec32s4_COMP(lt)
  Fvec32s4_COMP(le)
  Fvec32s4_COMP(gt)
  Fvec32s4_COMP(ge)
  Fvec32s4_COMP(neq)
  Fvec32s4_COMP(nlt)
  Fvec32s4_COMP(nle)
  Fvec32s4_COMP(ngt)
  Fvec32s4_COMP(nge)
#undef Fvec32s4_COMP

  friend F32vec4 simd_min(const F32vec4 &a, const F32vec4 &b) { return _mm_min_ps(a.vec, b.vec); }
  friend F32vec4 simd_max(const F32vec4 &a, const F32vec4 &b) { return _mm_max_ps(a.vec, b.vec); }

#if defined(_ENABLE_VEC_DEBUG)
  // Debug output, highest lane first.
  friend std::ostream & operator<<(std::ostream & os, const F32vec4 &a) {
    const float *fp = reinterpret_cast<const float*>(&a.vec);
    os << "[3]:" << *(fp+3)
      << " [2]:" << *(fp+2)
      << " [1]:" << *(fp+1)
      << " [0]:" << *fp;
    return os;
  }
#endif

  // Lane access; i must be in [0,3] (checked with assert only).
  const float& operator[](int i) const {
    assert((0 <= i) && (i <= 3));
    return reinterpret_cast<const float*>(&vec)[i];
  }
  float& operator[](int i) {
    assert((0 <= i) && (i <= 3));
    return reinterpret_cast<float*>(&vec)[i];
  }
};
00111 
00112 inline F32vec4 unpack_low(const F32vec4 &a,const F32vec4 &b) { return _mm_unpacklo_ps(a,b); }
00113 inline F32vec4 unpack_high(const F32vec4 &a,const F32vec4 &b) { return _mm_unpackhi_ps(a,b); }
00114 inline int move_mask(const F32vec4 &a) { return _mm_movemask_ps(a); }
00115 inline void loadu(F32vec4 &a,float *p) { a = _mm_loadu_ps(p); }
00116 inline void storeu(float *p,const F32vec4 &a) { _mm_storeu_ps(p,a); }
00117 inline void store_nta(float *p,F32vec4 &a) { _mm_stream_ps(p,a); }
00118 
00119 #define Fvec32s4_SELECT(op) inline F32vec4 select_##op (const F32vec4 &a,const F32vec4 &b,const F32vec4 &c,const F32vec4 &d) { F32vec4 mask = _mm_cmp##op##_ps(a,b); return((mask & c) | F32vec4((_mm_andnot_ps(mask,d)))); }
00120 Fvec32s4_SELECT(eq)
00121 Fvec32s4_SELECT(lt)
00122 Fvec32s4_SELECT(le)
00123 Fvec32s4_SELECT(gt)
00124 Fvec32s4_SELECT(ge)
00125 Fvec32s4_SELECT(neq)
00126 Fvec32s4_SELECT(nlt)
00127 Fvec32s4_SELECT(nle)
00128 Fvec32s4_SELECT(ngt)
00129 Fvec32s4_SELECT(nge)
00130 #undef Fvec32s4_SELECT
00131 
00132 inline Is16vec4 simd_max(const Is16vec4 &a,const Is16vec4 &b) { return _m_pmaxsw(a,b); }
00133 inline Is16vec4 simd_min(const Is16vec4 &a,const Is16vec4 &b) { return _m_pminsw(a,b); }
00134 inline Iu8vec8 simd_max(const Iu8vec8 &a,const Iu8vec8 &b) { return _m_pmaxub(a,b); }
00135 inline Iu8vec8 simd_min(const Iu8vec8 &a,const Iu8vec8 &b) { return _m_pminub(a,b); }
00136 inline Iu16vec4 simd_avg(const Iu16vec4 &a,const Iu16vec4 &b) { return _m_pavgw(a,b); }
00137 inline Iu8vec8 simd_avg(const Iu8vec8 &a,const Iu8vec8 &b) { return _m_pavgb(a,b); }
00138 inline int move_mask(const I8vec8 &a) { return _m_pmovmskb(a); }
00139 inline Iu16vec4 mul_high(const Iu16vec4 &a,const Iu16vec4 &b) { return _m_pmulhuw(a,b); }
00140 inline void mask_move(const I8vec8 &a,const I8vec8 &b,char *addr) { _m_maskmovq(a,b,addr); }
00141 inline void store_nta(__m64 *p,M64 &a) { _mm_stream_pi(p,a); }
00142 inline int F32vec4ToInt(const F32vec4 &a) { return _mm_cvtt_ss2si(a); }
00143 inline Is32vec2 F32vec4ToIs32vec2 (const F32vec4 &a) {
00144   __m64 result;
00145   result = _mm_cvtt_ps2pi(a);
00146   return Is32vec2(result);
00147 }
00148 
00149 inline F32vec4 IntToF32vec4(const F32vec4 &a,int i) {
00150   __m128 result;
00151   result = _mm_cvt_si2ss(a,i);
00152   return F32vec4(result);
00153 }
00154 
00155 inline F32vec4 Is32vec2ToF32vec4(const F32vec4 &a,const Is32vec2 &b) {
00156   __m128 result;
00157   result = _mm_cvt_pi2ps(a,b);
00158   return F32vec4(result);
00159 }
00160 
// One single-precision float carried in the lowest lane of an SSE register.
//
// Arithmetic, sqrt/rcp/rsqrt, min/max and comparisons use the scalar (_ss)
// intrinsics and therefore act on lane 0 only; the bitwise operators use the
// packed (_ps) intrinsics and act on all 128 bits.
class F32vec1 {
protected:
  __m128 vec;
public:
  // Leaves the register uninitialised.
  F32vec1() {}
  // Converts i to float in lane 0. The conversion merges into a zeroed
  // register rather than the not-yet-initialised member, so no indeterminate
  // value is read and the upper lanes are zero, matching the float/double
  // constructors (_mm_set_ss also zeroes them).
  F32vec1(int i) : vec(_mm_cvt_si2ss(_mm_setzero_ps(), i)) {}
  // `explicit` is spelled out here (the header's EXPLICIT macro expands to
  // exactly this keyword).
  explicit F32vec1(float f) : vec(_mm_set_ss(f)) {}
  explicit F32vec1(double d) : vec(_mm_set_ss(static_cast<float>(d))) {}
  // Wraps an existing register value; implicit so intrinsic results convert.
  F32vec1(__m128 m) : vec(m) {}

  operator __m128() const { return vec; }

  // Bitwise operations on the raw register bits.
  friend F32vec1 operator &(const F32vec1 &a, const F32vec1 &b) { return _mm_and_ps(a.vec, b.vec); }
  friend F32vec1 operator |(const F32vec1 &a, const F32vec1 &b) { return _mm_or_ps(a.vec, b.vec); }
  friend F32vec1 operator ^(const F32vec1 &a, const F32vec1 &b) { return _mm_xor_ps(a.vec, b.vec); }

  // Scalar arithmetic on lane 0.
  friend F32vec1 operator +(const F32vec1 &a, const F32vec1 &b) { return _mm_add_ss(a.vec, b.vec); }
  friend F32vec1 operator -(const F32vec1 &a, const F32vec1 &b) { return _mm_sub_ss(a.vec, b.vec); }
  friend F32vec1 operator *(const F32vec1 &a, const F32vec1 &b) { return _mm_mul_ss(a.vec, b.vec); }
  friend F32vec1 operator /(const F32vec1 &a, const F32vec1 &b) { return _mm_div_ss(a.vec, b.vec); }

  // Compound assignment. The right-hand side is only read, so it is taken by
  // const reference; this also lets const objects and temporaries be used.
  F32vec1& operator +=(const F32vec1 &a) { vec = _mm_add_ss(vec, a.vec); return *this; }
  F32vec1& operator -=(const F32vec1 &a) { vec = _mm_sub_ss(vec, a.vec); return *this; }
  F32vec1& operator *=(const F32vec1 &a) { vec = _mm_mul_ss(vec, a.vec); return *this; }
  F32vec1& operator /=(const F32vec1 &a) { vec = _mm_div_ss(vec, a.vec); return *this; }
  F32vec1& operator &=(const F32vec1 &a) { vec = _mm_and_ps(vec, a.vec); return *this; }
  F32vec1& operator |=(const F32vec1 &a) { vec = _mm_or_ps(vec, a.vec); return *this; }
  F32vec1& operator ^=(const F32vec1 &a) { vec = _mm_xor_ps(vec, a.vec); return *this; }

  friend F32vec1 sqrt(const F32vec1 &a) { return _mm_sqrt_ss(a.vec); }
  // Hardware estimates of 1/a and 1/sqrt(a).
  friend F32vec1 rcp(const F32vec1 &a) { return _mm_rcp_ss(a.vec); }
  friend F32vec1 rsqrt(const F32vec1 &a) { return _mm_rsqrt_ss(a.vec); }

  // Reciprocal estimate refined by one Newton-Raphson step: 2*r - (r*a)*r.
  friend F32vec1 rcp_nr(const F32vec1 &a) {
    const __m128 r = _mm_rcp_ss(a.vec);
    return _mm_sub_ss(_mm_add_ss(r, r), _mm_mul_ss(_mm_mul_ss(r, a.vec), r));
  }

  // Reciprocal-square-root estimate refined by one Newton-Raphson step:
  // (0.5*r) * (3 - (a*r)*r).
  friend F32vec1 rsqrt_nr(const F32vec1 &a) {
    static const F32vec1 fvecf0pt5(0.5f);
    static const F32vec1 fvecf3pt0(3.0f);
    const F32vec1 r = _mm_rsqrt_ss(a.vec);
    return (fvecf0pt5 * r) * (fvecf3pt0 - (a * r) * r);
  }

  // Scalar comparisons cmpeq, cmplt, ...; each forwards to the matching
  // _mm_cmp<op>_ss intrinsic and returns its mask.
#define Fvec32s1_COMP(op) friend F32vec1 cmp##op (const F32vec1 &a,const F32vec1 &b) { return _mm_cmp##op##_ss(a,b); }
  Fvec32s1_COMP(eq)
  Fvec32s1_COMP(lt)
  Fvec32s1_COMP(le)
  Fvec32s1_COMP(gt)
  Fvec32s1_COMP(ge)
  Fvec32s1_COMP(neq)
  Fvec32s1_COMP(nlt)
  Fvec32s1_COMP(nle)
  Fvec32s1_COMP(ngt)
  Fvec32s1_COMP(nge)
#undef Fvec32s1_COMP

  friend F32vec1 simd_min(const F32vec1 &a, const F32vec1 &b) { return _mm_min_ss(a.vec, b.vec); }
  friend F32vec1 simd_max(const F32vec1 &a, const F32vec1 &b) { return _mm_max_ss(a.vec, b.vec); }

#if defined(_ENABLE_VEC_DEBUG)
  // Debug output of lane 0.
  friend std::ostream & operator<<(std::ostream & os, const F32vec1 &a) {
    const float *fp = reinterpret_cast<const float*>(&a.vec);
    os << "float:" << *fp;
    return os;
  }
#endif
};
00222 
00223 #define Fvec32s1_SELECT(op) inline F32vec1 select_##op (const F32vec1 &a,const F32vec1 &b,const F32vec1 &c,const F32vec1 &d) { F32vec1 mask = _mm_cmp##op##_ss(a,b); return((mask & c) | F32vec1((_mm_andnot_ps(mask,d)))); }
00224 Fvec32s1_SELECT(eq)
00225 Fvec32s1_SELECT(lt)
00226 Fvec32s1_SELECT(le)
00227 Fvec32s1_SELECT(gt)
00228 Fvec32s1_SELECT(ge)
00229 Fvec32s1_SELECT(neq)
00230 Fvec32s1_SELECT(nlt)
00231 Fvec32s1_SELECT(nle)
00232 Fvec32s1_SELECT(ngt)
00233 Fvec32s1_SELECT(nge)
00234 #undef Fvec32s1_SELECT
00235 
00236 inline int F32vec1ToInt(const F32vec1 &a)
00237 {
00238   return _mm_cvtt_ss2si(a);
00239 }
00240 
00241 #pragma pack(pop)
00242 #pragma pack(pop)
00243 #endif
00244 #endif

Generated on Mon May 28 2012 04:29:43 for ReactOS by doxygen 1.7.6.1

ReactOS is a registered trademark or a trademark of ReactOS Foundation in the United States and other countries.