ReactOS  0.4.15-dev-5097-g328cc41
emmintrin.h
Go to the documentation of this file.
1 /*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
2  *
3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  * See https://llvm.org/LICENSE.txt for license information.
5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  *
7  *===-----------------------------------------------------------------------===
8  */
9 
10 #pragma once
11 #ifndef _INCLUDED_EMM
12 #define _INCLUDED_EMM
13 
14 #include <crtdefs.h>
15 #include <xmmintrin.h>
16 
/*
 * SSE2 vector type definitions.
 * The MSVC (non-clang) branch declares aggregate types with named lane arrays;
 * the other branch declares GCC/Clang vector_size extension types.
 */
17 #if defined(_MSC_VER) && !defined(__clang__)
18 
/* 128-bit integer vector: the same 16 bytes exposed as signed and unsigned
 * 8-, 16-, 32- and 64-bit lane arrays. */
19 typedef union _DECLSPEC_INTRIN_TYPE _CRT_ALIGN(16) __m128i
20 {
21  __int8 m128i_i8[16];
22  __int16 m128i_i16[8];
23  __int32 m128i_i32[4];
24  __int64 m128i_i64[2];
25  unsigned __int8 m128i_u8[16];
26  unsigned __int16 m128i_u16[8];
27  unsigned __int32 m128i_u32[4];
28  unsigned __int64 m128i_u64[2];
29 } __m128i;
/* Compile-time check that the union occupies exactly 16 bytes. */
30 _STATIC_ASSERT(sizeof(__m128i) == 16);
31 
/* 128-bit vector holding two doubles. */
32 typedef struct _DECLSPEC_INTRIN_TYPE _CRT_ALIGN(16) __m128d
33 {
34  double m128d_f64[2];
35 } __m128d;
36 
/* __m128i declared with 1-byte alignment; used as the pointee type of the
 * unaligned load/store prototypes further down. */
37 typedef __declspec(align(1)) __m128i __m128i_u;
38 
/* Expands to nothing in this branch. */
/* NOTE(review): __INTRIN_INLINE_SSE2 is only defined in the #else branch. */
39 #define __ATTRIBUTE_SSE2__
40 
41 #else /* _MSC_VER */
42 
/* Public 128-bit types, 16-byte aligned. */
43 typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16)));
44 typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16)));
45 
/* Same element layout as above, but declared with 1-byte alignment. */
46 typedef double __m128d_u __attribute__((__vector_size__(16), __aligned__(1)));
47 typedef long long __m128i_u __attribute__((__vector_size__(16), __aligned__(1)));
48 
49 /* Type defines. */
50 typedef double __v2df __attribute__((__vector_size__(16)));
51 typedef long long __v2di __attribute__((__vector_size__(16)));
52 typedef short __v8hi __attribute__((__vector_size__(16)));
53 typedef char __v16qi __attribute__((__vector_size__(16)));
54 
55 /* Unsigned types */
56 typedef unsigned long long __v2du __attribute__((__vector_size__(16)));
57 typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
58 typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
59 
60 /* We need an explicitly signed variant for char. Note that this shouldn't
61  * appear in the interface though. */
62 typedef signed char __v16qs __attribute__((__vector_size__(16)));
63 
/* Function attribute that selects the "sse2" target for each inline
 * definition; the clang variant additionally sets __min_vector_width__(128). */
64 #ifdef __clang__
65 #define __ATTRIBUTE_SSE2__ __attribute__((__target__("sse2"),__min_vector_width__(128)))
66 #else
67 #define __ATTRIBUTE_SSE2__ __attribute__((__target__("sse2")))
68 #endif
/* Prefix placed on every inline intrinsic definition in the non-MSVC branch. */
69 #define __INTRIN_INLINE_SSE2 __INTRIN_INLINE __ATTRIBUTE_SSE2__
70 
71 #endif /* _MSC_VER */
72 
/*
 * Prototypes for the intrinsics that take or produce __m128d.
 * In the MSVC branch these names are bound to compiler intrinsics by the
 * #pragma intrinsic list further down; in the other branch they are given
 * inline definitions later in this header.
 * Entries that are commented out are not declared by this header.
 */
73 extern __m128d _mm_add_sd(__m128d a, __m128d b);
74 extern __m128d _mm_add_pd(__m128d a, __m128d b);
75 extern __m128d _mm_sub_sd(__m128d a, __m128d b);
76 extern __m128d _mm_sub_pd(__m128d a, __m128d b);
77 extern __m128d _mm_mul_sd(__m128d a, __m128d b);
78 extern __m128d _mm_mul_pd(__m128d a, __m128d b);
79 extern __m128d _mm_div_sd(__m128d a, __m128d b);
80 extern __m128d _mm_div_pd(__m128d a, __m128d b);
81 extern __m128d _mm_sqrt_sd(__m128d a, __m128d b);
82 extern __m128d _mm_sqrt_pd(__m128d a);
83 extern __m128d _mm_min_sd(__m128d a, __m128d b);
84 extern __m128d _mm_min_pd(__m128d a, __m128d b);
85 extern __m128d _mm_max_sd(__m128d a, __m128d b);
86 extern __m128d _mm_max_pd(__m128d a, __m128d b);
87 extern __m128d _mm_and_pd(__m128d a, __m128d b);
88 extern __m128d _mm_andnot_pd(__m128d a, __m128d b);
89 extern __m128d _mm_or_pd(__m128d a, __m128d b);
90 extern __m128d _mm_xor_pd(__m128d a, __m128d b);
/* Comparisons returning a __m128d result. */
91 extern __m128d _mm_cmpeq_pd(__m128d a, __m128d b);
92 extern __m128d _mm_cmplt_pd(__m128d a, __m128d b);
93 extern __m128d _mm_cmple_pd(__m128d a, __m128d b);
94 extern __m128d _mm_cmpgt_pd(__m128d a, __m128d b);
95 extern __m128d _mm_cmpge_pd(__m128d a, __m128d b);
96 extern __m128d _mm_cmpord_pd(__m128d a, __m128d b);
97 extern __m128d _mm_cmpunord_pd(__m128d a, __m128d b);
98 extern __m128d _mm_cmpneq_pd(__m128d a, __m128d b);
99 extern __m128d _mm_cmpnlt_pd(__m128d a, __m128d b);
100 extern __m128d _mm_cmpnle_pd(__m128d a, __m128d b);
101 extern __m128d _mm_cmpngt_pd(__m128d a, __m128d b);
102 extern __m128d _mm_cmpnge_pd(__m128d a, __m128d b);
103 extern __m128d _mm_cmpeq_sd(__m128d a, __m128d b);
104 extern __m128d _mm_cmplt_sd(__m128d a, __m128d b);
105 extern __m128d _mm_cmple_sd(__m128d a, __m128d b);
106 extern __m128d _mm_cmpgt_sd(__m128d a, __m128d b);
107 extern __m128d _mm_cmpge_sd(__m128d a, __m128d b);
108 extern __m128d _mm_cmpord_sd(__m128d a, __m128d b);
109 extern __m128d _mm_cmpunord_sd(__m128d a, __m128d b);
110 extern __m128d _mm_cmpneq_sd(__m128d a, __m128d b);
111 extern __m128d _mm_cmpnlt_sd(__m128d a, __m128d b);
112 extern __m128d _mm_cmpnle_sd(__m128d a, __m128d b);
113 extern __m128d _mm_cmpngt_sd(__m128d a, __m128d b);
114 extern __m128d _mm_cmpnge_sd(__m128d a, __m128d b);
/* Comparisons returning an int. */
115 extern int _mm_comieq_sd(__m128d a, __m128d b);
116 extern int _mm_comilt_sd(__m128d a, __m128d b);
117 extern int _mm_comile_sd(__m128d a, __m128d b);
118 extern int _mm_comigt_sd(__m128d a, __m128d b);
119 extern int _mm_comige_sd(__m128d a, __m128d b);
120 extern int _mm_comineq_sd(__m128d a, __m128d b);
121 extern int _mm_ucomieq_sd(__m128d a, __m128d b);
122 extern int _mm_ucomilt_sd(__m128d a, __m128d b);
123 extern int _mm_ucomile_sd(__m128d a, __m128d b);
124 extern int _mm_ucomigt_sd(__m128d a, __m128d b);
125 extern int _mm_ucomige_sd(__m128d a, __m128d b);
126 extern int _mm_ucomineq_sd(__m128d a, __m128d b);
/* Conversions between __m128d and __m128, __m128i, __m64, int and double. */
127 extern __m128 _mm_cvtpd_ps(__m128d a);
128 extern __m128d _mm_cvtps_pd(__m128 a);
129 extern __m128d _mm_cvtepi32_pd(__m128i a);
130 extern __m128i _mm_cvtpd_epi32(__m128d a);
131 extern int _mm_cvtsd_si32(__m128d a);
132 extern __m128 _mm_cvtsd_ss(__m128 a, __m128d b);
133 extern __m128d _mm_cvtsi32_sd(__m128d a, int b);
134 extern __m128d _mm_cvtss_sd(__m128d a, __m128 b);
135 extern __m128i _mm_cvttpd_epi32(__m128d a);
136 extern int _mm_cvttsd_si32(__m128d a);
137 extern __m64 _mm_cvtpd_pi32(__m128d a);
138 extern __m64 _mm_cvttpd_pi32(__m128d a);
139 extern __m128d _mm_cvtpi32_pd(__m64 a);
140 extern double _mm_cvtsd_f64(__m128d a);
/* Loads from memory into __m128d. */
141 extern __m128d _mm_load_pd(double const *dp);
142 extern __m128d _mm_load1_pd(double const *dp);
143 extern __m128d _mm_loadr_pd(double const *dp);
144 extern __m128d _mm_loadu_pd(double const *dp);
145 //extern __m128i _mm_loadu_si64(void const *a);
146 //extern __m128i _mm_loadu_si32(void const *a);
147 //extern __m128i _mm_loadu_si16(void const *a);
148 extern __m128d _mm_load_sd(double const *dp);
149 extern __m128d _mm_loadh_pd(__m128d a, double const *dp);
150 extern __m128d _mm_loadl_pd(__m128d a, double const *dp);
151 //extern __m128d _mm_undefined_pd(void);
/* Construction of __m128d values from scalars. */
152 extern __m128d _mm_set_sd(double w);
153 extern __m128d _mm_set1_pd(double w);
154 extern __m128d _mm_set_pd(double w, double x);
155 extern __m128d _mm_setr_pd(double w, double x);
156 extern __m128d _mm_setzero_pd(void);
157 extern __m128d _mm_move_sd(__m128d a, __m128d b);
/* Stores of __m128d to memory. */
158 extern void _mm_store_sd(double *dp, __m128d a);
159 extern void _mm_store_pd(double *dp, __m128d a);
160 extern void _mm_store1_pd(double *dp, __m128d a);
161 extern void _mm_storeu_pd(double *dp, __m128d a);
162 extern void _mm_storer_pd(double *dp, __m128d a);
163 extern void _mm_storeh_pd(double *dp, __m128d a);
164 extern void _mm_storel_pd(double *dp, __m128d a);
/*
 * Prototypes for the intrinsics operating on __m128i (plus a few on __m64).
 * Prototypes that use long long scalars are only declared when _M_AMD64 is
 * defined.
 */
165 extern __m128i _mm_add_epi8(__m128i a, __m128i b);
166 extern __m128i _mm_add_epi16(__m128i a, __m128i b);
167 extern __m128i _mm_add_epi32(__m128i a, __m128i b);
168 extern __m64 _mm_add_si64(__m64 a, __m64 b);
169 extern __m128i _mm_add_epi64(__m128i a, __m128i b);
170 extern __m128i _mm_adds_epi8(__m128i a, __m128i b);
171 extern __m128i _mm_adds_epi16(__m128i a, __m128i b);
172 extern __m128i _mm_adds_epu8(__m128i a, __m128i b);
173 extern __m128i _mm_adds_epu16(__m128i a, __m128i b);
174 extern __m128i _mm_avg_epu8(__m128i a, __m128i b);
175 extern __m128i _mm_avg_epu16(__m128i a, __m128i b);
176 extern __m128i _mm_madd_epi16(__m128i a, __m128i b);
177 extern __m128i _mm_max_epi16(__m128i a, __m128i b);
178 extern __m128i _mm_max_epu8(__m128i a, __m128i b);
179 extern __m128i _mm_min_epi16(__m128i a, __m128i b);
180 extern __m128i _mm_min_epu8(__m128i a, __m128i b);
181 extern __m128i _mm_mulhi_epi16(__m128i a, __m128i b);
182 extern __m128i _mm_mulhi_epu16(__m128i a, __m128i b);
183 extern __m128i _mm_mullo_epi16(__m128i a, __m128i b);
184 extern __m64 _mm_mul_su32(__m64 a, __m64 b);
185 extern __m128i _mm_mul_epu32(__m128i a, __m128i b);
186 extern __m128i _mm_sad_epu8(__m128i a, __m128i b);
187 extern __m128i _mm_sub_epi8(__m128i a, __m128i b);
188 extern __m128i _mm_sub_epi16(__m128i a, __m128i b);
189 extern __m128i _mm_sub_epi32(__m128i a, __m128i b);
190 extern __m64 _mm_sub_si64(__m64 a, __m64 b);
191 extern __m128i _mm_sub_epi64(__m128i a, __m128i b);
192 extern __m128i _mm_subs_epi8(__m128i a, __m128i b);
193 extern __m128i _mm_subs_epi16(__m128i a, __m128i b);
194 extern __m128i _mm_subs_epu8(__m128i a, __m128i b);
195 extern __m128i _mm_subs_epu16(__m128i a, __m128i b);
196 extern __m128i _mm_and_si128(__m128i a, __m128i b);
197 extern __m128i _mm_andnot_si128(__m128i a, __m128i b);
198 extern __m128i _mm_or_si128(__m128i a, __m128i b);
199 extern __m128i _mm_xor_si128(__m128i a, __m128i b);
/* Shifts: the *i_* forms take the amount as an int, the others as a __m128i. */
200 extern __m128i _mm_slli_si128(__m128i a, int i);
201 extern __m128i _mm_slli_epi16(__m128i a, int count);
202 extern __m128i _mm_sll_epi16(__m128i a, __m128i count);
203 extern __m128i _mm_slli_epi32(__m128i a, int count);
204 extern __m128i _mm_sll_epi32(__m128i a, __m128i count);
205 extern __m128i _mm_slli_epi64(__m128i a, int count);
206 extern __m128i _mm_sll_epi64(__m128i a, __m128i count);
207 extern __m128i _mm_srai_epi16(__m128i a, int count);
208 extern __m128i _mm_sra_epi16(__m128i a, __m128i count);
209 extern __m128i _mm_srai_epi32(__m128i a, int count);
210 extern __m128i _mm_sra_epi32(__m128i a, __m128i count);
211 extern __m128i _mm_srli_si128(__m128i a, int imm);
212 extern __m128i _mm_srli_epi16(__m128i a, int count);
213 extern __m128i _mm_srl_epi16(__m128i a, __m128i count);
214 extern __m128i _mm_srli_epi32(__m128i a, int count);
215 extern __m128i _mm_srl_epi32(__m128i a, __m128i count);
216 extern __m128i _mm_srli_epi64(__m128i a, int count);
217 extern __m128i _mm_srl_epi64(__m128i a, __m128i count);
/* Integer comparisons. */
218 extern __m128i _mm_cmpeq_epi8(__m128i a, __m128i b);
219 extern __m128i _mm_cmpeq_epi16(__m128i a, __m128i b);
220 extern __m128i _mm_cmpeq_epi32(__m128i a, __m128i b);
221 extern __m128i _mm_cmpgt_epi8(__m128i a, __m128i b);
222 extern __m128i _mm_cmpgt_epi16(__m128i a, __m128i b);
223 extern __m128i _mm_cmpgt_epi32(__m128i a, __m128i b);
224 extern __m128i _mm_cmplt_epi8(__m128i a, __m128i b);
225 extern __m128i _mm_cmplt_epi16(__m128i a, __m128i b);
226 extern __m128i _mm_cmplt_epi32(__m128i a, __m128i b);
/* Conversions; the long long variants are restricted to _M_AMD64 builds. */
227 #ifdef _M_AMD64
228 extern __m128d _mm_cvtsi64_sd(__m128d a, long long b);
229 extern long long _mm_cvtsd_si64(__m128d a);
230 extern long long _mm_cvttsd_si64(__m128d a);
231 #endif
232 extern __m128 _mm_cvtepi32_ps(__m128i a);
233 extern __m128i _mm_cvtps_epi32(__m128 a);
234 extern __m128i _mm_cvttps_epi32(__m128 a);
235 extern __m128i _mm_cvtsi32_si128(int a);
236 #ifdef _M_AMD64
237 extern __m128i _mm_cvtsi64_si128(long long a);
238 #endif
239 extern int _mm_cvtsi128_si32(__m128i a);
240 #ifdef _M_AMD64
241 extern long long _mm_cvtsi128_si64(__m128i a);
242 #endif
/*
 * Prototypes for __m128i loads, constructors and stores, cache-control
 * operations, pack/unpack/shuffle operations and casts between the 128-bit
 * types. Entries that are commented out are not declared by this header.
 */
243 extern __m128i _mm_load_si128(__m128i const *p);
244 extern __m128i _mm_loadu_si128(__m128i_u const *p);
245 extern __m128i _mm_loadl_epi64(__m128i_u const *p);
246 //extern __m128i _mm_undefined_si128(void);
247 //extern __m128i _mm_set_epi64x(long long q1, long long q0); // FIXME
248 extern __m128i _mm_set_epi64(__m64 q1, __m64 q0);
249 //extern __m128i _mm_set_epi32(int i3, int i1, int i0);
250 extern __m128i _mm_set_epi32(int i3, int i2, int i1, int i0);
251 //extern __m128i _mm_set_epi16(short w7, short w2, short w1, short w0);
252 extern __m128i _mm_set_epi16(short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0);
253 //extern __m128i _mm_set_epi8(char b15, char b10, char b4, char b3, char b2, char b1, char b0);
254 extern __m128i _mm_set_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0);
255 //extern __m128i _mm_set1_epi64x(long long q); // FIXME
256 extern __m128i _mm_set1_epi64(__m64 q);
257 extern __m128i _mm_set1_epi32(int i);
258 extern __m128i _mm_set1_epi16(short w);
259 extern __m128i _mm_set1_epi8(char b);
260 extern __m128i _mm_setl_epi64(__m128i q); // FIXME: clang?
261 extern __m128i _mm_setr_epi64(__m64 q0, __m64 q1);
262 //extern __m128i _mm_setr_epi32(int i0, int i2, int i3);
263 extern __m128i _mm_setr_epi32(int i0, int i1, int i2, int i3);
264 //extern __m128i _mm_setr_epi16(short w0, short w5, short w6, short w7);
265 extern __m128i _mm_setr_epi16(short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7);
266 //extern __m128i _mm_setr_epi8(char b0, char b6, char b11, char b12, char b13, char b14, char b15);
/* NOTE(review): the parameters below are named b15..b0, whereas the other
 * _mm_setr_* prototypes and the commented-out line above count upwards
 * (b0..b15). Prototype parameter names do not affect callers, but the naming
 * is probably a copy of _mm_set_epi8 - confirm and rename. */
267 extern __m128i _mm_setr_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0);
268 extern __m128i _mm_setzero_si128(void);
269 extern void _mm_store_si128(__m128i *p, __m128i b);
270 extern void _mm_storeu_si128(__m128i_u *p, __m128i b);
271 //extern void _mm_storeu_si64(void *p, __m128i b);
272 //extern void _mm_storeu_si32(void *p, __m128i b);
273 //extern void _mm_storeu_si16(void *p, __m128i b);
274 extern void _mm_maskmoveu_si128(__m128i d, __m128i n, _Out_writes_bytes_(16) char *p);
275 extern void _mm_storel_epi64(__m128i_u *p, __m128i a);
276 extern void _mm_stream_pd(double *p, __m128d a);
277 extern void _mm_stream_si128(__m128i *p, __m128i a);
278 extern void _mm_stream_si32(int *p, int a);
279 extern void _mm_clflush(void const *p);
280 extern void _mm_lfence(void);
281 extern void _mm_mfence(void);
282 extern __m128i _mm_packs_epi16(__m128i a, __m128i b);
283 extern __m128i _mm_packs_epi32(__m128i a, __m128i b);
284 extern __m128i _mm_packus_epi16(__m128i a, __m128i b);
285 extern int _mm_extract_epi16(__m128i a, int imm);
286 extern __m128i _mm_insert_epi16(__m128i a, int b, int imm);
287 extern int _mm_movemask_epi8(__m128i a);
288 extern __m128i _mm_shuffle_epi32(__m128i a, int imm);
289 extern __m128i _mm_shufflelo_epi16(__m128i a, int imm);
290 extern __m128i _mm_shufflehi_epi16(__m128i a, int imm);
291 extern __m128i _mm_unpackhi_epi8(__m128i a, __m128i b);
292 extern __m128i _mm_unpackhi_epi16(__m128i a, __m128i b);
293 extern __m128i _mm_unpackhi_epi32(__m128i a, __m128i b);
294 extern __m128i _mm_unpackhi_epi64(__m128i a, __m128i b);
295 extern __m128i _mm_unpacklo_epi8(__m128i a, __m128i b);
296 extern __m128i _mm_unpacklo_epi16(__m128i a, __m128i b);
297 extern __m128i _mm_unpacklo_epi32(__m128i a, __m128i b);
298 extern __m128i _mm_unpacklo_epi64(__m128i a, __m128i b);
299 extern __m64 _mm_movepi64_pi64(__m128i a);
300 extern __m128i _mm_movpi64_epi64(__m64 a);
301 extern __m128i _mm_move_epi64(__m128i a);
302 extern __m128d _mm_unpackhi_pd(__m128d a, __m128d b);
303 extern __m128d _mm_unpacklo_pd(__m128d a, __m128d b);
304 extern int _mm_movemask_pd(__m128d a);
305 extern __m128d _mm_shuffle_pd(__m128d a, __m128d b, int imm);
/* Casts between the three 128-bit vector types. */
306 extern __m128 _mm_castpd_ps(__m128d a);
307 extern __m128i _mm_castpd_si128(__m128d a);
308 extern __m128d _mm_castps_pd(__m128 a);
309 extern __m128i _mm_castps_si128(__m128 a);
310 extern __m128 _mm_castsi128_ps(__m128i a);
311 extern __m128d _mm_castsi128_pd(__m128i a);
/* NOTE(review): declared without `extern`, unlike every prototype above; the
 * linkage is the same for a function declaration, so this is cosmetic only. */
312 void _mm_pause(void);
313 
314 /* Alternate names */
/* Each macro forwards an alternate spelling to the intrinsic named on its
 * right-hand side. */
315 #define _mm_set_pd1(a) _mm_set1_pd(a)
316 #define _mm_load_pd1(p) _mm_load1_pd(p)
317 #define _mm_store_pd1(p, a) _mm_store1_pd((p), (a))
318 #define _mm_bslli_si128 _mm_slli_si128
319 #define _mm_bsrli_si128 _mm_srli_si128
/* NOTE(review): no prototype for _mm_stream_si64x precedes this point in the
 * header - confirm it is declared elsewhere before relying on this alias. */
320 #define _mm_stream_si64 _mm_stream_si64x
321 
322 #if defined(_MSC_VER) && !defined(__clang__)
323 
/*
 * MSVC branch: mark the __m128d prototypes declared above as compiler
 * intrinsics. The commented-out entries either have a commented-out prototype
 * above or take/return __m64.
 */
324 #pragma intrinsic(_mm_add_sd)
325 #pragma intrinsic(_mm_add_pd)
326 #pragma intrinsic(_mm_sub_sd)
327 #pragma intrinsic(_mm_sub_pd)
328 #pragma intrinsic(_mm_mul_sd)
329 #pragma intrinsic(_mm_mul_pd)
330 #pragma intrinsic(_mm_div_sd)
331 #pragma intrinsic(_mm_div_pd)
332 #pragma intrinsic(_mm_sqrt_sd)
333 #pragma intrinsic(_mm_sqrt_pd)
334 #pragma intrinsic(_mm_min_sd)
335 #pragma intrinsic(_mm_min_pd)
336 #pragma intrinsic(_mm_max_sd)
337 #pragma intrinsic(_mm_max_pd)
338 #pragma intrinsic(_mm_and_pd)
339 #pragma intrinsic(_mm_andnot_pd)
340 #pragma intrinsic(_mm_or_pd)
341 #pragma intrinsic(_mm_xor_pd)
342 #pragma intrinsic(_mm_cmpeq_pd)
343 #pragma intrinsic(_mm_cmplt_pd)
344 #pragma intrinsic(_mm_cmple_pd)
345 #pragma intrinsic(_mm_cmpgt_pd)
346 #pragma intrinsic(_mm_cmpge_pd)
347 #pragma intrinsic(_mm_cmpord_pd)
348 #pragma intrinsic(_mm_cmpunord_pd)
349 #pragma intrinsic(_mm_cmpneq_pd)
350 #pragma intrinsic(_mm_cmpnlt_pd)
351 #pragma intrinsic(_mm_cmpnle_pd)
352 #pragma intrinsic(_mm_cmpngt_pd)
353 #pragma intrinsic(_mm_cmpnge_pd)
354 #pragma intrinsic(_mm_cmpeq_sd)
355 #pragma intrinsic(_mm_cmplt_sd)
356 #pragma intrinsic(_mm_cmple_sd)
357 #pragma intrinsic(_mm_cmpgt_sd)
358 #pragma intrinsic(_mm_cmpge_sd)
359 #pragma intrinsic(_mm_cmpord_sd)
360 #pragma intrinsic(_mm_cmpunord_sd)
361 #pragma intrinsic(_mm_cmpneq_sd)
362 #pragma intrinsic(_mm_cmpnlt_sd)
363 #pragma intrinsic(_mm_cmpnle_sd)
364 #pragma intrinsic(_mm_cmpngt_sd)
365 #pragma intrinsic(_mm_cmpnge_sd)
366 #pragma intrinsic(_mm_comieq_sd)
367 #pragma intrinsic(_mm_comilt_sd)
368 #pragma intrinsic(_mm_comile_sd)
369 #pragma intrinsic(_mm_comigt_sd)
370 #pragma intrinsic(_mm_comige_sd)
371 #pragma intrinsic(_mm_comineq_sd)
372 #pragma intrinsic(_mm_ucomieq_sd)
373 #pragma intrinsic(_mm_ucomilt_sd)
374 #pragma intrinsic(_mm_ucomile_sd)
375 #pragma intrinsic(_mm_ucomigt_sd)
376 #pragma intrinsic(_mm_ucomige_sd)
377 #pragma intrinsic(_mm_ucomineq_sd)
378 #pragma intrinsic(_mm_cvtpd_ps)
379 #pragma intrinsic(_mm_cvtps_pd)
380 #pragma intrinsic(_mm_cvtepi32_pd)
381 #pragma intrinsic(_mm_cvtpd_epi32)
382 #pragma intrinsic(_mm_cvtsd_si32)
383 #pragma intrinsic(_mm_cvtsd_ss)
384 #pragma intrinsic(_mm_cvtsi32_sd)
385 #pragma intrinsic(_mm_cvtss_sd)
386 #pragma intrinsic(_mm_cvttpd_epi32)
387 #pragma intrinsic(_mm_cvttsd_si32)
388 //#pragma intrinsic(_mm_cvtpd_pi32)
389 //#pragma intrinsic(_mm_cvttpd_pi32)
390 //#pragma intrinsic(_mm_cvtpi32_pd)
391 #pragma intrinsic(_mm_cvtsd_f64)
392 #pragma intrinsic(_mm_load_pd)
393 #pragma intrinsic(_mm_load1_pd)
394 #pragma intrinsic(_mm_loadr_pd)
395 #pragma intrinsic(_mm_loadu_pd)
396 //#pragma intrinsic(_mm_loadu_si64)
397 //#pragma intrinsic(_mm_loadu_si32)
398 //#pragma intrinsic(_mm_loadu_si16)
399 #pragma intrinsic(_mm_load_sd)
400 #pragma intrinsic(_mm_loadh_pd)
401 #pragma intrinsic(_mm_loadl_pd)
402 //#pragma intrinsic(_mm_undefined_pd)
403 #pragma intrinsic(_mm_set_sd)
404 #pragma intrinsic(_mm_set1_pd)
405 #pragma intrinsic(_mm_set_pd)
406 #pragma intrinsic(_mm_setr_pd)
407 #pragma intrinsic(_mm_setzero_pd)
408 #pragma intrinsic(_mm_move_sd)
409 #pragma intrinsic(_mm_store_sd)
410 #pragma intrinsic(_mm_store_pd)
411 #pragma intrinsic(_mm_store1_pd)
412 #pragma intrinsic(_mm_storeu_pd)
413 #pragma intrinsic(_mm_storer_pd)
414 #pragma intrinsic(_mm_storeh_pd)
415 #pragma intrinsic(_mm_storel_pd)
/*
 * MSVC branch: mark the integer arithmetic, logic, shift, compare and
 * conversion prototypes as compiler intrinsics. The __m64-based entries are
 * commented out; the long long conversions are only listed when _M_AMD64 is
 * defined, mirroring the guards on their prototypes.
 */
416 #pragma intrinsic(_mm_add_epi8)
417 #pragma intrinsic(_mm_add_epi16)
418 #pragma intrinsic(_mm_add_epi32)
419 //#pragma intrinsic(_mm_add_si64)
420 #pragma intrinsic(_mm_add_epi64)
421 #pragma intrinsic(_mm_adds_epi8)
422 #pragma intrinsic(_mm_adds_epi16)
423 #pragma intrinsic(_mm_adds_epu8)
424 #pragma intrinsic(_mm_adds_epu16)
425 #pragma intrinsic(_mm_avg_epu8)
426 #pragma intrinsic(_mm_avg_epu16)
427 #pragma intrinsic(_mm_madd_epi16)
428 #pragma intrinsic(_mm_max_epi16)
429 #pragma intrinsic(_mm_max_epu8)
430 #pragma intrinsic(_mm_min_epi16)
431 #pragma intrinsic(_mm_min_epu8)
432 #pragma intrinsic(_mm_mulhi_epi16)
433 #pragma intrinsic(_mm_mulhi_epu16)
434 #pragma intrinsic(_mm_mullo_epi16)
435 //#pragma intrinsic(_mm_mul_su32)
436 #pragma intrinsic(_mm_mul_epu32)
437 #pragma intrinsic(_mm_sad_epu8)
438 #pragma intrinsic(_mm_sub_epi8)
439 #pragma intrinsic(_mm_sub_epi16)
440 #pragma intrinsic(_mm_sub_epi32)
441 //#pragma intrinsic(_mm_sub_si64)
442 #pragma intrinsic(_mm_sub_epi64)
443 #pragma intrinsic(_mm_subs_epi8)
444 #pragma intrinsic(_mm_subs_epi16)
445 #pragma intrinsic(_mm_subs_epu8)
446 #pragma intrinsic(_mm_subs_epu16)
447 #pragma intrinsic(_mm_and_si128)
448 #pragma intrinsic(_mm_andnot_si128)
449 #pragma intrinsic(_mm_or_si128)
450 #pragma intrinsic(_mm_xor_si128)
451 #pragma intrinsic(_mm_slli_si128)
452 #pragma intrinsic(_mm_slli_epi16)
453 #pragma intrinsic(_mm_sll_epi16)
454 #pragma intrinsic(_mm_slli_epi32)
455 #pragma intrinsic(_mm_sll_epi32)
456 #pragma intrinsic(_mm_slli_epi64)
457 #pragma intrinsic(_mm_sll_epi64)
458 #pragma intrinsic(_mm_srai_epi16)
459 #pragma intrinsic(_mm_sra_epi16)
460 #pragma intrinsic(_mm_srai_epi32)
461 #pragma intrinsic(_mm_sra_epi32)
462 #pragma intrinsic(_mm_srli_si128)
463 #pragma intrinsic(_mm_srli_epi16)
464 #pragma intrinsic(_mm_srl_epi16)
465 #pragma intrinsic(_mm_srli_epi32)
466 #pragma intrinsic(_mm_srl_epi32)
467 #pragma intrinsic(_mm_srli_epi64)
468 #pragma intrinsic(_mm_srl_epi64)
469 #pragma intrinsic(_mm_cmpeq_epi8)
470 #pragma intrinsic(_mm_cmpeq_epi16)
471 #pragma intrinsic(_mm_cmpeq_epi32)
472 #pragma intrinsic(_mm_cmpgt_epi8)
473 #pragma intrinsic(_mm_cmpgt_epi16)
474 #pragma intrinsic(_mm_cmpgt_epi32)
475 #pragma intrinsic(_mm_cmplt_epi8)
476 #pragma intrinsic(_mm_cmplt_epi16)
477 #pragma intrinsic(_mm_cmplt_epi32)
478 #ifdef _M_AMD64
479 #pragma intrinsic(_mm_cvtsi64_sd)
480 #pragma intrinsic(_mm_cvtsd_si64)
481 #pragma intrinsic(_mm_cvttsd_si64)
482 #endif
483 #pragma intrinsic(_mm_cvtepi32_ps)
484 #pragma intrinsic(_mm_cvtps_epi32)
485 #pragma intrinsic(_mm_cvttps_epi32)
486 #pragma intrinsic(_mm_cvtsi32_si128)
487 #ifdef _M_AMD64
488 #pragma intrinsic(_mm_cvtsi64_si128)
489 #endif
490 #pragma intrinsic(_mm_cvtsi128_si32)
491 #ifdef _M_AMD64
492 #pragma intrinsic(_mm_cvtsi128_si64)
493 #endif
/*
 * MSVC branch: mark the __m128i load/set/store, cache-control, pack/unpack,
 * shuffle and cast prototypes as compiler intrinsics.
 */
494 #pragma intrinsic(_mm_load_si128)
495 #pragma intrinsic(_mm_loadu_si128)
496 #pragma intrinsic(_mm_loadl_epi64)
497 //#pragma intrinsic(_mm_undefined_si128)
/* NOTE(review): _mm_set_epi64x (and _mm_set1_epi64x below) are listed here
 * although their prototypes earlier in this header are commented out with a
 * FIXME - confirm that this is intended. */
498 #pragma intrinsic(_mm_set_epi64x)
499 //#pragma intrinsic(_mm_set_epi64)
500 #pragma intrinsic(_mm_set_epi32)
501 #pragma intrinsic(_mm_set_epi16)
502 #pragma intrinsic(_mm_set_epi8)
503 #pragma intrinsic(_mm_set1_epi64x)
504 //#pragma intrinsic(_mm_set1_epi64)
505 #pragma intrinsic(_mm_set1_epi32)
506 #pragma intrinsic(_mm_set1_epi16)
507 #pragma intrinsic(_mm_set1_epi8)
508 #pragma intrinsic(_mm_setl_epi64)
509 //#pragma intrinsic(_mm_setr_epi64)
510 #pragma intrinsic(_mm_setr_epi32)
511 #pragma intrinsic(_mm_setr_epi16)
512 #pragma intrinsic(_mm_setr_epi8)
513 #pragma intrinsic(_mm_setzero_si128)
514 #pragma intrinsic(_mm_store_si128)
515 #pragma intrinsic(_mm_storeu_si128)
516 //#pragma intrinsic(_mm_storeu_si64)
517 //#pragma intrinsic(_mm_storeu_si32)
518 //#pragma intrinsic(_mm_storeu_si16)
519 #pragma intrinsic(_mm_maskmoveu_si128)
520 #pragma intrinsic(_mm_storel_epi64)
521 #pragma intrinsic(_mm_stream_pd)
522 #pragma intrinsic(_mm_stream_si128)
523 #pragma intrinsic(_mm_stream_si32)
524 #pragma intrinsic(_mm_clflush)
525 #pragma intrinsic(_mm_lfence)
526 #pragma intrinsic(_mm_mfence)
527 #pragma intrinsic(_mm_packs_epi16)
528 #pragma intrinsic(_mm_packs_epi32)
529 #pragma intrinsic(_mm_packus_epi16)
530 #pragma intrinsic(_mm_extract_epi16)
531 #pragma intrinsic(_mm_insert_epi16)
532 #pragma intrinsic(_mm_movemask_epi8)
533 #pragma intrinsic(_mm_shuffle_epi32)
534 #pragma intrinsic(_mm_shufflelo_epi16)
535 #pragma intrinsic(_mm_shufflehi_epi16)
536 #pragma intrinsic(_mm_unpackhi_epi8)
537 #pragma intrinsic(_mm_unpackhi_epi16)
538 #pragma intrinsic(_mm_unpackhi_epi32)
539 #pragma intrinsic(_mm_unpackhi_epi64)
540 #pragma intrinsic(_mm_unpacklo_epi8)
541 #pragma intrinsic(_mm_unpacklo_epi16)
542 #pragma intrinsic(_mm_unpacklo_epi32)
543 #pragma intrinsic(_mm_unpacklo_epi64)
544 //#pragma intrinsic(_mm_movepi64_pi64)
545 //#pragma intrinsic(_mm_movpi64_epi64)
546 #pragma intrinsic(_mm_move_epi64)
547 #pragma intrinsic(_mm_unpackhi_pd)
548 #pragma intrinsic(_mm_unpacklo_pd)
549 #pragma intrinsic(_mm_movemask_pd)
550 #pragma intrinsic(_mm_shuffle_pd)
551 #pragma intrinsic(_mm_castpd_ps)
552 #pragma intrinsic(_mm_castpd_si128)
553 #pragma intrinsic(_mm_castps_pd)
554 #pragma intrinsic(_mm_castps_si128)
555 #pragma intrinsic(_mm_castsi128_ps)
556 #pragma intrinsic(_mm_castsi128_pd)
557 #pragma intrinsic(_mm_pause)
558 
559 #else /* _MSC_VER */
560 
561 /*
562  Clang: https://github.com/llvm/llvm-project/blob/main/clang/lib/Headers/emmintrin.h
563  Clang older version: https://github.com/llvm/llvm-project/blob/3ef88b31843e040c95f23ff2c3c206f1fa399c05/clang/lib/Headers/emmintrin.h
564  unikraft: https://github.com/unikraft/lib-intel-intrinsics/blob/staging/include/emmintrin.h
565 */
566 
567 __INTRIN_INLINE_SSE2 __m128d _mm_add_sd(__m128d a, __m128d b)
568 {
569  a[0] += b[0];
570  return a;
571 }
572 
573 __INTRIN_INLINE_SSE2 __m128d _mm_add_pd(__m128d a, __m128d b)
574 {
575  return (__m128d)((__v2df)a + (__v2df)b);
576 }
577 
578 __INTRIN_INLINE_SSE2 __m128d _mm_sub_sd(__m128d a, __m128d b)
579 {
580  a[0] -= b[0];
581  return a;
582 }
583 
584 __INTRIN_INLINE_SSE2 __m128d _mm_sub_pd(__m128d a, __m128d b)
585 {
586  return (__m128d)((__v2df)a - (__v2df)b);
587 }
588 
589 __INTRIN_INLINE_SSE2 __m128d _mm_mul_sd(__m128d a, __m128d b)
590 {
591  a[0] *= b[0];
592  return a;
593 }
594 
595 __INTRIN_INLINE_SSE2 __m128d _mm_mul_pd(__m128d a, __m128d b)
596 {
597  return (__m128d)((__v2df)a * (__v2df)b);
598 }
599 
600 __INTRIN_INLINE_SSE2 __m128d _mm_div_sd(__m128d a, __m128d b)
601 {
602  a[0] /= b[0];
603  return a;
604 }
605 
606 __INTRIN_INLINE_SSE2 __m128d _mm_div_pd(__m128d a, __m128d b)
607 {
608  return (__m128d)((__v2df)a / (__v2df)b);
609 }
610 
611 __INTRIN_INLINE_SSE2 __m128d _mm_sqrt_sd(__m128d a, __m128d b)
612 {
613  __m128d __c = __builtin_ia32_sqrtsd((__v2df)b);
614  return __extension__(__m128d){__c[0], a[1]};
615 }
616 
618 {
619  return __builtin_ia32_sqrtpd((__v2df)a);
620 }
621 
622 __INTRIN_INLINE_SSE2 __m128d _mm_min_sd(__m128d a, __m128d b)
623 {
624  return __builtin_ia32_minsd((__v2df)a, (__v2df)b);
625 }
626 
627 __INTRIN_INLINE_SSE2 __m128d _mm_min_pd(__m128d a, __m128d b)
628 {
629  return __builtin_ia32_minpd((__v2df)a, (__v2df)b);
630 }
631 
632 __INTRIN_INLINE_SSE2 __m128d _mm_max_sd(__m128d a, __m128d b)
633 {
634  return __builtin_ia32_maxsd((__v2df)a, (__v2df)b);
635 }
636 
637 __INTRIN_INLINE_SSE2 __m128d _mm_max_pd(__m128d a, __m128d b)
638 {
639  return __builtin_ia32_maxpd((__v2df)a, (__v2df)b);
640 }
641 
642 __INTRIN_INLINE_SSE2 __m128d _mm_and_pd(__m128d a, __m128d b)
643 {
644  return (__m128d)((__v2du)a & (__v2du)b);
645 }
646 
647 __INTRIN_INLINE_SSE2 __m128d _mm_andnot_pd(__m128d a, __m128d b)
648 {
649  return (__m128d)(~(__v2du)a & (__v2du)b);
650 }
651 
652 __INTRIN_INLINE_SSE2 __m128d _mm_or_pd(__m128d a, __m128d b)
653 {
654  return (__m128d)((__v2du)a | (__v2du)b);
655 }
656 
657 __INTRIN_INLINE_SSE2 __m128d _mm_xor_pd(__m128d a, __m128d b)
658 {
659  return (__m128d)((__v2du)a ^ (__v2du)b);
660 }
661 
662 __INTRIN_INLINE_SSE2 __m128d _mm_cmpeq_pd(__m128d a, __m128d b)
663 {
664  return (__m128d)__builtin_ia32_cmpeqpd((__v2df)a, (__v2df)b);
665 }
666 
667 __INTRIN_INLINE_SSE2 __m128d _mm_cmplt_pd(__m128d a, __m128d b)
668 {
669  return (__m128d)__builtin_ia32_cmpltpd((__v2df)a, (__v2df)b);
670 }
671 
672 __INTRIN_INLINE_SSE2 __m128d _mm_cmple_pd(__m128d a, __m128d b)
673 {
674  return (__m128d)__builtin_ia32_cmplepd((__v2df)a, (__v2df)b);
675 }
676 
677 __INTRIN_INLINE_SSE2 __m128d _mm_cmpgt_pd(__m128d a, __m128d b)
678 {
679  return (__m128d)__builtin_ia32_cmpltpd((__v2df)b, (__v2df)a);
680 }
681 
682 __INTRIN_INLINE_SSE2 __m128d _mm_cmpge_pd(__m128d a, __m128d b)
683 {
684  return (__m128d)__builtin_ia32_cmplepd((__v2df)b, (__v2df)a);
685 }
686 
687 __INTRIN_INLINE_SSE2 __m128d _mm_cmpord_pd(__m128d a, __m128d b)
688 {
689  return (__m128d)__builtin_ia32_cmpordpd((__v2df)a, (__v2df)b);
690 }
691 
692 __INTRIN_INLINE_SSE2 __m128d _mm_cmpunord_pd(__m128d a, __m128d b)
693 {
694  return (__m128d)__builtin_ia32_cmpunordpd((__v2df)a, (__v2df)b);
695 }
696 
697 __INTRIN_INLINE_SSE2 __m128d _mm_cmpneq_pd(__m128d a, __m128d b)
698 {
699  return (__m128d)__builtin_ia32_cmpneqpd((__v2df)a, (__v2df)b);
700 }
701 
702 __INTRIN_INLINE_SSE2 __m128d _mm_cmpnlt_pd(__m128d a, __m128d b)
703 {
704  return (__m128d)__builtin_ia32_cmpnltpd((__v2df)a, (__v2df)b);
705 }
706 
707 __INTRIN_INLINE_SSE2 __m128d _mm_cmpnle_pd(__m128d a, __m128d b)
708 {
709  return (__m128d)__builtin_ia32_cmpnlepd((__v2df)a, (__v2df)b);
710 }
711 
712 __INTRIN_INLINE_SSE2 __m128d _mm_cmpngt_pd(__m128d a, __m128d b)
713 {
714  return (__m128d)__builtin_ia32_cmpnltpd((__v2df)b, (__v2df)a);
715 }
716 
717 __INTRIN_INLINE_SSE2 __m128d _mm_cmpnge_pd(__m128d a, __m128d b)
718 {
719  return (__m128d)__builtin_ia32_cmpnlepd((__v2df)b, (__v2df)a);
720 }
721 
722 __INTRIN_INLINE_SSE2 __m128d _mm_cmpeq_sd(__m128d a, __m128d b)
723 {
724  return (__m128d)__builtin_ia32_cmpeqsd((__v2df)a, (__v2df)b);
725 }
726 
727 __INTRIN_INLINE_SSE2 __m128d _mm_cmplt_sd(__m128d a, __m128d b)
728 {
729  return (__m128d)__builtin_ia32_cmpltsd((__v2df)a, (__v2df)b);
730 }
731 
732 __INTRIN_INLINE_SSE2 __m128d _mm_cmple_sd(__m128d a, __m128d b)
733 {
734  return (__m128d)__builtin_ia32_cmplesd((__v2df)a, (__v2df)b);
735 }
736 
737 __INTRIN_INLINE_SSE2 __m128d _mm_cmpgt_sd(__m128d a, __m128d b)
738 {
739  __m128d __c = __builtin_ia32_cmpltsd((__v2df)b, (__v2df)a);
740  return __extension__(__m128d){__c[0], a[1]};
741 }
742 
743 __INTRIN_INLINE_SSE2 __m128d _mm_cmpge_sd(__m128d a, __m128d b)
744 {
745  __m128d __c = __builtin_ia32_cmplesd((__v2df)b, (__v2df)a);
746  return __extension__(__m128d){__c[0], a[1]};
747 }
748 
749 __INTRIN_INLINE_SSE2 __m128d _mm_cmpord_sd(__m128d a, __m128d b)
750 {
751  return (__m128d)__builtin_ia32_cmpordsd((__v2df)a, (__v2df)b);
752 }
753 
754 __INTRIN_INLINE_SSE2 __m128d _mm_cmpunord_sd(__m128d a, __m128d b)
755 {
756  return (__m128d)__builtin_ia32_cmpunordsd((__v2df)a, (__v2df)b);
757 }
758 
759 __INTRIN_INLINE_SSE2 __m128d _mm_cmpneq_sd(__m128d a, __m128d b)
760 {
761  return (__m128d)__builtin_ia32_cmpneqsd((__v2df)a, (__v2df)b);
762 }
763 
764 __INTRIN_INLINE_SSE2 __m128d _mm_cmpnlt_sd(__m128d a, __m128d b)
765 {
766  return (__m128d)__builtin_ia32_cmpnltsd((__v2df)a, (__v2df)b);
767 }
768 
769 __INTRIN_INLINE_SSE2 __m128d _mm_cmpnle_sd(__m128d a, __m128d b)
770 {
771  return (__m128d)__builtin_ia32_cmpnlesd((__v2df)a, (__v2df)b);
772 }
773 
774 __INTRIN_INLINE_SSE2 __m128d _mm_cmpngt_sd(__m128d a, __m128d b)
775 {
776  __m128d __c = __builtin_ia32_cmpnltsd((__v2df)b, (__v2df)a);
777  return __extension__(__m128d){__c[0], a[1]};
778 }
779 
780 __INTRIN_INLINE_SSE2 __m128d _mm_cmpnge_sd(__m128d a, __m128d b)
781 {
782  __m128d __c = __builtin_ia32_cmpnlesd((__v2df)b, (__v2df)a);
783  return __extension__(__m128d){__c[0], a[1]};
784 }
785 
786 __INTRIN_INLINE_SSE2 int _mm_comieq_sd(__m128d a, __m128d b)
787 {
788  return __builtin_ia32_comisdeq((__v2df)a, (__v2df)b);
789 }
790 
791 __INTRIN_INLINE_SSE2 int _mm_comilt_sd(__m128d a, __m128d b)
792 {
793  return __builtin_ia32_comisdlt((__v2df)a, (__v2df)b);
794 }
795 
796 __INTRIN_INLINE_SSE2 int _mm_comile_sd(__m128d a, __m128d b)
797 {
798  return __builtin_ia32_comisdle((__v2df)a, (__v2df)b);
799 }
800 
801 __INTRIN_INLINE_SSE2 int _mm_comigt_sd(__m128d a, __m128d b)
802 {
803  return __builtin_ia32_comisdgt((__v2df)a, (__v2df)b);
804 }
805 
/*
 * _mm_comige_sd: returns the int produced by __builtin_ia32_comisdge(a, b).
 * NOTE(review): presumably a COMISD-based "greater than or equal" test of the
 * low elements - confirm against the Intel intrinsics reference.
 */
806 __INTRIN_INLINE_SSE2 int _mm_comige_sd(__m128d a, __m128d b)
807 {
808  return __builtin_ia32_comisdge((__v2df)a, (__v2df)b);
809 }
810 
/*
 * _mm_comineq_sd: returns the int produced by __builtin_ia32_comisdneq(a, b).
 * NOTE(review): presumably a COMISD-based "not equal" test of the low
 * elements - confirm against the Intel intrinsics reference.
 */
811 __INTRIN_INLINE_SSE2 int _mm_comineq_sd(__m128d a, __m128d b)
812 {
813  return __builtin_ia32_comisdneq((__v2df)a, (__v2df)b);
814 }
815 
/*
 * _mm_ucomieq_sd: returns the int produced by __builtin_ia32_ucomisdeq(a, b).
 * NOTE(review): presumably the UCOMISD counterpart of _mm_comieq_sd (differing
 * in which NaNs raise an exception) - confirm against the Intel reference.
 */
816 __INTRIN_INLINE_SSE2 int _mm_ucomieq_sd(__m128d a, __m128d b)
817 {
818  return __builtin_ia32_ucomisdeq((__v2df)a, (__v2df)b);
819 }
820 
/*
 * _mm_ucomilt_sd: returns the int produced by __builtin_ia32_ucomisdlt(a, b).
 * NOTE(review): presumably a UCOMISD-based "less than" test of the low
 * elements - confirm against the Intel intrinsics reference.
 */
821 __INTRIN_INLINE_SSE2 int _mm_ucomilt_sd(__m128d a, __m128d b)
822 {
823  return __builtin_ia32_ucomisdlt((__v2df)a, (__v2df)b);
824 }
825 
/*
 * _mm_ucomile_sd: returns the int produced by __builtin_ia32_ucomisdle(a, b).
 * NOTE(review): presumably a UCOMISD-based "less than or equal" test of the
 * low elements - confirm against the Intel intrinsics reference.
 */
826 __INTRIN_INLINE_SSE2 int _mm_ucomile_sd(__m128d a, __m128d b)
827 {
828  return __builtin_ia32_ucomisdle((__v2df)a, (__v2df)b);
829 }
830 
/*
 * _mm_ucomigt_sd: returns the int produced by __builtin_ia32_ucomisdgt(a, b).
 * NOTE(review): presumably a UCOMISD-based "greater than" test of the low
 * elements - confirm against the Intel intrinsics reference.
 */
831 __INTRIN_INLINE_SSE2 int _mm_ucomigt_sd(__m128d a, __m128d b)
832 {
833  return __builtin_ia32_ucomisdgt((__v2df)a, (__v2df)b);
834 }
835 
/*
 * _mm_ucomige_sd: returns the int produced by __builtin_ia32_ucomisdge(a, b).
 * NOTE(review): presumably a UCOMISD-based "greater than or equal" test of
 * the low elements - confirm against the Intel intrinsics reference.
 */
836 __INTRIN_INLINE_SSE2 int _mm_ucomige_sd(__m128d a, __m128d b)
837 {
838  return __builtin_ia32_ucomisdge((__v2df)a, (__v2df)b);
839 }
840 
842 {
843  return __builtin_ia32_ucomisdneq((__v2df)a, (__v2df)b);
844 }
845 
847 {
848  return __builtin_ia32_cvtpd2ps((__v2df)a);
849 }
850 
852 {
853 #if HAS_BUILTIN(__builtin_convertvector)
854  return (__m128d)__builtin_convertvector(__builtin_shufflevector((__v4sf)a, (__v4sf)a, 0, 1), __v2df);
855 #else
856  return __builtin_ia32_cvtps2pd(a);
857 #endif
858 }
859 
861 {
862 #if HAS_BUILTIN(__builtin_convertvector)
863  return (__m128d)__builtin_convertvector(__builtin_shufflevector((__v4si)a, (__v4si)a, 0, 1), __v2df);
864 #else
865  return __builtin_ia32_cvtdq2pd((__v4si)a);
866 #endif
867 }
868 
870 {
871  return (__m128i)__builtin_ia32_cvtpd2dq((__v2df)a);
872 }
873 
875 {
876  return __builtin_ia32_cvtsd2si((__v2df)a);
877 }
878 
/*
 * _mm_cvtsd_ss: passes a (as __v4sf) and b (as __v2df) to
 * __builtin_ia32_cvtsd2ss and returns the result as an __m128.
 * NOTE(review): presumably converts b's low double to float in element 0 and
 * keeps a's upper three floats - confirm against the Intel reference.
 */
879 __INTRIN_INLINE_SSE2 __m128 _mm_cvtsd_ss(__m128 a, __m128d b)
880 {
881  return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)a, (__v2df)b);
882 }
883 
885  int b)
886 {
887  a[0] = b;
888  return a;
889 }
890 
/*
 * _mm_cvtss_sd: returns a with element 0 replaced by b[0]. The float lane is
 * widened to double by the assignment; a[1] is passed through unchanged.
 */
891 __INTRIN_INLINE_SSE2 __m128d _mm_cvtss_sd(__m128d a, __m128 b)
892 {
893  a[0] = b[0];
894  return a;
895 }
896 
898 {
899  return (__m128i)__builtin_ia32_cvttpd2dq((__v2df)a);
900 }
901 
903 {
904  return __builtin_ia32_cvttsd2si((__v2df)a);
905 }
906 
908 {
909  return (__m64)__builtin_ia32_cvtpd2pi((__v2df)a);
910 }
911 
913 {
914  return (__m64)__builtin_ia32_cvttpd2pi((__v2df)a);
915 }
916 
918 {
919  return __builtin_ia32_cvtpi2pd((__v2si)a);
920 }
921 
923 {
924  return a[0];
925 }
926 
/*
 * _mm_load_pd: loads two doubles from dp as one __m128d.
 * The pointer is dereferenced as a const __m128d *, so the access carries
 * __m128d's declared 16-byte alignment; dp must be aligned accordingly.
 */
927 __INTRIN_INLINE_SSE2 __m128d _mm_load_pd(double const *dp)
928 {
929  return *(const __m128d *)dp;
930 }
931 
/*
 * _mm_load1_pd: reads one double from dp and returns it in both elements.
 * The read goes through a packed, may_alias wrapper struct so it makes no
 * alignment or strict-aliasing assumption about dp.
 */
932 __INTRIN_INLINE_SSE2 __m128d _mm_load1_pd(double const *dp)
933 {
934  struct __mm_load1_pd_struct {
935  double __u;
936  } __attribute__((__packed__, __may_alias__));
937  double __u = ((const struct __mm_load1_pd_struct *)dp)->__u;
938  return __extension__(__m128d){__u, __u};
939 }
940 
941 // GCC:
942 /* Create a selector for use with the SHUFPD instruction: fp1 is placed in bit 1 and fp0 in bit 0. Both arguments are parenthesized, so expressions are safe to pass. */
943 #define _MM_SHUFFLE2(fp1,fp0) \
944  (((fp1) << 1) | (fp0))
945 
/*
 * _mm_loadr_pd: loads two doubles from dp and returns them in reversed order
 * (result[0] = dp[1], result[1] = dp[0]).
 *
 * With __builtin_shufflevector available the pair is read as one __m128d
 * (so dp is accessed with __m128d's 16-byte alignment) and then swapped;
 * otherwise the two doubles are read individually. The fallback vector
 * literal is marked __extension__, matching the other vector literals in
 * this header, so it does not trip pedantic diagnostics.
 */
946 __INTRIN_INLINE_SSE2 __m128d _mm_loadr_pd(double const *dp)
947 {
948 #if HAS_BUILTIN(__builtin_shufflevector)
949  __m128d u = *(const __m128d *)dp;
950  return __builtin_shufflevector((__v2df)u, (__v2df)u, 1, 0);
951 #else
952  return __extension__(__m128d){ dp[1], dp[0] };
953 #endif
954 }
955 
/*
 * _mm_loadu_pd: unaligned load of two doubles from dp.
 * The read goes through a packed, may_alias struct wrapping __m128d_u (the
 * 1-byte-aligned vector type), so dp needs no particular alignment.
 */
956 __INTRIN_INLINE_SSE2 __m128d _mm_loadu_pd(double const *dp)
957 {
958  struct __loadu_pd {
959  __m128d_u __v;
960  } __attribute__((__packed__, __may_alias__));
961  return ((const struct __loadu_pd *)dp)->__v;
962 }
963 
965 {
966  struct __loadu_si64 {
967  long long __v;
968  } __attribute__((__packed__, __may_alias__));
969  long long __u = ((const struct __loadu_si64 *)a)->__v;
970  return __extension__(__m128i)(__v2di){__u, 0LL};
971 }
972 
974 {
975  struct __loadu_si32 {
976  int __v;
977  } __attribute__((__packed__, __may_alias__));
978  int __u = ((const struct __loadu_si32 *)a)->__v;
979  return __extension__(__m128i)(__v4si){__u, 0, 0, 0};
980 }
981 
983 {
984  struct __loadu_si16 {
985  short __v;
986  } __attribute__((__packed__, __may_alias__));
987  short __u = ((const struct __loadu_si16 *)a)->__v;
988  return __extension__(__m128i)(__v8hi){__u, 0, 0, 0, 0, 0, 0, 0};
989 }
990 
/*
 * _mm_load_sd: reads one double from dp and returns {*dp, 0}.
 * The read goes through a packed, may_alias wrapper struct so it makes no
 * alignment or strict-aliasing assumption about dp.
 */
991 __INTRIN_INLINE_SSE2 __m128d _mm_load_sd(double const *dp)
992 {
993  struct __mm_load_sd_struct {
994  double __u;
995  } __attribute__((__packed__, __may_alias__));
996  double __u = ((const struct __mm_load_sd_struct *)dp)->__u;
997  return __extension__(__m128d){__u, 0};
998 }
999 
/*
 * _mm_loadh_pd: returns {a[0], *dp} - the high element is replaced by the
 * double read from dp, the low element of a is kept.
 * The read goes through a packed, may_alias wrapper struct so it makes no
 * alignment or strict-aliasing assumption about dp.
 */
1000 __INTRIN_INLINE_SSE2 __m128d _mm_loadh_pd(__m128d a, double const *dp)
1001 {
1002  struct __mm_loadh_pd_struct {
1003  double __u;
1004  } __attribute__((__packed__, __may_alias__));
1005  double __u = ((const struct __mm_loadh_pd_struct *)dp)->__u;
1006  return __extension__(__m128d){a[0], __u};
1007 }
1008 
/*
 * _mm_loadl_pd: returns {*dp, a[1]} - the low element is replaced by the
 * double read from dp, the high element of a is kept.
 * The read goes through a packed, may_alias wrapper struct so it makes no
 * alignment or strict-aliasing assumption about dp.
 */
1009 __INTRIN_INLINE_SSE2 __m128d _mm_loadl_pd(__m128d a, double const *dp)
1010 {
1011  struct __mm_loadl_pd_struct {
1012  double __u;
1013  } __attribute__((__packed__, __may_alias__));
1014  double __u = ((const struct __mm_loadl_pd_struct *)dp)->__u;
1015  return __extension__(__m128d){__u, a[1]};
1016 }
1017 
1019 {
1020 #if HAS_BUILTIN(__builtin_ia32_undef128)
1021  return (__m128d)__builtin_ia32_undef128();
1022 #else
1023  __m128d undef = undef;
1024  return undef;
1025 #endif
1026 }
1027 
1029 {
1030  return __extension__(__m128d){w, 0};
1031 }
1032 
1034 {
1035  return __extension__(__m128d){w, w};
1036 }
1037 
/*
 * _mm_set_pd: builds a vector from two doubles given high-element first:
 * x becomes element 0 and w becomes element 1.
 */
1038 __INTRIN_INLINE_SSE2 __m128d _mm_set_pd(double w, double x)
1039 {
1040  return __extension__(__m128d){x, w};
1041 }
1042 
/*
 * _mm_setr_pd: builds a vector from two doubles in element order:
 * w becomes element 0 and x becomes element 1 (the reverse of _mm_set_pd).
 */
1043 __INTRIN_INLINE_SSE2 __m128d _mm_setr_pd(double w, double x)
1044 {
1045  return __extension__(__m128d){w, x};
1046 }
1047 
1049 {
1050  return __extension__(__m128d){0, 0};
1051 }
1052 
/*
 * _mm_move_sd: returns a with element 0 replaced by b[0]; a[1] is kept.
 * a is a by-value parameter, so the caller's vector is not modified.
 */
1053 __INTRIN_INLINE_SSE2 __m128d _mm_move_sd(__m128d a, __m128d b)
1054 {
1055  a[0] = b[0];
1056  return a;
1057 }
1058 
/*
 * _mm_store_sd: writes element 0 of a to *dp.
 * The store goes through a packed, may_alias wrapper struct so it makes no
 * alignment or strict-aliasing assumption about dp.
 */
1059 __INTRIN_INLINE_SSE2 void _mm_store_sd(double *dp, __m128d a)
1060 {
1061  struct __mm_store_sd_struct {
1062  double __u;
1063  } __attribute__((__packed__, __may_alias__));
1064  ((struct __mm_store_sd_struct *)dp)->__u = a[0];
1065 }
1066 
/*
 * _mm_store_pd: stores both elements of a to dp[0..1].
 * The store is performed through an __m128d pointer, so it carries that
 * type's declared 16-byte alignment; dp must be aligned accordingly.
 */
1067 __INTRIN_INLINE_SSE2 void _mm_store_pd(double *dp, __m128d a)
1068 {
1069  *(__m128d *)dp = a;
1070 }
1071 
/*
 * _mm_store1_pd: stores element 0 of a into both dp[0] and dp[1].
 * With __builtin_shufflevector the low element is broadcast first and the
 * pair is written by _mm_store_pd; otherwise two scalar stores are used.
 */
1072 __INTRIN_INLINE_SSE2 void _mm_store1_pd(double *dp, __m128d a)
1073 {
1074 #if HAS_BUILTIN(__builtin_shufflevector)
1075  a = __builtin_shufflevector((__v2df)a, (__v2df)a, 0, 0);
1076  _mm_store_pd(dp, a);
1077 #else
1078  dp[0] = a[0];
1079  dp[1] = a[0];
1080 #endif
1081 }
1082 
/*
 * _mm_storeu_pd: unaligned store of both elements of a to dp.
 * The store goes through a packed, may_alias struct wrapping __m128d_u (the
 * 1-byte-aligned vector type), so dp needs no particular alignment.
 */
1083 __INTRIN_INLINE_SSE2 void _mm_storeu_pd(double *dp, __m128d a)
1084 {
1085  struct __storeu_pd {
1086  __m128d_u __v;
1087  } __attribute__((__packed__, __may_alias__));
1088  ((struct __storeu_pd *)dp)->__v = a;
1089 }
1090 
/*
 * _mm_storer_pd: stores a to dp in reversed order (dp[0] = a[1],
 * dp[1] = a[0]). With __builtin_shufflevector the elements are swapped and
 * written with one store through an __m128d pointer (16-byte alignment
 * applies); otherwise two scalar stores are used.
 */
1091 __INTRIN_INLINE_SSE2 void _mm_storer_pd(double *dp, __m128d a)
1092 {
1093 #if HAS_BUILTIN(__builtin_shufflevector)
1094  a = __builtin_shufflevector((__v2df)a, (__v2df)a, 1, 0);
1095  *(__m128d *)dp = a;
1096 #else
1097  dp[0] = a[1];
1098  dp[1] = a[0];
1099 #endif
1100 }
1101 
/*
 * _mm_storeh_pd: writes element 1 (the high double) of a to *dp.
 * The store goes through a packed, may_alias wrapper struct so it makes no
 * alignment or strict-aliasing assumption about dp.
 */
1102 __INTRIN_INLINE_SSE2 void _mm_storeh_pd(double *dp, __m128d a)
1103 {
1104  struct __mm_storeh_pd_struct {
1105  double __u;
1106  } __attribute__((__packed__, __may_alias__));
1107  ((struct __mm_storeh_pd_struct *)dp)->__u = a[1];
1108 }
1109 
/*
 * _mm_storel_pd: writes element 0 (the low double) of a to *dp.
 * The store goes through a packed, may_alias wrapper struct so it makes no
 * alignment or strict-aliasing assumption about dp. The wrapper struct is
 * tagged after this function, as the other load/store helpers do, rather
 * than borrowing the _mm_storeh_pd tag.
 */
1110 __INTRIN_INLINE_SSE2 void _mm_storel_pd(double *dp, __m128d a)
1111 {
1112  struct __mm_storel_pd_struct {
1113  double __u;
1114  } __attribute__((__packed__, __may_alias__));
1115  ((struct __mm_storel_pd_struct *)dp)->__u = a[0];
1116 }
1117 
/*
 * _mm_add_epi8: lane-wise sum of a and b viewed as __v16qu vectors.
 * NOTE(review): __v16qu is presumably the 16 x unsigned char vector type,
 * chosen so lane overflow wraps instead of being signed overflow - confirm.
 */
1118 __INTRIN_INLINE_SSE2 __m128i _mm_add_epi8(__m128i a, __m128i b)
1119 {
1120  return (__m128i)((__v16qu)a + (__v16qu)b);
1121 }
1122 
/*
 * _mm_add_epi16: lane-wise sum of a and b viewed as __v8hu (unsigned short)
 * vectors; the unsigned lanes mean overflow wraps around.
 */
1123 __INTRIN_INLINE_SSE2 __m128i _mm_add_epi16(__m128i a, __m128i b)
1124 {
1125  return (__m128i)((__v8hu)a + (__v8hu)b);
1126 }
1127 
/*
 * _mm_add_epi32: lane-wise sum of a and b viewed as __v4su vectors.
 * NOTE(review): __v4su is presumably the 4 x unsigned int vector type, chosen
 * so lane overflow wraps - confirm.
 */
1128 __INTRIN_INLINE_SSE2 __m128i _mm_add_epi32(__m128i a, __m128i b)
1129 {
1130  return (__m128i)((__v4su)a + (__v4su)b);
1131 }
1132 
1134 {
1135  return (__m64)__builtin_ia32_paddq((__v1di)a, (__v1di)b);
1136 }
1137 
/*
 * _mm_add_epi64: lane-wise sum of a and b viewed as __v2du (unsigned long
 * long) vectors; the unsigned lanes mean overflow wraps around.
 */
1138 __INTRIN_INLINE_SSE2 __m128i _mm_add_epi64(__m128i a, __m128i b)
1139 {
1140  return (__m128i)((__v2du)a + (__v2du)b);
1141 }
1142 
/*
 * _mm_adds_epi8: saturating lane-wise add on 8-bit lanes.
 * Uses __builtin_elementwise_add_sat on __v16qs vectors when available,
 * otherwise the paddsb128 builtin on __v16qi vectors.
 * NOTE(review): __v16qs is presumably the explicitly signed char vector
 * type, giving signed saturation - confirm.
 */
1143 __INTRIN_INLINE_SSE2 __m128i _mm_adds_epi8(__m128i a, __m128i b)
1144 {
1145 #if HAS_BUILTIN(__builtin_elementwise_add_sat)
1146  return (__m128i)__builtin_elementwise_add_sat((__v16qs)a, (__v16qs)b);
1147 #else
1148  return (__m128i)__builtin_ia32_paddsb128((__v16qi)a, (__v16qi)b);
1149 #endif
1150 }
1151 
/*
 * _mm_adds_epi16: saturating lane-wise add on __v8hi (short) lanes.
 * Uses __builtin_elementwise_add_sat when available, otherwise the
 * paddsw128 builtin.
 */
1152 __INTRIN_INLINE_SSE2 __m128i _mm_adds_epi16(__m128i a, __m128i b)
1153 {
1154 #if HAS_BUILTIN(__builtin_elementwise_add_sat)
1155  return (__m128i)__builtin_elementwise_add_sat((__v8hi)a, (__v8hi)b);
1156 #else
1157  return (__m128i)__builtin_ia32_paddsw128((__v8hi)a, (__v8hi)b);
1158 #endif
1159 }
1160 
/*
 * _mm_adds_epu8: saturating lane-wise add on 8-bit lanes.
 * Uses __builtin_elementwise_add_sat on __v16qu vectors when available,
 * otherwise the paddusb128 builtin on __v16qi vectors.
 * NOTE(review): __v16qu is presumably the unsigned char vector type, giving
 * unsigned saturation - confirm.
 */
1161 __INTRIN_INLINE_SSE2 __m128i _mm_adds_epu8(__m128i a, __m128i b)
1162 {
1163 #if HAS_BUILTIN(__builtin_elementwise_add_sat)
1164  return (__m128i)__builtin_elementwise_add_sat((__v16qu)a, (__v16qu)b);
1165 #else
1166  return (__m128i)__builtin_ia32_paddusb128((__v16qi)a, (__v16qi)b);
1167 #endif
1168 }
1169 
/*
 * _mm_adds_epu16: saturating lane-wise add on 16-bit lanes.
 * Uses __builtin_elementwise_add_sat on __v8hu (unsigned short) vectors when
 * available, otherwise the paddusw128 builtin on __v8hi vectors.
 */
1170 __INTRIN_INLINE_SSE2 __m128i _mm_adds_epu16(__m128i a, __m128i b)
1171 {
1172 #if HAS_BUILTIN(__builtin_elementwise_add_sat)
1173  return (__m128i)__builtin_elementwise_add_sat((__v8hu)a, (__v8hu)b);
1174 #else
1175  return (__m128i)__builtin_ia32_paddusw128((__v8hi)a, (__v8hi)b);
1176 #endif
1177 }
1178 
/*
 * _mm_avg_epu8: returns __builtin_ia32_pavgb128(a, b) on __v16qi vectors.
 * NOTE(review): presumably the rounded average of unsigned 8-bit lanes -
 * confirm against the Intel intrinsics reference.
 */
1179 __INTRIN_INLINE_SSE2 __m128i _mm_avg_epu8(__m128i a, __m128i b)
1180 {
1181  return (__m128i)__builtin_ia32_pavgb128((__v16qi)a, (__v16qi)b);
1182 }
1183 
/*
 * _mm_avg_epu16: returns __builtin_ia32_pavgw128(a, b) on __v8hi vectors.
 * NOTE(review): presumably the rounded average of unsigned 16-bit lanes -
 * confirm against the Intel intrinsics reference.
 */
1184 __INTRIN_INLINE_SSE2 __m128i _mm_avg_epu16(__m128i a, __m128i b)
1185 {
1186  return (__m128i)__builtin_ia32_pavgw128((__v8hi)a, (__v8hi)b);
1187 }
1188 
/*
 * _mm_madd_epi16: returns __builtin_ia32_pmaddwd128(a, b) on __v8hi vectors.
 * NOTE(review): presumably multiplies signed 16-bit lanes and adds adjacent
 * products into 32-bit lanes - confirm against the Intel reference.
 */
1189 __INTRIN_INLINE_SSE2 __m128i _mm_madd_epi16(__m128i a, __m128i b)
1190 {
1191  return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)a, (__v8hi)b);
1192 }
1193 
/*
 * _mm_max_epi16: lane-wise maximum of a and b viewed as __v8hi (short)
 * vectors. Uses __builtin_elementwise_max when available, otherwise the
 * pmaxsw128 builtin.
 */
1194 __INTRIN_INLINE_SSE2 __m128i _mm_max_epi16(__m128i a, __m128i b)
1195 {
1196 #if HAS_BUILTIN(__builtin_elementwise_max)
1197  return (__m128i)__builtin_elementwise_max((__v8hi)a, (__v8hi)b);
1198 #else
1199  return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)a, (__v8hi)b);
1200 #endif
1201 }
1202 
/*
 * _mm_max_epu8: lane-wise maximum on 8-bit lanes.
 * Uses __builtin_elementwise_max on __v16qu vectors when available, otherwise
 * the pmaxub128 builtin on __v16qi vectors.
 * NOTE(review): __v16qu is presumably the unsigned char vector type, giving
 * an unsigned comparison - confirm.
 */
1203 __INTRIN_INLINE_SSE2 __m128i _mm_max_epu8(__m128i a, __m128i b)
1204 {
1205 #if HAS_BUILTIN(__builtin_elementwise_max)
1206  return (__m128i)__builtin_elementwise_max((__v16qu)a, (__v16qu)b);
1207 #else
1208  return (__m128i)__builtin_ia32_pmaxub128((__v16qi)a, (__v16qi)b);
1209 #endif
1210 }
1211 
/*
 * _mm_min_epi16: lane-wise minimum of a and b viewed as __v8hi (short)
 * vectors. Uses __builtin_elementwise_min when available, otherwise the
 * pminsw128 builtin.
 */
1212 __INTRIN_INLINE_SSE2 __m128i _mm_min_epi16(__m128i a, __m128i b)
1213 {
1214 #if HAS_BUILTIN(__builtin_elementwise_min)
1215  return (__m128i)__builtin_elementwise_min((__v8hi)a, (__v8hi)b);
1216 #else
1217  return (__m128i)__builtin_ia32_pminsw128((__v8hi)a, (__v8hi)b);
1218 #endif
1219 }
1220 
/*
 * _mm_min_epu8: lane-wise minimum on 8-bit lanes.
 * Uses __builtin_elementwise_min on __v16qu vectors when available, otherwise
 * the pminub128 builtin on __v16qi vectors.
 * NOTE(review): __v16qu is presumably the unsigned char vector type, giving
 * an unsigned comparison - confirm.
 */
1221 __INTRIN_INLINE_SSE2 __m128i _mm_min_epu8(__m128i a, __m128i b)
1222 {
1223 #if HAS_BUILTIN(__builtin_elementwise_min)
1224  return (__m128i)__builtin_elementwise_min((__v16qu)a, (__v16qu)b);
1225 #else
1226  return (__m128i)__builtin_ia32_pminub128((__v16qi)a, (__v16qi)b);
1227 #endif
1228 }
1229 
/*
 * _mm_mulhi_epi16: returns __builtin_ia32_pmulhw128(a, b) on __v8hi vectors.
 * NOTE(review): presumably the high 16 bits of each signed 16x16 product -
 * confirm against the Intel intrinsics reference.
 */
1230 __INTRIN_INLINE_SSE2 __m128i _mm_mulhi_epi16(__m128i a, __m128i b)
1231 {
1232  return (__m128i)__builtin_ia32_pmulhw128((__v8hi)a, (__v8hi)b);
1233 }
1234 
/*
 * _mm_mulhi_epu16: returns __builtin_ia32_pmulhuw128(a, b) on __v8hi vectors.
 * NOTE(review): presumably the high 16 bits of each unsigned 16x16 product -
 * confirm against the Intel intrinsics reference.
 */
1235 __INTRIN_INLINE_SSE2 __m128i _mm_mulhi_epu16(__m128i a, __m128i b)
1236 {
1237  return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)a, (__v8hi)b);
1238 }
1239 
/*
 * _mm_mullo_epi16: lane-wise product of a and b viewed as __v8hu (unsigned
 * short) vectors; each result lane keeps only the low 16 bits of its product.
 */
1240 __INTRIN_INLINE_SSE2 __m128i _mm_mullo_epi16(__m128i a, __m128i b)
1241 {
1242  return (__m128i)((__v8hu)a * (__v8hu)b);
1243 }
1244 
1246 {
1247  return (__m64)__builtin_ia32_pmuludq((__v2si)a, (__v2si)b);
1248 }
1249 
/*
 * _mm_mul_epu32: returns __builtin_ia32_pmuludq128(a, b) on __v4si vectors.
 * NOTE(review): presumably multiplies the even-indexed unsigned 32-bit lanes
 * into two 64-bit products - confirm against the Intel reference.
 */
1250 __INTRIN_INLINE_SSE2 __m128i _mm_mul_epu32(__m128i a, __m128i b)
1251 {
1252  return __builtin_ia32_pmuludq128((__v4si)a, (__v4si)b);
1253 }
1254 
/*
 * _mm_sad_epu8: returns __builtin_ia32_psadbw128(a, b) on __v16qi vectors.
 * NOTE(review): presumably the sum of absolute differences of unsigned bytes,
 * one sum per 64-bit half - confirm against the Intel reference.
 */
1255 __INTRIN_INLINE_SSE2 __m128i _mm_sad_epu8(__m128i a, __m128i b)
1256 {
1257  return __builtin_ia32_psadbw128((__v16qi)a, (__v16qi)b);
1258 }
1259 
/*
 * _mm_sub_epi8: lane-wise difference a - b on __v16qu vectors.
 * NOTE(review): __v16qu is presumably the 16 x unsigned char vector type,
 * chosen so lane underflow wraps - confirm.
 */
1260 __INTRIN_INLINE_SSE2 __m128i _mm_sub_epi8(__m128i a, __m128i b)
1261 {
1262  return (__m128i)((__v16qu)a - (__v16qu)b);
1263 }
1264 
/*
 * _mm_sub_epi16: lane-wise difference a - b on __v8hu (unsigned short)
 * vectors; the unsigned lanes mean underflow wraps around.
 */
1265 __INTRIN_INLINE_SSE2 __m128i _mm_sub_epi16(__m128i a, __m128i b)
1266 {
1267  return (__m128i)((__v8hu)a - (__v8hu)b);
1268 }
1269 
/*
 * _mm_sub_epi32: lane-wise difference a - b on __v4su vectors.
 * NOTE(review): __v4su is presumably the 4 x unsigned int vector type, chosen
 * so lane underflow wraps - confirm.
 */
1270 __INTRIN_INLINE_SSE2 __m128i _mm_sub_epi32(__m128i a, __m128i b)
1271 {
1272  return (__m128i)((__v4su)a - (__v4su)b);
1273 }
1274 
1276 {
1277  return (__m64)__builtin_ia32_psubq((__v1di)a, (__v1di)b);
1278 }
1279 
/*
 * _mm_sub_epi64: lane-wise difference a - b on __v2du (unsigned long long)
 * vectors; the unsigned lanes mean underflow wraps around.
 */
1280 __INTRIN_INLINE_SSE2 __m128i _mm_sub_epi64(__m128i a, __m128i b)
1281 {
1282  return (__m128i)((__v2du)a - (__v2du)b);
1283 }
1284 
/*
 * _mm_subs_epi8: saturating lane-wise subtract (a - b) on 8-bit lanes.
 * Uses __builtin_elementwise_sub_sat on __v16qs vectors when available,
 * otherwise the psubsb128 builtin on __v16qi vectors.
 * NOTE(review): __v16qs is presumably the explicitly signed char vector
 * type, giving signed saturation - confirm.
 */
1285 __INTRIN_INLINE_SSE2 __m128i _mm_subs_epi8(__m128i a, __m128i b)
1286 {
1287 #if HAS_BUILTIN(__builtin_elementwise_sub_sat)
1288  return (__m128i)__builtin_elementwise_sub_sat((__v16qs)a, (__v16qs)b);
1289 #else
1290  return (__m128i)__builtin_ia32_psubsb128((__v16qi)a, (__v16qi)b);
1291 #endif
1292 }
1293 
/*
 * _mm_subs_epi16: saturating lane-wise subtract (a - b) on __v8hi (short)
 * lanes. Uses __builtin_elementwise_sub_sat when available, otherwise the
 * psubsw128 builtin.
 */
1294 __INTRIN_INLINE_SSE2 __m128i _mm_subs_epi16(__m128i a, __m128i b)
1295 {
1296 #if HAS_BUILTIN(__builtin_elementwise_sub_sat)
1297  return (__m128i)__builtin_elementwise_sub_sat((__v8hi)a, (__v8hi)b);
1298 #else
1299  return (__m128i)__builtin_ia32_psubsw128((__v8hi)a, (__v8hi)b);
1300 #endif
1301 }
1302 
/*
 * _mm_subs_epu8: saturating lane-wise subtract (a - b) on 8-bit lanes.
 * Uses __builtin_elementwise_sub_sat on __v16qu vectors when available,
 * otherwise the psubusb128 builtin on __v16qi vectors.
 * NOTE(review): __v16qu is presumably the unsigned char vector type, giving
 * unsigned saturation - confirm.
 */
1303 __INTRIN_INLINE_SSE2 __m128i _mm_subs_epu8(__m128i a, __m128i b)
1304 {
1305 #if HAS_BUILTIN(__builtin_elementwise_sub_sat)
1306  return (__m128i)__builtin_elementwise_sub_sat((__v16qu)a, (__v16qu)b);
1307 #else
1308  return (__m128i)__builtin_ia32_psubusb128((__v16qi)a, (__v16qi)b);
1309 #endif
1310 }
1311 
/*
 * _mm_subs_epu16: saturating lane-wise subtract (a - b) on 16-bit lanes.
 * Uses __builtin_elementwise_sub_sat on __v8hu (unsigned short) vectors when
 * available, otherwise the psubusw128 builtin on __v8hi vectors.
 */
1312 __INTRIN_INLINE_SSE2 __m128i _mm_subs_epu16(__m128i a, __m128i b)
1313 {
1314 #if HAS_BUILTIN(__builtin_elementwise_sub_sat)
1315  return (__m128i)__builtin_elementwise_sub_sat((__v8hu)a, (__v8hu)b);
1316 #else
1317  return (__m128i)__builtin_ia32_psubusw128((__v8hi)a, (__v8hi)b);
1318 #endif
1319 }
1320 
/* _mm_and_si128: bitwise AND of all 128 bits of a and b (computed on __v2du lanes). */
1321 __INTRIN_INLINE_SSE2 __m128i _mm_and_si128(__m128i a, __m128i b)
1322 {
1323  return (__m128i)((__v2du)a & (__v2du)b);
1324 }
1325 
/*
 * _mm_andnot_si128: computes (~a) & b over all 128 bits. Note that it is the
 * FIRST operand that is complemented.
 */
1326 __INTRIN_INLINE_SSE2 __m128i _mm_andnot_si128(__m128i a, __m128i b)
1327 {
1328  return (__m128i)(~(__v2du)a & (__v2du)b);
1329 }
1330 
/* _mm_or_si128: bitwise OR of all 128 bits of a and b (computed on __v2du lanes). */
1331 __INTRIN_INLINE_SSE2 __m128i _mm_or_si128(__m128i a, __m128i b)
1332 {
1333  return (__m128i)((__v2du)a | (__v2du)b);
1334 }
1335 
/* _mm_xor_si128: bitwise XOR of all 128 bits of a and b (computed on __v2du lanes). */
1336 __INTRIN_INLINE_SSE2 __m128i _mm_xor_si128(__m128i a, __m128i b)
1337 {
1338  return (__m128i)((__v2du)a ^ (__v2du)b);
1339 }
1340 
/*
 * _mm_slli_si128: macro wrapper around __builtin_ia32_pslldqi128_byteshift;
 * a is converted to __m128i then __v2di, imm is cast to int.
 * NOTE(review): presumably shifts the whole 128-bit value left by imm BYTES
 * and requires imm to be a compile-time constant (hence a macro) - confirm.
 */
1341 #define _mm_slli_si128(a, imm) \
1342  ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)))
1343 
1345 {
1346  return (__m128i)__builtin_ia32_psllwi128((__v8hi)a, count);
1347 }
1348 
/*
 * _mm_sll_epi16: forwards a and count (both as __v8hi) to
 * __builtin_ia32_psllw128.
 * NOTE(review): presumably shifts every 16-bit lane left by the amount held
 * in the low 64 bits of count - confirm against the Intel reference.
 */
1349 __INTRIN_INLINE_SSE2 __m128i _mm_sll_epi16(__m128i a, __m128i count)
1350 {
1351  return (__m128i)__builtin_ia32_psllw128((__v8hi)a, (__v8hi)count);
1352 }
1353 
1355 {
1356  return (__m128i)__builtin_ia32_pslldi128((__v4si)a, count);
1357 }
1358 
/*
 * _mm_sll_epi32: forwards a and count (both as __v4si) to
 * __builtin_ia32_pslld128.
 * NOTE(review): presumably shifts every 32-bit lane left by the amount held
 * in the low 64 bits of count - confirm against the Intel reference.
 */
1359 __INTRIN_INLINE_SSE2 __m128i _mm_sll_epi32(__m128i a, __m128i count)
1360 {
1361  return (__m128i)__builtin_ia32_pslld128((__v4si)a, (__v4si)count);
1362 }
1363 
1365 {
1366  return __builtin_ia32_psllqi128((__v2di)a, count);
1367 }
1368 
/*
 * _mm_sll_epi64: forwards a and count (both as __v2di) to
 * __builtin_ia32_psllq128.
 * NOTE(review): presumably shifts both 64-bit lanes left by the amount held
 * in the low 64 bits of count - confirm against the Intel reference.
 */
1369 __INTRIN_INLINE_SSE2 __m128i _mm_sll_epi64(__m128i a, __m128i count)
1370 {
1371  return __builtin_ia32_psllq128((__v2di)a, (__v2di)count);
1372 }
1373 
1375 {
1376  return (__m128i)__builtin_ia32_psrawi128((__v8hi)a, count);
1377 }
1378 
/*
 * _mm_sra_epi16: forwards a and count (both as __v8hi) to
 * __builtin_ia32_psraw128.
 * NOTE(review): presumably an arithmetic (sign-filling) right shift of every
 * 16-bit lane by the low 64 bits of count - confirm against the Intel
 * reference.
 */
1379 __INTRIN_INLINE_SSE2 __m128i _mm_sra_epi16(__m128i a, __m128i count)
1380 {
1381  return (__m128i)__builtin_ia32_psraw128((__v8hi)a, (__v8hi)count);
1382 }
1383 
1385 {
1386  return (__m128i)__builtin_ia32_psradi128((__v4si)a, count);
1387 }
1388 
/*
 * _mm_sra_epi32: forwards a and count (both as __v4si) to
 * __builtin_ia32_psrad128.
 * NOTE(review): presumably an arithmetic (sign-filling) right shift of every
 * 32-bit lane by the low 64 bits of count - confirm against the Intel
 * reference.
 */
1389 __INTRIN_INLINE_SSE2 __m128i _mm_sra_epi32(__m128i a, __m128i count)
1390 {
1391  return (__m128i)__builtin_ia32_psrad128((__v4si)a, (__v4si)count);
1392 }
1393 
/*
 * _mm_srli_si128: macro wrapper around __builtin_ia32_psrldqi128_byteshift;
 * a is converted to __m128i then __v2di, imm is cast to int.
 * NOTE(review): presumably shifts the whole 128-bit value right by imm BYTES
 * and requires imm to be a compile-time constant (hence a macro) - confirm.
 */
1394 #define _mm_srli_si128(a, imm) \
1395  ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)))
1396 
1398 {
1399  return (__m128i)__builtin_ia32_psrlwi128((__v8hi)a, count);
1400 }
1401 
/*
 * _mm_srl_epi16: forwards a and count (both as __v8hi) to
 * __builtin_ia32_psrlw128.
 * NOTE(review): presumably a logical (zero-filling) right shift of every
 * 16-bit lane by the low 64 bits of count - confirm against the Intel
 * reference.
 */
1402 __INTRIN_INLINE_SSE2 __m128i _mm_srl_epi16(__m128i a, __m128i count)
1403 {
1404  return (__m128i)__builtin_ia32_psrlw128((__v8hi)a, (__v8hi)count);
1405 }
1406 
1408 {
1409  return (__m128i)__builtin_ia32_psrldi128((__v4si)a, count);
1410 }
1411 
/*
 * _mm_srl_epi32: forwards a and count (both as __v4si) to
 * __builtin_ia32_psrld128.
 * NOTE(review): presumably a logical (zero-filling) right shift of every
 * 32-bit lane by the low 64 bits of count - confirm against the Intel
 * reference.
 */
1412 __INTRIN_INLINE_SSE2 __m128i _mm_srl_epi32(__m128i a, __m128i count)
1413 {
1414  return (__m128i)__builtin_ia32_psrld128((__v4si)a, (__v4si)count);
1415 }
1416 
1418 {
1419  return __builtin_ia32_psrlqi128((__v2di)a, count);
1420 }
1421 
/*
 * _mm_srl_epi64: forwards a and count (both as __v2di) to
 * __builtin_ia32_psrlq128.
 * NOTE(review): presumably a logical (zero-filling) right shift of both
 * 64-bit lanes by the low 64 bits of count - confirm against the Intel
 * reference.
 */
1422 __INTRIN_INLINE_SSE2 __m128i _mm_srl_epi64(__m128i a, __m128i count)
1423 {
1424  return __builtin_ia32_psrlq128((__v2di)a, (__v2di)count);
1425 }
1426 
/*
 * _mm_cmpeq_epi8: lane-wise == on a and b viewed as __v16qi (char) vectors.
 * The vector comparison result is returned as an __m128i mask (GCC/Clang
 * vector comparisons give -1 in lanes where the test holds, 0 elsewhere).
 */
1427 __INTRIN_INLINE_SSE2 __m128i _mm_cmpeq_epi8(__m128i a, __m128i b)
1428 {
1429  return (__m128i)((__v16qi)a == (__v16qi)b);
1430 }
1431 
/*
 * _mm_cmpeq_epi16: lane-wise == on a and b viewed as __v8hi (short) vectors.
 * The vector comparison result is returned as an __m128i mask (GCC/Clang
 * vector comparisons give -1 in lanes where the test holds, 0 elsewhere).
 */
1432 __INTRIN_INLINE_SSE2 __m128i _mm_cmpeq_epi16(__m128i a, __m128i b)
1433 {
1434  return (__m128i)((__v8hi)a == (__v8hi)b);
1435 }
1436 
/*
 * _mm_cmpeq_epi32: lane-wise == on a and b viewed as __v4si vectors.
 * The vector comparison result is returned as an __m128i mask (GCC/Clang
 * vector comparisons give -1 in lanes where the test holds, 0 elsewhere).
 */
1437 __INTRIN_INLINE_SSE2 __m128i _mm_cmpeq_epi32(__m128i a, __m128i b)
1438 {
1439  return (__m128i)((__v4si)a == (__v4si)b);
1440 }
1441 
/*
 * _mm_cmpgt_epi8: lane-wise signed a > b on 8-bit lanes, returned as an
 * __m128i mask.
 */
1442 __INTRIN_INLINE_SSE2 __m128i _mm_cmpgt_epi8(__m128i a, __m128i b)
1443 {
1444  /* This function always performs a signed comparison, but __v16qi is a char
1445  which may be signed or unsigned, so use __v16qs. */
1446  return (__m128i)((__v16qs)a > (__v16qs)b);
1447 }
1448 
/*
 * _mm_cmpgt_epi16: lane-wise a > b on __v8hi (short, hence signed) vectors,
 * returned as an __m128i mask.
 */
1449 __INTRIN_INLINE_SSE2 __m128i _mm_cmpgt_epi16(__m128i a, __m128i b)
1450 {
1451  return (__m128i)((__v8hi)a > (__v8hi)b);
1452 }
1453 
/*
 * _mm_cmpgt_epi32: lane-wise a > b on __v4si vectors, returned as an __m128i
 * mask. NOTE(review): __v4si is presumably the 4 x int vector type, making
 * this a signed comparison - confirm.
 */
1454 __INTRIN_INLINE_SSE2 __m128i _mm_cmpgt_epi32(__m128i a, __m128i b)
1455 {
1456  return (__m128i)((__v4si)a > (__v4si)b);
1457 }
1458 
/* _mm_cmplt_epi8: a < b on 8-bit lanes, implemented as _mm_cmpgt_epi8 with the operands swapped. */
1459 __INTRIN_INLINE_SSE2 __m128i _mm_cmplt_epi8(__m128i a, __m128i b)
1460 {
1461  return _mm_cmpgt_epi8(b, a);
1462 }
1463 
/* _mm_cmplt_epi16: a < b on 16-bit lanes, implemented as _mm_cmpgt_epi16 with the operands swapped. */
1464 __INTRIN_INLINE_SSE2 __m128i _mm_cmplt_epi16(__m128i a, __m128i b)
1465 {
1466  return _mm_cmpgt_epi16(b, a);
1467 }
1468 
/* _mm_cmplt_epi32: a < b on 32-bit lanes, implemented as _mm_cmpgt_epi32 with the operands swapped. */
1469 __INTRIN_INLINE_SSE2 __m128i _mm_cmplt_epi32(__m128i a, __m128i b)
1470 {
1471  return _mm_cmpgt_epi32(b, a);
1472 }
1473 
1474 #ifdef _M_AMD64
1475 
/*
 * _mm_cvtsi64_sd: returns a with element 0 replaced by b converted to double
 * (the conversion is done by the assignment); a[1] is kept.
 * Only compiled when _M_AMD64 is defined.
 */
1476 __INTRIN_INLINE_SSE2 __m128d _mm_cvtsi64_sd(__m128d a, long long b)
1477 {
1478  a[0] = b;
1479  return a;
1480 }
1481 
/*
 * _mm_cvtsd_si64: returns __builtin_ia32_cvtsd2si64(a) as a long long.
 * Only compiled when _M_AMD64 is defined.
 * NOTE(review): presumably converts the low double using the current
 * rounding mode - confirm against the Intel intrinsics reference.
 */
1482 __INTRIN_INLINE_SSE2 long long _mm_cvtsd_si64(__m128d a)
1483 {
1484  return __builtin_ia32_cvtsd2si64((__v2df)a);
1485 }
1486 
/*
 * _mm_cvttsd_si64: returns __builtin_ia32_cvttsd2si64(a) as a long long.
 * Only compiled when _M_AMD64 is defined.
 * NOTE(review): presumably converts the low double with truncation toward
 * zero - confirm against the Intel intrinsics reference.
 */
1487 __INTRIN_INLINE_SSE2 long long _mm_cvttsd_si64(__m128d a)
1488 {
1489  return __builtin_ia32_cvttsd2si64((__v2df)a);
1490 }
1491 #endif
1492 
1494 {
1495 #if HAS_BUILTIN(__builtin_convertvector)
1496  return (__m128)__builtin_convertvector((__v4si)a, __v4sf);
1497 #else
1498  return __builtin_ia32_cvtdq2ps((__v4si)a);
1499 #endif
1500 }
1501 
1503 {
1504  return (__m128i)__builtin_ia32_cvtps2dq((__v4sf)a);
1505 }
1506 
1508 {
1509  return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)a);
1510 }
1511 
1513 {
1514  return __extension__(__m128i)(__v4si){a, 0, 0, 0};
1515 }
1516 
1518 {
1519  return __extension__(__m128i)(__v2di){a, 0};
1520 }
1521 
1523 {
1524  __v4si b = (__v4si)a;
1525  return b[0];
1526 }
1527 
1529 {
1530  return a[0];
1531 }
1532 
/*
 * _mm_load_si128: loads a 128-bit integer vector by dereferencing p.
 * The access uses __m128i's declared 16-byte alignment, so p must be
 * aligned accordingly.
 */
1533 __INTRIN_INLINE_SSE2 __m128i _mm_load_si128(__m128i const *p)
1534 {
1535  return *p;
1536 }
1537 
/*
 * _mm_loadu_si128: unaligned load of a 128-bit integer vector from p.
 * The read goes through a packed, may_alias struct wrapping __m128i_u (the
 * 1-byte-aligned vector type), so p needs no particular alignment.
 */
1538 __INTRIN_INLINE_SSE2 __m128i _mm_loadu_si128(__m128i_u const *p)
1539 {
1540  struct __loadu_si128 {
1541  __m128i_u __v;
1542  } __attribute__((__packed__, __may_alias__));
1543  return ((const struct __loadu_si128 *)p)->__v;
1544 }
1545 
/*
 * _mm_loadl_epi64: reads one long long (64 bits) from p and returns it in
 * element 0 with element 1 set to zero.
 * The read goes through a packed, may_alias wrapper struct so it makes no
 * alignment or strict-aliasing assumption about p.
 */
1546 __INTRIN_INLINE_SSE2 __m128i _mm_loadl_epi64(__m128i_u const *p)
1547 {
1548  struct __mm_loadl_epi64_struct {
1549  long long __u;
1550  } __attribute__((__packed__, __may_alias__));
1551  return __extension__(__m128i){
1552  ((const struct __mm_loadl_epi64_struct *)p)->__u, 0};
1553 }
1554 
1556 {
1557 #if HAS_BUILTIN(__builtin_ia32_undef128)
1558  return (__m128i)__builtin_ia32_undef128();
1559 #else
1560  __m128i undef = undef;
1561  return undef;
1562 #endif
1563 }
1564 
/*
 * _mm_set_epi64x: builds a vector from two 64-bit integers given high-element
 * first: q0 becomes element 0 and q1 becomes element 1.
 */
1565 __INTRIN_INLINE_SSE2 __m128i _mm_set_epi64x(long long q1, long long q0)
1566 {
1567  return __extension__(__m128i)(__v2di){q0, q1};
1568 }
1569 
/*
 * _mm_set_epi64: __m64 flavour of _mm_set_epi64x. Each argument is cast to
 * long long and forwarded in the same order, so q0 ends up in element 0.
 */
1570 __INTRIN_INLINE_SSE2 __m128i _mm_set_epi64(__m64 q1, __m64 q0)
1571 {
1572  return _mm_set_epi64x((long long)q1, (long long)q0);
1573 }
1574 
/*
 * _mm_set_epi32: builds a vector from four ints given highest-element first:
 * i0 becomes element 0, i1 element 1, i2 element 2 and i3 element 3.
 */
1575 __INTRIN_INLINE_SSE2 __m128i _mm_set_epi32(int i3, int i2, int i1, int i0)
1576 {
1577  return __extension__(__m128i)(__v4si){i0, i1, i2, i3};
1578 }
1579 
1581  short w7, short w6, short w5, short w4,
1582  short w3, short w2, short w1, short w0)
1583 {
1584  return __extension__(__m128i)(__v8hi){w0, w1, w2, w3, w4, w5, w6, w7};
1585 }
1586 
1588  char b15, char b14, char b13, char b12,
1589  char b11, char b10, char b9, char b8,
1590  char b7, char b6, char b5, char b4,
1591  char b3, char b2, char b1, char b0)
1592 {
1593  return __extension__(__m128i)(__v16qi){
1594  b0, b1, b2, b3, b4, b5, b6, b7,
1595  b8, b9, b10, b11, b12, b13, b14, b15};
1596 }
1597 
1599 {
1600  return _mm_set_epi64x(q, q);
1601 }
1602 
1604 {
1605  return _mm_set_epi64(q, q);
1606 }
1607 
1609 {
1610  return _mm_set_epi32(i, i, i, i);
1611 }
1612 
1614 {
1615  return _mm_set_epi16(w, w, w, w, w, w, w, w);
1616 }
1617 
1619 {
1620  return _mm_set_epi8(b, b, b, b, b, b, b, b, b, b, b,
1621  b, b, b, b, b);
1622 }
1623 
/*
 * _mm_setr_epi64: element-order ("reversed") variant of _mm_set_epi64; the
 * arguments are swapped before forwarding, so q0 is element 0.
 */
1624 __INTRIN_INLINE_SSE2 __m128i _mm_setr_epi64(__m64 q0, __m64 q1)
1625 {
1626  return _mm_set_epi64(q1, q0);
1627 }
1628 
/*
 * _mm_setr_epi32: element-order ("reversed") variant of _mm_set_epi32; the
 * arguments are reversed before forwarding, so i0 is element 0.
 */
1629 __INTRIN_INLINE_SSE2 __m128i _mm_setr_epi32(int i0, int i1, int i2, int i3)
1630 {
1631  return _mm_set_epi32(i3, i2, i1, i0);
1632 }
1633 
1635  short w0, short w1, short w2, short w3,
1636  short w4, short w5, short w6, short w7)
1637 {
1638  return _mm_set_epi16(w7, w6, w5, w4, w3, w2, w1, w0);
1639 }
1640 
1642  char b0, char b1, char b2, char b3,
1643  char b4, char b5, char b6, char b7,
1644  char b8, char b9, char b10, char b11,
1645  char b12, char b13, char b14, char b15)
1646 {
1647  return _mm_set_epi8(b15, b14, b13, b12, b11, b10, b9, b8,
1648  b7, b6, b5, b4, b3, b2, b1, b0);
1649 }
1650 
1652 {
1653  return __extension__(__m128i)(__v2di){0LL, 0LL};
1654 }
1655 
/*
 * _mm_store_si128: stores b to *p. The store uses __m128i's declared 16-byte
 * alignment, so p must be aligned accordingly.
 */
1656 __INTRIN_INLINE_SSE2 void _mm_store_si128(__m128i *p, __m128i b)
1657 {
1658  *p = b;
1659 }
1660 
/*
 * _mm_storeu_si128: unaligned store of b to p.
 * The store goes through a packed, may_alias struct wrapping __m128i_u (the
 * 1-byte-aligned vector type), so p needs no particular alignment.
 */
1661 __INTRIN_INLINE_SSE2 void _mm_storeu_si128(__m128i_u *p, __m128i b)
1662 {
1663  struct __storeu_si128 {
1664  __m128i_u __v;
1665  } __attribute__((__packed__, __may_alias__));
1666  ((struct __storeu_si128 *)p)->__v = b;
1667 }
1668 
1670 {
1671  struct __storeu_si64 {
1672  long long __v;
1673  } __attribute__((__packed__, __may_alias__));
1674  ((struct __storeu_si64 *)p)->__v = ((__v2di)b)[0];
1675 }
1676 
1678 {
1679  struct __storeu_si32 {
1680  int __v;
1681  } __attribute__((__packed__, __may_alias__));
1682  ((struct __storeu_si32 *)p)->__v = ((__v4si)b)[0];
1683 }
1684 
1686 {
1687  struct __storeu_si16 {
1688  short __v;
1689  } __attribute__((__packed__, __may_alias__));
1690  ((struct __storeu_si16 *)p)->__v = ((__v8hi)b)[0];
1691 }
1692 
/*
 * _mm_maskmoveu_si128: forwards d and n (both as __v16qi) and the destination
 * pointer p to __builtin_ia32_maskmovdqu.
 * NOTE(review): presumably stores only those bytes of d whose corresponding
 * byte in n has its top bit set - confirm against the Intel reference.
 */
1693 __INTRIN_INLINE_SSE2 void _mm_maskmoveu_si128(__m128i d, __m128i n, char *p)
1694 {
1695  __builtin_ia32_maskmovdqu((__v16qi)d, (__v16qi)n, p);
1696 }
1697 
/*
 * _mm_storel_epi64: writes element 0 (the low 64 bits) of a to p.
 * The store goes through a packed, may_alias wrapper struct so it makes no
 * alignment or strict-aliasing assumption about p; only 8 bytes are written.
 */
1698 __INTRIN_INLINE_SSE2 void _mm_storel_epi64(__m128i_u *p, __m128i a)
1699 {
1700  struct __mm_storel_epi64_struct {
1701  long long __u;
1702  } __attribute__((__packed__, __may_alias__));
1703  ((struct __mm_storel_epi64_struct *)p)->__u = a[0];
1704 }
1705 
/*
 * _mm_stream_pd: stores a to p using a non-temporal store.
 * Uses __builtin_nontemporal_store through a __v2df pointer when available,
 * otherwise the movntpd builtin.
 * NOTE(review): p presumably must be 16-byte aligned - confirm against the
 * Intel intrinsics reference.
 */
1706 __INTRIN_INLINE_SSE2 void _mm_stream_pd(double *p, __m128d a)
1707 {
1708 #if HAS_BUILTIN(__builtin_nontemporal_store)
1709  __builtin_nontemporal_store((__v2df)a, (__v2df *)p);
1710 #else
1711  __builtin_ia32_movntpd(p, a);
1712 #endif
1713 }
1714 
/*
 * _mm_stream_si128: stores a to *p using a non-temporal store.
 * Uses __builtin_nontemporal_store through a __v2di pointer when available,
 * otherwise the movntdq builtin.
 * NOTE(review): p presumably must be 16-byte aligned - confirm against the
 * Intel intrinsics reference.
 */
1715 __INTRIN_INLINE_SSE2 void _mm_stream_si128(__m128i *p, __m128i a)
1716 {
1717 #if HAS_BUILTIN(__builtin_nontemporal_store)
1718  __builtin_nontemporal_store((__v2di)a, (__v2di*)p);
1719 #else
1720  __builtin_ia32_movntdq(p, a);
1721 #endif
1722 }
1723 
1725 {
1726  __builtin_ia32_movnti(p, a);
1727 }
1728 
1729 #ifdef _M_AMD64
/*
 * _mm_stream_si64: stores the 64-bit value a to *p via
 * __builtin_ia32_movnti64. Only compiled when _M_AMD64 is defined.
 * NOTE(review): presumably a non-temporal (cache-bypassing) store - confirm
 * against the Intel intrinsics reference.
 */
1730 __INTRIN_INLINE_SSE2 void _mm_stream_si64(long long *p, long long a)
1731 {
1732  __builtin_ia32_movnti64(p, a);
1733 }
1734 #endif
1735 
/* Declaration only; no inline definition is provided in this section. NOTE(review): presumably flushes the cache line containing p - confirm. */
1736 void _mm_clflush(void const *p);
1737 
/* Declaration only; no inline definition is provided in this section. NOTE(review): presumably a load fence (LFENCE) - confirm. */
1738 void _mm_lfence(void);
1739 
/* Declaration only; no inline definition is provided in this section. NOTE(review): presumably a full memory fence (MFENCE) - confirm. */
1740 void _mm_mfence(void);
1741 
/*
 * _mm_packs_epi16: returns __builtin_ia32_packsswb128(a, b) on __v8hi
 * vectors. NOTE(review): presumably narrows the sixteen 16-bit lanes of a and
 * b to 8-bit lanes with signed saturation - confirm against the Intel
 * reference.
 */
1742 __INTRIN_INLINE_SSE2 __m128i _mm_packs_epi16(__m128i a, __m128i b)
1743 {
1744  return (__m128i)__builtin_ia32_packsswb128((__v8hi)a, (__v8hi)b);
1745 }
1746 
/*
 * _mm_packs_epi32: returns __builtin_ia32_packssdw128(a, b) on __v4si
 * vectors. NOTE(review): presumably narrows the eight 32-bit lanes of a and b
 * to 16-bit lanes with signed saturation - confirm against the Intel
 * reference.
 */
1747 __INTRIN_INLINE_SSE2 __m128i _mm_packs_epi32(__m128i a, __m128i b)
1748 {
1749  return (__m128i)__builtin_ia32_packssdw128((__v4si)a, (__v4si)b);
1750 }
1751 
/*
 * _mm_packus_epi16: returns __builtin_ia32_packuswb128(a, b) on __v8hi
 * vectors. NOTE(review): presumably narrows the sixteen 16-bit lanes of a and
 * b to 8-bit lanes with unsigned saturation - confirm against the Intel
 * reference.
 */
1752 __INTRIN_INLINE_SSE2 __m128i _mm_packus_epi16(__m128i a, __m128i b)
1753 {
1754  return (__m128i)__builtin_ia32_packuswb128((__v8hi)a, (__v8hi)b);
1755 }
1756 
/*
 * _mm_extract_epi16: reads lane imm of a (viewed as __v8hi) through
 * __builtin_ia32_vec_ext_v8hi. The value is cast to unsigned short before
 * being widened to int, so the result is zero-extended, never negative.
 */
1757 #define _mm_extract_epi16(a, imm) \
1758  ((int)(unsigned short)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(a), \
1759  (int)(imm)))
1760 
/*
 * _mm_insert_epi16: passes a (viewed as __v8hi), (int)b and (int)imm to
 * __builtin_ia32_vec_set_v8hi and yields the result as an __m128i.
 * NOTE(review): presumably returns a copy of a with 16-bit lane imm replaced
 * by the low 16 bits of b - confirm against the Intel reference.
 */
1761 #define _mm_insert_epi16(a, b, imm) \
1762  ((__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b), \
1763  (int)(imm)))
1764 
1766 {
1767  return __builtin_ia32_pmovmskb128((__v16qi)a);
1768 }
1769 
/*
 * _mm_shuffle_epi32: macro wrapper around __builtin_ia32_pshufd on a viewed
 * as __v4si, with imm cast to int.
 * NOTE(review): presumably permutes the four 32-bit lanes according to the
 * 2-bit fields of a compile-time constant imm - confirm.
 */
1770 #define _mm_shuffle_epi32(a, imm) \
1771  ((__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a), (int)(imm)))
1772 
/*
 * _mm_shufflelo_epi16: macro wrapper around __builtin_ia32_pshuflw on a
 * viewed as __v8hi, with imm cast to int.
 * NOTE(review): presumably permutes the four low 16-bit lanes and leaves the
 * high four unchanged - confirm against the Intel reference.
 */
1773 #define _mm_shufflelo_epi16(a, imm) \
1774  ((__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a), (int)(imm)))
1775 
/*
 * _mm_shufflehi_epi16: macro wrapper around __builtin_ia32_pshufhw on a
 * viewed as __v8hi, with imm cast to int.
 * NOTE(review): presumably permutes the four high 16-bit lanes and leaves the
 * low four unchanged - confirm against the Intel reference.
 */
1776 #define _mm_shufflehi_epi16(a, imm) \
1777  ((__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a), (int)(imm)))
1778 
/*
 * _mm_unpackhi_epi8: interleaves the upper eight bytes of a and b:
 * result = {a[8], b[8], a[9], b[9], ..., a[15], b[15]}.
 * In the shufflevector form, indices 0-15 select from a and 16-31 from b.
 * Falls back to the punpckhbw128 builtin when shufflevector is unavailable.
 */
1779 __INTRIN_INLINE_SSE2 __m128i _mm_unpackhi_epi8(__m128i a, __m128i b)
1780 {
1781 #if HAS_BUILTIN(__builtin_shufflevector)
1782  return (__m128i)__builtin_shufflevector(
1783  (__v16qi)a, (__v16qi)b, 8, 16 + 8, 9, 16 + 9, 10, 16 + 10, 11,
1784  16 + 11, 12, 16 + 12, 13, 16 + 13, 14, 16 + 14, 15, 16 + 15);
1785 #else
1786  return (__m128i)__builtin_ia32_punpckhbw128((__v16qi)a, (__v16qi)b);
1787 #endif
1788 }
1789 
/*
 * _mm_unpackhi_epi16: interleaves the upper four 16-bit lanes of a and b:
 * result = {a[4], b[4], a[5], b[5], a[6], b[6], a[7], b[7]}.
 * In the shufflevector form, indices 0-7 select from a and 8-15 from b.
 * Falls back to the punpckhwd128 builtin when shufflevector is unavailable.
 */
1790 __INTRIN_INLINE_SSE2 __m128i _mm_unpackhi_epi16(__m128i a, __m128i b)
1791 {
1792 #if HAS_BUILTIN(__builtin_shufflevector)
1793  return (__m128i)__builtin_shufflevector((__v8hi)a, (__v8hi)b, 4, 8 + 4, 5,
1794  8 + 5, 6, 8 + 6, 7, 8 + 7);
1795 #else
1796  return (__m128i)__builtin_ia32_punpckhwd128((__v8hi)a, (__v8hi)b);
1797 #endif
1798 }
1799 
/*
 * _mm_unpackhi_epi32: interleaves the upper two 32-bit lanes of a and b:
 * result = {a[2], b[2], a[3], b[3]}.
 * In the shufflevector form, indices 0-3 select from a and 4-7 from b.
 * Falls back to the punpckhdq128 builtin when shufflevector is unavailable.
 */
1800 __INTRIN_INLINE_SSE2 __m128i _mm_unpackhi_epi32(__m128i a, __m128i b)
1801 {
1802 #if HAS_BUILTIN(__builtin_shufflevector)
1803  return (__m128i)__builtin_shufflevector((__v4si)a, (__v4si)b, 2, 4 + 2, 3,
1804  4 + 3);
1805 #else
1806  return (__m128i)__builtin_ia32_punpckhdq128((__v4si)a, (__v4si)b);
1807 #endif
1808 }
1809 
/*
 * _mm_unpackhi_epi64: combines the upper 64-bit lanes of a and b:
 * result = {a[1], b[1]}.
 * In the shufflevector form, indices 0-1 select from a and 2-3 from b.
 * Falls back to the punpckhqdq128 builtin when shufflevector is unavailable.
 */
1810 __INTRIN_INLINE_SSE2 __m128i _mm_unpackhi_epi64(__m128i a, __m128i b)
1811 {
1812 #if HAS_BUILTIN(__builtin_shufflevector)
1813  return (__m128i)__builtin_shufflevector((__v2di)a, (__v2di)b, 1, 2 + 1);
1814 #else
1815  return (__m128i)__builtin_ia32_punpckhqdq128((__v2di)a, (__v2di)b);
1816 #endif
1817 }
1818 
/*
 * _mm_unpacklo_epi8: interleaves the lower eight bytes of a and b:
 * result = {a[0], b[0], a[1], b[1], ..., a[7], b[7]}.
 * In the shufflevector form, indices 0-15 select from a and 16-31 from b.
 * Falls back to the punpcklbw128 builtin when shufflevector is unavailable.
 */
1819 __INTRIN_INLINE_SSE2 __m128i _mm_unpacklo_epi8(__m128i a, __m128i b)
1820 {
1821 #if HAS_BUILTIN(__builtin_shufflevector)
1822  return (__m128i)__builtin_shufflevector(
1823  (__v16qi)a, (__v16qi)b, 0, 16 + 0, 1, 16 + 1, 2, 16 + 2, 3, 16 + 3, 4,
1824  16 + 4, 5, 16 + 5, 6, 16 + 6, 7, 16 + 7);
1825 #else
1826  return (__m128i)__builtin_ia32_punpcklbw128((__v16qi)a, (__v16qi)b);
1827 #endif
1828 }
1829 
/*
 * _mm_unpacklo_epi16: interleaves the lower four 16-bit lanes of a and b:
 * result = {a[0], b[0], a[1], b[1], a[2], b[2], a[3], b[3]}.
 * In the shufflevector form, indices 0-7 select from a and 8-15 from b.
 * Falls back to the punpcklwd128 builtin when shufflevector is unavailable.
 */
1830 __INTRIN_INLINE_SSE2 __m128i _mm_unpacklo_epi16(__m128i a, __m128i b)
1831 {
1832 #if HAS_BUILTIN(__builtin_shufflevector)
1833  return (__m128i)__builtin_shufflevector((__v8hi)a, (__v8hi)b, 0, 8 + 0, 1,
1834  8 + 1, 2, 8 + 2, 3, 8 + 3);
1835 #else
1836  return (__m128i)__builtin_ia32_punpcklwd128((__v8hi)a, (__v8hi)b);
1837 #endif
1838 }
1839 
/*
 * _mm_unpacklo_epi32: interleaves the lower two 32-bit lanes of a and b:
 * result = {a[0], b[0], a[1], b[1]}.
 * In the shufflevector form, indices 0-3 select from a and 4-7 from b.
 * Falls back to the punpckldq128 builtin when shufflevector is unavailable.
 */
1840 __INTRIN_INLINE_SSE2 __m128i _mm_unpacklo_epi32(__m128i a, __m128i b)
1841 {
1842 #if HAS_BUILTIN(__builtin_shufflevector)
1843  return (__m128i)__builtin_shufflevector((__v4si)a, (__v4si)b, 0, 4 + 0, 1,
1844  4 + 1);
1845 #else
1846  return (__m128i)__builtin_ia32_punpckldq128((__v4si)a, (__v4si)b);
1847 #endif
1848 }
1849 
/*
 * _mm_unpacklo_epi64: combines the lower 64-bit lanes of a and b:
 * result = {a[0], b[0]}.
 * In the shufflevector form, indices 0-1 select from a and 2-3 from b.
 * Falls back to the punpcklqdq128 builtin when shufflevector is unavailable.
 */
1850 __INTRIN_INLINE_SSE2 __m128i _mm_unpacklo_epi64(__m128i a, __m128i b)
1851 {
1852 #if HAS_BUILTIN(__builtin_shufflevector)
1853  return (__m128i)__builtin_shufflevector((__v2di)a, (__v2di)b, 0, 2 + 0);
1854 #else
1855  return (__m128i)__builtin_ia32_punpcklqdq128((__v2di)a, (__v2di)b);
1856 #endif
1857 }
1858 
1860 {
1861  return (__m64)a[0];
1862 }
1863 
1865 {
1866  return __extension__(__m128i)(__v2di){(long long)a, 0};
1867 }
1868 
1870 {
1871 #if HAS_BUILTIN(__builtin_shufflevector)
1872  return __builtin_shufflevector((__v2di)a, _mm_setzero_si128(), 0, 2);
1873 #else
1874  return (__m128i)__builtin_ia32_movq128((__v2di)a);
1875 #endif
1876 }
1877 
/*
 * _mm_unpackhi_pd: combines the high doubles of a and b:
 * result = {a[1], b[1]}.
 * In the shufflevector form, indices 0-1 select from a and 2-3 from b.
 * Falls back to the unpckhpd builtin when shufflevector is unavailable.
 */
1878 __INTRIN_INLINE_SSE2 __m128d _mm_unpackhi_pd(__m128d a, __m128d b)
1879 {
1880 #if HAS_BUILTIN(__builtin_shufflevector)
1881  return __builtin_shufflevector((__v2df)a, (__v2df)b, 1, 2 + 1);
1882 #else
1883  return (__m128d)__builtin_ia32_unpckhpd((__v2df)a, (__v2df)b);
1884 #endif
1885 }
1886 
/*
 * _mm_unpacklo_pd: combines the low doubles of a and b:
 * result = {a[0], b[0]}.
 * In the shufflevector form, indices 0-1 select from a and 2-3 from b.
 * Falls back to the unpcklpd builtin when shufflevector is unavailable.
 */
1887 __INTRIN_INLINE_SSE2 __m128d _mm_unpacklo_pd(__m128d a, __m128d b)
1888 {
1889 #if HAS_BUILTIN(__builtin_shufflevector)
1890  return __builtin_shufflevector((__v2df)a, (__v2df)b, 0, 2 + 0);
1891 #else
1892  return (__m128d)__builtin_ia32_unpcklpd((__v2df)a, (__v2df)b);
1893 #endif
1894 }
1895 
1897 {
1898  return __builtin_ia32_movmskpd((__v2df)a);
1899 }
1900 
/*
 * _mm_shuffle_pd: macro wrapper around __builtin_ia32_shufpd; a and b are
 * converted to __m128d then __v2df, and i is cast to int.
 * NOTE(review): presumably bit 0 of i picks which element of a becomes
 * result[0] and bit 1 picks which element of b becomes result[1] (see
 * _MM_SHUFFLE2) - confirm against the Intel reference.
 */
1901 #define _mm_shuffle_pd(a, b, i) \
1902  ((__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
1903  (int)(i)))
1904 
1906 {
1907  return (__m128)a;
1908 }
1909 
1911 {
1912  return (__m128i)a;
1913 }
1914 
1916 {
1917  return (__m128d)a;
1918 }
1919 
1921 {
1922  return (__m128i)a;
1923 }
1924 
1926 {
1927  return (__m128)a;
1928 }
1929 
1931 {
1932  return (__m128d)a;
1933 }
1934 
/* Declaration only; no inline definition is provided in this section. NOTE(review): presumably emits the PAUSE spin-wait hint - confirm. */
1935 void _mm_pause(void);
1936 
1937 #endif /* _MSC_VER */
1938 
1939 
1940 
1941 #endif /* _INCLUDED_EMM */
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble * u
Definition: glfuncs.h:240
int _mm_ucomile_sd(__m128d a, __m128d b)
Definition: emmintrin.h:826
__m128i _mm_mullo_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1240
__m128i _mm_add_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1123
__m128d _mm_div_pd(__m128d a, __m128d b)
Definition: emmintrin.h:606
__m128 _mm_cvtpd_ps(__m128d a)
Definition: emmintrin.h:846
__m128i _mm_set_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0)
Definition: emmintrin.h:1587
__m128i _mm_xor_si128(__m128i a, __m128i b)
Definition: emmintrin.h:1336
__m128d _mm_setr_pd(double w, double x)
Definition: emmintrin.h:1043
__m128i _mm_cmplt_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1464
__m128i _mm_cmpeq_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1427
__m128d _mm_cmpnle_pd(__m128d a, __m128d b)
Definition: emmintrin.h:707
__m128i _mm_unpacklo_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1840
__m128d _mm_sqrt_pd(__m128d a)
Definition: emmintrin.h:617
__m128i _mm_andnot_si128(__m128i a, __m128i b)
Definition: emmintrin.h:1326
void _mm_maskmoveu_si128(__m128i d, __m128i n, _Out_writes_bytes_(16) char *p)
#define LL
Definition: tui.h:150
__m128d _mm_castps_pd(__m128 a)
Definition: emmintrin.h:1915
GLubyte GLubyte GLubyte GLubyte w
Definition: glext.h:6102
__m128d _mm_cmplt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:727
__INTRIN_INLINE_SSE2 __m128i _mm_loadu_si32(void const *a)
Definition: emmintrin.h:973
__m128i _mm_setzero_si128(void)
Definition: emmintrin.h:1651
void _mm_storeu_pd(double *dp, __m128d a)
Definition: emmintrin.h:1083
__m128i _mm_set_epi16(short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0)
Definition: emmintrin.h:1580
__m128d _mm_cmpnlt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:764
__m128d _mm_castsi128_pd(__m128i a)
Definition: emmintrin.h:1930
__m128i _mm_sub_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1260
__m128i _mm_mulhi_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1230
__m64 _mm_movepi64_pi64(__m128i a)
Definition: emmintrin.h:1859
__m128i _mm_subs_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1294
__m128i _mm_sad_epu8(__m128i a, __m128i b)
Definition: emmintrin.h:1255
__m128i _mm_set1_epi32(int i)
Definition: emmintrin.h:1608
__m128d _mm_cmpunord_sd(__m128d a, __m128d b)
Definition: emmintrin.h:754
int _mm_comineq_sd(__m128d a, __m128d b)
Definition: emmintrin.h:811
void _mm_pause(void)
Definition: intrin_x86.h:2028
#define _DECLSPEC_INTRIN_TYPE
Definition: _mingw.h:234
__m128d _mm_cmplt_pd(__m128d a, __m128d b)
Definition: emmintrin.h:667
void _mm_store_si128(__m128i *p, __m128i b)
Definition: emmintrin.h:1656
__m128i _mm_max_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1194
GLuint GLuint GLsizei count
Definition: gl.h:1545
__m128i _mm_srli_epi32(__m128i a, int count)
Definition: emmintrin.h:1407
__m128i _mm_srai_epi32(__m128i a, int count)
Definition: emmintrin.h:1384
__INTRIN_INLINE_SSE2 void _mm_storeu_si32(void *p, __m128i b)
Definition: emmintrin.h:1677
__m128d _mm_andnot_pd(__m128d a, __m128d b)
Definition: emmintrin.h:647
__m128i _mm_or_si128(__m128i a, __m128i b)
Definition: emmintrin.h:1331
__m128d _mm_xor_pd(__m128d a, __m128d b)
Definition: emmintrin.h:657
__m128d _mm_unpackhi_pd(__m128d a, __m128d b)
Definition: emmintrin.h:1878
GLdouble n
Definition: glext.h:7729
__m128i _mm_srl_epi32(__m128i a, __m128i count)
Definition: emmintrin.h:1412
__m128d _mm_min_sd(__m128d a, __m128d b)
Definition: emmintrin.h:622
__m128i _mm_adds_epu16(__m128i a, __m128i b)
Definition: emmintrin.h:1170
#define _mm_shufflehi_epi16(a, imm)
Definition: emmintrin.h:1776
GLint GLint GLint GLint GLint x
Definition: gl.h:1548
struct _EXCEPTION_POINTERS *_CRTIMP __declspec(noreturn) void __cdecl terminate(void)
Definition: eh.h:27
__INTRIN_INLINE_SSE2 __m128i _mm_loadu_si16(void const *a)
Definition: emmintrin.h:982
__m128 _mm_cvtsd_ss(__m128 a, __m128d b)
Definition: emmintrin.h:879
__m128i _mm_sub_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1270
int align(int length, int align)
Definition: dsound8.c:36
__m128i _mm_srl_epi64(__m128i a, __m128i count)
Definition: emmintrin.h:1422
__m128d _mm_min_pd(__m128d a, __m128d b)
Definition: emmintrin.h:627
#define __INTRIN_INLINE_SSE
Definition: xmmintrin.h:70
__m128d _mm_cvtsi32_sd(__m128d a, int b)
Definition: emmintrin.h:884
__m128d _mm_mul_sd(__m128d a, __m128d b)
Definition: emmintrin.h:589
#define _mm_extract_epi16(a, imm)
Definition: emmintrin.h:1757
__m128d _mm_sub_pd(__m128d a, __m128d b)
Definition: emmintrin.h:584
__m128i _mm_sra_epi16(__m128i a, __m128i count)
Definition: emmintrin.h:1379
__m128i _mm_packs_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1742
__m128d _mm_cmpnge_pd(__m128d a, __m128d b)
Definition: emmintrin.h:717
#define __c
Definition: schilyio.h:209
__INTRIN_INLINE_SSE2 long long _mm_cvtsi128_si64(__m128i a)
Definition: emmintrin.h:1528
void _mm_storel_epi64(__m128i_u *p, __m128i a)
Definition: emmintrin.h:1698
__m128d _mm_cmpngt_pd(__m128d a, __m128d b)
Definition: emmintrin.h:712
__m128i _mm_cmpgt_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1454
void _mm_stream_si32(int *p, int a)
Definition: emmintrin.h:1724
__m128d _mm_cmpnlt_pd(__m128d a, __m128d b)
Definition: emmintrin.h:702
int _mm_cvttsd_si32(__m128d a)
Definition: emmintrin.h:902
__m128i _mm_cvttpd_epi32(__m128d a)
Definition: emmintrin.h:897
__m128d _mm_cmpngt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:774
__m128i _mm_srli_epi16(__m128i a, int count)
Definition: emmintrin.h:1397
__m128i _mm_movpi64_epi64(__m64 a)
Definition: emmintrin.h:1864
void _mm_store_pd(double *dp, __m128d a)
Definition: emmintrin.h:1067
__INTRIN_INLINE_SSE2 __m128i _mm_set1_epi64x(long long q)
Definition: emmintrin.h:1598
__m128i _mm_packus_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1752
__m128d _mm_max_sd(__m128d a, __m128d b)
Definition: emmintrin.h:632
void _mm_mfence(void)
Definition: intrin_x86.h:99
void _mm_storer_pd(double *dp, __m128d a)
Definition: emmintrin.h:1091
int _mm_comigt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:801
#define _STATIC_ASSERT(expr)
Definition: crtdefs.h:191
__m128d _mm_cvtss_sd(__m128d a, __m128 b)
Definition: emmintrin.h:891
__m128d _mm_cmpunord_pd(__m128d a, __m128d b)
Definition: emmintrin.h:692
#define a
Definition: ke_i.h:78
__INTRIN_INLINE_SSE2 __m128i _mm_loadu_si64(void const *a)
Definition: emmintrin.h:964
__m128d _mm_setzero_pd(void)
Definition: emmintrin.h:1048
__m128i _mm_adds_epu8(__m128i a, __m128i b)
Definition: emmintrin.h:1161
__m128i _mm_cvtps_epi32(__m128 a)
Definition: emmintrin.h:1502
__m128i _mm_loadu_si128(__m128i_u const *p)
Definition: emmintrin.h:1538
static CRYPT_DATA_BLOB b1[]
Definition: msg.c:573
__m128i _mm_unpackhi_epi64(__m128i a, __m128i b)
Definition: emmintrin.h:1810
__m128 _mm_castsi128_ps(__m128i a)
Definition: emmintrin.h:1925
__m128i _mm_loadl_epi64(__m128i_u const *p)
Definition: emmintrin.h:1546
__m128d _mm_add_pd(__m128d a, __m128d b)
Definition: emmintrin.h:573
__m128d _mm_add_sd(__m128d a, __m128d b)
Definition: emmintrin.h:567
__m128i _mm_setr_epi32(int i0, int i1, int i2, int i3)
Definition: emmintrin.h:1629
__m128i _mm_cmpeq_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1437
__m128d _mm_cvtepi32_pd(__m128i a)
Definition: emmintrin.h:860
void _mm_store_sd(double *dp, __m128d a)
Definition: emmintrin.h:1059
__m128i _mm_set_epi64(__m64 q1, __m64 q0)
Definition: emmintrin.h:1570
#define _mm_shuffle_pd(a, b, i)
Definition: emmintrin.h:1901
__INTRIN_INLINE_SSE2 void _mm_storeu_si64(void *p, __m128i b)
Definition: emmintrin.h:1669
__m128i _mm_move_epi64(__m128i a)
Definition: emmintrin.h:1869
__m128i _mm_load_si128(__m128i const *p)
Definition: emmintrin.h:1533
__m64 _mm_sub_si64(__m64 a, __m64 b)
Definition: emmintrin.h:1275
__m128i _mm_setr_epi64(__m64 q0, __m64 q1)
Definition: emmintrin.h:1624
__m128d _mm_cmpneq_pd(__m128d a, __m128d b)
Definition: emmintrin.h:697
void _mm_clflush(void const *p)
double _mm_cvtsd_f64(__m128d a)
Definition: emmintrin.h:922
__INTRIN_INLINE_SSE2 void _mm_storeu_si16(void *p, __m128i b)
Definition: emmintrin.h:1685
__m128i _mm_sll_epi64(__m128i a, __m128i count)
Definition: emmintrin.h:1369
void _mm_storel_pd(double *dp, __m128d a)
Definition: emmintrin.h:1110
__m128i _mm_srai_epi16(__m128i a, int count)
Definition: emmintrin.h:1374
__m128i _mm_set1_epi8(char b)
Definition: emmintrin.h:1618
__m128d _mm_cmple_pd(__m128d a, __m128d b)
Definition: emmintrin.h:672
void _mm_stream_si128(__m128i *p, __m128i a)
Definition: emmintrin.h:1715
__m128i _mm_cmpgt_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1442
int _mm_comile_sd(__m128d a, __m128d b)
Definition: emmintrin.h:796
__m128d _mm_cmpge_pd(__m128d a, __m128d b)
Definition: emmintrin.h:682
#define b
Definition: ke_i.h:79
__m128d _mm_cmpnle_sd(__m128d a, __m128d b)
Definition: emmintrin.h:769
int _mm_ucomigt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:831
__m128d _mm_loadu_pd(double const *dp)
Definition: emmintrin.h:956
__m128i _mm_unpacklo_epi64(__m128i a, __m128i b)
Definition: emmintrin.h:1850
int _mm_movemask_epi8(__m128i a)
Definition: emmintrin.h:1765
void _mm_store1_pd(double *dp, __m128d a)
Definition: emmintrin.h:1072
__m128i _mm_slli_epi32(__m128i a, int count)
Definition: emmintrin.h:1354
int _mm_comieq_sd(__m128d a, __m128d b)
Definition: emmintrin.h:786
__m128d _mm_load1_pd(double const *dp)
Definition: emmintrin.h:932
__m128d _mm_cmpneq_sd(__m128d a, __m128d b)
Definition: emmintrin.h:759
__m128d _mm_set1_pd(double w)
Definition: emmintrin.h:1033
__m128i _mm_slli_epi16(__m128i a, int count)
Definition: emmintrin.h:1344
__m128i _mm_cmplt_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1459
GLboolean GLboolean GLboolean b
Definition: glext.h:6204
int _mm_ucomilt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:821
__m128i _mm_min_epu8(__m128i a, __m128i b)
Definition: emmintrin.h:1221
__m128i _mm_cvttps_epi32(__m128 a)
Definition: emmintrin.h:1507
__m128d _mm_loadr_pd(double const *dp)
Definition: emmintrin.h:946
__m128i _mm_sub_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1265
__m128i _mm_srl_epi16(__m128i a, __m128i count)
Definition: emmintrin.h:1402
__m128d _mm_loadl_pd(__m128d a, double const *dp)
Definition: emmintrin.h:1009
__m128i _mm_cmpeq_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1432
#define _mm_insert_epi16(a, b, imm)
Definition: emmintrin.h:1761
__INTRIN_INLINE_SSE2 __m128i _mm_cvtsi64_si128(long long a)
Definition: emmintrin.h:1517
__m128d _mm_cmple_sd(__m128d a, __m128d b)
Definition: emmintrin.h:732
__m128d _mm_or_pd(__m128d a, __m128d b)
Definition: emmintrin.h:652
#define _mm_stream_si64
Definition: emmintrin.h:320
int _mm_movemask_pd(__m128d a)
Definition: emmintrin.h:1896
__m128i _mm_unpackhi_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1800
__INTRIN_INLINE_SSE2 __m128i _mm_undefined_si128(void)
Definition: emmintrin.h:1555
GLdouble GLdouble GLdouble GLdouble q
Definition: gl.h:2063
__m128i _mm_sra_epi32(__m128i a, __m128i count)
Definition: emmintrin.h:1389
__m128i _mm_sub_epi64(__m128i a, __m128i b)
Definition: emmintrin.h:1280
#define _CRT_ALIGN(x)
Definition: crtdefs.h:154
__m64 _mm_add_si64(__m64 a, __m64 b)
Definition: emmintrin.h:1133
__m128i _mm_cvtsi32_si128(int a)
Definition: emmintrin.h:1512
__m128i _mm_subs_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1285
__m128i _mm_mulhi_epu16(__m128i a, __m128i b)
Definition: emmintrin.h:1235
__m128d _mm_set_sd(double w)
Definition: emmintrin.h:1028
#define _mm_shufflelo_epi16(a, imm)
Definition: emmintrin.h:1773
void _mm_storeu_si128(__m128i_u *p, __m128i b)
Definition: emmintrin.h:1661
__m128i _mm_unpackhi_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1779
__m128d _mm_and_pd(__m128d a, __m128d b)
Definition: emmintrin.h:642
__m128d _mm_cmpgt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:737
__m64 _mm_mul_su32(__m64 a, __m64 b)
Definition: emmintrin.h:1245
__m128i _mm_sll_epi32(__m128i a, __m128i count)
Definition: emmintrin.h:1359
__m128i _mm_subs_epu8(__m128i a, __m128i b)
Definition: emmintrin.h:1303
__m128d _mm_cvtpi32_pd(__m64 a)
Definition: emmintrin.h:917
int _mm_ucomieq_sd(__m128d a, __m128d b)
Definition: emmintrin.h:816
__m128d _mm_cmpgt_pd(__m128d a, __m128d b)
Definition: emmintrin.h:677
__m128d _mm_load_pd(double const *dp)
Definition: emmintrin.h:927
__m64 _mm_cvttpd_pi32(__m128d a)
Definition: emmintrin.h:912
__m128i _mm_set_epi32(int i3, int i2, int i1, int i0)
Definition: emmintrin.h:1575
int _mm_comilt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:791
GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint GLdouble GLdouble w2
Definition: glext.h:8308
__m128i _mm_castpd_si128(__m128d a)
Definition: emmintrin.h:1910
__m128d _mm_unpacklo_pd(__m128d a, __m128d b)
Definition: emmintrin.h:1887
__m128i _mm_unpackhi_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1790
int _mm_cvtsd_si32(__m128d a)
Definition: emmintrin.h:874
__m128i _mm_madd_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1189
void _mm_storeh_pd(double *dp, __m128d a)
Definition: emmintrin.h:1102
#define __INTRIN_INLINE_SSE2
Definition: emmintrin.h:69
__m128i _mm_srli_epi64(__m128i a, int count)
Definition: emmintrin.h:1417
__m128 _mm_cvtepi32_ps(__m128i a)
Definition: emmintrin.h:1493
__m128d _mm_sub_sd(__m128d a, __m128d b)
Definition: emmintrin.h:578
__m128i _mm_adds_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1143
__m128i _mm_add_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1118
__m128 _mm_castpd_ps(__m128d a)
Definition: emmintrin.h:1905
__m128i _mm_adds_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1152
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
__m128d _mm_cmpge_sd(__m128d a, __m128d b)
Definition: emmintrin.h:743
static CRYPT_DATA_BLOB b3[]
Definition: msg.c:592
#define __int32
Definition: basetyps.h:19
__m128d _mm_cvtps_pd(__m128 a)
Definition: emmintrin.h:851
__m128i _mm_slli_epi64(__m128i a, int count)
Definition: emmintrin.h:1364
__m128d _mm_mul_pd(__m128d a, __m128d b)
Definition: emmintrin.h:595
__m128d _mm_cmpnge_sd(__m128d a, __m128d b)
Definition: emmintrin.h:780
#define __INTRIN_INLINE_MMX
Definition: mmintrin.h:64
#define long
Definition: qsort.c:33
__m128i _mm_cvtpd_epi32(__m128d a)
Definition: emmintrin.h:869
#define _mm_srli_si128(a, imm)
Definition: emmintrin.h:1394
__m128i _mm_cmplt_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1469
__m128d _mm_cmpeq_pd(__m128d a, __m128d b)
Definition: emmintrin.h:662
__m128i _mm_cmpgt_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1449
__m128d _mm_sqrt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:611
#define _mm_shuffle_epi32(a, imm)
Definition: emmintrin.h:1770
__m128i _mm_setr_epi16(short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7)
Definition: emmintrin.h:1634
__INTRIN_INLINE_SSE2 __m128i _mm_set_epi64x(long long q1, long long q0)
Definition: emmintrin.h:1565
__m128d _mm_loadh_pd(__m128d a, double const *dp)
Definition: emmintrin.h:1000
#define __int8
Definition: basetyps.h:25
__m128i _mm_and_si128(__m128i a, __m128i b)
Definition: emmintrin.h:1321
__m128d _mm_div_sd(__m128d a, __m128d b)
Definition: emmintrin.h:600
__m128i _mm_unpacklo_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1819
__m128d _mm_load_sd(double const *dp)
Definition: emmintrin.h:991
__m128i _mm_add_epi64(__m128i a, __m128i b)
Definition: emmintrin.h:1138
static CRYPT_DATA_BLOB b2[]
Definition: msg.c:582
static CRYPT_DATA_BLOB b4
Definition: msg.c:2284
__INTRIN_INLINE_SSE2 __m128d _mm_undefined_pd(void)
Definition: emmintrin.h:1018
__m128d _mm_move_sd(__m128d a, __m128d b)
Definition: emmintrin.h:1053
__m128d _mm_max_pd(__m128d a, __m128d b)
Definition: emmintrin.h:637
__m128i _mm_avg_epu16(__m128i a, __m128i b)
Definition: emmintrin.h:1184
double __m128d __attribute__((__vector_size__(16), __aligned__(16)))
Definition: emmintrin.h:43
int _mm_comige_sd(__m128d a, __m128d b)
Definition: emmintrin.h:806
GLboolean GLboolean GLboolean GLboolean a
Definition: glext.h:6204
int _mm_ucomige_sd(__m128d a, __m128d b)
Definition: emmintrin.h:836
GLfloat GLfloat p
Definition: glext.h:8902
__m128i _mm_set1_epi64(__m64 q)
Definition: emmintrin.h:1603
__m128d _mm_cmpord_pd(__m128d a, __m128d b)
Definition: emmintrin.h:687
void _mm_stream_pd(double *p, __m128d a)
Definition: emmintrin.h:1706
int _mm_cvtsi128_si32(__m128i a)
Definition: emmintrin.h:1522
__m128d _mm_set_pd(double w, double x)
Definition: emmintrin.h:1038
__m128i _mm_mul_epu32(__m128i a, __m128i b)
Definition: emmintrin.h:1250
__m64 _mm_cvtpd_pi32(__m128d a)
Definition: emmintrin.h:907
__m128d _mm_cmpeq_sd(__m128d a, __m128d b)
Definition: emmintrin.h:722
__m128i _mm_subs_epu16(__m128i a, __m128i b)
Definition: emmintrin.h:1312
#define _Out_writes_bytes_(size)
Definition: ms_sal.h:350
__m128i _mm_unpacklo_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1830
__m128i _mm_min_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1212
__m128i _mm_setr_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0)
Definition: emmintrin.h:1641
__m128i _mm_set1_epi16(short w)
Definition: emmintrin.h:1613
int _mm_ucomineq_sd(__m128d a, __m128d b)
Definition: emmintrin.h:841
#define _mm_slli_si128(a, imm)
Definition: emmintrin.h:1341
__m128i _mm_sll_epi16(__m128i a, __m128i count)
Definition: emmintrin.h:1349
__m128i _mm_avg_epu8(__m128i a, __m128i b)
Definition: emmintrin.h:1179
__m128d _mm_cmpord_sd(__m128d a, __m128d b)
Definition: emmintrin.h:749
#define d
Definition: ke_i.h:81
void _mm_lfence(void)
Definition: intrin_x86.h:106
__m128i _mm_castps_si128(__m128 a)
Definition: emmintrin.h:1920
__m128i _mm_setl_epi64(__m128i q)
#define __int64
Definition: basetyps.h:16
__m128i _mm_max_epu8(__m128i a, __m128i b)
Definition: emmintrin.h:1203
__m128i _mm_packs_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1747
#define __int16
Definition: basetyps.h:22
__m128i _mm_add_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1128
GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint GLdouble w1
Definition: glext.h:8308