ReactOS 0.4.15-dev-7907-g95bf896
emmintrin.h
Go to the documentation of this file.
1/*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#pragma once
11#ifndef _INCLUDED_EMM
12#define _INCLUDED_EMM
13
14#include <crtdefs.h>
15#include <xmmintrin.h>
16
17#if defined(_MSC_VER) && !defined(__clang__)
18
19typedef union _DECLSPEC_INTRIN_TYPE _CRT_ALIGN(16) __m128i
20{
21 __int8 m128i_i8[16];
22 __int16 m128i_i16[8];
23 __int32 m128i_i32[4];
24 __int64 m128i_i64[2];
25 unsigned __int8 m128i_u8[16];
26 unsigned __int16 m128i_u16[8];
27 unsigned __int32 m128i_u32[4];
28 unsigned __int64 m128i_u64[2];
29} __m128i;
30_STATIC_ASSERT(sizeof(__m128i) == 16);
31
32typedef struct _DECLSPEC_INTRIN_TYPE _CRT_ALIGN(16) __m128d
33{
34 double m128d_f64[2];
35} __m128d;
36
37typedef __declspec(align(1)) __m128i __m128i_u;
38
39#define __ATTRIBUTE_SSE2__
40
41#else /* _MSC_VER */
42
43typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16)));
44typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16)));
45
46typedef double __m128d_u __attribute__((__vector_size__(16), __aligned__(1)));
47typedef long long __m128i_u __attribute__((__vector_size__(16), __aligned__(1)));
48
49/* Type defines. */
50typedef double __v2df __attribute__((__vector_size__(16)));
51typedef long long __v2di __attribute__((__vector_size__(16)));
52typedef short __v8hi __attribute__((__vector_size__(16)));
53typedef char __v16qi __attribute__((__vector_size__(16)));
54
55/* Unsigned types */
56typedef unsigned long long __v2du __attribute__((__vector_size__(16)));
57typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
58typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
59
60/* We need an explicitly signed variant for char. Note that this shouldn't
61 * appear in the interface though. */
62typedef signed char __v16qs __attribute__((__vector_size__(16)));
63
64#ifdef __clang__
65#define __ATTRIBUTE_SSE2__ __attribute__((__target__("sse2"),__min_vector_width__(128)))
66#else
67#define __ATTRIBUTE_SSE2__ __attribute__((__target__("sse2")))
68#endif
69#define __INTRIN_INLINE_SSE2 __INTRIN_INLINE __ATTRIBUTE_SSE2__
70
71#endif /* _MSC_VER */
72
73extern __m128d _mm_add_sd(__m128d a, __m128d b);
74extern __m128d _mm_add_pd(__m128d a, __m128d b);
75extern __m128d _mm_sub_sd(__m128d a, __m128d b);
76extern __m128d _mm_sub_pd(__m128d a, __m128d b);
77extern __m128d _mm_mul_sd(__m128d a, __m128d b);
78extern __m128d _mm_mul_pd(__m128d a, __m128d b);
79extern __m128d _mm_div_sd(__m128d a, __m128d b);
80extern __m128d _mm_div_pd(__m128d a, __m128d b);
81extern __m128d _mm_sqrt_sd(__m128d a, __m128d b);
82extern __m128d _mm_sqrt_pd(__m128d a);
83extern __m128d _mm_min_sd(__m128d a, __m128d b);
84extern __m128d _mm_min_pd(__m128d a, __m128d b);
85extern __m128d _mm_max_sd(__m128d a, __m128d b);
86extern __m128d _mm_max_pd(__m128d a, __m128d b);
87extern __m128d _mm_and_pd(__m128d a, __m128d b);
88extern __m128d _mm_andnot_pd(__m128d a, __m128d b);
89extern __m128d _mm_or_pd(__m128d a, __m128d b);
90extern __m128d _mm_xor_pd(__m128d a, __m128d b);
91extern __m128d _mm_cmpeq_pd(__m128d a, __m128d b);
92extern __m128d _mm_cmplt_pd(__m128d a, __m128d b);
93extern __m128d _mm_cmple_pd(__m128d a, __m128d b);
94extern __m128d _mm_cmpgt_pd(__m128d a, __m128d b);
95extern __m128d _mm_cmpge_pd(__m128d a, __m128d b);
96extern __m128d _mm_cmpord_pd(__m128d a, __m128d b);
97extern __m128d _mm_cmpunord_pd(__m128d a, __m128d b);
98extern __m128d _mm_cmpneq_pd(__m128d a, __m128d b);
99extern __m128d _mm_cmpnlt_pd(__m128d a, __m128d b);
100extern __m128d _mm_cmpnle_pd(__m128d a, __m128d b);
101extern __m128d _mm_cmpngt_pd(__m128d a, __m128d b);
102extern __m128d _mm_cmpnge_pd(__m128d a, __m128d b);
103extern __m128d _mm_cmpeq_sd(__m128d a, __m128d b);
104extern __m128d _mm_cmplt_sd(__m128d a, __m128d b);
105extern __m128d _mm_cmple_sd(__m128d a, __m128d b);
106extern __m128d _mm_cmpgt_sd(__m128d a, __m128d b);
107extern __m128d _mm_cmpge_sd(__m128d a, __m128d b);
108extern __m128d _mm_cmpord_sd(__m128d a, __m128d b);
109extern __m128d _mm_cmpunord_sd(__m128d a, __m128d b);
110extern __m128d _mm_cmpneq_sd(__m128d a, __m128d b);
111extern __m128d _mm_cmpnlt_sd(__m128d a, __m128d b);
112extern __m128d _mm_cmpnle_sd(__m128d a, __m128d b);
113extern __m128d _mm_cmpngt_sd(__m128d a, __m128d b);
114extern __m128d _mm_cmpnge_sd(__m128d a, __m128d b);
115extern int _mm_comieq_sd(__m128d a, __m128d b);
116extern int _mm_comilt_sd(__m128d a, __m128d b);
117extern int _mm_comile_sd(__m128d a, __m128d b);
118extern int _mm_comigt_sd(__m128d a, __m128d b);
119extern int _mm_comige_sd(__m128d a, __m128d b);
120extern int _mm_comineq_sd(__m128d a, __m128d b);
121extern int _mm_ucomieq_sd(__m128d a, __m128d b);
122extern int _mm_ucomilt_sd(__m128d a, __m128d b);
123extern int _mm_ucomile_sd(__m128d a, __m128d b);
124extern int _mm_ucomigt_sd(__m128d a, __m128d b);
125extern int _mm_ucomige_sd(__m128d a, __m128d b);
126extern int _mm_ucomineq_sd(__m128d a, __m128d b);
127extern __m128 _mm_cvtpd_ps(__m128d a);
128extern __m128d _mm_cvtps_pd(__m128 a);
129extern __m128d _mm_cvtepi32_pd(__m128i a);
130extern __m128i _mm_cvtpd_epi32(__m128d a);
131extern int _mm_cvtsd_si32(__m128d a);
132extern __m128 _mm_cvtsd_ss(__m128 a, __m128d b);
133extern __m128d _mm_cvtsi32_sd(__m128d a, int b);
134extern __m128d _mm_cvtss_sd(__m128d a, __m128 b);
135extern __m128i _mm_cvttpd_epi32(__m128d a);
136extern int _mm_cvttsd_si32(__m128d a);
137extern __m64 _mm_cvtpd_pi32(__m128d a);
138extern __m64 _mm_cvttpd_pi32(__m128d a);
139extern __m128d _mm_cvtpi32_pd(__m64 a);
140extern double _mm_cvtsd_f64(__m128d a);
141extern __m128d _mm_load_pd(double const *dp);
142extern __m128d _mm_load1_pd(double const *dp);
143extern __m128d _mm_loadr_pd(double const *dp);
144extern __m128d _mm_loadu_pd(double const *dp);
145//extern __m128i _mm_loadu_si64(void const *a);
146//extern __m128i _mm_loadu_si32(void const *a);
147//extern __m128i _mm_loadu_si16(void const *a);
148extern __m128d _mm_load_sd(double const *dp);
149extern __m128d _mm_loadh_pd(__m128d a, double const *dp);
150extern __m128d _mm_loadl_pd(__m128d a, double const *dp);
151//extern __m128d _mm_undefined_pd(void);
152extern __m128d _mm_set_sd(double w);
153extern __m128d _mm_set1_pd(double w);
154extern __m128d _mm_set_pd(double w, double x);
155extern __m128d _mm_setr_pd(double w, double x);
156extern __m128d _mm_setzero_pd(void);
157extern __m128d _mm_move_sd(__m128d a, __m128d b);
158extern void _mm_store_sd(double *dp, __m128d a);
159extern void _mm_store_pd(double *dp, __m128d a);
160extern void _mm_store1_pd(double *dp, __m128d a);
161extern void _mm_storeu_pd(double *dp, __m128d a);
162extern void _mm_storer_pd(double *dp, __m128d a);
163extern void _mm_storeh_pd(double *dp, __m128d a);
164extern void _mm_storel_pd(double *dp, __m128d a);
165extern __m128i _mm_add_epi8(__m128i a, __m128i b);
166extern __m128i _mm_add_epi16(__m128i a, __m128i b);
167extern __m128i _mm_add_epi32(__m128i a, __m128i b);
168extern __m64 _mm_add_si64(__m64 a, __m64 b);
169extern __m128i _mm_add_epi64(__m128i a, __m128i b);
170extern __m128i _mm_adds_epi8(__m128i a, __m128i b);
171extern __m128i _mm_adds_epi16(__m128i a, __m128i b);
172extern __m128i _mm_adds_epu8(__m128i a, __m128i b);
173extern __m128i _mm_adds_epu16(__m128i a, __m128i b);
174extern __m128i _mm_avg_epu8(__m128i a, __m128i b);
175extern __m128i _mm_avg_epu16(__m128i a, __m128i b);
176extern __m128i _mm_madd_epi16(__m128i a, __m128i b);
177extern __m128i _mm_max_epi16(__m128i a, __m128i b);
178extern __m128i _mm_max_epu8(__m128i a, __m128i b);
179extern __m128i _mm_min_epi16(__m128i a, __m128i b);
180extern __m128i _mm_min_epu8(__m128i a, __m128i b);
181extern __m128i _mm_mulhi_epi16(__m128i a, __m128i b);
182extern __m128i _mm_mulhi_epu16(__m128i a, __m128i b);
183extern __m128i _mm_mullo_epi16(__m128i a, __m128i b);
184extern __m64 _mm_mul_su32(__m64 a, __m64 b);
185extern __m128i _mm_mul_epu32(__m128i a, __m128i b);
186extern __m128i _mm_sad_epu8(__m128i a, __m128i b);
187extern __m128i _mm_sub_epi8(__m128i a, __m128i b);
188extern __m128i _mm_sub_epi16(__m128i a, __m128i b);
189extern __m128i _mm_sub_epi32(__m128i a, __m128i b);
190extern __m64 _mm_sub_si64(__m64 a, __m64 b);
191extern __m128i _mm_sub_epi64(__m128i a, __m128i b);
192extern __m128i _mm_subs_epi8(__m128i a, __m128i b);
193extern __m128i _mm_subs_epi16(__m128i a, __m128i b);
194extern __m128i _mm_subs_epu8(__m128i a, __m128i b);
195extern __m128i _mm_subs_epu16(__m128i a, __m128i b);
196extern __m128i _mm_and_si128(__m128i a, __m128i b);
197extern __m128i _mm_andnot_si128(__m128i a, __m128i b);
198extern __m128i _mm_or_si128(__m128i a, __m128i b);
199extern __m128i _mm_xor_si128(__m128i a, __m128i b);
200extern __m128i _mm_slli_si128(__m128i a, int i);
201extern __m128i _mm_slli_epi16(__m128i a, int count);
202extern __m128i _mm_sll_epi16(__m128i a, __m128i count);
203extern __m128i _mm_slli_epi32(__m128i a, int count);
204extern __m128i _mm_sll_epi32(__m128i a, __m128i count);
205extern __m128i _mm_slli_epi64(__m128i a, int count);
206extern __m128i _mm_sll_epi64(__m128i a, __m128i count);
207extern __m128i _mm_srai_epi16(__m128i a, int count);
208extern __m128i _mm_sra_epi16(__m128i a, __m128i count);
209extern __m128i _mm_srai_epi32(__m128i a, int count);
210extern __m128i _mm_sra_epi32(__m128i a, __m128i count);
211extern __m128i _mm_srli_si128(__m128i a, int imm);
212extern __m128i _mm_srli_epi16(__m128i a, int count);
213extern __m128i _mm_srl_epi16(__m128i a, __m128i count);
214extern __m128i _mm_srli_epi32(__m128i a, int count);
215extern __m128i _mm_srl_epi32(__m128i a, __m128i count);
216extern __m128i _mm_srli_epi64(__m128i a, int count);
217extern __m128i _mm_srl_epi64(__m128i a, __m128i count);
218extern __m128i _mm_cmpeq_epi8(__m128i a, __m128i b);
219extern __m128i _mm_cmpeq_epi16(__m128i a, __m128i b);
220extern __m128i _mm_cmpeq_epi32(__m128i a, __m128i b);
221extern __m128i _mm_cmpgt_epi8(__m128i a, __m128i b);
222extern __m128i _mm_cmpgt_epi16(__m128i a, __m128i b);
223extern __m128i _mm_cmpgt_epi32(__m128i a, __m128i b);
224extern __m128i _mm_cmplt_epi8(__m128i a, __m128i b);
225extern __m128i _mm_cmplt_epi16(__m128i a, __m128i b);
226extern __m128i _mm_cmplt_epi32(__m128i a, __m128i b);
227#ifdef _M_AMD64
228extern __m128d _mm_cvtsi64_sd(__m128d a, long long b);
229extern long long _mm_cvtsd_si64(__m128d a);
230extern long long _mm_cvttsd_si64(__m128d a);
231#endif
232extern __m128 _mm_cvtepi32_ps(__m128i a);
233extern __m128i _mm_cvtps_epi32(__m128 a);
234extern __m128i _mm_cvttps_epi32(__m128 a);
235extern __m128i _mm_cvtsi32_si128(int a);
236#ifdef _M_AMD64
237extern __m128i _mm_cvtsi64_si128(long long a);
238#endif
239extern int _mm_cvtsi128_si32(__m128i a);
240#ifdef _M_AMD64
241extern long long _mm_cvtsi128_si64(__m128i a);
242#endif
243extern __m128i _mm_load_si128(__m128i const *p);
244extern __m128i _mm_loadu_si128(__m128i_u const *p);
245extern __m128i _mm_loadl_epi64(__m128i_u const *p);
246//extern __m128i _mm_undefined_si128(void);
247//extern __m128i _mm_set_epi64x(long long q1, long long q0); // FIXME
248extern __m128i _mm_set_epi64(__m64 q1, __m64 q0);
249//extern __m128i _mm_set_epi32(int i3, int i1, int i0);
250extern __m128i _mm_set_epi32(int i3, int i2, int i1, int i0);
251//extern __m128i _mm_set_epi16(short w7, short w2, short w1, short w0);
252extern __m128i _mm_set_epi16(short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0);
253//extern __m128i _mm_set_epi8(char b15, char b10, char b4, char b3, char b2, char b1, char b0);
254extern __m128i _mm_set_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0);
255//extern __m128i _mm_set1_epi64x(long long q); // FIXME
256extern __m128i _mm_set1_epi64(__m64 q);
257extern __m128i _mm_set1_epi32(int i);
258extern __m128i _mm_set1_epi16(short w);
259extern __m128i _mm_set1_epi8(char b);
260extern __m128i _mm_setl_epi64(__m128i q); // FIXME: clang?
261extern __m128i _mm_setr_epi64(__m64 q0, __m64 q1);
262//extern __m128i _mm_setr_epi32(int i0, int i2, int i3);
263extern __m128i _mm_setr_epi32(int i0, int i1, int i2, int i3);
264//extern __m128i _mm_setr_epi16(short w0, short w5, short w6, short w7);
265extern __m128i _mm_setr_epi16(short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7);
266//extern __m128i _mm_setr_epi8(char b0, char b6, char b11, char b12, char b13, char b14, char b15);
267extern __m128i _mm_setr_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0);
268extern __m128i _mm_setzero_si128(void);
269extern void _mm_store_si128(__m128i *p, __m128i b);
270extern void _mm_storeu_si128(__m128i_u *p, __m128i b);
271//extern void _mm_storeu_si64(void *p, __m128i b);
272//extern void _mm_storeu_si32(void *p, __m128i b);
273//extern void _mm_storeu_si16(void *p, __m128i b);
274extern void _mm_maskmoveu_si128(__m128i d, __m128i n, _Out_writes_bytes_(16) char *p);
275extern void _mm_storel_epi64(__m128i_u *p, __m128i a);
276extern void _mm_stream_pd(double *p, __m128d a);
277extern void _mm_stream_si128(__m128i *p, __m128i a);
278extern void _mm_stream_si32(int *p, int a);
279extern void _mm_clflush(void const *p);
280extern void _mm_lfence(void);
281extern void _mm_mfence(void);
282extern __m128i _mm_packs_epi16(__m128i a, __m128i b);
283extern __m128i _mm_packs_epi32(__m128i a, __m128i b);
284extern __m128i _mm_packus_epi16(__m128i a, __m128i b);
285extern int _mm_extract_epi16(__m128i a, int imm);
286extern __m128i _mm_insert_epi16(__m128i a, int b, int imm);
287extern int _mm_movemask_epi8(__m128i a);
288extern __m128i _mm_shuffle_epi32(__m128i a, int imm);
289extern __m128i _mm_shufflelo_epi16(__m128i a, int imm);
290extern __m128i _mm_shufflehi_epi16(__m128i a, int imm);
291extern __m128i _mm_unpackhi_epi8(__m128i a, __m128i b);
292extern __m128i _mm_unpackhi_epi16(__m128i a, __m128i b);
293extern __m128i _mm_unpackhi_epi32(__m128i a, __m128i b);
294extern __m128i _mm_unpackhi_epi64(__m128i a, __m128i b);
295extern __m128i _mm_unpacklo_epi8(__m128i a, __m128i b);
296extern __m128i _mm_unpacklo_epi16(__m128i a, __m128i b);
297extern __m128i _mm_unpacklo_epi32(__m128i a, __m128i b);
298extern __m128i _mm_unpacklo_epi64(__m128i a, __m128i b);
299extern __m64 _mm_movepi64_pi64(__m128i a);
300extern __m128i _mm_movpi64_epi64(__m64 a);
301extern __m128i _mm_move_epi64(__m128i a);
302extern __m128d _mm_unpackhi_pd(__m128d a, __m128d b);
303extern __m128d _mm_unpacklo_pd(__m128d a, __m128d b);
304extern int _mm_movemask_pd(__m128d a);
305extern __m128d _mm_shuffle_pd(__m128d a, __m128d b, int imm);
306extern __m128 _mm_castpd_ps(__m128d a);
307extern __m128i _mm_castpd_si128(__m128d a);
308extern __m128d _mm_castps_pd(__m128 a);
309extern __m128i _mm_castps_si128(__m128 a);
310extern __m128 _mm_castsi128_ps(__m128i a);
311extern __m128d _mm_castsi128_pd(__m128i a);
extern void _mm_pause(void);
313
314/* Alternate names */
315#define _mm_set_pd1(a) _mm_set1_pd(a)
316#define _mm_load_pd1(p) _mm_load1_pd(p)
317#define _mm_store_pd1(p, a) _mm_store1_pd((p), (a))
318#define _mm_bslli_si128 _mm_slli_si128
319#define _mm_bsrli_si128 _mm_srli_si128
320#define _mm_stream_si64 _mm_stream_si64x
321
322#if defined(_MSC_VER) && !defined(__clang__)
323
324#pragma intrinsic(_mm_add_sd)
325#pragma intrinsic(_mm_add_pd)
326#pragma intrinsic(_mm_sub_sd)
327#pragma intrinsic(_mm_sub_pd)
328#pragma intrinsic(_mm_mul_sd)
329#pragma intrinsic(_mm_mul_pd)
330#pragma intrinsic(_mm_div_sd)
331#pragma intrinsic(_mm_div_pd)
332#pragma intrinsic(_mm_sqrt_sd)
333#pragma intrinsic(_mm_sqrt_pd)
334#pragma intrinsic(_mm_min_sd)
335#pragma intrinsic(_mm_min_pd)
336#pragma intrinsic(_mm_max_sd)
337#pragma intrinsic(_mm_max_pd)
338#pragma intrinsic(_mm_and_pd)
339#pragma intrinsic(_mm_andnot_pd)
340#pragma intrinsic(_mm_or_pd)
341#pragma intrinsic(_mm_xor_pd)
342#pragma intrinsic(_mm_cmpeq_pd)
343#pragma intrinsic(_mm_cmplt_pd)
344#pragma intrinsic(_mm_cmple_pd)
345#pragma intrinsic(_mm_cmpgt_pd)
346#pragma intrinsic(_mm_cmpge_pd)
347#pragma intrinsic(_mm_cmpord_pd)
348#pragma intrinsic(_mm_cmpunord_pd)
349#pragma intrinsic(_mm_cmpneq_pd)
350#pragma intrinsic(_mm_cmpnlt_pd)
351#pragma intrinsic(_mm_cmpnle_pd)
352#pragma intrinsic(_mm_cmpngt_pd)
353#pragma intrinsic(_mm_cmpnge_pd)
354#pragma intrinsic(_mm_cmpeq_sd)
355#pragma intrinsic(_mm_cmplt_sd)
356#pragma intrinsic(_mm_cmple_sd)
357#pragma intrinsic(_mm_cmpgt_sd)
358#pragma intrinsic(_mm_cmpge_sd)
359#pragma intrinsic(_mm_cmpord_sd)
360#pragma intrinsic(_mm_cmpunord_sd)
361#pragma intrinsic(_mm_cmpneq_sd)
362#pragma intrinsic(_mm_cmpnlt_sd)
363#pragma intrinsic(_mm_cmpnle_sd)
364#pragma intrinsic(_mm_cmpngt_sd)
365#pragma intrinsic(_mm_cmpnge_sd)
366#pragma intrinsic(_mm_comieq_sd)
367#pragma intrinsic(_mm_comilt_sd)
368#pragma intrinsic(_mm_comile_sd)
369#pragma intrinsic(_mm_comigt_sd)
370#pragma intrinsic(_mm_comige_sd)
371#pragma intrinsic(_mm_comineq_sd)
372#pragma intrinsic(_mm_ucomieq_sd)
373#pragma intrinsic(_mm_ucomilt_sd)
374#pragma intrinsic(_mm_ucomile_sd)
375#pragma intrinsic(_mm_ucomigt_sd)
376#pragma intrinsic(_mm_ucomige_sd)
377#pragma intrinsic(_mm_ucomineq_sd)
378#pragma intrinsic(_mm_cvtpd_ps)
379#pragma intrinsic(_mm_cvtps_pd)
380#pragma intrinsic(_mm_cvtepi32_pd)
381#pragma intrinsic(_mm_cvtpd_epi32)
382#pragma intrinsic(_mm_cvtsd_si32)
383#pragma intrinsic(_mm_cvtsd_ss)
384#pragma intrinsic(_mm_cvtsi32_sd)
385#pragma intrinsic(_mm_cvtss_sd)
386#pragma intrinsic(_mm_cvttpd_epi32)
387#pragma intrinsic(_mm_cvttsd_si32)
388//#pragma intrinsic(_mm_cvtpd_pi32)
389//#pragma intrinsic(_mm_cvttpd_pi32)
390//#pragma intrinsic(_mm_cvtpi32_pd)
391#pragma intrinsic(_mm_cvtsd_f64)
392#pragma intrinsic(_mm_load_pd)
393#pragma intrinsic(_mm_load1_pd)
394#pragma intrinsic(_mm_loadr_pd)
395#pragma intrinsic(_mm_loadu_pd)
396//#pragma intrinsic(_mm_loadu_si64)
397//#pragma intrinsic(_mm_loadu_si32)
398//#pragma intrinsic(_mm_loadu_si16)
399#pragma intrinsic(_mm_load_sd)
400#pragma intrinsic(_mm_loadh_pd)
401#pragma intrinsic(_mm_loadl_pd)
402//#pragma intrinsic(_mm_undefined_pd)
403#pragma intrinsic(_mm_set_sd)
404#pragma intrinsic(_mm_set1_pd)
405#pragma intrinsic(_mm_set_pd)
406#pragma intrinsic(_mm_setr_pd)
407#pragma intrinsic(_mm_setzero_pd)
408#pragma intrinsic(_mm_move_sd)
409#pragma intrinsic(_mm_store_sd)
410#pragma intrinsic(_mm_store_pd)
411#pragma intrinsic(_mm_store1_pd)
412#pragma intrinsic(_mm_storeu_pd)
413#pragma intrinsic(_mm_storer_pd)
414#pragma intrinsic(_mm_storeh_pd)
415#pragma intrinsic(_mm_storel_pd)
416#pragma intrinsic(_mm_add_epi8)
417#pragma intrinsic(_mm_add_epi16)
418#pragma intrinsic(_mm_add_epi32)
419//#pragma intrinsic(_mm_add_si64)
420#pragma intrinsic(_mm_add_epi64)
421#pragma intrinsic(_mm_adds_epi8)
422#pragma intrinsic(_mm_adds_epi16)
423#pragma intrinsic(_mm_adds_epu8)
424#pragma intrinsic(_mm_adds_epu16)
425#pragma intrinsic(_mm_avg_epu8)
426#pragma intrinsic(_mm_avg_epu16)
427#pragma intrinsic(_mm_madd_epi16)
428#pragma intrinsic(_mm_max_epi16)
429#pragma intrinsic(_mm_max_epu8)
430#pragma intrinsic(_mm_min_epi16)
431#pragma intrinsic(_mm_min_epu8)
432#pragma intrinsic(_mm_mulhi_epi16)
433#pragma intrinsic(_mm_mulhi_epu16)
434#pragma intrinsic(_mm_mullo_epi16)
435//#pragma intrinsic(_mm_mul_su32)
436#pragma intrinsic(_mm_mul_epu32)
437#pragma intrinsic(_mm_sad_epu8)
438#pragma intrinsic(_mm_sub_epi8)
439#pragma intrinsic(_mm_sub_epi16)
440#pragma intrinsic(_mm_sub_epi32)
441//#pragma intrinsic(_mm_sub_si64)
442#pragma intrinsic(_mm_sub_epi64)
443#pragma intrinsic(_mm_subs_epi8)
444#pragma intrinsic(_mm_subs_epi16)
445#pragma intrinsic(_mm_subs_epu8)
446#pragma intrinsic(_mm_subs_epu16)
447#pragma intrinsic(_mm_and_si128)
448#pragma intrinsic(_mm_andnot_si128)
449#pragma intrinsic(_mm_or_si128)
450#pragma intrinsic(_mm_xor_si128)
451#pragma intrinsic(_mm_slli_si128)
452#pragma intrinsic(_mm_slli_epi16)
453#pragma intrinsic(_mm_sll_epi16)
454#pragma intrinsic(_mm_slli_epi32)
455#pragma intrinsic(_mm_sll_epi32)
456#pragma intrinsic(_mm_slli_epi64)
457#pragma intrinsic(_mm_sll_epi64)
458#pragma intrinsic(_mm_srai_epi16)
459#pragma intrinsic(_mm_sra_epi16)
460#pragma intrinsic(_mm_srai_epi32)
461#pragma intrinsic(_mm_sra_epi32)
462#pragma intrinsic(_mm_srli_si128)
463#pragma intrinsic(_mm_srli_epi16)
464#pragma intrinsic(_mm_srl_epi16)
465#pragma intrinsic(_mm_srli_epi32)
466#pragma intrinsic(_mm_srl_epi32)
467#pragma intrinsic(_mm_srli_epi64)
468#pragma intrinsic(_mm_srl_epi64)
469#pragma intrinsic(_mm_cmpeq_epi8)
470#pragma intrinsic(_mm_cmpeq_epi16)
471#pragma intrinsic(_mm_cmpeq_epi32)
472#pragma intrinsic(_mm_cmpgt_epi8)
473#pragma intrinsic(_mm_cmpgt_epi16)
474#pragma intrinsic(_mm_cmpgt_epi32)
475#pragma intrinsic(_mm_cmplt_epi8)
476#pragma intrinsic(_mm_cmplt_epi16)
477#pragma intrinsic(_mm_cmplt_epi32)
478#ifdef _M_AMD64
479#pragma intrinsic(_mm_cvtsi64_sd)
480#pragma intrinsic(_mm_cvtsd_si64)
481#pragma intrinsic(_mm_cvttsd_si64)
482#endif
483#pragma intrinsic(_mm_cvtepi32_ps)
484#pragma intrinsic(_mm_cvtps_epi32)
485#pragma intrinsic(_mm_cvttps_epi32)
486#pragma intrinsic(_mm_cvtsi32_si128)
487#ifdef _M_AMD64
488#pragma intrinsic(_mm_cvtsi64_si128)
489#endif
490#pragma intrinsic(_mm_cvtsi128_si32)
491#ifdef _M_AMD64
492#pragma intrinsic(_mm_cvtsi128_si64)
493#endif
494#pragma intrinsic(_mm_load_si128)
495#pragma intrinsic(_mm_loadu_si128)
496#pragma intrinsic(_mm_loadl_epi64)
497//#pragma intrinsic(_mm_undefined_si128)
498#pragma intrinsic(_mm_set_epi64x)
499//#pragma intrinsic(_mm_set_epi64)
500#pragma intrinsic(_mm_set_epi32)
501#pragma intrinsic(_mm_set_epi16)
502#pragma intrinsic(_mm_set_epi8)
503#pragma intrinsic(_mm_set1_epi64x)
504//#pragma intrinsic(_mm_set1_epi64)
505#pragma intrinsic(_mm_set1_epi32)
506#pragma intrinsic(_mm_set1_epi16)
507#pragma intrinsic(_mm_set1_epi8)
508#pragma intrinsic(_mm_setl_epi64)
509//#pragma intrinsic(_mm_setr_epi64)
510#pragma intrinsic(_mm_setr_epi32)
511#pragma intrinsic(_mm_setr_epi16)
512#pragma intrinsic(_mm_setr_epi8)
513#pragma intrinsic(_mm_setzero_si128)
514#pragma intrinsic(_mm_store_si128)
515#pragma intrinsic(_mm_storeu_si128)
516//#pragma intrinsic(_mm_storeu_si64)
517//#pragma intrinsic(_mm_storeu_si32)
518//#pragma intrinsic(_mm_storeu_si16)
519#pragma intrinsic(_mm_maskmoveu_si128)
520#pragma intrinsic(_mm_storel_epi64)
521#pragma intrinsic(_mm_stream_pd)
522#pragma intrinsic(_mm_stream_si128)
523#pragma intrinsic(_mm_stream_si32)
524#pragma intrinsic(_mm_clflush)
525#pragma intrinsic(_mm_lfence)
526#pragma intrinsic(_mm_mfence)
527#pragma intrinsic(_mm_packs_epi16)
528#pragma intrinsic(_mm_packs_epi32)
529#pragma intrinsic(_mm_packus_epi16)
530#pragma intrinsic(_mm_extract_epi16)
531#pragma intrinsic(_mm_insert_epi16)
532#pragma intrinsic(_mm_movemask_epi8)
533#pragma intrinsic(_mm_shuffle_epi32)
534#pragma intrinsic(_mm_shufflelo_epi16)
535#pragma intrinsic(_mm_shufflehi_epi16)
536#pragma intrinsic(_mm_unpackhi_epi8)
537#pragma intrinsic(_mm_unpackhi_epi16)
538#pragma intrinsic(_mm_unpackhi_epi32)
539#pragma intrinsic(_mm_unpackhi_epi64)
540#pragma intrinsic(_mm_unpacklo_epi8)
541#pragma intrinsic(_mm_unpacklo_epi16)
542#pragma intrinsic(_mm_unpacklo_epi32)
543#pragma intrinsic(_mm_unpacklo_epi64)
544//#pragma intrinsic(_mm_movepi64_pi64)
545//#pragma intrinsic(_mm_movpi64_epi64)
546#pragma intrinsic(_mm_move_epi64)
547#pragma intrinsic(_mm_unpackhi_pd)
548#pragma intrinsic(_mm_unpacklo_pd)
549#pragma intrinsic(_mm_movemask_pd)
550#pragma intrinsic(_mm_shuffle_pd)
551#pragma intrinsic(_mm_castpd_ps)
552#pragma intrinsic(_mm_castpd_si128)
553#pragma intrinsic(_mm_castps_pd)
554#pragma intrinsic(_mm_castps_si128)
555#pragma intrinsic(_mm_castsi128_ps)
556#pragma intrinsic(_mm_castsi128_pd)
557#pragma intrinsic(_mm_pause)
558
559#else /* _MSC_VER */
560
561/*
562 Clang: https://github.com/llvm/llvm-project/blob/main/clang/lib/Headers/emmintrin.h
563 Clang older version: https://github.com/llvm/llvm-project/blob/3ef88b31843e040c95f23ff2c3c206f1fa399c05/clang/lib/Headers/emmintrin.h
564 unikraft: https://github.com/unikraft/lib-intel-intrinsics/blob/staging/include/emmintrin.h
565*/
566
567__INTRIN_INLINE_SSE2 __m128d _mm_add_sd(__m128d a, __m128d b)
568{
569 a[0] += b[0];
570 return a;
571}
572
573__INTRIN_INLINE_SSE2 __m128d _mm_add_pd(__m128d a, __m128d b)
574{
575 return (__m128d)((__v2df)a + (__v2df)b);
576}
577
578__INTRIN_INLINE_SSE2 __m128d _mm_sub_sd(__m128d a, __m128d b)
579{
580 a[0] -= b[0];
581 return a;
582}
583
584__INTRIN_INLINE_SSE2 __m128d _mm_sub_pd(__m128d a, __m128d b)
585{
586 return (__m128d)((__v2df)a - (__v2df)b);
587}
588
589__INTRIN_INLINE_SSE2 __m128d _mm_mul_sd(__m128d a, __m128d b)
590{
591 a[0] *= b[0];
592 return a;
593}
594
595__INTRIN_INLINE_SSE2 __m128d _mm_mul_pd(__m128d a, __m128d b)
596{
597 return (__m128d)((__v2df)a * (__v2df)b);
598}
599
600__INTRIN_INLINE_SSE2 __m128d _mm_div_sd(__m128d a, __m128d b)
601{
602 a[0] /= b[0];
603 return a;
604}
605
606__INTRIN_INLINE_SSE2 __m128d _mm_div_pd(__m128d a, __m128d b)
607{
608 return (__m128d)((__v2df)a / (__v2df)b);
609}
610
611__INTRIN_INLINE_SSE2 __m128d _mm_sqrt_sd(__m128d a, __m128d b)
612{
613 __m128d __c = __builtin_ia32_sqrtsd((__v2df)b);
614 return __extension__(__m128d){__c[0], a[1]};
615}
616
618{
619 return __builtin_ia32_sqrtpd((__v2df)a);
620}
621
622__INTRIN_INLINE_SSE2 __m128d _mm_min_sd(__m128d a, __m128d b)
623{
624 return __builtin_ia32_minsd((__v2df)a, (__v2df)b);
625}
626
627__INTRIN_INLINE_SSE2 __m128d _mm_min_pd(__m128d a, __m128d b)
628{
629 return __builtin_ia32_minpd((__v2df)a, (__v2df)b);
630}
631
632__INTRIN_INLINE_SSE2 __m128d _mm_max_sd(__m128d a, __m128d b)
633{
634 return __builtin_ia32_maxsd((__v2df)a, (__v2df)b);
635}
636
637__INTRIN_INLINE_SSE2 __m128d _mm_max_pd(__m128d a, __m128d b)
638{
639 return __builtin_ia32_maxpd((__v2df)a, (__v2df)b);
640}
641
642__INTRIN_INLINE_SSE2 __m128d _mm_and_pd(__m128d a, __m128d b)
643{
644 return (__m128d)((__v2du)a & (__v2du)b);
645}
646
647__INTRIN_INLINE_SSE2 __m128d _mm_andnot_pd(__m128d a, __m128d b)
648{
649 return (__m128d)(~(__v2du)a & (__v2du)b);
650}
651
652__INTRIN_INLINE_SSE2 __m128d _mm_or_pd(__m128d a, __m128d b)
653{
654 return (__m128d)((__v2du)a | (__v2du)b);
655}
656
657__INTRIN_INLINE_SSE2 __m128d _mm_xor_pd(__m128d a, __m128d b)
658{
659 return (__m128d)((__v2du)a ^ (__v2du)b);
660}
661
662__INTRIN_INLINE_SSE2 __m128d _mm_cmpeq_pd(__m128d a, __m128d b)
663{
664 return (__m128d)__builtin_ia32_cmpeqpd((__v2df)a, (__v2df)b);
665}
666
667__INTRIN_INLINE_SSE2 __m128d _mm_cmplt_pd(__m128d a, __m128d b)
668{
669 return (__m128d)__builtin_ia32_cmpltpd((__v2df)a, (__v2df)b);
670}
671
672__INTRIN_INLINE_SSE2 __m128d _mm_cmple_pd(__m128d a, __m128d b)
673{
674 return (__m128d)__builtin_ia32_cmplepd((__v2df)a, (__v2df)b);
675}
676
677__INTRIN_INLINE_SSE2 __m128d _mm_cmpgt_pd(__m128d a, __m128d b)
678{
679 return (__m128d)__builtin_ia32_cmpltpd((__v2df)b, (__v2df)a);
680}
681
682__INTRIN_INLINE_SSE2 __m128d _mm_cmpge_pd(__m128d a, __m128d b)
683{
684 return (__m128d)__builtin_ia32_cmplepd((__v2df)b, (__v2df)a);
685}
686
687__INTRIN_INLINE_SSE2 __m128d _mm_cmpord_pd(__m128d a, __m128d b)
688{
689 return (__m128d)__builtin_ia32_cmpordpd((__v2df)a, (__v2df)b);
690}
691
692__INTRIN_INLINE_SSE2 __m128d _mm_cmpunord_pd(__m128d a, __m128d b)
693{
694 return (__m128d)__builtin_ia32_cmpunordpd((__v2df)a, (__v2df)b);
695}
696
697__INTRIN_INLINE_SSE2 __m128d _mm_cmpneq_pd(__m128d a, __m128d b)
698{
699 return (__m128d)__builtin_ia32_cmpneqpd((__v2df)a, (__v2df)b);
700}
701
702__INTRIN_INLINE_SSE2 __m128d _mm_cmpnlt_pd(__m128d a, __m128d b)
703{
704 return (__m128d)__builtin_ia32_cmpnltpd((__v2df)a, (__v2df)b);
705}
706
707__INTRIN_INLINE_SSE2 __m128d _mm_cmpnle_pd(__m128d a, __m128d b)
708{
709 return (__m128d)__builtin_ia32_cmpnlepd((__v2df)a, (__v2df)b);
710}
711
712__INTRIN_INLINE_SSE2 __m128d _mm_cmpngt_pd(__m128d a, __m128d b)
713{
714 return (__m128d)__builtin_ia32_cmpnltpd((__v2df)b, (__v2df)a);
715}
716
717__INTRIN_INLINE_SSE2 __m128d _mm_cmpnge_pd(__m128d a, __m128d b)
718{
719 return (__m128d)__builtin_ia32_cmpnlepd((__v2df)b, (__v2df)a);
720}
721
722__INTRIN_INLINE_SSE2 __m128d _mm_cmpeq_sd(__m128d a, __m128d b)
723{
724 return (__m128d)__builtin_ia32_cmpeqsd((__v2df)a, (__v2df)b);
725}
726
727__INTRIN_INLINE_SSE2 __m128d _mm_cmplt_sd(__m128d a, __m128d b)
728{
729 return (__m128d)__builtin_ia32_cmpltsd((__v2df)a, (__v2df)b);
730}
731
732__INTRIN_INLINE_SSE2 __m128d _mm_cmple_sd(__m128d a, __m128d b)
733{
734 return (__m128d)__builtin_ia32_cmplesd((__v2df)a, (__v2df)b);
735}
736
737__INTRIN_INLINE_SSE2 __m128d _mm_cmpgt_sd(__m128d a, __m128d b)
738{
739 __m128d __c = __builtin_ia32_cmpltsd((__v2df)b, (__v2df)a);
740 return __extension__(__m128d){__c[0], a[1]};
741}
742
743__INTRIN_INLINE_SSE2 __m128d _mm_cmpge_sd(__m128d a, __m128d b)
744{
745 __m128d __c = __builtin_ia32_cmplesd((__v2df)b, (__v2df)a);
746 return __extension__(__m128d){__c[0], a[1]};
747}
748
749__INTRIN_INLINE_SSE2 __m128d _mm_cmpord_sd(__m128d a, __m128d b)
750{
751 return (__m128d)__builtin_ia32_cmpordsd((__v2df)a, (__v2df)b);
752}
753
754__INTRIN_INLINE_SSE2 __m128d _mm_cmpunord_sd(__m128d a, __m128d b)
755{
756 return (__m128d)__builtin_ia32_cmpunordsd((__v2df)a, (__v2df)b);
757}
758
759__INTRIN_INLINE_SSE2 __m128d _mm_cmpneq_sd(__m128d a, __m128d b)
760{
761 return (__m128d)__builtin_ia32_cmpneqsd((__v2df)a, (__v2df)b);
762}
763
764__INTRIN_INLINE_SSE2 __m128d _mm_cmpnlt_sd(__m128d a, __m128d b)
765{
766 return (__m128d)__builtin_ia32_cmpnltsd((__v2df)a, (__v2df)b);
767}
768
769__INTRIN_INLINE_SSE2 __m128d _mm_cmpnle_sd(__m128d a, __m128d b)
770{
771 return (__m128d)__builtin_ia32_cmpnlesd((__v2df)a, (__v2df)b);
772}
773
774__INTRIN_INLINE_SSE2 __m128d _mm_cmpngt_sd(__m128d a, __m128d b)
775{
776 __m128d __c = __builtin_ia32_cmpnltsd((__v2df)b, (__v2df)a);
777 return __extension__(__m128d){__c[0], a[1]};
778}
779
780__INTRIN_INLINE_SSE2 __m128d _mm_cmpnge_sd(__m128d a, __m128d b)
781{
782 __m128d __c = __builtin_ia32_cmpnlesd((__v2df)b, (__v2df)a);
783 return __extension__(__m128d){__c[0], a[1]};
784}
785
787{
788 return __builtin_ia32_comisdeq((__v2df)a, (__v2df)b);
789}
790
792{
793 return __builtin_ia32_comisdlt((__v2df)a, (__v2df)b);
794}
795
797{
798 return __builtin_ia32_comisdle((__v2df)a, (__v2df)b);
799}
800
802{
803 return __builtin_ia32_comisdgt((__v2df)a, (__v2df)b);
804}
805
807{
808 return __builtin_ia32_comisdge((__v2df)a, (__v2df)b);
809}
810
812{
813 return __builtin_ia32_comisdneq((__v2df)a, (__v2df)b);
814}
815
817{
818 return __builtin_ia32_ucomisdeq((__v2df)a, (__v2df)b);
819}
820
822{
823 return __builtin_ia32_ucomisdlt((__v2df)a, (__v2df)b);
824}
825
827{
828 return __builtin_ia32_ucomisdle((__v2df)a, (__v2df)b);
829}
830
832{
833 return __builtin_ia32_ucomisdgt((__v2df)a, (__v2df)b);
834}
835
837{
838 return __builtin_ia32_ucomisdge((__v2df)a, (__v2df)b);
839}
840
842{
843 return __builtin_ia32_ucomisdneq((__v2df)a, (__v2df)b);
844}
845
847{
848 return __builtin_ia32_cvtpd2ps((__v2df)a);
849}
850
852{
853#if HAS_BUILTIN(__builtin_convertvector)
854 return (__m128d)__builtin_convertvector(__builtin_shufflevector((__v4sf)a, (__v4sf)a, 0, 1), __v2df);
855#else
856 return __builtin_ia32_cvtps2pd(a);
857#endif
858}
859
861{
862#if HAS_BUILTIN(__builtin_convertvector)
863 return (__m128d)__builtin_convertvector(__builtin_shufflevector((__v4si)a, (__v4si)a, 0, 1), __v2df);
864#else
865 return __builtin_ia32_cvtdq2pd((__v4si)a);
866#endif
867}
868
870{
871 return (__m128i)__builtin_ia32_cvtpd2dq((__v2df)a);
872}
873
875{
876 return __builtin_ia32_cvtsd2si((__v2df)a);
877}
878
879__INTRIN_INLINE_SSE2 __m128 _mm_cvtsd_ss(__m128 a, __m128d b)
880{
881 return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)a, (__v2df)b);
882}
883
885 int b)
886{
887 a[0] = b;
888 return a;
889}
890
891__INTRIN_INLINE_SSE2 __m128d _mm_cvtss_sd(__m128d a, __m128 b)
892{
893 a[0] = b[0];
894 return a;
895}
896
898{
899 return (__m128i)__builtin_ia32_cvttpd2dq((__v2df)a);
900}
901
903{
904 return __builtin_ia32_cvttsd2si((__v2df)a);
905}
906
908{
909 return (__m64)__builtin_ia32_cvtpd2pi((__v2df)a);
910}
911
913{
914 return (__m64)__builtin_ia32_cvttpd2pi((__v2df)a);
915}
916
918{
919 return __builtin_ia32_cvtpi2pd((__v2si)a);
920}
921
923{
924 return a[0];
925}
926
927__INTRIN_INLINE_SSE2 __m128d _mm_load_pd(double const *dp)
928{
929 return *(const __m128d *)dp;
930}
931
932__INTRIN_INLINE_SSE2 __m128d _mm_load1_pd(double const *dp)
933{
934 struct __mm_load1_pd_struct {
935 double __u;
936 } __attribute__((__packed__, __may_alias__));
937 double __u = ((const struct __mm_load1_pd_struct *)dp)->__u;
938 return __extension__(__m128d){__u, __u};
939}
940
/* GCC: builds a selector for use with the SHUFPD instruction.
 * fp1 is shifted into bit 1 and OR-ed with fp0. */
#define _MM_SHUFFLE2(fp1, fp0) (((fp1) << 1) | (fp0))
945
946__INTRIN_INLINE_SSE2 __m128d _mm_loadr_pd(double const *dp)
947{
948#if HAS_BUILTIN(__builtin_shufflevector)
949 __m128d u = *(const __m128d *)dp;
950 return __builtin_shufflevector((__v2df)u, (__v2df)u, 1, 0);
951#else
952 return (__m128d){ dp[1], dp[0] };
953#endif
954}
955
956__INTRIN_INLINE_SSE2 __m128d _mm_loadu_pd(double const *dp)
957{
958 struct __loadu_pd {
959 __m128d_u __v;
960 } __attribute__((__packed__, __may_alias__));
961 return ((const struct __loadu_pd *)dp)->__v;
962}
963
965{
966 struct __loadu_si64 {
967 long long __v;
968 } __attribute__((__packed__, __may_alias__));
969 long long __u = ((const struct __loadu_si64 *)a)->__v;
970 return __extension__(__m128i)(__v2di){__u, 0LL};
971}
972
974{
975 struct __loadu_si32 {
976 int __v;
977 } __attribute__((__packed__, __may_alias__));
978 int __u = ((const struct __loadu_si32 *)a)->__v;
979 return __extension__(__m128i)(__v4si){__u, 0, 0, 0};
980}
981
983{
984 struct __loadu_si16 {
985 short __v;
986 } __attribute__((__packed__, __may_alias__));
987 short __u = ((const struct __loadu_si16 *)a)->__v;
988 return __extension__(__m128i)(__v8hi){__u, 0, 0, 0, 0, 0, 0, 0};
989}
990
991__INTRIN_INLINE_SSE2 __m128d _mm_load_sd(double const *dp)
992{
993 struct __mm_load_sd_struct {
994 double __u;
995 } __attribute__((__packed__, __may_alias__));
996 double __u = ((const struct __mm_load_sd_struct *)dp)->__u;
997 return __extension__(__m128d){__u, 0};
998}
999
1000__INTRIN_INLINE_SSE2 __m128d _mm_loadh_pd(__m128d a, double const *dp)
1001{
1002 struct __mm_loadh_pd_struct {
1003 double __u;
1004 } __attribute__((__packed__, __may_alias__));
1005 double __u = ((const struct __mm_loadh_pd_struct *)dp)->__u;
1006 return __extension__(__m128d){a[0], __u};
1007}
1008
1009__INTRIN_INLINE_SSE2 __m128d _mm_loadl_pd(__m128d a, double const *dp)
1010{
1011 struct __mm_loadl_pd_struct {
1012 double __u;
1013 } __attribute__((__packed__, __may_alias__));
1014 double __u = ((const struct __mm_loadl_pd_struct *)dp)->__u;
1015 return __extension__(__m128d){__u, a[1]};
1016}
1017
1019{
1020#if HAS_BUILTIN(__builtin_ia32_undef128)
1021 return (__m128d)__builtin_ia32_undef128();
1022#else
1023 __m128d undef = undef;
1024 return undef;
1025#endif
1026}
1027
1029{
1030 return __extension__(__m128d){w, 0};
1031}
1032
1034{
1035 return __extension__(__m128d){w, w};
1036}
1037
1038__INTRIN_INLINE_SSE2 __m128d _mm_set_pd(double w, double x)
1039{
1040 return __extension__(__m128d){x, w};
1041}
1042
1043__INTRIN_INLINE_SSE2 __m128d _mm_setr_pd(double w, double x)
1044{
1045 return __extension__(__m128d){w, x};
1046}
1047
1049{
1050 return __extension__(__m128d){0, 0};
1051}
1052
1053__INTRIN_INLINE_SSE2 __m128d _mm_move_sd(__m128d a, __m128d b)
1054{
1055 a[0] = b[0];
1056 return a;
1057}
1058
1059__INTRIN_INLINE_SSE2 void _mm_store_sd(double *dp, __m128d a)
1060{
1061 struct __mm_store_sd_struct {
1062 double __u;
1063 } __attribute__((__packed__, __may_alias__));
1064 ((struct __mm_store_sd_struct *)dp)->__u = a[0];
1065}
1066
1067__INTRIN_INLINE_SSE2 void _mm_store_pd(double *dp, __m128d a)
1068{
1069 *(__m128d *)dp = a;
1070}
1071
1072__INTRIN_INLINE_SSE2 void _mm_store1_pd(double *dp, __m128d a)
1073{
1074#if HAS_BUILTIN(__builtin_shufflevector)
1075 a = __builtin_shufflevector((__v2df)a, (__v2df)a, 0, 0);
1076 _mm_store_pd(dp, a);
1077#else
1078 dp[0] = a[0];
1079 dp[1] = a[0];
1080#endif
1081}
1082
1083__INTRIN_INLINE_SSE2 void _mm_storeu_pd(double *dp, __m128d a)
1084{
1085 struct __storeu_pd {
1086 __m128d_u __v;
1087 } __attribute__((__packed__, __may_alias__));
1088 ((struct __storeu_pd *)dp)->__v = a;
1089}
1090
1091__INTRIN_INLINE_SSE2 void _mm_storer_pd(double *dp, __m128d a)
1092{
1093#if HAS_BUILTIN(__builtin_shufflevector)
1094 a = __builtin_shufflevector((__v2df)a, (__v2df)a, 1, 0);
1095 *(__m128d *)dp = a;
1096#else
1097 dp[0] = a[1];
1098 dp[1] = a[0];
1099#endif
1100}
1101
1102__INTRIN_INLINE_SSE2 void _mm_storeh_pd(double *dp, __m128d a)
1103{
1104 struct __mm_storeh_pd_struct {
1105 double __u;
1106 } __attribute__((__packed__, __may_alias__));
1107 ((struct __mm_storeh_pd_struct *)dp)->__u = a[1];
1108}
1109
1110__INTRIN_INLINE_SSE2 void _mm_storel_pd(double *dp, __m128d a)
1111{
1112 struct __mm_storeh_pd_struct {
1113 double __u;
1114 } __attribute__((__packed__, __may_alias__));
1115 ((struct __mm_storeh_pd_struct *)dp)->__u = a[0];
1116}
1117
1118__INTRIN_INLINE_SSE2 __m128i _mm_add_epi8(__m128i a, __m128i b)
1119{
1120 return (__m128i)((__v16qu)a + (__v16qu)b);
1121}
1122
1123__INTRIN_INLINE_SSE2 __m128i _mm_add_epi16(__m128i a, __m128i b)
1124{
1125 return (__m128i)((__v8hu)a + (__v8hu)b);
1126}
1127
1128__INTRIN_INLINE_SSE2 __m128i _mm_add_epi32(__m128i a, __m128i b)
1129{
1130 return (__m128i)((__v4su)a + (__v4su)b);
1131}
1132
1134{
1135 return (__m64)__builtin_ia32_paddq((__v1di)a, (__v1di)b);
1136}
1137
1138__INTRIN_INLINE_SSE2 __m128i _mm_add_epi64(__m128i a, __m128i b)
1139{
1140 return (__m128i)((__v2du)a + (__v2du)b);
1141}
1142
1143__INTRIN_INLINE_SSE2 __m128i _mm_adds_epi8(__m128i a, __m128i b)
1144{
1145#if HAS_BUILTIN(__builtin_elementwise_add_sat)
1146 return (__m128i)__builtin_elementwise_add_sat((__v16qs)a, (__v16qs)b);
1147#else
1148 return (__m128i)__builtin_ia32_paddsb128((__v16qi)a, (__v16qi)b);
1149#endif
1150}
1151
1152__INTRIN_INLINE_SSE2 __m128i _mm_adds_epi16(__m128i a, __m128i b)
1153{
1154#if HAS_BUILTIN(__builtin_elementwise_add_sat)
1155 return (__m128i)__builtin_elementwise_add_sat((__v8hi)a, (__v8hi)b);
1156#else
1157 return (__m128i)__builtin_ia32_paddsw128((__v8hi)a, (__v8hi)b);
1158#endif
1159}
1160
1161__INTRIN_INLINE_SSE2 __m128i _mm_adds_epu8(__m128i a, __m128i b)
1162{
1163#if HAS_BUILTIN(__builtin_elementwise_add_sat)
1164 return (__m128i)__builtin_elementwise_add_sat((__v16qu)a, (__v16qu)b);
1165#else
1166 return (__m128i)__builtin_ia32_paddusb128((__v16qi)a, (__v16qi)b);
1167#endif
1168}
1169
1170__INTRIN_INLINE_SSE2 __m128i _mm_adds_epu16(__m128i a, __m128i b)
1171{
1172#if HAS_BUILTIN(__builtin_elementwise_add_sat)
1173 return (__m128i)__builtin_elementwise_add_sat((__v8hu)a, (__v8hu)b);
1174#else
1175 return (__m128i)__builtin_ia32_paddusw128((__v8hi)a, (__v8hi)b);
1176#endif
1177}
1178
1179__INTRIN_INLINE_SSE2 __m128i _mm_avg_epu8(__m128i a, __m128i b)
1180{
1181 return (__m128i)__builtin_ia32_pavgb128((__v16qi)a, (__v16qi)b);
1182}
1183
1184__INTRIN_INLINE_SSE2 __m128i _mm_avg_epu16(__m128i a, __m128i b)
1185{
1186 return (__m128i)__builtin_ia32_pavgw128((__v8hi)a, (__v8hi)b);
1187}
1188
1189__INTRIN_INLINE_SSE2 __m128i _mm_madd_epi16(__m128i a, __m128i b)
1190{
1191 return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)a, (__v8hi)b);
1192}
1193
1194__INTRIN_INLINE_SSE2 __m128i _mm_max_epi16(__m128i a, __m128i b)
1195{
1196#if HAS_BUILTIN(__builtin_elementwise_max)
1197 return (__m128i)__builtin_elementwise_max((__v8hi)a, (__v8hi)b);
1198#else
1199 return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)a, (__v8hi)b);
1200#endif
1201}
1202
1203__INTRIN_INLINE_SSE2 __m128i _mm_max_epu8(__m128i a, __m128i b)
1204{
1205#if HAS_BUILTIN(__builtin_elementwise_max)
1206 return (__m128i)__builtin_elementwise_max((__v16qu)a, (__v16qu)b);
1207#else
1208 return (__m128i)__builtin_ia32_pmaxub128((__v16qi)a, (__v16qi)b);
1209#endif
1210}
1211
1212__INTRIN_INLINE_SSE2 __m128i _mm_min_epi16(__m128i a, __m128i b)
1213{
1214#if HAS_BUILTIN(__builtin_elementwise_min)
1215 return (__m128i)__builtin_elementwise_min((__v8hi)a, (__v8hi)b);
1216#else
1217 return (__m128i)__builtin_ia32_pminsw128((__v8hi)a, (__v8hi)b);
1218#endif
1219}
1220
1221__INTRIN_INLINE_SSE2 __m128i _mm_min_epu8(__m128i a, __m128i b)
1222{
1223#if HAS_BUILTIN(__builtin_elementwise_min)
1224 return (__m128i)__builtin_elementwise_min((__v16qu)a, (__v16qu)b);
1225#else
1226 return (__m128i)__builtin_ia32_pminub128((__v16qi)a, (__v16qi)b);
1227#endif
1228}
1229
1230__INTRIN_INLINE_SSE2 __m128i _mm_mulhi_epi16(__m128i a, __m128i b)
1231{
1232 return (__m128i)__builtin_ia32_pmulhw128((__v8hi)a, (__v8hi)b);
1233}
1234
1235__INTRIN_INLINE_SSE2 __m128i _mm_mulhi_epu16(__m128i a, __m128i b)
1236{
1237 return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)a, (__v8hi)b);
1238}
1239
1240__INTRIN_INLINE_SSE2 __m128i _mm_mullo_epi16(__m128i a, __m128i b)
1241{
1242 return (__m128i)((__v8hu)a * (__v8hu)b);
1243}
1244
1246{
1247 return (__m64)__builtin_ia32_pmuludq((__v2si)a, (__v2si)b);
1248}
1249
1250__INTRIN_INLINE_SSE2 __m128i _mm_mul_epu32(__m128i a, __m128i b)
1251{
1252 return __builtin_ia32_pmuludq128((__v4si)a, (__v4si)b);
1253}
1254
1255__INTRIN_INLINE_SSE2 __m128i _mm_sad_epu8(__m128i a, __m128i b)
1256{
1257 return __builtin_ia32_psadbw128((__v16qi)a, (__v16qi)b);
1258}
1259
1260__INTRIN_INLINE_SSE2 __m128i _mm_sub_epi8(__m128i a, __m128i b)
1261{
1262 return (__m128i)((__v16qu)a - (__v16qu)b);
1263}
1264
1265__INTRIN_INLINE_SSE2 __m128i _mm_sub_epi16(__m128i a, __m128i b)
1266{
1267 return (__m128i)((__v8hu)a - (__v8hu)b);
1268}
1269
1270__INTRIN_INLINE_SSE2 __m128i _mm_sub_epi32(__m128i a, __m128i b)
1271{
1272 return (__m128i)((__v4su)a - (__v4su)b);
1273}
1274
1276{
1277 return (__m64)__builtin_ia32_psubq((__v1di)a, (__v1di)b);
1278}
1279
1280__INTRIN_INLINE_SSE2 __m128i _mm_sub_epi64(__m128i a, __m128i b)
1281{
1282 return (__m128i)((__v2du)a - (__v2du)b);
1283}
1284
1285__INTRIN_INLINE_SSE2 __m128i _mm_subs_epi8(__m128i a, __m128i b)
1286{
1287#if HAS_BUILTIN(__builtin_elementwise_sub_sat)
1288 return (__m128i)__builtin_elementwise_sub_sat((__v16qs)a, (__v16qs)b);
1289#else
1290 return (__m128i)__builtin_ia32_psubsb128((__v16qi)a, (__v16qi)b);
1291#endif
1292}
1293
1294__INTRIN_INLINE_SSE2 __m128i _mm_subs_epi16(__m128i a, __m128i b)
1295{
1296#if HAS_BUILTIN(__builtin_elementwise_sub_sat)
1297 return (__m128i)__builtin_elementwise_sub_sat((__v8hi)a, (__v8hi)b);
1298#else
1299 return (__m128i)__builtin_ia32_psubsw128((__v8hi)a, (__v8hi)b);
1300#endif
1301}
1302
1303__INTRIN_INLINE_SSE2 __m128i _mm_subs_epu8(__m128i a, __m128i b)
1304{
1305#if HAS_BUILTIN(__builtin_elementwise_sub_sat)
1306 return (__m128i)__builtin_elementwise_sub_sat((__v16qu)a, (__v16qu)b);
1307#else
1308 return (__m128i)__builtin_ia32_psubusb128((__v16qi)a, (__v16qi)b);
1309#endif
1310}
1311
1312__INTRIN_INLINE_SSE2 __m128i _mm_subs_epu16(__m128i a, __m128i b)
1313{
1314#if HAS_BUILTIN(__builtin_elementwise_sub_sat)
1315 return (__m128i)__builtin_elementwise_sub_sat((__v8hu)a, (__v8hu)b);
1316#else
1317 return (__m128i)__builtin_ia32_psubusw128((__v8hi)a, (__v8hi)b);
1318#endif
1319}
1320
1321__INTRIN_INLINE_SSE2 __m128i _mm_and_si128(__m128i a, __m128i b)
1322{
1323 return (__m128i)((__v2du)a & (__v2du)b);
1324}
1325
1326__INTRIN_INLINE_SSE2 __m128i _mm_andnot_si128(__m128i a, __m128i b)
1327{
1328 return (__m128i)(~(__v2du)a & (__v2du)b);
1329}
1330
1331__INTRIN_INLINE_SSE2 __m128i _mm_or_si128(__m128i a, __m128i b)
1332{
1333 return (__m128i)((__v2du)a | (__v2du)b);
1334}
1335
1336__INTRIN_INLINE_SSE2 __m128i _mm_xor_si128(__m128i a, __m128i b)
1337{
1338 return (__m128i)((__v2du)a ^ (__v2du)b);
1339}
1340
/* Expands to the pslldqi128_byteshift builtin. a is cast through __m128i to
 * __v2di and imm to int; the result is cast to __m128i. */
#define _mm_slli_si128(a, imm)                      \
    ((__m128i)__builtin_ia32_pslldqi128_byteshift(  \
        (__v2di)(__m128i)(a), (int)(imm)))
1343
1345{
1346 return (__m128i)__builtin_ia32_psllwi128((__v8hi)a, count);
1347}
1348
1350{
1351 return (__m128i)__builtin_ia32_psllw128((__v8hi)a, (__v8hi)count);
1352}
1353
1355{
1356 return (__m128i)__builtin_ia32_pslldi128((__v4si)a, count);
1357}
1358
1360{
1361 return (__m128i)__builtin_ia32_pslld128((__v4si)a, (__v4si)count);
1362}
1363
1365{
1366 return __builtin_ia32_psllqi128((__v2di)a, count);
1367}
1368
1370{
1371 return __builtin_ia32_psllq128((__v2di)a, (__v2di)count);
1372}
1373
1375{
1376 return (__m128i)__builtin_ia32_psrawi128((__v8hi)a, count);
1377}
1378
1380{
1381 return (__m128i)__builtin_ia32_psraw128((__v8hi)a, (__v8hi)count);
1382}
1383
1385{
1386 return (__m128i)__builtin_ia32_psradi128((__v4si)a, count);
1387}
1388
1390{
1391 return (__m128i)__builtin_ia32_psrad128((__v4si)a, (__v4si)count);
1392}
1393
/* Expands to the psrldqi128_byteshift builtin. a is cast through __m128i to
 * __v2di and imm to int; the result is cast to __m128i. */
#define _mm_srli_si128(a, imm)                      \
    ((__m128i)__builtin_ia32_psrldqi128_byteshift(  \
        (__v2di)(__m128i)(a), (int)(imm)))
1396
1398{
1399 return (__m128i)__builtin_ia32_psrlwi128((__v8hi)a, count);
1400}
1401
1403{
1404 return (__m128i)__builtin_ia32_psrlw128((__v8hi)a, (__v8hi)count);
1405}
1406
1408{
1409 return (__m128i)__builtin_ia32_psrldi128((__v4si)a, count);
1410}
1411
1413{
1414 return (__m128i)__builtin_ia32_psrld128((__v4si)a, (__v4si)count);
1415}
1416
1418{
1419 return __builtin_ia32_psrlqi128((__v2di)a, count);
1420}
1421
1423{
1424 return __builtin_ia32_psrlq128((__v2di)a, (__v2di)count);
1425}
1426
1427__INTRIN_INLINE_SSE2 __m128i _mm_cmpeq_epi8(__m128i a, __m128i b)
1428{
1429 return (__m128i)((__v16qi)a == (__v16qi)b);
1430}
1431
1432__INTRIN_INLINE_SSE2 __m128i _mm_cmpeq_epi16(__m128i a, __m128i b)
1433{
1434 return (__m128i)((__v8hi)a == (__v8hi)b);
1435}
1436
1437__INTRIN_INLINE_SSE2 __m128i _mm_cmpeq_epi32(__m128i a, __m128i b)
1438{
1439 return (__m128i)((__v4si)a == (__v4si)b);
1440}
1441
1442__INTRIN_INLINE_SSE2 __m128i _mm_cmpgt_epi8(__m128i a, __m128i b)
1443{
1444 /* This function always performs a signed comparison, but __v16qi is a char
1445 which may be signed or unsigned, so use __v16qs. */
1446 return (__m128i)((__v16qs)a > (__v16qs)b);
1447}
1448
1449__INTRIN_INLINE_SSE2 __m128i _mm_cmpgt_epi16(__m128i a, __m128i b)
1450{
1451 return (__m128i)((__v8hi)a > (__v8hi)b);
1452}
1453
1454__INTRIN_INLINE_SSE2 __m128i _mm_cmpgt_epi32(__m128i a, __m128i b)
1455{
1456 return (__m128i)((__v4si)a > (__v4si)b);
1457}
1458
1459__INTRIN_INLINE_SSE2 __m128i _mm_cmplt_epi8(__m128i a, __m128i b)
1460{
1461 return _mm_cmpgt_epi8(b, a);
1462}
1463
1464__INTRIN_INLINE_SSE2 __m128i _mm_cmplt_epi16(__m128i a, __m128i b)
1465{
1466 return _mm_cmpgt_epi16(b, a);
1467}
1468
1469__INTRIN_INLINE_SSE2 __m128i _mm_cmplt_epi32(__m128i a, __m128i b)
1470{
1471 return _mm_cmpgt_epi32(b, a);
1472}
1473
1474#ifdef _M_AMD64
1475
1476__INTRIN_INLINE_SSE2 __m128d _mm_cvtsi64_sd(__m128d a, long long b)
1477{
1478 a[0] = b;
1479 return a;
1480}
1481
1482__INTRIN_INLINE_SSE2 long long _mm_cvtsd_si64(__m128d a)
1483{
1484 return __builtin_ia32_cvtsd2si64((__v2df)a);
1485}
1486
1487__INTRIN_INLINE_SSE2 long long _mm_cvttsd_si64(__m128d a)
1488{
1489 return __builtin_ia32_cvttsd2si64((__v2df)a);
1490}
1491#endif
1492
1494{
1495#if HAS_BUILTIN(__builtin_convertvector)
1496 return (__m128)__builtin_convertvector((__v4si)a, __v4sf);
1497#else
1498 return __builtin_ia32_cvtdq2ps((__v4si)a);
1499#endif
1500}
1501
1503{
1504 return (__m128i)__builtin_ia32_cvtps2dq((__v4sf)a);
1505}
1506
1508{
1509 return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)a);
1510}
1511
1513{
1514 return __extension__(__m128i)(__v4si){a, 0, 0, 0};
1515}
1516
1518{
1519 return __extension__(__m128i)(__v2di){a, 0};
1520}
1521
1523{
1524 __v4si b = (__v4si)a;
1525 return b[0];
1526}
1527
1529{
1530 return a[0];
1531}
1532
1534{
1535 return *p;
1536}
1537
1538__INTRIN_INLINE_SSE2 __m128i _mm_loadu_si128(__m128i_u const *p)
1539{
1540 struct __loadu_si128 {
1541 __m128i_u __v;
1542 } __attribute__((__packed__, __may_alias__));
1543 return ((const struct __loadu_si128 *)p)->__v;
1544}
1545
1546__INTRIN_INLINE_SSE2 __m128i _mm_loadl_epi64(__m128i_u const *p)
1547{
1548 struct __mm_loadl_epi64_struct {
1549 long long __u;
1550 } __attribute__((__packed__, __may_alias__));
1551 return __extension__(__m128i){
1552 ((const struct __mm_loadl_epi64_struct *)p)->__u, 0};
1553}
1554
1556{
1557#if HAS_BUILTIN(__builtin_ia32_undef128)
1558 return (__m128i)__builtin_ia32_undef128();
1559#else
1560 __m128i undef = undef;
1561 return undef;
1562#endif
1563}
1564
1565__INTRIN_INLINE_SSE2 __m128i _mm_set_epi64x(long long q1, long long q0)
1566{
1567 return __extension__(__m128i)(__v2di){q0, q1};
1568}
1569
1570__INTRIN_INLINE_SSE2 __m128i _mm_set_epi64(__m64 q1, __m64 q0)
1571{
1572 return _mm_set_epi64x((long long)q1, (long long)q0);
1573}
1574
1575__INTRIN_INLINE_SSE2 __m128i _mm_set_epi32(int i3, int i2, int i1, int i0)
1576{
1577 return __extension__(__m128i)(__v4si){i0, i1, i2, i3};
1578}
1579
1581 short w7, short w6, short w5, short w4,
1582 short w3, short w2, short w1, short w0)
1583{
1584 return __extension__(__m128i)(__v8hi){w0, w1, w2, w3, w4, w5, w6, w7};
1585}
1586
1588 char b15, char b14, char b13, char b12,
1589 char b11, char b10, char b9, char b8,
1590 char b7, char b6, char b5, char b4,
1591 char b3, char b2, char b1, char b0)
1592{
1593 return __extension__(__m128i)(__v16qi){
1594 b0, b1, b2, b3, b4, b5, b6, b7,
1595 b8, b9, b10, b11, b12, b13, b14, b15};
1596}
1597
1599{
1600 return _mm_set_epi64x(q, q);
1601}
1602
1604{
1605 return _mm_set_epi64(q, q);
1606}
1607
1609{
1610 return _mm_set_epi32(i, i, i, i);
1611}
1612
1614{
1615 return _mm_set_epi16(w, w, w, w, w, w, w, w);
1616}
1617
1619{
1620 return _mm_set_epi8(b, b, b, b, b, b, b, b, b, b, b,
1621 b, b, b, b, b);
1622}
1623
1624__INTRIN_INLINE_SSE2 __m128i _mm_setr_epi64(__m64 q0, __m64 q1)
1625{
1626 return _mm_set_epi64(q1, q0);
1627}
1628
1629__INTRIN_INLINE_SSE2 __m128i _mm_setr_epi32(int i0, int i1, int i2, int i3)
1630{
1631 return _mm_set_epi32(i3, i2, i1, i0);
1632}
1633
1635 short w0, short w1, short w2, short w3,
1636 short w4, short w5, short w6, short w7)
1637{
1638 return _mm_set_epi16(w7, w6, w5, w4, w3, w2, w1, w0);
1639}
1640
1642 char b0, char b1, char b2, char b3,
1643 char b4, char b5, char b6, char b7,
1644 char b8, char b9, char b10, char b11,
1645 char b12, char b13, char b14, char b15)
1646{
1647 return _mm_set_epi8(b15, b14, b13, b12, b11, b10, b9, b8,
1648 b7, b6, b5, b4, b3, b2, b1, b0);
1649}
1650
1652{
1653 return __extension__(__m128i)(__v2di){0LL, 0LL};
1654}
1655
1657{
1658 *p = b;
1659}
1660
1661__INTRIN_INLINE_SSE2 void _mm_storeu_si128(__m128i_u *p, __m128i b)
1662{
1663 struct __storeu_si128 {
1664 __m128i_u __v;
1665 } __attribute__((__packed__, __may_alias__));
1666 ((struct __storeu_si128 *)p)->__v = b;
1667}
1668
1670{
1671 struct __storeu_si64 {
1672 long long __v;
1673 } __attribute__((__packed__, __may_alias__));
1674 ((struct __storeu_si64 *)p)->__v = ((__v2di)b)[0];
1675}
1676
1678{
1679 struct __storeu_si32 {
1680 int __v;
1681 } __attribute__((__packed__, __may_alias__));
1682 ((struct __storeu_si32 *)p)->__v = ((__v4si)b)[0];
1683}
1684
1686{
1687 struct __storeu_si16 {
1688 short __v;
1689 } __attribute__((__packed__, __may_alias__));
1690 ((struct __storeu_si16 *)p)->__v = ((__v8hi)b)[0];
1691}
1692
1693__INTRIN_INLINE_SSE2 void _mm_maskmoveu_si128(__m128i d, __m128i n, char *p)
1694{
1695 __builtin_ia32_maskmovdqu((__v16qi)d, (__v16qi)n, p);
1696}
1697
1698__INTRIN_INLINE_SSE2 void _mm_storel_epi64(__m128i_u *p, __m128i a)
1699{
1700 struct __mm_storel_epi64_struct {
1701 long long __u;
1702 } __attribute__((__packed__, __may_alias__));
1703 ((struct __mm_storel_epi64_struct *)p)->__u = a[0];
1704}
1705
1707{
1708#if HAS_BUILTIN(__builtin_nontemporal_store)
1709 __builtin_nontemporal_store((__v2df)a, (__v2df *)p);
1710#else
1711 __builtin_ia32_movntpd(p, a);
1712#endif
1713}
1714
1716{
1717#if HAS_BUILTIN(__builtin_nontemporal_store)
1718 __builtin_nontemporal_store((__v2di)a, (__v2di*)p);
1719#else
1720 __builtin_ia32_movntdq(p, a);
1721#endif
1722}
1723
1725{
1726 __builtin_ia32_movnti(p, a);
1727}
1728
1729#ifdef _M_AMD64
1730__INTRIN_INLINE_SSE2 void _mm_stream_si64(long long *p, long long a)
1731{
1732 __builtin_ia32_movnti64(p, a);
1733}
1734#endif
1735
/* Prototype only: no inline body is given for _mm_clflush at this point in
 * the header. Takes a pointer to const data and returns nothing. */
1736void _mm_clflush(void const *p);
1737
/* Prototype only: no inline body is given for _mm_lfence here. */
1738void _mm_lfence(void);
1739
/* Prototype only: no inline body is given for _mm_mfence here. */
1740void _mm_mfence(void);
1741
1742__INTRIN_INLINE_SSE2 __m128i _mm_packs_epi16(__m128i a, __m128i b)
1743{
1744 return (__m128i)__builtin_ia32_packsswb128((__v8hi)a, (__v8hi)b);
1745}
1746
1747__INTRIN_INLINE_SSE2 __m128i _mm_packs_epi32(__m128i a, __m128i b)
1748{
1749 return (__m128i)__builtin_ia32_packssdw128((__v4si)a, (__v4si)b);
1750}
1751
1752__INTRIN_INLINE_SSE2 __m128i _mm_packus_epi16(__m128i a, __m128i b)
1753{
1754 return (__m128i)__builtin_ia32_packuswb128((__v8hi)a, (__v8hi)b);
1755}
1756
/* Expands to the vec_ext_v8hi builtin on a (cast through __m128i to __v8hi)
 * with index (int)imm. The extracted value is converted to unsigned short
 * first and then to int. */
#define _mm_extract_epi16(a, imm)                         \
    ((int)(unsigned short)__builtin_ia32_vec_ext_v8hi(    \
        (__v8hi)(__m128i)(a), (int)(imm)))
1760
/* Expands to the vec_set_v8hi builtin on a (cast through __m128i to __v8hi)
 * with value (int)b and index (int)imm; the result is cast to __m128i. */
#define _mm_insert_epi16(a, b, imm)               \
    ((__m128i)__builtin_ia32_vec_set_v8hi(        \
        (__v8hi)(__m128i)(a), (int)(b), (int)(imm)))
1764
1766{
1767 return __builtin_ia32_pmovmskb128((__v16qi)a);
1768}
1769
/* Expands to the pshufd builtin on a (cast through __m128i to __v4si) with
 * selector (int)imm; the result is cast to __m128i. */
#define _mm_shuffle_epi32(a, imm)       \
    ((__m128i)__builtin_ia32_pshufd(    \
        (__v4si)(__m128i)(a), (int)(imm)))
1772
/* Expands to the pshuflw builtin on a (cast through __m128i to __v8hi) with
 * selector (int)imm; the result is cast to __m128i. */
#define _mm_shufflelo_epi16(a, imm)     \
    ((__m128i)__builtin_ia32_pshuflw(   \
        (__v8hi)(__m128i)(a), (int)(imm)))
1775
/* Expands to the pshufhw builtin on a (cast through __m128i to __v8hi) with
 * selector (int)imm; the result is cast to __m128i. */
#define _mm_shufflehi_epi16(a, imm)     \
    ((__m128i)__builtin_ia32_pshufhw(   \
        (__v8hi)(__m128i)(a), (int)(imm)))
1778
1780{
1781#if HAS_BUILTIN(__builtin_shufflevector)
1782 return (__m128i)__builtin_shufflevector(
1783 (__v16qi)a, (__v16qi)b, 8, 16 + 8, 9, 16 + 9, 10, 16 + 10, 11,
1784 16 + 11, 12, 16 + 12, 13, 16 + 13, 14, 16 + 14, 15, 16 + 15);
1785#else
1786 return (__m128i)__builtin_ia32_punpckhbw128((__v16qi)a, (__v16qi)b);
1787#endif
1788}
1789
1791{
1792#if HAS_BUILTIN(__builtin_shufflevector)
1793 return (__m128i)__builtin_shufflevector((__v8hi)a, (__v8hi)b, 4, 8 + 4, 5,
1794 8 + 5, 6, 8 + 6, 7, 8 + 7);
1795#else
1796 return (__m128i)__builtin_ia32_punpckhwd128((__v8hi)a, (__v8hi)b);
1797#endif
1798}
1799
1801{
1802#if HAS_BUILTIN(__builtin_shufflevector)
1803 return (__m128i)__builtin_shufflevector((__v4si)a, (__v4si)b, 2, 4 + 2, 3,
1804 4 + 3);
1805#else
1806 return (__m128i)__builtin_ia32_punpckhdq128((__v4si)a, (__v4si)b);
1807#endif
1808}
1809
1811{
1812#if HAS_BUILTIN(__builtin_shufflevector)
1813 return (__m128i)__builtin_shufflevector((__v2di)a, (__v2di)b, 1, 2 + 1);
1814#else
1815 return (__m128i)__builtin_ia32_punpckhqdq128((__v2di)a, (__v2di)b);
1816#endif
1817}
1818
1820{
1821#if HAS_BUILTIN(__builtin_shufflevector)
1822 return (__m128i)__builtin_shufflevector(
1823 (__v16qi)a, (__v16qi)b, 0, 16 + 0, 1, 16 + 1, 2, 16 + 2, 3, 16 + 3, 4,
1824 16 + 4, 5, 16 + 5, 6, 16 + 6, 7, 16 + 7);
1825#else
1826 return (__m128i)__builtin_ia32_punpcklbw128((__v16qi)a, (__v16qi)b);
1827#endif
1828}
1829
1831{
1832#if HAS_BUILTIN(__builtin_shufflevector)
1833 return (__m128i)__builtin_shufflevector((__v8hi)a, (__v8hi)b, 0, 8 + 0, 1,
1834 8 + 1, 2, 8 + 2, 3, 8 + 3);
1835#else
1836 return (__m128i)__builtin_ia32_punpcklwd128((__v8hi)a, (__v8hi)b);
1837#endif
1838}
1839
1841{
1842#if HAS_BUILTIN(__builtin_shufflevector)
1843 return (__m128i)__builtin_shufflevector((__v4si)a, (__v4si)b, 0, 4 + 0, 1,
1844 4 + 1);
1845#else
1846 return (__m128i)__builtin_ia32_punpckldq128((__v4si)a, (__v4si)b);
1847#endif
1848}
1849
1851{
1852#if HAS_BUILTIN(__builtin_shufflevector)
1853 return (__m128i)__builtin_shufflevector((__v2di)a, (__v2di)b, 0, 2 + 0);
1854#else
1855 return (__m128i)__builtin_ia32_punpcklqdq128((__v2di)a, (__v2di)b);
1856#endif
1857}
1858
1860{
1861 return (__m64)a[0];
1862}
1863
1865{
1866 return __extension__(__m128i)(__v2di){(long long)a, 0};
1867}
1868
1870{
1871#if HAS_BUILTIN(__builtin_shufflevector)
1872 return __builtin_shufflevector((__v2di)a, _mm_setzero_si128(), 0, 2);
1873#else
1874 return (__m128i)__builtin_ia32_movq128((__v2di)a);
1875#endif
1876}
1877
1878__INTRIN_INLINE_SSE2 __m128d _mm_unpackhi_pd(__m128d a, __m128d b)
1879{
1880#if HAS_BUILTIN(__builtin_shufflevector)
1881 return __builtin_shufflevector((__v2df)a, (__v2df)b, 1, 2 + 1);
1882#else
1883 return (__m128d)__builtin_ia32_unpckhpd((__v2df)a, (__v2df)b);
1884#endif
1885}
1886
1887__INTRIN_INLINE_SSE2 __m128d _mm_unpacklo_pd(__m128d a, __m128d b)
1888{
1889#if HAS_BUILTIN(__builtin_shufflevector)
1890 return __builtin_shufflevector((__v2df)a, (__v2df)b, 0, 2 + 0);
1891#else
1892 return (__m128d)__builtin_ia32_unpcklpd((__v2df)a, (__v2df)b);
1893#endif
1894}
1895
1897{
1898 return __builtin_ia32_movmskpd((__v2df)a);
1899}
1900
/* Expands to the shufpd builtin. a and b are each cast through __m128d to
 * __v2df, the selector i is cast to int, and the result is cast to
 * __m128d. */
#define _mm_shuffle_pd(a, b, i)                                    \
    ((__m128d)__builtin_ia32_shufpd(                               \
        (__v2df)(__m128d)(a), (__v2df)(__m128d)(b), (int)(i)))
1904
1906{
1907 return (__m128)a;
1908}
1909
1911{
1912 return (__m128i)a;
1913}
1914
1916{
1917 return (__m128d)a;
1918}
1919
1921{
1922 return (__m128i)a;
1923}
1924
1926{
1927 return (__m128)a;
1928}
1929
1931{
1932 return (__m128d)a;
1933}
1934
/* Prototype only: no inline body is given for _mm_pause at this point in
 * the header. Takes no arguments and returns nothing. */
1935void _mm_pause(void);
1936
1937#endif /* _MSC_VER */
1938
1939
1940
1941#endif /* _INCLUDED_EMM */
#define _DECLSPEC_INTRIN_TYPE
Definition: _mingw.h:234
#define __int8
Definition: basetyps.h:25
#define __int16
Definition: basetyps.h:22
#define __int64
Definition: basetyps.h:16
#define __int32
Definition: basetyps.h:19
#define _STATIC_ASSERT(expr)
Definition: crtdefs.h:191
#define _CRT_ALIGN(x)
Definition: crtdefs.h:154
int align(int length, int align)
Definition: dsound8.c:36
__declspec(noinline) int TestFunc(int
Definition: ehthrow.cxx:232
__m128 _mm_cvtpd_ps(__m128d a)
Definition: emmintrin.h:846
__m128d _mm_cmpnge_sd(__m128d a, __m128d b)
Definition: emmintrin.h:780
void _mm_storeu_pd(double *dp, __m128d a)
Definition: emmintrin.h:1083
__m128d _mm_add_sd(__m128d a, __m128d b)
Definition: emmintrin.h:567
void _mm_storeu_si128(__m128i_u *p, __m128i b)
Definition: emmintrin.h:1661
__m128i _mm_set_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0)
Definition: emmintrin.h:1587
__m128i _mm_cmpgt_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1442
__m128i _mm_set1_epi16(short w)
Definition: emmintrin.h:1613
int _mm_cvtsi128_si32(__m128i a)
Definition: emmintrin.h:1522
__m128i _mm_set_epi32(int i3, int i2, int i1, int i0)
Definition: emmintrin.h:1575
void _mm_store_si128(__m128i *p, __m128i b)
Definition: emmintrin.h:1656
__m128i _mm_adds_epu16(__m128i a, __m128i b)
Definition: emmintrin.h:1170
__m128i _mm_movpi64_epi64(__m64 a)
Definition: emmintrin.h:1864
__m128i _mm_slli_epi64(__m128i a, int count)
Definition: emmintrin.h:1364
int _mm_ucomile_sd(__m128d a, __m128d b)
Definition: emmintrin.h:826
__m128d _mm_cmpeq_sd(__m128d a, __m128d b)
Definition: emmintrin.h:722
int _mm_cvtsd_si32(__m128d a)
Definition: emmintrin.h:874
int _mm_comile_sd(__m128d a, __m128d b)
Definition: emmintrin.h:796
__m128i _mm_castps_si128(__m128 a)
Definition: emmintrin.h:1920
__m128d _mm_cmpnlt_pd(__m128d a, __m128d b)
Definition: emmintrin.h:702
__m128i _mm_setr_epi32(int i0, int i1, int i2, int i3)
Definition: emmintrin.h:1629
__m128i _mm_setzero_si128(void)
Definition: emmintrin.h:1651
__m128i _mm_srl_epi64(__m128i a, __m128i count)
Definition: emmintrin.h:1422
__m128d _mm_add_pd(__m128d a, __m128d b)
Definition: emmintrin.h:573
__m128i _mm_cvtpd_epi32(__m128d a)
Definition: emmintrin.h:869
__m128i _mm_xor_si128(__m128i a, __m128i b)
Definition: emmintrin.h:1336
__m128i _mm_move_epi64(__m128i a)
Definition: emmintrin.h:1869
__m128d _mm_sub_sd(__m128d a, __m128d b)
Definition: emmintrin.h:578
__m128d _mm_loadh_pd(__m128d a, double const *dp)
Definition: emmintrin.h:1000
__m128d _mm_cmpnlt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:764
__m128i _mm_srli_epi64(__m128i a, int count)
Definition: emmintrin.h:1417
__m128d _mm_setr_pd(double w, double x)
Definition: emmintrin.h:1043
#define __INTRIN_INLINE_SSE2
Definition: emmintrin.h:69
__m128d _mm_cmpord_sd(__m128d a, __m128d b)
Definition: emmintrin.h:749
__m128i _mm_set1_epi64(__m64 q)
Definition: emmintrin.h:1603
__m128d _mm_cmpunord_pd(__m128d a, __m128d b)
Definition: emmintrin.h:692
__m128i _mm_packs_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1747
__m128d _mm_castps_pd(__m128 a)
Definition: emmintrin.h:1915
void _mm_store1_pd(double *dp, __m128d a)
Definition: emmintrin.h:1072
__m128i _mm_sad_epu8(__m128i a, __m128i b)
Definition: emmintrin.h:1255
__INTRIN_INLINE_SSE2 __m128d _mm_undefined_pd(void)
Definition: emmintrin.h:1018
__INTRIN_INLINE_SSE2 void _mm_storeu_si32(void *p, __m128i b)
Definition: emmintrin.h:1677
__m128d _mm_setzero_pd(void)
Definition: emmintrin.h:1048
__m128 _mm_castsi128_ps(__m128i a)
Definition: emmintrin.h:1925
__m128d _mm_cmpneq_pd(__m128d a, __m128d b)
Definition: emmintrin.h:697
void _mm_storel_epi64(__m128i_u *p, __m128i a)
Definition: emmintrin.h:1698
__m128i _mm_packus_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1752
__m128d _mm_set_sd(double w)
Definition: emmintrin.h:1028
__m128 _mm_cvtepi32_ps(__m128i a)
Definition: emmintrin.h:1493
#define _mm_slli_si128(a, imm)
Definition: emmintrin.h:1341
__m128i _mm_adds_epu8(__m128i a, __m128i b)
Definition: emmintrin.h:1161
__m128i _mm_sub_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1270
__m128i _mm_castpd_si128(__m128d a)
Definition: emmintrin.h:1910
__m128i _mm_add_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1123
__m128d _mm_cmpnge_pd(__m128d a, __m128d b)
Definition: emmintrin.h:717
int _mm_comige_sd(__m128d a, __m128d b)
Definition: emmintrin.h:806
__m128d _mm_cmpge_pd(__m128d a, __m128d b)
Definition: emmintrin.h:682
__INTRIN_INLINE_SSE2 void _mm_storeu_si16(void *p, __m128i b)
Definition: emmintrin.h:1685
__m128d _mm_loadr_pd(double const *dp)
Definition: emmintrin.h:946
__m128i _mm_mulhi_epu16(__m128i a, __m128i b)
Definition: emmintrin.h:1235
__m128i _mm_slli_epi32(__m128i a, int count)
Definition: emmintrin.h:1354
__m64 _mm_sub_si64(__m64 a, __m64 b)
Definition: emmintrin.h:1275
__m128i _mm_load_si128(__m128i const *p)
Definition: emmintrin.h:1533
__m128d _mm_max_sd(__m128d a, __m128d b)
Definition: emmintrin.h:632
__m64 _mm_cvtpd_pi32(__m128d a)
Definition: emmintrin.h:907
__m128d _mm_and_pd(__m128d a, __m128d b)
Definition: emmintrin.h:642
__m128i _mm_mul_epu32(__m128i a, __m128i b)
Definition: emmintrin.h:1250
__m128i _mm_cvttpd_epi32(__m128d a)
Definition: emmintrin.h:897
int _mm_ucomige_sd(__m128d a, __m128d b)
Definition: emmintrin.h:836
__m128d _mm_sub_pd(__m128d a, __m128d b)
Definition: emmintrin.h:584
__m128i _mm_cmpeq_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1427
__m128d _mm_load1_pd(double const *dp)
Definition: emmintrin.h:932
__m128d _mm_load_pd(double const *dp)
Definition: emmintrin.h:927
__m128d _mm_min_pd(__m128d a, __m128d b)
Definition: emmintrin.h:627
__m128i _mm_sll_epi32(__m128i a, __m128i count)
Definition: emmintrin.h:1359
__m128i _mm_unpackhi_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1790
void _mm_stream_si32(int *p, int a)
Definition: emmintrin.h:1724
__m128i _mm_subs_epu8(__m128i a, __m128i b)
Definition: emmintrin.h:1303
__m128i _mm_srl_epi32(__m128i a, __m128i count)
Definition: emmintrin.h:1412
__m128i _mm_mulhi_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1230
#define _mm_shuffle_pd(a, b, i)
Definition: emmintrin.h:1901
void _mm_stream_si128(__m128i *p, __m128i a)
Definition: emmintrin.h:1715
void _mm_mfence(void)
Definition: intrin_x86.h:99
__m128d _mm_or_pd(__m128d a, __m128d b)
Definition: emmintrin.h:652
__m128i _mm_cmpeq_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1432
__m128d _mm_cmpge_sd(__m128d a, __m128d b)
Definition: emmintrin.h:743
__m128d _mm_mul_sd(__m128d a, __m128d b)
Definition: emmintrin.h:589
__m128i _mm_set1_epi8(char b)
Definition: emmintrin.h:1618
__m128i _mm_sra_epi32(__m128i a, __m128i count)
Definition: emmintrin.h:1389
__m128d _mm_sqrt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:611
__m128i _mm_srai_epi32(__m128i a, int count)
Definition: emmintrin.h:1384
__m128i _mm_cvtsi32_si128(int a)
Definition: emmintrin.h:1512
__m64 _mm_add_si64(__m64 a, __m64 b)
Definition: emmintrin.h:1133
__m128i _mm_slli_epi16(__m128i a, int count)
Definition: emmintrin.h:1344
__m128d _mm_cmpeq_pd(__m128d a, __m128d b)
Definition: emmintrin.h:662
__m128i _mm_subs_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1285
void _mm_storer_pd(double *dp, __m128d a)
Definition: emmintrin.h:1091
double __m128d __attribute__((__vector_size__(16), __aligned__(16)))
Definition: emmintrin.h:43
__INTRIN_INLINE_SSE2 __m128i _mm_cvtsi64_si128(long long a)
Definition: emmintrin.h:1517
int _mm_movemask_pd(__m128d a)
Definition: emmintrin.h:1896
__m128i _mm_setr_epi64(__m64 q0, __m64 q1)
Definition: emmintrin.h:1624
__m128i _mm_sub_epi64(__m128i a, __m128i b)
Definition: emmintrin.h:1280
__m128d _mm_move_sd(__m128d a, __m128d b)
Definition: emmintrin.h:1053
__m128i _mm_min_epu8(__m128i a, __m128i b)
Definition: emmintrin.h:1221
__INTRIN_INLINE_SSE2 __m128i _mm_set_epi64x(long long q1, long long q0)
Definition: emmintrin.h:1565
__m128d _mm_unpackhi_pd(__m128d a, __m128d b)
Definition: emmintrin.h:1878
int _mm_cvttsd_si32(__m128d a)
Definition: emmintrin.h:902
__m128d _mm_cmpnle_pd(__m128d a, __m128d b)
Definition: emmintrin.h:707
__m128i _mm_unpackhi_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1800
__m128d _mm_cmpnle_sd(__m128d a, __m128d b)
Definition: emmintrin.h:769
__m128i _mm_add_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1118
__m128d _mm_cmple_sd(__m128d a, __m128d b)
Definition: emmintrin.h:732
__m128d _mm_cmple_pd(__m128d a, __m128d b)
Definition: emmintrin.h:672
__m128i _mm_cmplt_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1469
#define _mm_insert_epi16(a, b, imm)
Definition: emmintrin.h:1761
__m128d _mm_cvtpi32_pd(__m64 a)
Definition: emmintrin.h:917
__m128d _mm_loadu_pd(double const *dp)
Definition: emmintrin.h:956
__m64 _mm_mul_su32(__m64 a, __m64 b)
Definition: emmintrin.h:1245
__m128i _mm_avg_epu16(__m128i a, __m128i b)
Definition: emmintrin.h:1184
__m128d _mm_cvtss_sd(__m128d a, __m128 b)
Definition: emmintrin.h:891
int _mm_ucomieq_sd(__m128d a, __m128d b)
Definition: emmintrin.h:816
__m128i _mm_setr_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0)
Definition: emmintrin.h:1641
__m128i _mm_sll_epi64(__m128i a, __m128i count)
Definition: emmintrin.h:1369
__m128d _mm_cmpngt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:774
void _mm_storel_pd(double *dp, __m128d a)
Definition: emmintrin.h:1110
__m128d _mm_cmplt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:727
__m128d _mm_cvtsi32_sd(__m128d a, int b)
Definition: emmintrin.h:884
__m128i _mm_or_si128(__m128i a, __m128i b)
Definition: emmintrin.h:1331
__INTRIN_INLINE_SSE2 long long _mm_cvtsi128_si64(__m128i a)
Definition: emmintrin.h:1528
__m128i _mm_cmplt_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1464
__m128i _mm_subs_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1294
__INTRIN_INLINE_SSE2 __m128i _mm_loadu_si64(void const *a)
Definition: emmintrin.h:964
__m128d _mm_cmpngt_pd(__m128d a, __m128d b)
Definition: emmintrin.h:712
#define _mm_extract_epi16(a, imm)
Definition: emmintrin.h:1757
double _mm_cvtsd_f64(__m128d a)
Definition: emmintrin.h:922
#define _mm_shufflelo_epi16(a, imm)
Definition: emmintrin.h:1773
__m128i _mm_packs_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1742
__INTRIN_INLINE_SSE2 __m128i _mm_set1_epi64x(long long q)
Definition: emmintrin.h:1598
__m128d _mm_min_sd(__m128d a, __m128d b)
Definition: emmintrin.h:622
void _mm_store_pd(double *dp, __m128d a)
Definition: emmintrin.h:1067
__m128i _mm_srli_epi16(__m128i a, int count)
Definition: emmintrin.h:1397
__m128i _mm_sub_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1260
__m128d _mm_castsi128_pd(__m128i a)
Definition: emmintrin.h:1930
__m128d _mm_cmpord_pd(__m128d a, __m128d b)
Definition: emmintrin.h:687
void _mm_storeh_pd(double *dp, __m128d a)
Definition: emmintrin.h:1102
__m128d _mm_mul_pd(__m128d a, __m128d b)
Definition: emmintrin.h:595
__INTRIN_INLINE_SSE2 __m128i _mm_loadu_si32(void const *a)
Definition: emmintrin.h:973
__m128i _mm_sll_epi16(__m128i a, __m128i count)
Definition: emmintrin.h:1349
__m128d _mm_sqrt_pd(__m128d a)
Definition: emmintrin.h:617
__m128i _mm_mullo_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1240
__m128i _mm_sra_epi16(__m128i a, __m128i count)
Definition: emmintrin.h:1379
int _mm_comieq_sd(__m128d a, __m128d b)
Definition: emmintrin.h:786
__m128i _mm_cvttps_epi32(__m128 a)
Definition: emmintrin.h:1507
__m128d _mm_load_sd(double const *dp)
Definition: emmintrin.h:991
__m64 _mm_movepi64_pi64(__m128i a)
Definition: emmintrin.h:1859
void _mm_lfence(void)
Definition: intrin_x86.h:106
void _mm_maskmoveu_si128(__m128i d, __m128i n, _Out_writes_bytes_(16) char *p)
int _mm_movemask_epi8(__m128i a)
Definition: emmintrin.h:1765
__m128i _mm_madd_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1189
__m128d _mm_cmpgt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:737
int _mm_ucomilt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:821
__m128i _mm_cmpeq_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1437
__m128i _mm_srai_epi16(__m128i a, int count)
Definition: emmintrin.h:1374
__m128i _mm_set_epi16(short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0)
Definition: emmintrin.h:1580
__m128i _mm_setr_epi16(short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7)
Definition: emmintrin.h:1634
int _mm_ucomineq_sd(__m128d a, __m128d b)
Definition: emmintrin.h:841
__m128i _mm_cvtps_epi32(__m128 a)
Definition: emmintrin.h:1502
__m128i _mm_unpackhi_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1779
__m128d _mm_andnot_pd(__m128d a, __m128d b)
Definition: emmintrin.h:647
__m128d _mm_loadl_pd(__m128d a, double const *dp)
Definition: emmintrin.h:1009
__m128d _mm_cmpneq_sd(__m128d a, __m128d b)
Definition: emmintrin.h:759
__m128i _mm_min_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1212
__m128 _mm_cvtsd_ss(__m128 a, __m128d b)
Definition: emmintrin.h:879
__m128i _mm_andnot_si128(__m128i a, __m128i b)
Definition: emmintrin.h:1326
__m128i _mm_and_si128(__m128i a, __m128i b)
Definition: emmintrin.h:1321
__m128i _mm_setl_epi64(__m128i q)
void _mm_stream_pd(double *p, __m128d a)
Definition: emmintrin.h:1706
__m128 _mm_castpd_ps(__m128d a)
Definition: emmintrin.h:1905
__INTRIN_INLINE_SSE2 __m128i _mm_undefined_si128(void)
Definition: emmintrin.h:1555
int _mm_comineq_sd(__m128d a, __m128d b)
Definition: emmintrin.h:811
__m128i _mm_avg_epu8(__m128i a, __m128i b)
Definition: emmintrin.h:1179
int _mm_ucomigt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:831
__m128i _mm_adds_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1152
#define _mm_stream_si64
Definition: emmintrin.h:320
__m128i _mm_unpackhi_epi64(__m128i a, __m128i b)
Definition: emmintrin.h:1810
__m128i _mm_adds_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1143
int _mm_comilt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:791
__m128i _mm_unpacklo_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1830
__m128i _mm_cmplt_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1459
#define _mm_shufflehi_epi16(a, imm)
Definition: emmintrin.h:1776
__m128d _mm_cvtepi32_pd(__m128i a)
Definition: emmintrin.h:860
__m128i _mm_max_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1194
__m128i _mm_unpacklo_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1819
__m128d _mm_xor_pd(__m128d a, __m128d b)
Definition: emmintrin.h:657
__INTRIN_INLINE_SSE2 void _mm_storeu_si64(void *p, __m128i b)
Definition: emmintrin.h:1669
void _mm_clflush(void const *p)
__m128d _mm_cvtps_pd(__m128 a)
Definition: emmintrin.h:851
__m128d _mm_cmpgt_pd(__m128d a, __m128d b)
Definition: emmintrin.h:677
void _mm_pause(void)
Definition: intrin_x86.h:2036
__INTRIN_INLINE_SSE2 __m128i _mm_loadu_si16(void const *a)
Definition: emmintrin.h:982
void _mm_store_sd(double *dp, __m128d a)
Definition: emmintrin.h:1059
__m128d _mm_set_pd(double w, double x)
Definition: emmintrin.h:1038
__m128i _mm_srli_epi32(__m128i a, int count)
Definition: emmintrin.h:1407
int _mm_comigt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:801
__m128i _mm_set_epi64(__m64 q1, __m64 q0)
Definition: emmintrin.h:1570
__m128i _mm_cmpgt_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1449
__m128d _mm_cmplt_pd(__m128d a, __m128d b)
Definition: emmintrin.h:667
__m128i _mm_add_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1128
__m128i _mm_sub_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1265
__m64 _mm_cvttpd_pi32(__m128d a)
Definition: emmintrin.h:912
__m128i _mm_loadl_epi64(__m128i_u const *p)
Definition: emmintrin.h:1546
__m128i _mm_add_epi64(__m128i a, __m128i b)
Definition: emmintrin.h:1138
#define _mm_srli_si128(a, imm)
Definition: emmintrin.h:1394
__m128d _mm_div_sd(__m128d a, __m128d b)
Definition: emmintrin.h:600
__m128i _mm_cmpgt_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1454
#define _mm_shuffle_epi32(a, imm)
Definition: emmintrin.h:1770
__m128d _mm_set1_pd(double w)
Definition: emmintrin.h:1033
__m128i _mm_unpacklo_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1840
__m128d _mm_div_pd(__m128d a, __m128d b)
Definition: emmintrin.h:606
__m128i _mm_srl_epi16(__m128i a, __m128i count)
Definition: emmintrin.h:1402
__m128d _mm_max_pd(__m128d a, __m128d b)
Definition: emmintrin.h:637
__m128d _mm_cmpunord_sd(__m128d a, __m128d b)
Definition: emmintrin.h:754
__m128d _mm_unpacklo_pd(__m128d a, __m128d b)
Definition: emmintrin.h:1887
__m128i _mm_subs_epu16(__m128i a, __m128i b)
Definition: emmintrin.h:1312
__m128i _mm_loadu_si128(__m128i_u const *p)
Definition: emmintrin.h:1538
__m128i _mm_set1_epi32(int i)
Definition: emmintrin.h:1608
__m128i _mm_unpacklo_epi64(__m128i a, __m128i b)
Definition: emmintrin.h:1850
__m128i _mm_max_epu8(__m128i a, __m128i b)
Definition: emmintrin.h:1203
GLint GLint GLint GLint GLint x
Definition: gl.h:1548
GLuint GLuint GLsizei count
Definition: gl.h:1545
GLdouble GLdouble GLdouble GLdouble q
Definition: gl.h:2063
GLdouble n
Definition: glext.h:7729
GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint GLdouble GLdouble w2
Definition: glext.h:8308
GLboolean GLboolean GLboolean b
Definition: glext.h:6204
GLfloat GLfloat p
Definition: glext.h:8902
GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint GLdouble w1
Definition: glext.h:8308
GLboolean GLboolean GLboolean GLboolean a
Definition: glext.h:6204
GLubyte GLubyte GLubyte GLubyte w
Definition: glext.h:6102
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble * u
Definition: glfuncs.h:240
#define d
Definition: ke_i.h:81
#define a
Definition: ke_i.h:78
#define b
Definition: ke_i.h:79
#define __INTRIN_INLINE_MMX
Definition: mmintrin.h:64
static CRYPT_DATA_BLOB b4
Definition: msg.c:2284
static CRYPT_DATA_BLOB b3[]
Definition: msg.c:592
static CRYPT_DATA_BLOB b2[]
Definition: msg.c:582
static CRYPT_DATA_BLOB b1[]
Definition: msg.c:573
#define _Out_writes_bytes_(size)
Definition: ms_sal.h:350
#define long
Definition: qsort.c:33
#define __c
Definition: schilyio.h:209
#define LL
Definition: tui.h:167
#define __INTRIN_INLINE_SSE
Definition: xmmintrin.h:70