/* ReactOS 0.4.16-dev-303-g11d5cb8
 * emmintrin.h
 * Go to the documentation of this file.
 */
1/*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#pragma once
11#ifndef _INCLUDED_EMM
12#define _INCLUDED_EMM
13
14#include <vcruntime.h>
15#include <xmmintrin.h>
16
17#if defined(_MSC_VER) && !defined(__clang__)
18
19typedef union _DECLSPEC_INTRIN_TYPE _CRT_ALIGN(16) __m128i
20{
21 __int8 m128i_i8[16];
22 __int16 m128i_i16[8];
23 __int32 m128i_i32[4];
24 __int64 m128i_i64[2];
25 unsigned __int8 m128i_u8[16];
26 unsigned __int16 m128i_u16[8];
27 unsigned __int32 m128i_u32[4];
28 unsigned __int64 m128i_u64[2];
29} __m128i;
30_STATIC_ASSERT(sizeof(__m128i) == 16);
31
32typedef struct _DECLSPEC_INTRIN_TYPE _CRT_ALIGN(16) __m128d
33{
34 double m128d_f64[2];
35} __m128d;
36
37typedef __declspec(align(1)) __m128i __m128i_u;
38
39#define __ATTRIBUTE_SSE2__
40
41#else /* _MSC_VER */
42
43typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16)));
44typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16)));
45
46typedef double __m128d_u __attribute__((__vector_size__(16), __aligned__(1)));
47typedef long long __m128i_u __attribute__((__vector_size__(16), __aligned__(1)));
48
49/* Type defines. */
50typedef double __v2df __attribute__((__vector_size__(16)));
51typedef long long __v2di __attribute__((__vector_size__(16)));
52typedef short __v8hi __attribute__((__vector_size__(16)));
53typedef char __v16qi __attribute__((__vector_size__(16)));
54
55/* Unsigned types */
56typedef unsigned long long __v2du __attribute__((__vector_size__(16)));
57typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
58typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
59
60/* We need an explicitly signed variant for char. Note that this shouldn't
61 * appear in the interface though. */
62typedef signed char __v16qs __attribute__((__vector_size__(16)));
63
64#ifdef __clang__
65#define __ATTRIBUTE_SSE2__ __attribute__((__target__("sse2"),__min_vector_width__(128)))
66#define __ATTRIBUTE_MMXSSE2__ __attribute__((__target__("mmx,sse2"),__min_vector_width__(128)))
67#else
68#define __ATTRIBUTE_SSE2__ __attribute__((__target__("sse2")))
69#define __ATTRIBUTE_MMXSSE2__ __attribute__((__target__("mmx,sse2")))
70#endif
71#define __INTRIN_INLINE_SSE2 __INTRIN_INLINE __ATTRIBUTE_SSE2__
72#define __INTRIN_INLINE_MMXSSE2 __INTRIN_INLINE __ATTRIBUTE_MMXSSE2__
73
74#endif /* _MSC_VER */
75
76#ifdef __cplusplus
77extern "C" {
78#endif
79
80extern __m128d _mm_add_sd(__m128d a, __m128d b);
81extern __m128d _mm_add_pd(__m128d a, __m128d b);
82extern __m128d _mm_sub_sd(__m128d a, __m128d b);
83extern __m128d _mm_sub_pd(__m128d a, __m128d b);
84extern __m128d _mm_mul_sd(__m128d a, __m128d b);
85extern __m128d _mm_mul_pd(__m128d a, __m128d b);
86extern __m128d _mm_div_sd(__m128d a, __m128d b);
87extern __m128d _mm_div_pd(__m128d a, __m128d b);
88extern __m128d _mm_sqrt_sd(__m128d a, __m128d b);
89extern __m128d _mm_sqrt_pd(__m128d a);
90extern __m128d _mm_min_sd(__m128d a, __m128d b);
91extern __m128d _mm_min_pd(__m128d a, __m128d b);
92extern __m128d _mm_max_sd(__m128d a, __m128d b);
93extern __m128d _mm_max_pd(__m128d a, __m128d b);
94extern __m128d _mm_and_pd(__m128d a, __m128d b);
95extern __m128d _mm_andnot_pd(__m128d a, __m128d b);
96extern __m128d _mm_or_pd(__m128d a, __m128d b);
97extern __m128d _mm_xor_pd(__m128d a, __m128d b);
98extern __m128d _mm_cmpeq_pd(__m128d a, __m128d b);
99extern __m128d _mm_cmplt_pd(__m128d a, __m128d b);
100extern __m128d _mm_cmple_pd(__m128d a, __m128d b);
101extern __m128d _mm_cmpgt_pd(__m128d a, __m128d b);
102extern __m128d _mm_cmpge_pd(__m128d a, __m128d b);
103extern __m128d _mm_cmpord_pd(__m128d a, __m128d b);
104extern __m128d _mm_cmpunord_pd(__m128d a, __m128d b);
105extern __m128d _mm_cmpneq_pd(__m128d a, __m128d b);
106extern __m128d _mm_cmpnlt_pd(__m128d a, __m128d b);
107extern __m128d _mm_cmpnle_pd(__m128d a, __m128d b);
108extern __m128d _mm_cmpngt_pd(__m128d a, __m128d b);
109extern __m128d _mm_cmpnge_pd(__m128d a, __m128d b);
110extern __m128d _mm_cmpeq_sd(__m128d a, __m128d b);
111extern __m128d _mm_cmplt_sd(__m128d a, __m128d b);
112extern __m128d _mm_cmple_sd(__m128d a, __m128d b);
113extern __m128d _mm_cmpgt_sd(__m128d a, __m128d b);
114extern __m128d _mm_cmpge_sd(__m128d a, __m128d b);
115extern __m128d _mm_cmpord_sd(__m128d a, __m128d b);
116extern __m128d _mm_cmpunord_sd(__m128d a, __m128d b);
117extern __m128d _mm_cmpneq_sd(__m128d a, __m128d b);
118extern __m128d _mm_cmpnlt_sd(__m128d a, __m128d b);
119extern __m128d _mm_cmpnle_sd(__m128d a, __m128d b);
120extern __m128d _mm_cmpngt_sd(__m128d a, __m128d b);
121extern __m128d _mm_cmpnge_sd(__m128d a, __m128d b);
122extern int _mm_comieq_sd(__m128d a, __m128d b);
123extern int _mm_comilt_sd(__m128d a, __m128d b);
124extern int _mm_comile_sd(__m128d a, __m128d b);
125extern int _mm_comigt_sd(__m128d a, __m128d b);
126extern int _mm_comige_sd(__m128d a, __m128d b);
127extern int _mm_comineq_sd(__m128d a, __m128d b);
128extern int _mm_ucomieq_sd(__m128d a, __m128d b);
129extern int _mm_ucomilt_sd(__m128d a, __m128d b);
130extern int _mm_ucomile_sd(__m128d a, __m128d b);
131extern int _mm_ucomigt_sd(__m128d a, __m128d b);
132extern int _mm_ucomige_sd(__m128d a, __m128d b);
133extern int _mm_ucomineq_sd(__m128d a, __m128d b);
134extern __m128 _mm_cvtpd_ps(__m128d a);
135extern __m128d _mm_cvtps_pd(__m128 a);
136extern __m128d _mm_cvtepi32_pd(__m128i a);
137extern __m128i _mm_cvtpd_epi32(__m128d a);
138extern int _mm_cvtsd_si32(__m128d a);
139extern __m128 _mm_cvtsd_ss(__m128 a, __m128d b);
140extern __m128d _mm_cvtsi32_sd(__m128d a, int b);
141extern __m128d _mm_cvtss_sd(__m128d a, __m128 b);
142extern __m128i _mm_cvttpd_epi32(__m128d a);
143extern int _mm_cvttsd_si32(__m128d a);
144extern __m64 _mm_cvtpd_pi32(__m128d a);
145extern __m64 _mm_cvttpd_pi32(__m128d a);
146extern __m128d _mm_cvtpi32_pd(__m64 a);
147extern double _mm_cvtsd_f64(__m128d a);
148extern __m128d _mm_load_pd(double const *dp);
149extern __m128d _mm_load1_pd(double const *dp);
150extern __m128d _mm_loadr_pd(double const *dp);
151extern __m128d _mm_loadu_pd(double const *dp);
152//extern __m128i _mm_loadu_si64(void const *a);
153//extern __m128i _mm_loadu_si32(void const *a);
154//extern __m128i _mm_loadu_si16(void const *a);
155extern __m128d _mm_load_sd(double const *dp);
156extern __m128d _mm_loadh_pd(__m128d a, double const *dp);
157extern __m128d _mm_loadl_pd(__m128d a, double const *dp);
158//extern __m128d _mm_undefined_pd(void);
159extern __m128d _mm_set_sd(double w);
160extern __m128d _mm_set1_pd(double w);
161extern __m128d _mm_set_pd(double w, double x);
162extern __m128d _mm_setr_pd(double w, double x);
163extern __m128d _mm_setzero_pd(void);
164extern __m128d _mm_move_sd(__m128d a, __m128d b);
165extern void _mm_store_sd(double *dp, __m128d a);
166extern void _mm_store_pd(double *dp, __m128d a);
167extern void _mm_store1_pd(double *dp, __m128d a);
168extern void _mm_storeu_pd(double *dp, __m128d a);
169extern void _mm_storer_pd(double *dp, __m128d a);
170extern void _mm_storeh_pd(double *dp, __m128d a);
171extern void _mm_storel_pd(double *dp, __m128d a);
172extern __m128i _mm_add_epi8(__m128i a, __m128i b);
173extern __m128i _mm_add_epi16(__m128i a, __m128i b);
174extern __m128i _mm_add_epi32(__m128i a, __m128i b);
175extern __m64 _mm_add_si64(__m64 a, __m64 b);
176extern __m128i _mm_add_epi64(__m128i a, __m128i b);
177extern __m128i _mm_adds_epi8(__m128i a, __m128i b);
178extern __m128i _mm_adds_epi16(__m128i a, __m128i b);
179extern __m128i _mm_adds_epu8(__m128i a, __m128i b);
180extern __m128i _mm_adds_epu16(__m128i a, __m128i b);
181extern __m128i _mm_avg_epu8(__m128i a, __m128i b);
182extern __m128i _mm_avg_epu16(__m128i a, __m128i b);
183extern __m128i _mm_madd_epi16(__m128i a, __m128i b);
184extern __m128i _mm_max_epi16(__m128i a, __m128i b);
185extern __m128i _mm_max_epu8(__m128i a, __m128i b);
186extern __m128i _mm_min_epi16(__m128i a, __m128i b);
187extern __m128i _mm_min_epu8(__m128i a, __m128i b);
188extern __m128i _mm_mulhi_epi16(__m128i a, __m128i b);
189extern __m128i _mm_mulhi_epu16(__m128i a, __m128i b);
190extern __m128i _mm_mullo_epi16(__m128i a, __m128i b);
191extern __m64 _mm_mul_su32(__m64 a, __m64 b);
192extern __m128i _mm_mul_epu32(__m128i a, __m128i b);
193extern __m128i _mm_sad_epu8(__m128i a, __m128i b);
194extern __m128i _mm_sub_epi8(__m128i a, __m128i b);
195extern __m128i _mm_sub_epi16(__m128i a, __m128i b);
196extern __m128i _mm_sub_epi32(__m128i a, __m128i b);
197extern __m64 _mm_sub_si64(__m64 a, __m64 b);
198extern __m128i _mm_sub_epi64(__m128i a, __m128i b);
199extern __m128i _mm_subs_epi8(__m128i a, __m128i b);
200extern __m128i _mm_subs_epi16(__m128i a, __m128i b);
201extern __m128i _mm_subs_epu8(__m128i a, __m128i b);
202extern __m128i _mm_subs_epu16(__m128i a, __m128i b);
203extern __m128i _mm_and_si128(__m128i a, __m128i b);
204extern __m128i _mm_andnot_si128(__m128i a, __m128i b);
205extern __m128i _mm_or_si128(__m128i a, __m128i b);
206extern __m128i _mm_xor_si128(__m128i a, __m128i b);
207extern __m128i _mm_slli_si128(__m128i a, int i);
208extern __m128i _mm_slli_epi16(__m128i a, int count);
209extern __m128i _mm_sll_epi16(__m128i a, __m128i count);
210extern __m128i _mm_slli_epi32(__m128i a, int count);
211extern __m128i _mm_sll_epi32(__m128i a, __m128i count);
212extern __m128i _mm_slli_epi64(__m128i a, int count);
213extern __m128i _mm_sll_epi64(__m128i a, __m128i count);
214extern __m128i _mm_srai_epi16(__m128i a, int count);
215extern __m128i _mm_sra_epi16(__m128i a, __m128i count);
216extern __m128i _mm_srai_epi32(__m128i a, int count);
217extern __m128i _mm_sra_epi32(__m128i a, __m128i count);
218extern __m128i _mm_srli_si128(__m128i a, int imm);
219extern __m128i _mm_srli_epi16(__m128i a, int count);
220extern __m128i _mm_srl_epi16(__m128i a, __m128i count);
221extern __m128i _mm_srli_epi32(__m128i a, int count);
222extern __m128i _mm_srl_epi32(__m128i a, __m128i count);
223extern __m128i _mm_srli_epi64(__m128i a, int count);
224extern __m128i _mm_srl_epi64(__m128i a, __m128i count);
225extern __m128i _mm_cmpeq_epi8(__m128i a, __m128i b);
226extern __m128i _mm_cmpeq_epi16(__m128i a, __m128i b);
227extern __m128i _mm_cmpeq_epi32(__m128i a, __m128i b);
228extern __m128i _mm_cmpgt_epi8(__m128i a, __m128i b);
229extern __m128i _mm_cmpgt_epi16(__m128i a, __m128i b);
230extern __m128i _mm_cmpgt_epi32(__m128i a, __m128i b);
231extern __m128i _mm_cmplt_epi8(__m128i a, __m128i b);
232extern __m128i _mm_cmplt_epi16(__m128i a, __m128i b);
233extern __m128i _mm_cmplt_epi32(__m128i a, __m128i b);
234#ifdef _M_AMD64
235extern __m128d _mm_cvtsi64_sd(__m128d a, long long b);
236extern long long _mm_cvtsd_si64(__m128d a);
237extern long long _mm_cvttsd_si64(__m128d a);
238#endif
239extern __m128 _mm_cvtepi32_ps(__m128i a);
240extern __m128i _mm_cvtps_epi32(__m128 a);
241extern __m128i _mm_cvttps_epi32(__m128 a);
242extern __m128i _mm_cvtsi32_si128(int a);
243#ifdef _M_AMD64
244extern __m128i _mm_cvtsi64_si128(long long a);
245#endif
246extern int _mm_cvtsi128_si32(__m128i a);
247#ifdef _M_AMD64
248extern long long _mm_cvtsi128_si64(__m128i a);
249#endif
250extern __m128i _mm_load_si128(__m128i const *p);
251extern __m128i _mm_loadu_si128(__m128i_u const *p);
252extern __m128i _mm_loadl_epi64(__m128i_u const *p);
253//extern __m128i _mm_undefined_si128(void);
254//extern __m128i _mm_set_epi64x(long long q1, long long q0); // FIXME
255extern __m128i _mm_set_epi64(__m64 q1, __m64 q0);
256//extern __m128i _mm_set_epi32(int i3, int i1, int i0);
257extern __m128i _mm_set_epi32(int i3, int i2, int i1, int i0);
258//extern __m128i _mm_set_epi16(short w7, short w2, short w1, short w0);
259extern __m128i _mm_set_epi16(short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0);
260//extern __m128i _mm_set_epi8(char b15, char b10, char b4, char b3, char b2, char b1, char b0);
261extern __m128i _mm_set_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0);
262//extern __m128i _mm_set1_epi64x(long long q); // FIXME
263extern __m128i _mm_set1_epi64(__m64 q);
264extern __m128i _mm_set1_epi32(int i);
265extern __m128i _mm_set1_epi16(short w);
266extern __m128i _mm_set1_epi8(char b);
267extern __m128i _mm_setl_epi64(__m128i q); // FIXME: clang?
268extern __m128i _mm_setr_epi64(__m64 q0, __m64 q1);
269//extern __m128i _mm_setr_epi32(int i0, int i2, int i3);
270extern __m128i _mm_setr_epi32(int i0, int i1, int i2, int i3);
271//extern __m128i _mm_setr_epi16(short w0, short w5, short w6, short w7);
272extern __m128i _mm_setr_epi16(short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7);
273//extern __m128i _mm_setr_epi8(char b0, char b6, char b11, char b12, char b13, char b14, char b15);
274extern __m128i _mm_setr_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0);
275extern __m128i _mm_setzero_si128(void);
276extern void _mm_store_si128(__m128i *p, __m128i b);
277extern void _mm_storeu_si128(__m128i_u *p, __m128i b);
278//extern void _mm_storeu_si64(void *p, __m128i b);
279//extern void _mm_storeu_si32(void *p, __m128i b);
280//extern void _mm_storeu_si16(void *p, __m128i b);
281extern void _mm_maskmoveu_si128(__m128i d, __m128i n, _Out_writes_bytes_(16) char *p);
282extern void _mm_storel_epi64(__m128i_u *p, __m128i a);
283extern void _mm_stream_pd(double *p, __m128d a);
284extern void _mm_stream_si128(__m128i *p, __m128i a);
285extern void _mm_stream_si32(int *p, int a);
286extern void _mm_clflush(void const *p);
287extern void _mm_lfence(void);
288extern void _mm_mfence(void);
289extern __m128i _mm_packs_epi16(__m128i a, __m128i b);
290extern __m128i _mm_packs_epi32(__m128i a, __m128i b);
291extern __m128i _mm_packus_epi16(__m128i a, __m128i b);
292extern int _mm_extract_epi16(__m128i a, int imm);
293extern __m128i _mm_insert_epi16(__m128i a, int b, int imm);
294extern int _mm_movemask_epi8(__m128i a);
295extern __m128i _mm_shuffle_epi32(__m128i a, int imm);
296extern __m128i _mm_shufflelo_epi16(__m128i a, int imm);
297extern __m128i _mm_shufflehi_epi16(__m128i a, int imm);
298extern __m128i _mm_unpackhi_epi8(__m128i a, __m128i b);
299extern __m128i _mm_unpackhi_epi16(__m128i a, __m128i b);
300extern __m128i _mm_unpackhi_epi32(__m128i a, __m128i b);
301extern __m128i _mm_unpackhi_epi64(__m128i a, __m128i b);
302extern __m128i _mm_unpacklo_epi8(__m128i a, __m128i b);
303extern __m128i _mm_unpacklo_epi16(__m128i a, __m128i b);
304extern __m128i _mm_unpacklo_epi32(__m128i a, __m128i b);
305extern __m128i _mm_unpacklo_epi64(__m128i a, __m128i b);
306extern __m64 _mm_movepi64_pi64(__m128i a);
307extern __m128i _mm_movpi64_epi64(__m64 a);
308extern __m128i _mm_move_epi64(__m128i a);
309extern __m128d _mm_unpackhi_pd(__m128d a, __m128d b);
310extern __m128d _mm_unpacklo_pd(__m128d a, __m128d b);
311extern int _mm_movemask_pd(__m128d a);
312extern __m128d _mm_shuffle_pd(__m128d a, __m128d b, int imm);
313extern __m128 _mm_castpd_ps(__m128d a);
314extern __m128i _mm_castpd_si128(__m128d a);
315extern __m128d _mm_castps_pd(__m128 a);
316extern __m128i _mm_castps_si128(__m128 a);
317extern __m128 _mm_castsi128_ps(__m128i a);
318extern __m128d _mm_castsi128_pd(__m128i a);
319void _mm_pause(void);
320
321/* Alternate names */
322#define _mm_set_pd1(a) _mm_set1_pd(a)
323#define _mm_load_pd1(p) _mm_load1_pd(p)
324#define _mm_store_pd1(p, a) _mm_store1_pd((p), (a))
325#define _mm_bslli_si128 _mm_slli_si128
326#define _mm_bsrli_si128 _mm_srli_si128
327#define _mm_stream_si64 _mm_stream_si64x
328
329#if defined(_MSC_VER) && !defined(__clang__)
330
331#pragma intrinsic(_mm_add_sd)
332#pragma intrinsic(_mm_add_pd)
333#pragma intrinsic(_mm_sub_sd)
334#pragma intrinsic(_mm_sub_pd)
335#pragma intrinsic(_mm_mul_sd)
336#pragma intrinsic(_mm_mul_pd)
337#pragma intrinsic(_mm_div_sd)
338#pragma intrinsic(_mm_div_pd)
339#pragma intrinsic(_mm_sqrt_sd)
340#pragma intrinsic(_mm_sqrt_pd)
341#pragma intrinsic(_mm_min_sd)
342#pragma intrinsic(_mm_min_pd)
343#pragma intrinsic(_mm_max_sd)
344#pragma intrinsic(_mm_max_pd)
345#pragma intrinsic(_mm_and_pd)
346#pragma intrinsic(_mm_andnot_pd)
347#pragma intrinsic(_mm_or_pd)
348#pragma intrinsic(_mm_xor_pd)
349#pragma intrinsic(_mm_cmpeq_pd)
350#pragma intrinsic(_mm_cmplt_pd)
351#pragma intrinsic(_mm_cmple_pd)
352#pragma intrinsic(_mm_cmpgt_pd)
353#pragma intrinsic(_mm_cmpge_pd)
354#pragma intrinsic(_mm_cmpord_pd)
355#pragma intrinsic(_mm_cmpunord_pd)
356#pragma intrinsic(_mm_cmpneq_pd)
357#pragma intrinsic(_mm_cmpnlt_pd)
358#pragma intrinsic(_mm_cmpnle_pd)
359#pragma intrinsic(_mm_cmpngt_pd)
360#pragma intrinsic(_mm_cmpnge_pd)
361#pragma intrinsic(_mm_cmpeq_sd)
362#pragma intrinsic(_mm_cmplt_sd)
363#pragma intrinsic(_mm_cmple_sd)
364#pragma intrinsic(_mm_cmpgt_sd)
365#pragma intrinsic(_mm_cmpge_sd)
366#pragma intrinsic(_mm_cmpord_sd)
367#pragma intrinsic(_mm_cmpunord_sd)
368#pragma intrinsic(_mm_cmpneq_sd)
369#pragma intrinsic(_mm_cmpnlt_sd)
370#pragma intrinsic(_mm_cmpnle_sd)
371#pragma intrinsic(_mm_cmpngt_sd)
372#pragma intrinsic(_mm_cmpnge_sd)
373#pragma intrinsic(_mm_comieq_sd)
374#pragma intrinsic(_mm_comilt_sd)
375#pragma intrinsic(_mm_comile_sd)
376#pragma intrinsic(_mm_comigt_sd)
377#pragma intrinsic(_mm_comige_sd)
378#pragma intrinsic(_mm_comineq_sd)
379#pragma intrinsic(_mm_ucomieq_sd)
380#pragma intrinsic(_mm_ucomilt_sd)
381#pragma intrinsic(_mm_ucomile_sd)
382#pragma intrinsic(_mm_ucomigt_sd)
383#pragma intrinsic(_mm_ucomige_sd)
384#pragma intrinsic(_mm_ucomineq_sd)
385#pragma intrinsic(_mm_cvtpd_ps)
386#pragma intrinsic(_mm_cvtps_pd)
387#pragma intrinsic(_mm_cvtepi32_pd)
388#pragma intrinsic(_mm_cvtpd_epi32)
389#pragma intrinsic(_mm_cvtsd_si32)
390#pragma intrinsic(_mm_cvtsd_ss)
391#pragma intrinsic(_mm_cvtsi32_sd)
392#pragma intrinsic(_mm_cvtss_sd)
393#pragma intrinsic(_mm_cvttpd_epi32)
394#pragma intrinsic(_mm_cvttsd_si32)
395//#pragma intrinsic(_mm_cvtpd_pi32)
396//#pragma intrinsic(_mm_cvttpd_pi32)
397//#pragma intrinsic(_mm_cvtpi32_pd)
398#pragma intrinsic(_mm_cvtsd_f64)
399#pragma intrinsic(_mm_load_pd)
400#pragma intrinsic(_mm_load1_pd)
401#pragma intrinsic(_mm_loadr_pd)
402#pragma intrinsic(_mm_loadu_pd)
403//#pragma intrinsic(_mm_loadu_si64)
404//#pragma intrinsic(_mm_loadu_si32)
405//#pragma intrinsic(_mm_loadu_si16)
406#pragma intrinsic(_mm_load_sd)
407#pragma intrinsic(_mm_loadh_pd)
408#pragma intrinsic(_mm_loadl_pd)
409//#pragma intrinsic(_mm_undefined_pd)
410#pragma intrinsic(_mm_set_sd)
411#pragma intrinsic(_mm_set1_pd)
412#pragma intrinsic(_mm_set_pd)
413#pragma intrinsic(_mm_setr_pd)
414#pragma intrinsic(_mm_setzero_pd)
415#pragma intrinsic(_mm_move_sd)
416#pragma intrinsic(_mm_store_sd)
417#pragma intrinsic(_mm_store_pd)
418#pragma intrinsic(_mm_store1_pd)
419#pragma intrinsic(_mm_storeu_pd)
420#pragma intrinsic(_mm_storer_pd)
421#pragma intrinsic(_mm_storeh_pd)
422#pragma intrinsic(_mm_storel_pd)
423#pragma intrinsic(_mm_add_epi8)
424#pragma intrinsic(_mm_add_epi16)
425#pragma intrinsic(_mm_add_epi32)
426//#pragma intrinsic(_mm_add_si64)
427#pragma intrinsic(_mm_add_epi64)
428#pragma intrinsic(_mm_adds_epi8)
429#pragma intrinsic(_mm_adds_epi16)
430#pragma intrinsic(_mm_adds_epu8)
431#pragma intrinsic(_mm_adds_epu16)
432#pragma intrinsic(_mm_avg_epu8)
433#pragma intrinsic(_mm_avg_epu16)
434#pragma intrinsic(_mm_madd_epi16)
435#pragma intrinsic(_mm_max_epi16)
436#pragma intrinsic(_mm_max_epu8)
437#pragma intrinsic(_mm_min_epi16)
438#pragma intrinsic(_mm_min_epu8)
439#pragma intrinsic(_mm_mulhi_epi16)
440#pragma intrinsic(_mm_mulhi_epu16)
441#pragma intrinsic(_mm_mullo_epi16)
442//#pragma intrinsic(_mm_mul_su32)
443#pragma intrinsic(_mm_mul_epu32)
444#pragma intrinsic(_mm_sad_epu8)
445#pragma intrinsic(_mm_sub_epi8)
446#pragma intrinsic(_mm_sub_epi16)
447#pragma intrinsic(_mm_sub_epi32)
448//#pragma intrinsic(_mm_sub_si64)
449#pragma intrinsic(_mm_sub_epi64)
450#pragma intrinsic(_mm_subs_epi8)
451#pragma intrinsic(_mm_subs_epi16)
452#pragma intrinsic(_mm_subs_epu8)
453#pragma intrinsic(_mm_subs_epu16)
454#pragma intrinsic(_mm_and_si128)
455#pragma intrinsic(_mm_andnot_si128)
456#pragma intrinsic(_mm_or_si128)
457#pragma intrinsic(_mm_xor_si128)
458#pragma intrinsic(_mm_slli_si128)
459#pragma intrinsic(_mm_slli_epi16)
460#pragma intrinsic(_mm_sll_epi16)
461#pragma intrinsic(_mm_slli_epi32)
462#pragma intrinsic(_mm_sll_epi32)
463#pragma intrinsic(_mm_slli_epi64)
464#pragma intrinsic(_mm_sll_epi64)
465#pragma intrinsic(_mm_srai_epi16)
466#pragma intrinsic(_mm_sra_epi16)
467#pragma intrinsic(_mm_srai_epi32)
468#pragma intrinsic(_mm_sra_epi32)
469#pragma intrinsic(_mm_srli_si128)
470#pragma intrinsic(_mm_srli_epi16)
471#pragma intrinsic(_mm_srl_epi16)
472#pragma intrinsic(_mm_srli_epi32)
473#pragma intrinsic(_mm_srl_epi32)
474#pragma intrinsic(_mm_srli_epi64)
475#pragma intrinsic(_mm_srl_epi64)
476#pragma intrinsic(_mm_cmpeq_epi8)
477#pragma intrinsic(_mm_cmpeq_epi16)
478#pragma intrinsic(_mm_cmpeq_epi32)
479#pragma intrinsic(_mm_cmpgt_epi8)
480#pragma intrinsic(_mm_cmpgt_epi16)
481#pragma intrinsic(_mm_cmpgt_epi32)
482#pragma intrinsic(_mm_cmplt_epi8)
483#pragma intrinsic(_mm_cmplt_epi16)
484#pragma intrinsic(_mm_cmplt_epi32)
485#ifdef _M_AMD64
486#pragma intrinsic(_mm_cvtsi64_sd)
487#pragma intrinsic(_mm_cvtsd_si64)
488#pragma intrinsic(_mm_cvttsd_si64)
489#endif
490#pragma intrinsic(_mm_cvtepi32_ps)
491#pragma intrinsic(_mm_cvtps_epi32)
492#pragma intrinsic(_mm_cvttps_epi32)
493#pragma intrinsic(_mm_cvtsi32_si128)
494#ifdef _M_AMD64
495#pragma intrinsic(_mm_cvtsi64_si128)
496#endif
497#pragma intrinsic(_mm_cvtsi128_si32)
498#ifdef _M_AMD64
499#pragma intrinsic(_mm_cvtsi128_si64)
500#endif
501#pragma intrinsic(_mm_load_si128)
502#pragma intrinsic(_mm_loadu_si128)
503#pragma intrinsic(_mm_loadl_epi64)
504//#pragma intrinsic(_mm_undefined_si128)
505//#pragma intrinsic(_mm_set_epi64x)
506//#pragma intrinsic(_mm_set_epi64)
507#pragma intrinsic(_mm_set_epi32)
508#pragma intrinsic(_mm_set_epi16)
509#pragma intrinsic(_mm_set_epi8)
510//#pragma intrinsic(_mm_set1_epi64x)
511//#pragma intrinsic(_mm_set1_epi64)
512#pragma intrinsic(_mm_set1_epi32)
513#pragma intrinsic(_mm_set1_epi16)
514#pragma intrinsic(_mm_set1_epi8)
515#pragma intrinsic(_mm_setl_epi64)
516//#pragma intrinsic(_mm_setr_epi64)
517#pragma intrinsic(_mm_setr_epi32)
518#pragma intrinsic(_mm_setr_epi16)
519#pragma intrinsic(_mm_setr_epi8)
520#pragma intrinsic(_mm_setzero_si128)
521#pragma intrinsic(_mm_store_si128)
522#pragma intrinsic(_mm_storeu_si128)
523//#pragma intrinsic(_mm_storeu_si64)
524//#pragma intrinsic(_mm_storeu_si32)
525//#pragma intrinsic(_mm_storeu_si16)
526#pragma intrinsic(_mm_maskmoveu_si128)
527#pragma intrinsic(_mm_storel_epi64)
528#pragma intrinsic(_mm_stream_pd)
529#pragma intrinsic(_mm_stream_si128)
530#pragma intrinsic(_mm_stream_si32)
531#pragma intrinsic(_mm_clflush)
532#pragma intrinsic(_mm_lfence)
533#pragma intrinsic(_mm_mfence)
534#pragma intrinsic(_mm_packs_epi16)
535#pragma intrinsic(_mm_packs_epi32)
536#pragma intrinsic(_mm_packus_epi16)
537#pragma intrinsic(_mm_extract_epi16)
538#pragma intrinsic(_mm_insert_epi16)
539#pragma intrinsic(_mm_movemask_epi8)
540#pragma intrinsic(_mm_shuffle_epi32)
541#pragma intrinsic(_mm_shufflelo_epi16)
542#pragma intrinsic(_mm_shufflehi_epi16)
543#pragma intrinsic(_mm_unpackhi_epi8)
544#pragma intrinsic(_mm_unpackhi_epi16)
545#pragma intrinsic(_mm_unpackhi_epi32)
546#pragma intrinsic(_mm_unpackhi_epi64)
547#pragma intrinsic(_mm_unpacklo_epi8)
548#pragma intrinsic(_mm_unpacklo_epi16)
549#pragma intrinsic(_mm_unpacklo_epi32)
550#pragma intrinsic(_mm_unpacklo_epi64)
551//#pragma intrinsic(_mm_movepi64_pi64)
552//#pragma intrinsic(_mm_movpi64_epi64)
553#pragma intrinsic(_mm_move_epi64)
554#pragma intrinsic(_mm_unpackhi_pd)
555#pragma intrinsic(_mm_unpacklo_pd)
556#pragma intrinsic(_mm_movemask_pd)
557#pragma intrinsic(_mm_shuffle_pd)
558#pragma intrinsic(_mm_castpd_ps)
559#pragma intrinsic(_mm_castpd_si128)
560#pragma intrinsic(_mm_castps_pd)
561#pragma intrinsic(_mm_castps_si128)
562#pragma intrinsic(_mm_castsi128_ps)
563#pragma intrinsic(_mm_castsi128_pd)
564#pragma intrinsic(_mm_pause)
565
566#else /* _MSC_VER */
567
568/*
569 Clang: https://github.com/llvm/llvm-project/blob/main/clang/lib/Headers/emmintrin.h
570 Clang older version: https://github.com/llvm/llvm-project/blob/3ef88b31843e040c95f23ff2c3c206f1fa399c05/clang/lib/Headers/emmintrin.h
571 unikraft: https://github.com/unikraft/lib-intel-intrinsics/blob/staging/include/emmintrin.h
572*/
573
574__INTRIN_INLINE_SSE2 __m128d _mm_add_sd(__m128d a, __m128d b)
575{
576 a[0] += b[0];
577 return a;
578}
579
580__INTRIN_INLINE_SSE2 __m128d _mm_add_pd(__m128d a, __m128d b)
581{
582 return (__m128d)((__v2df)a + (__v2df)b);
583}
584
585__INTRIN_INLINE_SSE2 __m128d _mm_sub_sd(__m128d a, __m128d b)
586{
587 a[0] -= b[0];
588 return a;
589}
590
591__INTRIN_INLINE_SSE2 __m128d _mm_sub_pd(__m128d a, __m128d b)
592{
593 return (__m128d)((__v2df)a - (__v2df)b);
594}
595
596__INTRIN_INLINE_SSE2 __m128d _mm_mul_sd(__m128d a, __m128d b)
597{
598 a[0] *= b[0];
599 return a;
600}
601
602__INTRIN_INLINE_SSE2 __m128d _mm_mul_pd(__m128d a, __m128d b)
603{
604 return (__m128d)((__v2df)a * (__v2df)b);
605}
606
607__INTRIN_INLINE_SSE2 __m128d _mm_div_sd(__m128d a, __m128d b)
608{
609 a[0] /= b[0];
610 return a;
611}
612
613__INTRIN_INLINE_SSE2 __m128d _mm_div_pd(__m128d a, __m128d b)
614{
615 return (__m128d)((__v2df)a / (__v2df)b);
616}
617
618__INTRIN_INLINE_SSE2 __m128d _mm_sqrt_sd(__m128d a, __m128d b)
619{
620 __m128d __c = __builtin_ia32_sqrtsd((__v2df)b);
621 return __extension__(__m128d){__c[0], a[1]};
622}
623
625{
626 return __builtin_ia32_sqrtpd((__v2df)a);
627}
628
629__INTRIN_INLINE_SSE2 __m128d _mm_min_sd(__m128d a, __m128d b)
630{
631 return __builtin_ia32_minsd((__v2df)a, (__v2df)b);
632}
633
634__INTRIN_INLINE_SSE2 __m128d _mm_min_pd(__m128d a, __m128d b)
635{
636 return __builtin_ia32_minpd((__v2df)a, (__v2df)b);
637}
638
639__INTRIN_INLINE_SSE2 __m128d _mm_max_sd(__m128d a, __m128d b)
640{
641 return __builtin_ia32_maxsd((__v2df)a, (__v2df)b);
642}
643
644__INTRIN_INLINE_SSE2 __m128d _mm_max_pd(__m128d a, __m128d b)
645{
646 return __builtin_ia32_maxpd((__v2df)a, (__v2df)b);
647}
648
649__INTRIN_INLINE_SSE2 __m128d _mm_and_pd(__m128d a, __m128d b)
650{
651 return (__m128d)((__v2du)a & (__v2du)b);
652}
653
654__INTRIN_INLINE_SSE2 __m128d _mm_andnot_pd(__m128d a, __m128d b)
655{
656 return (__m128d)(~(__v2du)a & (__v2du)b);
657}
658
659__INTRIN_INLINE_SSE2 __m128d _mm_or_pd(__m128d a, __m128d b)
660{
661 return (__m128d)((__v2du)a | (__v2du)b);
662}
663
664__INTRIN_INLINE_SSE2 __m128d _mm_xor_pd(__m128d a, __m128d b)
665{
666 return (__m128d)((__v2du)a ^ (__v2du)b);
667}
668
669__INTRIN_INLINE_SSE2 __m128d _mm_cmpeq_pd(__m128d a, __m128d b)
670{
671 return (__m128d)__builtin_ia32_cmpeqpd((__v2df)a, (__v2df)b);
672}
673
674__INTRIN_INLINE_SSE2 __m128d _mm_cmplt_pd(__m128d a, __m128d b)
675{
676 return (__m128d)__builtin_ia32_cmpltpd((__v2df)a, (__v2df)b);
677}
678
679__INTRIN_INLINE_SSE2 __m128d _mm_cmple_pd(__m128d a, __m128d b)
680{
681 return (__m128d)__builtin_ia32_cmplepd((__v2df)a, (__v2df)b);
682}
683
684__INTRIN_INLINE_SSE2 __m128d _mm_cmpgt_pd(__m128d a, __m128d b)
685{
686 return (__m128d)__builtin_ia32_cmpltpd((__v2df)b, (__v2df)a);
687}
688
689__INTRIN_INLINE_SSE2 __m128d _mm_cmpge_pd(__m128d a, __m128d b)
690{
691 return (__m128d)__builtin_ia32_cmplepd((__v2df)b, (__v2df)a);
692}
693
694__INTRIN_INLINE_SSE2 __m128d _mm_cmpord_pd(__m128d a, __m128d b)
695{
696 return (__m128d)__builtin_ia32_cmpordpd((__v2df)a, (__v2df)b);
697}
698
699__INTRIN_INLINE_SSE2 __m128d _mm_cmpunord_pd(__m128d a, __m128d b)
700{
701 return (__m128d)__builtin_ia32_cmpunordpd((__v2df)a, (__v2df)b);
702}
703
704__INTRIN_INLINE_SSE2 __m128d _mm_cmpneq_pd(__m128d a, __m128d b)
705{
706 return (__m128d)__builtin_ia32_cmpneqpd((__v2df)a, (__v2df)b);
707}
708
709__INTRIN_INLINE_SSE2 __m128d _mm_cmpnlt_pd(__m128d a, __m128d b)
710{
711 return (__m128d)__builtin_ia32_cmpnltpd((__v2df)a, (__v2df)b);
712}
713
714__INTRIN_INLINE_SSE2 __m128d _mm_cmpnle_pd(__m128d a, __m128d b)
715{
716 return (__m128d)__builtin_ia32_cmpnlepd((__v2df)a, (__v2df)b);
717}
718
719__INTRIN_INLINE_SSE2 __m128d _mm_cmpngt_pd(__m128d a, __m128d b)
720{
721 return (__m128d)__builtin_ia32_cmpnltpd((__v2df)b, (__v2df)a);
722}
723
724__INTRIN_INLINE_SSE2 __m128d _mm_cmpnge_pd(__m128d a, __m128d b)
725{
726 return (__m128d)__builtin_ia32_cmpnlepd((__v2df)b, (__v2df)a);
727}
728
729__INTRIN_INLINE_SSE2 __m128d _mm_cmpeq_sd(__m128d a, __m128d b)
730{
731 return (__m128d)__builtin_ia32_cmpeqsd((__v2df)a, (__v2df)b);
732}
733
734__INTRIN_INLINE_SSE2 __m128d _mm_cmplt_sd(__m128d a, __m128d b)
735{
736 return (__m128d)__builtin_ia32_cmpltsd((__v2df)a, (__v2df)b);
737}
738
739__INTRIN_INLINE_SSE2 __m128d _mm_cmple_sd(__m128d a, __m128d b)
740{
741 return (__m128d)__builtin_ia32_cmplesd((__v2df)a, (__v2df)b);
742}
743
744__INTRIN_INLINE_SSE2 __m128d _mm_cmpgt_sd(__m128d a, __m128d b)
745{
746 __m128d __c = __builtin_ia32_cmpltsd((__v2df)b, (__v2df)a);
747 return __extension__(__m128d){__c[0], a[1]};
748}
749
750__INTRIN_INLINE_SSE2 __m128d _mm_cmpge_sd(__m128d a, __m128d b)
751{
752 __m128d __c = __builtin_ia32_cmplesd((__v2df)b, (__v2df)a);
753 return __extension__(__m128d){__c[0], a[1]};
754}
755
756__INTRIN_INLINE_SSE2 __m128d _mm_cmpord_sd(__m128d a, __m128d b)
757{
758 return (__m128d)__builtin_ia32_cmpordsd((__v2df)a, (__v2df)b);
759}
760
761__INTRIN_INLINE_SSE2 __m128d _mm_cmpunord_sd(__m128d a, __m128d b)
762{
763 return (__m128d)__builtin_ia32_cmpunordsd((__v2df)a, (__v2df)b);
764}
765
766__INTRIN_INLINE_SSE2 __m128d _mm_cmpneq_sd(__m128d a, __m128d b)
767{
768 return (__m128d)__builtin_ia32_cmpneqsd((__v2df)a, (__v2df)b);
769}
770
771__INTRIN_INLINE_SSE2 __m128d _mm_cmpnlt_sd(__m128d a, __m128d b)
772{
773 return (__m128d)__builtin_ia32_cmpnltsd((__v2df)a, (__v2df)b);
774}
775
776__INTRIN_INLINE_SSE2 __m128d _mm_cmpnle_sd(__m128d a, __m128d b)
777{
778 return (__m128d)__builtin_ia32_cmpnlesd((__v2df)a, (__v2df)b);
779}
780
781__INTRIN_INLINE_SSE2 __m128d _mm_cmpngt_sd(__m128d a, __m128d b)
782{
783 __m128d __c = __builtin_ia32_cmpnltsd((__v2df)b, (__v2df)a);
784 return __extension__(__m128d){__c[0], a[1]};
785}
786
787__INTRIN_INLINE_SSE2 __m128d _mm_cmpnge_sd(__m128d a, __m128d b)
788{
789 __m128d __c = __builtin_ia32_cmpnlesd((__v2df)b, (__v2df)a);
790 return __extension__(__m128d){__c[0], a[1]};
791}
792
794{
795 return __builtin_ia32_comisdeq((__v2df)a, (__v2df)b);
796}
797
799{
800 return __builtin_ia32_comisdlt((__v2df)a, (__v2df)b);
801}
802
804{
805 return __builtin_ia32_comisdle((__v2df)a, (__v2df)b);
806}
807
809{
810 return __builtin_ia32_comisdgt((__v2df)a, (__v2df)b);
811}
812
814{
815 return __builtin_ia32_comisdge((__v2df)a, (__v2df)b);
816}
817
819{
820 return __builtin_ia32_comisdneq((__v2df)a, (__v2df)b);
821}
822
824{
825 return __builtin_ia32_ucomisdeq((__v2df)a, (__v2df)b);
826}
827
829{
830 return __builtin_ia32_ucomisdlt((__v2df)a, (__v2df)b);
831}
832
834{
835 return __builtin_ia32_ucomisdle((__v2df)a, (__v2df)b);
836}
837
839{
840 return __builtin_ia32_ucomisdgt((__v2df)a, (__v2df)b);
841}
842
844{
845 return __builtin_ia32_ucomisdge((__v2df)a, (__v2df)b);
846}
847
849{
850 return __builtin_ia32_ucomisdneq((__v2df)a, (__v2df)b);
851}
852
854{
855 return __builtin_ia32_cvtpd2ps((__v2df)a);
856}
857
859{
860#if HAS_BUILTIN(__builtin_convertvector)
861 return (__m128d)__builtin_convertvector(__builtin_shufflevector((__v4sf)a, (__v4sf)a, 0, 1), __v2df);
862#else
863 return __builtin_ia32_cvtps2pd(a);
864#endif
865}
866
868{
869#if HAS_BUILTIN(__builtin_convertvector)
870 return (__m128d)__builtin_convertvector(__builtin_shufflevector((__v4si)a, (__v4si)a, 0, 1), __v2df);
871#else
872 return __builtin_ia32_cvtdq2pd((__v4si)a);
873#endif
874}
875
877{
878 return (__m128i)__builtin_ia32_cvtpd2dq((__v2df)a);
879}
880
882{
883 return __builtin_ia32_cvtsd2si((__v2df)a);
884}
885
886__INTRIN_INLINE_SSE2 __m128 _mm_cvtsd_ss(__m128 a, __m128d b)
887{
888 return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)a, (__v2df)b);
889}
890
892 int b)
893{
894 a[0] = b;
895 return a;
896}
897
898__INTRIN_INLINE_SSE2 __m128d _mm_cvtss_sd(__m128d a, __m128 b)
899{
900 a[0] = b[0];
901 return a;
902}
903
905{
906 return (__m128i)__builtin_ia32_cvttpd2dq((__v2df)a);
907}
908
910{
911 return __builtin_ia32_cvttsd2si((__v2df)a);
912}
913
915{
916 return (__m64)__builtin_ia32_cvtpd2pi((__v2df)a);
917}
918
920{
921 return (__m64)__builtin_ia32_cvttpd2pi((__v2df)a);
922}
923
925{
926 return __builtin_ia32_cvtpi2pd((__v2si)a);
927}
928
930{
931 return a[0];
932}
933
934__INTRIN_INLINE_SSE2 __m128d _mm_load_pd(double const *dp)
935{
936 return *(const __m128d *)dp;
937}
938
939__INTRIN_INLINE_SSE2 __m128d _mm_load1_pd(double const *dp)
940{
941 struct __mm_load1_pd_struct {
942 double __u;
943 } __attribute__((__packed__, __may_alias__));
944 double __u = ((const struct __mm_load1_pd_struct *)dp)->__u;
945 return __extension__(__m128d){__u, __u};
946}
947
948// GCC: (NOTE(review): presumably marks this macro as mirroring GCC's header - confirm)
949/* Create a selector for use with the SHUFPD instruction: fp1 is shifted
   into bit 1 and fp0 is OR-ed into bit 0 of the resulting value. */
950#define _MM_SHUFFLE2(fp1,fp0) \
951 (((fp1) << 1) | (fp0))
952
953__INTRIN_INLINE_SSE2 __m128d _mm_loadr_pd(double const *dp)
954{
955#if HAS_BUILTIN(__builtin_shufflevector)
956 __m128d u = *(const __m128d *)dp;
957 return __builtin_shufflevector((__v2df)u, (__v2df)u, 1, 0);
958#else
959 return (__m128d){ dp[1], dp[0] };
960#endif
961}
962
963__INTRIN_INLINE_SSE2 __m128d _mm_loadu_pd(double const *dp)
964{
965 struct __loadu_pd {
966 __m128d_u __v;
967 } __attribute__((__packed__, __may_alias__));
968 return ((const struct __loadu_pd *)dp)->__v;
969}
970
972{
973 struct __loadu_si64 {
974 long long __v;
975 } __attribute__((__packed__, __may_alias__));
976 long long __u = ((const struct __loadu_si64 *)a)->__v;
977 return __extension__(__m128i)(__v2di){__u, 0LL};
978}
979
981{
982 struct __loadu_si32 {
983 int __v;
984 } __attribute__((__packed__, __may_alias__));
985 int __u = ((const struct __loadu_si32 *)a)->__v;
986 return __extension__(__m128i)(__v4si){__u, 0, 0, 0};
987}
988
990{
991 struct __loadu_si16 {
992 short __v;
993 } __attribute__((__packed__, __may_alias__));
994 short __u = ((const struct __loadu_si16 *)a)->__v;
995 return __extension__(__m128i)(__v8hi){__u, 0, 0, 0, 0, 0, 0, 0};
996}
997
998__INTRIN_INLINE_SSE2 __m128d _mm_load_sd(double const *dp)
999{
1000 struct __mm_load_sd_struct {
1001 double __u;
1002 } __attribute__((__packed__, __may_alias__));
1003 double __u = ((const struct __mm_load_sd_struct *)dp)->__u;
1004 return __extension__(__m128d){__u, 0};
1005}
1006
1007__INTRIN_INLINE_SSE2 __m128d _mm_loadh_pd(__m128d a, double const *dp)
1008{
1009 struct __mm_loadh_pd_struct {
1010 double __u;
1011 } __attribute__((__packed__, __may_alias__));
1012 double __u = ((const struct __mm_loadh_pd_struct *)dp)->__u;
1013 return __extension__(__m128d){a[0], __u};
1014}
1015
1016__INTRIN_INLINE_SSE2 __m128d _mm_loadl_pd(__m128d a, double const *dp)
1017{
1018 struct __mm_loadl_pd_struct {
1019 double __u;
1020 } __attribute__((__packed__, __may_alias__));
1021 double __u = ((const struct __mm_loadl_pd_struct *)dp)->__u;
1022 return __extension__(__m128d){__u, a[1]};
1023}
1024
1026{
1027#if HAS_BUILTIN(__builtin_ia32_undef128)
1028 return (__m128d)__builtin_ia32_undef128();
1029#else
1030 __m128d undef = undef;
1031 return undef;
1032#endif
1033}
1034
1036{
1037 return __extension__(__m128d){w, 0};
1038}
1039
1041{
1042 return __extension__(__m128d){w, w};
1043}
1044
1045__INTRIN_INLINE_SSE2 __m128d _mm_set_pd(double w, double x)
1046{
1047 return __extension__(__m128d){x, w};
1048}
1049
1050__INTRIN_INLINE_SSE2 __m128d _mm_setr_pd(double w, double x)
1051{
1052 return __extension__(__m128d){w, x};
1053}
1054
1056{
1057 return __extension__(__m128d){0, 0};
1058}
1059
1060__INTRIN_INLINE_SSE2 __m128d _mm_move_sd(__m128d a, __m128d b)
1061{
1062 a[0] = b[0];
1063 return a;
1064}
1065
1066__INTRIN_INLINE_SSE2 void _mm_store_sd(double *dp, __m128d a)
1067{
1068 struct __mm_store_sd_struct {
1069 double __u;
1070 } __attribute__((__packed__, __may_alias__));
1071 ((struct __mm_store_sd_struct *)dp)->__u = a[0];
1072}
1073
1074__INTRIN_INLINE_SSE2 void _mm_store_pd(double *dp, __m128d a)
1075{
1076 *(__m128d *)dp = a;
1077}
1078
1079__INTRIN_INLINE_SSE2 void _mm_store1_pd(double *dp, __m128d a)
1080{
1081#if HAS_BUILTIN(__builtin_shufflevector)
1082 a = __builtin_shufflevector((__v2df)a, (__v2df)a, 0, 0);
1083 _mm_store_pd(dp, a);
1084#else
1085 dp[0] = a[0];
1086 dp[1] = a[0];
1087#endif
1088}
1089
1090__INTRIN_INLINE_SSE2 void _mm_storeu_pd(double *dp, __m128d a)
1091{
1092 struct __storeu_pd {
1093 __m128d_u __v;
1094 } __attribute__((__packed__, __may_alias__));
1095 ((struct __storeu_pd *)dp)->__v = a;
1096}
1097
1098__INTRIN_INLINE_SSE2 void _mm_storer_pd(double *dp, __m128d a)
1099{
1100#if HAS_BUILTIN(__builtin_shufflevector)
1101 a = __builtin_shufflevector((__v2df)a, (__v2df)a, 1, 0);
1102 *(__m128d *)dp = a;
1103#else
1104 dp[0] = a[1];
1105 dp[1] = a[0];
1106#endif
1107}
1108
1109__INTRIN_INLINE_SSE2 void _mm_storeh_pd(double *dp, __m128d a)
1110{
1111 struct __mm_storeh_pd_struct {
1112 double __u;
1113 } __attribute__((__packed__, __may_alias__));
1114 ((struct __mm_storeh_pd_struct *)dp)->__u = a[1];
1115}
1116
1117__INTRIN_INLINE_SSE2 void _mm_storel_pd(double *dp, __m128d a)
1118{
1119 struct __mm_storeh_pd_struct {
1120 double __u;
1121 } __attribute__((__packed__, __may_alias__));
1122 ((struct __mm_storeh_pd_struct *)dp)->__u = a[0];
1123}
1124
1125__INTRIN_INLINE_SSE2 __m128i _mm_add_epi8(__m128i a, __m128i b)
1126{
1127 return (__m128i)((__v16qu)a + (__v16qu)b);
1128}
1129
1130__INTRIN_INLINE_SSE2 __m128i _mm_add_epi16(__m128i a, __m128i b)
1131{
1132 return (__m128i)((__v8hu)a + (__v8hu)b);
1133}
1134
1135__INTRIN_INLINE_SSE2 __m128i _mm_add_epi32(__m128i a, __m128i b)
1136{
1137 return (__m128i)((__v4su)a + (__v4su)b);
1138}
1139
1141{
1142 return (__m64)__builtin_ia32_paddq((__v1di)a, (__v1di)b);
1143}
1144
1145__INTRIN_INLINE_SSE2 __m128i _mm_add_epi64(__m128i a, __m128i b)
1146{
1147 return (__m128i)((__v2du)a + (__v2du)b);
1148}
1149
1150__INTRIN_INLINE_SSE2 __m128i _mm_adds_epi8(__m128i a, __m128i b)
1151{
1152#if HAS_BUILTIN(__builtin_elementwise_add_sat)
1153 return (__m128i)__builtin_elementwise_add_sat((__v16qs)a, (__v16qs)b);
1154#else
1155 return (__m128i)__builtin_ia32_paddsb128((__v16qi)a, (__v16qi)b);
1156#endif
1157}
1158
1159__INTRIN_INLINE_SSE2 __m128i _mm_adds_epi16(__m128i a, __m128i b)
1160{
1161#if HAS_BUILTIN(__builtin_elementwise_add_sat)
1162 return (__m128i)__builtin_elementwise_add_sat((__v8hi)a, (__v8hi)b);
1163#else
1164 return (__m128i)__builtin_ia32_paddsw128((__v8hi)a, (__v8hi)b);
1165#endif
1166}
1167
1168__INTRIN_INLINE_SSE2 __m128i _mm_adds_epu8(__m128i a, __m128i b)
1169{
1170#if HAS_BUILTIN(__builtin_elementwise_add_sat)
1171 return (__m128i)__builtin_elementwise_add_sat((__v16qu)a, (__v16qu)b);
1172#else
1173 return (__m128i)__builtin_ia32_paddusb128((__v16qi)a, (__v16qi)b);
1174#endif
1175}
1176
1177__INTRIN_INLINE_SSE2 __m128i _mm_adds_epu16(__m128i a, __m128i b)
1178{
1179#if HAS_BUILTIN(__builtin_elementwise_add_sat)
1180 return (__m128i)__builtin_elementwise_add_sat((__v8hu)a, (__v8hu)b);
1181#else
1182 return (__m128i)__builtin_ia32_paddusw128((__v8hi)a, (__v8hi)b);
1183#endif
1184}
1185
1186__INTRIN_INLINE_SSE2 __m128i _mm_avg_epu8(__m128i a, __m128i b)
1187{
1188 return (__m128i)__builtin_ia32_pavgb128((__v16qi)a, (__v16qi)b);
1189}
1190
1191__INTRIN_INLINE_SSE2 __m128i _mm_avg_epu16(__m128i a, __m128i b)
1192{
1193 return (__m128i)__builtin_ia32_pavgw128((__v8hi)a, (__v8hi)b);
1194}
1195
1196__INTRIN_INLINE_SSE2 __m128i _mm_madd_epi16(__m128i a, __m128i b)
1197{
1198 return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)a, (__v8hi)b);
1199}
1200
1201__INTRIN_INLINE_SSE2 __m128i _mm_max_epi16(__m128i a, __m128i b)
1202{
1203#if HAS_BUILTIN(__builtin_elementwise_max)
1204 return (__m128i)__builtin_elementwise_max((__v8hi)a, (__v8hi)b);
1205#else
1206 return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)a, (__v8hi)b);
1207#endif
1208}
1209
1210__INTRIN_INLINE_SSE2 __m128i _mm_max_epu8(__m128i a, __m128i b)
1211{
1212#if HAS_BUILTIN(__builtin_elementwise_max)
1213 return (__m128i)__builtin_elementwise_max((__v16qu)a, (__v16qu)b);
1214#else
1215 return (__m128i)__builtin_ia32_pmaxub128((__v16qi)a, (__v16qi)b);
1216#endif
1217}
1218
1219__INTRIN_INLINE_SSE2 __m128i _mm_min_epi16(__m128i a, __m128i b)
1220{
1221#if HAS_BUILTIN(__builtin_elementwise_min)
1222 return (__m128i)__builtin_elementwise_min((__v8hi)a, (__v8hi)b);
1223#else
1224 return (__m128i)__builtin_ia32_pminsw128((__v8hi)a, (__v8hi)b);
1225#endif
1226}
1227
1228__INTRIN_INLINE_SSE2 __m128i _mm_min_epu8(__m128i a, __m128i b)
1229{
1230#if HAS_BUILTIN(__builtin_elementwise_min)
1231 return (__m128i)__builtin_elementwise_min((__v16qu)a, (__v16qu)b);
1232#else
1233 return (__m128i)__builtin_ia32_pminub128((__v16qi)a, (__v16qi)b);
1234#endif
1235}
1236
1237__INTRIN_INLINE_SSE2 __m128i _mm_mulhi_epi16(__m128i a, __m128i b)
1238{
1239 return (__m128i)__builtin_ia32_pmulhw128((__v8hi)a, (__v8hi)b);
1240}
1241
1242__INTRIN_INLINE_SSE2 __m128i _mm_mulhi_epu16(__m128i a, __m128i b)
1243{
1244 return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)a, (__v8hi)b);
1245}
1246
1247__INTRIN_INLINE_SSE2 __m128i _mm_mullo_epi16(__m128i a, __m128i b)
1248{
1249 return (__m128i)((__v8hu)a * (__v8hu)b);
1250}
1251
1253{
1254 return (__m64)__builtin_ia32_pmuludq((__v2si)a, (__v2si)b);
1255}
1256
1257__INTRIN_INLINE_SSE2 __m128i _mm_mul_epu32(__m128i a, __m128i b)
1258{
1259 return __builtin_ia32_pmuludq128((__v4si)a, (__v4si)b);
1260}
1261
1262__INTRIN_INLINE_SSE2 __m128i _mm_sad_epu8(__m128i a, __m128i b)
1263{
1264 return __builtin_ia32_psadbw128((__v16qi)a, (__v16qi)b);
1265}
1266
1267__INTRIN_INLINE_SSE2 __m128i _mm_sub_epi8(__m128i a, __m128i b)
1268{
1269 return (__m128i)((__v16qu)a - (__v16qu)b);
1270}
1271
1272__INTRIN_INLINE_SSE2 __m128i _mm_sub_epi16(__m128i a, __m128i b)
1273{
1274 return (__m128i)((__v8hu)a - (__v8hu)b);
1275}
1276
1277__INTRIN_INLINE_SSE2 __m128i _mm_sub_epi32(__m128i a, __m128i b)
1278{
1279 return (__m128i)((__v4su)a - (__v4su)b);
1280}
1281
1283{
1284 return (__m64)__builtin_ia32_psubq((__v1di)a, (__v1di)b);
1285}
1286
1287__INTRIN_INLINE_SSE2 __m128i _mm_sub_epi64(__m128i a, __m128i b)
1288{
1289 return (__m128i)((__v2du)a - (__v2du)b);
1290}
1291
1292__INTRIN_INLINE_SSE2 __m128i _mm_subs_epi8(__m128i a, __m128i b)
1293{
1294#if HAS_BUILTIN(__builtin_elementwise_sub_sat)
1295 return (__m128i)__builtin_elementwise_sub_sat((__v16qs)a, (__v16qs)b);
1296#else
1297 return (__m128i)__builtin_ia32_psubsb128((__v16qi)a, (__v16qi)b);
1298#endif
1299}
1300
1301__INTRIN_INLINE_SSE2 __m128i _mm_subs_epi16(__m128i a, __m128i b)
1302{
1303#if HAS_BUILTIN(__builtin_elementwise_sub_sat)
1304 return (__m128i)__builtin_elementwise_sub_sat((__v8hi)a, (__v8hi)b);
1305#else
1306 return (__m128i)__builtin_ia32_psubsw128((__v8hi)a, (__v8hi)b);
1307#endif
1308}
1309
1310__INTRIN_INLINE_SSE2 __m128i _mm_subs_epu8(__m128i a, __m128i b)
1311{
1312#if HAS_BUILTIN(__builtin_elementwise_sub_sat)
1313 return (__m128i)__builtin_elementwise_sub_sat((__v16qu)a, (__v16qu)b);
1314#else
1315 return (__m128i)__builtin_ia32_psubusb128((__v16qi)a, (__v16qi)b);
1316#endif
1317}
1318
1319__INTRIN_INLINE_SSE2 __m128i _mm_subs_epu16(__m128i a, __m128i b)
1320{
1321#if HAS_BUILTIN(__builtin_elementwise_sub_sat)
1322 return (__m128i)__builtin_elementwise_sub_sat((__v8hu)a, (__v8hu)b);
1323#else
1324 return (__m128i)__builtin_ia32_psubusw128((__v8hi)a, (__v8hi)b);
1325#endif
1326}
1327
1328__INTRIN_INLINE_SSE2 __m128i _mm_and_si128(__m128i a, __m128i b)
1329{
1330 return (__m128i)((__v2du)a & (__v2du)b);
1331}
1332
1333__INTRIN_INLINE_SSE2 __m128i _mm_andnot_si128(__m128i a, __m128i b)
1334{
1335 return (__m128i)(~(__v2du)a & (__v2du)b);
1336}
1337
1338__INTRIN_INLINE_SSE2 __m128i _mm_or_si128(__m128i a, __m128i b)
1339{
1340 return (__m128i)((__v2du)a | (__v2du)b);
1341}
1342
1343__INTRIN_INLINE_SSE2 __m128i _mm_xor_si128(__m128i a, __m128i b)
1344{
1345 return (__m128i)((__v2du)a ^ (__v2du)b);
1346}
1347
/* Expands to the pslldqi128_byteshift builtin applied to a (converted to
   __m128i, then viewed as __v2di) with imm converted to int; the result is
   cast back to __m128i.  NOTE(review): imm presumably has to be a
   compile-time constant, which would explain the macro form - confirm. */
1348#define _mm_slli_si128(a, imm) \
1349 ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)))
1350
1352{
1353 return (__m128i)__builtin_ia32_psllwi128((__v8hi)a, count);
1354}
1355
1357{
1358 return (__m128i)__builtin_ia32_psllw128((__v8hi)a, (__v8hi)count);
1359}
1360
1362{
1363 return (__m128i)__builtin_ia32_pslldi128((__v4si)a, count);
1364}
1365
1367{
1368 return (__m128i)__builtin_ia32_pslld128((__v4si)a, (__v4si)count);
1369}
1370
1372{
1373 return __builtin_ia32_psllqi128((__v2di)a, count);
1374}
1375
1377{
1378 return __builtin_ia32_psllq128((__v2di)a, (__v2di)count);
1379}
1380
1382{
1383 return (__m128i)__builtin_ia32_psrawi128((__v8hi)a, count);
1384}
1385
1387{
1388 return (__m128i)__builtin_ia32_psraw128((__v8hi)a, (__v8hi)count);
1389}
1390
1392{
1393 return (__m128i)__builtin_ia32_psradi128((__v4si)a, count);
1394}
1395
1397{
1398 return (__m128i)__builtin_ia32_psrad128((__v4si)a, (__v4si)count);
1399}
1400
/* Expands to the psrldqi128_byteshift builtin applied to a (converted to
   __m128i, then viewed as __v2di) with imm converted to int; the result is
   cast back to __m128i.  NOTE(review): imm presumably has to be a
   compile-time constant, which would explain the macro form - confirm. */
1401#define _mm_srli_si128(a, imm) \
1402 ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)))
1403
1405{
1406 return (__m128i)__builtin_ia32_psrlwi128((__v8hi)a, count);
1407}
1408
1410{
1411 return (__m128i)__builtin_ia32_psrlw128((__v8hi)a, (__v8hi)count);
1412}
1413
1415{
1416 return (__m128i)__builtin_ia32_psrldi128((__v4si)a, count);
1417}
1418
1420{
1421 return (__m128i)__builtin_ia32_psrld128((__v4si)a, (__v4si)count);
1422}
1423
1425{
1426 return __builtin_ia32_psrlqi128((__v2di)a, count);
1427}
1428
1430{
1431 return __builtin_ia32_psrlq128((__v2di)a, (__v2di)count);
1432}
1433
1434__INTRIN_INLINE_SSE2 __m128i _mm_cmpeq_epi8(__m128i a, __m128i b)
1435{
1436 return (__m128i)((__v16qi)a == (__v16qi)b);
1437}
1438
1439__INTRIN_INLINE_SSE2 __m128i _mm_cmpeq_epi16(__m128i a, __m128i b)
1440{
1441 return (__m128i)((__v8hi)a == (__v8hi)b);
1442}
1443
1444__INTRIN_INLINE_SSE2 __m128i _mm_cmpeq_epi32(__m128i a, __m128i b)
1445{
1446 return (__m128i)((__v4si)a == (__v4si)b);
1447}
1448
1449__INTRIN_INLINE_SSE2 __m128i _mm_cmpgt_epi8(__m128i a, __m128i b)
1450{
1451 /* This function always performs a signed comparison, but __v16qi is a char
1452 which may be signed or unsigned, so use __v16qs. */
1453 return (__m128i)((__v16qs)a > (__v16qs)b);
1454}
1455
1456__INTRIN_INLINE_SSE2 __m128i _mm_cmpgt_epi16(__m128i a, __m128i b)
1457{
1458 return (__m128i)((__v8hi)a > (__v8hi)b);
1459}
1460
1461__INTRIN_INLINE_SSE2 __m128i _mm_cmpgt_epi32(__m128i a, __m128i b)
1462{
1463 return (__m128i)((__v4si)a > (__v4si)b);
1464}
1465
1466__INTRIN_INLINE_SSE2 __m128i _mm_cmplt_epi8(__m128i a, __m128i b)
1467{
1468 return _mm_cmpgt_epi8(b, a);
1469}
1470
1471__INTRIN_INLINE_SSE2 __m128i _mm_cmplt_epi16(__m128i a, __m128i b)
1472{
1473 return _mm_cmpgt_epi16(b, a);
1474}
1475
1476__INTRIN_INLINE_SSE2 __m128i _mm_cmplt_epi32(__m128i a, __m128i b)
1477{
1478 return _mm_cmpgt_epi32(b, a);
1479}
1480
1481#ifdef _M_AMD64
1482
1483__INTRIN_INLINE_SSE2 __m128d _mm_cvtsi64_sd(__m128d a, long long b)
1484{
1485 a[0] = b;
1486 return a;
1487}
1488
1489__INTRIN_INLINE_SSE2 long long _mm_cvtsd_si64(__m128d a)
1490{
1491 return __builtin_ia32_cvtsd2si64((__v2df)a);
1492}
1493
1494__INTRIN_INLINE_SSE2 long long _mm_cvttsd_si64(__m128d a)
1495{
1496 return __builtin_ia32_cvttsd2si64((__v2df)a);
1497}
1498#endif
1499
1501{
1502#if HAS_BUILTIN(__builtin_convertvector)
1503 return (__m128)__builtin_convertvector((__v4si)a, __v4sf);
1504#else
1505 return __builtin_ia32_cvtdq2ps((__v4si)a);
1506#endif
1507}
1508
1510{
1511 return (__m128i)__builtin_ia32_cvtps2dq((__v4sf)a);
1512}
1513
1515{
1516 return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)a);
1517}
1518
1520{
1521 return __extension__(__m128i)(__v4si){a, 0, 0, 0};
1522}
1523
1525{
1526 return __extension__(__m128i)(__v2di){a, 0};
1527}
1528
1530{
1531 __v4si b = (__v4si)a;
1532 return b[0];
1533}
1534
1536{
1537 return a[0];
1538}
1539
1541{
1542 return *p;
1543}
1544
1545__INTRIN_INLINE_SSE2 __m128i _mm_loadu_si128(__m128i_u const *p)
1546{
1547 struct __loadu_si128 {
1548 __m128i_u __v;
1549 } __attribute__((__packed__, __may_alias__));
1550 return ((const struct __loadu_si128 *)p)->__v;
1551}
1552
1553__INTRIN_INLINE_SSE2 __m128i _mm_loadl_epi64(__m128i_u const *p)
1554{
1555 struct __mm_loadl_epi64_struct {
1556 long long __u;
1557 } __attribute__((__packed__, __may_alias__));
1558 return __extension__(__m128i){
1559 ((const struct __mm_loadl_epi64_struct *)p)->__u, 0};
1560}
1561
1563{
1564#if HAS_BUILTIN(__builtin_ia32_undef128)
1565 return (__m128i)__builtin_ia32_undef128();
1566#else
1567 __m128i undef = undef;
1568 return undef;
1569#endif
1570}
1571
1572__INTRIN_INLINE_SSE2 __m128i _mm_set_epi64x(long long q1, long long q0)
1573{
1574 return __extension__(__m128i)(__v2di){q0, q1};
1575}
1576
1577__INTRIN_INLINE_SSE2 __m128i _mm_set_epi64(__m64 q1, __m64 q0)
1578{
1579 return _mm_set_epi64x((long long)q1, (long long)q0);
1580}
1581
1582__INTRIN_INLINE_SSE2 __m128i _mm_set_epi32(int i3, int i2, int i1, int i0)
1583{
1584 return __extension__(__m128i)(__v4si){i0, i1, i2, i3};
1585}
1586
1588 short w7, short w6, short w5, short w4,
1589 short w3, short w2, short w1, short w0)
1590{
1591 return __extension__(__m128i)(__v8hi){w0, w1, w2, w3, w4, w5, w6, w7};
1592}
1593
1595 char b15, char b14, char b13, char b12,
1596 char b11, char b10, char b9, char b8,
1597 char b7, char b6, char b5, char b4,
1598 char b3, char b2, char b1, char b0)
1599{
1600 return __extension__(__m128i)(__v16qi){
1601 b0, b1, b2, b3, b4, b5, b6, b7,
1602 b8, b9, b10, b11, b12, b13, b14, b15};
1603}
1604
1606{
1607 return _mm_set_epi64x(q, q);
1608}
1609
1611{
1612 return _mm_set_epi64(q, q);
1613}
1614
1616{
1617 return _mm_set_epi32(i, i, i, i);
1618}
1619
1621{
1622 return _mm_set_epi16(w, w, w, w, w, w, w, w);
1623}
1624
1626{
1627 return _mm_set_epi8(b, b, b, b, b, b, b, b, b, b, b,
1628 b, b, b, b, b);
1629}
1630
1631__INTRIN_INLINE_SSE2 __m128i _mm_setr_epi64(__m64 q0, __m64 q1)
1632{
1633 return _mm_set_epi64(q1, q0);
1634}
1635
1636__INTRIN_INLINE_SSE2 __m128i _mm_setr_epi32(int i0, int i1, int i2, int i3)
1637{
1638 return _mm_set_epi32(i3, i2, i1, i0);
1639}
1640
1642 short w0, short w1, short w2, short w3,
1643 short w4, short w5, short w6, short w7)
1644{
1645 return _mm_set_epi16(w7, w6, w5, w4, w3, w2, w1, w0);
1646}
1647
1649 char b0, char b1, char b2, char b3,
1650 char b4, char b5, char b6, char b7,
1651 char b8, char b9, char b10, char b11,
1652 char b12, char b13, char b14, char b15)
1653{
1654 return _mm_set_epi8(b15, b14, b13, b12, b11, b10, b9, b8,
1655 b7, b6, b5, b4, b3, b2, b1, b0);
1656}
1657
1659{
1660 return __extension__(__m128i)(__v2di){0LL, 0LL};
1661}
1662
1664{
1665 *p = b;
1666}
1667
1668__INTRIN_INLINE_SSE2 void _mm_storeu_si128(__m128i_u *p, __m128i b)
1669{
1670 struct __storeu_si128 {
1671 __m128i_u __v;
1672 } __attribute__((__packed__, __may_alias__));
1673 ((struct __storeu_si128 *)p)->__v = b;
1674}
1675
1677{
1678 struct __storeu_si64 {
1679 long long __v;
1680 } __attribute__((__packed__, __may_alias__));
1681 ((struct __storeu_si64 *)p)->__v = ((__v2di)b)[0];
1682}
1683
1685{
1686 struct __storeu_si32 {
1687 int __v;
1688 } __attribute__((__packed__, __may_alias__));
1689 ((struct __storeu_si32 *)p)->__v = ((__v4si)b)[0];
1690}
1691
1693{
1694 struct __storeu_si16 {
1695 short __v;
1696 } __attribute__((__packed__, __may_alias__));
1697 ((struct __storeu_si16 *)p)->__v = ((__v8hi)b)[0];
1698}
1699
1700__INTRIN_INLINE_SSE2 void _mm_maskmoveu_si128(__m128i d, __m128i n, char *p)
1701{
1702 __builtin_ia32_maskmovdqu((__v16qi)d, (__v16qi)n, p);
1703}
1704
1705__INTRIN_INLINE_SSE2 void _mm_storel_epi64(__m128i_u *p, __m128i a)
1706{
1707 struct __mm_storel_epi64_struct {
1708 long long __u;
1709 } __attribute__((__packed__, __may_alias__));
1710 ((struct __mm_storel_epi64_struct *)p)->__u = a[0];
1711}
1712
1714{
1715#if HAS_BUILTIN(__builtin_nontemporal_store)
1716 __builtin_nontemporal_store((__v2df)a, (__v2df *)p);
1717#else
1718 __builtin_ia32_movntpd(p, a);
1719#endif
1720}
1721
1723{
1724#if HAS_BUILTIN(__builtin_nontemporal_store)
1725 __builtin_nontemporal_store((__v2di)a, (__v2di*)p);
1726#else
1727 __builtin_ia32_movntdq(p, a);
1728#endif
1729}
1730
1732{
1733 __builtin_ia32_movnti(p, a);
1734}
1735
1736#ifdef _M_AMD64
1737__INTRIN_INLINE_SSE2 void _mm_stream_si64(long long *p, long long a)
1738{
1739 __builtin_ia32_movnti64(p, a);
1740}
1741#endif
1742
/* Prototypes only: these three functions are declared here without a body.
   The cross-reference list at the end of this listing places the definition
   of _mm_mfence in intrin_x86.h; the other two are presumably defined
   alongside it - TODO confirm. */
1743void _mm_clflush(void const *p);
1744
1745void _mm_lfence(void);
1746
1747void _mm_mfence(void);
1748
1749__INTRIN_INLINE_SSE2 __m128i _mm_packs_epi16(__m128i a, __m128i b)
1750{
1751 return (__m128i)__builtin_ia32_packsswb128((__v8hi)a, (__v8hi)b);
1752}
1753
1754__INTRIN_INLINE_SSE2 __m128i _mm_packs_epi32(__m128i a, __m128i b)
1755{
1756 return (__m128i)__builtin_ia32_packssdw128((__v4si)a, (__v4si)b);
1757}
1758
1759__INTRIN_INLINE_SSE2 __m128i _mm_packus_epi16(__m128i a, __m128i b)
1760{
1761 return (__m128i)__builtin_ia32_packuswb128((__v8hi)a, (__v8hi)b);
1762}
1763
/* Expands to the vec_ext_v8hi builtin on a (viewed as __v8hi) with imm
   converted to int.  The builtin's result is cast to unsigned short before
   the final cast to int, so the extracted value is zero-extended. */
1764#define _mm_extract_epi16(a, imm) \
1765 ((int)(unsigned short)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(a), \
1766 (int)(imm)))
1767
/* Expands to the vec_set_v8hi builtin on a (viewed as __v8hi), with b and
   imm both converted to int; the result is cast back to __m128i. */
1768#define _mm_insert_epi16(a, b, imm) \
1769 ((__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b), \
1770 (int)(imm)))
1771
1773{
1774 return __builtin_ia32_pmovmskb128((__v16qi)a);
1775}
1776
/* Expands to the pshufd builtin on a (viewed as __v4si) with imm converted
   to int; the result is cast back to __m128i. */
1777#define _mm_shuffle_epi32(a, imm) \
1778 ((__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a), (int)(imm)))
1779
/* Expands to the pshuflw builtin on a (viewed as __v8hi) with imm converted
   to int; the result is cast back to __m128i. */
1780#define _mm_shufflelo_epi16(a, imm) \
1781 ((__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a), (int)(imm)))
1782
/* Expands to the pshufhw builtin on a (viewed as __v8hi) with imm converted
   to int; the result is cast back to __m128i. */
1783#define _mm_shufflehi_epi16(a, imm) \
1784 ((__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a), (int)(imm)))
1785
1787{
1788#if HAS_BUILTIN(__builtin_shufflevector)
1789 return (__m128i)__builtin_shufflevector(
1790 (__v16qi)a, (__v16qi)b, 8, 16 + 8, 9, 16 + 9, 10, 16 + 10, 11,
1791 16 + 11, 12, 16 + 12, 13, 16 + 13, 14, 16 + 14, 15, 16 + 15);
1792#else
1793 return (__m128i)__builtin_ia32_punpckhbw128((__v16qi)a, (__v16qi)b);
1794#endif
1795}
1796
1798{
1799#if HAS_BUILTIN(__builtin_shufflevector)
1800 return (__m128i)__builtin_shufflevector((__v8hi)a, (__v8hi)b, 4, 8 + 4, 5,
1801 8 + 5, 6, 8 + 6, 7, 8 + 7);
1802#else
1803 return (__m128i)__builtin_ia32_punpckhwd128((__v8hi)a, (__v8hi)b);
1804#endif
1805}
1806
1808{
1809#if HAS_BUILTIN(__builtin_shufflevector)
1810 return (__m128i)__builtin_shufflevector((__v4si)a, (__v4si)b, 2, 4 + 2, 3,
1811 4 + 3);
1812#else
1813 return (__m128i)__builtin_ia32_punpckhdq128((__v4si)a, (__v4si)b);
1814#endif
1815}
1816
1818{
1819#if HAS_BUILTIN(__builtin_shufflevector)
1820 return (__m128i)__builtin_shufflevector((__v2di)a, (__v2di)b, 1, 2 + 1);
1821#else
1822 return (__m128i)__builtin_ia32_punpckhqdq128((__v2di)a, (__v2di)b);
1823#endif
1824}
1825
1827{
1828#if HAS_BUILTIN(__builtin_shufflevector)
1829 return (__m128i)__builtin_shufflevector(
1830 (__v16qi)a, (__v16qi)b, 0, 16 + 0, 1, 16 + 1, 2, 16 + 2, 3, 16 + 3, 4,
1831 16 + 4, 5, 16 + 5, 6, 16 + 6, 7, 16 + 7);
1832#else
1833 return (__m128i)__builtin_ia32_punpcklbw128((__v16qi)a, (__v16qi)b);
1834#endif
1835}
1836
1838{
1839#if HAS_BUILTIN(__builtin_shufflevector)
1840 return (__m128i)__builtin_shufflevector((__v8hi)a, (__v8hi)b, 0, 8 + 0, 1,
1841 8 + 1, 2, 8 + 2, 3, 8 + 3);
1842#else
1843 return (__m128i)__builtin_ia32_punpcklwd128((__v8hi)a, (__v8hi)b);
1844#endif
1845}
1846
1848{
1849#if HAS_BUILTIN(__builtin_shufflevector)
1850 return (__m128i)__builtin_shufflevector((__v4si)a, (__v4si)b, 0, 4 + 0, 1,
1851 4 + 1);
1852#else
1853 return (__m128i)__builtin_ia32_punpckldq128((__v4si)a, (__v4si)b);
1854#endif
1855}
1856
1858{
1859#if HAS_BUILTIN(__builtin_shufflevector)
1860 return (__m128i)__builtin_shufflevector((__v2di)a, (__v2di)b, 0, 2 + 0);
1861#else
1862 return (__m128i)__builtin_ia32_punpcklqdq128((__v2di)a, (__v2di)b);
1863#endif
1864}
1865
1867{
1868 return (__m64)a[0];
1869}
1870
1872{
1873 return __extension__(__m128i)(__v2di){(long long)a, 0};
1874}
1875
1877{
1878#if HAS_BUILTIN(__builtin_shufflevector)
1879 return __builtin_shufflevector((__v2di)a, _mm_setzero_si128(), 0, 2);
1880#else
1881 return (__m128i)__builtin_ia32_movq128((__v2di)a);
1882#endif
1883}
1884
1885__INTRIN_INLINE_SSE2 __m128d _mm_unpackhi_pd(__m128d a, __m128d b)
1886{
1887#if HAS_BUILTIN(__builtin_shufflevector)
1888 return __builtin_shufflevector((__v2df)a, (__v2df)b, 1, 2 + 1);
1889#else
1890 return (__m128d)__builtin_ia32_unpckhpd((__v2df)a, (__v2df)b);
1891#endif
1892}
1893
1894__INTRIN_INLINE_SSE2 __m128d _mm_unpacklo_pd(__m128d a, __m128d b)
1895{
1896#if HAS_BUILTIN(__builtin_shufflevector)
1897 return __builtin_shufflevector((__v2df)a, (__v2df)b, 0, 2 + 0);
1898#else
1899 return (__m128d)__builtin_ia32_unpcklpd((__v2df)a, (__v2df)b);
1900#endif
1901}
1902
1904{
1905 return __builtin_ia32_movmskpd((__v2df)a);
1906}
1907
/* Expands to the shufpd builtin on a and b (each converted to __m128d, then
   viewed as __v2df) with i converted to int; the result is cast to __m128d.
   NOTE(review): i is presumably meant to be built with _MM_SHUFFLE2 -
   confirm. */
1908#define _mm_shuffle_pd(a, b, i) \
1909 ((__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
1910 (int)(i)))
1911
1913{
1914 return (__m128)a;
1915}
1916
1918{
1919 return (__m128i)a;
1920}
1921
1923{
1924 return (__m128d)a;
1925}
1926
1928{
1929 return (__m128i)a;
1930}
1931
1933{
1934 return (__m128)a;
1935}
1936
1938{
1939 return (__m128d)a;
1940}
1941
/* Prototype only: no body is given for _mm_pause in this part of the file. */
1942void _mm_pause(void);
1943
1944#endif /* _MSC_VER */
1945
1946#ifdef __cplusplus
1947} // extern "C"
1948#endif
1949
1950#endif /* _INCLUDED_EMM */
#define _DECLSPEC_INTRIN_TYPE
Definition: _mingw.h:231
#define __int8
Definition: basetyps.h:25
#define __int16
Definition: basetyps.h:22
#define __int64
Definition: basetyps.h:16
#define __int32
Definition: basetyps.h:19
#define _STATIC_ASSERT(expr)
Definition: crtdefs.h:148
int align(int length, int align)
Definition: dsound8.c:36
#define __INTRIN_INLINE_MMXSSE2
Definition: emmintrin.h:72
__m128 _mm_cvtpd_ps(__m128d a)
Definition: emmintrin.h:853
__m128d _mm_cmpnge_sd(__m128d a, __m128d b)
Definition: emmintrin.h:787
void _mm_storeu_pd(double *dp, __m128d a)
Definition: emmintrin.h:1090
__m128d _mm_add_sd(__m128d a, __m128d b)
Definition: emmintrin.h:574
void _mm_storeu_si128(__m128i_u *p, __m128i b)
Definition: emmintrin.h:1668
__m128i _mm_set_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0)
Definition: emmintrin.h:1594
__m128i _mm_cmpgt_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1449
__m128i _mm_set1_epi16(short w)
Definition: emmintrin.h:1620
int _mm_cvtsi128_si32(__m128i a)
Definition: emmintrin.h:1529
__m128i _mm_set_epi32(int i3, int i2, int i1, int i0)
Definition: emmintrin.h:1582
void _mm_store_si128(__m128i *p, __m128i b)
Definition: emmintrin.h:1663
__m128i _mm_adds_epu16(__m128i a, __m128i b)
Definition: emmintrin.h:1177
__m128i _mm_movpi64_epi64(__m64 a)
Definition: emmintrin.h:1871
__m128i _mm_slli_epi64(__m128i a, int count)
Definition: emmintrin.h:1371
int _mm_ucomile_sd(__m128d a, __m128d b)
Definition: emmintrin.h:833
__m128d _mm_cmpeq_sd(__m128d a, __m128d b)
Definition: emmintrin.h:729
int _mm_cvtsd_si32(__m128d a)
Definition: emmintrin.h:881
int _mm_comile_sd(__m128d a, __m128d b)
Definition: emmintrin.h:803
__m128i _mm_castps_si128(__m128 a)
Definition: emmintrin.h:1927
__m128d _mm_cmpnlt_pd(__m128d a, __m128d b)
Definition: emmintrin.h:709
__m128i _mm_setr_epi32(int i0, int i1, int i2, int i3)
Definition: emmintrin.h:1636
__m128i _mm_setzero_si128(void)
Definition: emmintrin.h:1658
__m128i _mm_srl_epi64(__m128i a, __m128i count)
Definition: emmintrin.h:1429
__m128d _mm_add_pd(__m128d a, __m128d b)
Definition: emmintrin.h:580
__m128i _mm_cvtpd_epi32(__m128d a)
Definition: emmintrin.h:876
__m128i _mm_xor_si128(__m128i a, __m128i b)
Definition: emmintrin.h:1343
__m128i _mm_move_epi64(__m128i a)
Definition: emmintrin.h:1876
__m128d _mm_sub_sd(__m128d a, __m128d b)
Definition: emmintrin.h:585
__m128d _mm_loadh_pd(__m128d a, double const *dp)
Definition: emmintrin.h:1007
__m128d _mm_cmpnlt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:771
__m128i _mm_srli_epi64(__m128i a, int count)
Definition: emmintrin.h:1424
__m128d _mm_setr_pd(double w, double x)
Definition: emmintrin.h:1050
#define __INTRIN_INLINE_SSE2
Definition: emmintrin.h:71
__m128d _mm_cmpord_sd(__m128d a, __m128d b)
Definition: emmintrin.h:756
__m128i _mm_set1_epi64(__m64 q)
Definition: emmintrin.h:1610
__m128d _mm_cmpunord_pd(__m128d a, __m128d b)
Definition: emmintrin.h:699
__m128i _mm_packs_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1754
__m128d _mm_castps_pd(__m128 a)
Definition: emmintrin.h:1922
void _mm_store1_pd(double *dp, __m128d a)
Definition: emmintrin.h:1079
__m128i _mm_sad_epu8(__m128i a, __m128i b)
Definition: emmintrin.h:1262
__INTRIN_INLINE_SSE2 __m128d _mm_undefined_pd(void)
Definition: emmintrin.h:1025
__INTRIN_INLINE_SSE2 void _mm_storeu_si32(void *p, __m128i b)
Definition: emmintrin.h:1684
__m128d _mm_setzero_pd(void)
Definition: emmintrin.h:1055
__m128 _mm_castsi128_ps(__m128i a)
Definition: emmintrin.h:1932
__m128d _mm_cmpneq_pd(__m128d a, __m128d b)
Definition: emmintrin.h:704
void _mm_storel_epi64(__m128i_u *p, __m128i a)
Definition: emmintrin.h:1705
__m128i _mm_packus_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1759
__m128d _mm_set_sd(double w)
Definition: emmintrin.h:1035
__m128 _mm_cvtepi32_ps(__m128i a)
Definition: emmintrin.h:1500
#define _mm_slli_si128(a, imm)
Definition: emmintrin.h:1348
__m128i _mm_adds_epu8(__m128i a, __m128i b)
Definition: emmintrin.h:1168
__m128i _mm_sub_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1277
__m128i _mm_castpd_si128(__m128d a)
Definition: emmintrin.h:1917
__m128i _mm_add_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1130
__m128d _mm_cmpnge_pd(__m128d a, __m128d b)
Definition: emmintrin.h:724
int _mm_comige_sd(__m128d a, __m128d b)
Definition: emmintrin.h:813
__m128d _mm_cmpge_pd(__m128d a, __m128d b)
Definition: emmintrin.h:689
__INTRIN_INLINE_SSE2 void _mm_storeu_si16(void *p, __m128i b)
Definition: emmintrin.h:1692
__m128d _mm_loadr_pd(double const *dp)
Definition: emmintrin.h:953
__m128i _mm_mulhi_epu16(__m128i a, __m128i b)
Definition: emmintrin.h:1242
__m128i _mm_slli_epi32(__m128i a, int count)
Definition: emmintrin.h:1361
__m128i _mm_load_si128(__m128i const *p)
Definition: emmintrin.h:1540
__m128d _mm_max_sd(__m128d a, __m128d b)
Definition: emmintrin.h:639
__m128d _mm_and_pd(__m128d a, __m128d b)
Definition: emmintrin.h:649
__m128i _mm_mul_epu32(__m128i a, __m128i b)
Definition: emmintrin.h:1257
__m128i _mm_cvttpd_epi32(__m128d a)
Definition: emmintrin.h:904
int _mm_ucomige_sd(__m128d a, __m128d b)
Definition: emmintrin.h:843
__m128d _mm_sub_pd(__m128d a, __m128d b)
Definition: emmintrin.h:591
__m128i _mm_cmpeq_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1434
__m128d _mm_load1_pd(double const *dp)
Definition: emmintrin.h:939
__m128d _mm_load_pd(double const *dp)
Definition: emmintrin.h:934
__m128d _mm_min_pd(__m128d a, __m128d b)
Definition: emmintrin.h:634
__m128i _mm_sll_epi32(__m128i a, __m128i count)
Definition: emmintrin.h:1366
__m128i _mm_unpackhi_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1797
__m64 _mm_sub_si64(__m64 a, __m64 b)
Definition: emmintrin.h:1282
void _mm_stream_si32(int *p, int a)
Definition: emmintrin.h:1731
__m128i _mm_subs_epu8(__m128i a, __m128i b)
Definition: emmintrin.h:1310
__m128i _mm_srl_epi32(__m128i a, __m128i count)
Definition: emmintrin.h:1419
__m128i _mm_mulhi_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1237
#define _mm_shuffle_pd(a, b, i)
Definition: emmintrin.h:1908
void _mm_stream_si128(__m128i *p, __m128i a)
Definition: emmintrin.h:1722
void _mm_mfence(void)
Definition: intrin_x86.h:99
__m128d _mm_or_pd(__m128d a, __m128d b)
Definition: emmintrin.h:659
__m128i _mm_cmpeq_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1439
__m128d _mm_cmpge_sd(__m128d a, __m128d b)
Definition: emmintrin.h:750
__m128d _mm_mul_sd(__m128d a, __m128d b)
Definition: emmintrin.h:596
__m128i _mm_set1_epi8(char b)
Definition: emmintrin.h:1625
__m128i _mm_sra_epi32(__m128i a, __m128i count)
Definition: emmintrin.h:1396
__m128d _mm_sqrt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:618
__m128i _mm_srai_epi32(__m128i a, int count)
Definition: emmintrin.h:1391
__m128i _mm_cvtsi32_si128(int a)
Definition: emmintrin.h:1519
__m128i _mm_slli_epi16(__m128i a, int count)
Definition: emmintrin.h:1351
__m128d _mm_cmpeq_pd(__m128d a, __m128d b)
Definition: emmintrin.h:669
__m128i _mm_subs_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1292
void _mm_storer_pd(double *dp, __m128d a)
Definition: emmintrin.h:1098
double __m128d __attribute__((__vector_size__(16), __aligned__(16)))
Definition: emmintrin.h:43
__INTRIN_INLINE_SSE2 __m128i _mm_cvtsi64_si128(long long a)
Definition: emmintrin.h:1524
int _mm_movemask_pd(__m128d a)
Definition: emmintrin.h:1903
__m128i _mm_setr_epi64(__m64 q0, __m64 q1)
Definition: emmintrin.h:1631
__m128i _mm_sub_epi64(__m128i a, __m128i b)
Definition: emmintrin.h:1287
__m128d _mm_move_sd(__m128d a, __m128d b)
Definition: emmintrin.h:1060
__m128i _mm_min_epu8(__m128i a, __m128i b)
Definition: emmintrin.h:1228
__m64 _mm_add_si64(__m64 a, __m64 b)
Definition: emmintrin.h:1140
__m128d _mm_cvtpi32_pd(__m64 a)
Definition: emmintrin.h:924
__INTRIN_INLINE_SSE2 __m128i _mm_set_epi64x(long long q1, long long q0)
Definition: emmintrin.h:1572
__m128d _mm_unpackhi_pd(__m128d a, __m128d b)
Definition: emmintrin.h:1885
int _mm_cvttsd_si32(__m128d a)
Definition: emmintrin.h:909
__m128d _mm_cmpnle_pd(__m128d a, __m128d b)
Definition: emmintrin.h:714
__m128i _mm_unpackhi_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1807
__m128d _mm_cmpnle_sd(__m128d a, __m128d b)
Definition: emmintrin.h:776
__m128i _mm_add_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1125
__m128d _mm_cmple_sd(__m128d a, __m128d b)
Definition: emmintrin.h:739
__m128d _mm_cmple_pd(__m128d a, __m128d b)
Definition: emmintrin.h:679
__m128i _mm_cmplt_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1476
#define _mm_insert_epi16(a, b, imm)
Definition: emmintrin.h:1768
__m128d _mm_loadu_pd(double const *dp)
Definition: emmintrin.h:963
__m128i _mm_avg_epu16(__m128i a, __m128i b)
Definition: emmintrin.h:1191
__m64 _mm_mul_su32(__m64 a, __m64 b)
Definition: emmintrin.h:1252
__m128d _mm_cvtss_sd(__m128d a, __m128 b)
Definition: emmintrin.h:898
int _mm_ucomieq_sd(__m128d a, __m128d b)
Definition: emmintrin.h:823
__m128i _mm_setr_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0)
Definition: emmintrin.h:1648
__m128i _mm_sll_epi64(__m128i a, __m128i count)
Definition: emmintrin.h:1376
__m128d _mm_cmpngt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:781
void _mm_storel_pd(double *dp, __m128d a)
Definition: emmintrin.h:1117
__m128d _mm_cmplt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:734
__m128d _mm_cvtsi32_sd(__m128d a, int b)
Definition: emmintrin.h:891
__m128i _mm_or_si128(__m128i a, __m128i b)
Definition: emmintrin.h:1338
__INTRIN_INLINE_SSE2 long long _mm_cvtsi128_si64(__m128i a)
Definition: emmintrin.h:1535
__m128i _mm_cmplt_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1471
__m128i _mm_subs_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1301
__INTRIN_INLINE_SSE2 __m128i _mm_loadu_si64(void const *a)
Definition: emmintrin.h:971
__m128d _mm_cmpngt_pd(__m128d a, __m128d b)
Definition: emmintrin.h:719
#define _mm_extract_epi16(a, imm)
Definition: emmintrin.h:1764
double _mm_cvtsd_f64(__m128d a)
Definition: emmintrin.h:929
#define _mm_shufflelo_epi16(a, imm)
Definition: emmintrin.h:1780
__m128i _mm_packs_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1749
__INTRIN_INLINE_SSE2 __m128i _mm_set1_epi64x(long long q)
Definition: emmintrin.h:1605
__m128d _mm_min_sd(__m128d a, __m128d b)
Definition: emmintrin.h:629
void _mm_store_pd(double *dp, __m128d a)
Definition: emmintrin.h:1074
__m128i _mm_srli_epi16(__m128i a, int count)
Definition: emmintrin.h:1404
__m128i _mm_sub_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1267
__m128d _mm_castsi128_pd(__m128i a)
Definition: emmintrin.h:1937
__m128d _mm_cmpord_pd(__m128d a, __m128d b)
Definition: emmintrin.h:694
void _mm_storeh_pd(double *dp, __m128d a)
Definition: emmintrin.h:1109
__m128d _mm_mul_pd(__m128d a, __m128d b)
Definition: emmintrin.h:602
__INTRIN_INLINE_SSE2 __m128i _mm_loadu_si32(void const *a)
Definition: emmintrin.h:980
__m64 _mm_cvttpd_pi32(__m128d a)
Definition: emmintrin.h:919
__m128i _mm_sll_epi16(__m128i a, __m128i count)
Definition: emmintrin.h:1356
__m128d _mm_sqrt_pd(__m128d a)
Definition: emmintrin.h:624
__m64 _mm_cvtpd_pi32(__m128d a)
Definition: emmintrin.h:914
__m128i _mm_mullo_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1247
__m128i _mm_sra_epi16(__m128i a, __m128i count)
Definition: emmintrin.h:1386
int _mm_comieq_sd(__m128d a, __m128d b)
Definition: emmintrin.h:793
__m128i _mm_cvttps_epi32(__m128 a)
Definition: emmintrin.h:1514
__m128d _mm_load_sd(double const *dp)
Definition: emmintrin.h:998
__m64 _mm_movepi64_pi64(__m128i a)
Definition: emmintrin.h:1866
void _mm_lfence(void)
Definition: intrin_x86.h:106
void _mm_maskmoveu_si128(__m128i d, __m128i n, _Out_writes_bytes_(16) char *p)
int _mm_movemask_epi8(__m128i a)
Definition: emmintrin.h:1772
__m128i _mm_madd_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1196
__m128d _mm_cmpgt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:744
int _mm_ucomilt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:828
__m128i _mm_cmpeq_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1444
__m128i _mm_srai_epi16(__m128i a, int count)
Definition: emmintrin.h:1381
__m128i _mm_set_epi16(short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0)
Definition: emmintrin.h:1587
__m128i _mm_setr_epi16(short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7)
Definition: emmintrin.h:1641
int _mm_ucomineq_sd(__m128d a, __m128d b)
Definition: emmintrin.h:848
__m128i _mm_cvtps_epi32(__m128 a)
Definition: emmintrin.h:1509
__m128i _mm_unpackhi_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1786
__m128d _mm_andnot_pd(__m128d a, __m128d b)
Definition: emmintrin.h:654
__m128d _mm_loadl_pd(__m128d a, double const *dp)
Definition: emmintrin.h:1016
__m128d _mm_cmpneq_sd(__m128d a, __m128d b)
Definition: emmintrin.h:766
__m128i _mm_min_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1219
__m128 _mm_cvtsd_ss(__m128 a, __m128d b)
Definition: emmintrin.h:886
__m128i _mm_andnot_si128(__m128i a, __m128i b)
Definition: emmintrin.h:1333
__m128i _mm_and_si128(__m128i a, __m128i b)
Definition: emmintrin.h:1328
__m128i _mm_setl_epi64(__m128i q)
void _mm_stream_pd(double *p, __m128d a)
Definition: emmintrin.h:1713
__m128 _mm_castpd_ps(__m128d a)
Definition: emmintrin.h:1912
__INTRIN_INLINE_SSE2 __m128i _mm_undefined_si128(void)
Definition: emmintrin.h:1562
int _mm_comineq_sd(__m128d a, __m128d b)
Definition: emmintrin.h:818
__m128i _mm_avg_epu8(__m128i a, __m128i b)
Definition: emmintrin.h:1186
int _mm_ucomigt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:838
__m128i _mm_adds_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1159
#define _mm_stream_si64
Definition: emmintrin.h:327
__m128i _mm_unpackhi_epi64(__m128i a, __m128i b)
Definition: emmintrin.h:1817
__m128i _mm_adds_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1150
int _mm_comilt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:798
__m128i _mm_unpacklo_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1837
__m128i _mm_cmplt_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1466
#define _mm_shufflehi_epi16(a, imm)
Definition: emmintrin.h:1783
__m128d _mm_cvtepi32_pd(__m128i a)
Definition: emmintrin.h:867
__m128i _mm_max_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1201
__m128i _mm_unpacklo_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1826
__m128d _mm_xor_pd(__m128d a, __m128d b)
Definition: emmintrin.h:664
__INTRIN_INLINE_SSE2 void _mm_storeu_si64(void *p, __m128i b)
Definition: emmintrin.h:1676
void _mm_clflush(void const *p)
__m128d _mm_cvtps_pd(__m128 a)
Definition: emmintrin.h:858
__m128d _mm_cmpgt_pd(__m128d a, __m128d b)
Definition: emmintrin.h:684
void _mm_pause(void)
Definition: intrin_x86.h:2036
__INTRIN_INLINE_SSE2 __m128i _mm_loadu_si16(void const *a)
Definition: emmintrin.h:989
void _mm_store_sd(double *dp, __m128d a)
Definition: emmintrin.h:1066
__m128d _mm_set_pd(double w, double x)
Definition: emmintrin.h:1045
__m128i _mm_srli_epi32(__m128i a, int count)
Definition: emmintrin.h:1414
int _mm_comigt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:808
__m128i _mm_set_epi64(__m64 q1, __m64 q0)
Definition: emmintrin.h:1577
__m128i _mm_cmpgt_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1456
__m128d _mm_cmplt_pd(__m128d a, __m128d b)
Definition: emmintrin.h:674
__m128i _mm_add_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1135
__m128i _mm_sub_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1272
__m128i _mm_loadl_epi64(__m128i_u const *p)
Definition: emmintrin.h:1553
__m128i _mm_add_epi64(__m128i a, __m128i b)
Definition: emmintrin.h:1145
#define _mm_srli_si128(a, imm)
Definition: emmintrin.h:1401
__m128d _mm_div_sd(__m128d a, __m128d b)
Definition: emmintrin.h:607
__m128i _mm_cmpgt_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1461
#define _mm_shuffle_epi32(a, imm)
Definition: emmintrin.h:1777
__m128d _mm_set1_pd(double w)
Definition: emmintrin.h:1040
__m128i _mm_unpacklo_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1847
__m128d _mm_div_pd(__m128d a, __m128d b)
Definition: emmintrin.h:613
__m128i _mm_srl_epi16(__m128i a, __m128i count)
Definition: emmintrin.h:1409
__m128d _mm_max_pd(__m128d a, __m128d b)
Definition: emmintrin.h:644
__m128d _mm_cmpunord_sd(__m128d a, __m128d b)
Definition: emmintrin.h:761
__m128d _mm_unpacklo_pd(__m128d a, __m128d b)
Definition: emmintrin.h:1894
__m128i _mm_subs_epu16(__m128i a, __m128i b)
Definition: emmintrin.h:1319
__m128i _mm_loadu_si128(__m128i_u const *p)
Definition: emmintrin.h:1545
__m128i _mm_set1_epi32(int i)
Definition: emmintrin.h:1615
__m128i _mm_unpacklo_epi64(__m128i a, __m128i b)
Definition: emmintrin.h:1857
__m128i _mm_max_epu8(__m128i a, __m128i b)
Definition: emmintrin.h:1210
__declspec(noinline)
Definition: floatconv.c:47
GLint GLint GLint GLint GLint x
Definition: gl.h:1548
GLuint GLuint GLsizei count
Definition: gl.h:1545
GLdouble GLdouble GLdouble GLdouble q
Definition: gl.h:2063
GLdouble n
Definition: glext.h:7729
GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint GLdouble GLdouble w2
Definition: glext.h:8308
GLboolean GLboolean GLboolean b
Definition: glext.h:6204
GLfloat GLfloat p
Definition: glext.h:8902
GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint GLdouble w1
Definition: glext.h:8308
GLboolean GLboolean GLboolean GLboolean a
Definition: glext.h:6204
GLubyte GLubyte GLubyte GLubyte w
Definition: glext.h:6102
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble * u
Definition: glfuncs.h:240
#define d
Definition: ke_i.h:81
#define a
Definition: ke_i.h:78
#define b
Definition: ke_i.h:79
static CRYPT_DATA_BLOB b4
Definition: msg.c:2284
static CRYPT_DATA_BLOB b3[]
Definition: msg.c:592
static CRYPT_DATA_BLOB b2[]
Definition: msg.c:582
static CRYPT_DATA_BLOB b1[]
Definition: msg.c:573
#define _Out_writes_bytes_(s)
Definition: no_sal2.h:178
#define long
Definition: qsort.c:33
#define __c
Definition: schilyio.h:209
#define LL
Definition: tui.h:167
#define _CRT_ALIGN(x)
Definition: vcruntime.h:168