/*
 * ReactOS 0.4.16-dev-1163-gec5b142
 * emmintrin.h
 * Go to the documentation of this file.
 */
1/*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#pragma once
11#ifndef _INCLUDED_EMM
12#define _INCLUDED_EMM
13
14#include <vcruntime.h>
15#include <xmmintrin.h>
16
17#if defined(_MSC_VER) && !defined(__clang__)
18
19typedef union _DECLSPEC_INTRIN_TYPE _CRT_ALIGN(16) __m128i
20{
21 __int8 m128i_i8[16];
22 __int16 m128i_i16[8];
23 __int32 m128i_i32[4];
24 __int64 m128i_i64[2];
25 unsigned __int8 m128i_u8[16];
26 unsigned __int16 m128i_u16[8];
27 unsigned __int32 m128i_u32[4];
28 unsigned __int64 m128i_u64[2];
29} __m128i;
30#ifdef _STATIC_ASSERT
31_STATIC_ASSERT(sizeof(__m128i) == 16);
32#endif
33
34typedef struct _DECLSPEC_INTRIN_TYPE _CRT_ALIGN(16) __m128d
35{
36 double m128d_f64[2];
37} __m128d;
38
39typedef __declspec(align(1)) __m128i __m128i_u;
40
41#define __ATTRIBUTE_SSE2__
42
43#else /* _MSC_VER */
44
/* Public 128-bit vector types, 16-byte aligned. */
typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16)));
typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16)));

/* Same vector layouts declared with 1-byte alignment (used by the unaligned load/store prototypes). */
typedef double __m128d_u __attribute__((__vector_size__(16), __aligned__(1)));
typedef long long __m128i_u __attribute__((__vector_size__(16), __aligned__(1)));

/* Internal lane-typed views used for casts inside the inline implementations. */
typedef double __v2df __attribute__((__vector_size__(16)));
typedef long long __v2di __attribute__((__vector_size__(16)));
typedef short __v8hi __attribute__((__vector_size__(16)));
typedef char __v16qi __attribute__((__vector_size__(16)));

/* Unsigned lane-typed views. */
typedef unsigned long long __v2du __attribute__((__vector_size__(16)));
typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
typedef unsigned char __v16qu __attribute__((__vector_size__(16)));

/* Explicitly signed 8-bit view (plain char has implementation-defined
 * signedness). Internal only; it should not appear in the interface. */
typedef signed char __v16qs __attribute__((__vector_size__(16)));
65
/*
 * Per-function target attributes. Both compilers get a __target__ attribute;
 * Clang additionally gets __min_vector_width__(128).
 */
#ifdef __clang__
#define __ATTRIBUTE_SSE2__ __attribute__((__target__("sse2"),__min_vector_width__(128)))
#define __ATTRIBUTE_MMXSSE2__ __attribute__((__target__("mmx,sse2"),__min_vector_width__(128)))
#else
#define __ATTRIBUTE_SSE2__ __attribute__((__target__("sse2")))
#define __ATTRIBUTE_MMXSSE2__ __attribute__((__target__("mmx,sse2")))
#endif
/* Declaration prefixes for the inline intrinsic definitions in this header. */
#define __INTRIN_INLINE_SSE2 __INTRIN_INLINE __ATTRIBUTE_SSE2__
#define __INTRIN_INLINE_MMXSSE2 __INTRIN_INLINE __ATTRIBUTE_MMXSSE2__
75
76#endif /* _MSC_VER */
77
78#ifdef __cplusplus
79extern "C" {
80#endif
81
82extern __m128d _mm_add_sd(__m128d a, __m128d b);
83extern __m128d _mm_add_pd(__m128d a, __m128d b);
84extern __m128d _mm_sub_sd(__m128d a, __m128d b);
85extern __m128d _mm_sub_pd(__m128d a, __m128d b);
86extern __m128d _mm_mul_sd(__m128d a, __m128d b);
87extern __m128d _mm_mul_pd(__m128d a, __m128d b);
88extern __m128d _mm_div_sd(__m128d a, __m128d b);
89extern __m128d _mm_div_pd(__m128d a, __m128d b);
90extern __m128d _mm_sqrt_sd(__m128d a, __m128d b);
91extern __m128d _mm_sqrt_pd(__m128d a);
92extern __m128d _mm_min_sd(__m128d a, __m128d b);
93extern __m128d _mm_min_pd(__m128d a, __m128d b);
94extern __m128d _mm_max_sd(__m128d a, __m128d b);
95extern __m128d _mm_max_pd(__m128d a, __m128d b);
96extern __m128d _mm_and_pd(__m128d a, __m128d b);
97extern __m128d _mm_andnot_pd(__m128d a, __m128d b);
98extern __m128d _mm_or_pd(__m128d a, __m128d b);
99extern __m128d _mm_xor_pd(__m128d a, __m128d b);
100extern __m128d _mm_cmpeq_pd(__m128d a, __m128d b);
101extern __m128d _mm_cmplt_pd(__m128d a, __m128d b);
102extern __m128d _mm_cmple_pd(__m128d a, __m128d b);
103extern __m128d _mm_cmpgt_pd(__m128d a, __m128d b);
104extern __m128d _mm_cmpge_pd(__m128d a, __m128d b);
105extern __m128d _mm_cmpord_pd(__m128d a, __m128d b);
106extern __m128d _mm_cmpunord_pd(__m128d a, __m128d b);
107extern __m128d _mm_cmpneq_pd(__m128d a, __m128d b);
108extern __m128d _mm_cmpnlt_pd(__m128d a, __m128d b);
109extern __m128d _mm_cmpnle_pd(__m128d a, __m128d b);
110extern __m128d _mm_cmpngt_pd(__m128d a, __m128d b);
111extern __m128d _mm_cmpnge_pd(__m128d a, __m128d b);
112extern __m128d _mm_cmpeq_sd(__m128d a, __m128d b);
113extern __m128d _mm_cmplt_sd(__m128d a, __m128d b);
114extern __m128d _mm_cmple_sd(__m128d a, __m128d b);
115extern __m128d _mm_cmpgt_sd(__m128d a, __m128d b);
116extern __m128d _mm_cmpge_sd(__m128d a, __m128d b);
117extern __m128d _mm_cmpord_sd(__m128d a, __m128d b);
118extern __m128d _mm_cmpunord_sd(__m128d a, __m128d b);
119extern __m128d _mm_cmpneq_sd(__m128d a, __m128d b);
120extern __m128d _mm_cmpnlt_sd(__m128d a, __m128d b);
121extern __m128d _mm_cmpnle_sd(__m128d a, __m128d b);
122extern __m128d _mm_cmpngt_sd(__m128d a, __m128d b);
123extern __m128d _mm_cmpnge_sd(__m128d a, __m128d b);
124extern int _mm_comieq_sd(__m128d a, __m128d b);
125extern int _mm_comilt_sd(__m128d a, __m128d b);
126extern int _mm_comile_sd(__m128d a, __m128d b);
127extern int _mm_comigt_sd(__m128d a, __m128d b);
128extern int _mm_comige_sd(__m128d a, __m128d b);
129extern int _mm_comineq_sd(__m128d a, __m128d b);
130extern int _mm_ucomieq_sd(__m128d a, __m128d b);
131extern int _mm_ucomilt_sd(__m128d a, __m128d b);
132extern int _mm_ucomile_sd(__m128d a, __m128d b);
133extern int _mm_ucomigt_sd(__m128d a, __m128d b);
134extern int _mm_ucomige_sd(__m128d a, __m128d b);
135extern int _mm_ucomineq_sd(__m128d a, __m128d b);
136extern __m128 _mm_cvtpd_ps(__m128d a);
137extern __m128d _mm_cvtps_pd(__m128 a);
138extern __m128d _mm_cvtepi32_pd(__m128i a);
139extern __m128i _mm_cvtpd_epi32(__m128d a);
140extern int _mm_cvtsd_si32(__m128d a);
141extern __m128 _mm_cvtsd_ss(__m128 a, __m128d b);
142extern __m128d _mm_cvtsi32_sd(__m128d a, int b);
143extern __m128d _mm_cvtss_sd(__m128d a, __m128 b);
144extern __m128i _mm_cvttpd_epi32(__m128d a);
145extern int _mm_cvttsd_si32(__m128d a);
146extern __m64 _mm_cvtpd_pi32(__m128d a);
147extern __m64 _mm_cvttpd_pi32(__m128d a);
148extern __m128d _mm_cvtpi32_pd(__m64 a);
149extern double _mm_cvtsd_f64(__m128d a);
150extern __m128d _mm_load_pd(double const *dp);
151extern __m128d _mm_load1_pd(double const *dp);
152extern __m128d _mm_loadr_pd(double const *dp);
153extern __m128d _mm_loadu_pd(double const *dp);
154//extern __m128i _mm_loadu_si64(void const *a);
155//extern __m128i _mm_loadu_si32(void const *a);
156//extern __m128i _mm_loadu_si16(void const *a);
157extern __m128d _mm_load_sd(double const *dp);
158extern __m128d _mm_loadh_pd(__m128d a, double const *dp);
159extern __m128d _mm_loadl_pd(__m128d a, double const *dp);
160//extern __m128d _mm_undefined_pd(void);
161extern __m128d _mm_set_sd(double w);
162extern __m128d _mm_set1_pd(double w);
163extern __m128d _mm_set_pd(double w, double x);
164extern __m128d _mm_setr_pd(double w, double x);
165extern __m128d _mm_setzero_pd(void);
166extern __m128d _mm_move_sd(__m128d a, __m128d b);
167extern void _mm_store_sd(double *dp, __m128d a);
168extern void _mm_store_pd(double *dp, __m128d a);
169extern void _mm_store1_pd(double *dp, __m128d a);
170extern void _mm_storeu_pd(double *dp, __m128d a);
171extern void _mm_storer_pd(double *dp, __m128d a);
172extern void _mm_storeh_pd(double *dp, __m128d a);
173extern void _mm_storel_pd(double *dp, __m128d a);
174extern __m128i _mm_add_epi8(__m128i a, __m128i b);
175extern __m128i _mm_add_epi16(__m128i a, __m128i b);
176extern __m128i _mm_add_epi32(__m128i a, __m128i b);
177extern __m64 _mm_add_si64(__m64 a, __m64 b);
178extern __m128i _mm_add_epi64(__m128i a, __m128i b);
179extern __m128i _mm_adds_epi8(__m128i a, __m128i b);
180extern __m128i _mm_adds_epi16(__m128i a, __m128i b);
181extern __m128i _mm_adds_epu8(__m128i a, __m128i b);
182extern __m128i _mm_adds_epu16(__m128i a, __m128i b);
183extern __m128i _mm_avg_epu8(__m128i a, __m128i b);
184extern __m128i _mm_avg_epu16(__m128i a, __m128i b);
185extern __m128i _mm_madd_epi16(__m128i a, __m128i b);
186extern __m128i _mm_max_epi16(__m128i a, __m128i b);
187extern __m128i _mm_max_epu8(__m128i a, __m128i b);
188extern __m128i _mm_min_epi16(__m128i a, __m128i b);
189extern __m128i _mm_min_epu8(__m128i a, __m128i b);
190extern __m128i _mm_mulhi_epi16(__m128i a, __m128i b);
191extern __m128i _mm_mulhi_epu16(__m128i a, __m128i b);
192extern __m128i _mm_mullo_epi16(__m128i a, __m128i b);
193extern __m64 _mm_mul_su32(__m64 a, __m64 b);
194extern __m128i _mm_mul_epu32(__m128i a, __m128i b);
195extern __m128i _mm_sad_epu8(__m128i a, __m128i b);
196extern __m128i _mm_sub_epi8(__m128i a, __m128i b);
197extern __m128i _mm_sub_epi16(__m128i a, __m128i b);
198extern __m128i _mm_sub_epi32(__m128i a, __m128i b);
199extern __m64 _mm_sub_si64(__m64 a, __m64 b);
200extern __m128i _mm_sub_epi64(__m128i a, __m128i b);
201extern __m128i _mm_subs_epi8(__m128i a, __m128i b);
202extern __m128i _mm_subs_epi16(__m128i a, __m128i b);
203extern __m128i _mm_subs_epu8(__m128i a, __m128i b);
204extern __m128i _mm_subs_epu16(__m128i a, __m128i b);
205extern __m128i _mm_and_si128(__m128i a, __m128i b);
206extern __m128i _mm_andnot_si128(__m128i a, __m128i b);
207extern __m128i _mm_or_si128(__m128i a, __m128i b);
208extern __m128i _mm_xor_si128(__m128i a, __m128i b);
209extern __m128i _mm_slli_si128(__m128i a, int i);
210extern __m128i _mm_slli_epi16(__m128i a, int count);
211extern __m128i _mm_sll_epi16(__m128i a, __m128i count);
212extern __m128i _mm_slli_epi32(__m128i a, int count);
213extern __m128i _mm_sll_epi32(__m128i a, __m128i count);
214extern __m128i _mm_slli_epi64(__m128i a, int count);
215extern __m128i _mm_sll_epi64(__m128i a, __m128i count);
216extern __m128i _mm_srai_epi16(__m128i a, int count);
217extern __m128i _mm_sra_epi16(__m128i a, __m128i count);
218extern __m128i _mm_srai_epi32(__m128i a, int count);
219extern __m128i _mm_sra_epi32(__m128i a, __m128i count);
220extern __m128i _mm_srli_si128(__m128i a, int imm);
221extern __m128i _mm_srli_epi16(__m128i a, int count);
222extern __m128i _mm_srl_epi16(__m128i a, __m128i count);
223extern __m128i _mm_srli_epi32(__m128i a, int count);
224extern __m128i _mm_srl_epi32(__m128i a, __m128i count);
225extern __m128i _mm_srli_epi64(__m128i a, int count);
226extern __m128i _mm_srl_epi64(__m128i a, __m128i count);
227extern __m128i _mm_cmpeq_epi8(__m128i a, __m128i b);
228extern __m128i _mm_cmpeq_epi16(__m128i a, __m128i b);
229extern __m128i _mm_cmpeq_epi32(__m128i a, __m128i b);
230extern __m128i _mm_cmpgt_epi8(__m128i a, __m128i b);
231extern __m128i _mm_cmpgt_epi16(__m128i a, __m128i b);
232extern __m128i _mm_cmpgt_epi32(__m128i a, __m128i b);
233extern __m128i _mm_cmplt_epi8(__m128i a, __m128i b);
234extern __m128i _mm_cmplt_epi16(__m128i a, __m128i b);
235extern __m128i _mm_cmplt_epi32(__m128i a, __m128i b);
236#ifdef _M_AMD64
237extern __m128d _mm_cvtsi64_sd(__m128d a, long long b);
238extern long long _mm_cvtsd_si64(__m128d a);
239extern long long _mm_cvttsd_si64(__m128d a);
240#endif
241extern __m128 _mm_cvtepi32_ps(__m128i a);
242extern __m128i _mm_cvtps_epi32(__m128 a);
243extern __m128i _mm_cvttps_epi32(__m128 a);
244extern __m128i _mm_cvtsi32_si128(int a);
245#ifdef _M_AMD64
246extern __m128i _mm_cvtsi64_si128(long long a);
247#endif
248extern int _mm_cvtsi128_si32(__m128i a);
249#ifdef _M_AMD64
250extern long long _mm_cvtsi128_si64(__m128i a);
251#endif
252extern __m128i _mm_load_si128(__m128i const *p);
253extern __m128i _mm_loadu_si128(__m128i_u const *p);
254extern __m128i _mm_loadl_epi64(__m128i_u const *p);
255//extern __m128i _mm_undefined_si128(void);
256//extern __m128i _mm_set_epi64x(long long q1, long long q0); // FIXME
257extern __m128i _mm_set_epi64(__m64 q1, __m64 q0);
258//extern __m128i _mm_set_epi32(int i3, int i1, int i0);
259extern __m128i _mm_set_epi32(int i3, int i2, int i1, int i0);
260//extern __m128i _mm_set_epi16(short w7, short w2, short w1, short w0);
261extern __m128i _mm_set_epi16(short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0);
262//extern __m128i _mm_set_epi8(char b15, char b10, char b4, char b3, char b2, char b1, char b0);
263extern __m128i _mm_set_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0);
264//extern __m128i _mm_set1_epi64x(long long q); // FIXME
265extern __m128i _mm_set1_epi64(__m64 q);
266extern __m128i _mm_set1_epi32(int i);
267extern __m128i _mm_set1_epi16(short w);
268extern __m128i _mm_set1_epi8(char b);
269extern __m128i _mm_setl_epi64(__m128i q); // FIXME: clang?
270extern __m128i _mm_setr_epi64(__m64 q0, __m64 q1);
271//extern __m128i _mm_setr_epi32(int i0, int i2, int i3);
272extern __m128i _mm_setr_epi32(int i0, int i1, int i2, int i3);
273//extern __m128i _mm_setr_epi16(short w0, short w5, short w6, short w7);
274extern __m128i _mm_setr_epi16(short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7);
275//extern __m128i _mm_setr_epi8(char b0, char b6, char b11, char b12, char b13, char b14, char b15);
276extern __m128i _mm_setr_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0);
277extern __m128i _mm_setzero_si128(void);
278extern void _mm_store_si128(__m128i *p, __m128i b);
279extern void _mm_storeu_si128(__m128i_u *p, __m128i b);
280//extern void _mm_storeu_si64(void *p, __m128i b);
281//extern void _mm_storeu_si32(void *p, __m128i b);
282//extern void _mm_storeu_si16(void *p, __m128i b);
283extern void _mm_maskmoveu_si128(__m128i d, __m128i n, _Out_writes_bytes_(16) char *p);
284extern void _mm_storel_epi64(__m128i_u *p, __m128i a);
285extern void _mm_stream_pd(double *p, __m128d a);
286extern void _mm_stream_si128(__m128i *p, __m128i a);
287extern void _mm_stream_si32(int *p, int a);
288extern void _mm_clflush(void const *p);
289extern void _mm_lfence(void);
290extern void _mm_mfence(void);
291extern __m128i _mm_packs_epi16(__m128i a, __m128i b);
292extern __m128i _mm_packs_epi32(__m128i a, __m128i b);
293extern __m128i _mm_packus_epi16(__m128i a, __m128i b);
294extern int _mm_extract_epi16(__m128i a, int imm);
295extern __m128i _mm_insert_epi16(__m128i a, int b, int imm);
296extern int _mm_movemask_epi8(__m128i a);
297extern __m128i _mm_shuffle_epi32(__m128i a, int imm);
298extern __m128i _mm_shufflelo_epi16(__m128i a, int imm);
299extern __m128i _mm_shufflehi_epi16(__m128i a, int imm);
300extern __m128i _mm_unpackhi_epi8(__m128i a, __m128i b);
301extern __m128i _mm_unpackhi_epi16(__m128i a, __m128i b);
302extern __m128i _mm_unpackhi_epi32(__m128i a, __m128i b);
303extern __m128i _mm_unpackhi_epi64(__m128i a, __m128i b);
304extern __m128i _mm_unpacklo_epi8(__m128i a, __m128i b);
305extern __m128i _mm_unpacklo_epi16(__m128i a, __m128i b);
306extern __m128i _mm_unpacklo_epi32(__m128i a, __m128i b);
307extern __m128i _mm_unpacklo_epi64(__m128i a, __m128i b);
308extern __m64 _mm_movepi64_pi64(__m128i a);
309extern __m128i _mm_movpi64_epi64(__m64 a);
310extern __m128i _mm_move_epi64(__m128i a);
311extern __m128d _mm_unpackhi_pd(__m128d a, __m128d b);
312extern __m128d _mm_unpacklo_pd(__m128d a, __m128d b);
313extern int _mm_movemask_pd(__m128d a);
314extern __m128d _mm_shuffle_pd(__m128d a, __m128d b, int imm);
315extern __m128 _mm_castpd_ps(__m128d a);
316extern __m128i _mm_castpd_si128(__m128d a);
317extern __m128d _mm_castps_pd(__m128 a);
318extern __m128i _mm_castps_si128(__m128 a);
319extern __m128 _mm_castsi128_ps(__m128i a);
320extern __m128d _mm_castsi128_pd(__m128i a);
321void _mm_pause(void);
322
/* Alternate spellings that forward to the canonical intrinsic names. */

/* "pd1" spellings of the broadcast set/load/store intrinsics. */
#define _mm_set_pd1(a) _mm_set1_pd(a)
#define _mm_load_pd1(p) _mm_load1_pd(p)
#define _mm_store_pd1(p, a) _mm_store1_pd((p), (a))

/* "bslli"/"bsrli" spellings of the whole-register byte shifts. */
#define _mm_bslli_si128 _mm_slli_si128
#define _mm_bsrli_si128 _mm_srli_si128

/* NOTE(review): _mm_stream_si64x is not declared in the visible part of this
 * header - confirm it is provided elsewhere. */
#define _mm_stream_si64 _mm_stream_si64x
330
331#if defined(_MSC_VER) && !defined(__clang__)
332
333#pragma intrinsic(_mm_add_sd)
334#pragma intrinsic(_mm_add_pd)
335#pragma intrinsic(_mm_sub_sd)
336#pragma intrinsic(_mm_sub_pd)
337#pragma intrinsic(_mm_mul_sd)
338#pragma intrinsic(_mm_mul_pd)
339#pragma intrinsic(_mm_div_sd)
340#pragma intrinsic(_mm_div_pd)
341#pragma intrinsic(_mm_sqrt_sd)
342#pragma intrinsic(_mm_sqrt_pd)
343#pragma intrinsic(_mm_min_sd)
344#pragma intrinsic(_mm_min_pd)
345#pragma intrinsic(_mm_max_sd)
346#pragma intrinsic(_mm_max_pd)
347#pragma intrinsic(_mm_and_pd)
348#pragma intrinsic(_mm_andnot_pd)
349#pragma intrinsic(_mm_or_pd)
350#pragma intrinsic(_mm_xor_pd)
351#pragma intrinsic(_mm_cmpeq_pd)
352#pragma intrinsic(_mm_cmplt_pd)
353#pragma intrinsic(_mm_cmple_pd)
354#pragma intrinsic(_mm_cmpgt_pd)
355#pragma intrinsic(_mm_cmpge_pd)
356#pragma intrinsic(_mm_cmpord_pd)
357#pragma intrinsic(_mm_cmpunord_pd)
358#pragma intrinsic(_mm_cmpneq_pd)
359#pragma intrinsic(_mm_cmpnlt_pd)
360#pragma intrinsic(_mm_cmpnle_pd)
361#pragma intrinsic(_mm_cmpngt_pd)
362#pragma intrinsic(_mm_cmpnge_pd)
363#pragma intrinsic(_mm_cmpeq_sd)
364#pragma intrinsic(_mm_cmplt_sd)
365#pragma intrinsic(_mm_cmple_sd)
366#pragma intrinsic(_mm_cmpgt_sd)
367#pragma intrinsic(_mm_cmpge_sd)
368#pragma intrinsic(_mm_cmpord_sd)
369#pragma intrinsic(_mm_cmpunord_sd)
370#pragma intrinsic(_mm_cmpneq_sd)
371#pragma intrinsic(_mm_cmpnlt_sd)
372#pragma intrinsic(_mm_cmpnle_sd)
373#pragma intrinsic(_mm_cmpngt_sd)
374#pragma intrinsic(_mm_cmpnge_sd)
375#pragma intrinsic(_mm_comieq_sd)
376#pragma intrinsic(_mm_comilt_sd)
377#pragma intrinsic(_mm_comile_sd)
378#pragma intrinsic(_mm_comigt_sd)
379#pragma intrinsic(_mm_comige_sd)
380#pragma intrinsic(_mm_comineq_sd)
381#pragma intrinsic(_mm_ucomieq_sd)
382#pragma intrinsic(_mm_ucomilt_sd)
383#pragma intrinsic(_mm_ucomile_sd)
384#pragma intrinsic(_mm_ucomigt_sd)
385#pragma intrinsic(_mm_ucomige_sd)
386#pragma intrinsic(_mm_ucomineq_sd)
387#pragma intrinsic(_mm_cvtpd_ps)
388#pragma intrinsic(_mm_cvtps_pd)
389#pragma intrinsic(_mm_cvtepi32_pd)
390#pragma intrinsic(_mm_cvtpd_epi32)
391#pragma intrinsic(_mm_cvtsd_si32)
392#pragma intrinsic(_mm_cvtsd_ss)
393#pragma intrinsic(_mm_cvtsi32_sd)
394#pragma intrinsic(_mm_cvtss_sd)
395#pragma intrinsic(_mm_cvttpd_epi32)
396#pragma intrinsic(_mm_cvttsd_si32)
397//#pragma intrinsic(_mm_cvtpd_pi32)
398//#pragma intrinsic(_mm_cvttpd_pi32)
399//#pragma intrinsic(_mm_cvtpi32_pd)
400#pragma intrinsic(_mm_cvtsd_f64)
401#pragma intrinsic(_mm_load_pd)
402#pragma intrinsic(_mm_load1_pd)
403#pragma intrinsic(_mm_loadr_pd)
404#pragma intrinsic(_mm_loadu_pd)
405//#pragma intrinsic(_mm_loadu_si64)
406//#pragma intrinsic(_mm_loadu_si32)
407//#pragma intrinsic(_mm_loadu_si16)
408#pragma intrinsic(_mm_load_sd)
409#pragma intrinsic(_mm_loadh_pd)
410#pragma intrinsic(_mm_loadl_pd)
411//#pragma intrinsic(_mm_undefined_pd)
412#pragma intrinsic(_mm_set_sd)
413#pragma intrinsic(_mm_set1_pd)
414#pragma intrinsic(_mm_set_pd)
415#pragma intrinsic(_mm_setr_pd)
416#pragma intrinsic(_mm_setzero_pd)
417#pragma intrinsic(_mm_move_sd)
418#pragma intrinsic(_mm_store_sd)
419#pragma intrinsic(_mm_store_pd)
420#pragma intrinsic(_mm_store1_pd)
421#pragma intrinsic(_mm_storeu_pd)
422#pragma intrinsic(_mm_storer_pd)
423#pragma intrinsic(_mm_storeh_pd)
424#pragma intrinsic(_mm_storel_pd)
425#pragma intrinsic(_mm_add_epi8)
426#pragma intrinsic(_mm_add_epi16)
427#pragma intrinsic(_mm_add_epi32)
428//#pragma intrinsic(_mm_add_si64)
429#pragma intrinsic(_mm_add_epi64)
430#pragma intrinsic(_mm_adds_epi8)
431#pragma intrinsic(_mm_adds_epi16)
432#pragma intrinsic(_mm_adds_epu8)
433#pragma intrinsic(_mm_adds_epu16)
434#pragma intrinsic(_mm_avg_epu8)
435#pragma intrinsic(_mm_avg_epu16)
436#pragma intrinsic(_mm_madd_epi16)
437#pragma intrinsic(_mm_max_epi16)
438#pragma intrinsic(_mm_max_epu8)
439#pragma intrinsic(_mm_min_epi16)
440#pragma intrinsic(_mm_min_epu8)
441#pragma intrinsic(_mm_mulhi_epi16)
442#pragma intrinsic(_mm_mulhi_epu16)
443#pragma intrinsic(_mm_mullo_epi16)
444//#pragma intrinsic(_mm_mul_su32)
445#pragma intrinsic(_mm_mul_epu32)
446#pragma intrinsic(_mm_sad_epu8)
447#pragma intrinsic(_mm_sub_epi8)
448#pragma intrinsic(_mm_sub_epi16)
449#pragma intrinsic(_mm_sub_epi32)
450//#pragma intrinsic(_mm_sub_si64)
451#pragma intrinsic(_mm_sub_epi64)
452#pragma intrinsic(_mm_subs_epi8)
453#pragma intrinsic(_mm_subs_epi16)
454#pragma intrinsic(_mm_subs_epu8)
455#pragma intrinsic(_mm_subs_epu16)
456#pragma intrinsic(_mm_and_si128)
457#pragma intrinsic(_mm_andnot_si128)
458#pragma intrinsic(_mm_or_si128)
459#pragma intrinsic(_mm_xor_si128)
460#pragma intrinsic(_mm_slli_si128)
461#pragma intrinsic(_mm_slli_epi16)
462#pragma intrinsic(_mm_sll_epi16)
463#pragma intrinsic(_mm_slli_epi32)
464#pragma intrinsic(_mm_sll_epi32)
465#pragma intrinsic(_mm_slli_epi64)
466#pragma intrinsic(_mm_sll_epi64)
467#pragma intrinsic(_mm_srai_epi16)
468#pragma intrinsic(_mm_sra_epi16)
469#pragma intrinsic(_mm_srai_epi32)
470#pragma intrinsic(_mm_sra_epi32)
471#pragma intrinsic(_mm_srli_si128)
472#pragma intrinsic(_mm_srli_epi16)
473#pragma intrinsic(_mm_srl_epi16)
474#pragma intrinsic(_mm_srli_epi32)
475#pragma intrinsic(_mm_srl_epi32)
476#pragma intrinsic(_mm_srli_epi64)
477#pragma intrinsic(_mm_srl_epi64)
478#pragma intrinsic(_mm_cmpeq_epi8)
479#pragma intrinsic(_mm_cmpeq_epi16)
480#pragma intrinsic(_mm_cmpeq_epi32)
481#pragma intrinsic(_mm_cmpgt_epi8)
482#pragma intrinsic(_mm_cmpgt_epi16)
483#pragma intrinsic(_mm_cmpgt_epi32)
484#pragma intrinsic(_mm_cmplt_epi8)
485#pragma intrinsic(_mm_cmplt_epi16)
486#pragma intrinsic(_mm_cmplt_epi32)
487#ifdef _M_AMD64
488#pragma intrinsic(_mm_cvtsi64_sd)
489#pragma intrinsic(_mm_cvtsd_si64)
490#pragma intrinsic(_mm_cvttsd_si64)
491#endif
492#pragma intrinsic(_mm_cvtepi32_ps)
493#pragma intrinsic(_mm_cvtps_epi32)
494#pragma intrinsic(_mm_cvttps_epi32)
495#pragma intrinsic(_mm_cvtsi32_si128)
496#ifdef _M_AMD64
497#pragma intrinsic(_mm_cvtsi64_si128)
498#endif
499#pragma intrinsic(_mm_cvtsi128_si32)
500#ifdef _M_AMD64
501#pragma intrinsic(_mm_cvtsi128_si64)
502#endif
503#pragma intrinsic(_mm_load_si128)
504#pragma intrinsic(_mm_loadu_si128)
505#pragma intrinsic(_mm_loadl_epi64)
506//#pragma intrinsic(_mm_undefined_si128)
507//#pragma intrinsic(_mm_set_epi64x)
508//#pragma intrinsic(_mm_set_epi64)
509#pragma intrinsic(_mm_set_epi32)
510#pragma intrinsic(_mm_set_epi16)
511#pragma intrinsic(_mm_set_epi8)
512//#pragma intrinsic(_mm_set1_epi64x)
513//#pragma intrinsic(_mm_set1_epi64)
514#pragma intrinsic(_mm_set1_epi32)
515#pragma intrinsic(_mm_set1_epi16)
516#pragma intrinsic(_mm_set1_epi8)
517#pragma intrinsic(_mm_setl_epi64)
518//#pragma intrinsic(_mm_setr_epi64)
519#pragma intrinsic(_mm_setr_epi32)
520#pragma intrinsic(_mm_setr_epi16)
521#pragma intrinsic(_mm_setr_epi8)
522#pragma intrinsic(_mm_setzero_si128)
523#pragma intrinsic(_mm_store_si128)
524#pragma intrinsic(_mm_storeu_si128)
525//#pragma intrinsic(_mm_storeu_si64)
526//#pragma intrinsic(_mm_storeu_si32)
527//#pragma intrinsic(_mm_storeu_si16)
528#pragma intrinsic(_mm_maskmoveu_si128)
529#pragma intrinsic(_mm_storel_epi64)
530#pragma intrinsic(_mm_stream_pd)
531#pragma intrinsic(_mm_stream_si128)
532#pragma intrinsic(_mm_stream_si32)
533#pragma intrinsic(_mm_clflush)
534#pragma intrinsic(_mm_lfence)
535#pragma intrinsic(_mm_mfence)
536#pragma intrinsic(_mm_packs_epi16)
537#pragma intrinsic(_mm_packs_epi32)
538#pragma intrinsic(_mm_packus_epi16)
539#pragma intrinsic(_mm_extract_epi16)
540#pragma intrinsic(_mm_insert_epi16)
541#pragma intrinsic(_mm_movemask_epi8)
542#pragma intrinsic(_mm_shuffle_epi32)
543#pragma intrinsic(_mm_shufflelo_epi16)
544#pragma intrinsic(_mm_shufflehi_epi16)
545#pragma intrinsic(_mm_unpackhi_epi8)
546#pragma intrinsic(_mm_unpackhi_epi16)
547#pragma intrinsic(_mm_unpackhi_epi32)
548#pragma intrinsic(_mm_unpackhi_epi64)
549#pragma intrinsic(_mm_unpacklo_epi8)
550#pragma intrinsic(_mm_unpacklo_epi16)
551#pragma intrinsic(_mm_unpacklo_epi32)
552#pragma intrinsic(_mm_unpacklo_epi64)
553//#pragma intrinsic(_mm_movepi64_pi64)
554//#pragma intrinsic(_mm_movpi64_epi64)
555#pragma intrinsic(_mm_move_epi64)
556#pragma intrinsic(_mm_unpackhi_pd)
557#pragma intrinsic(_mm_unpacklo_pd)
558#pragma intrinsic(_mm_movemask_pd)
559#pragma intrinsic(_mm_shuffle_pd)
560#pragma intrinsic(_mm_castpd_ps)
561#pragma intrinsic(_mm_castpd_si128)
562#pragma intrinsic(_mm_castps_pd)
563#pragma intrinsic(_mm_castps_si128)
564#pragma intrinsic(_mm_castsi128_ps)
565#pragma intrinsic(_mm_castsi128_pd)
566#pragma intrinsic(_mm_pause)
567
568#else /* _MSC_VER */
569
570/*
571 Clang: https://github.com/llvm/llvm-project/blob/main/clang/lib/Headers/emmintrin.h
572 Clang older version: https://github.com/llvm/llvm-project/blob/3ef88b31843e040c95f23ff2c3c206f1fa399c05/clang/lib/Headers/emmintrin.h
573 unikraft: https://github.com/unikraft/lib-intel-intrinsics/blob/staging/include/emmintrin.h
574*/
575
576__INTRIN_INLINE_SSE2 __m128d _mm_add_sd(__m128d a, __m128d b)
577{
578 a[0] += b[0];
579 return a;
580}
581
582__INTRIN_INLINE_SSE2 __m128d _mm_add_pd(__m128d a, __m128d b)
583{
584 return (__m128d)((__v2df)a + (__v2df)b);
585}
586
587__INTRIN_INLINE_SSE2 __m128d _mm_sub_sd(__m128d a, __m128d b)
588{
589 a[0] -= b[0];
590 return a;
591}
592
593__INTRIN_INLINE_SSE2 __m128d _mm_sub_pd(__m128d a, __m128d b)
594{
595 return (__m128d)((__v2df)a - (__v2df)b);
596}
597
598__INTRIN_INLINE_SSE2 __m128d _mm_mul_sd(__m128d a, __m128d b)
599{
600 a[0] *= b[0];
601 return a;
602}
603
604__INTRIN_INLINE_SSE2 __m128d _mm_mul_pd(__m128d a, __m128d b)
605{
606 return (__m128d)((__v2df)a * (__v2df)b);
607}
608
609__INTRIN_INLINE_SSE2 __m128d _mm_div_sd(__m128d a, __m128d b)
610{
611 a[0] /= b[0];
612 return a;
613}
614
615__INTRIN_INLINE_SSE2 __m128d _mm_div_pd(__m128d a, __m128d b)
616{
617 return (__m128d)((__v2df)a / (__v2df)b);
618}
619
620__INTRIN_INLINE_SSE2 __m128d _mm_sqrt_sd(__m128d a, __m128d b)
621{
622 __m128d __c = __builtin_ia32_sqrtsd((__v2df)b);
623 return __extension__(__m128d){__c[0], a[1]};
624}
625
627{
628 return __builtin_ia32_sqrtpd((__v2df)a);
629}
630
631__INTRIN_INLINE_SSE2 __m128d _mm_min_sd(__m128d a, __m128d b)
632{
633 return __builtin_ia32_minsd((__v2df)a, (__v2df)b);
634}
635
636__INTRIN_INLINE_SSE2 __m128d _mm_min_pd(__m128d a, __m128d b)
637{
638 return __builtin_ia32_minpd((__v2df)a, (__v2df)b);
639}
640
641__INTRIN_INLINE_SSE2 __m128d _mm_max_sd(__m128d a, __m128d b)
642{
643 return __builtin_ia32_maxsd((__v2df)a, (__v2df)b);
644}
645
646__INTRIN_INLINE_SSE2 __m128d _mm_max_pd(__m128d a, __m128d b)
647{
648 return __builtin_ia32_maxpd((__v2df)a, (__v2df)b);
649}
650
651__INTRIN_INLINE_SSE2 __m128d _mm_and_pd(__m128d a, __m128d b)
652{
653 return (__m128d)((__v2du)a & (__v2du)b);
654}
655
656__INTRIN_INLINE_SSE2 __m128d _mm_andnot_pd(__m128d a, __m128d b)
657{
658 return (__m128d)(~(__v2du)a & (__v2du)b);
659}
660
661__INTRIN_INLINE_SSE2 __m128d _mm_or_pd(__m128d a, __m128d b)
662{
663 return (__m128d)((__v2du)a | (__v2du)b);
664}
665
666__INTRIN_INLINE_SSE2 __m128d _mm_xor_pd(__m128d a, __m128d b)
667{
668 return (__m128d)((__v2du)a ^ (__v2du)b);
669}
670
671__INTRIN_INLINE_SSE2 __m128d _mm_cmpeq_pd(__m128d a, __m128d b)
672{
673 return (__m128d)__builtin_ia32_cmpeqpd((__v2df)a, (__v2df)b);
674}
675
676__INTRIN_INLINE_SSE2 __m128d _mm_cmplt_pd(__m128d a, __m128d b)
677{
678 return (__m128d)__builtin_ia32_cmpltpd((__v2df)a, (__v2df)b);
679}
680
681__INTRIN_INLINE_SSE2 __m128d _mm_cmple_pd(__m128d a, __m128d b)
682{
683 return (__m128d)__builtin_ia32_cmplepd((__v2df)a, (__v2df)b);
684}
685
686__INTRIN_INLINE_SSE2 __m128d _mm_cmpgt_pd(__m128d a, __m128d b)
687{
688 return (__m128d)__builtin_ia32_cmpltpd((__v2df)b, (__v2df)a);
689}
690
691__INTRIN_INLINE_SSE2 __m128d _mm_cmpge_pd(__m128d a, __m128d b)
692{
693 return (__m128d)__builtin_ia32_cmplepd((__v2df)b, (__v2df)a);
694}
695
696__INTRIN_INLINE_SSE2 __m128d _mm_cmpord_pd(__m128d a, __m128d b)
697{
698 return (__m128d)__builtin_ia32_cmpordpd((__v2df)a, (__v2df)b);
699}
700
701__INTRIN_INLINE_SSE2 __m128d _mm_cmpunord_pd(__m128d a, __m128d b)
702{
703 return (__m128d)__builtin_ia32_cmpunordpd((__v2df)a, (__v2df)b);
704}
705
706__INTRIN_INLINE_SSE2 __m128d _mm_cmpneq_pd(__m128d a, __m128d b)
707{
708 return (__m128d)__builtin_ia32_cmpneqpd((__v2df)a, (__v2df)b);
709}
710
711__INTRIN_INLINE_SSE2 __m128d _mm_cmpnlt_pd(__m128d a, __m128d b)
712{
713 return (__m128d)__builtin_ia32_cmpnltpd((__v2df)a, (__v2df)b);
714}
715
716__INTRIN_INLINE_SSE2 __m128d _mm_cmpnle_pd(__m128d a, __m128d b)
717{
718 return (__m128d)__builtin_ia32_cmpnlepd((__v2df)a, (__v2df)b);
719}
720
721__INTRIN_INLINE_SSE2 __m128d _mm_cmpngt_pd(__m128d a, __m128d b)
722{
723 return (__m128d)__builtin_ia32_cmpnltpd((__v2df)b, (__v2df)a);
724}
725
726__INTRIN_INLINE_SSE2 __m128d _mm_cmpnge_pd(__m128d a, __m128d b)
727{
728 return (__m128d)__builtin_ia32_cmpnlepd((__v2df)b, (__v2df)a);
729}
730
731__INTRIN_INLINE_SSE2 __m128d _mm_cmpeq_sd(__m128d a, __m128d b)
732{
733 return (__m128d)__builtin_ia32_cmpeqsd((__v2df)a, (__v2df)b);
734}
735
736__INTRIN_INLINE_SSE2 __m128d _mm_cmplt_sd(__m128d a, __m128d b)
737{
738 return (__m128d)__builtin_ia32_cmpltsd((__v2df)a, (__v2df)b);
739}
740
741__INTRIN_INLINE_SSE2 __m128d _mm_cmple_sd(__m128d a, __m128d b)
742{
743 return (__m128d)__builtin_ia32_cmplesd((__v2df)a, (__v2df)b);
744}
745
746__INTRIN_INLINE_SSE2 __m128d _mm_cmpgt_sd(__m128d a, __m128d b)
747{
748 __m128d __c = __builtin_ia32_cmpltsd((__v2df)b, (__v2df)a);
749 return __extension__(__m128d){__c[0], a[1]};
750}
751
752__INTRIN_INLINE_SSE2 __m128d _mm_cmpge_sd(__m128d a, __m128d b)
753{
754 __m128d __c = __builtin_ia32_cmplesd((__v2df)b, (__v2df)a);
755 return __extension__(__m128d){__c[0], a[1]};
756}
757
758__INTRIN_INLINE_SSE2 __m128d _mm_cmpord_sd(__m128d a, __m128d b)
759{
760 return (__m128d)__builtin_ia32_cmpordsd((__v2df)a, (__v2df)b);
761}
762
763__INTRIN_INLINE_SSE2 __m128d _mm_cmpunord_sd(__m128d a, __m128d b)
764{
765 return (__m128d)__builtin_ia32_cmpunordsd((__v2df)a, (__v2df)b);
766}
767
768__INTRIN_INLINE_SSE2 __m128d _mm_cmpneq_sd(__m128d a, __m128d b)
769{
770 return (__m128d)__builtin_ia32_cmpneqsd((__v2df)a, (__v2df)b);
771}
772
773__INTRIN_INLINE_SSE2 __m128d _mm_cmpnlt_sd(__m128d a, __m128d b)
774{
775 return (__m128d)__builtin_ia32_cmpnltsd((__v2df)a, (__v2df)b);
776}
777
778__INTRIN_INLINE_SSE2 __m128d _mm_cmpnle_sd(__m128d a, __m128d b)
779{
780 return (__m128d)__builtin_ia32_cmpnlesd((__v2df)a, (__v2df)b);
781}
782
783__INTRIN_INLINE_SSE2 __m128d _mm_cmpngt_sd(__m128d a, __m128d b)
784{
785 __m128d __c = __builtin_ia32_cmpnltsd((__v2df)b, (__v2df)a);
786 return __extension__(__m128d){__c[0], a[1]};
787}
788
789__INTRIN_INLINE_SSE2 __m128d _mm_cmpnge_sd(__m128d a, __m128d b)
790{
791 __m128d __c = __builtin_ia32_cmpnlesd((__v2df)b, (__v2df)a);
792 return __extension__(__m128d){__c[0], a[1]};
793}
794
796{
797 return __builtin_ia32_comisdeq((__v2df)a, (__v2df)b);
798}
799
801{
802 return __builtin_ia32_comisdlt((__v2df)a, (__v2df)b);
803}
804
806{
807 return __builtin_ia32_comisdle((__v2df)a, (__v2df)b);
808}
809
811{
812 return __builtin_ia32_comisdgt((__v2df)a, (__v2df)b);
813}
814
816{
817 return __builtin_ia32_comisdge((__v2df)a, (__v2df)b);
818}
819
821{
822 return __builtin_ia32_comisdneq((__v2df)a, (__v2df)b);
823}
824
826{
827 return __builtin_ia32_ucomisdeq((__v2df)a, (__v2df)b);
828}
829
831{
832 return __builtin_ia32_ucomisdlt((__v2df)a, (__v2df)b);
833}
834
836{
837 return __builtin_ia32_ucomisdle((__v2df)a, (__v2df)b);
838}
839
841{
842 return __builtin_ia32_ucomisdgt((__v2df)a, (__v2df)b);
843}
844
846{
847 return __builtin_ia32_ucomisdge((__v2df)a, (__v2df)b);
848}
849
851{
852 return __builtin_ia32_ucomisdneq((__v2df)a, (__v2df)b);
853}
854
856{
857 return __builtin_ia32_cvtpd2ps((__v2df)a);
858}
859
861{
862#if HAS_BUILTIN(__builtin_convertvector)
863 return (__m128d)__builtin_convertvector(__builtin_shufflevector((__v4sf)a, (__v4sf)a, 0, 1), __v2df);
864#else
865 return __builtin_ia32_cvtps2pd(a);
866#endif
867}
868
870{
871#if HAS_BUILTIN(__builtin_convertvector)
872 return (__m128d)__builtin_convertvector(__builtin_shufflevector((__v4si)a, (__v4si)a, 0, 1), __v2df);
873#else
874 return __builtin_ia32_cvtdq2pd((__v4si)a);
875#endif
876}
877
879{
880 return (__m128i)__builtin_ia32_cvtpd2dq((__v2df)a);
881}
882
884{
885 return __builtin_ia32_cvtsd2si((__v2df)a);
886}
887
888__INTRIN_INLINE_SSE2 __m128 _mm_cvtsd_ss(__m128 a, __m128d b)
889{
890 return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)a, (__v2df)b);
891}
892
894 int b)
895{
896 a[0] = b;
897 return a;
898}
899
900__INTRIN_INLINE_SSE2 __m128d _mm_cvtss_sd(__m128d a, __m128 b)
901{
902 a[0] = b[0];
903 return a;
904}
905
907{
908 return (__m128i)__builtin_ia32_cvttpd2dq((__v2df)a);
909}
910
912{
913 return __builtin_ia32_cvttsd2si((__v2df)a);
914}
915
917{
918 return (__m64)__builtin_ia32_cvtpd2pi((__v2df)a);
919}
920
922{
923 return (__m64)__builtin_ia32_cvttpd2pi((__v2df)a);
924}
925
927{
928 return __builtin_ia32_cvtpi2pd((__v2si)a);
929}
930
932{
933 return a[0];
934}
935
936__INTRIN_INLINE_SSE2 __m128d _mm_load_pd(double const *dp)
937{
938 return *(const __m128d *)dp;
939}
940
941__INTRIN_INLINE_SSE2 __m128d _mm_load1_pd(double const *dp)
942{
943 struct __mm_load1_pd_struct {
944 double __u;
945 } __attribute__((__packed__, __may_alias__));
946 double __u = ((const struct __mm_load1_pd_struct *)dp)->__u;
947 return __extension__(__m128d){__u, __u};
948}
949
/*
 * GCC-compatible helper: builds a selector for the SHUFPD instruction.
 * fp1 ends up in bit 1 and fp0 in bit 0 of the result.
 */
#define _MM_SHUFFLE2(fp1, fp0) \
    ((fp0) | ((fp1) << 1))
954
955__INTRIN_INLINE_SSE2 __m128d _mm_loadr_pd(double const *dp)
956{
957#if HAS_BUILTIN(__builtin_shufflevector)
958 __m128d u = *(const __m128d *)dp;
959 return __builtin_shufflevector((__v2df)u, (__v2df)u, 1, 0);
960#else
961 return (__m128d){ dp[1], dp[0] };
962#endif
963}
964
965__INTRIN_INLINE_SSE2 __m128d _mm_loadu_pd(double const *dp)
966{
967 struct __loadu_pd {
968 __m128d_u __v;
969 } __attribute__((__packed__, __may_alias__));
970 return ((const struct __loadu_pd *)dp)->__v;
971}
972
974{
975 struct __loadu_si64 {
976 long long __v;
977 } __attribute__((__packed__, __may_alias__));
978 long long __u = ((const struct __loadu_si64 *)a)->__v;
979 return __extension__(__m128i)(__v2di){__u, 0LL};
980}
981
983{
984 struct __loadu_si32 {
985 int __v;
986 } __attribute__((__packed__, __may_alias__));
987 int __u = ((const struct __loadu_si32 *)a)->__v;
988 return __extension__(__m128i)(__v4si){__u, 0, 0, 0};
989}
990
992{
993 struct __loadu_si16 {
994 short __v;
995 } __attribute__((__packed__, __may_alias__));
996 short __u = ((const struct __loadu_si16 *)a)->__v;
997 return __extension__(__m128i)(__v8hi){__u, 0, 0, 0, 0, 0, 0, 0};
998}
999
1000__INTRIN_INLINE_SSE2 __m128d _mm_load_sd(double const *dp)
1001{
1002 struct __mm_load_sd_struct {
1003 double __u;
1004 } __attribute__((__packed__, __may_alias__));
1005 double __u = ((const struct __mm_load_sd_struct *)dp)->__u;
1006 return __extension__(__m128d){__u, 0};
1007}
1008
1009__INTRIN_INLINE_SSE2 __m128d _mm_loadh_pd(__m128d a, double const *dp)
1010{
1011 struct __mm_loadh_pd_struct {
1012 double __u;
1013 } __attribute__((__packed__, __may_alias__));
1014 double __u = ((const struct __mm_loadh_pd_struct *)dp)->__u;
1015 return __extension__(__m128d){a[0], __u};
1016}
1017
1018__INTRIN_INLINE_SSE2 __m128d _mm_loadl_pd(__m128d a, double const *dp)
1019{
1020 struct __mm_loadl_pd_struct {
1021 double __u;
1022 } __attribute__((__packed__, __may_alias__));
1023 double __u = ((const struct __mm_loadl_pd_struct *)dp)->__u;
1024 return __extension__(__m128d){__u, a[1]};
1025}
1026
1028{
1029#if HAS_BUILTIN(__builtin_ia32_undef128)
1030 return (__m128d)__builtin_ia32_undef128();
1031#else
1032 __m128d undef = undef;
1033 return undef;
1034#endif
1035}
1036
1038{
1039 return __extension__(__m128d){w, 0};
1040}
1041
1043{
1044 return __extension__(__m128d){w, w};
1045}
1046
1047__INTRIN_INLINE_SSE2 __m128d _mm_set_pd(double w, double x)
1048{
1049 return __extension__(__m128d){x, w};
1050}
1051
1052__INTRIN_INLINE_SSE2 __m128d _mm_setr_pd(double w, double x)
1053{
1054 return __extension__(__m128d){w, x};
1055}
1056
1058{
1059 return __extension__(__m128d){0, 0};
1060}
1061
1062__INTRIN_INLINE_SSE2 __m128d _mm_move_sd(__m128d a, __m128d b)
1063{
1064 a[0] = b[0];
1065 return a;
1066}
1067
1068__INTRIN_INLINE_SSE2 void _mm_store_sd(double *dp, __m128d a)
1069{
1070 struct __mm_store_sd_struct {
1071 double __u;
1072 } __attribute__((__packed__, __may_alias__));
1073 ((struct __mm_store_sd_struct *)dp)->__u = a[0];
1074}
1075
1076__INTRIN_INLINE_SSE2 void _mm_store_pd(double *dp, __m128d a)
1077{
1078 *(__m128d *)dp = a;
1079}
1080
1081__INTRIN_INLINE_SSE2 void _mm_store1_pd(double *dp, __m128d a)
1082{
1083#if HAS_BUILTIN(__builtin_shufflevector)
1084 a = __builtin_shufflevector((__v2df)a, (__v2df)a, 0, 0);
1085 _mm_store_pd(dp, a);
1086#else
1087 dp[0] = a[0];
1088 dp[1] = a[0];
1089#endif
1090}
1091
1092__INTRIN_INLINE_SSE2 void _mm_storeu_pd(double *dp, __m128d a)
1093{
1094 struct __storeu_pd {
1095 __m128d_u __v;
1096 } __attribute__((__packed__, __may_alias__));
1097 ((struct __storeu_pd *)dp)->__v = a;
1098}
1099
1100__INTRIN_INLINE_SSE2 void _mm_storer_pd(double *dp, __m128d a)
1101{
1102#if HAS_BUILTIN(__builtin_shufflevector)
1103 a = __builtin_shufflevector((__v2df)a, (__v2df)a, 1, 0);
1104 *(__m128d *)dp = a;
1105#else
1106 dp[0] = a[1];
1107 dp[1] = a[0];
1108#endif
1109}
1110
1111__INTRIN_INLINE_SSE2 void _mm_storeh_pd(double *dp, __m128d a)
1112{
1113 struct __mm_storeh_pd_struct {
1114 double __u;
1115 } __attribute__((__packed__, __may_alias__));
1116 ((struct __mm_storeh_pd_struct *)dp)->__u = a[1];
1117}
1118
1119__INTRIN_INLINE_SSE2 void _mm_storel_pd(double *dp, __m128d a)
1120{
1121 struct __mm_storeh_pd_struct {
1122 double __u;
1123 } __attribute__((__packed__, __may_alias__));
1124 ((struct __mm_storeh_pd_struct *)dp)->__u = a[0];
1125}
1126
1127__INTRIN_INLINE_SSE2 __m128i _mm_add_epi8(__m128i a, __m128i b)
1128{
1129 return (__m128i)((__v16qu)a + (__v16qu)b);
1130}
1131
1132__INTRIN_INLINE_SSE2 __m128i _mm_add_epi16(__m128i a, __m128i b)
1133{
1134 return (__m128i)((__v8hu)a + (__v8hu)b);
1135}
1136
1137__INTRIN_INLINE_SSE2 __m128i _mm_add_epi32(__m128i a, __m128i b)
1138{
1139 return (__m128i)((__v4su)a + (__v4su)b);
1140}
1141
1143{
1144 return (__m64)__builtin_ia32_paddq((__v1di)a, (__v1di)b);
1145}
1146
1147__INTRIN_INLINE_SSE2 __m128i _mm_add_epi64(__m128i a, __m128i b)
1148{
1149 return (__m128i)((__v2du)a + (__v2du)b);
1150}
1151
1152__INTRIN_INLINE_SSE2 __m128i _mm_adds_epi8(__m128i a, __m128i b)
1153{
1154#if HAS_BUILTIN(__builtin_elementwise_add_sat)
1155 return (__m128i)__builtin_elementwise_add_sat((__v16qs)a, (__v16qs)b);
1156#else
1157 return (__m128i)__builtin_ia32_paddsb128((__v16qi)a, (__v16qi)b);
1158#endif
1159}
1160
1161__INTRIN_INLINE_SSE2 __m128i _mm_adds_epi16(__m128i a, __m128i b)
1162{
1163#if HAS_BUILTIN(__builtin_elementwise_add_sat)
1164 return (__m128i)__builtin_elementwise_add_sat((__v8hi)a, (__v8hi)b);
1165#else
1166 return (__m128i)__builtin_ia32_paddsw128((__v8hi)a, (__v8hi)b);
1167#endif
1168}
1169
1170__INTRIN_INLINE_SSE2 __m128i _mm_adds_epu8(__m128i a, __m128i b)
1171{
1172#if HAS_BUILTIN(__builtin_elementwise_add_sat)
1173 return (__m128i)__builtin_elementwise_add_sat((__v16qu)a, (__v16qu)b);
1174#else
1175 return (__m128i)__builtin_ia32_paddusb128((__v16qi)a, (__v16qi)b);
1176#endif
1177}
1178
1179__INTRIN_INLINE_SSE2 __m128i _mm_adds_epu16(__m128i a, __m128i b)
1180{
1181#if HAS_BUILTIN(__builtin_elementwise_add_sat)
1182 return (__m128i)__builtin_elementwise_add_sat((__v8hu)a, (__v8hu)b);
1183#else
1184 return (__m128i)__builtin_ia32_paddusw128((__v8hi)a, (__v8hi)b);
1185#endif
1186}
1187
1188__INTRIN_INLINE_SSE2 __m128i _mm_avg_epu8(__m128i a, __m128i b)
1189{
1190 return (__m128i)__builtin_ia32_pavgb128((__v16qi)a, (__v16qi)b);
1191}
1192
1193__INTRIN_INLINE_SSE2 __m128i _mm_avg_epu16(__m128i a, __m128i b)
1194{
1195 return (__m128i)__builtin_ia32_pavgw128((__v8hi)a, (__v8hi)b);
1196}
1197
1198__INTRIN_INLINE_SSE2 __m128i _mm_madd_epi16(__m128i a, __m128i b)
1199{
1200 return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)a, (__v8hi)b);
1201}
1202
1203__INTRIN_INLINE_SSE2 __m128i _mm_max_epi16(__m128i a, __m128i b)
1204{
1205#if HAS_BUILTIN(__builtin_elementwise_max)
1206 return (__m128i)__builtin_elementwise_max((__v8hi)a, (__v8hi)b);
1207#else
1208 return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)a, (__v8hi)b);
1209#endif
1210}
1211
1212__INTRIN_INLINE_SSE2 __m128i _mm_max_epu8(__m128i a, __m128i b)
1213{
1214#if HAS_BUILTIN(__builtin_elementwise_max)
1215 return (__m128i)__builtin_elementwise_max((__v16qu)a, (__v16qu)b);
1216#else
1217 return (__m128i)__builtin_ia32_pmaxub128((__v16qi)a, (__v16qi)b);
1218#endif
1219}
1220
1221__INTRIN_INLINE_SSE2 __m128i _mm_min_epi16(__m128i a, __m128i b)
1222{
1223#if HAS_BUILTIN(__builtin_elementwise_min)
1224 return (__m128i)__builtin_elementwise_min((__v8hi)a, (__v8hi)b);
1225#else
1226 return (__m128i)__builtin_ia32_pminsw128((__v8hi)a, (__v8hi)b);
1227#endif
1228}
1229
1230__INTRIN_INLINE_SSE2 __m128i _mm_min_epu8(__m128i a, __m128i b)
1231{
1232#if HAS_BUILTIN(__builtin_elementwise_min)
1233 return (__m128i)__builtin_elementwise_min((__v16qu)a, (__v16qu)b);
1234#else
1235 return (__m128i)__builtin_ia32_pminub128((__v16qi)a, (__v16qi)b);
1236#endif
1237}
1238
1239__INTRIN_INLINE_SSE2 __m128i _mm_mulhi_epi16(__m128i a, __m128i b)
1240{
1241 return (__m128i)__builtin_ia32_pmulhw128((__v8hi)a, (__v8hi)b);
1242}
1243
1244__INTRIN_INLINE_SSE2 __m128i _mm_mulhi_epu16(__m128i a, __m128i b)
1245{
1246 return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)a, (__v8hi)b);
1247}
1248
1249__INTRIN_INLINE_SSE2 __m128i _mm_mullo_epi16(__m128i a, __m128i b)
1250{
1251 return (__m128i)((__v8hu)a * (__v8hu)b);
1252}
1253
1255{
1256 return (__m64)__builtin_ia32_pmuludq((__v2si)a, (__v2si)b);
1257}
1258
1259__INTRIN_INLINE_SSE2 __m128i _mm_mul_epu32(__m128i a, __m128i b)
1260{
1261 return __builtin_ia32_pmuludq128((__v4si)a, (__v4si)b);
1262}
1263
1264__INTRIN_INLINE_SSE2 __m128i _mm_sad_epu8(__m128i a, __m128i b)
1265{
1266 return __builtin_ia32_psadbw128((__v16qi)a, (__v16qi)b);
1267}
1268
1269__INTRIN_INLINE_SSE2 __m128i _mm_sub_epi8(__m128i a, __m128i b)
1270{
1271 return (__m128i)((__v16qu)a - (__v16qu)b);
1272}
1273
1274__INTRIN_INLINE_SSE2 __m128i _mm_sub_epi16(__m128i a, __m128i b)
1275{
1276 return (__m128i)((__v8hu)a - (__v8hu)b);
1277}
1278
1279__INTRIN_INLINE_SSE2 __m128i _mm_sub_epi32(__m128i a, __m128i b)
1280{
1281 return (__m128i)((__v4su)a - (__v4su)b);
1282}
1283
1285{
1286 return (__m64)__builtin_ia32_psubq((__v1di)a, (__v1di)b);
1287}
1288
1289__INTRIN_INLINE_SSE2 __m128i _mm_sub_epi64(__m128i a, __m128i b)
1290{
1291 return (__m128i)((__v2du)a - (__v2du)b);
1292}
1293
1294__INTRIN_INLINE_SSE2 __m128i _mm_subs_epi8(__m128i a, __m128i b)
1295{
1296#if HAS_BUILTIN(__builtin_elementwise_sub_sat)
1297 return (__m128i)__builtin_elementwise_sub_sat((__v16qs)a, (__v16qs)b);
1298#else
1299 return (__m128i)__builtin_ia32_psubsb128((__v16qi)a, (__v16qi)b);
1300#endif
1301}
1302
1303__INTRIN_INLINE_SSE2 __m128i _mm_subs_epi16(__m128i a, __m128i b)
1304{
1305#if HAS_BUILTIN(__builtin_elementwise_sub_sat)
1306 return (__m128i)__builtin_elementwise_sub_sat((__v8hi)a, (__v8hi)b);
1307#else
1308 return (__m128i)__builtin_ia32_psubsw128((__v8hi)a, (__v8hi)b);
1309#endif
1310}
1311
1312__INTRIN_INLINE_SSE2 __m128i _mm_subs_epu8(__m128i a, __m128i b)
1313{
1314#if HAS_BUILTIN(__builtin_elementwise_sub_sat)
1315 return (__m128i)__builtin_elementwise_sub_sat((__v16qu)a, (__v16qu)b);
1316#else
1317 return (__m128i)__builtin_ia32_psubusb128((__v16qi)a, (__v16qi)b);
1318#endif
1319}
1320
1321__INTRIN_INLINE_SSE2 __m128i _mm_subs_epu16(__m128i a, __m128i b)
1322{
1323#if HAS_BUILTIN(__builtin_elementwise_sub_sat)
1324 return (__m128i)__builtin_elementwise_sub_sat((__v8hu)a, (__v8hu)b);
1325#else
1326 return (__m128i)__builtin_ia32_psubusw128((__v8hi)a, (__v8hi)b);
1327#endif
1328}
1329
1330__INTRIN_INLINE_SSE2 __m128i _mm_and_si128(__m128i a, __m128i b)
1331{
1332 return (__m128i)((__v2du)a & (__v2du)b);
1333}
1334
1335__INTRIN_INLINE_SSE2 __m128i _mm_andnot_si128(__m128i a, __m128i b)
1336{
1337 return (__m128i)(~(__v2du)a & (__v2du)b);
1338}
1339
1340__INTRIN_INLINE_SSE2 __m128i _mm_or_si128(__m128i a, __m128i b)
1341{
1342 return (__m128i)((__v2du)a | (__v2du)b);
1343}
1344
1345__INTRIN_INLINE_SSE2 __m128i _mm_xor_si128(__m128i a, __m128i b)
1346{
1347 return (__m128i)((__v2du)a ^ (__v2du)b);
1348}
1349
1350#ifdef __clang__
1351#define _mm_slli_si128(a, imm) \
1352 ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)))
1353#else
1354__INTRIN_INLINE_SSE2 __m128i _mm_slli_si128(__m128i a, const int imm)
1355{
1356 return (__m128i)__builtin_ia32_pslldqi128(a, imm * 8);
1357}
1358#endif
1359
1361{
1362 return (__m128i)__builtin_ia32_psllwi128((__v8hi)a, count);
1363}
1364
1366{
1367 return (__m128i)__builtin_ia32_psllw128((__v8hi)a, (__v8hi)count);
1368}
1369
1371{
1372 return (__m128i)__builtin_ia32_pslldi128((__v4si)a, count);
1373}
1374
1376{
1377 return (__m128i)__builtin_ia32_pslld128((__v4si)a, (__v4si)count);
1378}
1379
1381{
1382 return __builtin_ia32_psllqi128((__v2di)a, count);
1383}
1384
1386{
1387 return __builtin_ia32_psllq128((__v2di)a, (__v2di)count);
1388}
1389
1391{
1392 return (__m128i)__builtin_ia32_psrawi128((__v8hi)a, count);
1393}
1394
1396{
1397 return (__m128i)__builtin_ia32_psraw128((__v8hi)a, (__v8hi)count);
1398}
1399
1401{
1402 return (__m128i)__builtin_ia32_psradi128((__v4si)a, count);
1403}
1404
1406{
1407 return (__m128i)__builtin_ia32_psrad128((__v4si)a, (__v4si)count);
1408}
1409
1410#ifdef __clang__
1411#define _mm_srli_si128(a, imm) \
1412 ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)))
1413#else
1414__INTRIN_INLINE_SSE2 __m128i _mm_srli_si128(__m128i a, const int imm)
1415{
1416 return (__m128i)__builtin_ia32_psrldqi128(a, imm * 8);
1417}
1418#endif
1419
1421{
1422 return (__m128i)__builtin_ia32_psrlwi128((__v8hi)a, count);
1423}
1424
1426{
1427 return (__m128i)__builtin_ia32_psrlw128((__v8hi)a, (__v8hi)count);
1428}
1429
1431{
1432 return (__m128i)__builtin_ia32_psrldi128((__v4si)a, count);
1433}
1434
1436{
1437 return (__m128i)__builtin_ia32_psrld128((__v4si)a, (__v4si)count);
1438}
1439
1441{
1442 return __builtin_ia32_psrlqi128((__v2di)a, count);
1443}
1444
1446{
1447 return __builtin_ia32_psrlq128((__v2di)a, (__v2di)count);
1448}
1449
1450__INTRIN_INLINE_SSE2 __m128i _mm_cmpeq_epi8(__m128i a, __m128i b)
1451{
1452 return (__m128i)((__v16qi)a == (__v16qi)b);
1453}
1454
1455__INTRIN_INLINE_SSE2 __m128i _mm_cmpeq_epi16(__m128i a, __m128i b)
1456{
1457 return (__m128i)((__v8hi)a == (__v8hi)b);
1458}
1459
1460__INTRIN_INLINE_SSE2 __m128i _mm_cmpeq_epi32(__m128i a, __m128i b)
1461{
1462 return (__m128i)((__v4si)a == (__v4si)b);
1463}
1464
1465__INTRIN_INLINE_SSE2 __m128i _mm_cmpgt_epi8(__m128i a, __m128i b)
1466{
1467 /* This function always performs a signed comparison, but __v16qi is a char
1468 which may be signed or unsigned, so use __v16qs. */
1469 return (__m128i)((__v16qs)a > (__v16qs)b);
1470}
1471
1472__INTRIN_INLINE_SSE2 __m128i _mm_cmpgt_epi16(__m128i a, __m128i b)
1473{
1474 return (__m128i)((__v8hi)a > (__v8hi)b);
1475}
1476
1477__INTRIN_INLINE_SSE2 __m128i _mm_cmpgt_epi32(__m128i a, __m128i b)
1478{
1479 return (__m128i)((__v4si)a > (__v4si)b);
1480}
1481
1482__INTRIN_INLINE_SSE2 __m128i _mm_cmplt_epi8(__m128i a, __m128i b)
1483{
1484 return _mm_cmpgt_epi8(b, a);
1485}
1486
1487__INTRIN_INLINE_SSE2 __m128i _mm_cmplt_epi16(__m128i a, __m128i b)
1488{
1489 return _mm_cmpgt_epi16(b, a);
1490}
1491
1492__INTRIN_INLINE_SSE2 __m128i _mm_cmplt_epi32(__m128i a, __m128i b)
1493{
1494 return _mm_cmpgt_epi32(b, a);
1495}
1496
1497#ifdef _M_AMD64
1498
1499__INTRIN_INLINE_SSE2 __m128d _mm_cvtsi64_sd(__m128d a, long long b)
1500{
1501 a[0] = b;
1502 return a;
1503}
1504
1505__INTRIN_INLINE_SSE2 long long _mm_cvtsd_si64(__m128d a)
1506{
1507 return __builtin_ia32_cvtsd2si64((__v2df)a);
1508}
1509
1510__INTRIN_INLINE_SSE2 long long _mm_cvttsd_si64(__m128d a)
1511{
1512 return __builtin_ia32_cvttsd2si64((__v2df)a);
1513}
1514#endif
1515
1517{
1518#if HAS_BUILTIN(__builtin_convertvector)
1519 return (__m128)__builtin_convertvector((__v4si)a, __v4sf);
1520#else
1521 return __builtin_ia32_cvtdq2ps((__v4si)a);
1522#endif
1523}
1524
1526{
1527 return (__m128i)__builtin_ia32_cvtps2dq((__v4sf)a);
1528}
1529
1531{
1532 return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)a);
1533}
1534
1536{
1537 return __extension__(__m128i)(__v4si){a, 0, 0, 0};
1538}
1539
1541{
1542 return __extension__(__m128i)(__v2di){a, 0};
1543}
1544
1546{
1547 __v4si b = (__v4si)a;
1548 return b[0];
1549}
1550
1552{
1553 return a[0];
1554}
1555
1557{
1558 return *p;
1559}
1560
1561__INTRIN_INLINE_SSE2 __m128i _mm_loadu_si128(__m128i_u const *p)
1562{
1563 struct __loadu_si128 {
1564 __m128i_u __v;
1565 } __attribute__((__packed__, __may_alias__));
1566 return ((const struct __loadu_si128 *)p)->__v;
1567}
1568
1569__INTRIN_INLINE_SSE2 __m128i _mm_loadl_epi64(__m128i_u const *p)
1570{
1571 struct __mm_loadl_epi64_struct {
1572 long long __u;
1573 } __attribute__((__packed__, __may_alias__));
1574 return __extension__(__m128i){
1575 ((const struct __mm_loadl_epi64_struct *)p)->__u, 0};
1576}
1577
1579{
1580#if HAS_BUILTIN(__builtin_ia32_undef128)
1581 return (__m128i)__builtin_ia32_undef128();
1582#else
1583 __m128i undef = undef;
1584 return undef;
1585#endif
1586}
1587
1588__INTRIN_INLINE_SSE2 __m128i _mm_set_epi64x(long long q1, long long q0)
1589{
1590 return __extension__(__m128i)(__v2di){q0, q1};
1591}
1592
1593__INTRIN_INLINE_SSE2 __m128i _mm_set_epi64(__m64 q1, __m64 q0)
1594{
1595 return _mm_set_epi64x((long long)q1, (long long)q0);
1596}
1597
1598__INTRIN_INLINE_SSE2 __m128i _mm_set_epi32(int i3, int i2, int i1, int i0)
1599{
1600 return __extension__(__m128i)(__v4si){i0, i1, i2, i3};
1601}
1602
1604 short w7, short w6, short w5, short w4,
1605 short w3, short w2, short w1, short w0)
1606{
1607 return __extension__(__m128i)(__v8hi){w0, w1, w2, w3, w4, w5, w6, w7};
1608}
1609
1611 char b15, char b14, char b13, char b12,
1612 char b11, char b10, char b9, char b8,
1613 char b7, char b6, char b5, char b4,
1614 char b3, char b2, char b1, char b0)
1615{
1616 return __extension__(__m128i)(__v16qi){
1617 b0, b1, b2, b3, b4, b5, b6, b7,
1618 b8, b9, b10, b11, b12, b13, b14, b15};
1619}
1620
1622{
1623 return _mm_set_epi64x(q, q);
1624}
1625
1627{
1628 return _mm_set_epi64(q, q);
1629}
1630
1632{
1633 return _mm_set_epi32(i, i, i, i);
1634}
1635
1637{
1638 return _mm_set_epi16(w, w, w, w, w, w, w, w);
1639}
1640
1642{
1643 return _mm_set_epi8(b, b, b, b, b, b, b, b, b, b, b,
1644 b, b, b, b, b);
1645}
1646
1647__INTRIN_INLINE_SSE2 __m128i _mm_setr_epi64(__m64 q0, __m64 q1)
1648{
1649 return _mm_set_epi64(q1, q0);
1650}
1651
1652__INTRIN_INLINE_SSE2 __m128i _mm_setr_epi32(int i0, int i1, int i2, int i3)
1653{
1654 return _mm_set_epi32(i3, i2, i1, i0);
1655}
1656
1658 short w0, short w1, short w2, short w3,
1659 short w4, short w5, short w6, short w7)
1660{
1661 return _mm_set_epi16(w7, w6, w5, w4, w3, w2, w1, w0);
1662}
1663
1665 char b0, char b1, char b2, char b3,
1666 char b4, char b5, char b6, char b7,
1667 char b8, char b9, char b10, char b11,
1668 char b12, char b13, char b14, char b15)
1669{
1670 return _mm_set_epi8(b15, b14, b13, b12, b11, b10, b9, b8,
1671 b7, b6, b5, b4, b3, b2, b1, b0);
1672}
1673
1675{
1676 return __extension__(__m128i)(__v2di){0LL, 0LL};
1677}
1678
1680{
1681 *p = b;
1682}
1683
1684__INTRIN_INLINE_SSE2 void _mm_storeu_si128(__m128i_u *p, __m128i b)
1685{
1686 struct __storeu_si128 {
1687 __m128i_u __v;
1688 } __attribute__((__packed__, __may_alias__));
1689 ((struct __storeu_si128 *)p)->__v = b;
1690}
1691
1693{
1694 struct __storeu_si64 {
1695 long long __v;
1696 } __attribute__((__packed__, __may_alias__));
1697 ((struct __storeu_si64 *)p)->__v = ((__v2di)b)[0];
1698}
1699
1701{
1702 struct __storeu_si32 {
1703 int __v;
1704 } __attribute__((__packed__, __may_alias__));
1705 ((struct __storeu_si32 *)p)->__v = ((__v4si)b)[0];
1706}
1707
1709{
1710 struct __storeu_si16 {
1711 short __v;
1712 } __attribute__((__packed__, __may_alias__));
1713 ((struct __storeu_si16 *)p)->__v = ((__v8hi)b)[0];
1714}
1715
1716__INTRIN_INLINE_SSE2 void _mm_maskmoveu_si128(__m128i d, __m128i n, char *p)
1717{
1718 __builtin_ia32_maskmovdqu((__v16qi)d, (__v16qi)n, p);
1719}
1720
1721__INTRIN_INLINE_SSE2 void _mm_storel_epi64(__m128i_u *p, __m128i a)
1722{
1723 struct __mm_storel_epi64_struct {
1724 long long __u;
1725 } __attribute__((__packed__, __may_alias__));
1726 ((struct __mm_storel_epi64_struct *)p)->__u = a[0];
1727}
1728
1730{
1731#if HAS_BUILTIN(__builtin_nontemporal_store)
1732 __builtin_nontemporal_store((__v2df)a, (__v2df *)p);
1733#else
1734 __builtin_ia32_movntpd(p, a);
1735#endif
1736}
1737
1739{
1740#if HAS_BUILTIN(__builtin_nontemporal_store)
1741 __builtin_nontemporal_store((__v2di)a, (__v2di*)p);
1742#else
1743 __builtin_ia32_movntdq(p, a);
1744#endif
1745}
1746
1748{
1749 __builtin_ia32_movnti(p, a);
1750}
1751
1752#ifdef _M_AMD64
1753__INTRIN_INLINE_SSE2 void _mm_stream_si64(long long *p, long long a)
1754{
1755 __builtin_ia32_movnti64(p, a);
1756}
1757#endif
1758
1759void _mm_clflush(void const *p);
1760
1761void _mm_lfence(void);
1762
1763void _mm_mfence(void);
1764
1765__INTRIN_INLINE_SSE2 __m128i _mm_packs_epi16(__m128i a, __m128i b)
1766{
1767 return (__m128i)__builtin_ia32_packsswb128((__v8hi)a, (__v8hi)b);
1768}
1769
1770__INTRIN_INLINE_SSE2 __m128i _mm_packs_epi32(__m128i a, __m128i b)
1771{
1772 return (__m128i)__builtin_ia32_packssdw128((__v4si)a, (__v4si)b);
1773}
1774
1775__INTRIN_INLINE_SSE2 __m128i _mm_packus_epi16(__m128i a, __m128i b)
1776{
1777 return (__m128i)__builtin_ia32_packuswb128((__v8hi)a, (__v8hi)b);
1778}
1779
/*
 * Extracts the 16-bit lane selected by idx from vec. The value is narrowed to
 * unsigned short before being widened to int, so the result is zero-extended.
 */
#define _mm_extract_epi16(vec, idx) \
    ((int)(unsigned short)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(vec), \
                                                      (int)(idx)))
1783
/* Returns vec with the 16-bit lane selected by idx replaced by val (passed as int). */
#define _mm_insert_epi16(vec, val, idx) \
    ((__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(vec), (int)(val), \
                                          (int)(idx)))
1787
1789{
1790 return __builtin_ia32_pmovmskb128((__v16qi)a);
1791}
1792
/* Forwards to the PSHUFD builtin: vec is viewed as __v4si, sel is the selector passed as int. */
#define _mm_shuffle_epi32(vec, sel) \
    ((__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(vec), (int)(sel)))
1795
/* Forwards to the PSHUFLW builtin: vec is viewed as __v8hi, sel is the selector passed as int. */
#define _mm_shufflelo_epi16(vec, sel) \
    ((__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(vec), (int)(sel)))
1798
/* Forwards to the PSHUFHW builtin: vec is viewed as __v8hi, sel is the selector passed as int. */
#define _mm_shufflehi_epi16(vec, sel) \
    ((__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(vec), (int)(sel)))
1801
1803{
1804#if HAS_BUILTIN(__builtin_shufflevector)
1805 return (__m128i)__builtin_shufflevector(
1806 (__v16qi)a, (__v16qi)b, 8, 16 + 8, 9, 16 + 9, 10, 16 + 10, 11,
1807 16 + 11, 12, 16 + 12, 13, 16 + 13, 14, 16 + 14, 15, 16 + 15);
1808#else
1809 return (__m128i)__builtin_ia32_punpckhbw128((__v16qi)a, (__v16qi)b);
1810#endif
1811}
1812
1814{
1815#if HAS_BUILTIN(__builtin_shufflevector)
1816 return (__m128i)__builtin_shufflevector((__v8hi)a, (__v8hi)b, 4, 8 + 4, 5,
1817 8 + 5, 6, 8 + 6, 7, 8 + 7);
1818#else
1819 return (__m128i)__builtin_ia32_punpckhwd128((__v8hi)a, (__v8hi)b);
1820#endif
1821}
1822
1824{
1825#if HAS_BUILTIN(__builtin_shufflevector)
1826 return (__m128i)__builtin_shufflevector((__v4si)a, (__v4si)b, 2, 4 + 2, 3,
1827 4 + 3);
1828#else
1829 return (__m128i)__builtin_ia32_punpckhdq128((__v4si)a, (__v4si)b);
1830#endif
1831}
1832
1834{
1835#if HAS_BUILTIN(__builtin_shufflevector)
1836 return (__m128i)__builtin_shufflevector((__v2di)a, (__v2di)b, 1, 2 + 1);
1837#else
1838 return (__m128i)__builtin_ia32_punpckhqdq128((__v2di)a, (__v2di)b);
1839#endif
1840}
1841
1843{
1844#if HAS_BUILTIN(__builtin_shufflevector)
1845 return (__m128i)__builtin_shufflevector(
1846 (__v16qi)a, (__v16qi)b, 0, 16 + 0, 1, 16 + 1, 2, 16 + 2, 3, 16 + 3, 4,
1847 16 + 4, 5, 16 + 5, 6, 16 + 6, 7, 16 + 7);
1848#else
1849 return (__m128i)__builtin_ia32_punpcklbw128((__v16qi)a, (__v16qi)b);
1850#endif
1851}
1852
1854{
1855#if HAS_BUILTIN(__builtin_shufflevector)
1856 return (__m128i)__builtin_shufflevector((__v8hi)a, (__v8hi)b, 0, 8 + 0, 1,
1857 8 + 1, 2, 8 + 2, 3, 8 + 3);
1858#else
1859 return (__m128i)__builtin_ia32_punpcklwd128((__v8hi)a, (__v8hi)b);
1860#endif
1861}
1862
1864{
1865#if HAS_BUILTIN(__builtin_shufflevector)
1866 return (__m128i)__builtin_shufflevector((__v4si)a, (__v4si)b, 0, 4 + 0, 1,
1867 4 + 1);
1868#else
1869 return (__m128i)__builtin_ia32_punpckldq128((__v4si)a, (__v4si)b);
1870#endif
1871}
1872
1874{
1875#if HAS_BUILTIN(__builtin_shufflevector)
1876 return (__m128i)__builtin_shufflevector((__v2di)a, (__v2di)b, 0, 2 + 0);
1877#else
1878 return (__m128i)__builtin_ia32_punpcklqdq128((__v2di)a, (__v2di)b);
1879#endif
1880}
1881
1883{
1884 return (__m64)a[0];
1885}
1886
1888{
1889 return __extension__(__m128i)(__v2di){(long long)a, 0};
1890}
1891
1893{
1894#if HAS_BUILTIN(__builtin_shufflevector)
1895 return __builtin_shufflevector((__v2di)a, _mm_setzero_si128(), 0, 2);
1896#else
1897 return (__m128i)__builtin_ia32_movq128((__v2di)a);
1898#endif
1899}
1900
1901__INTRIN_INLINE_SSE2 __m128d _mm_unpackhi_pd(__m128d a, __m128d b)
1902{
1903#if HAS_BUILTIN(__builtin_shufflevector)
1904 return __builtin_shufflevector((__v2df)a, (__v2df)b, 1, 2 + 1);
1905#else
1906 return (__m128d)__builtin_ia32_unpckhpd((__v2df)a, (__v2df)b);
1907#endif
1908}
1909
1910__INTRIN_INLINE_SSE2 __m128d _mm_unpacklo_pd(__m128d a, __m128d b)
1911{
1912#if HAS_BUILTIN(__builtin_shufflevector)
1913 return __builtin_shufflevector((__v2df)a, (__v2df)b, 0, 2 + 0);
1914#else
1915 return (__m128d)__builtin_ia32_unpcklpd((__v2df)a, (__v2df)b);
1916#endif
1917}
1918
1920{
1921 return __builtin_ia32_movmskpd((__v2df)a);
1922}
1923
/*
 * Forwards to the SHUFPD builtin: lo and hi are viewed as __v2df and sel is the
 * selector passed as int.
 */
#define _mm_shuffle_pd(lo, hi, sel) \
    ((__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(lo), (__v2df)(__m128d)(hi), \
                                    (int)(sel)))
1927
1929{
1930 return (__m128)a;
1931}
1932
1934{
1935 return (__m128i)a;
1936}
1937
1939{
1940 return (__m128d)a;
1941}
1942
1944{
1945 return (__m128i)a;
1946}
1947
1949{
1950 return (__m128)a;
1951}
1952
1954{
1955 return (__m128d)a;
1956}
1957
1958void _mm_pause(void);
1959
1960#endif /* _MSC_VER */
1961
1962#ifdef __cplusplus
1963} // extern "C"
1964#endif
1965
1966#endif /* _INCLUDED_EMM */
#define _DECLSPEC_INTRIN_TYPE
Definition: _mingw.h:231
#define __int8
Definition: basetyps.h:25
#define __int16
Definition: basetyps.h:22
#define __int64
Definition: basetyps.h:16
#define __int32
Definition: basetyps.h:19
int align(int length, int align)
Definition: dsound8.c:36
#define __INTRIN_INLINE_MMXSSE2
Definition: emmintrin.h:74
__m128 _mm_cvtpd_ps(__m128d a)
Definition: emmintrin.h:855
__m128d _mm_cmpnge_sd(__m128d a, __m128d b)
Definition: emmintrin.h:789
void _mm_storeu_pd(double *dp, __m128d a)
Definition: emmintrin.h:1092
__m128d _mm_add_sd(__m128d a, __m128d b)
Definition: emmintrin.h:576
void _mm_storeu_si128(__m128i_u *p, __m128i b)
Definition: emmintrin.h:1684
__m128i _mm_set_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0)
Definition: emmintrin.h:1610
__m128i _mm_cmpgt_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1465
__m128i _mm_set1_epi16(short w)
Definition: emmintrin.h:1636
int _mm_cvtsi128_si32(__m128i a)
Definition: emmintrin.h:1545
__m128i _mm_set_epi32(int i3, int i2, int i1, int i0)
Definition: emmintrin.h:1598
void _mm_store_si128(__m128i *p, __m128i b)
Definition: emmintrin.h:1679
__m128i _mm_adds_epu16(__m128i a, __m128i b)
Definition: emmintrin.h:1179
__m128i _mm_movpi64_epi64(__m64 a)
Definition: emmintrin.h:1887
__m128i _mm_slli_epi64(__m128i a, int count)
Definition: emmintrin.h:1380
int _mm_ucomile_sd(__m128d a, __m128d b)
Definition: emmintrin.h:835
__m128i _mm_slli_si128(__m128i a, int i)
Definition: emmintrin.h:1354
__m128d _mm_cmpeq_sd(__m128d a, __m128d b)
Definition: emmintrin.h:731
int _mm_cvtsd_si32(__m128d a)
Definition: emmintrin.h:883
int _mm_comile_sd(__m128d a, __m128d b)
Definition: emmintrin.h:805
__m128i _mm_castps_si128(__m128 a)
Definition: emmintrin.h:1943
__m128d _mm_cmpnlt_pd(__m128d a, __m128d b)
Definition: emmintrin.h:711
__m128i _mm_setr_epi32(int i0, int i1, int i2, int i3)
Definition: emmintrin.h:1652
__m128i _mm_setzero_si128(void)
Definition: emmintrin.h:1674
__m128i _mm_srl_epi64(__m128i a, __m128i count)
Definition: emmintrin.h:1445
__m128d _mm_add_pd(__m128d a, __m128d b)
Definition: emmintrin.h:582
__m128i _mm_cvtpd_epi32(__m128d a)
Definition: emmintrin.h:878
__m128i _mm_xor_si128(__m128i a, __m128i b)
Definition: emmintrin.h:1345
__m128i _mm_move_epi64(__m128i a)
Definition: emmintrin.h:1892
__m128d _mm_sub_sd(__m128d a, __m128d b)
Definition: emmintrin.h:587
__m128d _mm_loadh_pd(__m128d a, double const *dp)
Definition: emmintrin.h:1009
__m128d _mm_cmpnlt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:773
__m128i _mm_srli_epi64(__m128i a, int count)
Definition: emmintrin.h:1440
__m128d _mm_setr_pd(double w, double x)
Definition: emmintrin.h:1052
#define __INTRIN_INLINE_SSE2
Definition: emmintrin.h:73
__m128d _mm_cmpord_sd(__m128d a, __m128d b)
Definition: emmintrin.h:758
__m128i _mm_set1_epi64(__m64 q)
Definition: emmintrin.h:1626
__m128d _mm_cmpunord_pd(__m128d a, __m128d b)
Definition: emmintrin.h:701
__m128i _mm_packs_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1770
__m128d _mm_castps_pd(__m128 a)
Definition: emmintrin.h:1938
void _mm_store1_pd(double *dp, __m128d a)
Definition: emmintrin.h:1081
__m128i _mm_sad_epu8(__m128i a, __m128i b)
Definition: emmintrin.h:1264
__INTRIN_INLINE_SSE2 __m128d _mm_undefined_pd(void)
Definition: emmintrin.h:1027
__INTRIN_INLINE_SSE2 void _mm_storeu_si32(void *p, __m128i b)
Definition: emmintrin.h:1700
__m128d _mm_setzero_pd(void)
Definition: emmintrin.h:1057
__m128 _mm_castsi128_ps(__m128i a)
Definition: emmintrin.h:1948
__m128d _mm_cmpneq_pd(__m128d a, __m128d b)
Definition: emmintrin.h:706
void _mm_storel_epi64(__m128i_u *p, __m128i a)
Definition: emmintrin.h:1721
__m128i _mm_packus_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1775
__m128d _mm_set_sd(double w)
Definition: emmintrin.h:1037
__m128 _mm_cvtepi32_ps(__m128i a)
Definition: emmintrin.h:1516
__m128i _mm_adds_epu8(__m128i a, __m128i b)
Definition: emmintrin.h:1170
__m128i _mm_sub_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1279
__m128i _mm_castpd_si128(__m128d a)
Definition: emmintrin.h:1933
__m128i _mm_add_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1132
__m128d _mm_cmpnge_pd(__m128d a, __m128d b)
Definition: emmintrin.h:726
int _mm_comige_sd(__m128d a, __m128d b)
Definition: emmintrin.h:815
__m128d _mm_cmpge_pd(__m128d a, __m128d b)
Definition: emmintrin.h:691
__INTRIN_INLINE_SSE2 void _mm_storeu_si16(void *p, __m128i b)
Definition: emmintrin.h:1708
__m128d _mm_loadr_pd(double const *dp)
Definition: emmintrin.h:955
__m128i _mm_mulhi_epu16(__m128i a, __m128i b)
Definition: emmintrin.h:1244
__m128i _mm_slli_epi32(__m128i a, int count)
Definition: emmintrin.h:1370
__m128i _mm_load_si128(__m128i const *p)
Definition: emmintrin.h:1556
__m128d _mm_max_sd(__m128d a, __m128d b)
Definition: emmintrin.h:641
__m128d _mm_and_pd(__m128d a, __m128d b)
Definition: emmintrin.h:651
__m128i _mm_mul_epu32(__m128i a, __m128i b)
Definition: emmintrin.h:1259
__m128i _mm_cvttpd_epi32(__m128d a)
Definition: emmintrin.h:906
int _mm_ucomige_sd(__m128d a, __m128d b)
Definition: emmintrin.h:845
__m128d _mm_sub_pd(__m128d a, __m128d b)
Definition: emmintrin.h:593
__m128i _mm_cmpeq_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1450
__m128d _mm_load1_pd(double const *dp)
Definition: emmintrin.h:941
__m128d _mm_load_pd(double const *dp)
Definition: emmintrin.h:936
__m128d _mm_min_pd(__m128d a, __m128d b)
Definition: emmintrin.h:636
__m128i _mm_sll_epi32(__m128i a, __m128i count)
Definition: emmintrin.h:1375
__m128i _mm_unpackhi_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1813
__m64 _mm_sub_si64(__m64 a, __m64 b)
Definition: emmintrin.h:1284
void _mm_stream_si32(int *p, int a)
Definition: emmintrin.h:1747
__m128i _mm_subs_epu8(__m128i a, __m128i b)
Definition: emmintrin.h:1312
__m128i _mm_srl_epi32(__m128i a, __m128i count)
Definition: emmintrin.h:1435
__m128i _mm_mulhi_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1239
#define _mm_shuffle_pd(a, b, i)
Definition: emmintrin.h:1924
void _mm_stream_si128(__m128i *p, __m128i a)
Definition: emmintrin.h:1738
void _mm_mfence(void)
Definition: intrin_x86.h:99
__m128d _mm_or_pd(__m128d a, __m128d b)
Definition: emmintrin.h:661
__m128i _mm_cmpeq_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1455
__m128d _mm_cmpge_sd(__m128d a, __m128d b)
Definition: emmintrin.h:752
__m128d _mm_mul_sd(__m128d a, __m128d b)
Definition: emmintrin.h:598
__m128i _mm_set1_epi8(char b)
Definition: emmintrin.h:1641
__m128i _mm_sra_epi32(__m128i a, __m128i count)
Definition: emmintrin.h:1405
__m128d _mm_sqrt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:620
__m128i _mm_srai_epi32(__m128i a, int count)
Definition: emmintrin.h:1400
__m128i _mm_cvtsi32_si128(int a)
Definition: emmintrin.h:1535
__m128i _mm_slli_epi16(__m128i a, int count)
Definition: emmintrin.h:1360
__m128d _mm_cmpeq_pd(__m128d a, __m128d b)
Definition: emmintrin.h:671
__m128i _mm_subs_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1294
void _mm_storer_pd(double *dp, __m128d a)
Definition: emmintrin.h:1100
double __m128d __attribute__((__vector_size__(16), __aligned__(16)))
Definition: emmintrin.h:45
__INTRIN_INLINE_SSE2 __m128i _mm_cvtsi64_si128(long long a)
Definition: emmintrin.h:1540
int _mm_movemask_pd(__m128d a)
Definition: emmintrin.h:1919
__m128i _mm_setr_epi64(__m64 q0, __m64 q1)
Definition: emmintrin.h:1647
__m128i _mm_sub_epi64(__m128i a, __m128i b)
Definition: emmintrin.h:1289
__m128d _mm_move_sd(__m128d a, __m128d b)
Definition: emmintrin.h:1062
__m128i _mm_min_epu8(__m128i a, __m128i b)
Definition: emmintrin.h:1230
__m64 _mm_add_si64(__m64 a, __m64 b)
Definition: emmintrin.h:1142
__m128d _mm_cvtpi32_pd(__m64 a)
Definition: emmintrin.h:926
__INTRIN_INLINE_SSE2 __m128i _mm_set_epi64x(long long q1, long long q0)
Definition: emmintrin.h:1588
__m128d _mm_unpackhi_pd(__m128d a, __m128d b)
Definition: emmintrin.h:1901
int _mm_cvttsd_si32(__m128d a)
Definition: emmintrin.h:911
__m128d _mm_cmpnle_pd(__m128d a, __m128d b)
Definition: emmintrin.h:716
__m128i _mm_unpackhi_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1823
__m128d _mm_cmpnle_sd(__m128d a, __m128d b)
Definition: emmintrin.h:778
__m128i _mm_add_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1127
__m128d _mm_cmple_sd(__m128d a, __m128d b)
Definition: emmintrin.h:741
__m128d _mm_cmple_pd(__m128d a, __m128d b)
Definition: emmintrin.h:681
__m128i _mm_cmplt_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1492
#define _mm_insert_epi16(a, b, imm)
Definition: emmintrin.h:1784
__m128d _mm_loadu_pd(double const *dp)
Definition: emmintrin.h:965
__m128i _mm_avg_epu16(__m128i a, __m128i b)
Definition: emmintrin.h:1193
__m64 _mm_mul_su32(__m64 a, __m64 b)
Definition: emmintrin.h:1254
__m128d _mm_cvtss_sd(__m128d a, __m128 b)
Definition: emmintrin.h:900
int _mm_ucomieq_sd(__m128d a, __m128d b)
Definition: emmintrin.h:825
__m128i _mm_setr_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0)
Definition: emmintrin.h:1664
__m128i _mm_sll_epi64(__m128i a, __m128i count)
Definition: emmintrin.h:1385
__m128d _mm_cmpngt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:783
void _mm_storel_pd(double *dp, __m128d a)
Definition: emmintrin.h:1119
__m128d _mm_cmplt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:736
__m128d _mm_cvtsi32_sd(__m128d a, int b)
Definition: emmintrin.h:893
__m128i _mm_or_si128(__m128i a, __m128i b)
Definition: emmintrin.h:1340
__INTRIN_INLINE_SSE2 long long _mm_cvtsi128_si64(__m128i a)
Definition: emmintrin.h:1551
__m128i _mm_cmplt_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1487
__m128i _mm_subs_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1303
__INTRIN_INLINE_SSE2 __m128i _mm_loadu_si64(void const *a)
Definition: emmintrin.h:973
__m128d _mm_cmpngt_pd(__m128d a, __m128d b)
Definition: emmintrin.h:721
#define _mm_extract_epi16(a, imm)
Definition: emmintrin.h:1780
double _mm_cvtsd_f64(__m128d a)
Definition: emmintrin.h:931
#define _mm_shufflelo_epi16(a, imm)
Definition: emmintrin.h:1796
__m128i _mm_packs_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1765
__INTRIN_INLINE_SSE2 __m128i _mm_set1_epi64x(long long q)
Definition: emmintrin.h:1621
__m128d _mm_min_sd(__m128d a, __m128d b)
Definition: emmintrin.h:631
void _mm_store_pd(double *dp, __m128d a)
Definition: emmintrin.h:1076
__m128i _mm_srli_epi16(__m128i a, int count)
Definition: emmintrin.h:1420
__m128i _mm_sub_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1269
__m128d _mm_castsi128_pd(__m128i a)
Definition: emmintrin.h:1953
__m128d _mm_cmpord_pd(__m128d a, __m128d b)
Definition: emmintrin.h:696
void _mm_storeh_pd(double *dp, __m128d a)
Definition: emmintrin.h:1111
__m128d _mm_mul_pd(__m128d a, __m128d b)
Definition: emmintrin.h:604
__INTRIN_INLINE_SSE2 __m128i _mm_loadu_si32(void const *a)
Definition: emmintrin.h:982
__m64 _mm_cvttpd_pi32(__m128d a)
Definition: emmintrin.h:921
__m128i _mm_sll_epi16(__m128i a, __m128i count)
Definition: emmintrin.h:1365
__m128d _mm_sqrt_pd(__m128d a)
Definition: emmintrin.h:626
__m64 _mm_cvtpd_pi32(__m128d a)
Definition: emmintrin.h:916
__m128i _mm_mullo_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1249
__m128i _mm_sra_epi16(__m128i a, __m128i count)
Definition: emmintrin.h:1395
int _mm_comieq_sd(__m128d a, __m128d b)
Definition: emmintrin.h:795
__m128i _mm_cvttps_epi32(__m128 a)
Definition: emmintrin.h:1530
__m128d _mm_load_sd(double const *dp)
Definition: emmintrin.h:1000
__m64 _mm_movepi64_pi64(__m128i a)
Definition: emmintrin.h:1882
void _mm_lfence(void)
Definition: intrin_x86.h:106
void _mm_maskmoveu_si128(__m128i d, __m128i n, _Out_writes_bytes_(16) char *p)
int _mm_movemask_epi8(__m128i a)
Definition: emmintrin.h:1788
__m128i _mm_madd_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1198
__m128d _mm_cmpgt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:746
int _mm_ucomilt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:830
__m128i _mm_cmpeq_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1460
__m128i _mm_srai_epi16(__m128i a, int count)
Definition: emmintrin.h:1390
__m128i _mm_set_epi16(short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0)
Definition: emmintrin.h:1603
__m128i _mm_setr_epi16(short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7)
Definition: emmintrin.h:1657
int _mm_ucomineq_sd(__m128d a, __m128d b)
Definition: emmintrin.h:850
__m128i _mm_cvtps_epi32(__m128 a)
Definition: emmintrin.h:1525
__m128i _mm_unpackhi_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1802
__m128d _mm_andnot_pd(__m128d a, __m128d b)
Definition: emmintrin.h:656
__m128d _mm_loadl_pd(__m128d a, double const *dp)
Definition: emmintrin.h:1018
__m128d _mm_cmpneq_sd(__m128d a, __m128d b)
Definition: emmintrin.h:768
__m128i _mm_min_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1221
__m128 _mm_cvtsd_ss(__m128 a, __m128d b)
Definition: emmintrin.h:888
__m128i _mm_andnot_si128(__m128i a, __m128i b)
Definition: emmintrin.h:1335
__m128i _mm_and_si128(__m128i a, __m128i b)
Definition: emmintrin.h:1330
__m128i _mm_setl_epi64(__m128i q)
void _mm_stream_pd(double *p, __m128d a)
Definition: emmintrin.h:1729
__m128 _mm_castpd_ps(__m128d a)
Definition: emmintrin.h:1928
__INTRIN_INLINE_SSE2 __m128i _mm_undefined_si128(void)
Definition: emmintrin.h:1578
int _mm_comineq_sd(__m128d a, __m128d b)
Definition: emmintrin.h:820
__m128i _mm_avg_epu8(__m128i a, __m128i b)
Definition: emmintrin.h:1188
int _mm_ucomigt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:840
__m128i _mm_adds_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1161
#define _mm_stream_si64
Definition: emmintrin.h:329
__m128i _mm_unpackhi_epi64(__m128i a, __m128i b)
Definition: emmintrin.h:1833
__m128i _mm_srli_si128(__m128i a, int imm)
Definition: emmintrin.h:1414
__m128i _mm_adds_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1152
int _mm_comilt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:800
__m128i _mm_unpacklo_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1853
__m128i _mm_cmplt_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1482
#define _mm_shufflehi_epi16(a, imm)
Definition: emmintrin.h:1799
__m128d _mm_cvtepi32_pd(__m128i a)
Definition: emmintrin.h:869
__m128i _mm_max_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1203
__m128i _mm_unpacklo_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1842
__m128d _mm_xor_pd(__m128d a, __m128d b)
Definition: emmintrin.h:666
__INTRIN_INLINE_SSE2 void _mm_storeu_si64(void *p, __m128i b)
Definition: emmintrin.h:1692
void _mm_clflush(void const *p)
__m128d _mm_cvtps_pd(__m128 a)
Definition: emmintrin.h:860
__m128d _mm_cmpgt_pd(__m128d a, __m128d b)
Definition: emmintrin.h:686
void _mm_pause(void)
Definition: intrin_x86.h:2059
__INTRIN_INLINE_SSE2 __m128i _mm_loadu_si16(void const *a)
Definition: emmintrin.h:991
void _mm_store_sd(double *dp, __m128d a)
Definition: emmintrin.h:1068
__m128d _mm_set_pd(double w, double x)
Definition: emmintrin.h:1047
__m128i _mm_srli_epi32(__m128i a, int count)
Definition: emmintrin.h:1430
int _mm_comigt_sd(__m128d a, __m128d b)
Definition: emmintrin.h:810
__m128i _mm_set_epi64(__m64 q1, __m64 q0)
Definition: emmintrin.h:1593
__m128i _mm_cmpgt_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1472
__m128d _mm_cmplt_pd(__m128d a, __m128d b)
Definition: emmintrin.h:676
__m128i _mm_add_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1137
__m128i _mm_sub_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1274
__m128i _mm_loadl_epi64(__m128i_u const *p)
Definition: emmintrin.h:1569
__m128i _mm_add_epi64(__m128i a, __m128i b)
Definition: emmintrin.h:1147
__m128d _mm_div_sd(__m128d a, __m128d b)
Definition: emmintrin.h:609
__m128i _mm_cmpgt_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1477
#define _mm_shuffle_epi32(a, imm)
Definition: emmintrin.h:1793
__m128d _mm_set1_pd(double w)
Definition: emmintrin.h:1042
__m128i _mm_unpacklo_epi32(__m128i a, __m128i b)
Definition: emmintrin.h:1863
__m128d _mm_div_pd(__m128d a, __m128d b)
Definition: emmintrin.h:615
__m128i _mm_srl_epi16(__m128i a, __m128i count)
Definition: emmintrin.h:1425
__m128d _mm_max_pd(__m128d a, __m128d b)
Definition: emmintrin.h:646
__m128d _mm_cmpunord_sd(__m128d a, __m128d b)
Definition: emmintrin.h:763
__m128d _mm_unpacklo_pd(__m128d a, __m128d b)
Definition: emmintrin.h:1910
__m128i _mm_subs_epu16(__m128i a, __m128i b)
Definition: emmintrin.h:1321
__m128i _mm_loadu_si128(__m128i_u const *p)
Definition: emmintrin.h:1561
__m128i _mm_set1_epi32(int i)
Definition: emmintrin.h:1631
__m128i _mm_unpacklo_epi64(__m128i a, __m128i b)
Definition: emmintrin.h:1873
__m128i _mm_max_epu8(__m128i a, __m128i b)
Definition: emmintrin.h:1212
void __declspec(noinline) __cdecl _free_base(void *const block)
Definition: free_base.cpp:98
GLint GLint GLint GLint GLint x
Definition: gl.h:1548
GLuint GLuint GLsizei count
Definition: gl.h:1545
GLdouble GLdouble GLdouble GLdouble q
Definition: gl.h:2063
GLdouble n
Definition: glext.h:7729
GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint GLdouble GLdouble w2
Definition: glext.h:8308
GLboolean GLboolean GLboolean b
Definition: glext.h:6204
GLfloat GLfloat p
Definition: glext.h:8902
GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint GLdouble w1
Definition: glext.h:8308
GLboolean GLboolean GLboolean GLboolean a
Definition: glext.h:6204
GLubyte GLubyte GLubyte GLubyte w
Definition: glext.h:6102
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble * u
Definition: glfuncs.h:240
#define d
Definition: ke_i.h:81
#define a
Definition: ke_i.h:78
#define b
Definition: ke_i.h:79
static CRYPT_DATA_BLOB b4
Definition: msg.c:2284
static CRYPT_DATA_BLOB b3[]
Definition: msg.c:592
static CRYPT_DATA_BLOB b2[]
Definition: msg.c:582
static CRYPT_DATA_BLOB b1[]
Definition: msg.c:573
#define _Out_writes_bytes_(s)
Definition: no_sal2.h:178
#define long
Definition: qsort.c:33
#define __c
Definition: schilyio.h:209
#define LL
Definition: tui.h:166
#define _STATIC_ASSERT(expr)
Definition: corecrt.h:305
#define _CRT_ALIGN(x)
Definition: corecrt.h:217