immintrin.h
/*
 * PROJECT:     ReactOS SDK
 * LICENSE:     MIT (https://spdx.org/licenses/MIT)
 * PURPOSE:     Intrinsics for the AVX instruction set
 * COPYRIGHT:   Copyright 2024 Timo Kreuzer (timo.kreuzer@reactos.org)
 */

#pragma once

#define _INCLUDED_IMM

//#include <wmmintrin.h>
#include <emmintrin.h>

#if defined(_MSC_VER) && !defined(__clang__)

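/*
 * MSVC does not support GCC-style vector extensions, so the 256-bit integer
 * vector type is declared as an aligned union whose members expose the packed
 * elements as arrays of each integer width.
 */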
typedef union _DECLSPEC_INTRIN_TYPE _CRT_ALIGN(32) __m256i
{
    __int8 m256i_i8[32];
    __int16 m256i_i16[16];
    __int32 m256i_i32[8];
    __int64 m256i_i64[4];
    unsigned __int8 m256i_u8[32];
    unsigned __int16 m256i_u16[16];
    unsigned __int32 m256i_u32[8];
    unsigned __int64 m256i_u64[4];
} __m256i;

#else /* _MSC_VER */

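/*
 * GCC and Clang model the 256-bit vectors with the vector_size attribute.
 * The __v* typedefs are the element-typed views used by the inline
 * implementations below; __m256i itself is the generic, may-alias type.
 */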
typedef char __v32qi __attribute__ ((__vector_size__ (32)));
typedef short __v16hi __attribute__ ((__vector_size__ (32)));
typedef long long __v4di __attribute__ ((__vector_size__ (32)));

typedef long long __m256i __attribute__((__vector_size__(32), __may_alias__));

#endif /* _MSC_VER */

#ifdef __cplusplus
extern "C" {
#endif

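/*
 * Prototypes. With MSVC these are compiler intrinsics (see the
 * #pragma intrinsic block below); with GCC/Clang they are provided as inline
 * functions later in this file.
 */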
extern __m256i __cdecl _mm256_cmpeq_epi8(__m256i, __m256i);
extern __m256i __cdecl _mm256_cmpeq_epi16(__m256i, __m256i);
extern int __cdecl _mm256_movemask_epi8(__m256i);
extern __m256i __cdecl _mm256_setzero_si256(void);
extern void __cdecl _mm256_zeroupper(void);

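/*
 * RDRAND/RDSEED step functions: each call stores one hardware random value in
 * *random_val and returns 1 on success or 0 if no value was available, so
 * callers are expected to retry, e.g.:
 *
 *   unsigned int value;
 *   int i;
 *   for (i = 0; i < 10; i++)
 *   {
 *       if (_rdrand32_step(&value))
 *           break;
 *   }
 */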
extern int __cdecl _rdrand16_step(unsigned short *random_val);
extern int __cdecl _rdrand32_step(unsigned int *random_val);
#if defined(_M_X64)
extern int __cdecl _rdrand64_step(unsigned __int64 *random_val);
#endif

extern int __cdecl _rdseed16_step(unsigned short *random_val);
extern int __cdecl _rdseed32_step(unsigned int *random_val);
#if defined(_M_X64)
extern int __cdecl _rdseed64_step(unsigned __int64 *random_val);
#endif

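/*
 * FXSAVE/FXRSTOR transfer the legacy x87/MMX/SSE state to and from a
 * 512-byte, 16-byte aligned save area. The XSAVE family works on a 64-byte
 * aligned XSAVE area; the unsigned __int64 argument is the state-component
 * bitmap selecting which extended states to save or restore.
 */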
void __cdecl _fxsave(void *);
void __cdecl _fxrstor(void const *);
void __cdecl _xsave(void *, unsigned __int64);
void __cdecl _xsavec(void *, unsigned __int64);
void __cdecl _xsaveopt(void *, unsigned __int64);
void __cdecl _xsaves(void *, unsigned __int64);
void __cdecl _xrstor(void const *, unsigned __int64);
void __cdecl _xrstors(void const *, unsigned __int64);
#if defined (_M_X64)
void __cdecl _fxsave64(void *);
void __cdecl _fxrstor64(void const *);
void __cdecl _xsave64(void *, unsigned __int64);
void __cdecl _xsavec64(void *, unsigned __int64);
void __cdecl _xsaveopt64(void *, unsigned __int64);
void __cdecl _xsaves64(void *, unsigned __int64);
void __cdecl _xrstor64(void const *, unsigned __int64);
void __cdecl _xrstors64(void const *, unsigned __int64);
#endif

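/*
 * _xgetbv reads and _xsetbv writes the extended control register selected by
 * the first argument (index 0 is XCR0, the XSAVE feature mask). XSETBV is a
 * privileged instruction.
 */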
unsigned __int64 __cdecl _xgetbv(unsigned int);
void __cdecl _xsetbv(unsigned int, unsigned __int64);


#if defined(_MSC_VER) && !defined(__clang__)

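/*
 * On MSVC the functions above are built-in; the pragmas request the intrinsic
 * forms so no library calls are generated.
 */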
#pragma intrinsic(_mm256_cmpeq_epi8)
#pragma intrinsic(_mm256_cmpeq_epi16)
#pragma intrinsic(_mm256_movemask_epi8)
#pragma intrinsic(_mm256_setzero_si256)
#pragma intrinsic(_mm256_zeroupper)

#pragma intrinsic(_rdrand16_step)
#pragma intrinsic(_rdrand32_step)
#if defined(_M_X64)
#pragma intrinsic(_rdrand64_step)
#endif
#pragma intrinsic(_rdseed16_step)
#pragma intrinsic(_rdseed32_step)
#if defined(_M_X64)
#pragma intrinsic(_rdseed64_step)
#endif

#pragma intrinsic(_fxsave)
#pragma intrinsic(_fxrstor)
#pragma intrinsic(_xsave)
#pragma intrinsic(_xsaveopt)
#pragma intrinsic(_xsavec)
#pragma intrinsic(_xsaves)
#pragma intrinsic(_xrstor)
#pragma intrinsic(_xrstors)
#if defined (_M_X64)
#pragma intrinsic(_fxsave64)
#pragma intrinsic(_fxrstor64)
#pragma intrinsic(_xsave64)
#pragma intrinsic(_xsaveopt64)
#pragma intrinsic(_xsavec64)
#pragma intrinsic(_xsaves64)
#pragma intrinsic(_xrstor64)
#pragma intrinsic(_xrstors64)
#endif

#pragma intrinsic(_xgetbv)
#pragma intrinsic(_xsetbv)

#else /* _MSC_VER */

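/*
 * GCC and Clang only accept AVX/AVX2 builtins and 256-bit vector operations
 * in functions compiled with the matching target feature, so the inline
 * implementations below enable the feature per function instead of requiring
 * the whole translation unit to be built with -mavx/-mavx2.
 */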
#ifdef __clang__
#define __ATTRIBUTE_SSE2__ __attribute__((__target__("sse2"),__min_vector_width__(128)))
#define __ATTRIBUTE_AVX__ __attribute__((__target__("avx"),__min_vector_width__(256)))
#define __ATTRIBUTE_AVX2__ __attribute__((__target__("avx2"),__min_vector_width__(256)))
#else
#define __ATTRIBUTE_SSE2__ __attribute__((__target__("sse2")))
#define __ATTRIBUTE_AVX__ __attribute__((__target__("avx")))
#define __ATTRIBUTE_AVX2__ __attribute__((__target__("avx2")))
#endif
#define __INTRIN_INLINE_SSE2 __INTRIN_INLINE __ATTRIBUTE_SSE2__
#define __INTRIN_INLINE_AVX __INTRIN_INLINE __ATTRIBUTE_AVX__
#define __INTRIN_INLINE_AVX2 __INTRIN_INLINE __ATTRIBUTE_AVX2__

__INTRIN_INLINE_AVX __m256i __cdecl _mm256_cmpeq_epi8(__m256i __A, __m256i __B)
{
    return (__m256i)((__v32qi)__A == (__v32qi)__B);
}

__INTRIN_INLINE_AVX __m256i __cdecl _mm256_cmpeq_epi16(__m256i __A, __m256i __B)
{
    return (__m256i)((__v16hi)__A == (__v16hi)__B);
}

__INTRIN_INLINE_AVX2 int __cdecl _mm256_movemask_epi8(__m256i __A)
{
    return __builtin_ia32_pmovmskb256((__v32qi)__A);
}

__INTRIN_INLINE_AVX __m256i __cdecl _mm256_setzero_si256(void)
{
    return __extension__ (__m256i)(__v4di){ 0, 0, 0, 0 };
}

__INTRIN_INLINE_AVX void __cdecl _mm256_zeroupper(void)
{
    __asm__ __volatile__("vzeroupper");
}

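/*
 * The implementations below run the instruction and capture the carry flag
 * with SETC; the CPU sets CF when a random value was delivered.
 */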
__INTRIN_INLINE int _rdrand16_step(unsigned short* random_val)
{
    unsigned char ok;
    __asm__ __volatile__("rdrand %0; setc %1" : "=r"(*random_val), "=qm"(ok));
    return (int)ok;
}

__INTRIN_INLINE int _rdrand32_step(unsigned int* random_val)
{
    unsigned char ok;
    __asm__ __volatile__("rdrand %0; setc %1" : "=r"(*random_val), "=qm"(ok));
    return (int)ok;
}

#if defined(__x86_64__)
__INTRIN_INLINE int _rdrand64_step(unsigned __int64* random_val)
{
    unsigned char ok;
    __asm__ __volatile__("rdrand %0; setc %1" : "=r"(*random_val), "=qm"(ok));
    return (int)ok;
}
#endif // __x86_64__

__INTRIN_INLINE int _rdseed16_step(unsigned short* random_val)
{
    unsigned char ok;
    __asm__ __volatile__("rdseed %0; setc %1" : "=r"(*random_val), "=qm"(ok));
    return (int)ok;
}

__INTRIN_INLINE int _rdseed32_step(unsigned int* random_val)
{
    unsigned char ok;
    __asm__ __volatile__("rdseed %0; setc %1" : "=r"(*random_val), "=qm"(ok));
    return (int)ok;
}

#if defined(__x86_64__)
__INTRIN_INLINE int _rdseed64_step(unsigned __int64* random_val)
{
    unsigned char ok;
    __asm__ __volatile__("rdseed %0; setc %1" : "=r"(*random_val), "=qm"(ok));
    return (int)ok;
}
#endif // __x86_64__

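/*
 * __builtin_ia32_fxsave and several XSAVE-family builtins are only available
 * in newer GCC releases, so the "#if 0" branches keep the builtin call for
 * later use and the active fallback uses inline assembly. The XSAVE fallbacks
 * pass the state-component mask in EDX:EAX, as the instructions expect.
 */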
__INTRIN_INLINE void _fxsave(void *__P)
{
#if 0 // Needs newer GCC
    __builtin_ia32_fxsave(__P);
#else
    __asm__ __volatile__("fxsave (%0)" : : "r"(__P));
#endif
}

__INTRIN_INLINE void _fxrstor(void const *__P)
{
#if 0 // Needs newer GCC
    __builtin_ia32_fxrstor((void*)__P);
#else
    __asm__ __volatile__("fxrstor (%0)" : : "r"(__P));
#endif
}

#if defined(__x86_64__)
__INTRIN_INLINE void _fxsave64(void *__P)
{
    __builtin_ia32_fxsave64(__P);
}

__INTRIN_INLINE void _fxrstor64(void const *__P)
{
    __builtin_ia32_fxrstor64((void*)__P);
}
#endif // __x86_64__

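/*
 * Clang only exposes the XSAVE builtins in functions that carry the "xsave"
 * target feature, so the attribute is applied per function below.
 */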
#ifdef __clang__
#define __ATTRIBUTE_XSAVE__ __attribute__((__target__("xsave")))
#else
#define __ATTRIBUTE_XSAVE__
#endif
#define __INTRIN_INLINE_XSAVE __INTRIN_INLINE __ATTRIBUTE_XSAVE__

__INTRIN_INLINE_XSAVE void _xsave(void *__P, unsigned __int64 __M)
{
    __builtin_ia32_xsave(__P, __M);
}

__INTRIN_INLINE_XSAVE void _xsavec(void *__P, unsigned __int64 __M)
{
#if 0 // Needs newer GCC
    __builtin_ia32_xsavec(__P, __M);
#else
    __asm__ __volatile__("xsavec %0" : "=m" (*(char*)__P) : "a" ((unsigned int)__M), "d" ((unsigned int)(__M >> 32)) : "memory");
#endif
}

__INTRIN_INLINE_XSAVE void _xsaveopt(void *__P, unsigned __int64 __M)
{
#if 0 // Needs newer GCC
    __builtin_ia32_xsaveopt(__P, __M);
#else
    __asm__ __volatile__("xsaveopt %0" : "=m" (*(char*)__P) : "a" ((unsigned int)__M), "d" ((unsigned int)(__M >> 32)) : "memory");
#endif
}

__INTRIN_INLINE_XSAVE void _xsaves(void *__P, unsigned __int64 __M)
{
#if 0 // Needs newer GCC
    __builtin_ia32_xsaves(__P, __M);
#else
    __asm__ __volatile__("xsaves %0" : "=m" (*(char*)__P) : "a" ((unsigned int)__M), "d" ((unsigned int)(__M >> 32)) : "memory");
#endif
}

__INTRIN_INLINE_XSAVE void _xrstor(void const *__P, unsigned __int64 __M)
{
    __builtin_ia32_xrstor((void*)__P, __M);
}

__INTRIN_INLINE_XSAVE void _xrstors(void const *__P, unsigned __int64 __M)
{
#if 0 // Needs newer GCC
    __builtin_ia32_xrstors((void*)__P, __M);
#else
    __asm__ __volatile__("xrstors %0" : "=m" (*(char*)__P) : "a" ((unsigned int)__M), "d" ((unsigned int)(__M >> 32)) : "memory");
#endif
}

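/* 64-bit variants of the save/restore intrinsics, only available on x64. */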
#if defined(__x86_64__)
__INTRIN_INLINE_XSAVE void _xsave64(void *__P, unsigned __int64 __M)
{
    __builtin_ia32_xsave64(__P, __M);
}

__INTRIN_INLINE_XSAVE void _xsavec64(void *__P, unsigned __int64 __M)
{
#if 0 // Needs newer GCC
    __builtin_ia32_xsavec64(__P, __M);
#else
    __asm__ __volatile__("xsavec %0" : "=m" (*(char*)__P) : "a" ((unsigned int)__M), "d" ((unsigned int)(__M >> 32)) : "memory");
#endif
}

__INTRIN_INLINE_XSAVE void _xsaveopt64(void *__P, unsigned __int64 __M)
{
#if 0 // Needs newer GCC
    __builtin_ia32_xsaveopt64(__P, __M);
#else
    __asm__ __volatile__("xsaveopt %0" : "=m" (*(char*)__P) : "a" ((unsigned int)__M), "d" ((unsigned int)(__M >> 32)) : "memory");
#endif
}

__INTRIN_INLINE_XSAVE void _xsaves64(void *__P, unsigned __int64 __M)
{
#if 0 // Needs newer GCC
    __builtin_ia32_xsaves64(__P, __M);
#else
    __asm__ __volatile__("xsaves %0" : "=m" (*(char*)__P) : "a" ((unsigned int)__M), "d" ((unsigned int)(__M >> 32)) : "memory");
#endif
}

__INTRIN_INLINE_XSAVE void _xrstor64(void const *__P, unsigned __int64 __M)
{
    __builtin_ia32_xrstor64((void*)__P, __M);
}

__INTRIN_INLINE_XSAVE void _xrstors64(void const *__P, unsigned __int64 __M)
{
#if 0 // Needs newer GCC
    __builtin_ia32_xrstors64((void*)__P, __M);
#else
    __asm__ __volatile__("xrstors %0" : "=m" (*(char*)__P) : "a" ((unsigned int)__M), "d" ((unsigned int)(__M >> 32)) : "memory");
#endif
}
#endif // __x86_64__

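/*
 * _xgetbv/_xsetbv are defined here only for GCC; Clang ships its own
 * declarations of these intrinsics.
 */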
#ifndef __clang__
__INTRIN_INLINE unsigned __int64 _xgetbv(unsigned int __A)
{
    return __builtin_ia32_xgetbv(__A);
}

__INTRIN_INLINE void _xsetbv(unsigned int __A, unsigned __int64 __V)
{
    __builtin_ia32_xsetbv(__A, __V);
}
#endif // !__clang__

#endif /* _MSC_VER */

#ifdef __cplusplus
} // extern "C"
#endif