ReactOS 0.4.16-dev-1946-g52006dd
corecrt_internal_simd.h
Go to the documentation of this file.
1//
2// corecrt_internal_simd.h
3//
4// Copyright (c) Microsoft Corporation. All rights reserved.
5//
6// This internal header defines internal SIMD utilities. This header may only
7// be included in C++ translation units.
8//
9#pragma once
10
11#include <intrin.h>
12#include <isa_availability.h>
13#include <stdint.h>
14
// _CRT_SIMD_SUPPORT_AVAILABLE is defined only when the target is x86 or x64
// and neither _M_HYBRID_X86_ARM64 nor _M_ARM64EC is defined. Everything
// SIMD-related in this header is gated on it.
#if defined(_M_IX86) || defined(_M_X64)
    #if !defined(_M_HYBRID_X86_ARM64) && !defined(_M_ARM64EC)
        #define _CRT_SIMD_SUPPORT_AVAILABLE
    #endif
#endif
18
19#if defined _CRT_SIMD_SUPPORT_AVAILABLE
20
// Macros that bracket a region of function definitions which has to be
// compiled with a particular instruction set enabled:
//
//   _UCRT_ENABLE_SSE2 / _UCRT_ENABLE_AVX2   open such a region
//   _UCRT_RESTORE_DEFAULT_ISA               closes the most recent region
//
// Clang and GCC get a push/pop style target pragma; for any other compiler
// the macros expand to nothing. Clang is tested first because it also
// defines __GNUC__.
#if defined(__clang__)

    #define _UCRT_ENABLE_SSE2 \
        _Pragma("clang attribute push(__attribute__((target(\"sse2\"))), apply_to=function)")

    #define _UCRT_ENABLE_AVX2 \
        _Pragma("clang attribute push(__attribute__((target(\"sse2,avx,avx2\"))), apply_to=function)")

    #define _UCRT_RESTORE_DEFAULT_ISA \
        _Pragma("clang attribute pop")

#elif defined(__GNUC__)

    #define _UCRT_ENABLE_SSE2 \
        _Pragma("GCC push_options") \
        _Pragma("GCC target(\"sse2\")")

    #define _UCRT_ENABLE_AVX2 \
        _Pragma("GCC push_options") \
        _Pragma("GCC target(\"avx2\")")

    #define _UCRT_RESTORE_DEFAULT_ISA \
        _Pragma("GCC pop_options")

#else

    // No per-region ISA switching for this compiler: all three are no-ops.
    #define _UCRT_ENABLE_SSE2
    #define _UCRT_ENABLE_AVX2
    #define _UCRT_RESTORE_DEFAULT_ISA

#endif
42
43
// Declaration only: __isa_available has C linkage and is defined elsewhere.
extern "C"
{
    extern int __isa_available;
}
45
// Tag identifying the instruction set a SIMD helper is written for. It is
// used as the non-type template argument of the traits and cleanup-guard
// templates declared in this header.
enum class __crt_simd_isa
{
    sse2, // selects the __m128i based specializations
    avx2  // selects the __m256i based specializations
};
51
52 template <__crt_simd_isa Isa>
53 struct __crt_simd_cleanup_guard;
54
55 template <__crt_simd_isa Isa>
56 struct __crt_simd_pack_traits;
57
58 template <__crt_simd_isa Isa, typename Element>
59 struct __crt_simd_traits;
60
61
62
63 template <__crt_simd_isa Isa, typename Element>
64 struct __crt_simd_element_traits
65 : __crt_simd_pack_traits<Isa>
66 {
67 using element_type = Element;
68 using __crt_simd_pack_traits<Isa>::pack_size;
69
70 enum : size_t
71 {
72 element_size = sizeof(element_type),
73 elements_per_pack = pack_size / element_size
74 };
75 };
76
77
78_UCRT_ENABLE_SSE2
79
80 template <>
81 struct __crt_simd_cleanup_guard<__crt_simd_isa::sse2>
82 {
83 // No cleanup required for SSE2 usage, however we still need to define
84 // the no-op destructor in order to avoid unreferened local variable
85 // warnings when this cleanup guard is used.
86 ~__crt_simd_cleanup_guard() throw()
87 {
88 }
89 };
90
91 template <>
92 struct __crt_simd_pack_traits<__crt_simd_isa::sse2>
93 {
94 using pack_type = __m128i;
95
96 enum : size_t { pack_size = sizeof(pack_type) };
97
98 static __forceinline pack_type get_zero_pack() throw()
99 {
100 return _mm_setzero_si128();
101 }
102
103 static __forceinline int compute_byte_mask(pack_type const x) throw()
104 {
105 return _mm_movemask_epi8(x);
106 }
107 };
108
109 template <>
110 struct __crt_simd_traits<__crt_simd_isa::sse2, uint8_t>
111 : __crt_simd_element_traits<__crt_simd_isa::sse2, uint8_t>
112 {
113 static __forceinline pack_type compare_equals(pack_type const x, pack_type const y) throw()
114 {
115 return _mm_cmpeq_epi8(x, y);
116 }
117 };
118
119 template <>
120 struct __crt_simd_traits<__crt_simd_isa::sse2, uint16_t>
121 : __crt_simd_element_traits<__crt_simd_isa::sse2, uint16_t>
122 {
123 static __forceinline pack_type compare_equals(pack_type const x, pack_type const y) throw()
124 {
125 return _mm_cmpeq_epi16(x, y);
126 }
127 };
128
129_UCRT_RESTORE_DEFAULT_ISA
130
131_UCRT_ENABLE_AVX2
132
133 template <>
134 struct __crt_simd_cleanup_guard<__crt_simd_isa::avx2>
135 {
136 ~__crt_simd_cleanup_guard()
137 {
138 // After executing AVX2 instructions, we must zero the upper halves
139 // of the YMM registers before returning. See the Intel article
140 // "Intel AVX State Transitions: Migrating SSE Code to AVX" for
141 // further details.
143 }
144 };
145
146 template <>
147 struct __crt_simd_pack_traits<__crt_simd_isa::avx2>
148 {
149 using pack_type = __m256i;
150
151 enum : size_t { pack_size = sizeof(pack_type) };
152
153 static __forceinline pack_type get_zero_pack() throw()
154 {
155 return _mm256_setzero_si256();
156 }
157
158 static __forceinline int compute_byte_mask(pack_type const x) throw()
159 {
160 return _mm256_movemask_epi8(x);
161 }
162 };
163
164 template <>
165 struct __crt_simd_traits<__crt_simd_isa::avx2, uint8_t>
166 : __crt_simd_element_traits<__crt_simd_isa::avx2, uint8_t>
167 {
168 static __forceinline pack_type compare_equals(pack_type const x, pack_type const y) throw()
169 {
170 return _mm256_cmpeq_epi8(x, y);
171 }
172 };
173
174 template <>
175 struct __crt_simd_traits<__crt_simd_isa::avx2, uint16_t>
176 : __crt_simd_element_traits<__crt_simd_isa::avx2, uint16_t>
177 {
178 static __forceinline pack_type compare_equals(pack_type const x, pack_type const y) throw()
179 {
180 return _mm256_cmpeq_epi16(x, y);
181 }
182 };
183
184_UCRT_RESTORE_DEFAULT_ISA
185
186#endif // _CRT_SIMD_SUPPORT_AVAILABLE
unsigned short int uint16_t
Definition: acefiex.h:54
result_buffer_count char *const _In_ int const _In_ bool const _In_ unsigned const _In_ STRFLT const _In_ bool const _Inout_ __crt_cached_ptd_host &ptd throw()
Definition: cvt.cpp:119
size_t const element_size
Definition: debug_heap.cpp:510
__m128i _mm_setzero_si128(void)
Definition: emmintrin.h:1674
__m128i _mm_cmpeq_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1450
__m128i _mm_cmpeq_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1455
int _mm_movemask_epi8(__m128i a)
Definition: emmintrin.h:1788
GLint GLint GLint GLint GLint x
Definition: gl.h:1548
GLint GLint GLint GLint GLint GLint y
Definition: gl.h:1548
__m256i __cdecl _mm256_setzero_si256(void)
Definition: immintrin.h:155
int __cdecl _mm256_movemask_epi8(__m256i)
Definition: immintrin.h:150
__m256i __cdecl _mm256_cmpeq_epi8(__m256i, __m256i)
Definition: immintrin.h:140
void __cdecl _mm256_zeroupper(void)
Definition: immintrin.h:160
__m256i __cdecl _mm256_cmpeq_epi16(__m256i, __m256i)
Definition: immintrin.h:145
int __isa_available
BYTE uint8_t
Definition: msvideo1.c:66
@ Isa
Definition: restypes.h:122