ReactOS 0.4.16-dev-1020-gf135cab
corecrt_internal_simd.h
Go to the documentation of this file.
1//
2// corecrt_internal_simd.h
3//
4// Copyright (c) Microsoft Corporation. All rights reserved.
5//
6// This internal header defines internal SIMD utilities. This header may only
7// be included in C++ translation units.
8//
9#pragma once
10
11#include <intrin.h>
12#include <isa_availability.h>
13#include <stdint.h>
14
// _CRT_SIMD_SUPPORT_AVAILABLE is defined only when _M_IX86 or _M_X64 is
// defined and neither _M_HYBRID_X86_ARM64 nor _M_ARM64EC is defined.
#if !defined(_M_HYBRID_X86_ARM64) && !defined(_M_ARM64EC) && (defined(_M_IX86) || defined(_M_X64))
    #define _CRT_SIMD_SUPPORT_AVAILABLE
#endif
18
19#if defined _CRT_SIMD_SUPPORT_AVAILABLE
20
// _UCRT_ENABLE_EXTENDED_ISA and _UCRT_RESTORE_DEFAULT_ISA are used as a pair
// around a region of code:
//   * Clang: push / pop a function-level target("sse2,avx,avx2") attribute.
//   * GCC:   push the current options and switch the target to "avx2", then
//            pop the options again.
//   * Any other compiler: both macros expand to nothing.
#ifdef __clang__
    #define _UCRT_ENABLE_EXTENDED_ISA _Pragma("clang attribute push(__attribute__((target(\"sse2,avx,avx2\"))), apply_to=function)")
    #define _UCRT_RESTORE_DEFAULT_ISA _Pragma("clang attribute pop")
#elif defined(__GNUC__)
    #define _UCRT_ENABLE_EXTENDED_ISA _Pragma("GCC push_options") _Pragma("GCC target(\"avx2\")")
    #define _UCRT_RESTORE_DEFAULT_ISA _Pragma("GCC pop_options")
#else
    #define _UCRT_ENABLE_EXTENDED_ISA
    #define _UCRT_RESTORE_DEFAULT_ISA
#endif
36
37_UCRT_ENABLE_EXTENDED_ISA
38
// Declaration only (C linkage); no definition is provided in this header.
extern "C" int __isa_available;

// Selects which instruction-set specialization of the SIMD traits and
// cleanup guard templates in this header is used.
enum class __crt_simd_isa
{
    sse2 = 0,
    avx2 = 1
};
46
47 template <__crt_simd_isa Isa>
48 struct __crt_simd_cleanup_guard;
49
50 template <__crt_simd_isa Isa>
51 struct __crt_simd_pack_traits;
52
53 template <__crt_simd_isa Isa, typename Element>
54 struct __crt_simd_traits;
55
56
57
58 template <__crt_simd_isa Isa, typename Element>
59 struct __crt_simd_element_traits
60 : __crt_simd_pack_traits<Isa>
61 {
62 using element_type = Element;
63 using __crt_simd_pack_traits<Isa>::pack_size;
64
65 enum : size_t
66 {
67 element_size = sizeof(element_type),
68 elements_per_pack = pack_size / element_size
69 };
70 };
71
72
73
74 template <>
75 struct __crt_simd_cleanup_guard<__crt_simd_isa::sse2>
76 {
77 // No cleanup required for SSE2 usage, however we still need to define
78 // the no-op destructor in order to avoid unreferened local variable
79 // warnings when this cleanup guard is used.
80 ~__crt_simd_cleanup_guard() throw()
81 {
82 }
83 };
84
85 template <>
86 struct __crt_simd_pack_traits<__crt_simd_isa::sse2>
87 {
88 using pack_type = __m128i;
89
90 enum : size_t { pack_size = sizeof(pack_type) };
91
92 static __forceinline pack_type get_zero_pack() throw()
93 {
94 return _mm_setzero_si128();
95 }
96
97 static __forceinline int compute_byte_mask(pack_type const x) throw()
98 {
99 return _mm_movemask_epi8(x);
100 }
101 };
102
103 template <>
104 struct __crt_simd_traits<__crt_simd_isa::sse2, uint8_t>
105 : __crt_simd_element_traits<__crt_simd_isa::sse2, uint8_t>
106 {
107 static __forceinline pack_type compare_equals(pack_type const x, pack_type const y) throw()
108 {
109 return _mm_cmpeq_epi8(x, y);
110 }
111 };
112
113 template <>
114 struct __crt_simd_traits<__crt_simd_isa::sse2, uint16_t>
115 : __crt_simd_element_traits<__crt_simd_isa::sse2, uint16_t>
116 {
117 static __forceinline pack_type compare_equals(pack_type const x, pack_type const y) throw()
118 {
119 return _mm_cmpeq_epi16(x, y);
120 }
121 };
122
123
124
125 template <>
126 struct __crt_simd_cleanup_guard<__crt_simd_isa::avx2>
127 {
128 ~__crt_simd_cleanup_guard()
129 {
130 // After executing AVX2 instructions, we must zero the upper halves
131 // of the YMM registers before returning. See the Intel article
132 // "Intel AVX State Transitions: Migrating SSE Code to AVX" for
133 // further details.
135 }
136 };
137
138 template <>
139 struct __crt_simd_pack_traits<__crt_simd_isa::avx2>
140 {
141 using pack_type = __m256i;
142
143 enum : size_t { pack_size = sizeof(pack_type) };
144
145 static __forceinline pack_type get_zero_pack() throw()
146 {
147 return _mm256_setzero_si256();
148 }
149
150 static __forceinline int compute_byte_mask(pack_type const x) throw()
151 {
152 return _mm256_movemask_epi8(x);
153 }
154 };
155
156 template <>
157 struct __crt_simd_traits<__crt_simd_isa::avx2, uint8_t>
158 : __crt_simd_element_traits<__crt_simd_isa::avx2, uint8_t>
159 {
160 static __forceinline pack_type compare_equals(pack_type const x, pack_type const y) throw()
161 {
162 return _mm256_cmpeq_epi8(x, y);
163 }
164 };
165
166 template <>
167 struct __crt_simd_traits<__crt_simd_isa::avx2, uint16_t>
168 : __crt_simd_element_traits<__crt_simd_isa::avx2, uint16_t>
169 {
170 static __forceinline pack_type compare_equals(pack_type const x, pack_type const y) throw()
171 {
172 return _mm256_cmpeq_epi16(x, y);
173 }
174 };
175
176_UCRT_RESTORE_DEFAULT_ISA
177
178#endif // _CRT_SIMD_SUPPORT_AVAILABLE
unsigned short int uint16_t
Definition: acefiex.h:54
result_buffer_count char *const _In_ int const _In_ bool const _In_ unsigned const _In_ STRFLT const _In_ bool const _Inout_ __crt_cached_ptd_host &ptd throw()
Definition: cvt.cpp:119
size_t const element_size
Definition: debug_heap.cpp:510
__m128i _mm_setzero_si128(void)
Definition: emmintrin.h:1672
__m128i _mm_cmpeq_epi8(__m128i a, __m128i b)
Definition: emmintrin.h:1448
__m128i _mm_cmpeq_epi16(__m128i a, __m128i b)
Definition: emmintrin.h:1453
int _mm_movemask_epi8(__m128i a)
Definition: emmintrin.h:1786
GLint GLint GLint GLint GLint x
Definition: gl.h:1548
GLint GLint GLint GLint GLint GLint y
Definition: gl.h:1548
@ Isa
Definition: hwresource.cpp:138
__m256i __cdecl _mm256_setzero_si256(void)
Definition: immintrin.h:111
int __cdecl _mm256_movemask_epi8(__m256i)
Definition: immintrin.h:106
__m256i __cdecl _mm256_cmpeq_epi8(__m256i, __m256i)
Definition: immintrin.h:96
void __cdecl _mm256_zeroupper(void)
Definition: immintrin.h:116
__m256i __cdecl _mm256_cmpeq_epi16(__m256i, __m256i)
Definition: immintrin.h:101
int __isa_available
BYTE uint8_t
Definition: msvideo1.c:66