#if defined(_MSC_VER) && !defined(__clang__)
/* MSVC path: __m128i is a union of element views, one of which is: */
    unsigned __int8 m128i_u8[16];
#define __ATTRIBUTE_SSE2__
#else /* GCC / Clang */
typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16)));
typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16)));

/* Unaligned variants used by the loadu/storeu forms. */
typedef double __m128d_u __attribute__((__vector_size__(16), __aligned__(1)));
typedef long long __m128i_u __attribute__((__vector_size__(16), __aligned__(1)));

/* Internal element-wise views of the 128-bit registers. */
typedef unsigned long long __v2du __attribute__((__vector_size__(16)));
typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
typedef signed char __v16qs __attribute__((__vector_size__(16)));

#ifdef __clang__
#define __ATTRIBUTE_SSE2__ __attribute__((__target__("sse2"),__min_vector_width__(128)))
#define __ATTRIBUTE_MMXSSE2__ __attribute__((__target__("mmx,sse2"),__min_vector_width__(128)))
#else
#define __ATTRIBUTE_SSE2__ __attribute__((__target__("sse2")))
#define __ATTRIBUTE_MMXSSE2__ __attribute__((__target__("mmx,sse2")))
#endif
#define __INTRIN_INLINE_SSE2 __INTRIN_INLINE __ATTRIBUTE_SSE2__
#define __INTRIN_INLINE_MMXSSE2 __INTRIN_INLINE __ATTRIBUTE_MMXSSE2__
#endif /* _MSC_VER */
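/*
 * Usage sketch (not part of the header): every inline intrinsic below is
 * declared with one of these macros so that GCC/Clang compile its body with
 * SSE2 enabled even when the translation unit is not built with -msse2.
 * A minimal example, assuming __INTRIN_INLINE expands to the usual
 * extern-inline attributes; demo_negate_pd is a hypothetical helper:
 */
__INTRIN_INLINE_SSE2 __m128d demo_negate_pd(__m128d a)
{
    return _mm_xor_pd(a, _mm_set1_pd(-0.0));   /* flip the sign bit of both lanes */
}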
extern __m128d _mm_cvtsi64_sd(__m128d a, long long b);
extern long long _mm_cvtsd_si64(__m128d a);
extern long long _mm_cvttsd_si64(__m128d a);
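/*
 * Usage sketch (not part of the header): these are the x64-only scalar
 * conversions. _mm_cvtsd_si64 rounds under the current MXCSR mode
 * (round-to-nearest-even by default); _mm_cvttsd_si64 truncates toward zero.
 * demo_round_vs_truncate is a hypothetical helper:
 */
static long long demo_round_vs_truncate(void)
{
    __m128d v = _mm_set_sd(2.7);          /* low lane = 2.7 */
    long long r = _mm_cvtsd_si64(v);      /* 3 (rounded) */
    long long t = _mm_cvttsd_si64(v);     /* 2 (truncated) */
    return r - t;                         /* 1 */
}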
extern __m128i _mm_set_epi16(short w7, short w6, short w5, short w4,
                             short w3, short w2, short w1, short w0);
extern __m128i _mm_set_epi8(char b15, char b14, char b13, char b12,
                            char b11, char b10, char b9, char b8,
                            char b7, char b6, char b5, char b4,
                            char b3, char b2, char b1, char b0);
extern __m128i _mm_setr_epi16(short w0, short w1, short w2, short w3,
                              short w4, short w5, short w6, short w7);
extern __m128i _mm_setr_epi8(char b15, char b14, char b13, char b12,
                             char b11, char b10, char b9, char b8,
                             char b7, char b6, char b5, char b4,
                             char b3, char b2, char b1, char b0);
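/*
 * Usage sketch (not part of the header): _mm_set_* takes arguments from the
 * highest element down to element 0, while _mm_setr_* takes them in memory
 * order (element 0 first). Both calls below build the same vector;
 * demo_set_vs_setr is a hypothetical helper:
 */
static void demo_set_vs_setr(void)
{
    __m128i x = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);  /* element 0 = 0 */
    __m128i y = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); /* element 0 = 0 */
    (void)x; (void)y;                                   /* identical contents */
}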
#define _mm_set_pd1(a) _mm_set1_pd(a)
#define _mm_load_pd1(p) _mm_load1_pd(p)
#define _mm_store_pd1(p, a) _mm_store1_pd((p), (a))
#define _mm_bslli_si128 _mm_slli_si128
#define _mm_bsrli_si128 _mm_srli_si128
#define _mm_stream_si64 _mm_stream_si64x
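/*
 * Note (not part of the header): these are pure compatibility aliases.
 * _mm_bslli_si128/_mm_bsrli_si128 are the newer names that make the byte
 * (not bit) granularity of the whole-register shifts explicit.
 * demo_drop_low_byte is a hypothetical helper:
 */
static __m128i demo_drop_low_byte(__m128i v)
{
    return _mm_bsrli_si128(v, 1);   /* shift the 128-bit value right by one byte */
}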
#if defined(_MSC_VER) && !defined(__clang__)
/* MSVC: force these to expand to the machine intrinsics rather than CRT calls. */
#pragma intrinsic(_mm_add_sd)
#pragma intrinsic(_mm_add_pd)
#pragma intrinsic(_mm_sub_sd)
#pragma intrinsic(_mm_sub_pd)
#pragma intrinsic(_mm_mul_sd)
#pragma intrinsic(_mm_mul_pd)
#pragma intrinsic(_mm_div_sd)
#pragma intrinsic(_mm_div_pd)
#pragma intrinsic(_mm_sqrt_sd)
#pragma intrinsic(_mm_sqrt_pd)
#pragma intrinsic(_mm_min_sd)
#pragma intrinsic(_mm_min_pd)
#pragma intrinsic(_mm_max_sd)
#pragma intrinsic(_mm_max_pd)
#pragma intrinsic(_mm_and_pd)
#pragma intrinsic(_mm_andnot_pd)
#pragma intrinsic(_mm_or_pd)
#pragma intrinsic(_mm_xor_pd)
#pragma intrinsic(_mm_cmpeq_pd)
#pragma intrinsic(_mm_cmplt_pd)
#pragma intrinsic(_mm_cmple_pd)
#pragma intrinsic(_mm_cmpgt_pd)
#pragma intrinsic(_mm_cmpge_pd)
#pragma intrinsic(_mm_cmpord_pd)
#pragma intrinsic(_mm_cmpunord_pd)
#pragma intrinsic(_mm_cmpneq_pd)
#pragma intrinsic(_mm_cmpnlt_pd)
#pragma intrinsic(_mm_cmpnle_pd)
#pragma intrinsic(_mm_cmpngt_pd)
#pragma intrinsic(_mm_cmpnge_pd)
#pragma intrinsic(_mm_cmpeq_sd)
#pragma intrinsic(_mm_cmplt_sd)
#pragma intrinsic(_mm_cmple_sd)
#pragma intrinsic(_mm_cmpgt_sd)
#pragma intrinsic(_mm_cmpge_sd)
#pragma intrinsic(_mm_cmpord_sd)
#pragma intrinsic(_mm_cmpunord_sd)
#pragma intrinsic(_mm_cmpneq_sd)
#pragma intrinsic(_mm_cmpnlt_sd)
#pragma intrinsic(_mm_cmpnle_sd)
#pragma intrinsic(_mm_cmpngt_sd)
#pragma intrinsic(_mm_cmpnge_sd)
#pragma intrinsic(_mm_comieq_sd)
#pragma intrinsic(_mm_comilt_sd)
#pragma intrinsic(_mm_comile_sd)
#pragma intrinsic(_mm_comigt_sd)
#pragma intrinsic(_mm_comige_sd)
#pragma intrinsic(_mm_comineq_sd)
#pragma intrinsic(_mm_ucomieq_sd)
#pragma intrinsic(_mm_ucomilt_sd)
#pragma intrinsic(_mm_ucomile_sd)
#pragma intrinsic(_mm_ucomigt_sd)
#pragma intrinsic(_mm_ucomige_sd)
#pragma intrinsic(_mm_ucomineq_sd)
#pragma intrinsic(_mm_cvtpd_ps)
#pragma intrinsic(_mm_cvtps_pd)
#pragma intrinsic(_mm_cvtepi32_pd)
#pragma intrinsic(_mm_cvtpd_epi32)
#pragma intrinsic(_mm_cvtsd_si32)
#pragma intrinsic(_mm_cvtsd_ss)
#pragma intrinsic(_mm_cvtsi32_sd)
#pragma intrinsic(_mm_cvtss_sd)
#pragma intrinsic(_mm_cvttpd_epi32)
#pragma intrinsic(_mm_cvttsd_si32)
#pragma intrinsic(_mm_cvtsd_f64)
#pragma intrinsic(_mm_load_pd)
#pragma intrinsic(_mm_load1_pd)
#pragma intrinsic(_mm_loadr_pd)
#pragma intrinsic(_mm_loadu_pd)
#pragma intrinsic(_mm_load_sd)
#pragma intrinsic(_mm_loadh_pd)
#pragma intrinsic(_mm_loadl_pd)
#pragma intrinsic(_mm_set_sd)
#pragma intrinsic(_mm_set1_pd)
#pragma intrinsic(_mm_set_pd)
#pragma intrinsic(_mm_setr_pd)
#pragma intrinsic(_mm_setzero_pd)
#pragma intrinsic(_mm_move_sd)
#pragma intrinsic(_mm_store_sd)
#pragma intrinsic(_mm_store_pd)
#pragma intrinsic(_mm_store1_pd)
#pragma intrinsic(_mm_storeu_pd)
#pragma intrinsic(_mm_storer_pd)
#pragma intrinsic(_mm_storeh_pd)
#pragma intrinsic(_mm_storel_pd)
#pragma intrinsic(_mm_add_epi8)
#pragma intrinsic(_mm_add_epi16)
#pragma intrinsic(_mm_add_epi32)
#pragma intrinsic(_mm_add_epi64)
#pragma intrinsic(_mm_adds_epi8)
#pragma intrinsic(_mm_adds_epi16)
#pragma intrinsic(_mm_adds_epu8)
#pragma intrinsic(_mm_adds_epu16)
#pragma intrinsic(_mm_avg_epu8)
#pragma intrinsic(_mm_avg_epu16)
#pragma intrinsic(_mm_madd_epi16)
#pragma intrinsic(_mm_max_epi16)
#pragma intrinsic(_mm_max_epu8)
#pragma intrinsic(_mm_min_epi16)
#pragma intrinsic(_mm_min_epu8)
#pragma intrinsic(_mm_mulhi_epi16)
#pragma intrinsic(_mm_mulhi_epu16)
#pragma intrinsic(_mm_mullo_epi16)
#pragma intrinsic(_mm_mul_epu32)
#pragma intrinsic(_mm_sad_epu8)
#pragma intrinsic(_mm_sub_epi8)
#pragma intrinsic(_mm_sub_epi16)
#pragma intrinsic(_mm_sub_epi32)
#pragma intrinsic(_mm_sub_epi64)
#pragma intrinsic(_mm_subs_epi8)
#pragma intrinsic(_mm_subs_epi16)
#pragma intrinsic(_mm_subs_epu8)
#pragma intrinsic(_mm_subs_epu16)
#pragma intrinsic(_mm_and_si128)
#pragma intrinsic(_mm_andnot_si128)
#pragma intrinsic(_mm_or_si128)
#pragma intrinsic(_mm_xor_si128)
#pragma intrinsic(_mm_slli_si128)
#pragma intrinsic(_mm_slli_epi16)
#pragma intrinsic(_mm_sll_epi16)
#pragma intrinsic(_mm_slli_epi32)
#pragma intrinsic(_mm_sll_epi32)
#pragma intrinsic(_mm_slli_epi64)
#pragma intrinsic(_mm_sll_epi64)
#pragma intrinsic(_mm_srai_epi16)
#pragma intrinsic(_mm_sra_epi16)
#pragma intrinsic(_mm_srai_epi32)
#pragma intrinsic(_mm_sra_epi32)
#pragma intrinsic(_mm_srli_si128)
#pragma intrinsic(_mm_srli_epi16)
#pragma intrinsic(_mm_srl_epi16)
#pragma intrinsic(_mm_srli_epi32)
#pragma intrinsic(_mm_srl_epi32)
#pragma intrinsic(_mm_srli_epi64)
#pragma intrinsic(_mm_srl_epi64)
#pragma intrinsic(_mm_cmpeq_epi8)
#pragma intrinsic(_mm_cmpeq_epi16)
#pragma intrinsic(_mm_cmpeq_epi32)
#pragma intrinsic(_mm_cmpgt_epi8)
#pragma intrinsic(_mm_cmpgt_epi16)
#pragma intrinsic(_mm_cmpgt_epi32)
#pragma intrinsic(_mm_cmplt_epi8)
#pragma intrinsic(_mm_cmplt_epi16)
#pragma intrinsic(_mm_cmplt_epi32)
#pragma intrinsic(_mm_cvtsi64_sd)
#pragma intrinsic(_mm_cvtsd_si64)
#pragma intrinsic(_mm_cvttsd_si64)
#pragma intrinsic(_mm_cvtepi32_ps)
#pragma intrinsic(_mm_cvtps_epi32)
#pragma intrinsic(_mm_cvttps_epi32)
#pragma intrinsic(_mm_cvtsi32_si128)
#pragma intrinsic(_mm_cvtsi64_si128)
#pragma intrinsic(_mm_cvtsi128_si32)
#pragma intrinsic(_mm_cvtsi128_si64)
#pragma intrinsic(_mm_load_si128)
#pragma intrinsic(_mm_loadu_si128)
#pragma intrinsic(_mm_loadl_epi64)
#pragma intrinsic(_mm_set_epi32)
#pragma intrinsic(_mm_set_epi16)
#pragma intrinsic(_mm_set_epi8)
#pragma intrinsic(_mm_set1_epi32)
#pragma intrinsic(_mm_set1_epi16)
#pragma intrinsic(_mm_set1_epi8)
#pragma intrinsic(_mm_setl_epi64)
#pragma intrinsic(_mm_setr_epi32)
#pragma intrinsic(_mm_setr_epi16)
#pragma intrinsic(_mm_setr_epi8)
#pragma intrinsic(_mm_setzero_si128)
#pragma intrinsic(_mm_store_si128)
#pragma intrinsic(_mm_storeu_si128)
#pragma intrinsic(_mm_maskmoveu_si128)
#pragma intrinsic(_mm_storel_epi64)
#pragma intrinsic(_mm_stream_pd)
#pragma intrinsic(_mm_stream_si128)
#pragma intrinsic(_mm_stream_si32)
#pragma intrinsic(_mm_clflush)
#pragma intrinsic(_mm_lfence)
#pragma intrinsic(_mm_mfence)
#pragma intrinsic(_mm_packs_epi16)
#pragma intrinsic(_mm_packs_epi32)
#pragma intrinsic(_mm_packus_epi16)
#pragma intrinsic(_mm_extract_epi16)
#pragma intrinsic(_mm_insert_epi16)
#pragma intrinsic(_mm_movemask_epi8)
#pragma intrinsic(_mm_shuffle_epi32)
#pragma intrinsic(_mm_shufflelo_epi16)
#pragma intrinsic(_mm_shufflehi_epi16)
#pragma intrinsic(_mm_unpackhi_epi8)
#pragma intrinsic(_mm_unpackhi_epi16)
#pragma intrinsic(_mm_unpackhi_epi32)
#pragma intrinsic(_mm_unpackhi_epi64)
#pragma intrinsic(_mm_unpacklo_epi8)
#pragma intrinsic(_mm_unpacklo_epi16)
#pragma intrinsic(_mm_unpacklo_epi32)
#pragma intrinsic(_mm_unpacklo_epi64)
#pragma intrinsic(_mm_move_epi64)
#pragma intrinsic(_mm_unpackhi_pd)
#pragma intrinsic(_mm_unpacklo_pd)
#pragma intrinsic(_mm_movemask_pd)
#pragma intrinsic(_mm_shuffle_pd)
#pragma intrinsic(_mm_castpd_ps)
#pragma intrinsic(_mm_castpd_si128)
#pragma intrinsic(_mm_castps_pd)
#pragma intrinsic(_mm_castps_si128)
#pragma intrinsic(_mm_castsi128_ps)
#pragma intrinsic(_mm_castsi128_pd)
#pragma intrinsic(_mm_pause)
#endif /* _MSC_VER */
/* _mm_add_pd / _mm_sub_pd / _mm_mul_pd / _mm_div_pd */
return (__m128d)((__v2df)a + (__v2df)b);
return (__m128d)((__v2df)a - (__v2df)b);
return (__m128d)((__v2df)a * (__v2df)b);
return (__m128d)((__v2df)a / (__v2df)b);

/* _mm_sqrt_sd: square root of the low lane of b, upper lane taken from a */
__m128d __c = __builtin_ia32_sqrtsd((__v2df)b);
return __extension__(__m128d){__c[0], a[1]};

/* _mm_sqrt_pd */
return __builtin_ia32_sqrtpd((__v2df)a);

/* _mm_min_sd / _mm_min_pd / _mm_max_sd / _mm_max_pd */
return __builtin_ia32_minsd((__v2df)a, (__v2df)b);
return __builtin_ia32_minpd((__v2df)a, (__v2df)b);
return __builtin_ia32_maxsd((__v2df)a, (__v2df)b);
return __builtin_ia32_maxpd((__v2df)a, (__v2df)b);

/* _mm_and_pd / _mm_andnot_pd / _mm_or_pd / _mm_xor_pd: bitwise ops through
   the unsigned integer view of the register */
return (__m128d)((__v2du)a & (__v2du)b);
return (__m128d)(~(__v2du)a & (__v2du)b);
return (__m128d)((__v2du)a | (__v2du)b);
return (__m128d)((__v2du)a ^ (__v2du)b);
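/*
 * Usage sketch (not part of the header): the packed double operations above
 * compose in the obvious way; this evaluates a*x + b in both lanes.
 * demo_axpb is a hypothetical helper:
 */
static __m128d demo_axpb(__m128d a, __m128d x, __m128d b)
{
    return _mm_add_pd(_mm_mul_pd(a, x), b);
}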
/* _mm_cmpeq_pd / _mm_cmplt_pd / _mm_cmple_pd; gt/ge are lt/le with the
   operands swapped */
return (__m128d)__builtin_ia32_cmpeqpd((__v2df)a, (__v2df)b);
return (__m128d)__builtin_ia32_cmpltpd((__v2df)a, (__v2df)b);
return (__m128d)__builtin_ia32_cmplepd((__v2df)a, (__v2df)b);
return (__m128d)__builtin_ia32_cmpltpd((__v2df)b, (__v2df)a);
return (__m128d)__builtin_ia32_cmplepd((__v2df)b, (__v2df)a);

/* _mm_cmpord_pd / _mm_cmpunord_pd / _mm_cmpneq_pd / _mm_cmpnlt_pd /
   _mm_cmpnle_pd; ngt/nge again swap the operands */
return (__m128d)__builtin_ia32_cmpordpd((__v2df)a, (__v2df)b);
return (__m128d)__builtin_ia32_cmpunordpd((__v2df)a, (__v2df)b);
return (__m128d)__builtin_ia32_cmpneqpd((__v2df)a, (__v2df)b);
return (__m128d)__builtin_ia32_cmpnltpd((__v2df)a, (__v2df)b);
return (__m128d)__builtin_ia32_cmpnlepd((__v2df)a, (__v2df)b);
return (__m128d)__builtin_ia32_cmpnltpd((__v2df)b, (__v2df)a);
return (__m128d)__builtin_ia32_cmpnlepd((__v2df)b, (__v2df)a);
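/*
 * Usage sketch (not part of the header): packed compares return all-ones or
 * all-zero bit masks per lane, so a branchless select is and/andnot/or.
 * demo_select_min is a hypothetical helper:
 */
static __m128d demo_select_min(__m128d a, __m128d b)
{
    __m128d m = _mm_cmplt_pd(a, b);   /* lane mask: a < b */
    return _mm_or_pd(_mm_and_pd(m, a), _mm_andnot_pd(m, b));
    /* equivalent to _mm_min_pd(a, b) for non-NaN inputs */
}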
/* _mm_cmpeq_sd / _mm_cmplt_sd / _mm_cmple_sd */
return (__m128d)__builtin_ia32_cmpeqsd((__v2df)a, (__v2df)b);
return (__m128d)__builtin_ia32_cmpltsd((__v2df)a, (__v2df)b);
return (__m128d)__builtin_ia32_cmplesd((__v2df)a, (__v2df)b);

/* _mm_cmpgt_sd / _mm_cmpge_sd: swap the operands, then restore a's upper lane */
__m128d __c = __builtin_ia32_cmpltsd((__v2df)b, (__v2df)a);
return __extension__(__m128d){__c[0], a[1]};
__m128d __c = __builtin_ia32_cmplesd((__v2df)b, (__v2df)a);
return __extension__(__m128d){__c[0], a[1]};

/* _mm_cmpord_sd / _mm_cmpunord_sd / _mm_cmpneq_sd / _mm_cmpnlt_sd /
   _mm_cmpnle_sd */
return (__m128d)__builtin_ia32_cmpordsd((__v2df)a, (__v2df)b);
return (__m128d)__builtin_ia32_cmpunordsd((__v2df)a, (__v2df)b);
return (__m128d)__builtin_ia32_cmpneqsd((__v2df)a, (__v2df)b);
return (__m128d)__builtin_ia32_cmpnltsd((__v2df)a, (__v2df)b);
return (__m128d)__builtin_ia32_cmpnlesd((__v2df)a, (__v2df)b);

/* _mm_cmpngt_sd / _mm_cmpnge_sd: same swap-and-merge pattern */
__m128d __c = __builtin_ia32_cmpnltsd((__v2df)b, (__v2df)a);
return __extension__(__m128d){__c[0], a[1]};
__m128d __c = __builtin_ia32_cmpnlesd((__v2df)b, (__v2df)a);
return __extension__(__m128d){__c[0], a[1]};
/* _mm_comieq_sd .. _mm_comineq_sd: ordered scalar compares returning int */
return __builtin_ia32_comisdeq((__v2df)a, (__v2df)b);
return __builtin_ia32_comisdlt((__v2df)a, (__v2df)b);
return __builtin_ia32_comisdle((__v2df)a, (__v2df)b);
return __builtin_ia32_comisdgt((__v2df)a, (__v2df)b);
return __builtin_ia32_comisdge((__v2df)a, (__v2df)b);
return __builtin_ia32_comisdneq((__v2df)a, (__v2df)b);

/* _mm_ucomieq_sd .. _mm_ucomineq_sd: unordered variants (no exception on
   quiet NaNs) */
return __builtin_ia32_ucomisdeq((__v2df)a, (__v2df)b);
return __builtin_ia32_ucomisdlt((__v2df)a, (__v2df)b);
return __builtin_ia32_ucomisdle((__v2df)a, (__v2df)b);
return __builtin_ia32_ucomisdgt((__v2df)a, (__v2df)b);
return __builtin_ia32_ucomisdge((__v2df)a, (__v2df)b);
return __builtin_ia32_ucomisdneq((__v2df)a, (__v2df)b);
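/*
 * Usage sketch (not part of the header): unlike _mm_cmp*_sd, which return a
 * vector mask, the comi/ucomi family returns a plain int, so it is the form
 * used directly in branches. demo_scalar_max is a hypothetical helper:
 */
static double demo_scalar_max(double x, double y)
{
    __m128d a = _mm_set_sd(x), b = _mm_set_sd(y);
    return _mm_comigt_sd(a, b) ? x : y;   /* compares the low lanes */
}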
/* _mm_cvtpd_ps */
return __builtin_ia32_cvtpd2ps((__v2df)a);

/* _mm_cvtps_pd: widen the two low floats */
#if HAS_BUILTIN(__builtin_convertvector)
return (__m128d)__builtin_convertvector(
    __builtin_shufflevector((__v4sf)a, (__v4sf)a, 0, 1), __v2df);
#else
return __builtin_ia32_cvtps2pd(a);
#endif

/* _mm_cvtepi32_pd: convert the two low 32-bit integers */
#if HAS_BUILTIN(__builtin_convertvector)
return (__m128d)__builtin_convertvector(
    __builtin_shufflevector((__v4si)a, (__v4si)a, 0, 1), __v2df);
#else
return __builtin_ia32_cvtdq2pd((__v4si)a);
#endif

/* _mm_cvtpd_epi32 / _mm_cvtsd_si32 / _mm_cvtsd_ss */
return (__m128i)__builtin_ia32_cvtpd2dq((__v2df)a);
return __builtin_ia32_cvtsd2si((__v2df)a);
return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)a, (__v2df)b);

/* _mm_cvttpd_epi32 / _mm_cvttsd_si32: truncating conversions */
return (__m128i)__builtin_ia32_cvttpd2dq((__v2df)a);
return __builtin_ia32_cvttsd2si((__v2df)a);

/* _mm_cvtpd_pi32 / _mm_cvttpd_pi32 / _mm_cvtpi32_pd: MMX-register forms */
return (__m64)__builtin_ia32_cvtpd2pi((__v2df)a);
return (__m64)__builtin_ia32_cvttpd2pi((__v2df)a);
return __builtin_ia32_cvtpi2pd((__v2si)a);
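/*
 * Usage sketch (not part of the header): cvt* rounds under the current MXCSR
 * mode, cvtt* always truncates toward zero. demo_pd_to_epi32 is a
 * hypothetical helper:
 */
static void demo_pd_to_epi32(void)
{
    __m128d v = _mm_set_pd(-1.5, 2.5);    /* lanes: {2.5, -1.5} */
    __m128i r = _mm_cvtpd_epi32(v);       /* {2, -2, 0, 0}: round-to-even */
    __m128i t = _mm_cvttpd_epi32(v);      /* {2, -1, 0, 0}: truncated */
    (void)r; (void)t;
}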
/* _mm_load_pd: aligned load */
return *(const __m128d *)dp;

/* _mm_load1_pd: load one double and broadcast it to both lanes */
struct __mm_load1_pd_struct {
    double __u;
} __attribute__((__packed__, __may_alias__));
double __u = ((const struct __mm_load1_pd_struct *)dp)->__u;
return __extension__(__m128d){__u, __u};

#define _MM_SHUFFLE2(fp1, fp0) (((fp1) << 1) | (fp0))

/* _mm_loadr_pd: aligned load with the two lanes swapped */
#if HAS_BUILTIN(__builtin_shufflevector)
__m128d u = *(const __m128d *)dp;
return __builtin_shufflevector((__v2df)u, (__v2df)u, 1, 0);
#else
return (__m128d){ dp[1], dp[0] };
#endif

/* _mm_loadu_pd: unaligned load through a may_alias struct */
struct __loadu_pd {
    __m128d_u __v;
} __attribute__((__packed__, __may_alias__));
return ((const struct __loadu_pd *)dp)->__v;

/* _mm_loadu_si64 / _mm_loadu_si32 / _mm_loadu_si16: narrow unaligned loads,
   zero-extended into the low lanes */
struct __loadu_si64 {
    long long __v;
} __attribute__((__packed__, __may_alias__));
long long __u = ((const struct __loadu_si64 *)a)->__v;
return __extension__(__m128i)(__v2di){__u, 0LL};

struct __loadu_si32 {
    int __v;
} __attribute__((__packed__, __may_alias__));
int __u = ((const struct __loadu_si32 *)a)->__v;
return __extension__(__m128i)(__v4si){__u, 0, 0, 0};

struct __loadu_si16 {
    short __v;
} __attribute__((__packed__, __may_alias__));
short __u = ((const struct __loadu_si16 *)a)->__v;
return __extension__(__m128i)(__v8hi){__u, 0, 0, 0, 0, 0, 0, 0};

/* _mm_load_sd: load one double into the low lane, zero the high lane */
struct __mm_load_sd_struct {
    double __u;
} __attribute__((__packed__, __may_alias__));
double __u = ((const struct __mm_load_sd_struct *)dp)->__u;
return __extension__(__m128d){__u, 0};

/* _mm_loadh_pd / _mm_loadl_pd: replace one lane, keep the other from a */
struct __mm_loadh_pd_struct {
    double __u;
} __attribute__((__packed__, __may_alias__));
double __u = ((const struct __mm_loadh_pd_struct *)dp)->__u;
return __extension__(__m128d){a[0], __u};

struct __mm_loadl_pd_struct {
    double __u;
} __attribute__((__packed__, __may_alias__));
double __u = ((const struct __mm_loadl_pd_struct *)dp)->__u;
return __extension__(__m128d){__u, a[1]};
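/*
 * Usage sketch (not part of the header): _mm_load_pd requires a 16-byte
 * aligned pointer; _mm_loadu_pd accepts any address. demo_load is a
 * hypothetical helper:
 */
static __m128d demo_load(const double *p)
{
    /* Safe for any p; prefer _mm_load_pd only when p is 16-byte aligned. */
    return _mm_loadu_pd(p);
}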
/* _mm_undefined_pd */
#if HAS_BUILTIN(__builtin_ia32_undef128)
return (__m128d)__builtin_ia32_undef128();
#else
__m128d undef = undef;
return undef;
#endif

/* _mm_set_sd / _mm_set1_pd / _mm_set_pd / _mm_setr_pd / _mm_setzero_pd.
   Note _mm_set_pd(w, x): w is the HIGH lane. */
return __extension__(__m128d){w, 0};
return __extension__(__m128d){w, w};
return __extension__(__m128d){x, w};
return __extension__(__m128d){w, x};
return __extension__(__m128d){0, 0};
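/*
 * Note (not part of the header): as with the integer set functions,
 * _mm_set_pd lists the high lane first and _mm_setr_pd lists memory order.
 * demo_set_pd_order is a hypothetical helper:
 */
static void demo_set_pd_order(void)
{
    __m128d a = _mm_set_pd(1.0, 2.0);    /* lane 0 = 2.0, lane 1 = 1.0 */
    __m128d b = _mm_setr_pd(2.0, 1.0);   /* same contents */
    (void)a; (void)b;
}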
/* _mm_store_sd: store the low lane through a may_alias struct */
struct __mm_store_sd_struct {
    double __u;
} __attribute__((__packed__, __may_alias__));
((struct __mm_store_sd_struct *)dp)->__u = a[0];

/* _mm_store1_pd: broadcast the low lane, then store both lanes */
#if HAS_BUILTIN(__builtin_shufflevector)
a = __builtin_shufflevector((__v2df)a, (__v2df)a, 0, 0);
#endif

/* _mm_storeu_pd: unaligned store */
struct __storeu_pd {
    __m128d_u __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_pd *)dp)->__v = a;

/* _mm_storer_pd: swap the lanes, then store */
#if HAS_BUILTIN(__builtin_shufflevector)
a = __builtin_shufflevector((__v2df)a, (__v2df)a, 1, 0);
#endif

/* _mm_storeh_pd / _mm_storel_pd: store a single lane */
struct __mm_storeh_pd_struct {
    double __u;
} __attribute__((__packed__, __may_alias__));
((struct __mm_storeh_pd_struct *)dp)->__u = a[1];
struct __mm_storeh_pd_struct {
    double __u;
} __attribute__((__packed__, __may_alias__));
((struct __mm_storeh_pd_struct *)dp)->__u = a[0];
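/*
 * Usage sketch (not part of the header): extracting both halves of a vector
 * with the single-lane stores. demo_split is a hypothetical helper:
 */
static void demo_split(__m128d v, double *lo, double *hi)
{
    _mm_storel_pd(lo, v);   /* *lo = v[0] */
    _mm_storeh_pd(hi, v);   /* *hi = v[1] */
}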
/* _mm_add_epi8 / _mm_add_epi16 / _mm_add_epi32: wrap-around lane adds */
return (__m128i)((__v16qu)a + (__v16qu)b);
return (__m128i)((__v8hu)a + (__v8hu)b);
return (__m128i)((__v4su)a + (__v4su)b);

/* _mm_add_si64 (MMX) and _mm_add_epi64 */
return (__m64)__builtin_ia32_paddq((__v1di)a, (__v1di)b);
return (__m128i)((__v2du)a + (__v2du)b);

/* _mm_adds_epi8 / _mm_adds_epi16 / _mm_adds_epu8 / _mm_adds_epu16:
   saturating adds */
#if HAS_BUILTIN(__builtin_elementwise_add_sat)
return (__m128i)__builtin_elementwise_add_sat((__v16qs)a, (__v16qs)b);
#else
return (__m128i)__builtin_ia32_paddsb128((__v16qi)a, (__v16qi)b);
#endif
#if HAS_BUILTIN(__builtin_elementwise_add_sat)
return (__m128i)__builtin_elementwise_add_sat((__v8hi)a, (__v8hi)b);
#else
return (__m128i)__builtin_ia32_paddsw128((__v8hi)a, (__v8hi)b);
#endif
#if HAS_BUILTIN(__builtin_elementwise_add_sat)
return (__m128i)__builtin_elementwise_add_sat((__v16qu)a, (__v16qu)b);
#else
return (__m128i)__builtin_ia32_paddusb128((__v16qi)a, (__v16qi)b);
#endif
#if HAS_BUILTIN(__builtin_elementwise_add_sat)
return (__m128i)__builtin_elementwise_add_sat((__v8hu)a, (__v8hu)b);
#else
return (__m128i)__builtin_ia32_paddusw128((__v8hi)a, (__v8hi)b);
#endif
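/*
 * Usage sketch (not part of the header): saturating vs wrap-around byte adds.
 * demo_saturation is a hypothetical helper:
 */
static void demo_saturation(void)
{
    __m128i a = _mm_set1_epi8((char)200);
    __m128i b = _mm_set1_epi8(100);
    __m128i wrap = _mm_add_epi8(a, b);    /* 200+100 wraps to 44 in each byte */
    __m128i sat  = _mm_adds_epu8(a, b);   /* clamps to 255 in each byte */
    (void)wrap; (void)sat;
}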
/* _mm_avg_epu8 / _mm_avg_epu16: rounded unsigned averages */
return (__m128i)__builtin_ia32_pavgb128((__v16qi)a, (__v16qi)b);
return (__m128i)__builtin_ia32_pavgw128((__v8hi)a, (__v8hi)b);

/* _mm_madd_epi16: multiply 16-bit lanes, add adjacent pairs into 32 bits */
return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)a, (__v8hi)b);

/* _mm_max_epi16 / _mm_max_epu8 / _mm_min_epi16 / _mm_min_epu8 */
#if HAS_BUILTIN(__builtin_elementwise_max)
return (__m128i)__builtin_elementwise_max((__v8hi)a, (__v8hi)b);
#else
return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)a, (__v8hi)b);
#endif
#if HAS_BUILTIN(__builtin_elementwise_max)
return (__m128i)__builtin_elementwise_max((__v16qu)a, (__v16qu)b);
#else
return (__m128i)__builtin_ia32_pmaxub128((__v16qi)a, (__v16qi)b);
#endif
#if HAS_BUILTIN(__builtin_elementwise_min)
return (__m128i)__builtin_elementwise_min((__v8hi)a, (__v8hi)b);
#else
return (__m128i)__builtin_ia32_pminsw128((__v8hi)a, (__v8hi)b);
#endif
#if HAS_BUILTIN(__builtin_elementwise_min)
return (__m128i)__builtin_elementwise_min((__v16qu)a, (__v16qu)b);
#else
return (__m128i)__builtin_ia32_pminub128((__v16qi)a, (__v16qi)b);
#endif

/* _mm_mulhi_epi16 / _mm_mulhi_epu16 / _mm_mullo_epi16 */
return (__m128i)__builtin_ia32_pmulhw128((__v8hi)a, (__v8hi)b);
return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)a, (__v8hi)b);
return (__m128i)((__v8hu)a * (__v8hu)b);

/* _mm_mul_su32 (MMX) / _mm_mul_epu32 / _mm_sad_epu8 */
return (__m64)__builtin_ia32_pmuludq((__v2si)a, (__v2si)b);
return __builtin_ia32_pmuludq128((__v4si)a, (__v4si)b);
return __builtin_ia32_psadbw128((__v16qi)a, (__v16qi)b);
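/*
 * Usage sketch (not part of the header): _mm_sad_epu8 sums absolute byte
 * differences into two 64-bit lanes, a common motion-estimation kernel.
 * demo_sad16 is a hypothetical helper; _mm_cvtsi128_si64 is x64-only.
 */
static long long demo_sad16(const unsigned char *x, const unsigned char *y)
{
    __m128i vx = _mm_loadu_si128((const __m128i_u *)x);
    __m128i vy = _mm_loadu_si128((const __m128i_u *)y);
    __m128i s  = _mm_sad_epu8(vx, vy);    /* two partial sums */
    __m128i hi = _mm_srli_si128(s, 8);    /* move the high sum down */
    return _mm_cvtsi128_si64(_mm_add_epi64(s, hi));
}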
/* _mm_sub_epi8 / _mm_sub_epi16 / _mm_sub_epi32 */
return (__m128i)((__v16qu)a - (__v16qu)b);
return (__m128i)((__v8hu)a - (__v8hu)b);
return (__m128i)((__v4su)a - (__v4su)b);

/* _mm_sub_si64 (MMX) and _mm_sub_epi64 */
return (__m64)__builtin_ia32_psubq((__v1di)a, (__v1di)b);
return (__m128i)((__v2du)a - (__v2du)b);

/* _mm_subs_epi8 / _mm_subs_epi16 / _mm_subs_epu8 / _mm_subs_epu16:
   saturating subtracts */
#if HAS_BUILTIN(__builtin_elementwise_sub_sat)
return (__m128i)__builtin_elementwise_sub_sat((__v16qs)a, (__v16qs)b);
#else
return (__m128i)__builtin_ia32_psubsb128((__v16qi)a, (__v16qi)b);
#endif
#if HAS_BUILTIN(__builtin_elementwise_sub_sat)
return (__m128i)__builtin_elementwise_sub_sat((__v8hi)a, (__v8hi)b);
#else
return (__m128i)__builtin_ia32_psubsw128((__v8hi)a, (__v8hi)b);
#endif
#if HAS_BUILTIN(__builtin_elementwise_sub_sat)
return (__m128i)__builtin_elementwise_sub_sat((__v16qu)a, (__v16qu)b);
#else
return (__m128i)__builtin_ia32_psubusb128((__v16qi)a, (__v16qi)b);
#endif
#if HAS_BUILTIN(__builtin_elementwise_sub_sat)
return (__m128i)__builtin_elementwise_sub_sat((__v8hu)a, (__v8hu)b);
#else
return (__m128i)__builtin_ia32_psubusw128((__v8hi)a, (__v8hi)b);
#endif

/* _mm_and_si128 / _mm_andnot_si128 / _mm_or_si128 / _mm_xor_si128 */
return (__m128i)((__v2du)a & (__v2du)b);
return (__m128i)(~(__v2du)a & (__v2du)b);
return (__m128i)((__v2du)a | (__v2du)b);
return (__m128i)((__v2du)a ^ (__v2du)b);
#define _mm_slli_si128(a, imm) \
    ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)))
/* fallback when the byteshift builtin is unavailable: bit count = bytes * 8 */
return (__m128i)__builtin_ia32_pslldqi128(a, imm * 8);

/* _mm_slli_epi16 / _mm_sll_epi16 / _mm_slli_epi32 / _mm_sll_epi32 /
   _mm_slli_epi64 / _mm_sll_epi64 */
return (__m128i)__builtin_ia32_psllwi128((__v8hi)a, count);
return (__m128i)__builtin_ia32_psllw128((__v8hi)a, (__v8hi)count);
return (__m128i)__builtin_ia32_pslldi128((__v4si)a, count);
return (__m128i)__builtin_ia32_pslld128((__v4si)a, (__v4si)count);
return __builtin_ia32_psllqi128((__v2di)a, count);
return __builtin_ia32_psllq128((__v2di)a, (__v2di)count);

/* _mm_srai_epi16 / _mm_sra_epi16 / _mm_srai_epi32 / _mm_sra_epi32:
   arithmetic (sign-propagating) right shifts */
return (__m128i)__builtin_ia32_psrawi128((__v8hi)a, count);
return (__m128i)__builtin_ia32_psraw128((__v8hi)a, (__v8hi)count);
return (__m128i)__builtin_ia32_psradi128((__v4si)a, count);
return (__m128i)__builtin_ia32_psrad128((__v4si)a, (__v4si)count);

#define _mm_srli_si128(a, imm) \
    ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)))
/* fallback when the byteshift builtin is unavailable */
return (__m128i)__builtin_ia32_psrldqi128(a, imm * 8);

/* _mm_srli_epi16 / _mm_srl_epi16 / _mm_srli_epi32 / _mm_srl_epi32 /
   _mm_srli_epi64 / _mm_srl_epi64: logical right shifts */
return (__m128i)__builtin_ia32_psrlwi128((__v8hi)a, count);
return (__m128i)__builtin_ia32_psrlw128((__v8hi)a, (__v8hi)count);
return (__m128i)__builtin_ia32_psrldi128((__v4si)a, count);
return (__m128i)__builtin_ia32_psrld128((__v4si)a, (__v4si)count);
return __builtin_ia32_psrlqi128((__v2di)a, count);
return __builtin_ia32_psrlq128((__v2di)a, (__v2di)count);
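/*
 * Usage sketch (not part of the header): arithmetic vs logical right shift.
 * demo_shifts is a hypothetical helper:
 */
static void demo_shifts(void)
{
    __m128i v = _mm_set1_epi16(-16);     /* 0xFFF0 per lane */
    __m128i ar = _mm_srai_epi16(v, 2);   /* -4: the sign bit is replicated */
    __m128i lg = _mm_srli_epi16(v, 2);   /* 0x3FFC = 16380: zeros shifted in */
    (void)ar; (void)lg;
}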
/* _mm_cmpeq_epi8 / _mm_cmpeq_epi16 / _mm_cmpeq_epi32: all-ones on equality */
return (__m128i)((__v16qi)a == (__v16qi)b);
return (__m128i)((__v8hi)a == (__v8hi)b);
return (__m128i)((__v4si)a == (__v4si)b);

/* _mm_cmpgt_epi8 / _mm_cmpgt_epi16 / _mm_cmpgt_epi32: signed compares */
return (__m128i)((__v16qs)a > (__v16qs)b);
return (__m128i)((__v8hi)a > (__v8hi)b);
return (__m128i)((__v4si)a > (__v4si)b);
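/*
 * Usage sketch (not part of the header): vector compares return 0xFF.../0 per
 * lane; combine with _mm_movemask_epi8 to get a scalar bitmask.
 * demo_find_zero_byte is a hypothetical helper:
 */
static int demo_find_zero_byte(__m128i v)
{
    __m128i m = _mm_cmpeq_epi8(v, _mm_setzero_si128());
    return _mm_movemask_epi8(m);   /* bit i set => byte i of v is zero */
}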
/* _mm_cvtsd_si64 / _mm_cvttsd_si64 (x64) */
return __builtin_ia32_cvtsd2si64((__v2df)a);
return __builtin_ia32_cvttsd2si64((__v2df)a);

/* _mm_cvtepi32_ps */
#if HAS_BUILTIN(__builtin_convertvector)
return (__m128)__builtin_convertvector((__v4si)a, __v4sf);
#else
return __builtin_ia32_cvtdq2ps((__v4si)a);
#endif

/* _mm_cvtps_epi32 / _mm_cvttps_epi32 */
return (__m128i)__builtin_ia32_cvtps2dq((__v4sf)a);
return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)a);

/* _mm_cvtsi32_si128 / _mm_cvtsi64_si128: scalar into the low lane */
return __extension__(__m128i)(__v4si){a, 0, 0, 0};
return __extension__(__m128i)(__v2di){a, 0};

/* _mm_cvtsi128_si32: extract the low 32-bit lane */
__v4si b = (__v4si)a;
return b[0];
/* _mm_loadu_si128: unaligned load through a may_alias struct */
struct __loadu_si128 {
    __m128i_u __v;
} __attribute__((__packed__, __may_alias__));
return ((const struct __loadu_si128 *)p)->__v;

/* _mm_loadl_epi64: load 64 bits into the low lane, zero the high lane */
struct __mm_loadl_epi64_struct {
    long long __u;
} __attribute__((__packed__, __may_alias__));
return __extension__(__m128i){
    ((const struct __mm_loadl_epi64_struct *)p)->__u, 0};

/* _mm_undefined_si128 */
#if HAS_BUILTIN(__builtin_ia32_undef128)
return (__m128i)__builtin_ia32_undef128();
#else
__m128i undef = undef;
return undef;
#endif

/* _mm_set_epi64x / _mm_set_epi32 */
return __extension__(__m128i)(__v2di){q0, q1};
return __extension__(__m128i)(__v4si){i0, i1, i2, i3};

/* _mm_set_epi16 */
__m128i _mm_set_epi16(short w7, short w6, short w5, short w4,
                      short w3, short w2, short w1, short w0)
{
    return __extension__(__m128i)(__v8hi){w0, w1, w2, w3, w4, w5, w6, w7};
}
/* _mm_set_epi8 */
__m128i _mm_set_epi8(char b15, char b14, char b13, char b12,
                     char b11, char b10, char b9, char b8,
                     char b7, char b6, char b5, char b4,
                     char b3, char b2, char b1, char b0)
{
    return __extension__(__m128i)(__v16qi){
        b0, b1, b2, b3, b4, b5, b6, b7,
        b8, b9, b10, b11, b12, b13, b14, b15};
}

/* _mm_set1_epi8: broadcast one byte to all sixteen lanes */
return _mm_set_epi8(b, b, b, b, b, b, b, b, b, b, b, b, b, b, b, b);
/* _mm_setr_epi16 */
__m128i _mm_setr_epi16(short w0, short w1, short w2, short w3,
                       short w4, short w5, short w6, short w7)
{
    return _mm_set_epi16(w7, w6, w5, w4, w3, w2, w1, w0);
}

/* _mm_setr_epi8 */
__m128i _mm_setr_epi8(char b0, char b1, char b2, char b3,
                      char b4, char b5, char b6, char b7,
                      char b8, char b9, char b10, char b11,
                      char b12, char b13, char b14, char b15)
{
    return _mm_set_epi8(b15, b14, b13, b12, b11, b10, b9, b8,
                        b7, b6, b5, b4, b3, b2, b1, b0);
}

/* _mm_setzero_si128 */
return __extension__(__m128i)(__v2di){0LL, 0LL};
/* _mm_storeu_si128: unaligned store through a may_alias struct */
struct __storeu_si128 {
    __m128i_u __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_si128 *)p)->__v = b;

/* _mm_storeu_si64 / _mm_storeu_si32 / _mm_storeu_si16: narrow unaligned
   stores of the low lane */
struct __storeu_si64 {
    long long __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_si64 *)p)->__v = ((__v2di)b)[0];

struct __storeu_si32 {
    int __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_si32 *)p)->__v = ((__v4si)b)[0];

struct __storeu_si16 {
    short __v;
} __attribute__((__packed__, __may_alias__));
((struct __storeu_si16 *)p)->__v = ((__v8hi)b)[0];

/* _mm_maskmoveu_si128: bytes of d whose mask byte in n has the high bit set
   are written to p */
__builtin_ia32_maskmovdqu((__v16qi)d, (__v16qi)n, p);

/* _mm_storel_epi64: store the low 64 bits */
struct __mm_storel_epi64_struct {
    long long __u;
} __attribute__((__packed__, __may_alias__));
((struct __mm_storel_epi64_struct *)p)->__u = a[0];
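/*
 * Usage sketch (not part of the header): the narrow storeu forms make tail
 * handling easy without scalar fallback code. demo_store_tail is a
 * hypothetical helper:
 */
static void demo_store_tail(void *dst, __m128i v, int nbytes)
{
    if (nbytes >= 8)      _mm_storeu_si64(dst, v);
    else if (nbytes >= 4) _mm_storeu_si32(dst, v);
    else if (nbytes >= 2) _mm_storeu_si16(dst, v);
}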
/* _mm_stream_pd: non-temporal store of two doubles */
#if HAS_BUILTIN(__builtin_nontemporal_store)
__builtin_nontemporal_store((__v2df)a, (__v2df *)p);
#else
__builtin_ia32_movntpd(p, a);
#endif

/* _mm_stream_si128 */
#if HAS_BUILTIN(__builtin_nontemporal_store)
__builtin_nontemporal_store((__v2di)a, (__v2di *)p);
#else
__builtin_ia32_movntdq(p, a);
#endif

/* _mm_stream_si32 / _mm_stream_si64x */
__builtin_ia32_movnti(p, a);
__builtin_ia32_movnti64(p, a);
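/*
 * Usage sketch (not part of the header): non-temporal stores bypass the
 * cache, so fence before other threads read the data. A fill loop, assuming
 * dst is 16-byte aligned; demo_stream_fill is a hypothetical helper:
 */
static void demo_stream_fill(double *dst, unsigned n, double v)
{
    __m128d x = _mm_set1_pd(v);
    unsigned i;
    for (i = 0; i + 2 <= n; i += 2)
        _mm_stream_pd(dst + i, x);
    _mm_mfence();   /* order the streaming stores before later loads/stores */
}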
/* _mm_packs_epi16 / _mm_packs_epi32 / _mm_packus_epi16: narrowing with
   saturation */
return (__m128i)__builtin_ia32_packsswb128((__v8hi)a, (__v8hi)b);
return (__m128i)__builtin_ia32_packssdw128((__v4si)a, (__v4si)b);
return (__m128i)__builtin_ia32_packuswb128((__v8hi)a, (__v8hi)b);

#define _mm_extract_epi16(a, imm) \
    ((int)(unsigned short)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(a), \
                                                      (int)(imm)))
#define _mm_insert_epi16(a, b, imm) \
    ((__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b), \
                                          (int)(imm)))

/* _mm_movemask_epi8: gather the sign bit of each byte into an int */
return __builtin_ia32_pmovmskb128((__v16qi)a);

#define _mm_shuffle_epi32(a, imm) \
    ((__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a), (int)(imm)))
#define _mm_shufflelo_epi16(a, imm) \
    ((__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a), (int)(imm)))
#define _mm_shufflehi_epi16(a, imm) \
    ((__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a), (int)(imm)))
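/*
 * Usage sketch (not part of the header): the shuffle immediate packs four
 * 2-bit source indices, destination lane 0 in the low bits.
 * demo_reverse_epi32 is a hypothetical helper:
 */
static __m128i demo_reverse_epi32(__m128i v)
{
    return _mm_shuffle_epi32(v, 0x1B);   /* 0b00011011: picks lanes 3,2,1,0 */
}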
/* _mm_unpackhi_epi8 */
#if HAS_BUILTIN(__builtin_shufflevector)
return (__m128i)__builtin_shufflevector(
    (__v16qi)a, (__v16qi)b, 8, 16 + 8, 9, 16 + 9, 10, 16 + 10, 11,
    16 + 11, 12, 16 + 12, 13, 16 + 13, 14, 16 + 14, 15, 16 + 15);
#else
return (__m128i)__builtin_ia32_punpckhbw128((__v16qi)a, (__v16qi)b);
#endif

/* _mm_unpackhi_epi16 */
#if HAS_BUILTIN(__builtin_shufflevector)
return (__m128i)__builtin_shufflevector((__v8hi)a, (__v8hi)b, 4, 8 + 4, 5,
                                        8 + 5, 6, 8 + 6, 7, 8 + 7);
#else
return (__m128i)__builtin_ia32_punpckhwd128((__v8hi)a, (__v8hi)b);
#endif

/* _mm_unpackhi_epi32 */
#if HAS_BUILTIN(__builtin_shufflevector)
return (__m128i)__builtin_shufflevector((__v4si)a, (__v4si)b, 2, 4 + 2, 3,
                                        4 + 3);
#else
return (__m128i)__builtin_ia32_punpckhdq128((__v4si)a, (__v4si)b);
#endif

/* _mm_unpackhi_epi64 */
#if HAS_BUILTIN(__builtin_shufflevector)
return (__m128i)__builtin_shufflevector((__v2di)a, (__v2di)b, 1, 2 + 1);
#else
return (__m128i)__builtin_ia32_punpckhqdq128((__v2di)a, (__v2di)b);
#endif

/* _mm_unpacklo_epi8 */
#if HAS_BUILTIN(__builtin_shufflevector)
return (__m128i)__builtin_shufflevector(
    (__v16qi)a, (__v16qi)b, 0, 16 + 0, 1, 16 + 1, 2, 16 + 2, 3, 16 + 3, 4,
    16 + 4, 5, 16 + 5, 6, 16 + 6, 7, 16 + 7);
#else
return (__m128i)__builtin_ia32_punpcklbw128((__v16qi)a, (__v16qi)b);
#endif

/* _mm_unpacklo_epi16 */
#if HAS_BUILTIN(__builtin_shufflevector)
return (__m128i)__builtin_shufflevector((__v8hi)a, (__v8hi)b, 0, 8 + 0, 1,
                                        8 + 1, 2, 8 + 2, 3, 8 + 3);
#else
return (__m128i)__builtin_ia32_punpcklwd128((__v8hi)a, (__v8hi)b);
#endif

/* _mm_unpacklo_epi32 */
#if HAS_BUILTIN(__builtin_shufflevector)
return (__m128i)__builtin_shufflevector((__v4si)a, (__v4si)b, 0, 4 + 0, 1,
                                        4 + 1);
#else
return (__m128i)__builtin_ia32_punpckldq128((__v4si)a, (__v4si)b);
#endif

/* _mm_unpacklo_epi64 */
#if HAS_BUILTIN(__builtin_shufflevector)
return (__m128i)__builtin_shufflevector((__v2di)a, (__v2di)b, 0, 2 + 0);
#else
return (__m128i)__builtin_ia32_punpcklqdq128((__v2di)a, (__v2di)b);
#endif
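/*
 * Usage sketch (not part of the header): unpacklo_epi8 against a zero vector
 * widens unsigned bytes to 16-bit lanes, the classic zero-extension idiom.
 * demo_widen_lo_u8 is a hypothetical helper:
 */
static __m128i demo_widen_lo_u8(__m128i v)
{
    return _mm_unpacklo_epi8(v, _mm_setzero_si128());
}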
/* _mm_movpi64_epi64: MMX register into the low lane */
return __extension__(__m128i)(__v2di){(long long)a, 0};

/* _mm_move_epi64: keep the low lane, zero the high lane */
#if HAS_BUILTIN(__builtin_shufflevector)
return __builtin_shufflevector((__v2di)a, (__v2di)_mm_setzero_si128(), 0, 2);
#else
return (__m128i)__builtin_ia32_movq128((__v2di)a);
#endif

/* _mm_unpackhi_pd / _mm_unpacklo_pd */
#if HAS_BUILTIN(__builtin_shufflevector)
return __builtin_shufflevector((__v2df)a, (__v2df)b, 1, 2 + 1);
#else
return (__m128d)__builtin_ia32_unpckhpd((__v2df)a, (__v2df)b);
#endif
#if HAS_BUILTIN(__builtin_shufflevector)
return __builtin_shufflevector((__v2df)a, (__v2df)b, 0, 2 + 0);
#else
return (__m128d)__builtin_ia32_unpcklpd((__v2df)a, (__v2df)b);
#endif

/* _mm_movemask_pd: sign bits of both lanes as an int */
return __builtin_ia32_movmskpd((__v2df)a);

#define _mm_shuffle_pd(a, b, i) \
    ((__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
                                    (int)(i)))
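/*
 * Usage sketch (not part of the header): _MM_SHUFFLE2 (defined above) builds
 * the two-bit immediate for _mm_shuffle_pd. demo_swap_lanes is a
 * hypothetical helper:
 */
static __m128d demo_swap_lanes(__m128d v)
{
    return _mm_shuffle_pd(v, v, _MM_SHUFFLE2(0, 1));   /* {v[1], v[0]} */
}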
#define _DECLSPEC_INTRIN_TYPE
#define __INTRIN_INLINE_MMXSSE2
__m128 _mm_cvtpd_ps(__m128d a)
__m128d _mm_cmpnge_sd(__m128d a, __m128d b)
void _mm_storeu_pd(double *dp, __m128d a)
__m128d _mm_add_sd(__m128d a, __m128d b)
void _mm_storeu_si128(__m128i_u *p, __m128i b)
__m128i _mm_set_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0)
__m128i _mm_cmpgt_epi8(__m128i a, __m128i b)
__m128i _mm_set1_epi16(short w)
int _mm_cvtsi128_si32(__m128i a)
__m128i _mm_set_epi32(int i3, int i2, int i1, int i0)
void _mm_store_si128(__m128i *p, __m128i b)
__m128i _mm_adds_epu16(__m128i a, __m128i b)
__m128i _mm_movpi64_epi64(__m64 a)
__m128i _mm_slli_epi64(__m128i a, int count)
int _mm_ucomile_sd(__m128d a, __m128d b)
__m128i _mm_slli_si128(__m128i a, int i)
__m128d _mm_cmpeq_sd(__m128d a, __m128d b)
int _mm_cvtsd_si32(__m128d a)
int _mm_comile_sd(__m128d a, __m128d b)
__m128i _mm_castps_si128(__m128 a)
__m128d _mm_cmpnlt_pd(__m128d a, __m128d b)
__m128i _mm_setr_epi32(int i0, int i1, int i2, int i3)
__m128i _mm_setzero_si128(void)
__m128i _mm_srl_epi64(__m128i a, __m128i count)
__m128d _mm_add_pd(__m128d a, __m128d b)
__m128i _mm_cvtpd_epi32(__m128d a)
__m128i _mm_xor_si128(__m128i a, __m128i b)
__m128i _mm_move_epi64(__m128i a)
__m128d _mm_sub_sd(__m128d a, __m128d b)
__m128d _mm_loadh_pd(__m128d a, double const *dp)
__m128d _mm_cmpnlt_sd(__m128d a, __m128d b)
__m128i _mm_srli_epi64(__m128i a, int count)
__m128d _mm_setr_pd(double w, double x)
#define __INTRIN_INLINE_SSE2
__m128d _mm_cmpord_sd(__m128d a, __m128d b)
__m128i _mm_set1_epi64(__m64 q)
__m128d _mm_cmpunord_pd(__m128d a, __m128d b)
__m128i _mm_packs_epi32(__m128i a, __m128i b)
__m128d _mm_castps_pd(__m128 a)
void _mm_store1_pd(double *dp, __m128d a)
__m128i _mm_sad_epu8(__m128i a, __m128i b)
__INTRIN_INLINE_SSE2 __m128d _mm_undefined_pd(void)
__INTRIN_INLINE_SSE2 void _mm_storeu_si32(void *p, __m128i b)
__m128d _mm_setzero_pd(void)
__m128 _mm_castsi128_ps(__m128i a)
__m128d _mm_cmpneq_pd(__m128d a, __m128d b)
void _mm_storel_epi64(__m128i_u *p, __m128i a)
__m128i _mm_packus_epi16(__m128i a, __m128i b)
__m128d _mm_set_sd(double w)
__m128 _mm_cvtepi32_ps(__m128i a)
__m128i _mm_adds_epu8(__m128i a, __m128i b)
__m128i _mm_sub_epi32(__m128i a, __m128i b)
__m128i _mm_castpd_si128(__m128d a)
__m128i _mm_add_epi16(__m128i a, __m128i b)
__m128d _mm_cmpnge_pd(__m128d a, __m128d b)
int _mm_comige_sd(__m128d a, __m128d b)
__m128d _mm_cmpge_pd(__m128d a, __m128d b)
__INTRIN_INLINE_SSE2 void _mm_storeu_si16(void *p, __m128i b)
__m128d _mm_loadr_pd(double const *dp)
__m128i _mm_mulhi_epu16(__m128i a, __m128i b)
__m128i _mm_slli_epi32(__m128i a, int count)
__m128i _mm_load_si128(__m128i const *p)
__m128d _mm_max_sd(__m128d a, __m128d b)
__m128d _mm_and_pd(__m128d a, __m128d b)
__m128i _mm_mul_epu32(__m128i a, __m128i b)
__m128i _mm_cvttpd_epi32(__m128d a)
int _mm_ucomige_sd(__m128d a, __m128d b)
__m128d _mm_sub_pd(__m128d a, __m128d b)
__m128i _mm_cmpeq_epi8(__m128i a, __m128i b)
__m128d _mm_load1_pd(double const *dp)
__m128d _mm_load_pd(double const *dp)
__m128d _mm_min_pd(__m128d a, __m128d b)
__m128i _mm_sll_epi32(__m128i a, __m128i count)
__m128i _mm_unpackhi_epi16(__m128i a, __m128i b)
__m64 _mm_sub_si64(__m64 a, __m64 b)
void _mm_stream_si32(int *p, int a)
__m128i _mm_subs_epu8(__m128i a, __m128i b)
__m128i _mm_srl_epi32(__m128i a, __m128i count)
__m128i _mm_mulhi_epi16(__m128i a, __m128i b)
#define _mm_shuffle_pd(a, b, i)
void _mm_stream_si128(__m128i *p, __m128i a)
__m128d _mm_or_pd(__m128d a, __m128d b)
__m128i _mm_cmpeq_epi16(__m128i a, __m128i b)
__m128d _mm_cmpge_sd(__m128d a, __m128d b)
__m128d _mm_mul_sd(__m128d a, __m128d b)
__m128i _mm_set1_epi8(char b)
__m128i _mm_sra_epi32(__m128i a, __m128i count)
__m128d _mm_sqrt_sd(__m128d a, __m128d b)
__m128i _mm_srai_epi32(__m128i a, int count)
__m128i _mm_cvtsi32_si128(int a)
__m128i _mm_slli_epi16(__m128i a, int count)
__m128d _mm_cmpeq_pd(__m128d a, __m128d b)
__m128i _mm_subs_epi8(__m128i a, __m128i b)
void _mm_storer_pd(double *dp, __m128d a)
typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16)))
__INTRIN_INLINE_SSE2 __m128i _mm_cvtsi64_si128(long long a)
int _mm_movemask_pd(__m128d a)
__m128i _mm_setr_epi64(__m64 q0, __m64 q1)
__m128i _mm_sub_epi64(__m128i a, __m128i b)
__m128d _mm_move_sd(__m128d a, __m128d b)
__m128i _mm_min_epu8(__m128i a, __m128i b)
__m64 _mm_add_si64(__m64 a, __m64 b)
__m128d _mm_cvtpi32_pd(__m64 a)
__INTRIN_INLINE_SSE2 __m128i _mm_set_epi64x(long long q1, long long q0)
__m128d _mm_unpackhi_pd(__m128d a, __m128d b)
int _mm_cvttsd_si32(__m128d a)
__m128d _mm_cmpnle_pd(__m128d a, __m128d b)
__m128i _mm_unpackhi_epi32(__m128i a, __m128i b)
__m128d _mm_cmpnle_sd(__m128d a, __m128d b)
__m128i _mm_add_epi8(__m128i a, __m128i b)
__m128d _mm_cmple_sd(__m128d a, __m128d b)
__m128d _mm_cmple_pd(__m128d a, __m128d b)
__m128i _mm_cmplt_epi32(__m128i a, __m128i b)
#define _mm_insert_epi16(a, b, imm)
__m128d _mm_loadu_pd(double const *dp)
__m128i _mm_avg_epu16(__m128i a, __m128i b)
__m64 _mm_mul_su32(__m64 a, __m64 b)
__m128d _mm_cvtss_sd(__m128d a, __m128 b)
int _mm_ucomieq_sd(__m128d a, __m128d b)
__m128i _mm_setr_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0)
__m128i _mm_sll_epi64(__m128i a, __m128i count)
__m128d _mm_cmpngt_sd(__m128d a, __m128d b)
void _mm_storel_pd(double *dp, __m128d a)
__m128d _mm_cmplt_sd(__m128d a, __m128d b)
__m128d _mm_cvtsi32_sd(__m128d a, int b)
__m128i _mm_or_si128(__m128i a, __m128i b)
__INTRIN_INLINE_SSE2 long long _mm_cvtsi128_si64(__m128i a)
__m128i _mm_cmplt_epi16(__m128i a, __m128i b)
__m128i _mm_subs_epi16(__m128i a, __m128i b)
__INTRIN_INLINE_SSE2 __m128i _mm_loadu_si64(void const *a)
__m128d _mm_cmpngt_pd(__m128d a, __m128d b)
#define _mm_extract_epi16(a, imm)
double _mm_cvtsd_f64(__m128d a)
#define _mm_shufflelo_epi16(a, imm)
__m128i _mm_packs_epi16(__m128i a, __m128i b)
__INTRIN_INLINE_SSE2 __m128i _mm_set1_epi64x(long long q)
__m128d _mm_min_sd(__m128d a, __m128d b)
void _mm_store_pd(double *dp, __m128d a)
__m128i _mm_srli_epi16(__m128i a, int count)
__m128i _mm_sub_epi8(__m128i a, __m128i b)
__m128d _mm_castsi128_pd(__m128i a)
__m128d _mm_cmpord_pd(__m128d a, __m128d b)
void _mm_storeh_pd(double *dp, __m128d a)
__m128d _mm_mul_pd(__m128d a, __m128d b)
__INTRIN_INLINE_SSE2 __m128i _mm_loadu_si32(void const *a)
__m64 _mm_cvttpd_pi32(__m128d a)
__m128i _mm_sll_epi16(__m128i a, __m128i count)
__m128d _mm_sqrt_pd(__m128d a)
__m64 _mm_cvtpd_pi32(__m128d a)
__m128i _mm_mullo_epi16(__m128i a, __m128i b)
__m128i _mm_sra_epi16(__m128i a, __m128i count)
int _mm_comieq_sd(__m128d a, __m128d b)
__m128i _mm_cvttps_epi32(__m128 a)
__m128d _mm_load_sd(double const *dp)
__m64 _mm_movepi64_pi64(__m128i a)
void _mm_maskmoveu_si128(__m128i d, __m128i n, _Out_writes_bytes_(16) char *p)
int _mm_movemask_epi8(__m128i a)
__m128i _mm_madd_epi16(__m128i a, __m128i b)
__m128d _mm_cmpgt_sd(__m128d a, __m128d b)
int _mm_ucomilt_sd(__m128d a, __m128d b)
__m128i _mm_cmpeq_epi32(__m128i a, __m128i b)
__m128i _mm_srai_epi16(__m128i a, int count)
__m128i _mm_set_epi16(short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0)
__m128i _mm_setr_epi16(short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7)
int _mm_ucomineq_sd(__m128d a, __m128d b)
__m128i _mm_cvtps_epi32(__m128 a)
__m128i _mm_unpackhi_epi8(__m128i a, __m128i b)
__m128d _mm_andnot_pd(__m128d a, __m128d b)
__m128d _mm_loadl_pd(__m128d a, double const *dp)
__m128d _mm_cmpneq_sd(__m128d a, __m128d b)
__m128i _mm_min_epi16(__m128i a, __m128i b)
__m128 _mm_cvtsd_ss(__m128 a, __m128d b)
__m128i _mm_andnot_si128(__m128i a, __m128i b)
__m128i _mm_and_si128(__m128i a, __m128i b)
__m128i _mm_setl_epi64(__m128i q)
void _mm_stream_pd(double *p, __m128d a)
__m128 _mm_castpd_ps(__m128d a)
__INTRIN_INLINE_SSE2 __m128i _mm_undefined_si128(void)
int _mm_comineq_sd(__m128d a, __m128d b)
__m128i _mm_avg_epu8(__m128i a, __m128i b)
int _mm_ucomigt_sd(__m128d a, __m128d b)
__m128i _mm_adds_epi16(__m128i a, __m128i b)
__m128i _mm_unpackhi_epi64(__m128i a, __m128i b)
__m128i _mm_srli_si128(__m128i a, int imm)
__m128i _mm_adds_epi8(__m128i a, __m128i b)
int _mm_comilt_sd(__m128d a, __m128d b)
__m128i _mm_unpacklo_epi16(__m128i a, __m128i b)
__m128i _mm_cmplt_epi8(__m128i a, __m128i b)
#define _mm_shufflehi_epi16(a, imm)
__m128d _mm_cvtepi32_pd(__m128i a)
__m128i _mm_max_epi16(__m128i a, __m128i b)
__m128i _mm_unpacklo_epi8(__m128i a, __m128i b)
__m128d _mm_xor_pd(__m128d a, __m128d b)
__INTRIN_INLINE_SSE2 void _mm_storeu_si64(void *p, __m128i b)
void _mm_clflush(void const *p)
__m128d _mm_cvtps_pd(__m128 a)
__m128d _mm_cmpgt_pd(__m128d a, __m128d b)
__INTRIN_INLINE_SSE2 __m128i _mm_loadu_si16(void const *a)
void _mm_store_sd(double *dp, __m128d a)
__m128d _mm_set_pd(double w, double x)
__m128i _mm_srli_epi32(__m128i a, int count)
int _mm_comigt_sd(__m128d a, __m128d b)
__m128i _mm_set_epi64(__m64 q1, __m64 q0)
__m128i _mm_cmpgt_epi16(__m128i a, __m128i b)
__m128d _mm_cmplt_pd(__m128d a, __m128d b)
__m128i _mm_add_epi32(__m128i a, __m128i b)
__m128i _mm_sub_epi16(__m128i a, __m128i b)
__m128i _mm_loadl_epi64(__m128i_u const *p)
__m128i _mm_add_epi64(__m128i a, __m128i b)
__m128d _mm_div_sd(__m128d a, __m128d b)
__m128i _mm_cmpgt_epi32(__m128i a, __m128i b)
#define _mm_shuffle_epi32(a, imm)
__m128d _mm_set1_pd(double w)
__m128i _mm_unpacklo_epi32(__m128i a, __m128i b)
__m128d _mm_div_pd(__m128d a, __m128d b)
__m128i _mm_srl_epi16(__m128i a, __m128i count)
__m128d _mm_max_pd(__m128d a, __m128d b)
__m128d _mm_cmpunord_sd(__m128d a, __m128d b)
__m128d _mm_unpacklo_pd(__m128d a, __m128d b)
__m128i _mm_subs_epu16(__m128i a, __m128i b)
__m128i _mm_loadu_si128(__m128i_u const *p)
__m128i _mm_set1_epi32(int i)
__m128i _mm_unpacklo_epi64(__m128i a, __m128i b)
__m128i _mm_max_epu8(__m128i a, __m128i b)
#define _Out_writes_bytes_(s)
#define _STATIC_ASSERT(expr)