author    Haochen Jiang <haochen.jiang@intel.com>  2023-08-28 11:14:54 +0800
committer liuhongt <hongtao.liu@intel.com>         2023-09-22 10:30:42 +0800
commit    e7a9a3b2b69b7e82f4cbfb618caff846c6af2d31 (patch)
tree      b50d8bf85f1c02652669dfe8e9d53967ba9fe78a
parent    b46a1223bd29c3c1128997842183846033134615 (diff)
Push evex512 target for 512 bit intrins
gcc/ChangeLog:

	* config/i386/avx512fintrin.h: Add evex512 target for 512 bit
	intrins.
-rw-r--r--  gcc/config/i386/avx512fintrin.h | 7411
1 file changed, 3745 insertions(+), 3666 deletions(-)
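
Usage sketch (editorial, not part of the upstream patch): after this change
the 128-bit scalar round intrinsics shown below remain usable with plain
-mavx512f, since only the 512-bit intrinsics are gated behind the evex512
target. For example, assuming a GCC with this patch applied:

    #include <immintrin.h>

    /* Scalar double-precision add with an explicit rounding mode
       (round toward zero, exceptions suppressed).  Compile with
       -mavx512f; the rounding argument must be a compile-time
       constant.  */
    __m128d
    add_round_to_zero (__m128d a, __m128d b)
    {
      return _mm_add_round_sd (a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    }
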
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index 517e787..85bf72d 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -34,6 +34,3748 @@
#define __DISABLE_AVX512F__
#endif /* __AVX512F__ */
+typedef unsigned char __mmask8;
+typedef unsigned short __mmask16;
+typedef unsigned int __mmask32;
+
+/* Constants for mantissa extraction.  */
+typedef enum
+{
+ _MM_MANT_NORM_1_2, /* interval [1, 2) */
+ _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */
+ _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */
+ _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */
+} _MM_MANTISSA_NORM_ENUM;
+
+typedef enum
+{
+ _MM_MANT_SIGN_src, /* sign = sign(SRC) */
+ _MM_MANT_SIGN_zero, /* sign = 0 */
+ _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */
+} _MM_MANTISSA_SIGN_ENUM;
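+
+/* Editorial note (not in the upstream patch): the getmant intrinsics
+   further down combine these two enums into a single immediate as
+   (sign << 2) | norm, so e.g. _MM_MANT_SIGN_zero with
+   _MM_MANT_NORM_1_2 encodes as (1 << 2) | 0 = 4.  */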
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
+ (__v2df) __B,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_add_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_add_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_add_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_add_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
+ (__v2df) __B,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sub_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sub_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sub_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U, __R);
+}
+
+#else
+#define _mm_add_round_sd(A, B, C) \
+ (__m128d)__builtin_ia32_addsd_round(A, B, C)
+
+#define _mm_mask_add_round_sd(W, U, A, B, C) \
+ (__m128d)__builtin_ia32_addsd_mask_round(A, B, W, U, C)
+
+#define _mm_maskz_add_round_sd(U, A, B, C) \
+ (__m128d)__builtin_ia32_addsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
+
+#define _mm_add_round_ss(A, B, C) \
+ (__m128)__builtin_ia32_addss_round(A, B, C)
+
+#define _mm_mask_add_round_ss(W, U, A, B, C) \
+ (__m128)__builtin_ia32_addss_mask_round(A, B, W, U, C)
+
+#define _mm_maskz_add_round_ss(U, A, B, C) \
+ (__m128)__builtin_ia32_addss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
+
+#define _mm_sub_round_sd(A, B, C) \
+ (__m128d)__builtin_ia32_subsd_round(A, B, C)
+
+#define _mm_mask_sub_round_sd(W, U, A, B, C) \
+ (__m128d)__builtin_ia32_subsd_mask_round(A, B, W, U, C)
+
+#define _mm_maskz_sub_round_sd(U, A, B, C) \
+ (__m128d)__builtin_ia32_subsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
+
+#define _mm_sub_round_ss(A, B, C) \
+ (__m128)__builtin_ia32_subss_round(A, B, C)
+
+#define _mm_mask_sub_round_ss(W, U, A, B, C) \
+ (__m128)__builtin_ia32_subss_mask_round(A, B, W, U, C)
+
+#define _mm_maskz_sub_round_ss(U, A, B, C) \
+ (__m128)__builtin_ia32_subss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
+
+#endif
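+
+/* Editorial note (not in the upstream patch): the inline functions
+   above are only defined under __OPTIMIZE__ because the rounding
+   argument must reach the builtin as an immediate; without
+   optimization the always_inline wrappers are not guaranteed to
+   constant-fold it, so the macro forms pass it through directly.  */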
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp14_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
+ (__v2df) __A);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
+ (__v2df) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
+ (__v2df) __A,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp14_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
+ (__v4sf) __A);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
+ (__v4sf) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
+ (__v4sf) __A,
+ (__v4sf) _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rsqrt14_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
+ (__v2df) __A);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
+ (__v2df) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
+ (__v2df) __A,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rsqrt14_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
+ (__v4sf) __A);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
+ (__v4sf) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
+ (__v4sf) __A,
+ (__v4sf) _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
+ (__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sqrt_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
+ (__v2df) __A,
+ (__v2df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sqrt_round_sd (__mmask8 __U, __m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
+ (__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
+ (__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sqrt_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
+ (__v4sf) __A,
+ (__v4sf) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sqrt_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
+ (__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
+ (__v2df) __B,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mul_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mul_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mul_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mul_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
+ (__v2df) __B,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_div_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_div_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_div_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_div_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_scalef_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_scalef_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_scalef_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U, __R);
+}
+#else
+#define _mm_sqrt_round_sd(A, B, C) \
+ (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, \
+ (__v2df) _mm_setzero_pd (), -1, C)
+
+#define _mm_mask_sqrt_round_sd(W, U, A, B, C) \
+ (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, W, U, C)
+
+#define _mm_maskz_sqrt_round_sd(U, A, B, C) \
+ (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, \
+ (__v2df) _mm_setzero_pd (), U, C)
+
+#define _mm_sqrt_round_ss(A, B, C) \
+ (__m128)__builtin_ia32_sqrtss_mask_round (B, A, \
+ (__v4sf) _mm_setzero_ps (), -1, C)
+
+#define _mm_mask_sqrt_round_ss(W, U, A, B, C) \
+ (__m128)__builtin_ia32_sqrtss_mask_round (B, A, W, U, C)
+
+#define _mm_maskz_sqrt_round_ss(U, A, B, C) \
+ (__m128)__builtin_ia32_sqrtss_mask_round (B, A, \
+ (__v4sf) _mm_setzero_ps (), U, C)
+
+#define _mm_mul_round_sd(A, B, C) \
+ (__m128d)__builtin_ia32_mulsd_round(A, B, C)
+
+#define _mm_mask_mul_round_sd(W, U, A, B, C) \
+ (__m128d)__builtin_ia32_mulsd_mask_round(A, B, W, U, C)
+
+#define _mm_maskz_mul_round_sd(U, A, B, C) \
+ (__m128d)__builtin_ia32_mulsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
+
+#define _mm_mul_round_ss(A, B, C) \
+ (__m128)__builtin_ia32_mulss_round(A, B, C)
+
+#define _mm_mask_mul_round_ss(W, U, A, B, C) \
+ (__m128)__builtin_ia32_mulss_mask_round(A, B, W, U, C)
+
+#define _mm_maskz_mul_round_ss(U, A, B, C) \
+ (__m128)__builtin_ia32_mulss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
+
+#define _mm_div_round_sd(A, B, C) \
+ (__m128d)__builtin_ia32_divsd_round(A, B, C)
+
+#define _mm_mask_div_round_sd(W, U, A, B, C) \
+ (__m128d)__builtin_ia32_divsd_mask_round(A, B, W, U, C)
+
+#define _mm_maskz_div_round_sd(U, A, B, C) \
+ (__m128d)__builtin_ia32_divsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
+
+#define _mm_div_round_ss(A, B, C) \
+ (__m128)__builtin_ia32_divss_round(A, B, C)
+
+#define _mm_mask_div_round_ss(W, U, A, B, C) \
+ (__m128)__builtin_ia32_divss_mask_round(A, B, W, U, C)
+
+#define _mm_maskz_div_round_ss(U, A, B, C) \
+ (__m128)__builtin_ia32_divss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
+
+#define _mm_scalef_round_sd(A, B, C) \
+ ((__m128d) \
+ __builtin_ia32_scalefsd_mask_round ((A), (B), \
+ (__v2df) _mm_undefined_pd (), \
+ -1, (C)))
+
+#define _mm_scalef_round_ss(A, B, C) \
+ ((__m128) \
+ __builtin_ia32_scalefss_mask_round ((A), (B), \
+ (__v4sf) _mm_undefined_ps (), \
+ -1, (C)))
+
+#define _mm_mask_scalef_round_sd(W, U, A, B, C) \
+ ((__m128d) \
+ __builtin_ia32_scalefsd_mask_round ((A), (B), (W), (U), (C)))
+
+#define _mm_mask_scalef_round_ss(W, U, A, B, C) \
+ ((__m128) \
+ __builtin_ia32_scalefss_mask_round ((A), (B), (W), (U), (C)))
+
+#define _mm_maskz_scalef_round_sd(U, A, B, C) \
+ ((__m128d) \
+ __builtin_ia32_scalefsd_mask_round ((A), (B), \
+ (__v2df) _mm_setzero_pd (), \
+ (U), (C)))
+
+#define _mm_maskz_scalef_round_ss(U, A, B, C) \
+ ((__m128) \
+ __builtin_ia32_scalefss_mask_round ((A), (B), \
+ (__v4sf) _mm_setzero_ps (), \
+ (U), (C)))
+#endif
+
+#define _mm_mask_sqrt_sd(W, U, A, B) \
+ _mm_mask_sqrt_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_sqrt_sd(U, A, B) \
+ _mm_maskz_sqrt_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_mask_sqrt_ss(W, U, A, B) \
+ _mm_mask_sqrt_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_sqrt_ss(U, A, B) \
+ _mm_maskz_sqrt_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_mask_scalef_sd(W, U, A, B) \
+ _mm_mask_scalef_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_scalef_sd(U, A, B) \
+ _mm_maskz_scalef_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_mask_scalef_ss(W, U, A, B) \
+ _mm_mask_scalef_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_scalef_ss(U, A, B) \
+ _mm_maskz_scalef_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
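+
+/* Editorial note (not in the upstream patch): the non-"round" masked
+   forms above simply default the rounding argument to
+   _MM_FROUND_CUR_DIRECTION, i.e. the rounding mode currently set in
+   MXCSR.  */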
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtu32_sd (__m128d __A, unsigned __B)
+{
+ return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
+}
+
+#ifdef __x86_64__
+#ifdef __OPTIMIZE__
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
+}
+#else
+#define _mm_cvt_roundu64_sd(A, B, C) \
+ (__m128d)__builtin_ia32_cvtusi2sd64(A, B, C)
+
+#define _mm_cvt_roundi64_sd(A, B, C) \
+ (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)
+
+#define _mm_cvt_roundsi64_sd(A, B, C) \
+ (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)
+#endif
+
+#endif
+
+#ifdef __OPTIMIZE__
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
+{
+ return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
+{
+ return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
+{
+ return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
+}
+#else
+#define _mm_cvt_roundu32_ss(A, B, C) \
+ (__m128)__builtin_ia32_cvtusi2ss32(A, B, C)
+
+#define _mm_cvt_roundi32_ss(A, B, C) \
+ (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)
+
+#define _mm_cvt_roundsi32_ss(A, B, C) \
+ (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)
+#endif
+
+#ifdef __x86_64__
+#ifdef __OPTIMIZE__
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
+{
+ return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
+{
+ return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
+{
+ return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
+}
+#else
+#define _mm_cvt_roundu64_ss(A, B, C) \
+ (__m128)__builtin_ia32_cvtusi2ss64(A, B, C)
+
+#define _mm_cvt_roundi64_ss(A, B, C) \
+ (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)
+
+#define _mm_cvt_roundsi64_ss(A, B, C) \
+ (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)
+#endif
+
+#endif
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float *__P)
+{
+ return (__m128) __builtin_ia32_loadss_mask (__P, (__v4sf) __W, __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_load_ss (__mmask8 __U, const float *__P)
+{
+ return (__m128) __builtin_ia32_loadss_mask (__P, (__v4sf) _mm_setzero_ps (),
+ __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double *__P)
+{
+ return (__m128d) __builtin_ia32_loadsd_mask (__P, (__v2df) __W, __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_load_sd (__mmask8 __U, const double *__P)
+{
+ return (__m128d) __builtin_ia32_loadsd_mask (__P, (__v2df) _mm_setzero_pd (),
+ __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_movess_mask ((__v4sf) __A, (__v4sf) __B,
+ (__v4sf) __W, __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_movess_mask ((__v4sf) __A, (__v4sf) __B,
+ (__v4sf) _mm_setzero_ps (), __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_movesd_mask ((__v2df) __A, (__v2df) __B,
+ (__v2df) __W, __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_movesd_mask ((__v2df) __A, (__v2df) __B,
+ (__v2df) _mm_setzero_pd (),
+ __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_store_ss (float *__P, __mmask8 __U, __m128 __A)
+{
+ __builtin_ia32_storess_mask (__P, (__v4sf) __A, (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_store_sd (double *__P, __mmask8 __U, __m128d __A)
+{
+ __builtin_ia32_storesd_mask (__P, (__v2df) __A, (__mmask8) __U);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
+ const int __imm, const int __R)
+{
+ return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2di) __C, __imm,
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
+ __m128i __C, const int __imm, const int __R)
+{
+ return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2di) __C, __imm,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ __m128i __C, const int __imm, const int __R)
+{
+ return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
+ (__v2df) __B,
+ (__v2di) __C,
+ __imm,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
+ const int __imm, const int __R)
+{
+ return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4si) __C, __imm,
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
+ __m128i __C, const int __imm, const int __R)
+{
+ return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4si) __C, __imm,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
+ __m128i __C, const int __imm, const int __R)
+{
+ return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4si) __C, __imm,
+ (__mmask8) __U, __R);
+}
+
+#else
+#define _mm_fixupimm_round_sd(X, Y, Z, C, R) \
+ ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
+ (__mmask8)(-1), (R)))
+
+#define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \
+ ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
+ (__mmask8)(U), (R)))
+
+#define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \
+ ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
+ (__mmask8)(U), (R)))
+
+#define _mm_fixupimm_round_ss(X, Y, Z, C, R) \
+ ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
+ (__mmask8)(-1), (R)))
+
+#define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \
+ ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
+ (__mmask8)(U), (R)))
+
+#define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \
+ ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
+ (__mmask8)(U), (R)))
+
+#endif
+
+#ifdef __x86_64__
+#ifdef __OPTIMIZE__
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundss_u64 (__m128 __A, const int __R)
+{
+ return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundss_si64 (__m128 __A, const int __R)
+{
+ return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundss_i64 (__m128 __A, const int __R)
+{
+ return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
+}
+
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundss_u64 (__m128 __A, const int __R)
+{
+ return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundss_i64 (__m128 __A, const int __R)
+{
+ return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundss_si64 (__m128 __A, const int __R)
+{
+ return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
+}
+#else
+#define _mm_cvt_roundss_u64(A, B) \
+ ((unsigned long long)__builtin_ia32_vcvtss2usi64(A, B))
+
+#define _mm_cvt_roundss_si64(A, B) \
+ ((long long)__builtin_ia32_vcvtss2si64(A, B))
+
+#define _mm_cvt_roundss_i64(A, B) \
+ ((long long)__builtin_ia32_vcvtss2si64(A, B))
+
+#define _mm_cvtt_roundss_u64(A, B) \
+ ((unsigned long long)__builtin_ia32_vcvttss2usi64(A, B))
+
+#define _mm_cvtt_roundss_i64(A, B) \
+ ((long long)__builtin_ia32_vcvttss2si64(A, B))
+
+#define _mm_cvtt_roundss_si64(A, B) \
+ ((long long)__builtin_ia32_vcvttss2si64(A, B))
+#endif
+#endif
+
+#ifdef __OPTIMIZE__
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundss_u32 (__m128 __A, const int __R)
+{
+ return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundss_si32 (__m128 __A, const int __R)
+{
+ return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundss_i32 (__m128 __A, const int __R)
+{
+ return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
+}
+
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundss_u32 (__m128 __A, const int __R)
+{
+ return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundss_i32 (__m128 __A, const int __R)
+{
+ return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundss_si32 (__m128 __A, const int __R)
+{
+ return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
+}
+#else
+#define _mm_cvt_roundss_u32(A, B) \
+ ((unsigned)__builtin_ia32_vcvtss2usi32(A, B))
+
+#define _mm_cvt_roundss_si32(A, B) \
+ ((int)__builtin_ia32_vcvtss2si32(A, B))
+
+#define _mm_cvt_roundss_i32(A, B) \
+ ((int)__builtin_ia32_vcvtss2si32(A, B))
+
+#define _mm_cvtt_roundss_u32(A, B) \
+ ((unsigned)__builtin_ia32_vcvttss2usi32(A, B))
+
+#define _mm_cvtt_roundss_si32(A, B) \
+ ((int)__builtin_ia32_vcvttss2si32(A, B))
+
+#define _mm_cvtt_roundss_i32(A, B) \
+ ((int)__builtin_ia32_vcvttss2si32(A, B))
+#endif
+
+#ifdef __x86_64__
+#ifdef __OPTIMIZE__
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsd_u64 (__m128d __A, const int __R)
+{
+ return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsd_si64 (__m128d __A, const int __R)
+{
+ return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsd_i64 (__m128d __A, const int __R)
+{
+ return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
+}
+
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
+{
+ return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
+{
+ return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
+{
+ return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
+}
+#else
+#define _mm_cvt_roundsd_u64(A, B) \
+ ((unsigned long long)__builtin_ia32_vcvtsd2usi64(A, B))
+
+#define _mm_cvt_roundsd_si64(A, B) \
+ ((long long)__builtin_ia32_vcvtsd2si64(A, B))
+
+#define _mm_cvt_roundsd_i64(A, B) \
+ ((long long)__builtin_ia32_vcvtsd2si64(A, B))
+
+#define _mm_cvtt_roundsd_u64(A, B) \
+ ((unsigned long long)__builtin_ia32_vcvttsd2usi64(A, B))
+
+#define _mm_cvtt_roundsd_si64(A, B) \
+ ((long long)__builtin_ia32_vcvttsd2si64(A, B))
+
+#define _mm_cvtt_roundsd_i64(A, B) \
+ ((long long)__builtin_ia32_vcvttsd2si64(A, B))
+#endif
+#endif
+
+#ifdef __OPTIMIZE__
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsd_u32 (__m128d __A, const int __R)
+{
+ return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsd_si32 (__m128d __A, const int __R)
+{
+ return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsd_i32 (__m128d __A, const int __R)
+{
+ return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
+}
+
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
+{
+ return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
+{
+ return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
+{
+ return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
+{
+ return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
+ (__v2df) __B,
+ __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvt_roundsd_ss (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128d __B, const int __R)
+{
+ return (__m128) __builtin_ia32_cvtsd2ss_mask_round ((__v4sf) __A,
+ (__v2df) __B,
+ (__v4sf) __W,
+ __U,
+ __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvt_roundsd_ss (__mmask8 __U, __m128 __A,
+ __m128d __B, const int __R)
+{
+ return (__m128) __builtin_ia32_cvtsd2ss_mask_round ((__v4sf) __A,
+ (__v2df) __B,
+ _mm_setzero_ps (),
+ __U,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
+ (__v4sf) __B,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvt_roundss_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128 __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_cvtss2sd_mask_round ((__v2df) __A,
+ (__v4sf) __B,
+ (__v2df) __W,
+ __U,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvt_roundss_sd (__mmask8 __U, __m128d __A,
+ __m128 __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_cvtss2sd_mask_round ((__v2df) __A,
+ (__v4sf) __B,
+ _mm_setzero_pd (),
+ __U,
+ __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
+ (__v4sf) __B,
+ __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_getexp_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_getexp_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
+ (__v2df) __B,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_getexp_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_getexp_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getmant_round_sd (__m128d __A, __m128d __B,
+ _MM_MANTISSA_NORM_ENUM __C,
+ _MM_MANTISSA_SIGN_ENUM __D, const int __R)
+{
+ return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
+ (__v2df) __B,
+ (__D << 2) | __C,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_getmant_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
+ _MM_MANTISSA_SIGN_ENUM __D, const int __R)
+{
+ return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__D << 2) | __C,
+ (__v2df) __W,
+ __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_getmant_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ _MM_MANTISSA_NORM_ENUM __C,
+ _MM_MANTISSA_SIGN_ENUM __D, const int __R)
+{
+ return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__D << 2) | __C,
+ (__v2df)
+ _mm_setzero_pd(),
+ __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getmant_round_ss (__m128 __A, __m128 __B,
+ _MM_MANTISSA_NORM_ENUM __C,
+ _MM_MANTISSA_SIGN_ENUM __D, const int __R)
+{
+ return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__D << 2) | __C,
+ __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_getmant_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
+ _MM_MANTISSA_SIGN_ENUM __D, const int __R)
+{
+ return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__D << 2) | __C,
+ (__v4sf) __W,
+ __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_getmant_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
+ _MM_MANTISSA_NORM_ENUM __C,
+ _MM_MANTISSA_SIGN_ENUM __D, const int __R)
+{
+ return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__D << 2) | __C,
+ (__v4sf)
+ _mm_setzero_ps(),
+ __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm,
+ const int __R)
+{
+ return (__m128)
+ __builtin_ia32_rndscaless_mask_round ((__v4sf) __A,
+ (__v4sf) __B, __imm,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_roundscale_round_ss (__m128 __A, __mmask8 __B, __m128 __C,
+ __m128 __D, const int __imm, const int __R)
+{
+ return (__m128)
+ __builtin_ia32_rndscaless_mask_round ((__v4sf) __C,
+ (__v4sf) __D, __imm,
+ (__v4sf) __A,
+ (__mmask8) __B,
+ __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_roundscale_round_ss (__mmask8 __A, __m128 __B, __m128 __C,
+ const int __imm, const int __R)
+{
+ return (__m128)
+ __builtin_ia32_rndscaless_mask_round ((__v4sf) __B,
+ (__v4sf) __C, __imm,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __A,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
+ const int __R)
+{
+ return (__m128d)
+ __builtin_ia32_rndscalesd_mask_round ((__v2df) __A,
+ (__v2df) __B, __imm,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_roundscale_round_sd (__m128d __A, __mmask8 __B, __m128d __C,
+ __m128d __D, const int __imm, const int __R)
+{
+ return (__m128d)
+ __builtin_ia32_rndscalesd_mask_round ((__v2df) __C,
+ (__v2df) __D, __imm,
+ (__v2df) __A,
+ (__mmask8) __B,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_roundscale_round_sd (__mmask8 __A, __m128d __B, __m128d __C,
+ const int __imm, const int __R)
+{
+ return (__m128d)
+ __builtin_ia32_rndscalesd_mask_round ((__v2df) __B,
+ (__v2df) __C, __imm,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __A,
+ __R);
+}
+
+#else
+#define _mm_cvt_roundsd_u32(A, B) \
+ ((unsigned)__builtin_ia32_vcvtsd2usi32(A, B))
+
+#define _mm_cvt_roundsd_si32(A, B) \
+ ((int)__builtin_ia32_vcvtsd2si32(A, B))
+
+#define _mm_cvt_roundsd_i32(A, B) \
+ ((int)__builtin_ia32_vcvtsd2si32(A, B))
+
+#define _mm_cvtt_roundsd_u32(A, B) \
+ ((unsigned)__builtin_ia32_vcvttsd2usi32(A, B))
+
+#define _mm_cvtt_roundsd_si32(A, B) \
+ ((int)__builtin_ia32_vcvttsd2si32(A, B))
+
+#define _mm_cvtt_roundsd_i32(A, B) \
+ ((int)__builtin_ia32_vcvttsd2si32(A, B))
+
+#define _mm_cvt_roundsd_ss(A, B, C) \
+ (__m128)__builtin_ia32_cvtsd2ss_round(A, B, C)
+
+#define _mm_mask_cvt_roundsd_ss(W, U, A, B, C) \
+ (__m128)__builtin_ia32_cvtsd2ss_mask_round ((A), (B), (W), (U), (C))
+
+#define _mm_maskz_cvt_roundsd_ss(U, A, B, C) \
+ (__m128)__builtin_ia32_cvtsd2ss_mask_round ((A), (B), _mm_setzero_ps (), \
+ (U), (C))
+
+#define _mm_cvt_roundss_sd(A, B, C) \
+ (__m128d)__builtin_ia32_cvtss2sd_round(A, B, C)
+
+#define _mm_mask_cvt_roundss_sd(W, U, A, B, C) \
+ (__m128d)__builtin_ia32_cvtss2sd_mask_round ((A), (B), (W), (U), (C))
+
+#define _mm_maskz_cvt_roundss_sd(U, A, B, C) \
+ (__m128d)__builtin_ia32_cvtss2sd_mask_round ((A), (B), _mm_setzero_pd (), \
+ (U), (C))
+
+#define _mm_getmant_round_sd(X, Y, C, D, R) \
+ ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (int)(((D)<<2) | (C)), \
+ (R)))
+
+#define _mm_mask_getmant_round_sd(W, U, X, Y, C, D, R) \
+ ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (int)(((D)<<2) | (C)), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U),\
+ (R)))
+
+#define _mm_maskz_getmant_round_sd(U, X, Y, C, D, R) \
+ ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (int)(((D)<<2) | (C)), \
+ (__v2df)(__m128d)_mm_setzero_pd(), \
+ (__mmask8)(U),\
+ (R)))
+
+#define _mm_getmant_round_ss(X, Y, C, D, R) \
+ ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), \
+ (int)(((D)<<2) | (C)), \
+ (R)))
+
+#define _mm_mask_getmant_round_ss(W, U, X, Y, C, D, R) \
+ ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), \
+ (int)(((D)<<2) | (C)), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U),\
+ (R)))
+
+#define _mm_maskz_getmant_round_ss(U, X, Y, C, D, R) \
+ ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), \
+ (int)(((D)<<2) | (C)), \
+ (__v4sf)(__m128)_mm_setzero_ps(), \
+ (__mmask8)(U),\
+ (R)))
+
+#define _mm_getexp_round_ss(A, B, R) \
+ ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), R))
+
+#define _mm_mask_getexp_round_ss(W, U, A, B, C) \
+ (__m128)__builtin_ia32_getexpss_mask_round(A, B, W, U, C)
+
+#define _mm_maskz_getexp_round_ss(U, A, B, C) \
+ (__m128)__builtin_ia32_getexpss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
+
+#define _mm_getexp_round_sd(A, B, R) \
+ ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), R))
+
+#define _mm_mask_getexp_round_sd(W, U, A, B, C) \
+ (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, W, U, C)
+
+#define _mm_maskz_getexp_round_sd(U, A, B, C) \
+ (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
+
+#define _mm_roundscale_round_ss(A, B, I, R) \
+ ((__m128) \
+ __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A), \
+ (__v4sf) (__m128) (B), \
+ (int) (I), \
+ (__v4sf) _mm_setzero_ps (), \
+ (__mmask8) (-1), \
+ (int) (R)))
+#define _mm_mask_roundscale_round_ss(A, U, B, C, I, R) \
+ ((__m128) \
+ __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (B), \
+ (__v4sf) (__m128) (C), \
+ (int) (I), \
+ (__v4sf) (__m128) (A), \
+ (__mmask8) (U), \
+ (int) (R)))
+#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
+ ((__m128) \
+ __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A), \
+ (__v4sf) (__m128) (B), \
+ (int) (I), \
+ (__v4sf) _mm_setzero_ps (), \
+ (__mmask8) (U), \
+ (int) (R)))
+#define _mm_roundscale_round_sd(A, B, I, R) \
+ ((__m128d) \
+ __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A), \
+ (__v2df) (__m128d) (B), \
+ (int) (I), \
+ (__v2df) _mm_setzero_pd (), \
+ (__mmask8) (-1), \
+ (int) (R)))
+#define _mm_mask_roundscale_round_sd(A, U, B, C, I, R) \
+ ((__m128d) \
+ __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (B), \
+ (__v2df) (__m128d) (C), \
+ (int) (I), \
+ (__v2df) (__m128d) (A), \
+ (__mmask8) (U), \
+ (int) (R)))
+#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
+ ((__m128d) \
+ __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A), \
+ (__v2df) (__m128d) (B), \
+ (int) (I), \
+ (__v2df) _mm_setzero_pd (), \
+ (__mmask8) (U), \
+ (int) (R)))
+
+#endif
+
+#define _mm_mask_cvtss_sd(W, U, A, B) \
+ _mm_mask_cvt_roundss_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_cvtss_sd(U, A, B) \
+ _mm_maskz_cvt_roundss_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_mask_cvtsd_ss(W, U, A, B) \
+ _mm_mask_cvt_roundsd_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_cvtsd_ss(U, A, B) \
+ _mm_maskz_cvt_roundsd_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#ifdef __OPTIMIZE__
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kshiftli_mask16 (__mmask16 __A, unsigned int __B)
+{
+ return (__mmask16) __builtin_ia32_kshiftlihi ((__mmask16) __A,
+ (__mmask8) __B);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kshiftri_mask16 (__mmask16 __A, unsigned int __B)
+{
+ return (__mmask16) __builtin_ia32_kshiftrihi ((__mmask16) __A,
+ (__mmask8) __B);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
+{
+ return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
+ (__v2df) __Y, __P,
+ (__mmask8) -1, __R);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
+ const int __P, const int __R)
+{
+ return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
+ (__v2df) __Y, __P,
+ (__mmask8) __M, __R);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
+{
+ return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
+ (__v4sf) __Y, __P,
+ (__mmask8) -1, __R);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
+ const int __P, const int __R)
+{
+ return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
+ (__v4sf) __Y, __P,
+ (__mmask8) __M, __R);
+}
+
+#else
+#define _kshiftli_mask16(X, Y) \
+ ((__mmask16) __builtin_ia32_kshiftlihi ((__mmask16)(X), (__mmask8)(Y)))
+
+#define _kshiftri_mask16(X, Y) \
+ ((__mmask16) __builtin_ia32_kshiftrihi ((__mmask16)(X), (__mmask8)(Y)))
+
+#define _mm_cmp_round_sd_mask(X, Y, P, R) \
+ ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P),\
+ (__mmask8)-1, R))
+
+#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
+ ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P),\
+ (M), R))
+
+#define _mm_cmp_round_ss_mask(X, Y, P, R) \
+ ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P), \
+ (__mmask8)-1, R))
+
+#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
+ ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P), \
+ (M), R))
+
+#endif
+
+extern __inline unsigned char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kortest_mask16_u8 (__mmask16 __A, __mmask16 __B, unsigned char *__CF)
+{
+ *__CF = (unsigned char) __builtin_ia32_kortestchi (__A, __B);
+ return (unsigned char) __builtin_ia32_kortestzhi (__A, __B);
+}
+
+extern __inline unsigned char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kortestz_mask16_u8 (__mmask16 __A, __mmask16 __B)
+{
+ return (unsigned char) __builtin_ia32_kortestzhi ((__mmask16) __A,
+ (__mmask16) __B);
+}
+
+extern __inline unsigned char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kortestc_mask16_u8 (__mmask16 __A, __mmask16 __B)
+{
+ return (unsigned char) __builtin_ia32_kortestchi ((__mmask16) __A,
+ (__mmask16) __B);
+}
+
+extern __inline unsigned int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_cvtmask16_u32 (__mmask16 __A)
+{
+ return (unsigned int) __builtin_ia32_kmovw ((__mmask16) __A);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_cvtu32_mask16 (unsigned int __A)
+{
+ return (__mmask16) __builtin_ia32_kmovw ((__mmask16) __A);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_load_mask16 (__mmask16 *__A)
+{
+ return (__mmask16) __builtin_ia32_kmovw (*(__mmask16 *) __A);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_store_mask16 (__mmask16 *__A, __mmask16 __B)
+{
+ *(__mmask16 *) __A = __builtin_ia32_kmovw (__B);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kand_mask16 (__mmask16 __A, __mmask16 __B)
+{
+ return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kandn_mask16 (__mmask16 __A, __mmask16 __B)
+{
+ return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A,
+ (__mmask16) __B);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kor_mask16 (__mmask16 __A, __mmask16 __B)
+{
+ return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kxnor_mask16 (__mmask16 __A, __mmask16 __B)
+{
+ return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kxor_mask16 (__mmask16 __A, __mmask16 __B)
+{
+ return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_knot_mask16 (__mmask16 __A)
+{
+ return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kunpackb_mask16 (__mmask8 __A, __mmask8 __B)
+{
+ return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
+}
+
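+/* Editorial usage sketch (not in the upstream patch): the 16-bit mask
+   operations above compose in the obvious way, e.g.
+
+     __mmask16 m = _kand_mask16 (_cvtu32_mask16 (0x00ff),
+				 _knot_mask16 (_cvtu32_mask16 (0x000f)));
+
+   leaves exactly bits 4..7 set (m == 0x00f0).  */
+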
+#ifdef __OPTIMIZE__
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
+ (__v2df) __B,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
+ (__v2df) __B,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U, __R);
+}
+
+#else
+#define _mm_max_round_sd(A, B, C) \
+ (__m128d)__builtin_ia32_maxsd_round(A, B, C)
+
+#define _mm_mask_max_round_sd(W, U, A, B, C) \
+ (__m128d)__builtin_ia32_maxsd_mask_round(A, B, W, U, C)
+
+#define _mm_maskz_max_round_sd(U, A, B, C) \
+ (__m128d)__builtin_ia32_maxsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
+
+#define _mm_max_round_ss(A, B, C) \
+ (__m128)__builtin_ia32_maxss_round(A, B, C)
+
+#define _mm_mask_max_round_ss(W, U, A, B, C) \
+ (__m128)__builtin_ia32_maxss_mask_round(A, B, W, U, C)
+
+#define _mm_maskz_max_round_ss(U, A, B, C) \
+ (__m128)__builtin_ia32_maxss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
+
+#define _mm_min_round_sd(A, B, C) \
+ (__m128d)__builtin_ia32_minsd_round(A, B, C)
+
+#define _mm_mask_min_round_sd(W, U, A, B, C) \
+ (__m128d)__builtin_ia32_minsd_mask_round(A, B, W, U, C)
+
+#define _mm_maskz_min_round_sd(U, A, B, C) \
+ (__m128d)__builtin_ia32_minsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
+
+#define _mm_min_round_ss(A, B, C) \
+ (__m128)__builtin_ia32_minss_round(A, B, C)
+
+#define _mm_mask_min_round_ss(W, U, A, B, C) \
+ (__m128)__builtin_ia32_minss_mask_round(A, B, W, U, C)
+
+#define _mm_maskz_min_round_ss(U, A, B, C) \
+ (__m128)__builtin_ia32_minss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
+
+#endif
+
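+/* Illustrative usage, assuming #include <immintrin.h> and -mavx512f.
+   Max/min do not round their result, so the rounding argument only
+   matters as _MM_FROUND_NO_EXC (suppress exceptions) or
+   _MM_FROUND_CUR_DIRECTION:
+
+     __m128d a = _mm_set_sd (1.5), b = _mm_set_sd (2.5);
+     __m128d hi = _mm_max_round_sd (a, b, _MM_FROUND_NO_EXC); // low lane: 2.5
+*/
+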
+#ifdef __OPTIMIZE__
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
+ (__v2df) __A,
+ (__v2df) __B,
+ __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
+ (__v4sf) __A,
+ (__v4sf) __B,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
+ (__v2df) __A,
+ -(__v2df) __B,
+ __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
+ (__v4sf) __A,
+ -(__v4sf) __B,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
+ -(__v2df) __A,
+ (__v2df) __B,
+ __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
+ -(__v4sf) __A,
+ (__v4sf) __B,
+ __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
+ -(__v2df) __A,
+ -(__v2df) __B,
+ __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
+ -(__v4sf) __A,
+ -(__v4sf) __B,
+ __R);
+}
+#else
+#define _mm_fmadd_round_sd(A, B, C, R) \
+ (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, C, R)
+
+#define _mm_fmadd_round_ss(A, B, C, R) \
+ (__m128)__builtin_ia32_vfmaddss3_round(A, B, C, R)
+
+#define _mm_fmsub_round_sd(A, B, C, R) \
+ (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, -(C), R)
+
+#define _mm_fmsub_round_ss(A, B, C, R) \
+ (__m128)__builtin_ia32_vfmaddss3_round(A, B, -(C), R)
+
+#define _mm_fnmadd_round_sd(A, B, C, R) \
+ (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), C, R)
+
+#define _mm_fnmadd_round_ss(A, B, C, R) \
+ (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), C, R)
+
+#define _mm_fnmsub_round_sd(A, B, C, R) \
+ (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), -(C), R)
+
+#define _mm_fnmsub_round_ss(A, B, C, R) \
+ (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), -(C), R)
+#endif
+
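+/* Illustrative usage, assuming #include <immintrin.h> and -mavx512f;
+   computes __W * __A + __B in the low lane with an explicit rounding
+   mode:
+
+     __m128d w = _mm_set_sd (2.0), a = _mm_set_sd (3.0), b = _mm_set_sd (1.0);
+     __m128d r = _mm_fmadd_round_sd (w, a, b,
+                                     _MM_FROUND_TO_NEAREST_INT
+                                     | _MM_FROUND_NO_EXC); // low lane: 7.0
+*/
+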
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
+ (__v2df) __A,
+ (__v2df) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
+ (__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmadd_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
+ (__v2df) __A,
+ (__v2df) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmadd_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
+ (__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
+ (__v2df) __A,
+ (__v2df) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
+ (__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
+ (__v2df) __A,
+ -(__v2df) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
+ (__v4sf) __A,
+ -(__v4sf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmsub_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
+{
+ return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
+ (__v2df) __A,
+ (__v2df) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmsub_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
+{
+ return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
+ (__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
+ (__v2df) __A,
+ -(__v2df) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
+ (__v4sf) __A,
+ -(__v4sf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
+ -(__v2df) __A,
+ (__v2df) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
+ -(__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmadd_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
+ -(__v2df) __A,
+ (__v2df) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmadd_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
+ -(__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
+ -(__v2df) __A,
+ (__v2df) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
+ -(__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
+ -(__v2df) __A,
+ -(__v2df) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
+ -(__v4sf) __A,
+ -(__v4sf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmsub_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
+{
+ return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
+ -(__v2df) __A,
+ (__v2df) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmsub_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
+{
+ return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
+ -(__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
+ -(__v2df) __A,
+ -(__v2df) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
+ -(__v4sf) __A,
+ -(__v4sf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
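+/* Illustrative note, assuming #include <immintrin.h> and -mavx512f:
+   in the _mm_mask_* forms above, bit 0 of the mask selects between
+   the fused result and the low lane of the first operand:
+
+     __m128d w = _mm_set_sd (2.0), a = _mm_set_sd (3.0), b = _mm_set_sd (1.0);
+     __m128d r = _mm_mask_fmadd_sd (w, (__mmask8) 0, a, b); // low lane: 2.0
+*/
+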
+#ifdef __OPTIMIZE__
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmadd_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
+ (__v2df) __A,
+ (__v2df) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmadd_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
+ (__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
+ (__v2df) __A,
+ (__v2df) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
+ (__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmadd_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
+ (__v2df) __A,
+ (__v2df) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmadd_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
+ (__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmsub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
+ (__v2df) __A,
+ -(__v2df) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmsub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
+ (__v4sf) __A,
+ -(__v4sf) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
+ (__v2df) __A,
+ (__v2df) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
+ (__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmsub_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
+ (__v2df) __A,
+ -(__v2df) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmsub_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
+ (__v4sf) __A,
+ -(__v4sf) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmadd_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
+ -(__v2df) __A,
+ (__v2df) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmadd_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
+ -(__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
+ -(__v2df) __A,
+ (__v2df) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
+ -(__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmadd_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
+ -(__v2df) __A,
+ (__v2df) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmadd_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
+ -(__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmsub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
+ -(__v2df) __A,
+ -(__v2df) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmsub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
+ -(__v4sf) __A,
+ -(__v4sf) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
+ -(__v2df) __A,
+ (__v2df) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
+ -(__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmsub_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
+ const int __R)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
+ -(__v2df) __A,
+ -(__v2df) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmsub_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
+ const int __R)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
+ -(__v4sf) __A,
+ -(__v4sf) __B,
+ (__mmask8) __U, __R);
+}
+#else
+#define _mm_mask_fmadd_round_sd(A, U, B, C, R) \
+ (__m128d) __builtin_ia32_vfmaddsd3_mask (A, B, C, U, R)
+
+#define _mm_mask_fmadd_round_ss(A, U, B, C, R) \
+ (__m128) __builtin_ia32_vfmaddss3_mask (A, B, C, U, R)
+
+#define _mm_mask3_fmadd_round_sd(A, B, C, U, R) \
+ (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, B, C, U, R)
+
+#define _mm_mask3_fmadd_round_ss(A, B, C, U, R) \
+ (__m128) __builtin_ia32_vfmaddss3_mask3 (A, B, C, U, R)
+
+#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
+ (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, C, U, R)
+
+#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
+ (__m128) __builtin_ia32_vfmaddss3_maskz (A, B, C, U, R)
+
+#define _mm_mask_fmsub_round_sd(A, U, B, C, R) \
+ (__m128d) __builtin_ia32_vfmaddsd3_mask (A, B, -(C), U, R)
+
+#define _mm_mask_fmsub_round_ss(A, U, B, C, R) \
+ (__m128) __builtin_ia32_vfmaddss3_mask (A, B, -(C), U, R)
+
+#define _mm_mask3_fmsub_round_sd(A, B, C, U, R) \
+ (__m128d) __builtin_ia32_vfmsubsd3_mask3 (A, B, C, U, R)
+
+#define _mm_mask3_fmsub_round_ss(A, B, C, U, R) \
+ (__m128) __builtin_ia32_vfmsubss3_mask3 (A, B, C, U, R)
+
+#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
+ (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, -(C), U, R)
+
+#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
+ (__m128) __builtin_ia32_vfmaddss3_maskz (A, B, -(C), U, R)
+
+#define _mm_mask_fnmadd_round_sd(A, U, B, C, R) \
+ (__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), C, U, R)
+
+#define _mm_mask_fnmadd_round_ss(A, U, B, C, R) \
+ (__m128) __builtin_ia32_vfmaddss3_mask (A, -(B), C, U, R)
+
+#define _mm_mask3_fnmadd_round_sd(A, B, C, U, R) \
+ (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, -(B), C, U, R)
+
+#define _mm_mask3_fnmadd_round_ss(A, B, C, U, R) \
+ (__m128) __builtin_ia32_vfmaddss3_mask3 (A, -(B), C, U, R)
+
+#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
+ (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), C, U, R)
+
+#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
+ (__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), C, U, R)
+
+#define _mm_mask_fnmsub_round_sd(A, U, B, C, R) \
+ (__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), -(C), U, R)
+
+#define _mm_mask_fnmsub_round_ss(A, U, B, C, R) \
+ (__m128) __builtin_ia32_vfmaddss3_mask (A, -(B), -(C), U, R)
+
+#define _mm_mask3_fnmsub_round_sd(A, B, C, U, R) \
+ (__m128d) __builtin_ia32_vfmsubsd3_mask3 (A, -(B), C, U, R)
+
+#define _mm_mask3_fnmsub_round_ss(A, B, C, U, R) \
+ (__m128) __builtin_ia32_vfmsubss3_mask3 (A, -(B), C, U, R)
+
+#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
+ (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), -(C), U, R)
+
+#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
+ (__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), -(C), U, R)
+#endif
+
+#ifdef __OPTIMIZE__
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
+{
+ return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
+{
+ return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
+}
+#else
+#define _mm_comi_round_ss(A, B, C, D)\
+__builtin_ia32_vcomiss(A, B, C, D)
+#define _mm_comi_round_sd(A, B, C, D)\
+__builtin_ia32_vcomisd(A, B, C, D)
+#endif
+
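+/* Illustrative usage, assuming #include <immintrin.h> and -mavx512f;
+   an ordered, non-signalling "less than" test of the low singles with
+   exceptions suppressed:
+
+     __m128 x = _mm_set_ss (1.0f), y = _mm_set_ss (2.0f);
+     int lt = _mm_comi_round_ss (x, y, _CMP_LT_OQ, _MM_FROUND_NO_EXC); // 1
+*/
+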
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_add_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_add_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_add_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_add_ss (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sub_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sub_ss (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mul_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B)
+{
+ return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mul_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mul_ss (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128 __B)
+{
+ return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mul_ss (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_div_sd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B)
+{
+ return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_div_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_div_ss (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128 __B)
+{
+ return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_div_ss (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_ss (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_ss (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_scalef_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_scalef_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
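+/* Illustrative usage, assuming #include <immintrin.h> and -mavx512f;
+   _mm_scalef_sd computes A[0] * 2^floor(B[0]) in the low lane:
+
+     __m128d r = _mm_scalef_sd (_mm_set_sd (3.0), _mm_set_sd (2.0)); // 12.0
+*/
+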
+#ifdef __x86_64__
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
+{
+ return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
+{
+ return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+#endif
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtu32_ss (__m128 __A, unsigned __B)
+{
+ return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
+{
+ return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2di) __C, __imm,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
+ __m128i __C, const int __imm)
+{
+ return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2di) __C, __imm,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ __m128i __C, const int __imm)
+{
+ return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
+ (__v2df) __B,
+ (__v2di) __C,
+ __imm,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
+{
+ return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4si) __C, __imm,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
+ __m128i __C, const int __imm)
+{
+ return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4si) __C, __imm,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
+ __m128i __C, const int __imm)
+{
+ return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4si) __C, __imm,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#else
+#define _mm_fixupimm_sd(X, Y, Z, C) \
+ ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
+ (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
+ ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
+ (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
+ ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
+ (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_fixupimm_ss(X, Y, Z, C) \
+ ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
+ (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
+ ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
+ (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
+ ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
+ (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#endif
+
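+/* Illustrative call shape, assuming #include <immintrin.h> and
+   -mavx512f; the 32-bit table in __C, indexed by the IEEE
+   special-value class of the low lane of __B, selects the fixup
+   response, while the immediate tweaks fault reporting (a, b and tbl
+   below are caller-supplied placeholders):
+
+     __m128d r = _mm_fixupimm_sd (a, b, tbl, 0);
+*/
+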
+#ifdef __x86_64__
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_u64 (__m128 __A)
+{
+ return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
+ __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttss_u64 (__m128 __A)
+{
+ return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
+ __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttss_i64 (__m128 __A)
+{
+ return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+#endif /* __x86_64__ */
+
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_u32 (__m128 __A)
+{
+ return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttss_u32 (__m128 __A)
+{
+ return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttss_i32 (__m128 __A)
+{
+ return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsd_i32 (__m128d __A)
+{
+ return (int) __builtin_ia32_cvtsd2si ((__v2df) __A);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_i32 (__m128 __A)
+{
+ return (int) __builtin_ia32_cvtss2si ((__v4sf) __A);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvti32_sd (__m128d __A, int __B)
+{
+ return (__m128d) __builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvti32_ss (__m128 __A, int __B)
+{
+ return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B);
+}
+
+#ifdef __x86_64__
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsd_u64 (__m128d __A)
+{
+ return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
+ __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsd_u64 (__m128d __A)
+{
+ return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
+ __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsd_i64 (__m128d __A)
+{
+ return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsd_i64 (__m128d __A)
+{
+ return (long long) __builtin_ia32_cvtsd2si64 ((__v2df) __A);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_i64 (__m128 __A)
+{
+ return (long long) __builtin_ia32_cvtss2si64 ((__v4sf) __A);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvti64_sd (__m128d __A, long long __B)
+{
+ return (__m128d) __builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvti64_ss (__m128 __A, long long __B)
+{
+ return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
+}
+#endif /* __x86_64__ */
+
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsd_u32 (__m128d __A)
+{
+ return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsd_u32 (__m128d __A)
+{
+ return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsd_i32 (__m128d __A)
+{
+ return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
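+/* Illustrative usage, assuming #include <immintrin.h> and -mavx512f;
+   truncating versus current-rounding-mode conversion of the low
+   double:
+
+     __m128d v = _mm_set_sd (2.7);
+     int t = _mm_cvttsd_i32 (v); // 2 (truncated)
+     int c = _mm_cvtsd_i32 (v);  // 3 under round-to-nearest
+*/
+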
+#ifdef __OPTIMIZE__
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getexp_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
+ (__v4sf) __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getexp_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
+ (__v2df) __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
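+/* Illustrative usage, assuming #include <immintrin.h> and -mavx512f;
+   the low lane receives floor(log2(|B[0]|)) as a double, the upper
+   lane comes from the first operand:
+
+     __m128d e = _mm_getexp_sd (_mm_setzero_pd (), _mm_set_sd (8.0)); // 3.0
+*/
+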
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
+ _MM_MANTISSA_SIGN_ENUM __D)
+{
+ return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
+ (__v2df) __B,
+ (__D << 2) | __C,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_getmant_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
+ _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
+{
+ return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__D << 2) | __C,
+ (__v2df) __W,
+ __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_getmant_sd (__mmask8 __U, __m128d __A, __m128d __B,
+ _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
+{
+ return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
+ (__v2df) __B,
+ (__D << 2) | __C,
+ (__v2df)
+ _mm_setzero_pd(),
+ __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
+ _MM_MANTISSA_SIGN_ENUM __D)
+{
+ return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__D << 2) | __C,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_getmant_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
+ _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
+{
+ return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__D << 2) | __C,
+ (__v4sf) __W,
+ __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_getmant_ss (__mmask8 __U, __m128 __A, __m128 __B,
+ _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
+{
+ return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
+ (__v4sf) __B,
+ (__D << 2) | __C,
+ (__v4sf)
+ _mm_setzero_ps(),
+ __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
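+/* Illustrative usage, assuming #include <immintrin.h> and -mavx512f;
+   12.0 == 1.5 * 2^3, so normalising its mantissa into [1, 2) yields
+   1.5 in the low lane:
+
+     __m128d m = _mm_getmant_sd (_mm_setzero_pd (), _mm_set_sd (12.0),
+                                 _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src); // 1.5
+*/
+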
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
+{
+ return (__m128)
+ __builtin_ia32_rndscaless_mask_round ((__v4sf) __A,
+ (__v4sf) __B, __imm,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_roundscale_ss (__m128 __A, __mmask8 __B, __m128 __C, __m128 __D,
+ const int __imm)
+{
+ return (__m128)
+ __builtin_ia32_rndscaless_mask_round ((__v4sf) __C,
+ (__v4sf) __D, __imm,
+ (__v4sf) __A,
+ (__mmask8) __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_roundscale_ss (__mmask8 __A, __m128 __B, __m128 __C,
+ const int __imm)
+{
+ return (__m128)
+ __builtin_ia32_rndscaless_mask_round ((__v4sf) __B,
+ (__v4sf) __C, __imm,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
+{
+ return (__m128d)
+ __builtin_ia32_rndscalesd_mask_round ((__v2df) __A,
+ (__v2df) __B, __imm,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_roundscale_sd (__m128d __A, __mmask8 __B, __m128d __C, __m128d __D,
+ const int __imm)
+{
+ return (__m128d)
+ __builtin_ia32_rndscalesd_mask_round ((__v2df) __C,
+ (__v2df) __D, __imm,
+ (__v2df) __A,
+ (__mmask8) __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_roundscale_sd (__mmask8 __A, __m128d __B, __m128d __C,
+ const int __imm)
+{
+ return (__m128d)
+ __builtin_ia32_rndscalesd_mask_round ((__v2df) __B,
+ (__v2df) __C, __imm,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
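+/* Illustrative usage, assuming #include <immintrin.h> and -mavx512f;
+   immediate 0x01 selects M = 0 fraction bits with round toward
+   negative infinity, i.e. a floor of the low lane:
+
+     __m128d f = _mm_roundscale_sd (_mm_setzero_pd (), _mm_set_sd (2.7),
+                                    0x01); // low lane: 2.0
+*/
+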
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
+ (__v2df) __Y, __P,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
+ (__v2df) __Y, __P,
+ (__mmask8) __M,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
+ (__v4sf) __Y, __P,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
+ (__v4sf) __Y, __P,
+ (__mmask8) __M,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#else
+#define _mm_getmant_sd(X, Y, C, D) \
+ ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (int)(((D)<<2) | (C)), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_getmant_sd(W, U, X, Y, C, D) \
+ ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (int)(((D)<<2) | (C)), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U),\
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_getmant_sd(U, X, Y, C, D) \
+ ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (int)(((D)<<2) | (C)), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U),\
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_getmant_ss(X, Y, C, D) \
+ ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), \
+ (int)(((D)<<2) | (C)), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_getmant_ss(W, U, X, Y, C, D) \
+ ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), \
+ (int)(((D)<<2) | (C)), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U),\
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_getmant_ss(U, X, Y, C, D) \
+ ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), \
+ (int)(((D)<<2) | (C)), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U),\
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_getexp_ss(A, B) \
+ ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_getexp_ss(W, U, A, B) \
+ (__m128)__builtin_ia32_getexpss_mask_round(A, B, W, U,\
+ _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_getexp_ss(U, A, B) \
+ (__m128)__builtin_ia32_getexpss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U,\
+ _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_getexp_sd(A, B) \
+ ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B),\
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_getexp_sd(W, U, A, B) \
+ (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, W, U,\
+ _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_getexp_sd(U, A, B) \
+ (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U,\
+ _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_roundscale_ss(A, B, I) \
+ ((__m128) \
+ __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A), \
+ (__v4sf) (__m128) (B), \
+ (int) (I), \
+ (__v4sf) _mm_setzero_ps (), \
+ (__mmask8) (-1), \
+ _MM_FROUND_CUR_DIRECTION))
+#define _mm_mask_roundscale_ss(A, U, B, C, I) \
+ ((__m128) \
+ __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (B), \
+ (__v4sf) (__m128) (C), \
+ (int) (I), \
+ (__v4sf) (__m128) (A), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+#define _mm_maskz_roundscale_ss(U, A, B, I) \
+ ((__m128) \
+ __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A), \
+ (__v4sf) (__m128) (B), \
+ (int) (I), \
+ (__v4sf) _mm_setzero_ps (), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+#define _mm_roundscale_sd(A, B, I) \
+ ((__m128d) \
+ __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A), \
+ (__v2df) (__m128d) (B), \
+ (int) (I), \
+ (__v2df) _mm_setzero_pd (), \
+ (__mmask8) (-1), \
+ _MM_FROUND_CUR_DIRECTION))
+#define _mm_mask_roundscale_sd(A, U, B, C, I) \
+ ((__m128d) \
+ __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (B), \
+ (__v2df) (__m128d) (C), \
+ (int) (I), \
+ (__v2df) (__m128d) (A), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+#define _mm_maskz_roundscale_sd(U, A, B, I) \
+ ((__m128d) \
+ __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A), \
+ (__v2df) (__m128d) (B), \
+ (int) (I), \
+ (__v2df) _mm_setzero_pd (), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_cmp_sd_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P),\
+ (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P),\
+ M,_MM_FROUND_CUR_DIRECTION))
+
+#define _mm_cmp_ss_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P), \
+ (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P), \
+ M,_MM_FROUND_CUR_DIRECTION))
+
+#endif
+
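+/* Illustrative usage, assuming #include <immintrin.h> and -mavx512f;
+   an ordered, non-signalling equality test of the low doubles,
+   returned in bit 0 of the mask:
+
+     __mmask8 k = _mm_cmp_sd_mask (_mm_set_sd (1.0), _mm_set_sd (1.0),
+                                   _CMP_EQ_OQ); // k == 1
+*/
+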
+#ifdef __DISABLE_AVX512F__
+#undef __DISABLE_AVX512F__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512F__ */
+
+#if !defined (__AVX512F__) || !defined (__EVEX512__)
+#pragma GCC push_options
+#pragma GCC target("avx512f,evex512")
+#define __DISABLE_AVX512F_512__
+#endif /* !__AVX512F__ || !__EVEX512__ */
+
/* Internal data types for implementing the intrinsics. */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
@@ -57,9 +3799,6 @@ typedef float __m512_u __attribute__ ((__vector_size__ (64), __may_alias__, __al
typedef long long __m512i_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef double __m512d_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
-typedef unsigned char __mmask8;
-typedef unsigned short __mmask16;
-
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_int2mask (int __M)
@@ -1498,174 +5237,6 @@ _mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
(__mmask16) __U);
}
-#ifdef __OPTIMIZE__
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
-{
- return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
- (__v2df) __B,
- __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_add_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
- __m128d __B, const int __R)
-{
- return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __W,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_add_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
- const int __R)
-{
- return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df)
- _mm_setzero_pd (),
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
-{
- return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
- (__v4sf) __B,
- __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_add_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
- __m128 __B, const int __R)
-{
- return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __W,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_add_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
- const int __R)
-{
- return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf)
- _mm_setzero_ps (),
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
-{
- return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
- (__v2df) __B,
- __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_sub_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
- __m128d __B, const int __R)
-{
- return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __W,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_sub_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
- const int __R)
-{
- return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df)
- _mm_setzero_pd (),
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
-{
- return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
- (__v4sf) __B,
- __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_sub_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
- __m128 __B, const int __R)
-{
- return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __W,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
- const int __R)
-{
- return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf)
- _mm_setzero_ps (),
- (__mmask8) __U, __R);
-}
-
-#else
-#define _mm_add_round_sd(A, B, C) \
- (__m128d)__builtin_ia32_addsd_round(A, B, C)
-
-#define _mm_mask_add_round_sd(W, U, A, B, C) \
- (__m128d)__builtin_ia32_addsd_mask_round(A, B, W, U, C)
-
-#define _mm_maskz_add_round_sd(U, A, B, C) \
- (__m128d)__builtin_ia32_addsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
-
-#define _mm_add_round_ss(A, B, C) \
- (__m128)__builtin_ia32_addss_round(A, B, C)
-
-#define _mm_mask_add_round_ss(W, U, A, B, C) \
- (__m128)__builtin_ia32_addss_mask_round(A, B, W, U, C)
-
-#define _mm_maskz_add_round_ss(U, A, B, C) \
- (__m128)__builtin_ia32_addss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
-
-#define _mm_sub_round_sd(A, B, C) \
- (__m128d)__builtin_ia32_subsd_round(A, B, C)
-
-#define _mm_mask_sub_round_sd(W, U, A, B, C) \
- (__m128d)__builtin_ia32_subsd_mask_round(A, B, W, U, C)
-
-#define _mm_maskz_sub_round_sd(U, A, B, C) \
- (__m128d)__builtin_ia32_subsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
-
-#define _mm_sub_round_ss(A, B, C) \
- (__m128)__builtin_ia32_subss_round(A, B, C)
-
-#define _mm_mask_sub_round_ss(W, U, A, B, C) \
- (__m128)__builtin_ia32_subss_mask_round(A, B, W, U, C)
-
-#define _mm_maskz_sub_round_ss(U, A, B, C) \
- (__m128)__builtin_ia32_subss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
-
-#endif
-
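
/* Usage sketch for the relocated add/sub rounding intrinsics (editorial;
   example_* is a hypothetical name, assuming <immintrin.h> and -mavx512f).
   An explicit rounding mode must be combined with _MM_FROUND_NO_EXC unless
   _MM_FROUND_CUR_DIRECTION is used.  */
static inline __m128d
example_add_low_rz (__m128d a, __m128d b)
{
  /* Low lane: a[0] + b[0] rounded toward zero; upper lane copied from a.  */
  return _mm_add_round_sd (a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}
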
/* Constant helper to represent the ternary logic operations among
vector A, B and C. */
typedef enum
@@ -1856,62 +5427,6 @@ _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
(__mmask16) __U);
}
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rcp14_sd (__m128d __A, __m128d __B)
-{
- return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
- (__v2df) __A);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
-{
- return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
- (__v2df) __A,
- (__v2df) __W,
- (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
-{
- return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
- (__v2df) __A,
-						 (__v2df) _mm_setzero_pd (),
- (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rcp14_ss (__m128 __A, __m128 __B)
-{
- return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
- (__v4sf) __A);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
- return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
- (__v4sf) __A,
- (__v4sf) __W,
- (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
-{
- return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
- (__v4sf) __A,
- (__v4sf) _mm_setzero_ps (),
- (__mmask8) __U);
-}
-
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt14_pd (__m512d __A)
@@ -1970,62 +5485,6 @@ _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
(__mmask16) __U);
}
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rsqrt14_sd (__m128d __A, __m128d __B)
-{
- return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
- (__v2df) __A);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
-{
- return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
- (__v2df) __A,
- (__v2df) __W,
- (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
-{
- return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
- (__v2df) __A,
- (__v2df) _mm_setzero_pd (),
- (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rsqrt14_ss (__m128 __A, __m128 __B)
-{
- return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
- (__v4sf) __A);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
- return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
- (__v4sf) __A,
- (__v4sf) __W,
- (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
-{
- return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
- (__v4sf) __A,
- (__v4sf) _mm_setzero_ps (),
- (__mmask8) __U);
-}
-
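
/* Sketch of the 14-bit approximation intrinsics above (editorial;
   hypothetical example_* helper, assumes <immintrin.h> and -mavx512f).
   rcp14/rsqrt14 give results with at most 2^-14 relative error, typically
   refined with a Newton-Raphson step when full precision is needed.  */
static inline __m128
example_fast_recip_low (__m128 a, __m128 b)
{
  /* Low lane: approximately 1/b[0]; upper lanes copied from a.  */
  return _mm_rcp14_ss (a, b);
}
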
#ifdef __OPTIMIZE__
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -2086,71 +5545,6 @@ _mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
(__mmask16) __U, __R);
}
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
-{
- return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
- (__v2df) __A,
- (__v2df)
- _mm_setzero_pd (),
- (__mmask8) -1, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_sqrt_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
- const int __R)
-{
- return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
- (__v2df) __A,
- (__v2df) __W,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_sqrt_round_sd (__mmask8 __U, __m128d __A, __m128d __B, const int __R)
-{
- return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
- (__v2df) __A,
- (__v2df)
- _mm_setzero_pd (),
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
-{
- return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
- (__v4sf) __A,
- (__v4sf)
- _mm_setzero_ps (),
- (__mmask8) -1, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_sqrt_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
- const int __R)
-{
- return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
- (__v4sf) __A,
- (__v4sf) __W,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_sqrt_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
-{
- return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
- (__v4sf) __A,
- (__v4sf)
- _mm_setzero_ps (),
- (__mmask8) __U, __R);
-}
#else
#define _mm512_sqrt_round_pd(A, C) \
(__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, C)
@@ -2170,41 +5564,8 @@ _mm_maskz_sqrt_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
#define _mm512_maskz_sqrt_round_ps(U, A, C) \
(__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
-#define _mm_sqrt_round_sd(A, B, C) \
- (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, \
- (__v2df) _mm_setzero_pd (), -1, C)
-
-#define _mm_mask_sqrt_round_sd(W, U, A, B, C) \
- (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, W, U, C)
-
-#define _mm_maskz_sqrt_round_sd(U, A, B, C) \
- (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, \
- (__v2df) _mm_setzero_pd (), U, C)
-
-#define _mm_sqrt_round_ss(A, B, C) \
- (__m128)__builtin_ia32_sqrtss_mask_round (B, A, \
- (__v4sf) _mm_setzero_ps (), -1, C)
-
-#define _mm_mask_sqrt_round_ss(W, U, A, B, C) \
- (__m128)__builtin_ia32_sqrtss_mask_round (B, A, W, U, C)
-
-#define _mm_maskz_sqrt_round_ss(U, A, B, C) \
- (__m128)__builtin_ia32_sqrtss_mask_round (B, A, \
- (__v4sf) _mm_setzero_ps (), U, C)
#endif
-#define _mm_mask_sqrt_sd(W, U, A, B) \
- _mm_mask_sqrt_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_maskz_sqrt_sd(U, A, B) \
- _mm_maskz_sqrt_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_mask_sqrt_ss(W, U, A, B) \
- _mm_mask_sqrt_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_maskz_sqrt_ss(U, A, B) \
- _mm_maskz_sqrt_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
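
/* Merge-masking sketch for the scalar sqrt wrappers above (editorial;
   example_* is hypothetical, assumes <immintrin.h> and -mavx512f).  */
static inline __m128d
example_masked_sqrt_low (__m128d w, __mmask8 u, __m128d a, __m128d b)
{
  /* Low lane: sqrt(b[0]) if bit 0 of u is set, else w[0]; upper lane
     comes from a.  */
  return _mm_mask_sqrt_sd (w, u, a, b);
}
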
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi8_epi32 (__m128i __A)
@@ -2802,134 +6163,6 @@ _mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
(__mmask16) __U, __R);
}
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
-{
- return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
- (__v2df) __B,
- __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_mul_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
- __m128d __B, const int __R)
-{
- return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __W,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_mul_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
- const int __R)
-{
- return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df)
- _mm_setzero_pd (),
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
-{
- return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
- (__v4sf) __B,
- __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_mul_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
- __m128 __B, const int __R)
-{
- return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __W,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_mul_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
- const int __R)
-{
- return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf)
- _mm_setzero_ps (),
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
-{
- return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
- (__v2df) __B,
- __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_div_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
- __m128d __B, const int __R)
-{
- return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __W,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_div_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
- const int __R)
-{
- return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df)
- _mm_setzero_pd (),
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
-{
- return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
- (__v4sf) __B,
- __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_div_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
- __m128 __B, const int __R)
-{
- return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __W,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_div_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
- const int __R)
-{
- return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf)
- _mm_setzero_ps (),
- (__mmask8) __U, __R);
-}
-
#else
#define _mm512_mul_round_pd(A, B, C) \
(__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
@@ -2967,42 +6200,6 @@ _mm_maskz_div_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
#define _mm512_maskz_div_round_ps(U, A, B, C) \
(__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
-#define _mm_mul_round_sd(A, B, C) \
- (__m128d)__builtin_ia32_mulsd_round(A, B, C)
-
-#define _mm_mask_mul_round_sd(W, U, A, B, C) \
- (__m128d)__builtin_ia32_mulsd_mask_round(A, B, W, U, C)
-
-#define _mm_maskz_mul_round_sd(U, A, B, C) \
- (__m128d)__builtin_ia32_mulsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
-
-#define _mm_mul_round_ss(A, B, C) \
- (__m128)__builtin_ia32_mulss_round(A, B, C)
-
-#define _mm_mask_mul_round_ss(W, U, A, B, C) \
- (__m128)__builtin_ia32_mulss_mask_round(A, B, W, U, C)
-
-#define _mm_maskz_mul_round_ss(U, A, B, C) \
- (__m128)__builtin_ia32_mulss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
-
-#define _mm_div_round_sd(A, B, C) \
- (__m128d)__builtin_ia32_divsd_round(A, B, C)
-
-#define _mm_mask_div_round_sd(W, U, A, B, C) \
- (__m128d)__builtin_ia32_divsd_mask_round(A, B, W, U, C)
-
-#define _mm_maskz_div_round_sd(U, A, B, C) \
- (__m128d)__builtin_ia32_divsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
-
-#define _mm_div_round_ss(A, B, C) \
- (__m128)__builtin_ia32_divss_round(A, B, C)
-
-#define _mm_mask_div_round_ss(W, U, A, B, C) \
- (__m128)__builtin_ia32_divss_mask_round(A, B, W, U, C)
-
-#define _mm_maskz_div_round_ss(U, A, B, C) \
- (__m128)__builtin_ia32_divss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
-
#endif
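
/* Usage sketch for the scalar mul/div rounding forms (editorial;
   hypothetical example_* name, assumes <immintrin.h> and -mavx512f).  */
static inline __m128
example_div_low_up (__m128 a, __m128 b)
{
  /* Low lane: a[0] / b[0] rounded toward +infinity with exceptions
     suppressed; upper lanes copied from a.  */
  return _mm_div_round_ss (a, b, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
}
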
#ifdef __OPTIMIZE__
@@ -3246,72 +6443,6 @@ _mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
(__mmask16) __U, __R);
}
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
-{
- return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df)
- _mm_setzero_pd (),
- (__mmask8) -1, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_scalef_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
- const int __R)
-{
- return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __W,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_scalef_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
- const int __R)
-{
- return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df)
- _mm_setzero_pd (),
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
-{
- return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf)
- _mm_setzero_ps (),
- (__mmask8) -1, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_scalef_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
- const int __R)
-{
- return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __W,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
-{
- return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf)
- _mm_setzero_ps (),
- (__mmask8) __U, __R);
-}
#else
#define _mm512_scalef_round_pd(A, B, C) \
((__m512d) \
@@ -3343,51 +6474,8 @@ _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
(__v16sf) _mm512_setzero_ps(), \
(U), (C)))
-#define _mm_scalef_round_sd(A, B, C) \
- ((__m128d) \
- __builtin_ia32_scalefsd_mask_round ((A), (B), \
- (__v2df) _mm_undefined_pd (), \
- -1, (C)))
-
-#define _mm_scalef_round_ss(A, B, C) \
- ((__m128) \
- __builtin_ia32_scalefss_mask_round ((A), (B), \
- (__v4sf) _mm_undefined_ps (), \
- -1, (C)))
-
-#define _mm_mask_scalef_round_sd(W, U, A, B, C) \
- ((__m128d) \
- __builtin_ia32_scalefsd_mask_round ((A), (B), (W), (U), (C)))
-
-#define _mm_mask_scalef_round_ss(W, U, A, B, C) \
- ((__m128) \
- __builtin_ia32_scalefss_mask_round ((A), (B), (W), (U), (C)))
-
-#define _mm_maskz_scalef_round_sd(U, A, B, C) \
- ((__m128d) \
- __builtin_ia32_scalefsd_mask_round ((A), (B), \
- (__v2df) _mm_setzero_pd (), \
- (U), (C)))
-
-#define _mm_maskz_scalef_round_ss(U, A, B, C) \
- ((__m128) \
- __builtin_ia32_scalefss_mask_round ((A), (B), \
- (__v4sf) _mm_setzero_ps (), \
- (U), (C)))
#endif
-#define _mm_mask_scalef_sd(W, U, A, B) \
- _mm_mask_scalef_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_maskz_scalef_sd(U, A, B) \
- _mm_maskz_scalef_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_mask_scalef_ss(W, U, A, B) \
- _mm_mask_scalef_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_maskz_scalef_ss(U, A, B) \
- _mm_maskz_scalef_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
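
/* Sketch of the scalef wrappers above (editorial; example_* is
   hypothetical, assumes <immintrin.h> and -mavx512f).  vscalef computes
   x * 2^floor(e), i.e. a branch-free ldexp on the low lane.  */
static inline __m128d
example_scale_pow2_low (__m128d x, __m128d e)
{
  /* Low lane: x[0] * 2^floor(e[0]); upper lane copied from x.  */
  return _mm_scalef_round_sd (x, e, _MM_FROUND_CUR_DIRECTION);
}
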
#ifdef __OPTIMIZE__
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -5188,115 +8276,6 @@ _mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
#endif
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtu32_sd (__m128d __A, unsigned __B)
-{
- return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
-}
-
-#ifdef __x86_64__
-#ifdef __OPTIMIZE__
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
-{
- return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
-{
- return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
-{
- return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
-}
-#else
-#define _mm_cvt_roundu64_sd(A, B, C) \
- (__m128d)__builtin_ia32_cvtusi2sd64(A, B, C)
-
-#define _mm_cvt_roundi64_sd(A, B, C) \
- (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)
-
-#define _mm_cvt_roundsi64_sd(A, B, C) \
- (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)
-#endif
-
-#endif
-
-#ifdef __OPTIMIZE__
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
-{
- return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
-{
- return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
-{
- return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
-}
-#else
-#define _mm_cvt_roundu32_ss(A, B, C) \
- (__m128)__builtin_ia32_cvtusi2ss32(A, B, C)
-
-#define _mm_cvt_roundi32_ss(A, B, C) \
- (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)
-
-#define _mm_cvt_roundsi32_ss(A, B, C) \
- (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)
-#endif
-
-#ifdef __x86_64__
-#ifdef __OPTIMIZE__
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
-{
- return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
-{
- return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
-{
- return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
-}
-#else
-#define _mm_cvt_roundu64_ss(A, B, C) \
- (__m128)__builtin_ia32_cvtusi2ss64(A, B, C)
-
-#define _mm_cvt_roundi64_ss(A, B, C) \
- (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)
-
-#define _mm_cvt_roundsi64_ss(A, B, C) \
- (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)
-#endif
-
-#endif
-
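
/* Integer-to-scalar conversion sketch (editorial; hypothetical example_*
   helper, assumes <immintrin.h> and -mavx512f).  */
static inline __m128
example_u32_to_low_float (__m128 a, unsigned v)
{
  /* Low lane becomes (float) v, rounded to nearest with exceptions
     suppressed; upper lanes copied from a.  */
  return _mm_cvt_roundu32_ss (a, v,
			      _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
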
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi32_epi8 (__m512i __A)
@@ -6394,83 +9373,6 @@ _mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
(__mmask16) __U);
}
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float *__P)
-{
- return (__m128) __builtin_ia32_loadss_mask (__P, (__v4sf) __W, __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_load_ss (__mmask8 __U, const float *__P)
-{
- return (__m128) __builtin_ia32_loadss_mask (__P, (__v4sf) _mm_setzero_ps (),
- __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double *__P)
-{
- return (__m128d) __builtin_ia32_loadsd_mask (__P, (__v2df) __W, __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_load_sd (__mmask8 __U, const double *__P)
-{
- return (__m128d) __builtin_ia32_loadsd_mask (__P, (__v2df) _mm_setzero_pd (),
- __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
- return (__m128) __builtin_ia32_movess_mask ((__v4sf) __A, (__v4sf) __B,
- (__v4sf) __W, __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
-{
- return (__m128) __builtin_ia32_movess_mask ((__v4sf) __A, (__v4sf) __B,
- (__v4sf) _mm_setzero_ps (), __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
-{
- return (__m128d) __builtin_ia32_movesd_mask ((__v2df) __A, (__v2df) __B,
- (__v2df) __W, __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
-{
- return (__m128d) __builtin_ia32_movesd_mask ((__v2df) __A, (__v2df) __B,
- (__v2df) _mm_setzero_pd (),
- __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_store_ss (float *__P, __mmask8 __U, __m128 __A)
-{
- __builtin_ia32_storess_mask (__P, (__v4sf) __A, (__mmask8) __U);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_store_sd (double *__P, __mmask8 __U, __m128d __A)
-{
- __builtin_ia32_storesd_mask (__P, (__v2df) __A, (__mmask8) __U);
-}
-
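
/* Sketch of the masked scalar loads above (editorial; example_* is
   hypothetical, assumes <immintrin.h> and -mavx512f).  AVX-512 masked
   loads suppress faults for masked-off elements, so a guarded read like
   this is safe even when p may be inaccessible.  */
static inline float
example_guarded_load (const float *p, __mmask8 u)
{
  /* Lane 0 is *p when bit 0 of u is set, else 0.0f.  */
  return _mm_cvtss_f32 (_mm_maskz_load_ss (u, p));
}
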
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_loadu_epi64 (void const *__P)
@@ -7273,73 +10175,6 @@ _mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
(__mmask16) __U, __R);
}
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
- const int __imm, const int __R)
-{
- return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
- (__v2df) __B,
- (__v2di) __C, __imm,
- (__mmask8) -1, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
- __m128i __C, const int __imm, const int __R)
-{
- return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
- (__v2df) __B,
- (__v2di) __C, __imm,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
- __m128i __C, const int __imm, const int __R)
-{
- return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
- (__v2df) __B,
- (__v2di) __C,
- __imm,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
- const int __imm, const int __R)
-{
- return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
- (__v4sf) __B,
- (__v4si) __C, __imm,
- (__mmask8) -1, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
- __m128i __C, const int __imm, const int __R)
-{
- return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
- (__v4sf) __B,
- (__v4si) __C, __imm,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
- __m128i __C, const int __imm, const int __R)
-{
- return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
- (__v4sf) __B,
- (__v4si) __C, __imm,
- (__mmask8) __U, __R);
-}
-
#else
#define _mm512_shuffle_pd(X, Y, C) \
((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
@@ -7407,35 +10242,6 @@ _mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
(__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
(__mmask16)(U), (R)))
-#define _mm_fixupimm_round_sd(X, Y, Z, C, R) \
- ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
- (__mmask8)(-1), (R)))
-
-#define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \
- ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
- (__mmask8)(U), (R)))
-
-#define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \
- ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
- (__mmask8)(U), (R)))
-
-#define _mm_fixupimm_round_ss(X, Y, Z, C, R) \
- ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
- (__mmask8)(-1), (R)))
-
-#define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \
- ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
- (__mmask8)(U), (R)))
-
-#define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \
- ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
- (__mmask8)(U), (R)))
#endif
extern __inline __m512
@@ -8169,258 +10975,6 @@ _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
(__mmask8) __U);
}
-#ifdef __x86_64__
-#ifdef __OPTIMIZE__
-extern __inline unsigned long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundss_u64 (__m128 __A, const int __R)
-{
- return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
-}
-
-extern __inline long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundss_si64 (__m128 __A, const int __R)
-{
- return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
-}
-
-extern __inline long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundss_i64 (__m128 __A, const int __R)
-{
- return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
-}
-
-extern __inline unsigned long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_roundss_u64 (__m128 __A, const int __R)
-{
- return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
-}
-
-extern __inline long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_roundss_i64 (__m128 __A, const int __R)
-{
- return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
-}
-
-extern __inline long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_roundss_si64 (__m128 __A, const int __R)
-{
- return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
-}
-#else
-#define _mm_cvt_roundss_u64(A, B) \
- ((unsigned long long)__builtin_ia32_vcvtss2usi64(A, B))
-
-#define _mm_cvt_roundss_si64(A, B) \
- ((long long)__builtin_ia32_vcvtss2si64(A, B))
-
-#define _mm_cvt_roundss_i64(A, B) \
- ((long long)__builtin_ia32_vcvtss2si64(A, B))
-
-#define _mm_cvtt_roundss_u64(A, B) \
- ((unsigned long long)__builtin_ia32_vcvttss2usi64(A, B))
-
-#define _mm_cvtt_roundss_i64(A, B) \
- ((long long)__builtin_ia32_vcvttss2si64(A, B))
-
-#define _mm_cvtt_roundss_si64(A, B) \
- ((long long)__builtin_ia32_vcvttss2si64(A, B))
-#endif
-#endif
-
-#ifdef __OPTIMIZE__
-extern __inline unsigned
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundss_u32 (__m128 __A, const int __R)
-{
- return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
-}
-
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundss_si32 (__m128 __A, const int __R)
-{
- return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
-}
-
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundss_i32 (__m128 __A, const int __R)
-{
- return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
-}
-
-extern __inline unsigned
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_roundss_u32 (__m128 __A, const int __R)
-{
- return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
-}
-
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_roundss_i32 (__m128 __A, const int __R)
-{
- return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
-}
-
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_roundss_si32 (__m128 __A, const int __R)
-{
- return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
-}
-#else
-#define _mm_cvt_roundss_u32(A, B) \
- ((unsigned)__builtin_ia32_vcvtss2usi32(A, B))
-
-#define _mm_cvt_roundss_si32(A, B) \
- ((int)__builtin_ia32_vcvtss2si32(A, B))
-
-#define _mm_cvt_roundss_i32(A, B) \
- ((int)__builtin_ia32_vcvtss2si32(A, B))
-
-#define _mm_cvtt_roundss_u32(A, B) \
- ((unsigned)__builtin_ia32_vcvttss2usi32(A, B))
-
-#define _mm_cvtt_roundss_si32(A, B) \
- ((int)__builtin_ia32_vcvttss2si32(A, B))
-
-#define _mm_cvtt_roundss_i32(A, B) \
- ((int)__builtin_ia32_vcvttss2si32(A, B))
-#endif
-
-#ifdef __x86_64__
-#ifdef __OPTIMIZE__
-extern __inline unsigned long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundsd_u64 (__m128d __A, const int __R)
-{
- return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
-}
-
-extern __inline long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundsd_si64 (__m128d __A, const int __R)
-{
- return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
-}
-
-extern __inline long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundsd_i64 (__m128d __A, const int __R)
-{
- return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
-}
-
-extern __inline unsigned long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
-{
- return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
-}
-
-extern __inline long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
-{
- return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
-}
-
-extern __inline long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
-{
- return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
-}
-#else
-#define _mm_cvt_roundsd_u64(A, B) \
- ((unsigned long long)__builtin_ia32_vcvtsd2usi64(A, B))
-
-#define _mm_cvt_roundsd_si64(A, B) \
- ((long long)__builtin_ia32_vcvtsd2si64(A, B))
-
-#define _mm_cvt_roundsd_i64(A, B) \
- ((long long)__builtin_ia32_vcvtsd2si64(A, B))
-
-#define _mm_cvtt_roundsd_u64(A, B) \
- ((unsigned long long)__builtin_ia32_vcvttsd2usi64(A, B))
-
-#define _mm_cvtt_roundsd_si64(A, B) \
- ((long long)__builtin_ia32_vcvttsd2si64(A, B))
-
-#define _mm_cvtt_roundsd_i64(A, B) \
- ((long long)__builtin_ia32_vcvttsd2si64(A, B))
-#endif
-#endif
-
-#ifdef __OPTIMIZE__
-extern __inline unsigned
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundsd_u32 (__m128d __A, const int __R)
-{
- return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
-}
-
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundsd_si32 (__m128d __A, const int __R)
-{
- return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
-}
-
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundsd_i32 (__m128d __A, const int __R)
-{
- return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
-}
-
-extern __inline unsigned
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
-{
- return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
-}
-
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
-{
- return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
-}
-
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
-{
- return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
-}
-#else
-#define _mm_cvt_roundsd_u32(A, B) \
- ((unsigned)__builtin_ia32_vcvtsd2usi32(A, B))
-
-#define _mm_cvt_roundsd_si32(A, B) \
- ((int)__builtin_ia32_vcvtsd2si32(A, B))
-
-#define _mm_cvt_roundsd_i32(A, B) \
- ((int)__builtin_ia32_vcvtsd2si32(A, B))
-
-#define _mm_cvtt_roundsd_u32(A, B) \
- ((unsigned)__builtin_ia32_vcvttsd2usi32(A, B))
-
-#define _mm_cvtt_roundsd_si32(A, B) \
- ((int)__builtin_ia32_vcvttsd2si32(A, B))
-
-#define _mm_cvtt_roundsd_i32(A, B) \
- ((int)__builtin_ia32_vcvttsd2si32(A, B))
-#endif
-
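
/* Scalar-to-integer conversion sketch (editorial; hypothetical example_*
   helper, assumes <immintrin.h> and -mavx512f).  The cvtt forms truncate
   like a C cast.  */
static inline int
example_truncate_low (__m128d a)
{
  /* (int) a[0], with floating-point exceptions suppressed.  */
  return _mm_cvtt_roundsd_i32 (a, _MM_FROUND_NO_EXC);
}
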
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_movedup_pd (__m512d __A)
@@ -8741,71 +11295,6 @@ _mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
(__mmask8) __U, __R);
}
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
-{
- return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
- (__v2df) __B,
- __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvt_roundsd_ss (__m128 __W, __mmask8 __U, __m128 __A,
- __m128d __B, const int __R)
-{
- return (__m128) __builtin_ia32_cvtsd2ss_mask_round ((__v4sf) __A,
- (__v2df) __B,
- (__v4sf) __W,
- __U,
- __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvt_roundsd_ss (__mmask8 __U, __m128 __A,
- __m128d __B, const int __R)
-{
- return (__m128) __builtin_ia32_cvtsd2ss_mask_round ((__v4sf) __A,
- (__v2df) __B,
- _mm_setzero_ps (),
- __U,
- __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
-{
- return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
- (__v4sf) __B,
- __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvt_roundss_sd (__m128d __W, __mmask8 __U, __m128d __A,
- __m128 __B, const int __R)
-{
- return (__m128d) __builtin_ia32_cvtss2sd_mask_round ((__v2df) __A,
- (__v4sf) __B,
- (__v2df) __W,
- __U,
- __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvt_roundss_sd (__mmask8 __U, __m128d __A,
- __m128 __B, const int __R)
-{
- return (__m128d) __builtin_ia32_cvtss2sd_mask_round ((__v2df) __A,
- (__v4sf) __B,
- _mm_setzero_pd (),
- __U,
- __R);
-}
#else
#define _mm512_cvt_roundpd_ps(A, B) \
(__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_undefined_ps(), -1, B)
@@ -8816,40 +11305,8 @@ _mm_maskz_cvt_roundss_sd (__mmask8 __U, __m128d __A,
#define _mm512_maskz_cvt_roundpd_ps(U, A, B) \
(__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_setzero_ps(), U, B)
-#define _mm_cvt_roundsd_ss(A, B, C) \
- (__m128)__builtin_ia32_cvtsd2ss_round(A, B, C)
-
-#define _mm_mask_cvt_roundsd_ss(W, U, A, B, C) \
- (__m128)__builtin_ia32_cvtsd2ss_mask_round ((A), (B), (W), (U), (C))
-
-#define _mm_maskz_cvt_roundsd_ss(U, A, B, C) \
- (__m128)__builtin_ia32_cvtsd2ss_mask_round ((A), (B), _mm_setzero_ps (), \
- (U), (C))
-
-#define _mm_cvt_roundss_sd(A, B, C) \
- (__m128d)__builtin_ia32_cvtss2sd_round(A, B, C)
-
-#define _mm_mask_cvt_roundss_sd(W, U, A, B, C) \
- (__m128d)__builtin_ia32_cvtss2sd_mask_round ((A), (B), (W), (U), (C))
-
-#define _mm_maskz_cvt_roundss_sd(U, A, B, C) \
- (__m128d)__builtin_ia32_cvtss2sd_mask_round ((A), (B), _mm_setzero_pd (), \
- (U), (C))
-
#endif
-#define _mm_mask_cvtss_sd(W, U, A, B) \
- _mm_mask_cvt_roundss_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_maskz_cvtss_sd(U, A, B) \
- _mm_maskz_cvt_roundss_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_mask_cvtsd_ss(W, U, A, B) \
- _mm_mask_cvt_roundsd_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_maskz_cvtsd_ss(U, A, B) \
- _mm_maskz_cvt_roundsd_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
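
/* Sketch of the masked scalar width conversions above (editorial;
   example_* is hypothetical, assumes <immintrin.h> and -mavx512f).  */
static inline __m128d
example_masked_widen_low (__m128d w, __mmask8 u, __m128d a, __m128 b)
{
  /* Low lane: (double) b[0] when bit 0 of u is set, else w[0]; upper
     lane copied from a.  */
  return _mm_mask_cvtss_sd (w, u, a, b);
}
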
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_stream_si512 (__m512i * __P, __m512i __A)
@@ -8878,87 +11335,7 @@ _mm512_stream_load_si512 (void *__P)
return __builtin_ia32_movntdqa512 ((__v8di *)__P);
}
-/* Constants for mantissa extraction */
-typedef enum
-{
- _MM_MANT_NORM_1_2, /* interval [1, 2) */
- _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */
- _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */
- _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */
-} _MM_MANTISSA_NORM_ENUM;
-
-typedef enum
-{
- _MM_MANT_SIGN_src, /* sign = sign(SRC) */
- _MM_MANT_SIGN_zero, /* sign = 0 */
- _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */
-} _MM_MANTISSA_SIGN_ENUM;
-
#ifdef __OPTIMIZE__
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
-{
- return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
- (__v4sf) __B,
- __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_getexp_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
- __m128 __B, const int __R)
-{
- return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __W,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_getexp_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
- const int __R)
-{
- return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf)
- _mm_setzero_ps (),
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
-{
- return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
- (__v2df) __B,
- __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_getexp_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
- __m128d __B, const int __R)
-{
- return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __W,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_getexp_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
- const int __R)
-{
- return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df)
- _mm_setzero_pd (),
- (__mmask8) __U, __R);
-}
-
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_getexp_round_ps (__m512 __A, const int __R)
@@ -9091,84 +11468,6 @@ _mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
__U, __R);
}
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_getmant_round_sd (__m128d __A, __m128d __B,
- _MM_MANTISSA_NORM_ENUM __C,
- _MM_MANTISSA_SIGN_ENUM __D, const int __R)
-{
- return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
- (__v2df) __B,
- (__D << 2) | __C,
- __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_getmant_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
- __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
- _MM_MANTISSA_SIGN_ENUM __D, const int __R)
-{
- return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__D << 2) | __C,
- (__v2df) __W,
- __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_getmant_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
- _MM_MANTISSA_NORM_ENUM __C,
- _MM_MANTISSA_SIGN_ENUM __D, const int __R)
-{
- return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__D << 2) | __C,
- (__v2df)
- _mm_setzero_pd(),
- __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_getmant_round_ss (__m128 __A, __m128 __B,
- _MM_MANTISSA_NORM_ENUM __C,
- _MM_MANTISSA_SIGN_ENUM __D, const int __R)
-{
- return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
- (__v4sf) __B,
- (__D << 2) | __C,
- __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_getmant_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
- __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
- _MM_MANTISSA_SIGN_ENUM __D, const int __R)
-{
- return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__D << 2) | __C,
- (__v4sf) __W,
- __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_getmant_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
- _MM_MANTISSA_NORM_ENUM __C,
- _MM_MANTISSA_SIGN_ENUM __D, const int __R)
-{
- return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__D << 2) | __C,
- (__v4sf)
- _mm_setzero_ps(),
- __U, __R);
-}
-
#else
#define _mm512_getmant_round_pd(X, B, C, R) \
((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
@@ -9210,68 +11509,6 @@ _mm_maskz_getmant_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
(__v16sf)(__m512)_mm512_setzero_ps(), \
(__mmask16)(U),\
(R)))
-#define _mm_getmant_round_sd(X, Y, C, D, R) \
- ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), \
- (int)(((D)<<2) | (C)), \
- (R)))
-
-#define _mm_mask_getmant_round_sd(W, U, X, Y, C, D, R) \
- ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), \
- (int)(((D)<<2) | (C)), \
- (__v2df)(__m128d)(W), \
- (__mmask8)(U),\
- (R)))
-
-#define _mm_maskz_getmant_round_sd(U, X, Y, C, D, R) \
- ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), \
- (int)(((D)<<2) | (C)), \
- (__v2df)(__m128d)_mm_setzero_pd(), \
- (__mmask8)(U),\
- (R)))
-
-#define _mm_getmant_round_ss(X, Y, C, D, R) \
- ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), \
- (int)(((D)<<2) | (C)), \
- (R)))
-
-#define _mm_mask_getmant_round_ss(W, U, X, Y, C, D, R) \
- ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), \
- (int)(((D)<<2) | (C)), \
- (__v4sf)(__m128)(W), \
- (__mmask8)(U),\
- (R)))
-
-#define _mm_maskz_getmant_round_ss(U, X, Y, C, D, R) \
- ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), \
- (int)(((D)<<2) | (C)), \
- (__v4sf)(__m128)_mm_setzero_ps(), \
- (__mmask8)(U),\
- (R)))
-
-#define _mm_getexp_round_ss(A, B, R) \
- ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), R))
-
-#define _mm_mask_getexp_round_ss(W, U, A, B, C) \
- (__m128)__builtin_ia32_getexpss_mask_round(A, B, W, U, C)
-
-#define _mm_maskz_getexp_round_ss(U, A, B, C) \
- (__m128)__builtin_ia32_getexpss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
-
-#define _mm_getexp_round_sd(A, B, R) \
- ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), R))
-
-#define _mm_mask_getexp_round_sd(W, U, A, B, C) \
- (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, W, U, C)
-
-#define _mm_maskz_getexp_round_sd(U, A, B, C) \
- (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
-
#define _mm512_getexp_round_ps(A, R) \
((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
@@ -9363,88 +11600,6 @@ _mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
(__mmask8) __A, __R);
}
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm,
- const int __R)
-{
- return (__m128)
- __builtin_ia32_rndscaless_mask_round ((__v4sf) __A,
- (__v4sf) __B, __imm,
- (__v4sf)
- _mm_setzero_ps (),
- (__mmask8) -1,
- __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_roundscale_round_ss (__m128 __A, __mmask8 __B, __m128 __C,
- __m128 __D, const int __imm, const int __R)
-{
- return (__m128)
- __builtin_ia32_rndscaless_mask_round ((__v4sf) __C,
- (__v4sf) __D, __imm,
- (__v4sf) __A,
- (__mmask8) __B,
- __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_roundscale_round_ss (__mmask8 __A, __m128 __B, __m128 __C,
- const int __imm, const int __R)
-{
- return (__m128)
- __builtin_ia32_rndscaless_mask_round ((__v4sf) __B,
- (__v4sf) __C, __imm,
- (__v4sf)
- _mm_setzero_ps (),
- (__mmask8) __A,
- __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
- const int __R)
-{
- return (__m128d)
- __builtin_ia32_rndscalesd_mask_round ((__v2df) __A,
- (__v2df) __B, __imm,
- (__v2df)
- _mm_setzero_pd (),
- (__mmask8) -1,
- __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_roundscale_round_sd (__m128d __A, __mmask8 __B, __m128d __C,
- __m128d __D, const int __imm, const int __R)
-{
- return (__m128d)
- __builtin_ia32_rndscalesd_mask_round ((__v2df) __C,
- (__v2df) __D, __imm,
- (__v2df) __A,
- (__mmask8) __B,
- __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_roundscale_round_sd (__mmask8 __A, __m128d __B, __m128d __C,
- const int __imm, const int __R)
-{
- return (__m128d)
- __builtin_ia32_rndscalesd_mask_round ((__v2df) __B,
- (__v2df) __C, __imm,
- (__v2df)
- _mm_setzero_pd (),
- (__mmask8) __A,
- __R);
-}
-
#else
#define _mm512_roundscale_round_ps(A, B, R) \
((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
@@ -9472,54 +11627,6 @@ _mm_maskz_roundscale_round_sd (__mmask8 __A, __m128d __B, __m128d __C,
(int)(C), \
(__v8df)_mm512_setzero_pd(),\
(__mmask8)(A), R))
-#define _mm_roundscale_round_ss(A, B, I, R) \
- ((__m128) \
- __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A), \
- (__v4sf) (__m128) (B), \
- (int) (I), \
- (__v4sf) _mm_setzero_ps (), \
- (__mmask8) (-1), \
- (int) (R)))
-#define _mm_mask_roundscale_round_ss(A, U, B, C, I, R) \
- ((__m128) \
- __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (B), \
- (__v4sf) (__m128) (C), \
- (int) (I), \
- (__v4sf) (__m128) (A), \
- (__mmask8) (U), \
- (int) (R)))
-#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
- ((__m128) \
- __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A), \
- (__v4sf) (__m128) (B), \
- (int) (I), \
- (__v4sf) _mm_setzero_ps (), \
- (__mmask8) (U), \
- (int) (R)))
-#define _mm_roundscale_round_sd(A, B, I, R) \
- ((__m128d) \
- __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A), \
- (__v2df) (__m128d) (B), \
- (int) (I), \
- (__v2df) _mm_setzero_pd (), \
- (__mmask8) (-1), \
- (int) (R)))
-#define _mm_mask_roundscale_round_sd(A, U, B, C, I, R) \
- ((__m128d) \
- __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (B), \
- (__v2df) (__m128d) (C), \
- (int) (I), \
- (__v2df) (__m128d) (A), \
- (__mmask8) (U), \
- (int) (R)))
-#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
- ((__m128d) \
- __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A), \
- (__v2df) (__m128d) (B), \
- (int) (I), \
- (__v2df) _mm_setzero_pd (), \
- (__mmask8) (U), \
- (int) (R)))
#endif
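
/* Roundscale usage sketch (editorial; hypothetical example_* helper,
   assumes <immintrin.h> and -mavx512f).  The immediate packs the scale in
   bits 7:4 and the rounding mode in bits 1:0.  */
static inline __m128d
example_floor_low (__m128d a, __m128d b)
{
  /* imm 0x01: keep 0 fraction bits, round toward -inf => floor(b[0]);
     upper lane copied from a.  */
  return _mm_roundscale_round_sd (a, b, 0x01, _MM_FROUND_CUR_DIRECTION);
}
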
extern __inline __m512
@@ -10068,22 +12175,6 @@ _mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
#define _MM_CMPINT_GT 0x6
#ifdef __OPTIMIZE__
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kshiftli_mask16 (__mmask16 __A, unsigned int __B)
-{
- return (__mmask16) __builtin_ia32_kshiftlihi ((__mmask16) __A,
- (__mmask8) __B);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kshiftri_mask16 (__mmask16 __A, unsigned int __B)
-{
- return (__mmask16) __builtin_ia32_kshiftrihi ((__mmask16) __A,
- (__mmask8) __B);
-}
-
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
@@ -10199,51 +12290,7 @@ _mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
(__mmask16) __U, __R);
}
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
-{
- return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
- (__v2df) __Y, __P,
- (__mmask8) -1, __R);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
- const int __P, const int __R)
-{
- return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
- (__v2df) __Y, __P,
- (__mmask8) __M, __R);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
-{
- return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
- (__v4sf) __Y, __P,
- (__mmask8) -1, __R);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
- const int __P, const int __R)
-{
- return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
- (__v4sf) __Y, __P,
- (__mmask8) __M, __R);
-}
-
#else
-#define _kshiftli_mask16(X, Y) \
- ((__mmask16) __builtin_ia32_kshiftlihi ((__mmask16)(X), (__mmask8)(Y)))
-
-#define _kshiftri_mask16(X, Y) \
- ((__mmask16) __builtin_ia32_kshiftrihi ((__mmask16)(X), (__mmask8)(Y)))
-
#define _mm512_cmp_epi64_mask(X, Y, P) \
((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
(__v8di)(__m512i)(Y), (int)(P),\
@@ -10304,25 +12351,6 @@ _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
(__v16sf)(__m512)(Y), (int)(P),\
(__mmask16)(M), R))
-#define _mm_cmp_round_sd_mask(X, Y, P, R) \
- ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), (int)(P),\
- (__mmask8)-1, R))
-
-#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
- ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), (int)(P),\
- (M), R))
-
-#define _mm_cmp_round_ss_mask(X, Y, P, R) \
- ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), (int)(P), \
- (__mmask8)-1, R))
-
-#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
- ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), (int)(P), \
- (M), R))
#endif
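
/* SAE compare sketch (editorial; example_* is hypothetical, assumes
   <immintrin.h> and -mavx512f).  With _MM_FROUND_NO_EXC even signaling
   predicates raise no exceptions on NaN inputs.  */
static inline int
example_low_ge_sae (__m128 x, __m128 y)
{
  return _mm_cmp_round_ss_mask (x, y, _CMP_GE_OS, _MM_FROUND_NO_EXC) & 1;
}
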
#ifdef __OPTIMIZE__
@@ -11110,66 +13138,6 @@ _mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
(), (__mmask16) __U);
}
-/* Mask arithmetic operations */
-#define _kand_mask16 _mm512_kand
-#define _kandn_mask16 _mm512_kandn
-#define _knot_mask16 _mm512_knot
-#define _kor_mask16 _mm512_kor
-#define _kxnor_mask16 _mm512_kxnor
-#define _kxor_mask16 _mm512_kxor
-
-extern __inline unsigned char
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kortest_mask16_u8 (__mmask16 __A, __mmask16 __B, unsigned char *__CF)
-{
- *__CF = (unsigned char) __builtin_ia32_kortestchi (__A, __B);
- return (unsigned char) __builtin_ia32_kortestzhi (__A, __B);
-}
-
-extern __inline unsigned char
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kortestz_mask16_u8 (__mmask16 __A, __mmask16 __B)
-{
- return (unsigned char) __builtin_ia32_kortestzhi ((__mmask16) __A,
- (__mmask16) __B);
-}
-
-extern __inline unsigned char
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kortestc_mask16_u8 (__mmask16 __A, __mmask16 __B)
-{
- return (unsigned char) __builtin_ia32_kortestchi ((__mmask16) __A,
- (__mmask16) __B);
-}
-
-extern __inline unsigned int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_cvtmask16_u32 (__mmask16 __A)
-{
- return (unsigned int) __builtin_ia32_kmovw ((__mmask16 ) __A);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_cvtu32_mask16 (unsigned int __A)
-{
- return (__mmask16) __builtin_ia32_kmovw ((__mmask16 ) __A);
-}
-
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_load_mask16 (__mmask16 *__A)
-{
- return (__mmask16) __builtin_ia32_kmovw (*(__mmask16 *) __A);
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_store_mask16 (__mmask16 *__A, __mmask16 __B)
-{
- *(__mmask16 *) __A = __builtin_ia32_kmovw (__B);
-}
-
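/* Illustrative sketch, not part of this patch: the KMOVW-based helpers
   removed above round-trip a 16-bit mask through memory, and the kortest
   helpers test the OR of two masks.  Hypothetical example:  */

unsigned char
example_mask_roundtrip (__mmask16 m, __mmask16 n)
{
  __mmask16 tmp;
  _store_mask16 (&tmp, m);		/* kmovw to memory */
  __mmask16 back = _load_mask16 (&tmp);	/* kmovw from memory */
  return _kortestz_mask16_u8 (back, n);	/* 1 iff (back | n) == 0 */
}
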
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_kand (__mmask16 __A, __mmask16 __B)
@@ -11236,13 +13204,6 @@ _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
}
-extern __inline __mmask16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kunpackb_mask16 (__mmask8 __A, __mmask8 __B)
-{
- return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
-}
-
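/* Illustrative note, not part of this patch: _kunpackb_mask16 (like
   _mm512_kunpackb above) concatenates two byte masks; to the best of my
   reading, the low byte of the result comes from the second operand and
   the high byte from the first, e.g.

     _kunpackb_mask16 ((__mmask8) 0xAA, (__mmask8) 0x55) == 0xAA55  */
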
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -11593,174 +13554,6 @@ _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
(__mmask16) __U);
}
-#ifdef __OPTIMIZE__
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
-{
- return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
- (__v2df) __B,
- __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_max_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
- __m128d __B, const int __R)
-{
- return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __W,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_max_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
- const int __R)
-{
- return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df)
- _mm_setzero_pd (),
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
-{
- return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
- (__v4sf) __B,
- __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_max_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
- __m128 __B, const int __R)
-{
- return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __W,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_max_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
- const int __R)
-{
- return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf)
- _mm_setzero_ps (),
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
-{
- return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
- (__v2df) __B,
- __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_min_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
- __m128d __B, const int __R)
-{
- return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __W,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_min_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
- const int __R)
-{
- return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df)
- _mm_setzero_pd (),
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
-{
- return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
- (__v4sf) __B,
- __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_min_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
- __m128 __B, const int __R)
-{
- return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __W,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_min_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
- const int __R)
-{
- return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf)
- _mm_setzero_ps (),
- (__mmask8) __U, __R);
-}
-
-#else
-#define _mm_max_round_sd(A, B, C) \
- (__m128d)__builtin_ia32_maxsd_round(A, B, C)
-
-#define _mm_mask_max_round_sd(W, U, A, B, C) \
- (__m128d)__builtin_ia32_maxsd_mask_round(A, B, W, U, C)
-
-#define _mm_maskz_max_round_sd(U, A, B, C) \
- (__m128d)__builtin_ia32_maxsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
-
-#define _mm_max_round_ss(A, B, C) \
- (__m128)__builtin_ia32_maxss_round(A, B, C)
-
-#define _mm_mask_max_round_ss(W, U, A, B, C) \
- (__m128)__builtin_ia32_maxss_mask_round(A, B, W, U, C)
-
-#define _mm_maskz_max_round_ss(U, A, B, C) \
- (__m128)__builtin_ia32_maxss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
-
-#define _mm_min_round_sd(A, B, C) \
- (__m128d)__builtin_ia32_minsd_round(A, B, C)
-
-#define _mm_mask_min_round_sd(W, U, A, B, C) \
- (__m128d)__builtin_ia32_minsd_mask_round(A, B, W, U, C)
-
-#define _mm_maskz_min_round_sd(U, A, B, C) \
- (__m128d)__builtin_ia32_minsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
-
-#define _mm_min_round_ss(A, B, C) \
- (__m128)__builtin_ia32_minss_round(A, B, C)
-
-#define _mm_mask_min_round_ss(W, U, A, B, C) \
- (__m128)__builtin_ia32_minss_mask_round(A, B, W, U, C)
-
-#define _mm_maskz_min_round_ss(U, A, B, C) \
- (__m128)__builtin_ia32_minss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
-
-#endif
-
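/* Illustrative sketch, not part of this patch: VMINSD/VMAXSD produce an
   exact result, so the extra argument of the rounding forms above is
   only meaningful as _MM_FROUND_NO_EXC (suppress-all-exceptions) or
   _MM_FROUND_CUR_DIRECTION.  Hypothetical example:  */

__m128d
example_max_quiet (__m128d a, __m128d b)
{
  /* Max of the low doubles with FP exceptions suppressed; the upper
     element is copied from a.  */
  return _mm_max_round_sd (a, b, _MM_FROUND_NO_EXC);
}
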
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
@@ -11797,735 +13590,6 @@ _mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
(__mmask16) __U);
}
-#ifdef __OPTIMIZE__
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
-{
- return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
- (__v2df) __A,
- (__v2df) __B,
- __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
-{
- return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
- (__v4sf) __A,
- (__v4sf) __B,
- __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
-{
- return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
- (__v2df) __A,
- -(__v2df) __B,
- __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
-{
- return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
- (__v4sf) __A,
- -(__v4sf) __B,
- __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
-{
- return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
- -(__v2df) __A,
- (__v2df) __B,
- __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
-{
- return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
- -(__v4sf) __A,
- (__v4sf) __B,
- __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
-{
- return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
- -(__v2df) __A,
- -(__v2df) __B,
- __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
-{
- return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
- -(__v4sf) __A,
- -(__v4sf) __B,
- __R);
-}
-#else
-#define _mm_fmadd_round_sd(A, B, C, R) \
- (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, C, R)
-
-#define _mm_fmadd_round_ss(A, B, C, R) \
- (__m128)__builtin_ia32_vfmaddss3_round(A, B, C, R)
-
-#define _mm_fmsub_round_sd(A, B, C, R) \
- (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, -(C), R)
-
-#define _mm_fmsub_round_ss(A, B, C, R) \
- (__m128)__builtin_ia32_vfmaddss3_round(A, B, -(C), R)
-
-#define _mm_fnmadd_round_sd(A, B, C, R) \
- (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), C, R)
-
-#define _mm_fnmadd_round_ss(A, B, C, R) \
- (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), C, R)
-
-#define _mm_fnmsub_round_sd(A, B, C, R) \
- (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), -(C), R)
-
-#define _mm_fnmsub_round_ss(A, B, C, R) \
- (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), -(C), R)
-#endif
-
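/* Illustrative sketch, not part of this patch: the rounding-form scalar
   FMAs above take an explicit rounding mode; a specific mode must be
   combined with _MM_FROUND_NO_EXC.  Hypothetical example:  */

__m128d
example_fma_rz (__m128d a, __m128d b, __m128d c)
{
  /* a[0] * b[0] + c[0], rounded toward zero, upper element from a.  */
  return _mm_fmadd_round_sd (a, b, c,
			     _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}
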
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
-{
- return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
- (__v2df) __A,
- (__v2df) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
- return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
- (__v4sf) __A,
- (__v4sf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fmadd_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
-{
- return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
- (__v2df) __A,
- (__v2df) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fmadd_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
-{
- return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
- (__v4sf) __A,
- (__v4sf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
-{
- return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
- (__v2df) __A,
- (__v2df) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
-{
- return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
- (__v4sf) __A,
- (__v4sf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
-{
- return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
- (__v2df) __A,
- -(__v2df) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
- return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
- (__v4sf) __A,
- -(__v4sf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fmsub_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
-{
- return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
- (__v2df) __A,
- (__v2df) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fmsub_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
-{
- return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
- (__v4sf) __A,
- (__v4sf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
-{
- return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
- (__v2df) __A,
- -(__v2df) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
-{
- return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
- (__v4sf) __A,
- -(__v4sf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
-{
- return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
- -(__v2df) __A,
- (__v2df) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
- return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
- -(__v4sf) __A,
- (__v4sf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fnmadd_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
-{
- return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
- -(__v2df) __A,
- (__v2df) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fnmadd_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
-{
- return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
- -(__v4sf) __A,
- (__v4sf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
-{
- return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
- -(__v2df) __A,
- (__v2df) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
-{
- return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
- -(__v4sf) __A,
- (__v4sf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
-{
- return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
- -(__v2df) __A,
- -(__v2df) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
- return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
- -(__v4sf) __A,
- -(__v4sf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fnmsub_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
-{
- return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
- -(__v2df) __A,
- (__v2df) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fnmsub_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
-{
- return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
- -(__v4sf) __A,
- (__v4sf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
-{
- return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
- -(__v2df) __A,
- -(__v2df) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
-{
- return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
- -(__v4sf) __A,
- -(__v4sf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
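/* Illustrative note, not part of this patch, on the three masked forms
   above: with the mask bit clear, _mm_mask_fmadd_sd keeps the low
   element of its first (multiplicand) operand, _mm_mask3_fmadd_sd keeps
   the low element of the addend, and _mm_maskz_fmadd_sd zeroes it.
   Hypothetical example:  */

__m128d
example_masked_fma (__m128d w, __m128d a, __m128d b)
{
  /* Mask bit set, so the low element is w[0] * a[0] + b[0].  */
  return _mm_mask_fmadd_sd (w, (__mmask8) 1, a, b);
}
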
-#ifdef __OPTIMIZE__
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fmadd_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
- const int __R)
-{
- return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
- (__v2df) __A,
- (__v2df) __B,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fmadd_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
- const int __R)
-{
- return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
- (__v4sf) __A,
- (__v4sf) __B,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
- const int __R)
-{
- return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
- (__v2df) __A,
- (__v2df) __B,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
- const int __R)
-{
- return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
- (__v4sf) __A,
- (__v4sf) __B,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fmadd_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
- const int __R)
-{
- return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
- (__v2df) __A,
- (__v2df) __B,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fmadd_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
- const int __R)
-{
- return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
- (__v4sf) __A,
- (__v4sf) __B,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fmsub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
- const int __R)
-{
- return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
- (__v2df) __A,
- -(__v2df) __B,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fmsub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
- const int __R)
-{
- return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
- (__v4sf) __A,
- -(__v4sf) __B,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
- const int __R)
-{
- return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
- (__v2df) __A,
- (__v2df) __B,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
- const int __R)
-{
- return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
- (__v4sf) __A,
- (__v4sf) __B,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fmsub_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
- const int __R)
-{
- return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
- (__v2df) __A,
- -(__v2df) __B,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fmsub_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
- const int __R)
-{
- return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
- (__v4sf) __A,
- -(__v4sf) __B,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fnmadd_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
- const int __R)
-{
- return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
- -(__v2df) __A,
- (__v2df) __B,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fnmadd_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
- const int __R)
-{
- return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
- -(__v4sf) __A,
- (__v4sf) __B,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
- const int __R)
-{
- return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
- -(__v2df) __A,
- (__v2df) __B,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
- const int __R)
-{
- return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
- -(__v4sf) __A,
- (__v4sf) __B,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fnmadd_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
- const int __R)
-{
- return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
- -(__v2df) __A,
- (__v2df) __B,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fnmadd_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
- const int __R)
-{
- return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
- -(__v4sf) __A,
- (__v4sf) __B,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fnmsub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
- const int __R)
-{
- return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
- -(__v2df) __A,
- -(__v2df) __B,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fnmsub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
- const int __R)
-{
- return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
- -(__v4sf) __A,
- -(__v4sf) __B,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
- const int __R)
-{
- return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
- -(__v2df) __A,
- (__v2df) __B,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
- const int __R)
-{
- return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
- -(__v4sf) __A,
- (__v4sf) __B,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fnmsub_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
- const int __R)
-{
- return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
- -(__v2df) __A,
- -(__v2df) __B,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fnmsub_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
- const int __R)
-{
- return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
- -(__v4sf) __A,
- -(__v4sf) __B,
- (__mmask8) __U, __R);
-}
-#else
-#define _mm_mask_fmadd_round_sd(A, U, B, C, R) \
- (__m128d) __builtin_ia32_vfmaddsd3_mask (A, B, C, U, R)
-
-#define _mm_mask_fmadd_round_ss(A, U, B, C, R) \
- (__m128) __builtin_ia32_vfmaddss3_mask (A, B, C, U, R)
-
-#define _mm_mask3_fmadd_round_sd(A, B, C, U, R) \
- (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, B, C, U, R)
-
-#define _mm_mask3_fmadd_round_ss(A, B, C, U, R) \
- (__m128) __builtin_ia32_vfmaddss3_mask3 (A, B, C, U, R)
-
-#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
- (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, C, U, R)
-
-#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
- (__m128) __builtin_ia32_vfmaddss3_maskz (A, B, C, U, R)
-
-#define _mm_mask_fmsub_round_sd(A, U, B, C, R) \
- (__m128d) __builtin_ia32_vfmaddsd3_mask (A, B, -(C), U, R)
-
-#define _mm_mask_fmsub_round_ss(A, U, B, C, R) \
- (__m128) __builtin_ia32_vfmaddss3_mask (A, B, -(C), U, R)
-
-#define _mm_mask3_fmsub_round_sd(A, B, C, U, R) \
- (__m128d) __builtin_ia32_vfmsubsd3_mask3 (A, B, C, U, R)
-
-#define _mm_mask3_fmsub_round_ss(A, B, C, U, R) \
- (__m128) __builtin_ia32_vfmsubss3_mask3 (A, B, C, U, R)
-
-#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
- (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, -(C), U, R)
-
-#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
- (__m128) __builtin_ia32_vfmaddss3_maskz (A, B, -(C), U, R)
-
-#define _mm_mask_fnmadd_round_sd(A, U, B, C, R) \
- (__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), C, U, R)
-
-#define _mm_mask_fnmadd_round_ss(A, U, B, C, R) \
- (__m128) __builtin_ia32_vfmaddss3_mask (A, -(B), C, U, R)
-
-#define _mm_mask3_fnmadd_round_sd(A, B, C, U, R) \
- (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, -(B), C, U, R)
-
-#define _mm_mask3_fnmadd_round_ss(A, B, C, U, R) \
- (__m128) __builtin_ia32_vfmaddss3_mask3 (A, -(B), C, U, R)
-
-#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
- (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), C, U, R)
-
-#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
- (__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), C, U, R)
-
-#define _mm_mask_fnmsub_round_sd(A, U, B, C, R) \
- (__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), -(C), U, R)
-
-#define _mm_mask_fnmsub_round_ss(A, U, B, C, R) \
- (__m128) __builtin_ia32_vfmaddss3_mask (A, -(B), -(C), U, R)
-
-#define _mm_mask3_fnmsub_round_sd(A, B, C, U, R) \
- (__m128d) __builtin_ia32_vfmsubsd3_mask3 (A, -(B), C, U, R)
-
-#define _mm_mask3_fnmsub_round_ss(A, B, C, U, R) \
- (__m128) __builtin_ia32_vfmsubss3_mask3 (A, -(B), C, U, R)
-
-#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
- (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), -(C), U, R)
-
-#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
- (__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), -(C), U, R)
-#endif
-
-#ifdef __OPTIMIZE__
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
-{
- return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
-}
-
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
-{
- return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
-}
-#else
-#define _mm_comi_round_ss(A, B, C, D)\
-__builtin_ia32_vcomiss(A, B, C, D)
-#define _mm_comi_round_sd(A, B, C, D)\
-__builtin_ia32_vcomisd(A, B, C, D)
-#endif
-
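/* Illustrative sketch, not part of this patch: _mm_comi_round_ss/_sd
   return the comparison result directly as an int (0 or 1); with
   _MM_FROUND_NO_EXC the compare cannot raise FP exceptions.
   Hypothetical example:  */

int
example_ge_quiet (__m128 a, __m128 b)
{
  /* 1 iff a[0] >= b[0], ordered quiet predicate, no FP traps.  */
  return _mm_comi_round_ss (a, b, _CMP_GE_OQ, _MM_FROUND_NO_EXC);
}
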
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sqrt_pd (__m512d __A)
@@ -12650,52 +13714,6 @@ _mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
_MM_FROUND_CUR_DIRECTION);
}
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_add_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
-{
- return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __W,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_add_sd (__mmask8 __U, __m128d __A, __m128d __B)
-{
- return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df)
- _mm_setzero_pd (),
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_add_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
- return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __W,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_add_ss (__mmask8 __U, __m128 __A, __m128 __B)
-{
- return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf)
- _mm_setzero_ps (),
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
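/* Illustrative sketch, not part of this patch: in the masked scalar adds
   above only the low element is predicated; merge-masking falls back to
   __W[0], zero-masking to 0.0, and the upper element always comes from
   the first source.  Hypothetical example:  */

__m128d
example_masked_add (__m128d w, __mmask8 k, __m128d a, __m128d b)
{
  /* Low element: (k & 1) ? a[0] + b[0] : w[0]; high element: a[1].  */
  return _mm_mask_add_sd (w, k, a, b);
}
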
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_pd (__m512d __A, __m512d __B)
@@ -12756,52 +13774,6 @@ _mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
_MM_FROUND_CUR_DIRECTION);
}
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_sub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
-{
- return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __W,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_sub_sd (__mmask8 __U, __m128d __A, __m128d __B)
-{
- return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df)
- _mm_setzero_pd (),
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_sub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
- return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __W,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_sub_ss (__mmask8 __U, __m128 __A, __m128 __B)
-{
- return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf)
- _mm_setzero_ps (),
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_pd (__m512d __A, __m512d __B)
@@ -12862,54 +13834,6 @@ _mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
_MM_FROUND_CUR_DIRECTION);
}
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_mul_sd (__m128d __W, __mmask8 __U, __m128d __A,
- __m128d __B)
-{
- return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __W,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_mul_sd (__mmask8 __U, __m128d __A, __m128d __B)
-{
- return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df)
- _mm_setzero_pd (),
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_mul_ss (__m128 __W, __mmask8 __U, __m128 __A,
- __m128 __B)
-{
- return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __W,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_mul_ss (__mmask8 __U, __m128 __A, __m128 __B)
-{
- return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf)
- _mm_setzero_ps (),
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_div_pd (__m512d __M, __m512d __V)
@@ -12970,54 +13894,6 @@ _mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
_MM_FROUND_CUR_DIRECTION);
}
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_div_sd (__m128d __W, __mmask8 __U, __m128d __A,
- __m128d __B)
-{
- return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __W,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_div_sd (__mmask8 __U, __m128d __A, __m128d __B)
-{
- return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df)
- _mm_setzero_pd (),
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_div_ss (__m128 __W, __mmask8 __U, __m128 __A,
- __m128 __B)
-{
- return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __W,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_div_ss (__mmask8 __U, __m128 __A, __m128 __B)
-{
- return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf)
- _mm_setzero_ps (),
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_pd (__m512d __A, __m512d __B)
@@ -13088,52 +13964,6 @@ _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
_MM_FROUND_CUR_DIRECTION);
}
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_max_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
-{
- return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __W,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_max_sd (__mmask8 __U, __m128d __A, __m128d __B)
-{
- return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df)
- _mm_setzero_pd (),
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_max_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
- return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __W,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_max_ss (__mmask8 __U, __m128 __A, __m128 __B)
-{
- return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf)
- _mm_setzero_ps (),
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_pd (__m512d __A, __m512d __B)
@@ -13204,52 +14034,6 @@ _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
_MM_FROUND_CUR_DIRECTION);
}
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_min_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
-{
- return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __W,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_min_sd (__mmask8 __U, __m128d __A, __m128d __B)
-{
- return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df)
- _mm_setzero_pd (),
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_min_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
- return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __W,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_min_ss (__mmask8 __U, __m128 __A, __m128 __B)
-{
- return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf)
- _mm_setzero_ps (),
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_scalef_pd (__m512d __A, __m512d __B)
@@ -13320,30 +14104,6 @@ _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
_MM_FROUND_CUR_DIRECTION);
}
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_scalef_sd (__m128d __A, __m128d __B)
-{
- return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df)
- _mm_setzero_pd (),
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_scalef_ss (__m128 __A, __m128 __B)
-{
- return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf)
- _mm_setzero_ps (),
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
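/* Illustrative note, not part of this patch: VSCALEFSD/SS compute
   a[0] * 2^floor(b[0]), which is handy for ldexp-style helpers; e.g.
   _mm_scalef_sd of (3.0, 4.0) yields 48.0 in the low element.
   Hypothetical example:  */

__m128d
example_ldexp_sd (__m128d x, __m128d e)
{
  /* x[0] scaled by 2 to the power floor(e[0]); upper element from x.  */
  return _mm_scalef_sd (x, e);
}
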
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
@@ -14142,32 +14902,6 @@ _mm512_cvtss_f32 (__m512 __A)
return __A[0];
}
-#ifdef __x86_64__
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
-{
- return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
-{
- return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
- _MM_FROUND_CUR_DIRECTION);
-}
-#endif
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtu32_ss (__m128 __A, unsigned __B)
-{
- return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
- _MM_FROUND_CUR_DIRECTION);
-}
-
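/* Illustrative sketch, not part of this patch: unlike the plain signed
   cvtsi2ss path, _mm_cvtu32_ss converts the full unsigned 32-bit range
   correctly.  Hypothetical example:  */

__m128
example_cvt_u32 (__m128 a)
{
  /* Low element becomes 4294967295.0f (rounded), upper elements from a.  */
  return _mm_cvtu32_ss (a, 0xFFFFFFFFu);
}
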
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi32_ps (__m512i __A)
@@ -14309,76 +15043,6 @@ _mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
_MM_FROUND_CUR_DIRECTION);
}
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
-{
- return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
- (__v2df) __B,
- (__v2di) __C, __imm,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
- __m128i __C, const int __imm)
-{
- return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
- (__v2df) __B,
- (__v2di) __C, __imm,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
- __m128i __C, const int __imm)
-{
- return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
- (__v2df) __B,
- (__v2di) __C,
- __imm,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
-{
- return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
- (__v4sf) __B,
- (__v4si) __C, __imm,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
- __m128i __C, const int __imm)
-{
- return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
- (__v4sf) __B,
- (__v4si) __C, __imm,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
- __m128i __C, const int __imm)
-{
- return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
- (__v4sf) __B,
- (__v4si) __C, __imm,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
#else
#define _mm512_fixupimm_pd(X, Y, Z, C) \
((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
@@ -14410,65 +15074,8 @@ _mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
(__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
(__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
-#define _mm_fixupimm_sd(X, Y, Z, C) \
- ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
- (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
-
-#define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
- ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
- (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
-
-#define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
- ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
- (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
-
-#define _mm_fixupimm_ss(X, Y, Z, C) \
- ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
- (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
-
-#define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
- ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
- (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
-
-#define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
- ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
- (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
#endif
-#ifdef __x86_64__
-extern __inline unsigned long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtss_u64 (__m128 __A)
-{
- return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
- __A,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline unsigned long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttss_u64 (__m128 __A)
-{
- return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
- __A,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttss_i64 (__m128 __A)
-{
- return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
- _MM_FROUND_CUR_DIRECTION);
-}
-#endif /* __x86_64__ */
-
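/* Illustrative note, not part of this patch: the cvt forms above round
   according to the current MXCSR mode, while the cvtt forms truncate
   toward zero.  Hypothetical example (x86-64 only, like the block
   above):  */

unsigned long long
example_trunc_u64 (__m128 a)
{
  /* For a[0] == 2.7f this returns 2; _mm_cvtss_u64 would return 3
     under round-to-nearest.  */
  return _mm_cvttss_u64 (a);
}
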
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtsi512_si32 (__m512i __A)
@@ -14477,138 +15084,6 @@ _mm512_cvtsi512_si32 (__m512i __A)
return __B[0];
}
-extern __inline unsigned
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtss_u32 (__m128 __A)
-{
- return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline unsigned
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttss_u32 (__m128 __A)
-{
- return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttss_i32 (__m128 __A)
-{
- return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsd_i32 (__m128d __A)
-{
- return (int) __builtin_ia32_cvtsd2si ((__v2df) __A);
-}
-
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtss_i32 (__m128 __A)
-{
- return (int) __builtin_ia32_cvtss2si ((__v4sf) __A);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvti32_sd (__m128d __A, int __B)
-{
- return (__m128d) __builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvti32_ss (__m128 __A, int __B)
-{
- return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B);
-}
-
-#ifdef __x86_64__
-extern __inline unsigned long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsd_u64 (__m128d __A)
-{
- return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
- __A,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline unsigned long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttsd_u64 (__m128d __A)
-{
- return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
- __A,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttsd_i64 (__m128d __A)
-{
- return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsd_i64 (__m128d __A)
-{
- return (long long) __builtin_ia32_cvtsd2si64 ((__v2df) __A);
-}
-
-extern __inline long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtss_i64 (__m128 __A)
-{
- return (long long) __builtin_ia32_cvtss2si64 ((__v4sf) __A);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvti64_sd (__m128d __A, long long __B)
-{
- return (__m128d) __builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvti64_ss (__m128 __A, long long __B)
-{
- return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
-}
-#endif /* __x86_64__ */
-
-extern __inline unsigned
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsd_u32 (__m128d __A)
-{
- return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline unsigned
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttsd_u32 (__m128d __A)
-{
- return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttsd_i32 (__m128d __A)
-{
- return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
- _MM_FROUND_CUR_DIRECTION);
-}
-
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtps_pd (__m256 __A)
@@ -14770,70 +15245,6 @@ _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
_MM_FROUND_CUR_DIRECTION);
}
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_getexp_ss (__m128 __A, __m128 __B)
-{
- return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
- (__v4sf) __B,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
- return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf) __W,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
-{
- return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf)
- _mm_setzero_ps (),
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_getexp_sd (__m128d __A, __m128d __B)
-{
- return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
- (__v2df) __B,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
-{
- return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df) __W,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
-{
- return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__v2df)
- _mm_setzero_pd (),
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
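/* Illustrative note, not part of this patch: VGETEXPSD/SS extract the
   unbiased exponent as a float, roughly floor(log2(|x|)); the first
   operand only supplies the untouched upper element.  Hypothetical
   example:  */

__m128d
example_getexp (__m128d a, __m128d x)
{
  /* x[0] == 8.0 gives 3.0; x[0] == 0.5 gives -1.0.  */
  return _mm_getexp_sd (a, x);
}
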
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
@@ -14906,82 +15317,6 @@ _mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
_MM_FROUND_CUR_DIRECTION);
}
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
- _MM_MANTISSA_SIGN_ENUM __D)
-{
- return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
- (__v2df) __B,
- (__D << 2) | __C,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_getmant_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
- _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
-{
- return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__D << 2) | __C,
- (__v2df) __W,
- __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_getmant_sd (__mmask8 __U, __m128d __A, __m128d __B,
- _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
-{
- return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
- (__v2df) __B,
- (__D << 2) | __C,
- (__v2df)
- _mm_setzero_pd(),
- __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
- _MM_MANTISSA_SIGN_ENUM __D)
-{
- return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
- (__v4sf) __B,
- (__D << 2) | __C,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_getmant_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
- _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
-{
- return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__D << 2) | __C,
- (__v4sf) __W,
- __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_getmant_ss (__mmask8 __U, __m128 __A, __m128 __B,
- _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
-{
- return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
- (__v4sf) __B,
- (__D << 2) | __C,
- (__v4sf)
- _mm_setzero_ps(),
- __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
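/* Illustrative sketch, not part of this patch: the getmant forms above
   rescale the mantissa into the chosen interval.  With _MM_MANT_NORM_1_2
   and _MM_MANT_SIGN_src, an input of -24.0 (== -1.5 * 2^4) yields -1.5
   in the low element.  Hypothetical example:  */

__m128d
example_getmant (__m128d a, __m128d x)
{
  /* Mantissa of x[0] normalized to [1, 2), keeping the source sign.  */
  return _mm_getmant_sd (a, x, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
}
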
#else
#define _mm512_getmant_pd(X, B, C) \
((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
@@ -15023,74 +15358,6 @@ _mm_maskz_getmant_ss (__mmask8 __U, __m128 __A, __m128 __B,
(__v16sf)_mm512_setzero_ps(), \
(__mmask16)(U),\
_MM_FROUND_CUR_DIRECTION))
-#define _mm_getmant_sd(X, Y, C, D) \
- ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), \
- (int)(((D)<<2) | (C)), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm_mask_getmant_sd(W, U, X, Y, C, D) \
- ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), \
- (int)(((D)<<2) | (C)), \
- (__v2df)(__m128d)(W), \
- (__mmask8)(U),\
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm_maskz_getmant_sd(U, X, Y, C, D) \
- ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), \
- (int)(((D)<<2) | (C)), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(U),\
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm_getmant_ss(X, Y, C, D) \
- ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), \
- (int)(((D)<<2) | (C)), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm_mask_getmant_ss(W, U, X, Y, C, D) \
- ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), \
- (int)(((D)<<2) | (C)), \
- (__v4sf)(__m128)(W), \
- (__mmask8)(U),\
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm_maskz_getmant_ss(U, X, Y, C, D) \
- ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), \
- (int)(((D)<<2) | (C)), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(U),\
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm_getexp_ss(A, B) \
- ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm_mask_getexp_ss(W, U, A, B) \
- (__m128)__builtin_ia32_getexpss_mask_round(A, B, W, U,\
- _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_maskz_getexp_ss(U, A, B) \
- (__m128)__builtin_ia32_getexpss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U,\
- _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_getexp_sd(A, B) \
- ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B),\
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm_mask_getexp_sd(W, U, A, B) \
- (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, W, U,\
- _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_maskz_getexp_sd(U, A, B) \
- (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U,\
- _MM_FROUND_CUR_DIRECTION)
-
#define _mm512_getexp_ps(A) \
((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
(__v16sf)_mm512_undefined_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
@@ -15185,87 +15452,6 @@ _mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
_MM_FROUND_CUR_DIRECTION);
}
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
-{
- return (__m128)
- __builtin_ia32_rndscaless_mask_round ((__v4sf) __A,
- (__v4sf) __B, __imm,
- (__v4sf)
- _mm_setzero_ps (),
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_roundscale_ss (__m128 __A, __mmask8 __B, __m128 __C, __m128 __D,
- const int __imm)
-{
- return (__m128)
- __builtin_ia32_rndscaless_mask_round ((__v4sf) __C,
- (__v4sf) __D, __imm,
- (__v4sf) __A,
- (__mmask8) __B,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_roundscale_ss (__mmask8 __A, __m128 __B, __m128 __C,
- const int __imm)
-{
- return (__m128)
- __builtin_ia32_rndscaless_mask_round ((__v4sf) __B,
- (__v4sf) __C, __imm,
- (__v4sf)
- _mm_setzero_ps (),
- (__mmask8) __A,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
-{
- return (__m128d)
- __builtin_ia32_rndscalesd_mask_round ((__v2df) __A,
- (__v2df) __B, __imm,
- (__v2df)
- _mm_setzero_pd (),
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_roundscale_sd (__m128d __A, __mmask8 __B, __m128d __C, __m128d __D,
- const int __imm)
-{
- return (__m128d)
- __builtin_ia32_rndscalesd_mask_round ((__v2df) __C,
- (__v2df) __D, __imm,
- (__v2df) __A,
- (__mmask8) __B,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_roundscale_sd (__mmask8 __A, __m128d __B, __m128d __C,
- const int __imm)
-{
- return (__m128d)
- __builtin_ia32_rndscalesd_mask_round ((__v2df) __B,
- (__v2df) __C, __imm,
- (__v2df)
- _mm_setzero_pd (),
- (__mmask8) __A,
- _MM_FROUND_CUR_DIRECTION);
-}
-
#else
#define _mm512_roundscale_ps(A, B) \
((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
@@ -15293,54 +15479,6 @@ _mm_maskz_roundscale_sd (__mmask8 __A, __m128d __B, __m128d __C,
(int)(C), \
(__v8df)_mm512_setzero_pd(),\
(__mmask8)(A), _MM_FROUND_CUR_DIRECTION))
-#define _mm_roundscale_ss(A, B, I) \
- ((__m128) \
- __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A), \
- (__v4sf) (__m128) (B), \
- (int) (I), \
- (__v4sf) _mm_setzero_ps (), \
- (__mmask8) (-1), \
- _MM_FROUND_CUR_DIRECTION))
-#define _mm_mask_roundscale_ss(A, U, B, C, I) \
- ((__m128) \
- __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (B), \
- (__v4sf) (__m128) (C), \
- (int) (I), \
- (__v4sf) (__m128) (A), \
- (__mmask8) (U), \
- _MM_FROUND_CUR_DIRECTION))
-#define _mm_maskz_roundscale_ss(U, A, B, I) \
- ((__m128) \
- __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A), \
- (__v4sf) (__m128) (B), \
- (int) (I), \
- (__v4sf) _mm_setzero_ps (), \
- (__mmask8) (U), \
- _MM_FROUND_CUR_DIRECTION))
-#define _mm_roundscale_sd(A, B, I) \
- ((__m128d) \
- __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A), \
- (__v2df) (__m128d) (B), \
- (int) (I), \
- (__v2df) _mm_setzero_pd (), \
- (__mmask8) (-1), \
- _MM_FROUND_CUR_DIRECTION))
-#define _mm_mask_roundscale_sd(A, U, B, C, I) \
- ((__m128d) \
- __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (B), \
- (__v2df) (__m128d) (C), \
- (int) (I), \
- (__v2df) (__m128d) (A), \
- (__mmask8) (U), \
- _MM_FROUND_CUR_DIRECTION))
-#define _mm_maskz_roundscale_sd(U, A, B, I) \
- ((__m128d) \
- __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A), \
- (__v2df) (__m128d) (B), \
- (int) (I), \
- (__v2df) _mm_setzero_pd (), \
- (__mmask8) (U), \
- _MM_FROUND_CUR_DIRECTION))
#endif
#ifdef __OPTIMIZE__
@@ -15384,46 +15522,6 @@ _mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
_MM_FROUND_CUR_DIRECTION);
}
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
-{
- return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
- (__v2df) __Y, __P,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
-{
- return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
- (__v2df) __Y, __P,
- (__mmask8) __M,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
-{
- return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
- (__v4sf) __Y, __P,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
-{
- return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
- (__v4sf) __Y, __P,
- (__mmask8) __M,
- _MM_FROUND_CUR_DIRECTION);
-}
-
#else
#define _mm512_cmp_pd_mask(X, Y, P) \
((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
@@ -15445,25 +15543,6 @@ _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
(__v16sf)(__m512)(Y), (int)(P),\
(__mmask16)(M),_MM_FROUND_CUR_DIRECTION))
-#define _mm_cmp_sd_mask(X, Y, P) \
- ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), (int)(P),\
- (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
-
-#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
- ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), (int)(P),\
- M,_MM_FROUND_CUR_DIRECTION))
-
-#define _mm_cmp_ss_mask(X, Y, P) \
- ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), (int)(P), \
- (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
-
-#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
- ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), (int)(P), \
- M,_MM_FROUND_CUR_DIRECTION))
#endif
extern __inline __mmask8
@@ -16493,9 +16572,9 @@ _mm512_mask_reduce_max_pd (__mmask8 __U, __m512d __A)
#undef __MM512_REDUCE_OP
-#ifdef __DISABLE_AVX512F__
-#undef __DISABLE_AVX512F__
+#ifdef __DISABLE_AVX512F_512__
+#undef __DISABLE_AVX512F_512__
#pragma GCC pop_options
-#endif /* __DISABLE_AVX512F__ */
+#endif /* __DISABLE_AVX512F_512__ */
#endif /* _AVX512FINTRIN_H_INCLUDED */