author     Haochen Jiang <haochen.jiang@intel.com>  2023-03-17 14:42:32 +0800
committer  liuhongt <hongtao.liu@intel.com>  2023-09-22 10:30:42 +0800
commit     36a88e3d464d238c481c9e716d72c5d3a7a21869 (patch)
tree       9ffafc6bb6c8b5109a3a53da06465f429da5f6ab
parent     4a53b3fd1fac091aa44c835cf2759c314f78866e (diff)
Push evex512 target for 512 bit intrins
gcc/ChangeLog:

	* config/i386/avx512fp16intrin.h: Add evex512 target for
	512 bit intrins.

Co-authored-by: Hu, Lin1 <lin1.hu@intel.com>
-rw-r--r--  gcc/config/i386/avx512fp16intrin.h | 5383
1 file changed, 2705 insertions(+), 2678 deletions(-)
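For context, a minimal sketch of the guard pattern this change introduces: the 512-bit intrinsics removed from the top of the header are presumably re-emitted later in the file (beyond the portion shown below) inside a region that pushes an explicit evex512 target, so they are only usable when 512-bit EVEX encodings are enabled. Only the GCC pragmas themselves are known constructs here; the exact target string and the __EVEX512__ / __DISABLE_AVX512FP16_512__ names are illustrative assumptions, not copied from this patch.

/* Hypothetical sketch of an evex512 target guard; macro names are
   assumed for illustration only.  */
#if !defined (__AVX512FP16__) || !defined (__EVEX512__)
#pragma GCC push_options
#pragma GCC target ("avx512fp16,evex512")
#define __DISABLE_AVX512FP16_512__
#endif

/* 512-bit types and intrinsics (__m512h, _mm512_set_ph, _mm512_add_ph,
   ...) would be defined inside this region, so they require both
   AVX512FP16 and 512-bit EVEX support.  */

#ifdef __DISABLE_AVX512FP16_512__
#undef __DISABLE_AVX512FP16_512__
#pragma GCC pop_options
#endif

With such a guard, a build that enables AVX512FP16 but not 512-bit EVEX support would leave the _mm512_* forms undeclared, while the _mm_* and _mm256_* intrinsics kept in the diff below remain available.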
diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h
index dd083e5..92c0c24e9 100644
--- a/gcc/config/i386/avx512fp16intrin.h
+++ b/gcc/config/i386/avx512fp16intrin.h
@@ -25,8 +25,8 @@
#error "Never use <avx512fp16intrin.h> directly; include <immintrin.h> instead."
#endif
-#ifndef __AVX512FP16INTRIN_H_INCLUDED
-#define __AVX512FP16INTRIN_H_INCLUDED
+#ifndef _AVX512FP16INTRIN_H_INCLUDED
+#define _AVX512FP16INTRIN_H_INCLUDED
#ifndef __AVX512FP16__
#pragma GCC push_options
@@ -37,21 +37,17 @@
/* Internal data types for implementing the intrinsics. */
typedef _Float16 __v8hf __attribute__ ((__vector_size__ (16)));
typedef _Float16 __v16hf __attribute__ ((__vector_size__ (32)));
-typedef _Float16 __v32hf __attribute__ ((__vector_size__ (64)));
/* The Intel API is flexible enough that we must allow aliasing with other
vector types, and their scalar components. */
typedef _Float16 __m128h __attribute__ ((__vector_size__ (16), __may_alias__));
typedef _Float16 __m256h __attribute__ ((__vector_size__ (32), __may_alias__));
-typedef _Float16 __m512h __attribute__ ((__vector_size__ (64), __may_alias__));
/* Unaligned version of the same type. */
typedef _Float16 __m128h_u __attribute__ ((__vector_size__ (16), \
__may_alias__, __aligned__ (1)));
typedef _Float16 __m256h_u __attribute__ ((__vector_size__ (32), \
__may_alias__, __aligned__ (1)));
-typedef _Float16 __m512h_u __attribute__ ((__vector_size__ (64), \
- __may_alias__, __aligned__ (1)));
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -78,33 +74,8 @@ _mm256_set_ph (_Float16 __A15, _Float16 __A14, _Float16 __A13,
__A12, __A13, __A14, __A15 };
}
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_set_ph (_Float16 __A31, _Float16 __A30, _Float16 __A29,
- _Float16 __A28, _Float16 __A27, _Float16 __A26,
- _Float16 __A25, _Float16 __A24, _Float16 __A23,
- _Float16 __A22, _Float16 __A21, _Float16 __A20,
- _Float16 __A19, _Float16 __A18, _Float16 __A17,
- _Float16 __A16, _Float16 __A15, _Float16 __A14,
- _Float16 __A13, _Float16 __A12, _Float16 __A11,
- _Float16 __A10, _Float16 __A9, _Float16 __A8,
- _Float16 __A7, _Float16 __A6, _Float16 __A5,
- _Float16 __A4, _Float16 __A3, _Float16 __A2,
- _Float16 __A1, _Float16 __A0)
-{
- return __extension__ (__m512h)(__v32hf){ __A0, __A1, __A2, __A3,
- __A4, __A5, __A6, __A7,
- __A8, __A9, __A10, __A11,
- __A12, __A13, __A14, __A15,
- __A16, __A17, __A18, __A19,
- __A20, __A21, __A22, __A23,
- __A24, __A25, __A26, __A27,
- __A28, __A29, __A30, __A31 };
-}
-
-/* Create vectors of elements in the reversed order from _mm_set_ph,
- _mm256_set_ph and _mm512_set_ph functions. */
-
+/* Create vectors of elements in the reversed order from _mm_set_ph
+ and _mm256_set_ph functions. */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2,
@@ -128,30 +99,7 @@ _mm256_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2,
__A0);
}
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2,
- _Float16 __A3, _Float16 __A4, _Float16 __A5,
- _Float16 __A6, _Float16 __A7, _Float16 __A8,
- _Float16 __A9, _Float16 __A10, _Float16 __A11,
- _Float16 __A12, _Float16 __A13, _Float16 __A14,
- _Float16 __A15, _Float16 __A16, _Float16 __A17,
- _Float16 __A18, _Float16 __A19, _Float16 __A20,
- _Float16 __A21, _Float16 __A22, _Float16 __A23,
- _Float16 __A24, _Float16 __A25, _Float16 __A26,
- _Float16 __A27, _Float16 __A28, _Float16 __A29,
- _Float16 __A30, _Float16 __A31)
-
-{
- return _mm512_set_ph (__A31, __A30, __A29, __A28, __A27, __A26, __A25,
- __A24, __A23, __A22, __A21, __A20, __A19, __A18,
- __A17, __A16, __A15, __A14, __A13, __A12, __A11,
- __A10, __A9, __A8, __A7, __A6, __A5, __A4, __A3,
- __A2, __A1, __A0);
-}
-
/* Broadcast _Float16 to vector. */
-
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_ph (_Float16 __A)
@@ -167,18 +115,7 @@ _mm256_set1_ph (_Float16 __A)
__A, __A, __A, __A, __A, __A, __A, __A);
}
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_set1_ph (_Float16 __A)
-{
- return _mm512_set_ph (__A, __A, __A, __A, __A, __A, __A, __A,
- __A, __A, __A, __A, __A, __A, __A, __A,
- __A, __A, __A, __A, __A, __A, __A, __A,
- __A, __A, __A, __A, __A, __A, __A, __A);
-}
-
/* Create a vector with all zeros. */
-
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_setzero_ph (void)
@@ -193,13 +130,6 @@ _mm256_setzero_ph (void)
return _mm256_set1_ph (0.0f16);
}
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_setzero_ph (void)
-{
- return _mm512_set1_ph (0.0f16);
-}
-
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_undefined_ph (void)
@@ -222,24 +152,6 @@ _mm256_undefined_ph (void)
return __Y;
}
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_undefined_ph (void)
-{
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Winit-self"
- __m512h __Y = __Y;
-#pragma GCC diagnostic pop
- return __Y;
-}
-
-extern __inline _Float16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsh_h (__m128h __A)
-{
- return __A[0];
-}
-
extern __inline _Float16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtsh_h (__m256h __A)
@@ -247,146 +159,6 @@ _mm256_cvtsh_h (__m256h __A)
return __A[0];
}
-extern __inline _Float16
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtsh_h (__m512h __A)
-{
- return __A[0];
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_castph_ps (__m512h __a)
-{
- return (__m512) __a;
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_castph_pd (__m512h __a)
-{
- return (__m512d) __a;
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_castph_si512 (__m512h __a)
-{
- return (__m512i) __a;
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_castph512_ph128 (__m512h __A)
-{
- union
- {
- __m128h __a[4];
- __m512h __v;
- } __u = { .__v = __A };
- return __u.__a[0];
-}
-
-extern __inline __m256h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_castph512_ph256 (__m512h __A)
-{
- union
- {
- __m256h __a[2];
- __m512h __v;
- } __u = { .__v = __A };
- return __u.__a[0];
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_castph128_ph512 (__m128h __A)
-{
- union
- {
- __m128h __a[4];
- __m512h __v;
- } __u;
- __u.__a[0] = __A;
- return __u.__v;
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_castph256_ph512 (__m256h __A)
-{
- union
- {
- __m256h __a[2];
- __m512h __v;
- } __u;
- __u.__a[0] = __A;
- return __u.__v;
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_zextph128_ph512 (__m128h __A)
-{
- return (__m512h) _mm512_insertf32x4 (_mm512_setzero_ps (),
- (__m128) __A, 0);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_zextph256_ph512 (__m256h __A)
-{
- return (__m512h) _mm512_insertf64x4 (_mm512_setzero_pd (),
- (__m256d) __A, 0);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_castps_ph (__m512 __a)
-{
- return (__m512h) __a;
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_castpd_ph (__m512d __a)
-{
- return (__m512h) __a;
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_castsi512_ph (__m512i __a)
-{
- return (__m512h) __a;
-}
-
-/* Create a vector with element 0 as F and the rest zero. */
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set_sh (_Float16 __F)
-{
- return _mm_set_ph (0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16,
- __F);
-}
-
-/* Create a vector with element 0 as *P and the rest zero. */
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_load_sh (void const *__P)
-{
- return _mm_set_ph (0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16,
- *(_Float16 const *) __P);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_load_ph (void const *__P)
-{
- return *(const __m512h *) __P;
-}
-
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_load_ph (void const *__P)
@@ -401,13 +173,6 @@ _mm_load_ph (void const *__P)
return *(const __m128h *) __P;
}
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_loadu_ph (void const *__P)
-{
- return *(const __m512h_u *) __P;
-}
-
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_loadu_ph (void const *__P)
@@ -422,21 +187,6 @@ _mm_loadu_ph (void const *__P)
return *(const __m128h_u *) __P;
}
-/* Stores the lower _Float16 value. */
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_store_sh (void *__P, __m128h __A)
-{
- *(_Float16 *) __P = ((__v8hf)__A)[0];
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_store_ph (void *__P, __m512h __A)
-{
- *(__m512h *) __P = __A;
-}
-
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_store_ph (void *__P, __m256h __A)
@@ -453,13 +203,6 @@ _mm_store_ph (void *__P, __m128h __A)
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_storeu_ph (void *__P, __m512h __A)
-{
- *(__m512h_u *) __P = __A;
-}
-
-extern __inline void
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_storeu_ph (void *__P, __m256h __A)
{
*(__m256h_u *) __P = __A;
@@ -472,290 +215,30 @@ _mm_storeu_ph (void *__P, __m128h __A)
*(__m128h_u *) __P = __A;
}
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_abs_ph (__m512h __A)
-{
- return (__m512h) _mm512_and_epi32 ( _mm512_set1_epi32 (0x7FFF7FFF),
- (__m512i) __A);
-}
-
-/* Intrinsics v[add,sub,mul,div]ph. */
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_add_ph (__m512h __A, __m512h __B)
-{
- return (__m512h) ((__v32hf) __A + (__v32hf) __B);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_add_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
-{
- return __builtin_ia32_addph512_mask (__C, __D, __A, __B);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_add_ph (__mmask32 __A, __m512h __B, __m512h __C)
-{
- return __builtin_ia32_addph512_mask (__B, __C,
- _mm512_setzero_ph (), __A);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sub_ph (__m512h __A, __m512h __B)
-{
- return (__m512h) ((__v32hf) __A - (__v32hf) __B);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_sub_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
-{
- return __builtin_ia32_subph512_mask (__C, __D, __A, __B);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_sub_ph (__mmask32 __A, __m512h __B, __m512h __C)
-{
- return __builtin_ia32_subph512_mask (__B, __C,
- _mm512_setzero_ph (), __A);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mul_ph (__m512h __A, __m512h __B)
-{
- return (__m512h) ((__v32hf) __A * (__v32hf) __B);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_mul_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
-{
- return __builtin_ia32_mulph512_mask (__C, __D, __A, __B);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_mul_ph (__mmask32 __A, __m512h __B, __m512h __C)
-{
- return __builtin_ia32_mulph512_mask (__B, __C,
- _mm512_setzero_ph (), __A);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_div_ph (__m512h __A, __m512h __B)
-{
- return (__m512h) ((__v32hf) __A / (__v32hf) __B);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_div_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
-{
- return __builtin_ia32_divph512_mask (__C, __D, __A, __B);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_div_ph (__mmask32 __A, __m512h __B, __m512h __C)
-{
- return __builtin_ia32_divph512_mask (__B, __C,
- _mm512_setzero_ph (), __A);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_add_round_ph (__m512h __A, __m512h __B, const int __C)
-{
- return __builtin_ia32_addph512_mask_round (__A, __B,
- _mm512_setzero_ph (),
- (__mmask32) -1, __C);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_add_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
- __m512h __D, const int __E)
-{
- return __builtin_ia32_addph512_mask_round (__C, __D, __A, __B, __E);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_add_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
- const int __D)
-{
- return __builtin_ia32_addph512_mask_round (__B, __C,
- _mm512_setzero_ph (),
- __A, __D);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sub_round_ph (__m512h __A, __m512h __B, const int __C)
-{
- return __builtin_ia32_subph512_mask_round (__A, __B,
- _mm512_setzero_ph (),
- (__mmask32) -1, __C);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_sub_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
- __m512h __D, const int __E)
-{
- return __builtin_ia32_subph512_mask_round (__C, __D, __A, __B, __E);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_sub_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
- const int __D)
-{
- return __builtin_ia32_subph512_mask_round (__B, __C,
- _mm512_setzero_ph (),
- __A, __D);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mul_round_ph (__m512h __A, __m512h __B, const int __C)
-{
- return __builtin_ia32_mulph512_mask_round (__A, __B,
- _mm512_setzero_ph (),
- (__mmask32) -1, __C);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_mul_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
- __m512h __D, const int __E)
-{
- return __builtin_ia32_mulph512_mask_round (__C, __D, __A, __B, __E);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_mul_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
- const int __D)
-{
- return __builtin_ia32_mulph512_mask_round (__B, __C,
- _mm512_setzero_ph (),
- __A, __D);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_div_round_ph (__m512h __A, __m512h __B, const int __C)
-{
- return __builtin_ia32_divph512_mask_round (__A, __B,
- _mm512_setzero_ph (),
- (__mmask32) -1, __C);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_div_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
- __m512h __D, const int __E)
-{
- return __builtin_ia32_divph512_mask_round (__C, __D, __A, __B, __E);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_div_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
- const int __D)
-{
- return __builtin_ia32_divph512_mask_round (__B, __C,
- _mm512_setzero_ph (),
- __A, __D);
-}
-#else
-#define _mm512_add_round_ph(A, B, C) \
- ((__m512h)__builtin_ia32_addph512_mask_round((A), (B), \
- _mm512_setzero_ph (), \
- (__mmask32)-1, (C)))
-
-#define _mm512_mask_add_round_ph(A, B, C, D, E) \
- ((__m512h)__builtin_ia32_addph512_mask_round((C), (D), (A), (B), (E)))
-
-#define _mm512_maskz_add_round_ph(A, B, C, D) \
- ((__m512h)__builtin_ia32_addph512_mask_round((B), (C), \
- _mm512_setzero_ph (), \
- (A), (D)))
-
-#define _mm512_sub_round_ph(A, B, C) \
- ((__m512h)__builtin_ia32_subph512_mask_round((A), (B), \
- _mm512_setzero_ph (), \
- (__mmask32)-1, (C)))
-
-#define _mm512_mask_sub_round_ph(A, B, C, D, E) \
- ((__m512h)__builtin_ia32_subph512_mask_round((C), (D), (A), (B), (E)))
-
-#define _mm512_maskz_sub_round_ph(A, B, C, D) \
- ((__m512h)__builtin_ia32_subph512_mask_round((B), (C), \
- _mm512_setzero_ph (), \
- (A), (D)))
-
-#define _mm512_mul_round_ph(A, B, C) \
- ((__m512h)__builtin_ia32_mulph512_mask_round((A), (B), \
- _mm512_setzero_ph (), \
- (__mmask32)-1, (C)))
-
-#define _mm512_mask_mul_round_ph(A, B, C, D, E) \
- ((__m512h)__builtin_ia32_mulph512_mask_round((C), (D), (A), (B), (E)))
-
-#define _mm512_maskz_mul_round_ph(A, B, C, D) \
- ((__m512h)__builtin_ia32_mulph512_mask_round((B), (C), \
- _mm512_setzero_ph (), \
- (A), (D)))
-
-#define _mm512_div_round_ph(A, B, C) \
- ((__m512h)__builtin_ia32_divph512_mask_round((A), (B), \
- _mm512_setzero_ph (), \
- (__mmask32)-1, (C)))
-
-#define _mm512_mask_div_round_ph(A, B, C, D, E) \
- ((__m512h)__builtin_ia32_divph512_mask_round((C), (D), (A), (B), (E)))
-
-#define _mm512_maskz_div_round_ph(A, B, C, D) \
- ((__m512h)__builtin_ia32_divph512_mask_round((B), (C), \
- _mm512_setzero_ph (), \
- (A), (D)))
-#endif /* __OPTIMIZE__ */
-
-extern __inline __m512h
+/* Create a vector with element 0 as F and the rest zero. */
+extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_conj_pch (__m512h __A)
+_mm_set_sh (_Float16 __F)
{
- return (__m512h) _mm512_xor_epi32 ((__m512i) __A, _mm512_set1_epi32 (1<<31));
+ return _mm_set_ph (0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16,
+ __F);
}
-extern __inline __m512h
+/* Create a vector with element 0 as *P and the rest zero. */
+extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_conj_pch (__m512h __W, __mmask16 __U, __m512h __A)
+_mm_load_sh (void const *__P)
{
- return (__m512h)
- __builtin_ia32_movaps512_mask ((__v16sf) _mm512_conj_pch (__A),
- (__v16sf) __W,
- (__mmask16) __U);
+ return _mm_set_ph (0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16, 0.0f16,
+ *(_Float16 const *) __P);
}
-extern __inline __m512h
+/* Stores the lower _Float16 value. */
+extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_conj_pch (__mmask16 __U, __m512h __A)
+_mm_store_sh (void *__P, __m128h __A)
{
- return (__m512h)
- __builtin_ia32_movaps512_mask ((__v16sf) _mm512_conj_pch (__A),
- (__v16sf) _mm512_setzero_ps (),
- (__mmask16) __U);
+ *(_Float16 *) __P = ((__v8hf)__A)[0];
}
/* Intrinsics of v[add,sub,mul,div]sh. */
@@ -1012,138 +495,6 @@ _mm_maskz_div_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
(A), (D)))
#endif /* __OPTIMIZE__ */
-/* Intrinsic vmaxph vminph. */
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_max_ph (__m512h __A, __m512h __B)
-{
- return __builtin_ia32_maxph512_mask (__A, __B,
- _mm512_setzero_ph (),
- (__mmask32) -1);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_max_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
-{
- return __builtin_ia32_maxph512_mask (__C, __D, __A, __B);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_max_ph (__mmask32 __A, __m512h __B, __m512h __C)
-{
- return __builtin_ia32_maxph512_mask (__B, __C,
- _mm512_setzero_ph (), __A);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_min_ph (__m512h __A, __m512h __B)
-{
- return __builtin_ia32_minph512_mask (__A, __B,
- _mm512_setzero_ph (),
- (__mmask32) -1);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_min_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
-{
- return __builtin_ia32_minph512_mask (__C, __D, __A, __B);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_min_ph (__mmask32 __A, __m512h __B, __m512h __C)
-{
- return __builtin_ia32_minph512_mask (__B, __C,
- _mm512_setzero_ph (), __A);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_max_round_ph (__m512h __A, __m512h __B, const int __C)
-{
- return __builtin_ia32_maxph512_mask_round (__A, __B,
- _mm512_setzero_ph (),
- (__mmask32) -1, __C);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_max_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
- __m512h __D, const int __E)
-{
- return __builtin_ia32_maxph512_mask_round (__C, __D, __A, __B, __E);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_max_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
- const int __D)
-{
- return __builtin_ia32_maxph512_mask_round (__B, __C,
- _mm512_setzero_ph (),
- __A, __D);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_min_round_ph (__m512h __A, __m512h __B, const int __C)
-{
- return __builtin_ia32_minph512_mask_round (__A, __B,
- _mm512_setzero_ph (),
- (__mmask32) -1, __C);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_min_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
- __m512h __D, const int __E)
-{
- return __builtin_ia32_minph512_mask_round (__C, __D, __A, __B, __E);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_min_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
- const int __D)
-{
- return __builtin_ia32_minph512_mask_round (__B, __C,
- _mm512_setzero_ph (),
- __A, __D);
-}
-
-#else
-#define _mm512_max_round_ph(A, B, C) \
- (__builtin_ia32_maxph512_mask_round ((A), (B), \
- _mm512_setzero_ph (), \
- (__mmask32)-1, (C)))
-
-#define _mm512_mask_max_round_ph(A, B, C, D, E) \
- (__builtin_ia32_maxph512_mask_round ((C), (D), (A), (B), (E)))
-
-#define _mm512_maskz_max_round_ph(A, B, C, D) \
- (__builtin_ia32_maxph512_mask_round ((B), (C), \
- _mm512_setzero_ph (), \
- (A), (D)))
-
-#define _mm512_min_round_ph(A, B, C) \
- (__builtin_ia32_minph512_mask_round ((A), (B), \
- _mm512_setzero_ph (), \
- (__mmask32)-1, (C)))
-
-#define _mm512_mask_min_round_ph(A, B, C, D, E) \
- (__builtin_ia32_minph512_mask_round ((C), (D), (A), (B), (E)))
-
-#define _mm512_maskz_min_round_ph(A, B, C, D) \
- (__builtin_ia32_minph512_mask_round ((B), (C), \
- _mm512_setzero_ph (), \
- (A), (D)))
-#endif /* __OPTIMIZE__ */
-
/* Intrinsic vmaxsh vminsh. */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -1275,60 +626,6 @@ _mm_maskz_min_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
#endif /* __OPTIMIZE__ */
-/* vcmpph */
-#ifdef __OPTIMIZE
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmp_ph_mask (__m512h __A, __m512h __B, const int __C)
-{
- return (__mmask32) __builtin_ia32_cmpph512_mask (__A, __B, __C,
- (__mmask32) -1);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmp_ph_mask (__mmask32 __A, __m512h __B, __m512h __C,
- const int __D)
-{
- return (__mmask32) __builtin_ia32_cmpph512_mask (__B, __C, __D,
- __A);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmp_round_ph_mask (__m512h __A, __m512h __B, const int __C,
- const int __D)
-{
- return (__mmask32) __builtin_ia32_cmpph512_mask_round (__A, __B,
- __C, (__mmask32) -1,
- __D);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmp_round_ph_mask (__mmask32 __A, __m512h __B, __m512h __C,
- const int __D, const int __E)
-{
- return (__mmask32) __builtin_ia32_cmpph512_mask_round (__B, __C,
- __D, __A,
- __E);
-}
-
-#else
-#define _mm512_cmp_ph_mask(A, B, C) \
- (__builtin_ia32_cmpph512_mask ((A), (B), (C), (-1)))
-
-#define _mm512_mask_cmp_ph_mask(A, B, C, D) \
- (__builtin_ia32_cmpph512_mask ((B), (C), (D), (A)))
-
-#define _mm512_cmp_round_ph_mask(A, B, C, D) \
- (__builtin_ia32_cmpph512_mask_round ((A), (B), (C), (-1), (D)))
-
-#define _mm512_mask_cmp_round_ph_mask(A, B, C, D, E) \
- (__builtin_ia32_cmpph512_mask_round ((B), (C), (D), (A), (E)))
-
-#endif /* __OPTIMIZE__ */
-
/* Intrinsics vcmpsh. */
#ifdef __OPTIMIZE__
extern __inline __mmask8
@@ -1525,126 +822,6 @@ _mm_comi_round_sh (__m128h __A, __m128h __B, const int __P, const int __R)
#endif /* __OPTIMIZE__ */
-/* Intrinsics vsqrtph. */
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sqrt_ph (__m512h __A)
-{
- return __builtin_ia32_sqrtph512_mask_round (__A,
- _mm512_setzero_ph(),
- (__mmask32) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_sqrt_ph (__m512h __A, __mmask32 __B, __m512h __C)
-{
- return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_sqrt_ph (__mmask32 __A, __m512h __B)
-{
- return __builtin_ia32_sqrtph512_mask_round (__B,
- _mm512_setzero_ph (),
- __A,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sqrt_round_ph (__m512h __A, const int __B)
-{
- return __builtin_ia32_sqrtph512_mask_round (__A,
- _mm512_setzero_ph(),
- (__mmask32) -1, __B);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_sqrt_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
- const int __D)
-{
- return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B, __D);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_sqrt_round_ph (__mmask32 __A, __m512h __B, const int __C)
-{
- return __builtin_ia32_sqrtph512_mask_round (__B,
- _mm512_setzero_ph (),
- __A, __C);
-}
-
-#else
-#define _mm512_sqrt_round_ph(A, B) \
- (__builtin_ia32_sqrtph512_mask_round ((A), \
- _mm512_setzero_ph (), \
- (__mmask32)-1, (B)))
-
-#define _mm512_mask_sqrt_round_ph(A, B, C, D) \
- (__builtin_ia32_sqrtph512_mask_round ((C), (A), (B), (D)))
-
-#define _mm512_maskz_sqrt_round_ph(A, B, C) \
- (__builtin_ia32_sqrtph512_mask_round ((B), \
- _mm512_setzero_ph (), \
- (A), (C)))
-
-#endif /* __OPTIMIZE__ */
-
-/* Intrinsics vrsqrtph. */
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_rsqrt_ph (__m512h __A)
-{
- return __builtin_ia32_rsqrtph512_mask (__A, _mm512_setzero_ph (),
- (__mmask32) -1);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_rsqrt_ph (__m512h __A, __mmask32 __B, __m512h __C)
-{
- return __builtin_ia32_rsqrtph512_mask (__C, __A, __B);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_rsqrt_ph (__mmask32 __A, __m512h __B)
-{
- return __builtin_ia32_rsqrtph512_mask (__B, _mm512_setzero_ph (),
- __A);
-}
-
-/* Intrinsics vrsqrtsh. */
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rsqrt_sh (__m128h __A, __m128h __B)
-{
- return __builtin_ia32_rsqrtsh_mask (__B, __A, _mm_setzero_ph (),
- (__mmask8) -1);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_rsqrt_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
-{
- return __builtin_ia32_rsqrtsh_mask (__D, __C, __A, __B);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_rsqrt_sh (__mmask8 __A, __m128h __B, __m128h __C)
-{
- return __builtin_ia32_rsqrtsh_mask (__C, __B, _mm_setzero_ph (),
- __A);
-}
-
/* Intrinsics vsqrtsh. */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -1718,28 +895,28 @@ _mm_maskz_sqrt_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
#endif /* __OPTIMIZE__ */
-/* Intrinsics vrcpph. */
-extern __inline __m512h
+/* Intrinsics vrsqrtsh. */
+extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_rcp_ph (__m512h __A)
+_mm_rsqrt_sh (__m128h __A, __m128h __B)
{
- return __builtin_ia32_rcpph512_mask (__A, _mm512_setzero_ph (),
- (__mmask32) -1);
+ return __builtin_ia32_rsqrtsh_mask (__B, __A, _mm_setzero_ph (),
+ (__mmask8) -1);
}
-extern __inline __m512h
+extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_rcp_ph (__m512h __A, __mmask32 __B, __m512h __C)
+_mm_mask_rsqrt_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
- return __builtin_ia32_rcpph512_mask (__C, __A, __B);
+ return __builtin_ia32_rsqrtsh_mask (__D, __C, __A, __B);
}
-extern __inline __m512h
+extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_rcp_ph (__mmask32 __A, __m512h __B)
+_mm_maskz_rsqrt_sh (__mmask8 __A, __m128h __B, __m128h __C)
{
- return __builtin_ia32_rcpph512_mask (__B, _mm512_setzero_ph (),
- __A);
+ return __builtin_ia32_rsqrtsh_mask (__C, __B, _mm_setzero_ph (),
+ __A);
}
/* Intrinsics vrcpsh. */
@@ -1766,80 +943,6 @@ _mm_maskz_rcp_sh (__mmask32 __A, __m128h __B, __m128h __C)
__A);
}
-/* Intrinsics vscalefph. */
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_scalef_ph (__m512h __A, __m512h __B)
-{
- return __builtin_ia32_scalefph512_mask_round (__A, __B,
- _mm512_setzero_ph (),
- (__mmask32) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_scalef_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
-{
- return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_scalef_ph (__mmask32 __A, __m512h __B, __m512h __C)
-{
- return __builtin_ia32_scalefph512_mask_round (__B, __C,
- _mm512_setzero_ph (),
- __A,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_scalef_round_ph (__m512h __A, __m512h __B, const int __C)
-{
- return __builtin_ia32_scalefph512_mask_round (__A, __B,
- _mm512_setzero_ph (),
- (__mmask32) -1, __C);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_scalef_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
- __m512h __D, const int __E)
-{
- return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B,
- __E);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_scalef_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
- const int __D)
-{
- return __builtin_ia32_scalefph512_mask_round (__B, __C,
- _mm512_setzero_ph (),
- __A, __D);
-}
-
-#else
-#define _mm512_scalef_round_ph(A, B, C) \
- (__builtin_ia32_scalefph512_mask_round ((A), (B), \
- _mm512_setzero_ph (), \
- (__mmask32)-1, (C)))
-
-#define _mm512_mask_scalef_round_ph(A, B, C, D, E) \
- (__builtin_ia32_scalefph512_mask_round ((C), (D), (A), (B), (E)))
-
-#define _mm512_maskz_scalef_round_ph(A, B, C, D) \
- (__builtin_ia32_scalefph512_mask_round ((B), (C), \
- _mm512_setzero_ph (), \
- (A), (D)))
-
-#endif /* __OPTIMIZE__ */
-
/* Intrinsics vscalefsh. */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -1913,95 +1016,6 @@ _mm_maskz_scalef_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
#endif /* __OPTIMIZE__ */
-/* Intrinsics vreduceph. */
-#ifdef __OPTIMIZE__
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_reduce_ph (__m512h __A, int __B)
-{
- return __builtin_ia32_reduceph512_mask_round (__A, __B,
- _mm512_setzero_ph (),
- (__mmask32) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_reduce_ph (__m512h __A, __mmask32 __B, __m512h __C, int __D)
-{
- return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_reduce_ph (__mmask32 __A, __m512h __B, int __C)
-{
- return __builtin_ia32_reduceph512_mask_round (__B, __C,
- _mm512_setzero_ph (),
- __A,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_reduce_round_ph (__m512h __A, int __B, const int __C)
-{
- return __builtin_ia32_reduceph512_mask_round (__A, __B,
- _mm512_setzero_ph (),
- (__mmask32) -1, __C);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_reduce_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
- int __D, const int __E)
-{
- return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B,
- __E);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_reduce_round_ph (__mmask32 __A, __m512h __B, int __C,
- const int __D)
-{
- return __builtin_ia32_reduceph512_mask_round (__B, __C,
- _mm512_setzero_ph (),
- __A, __D);
-}
-
-#else
-#define _mm512_reduce_ph(A, B) \
- (__builtin_ia32_reduceph512_mask_round ((A), (B), \
- _mm512_setzero_ph (), \
- (__mmask32)-1, \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_mask_reduce_ph(A, B, C, D) \
- (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_maskz_reduce_ph(A, B, C) \
- (__builtin_ia32_reduceph512_mask_round ((B), (C), \
- _mm512_setzero_ph (), \
- (A), _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_reduce_round_ph(A, B, C) \
- (__builtin_ia32_reduceph512_mask_round ((A), (B), \
- _mm512_setzero_ph (), \
- (__mmask32)-1, (C)))
-
-#define _mm512_mask_reduce_round_ph(A, B, C, D, E) \
- (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B), (E)))
-
-#define _mm512_maskz_reduce_round_ph(A, B, C, D) \
- (__builtin_ia32_reduceph512_mask_round ((B), (C), \
- _mm512_setzero_ph (), \
- (A), (D)))
-
-#endif /* __OPTIMIZE__ */
-
/* Intrinsics vreducesh. */
#ifdef __OPTIMIZE__
extern __inline __m128h
@@ -2091,97 +1105,6 @@ _mm_maskz_reduce_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
#endif /* __OPTIMIZE__ */
-/* Intrinsics vrndscaleph. */
-#ifdef __OPTIMIZE__
-extern __inline __m512h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_roundscale_ph (__m512h __A, int __B)
-{
- return __builtin_ia32_rndscaleph512_mask_round (__A, __B,
- _mm512_setzero_ph (),
- (__mmask32) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_roundscale_ph (__m512h __A, __mmask32 __B,
- __m512h __C, int __D)
-{
- return __builtin_ia32_rndscaleph512_mask_round (__C, __D, __A, __B,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_roundscale_ph (__mmask32 __A, __m512h __B, int __C)
-{
- return __builtin_ia32_rndscaleph512_mask_round (__B, __C,
- _mm512_setzero_ph (),
- __A,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_roundscale_round_ph (__m512h __A, int __B, const int __C)
-{
- return __builtin_ia32_rndscaleph512_mask_round (__A, __B,
- _mm512_setzero_ph (),
- (__mmask32) -1,
- __C);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_roundscale_round_ph (__m512h __A, __mmask32 __B,
- __m512h __C, int __D, const int __E)
-{
- return __builtin_ia32_rndscaleph512_mask_round (__C, __D, __A,
- __B, __E);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_roundscale_round_ph (__mmask32 __A, __m512h __B, int __C,
- const int __D)
-{
- return __builtin_ia32_rndscaleph512_mask_round (__B, __C,
- _mm512_setzero_ph (),
- __A, __D);
-}
-
-#else
-#define _mm512_roundscale_ph(A, B) \
- (__builtin_ia32_rndscaleph512_mask_round ((A), (B), \
- _mm512_setzero_ph (), \
- (__mmask32)-1, \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_mask_roundscale_ph(A, B, C, D) \
- (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_maskz_roundscale_ph(A, B, C) \
- (__builtin_ia32_rndscaleph512_mask_round ((B), (C), \
- _mm512_setzero_ph (), \
- (A), \
- _MM_FROUND_CUR_DIRECTION))
-#define _mm512_roundscale_round_ph(A, B, C) \
- (__builtin_ia32_rndscaleph512_mask_round ((A), (B), \
- _mm512_setzero_ph (), \
- (__mmask32)-1, (C)))
-
-#define _mm512_mask_roundscale_round_ph(A, B, C, D, E) \
- (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B), (E)))
-
-#define _mm512_maskz_roundscale_round_ph(A, B, C, D) \
- (__builtin_ia32_rndscaleph512_mask_round ((B), (C), \
- _mm512_setzero_ph (), \
- (A), (D)))
-
-#endif /* __OPTIMIZE__ */
-
/* Intrinsics vrndscalesh. */
#ifdef __OPTIMIZE__
extern __inline __m128h
@@ -2297,39 +1220,10 @@ _mm_mask_fpclass_sh_mask (__mmask8 __U, __m128h __A, const int __imm)
#define _mm_mask_fpclass_sh_mask(U, X, C) \
((__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) (__m128h) (X), \
(int) (C), (__mmask8) (U)))
-#endif /* __OPTIMIZE__ */
-
-/* Intrinsics vfpclassph. */
-#ifdef __OPTIMIZE__
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fpclass_ph_mask (__mmask32 __U, __m512h __A,
- const int __imm)
-{
- return (__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) __A,
- __imm, __U);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fpclass_ph_mask (__m512h __A, const int __imm)
-{
- return (__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) __A,
- __imm,
- (__mmask32) -1);
-}
-
-#else
-#define _mm512_mask_fpclass_ph_mask(u, x, c) \
- ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
- (int) (c),(__mmask8)(u)))
-#define _mm512_fpclass_ph_mask(x, c) \
- ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
- (int) (c),(__mmask8)-1))
-#endif /* __OPIMTIZE__ */
+#endif /* __OPTIMIZE__ */
-/* Intrinsics vgetexpph, vgetexpsh. */
+/* Intrinsics vgetexpsh. */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getexp_sh (__m128h __A, __m128h __B)
@@ -2362,35 +1256,6 @@ _mm_maskz_getexp_sh (__mmask8 __U, __m128h __A, __m128h __B)
_MM_FROUND_CUR_DIRECTION);
}
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_getexp_ph (__m512h __A)
-{
- return (__m512h)
- __builtin_ia32_getexpph512_mask ((__v32hf) __A,
- (__v32hf) _mm512_setzero_ph (),
- (__mmask32) -1, _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_getexp_ph (__m512h __W, __mmask32 __U, __m512h __A)
-{
- return (__m512h)
- __builtin_ia32_getexpph512_mask ((__v32hf) __A, (__v32hf) __W,
- (__mmask32) __U, _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_getexp_ph (__mmask32 __U, __m512h __A)
-{
- return (__m512h)
- __builtin_ia32_getexpph512_mask ((__v32hf) __A,
- (__v32hf) _mm512_setzero_ph (),
- (__mmask32) __U, _MM_FROUND_CUR_DIRECTION);
-}
-
#ifdef __OPTIMIZE__
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -2426,36 +1291,6 @@ _mm_maskz_getexp_round_sh (__mmask8 __U, __m128h __A, __m128h __B,
(__mmask8) __U, __R);
}
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_getexp_round_ph (__m512h __A, const int __R)
-{
- return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
- (__v32hf)
- _mm512_setzero_ph (),
- (__mmask32) -1, __R);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_getexp_round_ph (__m512h __W, __mmask32 __U, __m512h __A,
- const int __R)
-{
- return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
- (__v32hf) __W,
- (__mmask32) __U, __R);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_getexp_round_ph (__mmask32 __U, __m512h __A, const int __R)
-{
- return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
- (__v32hf)
- _mm512_setzero_ph (),
- (__mmask32) __U, __R);
-}
-
#else
#define _mm_getexp_round_sh(A, B, R) \
((__m128h)__builtin_ia32_getexpsh_mask_round((__v8hf)(__m128h)(A), \
@@ -2471,21 +1306,9 @@ _mm512_maskz_getexp_round_ph (__mmask32 __U, __m512h __A, const int __R)
(__v8hf)_mm_setzero_ph(), \
U, C)
-#define _mm512_getexp_round_ph(A, R) \
- ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
- (__v32hf)_mm512_setzero_ph(), (__mmask32)-1, R))
-
-#define _mm512_mask_getexp_round_ph(W, U, A, R) \
- ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
- (__v32hf)(__m512h)(W), (__mmask32)(U), R))
-
-#define _mm512_maskz_getexp_round_ph(U, A, R) \
- ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
- (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), R))
-
#endif /* __OPTIMIZE__ */
-/* Intrinsics vgetmantph, vgetmantsh. */
+/* Intrinsics vgetmantsh. */
#ifdef __OPTIMIZE__
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -2525,44 +1348,6 @@ _mm_maskz_getmant_sh (__mmask8 __U, __m128h __A, __m128h __B,
__U, _MM_FROUND_CUR_DIRECTION);
}
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_getmant_ph (__m512h __A, _MM_MANTISSA_NORM_ENUM __B,
- _MM_MANTISSA_SIGN_ENUM __C)
-{
- return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
- (__C << 2) | __B,
- _mm512_setzero_ph (),
- (__mmask32) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_getmant_ph (__m512h __W, __mmask32 __U, __m512h __A,
- _MM_MANTISSA_NORM_ENUM __B,
- _MM_MANTISSA_SIGN_ENUM __C)
-{
- return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
- (__C << 2) | __B,
- (__v32hf) __W, __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_getmant_ph (__mmask32 __U, __m512h __A,
- _MM_MANTISSA_NORM_ENUM __B,
- _MM_MANTISSA_SIGN_ENUM __C)
-{
- return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
- (__C << 2) | __B,
- (__v32hf)
- _mm512_setzero_ph (),
- __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getmant_round_sh (__m128h __A, __m128h __B,
@@ -2604,67 +1389,7 @@ _mm_maskz_getmant_round_sh (__mmask8 __U, __m128h __A, __m128h __B,
__U, __R);
}
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_getmant_round_ph (__m512h __A, _MM_MANTISSA_NORM_ENUM __B,
- _MM_MANTISSA_SIGN_ENUM __C, const int __R)
-{
- return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
- (__C << 2) | __B,
- _mm512_setzero_ph (),
- (__mmask32) -1, __R);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_getmant_round_ph (__m512h __W, __mmask32 __U, __m512h __A,
- _MM_MANTISSA_NORM_ENUM __B,
- _MM_MANTISSA_SIGN_ENUM __C, const int __R)
-{
- return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
- (__C << 2) | __B,
- (__v32hf) __W, __U,
- __R);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_getmant_round_ph (__mmask32 __U, __m512h __A,
- _MM_MANTISSA_NORM_ENUM __B,
- _MM_MANTISSA_SIGN_ENUM __C, const int __R)
-{
- return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
- (__C << 2) | __B,
- (__v32hf)
- _mm512_setzero_ph (),
- __U, __R);
-}
-
#else
-#define _mm512_getmant_ph(X, B, C) \
- ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
- (int)(((C)<<2) | (B)), \
- (__v32hf)(__m512h) \
- _mm512_setzero_ph(), \
- (__mmask32)-1, \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_mask_getmant_ph(W, U, X, B, C) \
- ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
- (int)(((C)<<2) | (B)), \
- (__v32hf)(__m512h)(W), \
- (__mmask32)(U), \
- _MM_FROUND_CUR_DIRECTION))
-
-
-#define _mm512_maskz_getmant_ph(U, X, B, C) \
- ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
- (int)(((C)<<2) | (B)), \
- (__v32hf)(__m512h) \
- _mm512_setzero_ph(), \
- (__mmask32)(U), \
- _MM_FROUND_CUR_DIRECTION))
-
#define _mm_getmant_sh(X, Y, C, D) \
((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \
(__v8hf)(__m128h)(Y), \
@@ -2691,30 +1416,6 @@ _mm512_maskz_getmant_round_ph (__mmask32 __U, __m512h __A,
(__mmask8)(U), \
_MM_FROUND_CUR_DIRECTION))
-#define _mm512_getmant_round_ph(X, B, C, R) \
- ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
- (int)(((C)<<2) | (B)), \
- (__v32hf)(__m512h) \
- _mm512_setzero_ph(), \
- (__mmask32)-1, \
- (R)))
-
-#define _mm512_mask_getmant_round_ph(W, U, X, B, C, R) \
- ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
- (int)(((C)<<2) | (B)), \
- (__v32hf)(__m512h)(W), \
- (__mmask32)(U), \
- (R)))
-
-
-#define _mm512_maskz_getmant_round_ph(U, X, B, C, R) \
- ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
- (int)(((C)<<2) | (B)), \
- (__v32hf)(__m512h) \
- _mm512_setzero_ph(), \
- (__mmask32)(U), \
- (R)))
-
#define _mm_getmant_round_sh(X, Y, C, D, R) \
((__m128h)__builtin_ia32_getmantsh_mask_round ((__v8hf)(__m128h)(X), \
(__v8hf)(__m128h)(Y), \
@@ -2802,6 +1503,2674 @@ _mm_maskz_move_sh (__mmask8 __A, __m128h __B, __m128h __C)
return __builtin_ia32_vmovsh_mask (__B, __C, _mm_setzero_ph (), __A);
}
+/* Intrinsics vcvtsh2si, vcvtsh2us. */
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsh_i32 (__m128h __A)
+{
+ return (int) __builtin_ia32_vcvtsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsh_u32 (__m128h __A)
+{
+ return (int) __builtin_ia32_vcvtsh2usi32_round (__A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsh_i32 (__m128h __A, const int __R)
+{
+ return (int) __builtin_ia32_vcvtsh2si32_round (__A, __R);
+}
+
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsh_u32 (__m128h __A, const int __R)
+{
+ return (int) __builtin_ia32_vcvtsh2usi32_round (__A, __R);
+}
+
+#else
+#define _mm_cvt_roundsh_i32(A, B) \
+ ((int)__builtin_ia32_vcvtsh2si32_round ((A), (B)))
+#define _mm_cvt_roundsh_u32(A, B) \
+ ((int)__builtin_ia32_vcvtsh2usi32_round ((A), (B)))
+
+#endif /* __OPTIMIZE__ */
+
+#ifdef __x86_64__
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsh_i64 (__m128h __A)
+{
+ return (long long)
+ __builtin_ia32_vcvtsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsh_u64 (__m128h __A)
+{
+ return (long long)
+ __builtin_ia32_vcvtsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsh_i64 (__m128h __A, const int __R)
+{
+ return (long long) __builtin_ia32_vcvtsh2si64_round (__A, __R);
+}
+
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsh_u64 (__m128h __A, const int __R)
+{
+ return (long long) __builtin_ia32_vcvtsh2usi64_round (__A, __R);
+}
+
+#else
+#define _mm_cvt_roundsh_i64(A, B) \
+ ((long long)__builtin_ia32_vcvtsh2si64_round ((A), (B)))
+#define _mm_cvt_roundsh_u64(A, B) \
+ ((long long)__builtin_ia32_vcvtsh2usi64_round ((A), (B)))
+
+#endif /* __OPTIMIZE__ */
+#endif /* __x86_64__ */
+
+/* Intrinsics vcvtsi2sh, vcvtusi2sh. */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvti32_sh (__m128h __A, int __B)
+{
+ return __builtin_ia32_vcvtsi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtu32_sh (__m128h __A, unsigned int __B)
+{
+ return __builtin_ia32_vcvtusi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundi32_sh (__m128h __A, int __B, const int __R)
+{
+ return __builtin_ia32_vcvtsi2sh32_round (__A, __B, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundu32_sh (__m128h __A, unsigned int __B, const int __R)
+{
+ return __builtin_ia32_vcvtusi2sh32_round (__A, __B, __R);
+}
+
+#else
+#define _mm_cvt_roundi32_sh(A, B, C) \
+ (__builtin_ia32_vcvtsi2sh32_round ((A), (B), (C)))
+#define _mm_cvt_roundu32_sh(A, B, C) \
+ (__builtin_ia32_vcvtusi2sh32_round ((A), (B), (C)))
+
+#endif /* __OPTIMIZE__ */
+
+#ifdef __x86_64__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvti64_sh (__m128h __A, long long __B)
+{
+ return __builtin_ia32_vcvtsi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtu64_sh (__m128h __A, unsigned long long __B)
+{
+ return __builtin_ia32_vcvtusi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundi64_sh (__m128h __A, long long __B, const int __R)
+{
+ return __builtin_ia32_vcvtsi2sh64_round (__A, __B, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundu64_sh (__m128h __A, unsigned long long __B, const int __R)
+{
+ return __builtin_ia32_vcvtusi2sh64_round (__A, __B, __R);
+}
+
+#else
+#define _mm_cvt_roundi64_sh(A, B, C) \
+ (__builtin_ia32_vcvtsi2sh64_round ((A), (B), (C)))
+#define _mm_cvt_roundu64_sh(A, B, C) \
+ (__builtin_ia32_vcvtusi2sh64_round ((A), (B), (C)))
+
+#endif /* __OPTIMIZE__ */
+#endif /* __x86_64__ */
+
+/* Intrinsics vcvttsh2si, vcvttsh2us. */
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsh_i32 (__m128h __A)
+{
+ return (int)
+ __builtin_ia32_vcvttsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsh_u32 (__m128h __A)
+{
+ return (int)
+ __builtin_ia32_vcvttsh2usi32_round (__A, _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsh_i32 (__m128h __A, const int __R)
+{
+ return (int) __builtin_ia32_vcvttsh2si32_round (__A, __R);
+}
+
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsh_u32 (__m128h __A, const int __R)
+{
+ return (int) __builtin_ia32_vcvttsh2usi32_round (__A, __R);
+}
+
+#else
+#define _mm_cvtt_roundsh_i32(A, B) \
+ ((int)__builtin_ia32_vcvttsh2si32_round ((A), (B)))
+#define _mm_cvtt_roundsh_u32(A, B) \
+ ((int)__builtin_ia32_vcvttsh2usi32_round ((A), (B)))
+
+#endif /* __OPTIMIZE__ */
+
+#ifdef __x86_64__
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsh_i64 (__m128h __A)
+{
+ return (long long)
+ __builtin_ia32_vcvttsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsh_u64 (__m128h __A)
+{
+ return (long long)
+ __builtin_ia32_vcvttsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsh_i64 (__m128h __A, const int __R)
+{
+ return (long long) __builtin_ia32_vcvttsh2si64_round (__A, __R);
+}
+
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsh_u64 (__m128h __A, const int __R)
+{
+ return (long long) __builtin_ia32_vcvttsh2usi64_round (__A, __R);
+}
+
+#else
+#define _mm_cvtt_roundsh_i64(A, B) \
+ ((long long)__builtin_ia32_vcvttsh2si64_round ((A), (B)))
+#define _mm_cvtt_roundsh_u64(A, B) \
+ ((long long)__builtin_ia32_vcvttsh2usi64_round ((A), (B)))
+
+#endif /* __OPTIMIZE__ */
+#endif /* __x86_64__ */
+
+/* Intrinsics vcvtsh2ss, vcvtsh2sd. */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsh_ss (__m128 __A, __m128h __B)
+{
+ return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A,
+ _mm_setzero_ps (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsh_ss (__m128 __A, __mmask8 __B, __m128 __C,
+ __m128h __D)
+{
+ return __builtin_ia32_vcvtsh2ss_mask_round (__D, __C, __A, __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtsh_ss (__mmask8 __A, __m128 __B,
+ __m128h __C)
+{
+ return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B,
+ _mm_setzero_ps (),
+ __A, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsh_sd (__m128d __A, __m128h __B)
+{
+ return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A,
+ _mm_setzero_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsh_sd (__m128d __A, __mmask8 __B, __m128d __C,
+ __m128h __D)
+{
+ return __builtin_ia32_vcvtsh2sd_mask_round (__D, __C, __A, __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtsh_sd (__mmask8 __A, __m128d __B, __m128h __C)
+{
+ return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B,
+ _mm_setzero_pd (),
+ __A, _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsh_ss (__m128 __A, __m128h __B, const int __R)
+{
+ return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A,
+ _mm_setzero_ps (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvt_roundsh_ss (__m128 __A, __mmask8 __B, __m128 __C,
+ __m128h __D, const int __R)
+{
+ return __builtin_ia32_vcvtsh2ss_mask_round (__D, __C, __A, __B, __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvt_roundsh_ss (__mmask8 __A, __m128 __B,
+ __m128h __C, const int __R)
+{
+ return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B,
+ _mm_setzero_ps (),
+ __A, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsh_sd (__m128d __A, __m128h __B, const int __R)
+{
+ return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A,
+ _mm_setzero_pd (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvt_roundsh_sd (__m128d __A, __mmask8 __B, __m128d __C,
+ __m128h __D, const int __R)
+{
+ return __builtin_ia32_vcvtsh2sd_mask_round (__D, __C, __A, __B, __R);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvt_roundsh_sd (__mmask8 __A, __m128d __B, __m128h __C, const int __R)
+{
+ return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B,
+ _mm_setzero_pd (),
+ __A, __R);
+}
+
+#else
+#define _mm_cvt_roundsh_ss(A, B, R) \
+ (__builtin_ia32_vcvtsh2ss_mask_round ((B), (A), \
+ _mm_setzero_ps (), \
+ (__mmask8) -1, (R)))
+
+#define _mm_mask_cvt_roundsh_ss(A, B, C, D, R) \
+ (__builtin_ia32_vcvtsh2ss_mask_round ((D), (C), (A), (B), (R)))
+
+#define _mm_maskz_cvt_roundsh_ss(A, B, C, R) \
+ (__builtin_ia32_vcvtsh2ss_mask_round ((C), (B), \
+ _mm_setzero_ps (), \
+ (A), (R)))
+
+#define _mm_cvt_roundsh_sd(A, B, R) \
+ (__builtin_ia32_vcvtsh2sd_mask_round ((B), (A), \
+ _mm_setzero_pd (), \
+ (__mmask8) -1, (R)))
+
+#define _mm_mask_cvt_roundsh_sd(A, B, C, D, R) \
+ (__builtin_ia32_vcvtsh2sd_mask_round ((D), (C), (A), (B), (R)))
+
+#define _mm_maskz_cvt_roundsh_sd(A, B, C, R) \
+ (__builtin_ia32_vcvtsh2sd_mask_round ((C), (B), \
+ _mm_setzero_pd (), \
+ (A), (R)))
+
+#endif /* __OPTIMIZE__ */
+
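+/* Editorial usage sketch (not part of this header): widening the low
+   _Float16 element to float or double.  The first argument supplies the
+   upper elements of the result, so passing _mm_setzero_ps () or
+   _mm_setzero_pd () leaves the remaining lanes zero; the SSE scalar
+   extractors _mm_cvtss_f32/_mm_cvtsd_f64 then read lane 0 back:
+
+     __m128h __h = _mm_set_sh (1.5f16);
+     float  __f = _mm_cvtss_f32 (_mm_cvtsh_ss (_mm_setzero_ps (), __h));
+     double __d = _mm_cvtsd_f64 (_mm_cvtsh_sd (_mm_setzero_pd (), __h));  */
+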
+/* Intrinsics vcvtss2sh, vcvtsd2sh. */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_sh (__m128h __A, __m128 __B)
+{
+ return __builtin_ia32_vcvtss2sh_mask_round (__B, __A,
+ _mm_setzero_ph (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtss_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128 __D)
+{
+ return __builtin_ia32_vcvtss2sh_mask_round (__D, __C, __A, __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtss_sh (__mmask8 __A, __m128h __B, __m128 __C)
+{
+ return __builtin_ia32_vcvtss2sh_mask_round (__C, __B,
+ _mm_setzero_ph (),
+ __A, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsd_sh (__m128h __A, __m128d __B)
+{
+ return __builtin_ia32_vcvtsd2sh_mask_round (__B, __A,
+ _mm_setzero_ph (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsd_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128d __D)
+{
+ return __builtin_ia32_vcvtsd2sh_mask_round (__D, __C, __A, __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtsd_sh (__mmask8 __A, __m128h __B, __m128d __C)
+{
+ return __builtin_ia32_vcvtsd2sh_mask_round (__C, __B,
+ _mm_setzero_ph (),
+ __A, _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundss_sh (__m128h __A, __m128 __B, const int __R)
+{
+ return __builtin_ia32_vcvtss2sh_mask_round (__B, __A,
+ _mm_setzero_ph (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvt_roundss_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128 __D,
+ const int __R)
+{
+ return __builtin_ia32_vcvtss2sh_mask_round (__D, __C, __A, __B, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvt_roundss_sh (__mmask8 __A, __m128h __B, __m128 __C,
+ const int __R)
+{
+ return __builtin_ia32_vcvtss2sh_mask_round (__C, __B,
+ _mm_setzero_ph (),
+ __A, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsd_sh (__m128h __A, __m128d __B, const int __R)
+{
+ return __builtin_ia32_vcvtsd2sh_mask_round (__B, __A,
+ _mm_setzero_ph (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvt_roundsd_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128d __D,
+ const int __R)
+{
+ return __builtin_ia32_vcvtsd2sh_mask_round (__D, __C, __A, __B, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvt_roundsd_sh (__mmask8 __A, __m128h __B, __m128d __C,
+ const int __R)
+{
+ return __builtin_ia32_vcvtsd2sh_mask_round (__C, __B,
+ _mm_setzero_ph (),
+ __A, __R);
+}
+
+#else
+#define _mm_cvt_roundss_sh(A, B, R) \
+ (__builtin_ia32_vcvtss2sh_mask_round ((B), (A), \
+ _mm_setzero_ph (), \
+					 (__mmask8) -1, (R)))
+
+#define _mm_mask_cvt_roundss_sh(A, B, C, D, R) \
+ (__builtin_ia32_vcvtss2sh_mask_round ((D), (C), (A), (B), (R)))
+
+#define _mm_maskz_cvt_roundss_sh(A, B, C, R) \
+ (__builtin_ia32_vcvtss2sh_mask_round ((C), (B), \
+ _mm_setzero_ph (), \
+					 (A), (R)))
+
+#define _mm_cvt_roundsd_sh(A, B, R) \
+ (__builtin_ia32_vcvtsd2sh_mask_round ((B), (A), \
+ _mm_setzero_ph (), \
+					 (__mmask8) -1, (R)))
+
+#define _mm_mask_cvt_roundsd_sh(A, B, C, D, R) \
+ (__builtin_ia32_vcvtsd2sh_mask_round ((D), (C), (A), (B), (R)))
+
+#define _mm_maskz_cvt_roundsd_sh(A, B, C, R) \
+ (__builtin_ia32_vcvtsd2sh_mask_round ((C), (B), \
+ _mm_setzero_ph (), \
+ (A), (R)))
+
+#endif /* __OPTIMIZE__ */
+
+extern __inline _Float16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsh_h (__m128h __A)
+{
+ return __A[0];
+}
+
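+/* Editorial usage sketch (not part of this header): narrowing a float to
+   _Float16 and reading the scalar back with _mm_cvtsh_h (assuming
+   _mm_setzero_ph from this header and _mm_set_ss from <immintrin.h>):
+
+     __m128h __h = _mm_cvtss_sh (_mm_setzero_ph (), _mm_set_ss (0.5f));
+     _Float16 __v = _mm_cvtsh_h (__h);		(__v == 0.5)  */
+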
+/* Intrinsics vfmadd[132,213,231]sh. */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_sh (__m128h __W, __m128h __A, __m128h __B)
+{
+ return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+ (__v8hf) __A,
+ (__v8hf) __B,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmadd_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
+{
+ return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+ (__v8hf) __A,
+ (__v8hf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmadd_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
+{
+ return (__m128h) __builtin_ia32_vfmaddsh3_mask3 ((__v8hf) __W,
+ (__v8hf) __A,
+ (__v8hf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmadd_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
+{
+ return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
+ (__v8hf) __A,
+ (__v8hf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
+{
+ return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+ (__v8hf) __A,
+ (__v8hf) __B,
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmadd_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
+ const int __R)
+{
+ return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+ (__v8hf) __A,
+ (__v8hf) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
+ const int __R)
+{
+ return (__m128h) __builtin_ia32_vfmaddsh3_mask3 ((__v8hf) __W,
+ (__v8hf) __A,
+ (__v8hf) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmadd_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
+ __m128h __B, const int __R)
+{
+ return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
+ (__v8hf) __A,
+ (__v8hf) __B,
+ (__mmask8) __U, __R);
+}
+
+#else
+#define _mm_fmadd_round_sh(A, B, C, R) \
+ ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), (C), (-1), (R)))
+#define _mm_mask_fmadd_round_sh(A, U, B, C, R) \
+ ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), (C), (U), (R)))
+#define _mm_mask3_fmadd_round_sh(A, B, C, U, R) \
+ ((__m128h) __builtin_ia32_vfmaddsh3_mask3 ((A), (B), (C), (U), (R)))
+#define _mm_maskz_fmadd_round_sh(U, A, B, C, R) \
+ ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), (B), (C), (U), (R)))
+
+#endif /* __OPTIMIZE__ */
+
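+/* Editorial usage sketch (not part of this header): scalar half-precision
+   fused multiply-add.  Following the usual scalar FMA convention the low
+   lanes combine as __W * __A + __B with the upper lanes copied from __W,
+   and the masked forms merge from (or zero) lane 0 when mask bit 0 is
+   clear.  The fnmadd/fmsub/fnmsub variants below negate the product
+   and/or the addend.
+
+     __m128h __r = _mm_fmadd_sh (_mm_set_sh (2.0f16),
+				 _mm_set_sh (3.0f16),
+				 _mm_set_sh (4.0f16));	(low lane 10.0)  */
+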
+/* Intrinsics vfnmadd[132,213,231]sh. */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmadd_sh (__m128h __W, __m128h __A, __m128h __B)
+{
+ return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
+ (__v8hf) __A,
+ (__v8hf) __B,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmadd_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
+{
+ return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
+ (__v8hf) __A,
+ (__v8hf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmadd_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
+{
+ return (__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((__v8hf) __W,
+ (__v8hf) __A,
+ (__v8hf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmadd_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
+{
+ return (__m128h) __builtin_ia32_vfnmaddsh3_maskz ((__v8hf) __W,
+ (__v8hf) __A,
+ (__v8hf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
+{
+ return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
+ (__v8hf) __A,
+ (__v8hf) __B,
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmadd_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
+ const int __R)
+{
+ return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
+ (__v8hf) __A,
+ (__v8hf) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
+ const int __R)
+{
+ return (__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((__v8hf) __W,
+ (__v8hf) __A,
+ (__v8hf) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmadd_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
+ __m128h __B, const int __R)
+{
+ return (__m128h) __builtin_ia32_vfnmaddsh3_maskz ((__v8hf) __W,
+ (__v8hf) __A,
+ (__v8hf) __B,
+ (__mmask8) __U, __R);
+}
+
+#else
+#define _mm_fnmadd_round_sh(A, B, C, R) \
+ ((__m128h) __builtin_ia32_vfnmaddsh3_mask ((A), (B), (C), (-1), (R)))
+#define _mm_mask_fnmadd_round_sh(A, U, B, C, R) \
+ ((__m128h) __builtin_ia32_vfnmaddsh3_mask ((A), (B), (C), (U), (R)))
+#define _mm_mask3_fnmadd_round_sh(A, B, C, U, R) \
+ ((__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((A), (B), (C), (U), (R)))
+#define _mm_maskz_fnmadd_round_sh(U, A, B, C, R) \
+ ((__m128h) __builtin_ia32_vfnmaddsh3_maskz ((A), (B), (C), (U), (R)))
+
+#endif /* __OPTIMIZE__ */
+
+/* Intrinsics vfmsub[132,213,231]sh. */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsub_sh (__m128h __W, __m128h __A, __m128h __B)
+{
+ return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+ (__v8hf) __A,
+ -(__v8hf) __B,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmsub_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
+{
+ return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+ (__v8hf) __A,
+ -(__v8hf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmsub_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
+{
+ return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
+ (__v8hf) __A,
+ (__v8hf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmsub_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
+{
+ return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
+ (__v8hf) __A,
+ -(__v8hf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
+{
+ return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+ (__v8hf) __A,
+ -(__v8hf) __B,
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmsub_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
+ const int __R)
+{
+ return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+ (__v8hf) __A,
+ -(__v8hf) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
+ const int __R)
+{
+ return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
+ (__v8hf) __A,
+ (__v8hf) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmsub_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
+ __m128h __B, const int __R)
+{
+ return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
+ (__v8hf) __A,
+ -(__v8hf) __B,
+ (__mmask8) __U, __R);
+}
+
+#else
+#define _mm_fmsub_round_sh(A, B, C, R) \
+ ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), -(C), (-1), (R)))
+#define _mm_mask_fmsub_round_sh(A, U, B, C, R) \
+ ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), -(C), (U), (R)))
+#define _mm_mask3_fmsub_round_sh(A, B, C, U, R) \
+ ((__m128h) __builtin_ia32_vfmsubsh3_mask3 ((A), (B), (C), (U), (R)))
+#define _mm_maskz_fmsub_round_sh(U, A, B, C, R) \
+ ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), (B), -(C), (U), (R)))
+
+#endif /* __OPTIMIZE__ */
+
+/* Intrinsics vfnmsub[132,213,231]sh. */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmsub_sh (__m128h __W, __m128h __A, __m128h __B)
+{
+ return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+ -(__v8hf) __A,
+ -(__v8hf) __B,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmsub_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
+{
+ return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+ -(__v8hf) __A,
+ -(__v8hf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmsub_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
+{
+ return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
+ -(__v8hf) __A,
+ (__v8hf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmsub_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
+{
+ return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
+ -(__v8hf) __A,
+ -(__v8hf) __B,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
+{
+ return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+ -(__v8hf) __A,
+ -(__v8hf) __B,
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmsub_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
+ const int __R)
+{
+ return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
+ -(__v8hf) __A,
+ -(__v8hf) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
+ const int __R)
+{
+ return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
+ -(__v8hf) __A,
+ (__v8hf) __B,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmsub_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
+ __m128h __B, const int __R)
+{
+ return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
+ -(__v8hf) __A,
+ -(__v8hf) __B,
+ (__mmask8) __U, __R);
+}
+
+#else
+#define _mm_fnmsub_round_sh(A, B, C, R) \
+ ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), -(B), -(C), (-1), (R)))
+#define _mm_mask_fnmsub_round_sh(A, U, B, C, R) \
+ ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), -(B), -(C), (U), (R)))
+#define _mm_mask3_fnmsub_round_sh(A, B, C, U, R) \
+ ((__m128h) __builtin_ia32_vfmsubsh3_mask3 ((A), -(B), (C), (U), (R)))
+#define _mm_maskz_fnmsub_round_sh(U, A, B, C, R) \
+ ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), -(B), -(C), (U), (R)))
+
+#endif /* __OPTIMIZE__ */
+
+/* Intrinsics vf[,c]maddcsh. */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fcmadd_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+ return (__m128h)
+ __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) __A,
+ (__v8hf) __C,
+ (__v8hf) __D, __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fcmadd_sch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D)
+{
+ return (__m128h)
+ __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) __A,
+ (__v8hf) __B,
+ (__v8hf) __C, __D,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fcmadd_sch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D)
+{
+ return (__m128h)
+ __builtin_ia32_vfcmaddcsh_maskz_round ((__v8hf) __B,
+ (__v8hf) __C,
+ (__v8hf) __D,
+ __A, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fcmadd_sch (__m128h __A, __m128h __B, __m128h __C)
+{
+ return (__m128h)
+ __builtin_ia32_vfcmaddcsh_round ((__v8hf) __A,
+ (__v8hf) __B,
+ (__v8hf) __C,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmadd_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+ return (__m128h)
+ __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) __A,
+ (__v8hf) __C,
+ (__v8hf) __D, __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmadd_sch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D)
+{
+ return (__m128h)
+ __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) __A,
+ (__v8hf) __B,
+ (__v8hf) __C, __D,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmadd_sch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D)
+{
+ return (__m128h)
+ __builtin_ia32_vfmaddcsh_maskz_round ((__v8hf) __B,
+ (__v8hf) __C,
+ (__v8hf) __D,
+ __A, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_sch (__m128h __A, __m128h __B, __m128h __C)
+{
+ return (__m128h)
+ __builtin_ia32_vfmaddcsh_round ((__v8hf) __A,
+ (__v8hf) __B,
+ (__v8hf) __C,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fcmadd_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
+ __m128h __D, const int __E)
+{
+ return (__m128h)
+ __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) __A,
+ (__v8hf) __C,
+ (__v8hf) __D,
+ __B, __E);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fcmadd_round_sch (__m128h __A, __m128h __B, __m128h __C,
+ __mmask8 __D, const int __E)
+{
+ return (__m128h)
+ __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) __A,
+ (__v8hf) __B,
+ (__v8hf) __C,
+ __D, __E);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fcmadd_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
+ __m128h __D, const int __E)
+{
+ return (__m128h)
+ __builtin_ia32_vfcmaddcsh_maskz_round ((__v8hf) __B,
+ (__v8hf) __C,
+ (__v8hf) __D,
+ __A, __E);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fcmadd_round_sch (__m128h __A, __m128h __B, __m128h __C, const int __D)
+{
+ return (__m128h)
+ __builtin_ia32_vfcmaddcsh_round ((__v8hf) __A,
+ (__v8hf) __B,
+ (__v8hf) __C,
+ __D);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmadd_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
+ __m128h __D, const int __E)
+{
+ return (__m128h)
+ __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) __A,
+ (__v8hf) __C,
+ (__v8hf) __D,
+ __B, __E);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmadd_round_sch (__m128h __A, __m128h __B, __m128h __C,
+ __mmask8 __D, const int __E)
+{
+ return (__m128h)
+ __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) __A,
+ (__v8hf) __B,
+ (__v8hf) __C,
+ __D, __E);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmadd_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
+ __m128h __D, const int __E)
+{
+ return (__m128h)
+ __builtin_ia32_vfmaddcsh_maskz_round ((__v8hf) __B,
+ (__v8hf) __C,
+ (__v8hf) __D,
+ __A, __E);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_round_sch (__m128h __A, __m128h __B, __m128h __C, const int __D)
+{
+ return (__m128h)
+ __builtin_ia32_vfmaddcsh_round ((__v8hf) __A,
+ (__v8hf) __B,
+ (__v8hf) __C,
+ __D);
+}
+#else
+#define _mm_mask_fcmadd_round_sch(A, B, C, D, E) \
+ ((__m128h) \
+ __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) (A), \
+ (__v8hf) (C), \
+ (__v8hf) (D), \
+ (B), (E)))
+
+#define _mm_mask3_fcmadd_round_sch(A, B, C, D, E) \
+ ((__m128h) \
+ __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) (A), \
+ (__v8hf) (B), \
+ (__v8hf) (C), \
+ (D), (E)))
+
+#define _mm_maskz_fcmadd_round_sch(A, B, C, D, E) \
+ __builtin_ia32_vfcmaddcsh_maskz_round ((B), (C), (D), (A), (E))
+
+#define _mm_fcmadd_round_sch(A, B, C, D) \
+ __builtin_ia32_vfcmaddcsh_round ((A), (B), (C), (D))
+
+#define _mm_mask_fmadd_round_sch(A, B, C, D, E) \
+ ((__m128h) \
+ __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) (A), \
+ (__v8hf) (C), \
+ (__v8hf) (D), \
+ (B), (E)))
+
+#define _mm_mask3_fmadd_round_sch(A, B, C, D, E) \
+ ((__m128h) \
+ __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) (A), \
+ (__v8hf) (B), \
+ (__v8hf) (C), \
+ (D), (E)))
+
+#define _mm_maskz_fmadd_round_sch(A, B, C, D, E) \
+ __builtin_ia32_vfmaddcsh_maskz_round ((B), (C), (D), (A), (E))
+
+#define _mm_fmadd_round_sch(A, B, C, D) \
+ __builtin_ia32_vfmaddcsh_round ((A), (B), (C), (D))
+
+#endif /* __OPTIMIZE__ */
+
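+/* Editorial usage sketch (not part of this header): the *_sch intrinsics
+   treat the low two _Float16 lanes as one complex value (real, imaginary).
+   _mm_fmadd_sch accumulates a complex product into the low complex lane,
+   while _mm_fcmadd_sch uses the complex conjugate of its second operand.
+
+     __m128h __acc = _mm_setzero_ph ();
+     __m128h __a = _mm_set_sh (1.0f16);		(1 + 0i in the low pair)
+     __acc = _mm_fmadd_sch (__a, __a, __acc);  */
+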
+/* Intrinsics vf[,c]mulcsh. */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fcmul_sch (__m128h __A, __m128h __B)
+{
+ return (__m128h)
+ __builtin_ia32_vfcmulcsh_round ((__v8hf) __A,
+ (__v8hf) __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fcmul_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+ return (__m128h)
+ __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C,
+ (__v8hf) __D,
+ (__v8hf) __A,
+ __B, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fcmul_sch (__mmask8 __A, __m128h __B, __m128h __C)
+{
+ return (__m128h)
+ __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B,
+ (__v8hf) __C,
+ _mm_setzero_ph (),
+ __A, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmul_sch (__m128h __A, __m128h __B)
+{
+ return (__m128h)
+ __builtin_ia32_vfmulcsh_round ((__v8hf) __A,
+ (__v8hf) __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmul_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+ return (__m128h)
+ __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C,
+ (__v8hf) __D,
+ (__v8hf) __A,
+ __B, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmul_sch (__mmask8 __A, __m128h __B, __m128h __C)
+{
+ return (__m128h)
+ __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B,
+ (__v8hf) __C,
+ _mm_setzero_ph (),
+ __A, _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fcmul_round_sch (__m128h __A, __m128h __B, const int __D)
+{
+ return (__m128h)
+ __builtin_ia32_vfcmulcsh_round ((__v8hf) __A,
+ (__v8hf) __B,
+ __D);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fcmul_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
+ __m128h __D, const int __E)
+{
+ return (__m128h)
+ __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C,
+ (__v8hf) __D,
+ (__v8hf) __A,
+ __B, __E);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fcmul_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
+ const int __E)
+{
+ return (__m128h)
+ __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B,
+ (__v8hf) __C,
+ _mm_setzero_ph (),
+ __A, __E);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmul_round_sch (__m128h __A, __m128h __B, const int __D)
+{
+ return (__m128h)
+ __builtin_ia32_vfmulcsh_round ((__v8hf) __A,
+ (__v8hf) __B, __D);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmul_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
+ __m128h __D, const int __E)
+{
+ return (__m128h)
+ __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C,
+ (__v8hf) __D,
+ (__v8hf) __A,
+ __B, __E);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmul_round_sch (__mmask8 __A, __m128h __B, __m128h __C, const int __E)
+{
+ return (__m128h)
+ __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B,
+ (__v8hf) __C,
+ _mm_setzero_ph (),
+ __A, __E);
+}
+
+#else
+#define _mm_fcmul_round_sch(__A, __B, __D) \
+ (__m128h) __builtin_ia32_vfcmulcsh_round ((__v8hf) __A, \
+ (__v8hf) __B, __D)
+
+#define _mm_mask_fcmul_round_sch(__A, __B, __C, __D, __E) \
+ (__m128h) __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C, \
+ (__v8hf) __D, \
+ (__v8hf) __A, \
+ __B, __E)
+
+#define _mm_maskz_fcmul_round_sch(__A, __B, __C, __E) \
+ (__m128h) __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B, \
+ (__v8hf) __C, \
+ _mm_setzero_ph (), \
+ __A, __E)
+
+#define _mm_fmul_round_sch(__A, __B, __D) \
+ (__m128h) __builtin_ia32_vfmulcsh_round ((__v8hf) __A, \
+ (__v8hf) __B, __D)
+
+#define _mm_mask_fmul_round_sch(__A, __B, __C, __D, __E) \
+ (__m128h) __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C, \
+ (__v8hf) __D, \
+ (__v8hf) __A, \
+ __B, __E)
+
+#define _mm_maskz_fmul_round_sch(__A, __B, __C, __E) \
+ (__m128h) __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B, \
+ (__v8hf) __C, \
+ _mm_setzero_ph (), \
+ __A, __E)
+
+#endif /* __OPTIMIZE__ */
+
+#define _mm_mul_sch(A, B) _mm_fmul_sch ((A), (B))
+#define _mm_mask_mul_sch(W, U, A, B) _mm_mask_fmul_sch ((W), (U), (A), (B))
+#define _mm_maskz_mul_sch(U, A, B) _mm_maskz_fmul_sch ((U), (A), (B))
+#define _mm_mul_round_sch(A, B, R) _mm_fmul_round_sch ((A), (B), (R))
+#define _mm_mask_mul_round_sch(W, U, A, B, R) \
+ _mm_mask_fmul_round_sch ((W), (U), (A), (B), (R))
+#define _mm_maskz_mul_round_sch(U, A, B, R) \
+ _mm_maskz_fmul_round_sch ((U), (A), (B), (R))
+
+#define _mm_cmul_sch(A, B) _mm_fcmul_sch ((A), (B))
+#define _mm_mask_cmul_sch(W, U, A, B) _mm_mask_fcmul_sch ((W), (U), (A), (B))
+#define _mm_maskz_cmul_sch(U, A, B) _mm_maskz_fcmul_sch ((U), (A), (B))
+#define _mm_cmul_round_sch(A, B, R) _mm_fcmul_round_sch ((A), (B), (R))
+#define _mm_mask_cmul_round_sch(W, U, A, B, R) \
+ _mm_mask_fcmul_round_sch ((W), (U), (A), (B), (R))
+#define _mm_maskz_cmul_round_sch(U, A, B, R) \
+ _mm_maskz_fcmul_round_sch ((U), (A), (B), (R))
+
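+/* Editorial usage sketch (not part of this header): complex scalar
+   multiply.  _mm_fmul_sch multiplies the low (real, imaginary) _Float16
+   pairs of its operands, _mm_fcmul_sch multiplies by the conjugate of the
+   second operand, and the _mm_mul_sch/_mm_cmul_sch names above are plain
+   aliases for the same operations.
+
+     __m128h __p = _mm_fmul_sch (_mm_set_sh (2.0f16), _mm_set_sh (3.0f16));  */
+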
+#ifdef __DISABLE_AVX512FP16__
+#undef __DISABLE_AVX512FP16__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512FP16__ */
+
+#if !defined (__AVX512FP16__) || !defined (__EVEX512__)
+#pragma GCC push_options
+#pragma GCC target("avx512fp16,evex512")
+#define __DISABLE_AVX512FP16_512__
+#endif /* __AVX512FP16_512__ */
+
+typedef _Float16 __v32hf __attribute__ ((__vector_size__ (64)));
+typedef _Float16 __m512h __attribute__ ((__vector_size__ (64), __may_alias__));
+typedef _Float16 __m512h_u __attribute__ ((__vector_size__ (64), \
+ __may_alias__, __aligned__ (1)));
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set_ph (_Float16 __A31, _Float16 __A30, _Float16 __A29,
+ _Float16 __A28, _Float16 __A27, _Float16 __A26,
+ _Float16 __A25, _Float16 __A24, _Float16 __A23,
+ _Float16 __A22, _Float16 __A21, _Float16 __A20,
+ _Float16 __A19, _Float16 __A18, _Float16 __A17,
+ _Float16 __A16, _Float16 __A15, _Float16 __A14,
+ _Float16 __A13, _Float16 __A12, _Float16 __A11,
+ _Float16 __A10, _Float16 __A9, _Float16 __A8,
+ _Float16 __A7, _Float16 __A6, _Float16 __A5,
+ _Float16 __A4, _Float16 __A3, _Float16 __A2,
+ _Float16 __A1, _Float16 __A0)
+{
+ return __extension__ (__m512h)(__v32hf){ __A0, __A1, __A2, __A3,
+ __A4, __A5, __A6, __A7,
+ __A8, __A9, __A10, __A11,
+ __A12, __A13, __A14, __A15,
+ __A16, __A17, __A18, __A19,
+ __A20, __A21, __A22, __A23,
+ __A24, __A25, __A26, __A27,
+ __A28, __A29, __A30, __A31 };
+}
+
+/* Create vectors of elements in the reversed order from the
+   _mm512_set_ph function.  */
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_setr_ph (_Float16 __A0, _Float16 __A1, _Float16 __A2,
+ _Float16 __A3, _Float16 __A4, _Float16 __A5,
+ _Float16 __A6, _Float16 __A7, _Float16 __A8,
+ _Float16 __A9, _Float16 __A10, _Float16 __A11,
+ _Float16 __A12, _Float16 __A13, _Float16 __A14,
+ _Float16 __A15, _Float16 __A16, _Float16 __A17,
+ _Float16 __A18, _Float16 __A19, _Float16 __A20,
+ _Float16 __A21, _Float16 __A22, _Float16 __A23,
+ _Float16 __A24, _Float16 __A25, _Float16 __A26,
+ _Float16 __A27, _Float16 __A28, _Float16 __A29,
+ _Float16 __A30, _Float16 __A31)
+
+{
+ return _mm512_set_ph (__A31, __A30, __A29, __A28, __A27, __A26, __A25,
+ __A24, __A23, __A22, __A21, __A20, __A19, __A18,
+ __A17, __A16, __A15, __A14, __A13, __A12, __A11,
+ __A10, __A9, __A8, __A7, __A6, __A5, __A4, __A3,
+ __A2, __A1, __A0);
+}
+
+/* Broadcast _Float16 to vector. */
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set1_ph (_Float16 __A)
+{
+ return _mm512_set_ph (__A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A);
+}
+
+/* Create a vector with all zeros. */
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_setzero_ph (void)
+{
+ return _mm512_set1_ph (0.0f16);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_undefined_ph (void)
+{
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Winit-self"
+ __m512h __Y = __Y;
+#pragma GCC diagnostic pop
+ return __Y;
+}
+
+extern __inline _Float16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtsh_h (__m512h __A)
+{
+ return __A[0];
+}
+
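+/* Editorial usage sketch (not part of this header): building 512-bit
+   _Float16 vectors.  With 32 lanes, _mm512_set1_ph is usually the most
+   convenient constructor, and _mm512_cvtsh_h reads lane 0 back as a
+   scalar.
+
+     __m512h __v = _mm512_set1_ph (1.5f16);
+     _Float16 __lo = _mm512_cvtsh_h (__v);	(__lo == 1.5)  */
+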
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph_ps (__m512h __a)
+{
+ return (__m512) __a;
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph_pd (__m512h __a)
+{
+ return (__m512d) __a;
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph_si512 (__m512h __a)
+{
+ return (__m512i) __a;
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph512_ph128 (__m512h __A)
+{
+ union
+ {
+ __m128h __a[4];
+ __m512h __v;
+ } __u = { .__v = __A };
+ return __u.__a[0];
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph512_ph256 (__m512h __A)
+{
+ union
+ {
+ __m256h __a[2];
+ __m512h __v;
+ } __u = { .__v = __A };
+ return __u.__a[0];
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph128_ph512 (__m128h __A)
+{
+ union
+ {
+ __m128h __a[4];
+ __m512h __v;
+ } __u;
+ __u.__a[0] = __A;
+ return __u.__v;
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph256_ph512 (__m256h __A)
+{
+ union
+ {
+ __m256h __a[2];
+ __m512h __v;
+ } __u;
+ __u.__a[0] = __A;
+ return __u.__v;
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_zextph128_ph512 (__m128h __A)
+{
+ return (__m512h) _mm512_insertf32x4 (_mm512_setzero_ps (),
+ (__m128) __A, 0);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_zextph256_ph512 (__m256h __A)
+{
+ return (__m512h) _mm512_insertf64x4 (_mm512_setzero_pd (),
+ (__m256d) __A, 0);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castps_ph (__m512 __a)
+{
+ return (__m512h) __a;
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castpd_ph (__m512d __a)
+{
+ return (__m512h) __a;
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castsi512_ph (__m512i __a)
+{
+ return (__m512h) __a;
+}
+
+/* Load a 512-bit vector of _Float16 from memory.  */
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_load_ph (void const *__P)
+{
+ return *(const __m512h *) __P;
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_loadu_ph (void const *__P)
+{
+ return *(const __m512h_u *) __P;
+}
+
+/* Store a 512-bit vector of _Float16 to memory.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_store_ph (void *__P, __m512h __A)
+{
+ *(__m512h *) __P = __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_storeu_ph (void *__P, __m512h __A)
+{
+ *(__m512h_u *) __P = __A;
+}
+
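+/* Editorial usage sketch (not part of this header): _mm512_load_ph and
+   _mm512_store_ph dereference an aligned __m512h and therefore expect a
+   64-byte-aligned address, while the *_loadu_ph/*_storeu_ph forms place
+   no alignment requirement on the pointer.
+
+     _Float16 __buf[32];
+     _mm512_storeu_ph (__buf, _mm512_set1_ph (0.25f16));
+     __m512h __v = _mm512_loadu_ph (__buf);  */
+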
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_abs_ph (__m512h __A)
+{
+ return (__m512h) _mm512_and_epi32 ( _mm512_set1_epi32 (0x7FFF7FFF),
+ (__m512i) __A);
+}
+
+/* Intrinsics v[add,sub,mul,div]ph. */
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_add_ph (__m512h __A, __m512h __B)
+{
+ return (__m512h) ((__v32hf) __A + (__v32hf) __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_add_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
+{
+ return __builtin_ia32_addph512_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_add_ph (__mmask32 __A, __m512h __B, __m512h __C)
+{
+ return __builtin_ia32_addph512_mask (__B, __C,
+ _mm512_setzero_ph (), __A);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sub_ph (__m512h __A, __m512h __B)
+{
+ return (__m512h) ((__v32hf) __A - (__v32hf) __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sub_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
+{
+ return __builtin_ia32_subph512_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sub_ph (__mmask32 __A, __m512h __B, __m512h __C)
+{
+ return __builtin_ia32_subph512_mask (__B, __C,
+ _mm512_setzero_ph (), __A);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mul_ph (__m512h __A, __m512h __B)
+{
+ return (__m512h) ((__v32hf) __A * (__v32hf) __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mul_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
+{
+ return __builtin_ia32_mulph512_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mul_ph (__mmask32 __A, __m512h __B, __m512h __C)
+{
+ return __builtin_ia32_mulph512_mask (__B, __C,
+ _mm512_setzero_ph (), __A);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_div_ph (__m512h __A, __m512h __B)
+{
+ return (__m512h) ((__v32hf) __A / (__v32hf) __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_div_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
+{
+ return __builtin_ia32_divph512_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_div_ph (__mmask32 __A, __m512h __B, __m512h __C)
+{
+ return __builtin_ia32_divph512_mask (__B, __C,
+ _mm512_setzero_ph (), __A);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_add_round_ph (__m512h __A, __m512h __B, const int __C)
+{
+ return __builtin_ia32_addph512_mask_round (__A, __B,
+ _mm512_setzero_ph (),
+ (__mmask32) -1, __C);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_add_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
+ __m512h __D, const int __E)
+{
+ return __builtin_ia32_addph512_mask_round (__C, __D, __A, __B, __E);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_add_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
+ const int __D)
+{
+ return __builtin_ia32_addph512_mask_round (__B, __C,
+ _mm512_setzero_ph (),
+ __A, __D);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sub_round_ph (__m512h __A, __m512h __B, const int __C)
+{
+ return __builtin_ia32_subph512_mask_round (__A, __B,
+ _mm512_setzero_ph (),
+ (__mmask32) -1, __C);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sub_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
+ __m512h __D, const int __E)
+{
+ return __builtin_ia32_subph512_mask_round (__C, __D, __A, __B, __E);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sub_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
+ const int __D)
+{
+ return __builtin_ia32_subph512_mask_round (__B, __C,
+ _mm512_setzero_ph (),
+ __A, __D);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mul_round_ph (__m512h __A, __m512h __B, const int __C)
+{
+ return __builtin_ia32_mulph512_mask_round (__A, __B,
+ _mm512_setzero_ph (),
+ (__mmask32) -1, __C);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mul_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
+ __m512h __D, const int __E)
+{
+ return __builtin_ia32_mulph512_mask_round (__C, __D, __A, __B, __E);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mul_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
+ const int __D)
+{
+ return __builtin_ia32_mulph512_mask_round (__B, __C,
+ _mm512_setzero_ph (),
+ __A, __D);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_div_round_ph (__m512h __A, __m512h __B, const int __C)
+{
+ return __builtin_ia32_divph512_mask_round (__A, __B,
+ _mm512_setzero_ph (),
+ (__mmask32) -1, __C);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_div_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
+ __m512h __D, const int __E)
+{
+ return __builtin_ia32_divph512_mask_round (__C, __D, __A, __B, __E);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_div_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
+ const int __D)
+{
+ return __builtin_ia32_divph512_mask_round (__B, __C,
+ _mm512_setzero_ph (),
+ __A, __D);
+}
+#else
+#define _mm512_add_round_ph(A, B, C) \
+ ((__m512h)__builtin_ia32_addph512_mask_round((A), (B), \
+ _mm512_setzero_ph (), \
+ (__mmask32)-1, (C)))
+
+#define _mm512_mask_add_round_ph(A, B, C, D, E) \
+ ((__m512h)__builtin_ia32_addph512_mask_round((C), (D), (A), (B), (E)))
+
+#define _mm512_maskz_add_round_ph(A, B, C, D) \
+ ((__m512h)__builtin_ia32_addph512_mask_round((B), (C), \
+ _mm512_setzero_ph (), \
+ (A), (D)))
+
+#define _mm512_sub_round_ph(A, B, C) \
+ ((__m512h)__builtin_ia32_subph512_mask_round((A), (B), \
+ _mm512_setzero_ph (), \
+ (__mmask32)-1, (C)))
+
+#define _mm512_mask_sub_round_ph(A, B, C, D, E) \
+ ((__m512h)__builtin_ia32_subph512_mask_round((C), (D), (A), (B), (E)))
+
+#define _mm512_maskz_sub_round_ph(A, B, C, D) \
+ ((__m512h)__builtin_ia32_subph512_mask_round((B), (C), \
+ _mm512_setzero_ph (), \
+ (A), (D)))
+
+#define _mm512_mul_round_ph(A, B, C) \
+ ((__m512h)__builtin_ia32_mulph512_mask_round((A), (B), \
+ _mm512_setzero_ph (), \
+ (__mmask32)-1, (C)))
+
+#define _mm512_mask_mul_round_ph(A, B, C, D, E) \
+ ((__m512h)__builtin_ia32_mulph512_mask_round((C), (D), (A), (B), (E)))
+
+#define _mm512_maskz_mul_round_ph(A, B, C, D) \
+ ((__m512h)__builtin_ia32_mulph512_mask_round((B), (C), \
+ _mm512_setzero_ph (), \
+ (A), (D)))
+
+#define _mm512_div_round_ph(A, B, C) \
+ ((__m512h)__builtin_ia32_divph512_mask_round((A), (B), \
+ _mm512_setzero_ph (), \
+ (__mmask32)-1, (C)))
+
+#define _mm512_mask_div_round_ph(A, B, C, D, E) \
+ ((__m512h)__builtin_ia32_divph512_mask_round((C), (D), (A), (B), (E)))
+
+#define _mm512_maskz_div_round_ph(A, B, C, D) \
+ ((__m512h)__builtin_ia32_divph512_mask_round((B), (C), \
+ _mm512_setzero_ph (), \
+ (A), (D)))
+#endif /* __OPTIMIZE__ */
+
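+/* Editorial usage sketch (not part of this header): element-wise
+   arithmetic on 32 _Float16 lanes, with optional merge-masking and, in
+   the *_round_* forms, an explicit rounding mode such as
+   _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC from <immintrin.h>.
+
+     __m512h __a = _mm512_set1_ph (1.0f16);
+     __m512h __b = _mm512_set1_ph (2.0f16);
+     __m512h __sum = _mm512_add_ph (__a, __b);
+     __m512h __sel = _mm512_mask_add_ph (__a, 0x0000FFFFu, __a, __b);
+     (lanes 0-15 of __sel become 3.0; lanes 16-31 are taken from __a)  */
+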
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_conj_pch (__m512h __A)
+{
+ return (__m512h) _mm512_xor_epi32 ((__m512i) __A, _mm512_set1_epi32 (1<<31));
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_conj_pch (__m512h __W, __mmask16 __U, __m512h __A)
+{
+ return (__m512h)
+ __builtin_ia32_movaps512_mask ((__v16sf) _mm512_conj_pch (__A),
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_conj_pch (__mmask16 __U, __m512h __A)
+{
+ return (__m512h)
+ __builtin_ia32_movaps512_mask ((__v16sf) _mm512_conj_pch (__A),
+ (__v16sf) _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
+/* Intrinsics vmaxph, vminph. */
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_ph (__m512h __A, __m512h __B)
+{
+ return __builtin_ia32_maxph512_mask (__A, __B,
+ _mm512_setzero_ph (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
+{
+ return __builtin_ia32_maxph512_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_ph (__mmask32 __A, __m512h __B, __m512h __C)
+{
+ return __builtin_ia32_maxph512_mask (__B, __C,
+ _mm512_setzero_ph (), __A);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_ph (__m512h __A, __m512h __B)
+{
+ return __builtin_ia32_minph512_mask (__A, __B,
+ _mm512_setzero_ph (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
+{
+ return __builtin_ia32_minph512_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_ph (__mmask32 __A, __m512h __B, __m512h __C)
+{
+ return __builtin_ia32_minph512_mask (__B, __C,
+ _mm512_setzero_ph (), __A);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_round_ph (__m512h __A, __m512h __B, const int __C)
+{
+ return __builtin_ia32_maxph512_mask_round (__A, __B,
+ _mm512_setzero_ph (),
+ (__mmask32) -1, __C);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
+ __m512h __D, const int __E)
+{
+ return __builtin_ia32_maxph512_mask_round (__C, __D, __A, __B, __E);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
+ const int __D)
+{
+ return __builtin_ia32_maxph512_mask_round (__B, __C,
+ _mm512_setzero_ph (),
+ __A, __D);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_round_ph (__m512h __A, __m512h __B, const int __C)
+{
+ return __builtin_ia32_minph512_mask_round (__A, __B,
+ _mm512_setzero_ph (),
+ (__mmask32) -1, __C);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
+ __m512h __D, const int __E)
+{
+ return __builtin_ia32_minph512_mask_round (__C, __D, __A, __B, __E);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
+ const int __D)
+{
+ return __builtin_ia32_minph512_mask_round (__B, __C,
+ _mm512_setzero_ph (),
+ __A, __D);
+}
+
+#else
+#define _mm512_max_round_ph(A, B, C) \
+ (__builtin_ia32_maxph512_mask_round ((A), (B), \
+ _mm512_setzero_ph (), \
+ (__mmask32)-1, (C)))
+
+#define _mm512_mask_max_round_ph(A, B, C, D, E) \
+ (__builtin_ia32_maxph512_mask_round ((C), (D), (A), (B), (E)))
+
+#define _mm512_maskz_max_round_ph(A, B, C, D) \
+ (__builtin_ia32_maxph512_mask_round ((B), (C), \
+ _mm512_setzero_ph (), \
+ (A), (D)))
+
+#define _mm512_min_round_ph(A, B, C) \
+ (__builtin_ia32_minph512_mask_round ((A), (B), \
+ _mm512_setzero_ph (), \
+ (__mmask32)-1, (C)))
+
+#define _mm512_mask_min_round_ph(A, B, C, D, E) \
+ (__builtin_ia32_minph512_mask_round ((C), (D), (A), (B), (E)))
+
+#define _mm512_maskz_min_round_ph(A, B, C, D) \
+ (__builtin_ia32_minph512_mask_round ((B), (C), \
+ _mm512_setzero_ph (), \
+ (A), (D)))
+#endif /* __OPTIMIZE__ */
+
+/* Intrinsics vcmpph. */
+#ifdef __OPTIMIZE__
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmp_ph_mask (__m512h __A, __m512h __B, const int __C)
+{
+ return (__mmask32) __builtin_ia32_cmpph512_mask (__A, __B, __C,
+ (__mmask32) -1);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmp_ph_mask (__mmask32 __A, __m512h __B, __m512h __C,
+ const int __D)
+{
+ return (__mmask32) __builtin_ia32_cmpph512_mask (__B, __C, __D,
+ __A);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmp_round_ph_mask (__m512h __A, __m512h __B, const int __C,
+ const int __D)
+{
+ return (__mmask32) __builtin_ia32_cmpph512_mask_round (__A, __B,
+ __C, (__mmask32) -1,
+ __D);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmp_round_ph_mask (__mmask32 __A, __m512h __B, __m512h __C,
+ const int __D, const int __E)
+{
+ return (__mmask32) __builtin_ia32_cmpph512_mask_round (__B, __C,
+ __D, __A,
+ __E);
+}
+
+#else
+#define _mm512_cmp_ph_mask(A, B, C) \
+ (__builtin_ia32_cmpph512_mask ((A), (B), (C), (-1)))
+
+#define _mm512_mask_cmp_ph_mask(A, B, C, D) \
+ (__builtin_ia32_cmpph512_mask ((B), (C), (D), (A)))
+
+#define _mm512_cmp_round_ph_mask(A, B, C, D) \
+ (__builtin_ia32_cmpph512_mask_round ((A), (B), (C), (-1), (D)))
+
+#define _mm512_mask_cmp_round_ph_mask(A, B, C, D, E) \
+ (__builtin_ia32_cmpph512_mask_round ((B), (C), (D), (A), (E)))
+
+#endif /* __OPTIMIZE__ */
+
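+/* Editorial usage sketch (not part of this header): given __m512h values
+   __a and __b, an ordered less-than comparison of all 32 lanes yields an
+   __mmask32 with one bit per lane; predicate constants such as _CMP_LT_OS
+   come from the headers pulled in by <immintrin.h>.
+
+     __mmask32 __m = _mm512_cmp_ph_mask (__a, __b, _CMP_LT_OS);
+     __m512h __r = _mm512_mask_add_ph (__a, __m, __a, __b);
+     (lanes where __a < __b get __a + __b; the rest keep __a)  */
+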
+/* Intrinsics vsqrtph. */
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sqrt_ph (__m512h __A)
+{
+ return __builtin_ia32_sqrtph512_mask_round (__A,
+ _mm512_setzero_ph(),
+ (__mmask32) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sqrt_ph (__m512h __A, __mmask32 __B, __m512h __C)
+{
+ return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sqrt_ph (__mmask32 __A, __m512h __B)
+{
+ return __builtin_ia32_sqrtph512_mask_round (__B,
+ _mm512_setzero_ph (),
+ __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sqrt_round_ph (__m512h __A, const int __B)
+{
+ return __builtin_ia32_sqrtph512_mask_round (__A,
+ _mm512_setzero_ph(),
+ (__mmask32) -1, __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sqrt_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
+ const int __D)
+{
+ return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B, __D);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sqrt_round_ph (__mmask32 __A, __m512h __B, const int __C)
+{
+ return __builtin_ia32_sqrtph512_mask_round (__B,
+ _mm512_setzero_ph (),
+ __A, __C);
+}
+
+#else
+#define _mm512_sqrt_round_ph(A, B) \
+ (__builtin_ia32_sqrtph512_mask_round ((A), \
+ _mm512_setzero_ph (), \
+ (__mmask32)-1, (B)))
+
+#define _mm512_mask_sqrt_round_ph(A, B, C, D) \
+ (__builtin_ia32_sqrtph512_mask_round ((C), (A), (B), (D)))
+
+#define _mm512_maskz_sqrt_round_ph(A, B, C) \
+ (__builtin_ia32_sqrtph512_mask_round ((B), \
+ _mm512_setzero_ph (), \
+ (A), (C)))
+
+#endif /* __OPTIMIZE__ */
+
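+/* Editorial usage sketch (not part of this header): given a __m512h value
+   __x, vsqrtph can be used with an explicit rounding mode, while the
+   vrsqrtph/vrcpph intrinsics below return low-precision reciprocal
+   approximations.
+
+     __m512h __r = _mm512_sqrt_round_ph (__x,
+					 _MM_FROUND_TO_NEAREST_INT
+					 | _MM_FROUND_NO_EXC);  */
+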
+/* Intrinsics vrsqrtph. */
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rsqrt_ph (__m512h __A)
+{
+ return __builtin_ia32_rsqrtph512_mask (__A, _mm512_setzero_ph (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rsqrt_ph (__m512h __A, __mmask32 __B, __m512h __C)
+{
+ return __builtin_ia32_rsqrtph512_mask (__C, __A, __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rsqrt_ph (__mmask32 __A, __m512h __B)
+{
+ return __builtin_ia32_rsqrtph512_mask (__B, _mm512_setzero_ph (),
+ __A);
+}
+
+/* Intrinsics vrcpph. */
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rcp_ph (__m512h __A)
+{
+ return __builtin_ia32_rcpph512_mask (__A, _mm512_setzero_ph (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rcp_ph (__m512h __A, __mmask32 __B, __m512h __C)
+{
+ return __builtin_ia32_rcpph512_mask (__C, __A, __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rcp_ph (__mmask32 __A, __m512h __B)
+{
+ return __builtin_ia32_rcpph512_mask (__B, _mm512_setzero_ph (),
+ __A);
+}
+
+/* Intrinsics vscalefph. */
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_scalef_ph (__m512h __A, __m512h __B)
+{
+ return __builtin_ia32_scalefph512_mask_round (__A, __B,
+ _mm512_setzero_ph (),
+ (__mmask32) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_scalef_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
+{
+ return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_scalef_ph (__mmask32 __A, __m512h __B, __m512h __C)
+{
+ return __builtin_ia32_scalefph512_mask_round (__B, __C,
+ _mm512_setzero_ph (),
+ __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_scalef_round_ph (__m512h __A, __m512h __B, const int __C)
+{
+ return __builtin_ia32_scalefph512_mask_round (__A, __B,
+ _mm512_setzero_ph (),
+ (__mmask32) -1, __C);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_scalef_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
+ __m512h __D, const int __E)
+{
+ return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B,
+ __E);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_scalef_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
+ const int __D)
+{
+ return __builtin_ia32_scalefph512_mask_round (__B, __C,
+ _mm512_setzero_ph (),
+ __A, __D);
+}
+
+#else
+#define _mm512_scalef_round_ph(A, B, C) \
+ (__builtin_ia32_scalefph512_mask_round ((A), (B), \
+ _mm512_setzero_ph (), \
+ (__mmask32)-1, (C)))
+
+#define _mm512_mask_scalef_round_ph(A, B, C, D, E) \
+ (__builtin_ia32_scalefph512_mask_round ((C), (D), (A), (B), (E)))
+
+#define _mm512_maskz_scalef_round_ph(A, B, C, D) \
+ (__builtin_ia32_scalefph512_mask_round ((B), (C), \
+ _mm512_setzero_ph (), \
+ (A), (D)))
+
+#endif /* __OPTIMIZE__ */
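
/* Editor's sketch, not part of this patch: per Intel's documentation
   vscalefph computes A * 2**floor(B) lane-wise, so it scales exponents
   without leaving the FP16 domain.  Assumes "gcc -O2 -mavx512fp16".  */
#include <immintrin.h>
#include <stdio.h>

int
main (void)
{
  __m512h a = _mm512_set1_ph ((_Float16) 3.0f);
  __m512h e = _mm512_set1_ph ((_Float16) 4.0f);
  __m512h r = _mm512_scalef_ph (a, e);   /* 3 * 2^4 = 48 */
  printf ("scalef(3, 4) = %f\n", (double) r[0]);
  return 0;
}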
+
+/* Intrinsics vreduceph. */
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_reduce_ph (__m512h __A, int __B)
+{
+ return __builtin_ia32_reduceph512_mask_round (__A, __B,
+ _mm512_setzero_ph (),
+ (__mmask32) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_reduce_ph (__m512h __A, __mmask32 __B, __m512h __C, int __D)
+{
+ return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_reduce_ph (__mmask32 __A, __m512h __B, int __C)
+{
+ return __builtin_ia32_reduceph512_mask_round (__B, __C,
+ _mm512_setzero_ph (),
+ __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_reduce_round_ph (__m512h __A, int __B, const int __C)
+{
+ return __builtin_ia32_reduceph512_mask_round (__A, __B,
+ _mm512_setzero_ph (),
+ (__mmask32) -1, __C);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_reduce_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
+ int __D, const int __E)
+{
+ return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B,
+ __E);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_reduce_round_ph (__mmask32 __A, __m512h __B, int __C,
+ const int __D)
+{
+ return __builtin_ia32_reduceph512_mask_round (__B, __C,
+ _mm512_setzero_ph (),
+ __A, __D);
+}
+
+#else
+#define _mm512_reduce_ph(A, B) \
+ (__builtin_ia32_reduceph512_mask_round ((A), (B), \
+ _mm512_setzero_ph (), \
+ (__mmask32)-1, \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_reduce_ph(A, B, C, D) \
+ (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_reduce_ph(A, B, C) \
+ (__builtin_ia32_reduceph512_mask_round ((B), (C), \
+ _mm512_setzero_ph (), \
+ (A), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_reduce_round_ph(A, B, C) \
+ (__builtin_ia32_reduceph512_mask_round ((A), (B), \
+ _mm512_setzero_ph (), \
+ (__mmask32)-1, (C)))
+
+#define _mm512_mask_reduce_round_ph(A, B, C, D, E) \
+ (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B), (E)))
+
+#define _mm512_maskz_reduce_round_ph(A, B, C, D) \
+ (__builtin_ia32_reduceph512_mask_round ((B), (C), \
+ _mm512_setzero_ph (), \
+ (A), (D)))
+
+#endif /* __OPTIMIZE__ */
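
/* Editor's sketch, not part of this patch: per Intel's documentation
   vreduceph returns src minus src rounded to 2**-M precision; with M = 0
   and round-toward-negative-infinity (imm8 = 0x01) that is the positive
   fractional part of each lane.  Assumes "gcc -O2 -mavx512fp16".  */
#include <immintrin.h>
#include <stdio.h>

int
main (void)
{
  __m512h x = _mm512_set1_ph ((_Float16) 2.75f);
  __m512h frac = _mm512_reduce_ph (x, 0x01);   /* 2.75 - floor(2.75) = 0.75 */
  printf ("frac = %f\n", (double) frac[0]);
  return 0;
}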
+
+/* Intrinsics vrndscaleph. */
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_roundscale_ph (__m512h __A, int __B)
+{
+ return __builtin_ia32_rndscaleph512_mask_round (__A, __B,
+ _mm512_setzero_ph (),
+ (__mmask32) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_roundscale_ph (__m512h __A, __mmask32 __B,
+ __m512h __C, int __D)
+{
+ return __builtin_ia32_rndscaleph512_mask_round (__C, __D, __A, __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_roundscale_ph (__mmask32 __A, __m512h __B, int __C)
+{
+ return __builtin_ia32_rndscaleph512_mask_round (__B, __C,
+ _mm512_setzero_ph (),
+ __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_roundscale_round_ph (__m512h __A, int __B, const int __C)
+{
+ return __builtin_ia32_rndscaleph512_mask_round (__A, __B,
+ _mm512_setzero_ph (),
+ (__mmask32) -1,
+ __C);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_roundscale_round_ph (__m512h __A, __mmask32 __B,
+ __m512h __C, int __D, const int __E)
+{
+ return __builtin_ia32_rndscaleph512_mask_round (__C, __D, __A,
+ __B, __E);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_roundscale_round_ph (__mmask32 __A, __m512h __B, int __C,
+ const int __D)
+{
+ return __builtin_ia32_rndscaleph512_mask_round (__B, __C,
+ _mm512_setzero_ph (),
+ __A, __D);
+}
+
+#else
+#define _mm512_roundscale_ph(A, B) \
+ (__builtin_ia32_rndscaleph512_mask_round ((A), (B), \
+ _mm512_setzero_ph (), \
+ (__mmask32)-1, \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_roundscale_ph(A, B, C, D) \
+ (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_roundscale_ph(A, B, C) \
+ (__builtin_ia32_rndscaleph512_mask_round ((B), (C), \
+ _mm512_setzero_ph (), \
+ (A), \
+ _MM_FROUND_CUR_DIRECTION))
+#define _mm512_roundscale_round_ph(A, B, C) \
+ (__builtin_ia32_rndscaleph512_mask_round ((A), (B), \
+ _mm512_setzero_ph (), \
+ (__mmask32)-1, (C)))
+
+#define _mm512_mask_roundscale_round_ph(A, B, C, D, E) \
+ (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B), (E)))
+
+#define _mm512_maskz_roundscale_round_ph(A, B, C, D) \
+ (__builtin_ia32_rndscaleph512_mask_round ((B), (C), \
+ _mm512_setzero_ph (), \
+ (A), (D)))
+
+#endif /* __OPTIMIZE__ */
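
/* Editor's sketch, not part of this patch: vrndscaleph rounds each lane to
   2**-M precision; with M = 0 the low immediate bits select the usual
   nearest/floor/ceil/trunc modes (encoding per Intel's documentation).
   Assumes "gcc -O2 -mavx512fp16".  */
#include <immintrin.h>
#include <stdio.h>

int
main (void)
{
  __m512h x = _mm512_set1_ph ((_Float16) 2.5f);
  __m512h fl = _mm512_roundscale_ph (x, 0x01);   /* floor -> 2.0 */
  __m512h ce = _mm512_roundscale_ph (x, 0x02);   /* ceil  -> 3.0 */
  printf ("floor = %f, ceil = %f\n", (double) fl[0], (double) ce[0]);
  return 0;
}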
+
+/* Intrinsics vfpclassph. */
+#ifdef __OPTIMIZE__
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fpclass_ph_mask (__mmask32 __U, __m512h __A,
+ const int __imm)
+{
+ return (__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) __A,
+ __imm, __U);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fpclass_ph_mask (__m512h __A, const int __imm)
+{
+ return (__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) __A,
+ __imm,
+ (__mmask32) -1);
+}
+
+#else
+#define _mm512_mask_fpclass_ph_mask(u, x, c) \
+ ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
+ (int) (c), (__mmask32) (u)))
+
+#define _mm512_fpclass_ph_mask(x, c) \
+ ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
+ (int) (c), (__mmask32) -1))
+#endif /* __OPTIMIZE__ */
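
/* Editor's sketch, not part of this patch: vfpclassph tests every lane
   against the categories selected by the immediate (bit assignments per
   the Intel SDM, e.g. 0x01 QNaN, 0x08 +inf, 0x10 -inf, 0x20 denormal) and
   returns a 32-bit lane mask.  Assumes "gcc -O2 -mavx512fp16".  */
#include <immintrin.h>
#include <stdio.h>

int
main (void)
{
  __m512h x = _mm512_set1_ph ((_Float16) 1.0f);
  x[3] = (_Float16) __builtin_inff ();   /* make lane 3 +inf */
  __mmask32 inf_lanes = _mm512_fpclass_ph_mask (x, 0x08 | 0x10);
  printf ("inf lanes = 0x%08x\n", (unsigned) inf_lanes);   /* 0x00000008 */
  return 0;
}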
+
+/* Intrinsics vgetexpph. */
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_getexp_ph (__m512h __A)
+{
+ return (__m512h)
+ __builtin_ia32_getexpph512_mask ((__v32hf) __A,
+ (__v32hf) _mm512_setzero_ph (),
+ (__mmask32) -1, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_getexp_ph (__m512h __W, __mmask32 __U, __m512h __A)
+{
+ return (__m512h)
+ __builtin_ia32_getexpph512_mask ((__v32hf) __A, (__v32hf) __W,
+ (__mmask32) __U, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_getexp_ph (__mmask32 __U, __m512h __A)
+{
+ return (__m512h)
+ __builtin_ia32_getexpph512_mask ((__v32hf) __A,
+ (__v32hf) _mm512_setzero_ph (),
+ (__mmask32) __U, _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_getexp_round_ph (__m512h __A, const int __R)
+{
+ return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
+ (__v32hf)
+ _mm512_setzero_ph (),
+ (__mmask32) -1, __R);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_getexp_round_ph (__m512h __W, __mmask32 __U, __m512h __A,
+ const int __R)
+{
+ return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
+ (__v32hf) __W,
+ (__mmask32) __U, __R);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_getexp_round_ph (__mmask32 __U, __m512h __A, const int __R)
+{
+ return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
+ (__v32hf)
+ _mm512_setzero_ph (),
+ (__mmask32) __U, __R);
+}
+
+#else
+#define _mm512_getexp_round_ph(A, R) \
+ ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
+ (__v32hf)_mm512_setzero_ph(), (__mmask32)-1, R))
+
+#define _mm512_mask_getexp_round_ph(W, U, A, R) \
+ ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
+ (__v32hf)(__m512h)(W), (__mmask32)(U), R))
+
+#define _mm512_maskz_getexp_round_ph(U, A, R) \
+ ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
+ (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), R))
+
+#endif /* __OPTIMIZE__ */
+
+/* Intrinsics vgetmantph. */
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_getmant_ph (__m512h __A, _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
+ (__C << 2) | __B,
+ _mm512_setzero_ph (),
+ (__mmask32) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_getmant_ph (__m512h __W, __mmask32 __U, __m512h __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
+ (__C << 2) | __B,
+ (__v32hf) __W, __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_getmant_ph (__mmask32 __U, __m512h __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
+ (__C << 2) | __B,
+ (__v32hf)
+ _mm512_setzero_ph (),
+ __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_getmant_round_ph (__m512h __A, _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C, const int __R)
+{
+ return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
+ (__C << 2) | __B,
+ _mm512_setzero_ph (),
+ (__mmask32) -1, __R);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_getmant_round_ph (__m512h __W, __mmask32 __U, __m512h __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C, const int __R)
+{
+ return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
+ (__C << 2) | __B,
+ (__v32hf) __W, __U,
+ __R);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_getmant_round_ph (__mmask32 __U, __m512h __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C, const int __R)
+{
+ return (__m512h) __builtin_ia32_getmantph512_mask ((__v32hf) __A,
+ (__C << 2) | __B,
+ (__v32hf)
+ _mm512_setzero_ph (),
+ __U, __R);
+}
+
+#else
+#define _mm512_getmant_ph(X, B, C) \
+ ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
+ (int)(((C)<<2) | (B)), \
+ (__v32hf)(__m512h) \
+ _mm512_setzero_ph(), \
+ (__mmask32)-1, \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_getmant_ph(W, U, X, B, C) \
+ ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
+ (int)(((C)<<2) | (B)), \
+ (__v32hf)(__m512h)(W), \
+ (__mmask32)(U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+
+#define _mm512_maskz_getmant_ph(U, X, B, C) \
+ ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
+ (int)(((C)<<2) | (B)), \
+ (__v32hf)(__m512h) \
+ _mm512_setzero_ph(), \
+ (__mmask32)(U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_getmant_round_ph(X, B, C, R) \
+ ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
+ (int)(((C)<<2) | (B)), \
+ (__v32hf)(__m512h) \
+ _mm512_setzero_ph(), \
+ (__mmask32)-1, \
+ (R)))
+
+#define _mm512_mask_getmant_round_ph(W, U, X, B, C, R) \
+ ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
+ (int)(((C)<<2) | (B)), \
+ (__v32hf)(__m512h)(W), \
+ (__mmask32)(U), \
+ (R)))
+
+
+#define _mm512_maskz_getmant_round_ph(U, X, B, C, R) \
+ ((__m512h)__builtin_ia32_getmantph512_mask ((__v32hf)(__m512h)(X), \
+ (int)(((C)<<2) | (B)), \
+ (__v32hf)(__m512h) \
+ _mm512_setzero_ph(), \
+ (__mmask32)(U), \
+ (R)))
+
+#endif /* __OPTIMIZE__ */
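
/* Editor's sketch, not part of this patch: vgetexpph/vgetmantph decompose
   a value so that x == mant * 2**exp for normal inputs, with the mantissa
   normalized into the interval chosen by _MM_MANTISSA_NORM_ENUM (the
   _MM_MANT_* constants come from the AVX-512 headers).
   Assumes "gcc -O2 -mavx512fp16".  */
#include <immintrin.h>
#include <stdio.h>

int
main (void)
{
  __m512h x = _mm512_set1_ph ((_Float16) 24.0f);
  __m512h e = _mm512_getexp_ph (x);   /* floor(log2(24)) = 4.0 */
  __m512h m = _mm512_getmant_ph (x, _MM_MANT_NORM_1_2,
				 _MM_MANT_SIGN_src);   /* 1.5, in [1, 2) */
  printf ("24 = %f * 2^%f\n", (double) m[0], (double) e[0]);
  return 0;
}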
+
/* Intrinsics vcvtph2dq. */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -4364,244 +5733,6 @@ _mm512_maskz_cvt_roundepu16_ph (__mmask32 __A, __m512i __B, int __C)
#endif /* __OPTIMIZE__ */
-/* Intrinsics vcvtsh2si, vcvtsh2us. */
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsh_i32 (__m128h __A)
-{
- return (int) __builtin_ia32_vcvtsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline unsigned
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsh_u32 (__m128h __A)
-{
- return (int) __builtin_ia32_vcvtsh2usi32_round (__A,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundsh_i32 (__m128h __A, const int __R)
-{
- return (int) __builtin_ia32_vcvtsh2si32_round (__A, __R);
-}
-
-extern __inline unsigned
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundsh_u32 (__m128h __A, const int __R)
-{
- return (int) __builtin_ia32_vcvtsh2usi32_round (__A, __R);
-}
-
-#else
-#define _mm_cvt_roundsh_i32(A, B) \
- ((int)__builtin_ia32_vcvtsh2si32_round ((A), (B)))
-#define _mm_cvt_roundsh_u32(A, B) \
- ((int)__builtin_ia32_vcvtsh2usi32_round ((A), (B)))
-
-#endif /* __OPTIMIZE__ */
-
-#ifdef __x86_64__
-extern __inline long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsh_i64 (__m128h __A)
-{
- return (long long)
- __builtin_ia32_vcvtsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline unsigned long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsh_u64 (__m128h __A)
-{
- return (long long)
- __builtin_ia32_vcvtsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundsh_i64 (__m128h __A, const int __R)
-{
- return (long long) __builtin_ia32_vcvtsh2si64_round (__A, __R);
-}
-
-extern __inline unsigned long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundsh_u64 (__m128h __A, const int __R)
-{
- return (long long) __builtin_ia32_vcvtsh2usi64_round (__A, __R);
-}
-
-#else
-#define _mm_cvt_roundsh_i64(A, B) \
- ((long long)__builtin_ia32_vcvtsh2si64_round ((A), (B)))
-#define _mm_cvt_roundsh_u64(A, B) \
- ((long long)__builtin_ia32_vcvtsh2usi64_round ((A), (B)))
-
-#endif /* __OPTIMIZE__ */
-#endif /* __x86_64__ */
-
-/* Intrinsics vcvttsh2si, vcvttsh2us. */
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttsh_i32 (__m128h __A)
-{
- return (int)
- __builtin_ia32_vcvttsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline unsigned
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttsh_u32 (__m128h __A)
-{
- return (int)
- __builtin_ia32_vcvttsh2usi32_round (__A, _MM_FROUND_CUR_DIRECTION);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline int
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_roundsh_i32 (__m128h __A, const int __R)
-{
- return (int) __builtin_ia32_vcvttsh2si32_round (__A, __R);
-}
-
-extern __inline unsigned
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_roundsh_u32 (__m128h __A, const int __R)
-{
- return (int) __builtin_ia32_vcvttsh2usi32_round (__A, __R);
-}
-
-#else
-#define _mm_cvtt_roundsh_i32(A, B) \
- ((int)__builtin_ia32_vcvttsh2si32_round ((A), (B)))
-#define _mm_cvtt_roundsh_u32(A, B) \
- ((int)__builtin_ia32_vcvttsh2usi32_round ((A), (B)))
-
-#endif /* __OPTIMIZE__ */
-
-#ifdef __x86_64__
-extern __inline long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttsh_i64 (__m128h __A)
-{
- return (long long)
- __builtin_ia32_vcvttsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline unsigned long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttsh_u64 (__m128h __A)
-{
- return (long long)
- __builtin_ia32_vcvttsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_roundsh_i64 (__m128h __A, const int __R)
-{
- return (long long) __builtin_ia32_vcvttsh2si64_round (__A, __R);
-}
-
-extern __inline unsigned long long
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_roundsh_u64 (__m128h __A, const int __R)
-{
- return (long long) __builtin_ia32_vcvttsh2usi64_round (__A, __R);
-}
-
-#else
-#define _mm_cvtt_roundsh_i64(A, B) \
- ((long long)__builtin_ia32_vcvttsh2si64_round ((A), (B)))
-#define _mm_cvtt_roundsh_u64(A, B) \
- ((long long)__builtin_ia32_vcvttsh2usi64_round ((A), (B)))
-
-#endif /* __OPTIMIZE__ */
-#endif /* __x86_64__ */
-
-/* Intrinsics vcvtsi2sh, vcvtusi2sh. */
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvti32_sh (__m128h __A, int __B)
-{
- return __builtin_ia32_vcvtsi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtu32_sh (__m128h __A, unsigned int __B)
-{
- return __builtin_ia32_vcvtusi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundi32_sh (__m128h __A, int __B, const int __R)
-{
- return __builtin_ia32_vcvtsi2sh32_round (__A, __B, __R);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundu32_sh (__m128h __A, unsigned int __B, const int __R)
-{
- return __builtin_ia32_vcvtusi2sh32_round (__A, __B, __R);
-}
-
-#else
-#define _mm_cvt_roundi32_sh(A, B, C) \
- (__builtin_ia32_vcvtsi2sh32_round ((A), (B), (C)))
-#define _mm_cvt_roundu32_sh(A, B, C) \
- (__builtin_ia32_vcvtusi2sh32_round ((A), (B), (C)))
-
-#endif /* __OPTIMIZE__ */
-
-#ifdef __x86_64__
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvti64_sh (__m128h __A, long long __B)
-{
- return __builtin_ia32_vcvtsi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtu64_sh (__m128h __A, unsigned long long __B)
-{
- return __builtin_ia32_vcvtusi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundi64_sh (__m128h __A, long long __B, const int __R)
-{
- return __builtin_ia32_vcvtsi2sh64_round (__A, __B, __R);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundu64_sh (__m128h __A, unsigned long long __B, const int __R)
-{
- return __builtin_ia32_vcvtusi2sh64_round (__A, __B, __R);
-}
-
-#else
-#define _mm_cvt_roundi64_sh(A, B, C) \
- (__builtin_ia32_vcvtsi2sh64_round ((A), (B), (C)))
-#define _mm_cvt_roundu64_sh(A, B, C) \
- (__builtin_ia32_vcvtusi2sh64_round ((A), (B), (C)))
-
-#endif /* __OPTIMIZE__ */
-#endif /* __x86_64__ */
-
/* Intrinsics vcvtph2pd. */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -4900,286 +6031,6 @@ _mm512_maskz_cvt_roundpd_ph (__mmask8 __A, __m512d __B, int __C)
#endif /* __OPTIMIZE__ */
-/* Intrinsics vcvtsh2ss, vcvtsh2sd. */
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsh_ss (__m128 __A, __m128h __B)
-{
- return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A,
- _mm_setzero_ps (),
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtsh_ss (__m128 __A, __mmask8 __B, __m128 __C,
- __m128h __D)
-{
- return __builtin_ia32_vcvtsh2ss_mask_round (__D, __C, __A, __B,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtsh_ss (__mmask8 __A, __m128 __B,
- __m128h __C)
-{
- return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B,
- _mm_setzero_ps (),
- __A, _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsh_sd (__m128d __A, __m128h __B)
-{
- return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A,
- _mm_setzero_pd (),
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtsh_sd (__m128d __A, __mmask8 __B, __m128d __C,
- __m128h __D)
-{
- return __builtin_ia32_vcvtsh2sd_mask_round (__D, __C, __A, __B,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtsh_sd (__mmask8 __A, __m128d __B, __m128h __C)
-{
- return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B,
- _mm_setzero_pd (),
- __A, _MM_FROUND_CUR_DIRECTION);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundsh_ss (__m128 __A, __m128h __B, const int __R)
-{
- return __builtin_ia32_vcvtsh2ss_mask_round (__B, __A,
- _mm_setzero_ps (),
- (__mmask8) -1, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvt_roundsh_ss (__m128 __A, __mmask8 __B, __m128 __C,
- __m128h __D, const int __R)
-{
- return __builtin_ia32_vcvtsh2ss_mask_round (__D, __C, __A, __B, __R);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvt_roundsh_ss (__mmask8 __A, __m128 __B,
- __m128h __C, const int __R)
-{
- return __builtin_ia32_vcvtsh2ss_mask_round (__C, __B,
- _mm_setzero_ps (),
- __A, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundsh_sd (__m128d __A, __m128h __B, const int __R)
-{
- return __builtin_ia32_vcvtsh2sd_mask_round (__B, __A,
- _mm_setzero_pd (),
- (__mmask8) -1, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvt_roundsh_sd (__m128d __A, __mmask8 __B, __m128d __C,
- __m128h __D, const int __R)
-{
- return __builtin_ia32_vcvtsh2sd_mask_round (__D, __C, __A, __B, __R);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvt_roundsh_sd (__mmask8 __A, __m128d __B, __m128h __C, const int __R)
-{
- return __builtin_ia32_vcvtsh2sd_mask_round (__C, __B,
- _mm_setzero_pd (),
- __A, __R);
-}
-
-#else
-#define _mm_cvt_roundsh_ss(A, B, R) \
- (__builtin_ia32_vcvtsh2ss_mask_round ((B), (A), \
- _mm_setzero_ps (), \
- (__mmask8) -1, (R)))
-
-#define _mm_mask_cvt_roundsh_ss(A, B, C, D, R) \
- (__builtin_ia32_vcvtsh2ss_mask_round ((D), (C), (A), (B), (R)))
-
-#define _mm_maskz_cvt_roundsh_ss(A, B, C, R) \
- (__builtin_ia32_vcvtsh2ss_mask_round ((C), (B), \
- _mm_setzero_ps (), \
- (A), (R)))
-
-#define _mm_cvt_roundsh_sd(A, B, R) \
- (__builtin_ia32_vcvtsh2sd_mask_round ((B), (A), \
- _mm_setzero_pd (), \
- (__mmask8) -1, (R)))
-
-#define _mm_mask_cvt_roundsh_sd(A, B, C, D, R) \
- (__builtin_ia32_vcvtsh2sd_mask_round ((D), (C), (A), (B), (R)))
-
-#define _mm_maskz_cvt_roundsh_sd(A, B, C, R) \
- (__builtin_ia32_vcvtsh2sd_mask_round ((C), (B), \
- _mm_setzero_pd (), \
- (A), (R)))
-
-#endif /* __OPTIMIZE__ */
-
-/* Intrinsics vcvtss2sh, vcvtsd2sh. */
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtss_sh (__m128h __A, __m128 __B)
-{
- return __builtin_ia32_vcvtss2sh_mask_round (__B, __A,
- _mm_setzero_ph (),
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtss_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128 __D)
-{
- return __builtin_ia32_vcvtss2sh_mask_round (__D, __C, __A, __B,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtss_sh (__mmask8 __A, __m128h __B, __m128 __C)
-{
- return __builtin_ia32_vcvtss2sh_mask_round (__C, __B,
- _mm_setzero_ph (),
- __A, _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsd_sh (__m128h __A, __m128d __B)
-{
- return __builtin_ia32_vcvtsd2sh_mask_round (__B, __A,
- _mm_setzero_ph (),
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtsd_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128d __D)
-{
- return __builtin_ia32_vcvtsd2sh_mask_round (__D, __C, __A, __B,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtsd_sh (__mmask8 __A, __m128h __B, __m128d __C)
-{
- return __builtin_ia32_vcvtsd2sh_mask_round (__C, __B,
- _mm_setzero_ph (),
- __A, _MM_FROUND_CUR_DIRECTION);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundss_sh (__m128h __A, __m128 __B, const int __R)
-{
- return __builtin_ia32_vcvtss2sh_mask_round (__B, __A,
- _mm_setzero_ph (),
- (__mmask8) -1, __R);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvt_roundss_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128 __D,
- const int __R)
-{
- return __builtin_ia32_vcvtss2sh_mask_round (__D, __C, __A, __B, __R);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvt_roundss_sh (__mmask8 __A, __m128h __B, __m128 __C,
- const int __R)
-{
- return __builtin_ia32_vcvtss2sh_mask_round (__C, __B,
- _mm_setzero_ph (),
- __A, __R);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_roundsd_sh (__m128h __A, __m128d __B, const int __R)
-{
- return __builtin_ia32_vcvtsd2sh_mask_round (__B, __A,
- _mm_setzero_ph (),
- (__mmask8) -1, __R);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvt_roundsd_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128d __D,
- const int __R)
-{
- return __builtin_ia32_vcvtsd2sh_mask_round (__D, __C, __A, __B, __R);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvt_roundsd_sh (__mmask8 __A, __m128h __B, __m128d __C,
- const int __R)
-{
- return __builtin_ia32_vcvtsd2sh_mask_round (__C, __B,
- _mm_setzero_ph (),
- __A, __R);
-}
-
-#else
-#define _mm_cvt_roundss_sh(A, B, R) \
- (__builtin_ia32_vcvtss2sh_mask_round ((B), (A), \
- _mm_setzero_ph (), \
- (__mmask8) -1, R))
-
-#define _mm_mask_cvt_roundss_sh(A, B, C, D, R) \
- (__builtin_ia32_vcvtss2sh_mask_round ((D), (C), (A), (B), (R)))
-
-#define _mm_maskz_cvt_roundss_sh(A, B, C, R) \
- (__builtin_ia32_vcvtss2sh_mask_round ((C), (B), \
- _mm_setzero_ph (), \
- A, R))
-
-#define _mm_cvt_roundsd_sh(A, B, R) \
- (__builtin_ia32_vcvtsd2sh_mask_round ((B), (A), \
- _mm_setzero_ph (), \
- (__mmask8) -1, R))
-
-#define _mm_mask_cvt_roundsd_sh(A, B, C, D, R) \
- (__builtin_ia32_vcvtsd2sh_mask_round ((D), (C), (A), (B), (R)))
-
-#define _mm_maskz_cvt_roundsd_sh(A, B, C, R) \
- (__builtin_ia32_vcvtsd2sh_mask_round ((C), (B), \
- _mm_setzero_ph (), \
- (A), (R)))
-
-#endif /* __OPTIMIZE__ */
-
/* Intrinsics vfmaddsub[132,213,231]ph. */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -5840,418 +6691,6 @@ _mm512_maskz_fnmsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
#endif /* __OPTIMIZE__ */
-/* Intrinsics vfmadd[132,213,231]sh. */
-extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fmadd_sh (__m128h __W, __m128h __A, __m128h __B)
-{
- return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fmadd_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
-{
- return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fmadd_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
-{
- return (__m128h) __builtin_ia32_vfmaddsh3_mask3 ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fmadd_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
-{
- return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-
-#ifdef __OPTIMIZE__
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
-{
- return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) -1,
- __R);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fmadd_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
- const int __R)
-{
- return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
- const int __R)
-{
- return (__m128h) __builtin_ia32_vfmaddsh3_mask3 ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fmadd_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
- __m128h __B, const int __R)
-{
- return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U, __R);
-}
-
-#else
-#define _mm_fmadd_round_sh(A, B, C, R) \
- ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), (C), (-1), (R)))
-#define _mm_mask_fmadd_round_sh(A, U, B, C, R) \
- ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), (C), (U), (R)))
-#define _mm_mask3_fmadd_round_sh(A, B, C, U, R) \
- ((__m128h) __builtin_ia32_vfmaddsh3_mask3 ((A), (B), (C), (U), (R)))
-#define _mm_maskz_fmadd_round_sh(U, A, B, C, R) \
- ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), (B), (C), (U), (R)))
-
-#endif /* __OPTIMIZE__ */
-
-/* Intrinsics vfnmadd[132,213,231]sh. */
-extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fnmadd_sh (__m128h __W, __m128h __A, __m128h __B)
-{
- return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fnmadd_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
-{
- return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fnmadd_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
-{
- return (__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fnmadd_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
-{
- return (__m128h) __builtin_ia32_vfnmaddsh3_maskz ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-
-#ifdef __OPTIMIZE__
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fnmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
-{
- return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) -1,
- __R);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fnmadd_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
- const int __R)
-{
- return (__m128h) __builtin_ia32_vfnmaddsh3_mask ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fnmadd_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
- const int __R)
-{
- return (__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fnmadd_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
- __m128h __B, const int __R)
-{
- return (__m128h) __builtin_ia32_vfnmaddsh3_maskz ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U, __R);
-}
-
-#else
-#define _mm_fnmadd_round_sh(A, B, C, R) \
- ((__m128h) __builtin_ia32_vfnmaddsh3_mask ((A), (B), (C), (-1), (R)))
-#define _mm_mask_fnmadd_round_sh(A, U, B, C, R) \
- ((__m128h) __builtin_ia32_vfnmaddsh3_mask ((A), (B), (C), (U), (R)))
-#define _mm_mask3_fnmadd_round_sh(A, B, C, U, R) \
- ((__m128h) __builtin_ia32_vfnmaddsh3_mask3 ((A), (B), (C), (U), (R)))
-#define _mm_maskz_fnmadd_round_sh(U, A, B, C, R) \
- ((__m128h) __builtin_ia32_vfnmaddsh3_maskz ((A), (B), (C), (U), (R)))
-
-#endif /* __OPTIMIZE__ */
-
-/* Intrinsics vfmsub[132,213,231]sh. */
-extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fmsub_sh (__m128h __W, __m128h __A, __m128h __B)
-{
- return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
- (__v8hf) __A,
- -(__v8hf) __B,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fmsub_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
-{
- return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
- (__v8hf) __A,
- -(__v8hf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fmsub_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
-{
- return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fmsub_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
-{
- return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
- (__v8hf) __A,
- -(__v8hf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-
-#ifdef __OPTIMIZE__
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
-{
- return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
- (__v8hf) __A,
- -(__v8hf) __B,
- (__mmask8) -1,
- __R);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fmsub_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
- const int __R)
-{
- return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
- (__v8hf) __A,
- -(__v8hf) __B,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
- const int __R)
-{
- return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
- (__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fmsub_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
- __m128h __B, const int __R)
-{
- return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
- (__v8hf) __A,
- -(__v8hf) __B,
- (__mmask8) __U, __R);
-}
-
-#else
-#define _mm_fmsub_round_sh(A, B, C, R) \
- ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), -(C), (-1), (R)))
-#define _mm_mask_fmsub_round_sh(A, U, B, C, R) \
- ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), (B), -(C), (U), (R)))
-#define _mm_mask3_fmsub_round_sh(A, B, C, U, R) \
- ((__m128h) __builtin_ia32_vfmsubsh3_mask3 ((A), (B), (C), (U), (R)))
-#define _mm_maskz_fmsub_round_sh(U, A, B, C, R) \
- ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), (B), -(C), (U), (R)))
-
-#endif /* __OPTIMIZE__ */
-
-/* Intrinsics vfnmsub[132,213,231]sh. */
-extern __inline __m128h
- __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fnmsub_sh (__m128h __W, __m128h __A, __m128h __B)
-{
- return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
- -(__v8hf) __A,
- -(__v8hf) __B,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fnmsub_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B)
-{
- return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
- -(__v8hf) __A,
- -(__v8hf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fnmsub_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U)
-{
- return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
- -(__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fnmsub_sh (__mmask8 __U, __m128h __W, __m128h __A, __m128h __B)
-{
- return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
- -(__v8hf) __A,
- -(__v8hf) __B,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-
-#ifdef __OPTIMIZE__
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fnmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, const int __R)
-{
- return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
- -(__v8hf) __A,
- -(__v8hf) __B,
- (__mmask8) -1,
- __R);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fnmsub_round_sh (__m128h __W, __mmask8 __U, __m128h __A, __m128h __B,
- const int __R)
-{
- return (__m128h) __builtin_ia32_vfmaddsh3_mask ((__v8hf) __W,
- -(__v8hf) __A,
- -(__v8hf) __B,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fnmsub_round_sh (__m128h __W, __m128h __A, __m128h __B, __mmask8 __U,
- const int __R)
-{
- return (__m128h) __builtin_ia32_vfmsubsh3_mask3 ((__v8hf) __W,
- -(__v8hf) __A,
- (__v8hf) __B,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fnmsub_round_sh (__mmask8 __U, __m128h __W, __m128h __A,
- __m128h __B, const int __R)
-{
- return (__m128h) __builtin_ia32_vfmaddsh3_maskz ((__v8hf) __W,
- -(__v8hf) __A,
- -(__v8hf) __B,
- (__mmask8) __U, __R);
-}
-
-#else
-#define _mm_fnmsub_round_sh(A, B, C, R) \
- ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), -(B), -(C), (-1), (R)))
-#define _mm_mask_fnmsub_round_sh(A, U, B, C, R) \
- ((__m128h) __builtin_ia32_vfmaddsh3_mask ((A), -(B), -(C), (U), (R)))
-#define _mm_mask3_fnmsub_round_sh(A, B, C, U, R) \
- ((__m128h) __builtin_ia32_vfmsubsh3_mask3 ((A), -(B), (C), (U), (R)))
-#define _mm_maskz_fnmsub_round_sh(U, A, B, C, R) \
- ((__m128h) __builtin_ia32_vfmaddsh3_maskz ((A), -(B), -(C), (U), (R)))
-
-#endif /* __OPTIMIZE__ */
-
/* Intrinsics vf[,c]maddcph. */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -6636,400 +7075,6 @@ _mm512_maskz_fmul_round_pch (__mmask16 __A, __m512h __B,
#endif /* __OPTIMIZE__ */
-/* Intrinsics vf[,c]maddcsh. */
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fcmadd_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
-{
- return (__m128h)
- __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) __A,
- (__v8hf) __C,
- (__v8hf) __D, __B,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fcmadd_sch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D)
-{
- return (__m128h)
- __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) __A,
- (__v8hf) __B,
- (__v8hf) __C, __D,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fcmadd_sch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D)
-{
- return (__m128h)
- __builtin_ia32_vfcmaddcsh_maskz_round ((__v8hf) __B,
- (__v8hf) __C,
- (__v8hf) __D,
- __A, _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fcmadd_sch (__m128h __A, __m128h __B, __m128h __C)
-{
- return (__m128h)
- __builtin_ia32_vfcmaddcsh_round ((__v8hf) __A,
- (__v8hf) __B,
- (__v8hf) __C,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fmadd_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
-{
- return (__m128h)
- __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) __A,
- (__v8hf) __C,
- (__v8hf) __D, __B,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fmadd_sch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D)
-{
- return (__m128h)
- __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) __A,
- (__v8hf) __B,
- (__v8hf) __C, __D,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fmadd_sch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D)
-{
- return (__m128h)
- __builtin_ia32_vfmaddcsh_maskz_round ((__v8hf) __B,
- (__v8hf) __C,
- (__v8hf) __D,
- __A, _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fmadd_sch (__m128h __A, __m128h __B, __m128h __C)
-{
- return (__m128h)
- __builtin_ia32_vfmaddcsh_round ((__v8hf) __A,
- (__v8hf) __B,
- (__v8hf) __C,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fcmadd_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
- __m128h __D, const int __E)
-{
- return (__m128h)
- __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) __A,
- (__v8hf) __C,
- (__v8hf) __D,
- __B, __E);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fcmadd_round_sch (__m128h __A, __m128h __B, __m128h __C,
- __mmask8 __D, const int __E)
-{
- return (__m128h)
- __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) __A,
- (__v8hf) __B,
- (__v8hf) __C,
- __D, __E);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fcmadd_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
- __m128h __D, const int __E)
-{
- return (__m128h)
- __builtin_ia32_vfcmaddcsh_maskz_round ((__v8hf) __B,
- (__v8hf) __C,
- (__v8hf) __D,
- __A, __E);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fcmadd_round_sch (__m128h __A, __m128h __B, __m128h __C, const int __D)
-{
- return (__m128h)
- __builtin_ia32_vfcmaddcsh_round ((__v8hf) __A,
- (__v8hf) __B,
- (__v8hf) __C,
- __D);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fmadd_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
- __m128h __D, const int __E)
-{
- return (__m128h)
- __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) __A,
- (__v8hf) __C,
- (__v8hf) __D,
- __B, __E);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask3_fmadd_round_sch (__m128h __A, __m128h __B, __m128h __C,
- __mmask8 __D, const int __E)
-{
- return (__m128h)
- __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) __A,
- (__v8hf) __B,
- (__v8hf) __C,
- __D, __E);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fmadd_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
- __m128h __D, const int __E)
-{
- return (__m128h)
- __builtin_ia32_vfmaddcsh_maskz_round ((__v8hf) __B,
- (__v8hf) __C,
- (__v8hf) __D,
- __A, __E);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fmadd_round_sch (__m128h __A, __m128h __B, __m128h __C, const int __D)
-{
- return (__m128h)
- __builtin_ia32_vfmaddcsh_round ((__v8hf) __A,
- (__v8hf) __B,
- (__v8hf) __C,
- __D);
-}
-#else
-#define _mm_mask_fcmadd_round_sch(A, B, C, D, E) \
- ((__m128h) \
- __builtin_ia32_vfcmaddcsh_mask_round ((__v8hf) (A), \
- (__v8hf) (C), \
- (__v8hf) (D), \
- (B), (E)))
-
-
-#define _mm_mask3_fcmadd_round_sch(A, B, C, D, E) \
- ((__m128h) \
- __builtin_ia32_vfcmaddcsh_mask3_round ((__v8hf) (A), \
- (__v8hf) (B), \
- (__v8hf) (C), \
- (D), (E)))
-
-#define _mm_maskz_fcmadd_round_sch(A, B, C, D, E) \
- __builtin_ia32_vfcmaddcsh_maskz_round ((B), (C), (D), (A), (E))
-
-#define _mm_fcmadd_round_sch(A, B, C, D) \
- __builtin_ia32_vfcmaddcsh_round ((A), (B), (C), (D))
-
-#define _mm_mask_fmadd_round_sch(A, B, C, D, E) \
- ((__m128h) \
- __builtin_ia32_vfmaddcsh_mask_round ((__v8hf) (A), \
- (__v8hf) (C), \
- (__v8hf) (D), \
- (B), (E)))
-
-#define _mm_mask3_fmadd_round_sch(A, B, C, D, E) \
- ((__m128h) \
- __builtin_ia32_vfmaddcsh_mask3_round ((__v8hf) (A), \
- (__v8hf) (B), \
- (__v8hf) (C), \
- (D), (E)))
-
-#define _mm_maskz_fmadd_round_sch(A, B, C, D, E) \
- __builtin_ia32_vfmaddcsh_maskz_round ((B), (C), (D), (A), (E))
-
-#define _mm_fmadd_round_sch(A, B, C, D) \
- __builtin_ia32_vfmaddcsh_round ((A), (B), (C), (D))
-
-#endif /* __OPTIMIZE__ */
-
-/* Intrinsics vf[,c]mulcsh. */
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fcmul_sch (__m128h __A, __m128h __B)
-{
- return (__m128h)
- __builtin_ia32_vfcmulcsh_round ((__v8hf) __A,
- (__v8hf) __B,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fcmul_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
-{
- return (__m128h)
- __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C,
- (__v8hf) __D,
- (__v8hf) __A,
- __B, _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fcmul_sch (__mmask8 __A, __m128h __B, __m128h __C)
-{
- return (__m128h)
- __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B,
- (__v8hf) __C,
- _mm_setzero_ph (),
- __A, _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fmul_sch (__m128h __A, __m128h __B)
-{
- return (__m128h)
- __builtin_ia32_vfmulcsh_round ((__v8hf) __A,
- (__v8hf) __B,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fmul_sch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
-{
- return (__m128h)
- __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C,
- (__v8hf) __D,
- (__v8hf) __A,
- __B, _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fmul_sch (__mmask8 __A, __m128h __B, __m128h __C)
-{
- return (__m128h)
- __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B,
- (__v8hf) __C,
- _mm_setzero_ph (),
- __A, _MM_FROUND_CUR_DIRECTION);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fcmul_round_sch (__m128h __A, __m128h __B, const int __D)
-{
- return (__m128h)
- __builtin_ia32_vfcmulcsh_round ((__v8hf) __A,
- (__v8hf) __B,
- __D);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fcmul_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
- __m128h __D, const int __E)
-{
- return (__m128h)
- __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C,
- (__v8hf) __D,
- (__v8hf) __A,
- __B, __E);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fcmul_round_sch (__mmask8 __A, __m128h __B, __m128h __C,
- const int __E)
-{
- return (__m128h)
- __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B,
- (__v8hf) __C,
- _mm_setzero_ph (),
- __A, __E);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_fmul_round_sch (__m128h __A, __m128h __B, const int __D)
-{
- return (__m128h)
- __builtin_ia32_vfmulcsh_round ((__v8hf) __A,
- (__v8hf) __B, __D);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fmul_round_sch (__m128h __A, __mmask8 __B, __m128h __C,
- __m128h __D, const int __E)
-{
- return (__m128h)
- __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C,
- (__v8hf) __D,
- (__v8hf) __A,
- __B, __E);
-}
-
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_fmul_round_sch (__mmask8 __A, __m128h __B, __m128h __C, const int __E)
-{
- return (__m128h)
- __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B,
- (__v8hf) __C,
- _mm_setzero_ph (),
- __A, __E);
-}
-
-#else
-#define _mm_fcmul_round_sch(__A, __B, __D) \
- (__m128h) __builtin_ia32_vfcmulcsh_round ((__v8hf) __A, \
- (__v8hf) __B, __D)
-
-#define _mm_mask_fcmul_round_sch(__A, __B, __C, __D, __E) \
- (__m128h) __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __C, \
- (__v8hf) __D, \
- (__v8hf) __A, \
- __B, __E)
-
-#define _mm_maskz_fcmul_round_sch(__A, __B, __C, __E) \
- (__m128h) __builtin_ia32_vfcmulcsh_mask_round ((__v8hf) __B, \
- (__v8hf) __C, \
- _mm_setzero_ph (), \
- __A, __E)
-
-#define _mm_fmul_round_sch(__A, __B, __D) \
- (__m128h) __builtin_ia32_vfmulcsh_round ((__v8hf) __A, \
- (__v8hf) __B, __D)
-
-#define _mm_mask_fmul_round_sch(__A, __B, __C, __D, __E) \
- (__m128h) __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __C, \
- (__v8hf) __D, \
- (__v8hf) __A, \
- __B, __E)
-
-#define _mm_maskz_fmul_round_sch(__A, __B, __C, __E) \
- (__m128h) __builtin_ia32_vfmulcsh_mask_round ((__v8hf) __B, \
- (__v8hf) __C, \
- _mm_setzero_ph (), \
- __A, __E)
-
-#endif /* __OPTIMIZE__ */
-
#define _MM512_REDUCE_OP(op) \
__m256h __T1 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
__m256h __T2 = (__m256h) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
@@ -7193,27 +7238,9 @@ _mm512_set1_pch (_Float16 _Complex __A)
#define _mm512_maskz_cmul_round_pch(U, A, B, R) \
_mm512_maskz_fcmul_round_pch ((U), (A), (B), (R))
-#define _mm_mul_sch(A, B) _mm_fmul_sch ((A), (B))
-#define _mm_mask_mul_sch(W, U, A, B) _mm_mask_fmul_sch ((W), (U), (A), (B))
-#define _mm_maskz_mul_sch(U, A, B) _mm_maskz_fmul_sch ((U), (A), (B))
-#define _mm_mul_round_sch(A, B, R) _mm_fmul_round_sch ((A), (B), (R))
-#define _mm_mask_mul_round_sch(W, U, A, B, R) \
- _mm_mask_fmul_round_sch ((W), (U), (A), (B), (R))
-#define _mm_maskz_mul_round_sch(U, A, B, R) \
- _mm_maskz_fmul_round_sch ((U), (A), (B), (R))
-
-#define _mm_cmul_sch(A, B) _mm_fcmul_sch ((A), (B))
-#define _mm_mask_cmul_sch(W, U, A, B) _mm_mask_fcmul_sch ((W), (U), (A), (B))
-#define _mm_maskz_cmul_sch(U, A, B) _mm_maskz_fcmul_sch ((U), (A), (B))
-#define _mm_cmul_round_sch(A, B, R) _mm_fcmul_round_sch ((A), (B), (R))
-#define _mm_mask_cmul_round_sch(W, U, A, B, R) \
- _mm_mask_fcmul_round_sch ((W), (U), (A), (B), (R))
-#define _mm_maskz_cmul_round_sch(U, A, B, R) \
- _mm_maskz_fcmul_round_sch ((U), (A), (B), (R))
-
-#ifdef __DISABLE_AVX512FP16__
-#undef __DISABLE_AVX512FP16__
+#ifdef __DISABLE_AVX512FP16_512__
+#undef __DISABLE_AVX512FP16_512__
#pragma GCC pop_options
-#endif /* __DISABLE_AVX512FP16__ */
+#endif /* __DISABLE_AVX512FP16_512__ */
-#endif /* __AVX512FP16INTRIN_H_INCLUDED */
+#endif /* _AVX512FP16INTRIN_H_INCLUDED */