-rw-r--r--  gcc/config.gcc | 3
-rw-r--r--  gcc/config/i386/avx10_2-512bf16intrin.h | 364
-rw-r--r--  gcc/config/i386/avx10_2bf16intrin.h | 685
-rw-r--r--  gcc/config/i386/i386-builtin-types.def | 9
-rw-r--r--  gcc/config/i386/i386-builtin.def | 78
-rw-r--r--  gcc/config/i386/i386-expand.cc | 9
-rw-r--r--  gcc/config/i386/immintrin.h | 4
-rw-r--r--  gcc/config/i386/sse.md | 293
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10-helper.h | 48
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-1.c | 87
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-512-vaddnepbf16-2.c | 49
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-512-vdivnepbf16-2.c | 49
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-512-vfmaddXXXnepbf16-2.c | 52
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-512-vfmsubXXXnepbf16-2.c | 53
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-512-vfnmaddXXXnepbf16-2.c | 53
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-512-vfnmsubXXXnepbf16-2.c | 53
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-512-vmaxpbf16-2.c | 51
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-512-vminpbf16-2.c | 51
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-512-vmulnepbf16-2.c | 49
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-512-vscalefpbf16-2.c | 51
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-512-vsubnepbf16-2.c | 49
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-bf16-1.c | 172
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-vaddnepbf16-2.c | 16
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-vdivnepbf16-2.c | 16
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-vfmaddXXXnepbf16-2.c | 16
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-vfmsubXXXnepbf16-2.c | 16
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-vfnmaddXXXnepbf16-2.c | 16
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-vfnmsubXXXnepbf16-2.c | 16
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-vmaxpbf16-2.c | 16
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-vminpbf16-2.c | 16
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-vmulnepbf16-2.c | 16
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-vscalefpbf16-2.c | 16
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-vsubnepbf16-2.c | 16
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx512f-helper.h | 2
-rw-r--r--  gcc/testsuite/gcc.target/i386/m512-check.h | 27
35 files changed, 2515 insertions(+), 2 deletions(-)
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 5e9c36a..7d761b2 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -454,7 +454,8 @@ i[34567]86-*-* | x86_64-*-*)
sm3intrin.h sha512intrin.h sm4intrin.h
usermsrintrin.h avx10_2roundingintrin.h
avx10_2mediaintrin.h avx10_2-512mediaintrin.h
-		       avx10_2convertintrin.h avx10_2-512convertintrin.h"
+		       avx10_2convertintrin.h avx10_2-512convertintrin.h
+		       avx10_2bf16intrin.h avx10_2-512bf16intrin.h"
;;
ia64-*-*)
extra_headers=ia64intrin.h
diff --git a/gcc/config/i386/avx10_2-512bf16intrin.h b/gcc/config/i386/avx10_2-512bf16intrin.h
new file mode 100644
index 0000000..b409ea1
--- /dev/null
+++ b/gcc/config/i386/avx10_2-512bf16intrin.h
@@ -0,0 +1,364 @@
+/* Copyright (C) 2024 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx10_2-512bf16intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX10_2_512BF16INTRIN_H_INCLUDED
+#define _AVX10_2_512BF16INTRIN_H_INCLUDED
+
+#if !defined (__AVX10_2_512__)
+#pragma GCC push_options
+#pragma GCC target("avx10.2-512")
+#define __DISABLE_AVX10_2_512__
+#endif /* __AVX10_2_512__ */
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_addne_pbh (__m512bh __A, __m512bh __B)
+{
+ return (__m512bh) __builtin_ia32_addnepbf16512 (__A, __B);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_addne_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_addnepbf16512_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_addne_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_addnepbf16512_mask (__A, __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_subne_pbh (__m512bh __A, __m512bh __B)
+{
+ return (__m512bh) __builtin_ia32_subnepbf16512 (__A, __B);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_subne_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_subnepbf16512_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_subne_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_subnepbf16512_mask (__A, __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mulne_pbh (__m512bh __A, __m512bh __B)
+{
+ return (__m512bh) __builtin_ia32_mulnepbf16512 (__A, __B);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mulne_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_mulnepbf16512_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mulne_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_mulnepbf16512_mask (__A, __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_divne_pbh (__m512bh __A, __m512bh __B)
+{
+ return (__m512bh) __builtin_ia32_divnepbf16512 (__A, __B);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_divne_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_divnepbf16512_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_divne_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_divnepbf16512_mask (__A, __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_pbh (__m512bh __A, __m512bh __B)
+{
+ return (__m512bh) __builtin_ia32_maxpbf16512 (__A, __B);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_maxpbf16512_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_maxpbf16512_mask (__A, __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_pbh (__m512bh __A, __m512bh __B)
+{
+ return (__m512bh) __builtin_ia32_minpbf16512 (__A, __B);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_minpbf16512_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_minpbf16512_mask (__A, __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_scalef_pbh (__m512bh __A, __m512bh __B)
+{
+ return (__m512bh) __builtin_ia32_scalefpbf16512 (__A, __B);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_scalef_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_scalefpbf16512_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_scalef_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_scalefpbf16512_mask (__A, __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmaddne_pbh (__m512bh __A, __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fmaddnepbf16512_mask (__A, __B, __C, (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmaddne_pbh (__m512bh __A, __mmask32 __U,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fmaddnepbf16512_mask (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmaddne_pbh (__m512bh __A, __m512bh __B,
+ __m512bh __C, __mmask32 __U)
+{
+ return (__m512bh)
+ __builtin_ia32_fmaddnepbf16512_mask3 (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmaddne_pbh (__mmask32 __U, __m512bh __A,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fmaddnepbf16512_maskz (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmsubne_pbh (__m512bh __A, __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fmsubnepbf16512_mask (__A, __B, __C, (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmsubne_pbh (__m512bh __A, __mmask32 __U,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fmsubnepbf16512_mask (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmsubne_pbh (__m512bh __A, __m512bh __B,
+ __m512bh __C, __mmask32 __U)
+{
+ return (__m512bh)
+ __builtin_ia32_fmsubnepbf16512_mask3 (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmsubne_pbh (__mmask32 __U, __m512bh __A,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fmsubnepbf16512_maskz (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fnmaddne_pbh (__m512bh __A, __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmaddnepbf16512_mask (__A, __B, __C, (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fnmaddne_pbh (__m512bh __A, __mmask32 __U,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmaddnepbf16512_mask (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fnmaddne_pbh (__m512bh __A, __m512bh __B,
+ __m512bh __C, __mmask32 __U)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmaddnepbf16512_mask3 (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fnmaddne_pbh (__mmask32 __U, __m512bh __A,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmaddnepbf16512_maskz (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fnmsubne_pbh (__m512bh __A, __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmsubnepbf16512_mask (__A, __B, __C, (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fnmsubne_pbh (__m512bh __A, __mmask32 __U,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmsubnepbf16512_mask (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fnmsubne_pbh (__m512bh __A, __m512bh __B,
+ __m512bh __C, __mmask32 __U)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmsubnepbf16512_mask3 (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fnmsubne_pbh (__mmask32 __U, __m512bh __A,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmsubnepbf16512_maskz (__A, __B, __C, __U);
+}
+
+#ifdef __DISABLE_AVX10_2_512__
+#undef __DISABLE_AVX10_2_512__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX10_2_512__ */
+
+#endif /* _AVX10_2_512BF16INTRIN_H_INCLUDED */
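
For reference, a minimal usage sketch of the new 512-bit intrinsics
(hypothetical test, not part of the patch; assumes a GCC built with this
patch and a file compiled with -mavx10.2-512):

#include <immintrin.h>

__m512bh
fma_then_masked_add (__m512bh a, __m512bh b, __m512bh c, __mmask32 m)
{
  /* d = a * b + c on all 32 bf16 lanes.  */
  __m512bh d = _mm512_fmaddne_pbh (a, b, c);
  /* Zero-masked add: lanes with a clear bit in m are zeroed.  */
  return _mm512_maskz_addne_pbh (m, d, c);
}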
diff --git a/gcc/config/i386/avx10_2bf16intrin.h b/gcc/config/i386/avx10_2bf16intrin.h
new file mode 100644
index 0000000..e16f1b6
--- /dev/null
+++ b/gcc/config/i386/avx10_2bf16intrin.h
@@ -0,0 +1,685 @@
+/* Copyright (C) 2024 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _IMMINTRIN_H_INCLUDED
+#error "Never use <avx10_2bf16intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX10_2BF16INTRIN_H_INCLUDED
+#define _AVX10_2BF16INTRIN_H_INCLUDED
+
+#if !defined(__AVX10_2_256__)
+#pragma GCC push_options
+#pragma GCC target("avx10.2")
+#define __DISABLE_AVX10_2_256__
+#endif /* __AVX10_2_256__ */
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_addne_pbh (__m256bh __A, __m256bh __B)
+{
+ return (__m256bh) __builtin_ia32_addnepbf16256 (__A, __B);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_addne_pbh (__m256bh __W, __mmask16 __U,
+ __m256bh __A, __m256bh __B)
+{
+ return (__m256bh)
+ __builtin_ia32_addnepbf16256_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_addne_pbh (__mmask16 __U, __m256bh __A, __m256bh __B)
+{
+ return (__m256bh)
+ __builtin_ia32_addnepbf16256_mask (__A, __B,
+ (__v16bf) _mm256_setzero_si256 (),
+ __U);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_addne_pbh (__m128bh __A, __m128bh __B)
+{
+ return (__m128bh) __builtin_ia32_addnepbf16128 (__A, __B);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_addne_pbh (__m128bh __W, __mmask8 __U,
+ __m128bh __A, __m128bh __B)
+{
+ return (__m128bh)
+ __builtin_ia32_addnepbf16128_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_addne_pbh (__mmask8 __U, __m128bh __A, __m128bh __B)
+{
+ return (__m128bh)
+ __builtin_ia32_addnepbf16128_mask (__A, __B,
+ (__v8bf) _mm_setzero_si128 (),
+ __U);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_subne_pbh (__m256bh __A, __m256bh __B)
+{
+ return (__m256bh) __builtin_ia32_subnepbf16256 (__A, __B);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_subne_pbh (__m256bh __W, __mmask16 __U,
+ __m256bh __A, __m256bh __B)
+{
+ return (__m256bh)
+ __builtin_ia32_subnepbf16256_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_subne_pbh (__mmask16 __U, __m256bh __A, __m256bh __B)
+{
+ return (__m256bh)
+ __builtin_ia32_subnepbf16256_mask (__A, __B,
+ (__v16bf) _mm256_setzero_si256 (),
+ __U);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_subne_pbh (__m128bh __A, __m128bh __B)
+{
+ return (__m128bh) __builtin_ia32_subnepbf16128 (__A, __B);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_subne_pbh (__m128bh __W, __mmask8 __U,
+ __m128bh __A, __m128bh __B)
+{
+ return (__m128bh)
+ __builtin_ia32_subnepbf16128_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_subne_pbh (__mmask8 __U, __m128bh __A, __m128bh __B)
+{
+ return (__m128bh)
+ __builtin_ia32_subnepbf16128_mask (__A, __B,
+ (__v8bf) _mm_setzero_si128 (),
+ __U);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mulne_pbh (__m256bh __A, __m256bh __B)
+{
+ return (__m256bh) __builtin_ia32_mulnepbf16256 (__A, __B);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mulne_pbh (__m256bh __W, __mmask16 __U,
+ __m256bh __A, __m256bh __B)
+{
+ return (__m256bh)
+ __builtin_ia32_mulnepbf16256_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mulne_pbh (__mmask16 __U, __m256bh __A, __m256bh __B)
+{
+ return (__m256bh)
+ __builtin_ia32_mulnepbf16256_mask (__A, __B,
+ (__v16bf) _mm256_setzero_si256 (),
+ __U);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mulne_pbh (__m128bh __A, __m128bh __B)
+{
+ return (__m128bh) __builtin_ia32_mulnepbf16128 (__A, __B);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mulne_pbh (__m128bh __W, __mmask8 __U,
+ __m128bh __A, __m128bh __B)
+{
+ return (__m128bh)
+ __builtin_ia32_mulnepbf16128_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mulne_pbh (__mmask8 __U, __m128bh __A, __m128bh __B)
+{
+ return (__m128bh)
+ __builtin_ia32_mulnepbf16128_mask (__A, __B,
+ (__v8bf) _mm_setzero_si128 (),
+ __U);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_divne_pbh (__m256bh __A, __m256bh __B)
+{
+ return (__m256bh) __builtin_ia32_divnepbf16256 (__A, __B);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_divne_pbh (__m256bh __W, __mmask16 __U,
+ __m256bh __A, __m256bh __B)
+{
+ return (__m256bh)
+ __builtin_ia32_divnepbf16256_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_divne_pbh (__mmask16 __U, __m256bh __A, __m256bh __B)
+{
+ return (__m256bh)
+ __builtin_ia32_divnepbf16256_mask (__A, __B,
+ (__v16bf) _mm256_setzero_si256 (),
+ __U);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_divne_pbh (__m128bh __A, __m128bh __B)
+{
+ return (__m128bh) __builtin_ia32_divnepbf16128 (__A, __B);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_divne_pbh (__m128bh __W, __mmask8 __U,
+ __m128bh __A, __m128bh __B)
+{
+ return (__m128bh)
+ __builtin_ia32_divnepbf16128_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_divne_pbh (__mmask8 __U, __m128bh __A, __m128bh __B)
+{
+ return (__m128bh)
+ __builtin_ia32_divnepbf16128_mask (__A, __B,
+ (__v8bf) _mm_setzero_si128 (),
+ __U);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_max_pbh (__m256bh __A, __m256bh __B)
+{
+ return (__m256bh) __builtin_ia32_maxpbf16256 (__A, __B);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_max_pbh (__m256bh __W, __mmask16 __U,
+ __m256bh __A, __m256bh __B)
+{
+ return (__m256bh)
+ __builtin_ia32_maxpbf16256_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_max_pbh (__mmask16 __U, __m256bh __A, __m256bh __B)
+{
+ return (__m256bh)
+ __builtin_ia32_maxpbf16256_mask (__A, __B,
+ (__v16bf) _mm256_setzero_si256 (),
+ __U);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_pbh (__m128bh __A, __m128bh __B)
+{
+ return (__m128bh) __builtin_ia32_maxpbf16128 (__A, __B);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_pbh (__m128bh __W, __mmask8 __U,
+ __m128bh __A, __m128bh __B)
+{
+ return (__m128bh)
+ __builtin_ia32_maxpbf16128_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_pbh (__mmask8 __U, __m128bh __A, __m128bh __B)
+{
+ return (__m128bh)
+ __builtin_ia32_maxpbf16128_mask (__A, __B,
+ (__v8bf) _mm_setzero_si128 (),
+ __U);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_min_pbh (__m256bh __A, __m256bh __B)
+{
+ return (__m256bh) __builtin_ia32_minpbf16256 (__A, __B);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_min_pbh (__m256bh __W, __mmask16 __U,
+ __m256bh __A, __m256bh __B)
+{
+ return (__m256bh)
+ __builtin_ia32_minpbf16256_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_min_pbh (__mmask16 __U, __m256bh __A, __m256bh __B)
+{
+ return (__m256bh)
+ __builtin_ia32_minpbf16256_mask (__A, __B,
+ (__v16bf) _mm256_setzero_si256 (),
+ __U);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_pbh (__m128bh __A, __m128bh __B)
+{
+ return (__m128bh) __builtin_ia32_minpbf16128 (__A, __B);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_pbh (__m128bh __W, __mmask8 __U,
+ __m128bh __A, __m128bh __B)
+{
+ return (__m128bh)
+ __builtin_ia32_minpbf16128_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_pbh (__mmask8 __U, __m128bh __A, __m128bh __B)
+{
+ return (__m128bh)
+ __builtin_ia32_minpbf16128_mask (__A, __B,
+ (__v8bf) _mm_setzero_si128 (),
+ __U);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_scalef_pbh (__m256bh __A, __m256bh __B)
+{
+ return (__m256bh) __builtin_ia32_scalefpbf16256 (__A, __B);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_scalef_pbh (__m256bh __W, __mmask16 __U,
+ __m256bh __A, __m256bh __B)
+{
+ return (__m256bh)
+ __builtin_ia32_scalefpbf16256_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_scalef_pbh (__mmask16 __U, __m256bh __A, __m256bh __B)
+{
+ return (__m256bh)
+ __builtin_ia32_scalefpbf16256_mask (__A, __B,
+ (__v16bf) _mm256_setzero_si256 (),
+ __U);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_scalef_pbh (__m128bh __A, __m128bh __B)
+{
+ return (__m128bh) __builtin_ia32_scalefpbf16128 (__A, __B);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_scalef_pbh (__m128bh __W, __mmask8 __U,
+ __m128bh __A, __m128bh __B)
+{
+ return (__m128bh)
+ __builtin_ia32_scalefpbf16128_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_scalef_pbh (__mmask8 __U, __m128bh __A, __m128bh __B)
+{
+ return (__m128bh)
+ __builtin_ia32_scalefpbf16128_mask (__A, __B,
+ (__v8bf) _mm_setzero_si128 (),
+ __U);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmaddne_pbh (__m256bh __A, __m256bh __B, __m256bh __C)
+{
+ return (__m256bh)
+ __builtin_ia32_fmaddnepbf16256_mask (__A, __B, __C, (__mmask16) -1);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fmaddne_pbh (__m256bh __A, __mmask16 __U,
+ __m256bh __B, __m256bh __C)
+{
+ return (__m256bh)
+ __builtin_ia32_fmaddnepbf16256_mask (__A, __B, __C, __U);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fmaddne_pbh (__m256bh __A, __m256bh __B,
+ __m256bh __C, __mmask16 __U)
+{
+ return (__m256bh)
+ __builtin_ia32_fmaddnepbf16256_mask3 (__A, __B, __C, __U);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fmaddne_pbh (__mmask16 __U, __m256bh __A,
+ __m256bh __B, __m256bh __C)
+{
+ return (__m256bh)
+ __builtin_ia32_fmaddnepbf16256_maskz (__A, __B, __C, __U);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmaddne_pbh (__m128bh __A, __m128bh __B, __m128bh __C)
+{
+ return (__m128bh)
+ __builtin_ia32_fmaddnepbf16128_mask (__A, __B, __C, (__mmask8) -1);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmaddne_pbh (__m128bh __A, __mmask8 __U,
+ __m128bh __B, __m128bh __C)
+{
+ return (__m128bh)
+ __builtin_ia32_fmaddnepbf16128_mask (__A, __B, __C, __U);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmaddne_pbh (__m128bh __A, __m128bh __B,
+ __m128bh __C, __mmask8 __U)
+{
+ return (__m128bh)
+ __builtin_ia32_fmaddnepbf16128_mask3 (__A, __B, __C, __U);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmaddne_pbh (__mmask8 __U, __m128bh __A,
+ __m128bh __B, __m128bh __C)
+{
+ return (__m128bh)
+ __builtin_ia32_fmaddnepbf16128_maskz (__A, __B, __C, __U);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmsubne_pbh (__m256bh __A, __m256bh __B, __m256bh __C)
+{
+ return (__m256bh)
+ __builtin_ia32_fmsubnepbf16256_mask (__A, __B, __C, (__mmask16) -1);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fmsubne_pbh (__m256bh __A, __mmask16 __U,
+ __m256bh __B, __m256bh __C)
+{
+ return (__m256bh) __builtin_ia32_fmsubnepbf16256_mask (__A, __B, __C, __U);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fmsubne_pbh (__m256bh __A, __m256bh __B,
+ __m256bh __C, __mmask16 __U)
+{
+ return (__m256bh)
+ __builtin_ia32_fmsubnepbf16256_mask3 (__A, __B, __C, __U);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fmsubne_pbh (__mmask16 __U, __m256bh __A,
+ __m256bh __B, __m256bh __C)
+{
+ return (__m256bh)
+ __builtin_ia32_fmsubnepbf16256_maskz (__A, __B, __C, __U);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsubne_pbh (__m128bh __A, __m128bh __B, __m128bh __C)
+{
+ return (__m128bh)
+ __builtin_ia32_fmsubnepbf16128_mask (__A, __B, __C, (__mmask8) -1);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmsubne_pbh (__m128bh __A, __mmask8 __U,
+ __m128bh __B, __m128bh __C)
+{
+ return (__m128bh)
+ __builtin_ia32_fmsubnepbf16128_mask (__A, __B, __C, __U);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmsubne_pbh (__m128bh __A, __m128bh __B,
+ __m128bh __C, __mmask8 __U)
+{
+ return (__m128bh)
+ __builtin_ia32_fmsubnepbf16128_mask3 (__A, __B, __C, __U);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmsubne_pbh (__mmask8 __U, __m128bh __A,
+ __m128bh __B, __m128bh __C)
+{
+ return (__m128bh)
+ __builtin_ia32_fmsubnepbf16128_maskz (__A, __B, __C, __U);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fnmaddne_pbh (__m256bh __A, __m256bh __B, __m256bh __C)
+{
+ return (__m256bh)
+ __builtin_ia32_fnmaddnepbf16256_mask (__A, __B, __C, (__mmask16) -1);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fnmaddne_pbh (__m256bh __A, __mmask16 __U,
+ __m256bh __B, __m256bh __C)
+{
+ return (__m256bh)
+ __builtin_ia32_fnmaddnepbf16256_mask (__A, __B, __C, __U);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fnmaddne_pbh (__m256bh __A, __m256bh __B,
+ __m256bh __C, __mmask16 __U)
+{
+ return (__m256bh)
+ __builtin_ia32_fnmaddnepbf16256_mask3 (__A, __B, __C, __U);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fnmaddne_pbh (__mmask16 __U, __m256bh __A,
+ __m256bh __B, __m256bh __C)
+{
+ return (__m256bh)
+ __builtin_ia32_fnmaddnepbf16256_maskz (__A, __B, __C, __U);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmaddne_pbh (__m128bh __A, __m128bh __B, __m128bh __C)
+{
+ return (__m128bh)
+ __builtin_ia32_fnmaddnepbf16128_mask (__A, __B, __C, (__mmask8) -1);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmaddne_pbh (__m128bh __A, __mmask8 __U,
+ __m128bh __B, __m128bh __C)
+{
+ return (__m128bh)
+ __builtin_ia32_fnmaddnepbf16128_mask (__A, __B, __C, __U);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmaddne_pbh (__m128bh __A, __m128bh __B,
+ __m128bh __C, __mmask8 __U)
+{
+ return (__m128bh)
+ __builtin_ia32_fnmaddnepbf16128_mask3 (__A, __B, __C, __U);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmaddne_pbh (__mmask8 __U, __m128bh __A,
+ __m128bh __B, __m128bh __C)
+{
+ return (__m128bh)
+ __builtin_ia32_fnmaddnepbf16128_maskz (__A, __B, __C, __U);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fnmsubne_pbh (__m256bh __A, __m256bh __B, __m256bh __C)
+{
+ return (__m256bh)
+ __builtin_ia32_fnmsubnepbf16256_mask (__A, __B, __C, (__mmask16) -1);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fnmsubne_pbh (__m256bh __A, __mmask16 __U,
+ __m256bh __B, __m256bh __C)
+{
+ return (__m256bh)
+ __builtin_ia32_fnmsubnepbf16256_mask (__A, __B, __C, __U);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fnmsubne_pbh (__m256bh __A, __m256bh __B,
+ __m256bh __C, __mmask16 __U)
+{
+ return (__m256bh)
+ __builtin_ia32_fnmsubnepbf16256_mask3 (__A, __B, __C, __U);
+}
+
+extern __inline__ __m256bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fnmsubne_pbh (__mmask16 __U, __m256bh __A,
+ __m256bh __B, __m256bh __C)
+{
+ return (__m256bh)
+ __builtin_ia32_fnmsubnepbf16256_maskz (__A, __B, __C, __U);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmsubne_pbh (__m128bh __A, __m128bh __B, __m128bh __C)
+{
+ return (__m128bh)
+ __builtin_ia32_fnmsubnepbf16128_mask (__A, __B, __C, (__mmask8) -1);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmsubne_pbh (__m128bh __A, __mmask8 __U,
+ __m128bh __B, __m128bh __C)
+{
+ return (__m128bh)
+ __builtin_ia32_fnmsubnepbf16128_mask (__A, __B, __C, __U);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmsubne_pbh (__m128bh __A, __m128bh __B,
+ __m128bh __C, __mmask8 __U)
+{
+ return (__m128bh)
+ __builtin_ia32_fnmsubnepbf16128_mask3 (__A, __B, __C, __U);
+}
+
+extern __inline__ __m128bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmsubne_pbh (__mmask8 __U, __m128bh __A,
+ __m128bh __B, __m128bh __C)
+{
+ return (__m128bh)
+ __builtin_ia32_fnmsubnepbf16128_maskz (__A, __B, __C, __U);
+}
+
+#ifdef __DISABLE_AVX10_2_256__
+#undef __DISABLE_AVX10_2_256__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX10_2_256__ */
+
+#endif /* _AVX10_2BF16INTRIN_H_INCLUDED */
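
Similarly, a hypothetical sketch (not part of the patch) of the masked
128-bit forms; note that the merge-masked variants take the fallback
vector as their first argument:

#include <immintrin.h>

__m128bh
clamp_then_scale (__m128bh x, __m128bh lo, __m128bh hi,
		  __m128bh s, __mmask8 m)
{
  /* Element-wise clamp of x into [lo, hi] on 8 bf16 lanes.  */
  __m128bh t = _mm_max_pbh (x, lo);
  t = _mm_min_pbh (t, hi);
  /* Merge-masked multiply: lanes with a clear bit in m keep x.  */
  return _mm_mask_mulne_pbh (x, m, t, s);
}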
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 63b6584..f383842 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -1474,3 +1474,12 @@ DEF_FUNCTION_TYPE (V64QI, V32HF, V32HF, V64QI, UDI)
DEF_FUNCTION_TYPE (V16QI, V8HF, V16QI, UQI)
DEF_FUNCTION_TYPE (V16QI, V16HF, V16QI, UHI)
DEF_FUNCTION_TYPE (V32QI, V32HF, V32QI, USI)
+DEF_FUNCTION_TYPE (V32BF, V32BF, V32BF)
+DEF_FUNCTION_TYPE (V16BF, V16BF, V16BF)
+DEF_FUNCTION_TYPE (V8BF, V8BF, V8BF)
+DEF_FUNCTION_TYPE (V32BF, V32BF, V32BF, USI)
+DEF_FUNCTION_TYPE (V16BF, V16BF, V16BF, UHI)
+DEF_FUNCTION_TYPE (V8BF, V8BF, V8BF, UQI)
+DEF_FUNCTION_TYPE (V32BF, V32BF, V32BF, V32BF, USI)
+DEF_FUNCTION_TYPE (V16BF, V16BF, V16BF, V16BF, UHI)
+DEF_FUNCTION_TYPE (V8BF, V8BF, V8BF, V8BF, UQI)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 6f5ab32..3f3bc76 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -3159,6 +3159,84 @@ BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvtneph2hf8sv32hf_mask, "__bui
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvthf82phv8hf_mask, "__builtin_ia32_vcvthf82ph128_mask", IX86_BUILTIN_VCVTHF82PH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V16QI_V8HF_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvthf82phv16hf_mask, "__builtin_ia32_vcvthf82ph256_mask", IX86_BUILTIN_VCVTHF82PH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16QI_V16HF_UHI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvthf82phv32hf_mask, "__builtin_ia32_vcvthf82ph512_mask", IX86_BUILTIN_VCVTHF82PH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32QI_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_addnepbf16_v32bf, "__builtin_ia32_addnepbf16512", IX86_BUILTIN_ADDNEPBF16512, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_addnepbf16_v32bf_mask, "__builtin_ia32_addnepbf16512_mask", IX86_BUILTIN_ADDNEPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_addnepbf16_v16bf, "__builtin_ia32_addnepbf16256", IX86_BUILTIN_ADDNEPBF16256, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_addnepbf16_v16bf_mask, "__builtin_ia32_addnepbf16256_mask", IX86_BUILTIN_ADDNEPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_addnepbf16_v8bf, "__builtin_ia32_addnepbf16128", IX86_BUILTIN_ADDNEPBF16128, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_addnepbf16_v8bf_mask, "__builtin_ia32_addnepbf16128_mask", IX86_BUILTIN_ADDNEPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_subnepbf16_v32bf, "__builtin_ia32_subnepbf16512", IX86_BUILTIN_SUBNEPBF16512, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_subnepbf16_v32bf_mask, "__builtin_ia32_subnepbf16512_mask", IX86_BUILTIN_SUBNEPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_subnepbf16_v16bf, "__builtin_ia32_subnepbf16256", IX86_BUILTIN_SUBNEPBF16256, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_subnepbf16_v16bf_mask, "__builtin_ia32_subnepbf16256_mask", IX86_BUILTIN_SUBNEPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_subnepbf16_v8bf, "__builtin_ia32_subnepbf16128", IX86_BUILTIN_SUBNEPBF16128, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_subnepbf16_v8bf_mask, "__builtin_ia32_subnepbf16128_mask", IX86_BUILTIN_SUBNEPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_mulnepbf16_v32bf, "__builtin_ia32_mulnepbf16512", IX86_BUILTIN_MULNEPBF16512, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_mulnepbf16_v32bf_mask, "__builtin_ia32_mulnepbf16512_mask", IX86_BUILTIN_MULNEPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_mulnepbf16_v16bf, "__builtin_ia32_mulnepbf16256", IX86_BUILTIN_MULNEPBF16256, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_mulnepbf16_v16bf_mask, "__builtin_ia32_mulnepbf16256_mask", IX86_BUILTIN_MULNEPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_mulnepbf16_v8bf, "__builtin_ia32_mulnepbf16128", IX86_BUILTIN_MULNEPBF16128, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_mulnepbf16_v8bf_mask, "__builtin_ia32_mulnepbf16128_mask", IX86_BUILTIN_MULNEPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_divnepbf16_v32bf, "__builtin_ia32_divnepbf16512", IX86_BUILTIN_DIVNEPBF16512, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_divnepbf16_v32bf_mask, "__builtin_ia32_divnepbf16512_mask", IX86_BUILTIN_DIVNEPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_divnepbf16_v16bf, "__builtin_ia32_divnepbf16256", IX86_BUILTIN_DIVNEPBF16256, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_divnepbf16_v16bf_mask, "__builtin_ia32_divnepbf16256_mask", IX86_BUILTIN_DIVNEPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_divnepbf16_v8bf, "__builtin_ia32_divnepbf16128", IX86_BUILTIN_DIVNEPBF16128, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_divnepbf16_v8bf_mask, "__builtin_ia32_divnepbf16128_mask", IX86_BUILTIN_DIVNEPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_smaxpbf16_v32bf, "__builtin_ia32_maxpbf16512", IX86_BUILTIN_MAXPBF16512, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_smaxpbf16_v32bf_mask, "__builtin_ia32_maxpbf16512_mask", IX86_BUILTIN_MAXPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_smaxpbf16_v16bf, "__builtin_ia32_maxpbf16256", IX86_BUILTIN_MAXPBF16256, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_smaxpbf16_v16bf_mask, "__builtin_ia32_maxpbf16256_mask", IX86_BUILTIN_MAXPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_smaxpbf16_v8bf, "__builtin_ia32_maxpbf16128", IX86_BUILTIN_MAXPBF16128, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_smaxpbf16_v8bf_mask, "__builtin_ia32_maxpbf16128_mask", IX86_BUILTIN_MAXPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_sminpbf16_v32bf, "__builtin_ia32_minpbf16512", IX86_BUILTIN_MINPBF16512, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_sminpbf16_v32bf_mask, "__builtin_ia32_minpbf16512_mask", IX86_BUILTIN_MINPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sminpbf16_v16bf, "__builtin_ia32_minpbf16256", IX86_BUILTIN_MINPBF16256, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sminpbf16_v16bf_mask, "__builtin_ia32_minpbf16256_mask", IX86_BUILTIN_MINPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sminpbf16_v8bf, "__builtin_ia32_minpbf16128", IX86_BUILTIN_MINPBF16128, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sminpbf16_v8bf_mask, "__builtin_ia32_minpbf16128_mask", IX86_BUILTIN_MINPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_scalefpbf16_v32bf, "__builtin_ia32_scalefpbf16512", IX86_BUILTIN_SCALEFPBF16512, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_scalefpbf16_v32bf_mask, "__builtin_ia32_scalefpbf16512_mask", IX86_BUILTIN_SCALEFPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_scalefpbf16_v16bf, "__builtin_ia32_scalefpbf16256", IX86_BUILTIN_SCALEFPBF16256, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_scalefpbf16_v16bf_mask, "__builtin_ia32_scalefpbf16256_mask", IX86_BUILTIN_SCALEFPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_scalefpbf16_v8bf, "__builtin_ia32_scalefpbf16128", IX86_BUILTIN_SCALEFPBF16128, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_scalefpbf16_v8bf_mask, "__builtin_ia32_scalefpbf16128_mask", IX86_BUILTIN_SCALEFPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fmaddnepbf16_v32bf_mask, "__builtin_ia32_fmaddnepbf16512_mask", IX86_BUILTIN_FMADDNEPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fmaddnepbf16_v32bf_mask3, "__builtin_ia32_fmaddnepbf16512_mask3", IX86_BUILTIN_FMADDNEPBF16512_MASK3, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fmaddnepbf16_v32bf_maskz, "__builtin_ia32_fmaddnepbf16512_maskz", IX86_BUILTIN_FMADDNEPBF16512_MASKZ, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmaddnepbf16_v16bf_mask, "__builtin_ia32_fmaddnepbf16256_mask", IX86_BUILTIN_FMADDNEPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmaddnepbf16_v16bf_mask3, "__builtin_ia32_fmaddnepbf16256_mask3", IX86_BUILTIN_FMADDNEPBF16256_MASK3, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmaddnepbf16_v16bf_maskz, "__builtin_ia32_fmaddnepbf16256_maskz", IX86_BUILTIN_FMADDNEPBF16256_MASKZ, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmaddnepbf16_v8bf_mask, "__builtin_ia32_fmaddnepbf16128_mask", IX86_BUILTIN_FMADDNEPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmaddnepbf16_v8bf_mask3, "__builtin_ia32_fmaddnepbf16128_mask3", IX86_BUILTIN_FMADDNEPBF16128_MASK3, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmaddnepbf16_v8bf_maskz, "__builtin_ia32_fmaddnepbf16128_maskz", IX86_BUILTIN_FMADDNEPBF16128_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fmsubnepbf16_v32bf_mask, "__builtin_ia32_fmsubnepbf16512_mask", IX86_BUILTIN_FMSUBNEPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fmsubnepbf16_v32bf_mask3, "__builtin_ia32_fmsubnepbf16512_mask3", IX86_BUILTIN_FMSUBNEPBF16512_MASK3, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fmsubnepbf16_v32bf_maskz, "__builtin_ia32_fmsubnepbf16512_maskz", IX86_BUILTIN_FMSUBNEPBF16512_MASKZ, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmsubnepbf16_v16bf_mask, "__builtin_ia32_fmsubnepbf16256_mask", IX86_BUILTIN_FMSUBNEPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmsubnepbf16_v16bf_mask3, "__builtin_ia32_fmsubnepbf16256_mask3", IX86_BUILTIN_FMSUBNEPBF16256_MASK3, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmsubnepbf16_v16bf_maskz, "__builtin_ia32_fmsubnepbf16256_maskz", IX86_BUILTIN_FMSUBNEPBF16256_MASKZ, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmsubnepbf16_v8bf_mask, "__builtin_ia32_fmsubnepbf16128_mask", IX86_BUILTIN_FMSUBNEPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmsubnepbf16_v8bf_mask3, "__builtin_ia32_fmsubnepbf16128_mask3", IX86_BUILTIN_FMSUBNEPBF16128_MASK3, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmsubnepbf16_v8bf_maskz, "__builtin_ia32_fmsubnepbf16128_maskz", IX86_BUILTIN_FMSUBNEPBF16128_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fnmaddnepbf16_v32bf_mask, "__builtin_ia32_fnmaddnepbf16512_mask", IX86_BUILTIN_FNMADDNEPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fnmaddnepbf16_v32bf_mask3, "__builtin_ia32_fnmaddnepbf16512_mask3", IX86_BUILTIN_FNMADDNEPBF16512_MASK3, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fnmaddnepbf16_v32bf_maskz, "__builtin_ia32_fnmaddnepbf16512_maskz", IX86_BUILTIN_FNMADDNEPBF16512_MASKZ, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmaddnepbf16_v16bf_mask, "__builtin_ia32_fnmaddnepbf16256_mask", IX86_BUILTIN_FNMADDNEPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmaddnepbf16_v16bf_mask3, "__builtin_ia32_fnmaddnepbf16256_mask3", IX86_BUILTIN_FNMADDNEPBF16256_MASK3, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmaddnepbf16_v16bf_maskz, "__builtin_ia32_fnmaddnepbf16256_maskz", IX86_BUILTIN_FNMADDNEPBF16256_MASKZ, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmaddnepbf16_v8bf_mask, "__builtin_ia32_fnmaddnepbf16128_mask", IX86_BUILTIN_FNMADDNEPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmaddnepbf16_v8bf_mask3, "__builtin_ia32_fnmaddnepbf16128_mask3", IX86_BUILTIN_FNMADDNEPBF16128_MASK3, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmaddnepbf16_v8bf_maskz, "__builtin_ia32_fnmaddnepbf16128_maskz", IX86_BUILTIN_FNMADDNEPBF16128_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fnmsubnepbf16_v32bf_mask, "__builtin_ia32_fnmsubnepbf16512_mask", IX86_BUILTIN_FNMSUBNEPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fnmsubnepbf16_v32bf_mask3, "__builtin_ia32_fnmsubnepbf16512_mask3", IX86_BUILTIN_FNMSUBNEPBF16512_MASK3, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fnmsubnepbf16_v32bf_maskz, "__builtin_ia32_fnmsubnepbf16512_maskz", IX86_BUILTIN_FNMSUBNEPBF16512_MASKZ, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmsubnepbf16_v16bf_mask, "__builtin_ia32_fnmsubnepbf16256_mask", IX86_BUILTIN_FNMSUBNEPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmsubnepbf16_v16bf_mask3, "__builtin_ia32_fnmsubnepbf16256_mask3", IX86_BUILTIN_FNMSUBNEPBF16256_MASK3, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmsubnepbf16_v16bf_maskz, "__builtin_ia32_fnmsubnepbf16256_maskz", IX86_BUILTIN_FNMSUBNEPBF16256_MASKZ, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmsubnepbf16_v8bf_mask, "__builtin_ia32_fnmsubnepbf16128_mask", IX86_BUILTIN_FNMSUBNEPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmsubnepbf16_v8bf_mask3, "__builtin_ia32_fnmsubnepbf16128_mask3", IX86_BUILTIN_FNMSUBNEPBF16128_MASK3, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmsubnepbf16_v8bf_maskz, "__builtin_ia32_fnmsubnepbf16128_maskz", IX86_BUILTIN_FNMSUBNEPBF16128_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI)

/* Builtins with rounding support. */
BDESC_END (ARGS, ROUND_ARGS)
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 41d6eb8..f5fbc8e 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -11267,6 +11267,9 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V16HI_FTYPE_V8SI_V8SI:
case V64QI_FTYPE_V64QI_V64QI:
case V32QI_FTYPE_V32QI_V32QI:
+ case V32BF_FTYPE_V32BF_V32BF:
+ case V16BF_FTYPE_V16BF_V16BF:
+ case V8BF_FTYPE_V8BF_V8BF:
case V16HI_FTYPE_V32QI_V32QI:
case V16HI_FTYPE_V16HI_V16HI:
case V8SI_FTYPE_V4DF_V4DF:
@@ -11434,6 +11437,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V16HI_FTYPE_V8HI_V16HI_UHI:
case V16HI_FTYPE_HI_V16HI_UHI:
case V8HI_FTYPE_V8HI_V8HI_UQI:
+ case V8BF_FTYPE_V8BF_V8BF_UQI:
case V8HI_FTYPE_HI_V8HI_UQI:
case V16HF_FTYPE_V16HF_V16HF_UHI:
case V8SF_FTYPE_V8HI_V8SF_UQI:
@@ -11531,9 +11535,11 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V16HF_FTYPE_V16HF_V16HF_V16HF:
case V16HI_FTYPE_V16HF_V16HI_UHI:
case V16HI_FTYPE_V16HI_V16HI_UHI:
+ case V16BF_FTYPE_V16BF_V16BF_UHI:
case V8HI_FTYPE_V16QI_V8HI_UQI:
case V16HI_FTYPE_V16QI_V16HI_UHI:
case V32HI_FTYPE_V32HI_V32HI_USI:
+ case V32BF_FTYPE_V32BF_V32BF_USI:
case V32HI_FTYPE_V32QI_V32HI_USI:
case V8DI_FTYPE_V16QI_V8DI_UQI:
case V8DI_FTYPE_V2DI_V8DI_UQI:
@@ -11663,6 +11669,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
break;
case V32QI_FTYPE_V32QI_V32QI_V32QI_USI:
case V32HI_FTYPE_V32HI_V32HI_V32HI_USI:
+ case V32BF_FTYPE_V32BF_V32BF_V32BF_USI:
case V32HI_FTYPE_V64QI_V64QI_V32HI_USI:
case V16SI_FTYPE_V32HI_V32HI_V16SI_UHI:
case V64QI_FTYPE_V64QI_V64QI_V64QI_UDI:
@@ -11693,6 +11700,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V16SI_FTYPE_V16SI_V16SI_V16SI_UHI:
case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI:
case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI:
+ case V8BF_FTYPE_V8BF_V8BF_V8BF_UQI:
case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI:
case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI:
case V16HF_FTYPE_V16HF_V16HF_V16HF_UQI:
@@ -11700,6 +11708,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI:
case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI:
case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI:
+ case V16BF_FTYPE_V16BF_V16BF_V16BF_UHI:
case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI:
case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI:
case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI:
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index fea55a29..0253340 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -148,4 +148,8 @@

#include <avx10_2-512convertintrin.h>

+#include <avx10_2bf16intrin.h>
+
+#include <avx10_2-512bf16intrin.h>
+
#endif /* _IMMINTRIN_H_INCLUDED */
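
The new headers are only reachable through <immintrin.h>; including one
directly trips the #error guard added above (hypothetical check):

/* Hypothetical: fails at compile time with "Never use
   <avx10_2bf16intrin.h> directly; include <immintrin.h> instead."  */
#include <avx10_2bf16intrin.h>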
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 622873b..dad7f61 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -229,6 +229,7 @@
UNSPEC_VCVTNEPH2HF8
UNSPEC_VCVTNEPH2HF8S
UNSPEC_VCVTHF82PH
+ UNSPEC_VSCALEFPBF16
])

(define_c_enum "unspecv" [
@@ -499,6 +500,9 @@
(define_mode_iterator VHF_AVX10_2
[(V32HF "TARGET_AVX10_2_512") V16HF V8HF])

+(define_mode_iterator VBF_AVX10_2
+ [(V32BF "TARGET_AVX10_2_512") V16BF V8BF])
+
;; All vector integer modes
(define_mode_iterator VI
[(V16SI "TARGET_AVX512F && TARGET_EVEX512")
@@ -31812,3 +31816,292 @@
"TARGET_AVX10_2_256"
"vdpphps\t{%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %2, %3}"
[(set_attr "prefix" "evex")])
+
+(define_insn "avx10_2_scalefpbf16_<mode><mask_name>"
+ [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
+ (unspec:VBF_AVX10_2
+ [(match_operand:VBF_AVX10_2 1 "register_operand" "v")
+ (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")]
+ UNSPEC_VSCALEFPBF16))]
+ "TARGET_AVX10_2_256"
+ "vscalefpbf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "prefix" "evex")])
+
+(define_insn "avx10_2_<code>pbf16_<mode><mask_name>"
+ [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
+ (smaxmin:VBF_AVX10_2
+ (match_operand:VBF_AVX10_2 1 "register_operand" "v")
+ (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX10_2_256"
+ "v<maxmin_float>pbf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx10_2_<insn>nepbf16_<mode><mask_name>"
+ [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
+ (plusminusmultdiv:VBF_AVX10_2
+ (match_operand:VBF_AVX10_2 1 "register_operand" "v")
+ (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX10_2_256"
+ "v<insn>nepbf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "prefix" "evex")])
+
+(define_expand "avx10_2_fmaddnepbf16_<mode>_maskz"
+ [(match_operand:VBF_AVX10_2 0 "register_operand")
+ (match_operand:VBF_AVX10_2 1 "nonimmediate_operand")
+ (match_operand:VBF_AVX10_2 2 "nonimmediate_operand")
+ (match_operand:VBF_AVX10_2 3 "nonimmediate_operand")
+ (match_operand:<avx512fmaskmode> 4 "register_operand")]
+ "TARGET_AVX10_2_256"
+ {
+ emit_insn (gen_avx10_2_fmaddnepbf16_<mode>_maskz_1 (operands[0], operands[1],
+ operands[2], operands[3],
+						     CONST0_RTX (<MODE>mode),
+ operands[4]));
+ DONE;
+ })
+
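+;; The 132/213/231 form is chosen per alternative, depending on which
+;; input operand is tied to the destination register.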
+(define_insn "avx10_2_fmaddnepbf16_<mode><sd_maskz_name>"
+ [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v")
+ (fma:VBF_AVX10_2
+ (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v")
+ (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v,vm")
+ (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0")))]
+ "TARGET_AVX10_2_256"
+ "@
+ vfmadd132nepbf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2}
+ vfmadd213nepbf16\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}
+ vfmadd231nepbf16\t{%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2}"
+ [(set_attr "prefix" "evex")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx10_2_fmaddnepbf16_<mode>_mask"
+ [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v")
+ (vec_merge:VBF_AVX10_2
+ (fma:VBF_AVX10_2
+ (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0")
+ (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v")
+ (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm"))
+ (match_dup 1)
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
+ "TARGET_AVX10_2_256"
+ "@
+ vfmadd132nepbf16\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2}
+ vfmadd213nepbf16\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
+ [(set_attr "prefix" "evex")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx10_2_fmaddnepbf16_<mode>_mask3"
+ [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
+ (vec_merge:VBF_AVX10_2
+ (fma:VBF_AVX10_2
+ (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v")
+ (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")
+ (match_operand:VBF_AVX10_2 3 "register_operand" "0"))
+ (match_dup 3)
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
+ "TARGET_AVX10_2_256"
+ "vfmadd231nepbf16\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}"
+ [(set_attr "prefix" "evex")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_expand "avx10_2_fnmaddnepbf16_<mode>_maskz"
+ [(match_operand:VBF_AVX10_2 0 "register_operand")
+ (match_operand:VBF_AVX10_2 1 "nonimmediate_operand")
+ (match_operand:VBF_AVX10_2 2 "nonimmediate_operand")
+ (match_operand:VBF_AVX10_2 3 "nonimmediate_operand")
+ (match_operand:<avx512fmaskmode> 4 "register_operand")]
+ "TARGET_AVX10_2_256"
+ {
+ emit_insn (gen_avx10_2_fnmaddnepbf16_<mode>_maskz_1 (operands[0], operands[1],
+ operands[2], operands[3],
+						      CONST0_RTX (<MODE>mode),
+ operands[4]));
+ DONE;
+ })
+
+(define_insn "avx10_2_fnmaddnepbf16_<mode><sd_maskz_name>"
+ [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v")
+ (fma:VBF_AVX10_2
+ (neg:VBF_AVX10_2
+ (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v"))
+ (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v,vm")
+ (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0")))]
+ "TARGET_AVX10_2_256"
+ "@
+ vfnmadd132nepbf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2}
+ vfnmadd213nepbf16\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}
+ vfnmadd231nepbf16\t{%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2}"
+ [(set_attr "prefix" "evex")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx10_2_fnmaddnepbf16_<mode>_mask"
+ [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v")
+ (vec_merge:VBF_AVX10_2
+ (fma:VBF_AVX10_2
+ (neg:VBF_AVX10_2
+ (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0"))
+ (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v")
+ (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm"))
+ (match_dup 1)
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
+ "TARGET_AVX10_2_256"
+ "@
+ vfnmadd132nepbf16\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2}
+ vfnmadd213nepbf16\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
+ [(set_attr "prefix" "evex")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx10_2_fnmaddnepbf16_<mode>_mask3"
+ [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
+ (vec_merge:VBF_AVX10_2
+ (fma:VBF_AVX10_2
+ (neg:VBF_AVX10_2
+ (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v"))
+ (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")
+ (match_operand:VBF_AVX10_2 3 "register_operand" "0"))
+ (match_dup 3)
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
+ "TARGET_AVX10_2_256"
+ "vfnmadd231nepbf16\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}"
+ [(set_attr "prefix" "evex")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_expand "avx10_2_fmsubnepbf16_<mode>_maskz"
+ [(match_operand:VBF_AVX10_2 0 "register_operand")
+ (match_operand:VBF_AVX10_2 1 "nonimmediate_operand")
+ (match_operand:VBF_AVX10_2 2 "nonimmediate_operand")
+ (match_operand:VBF_AVX10_2 3 "nonimmediate_operand")
+ (match_operand:<avx512fmaskmode> 4 "register_operand")]
+ "TARGET_AVX10_2_256"
+ {
+ emit_insn (gen_avx10_2_fmsubnepbf16_<mode>_maskz_1 (operands[0], operands[1],
+ operands[2], operands[3],
+						     CONST0_RTX (<MODE>mode),
+ operands[4]));
+ DONE;
+ })
+
+(define_insn "avx10_2_fmsubnepbf16_<mode><sd_maskz_name>"
+ [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v")
+ (fma:VBF_AVX10_2
+ (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v")
+ (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v,vm")
+ (neg:VBF_AVX10_2
+ (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0"))))]
+ "TARGET_AVX10_2_256"
+ "@
+ vfmsub132nepbf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2}
+ vfmsub213nepbf16\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}
+ vfmsub231nepbf16\t{%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2}"
+ [(set_attr "prefix" "evex")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx10_2_fmsubnepbf16_<mode>_mask"
+ [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v")
+ (vec_merge:VBF_AVX10_2
+ (fma:VBF_AVX10_2
+ (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0")
+ (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v")
+ (neg:VBF_AVX10_2
+ (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm")))
+ (match_dup 1)
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
+ "TARGET_AVX10_2_256"
+ "@
+ vfmsub132nepbf16\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2}
+ vfmsub213nepbf16\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
+ [(set_attr "prefix" "evex")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx10_2_fmsubnepbf16_<mode>_mask3"
+ [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
+ (vec_merge:VBF_AVX10_2
+ (fma:VBF_AVX10_2
+ (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v")
+ (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")
+ (neg:VBF_AVX10_2
+ (match_operand:VBF_AVX10_2 3 "register_operand" "0")))
+ (match_dup 3)
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
+ "TARGET_AVX10_2_256"
+ "vfmsub231nepbf16\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}"
+ [(set_attr "prefix" "evex")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_expand "avx10_2_fnmsubnepbf16_<mode>_maskz"
+ [(match_operand:VBF_AVX10_2 0 "register_operand")
+ (match_operand:VBF_AVX10_2 1 "nonimmediate_operand")
+ (match_operand:VBF_AVX10_2 2 "nonimmediate_operand")
+ (match_operand:VBF_AVX10_2 3 "nonimmediate_operand")
+ (match_operand:<avx512fmaskmode> 4 "register_operand")]
+ "TARGET_AVX10_2_256"
+ {
+ emit_insn (gen_avx10_2_fnmsubnepbf16_<mode>_maskz_1 (operands[0], operands[1],
+ operands[2], operands[3],
+						      CONST0_RTX (<MODE>mode),
+ operands[4]));
+ DONE;
+ })
+
+(define_insn "avx10_2_fnmsubnepbf16_<mode><sd_maskz_name>"
+ [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v")
+ (fma:VBF_AVX10_2
+ (neg:VBF_AVX10_2
+ (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v"))
+ (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v,vm")
+ (neg:VBF_AVX10_2
+ (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0"))))]
+ "TARGET_AVX10_2_256"
+ "@
+ vfnmsub132nepbf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2}
+ vfnmsub213nepbf16\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}
+ vfnmsub231nepbf16\t{%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2}"
+ [(set_attr "prefix" "evex")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx10_2_fnmsubnepbf16_<mode>_mask"
+ [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v")
+ (vec_merge:VBF_AVX10_2
+ (fma:VBF_AVX10_2
+ (neg:VBF_AVX10_2
+ (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0"))
+ (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v")
+ (neg:VBF_AVX10_2
+ (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm")))
+ (match_dup 1)
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
+ "TARGET_AVX10_2_256"
+ "@
+ vfnmsub132nepbf16\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2}
+ vfnmsub213nepbf16\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
+ [(set_attr "prefix" "evex")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx10_2_fnmsubnepbf16_<mode>_mask3"
+ [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
+ (vec_merge:VBF_AVX10_2
+ (fma:VBF_AVX10_2
+ (neg:VBF_AVX10_2
+ (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v"))
+ (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")
+ (neg:VBF_AVX10_2
+ (match_operand:VBF_AVX10_2 3 "register_operand" "0")))
+ (match_dup 3)
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
+ "TARGET_AVX10_2_256"
+ "vfnmsub231nepbf16\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}"
+ [(set_attr "prefix" "evex")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<sseinsnmode>")])
diff --git a/gcc/testsuite/gcc.target/i386/avx10-helper.h b/gcc/testsuite/gcc.target/i386/avx10-helper.h
index 385c744..9ff1dd72 100644
--- a/gcc/testsuite/gcc.target/i386/avx10-helper.h
+++ b/gcc/testsuite/gcc.target/i386/avx10-helper.h
@@ -3,9 +3,55 @@
#define AVX10
#define AVX512FP16
-
+#define AVX512BF16
#include "avx512f-helper.h"
#include "avx512f-mask-type.h"
+#include <stdint.h>
+
+#define NOINLINE __attribute__((noinline,noclone))
+typedef union
+{
+  uint32_t int32;
+  float flt;
+} float_int_t;
+
+float NOINLINE
+convert_bf16_to_fp32 (unsigned short bf16)
+{
+  /* Go through the union to avoid strict-aliasing undefined behavior.  */
+  float_int_t fi;
+  fi.int32 = (uint32_t) bf16 << 16;
+  return fi.flt;
+}
+
+unsigned short NOINLINE
+convert_fp32_to_bf16 (float fp)
+{
+ float_int_t fi;
+ fi.flt = fp;
+ return ((fi.int32 >> 16) & 0xffff);
+}
+
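+/* Unlike the truncating conversion above, this rounds to nearest even:
+   adding 0x7fff plus the LSB of the truncated result rounds ties to an
+   even mantissa before the low 16 bits are dropped.  */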
+unsigned short NOINLINE
+convert_fp32_to_bf16_ne (float fp)
+{
+ float_int_t fi;
+ uint32_t rounding_bias, lsb;
+
+ fi.flt = fp;
+ lsb = (fi.int32 >> 16) & 0x1;
+ rounding_bias = 0x7fff + lsb;
+ fi.int32 += rounding_bias;
+
+ return ((fi.int32 >> 16) & 0xffff);
+}
+
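+/* Reference scalef: x * 2^floor (y), computed with the AVX512F scalar
+   instruction via _mm_scalef_ss.  */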
+float NOINLINE
+scalef (float x, float y)
+{
+ __m128 px = _mm_load_ss (&x);
+ __m128 py = _mm_load_ss (&y);
+ __m128 out = _mm_scalef_ss (px, py);
+ return _mm_cvtss_f32 (out);
+}
#endif /* AVX10_HELPER_INCLUDED */
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-1.c
new file mode 100644
index 0000000..78839fb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-1.c
@@ -0,0 +1,87 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx10.2-512 -O2" } */
+/* { dg-final { scan-assembler-times "vaddnepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vaddnepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vaddnepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vsubnepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vsubnepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vsubnepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmulnepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmulnepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmulnepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vdivnepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vdivnepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vdivnepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd231nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub231nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd231nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub231nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512bh res, x1, x2;
+volatile __mmask32 m32;
+
+void extern
+avx10_2_512_test (void)
+{
+ res = _mm512_addne_pbh (x1, x2);
+ res = _mm512_mask_addne_pbh (res, m32, x1, x2);
+ res = _mm512_maskz_addne_pbh (m32, x1, x2);
+ res = _mm512_subne_pbh (x1, x2);
+ res = _mm512_mask_subne_pbh (res, m32, x1, x2);
+ res = _mm512_maskz_subne_pbh (m32, x1, x2);
+ res = _mm512_mulne_pbh (x1, x2);
+ res = _mm512_mask_mulne_pbh (res, m32, x1, x2);
+ res = _mm512_maskz_mulne_pbh (m32, x1, x2);
+ res = _mm512_divne_pbh (x1, x2);
+ res = _mm512_mask_divne_pbh (res, m32, x1, x2);
+ res = _mm512_maskz_divne_pbh (m32, x1, x2);
+ res = _mm512_max_pbh (x1, x2);
+ res = _mm512_mask_max_pbh (res, m32, x1, x2);
+ res = _mm512_maskz_max_pbh (m32, x1, x2);
+ res = _mm512_min_pbh (x1, x2);
+ res = _mm512_mask_min_pbh (res, m32, x1, x2);
+ res = _mm512_maskz_min_pbh (m32, x1, x2);
+ res = _mm512_scalef_pbh (x1, x2);
+ res = _mm512_mask_scalef_pbh (res, m32, x1, x2);
+ res = _mm512_maskz_scalef_pbh (m32, x1, x2);
+
+  res = _mm512_fmaddne_pbh (res, x1, x2);
+  res = _mm512_mask_fmaddne_pbh (res, m32, x1, x2);
+  res = _mm512_mask3_fmaddne_pbh (res, x1, x2, m32);
+  res = _mm512_maskz_fmaddne_pbh (m32, res, x1, x2);
+  res = _mm512_fmsubne_pbh (res, x1, x2);
+  res = _mm512_mask_fmsubne_pbh (res, m32, x1, x2);
+  res = _mm512_mask3_fmsubne_pbh (res, x1, x2, m32);
+  res = _mm512_maskz_fmsubne_pbh (m32, res, x1, x2);
+  res = _mm512_fnmaddne_pbh (res, x1, x2);
+  res = _mm512_mask_fnmaddne_pbh (res, m32, x1, x2);
+  res = _mm512_mask3_fnmaddne_pbh (res, x1, x2, m32);
+  res = _mm512_maskz_fnmaddne_pbh (m32, res, x1, x2);
+  res = _mm512_fnmsubne_pbh (res, x1, x2);
+  res = _mm512_mask_fnmsubne_pbh (res, m32, x1, x2);
+  res = _mm512_mask3_fnmsubne_pbh (res, x1, x2, m32);
+  res = _mm512_maskz_fnmsubne_pbh (m32, res, x1, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vaddnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vaddnepbf16-2.c
new file mode 100644
index 0000000..3b7d163
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vaddnepbf16-2.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+#include "avx10-helper.h"
+#define SIZE_RES (AVX512F_LEN / 16)
+
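+/* Build BF16 inputs from float, compute the float reference and convert
+   it back with round-to-nearest-even to match the rounding done by
+   vaddnepbf16.  */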
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES];
+
+ for (i = 0; i < SIZE_RES; i++)
+ {
+ res1.a[i] = 0;
+ res2.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ float x = (float) (2 * (i % 7) + 7);
+ float y = (float) (3 * (i % 7) - 5);
+ float res;
+ src2.a[i] = convert_fp32_to_bf16 (y);
+ src1.a[i] = convert_fp32_to_bf16 (x);
+ res = x + y;
+ res_ref[i] = res_ref2[i] = convert_fp32_to_bf16_ne (res);
+ }
+
+ res1.x = INTRINSIC (_addne_pbh) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_addne_pbh) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_addne_pbh) (mask, src1.x, src2.x);
+
+ if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES);
+ if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2))
+ abort ();
+
+ MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE_RES);
+ if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vdivnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vdivnepbf16-2.c
new file mode 100644
index 0000000..ca90828
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vdivnepbf16-2.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+#include "avx10-helper.h"
+#define SIZE_RES (AVX512F_LEN / 16)
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES];
+
+ for (i = 0; i < SIZE_RES; i++)
+ {
+ res1.a[i] = 0;
+ res2.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ float x = (float) (2 * (i % 7) + 7);
+ float y = (float) (3 * (i % 7) - 5);
+ float res;
+ src2.a[i] = convert_fp32_to_bf16 (y);
+ src1.a[i] = convert_fp32_to_bf16 (x);
+ res = x / y;
+ res_ref[i] = res_ref2[i] = convert_fp32_to_bf16_ne (res);
+ }
+
+ res1.x = INTRINSIC (_divne_pbh) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_divne_pbh) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_divne_pbh) (mask, src1.x, src2.x);
+
+ if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES);
+ if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2))
+ abort ();
+
+ MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE_RES);
+ if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vfmaddXXXnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vfmaddXXXnepbf16-2.c
new file mode 100644
index 0000000..b19c9d4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vfmaddXXXnepbf16-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+#include "avx10-helper.h"
+#define SIZE_RES (AVX512F_LEN / 16)
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES];
+
+ for (i = 0; i < SIZE_RES; i++)
+ {
+ float x = 0.5;
+ float y = 2;
+ float z = 0.25;
+ src1.a[i] = convert_fp32_to_bf16 (x);
+ src2.a[i] = convert_fp32_to_bf16 (y);
+ res1.a[i] = convert_fp32_to_bf16 (z);
+ res2.a[i] = res1.a[i];
+ float x16, y16, z16, m1, m2;
+ x16 = convert_bf16_to_fp32 (src1.a[i]);
+ y16 = convert_bf16_to_fp32 (src2.a[i]);
+ z16 = convert_bf16_to_fp32 (res1.a[i]);
+ m1 = y16 + x16 * z16;
+ m2 = z16 + x16 * y16;
+ res_ref[i] = convert_fp32_to_bf16 (m1);
+ res_ref2[i] = convert_fp32_to_bf16 (m2);
+ }
+
+ MASK_MERGE (bf16_uw) (res1.a, mask, SIZE_RES);
+ MASK_MERGE (bf16_uw) (res2.a, mask, SIZE_RES);
+ res1.x = INTRINSIC (_mask_fmaddne_pbh) (res1.x, mask, src1.x, src2.x);
+ res2.x = INTRINSIC (_mask3_fmaddne_pbh) (src1.x, src2.x, res2.x, mask);
+
+ MASK_MERGE (bf16_uw) (res_ref, mask, SIZE_RES);
+ if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES);
+ if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vfmsubXXXnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vfmsubXXXnepbf16-2.c
new file mode 100644
index 0000000..86adbc5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vfmsubXXXnepbf16-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+#include "avx10-helper.h"
+
+#define SIZE_RES (AVX512F_LEN / 16)
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES];
+
+ for (i = 0; i < SIZE_RES; i++)
+ {
+ float x = 0.5;
+ float y = 2;
+ float z = 0.25;
+ src1.a[i] = convert_fp32_to_bf16 (x);
+ src2.a[i] = convert_fp32_to_bf16 (y);
+ res1.a[i] = convert_fp32_to_bf16 (z);
+ res2.a[i] = res1.a[i];
+ float x16, y16, z16, m1, m2;
+ x16 = convert_bf16_to_fp32 (src1.a[i]);
+ y16 = convert_bf16_to_fp32 (src2.a[i]);
+ z16 = convert_bf16_to_fp32 (res1.a[i]);
+ m1 = -y16 + x16 * z16;
+ m2 = -z16 + x16 * y16;
+ res_ref[i] = convert_fp32_to_bf16 (m1);
+ res_ref2[i] = convert_fp32_to_bf16 (m2);
+ }
+
+ MASK_MERGE (bf16_uw) (res1.a, mask, SIZE_RES);
+ MASK_MERGE (bf16_uw) (res2.a, mask, SIZE_RES);
+ res1.x = INTRINSIC (_mask_fmsubne_pbh) (res1.x, mask, src1.x, src2.x);
+ res2.x = INTRINSIC (_mask3_fmsubne_pbh) (src1.x, src2.x, res2.x, mask);
+
+ MASK_MERGE (bf16_uw) (res_ref, mask, SIZE_RES);
+ if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES);
+ if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vfnmaddXXXnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vfnmaddXXXnepbf16-2.c
new file mode 100644
index 0000000..3a7d4cf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vfnmaddXXXnepbf16-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+#include "avx10-helper.h"
+
+#define SIZE_RES (AVX512F_LEN / 16)
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES];
+
+ for (i = 0; i < SIZE_RES; i++)
+ {
+ float x = 0.5;
+ float y = 2;
+ float z = 0.25;
+ src1.a[i] = convert_fp32_to_bf16 (x);
+ src2.a[i] = convert_fp32_to_bf16 (y);
+ res1.a[i] = convert_fp32_to_bf16 (z);
+ res2.a[i] = res1.a[i];
+ float x16, y16, z16, m1, m2;
+ x16 = convert_bf16_to_fp32 (src1.a[i]);
+ y16 = convert_bf16_to_fp32 (src2.a[i]);
+ z16 = convert_bf16_to_fp32 (res1.a[i]);
+ m1 = y16 - x16 * z16;
+ m2 = z16 - x16 * y16;
+ res_ref[i] = convert_fp32_to_bf16 (m1);
+ res_ref2[i] = convert_fp32_to_bf16 (m2);
+ }
+
+ MASK_MERGE (bf16_uw) (res1.a, mask, SIZE_RES);
+ MASK_MERGE (bf16_uw) (res2.a, mask, SIZE_RES);
+ res1.x = INTRINSIC (_mask_fnmaddne_pbh) (res1.x, mask, src1.x, src2.x);
+ res2.x = INTRINSIC (_mask3_fnmaddne_pbh) (src1.x, src2.x, res2.x, mask);
+
+ MASK_MERGE (bf16_uw) (res_ref, mask, SIZE_RES);
+ if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES);
+ if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vfnmsubXXXnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vfnmsubXXXnepbf16-2.c
new file mode 100644
index 0000000..943146e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vfnmsubXXXnepbf16-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+#include "avx10-helper.h"
+
+#define SIZE_RES (AVX512F_LEN / 16)
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES];
+
+ for (i = 0; i < SIZE_RES; i++)
+ {
+ float x = 0.5;
+ float y = 2;
+ float z = 0.25;
+ src1.a[i] = convert_fp32_to_bf16 (x);
+ src2.a[i] = convert_fp32_to_bf16 (y);
+ res1.a[i] = convert_fp32_to_bf16 (z);
+ res2.a[i] = res1.a[i];
+ float x16, y16, z16, m1, m2;
+ x16 = convert_bf16_to_fp32 (src1.a[i]);
+ y16 = convert_bf16_to_fp32 (src2.a[i]);
+ z16 = convert_bf16_to_fp32 (res1.a[i]);
+ m1 = -y16 - x16 * z16;
+ m2 = -z16 - x16 * y16;
+ res_ref[i] = convert_fp32_to_bf16 (m1);
+ res_ref2[i] = convert_fp32_to_bf16 (m2);
+ }
+
+ MASK_MERGE (bf16_uw) (res1.a, mask, SIZE_RES);
+ MASK_MERGE (bf16_uw) (res2.a, mask, SIZE_RES);
+ res1.x = INTRINSIC (_mask_fnmsubne_pbh) (res1.x, mask, src1.x, src2.x);
+ res2.x = INTRINSIC (_mask3_fnmsubne_pbh) (src1.x, src2.x, res2.x, mask);
+
+ MASK_MERGE (bf16_uw) (res_ref, mask, SIZE_RES);
+ if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES);
+ if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vmaxpbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vmaxpbf16-2.c
new file mode 100644
index 0000000..a563b1e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vmaxpbf16-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+#include "avx10-helper.h"
+#define SIZE_RES (AVX512F_LEN / 16)
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES];
+
+ for (i = 0; i < SIZE_RES; i++)
+ {
+ res1.a[i] = 0;
+ res2.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ float x = 0.5;
+ float y = 0.25;
+ src2.a[i] = convert_fp32_to_bf16 (y);
+ src1.a[i] = convert_fp32_to_bf16 (x);
+ if (x > y)
+ res_ref[i] = res_ref2[i] = src1.a[i];
+ else
+ res_ref[i] = res_ref2[i] = src2.a[i];
+ }
+
+ res1.x = INTRINSIC (_max_pbh) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_max_pbh) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_max_pbh) (mask, src1.x, src2.x);
+
+ if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES);
+ if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2))
+ abort ();
+
+ MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE_RES);
+ if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vminpbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vminpbf16-2.c
new file mode 100644
index 0000000..10f13d4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vminpbf16-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+#include "avx10-helper.h"
+#define SIZE_RES (AVX512F_LEN / 16)
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES];
+
+ for (i = 0; i < SIZE_RES; i++)
+ {
+ res1.a[i] = 0;
+ res2.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ float x = 0.5;
+ float y = 0.25;
+ src2.a[i] = convert_fp32_to_bf16 (y);
+ src1.a[i] = convert_fp32_to_bf16 (x);
+ if (x < y)
+ res_ref[i] = res_ref2[i] = src1.a[i];
+ else
+ res_ref[i] = res_ref2[i] = src2.a[i];
+ }
+
+ res1.x = INTRINSIC (_min_pbh) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_min_pbh) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_min_pbh) (mask, src1.x, src2.x);
+
+ if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES);
+ if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2))
+ abort ();
+
+ MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE_RES);
+ if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vmulnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vmulnepbf16-2.c
new file mode 100644
index 0000000..ce16807
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vmulnepbf16-2.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+#include "avx10-helper.h"
+#define SIZE_RES (AVX512F_LEN / 16)
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES];
+
+ for (i = 0; i < SIZE_RES; i++)
+ {
+ res1.a[i] = 0;
+ res2.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ float x = (float) (2 * (i % 7) + 7);
+ float y = (float) (3 * (i % 7) - 5);
+ float res;
+ src2.a[i] = convert_fp32_to_bf16 (y);
+ src1.a[i] = convert_fp32_to_bf16 (x);
+ res = x * y;
+ res_ref[i] = res_ref2[i] = convert_fp32_to_bf16_ne (res);
+ }
+
+ res1.x = INTRINSIC (_mulne_pbh) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_mulne_pbh) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_mulne_pbh) (mask, src1.x, src2.x);
+
+ if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES);
+ if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2))
+ abort ();
+
+ MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE_RES);
+ if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vscalefpbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vscalefpbf16-2.c
new file mode 100644
index 0000000..78df474
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vscalefpbf16-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+#include "avx10-helper.h"
+#define SIZE_RES (AVX512F_LEN / 16)
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES];
+
+ for (i = 0; i < SIZE_RES; i++)
+ {
+ res1.a[i] = 0;
+ res2.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ float x = (float) (2 * (i % 7) + 7);
+ float y = 1.0 + (float) (4 * i) / (float) SIZE_RES;
+ float xx, yy, res;
+ src2.a[i] = convert_fp32_to_bf16 (y);
+ src1.a[i] = convert_fp32_to_bf16 (x);
+ xx = convert_bf16_to_fp32 (src1.a[i]);
+ yy = convert_bf16_to_fp32 (src2.a[i]);
+ res = scalef (xx, yy);
+ res_ref[i] = res_ref2[i] = convert_fp32_to_bf16 (res);
+ }
+
+ res1.x = INTRINSIC (_scalef_pbh) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_scalef_pbh) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_scalef_pbh) (mask, src1.x, src2.x);
+
+ if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES);
+ if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2))
+ abort ();
+
+ MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE_RES);
+ if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vsubnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vsubnepbf16-2.c
new file mode 100644
index 0000000..f8a9a51
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vsubnepbf16-2.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+#include "avx10-helper.h"
+#define SIZE_RES (AVX512F_LEN / 16)
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES];
+
+ for (i = 0; i < SIZE_RES; i++)
+ {
+ res1.a[i] = 0;
+ res2.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ float x = (float) (2 * (i % 7) + 7);
+ float y = (float) (3 * (i % 7) - 5);
+ float res;
+ src2.a[i] = convert_fp32_to_bf16 (y);
+ src1.a[i] = convert_fp32_to_bf16 (x);
+ res = x - y;
+ res_ref[i] = res_ref2[i] = convert_fp32_to_bf16_ne (res);
+ }
+
+ res1.x = INTRINSIC (_subne_pbh) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_subne_pbh) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_subne_pbh) (mask, src1.x, src2.x);
+
+ if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES);
+ if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2))
+ abort ();
+
+ MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE_RES);
+ if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-bf16-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-bf16-1.c
new file mode 100644
index 0000000..831c8f8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-bf16-1.c
@@ -0,0 +1,172 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx10.2 -O2" } */
+/* { dg-final { scan-assembler-times "vaddnepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vaddnepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vaddnepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vaddnepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vaddnepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vaddnepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vsubnepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vsubnepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vsubnepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vsubnepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vsubnepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vsubnepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmulnepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmulnepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmulnepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmulnepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmulnepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmulnepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vdivnepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vdivnepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vdivnepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vdivnepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vdivnepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vdivnepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd231nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd231nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub231nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub231nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd231nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd231nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub231nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub231nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256bh res, x1, x2;
+volatile __m128bh res1, x3, x4;
+volatile __mmask16 m16;
+volatile __mmask8 m8;
+
+void extern
+avx10_2_test (void)
+{
+ res = _mm256_addne_pbh (x1, x2);
+ res = _mm256_mask_addne_pbh (res, m16, x1, x2);
+ res = _mm256_maskz_addne_pbh (m16, x1, x2);
+ res1 = _mm_addne_pbh (x3, x4);
+ res1 = _mm_mask_addne_pbh (res1, m8, x3, x4);
+ res1 = _mm_maskz_addne_pbh (m8, x3, x4);
+
+ res = _mm256_subne_pbh (x1, x2);
+ res = _mm256_mask_subne_pbh (res, m16, x1, x2);
+ res = _mm256_maskz_subne_pbh (m16, x1, x2);
+ res1 = _mm_subne_pbh (x3, x4);
+ res1 = _mm_mask_subne_pbh (res1, m8, x3, x4);
+ res1 = _mm_maskz_subne_pbh (m8, x3, x4);
+
+ res = _mm256_mulne_pbh (x1, x2);
+ res = _mm256_mask_mulne_pbh (res, m16, x1, x2);
+ res = _mm256_maskz_mulne_pbh (m16, x1, x2);
+ res1 = _mm_mulne_pbh (x3, x4);
+ res1 = _mm_mask_mulne_pbh (res1, m8, x3, x4);
+ res1 = _mm_maskz_mulne_pbh (m8, x3, x4);
+
+ res = _mm256_divne_pbh (x1, x2);
+ res = _mm256_mask_divne_pbh (res, m16, x1, x2);
+ res = _mm256_maskz_divne_pbh (m16, x1, x2);
+ res1 = _mm_divne_pbh (x3, x4);
+ res1 = _mm_mask_divne_pbh (res1, m8, x3, x4);
+ res1 = _mm_maskz_divne_pbh (m8, x3, x4);
+
+ res = _mm256_max_pbh (x1, x2);
+ res = _mm256_mask_max_pbh (res, m16, x1, x2);
+ res = _mm256_maskz_max_pbh (m16, x1, x2);
+ res1 = _mm_max_pbh (x3, x4);
+ res1 = _mm_mask_max_pbh (res1, m8, x3, x4);
+ res1 = _mm_maskz_max_pbh (m8, x3, x4);
+
+ res = _mm256_min_pbh (x1, x2);
+ res = _mm256_mask_min_pbh (res, m16, x1, x2);
+ res = _mm256_maskz_min_pbh (m16, x1, x2);
+ res1 = _mm_min_pbh (x3, x4);
+ res1 = _mm_mask_min_pbh (res1, m8, x3, x4);
+ res1 = _mm_maskz_min_pbh (m8, x3, x4);
+
+ res = _mm256_scalef_pbh (x1, x2);
+ res = _mm256_mask_scalef_pbh (res, m16, x1, x2);
+ res = _mm256_maskz_scalef_pbh (m16, x1, x2);
+ res1 = _mm_scalef_pbh (x3, x4);
+ res1 = _mm_mask_scalef_pbh (res1, m8, x3, x4);
+ res1 = _mm_maskz_scalef_pbh (m8, x3, x4);
+
+  res = _mm256_fmaddne_pbh (res, x1, x2);
+  res = _mm256_mask_fmaddne_pbh (res, m16, x1, x2);
+  res = _mm256_mask3_fmaddne_pbh (res, x1, x2, m16);
+  res = _mm256_maskz_fmaddne_pbh (m16, res, x1, x2);
+  res1 = _mm_fmaddne_pbh (res1, x3, x4);
+  res1 = _mm_mask_fmaddne_pbh (res1, m8, x3, x4);
+  res1 = _mm_mask3_fmaddne_pbh (res1, x3, x4, m8);
+  res1 = _mm_maskz_fmaddne_pbh (m8, res1, x3, x4);
+
+  res = _mm256_fmsubne_pbh (res, x1, x2);
+  res = _mm256_mask_fmsubne_pbh (res, m16, x1, x2);
+  res = _mm256_mask3_fmsubne_pbh (res, x1, x2, m16);
+  res = _mm256_maskz_fmsubne_pbh (m16, res, x1, x2);
+  res1 = _mm_fmsubne_pbh (res1, x3, x4);
+  res1 = _mm_mask_fmsubne_pbh (res1, m8, x3, x4);
+  res1 = _mm_mask3_fmsubne_pbh (res1, x3, x4, m8);
+  res1 = _mm_maskz_fmsubne_pbh (m8, res1, x3, x4);
+
+  res = _mm256_fnmaddne_pbh (res, x1, x2);
+  res = _mm256_mask_fnmaddne_pbh (res, m16, x1, x2);
+  res = _mm256_mask3_fnmaddne_pbh (res, x1, x2, m16);
+  res = _mm256_maskz_fnmaddne_pbh (m16, res, x1, x2);
+  res1 = _mm_fnmaddne_pbh (res1, x3, x4);
+  res1 = _mm_mask_fnmaddne_pbh (res1, m8, x3, x4);
+  res1 = _mm_mask3_fnmaddne_pbh (res1, x3, x4, m8);
+  res1 = _mm_maskz_fnmaddne_pbh (m8, res1, x3, x4);
+
+  res = _mm256_fnmsubne_pbh (res, x1, x2);
+  res = _mm256_mask_fnmsubne_pbh (res, m16, x1, x2);
+  res = _mm256_mask3_fnmsubne_pbh (res, x1, x2, m16);
+  res = _mm256_maskz_fnmsubne_pbh (m16, res, x1, x2);
+  res1 = _mm_fnmsubne_pbh (res1, x3, x4);
+  res1 = _mm_mask_fnmsubne_pbh (res1, m8, x3, x4);
+  res1 = _mm_mask3_fnmsubne_pbh (res1, x3, x4, m8);
+  res1 = _mm_maskz_fnmsubne_pbh (m8, res1, x3, x4);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vaddnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vaddnepbf16-2.c
new file mode 100644
index 0000000..7783dce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vaddnepbf16-2.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vaddnepbf16-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vaddnepbf16-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vdivnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vdivnepbf16-2.c
new file mode 100644
index 0000000..dd2c544
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vdivnepbf16-2.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vdivnepbf16-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vdivnepbf16-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vfmaddXXXnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vfmaddXXXnepbf16-2.c
new file mode 100644
index 0000000..a4f2e5f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vfmaddXXXnepbf16-2.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vfmaddXXXnepbf16-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vfmaddXXXnepbf16-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vfmsubXXXnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vfmsubXXXnepbf16-2.c
new file mode 100644
index 0000000..406c173
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vfmsubXXXnepbf16-2.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vfmsubXXXnepbf16-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vfmsubXXXnepbf16-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vfnmaddXXXnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vfnmaddXXXnepbf16-2.c
new file mode 100644
index 0000000..3f53099
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vfnmaddXXXnepbf16-2.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vfnmaddXXXnepbf16-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vfnmaddXXXnepbf16-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vfnmsubXXXnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vfnmsubXXXnepbf16-2.c
new file mode 100644
index 0000000..fc906cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vfnmsubXXXnepbf16-2.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vfnmsubXXXnepbf16-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vfnmsubXXXnepbf16-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vmaxpbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vmaxpbf16-2.c
new file mode 100644
index 0000000..2b8f820
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vmaxpbf16-2.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vmaxpbf16-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vmaxpbf16-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vminpbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vminpbf16-2.c
new file mode 100644
index 0000000..dcb7c0e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vminpbf16-2.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vminpbf16-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vminpbf16-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vmulnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vmulnepbf16-2.c
new file mode 100644
index 0000000..753e2d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vmulnepbf16-2.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vmulnepbf16-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vmulnepbf16-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vscalefpbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vscalefpbf16-2.c
new file mode 100644
index 0000000..8f26dfb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vscalefpbf16-2.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vscalefpbf16-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vscalefpbf16-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vsubnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vsubnepbf16-2.c
new file mode 100644
index 0000000..ad02ee1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vsubnepbf16-2.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vsubnepbf16-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vsubnepbf16-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-helper.h b/gcc/testsuite/gcc.target/i386/avx512f-helper.h
index 3cd6751..b61c03b 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-helper.h
+++ b/gcc/testsuite/gcc.target/i386/avx512f-helper.h
@@ -45,6 +45,7 @@ MAKE_MASK_MERGE(, float)
MAKE_MASK_MERGE(d, double)
MAKE_MASK_MERGE(i_ub, unsigned char)
MAKE_MASK_MERGE(i_uw, unsigned short)
+MAKE_MASK_MERGE(bf16_uw, unsigned short)
MAKE_MASK_MERGE(i_ud, unsigned int)
MAKE_MASK_MERGE(i_uq, unsigned long long)
@@ -70,6 +71,7 @@ MAKE_MASK_ZERO(, float)
MAKE_MASK_ZERO(d, double)
MAKE_MASK_ZERO(i_ub, unsigned char)
MAKE_MASK_ZERO(i_uw, unsigned short)
+MAKE_MASK_ZERO(bf16_uw, unsigned short)
MAKE_MASK_ZERO(i_ud, unsigned int)
MAKE_MASK_ZERO(i_uq, unsigned long long)
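
The two new instantiations give the BF16 tests the same masking reference routines that the other element types already have; since bf16 lanes are checked as raw bit patterns, they are instantiated on unsigned short. Roughly, each MAKE_MASK_* instantiation stamps out a helper that applies merge- or zero-masking to an expected-result array. A simplified sketch of the idea (the real macro bodies live in avx512f-helper.h and differ in detail):

#include <stdio.h>

/* Illustrative re-creation of MAKE_MASK_ZERO: lanes whose mask bit is
   clear are forced to zero, mirroring a zero-masked intrinsic.  */
#define MAKE_MASK_ZERO(NAME, TYPE)				\
static void							\
zero_masking_##NAME (TYPE *arr, unsigned long long mask, int n)	\
{								\
  for (int i = 0; i < n; i++)					\
    if (!((mask >> i) & 1))					\
      arr[i] = 0;						\
}

MAKE_MASK_ZERO (bf16_uw, unsigned short)

int
main (void)
{
  unsigned short expect[4] = { 1, 2, 3, 4 };
  zero_masking_bf16_uw (expect, 0x5, 4);	/* keep lanes 0 and 2 */
  for (int i = 0; i < 4; i++)
    printf ("%d ", expect[i]);			/* prints: 1 0 3 0 */
  return 0;
}
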
diff --git a/gcc/testsuite/gcc.target/i386/m512-check.h b/gcc/testsuite/gcc.target/i386/m512-check.h
index d5d1837..bdc682d 100644
--- a/gcc/testsuite/gcc.target/i386/m512-check.h
+++ b/gcc/testsuite/gcc.target/i386/m512-check.h
@@ -69,6 +69,12 @@ typedef union
typedef union
{
+ __m512bh x;
+ unsigned short a[32];
+} union512bf16_uw;
+
+typedef union
+{
__m128h x;
_Float16 a[8];
} union128h;
@@ -79,6 +85,18 @@ typedef union
_Float16 a[16];
} union256h;
+typedef union
+{
+ __m128bh x;
+ unsigned short a[8];
+} union128bf16_uw;
+
+typedef union
+{
+ __m256bh x;
+ unsigned short a[16];
+} union256bf16_uw;
+
#define CHECK_ROUGH_EXP(UNION_TYPE, VALUE_TYPE, FMT) \
static int \
__attribute__((noinline, unused)) \
@@ -155,3 +173,12 @@ CHECK_FP_EXP (union256h, _Float16, ESP_FLOAT16, "%f")
CHECK_ROUGH_EXP (union128h, _Float16, "%f")
CHECK_ROUGH_EXP (union256h, _Float16, "%f")
#endif
+
+#if defined(AVX512BF16)
+CHECK_EXP (union512bf16_uw, unsigned short, "%d")
+#endif
+
+#if defined(AVX512BF16) && defined(AVX512VL)
+CHECK_EXP (union128bf16_uw, unsigned short, "%d")
+CHECK_EXP (union256bf16_uw, unsigned short, "%d")
+#endif
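
The m512-check.h additions follow the file's existing recipe: each union overlays a vector register type with a plain array so single lanes can be read back, and CHECK_EXP expands to a check_<union> routine that compares the register lane-by-lane against an expected array, printing mismatches with the supplied format string. A compilable approximation of what the generated checker does for union256bf16_uw (the __m256bh member is replaced by a plain array here so the sketch builds without AVX types or -mavx10.2):

#include <stdio.h>

typedef union
{
  unsigned long long x[4];	/* stand-in for the __m256bh register */
  unsigned short a[16];		/* its 16 bf16 lanes as raw bit patterns */
} union256bf16_uw;

/* Approximation of CHECK_EXP (union256bf16_uw, unsigned short, "%d"):
   comparing lanes as 16-bit integers gives exact bit-for-bit checking
   without needing a native bf16 scalar compare.  */
static int
check_union256bf16_uw (union256bf16_uw u, const unsigned short *v)
{
  int err = 0;
  for (int i = 0; i < 16; i++)
    if (u.a[i] != v[i])
      {
	err++;
	printf ("%d: %d != %d\n", i, v[i], u.a[i]);
      }
  return err;
}

int
main (void)
{
  union256bf16_uw u = { .a = { 0 } };
  unsigned short expect[16] = { 0 };
  return check_union256bf16_uw (u, expect);	/* 0 mismatches */
}
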