aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHongtao Liu <liuhongt@gcc.gnu.org>2019-05-08 10:21:40 +0000
committerHongtao Liu <liuhongt@gcc.gnu.org>2019-05-08 10:21:40 +0000
commit4f0e90fae97a894247ec93336c8826cf4afb3d0d (patch)
tree0cfa0ff6eb03bc72ee6f2320052fe38d3e8501e6
parentda2d30c199a6b6866593c20dbd84673c1637be89 (diff)
downloadgcc-4f0e90fae97a894247ec93336c8826cf4afb3d0d.zip
gcc-4f0e90fae97a894247ec93336c8826cf4afb3d0d.tar.gz
gcc-4f0e90fae97a894247ec93336c8826cf4afb3d0d.tar.bz2
Enable support for bfloat16 which will be in Future Cooper Lake.
There are 3 instructions for AVX512BF16: VCVTNE2PS2BF16, VCVTNEPS2BF16 and DPBF16PS instructions, which are Vector Neural Network Instructions supporting: - VCVTNE2PS2BF16: Convert Two Packed Single Data to One Packed BF16 Data. - VCVTNEPS2BF16: Convert Packed Single Data to Packed BF16 Data. - VDPBF16PS: Dot Product of BF16 Pairs Accumulated into Packed Single Precision. 2019-05-07 Wei Xiao <wei3.xiao@intel.com> * common/config/i386/i386-common.c (OPTION_MASK_ISA_AVX512BF16_SET OPTION_MASK_ISA_AVX512BF16_UNSET, OPTION_MASK_ISA2_AVX512BW_UNSET): New. (OPTION_MASK_ISA2_AVX512F_UNSET): Add OPTION_MASK_ISA_AVX512BF16_UNSET. (ix86_handle_option): Handle -mavx512bf16. * config.gcc: Add avx512bf16vlintrin.h and avx512bf16intrin.h to extra_headers. * config/i386/avx512bf16vlintrin.h: New. * config/i386/avx512bf16intrin.h: New. * config/i386/cpuid.h (bit_AVX512BF16): New. * config/i386/driver-i386.c (host_detect_local_cpu): Detect BF16. * config/i386/i386-builtin-types.def: Add new types. * config/i386/i386-builtin.def: Add new builtins. * config/i386/i386-c.c (ix86_target_macros_internal): Define __AVX512BF16__. * config/i386/i386-option.c (ix86_target_string): Add -mavx512bf16. (ix86_option_override_internal): Handle BF16. (ix86_valid_target_attribute_inner_p): Ditto. * config/i386/i386-expand.c (ix86_expand_args_builtin): Ditto. * config/i386/i386-builtin.c (enum processor_features): Add F_AVX512BF16. (static const _isa_names_table isa_names_table): Ditto. * config/i386/i386.h (TARGET_AVX512BF16, TARGET_AVX512BF16_P): New. (PTA_AVX512BF16): Ditto. * config/i386/i386.opt: Add -mavx512bf16. * config/i386/immintrin.h: Include avx512bf16intrin.h and avx512bf16vlintrin.h. * config/i386/sse.md (avx512f_cvtne2ps2bf16_<mode><mask_name>, avx512f_cvtneps2bf16_<mode><mask_name>, avx512f_dpbf16ps_<mode><mask_half_name>): New define_insn patterns. * config/i386/subst.md (mask_half): Add new subst. * doc/invoke.texi: Document -mavx512bf16. 2019-05-07 Wei Xiao <wei3.xiao@intel.com> * gcc.target/i386/avx512bf16-vcvtne2ps2bf16-1.c: New test. * gcc.target/i386/avx512bf16-vcvtneps2bf16-1.c: New test. * gcc.target/i386/avx512bf16-vdpbf16ps-1.c: New test. * gcc.target/i386/avx512bf16vl-vcvtne2ps2bf16-1.c: New test. * gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1.c: New test. * gcc.target/i386/avx512bf16vl-vdpbf16ps-1.c: New test. * gcc.target/i386/builtin_target.c: Handle avx512bf16. * gcc.target/i386/sse-12.c: Add -mavx512bf16. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-14.c: Ditto. * gcc.target/i386/sse-22.c: Ditto. * gcc.target/i386/sse-23.c: Ditto. * g++.dg/other/i386-2.C: Ditto. * g++.dg/other/i386-3.C: Ditto. 2019-05-07 Hongtao Liu <hongtao.liu@intel.com> * config/i386/cpuinfo.c (get_available_features): Detect BF16. * config/i386/cpuinfo.h (enum processor_features): Add FEATURE_AVX512BF16. From-SVN: r271006
-rw-r--r--gcc/ChangeLog34
-rw-r--r--gcc/common/config/i386/i386-common.c25
-rw-r--r--gcc/config.gcc4
-rw-r--r--gcc/config/i386/avx512bf16intrin.h118
-rw-r--r--gcc/config/i386/avx512bf16vlintrin.h183
-rw-r--r--gcc/config/i386/cpuid.h3
-rw-r--r--gcc/config/i386/driver-i386.c7
-rw-r--r--gcc/config/i386/i386-builtin-types.def26
-rw-r--r--gcc/config/i386/i386-builtin.def29
-rw-r--r--gcc/config/i386/i386-builtins.c4
-rw-r--r--gcc/config/i386/i386-c.c2
-rw-r--r--gcc/config/i386/i386-expand.c24
-rw-r--r--gcc/config/i386/i386-options.c8
-rw-r--r--gcc/config/i386/i386.h3
-rw-r--r--gcc/config/i386/i386.opt5
-rw-r--r--gcc/config/i386/immintrin.h4
-rw-r--r--gcc/config/i386/sse.md101
-rw-r--r--gcc/config/i386/subst.md13
-rw-r--r--gcc/doc/invoke.texi7
-rw-r--r--gcc/testsuite/ChangeLog17
-rw-r--r--gcc/testsuite/g++.dg/other/i386-2.C2
-rw-r--r--gcc/testsuite/g++.dg/other/i386-3.C2
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bf16-vcvtne2ps2bf16-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bf16-vcvtneps2bf16-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bf16-vdpbf16ps-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bf16-vdpbf16ps-2.c49
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bf16vl-vcvtne2ps2bf16-1.c29
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bf16vl-vdpbf16ps-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/builtin_target.c4
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-12.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-13.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-14.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-22.c4
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-23.c2
-rw-r--r--libgcc/ChangeLog6
-rw-r--r--libgcc/config/i386/cpuinfo.c6
-rw-r--r--libgcc/config/i386/cpuinfo.h3
38 files changed, 824 insertions, 18 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 9035498..e37aafc 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -3,6 +3,40 @@
PR tree-optimization/90356
* match.pd ((X +/- 0.0) +/- 0.0): Optimize into X +/- 0.0 if possible.
+2019-05-07 Wei Xiao <wei3.xiao@intel.com>
+
+ * common/config/i386/i386-common.c (OPTION_MASK_ISA_AVX512BF16_SET
+ OPTION_MASK_ISA_AVX512BF16_UNSET, OPTION_MASK_ISA2_AVX512BW_UNSET): New.
+ (OPTION_MASK_ISA2_AVX512F_UNSET): Add OPTION_MASK_ISA_AVX512BF16_UNSET.
+ (ix86_handle_option): Handle -mavx512bf16.
+ * config.gcc: Add avx512bf16vlintrin.h and avx512bf16intrin.h
+ to extra_headers.
+ * config/i386/avx512bf16vlintrin.h: New.
+ * config/i386/avx512bf16intrin.h: New.
+ * config/i386/cpuid.h (bit_AVX512BF16): New.
+ * config/i386/driver-i386.c (host_detect_local_cpu): Detect BF16.
+ * config/i386/i386-builtin-types.def: Add new types.
+ * config/i386/i386-builtin.def: Add new builtins.
+ * config/i386/i386-c.c (ix86_target_macros_internal): Define
+ __AVX512BF16__.
+ * config/i386/i386-option.c (ix86_target_string): Add -mavx512bf16.
+ (ix86_option_override_internal): Handle BF16.
+ (ix86_valid_target_attribute_inner_p): Ditto.
+ * config/i386/i386-expand.c (ix86_expand_args_builtin): Ditto.
+ * config/i386/i386-builtin.c (enum processor_features): Add
+ F_AVX512BF16.
+ (static const _isa_names_table isa_names_table): Ditto.
+ * config/i386/i386.h (TARGET_AVX512BF16, TARGET_AVX512BF16_P): New.
+ (PTA_AVX512BF16): Ditto.
+ * config/i386/i386.opt: Add -mavx512bf16.
+ * config/i386/immintrin.h: Include avx512bf16intrin.h
+ and avx512bf16vlintrin.h.
+ * config/i386/sse.md (avx512f_cvtne2ps2bf16_<mode><mask_name>,
+ avx512f_cvtneps2bf16_<mode><mask_name>,
+ avx512f_dpbf16ps_<mode><mask_half_name>): New define_insn patterns.
+ * config/i386/subst.md (mask_half): Add new subst.
+ * doc/invoke.texi: Document -mavx512bf16.
+
2019-05-07 Segher Boessenkool <segher@kernel.crashing.org>
* config/rs6000/rs6000-protos.h (rs6000_legitimize_reload_address_ptr):
diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c
index ee725a4..db5c3f8 100644
--- a/gcc/common/config/i386/i386-common.c
+++ b/gcc/common/config/i386/i386-common.c
@@ -88,6 +88,7 @@ along with GCC; see the file COPYING3. If not see
(OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512F_SET)
#define OPTION_MASK_ISA_AVX512BITALG_SET \
(OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512F_SET)
+#define OPTION_MASK_ISA_AVX512BF16_SET OPTION_MASK_ISA_AVX512BF16
#define OPTION_MASK_ISA_RTM_SET OPTION_MASK_ISA_RTM
#define OPTION_MASK_ISA_PRFCHW_SET OPTION_MASK_ISA_PRFCHW
#define OPTION_MASK_ISA_RDSEED_SET OPTION_MASK_ISA_RDSEED
@@ -215,6 +216,7 @@ along with GCC; see the file COPYING3. If not see
#define OPTION_MASK_ISA_AVX512VNNI_UNSET OPTION_MASK_ISA_AVX512VNNI
#define OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET OPTION_MASK_ISA_AVX512VPOPCNTDQ
#define OPTION_MASK_ISA_AVX512BITALG_UNSET OPTION_MASK_ISA_AVX512BITALG
+#define OPTION_MASK_ISA_AVX512BF16_UNSET OPTION_MASK_ISA_AVX512BF16
#define OPTION_MASK_ISA_RTM_UNSET OPTION_MASK_ISA_RTM
#define OPTION_MASK_ISA_PRFCHW_UNSET OPTION_MASK_ISA_PRFCHW
#define OPTION_MASK_ISA_RDSEED_UNSET OPTION_MASK_ISA_RDSEED
@@ -276,10 +278,14 @@ along with GCC; see the file COPYING3. If not see
| OPTION_MASK_ISA_SSE_UNSET)
#define OPTION_MASK_ISA2_AVX512F_UNSET \
- (OPTION_MASK_ISA_AVX5124FMAPS_UNSET | OPTION_MASK_ISA_AVX5124VNNIW_UNSET)
+ (OPTION_MASK_ISA_AVX512BF16_UNSET \
+ | OPTION_MASK_ISA_AVX5124FMAPS_UNSET \
+ | OPTION_MASK_ISA_AVX5124VNNIW_UNSET)
#define OPTION_MASK_ISA2_GENERAL_REGS_ONLY_UNSET \
(OPTION_MASK_ISA2_AVX512F_UNSET)
+#define OPTION_MASK_ISA2_AVX512BW_UNSET OPTION_MASK_ISA_AVX512BF16_UNSET
+
/* Set 1 << value as value of -malign-FLAG option. */
static void
@@ -738,6 +744,21 @@ ix86_handle_option (struct gcc_options *opts,
}
return true;
+ case OPT_mavx512bf16:
+ if (value)
+ {
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX512BF16_SET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_AVX512BF16_SET;
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW_SET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512BW_SET;
+ }
+ else
+ {
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA_AVX512BF16_UNSET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_AVX512BF16_UNSET;
+ }
+ return true;
+
case OPT_msgx:
if (value)
{
@@ -800,6 +821,8 @@ ix86_handle_option (struct gcc_options *opts,
{
opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX512BW_UNSET;
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512BW_UNSET;
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX512BW_UNSET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX512BW_UNSET;
}
return true;
diff --git a/gcc/config.gcc b/gcc/config.gcc
index baa156d..b5a313f 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -407,7 +407,7 @@ i[34567]86-*-*)
avx512vnnivlintrin.h vaesintrin.h vpclmulqdqintrin.h
avx512vpopcntdqvlintrin.h avx512bitalgintrin.h
pconfigintrin.h wbnoinvdintrin.h movdirintrin.h
- waitpkgintrin.h cldemoteintrin.h"
+ waitpkgintrin.h cldemoteintrin.h avx512bf16vlintrin.h avx512bf16intrin.h"
;;
x86_64-*-*)
cpu_type=i386
@@ -439,7 +439,7 @@ x86_64-*-*)
avx512vnnivlintrin.h vaesintrin.h vpclmulqdqintrin.h
avx512vpopcntdqvlintrin.h avx512bitalgintrin.h
pconfigintrin.h wbnoinvdintrin.h movdirintrin.h
- waitpkgintrin.h cldemoteintrin.h"
+ waitpkgintrin.h cldemoteintrin.h avx512bf16vlintrin.h avx512bf16intrin.h"
;;
ia64-*-*)
extra_headers=ia64intrin.h
diff --git a/gcc/config/i386/avx512bf16intrin.h b/gcc/config/i386/avx512bf16intrin.h
new file mode 100644
index 0000000..cc983bd
--- /dev/null
+++ b/gcc/config/i386/avx512bf16intrin.h
@@ -0,0 +1,118 @@
+/* Copyright (C) 2019 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx512bf16intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512BF16INTRIN_H_INCLUDED
+#define _AVX512BF16INTRIN_H_INCLUDED
+
+#ifndef __AVX512BF16__
+#pragma GCC push_options
+#pragma GCC target("avx512bf16")
+#define __DISABLE_AVX512BF16__
+#endif /* __AVX512BF16__ */
+
+/* Internal data types for implementing the intrinsics. */
+typedef short __v32bh __attribute__ ((__vector_size__ (64)));
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+ vector types, and their scalar components. */
+typedef short __m512bh __attribute__ ((__vector_size__ (64), __may_alias__));
+
+/* vcvtne2ps2bf16 */
+
+extern __inline __m512bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtne2ps_pbh (__m512 __A, __m512 __B)
+{
+ return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi(__A, __B);
+}
+
+extern __inline __m512bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtne2ps_pbh (__m512bh __A, __mmask32 __B, __m512 __C, __m512 __D)
+{
+ return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi_mask(__C, __D, __A, __B);
+}
+
+extern __inline __m512bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtne2ps_pbh (__mmask32 __A, __m512 __B, __m512 __C)
+{
+ return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi_maskz(__B, __C, __A);
+}
+
+/* vcvtneps2bf16 */
+
+extern __inline __m256bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtneps_pbh (__m512 __A)
+{
+ return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf(__A);
+}
+
+extern __inline __m256bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtneps_pbh (__m256bh __A, __mmask16 __B, __m512 __C)
+{
+ return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf_mask(__C, __A, __B);
+}
+
+extern __inline __m256bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtneps_pbh (__mmask16 __A, __m512 __B)
+{
+ return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf_maskz(__B, __A);
+}
+
+/* vdpbf16ps */
+
+extern __inline __m512
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpbf16_ps (__m512 __A, __m512bh __B, __m512bh __C)
+{
+ return (__m512)__builtin_ia32_dpbf16ps_v16sf(__A, __B, __C);
+}
+
+extern __inline __m512
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpbf16_ps (__m512 __A, __mmask16 __B, __m512bh __C, __m512bh __D)
+{
+ return (__m512)__builtin_ia32_dpbf16ps_v16sf_mask(__A, __C, __D, __B);
+}
+
+extern __inline __m512
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpbf16_ps (__mmask16 __A, __m512 __B, __m512bh __C, __m512bh __D)
+{
+ return (__m512)__builtin_ia32_dpbf16ps_v16sf_maskz(__B, __C, __D, __A);
+}
+
+#ifdef __DISABLE_AVX512BF16__
+#undef __DISABLE_AVX512BF16__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512BF16__ */
+
+#endif /* _AVX512BF16INTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx512bf16vlintrin.h b/gcc/config/i386/avx512bf16vlintrin.h
new file mode 100644
index 0000000..fa32a7f
--- /dev/null
+++ b/gcc/config/i386/avx512bf16vlintrin.h
@@ -0,0 +1,183 @@
+/* Copyright (C) 2019 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx512bf16vlintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512BF16VLINTRIN_H_INCLUDED
+#define _AVX512BF16VLINTRIN_H_INCLUDED
+
+#if !defined(__AVX512VL__) || !defined(__AVX512BF16__)
+#pragma GCC push_options
+#pragma GCC target("avx512bf16,avx512vl")
+#define __DISABLE_AVX512BF16VL__
+#endif /* __AVX512BF16__ */
+
+/* Internal data types for implementing the intrinsics. */
+typedef short __v16bh __attribute__ ((__vector_size__ (32)));
+typedef short __v8bh __attribute__ ((__vector_size__ (16)));
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+ vector types, and their scalar components. */
+typedef short __m256bh __attribute__ ((__vector_size__ (32), __may_alias__));
+typedef short __m128bh __attribute__ ((__vector_size__ (16), __may_alias__));
+
+/* vcvtne2ps2bf16 */
+
+extern __inline __m256bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtne2ps_pbh (__m256 __A, __m256 __B)
+{
+ return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi(__A, __B);
+}
+
+extern __inline __m256bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtne2ps_pbh (__m256bh __A, __mmask16 __B, __m256 __C, __m256 __D)
+{
+ return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi_mask(__C, __D, __A, __B);
+}
+
+extern __inline __m256bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtne2ps_pbh (__mmask16 __A, __m256 __B, __m256 __C)
+{
+ return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi_maskz(__B, __C, __A);
+}
+
+extern __inline __m128bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtne2ps_pbh (__m128 __A, __m128 __B)
+{
+ return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi(__A, __B);
+}
+
+extern __inline __m128bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtne2ps_pbh (__m128bh __A, __mmask8 __B, __m128 __C, __m128 __D)
+{
+ return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi_mask(__C, __D, __A, __B);
+}
+
+extern __inline __m128bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtne2ps_pbh (__mmask8 __A, __m128 __B, __m128 __C)
+{
+ return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi_maskz(__B, __C, __A);
+}
+
+/* vcvtneps2bf16 */
+
+extern __inline __m128bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtneps_pbh (__m256 __A)
+{
+ return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf(__A);
+}
+
+extern __inline __m128bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtneps_pbh (__m128bh __A, __mmask8 __B, __m256 __C)
+{
+ return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf_mask(__C, __A, __B);
+}
+
+extern __inline __m128bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtneps_pbh (__mmask8 __A, __m256 __B)
+{
+ return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf_maskz(__B, __A);
+}
+
+extern __inline __m128bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtneps_pbh (__m128 __A)
+{
+ return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf(__A);
+}
+
+extern __inline __m128bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtneps_pbh (__m128bh __A, __mmask8 __B, __m128 __C)
+{
+ return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf_mask(__C, __A, __B);
+}
+
+extern __inline __m128bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtneps_pbh (__mmask8 __A, __m128 __B)
+{
+ return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf_maskz(__B, __A);
+}
+
+/* vdpbf16ps */
+
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_dpbf16_ps (__m256 __A, __m256bh __B, __m256bh __C)
+{
+ return (__m256)__builtin_ia32_dpbf16ps_v8sf(__A, __B, __C);
+}
+
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_dpbf16_ps (__m256 __A, __mmask8 __B, __m256bh __C, __m256bh __D)
+{
+ return (__m256)__builtin_ia32_dpbf16ps_v8sf_mask(__A, __C, __D, __B);
+}
+
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_dpbf16_ps (__mmask8 __A, __m256 __B, __m256bh __C, __m256bh __D)
+{
+ return (__m256)__builtin_ia32_dpbf16ps_v8sf_maskz(__B, __C, __D, __A);
+}
+
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_dpbf16_ps (__m128 __A, __m128bh __B, __m128bh __C)
+{
+ return (__m128)__builtin_ia32_dpbf16ps_v4sf(__A, __B, __C);
+}
+
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_dpbf16_ps (__m128 __A, __mmask8 __B, __m128bh __C, __m128bh __D)
+{
+ return (__m128)__builtin_ia32_dpbf16ps_v4sf_mask(__A, __C, __D, __B);
+}
+
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_dpbf16_ps (__mmask8 __A, __m128 __B, __m128bh __C, __m128bh __D)
+{
+ return (__m128)__builtin_ia32_dpbf16ps_v4sf_maskz(__B, __C, __D, __A);
+}
+
+#ifdef __DISABLE_AVX512BF16VL__
+#undef __DISABLE_AVX512BF16VL__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512BF16VL__ */
+
+#endif /* _AVX512BF16VLINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index 39bf0fb..8ddd425 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -21,6 +21,9 @@
* <http://www.gnu.org/licenses/>.
*/
+/* %eax */
+#define bit_AVX512BF16 (1 << 5)
+
/* %ecx */
#define bit_SSE3 (1 << 0)
#define bit_PCLMUL (1 << 1)
diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
index 75f7026..22ad5bcf 100644
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -426,6 +426,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
unsigned int has_movdiri = 0, has_movdir64b = 0;
unsigned int has_waitpkg = 0;
unsigned int has_cldemote = 0;
+ unsigned int has_avx512bf16 = 0;
unsigned int has_ptwrite = 0;
@@ -533,6 +534,9 @@ const char *host_detect_local_cpu (int argc, const char **argv)
has_shstk = ecx & bit_SHSTK;
has_pconfig = edx & bit_PCONFIG;
has_waitpkg = ecx & bit_WAITPKG;
+
+ __cpuid_count (7, 1, eax, ebx, ecx, edx);
+ has_avx512bf16 = eax & bit_AVX512BF16;
}
if (max_level >= 13)
@@ -1143,6 +1147,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
const char *waitpkg = has_waitpkg ? " -mwaitpkg" : " -mno-waitpkg";
const char *cldemote = has_cldemote ? " -mcldemote" : " -mno-cldemote";
const char *ptwrite = has_ptwrite ? " -mptwrite" : " -mno-ptwrite";
+ const char *avx512bf16 = has_avx512bf16 ? " -mavx512bf16" : " -mno-avx512bf16";
options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
sse4a, cx16, sahf, movbe, aes, sha, pclmul,
@@ -1157,7 +1162,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
clwb, mwaitx, clzero, pku, rdpid, gfni, shstk,
avx512vbmi2, avx512vnni, vaes, vpclmulqdq,
avx512bitalg, movdiri, movdir64b, waitpkg, cldemote,
- ptwrite,
+ ptwrite, avx512bf16,
NULL);
}
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index dfe13adb..d7b9939 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -1262,3 +1262,29 @@ DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, V2DI, INT)
DEF_FUNCTION_TYPE (V4DI, V4DI)
DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, UHI)
DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, UHI)
+
+# BF16 builtins
+DEF_FUNCTION_TYPE (V32HI, V16SF, V16SF)
+DEF_FUNCTION_TYPE (V32HI, V16SF, V16SF, V32HI, USI)
+DEF_FUNCTION_TYPE (V32HI, V16SF, V16SF, USI)
+DEF_FUNCTION_TYPE (V16HI, V8SF, V8SF)
+DEF_FUNCTION_TYPE (V16HI, V8SF, V8SF, V16HI, UHI)
+DEF_FUNCTION_TYPE (V16HI, V8SF, V8SF, UHI)
+DEF_FUNCTION_TYPE (V8HI, V4SF, V4SF)
+DEF_FUNCTION_TYPE (V8HI, V4SF, V4SF, V8HI, UQI)
+DEF_FUNCTION_TYPE (V8HI, V4SF, V4SF, UQI)
+DEF_FUNCTION_TYPE (V16HI, V16SF)
+DEF_FUNCTION_TYPE (V16HI, V16SF, V16HI, UHI)
+DEF_FUNCTION_TYPE (V16HI, V16SF, UHI)
+DEF_FUNCTION_TYPE (V8HI, V8SF)
+DEF_FUNCTION_TYPE (V8HI, V8SF, V8HI, UQI)
+DEF_FUNCTION_TYPE (V8HI, V8SF, UQI)
+DEF_FUNCTION_TYPE (V8HI, V4SF)
+DEF_FUNCTION_TYPE (V8HI, V4SF, V8HI, UQI)
+DEF_FUNCTION_TYPE (V8HI, V4SF, UQI)
+DEF_FUNCTION_TYPE (V16SF, V16SF, V32HI, V32HI)
+DEF_FUNCTION_TYPE (V16SF, V16SF, V32HI, V32HI, UHI)
+DEF_FUNCTION_TYPE (V8SF, V8SF, V16HI, V16HI)
+DEF_FUNCTION_TYPE (V8SF, V8SF, V16HI, V16HI, UQI)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V8HI, V8HI)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V8HI, V8HI, UQI)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 6580890..e95d5d3 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -2703,6 +2703,35 @@ BDESC (0, OPTION_MASK_ISA_VAES, CODE_FOR_vaesenclast_v16qi, "__builtin_ia32_vaes
BDESC (0, OPTION_MASK_ISA_VAES, CODE_FOR_vaesenclast_v32qi, "__builtin_ia32_vaesenclast_v32qi", IX86_BUILTIN_VAESENCLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
BDESC (0, OPTION_MASK_ISA_VAES, CODE_FOR_vaesenclast_v64qi, "__builtin_ia32_vaesenclast_v64qi", IX86_BUILTIN_VAESENCLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
+/* BF16 */
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v32hi, "__builtin_ia32_cvtne2ps2bf16_v32hi", IX86_BUILTIN_CVTNE2PS2HI16_V32HI, UNKNOWN, (int) V32HI_FTYPE_V16SF_V16SF)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v32hi_mask, "__builtin_ia32_cvtne2ps2bf16_v32hi_mask", IX86_BUILTIN_CVTNE2PS2HI16_V32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V16SF_V16SF_V32HI_USI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v32hi_maskz, "__builtin_ia32_cvtne2ps2bf16_v32hi_maskz", IX86_BUILTIN_CVTNE2PS2HI16_V32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V16SF_V16SF_USI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v16hi, "__builtin_ia32_cvtne2ps2bf16_v16hi", IX86_BUILTIN_CVTNE2PS2HI16_V16HI, UNKNOWN, (int) V16HI_FTYPE_V8SF_V8SF)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v16hi_mask, "__builtin_ia32_cvtne2ps2bf16_v16hi_mask", IX86_BUILTIN_CVTNE2PS2HI16_V16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SF_V8SF_V16HI_UHI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v16hi_maskz, "__builtin_ia32_cvtne2ps2bf16_v16hi_maskz", IX86_BUILTIN_CVTNE2PS2HI16_V16HI_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V8SF_V8SF_UHI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v8hi, "__builtin_ia32_cvtne2ps2bf16_v8hi", IX86_BUILTIN_CVTNE2PS2HI16_V8HI, UNKNOWN, (int) V8HI_FTYPE_V4SF_V4SF)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v8hi_mask, "__builtin_ia32_cvtne2ps2bf16_v8hi_mask", IX86_BUILTIN_CVTNE2PS2HI16_V8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_V4SF_V8HI_UQI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v8hi_maskz, "__builtin_ia32_cvtne2ps2bf16_v8hi_maskz", IX86_BUILTIN_CVTNE2PS2HI16_V8HI_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V4SF_V4SF_UQI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v16sf, "__builtin_ia32_cvtneps2bf16_v16sf", IX86_BUILTIN_CVTNEPS2HI16_V16SF, UNKNOWN, (int) V16HI_FTYPE_V16SF)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v16sf_mask, "__builtin_ia32_cvtneps2bf16_v16sf_mask", IX86_BUILTIN_CVTNEPS2HI16_V16SF_MASK, UNKNOWN, (int) V16HI_FTYPE_V16SF_V16HI_UHI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v16sf_maskz, "__builtin_ia32_cvtneps2bf16_v16sf_maskz", IX86_BUILTIN_CVTNE2PS2HI16_V16SF_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16SF_UHI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v8sf, "__builtin_ia32_cvtneps2bf16_v8sf", IX86_BUILTIN_CVTNEPS2HI16_V8SF, UNKNOWN, (int) V8HI_FTYPE_V8SF)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v8sf_mask, "__builtin_ia32_cvtneps2bf16_v8sf_mask", IX86_BUILTIN_CVTNEPS2HI16_V8SF_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_V8HI_UQI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v8sf_maskz, "__builtin_ia32_cvtneps2bf16_v8sf_maskz", IX86_BUILTIN_CVTNE2PS2HI16_V8SF_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8SF_UQI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v4sf, "__builtin_ia32_cvtneps2bf16_v4sf", IX86_BUILTIN_CVTNEPS2HI16_V4SF, UNKNOWN, (int) V8HI_FTYPE_V4SF)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v4sf_mask, "__builtin_ia32_cvtneps2bf16_v4sf_mask", IX86_BUILTIN_CVTNEPS2HI16_V4SF_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_V8HI_UQI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v4sf_maskz, "__builtin_ia32_cvtneps2bf16_v4sf_maskz", IX86_BUILTIN_CVTNE2PS2HI16_V4SF_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V4SF_UQI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v16sf, "__builtin_ia32_dpbf16ps_v16sf", IX86_BUILTIN_DPHI16PS_V16SF, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32HI_V32HI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v16sf_mask, "__builtin_ia32_dpbf16ps_v16sf_mask", IX86_BUILTIN_DPHI16PS_V16SF_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32HI_V32HI_UHI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v16sf_maskz, "__builtin_ia32_dpbf16ps_v16sf_maskz", IX86_BUILTIN_DPHI16PS_V16SF_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32HI_V32HI_UHI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v8sf, "__builtin_ia32_dpbf16ps_v8sf", IX86_BUILTIN_DPHI16PS_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V16HI_V16HI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v8sf_mask, "__builtin_ia32_dpbf16ps_v8sf_mask", IX86_BUILTIN_DPHI16PS_V8SF_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V16HI_V16HI_UQI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v8sf_maskz, "__builtin_ia32_dpbf16ps_v8sf_maskz", IX86_BUILTIN_DPHI16PS_V8SF_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V16HI_V16HI_UQI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf, "__builtin_ia32_dpbf16ps_v4sf", IX86_BUILTIN_DPHI16PS_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V8HI_V8HI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf_mask, "__builtin_ia32_dpbf16ps_v4sf_mask", IX86_BUILTIN_DPHI16PS_V4SF_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V8HI_V8HI_UQI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf_maskz, "__builtin_ia32_dpbf16ps_v4sf_maskz", IX86_BUILTIN_DPHI16PS_V4SF_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V8HI_V8HI_UQI)
+
/* Builtins with rounding support. */
BDESC_END (ARGS, ROUND_ARGS)
diff --git a/gcc/config/i386/i386-builtins.c b/gcc/config/i386/i386-builtins.c
index 9779727..72bb5d7 100644
--- a/gcc/config/i386/i386-builtins.c
+++ b/gcc/config/i386/i386-builtins.c
@@ -1920,6 +1920,7 @@ enum processor_features
F_VPCLMULQDQ,
F_AVX512VNNI,
F_AVX512BITALG,
+ F_AVX512BF16,
F_MAX
};
@@ -2064,7 +2065,8 @@ static const _isa_names_table isa_names_table[] =
{"gfni", F_GFNI, P_ZERO},
{"vpclmulqdq", F_VPCLMULQDQ, P_ZERO},
{"avx512vnni", F_AVX512VNNI, P_ZERO},
- {"avx512bitalg", F_AVX512BITALG, P_ZERO}
+ {"avx512bitalg", F_AVX512BITALG, P_ZERO},
+ {"avx512bf16", F_AVX512BF16, P_ZERO}
};
/* This parses the attribute arguments to target in DECL and determines
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index 50cac3b..92bf066 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -548,6 +548,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__CLDEMOTE__");
if (isa_flag2 & OPTION_MASK_ISA_PTWRITE)
def_or_undef (parse_in, "__PTWRITE__");
+ if (isa_flag2 & OPTION_MASK_ISA_AVX512BF16)
+ def_or_undef (parse_in, "__AVX512BF16__");
if (TARGET_IAMCU)
{
def_or_undef (parse_in, "__iamcu");
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 0835ebf..b7ce5d0 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -8968,6 +8968,9 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V8DF_FTYPE_V2DF:
case V8DF_FTYPE_V8DF:
case V4DI_FTYPE_V4DI:
+ case V16HI_FTYPE_V16SF:
+ case V8HI_FTYPE_V8SF:
+ case V8HI_FTYPE_V4SF:
nargs = 1;
break;
case V4SF_FTYPE_V4SF_VEC_MERGE:
@@ -9092,6 +9095,12 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case USI_FTYPE_USI_USI:
case UDI_FTYPE_UDI_UDI:
case V16SI_FTYPE_V8DF_V8DF:
+ case V32HI_FTYPE_V16SF_V16SF:
+ case V16HI_FTYPE_V8SF_V8SF:
+ case V8HI_FTYPE_V4SF_V4SF:
+ case V16HI_FTYPE_V16SF_UHI:
+ case V8HI_FTYPE_V8SF_UQI:
+ case V8HI_FTYPE_V4SF_UQI:
nargs = 2;
break;
case V2DI_FTYPE_V2DI_INT_CONVERT:
@@ -9274,6 +9283,15 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V16HI_FTYPE_V16HI_V16HI_V16HI:
case V8SI_FTYPE_V8SI_V8SI_V8SI:
case V8HI_FTYPE_V8HI_V8HI_V8HI:
+ case V32HI_FTYPE_V16SF_V16SF_USI:
+ case V16HI_FTYPE_V8SF_V8SF_UHI:
+ case V8HI_FTYPE_V4SF_V4SF_UQI:
+ case V16HI_FTYPE_V16SF_V16HI_UHI:
+ case V8HI_FTYPE_V8SF_V8HI_UQI:
+ case V8HI_FTYPE_V4SF_V8HI_UQI:
+ case V16SF_FTYPE_V16SF_V32HI_V32HI:
+ case V8SF_FTYPE_V8SF_V16HI_V16HI:
+ case V4SF_FTYPE_V4SF_V8HI_V8HI:
nargs = 3;
break;
case V32QI_FTYPE_V32QI_V32QI_INT:
@@ -9413,6 +9431,9 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V16HI_FTYPE_V32QI_V32QI_V16HI_UHI:
case V8SI_FTYPE_V16HI_V16HI_V8SI_UQI:
case V4SI_FTYPE_V8HI_V8HI_V4SI_UQI:
+ case V32HI_FTYPE_V16SF_V16SF_V32HI_USI:
+ case V16HI_FTYPE_V8SF_V8SF_V16HI_UHI:
+ case V8HI_FTYPE_V4SF_V4SF_V8HI_UQI:
nargs = 4;
break;
case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
@@ -9456,6 +9477,9 @@ ix86_expand_args_builtin (const struct builtin_description *d,
break;
case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
+ case V16SF_FTYPE_V16SF_V32HI_V32HI_UHI:
+ case V8SF_FTYPE_V8SF_V16HI_V16HI_UQI:
+ case V4SF_FTYPE_V4SF_V8HI_V8HI_UQI:
nargs = 4;
break;
case UQI_FTYPE_V8DI_V8DI_INT_UQI:
diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c
index 95a9ae3..dec8352 100644
--- a/gcc/config/i386/i386-options.c
+++ b/gcc/config/i386/i386-options.c
@@ -209,7 +209,8 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2,
{ "-mmovdir64b", OPTION_MASK_ISA_MOVDIR64B },
{ "-mwaitpkg", OPTION_MASK_ISA_WAITPKG },
{ "-mcldemote", OPTION_MASK_ISA_CLDEMOTE },
- { "-mptwrite", OPTION_MASK_ISA_PTWRITE }
+ { "-mptwrite", OPTION_MASK_ISA_PTWRITE },
+ { "-mavx512bf16", OPTION_MASK_ISA_AVX512BF16 }
};
static struct ix86_target_opts isa_opts[] =
{
@@ -919,6 +920,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
IX86_ATTR_ISA ("waitpkg", OPT_mwaitpkg),
IX86_ATTR_ISA ("cldemote", OPT_mcldemote),
IX86_ATTR_ISA ("ptwrite", OPT_mptwrite),
+ IX86_ATTR_ISA ("avx512bf16", OPT_mavx512bf16),
/* enum options */
IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
@@ -2034,6 +2036,10 @@ ix86_option_override_internal (bool main_args_p,
&& !(opts->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512VPOPCNTDQ))
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VPOPCNTDQ;
+ if (((processor_alias_table[i].flags & PTA_AVX512BF16) != 0)
+ && !(opts->x_ix86_isa_flags2_explicit
+ & OPTION_MASK_ISA_AVX512BF16))
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX512BF16;
if (((processor_alias_table[i].flags & PTA_SGX) != 0)
&& !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_SGX))
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_SGX;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index ad6c36b..3fee779 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -193,6 +193,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define TARGET_CLDEMOTE_P(x) TARGET_ISA_CLDEMOTE_P(x)
#define TARGET_PTWRITE TARGET_ISA_PTWRITE
#define TARGET_PTWRITE_P(x) TARGET_ISA_PTWRITE_P(x)
+#define TARGET_AVX512BF16 TARGET_ISA_AVX512BF16
+#define TARGET_AVX512BF16_P(x) TARGET_ISA_AVX512BF16_P(x)
#define TARGET_LP64 TARGET_ABI_64
#define TARGET_LP64_P(x) TARGET_ABI_64_P(x)
@@ -2355,6 +2357,7 @@ const wide_int_bitmask PTA_PCONFIG (0, HOST_WIDE_INT_1U << 7);
const wide_int_bitmask PTA_WBNOINVD (0, HOST_WIDE_INT_1U << 8);
const wide_int_bitmask PTA_WAITPKG (0, HOST_WIDE_INT_1U << 9);
const wide_int_bitmask PTA_PTWRITE (0, HOST_WIDE_INT_1U << 10);
+const wide_int_bitmask PTA_AVX512BF16 (0, HOST_WIDE_INT_1U << 11);
const wide_int_bitmask PTA_CORE2 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
| PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR;
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 5fb2ec6..8f3dcf9 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1101,3 +1101,8 @@ Enum(instrument_return) String(nop5) Value(instrument_return_nop5)
mrecord-return
Target Report Var(ix86_flag_record_return) Init(0)
Generate a __return_loc section pointing to all return instrumentation code.
+
+mavx512bf16
+Target Report Mask(ISA_AVX512BF16) Var(ix86_isa_flags2) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F and
+AVX512BF16 built-in functions and code generation.
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index 10e1f27..d99886a 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -130,6 +130,10 @@
#include <cldemoteintrin.h>
+#include <avx512bf16vlintrin.h>
+
+#include <avx512bf16intrin.h>
+
#include <rdseedintrin.h>
#include <prfchwintrin.h>
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 6b8298d..11363de 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -187,6 +187,11 @@
;; For AVX512BITALG support
UNSPEC_VPSHUFBIT
+
+ ;; For AVX512BF16 support
+ UNSPEC_VCVTNE2PS2BF16
+ UNSPEC_VCVTNEPS2BF16
+ UNSPEC_VDPBF16PS
])
(define_c_enum "unspecv" [
@@ -726,6 +731,15 @@
(V16SF "hi") (V8SF "qi") (V4SF "qi")
(V8DF "qi") (V4DF "qi") (V2DF "qi")])
+;; Mapping of vector modes to corresponding mask half size
+(define_mode_attr avx512fmaskhalfmode
+ [(V64QI "SI") (V32QI "HI") (V16QI "QI")
+ (V32HI "HI") (V16HI "QI") (V8HI "QI") (V4HI "QI")
+ (V16SI "QI") (V8SI "QI") (V4SI "QI")
+ (V8DI "QI") (V4DI "QI") (V2DI "QI")
+ (V16SF "QI") (V8SF "QI") (V4SF "QI")
+ (V8DF "QI") (V4DF "QI") (V2DF "QI")])
+
;; Mapping of vector float modes to an integer mode of the same size
(define_mode_attr sseintvecmode
[(V16SF "V16SI") (V8DF "V8DI")
@@ -22184,3 +22198,90 @@
"vpshufbitqmb\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
[(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+
+(define_mode_iterator BF16 [V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
+;; Converting from BF to SF
+(define_mode_attr bf16_cvt_2sf
+ [(V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")])
+;; Converting from SF to BF
+(define_mode_attr sf_cvt_bf16
+ [(V4SF "V8HI") (V8SF "V8HI") (V16SF "V16HI")])
+;; Mapping from BF to SF
+(define_mode_attr sf_bf16
+ [(V4SF "V8HI") (V8SF "V16HI") (V16SF "V32HI")])
+
+(define_expand "avx512f_cvtne2ps2bf16_<mode>_maskz"
+ [(match_operand:BF16 0 "register_operand")
+ (match_operand:<bf16_cvt_2sf> 1 "register_operand")
+ (match_operand:<bf16_cvt_2sf> 2 "register_operand")
+ (match_operand:<avx512fmaskmode> 3 "register_operand")]
+ "TARGET_AVX512BF16"
+{
+ emit_insn (gen_avx512f_cvtne2ps2bf16_<mode>_mask(operands[0], operands[1],
+ operands[2], CONST0_RTX(<MODE>mode), operands[3]));
+ DONE;
+})
+
+(define_insn "avx512f_cvtne2ps2bf16_<mode><mask_name>"
+ [(set (match_operand:BF16 0 "register_operand" "=v")
+ (unspec:BF16
+ [(match_operand:<bf16_cvt_2sf> 1 "register_operand" "v")
+ (match_operand:<bf16_cvt_2sf> 2 "register_operand" "v")]
+ UNSPEC_VCVTNE2PS2BF16))]
+ "TARGET_AVX512BF16"
+ "vcvtne2ps2bf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}")
+
+(define_expand "avx512f_cvtneps2bf16_<mode>_maskz"
+ [(match_operand:<sf_cvt_bf16> 0 "register_operand")
+ (match_operand:VF1_AVX512VL 1 "register_operand")
+ (match_operand:<avx512fmaskmode> 2 "register_operand")]
+ "TARGET_AVX512BF16"
+{
+ emit_insn (gen_avx512f_cvtneps2bf16_<mode>_mask(operands[0], operands[1],
+ CONST0_RTX(<sf_cvt_bf16>mode), operands[2]));
+ DONE;
+})
+
+(define_insn "avx512f_cvtneps2bf16_<mode><mask_name>"
+ [(set (match_operand:<sf_cvt_bf16> 0 "register_operand" "=v")
+ (unspec:<sf_cvt_bf16>
+ [(match_operand:VF1_AVX512VL 1 "register_operand" "v")]
+ UNSPEC_VCVTNEPS2BF16))]
+ "TARGET_AVX512BF16"
+ "vcvtneps2bf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
+
+(define_expand "avx512f_dpbf16ps_<mode>_maskz"
+ [(match_operand:VF1_AVX512VL 0 "register_operand")
+ (match_operand:VF1_AVX512VL 1 "register_operand")
+ (match_operand:<sf_bf16> 2 "register_operand")
+ (match_operand:<sf_bf16> 3 "register_operand")
+ (match_operand:<avx512fmaskhalfmode> 4 "register_operand")]
+ "TARGET_AVX512BF16"
+{
+ emit_insn (gen_avx512f_dpbf16ps_<mode>_maskz_1(operands[0], operands[1],
+ operands[2], operands[3], CONST0_RTX(<MODE>mode), operands[4]));
+ DONE;
+})
+
+(define_insn "avx512f_dpbf16ps_<mode><maskz_half_name>"
+ [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
+ (unspec:VF1_AVX512VL
+ [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
+ (match_operand:<sf_bf16> 2 "register_operand" "v")
+ (match_operand:<sf_bf16> 3 "register_operand" "v")]
+ UNSPEC_VDPBF16PS))]
+ "TARGET_AVX512BF16"
+ "vdpbf16ps\t{%3, %2, %0<maskz_half_operand4>|%0<maskz_half_operand4>, %2, %3}")
+
+(define_insn "avx512f_dpbf16ps_<mode>_mask"
+ [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:VF1_AVX512VL
+ (unspec:VF1_AVX512VL
+ [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
+ (match_operand:<sf_bf16> 2 "register_operand" "v")
+ (match_operand:<sf_bf16> 3 "register_operand" "v")]
+ UNSPEC_VDPBF16PS)
+ (match_dup 1)
+ (match_operand:<avx512fmaskhalfmode> 4 "register_operand" "Yk")))]
+ "TARGET_AVX512BF16"
+ "vdpbf16ps\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}")
diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md
index 99198a3..dd58905 100644
--- a/gcc/config/i386/subst.md
+++ b/gcc/config/i386/subst.md
@@ -313,3 +313,16 @@
(const_int 1))
(match_operand:SI 3 "const48_operand")]
UNSPEC_EMBEDDED_ROUNDING))])
+
+(define_subst_attr "maskz_half_name" "maskz_half" "" "_maskz_1")
+(define_subst_attr "maskz_half_operand4" "maskz_half" "" "%{%5%}%N4")
+
+(define_subst "maskz_half"
+ [(set (match_operand:SUBST_V 0)
+ (match_operand:SUBST_V 1))]
+ ""
+ [(set (match_dup 0)
+ (vec_merge:SUBST_V
+ (match_dup 1)
+ (match_operand:SUBST_V 2 "const0_operand" "C")
+ (match_operand:<avx512fmaskhalfmode> 3 "register_operand" "Yk")))])
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index de7e1aa..8e4a8a8 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1274,7 +1274,7 @@ See RS/6000 and PowerPC Options.
-msse4a -m3dnow -m3dnowa -mpopcnt -mabm -mbmi -mtbm -mfma4 -mxop @gol
-madx -mlzcnt -mbmi2 -mfxsr -mxsave -mxsaveopt -mrtm -mhle -mlwp @gol
-mmwaitx -mclzero -mpku -mthreads -mgfni -mvaes -mwaitpkg @gol
--mshstk -mmanual-endbr -mforce-indirect-call -mavx512vbmi2 @gol
+-mshstk -mmanual-endbr -mforce-indirect-call -mavx512vbmi2 -mavx512bf16 @gol
-mvpclmulqdq -mavx512bitalg -mmovdiri -mmovdir64b -mavx512vpopcntdq @gol
-mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid @gol
-mrdseed -msgx @gol
@@ -28041,6 +28041,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
@itemx -mavx512vbmi2
@opindex mavx512vbmi2
@need 200
+@itemx -mavx512bf16
+@opindex mavx512bf16
+@need 200
@itemx -mgfni
@opindex mgfni
@need 200
@@ -28083,7 +28086,7 @@ AES, PCLMUL, CLFLUSHOPT, CLWB, FSGSBASE, PTWRITE, RDRND, F16C, FMA, PCONFIG,
WBNOINVD, FMA4, PREFETCHW, RDPID, PREFETCHWT1, RDSEED, SGX, XOP, LWP,
3DNow!@:, enhanced 3DNow!@:, POPCNT, ABM, ADX, BMI, BMI2, LZCNT, FXSR, XSAVE,
XSAVEOPT, XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2,
-GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B,
+GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16
AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, or CLDEMOTE
extended instruction sets. Each has a corresponding @option{-mno-} option to
disable use of these instructions.
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 8e2a7c5..d50e4e0 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -12,6 +12,23 @@
* gcc.dg/tree-ssa/pr90356-3.c: New test.
* gcc.dg/tree-ssa/pr90356-4.c: New test.
+2019-05-07 Wei Xiao <wei3.xiao@intel.com>
+
+ * gcc.target/i386/avx512bf16-vcvtne2ps2bf16-1.c: New test.
+ * gcc.target/i386/avx512bf16-vcvtneps2bf16-1.c: New test.
+ * gcc.target/i386/avx512bf16-vdpbf16ps-1.c: New test.
+ * gcc.target/i386/avx512bf16vl-vcvtne2ps2bf16-1.c: New test.
+ * gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1.c: New test.
+ * gcc.target/i386/avx512bf16vl-vdpbf16ps-1.c: New test.
+ * gcc.target/i386/builtin_target.c: Handle avx512bf16.
+ * gcc.target/i386/sse-12.c: Add -mavx512bf16.
+ * gcc.target/i386/sse-13.c: Ditto.
+ * gcc.target/i386/sse-14.c: Ditto.
+ * gcc.target/i386/sse-22.c: Ditto.
+ * gcc.target/i386/sse-23.c: Ditto.
+ * g++.dg/other/i386-2.C: Ditto.
+ * g++.dg/other/i386-3.C: Ditto.
+
2019-05-07 Cherry Zhang <cherryyz@google.com>
* go.dg/arrayclear.go: New test.
diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C
index a70d9f4..f7a564b 100644
--- a/gcc/testsuite/g++.dg/other/i386-2.C
+++ b/gcc/testsuite/g++.dg/other/i386-2.C
@@ -1,5 +1,5 @@
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd" } */
+/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C
index 73eb5e7..4d6f94f 100644
--- a/gcc/testsuite/g++.dg/other/i386-3.C
+++ b/gcc/testsuite/g++.dg/other/i386-3.C
@@ -1,5 +1,5 @@
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd" } */
+/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
diff --git a/gcc/testsuite/gcc.target/i386/avx512bf16-vcvtne2ps2bf16-1.c b/gcc/testsuite/gcc.target/i386/avx512bf16-vcvtne2ps2bf16-1.c
new file mode 100644
index 0000000..6d19459
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bf16-vcvtne2ps2bf16-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bf16 -O2" } */
+/* { dg-final { scan-assembler-times "vcvtne2ps2bf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ps2bf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ps2bf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512bh res;
+volatile __m512 x1, x2;
+volatile __mmask32 m32;
+
+void extern
+avx512bf16_test (void)
+{
+ res = _mm512_cvtne2ps_pbh (x1, x2);
+ res = _mm512_mask_cvtne2ps_pbh (res, m32, x1, x2);
+ res = _mm512_maskz_cvtne2ps_pbh (m32, x1, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bf16-vcvtneps2bf16-1.c b/gcc/testsuite/gcc.target/i386/avx512bf16-vcvtneps2bf16-1.c
new file mode 100644
index 0000000..99ba4ee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bf16-vcvtneps2bf16-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bf16 -O2" } */
+/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256bh res;
+volatile __m512 x1;
+volatile __mmask16 m16;
+
+void extern
+avx512bf16_test (void)
+{
+ res = _mm512_cvtneps_pbh (x1);
+ res = _mm512_mask_cvtneps_pbh (res, m16, x1);
+ res = _mm512_maskz_cvtneps_pbh (m16, x1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bf16-vdpbf16ps-1.c b/gcc/testsuite/gcc.target/i386/avx512bf16-vdpbf16ps-1.c
new file mode 100644
index 0000000..d9ad444
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bf16-vdpbf16ps-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bf16 -O2" } */
+/* { dg-final { scan-assembler-times "vdpbf16ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vdpbf16ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vdpbf16ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 res;
+volatile __m512bh x1, x2;
+volatile __mmask16 m16;
+
+void extern
+avx512bf16_test (void)
+{
+ res = _mm512_dpbf16_ps (res, x1, x2);
+ res = _mm512_mask_dpbf16_ps (res, m16, x1, x2);
+ res = _mm512_maskz_dpbf16_ps (m16, res, x1, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bf16-vdpbf16ps-2.c b/gcc/testsuite/gcc.target/i386/avx512bf16-vdpbf16ps-2.c
new file mode 100644
index 0000000..b64ad7b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bf16-vdpbf16ps-2.c
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bf16 -O2" } */
+/* { dg-final { scan-assembler-times "vdpbf16ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+typedef union
+{
+ __m512 x;
+ float a[16];
+} union512s;
+
+float res_ref[16];
+union512s res;
+__m512bh x1, x2;
+__mmask16 m16;
+
+static void __attribute__((noinline, unused))
+merge_masking_s (float *arr, unsigned long long mask, int size)
+{
+ int i;
+ for (i = 0; i < size; i++)
+ {
+ arr[i] = (mask & (1LL << i)) ? arr[i] : 117;
+ }
+}
+
+static int __attribute__((noinline, unused))
+check_union512s (union512s u, const float *v)
+{
+ int i;
+ int err = 0;
+ for (i = 0; i < (sizeof (u.a) / sizeof ((u.a)[0])); i++)
+ if (u.a[i] != v[i])
+ {
+ err++;
+ ;
+ }
+ return err;
+}
+
+void extern
+avx512bf16_test (void)
+{
+ res.x = _mm512_mask_dpbf16_ps (res.x, m16, x1, x2);
+ merge_masking_s (res_ref, m16, 16);
+ if (check_union512s (res, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bf16vl-vcvtne2ps2bf16-1.c b/gcc/testsuite/gcc.target/i386/avx512bf16vl-vcvtne2ps2bf16-1.c
new file mode 100644
index 0000000..f0ec70f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bf16vl-vcvtne2ps2bf16-1.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bf16 -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcvtne2ps2bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ps2bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ps2bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ps2bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ps2bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ps2bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128bh res1;
+volatile __m256bh res2;
+volatile __m128 x1, x2;
+volatile __m256 x3, x4;
+volatile __mmask8 m8;
+volatile __mmask16 m16;
+
+void extern
+avx512bf16_test (void)
+{
+ res2 = _mm256_cvtne2ps_pbh (x3, x4);
+ res2 = _mm256_mask_cvtne2ps_pbh (res2, m16, x3, x4);
+ res2 = _mm256_maskz_cvtne2ps_pbh (m16, x3, x4);
+
+ res1 = _mm_cvtne2ps_pbh (x1, x2);
+ res1 = _mm_mask_cvtne2ps_pbh (res1, m8, x1, x2);
+ res1 = _mm_maskz_cvtne2ps_pbh (m8, x1, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1.c b/gcc/testsuite/gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1.c
new file mode 100644
index 0000000..0969ae1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bf16 -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128bh res1, res2;
+volatile __m128 x1;
+volatile __m256 x2;
+volatile __mmask8 m8;
+
+void extern
+avx512bf16_test (void)
+{
+ res2 = _mm256_cvtneps_pbh (x2);
+ res2 = _mm256_mask_cvtneps_pbh (res2, m8, x2);
+ res2 = _mm256_maskz_cvtneps_pbh (m8, x2);
+
+ res1 = _mm_cvtneps_pbh (x1);
+ res1 = _mm_mask_cvtneps_pbh (res1, m8, x1);
+ res1 = _mm_maskz_cvtneps_pbh (m8, x1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bf16vl-vdpbf16ps-1.c b/gcc/testsuite/gcc.target/i386/avx512bf16vl-vdpbf16ps-1.c
new file mode 100644
index 0000000..1837462
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bf16vl-vdpbf16ps-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bf16 -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vdpbf16ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vdpbf16ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vdpbf16ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vdpbf16ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vdpbf16ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vdpbf16ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 res1;
+volatile __m256bh x1, x2;
+volatile __m128 res2;
+volatile __m128bh x3, x4;
+volatile __mmask8 m8;
+
+void extern
+avx512bf16_test (void)
+{
+ res1 = _mm256_dpbf16_ps (res1, x1, x2);
+ res1 = _mm256_mask_dpbf16_ps (res1, m8, x1, x2);
+ res1 = _mm256_maskz_dpbf16_ps (m8, res1, x1, x2);
+
+ res2 = _mm_dpbf16_ps (res2, x3, x4);
+ res2 = _mm_mask_dpbf16_ps (res2, m8, x3, x4);
+ res2 = _mm_maskz_dpbf16_ps (m8, res2, x3, x4);
+}
diff --git a/gcc/testsuite/gcc.target/i386/builtin_target.c b/gcc/testsuite/gcc.target/i386/builtin_target.c
index d396266..7a8b6e8 100644
--- a/gcc/testsuite/gcc.target/i386/builtin_target.c
+++ b/gcc/testsuite/gcc.target/i386/builtin_target.c
@@ -265,6 +265,10 @@ check_features (unsigned int ecx, unsigned int edx,
assert (__builtin_cpu_supports ("avx5124vnniw"));
if (edx & bit_AVX5124FMAPS)
assert (__builtin_cpu_supports ("avx5124fmaps"));
+
+ __cpuid_count (7, 1, eax, ebx, ecx, edx);
+ if (eax & bit_AVX512BF16)
+ assert (__builtin_cpu_supports ("avx512bf16"));
}
/* Check cpuid level of extended features. */
diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c
index f7f55f4..6066973 100644
--- a/gcc/testsuite/gcc.target/i386/sse-12.c
+++ b/gcc/testsuite/gcc.target/i386/sse-12.c
@@ -3,7 +3,7 @@
popcntintrin.h gfniintrin.h and mm_malloc.h are usable
with -O -std=c89 -pedantic-errors. */
/* { dg-do compile } */
-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd" } */
+/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16" } */
#include <x86intrin.h>
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index e868f6d..2b48c455 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
index 748339f..ae7d3d5 100644
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
index 0c62f20..733c670 100644
--- a/gcc/testsuite/gcc.target/i386/sse-22.c
+++ b/gcc/testsuite/gcc.target/i386/sse-22.c
@@ -101,7 +101,7 @@
#ifndef DIFFERENT_PRAGMAS
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16")
#endif
/* Following intrinsics require immediate arguments. They
@@ -218,7 +218,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1)
/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
#ifdef DIFFERENT_PRAGMAS
-#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg")
+#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16")
#endif
#include <immintrin.h>
test_1 (_cvtss_sh, unsigned short, float, 1)
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index 78a3c0a..58f4c82 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -696,6 +696,6 @@
#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1)
#define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1)
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16")
#include <x86intrin.h>
diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog
index 30e4da4..c3c7a16 100644
--- a/libgcc/ChangeLog
+++ b/libgcc/ChangeLog
@@ -1,3 +1,9 @@
+2019-05-07 Hongtao Liu <hongtao.liu@intel.com>
+
+ * config/i386/cpuinfo.c (get_available_features): Detect BF16.
+ * config/i386/cpuinfo.h (enum processor_features): Add
+ FEATURE_AVX512BF16.
+
2019-04-23 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
Bernd Edlinger <bernd.edlinger@hotmail.de>
Jakub Jelinek <jakub@redhat.com>
diff --git a/libgcc/config/i386/cpuinfo.c b/libgcc/config/i386/cpuinfo.c
index d6cb2de..5659ec8 100644
--- a/libgcc/config/i386/cpuinfo.c
+++ b/libgcc/config/i386/cpuinfo.c
@@ -336,7 +336,7 @@ get_available_features (unsigned int ecx, unsigned int edx,
set_feature (FEATURE_FMA);
}
- /* Get Advanced Features at level 7 (eax = 7, ecx = 0). */
+ /* Get Advanced Features at level 7 (eax = 7, ecx = 0/1). */
if (max_cpuid_level >= 7)
{
__cpuid_count (7, 0, eax, ebx, ecx, edx);
@@ -385,6 +385,10 @@ get_available_features (unsigned int ecx, unsigned int edx,
set_feature (FEATURE_AVX5124VNNIW);
if (edx & bit_AVX5124FMAPS)
set_feature (FEATURE_AVX5124FMAPS);
+
+ __cpuid_count (7, 1, eax, ebx, ecx, edx);
+ if (eax & bit_AVX512BF16)
+ set_feature (FEATURE_AVX512BF16);
}
}
diff --git a/libgcc/config/i386/cpuinfo.h b/libgcc/config/i386/cpuinfo.h
index b4492eb..68ca466 100644
--- a/libgcc/config/i386/cpuinfo.h
+++ b/libgcc/config/i386/cpuinfo.h
@@ -119,7 +119,8 @@ enum processor_features
FEATURE_GFNI,
FEATURE_VPCLMULQDQ,
FEATURE_AVX512VNNI,
- FEATURE_AVX512BITALG
+ FEATURE_AVX512BITALG,
+ FEATURE_AVX512BF16
};
extern struct __processor_model