aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJulia Koval <julia.koval@intel.com>2017-12-22 13:37:16 +0100
committerKirill Yukhin <kyukhin@gcc.gnu.org>2017-12-22 12:37:16 +0000
commite2a29465e91c75b337aabd5886af982653faf00e (patch)
treeb8fee74f68676ef891dd3ffc540bff331f528c36
parentfefab9536e9d986ed0ffbdeeb0ef851578385564 (diff)
downloadgcc-e2a29465e91c75b337aabd5886af982653faf00e.zip
gcc-e2a29465e91c75b337aabd5886af982653faf00e.tar.gz
gcc-e2a29465e91c75b337aabd5886af982653faf00e.tar.bz2
Enable AVX512BITALG
gcc/ * common/config/i386/i386-common.c (OPTION_MASK_ISA_AVX512BITALG_SET, OPTION_MASK_ISA_AVX512BITALG_UNSET): New. (ix86_handle_option): Handle -mavx512bitalg, fix 4VNNIW formatting. * config.gcc: Add avx512vpopcntdqvlintrin.h and avx512bitalgintrin.h. * config/i386/avx512bitalgintrin.h (_mm512_popcnt_epi8, _mm512_popcnt_epi16, _mm512_mask_popcnt_epi8, _mm512_maskz_popcnt_epi8, _mm512_mask_popcnt_epi16, _mm512_maskz_popcnt_epi16, _mm512_bitshuffle_epi64_mask, _mm256_popcnt_epi8, _mm512_mask_bitshuffle_epi64_mask, _mm256_mask_popcnt_epi8, _mm_popcnt_epi8, _mm256_maskz_popcnt_epi8, _mm_bitshuffle_epi64_mask, _mm256_popcnt_epi16, _mm_mask_bitshuffle_epi64_mask, _mm256_bitshuffle_epi64_mask, _mm256_mask_bitshuffle_epi64_mask, _mm_popcnt_epi16, _mm_maskz_popcnt_epi8, _mm256_mask_popcnt_epi16, _mm256_maskz_popcnt_epi16, _mm_mask_popcnt_epi8, _mm_mask_popcnt_epi16, _mm_maskz_popcnt_epi16): New intrinsics. * config/i386/avx512vpopcntdqvlintrin.h (_mm_popcnt_epi32, _mm_popcnt_epi64, _mm_mask_popcnt_epi32, _mm_maskz_popcnt_epi32, _mm256_popcnt_epi32, _mm256_mask_popcnt_epi32, _mm256_maskz_popcnt_epi32, _mm_mask_popcnt_epi64, _mm_maskz_popcnt_epi64, _mm256_popcnt_epi64, _mm256_mask_popcnt_epi64, _mm256_maskz_popcnt_epi64): New intrinsics. * config/i386/cpuid.h (bit_AVX512BITALG): New bit. * config/i386/driver-i386.c (host_detect_local_cpu): Detect -mavx512bitalg. * config/i386/i386-builtin-types.def (V64QI_FTYPE_V64QI, V64QI_FTYPE_V64QI, V4DI_FTYPE_V4DI, UHI_FTYPE_V2DI_V2DI_UHI, USI_FTYPE_V4DI_V4DI_USI, V4SI_FTYPE_V4SI_V4SI_UHI, V8SI_FTYPE_V8SI_V8SI_UHI): New types. * config/i386/i386-builtin.def (__builtin_ia32_vpopcountq_v4di, __builtin_ia32_vpopcountq_v4di_mask, __builtin_ia32_vpopcountq_v2di, __builtin_ia32_vpopcountq_v2di_mask, __builtin_ia32_vpopcountd_v4si, __builtin_ia32_vpopcountd_v4si_mask, __builtin_ia32_vpopcountd_v8si, __builtin_ia32_vpopcountd_v8si_mask, __builtin_ia32_vpopcountb_v64qi, __builtin_ia32_vpopcountb_v64qi_mask, __builtin_ia32_vpopcountb_v32qi, __builtin_ia32_vpopcountb_v32qi_mask, __builtin_ia32_vpopcountb_v16qi, __builtin_ia32_vpopcountb_v16qi_mask, __builtin_ia32_vpopcountw_v32hi, __builtin_ia32_vpopcountw_v32hi_mask, __builtin_ia32_vpopcountw_v16hi, __builtin_ia32_vpopcountw_v16hi_mask, __builtin_ia32_vpopcountw_v8hi, __builtin_ia32_vpopcountw_v8hi_mask, __builtin_ia32_vpshufbitqmb128_mask, __builtin_ia32_vpshufbitqmb256_mask, __builtin_ia32_vpshufbitqmb512_mask): New builtins. * config/i386/i386-c.c (__AVX512BITALG__): New. * config/i386/i386.c (isa2_opts): Add -mavx512bitalg. (ix86_valid_target_attribute_inner_p): Ditto. (ix86_expand_args_builtin): Handle new types. * config/i386/i386.h (TARGET_AVX512BITALG, TARGET_AVX512BITALG_P): New. * config/i386/i386.opt: Add -mavx512bitalg. * config/i386/immintrin.h: Add avx512vpopcntdqvlintrin.h and avx512bitalgintrin.h. * config/i386/sse.md (VI48_AVX512VLBW): New iterator. (vpopcount<mode><mask_name>): Add more types. (avx512vl_vpshufbitqmb<mode><mask_scalar_merge_name>): New. * doc/invoke.texi: Add -mavx512bitalg and -mavx512vpopcntdq. gcc/testsuite/ * g++.dg/other/i386-2.C: Add new options. * g++.dg/other/i386-3.C: Ditto. * gcc.target/i386/sse-12.c: Ditto. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-22.c: Ditto. * gcc.target/i386/sse-23.c: Ditto. * gcc.target/i386/avx512-check.h: Handle bit_AVX512BITALG. * gcc.target/i386/avx512bitalg-vpopcntb-1.c: New. * gcc.target/i386/avx512bitalg-vpopcntb.c: Ditto. * gcc.target/i386/avx512bitalg-vpopcntbvl.c: Ditto. * gcc.target/i386/avx512bitalg-vpopcntw-1.c: Ditto. * gcc.target/i386/avx512bitalg-vpopcntw.c: Ditto. * gcc.target/i386/avx512bitalg-vpopcntwvl.c: Ditto. * gcc.target/i386/avx512bitalg-vpshufbitqmb-1.c: Ditto. * gcc.target/i386/avx512bitalg-vpshufbitqmb.c: Ditto. * gcc.target/i386/avx512bitalgvl-vpopcntb-1.c: Ditto. * gcc.target/i386/avx512bitalgvl-vpopcntw-1.c: Ditto. * gcc.target/i386/avx512bitalgvl-vpshufbitqmb-1.c: Ditto. * gcc.target/i386/avx512vpopcntdqvl-vpopcntd-1.c: Ditto. * gcc.target/i386/avx512vpopcntdqvl-vpopcntq-1.c: Ditto. * gcc.target/i386/i386.exp (check_effective_target_avx512bitalg): New. * gcc.target/i386/avx512vpopcntdq-vpopcntd-1.c: Add more types. * gcc.target/i386/avx512vpopcntdq-vpopcntd.c: Handle new intrinsics. * gcc.target/i386/avx512vpopcntdq-vpopcntq-1.c: Ditto. * gcc.target/i386/avx512vpopcntdq-vpopcntq.c: Ditto. Co-Authored-By: Sebastian Peryt <sebastian.peryt@intel.com> From-SVN: r255975
-rw-r--r--gcc/ChangeLog52
-rw-r--r--gcc/common/config/i386/i386-common.c30
-rw-r--r--gcc/config.gcc6
-rw-r--r--gcc/config/i386/avx512bitalgintrin.h282
-rw-r--r--gcc/config/i386/avx512vpopcntdqvlintrin.h147
-rw-r--r--gcc/config/i386/cpuid.h1
-rw-r--r--gcc/config/i386/driver-i386.c6
-rw-r--r--gcc/config/i386/i386-builtin-types.def9
-rw-r--r--gcc/config/i386/i386-builtin.def28
-rw-r--r--gcc/config/i386/i386-c.c2
-rw-r--r--gcc/config/i386/i386.c11
-rw-r--r--gcc/config/i386/i386.h2
-rw-r--r--gcc/config/i386/i386.opt4
-rw-r--r--gcc/config/i386/immintrin.h4
-rw-r--r--gcc/config/i386/sse.md31
-rw-r--r--gcc/doc/invoke.texi11
-rw-r--r--gcc/testsuite/ChangeLog29
-rw-r--r--gcc/testsuite/g++.dg/other/i386-2.C6
-rw-r--r--gcc/testsuite/g++.dg/other/i386-3.C7
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512-check.h3
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb-1.c57
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntbvl.c31
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw-1.c57
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntwvl.c31
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bitalg-vpshufbitqmb-1.c61
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bitalg-vpshufbitqmb.c29
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpopcntb-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpopcntw-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpshufbitqmb-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntd-1.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntd.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntq-1.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntq.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vpopcntdqvl-vpopcntd-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vpopcntdqvl-vpopcntq-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/i386.exp13
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-12.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-13.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-22.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-23.c8
42 files changed, 1122 insertions, 46 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index efef50a..0847188 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,55 @@
+2017-12-22 Julia Koval <julia.koval@intel.com>
+ Sebastian Peryt <sebastian.peryt@intel.com>
+
+ * common/config/i386/i386-common.c (OPTION_MASK_ISA_AVX512BITALG_SET,
+ OPTION_MASK_ISA_AVX512BITALG_UNSET): New.
+ (ix86_handle_option): Handle -mavx512bitalg, fix 4VNNIW formatting.
+ * config.gcc: Add avx512vpopcntdqvlintrin.h and avx512bitalgintrin.h.
+ * config/i386/avx512bitalgintrin.h (_mm512_popcnt_epi8, _mm512_popcnt_epi16,
+ _mm512_mask_popcnt_epi8, _mm512_maskz_popcnt_epi8, _mm512_mask_popcnt_epi16,
+ _mm512_maskz_popcnt_epi16, _mm512_bitshuffle_epi64_mask, _mm256_popcnt_epi8,
+ _mm512_mask_bitshuffle_epi64_mask, _mm256_mask_popcnt_epi8, _mm_popcnt_epi8,
+ _mm256_maskz_popcnt_epi8, _mm_bitshuffle_epi64_mask, _mm256_popcnt_epi16,
+ _mm_mask_bitshuffle_epi64_mask, _mm256_bitshuffle_epi64_mask,
+ _mm256_mask_bitshuffle_epi64_mask, _mm_popcnt_epi16, _mm_maskz_popcnt_epi8,
+ _mm256_mask_popcnt_epi16, _mm256_maskz_popcnt_epi16, _mm_mask_popcnt_epi8,
+ _mm_mask_popcnt_epi16, _mm_maskz_popcnt_epi16): New intrinsics.
+ * config/i386/avx512vpopcntdqvlintrin.h (_mm_popcnt_epi32, _mm_popcnt_epi64,
+ _mm_mask_popcnt_epi32, _mm_maskz_popcnt_epi32, _mm256_popcnt_epi32,
+ _mm256_mask_popcnt_epi32, _mm256_maskz_popcnt_epi32, _mm_mask_popcnt_epi64,
+ _mm_maskz_popcnt_epi64, _mm256_popcnt_epi64, _mm256_mask_popcnt_epi64,
+ _mm256_maskz_popcnt_epi64): New intrinsics.
+ * config/i386/cpuid.h (bit_AVX512BITALG): New bit.
+ * config/i386/driver-i386.c (host_detect_local_cpu): Detect -mavx512bitalg.
+ * config/i386/i386-builtin-types.def (V64QI_FTYPE_V64QI, V64QI_FTYPE_V64QI,
+ V4DI_FTYPE_V4DI, UHI_FTYPE_V2DI_V2DI_UHI, USI_FTYPE_V4DI_V4DI_USI,
+ V4SI_FTYPE_V4SI_V4SI_UHI, V8SI_FTYPE_V8SI_V8SI_UHI): New types.
+ * config/i386/i386-builtin.def (__builtin_ia32_vpopcountq_v4di,
+ __builtin_ia32_vpopcountq_v4di_mask, __builtin_ia32_vpopcountq_v2di,
+ __builtin_ia32_vpopcountq_v2di_mask, __builtin_ia32_vpopcountd_v4si,
+ __builtin_ia32_vpopcountd_v4si_mask, __builtin_ia32_vpopcountd_v8si,
+ __builtin_ia32_vpopcountd_v8si_mask, __builtin_ia32_vpopcountb_v64qi,
+ __builtin_ia32_vpopcountb_v64qi_mask, __builtin_ia32_vpopcountb_v32qi,
+ __builtin_ia32_vpopcountb_v32qi_mask, __builtin_ia32_vpopcountb_v16qi,
+ __builtin_ia32_vpopcountb_v16qi_mask, __builtin_ia32_vpopcountw_v32hi,
+ __builtin_ia32_vpopcountw_v32hi_mask, __builtin_ia32_vpopcountw_v16hi,
+ __builtin_ia32_vpopcountw_v16hi_mask, __builtin_ia32_vpopcountw_v8hi,
+ __builtin_ia32_vpopcountw_v8hi_mask, __builtin_ia32_vpshufbitqmb128_mask,
+ __builtin_ia32_vpshufbitqmb256_mask,
+ __builtin_ia32_vpshufbitqmb512_mask): New builtins.
+ * config/i386/i386-c.c (__AVX512BITALG__): New.
+ * config/i386/i386.c (isa2_opts): Add -mavx512bitalg.
+ (ix86_valid_target_attribute_inner_p): Ditto.
+ (ix86_expand_args_builtin): Handle new types.
+ * config/i386/i386.h (TARGET_AVX512BITALG, TARGET_AVX512BITALG_P): New.
+ * config/i386/i386.opt: Add -mavx512bitalg.
+ * config/i386/immintrin.h: Add avx512vpopcntdqvlintrin.h and
+ avx512bitalgintrin.h.
+ * config/i386/sse.md (VI48_AVX512VLBW): New iterator.
+ (vpopcount<mode><mask_name>): Add more types.
+ (avx512vl_vpshufbitqmb<mode><mask_scalar_merge_name>): New.
+ * doc/invoke.texi: Add -mavx512bitalg and -mavx512vpopcntdq.
+
2017-12-22 Igor Tsimbalist <igor.v.tsimbalist@intel.com>
* common/config/i386/i386-common.c (OPTION_MASK_ISA_AVX512VNNI_SET):
diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c
index 421b450..73224f6 100644
--- a/gcc/common/config/i386/i386-common.c
+++ b/gcc/common/config/i386/i386-common.c
@@ -85,6 +85,7 @@ along with GCC; see the file COPYING3. If not see
#define OPTION_MASK_ISA_AVX512VNNI_SET \
(OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512F_SET)
#define OPTION_MASK_ISA_AVX512VPOPCNTDQ_SET OPTION_MASK_ISA_AVX512VPOPCNTDQ
+#define OPTION_MASK_ISA_AVX512BITALG_SET OPTION_MASK_ISA_AVX512BITALG
#define OPTION_MASK_ISA_RTM_SET OPTION_MASK_ISA_RTM
#define OPTION_MASK_ISA_PRFCHW_SET OPTION_MASK_ISA_PRFCHW
#define OPTION_MASK_ISA_RDSEED_SET OPTION_MASK_ISA_RDSEED
@@ -201,6 +202,7 @@ along with GCC; see the file COPYING3. If not see
#define OPTION_MASK_ISA_AVX512VBMI2_UNSET OPTION_MASK_ISA_AVX512VBMI2
#define OPTION_MASK_ISA_AVX512VNNI_UNSET OPTION_MASK_ISA_AVX512VNNI
#define OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET OPTION_MASK_ISA_AVX512VPOPCNTDQ
+#define OPTION_MASK_ISA_AVX512BITALG_UNSET OPTION_MASK_ISA_AVX512BITALG
#define OPTION_MASK_ISA_RTM_UNSET OPTION_MASK_ISA_RTM
#define OPTION_MASK_ISA_PRFCHW_UNSET OPTION_MASK_ISA_PRFCHW
#define OPTION_MASK_ISA_RDSEED_UNSET OPTION_MASK_ISA_RDSEED
@@ -444,11 +446,17 @@ ix86_handle_option (struct gcc_options *opts,
/* Turn off additional isa flags. */
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA_AVX5124FMAPS_UNSET;
- opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_AVX5124FMAPS_UNSET;
+ opts->x_ix86_isa_flags2_explicit
+ |= OPTION_MASK_ISA_AVX5124FMAPS_UNSET;
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA_AVX5124VNNIW_UNSET;
- opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_AVX5124VNNIW_UNSET;
+ opts->x_ix86_isa_flags2_explicit
+ |= OPTION_MASK_ISA_AVX5124VNNIW_UNSET;
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET;
- opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET;
+ opts->x_ix86_isa_flags2_explicit
+ |= OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET;
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA_AVX512BITALG_UNSET;
+ opts->x_ix86_isa_flags2_explicit
+ |= OPTION_MASK_ISA_AVX512BITALG_UNSET;
}
return true;
@@ -643,6 +651,22 @@ ix86_handle_option (struct gcc_options *opts,
}
return true;
+ case OPT_mavx512bitalg:
+ if (value)
+ {
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX512BITALG_SET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_AVX512BITALG_SET;
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F_SET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512F_SET;
+ }
+ else
+ {
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA_AVX512BITALG_UNSET;
+ opts->x_ix86_isa_flags2_explicit
+ |= OPTION_MASK_ISA_AVX512BITALG_UNSET;
+ }
+ return true;
+
case OPT_msgx:
if (value)
{
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 1a0d676..8dc83c8 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -382,7 +382,8 @@ i[34567]86-*-*)
gfniintrin.h cet.h avx512vbmi2intrin.h
avx512vbmi2vlintrin.h avx512vnniintrin.h
avx512vnnivlintrin.h gfniintrin.h vaesintrin.h
- vpclmulqdqintrin.h"
+ vpclmulqdqintrin.h avx512vpopcntdqvlintrin.h
+ avx512bitalgintrin.h"
;;
x86_64-*-*)
cpu_type=i386
@@ -410,7 +411,8 @@ x86_64-*-*)
gfniintrin.h cet.h avx512vbmi2intrin.h
avx512vbmi2vlintrin.h avx512vnniintrin.h
avx512vnnivlintrin.h gfniintrin.h vaesintrin.h
- vpclmulqdqintrin.h"
+ vpclmulqdqintrin.h gfniintrin.h
+ avx512vpopcntdqvlintrin.h avx512bitalgintrin.h"
;;
ia64-*-*)
extra_headers=ia64intrin.h
diff --git a/gcc/config/i386/avx512bitalgintrin.h b/gcc/config/i386/avx512bitalgintrin.h
new file mode 100644
index 0000000..b507707
--- /dev/null
+++ b/gcc/config/i386/avx512bitalgintrin.h
@@ -0,0 +1,282 @@
+/* Copyright (C) 2017 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _IMMINTRIN_H_INCLUDED
+# error "Never use <avx512bitalgintrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef _AVX512BITALGINTRIN_H_INCLUDED
+#define _AVX512BITALGINTRIN_H_INCLUDED
+
+#ifndef __AVX512BITALG__
+#pragma GCC push_options
+#pragma GCC target("avx512bitalg")
+#define __DISABLE_AVX512BITALG__
+#endif /* __AVX512BITALG__ */
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_popcnt_epi8 (__m512i __A)
+{
+ return (__m512i) __builtin_ia32_vpopcountb_v64qi ((__v64qi) __A);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_popcnt_epi16 (__m512i __A)
+{
+ return (__m512i) __builtin_ia32_vpopcountw_v32hi ((__v32hi) __A);
+}
+
+#ifdef __DISABLE_AVX512BITALG__
+#undef __DISABLE_AVX512BITALG__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512BITALG__ */
+
+#if !defined(__AVX512BITALG__) || !defined(__AVX512BW__)
+#pragma GCC push_options
+#pragma GCC target("avx512bitalg,avx512bw")
+#define __DISABLE_AVX512BITALGBW__
+#endif /* __AVX512VLBW__ */
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_popcnt_epi8 (__m512i __A, __mmask64 __U, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vpopcountb_v64qi_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_popcnt_epi8 (__mmask64 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_vpopcountb_v64qi_mask ((__v64qi) __A,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __U);
+}
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_popcnt_epi16 (__m512i __A, __mmask32 __U, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vpopcountw_v32hi_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_popcnt_epi16 (__mmask32 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_vpopcountw_v32hi_mask ((__v32hi) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __mmask64
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_bitshuffle_epi64_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__mmask64) -1);
+}
+
+extern __inline __mmask64
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_bitshuffle_epi64_mask (__mmask8 __M, __m512i __A, __m512i __B)
+{
+ return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__mmask64) __M);
+}
+
+#ifdef __DISABLE_AVX512BITALGBW__
+#undef __DISABLE_AVX512BITALGBW__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512BITALGBW__ */
+
+#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__) || !defined(__AVX512BW__)
+#pragma GCC push_options
+#pragma GCC target("avx512bitalg,avx512vl,avx512bw")
+#define __DISABLE_AVX512BITALGVLBW__
+#endif /* __AVX512VLBW__ */
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_popcnt_epi8 (__m256i __A, __mmask32 __U, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_popcnt_epi8 (__mmask32 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+extern __inline __mmask16
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_bitshuffle_epi64_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_bitshuffle_epi64_mask (__mmask16 __M, __m128i __A, __m128i __B)
+{
+ return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__mmask16) __M);
+}
+
+extern __inline __mmask32
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_bitshuffle_epi64_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__mmask32) -1);
+}
+
+extern __inline __mmask32
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_bitshuffle_epi64_mask (__mmask32 __M, __m256i __A, __m256i __B)
+{
+ return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__mmask32) __M);
+}
+
+#ifdef __DISABLE_AVX512BITALGVLBW__
+#undef __DISABLE_AVX512BITALGVLBW__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512BITALGVLBW__ */
+
+
+#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__)
+#pragma GCC push_options
+#pragma GCC target("avx512bitalg,avx512vl")
+#define __DISABLE_AVX512BITALGVL__
+#endif /* __AVX512VLBW__ */
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_popcnt_epi8 (__m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcountb_v32qi ((__v32qi) __A);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_popcnt_epi16 (__m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcountw_v16hi ((__v16hi) __A);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_popcnt_epi8 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcountb_v16qi ((__v16qi) __A);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_popcnt_epi16 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcountw_v8hi ((__v8hi) __A);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_popcnt_epi16 (__m256i __A, __mmask16 __U, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_popcnt_epi16 (__mmask16 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_popcnt_epi8 (__m128i __A, __mmask16 __U, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_popcnt_epi8 (__mmask16 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_popcnt_epi16 (__m128i __A, __mmask8 __U, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_popcnt_epi16 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+#ifdef __DISABLE_AVX512BITALGVL__
+#undef __DISABLE_AVX512BITALGVL__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512BITALGBW__ */
+
+#endif /* _AVX512BITALGINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx512vpopcntdqvlintrin.h b/gcc/config/i386/avx512vpopcntdqvlintrin.h
new file mode 100644
index 0000000..c8f5717
--- /dev/null
+++ b/gcc/config/i386/avx512vpopcntdqvlintrin.h
@@ -0,0 +1,147 @@
+/* Copyright (C) 2017 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _IMMINTRIN_H_INCLUDED
+# error "Never use <avx512vpopcntdqvlintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED
+#define _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED
+
+#if !defined(__AVX512VPOPCNTDQ__) || !defined(__AVX512VL__)
+#pragma GCC push_options
+#pragma GCC target("avx512vpopcntdq,avx512vl")
+#define __DISABLE_AVX512VPOPCNTDQVL__
+#endif /* __AVX512VPOPCNTDQVL__ */
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_popcnt_epi32 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcountd_v4si ((__v4si) __A);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_popcnt_epi32 (__m128i __A, __mmask16 __U, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpopcountd_v4si_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_popcnt_epi32 (__mmask16 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcountd_v4si_mask ((__v4si) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_popcnt_epi32 (__m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcountd_v8si ((__v8si) __A);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_popcnt_epi32 (__m256i __A, __mmask16 __U, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpopcountd_v8si_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_popcnt_epi32 (__mmask16 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcountd_v8si_mask ((__v8si) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_popcnt_epi64 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcountq_v2di ((__v2di) __A);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_popcnt_epi64 (__m128i __A, __mmask8 __U, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpopcountq_v2di_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_popcnt_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcountq_v2di_mask ((__v2di) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_popcnt_epi64 (__m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcountq_v4di ((__v4di) __A);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_popcnt_epi64 (__m256i __A, __mmask8 __U, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpopcountq_v4di_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_popcnt_epi64 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcountq_v4di_mask ((__v4di) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+#ifdef __DISABLE_AVX512VPOPCNTDQVL__
+#undef __DISABLE_AVX512VPOPCNTDQVL__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512VPOPCNTDQVL__ */
+
+#endif /* _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED */
+
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index 37f3e1a..1660d26 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -103,6 +103,7 @@
#define bit_VAES (1 << 9)
#define bit_AVX512VNNI (1 << 11)
#define bit_VPCLMULQDQ (1 << 10)
+#define bit_AVX512BITALG (1 << 12)
#define bit_AVX512VPOPCNTDQ (1 << 14)
#define bit_RDPID (1 << 22)
diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
index 99826fd..1e06936 100644
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -418,6 +418,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
unsigned int has_mwaitx = 0, has_clzero = 0, has_pku = 0, has_rdpid = 0;
unsigned int has_avx5124fmaps = 0, has_avx5124vnniw = 0;
unsigned int has_gfni = 0, has_avx512vbmi2 = 0;
+ unsigned int has_avx512bitalg = 0;
unsigned int has_ibt = 0, has_shstk = 0;
unsigned int has_avx512vnni = 0, has_vaes = 0;
unsigned int has_vpclmulqdq = 0;
@@ -515,6 +516,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
has_gfni = ecx & bit_GFNI;
has_vaes = ecx & bit_VAES;
has_vpclmulqdq = ecx & bit_VPCLMULQDQ;
+ has_avx512bitalg = ecx & bit_AVX512BITALG;
has_avx5124vnniw = edx & bit_AVX5124VNNIW;
has_avx5124fmaps = edx & bit_AVX5124FMAPS;
@@ -1083,6 +1085,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
const char *shstk = has_shstk ? " -mshstk" : " -mno-shstk";
const char *vaes = has_vaes ? " -mvaes" : " -mno-vaes";
const char *vpclmulqdq = has_vpclmulqdq ? " -mvpclmulqdq" : " -mno-vpclmulqdq";
+ const char *avx512bitalg = has_avx512bitalg ? " -mavx512bitalg" : " -mno-avx512bitalg";
options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
sse4a, cx16, sahf, movbe, aes, sha, pclmul,
popcnt, abm, lwp, fma, fma4, xop, bmi, sgx, bmi2,
@@ -1093,7 +1096,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
xsavec, xsaves, avx512dq, avx512bw, avx512vl,
avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw,
clwb, mwaitx, clzero, pku, rdpid, gfni, ibt, shstk,
- avx512vbmi2, avx512vnni, vaes, vpclmulqdq, NULL);
+ avx512vbmi2, avx512vnni, vaes, vpclmulqdq,
+ avx512bitalg, NULL);
}
done:
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 1423f3e..9ecdcc0 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -309,6 +309,8 @@ DEF_FUNCTION_TYPE (V16SI, V4SI)
DEF_FUNCTION_TYPE (V16SI, V8SI)
DEF_FUNCTION_TYPE (V16SI, V16SF)
DEF_FUNCTION_TYPE (V16SI, V16SI)
+DEF_FUNCTION_TYPE (V32HI, V32HI)
+DEF_FUNCTION_TYPE (V64QI, V64QI)
DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, UHI)
DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, UQI)
DEF_FUNCTION_TYPE (V8DI, PV8DI)
@@ -1256,3 +1258,10 @@ DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, V4DI, INT)
DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI)
DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI, INT)
DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, V2DI, INT)
+
+# BITALG builtins
+DEF_FUNCTION_TYPE (UHI, V2DI, V2DI, UHI)
+DEF_FUNCTION_TYPE (USI, V4DI, V4DI, USI)
+DEF_FUNCTION_TYPE (V4DI, V4DI)
+DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, UHI)
+DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, UHI)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 3365cea..2c6ea3c 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -2788,6 +2788,16 @@ BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, CODE_FOR_vpopcountv16si_mask, "__builtin
BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, CODE_FOR_vpopcountv8di, "__builtin_ia32_vpopcountq_v8di", IX86_BUILTIN_VPOPCOUNTQV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI)
BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, CODE_FOR_vpopcountv8di_mask, "__builtin_ia32_vpopcountq_v8di_mask", IX86_BUILTIN_VPOPCOUNTQV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv4di, "__builtin_ia32_vpopcountq_v4di", IX86_BUILTIN_VPOPCOUNTQV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv4di_mask, "__builtin_ia32_vpopcountq_v4di_mask", IX86_BUILTIN_VPOPCOUNTQV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv2di, "__builtin_ia32_vpopcountq_v2di", IX86_BUILTIN_VPOPCOUNTQV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv2di_mask, "__builtin_ia32_vpopcountq_v2di_mask", IX86_BUILTIN_VPOPCOUNTQV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv4si, "__builtin_ia32_vpopcountd_v4si", IX86_BUILTIN_VPOPCOUNTDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv4si_mask, "__builtin_ia32_vpopcountd_v4si_mask", IX86_BUILTIN_VPOPCOUNTDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv8si, "__builtin_ia32_vpopcountd_v8si", IX86_BUILTIN_VPOPCOUNTDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv8si_mask, "__builtin_ia32_vpopcountd_v8si_mask", IX86_BUILTIN_VPOPCOUNTDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UHI)
+
+
/* RDPID */
BDESC (OPTION_MASK_ISA_RDPID, CODE_FOR_rdpid, "__builtin_ia32_rdpid", IX86_BUILTIN_RDPID, UNKNOWN, (int) UNSIGNED_FTYPE_VOID)
@@ -2805,6 +2815,24 @@ BDESC (OPTION_MASK_ISA_VAES, CODE_FOR_vaesenclast_v16qi, "__builtin_ia32_vaesenc
BDESC (OPTION_MASK_ISA_VAES, CODE_FOR_vaesenclast_v32qi, "__builtin_ia32_vaesenclast_v32qi", IX86_BUILTIN_VAESENCLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
BDESC (OPTION_MASK_ISA_VAES, CODE_FOR_vaesenclast_v64qi, "__builtin_ia32_vaesenclast_v64qi", IX86_BUILTIN_VAESENCLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
+/* BITALG */
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv64qi, "__builtin_ia32_vpopcountb_v64qi", IX86_BUILTIN_VPOPCOUNTBV64QI, UNKNOWN, (int) V64QI_FTYPE_V64QI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv64qi_mask, "__builtin_ia32_vpopcountb_v64qi_mask", IX86_BUILTIN_VPOPCOUNTBV64QI_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv32qi, "__builtin_ia32_vpopcountb_v32qi", IX86_BUILTIN_VPOPCOUNTBV32QI, UNKNOWN, (int) V32QI_FTYPE_V32QI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv32qi_mask, "__builtin_ia32_vpopcountb_v32qi_mask", IX86_BUILTIN_VPOPCOUNTBV32QI_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv16qi, "__builtin_ia32_vpopcountb_v16qi", IX86_BUILTIN_VPOPCOUNTBV16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv16qi_mask, "__builtin_ia32_vpopcountb_v16qi_mask", IX86_BUILTIN_VPOPCOUNTBV16QI_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI)
+
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv32hi, "__builtin_ia32_vpopcountw_v32hi", IX86_BUILTIN_VPOPCOUNTWV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv32hi_mask, "__builtin_ia32_vpopcountw_v32hi_mask", IX86_BUILTIN_VPOPCOUNTQV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv16hi, "__builtin_ia32_vpopcountw_v16hi", IX86_BUILTIN_VPOPCOUNTWV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv16hi_mask, "__builtin_ia32_vpopcountw_v16hi_mask", IX86_BUILTIN_VPOPCOUNTQV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv8hi, "__builtin_ia32_vpopcountw_v8hi", IX86_BUILTIN_VPOPCOUNTWV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv8hi_mask, "__builtin_ia32_vpopcountw_v8hi_mask", IX86_BUILTIN_VPOPCOUNTQV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI)
+
+BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpshufbitqmbv2di_mask, "__builtin_ia32_vpshufbitqmb128_mask", IX86_BUILTIN_VPSHUFBITQMB128_MASK, UNKNOWN, (int) UHI_FTYPE_V2DI_V2DI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpshufbitqmbv4di_mask, "__builtin_ia32_vpshufbitqmb256_mask", IX86_BUILTIN_VPSHUFBITQMB256_MASK, UNKNOWN, (int) USI_FTYPE_V4DI_V4DI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_avx512vl_vpshufbitqmbv8di_mask, "__builtin_ia32_vpshufbitqmb512_mask", IX86_BUILTIN_VPSHUFBITQMB512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI)
BDESC_END (ARGS2, MPX)
/* Builtins for MPX. */
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index d9de37b..dbd5f43 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -402,6 +402,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__SGX__");
if (isa_flag2 & OPTION_MASK_ISA_AVX5124FMAPS)
def_or_undef (parse_in, "__AVX5124FMAPS__");
+ if (isa_flag2 & OPTION_MASK_ISA_AVX512BITALG)
+ def_or_undef (parse_in, "__AVX512BITALG__");
if (isa_flag2 & OPTION_MASK_ISA_AVX512VPOPCNTDQ)
def_or_undef (parse_in, "__AVX512VPOPCNTDQ__");
if (isa_flag & OPTION_MASK_ISA_FMA)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 575e75a..7b055d1 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2763,7 +2763,8 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2,
{ "-mhle", OPTION_MASK_ISA_HLE },
{ "-mmovbe", OPTION_MASK_ISA_MOVBE },
{ "-mclzero", OPTION_MASK_ISA_CLZERO },
- { "-mmwaitx", OPTION_MASK_ISA_MWAITX }
+ { "-mmwaitx", OPTION_MASK_ISA_MWAITX },
+ { "-mavx512bitalg", OPTION_MASK_ISA_AVX512BITALG }
};
static struct ix86_target_opts isa_opts[] =
{
@@ -5266,6 +5267,7 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
IX86_ATTR_ISA ("avx512vpopcntdq", OPT_mavx512vpopcntdq),
IX86_ATTR_ISA ("avx512vbmi2", OPT_mavx512vbmi2),
IX86_ATTR_ISA ("avx512vnni", OPT_mavx512vnni),
+ IX86_ATTR_ISA ("avx512bitalg", OPT_mavx512bitalg),
IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
@@ -33536,12 +33538,15 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V16SI_FTYPE_V4SI:
case V16SI_FTYPE_V16SF:
case V16SI_FTYPE_V16SI:
+ case V64QI_FTYPE_V64QI:
+ case V32HI_FTYPE_V32HI:
case V16SF_FTYPE_V16SF:
case V8DI_FTYPE_UQI:
case V8DI_FTYPE_V8DI:
case V8DF_FTYPE_V4DF:
case V8DF_FTYPE_V2DF:
case V8DF_FTYPE_V8DF:
+ case V4DI_FTYPE_V4DI:
nargs = 1;
break;
case V4SF_FTYPE_V4SF_VEC_MERGE:
@@ -33918,6 +33923,10 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case HI_FTYPE_V16SF_INT_UHI:
case QI_FTYPE_V8SF_INT_UQI:
case QI_FTYPE_V4SF_INT_UQI:
+ case UHI_FTYPE_V2DI_V2DI_UHI:
+ case USI_FTYPE_V4DI_V4DI_USI:
+ case V4SI_FTYPE_V4SI_V4SI_UHI:
+ case V8SI_FTYPE_V8SI_V8SI_UHI:
nargs = 3;
mask_pos = 1;
nargs_constant = 1;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 7da8573..3b953de 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -91,6 +91,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define TARGET_AVX512VPOPCNTDQ_P(x) TARGET_ISA_AVX512VPOPCNTDQ_P(x)
#define TARGET_AVX512VNNI TARGET_ISA_AVX512VNNI
#define TARGET_AVX512VNNI_P(x) TARGET_ISA_AVX512VNNI_P(x)
+#define TARGET_AVX512BITALG TARGET_ISA_AVX512BITALG
+#define TARGET_AVX512BITALG_P(x) TARGET_ISA_AVX512BITALG_P(x)
#define TARGET_FMA TARGET_ISA_FMA
#define TARGET_FMA_P(x) TARGET_ISA_FMA_P(x)
#define TARGET_SSE4A TARGET_ISA_SSE4A
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 9e7bcce..01cdac8 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -745,6 +745,10 @@ mavx512vnni
Target Report Mask(ISA_AVX512VNNI) Var(ix86_isa_flags) Save
Support AVX512VNNI built-in functions and code generation.
+mavx512bitalg
+Target Report Mask(ISA_AVX512BITALG) Var(ix86_isa_flags2) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F and AVX512BITALG built-in functions and code generation.
+
mfma
Target Report Mask(ISA_FMA) Var(ix86_isa_flags) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and FMA built-in functions and code generation.
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index 7fcaa69..0a68501 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -82,6 +82,10 @@
#include <avx512vnnivlintrin.h>
+#include <avx512vpopcntdqvlintrin.h>
+
+#include <avx512bitalgintrin.h>
+
#include <shaintrin.h>
#include <lzcntintrin.h>
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 20e7b16..f4f68eb 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -181,6 +181,9 @@
;; For VPCLMULQDQ support
UNSPEC_VPCLMULQDQ
+
+ ;; For AVX512BITALG support
+ UNSPEC_VPSHUFBIT
])
(define_c_enum "unspecv" [
@@ -501,6 +504,10 @@
(V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
(V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
+(define_mode_iterator VI48_AVX512VLBW
+ [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX512VL")
+ (V2DI "TARGET_AVX512VL")])
+
(define_mode_attr avx512
[(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
(V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
@@ -20023,9 +20030,9 @@
(set_attr ("mode") ("TI"))])
(define_insn "vpopcount<mode><mask_name>"
- [(set (match_operand:VI48_512 0 "register_operand" "=v")
- (popcount:VI48_512
- (match_operand:VI48_512 1 "nonimmediate_operand" "vm")))]
+ [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
+ (popcount:VI48_AVX512VL
+ (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
"TARGET_AVX512VPOPCNTDQ"
"vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
@@ -20066,6 +20073,13 @@
"TARGET_SSE && TARGET_64BIT"
"jmp\t%P1")
+(define_insn "vpopcount<mode><mask_name>"
+ [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
+ (popcount:VI12_AVX512VL
+ (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX512BITALG"
+ "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
+
(define_insn "vgf2p8affineinvqb_<mode><mask_name>"
[(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
(unspec:VI1_AVX512F [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v")
@@ -20514,3 +20528,14 @@
"TARGET_VPCLMULQDQ"
"vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "mode" "DI")])
+
+(define_insn "avx512vl_vpshufbitqmb<mode><mask_scalar_merge_name>"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VI48_AVX512VLBW 1 "register_operand" "v")
+ (match_operand:VI48_AVX512VLBW 2 "nonimmediate_operand" "vm")]
+ UNSPEC_VPSHUFBIT))]
+ "TARGET_AVX512BITALG"
+ "vpshufbitqmb\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 283eab8..b15cc44 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1208,7 +1208,7 @@ See RS/6000 and PowerPC Options.
-mlzcnt -mbmi2 -mfxsr -mxsave -mxsaveopt -mrtm -mlwp -mmpx @gol
-mmwaitx -mclzero -mpku -mthreads -mgfni -mvaes @gol
-mcet -mibt -mshstk -mforce-indirect-call -mavx512vbmi2 @gol
--mvpclmulqdq @gol
+-mvpclmulqdq -mavx512bitalg -mavx512vpopcntdq @gol
-mms-bitfields -mno-align-stringops -minline-all-stringops @gol
-minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
-mmemcpy-strategy=@var{strategy} -mmemset-strategy=@var{strategy} @gol
@@ -26165,12 +26165,19 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
@need 200
@itemx -mvpclmulqdq
@opindex mvpclmulqdq
+@need 200
+@itemx -mavx512bitalg
+@opindex mavx512bitalg
+@need 200
+@itemx -mavx512vpopcntdq
+@opindex mavx512vpopcntdq
These switches enable the use of instructions in the MMX, SSE,
SSE2, SSE3, SSSE3, SSE4.1, AVX, AVX2, AVX512F, AVX512PF, AVX512ER, AVX512CD,
SHA, AES, PCLMUL, FSGSBASE, RDRND, F16C, FMA, SSE4A, FMA4, XOP, LWP, ABM,
AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, BMI, BMI2, VAES,
FXSR, XSAVE, XSAVEOPT, LZCNT, RTM, MPX, MWAITX, PKU, IBT, SHSTK, AVX512VBMI2,
-GFNI, VPCLMULQDQ, 3DNow!@: or enhanced 3DNow!@: extended instruction sets.
+GFNI, VPCLMULQDQ, AVX512BITALG, AVX512VPOPCNTDQ3DNow!@: or enhanced 3DNow!@:
+extended instruction sets.
Each has a corresponding @option{-mno-} option to disable use of these
instructions.
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 91d1102..fb9c339 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,32 @@
+2017-12-22 Julia Koval <julia.koval@intel.com>
+ Sebastian Peryt <sebastian.peryt@intel.com>
+
+ * g++.dg/other/i386-2.C: Add new options.
+ * g++.dg/other/i386-3.C: Ditto.
+ * gcc.target/i386/sse-12.c: Ditto.
+ * gcc.target/i386/sse-13.c: Ditto.
+ * gcc.target/i386/sse-22.c: Ditto.
+ * gcc.target/i386/sse-23.c: Ditto.
+ * gcc.target/i386/avx512-check.h: Handle bit_AVX512BITALG.
+ * gcc.target/i386/avx512bitalg-vpopcntb-1.c: New.
+ * gcc.target/i386/avx512bitalg-vpopcntb.c: Ditto.
+ * gcc.target/i386/avx512bitalg-vpopcntbvl.c: Ditto.
+ * gcc.target/i386/avx512bitalg-vpopcntw-1.c: Ditto.
+ * gcc.target/i386/avx512bitalg-vpopcntw.c: Ditto.
+ * gcc.target/i386/avx512bitalg-vpopcntwvl.c: Ditto.
+ * gcc.target/i386/avx512bitalg-vpshufbitqmb-1.c: Ditto.
+ * gcc.target/i386/avx512bitalg-vpshufbitqmb.c: Ditto.
+ * gcc.target/i386/avx512bitalgvl-vpopcntb-1.c: Ditto.
+ * gcc.target/i386/avx512bitalgvl-vpopcntw-1.c: Ditto.
+ * gcc.target/i386/avx512bitalgvl-vpshufbitqmb-1.c: Ditto.
+ * gcc.target/i386/avx512vpopcntdqvl-vpopcntd-1.c: Ditto.
+ * gcc.target/i386/avx512vpopcntdqvl-vpopcntq-1.c: Ditto.
+ * gcc.target/i386/i386.exp (check_effective_target_avx512bitalg): New.
+ * gcc.target/i386/avx512vpopcntdq-vpopcntd-1.c: Add more types.
+ * gcc.target/i386/avx512vpopcntdq-vpopcntd.c: Handle new intrinsics.
+ * gcc.target/i386/avx512vpopcntdq-vpopcntq-1.c: Ditto.
+ * gcc.target/i386/avx512vpopcntdq-vpopcntq.c: Ditto.
+
2017-12-22 Mike Stump <mikestump@comcast.net>
Eric Botcazou <ebotcazou@adacore.com>
diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C
index 7e35e68..75a8c27 100644
--- a/gcc/testsuite/g++.dg/other/i386-2.C
+++ b/gcc/testsuite/g++.dg/other/i386-2.C
@@ -1,12 +1,12 @@
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni" } */
-
+/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
popcntintrin.h, fmaintrin.h, pkuintrin.h, avx5124fmapsintrin.h,
avx5124vnniwintrin.h, avx512vpopcntdqintrin.h gfniintrin.h
- and mm_malloc.h.h are usable with -O -pedantic-errors. */
+ avx512bitalgintrin.h and mm_malloc.h.h are usable with -O
+ -pedantic-errors. */
#include <x86intrin.h>
diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C
index 7e44d47..444c246 100644
--- a/gcc/testsuite/g++.dg/other/i386-3.C
+++ b/gcc/testsuite/g++.dg/other/i386-3.C
@@ -1,10 +1,11 @@
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni" } */
+/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
popcntintrin.h, fmaintrin.h, pkuintrin.h, avx5124fmapsintrin.h,
- avx5124vnniwintrin.h, avx512vpopcntdqintrin.h gfniintrin.h and
- mm_malloc.h are usable with -O -fkeep-inline-functions. */
+ avx5124vnniwintrin.h, avx512vpopcntdqintrin.h gfniintrin.h
+ avx512bitalgintrin.h and mm_malloc.h are usable with -O
+ -fkeep-inline-functions. */
#include <x86intrin.h>
diff --git a/gcc/testsuite/gcc.target/i386/avx512-check.h b/gcc/testsuite/gcc.target/i386/avx512-check.h
index 2d174f9..234e60c 100644
--- a/gcc/testsuite/gcc.target/i386/avx512-check.h
+++ b/gcc/testsuite/gcc.target/i386/avx512-check.h
@@ -75,6 +75,9 @@ main ()
#ifdef AVX512VPOPCNTDQ
&& (ecx & bit_AVX512VPOPCNTDQ)
#endif
+#ifdef AVX512BITALG
+ && (ecx & bit_AVX512BITALG)
+#endif
#ifdef GFNI
&& (ecx & bit_GFNI)
#endif
diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb-1.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb-1.c
new file mode 100644
index 0000000..2c1a9a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb-1.c
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bitalg" } */
+/* { dg-require-effective-target avx512bitalg } */
+
+#include "avx512f-helper.h"
+
+#define AVX512BITALG
+#define SIZE (AVX512F_LEN / 8)
+
+#include "avx512f-mask-type.h"
+
+#define TYPE char
+
+int
+CALC (TYPE v)
+{
+ int ret;
+ int i;
+
+ ret = 0;
+ for (i = 0; i < sizeof(v) * 8; i++)
+ if ((v & ((TYPE)1 << (TYPE) i)))
+ ret++;
+
+ return ret;
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_b) res1, res2, res3, src, src0;
+ MASK_TYPE mask = MASK_VALUE;
+ TYPE res_ref[SIZE];
+ src.x = INTRINSIC (_set1_epi8) (0x3D);
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ res_ref[i] = CALC (src.a[i]);
+ src0.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_popcnt_epi8) (src.x);
+ res2.x = INTRINSIC (_mask_popcnt_epi8) (src.x, mask, src0.x);
+ res3.x = INTRINSIC (_maskz_popcnt_epi8) (mask, src.x);
+
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb.c
new file mode 100644
index 0000000..b23da58
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bitalg -mavx512bw" } */
+/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <x86intrin.h>
+
+extern __m512i z, z1;
+
+int foo ()
+{
+ __mmask16 msk;
+ __m512i c = _mm512_popcnt_epi8 (z);
+ asm volatile ("" : "+v" (c));
+ c = _mm512_mask_popcnt_epi8 (z, msk, z1);
+ asm volatile ("" : "+v" (c));
+ c = _mm512_maskz_popcnt_epi8 (msk, z);
+ asm volatile ("" : "+v" (c));
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntbvl.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntbvl.c
new file mode 100644
index 0000000..e6d60f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntbvl.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bitalg -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <x86intrin.h>
+
+extern __m256i y, y_1;
+extern __m128i x, x_1;
+
+int foo ()
+{
+ __mmask32 msk32;
+ __mmask16 msk16;
+ __m256i c256 = _mm256_popcnt_epi8 (y);
+ asm volatile ("" : "+v" (c256));
+ c256 = _mm256_mask_popcnt_epi8 (y, msk32, y_1);
+ asm volatile ("" : "+v" (c256));
+ c256 = _mm256_maskz_popcnt_epi8 (msk32, y);
+ asm volatile ("" : "+v" (c256));
+ __m128i c128 = _mm_popcnt_epi8 (x);
+ asm volatile ("" : "+v" (c128));
+ c128 = _mm_mask_popcnt_epi8 (x, msk16, x_1);
+ asm volatile ("" : "+v" (c128));
+ c128 = _mm_maskz_popcnt_epi8 (msk16, x);
+ asm volatile ("" : "+v" (c128));
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw-1.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw-1.c
new file mode 100644
index 0000000..500b7f3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw-1.c
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bitalg" } */
+/* { dg-require-effective-target avx512bitalg } */
+
+#include "avx512f-helper.h"
+
+#define AVX512BITALG
+#define SIZE (AVX512F_LEN / 16)
+
+#include "avx512f-mask-type.h"
+
+#define TYPE short
+
+int
+CALC (TYPE v)
+{
+ int ret;
+ int i;
+
+ ret = 0;
+ for (i = 0; i < sizeof(v) * 8; i++)
+ if ((v & ((TYPE)1 << (TYPE) i)))
+ ret++;
+
+ return ret;
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3, src, src0;
+ MASK_TYPE mask = MASK_VALUE;
+ TYPE res_ref[SIZE];
+ src.x = INTRINSIC (_set1_epi8) (0x3D);
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ res_ref[i] = CALC (src.a[i]);
+ src0.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_popcnt_epi16) (src.x);
+ res2.x = INTRINSIC (_mask_popcnt_epi16) (src.x, mask, src0.x);
+ res3.x = INTRINSIC (_maskz_popcnt_epi16) (mask, src.x);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw.c
new file mode 100644
index 0000000..2c49583
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bitalg -mavx512bw" } */
+/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <x86intrin.h>
+
+extern __m512i z, z1;
+
+int foo ()
+{
+ __mmask16 msk;
+ __m512i c = _mm512_popcnt_epi16 (z);
+ asm volatile ("" : "+v" (c));
+ c = _mm512_mask_popcnt_epi16 (z, msk, z1);
+ asm volatile ("" : "+v" (c));
+ c = _mm512_maskz_popcnt_epi16 (msk, z);
+ asm volatile ("" : "+v" (c));
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntwvl.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntwvl.c
new file mode 100644
index 0000000..b55adc6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntwvl.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bitalg -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <x86intrin.h>
+
+extern __m256i y, y_1;
+extern __m128i x, x_1;
+
+int foo ()
+{
+ __mmask16 msk16;
+ __mmask8 msk8;
+ __m256i c256 = _mm256_popcnt_epi16 (y);
+ asm volatile ("" : "+v" (c256));
+ c256 = _mm256_mask_popcnt_epi16 (y, msk16, y_1);
+ asm volatile ("" : "+v" (c256));
+ c256 = _mm256_maskz_popcnt_epi16 (msk16, y);
+ asm volatile ("" : "+v" (c256));
+ __m128i c128 = _mm_popcnt_epi16 (x);
+ asm volatile ("" : "+v" (c128));
+ c128 = _mm_mask_popcnt_epi16 (x, msk8, x_1);
+ asm volatile ("" : "+v" (c128));
+ c128 = _mm_maskz_popcnt_epi16 (msk8, x);
+ asm volatile ("" : "+v" (c128));
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpshufbitqmb-1.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpshufbitqmb-1.c
new file mode 100644
index 0000000..2ee6ca6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpshufbitqmb-1.c
@@ -0,0 +1,61 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bitalg" } */
+/* { dg-require-effective-target avx512bitalg } */
+
+#include "avx512f-helper.h"
+
+#define AVX512BITALG
+#define SIZE (AVX512F_LEN / 8)
+
+#include "avx512f-mask-type.h"
+
+#define TYPE unsigned long long
+
+unsigned char
+CALC (TYPE a, TYPE b)
+{
+ unsigned char res = 0;
+ for (int i = 0; i < 8; i++)
+ {
+ unsigned char m = (b >> (64 - ((i+1)*8))) & 0x3F;
+ unsigned char bit = (a >> m) & 1;
+ res |= (bit << (8 - i - 1));
+ }
+
+ return res;
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_q) src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ TYPE res1, res2;
+ TYPE res_ref = 0;
+
+ src1.x = INTRINSIC (_set1_epi8) (0x13);
+ src2.x = INTRINSIC (_set1_epi8) (0x17);
+
+ src1.a[0] = 0xff;
+ src2.a[0] = 0xff;
+
+ for (int i = 0; i < SIZE/8; i++)
+ {
+ unsigned long long bit = CALC (src1.a[i], src2.a[i]);
+ res_ref |= ((unsigned long long)(CALC (src1.a[i], src2.a[i])) << (i*8));
+ }
+
+ res1 = INTRINSIC (_bitshuffle_epi64_mask) (src1.x, src2.x);
+ res2 = INTRINSIC (_mask_bitshuffle_epi64_mask) (mask, src1.x, src2.x);
+
+ if (res1 != res_ref)
+ abort();
+
+ for (int i = 0; i < SIZE; i++)
+ {
+ if (!((mask >> i) & 1))
+ res_ref &= ~((unsigned long long)1 <<i);
+ }
+ if (res2 != res_ref)
+ abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpshufbitqmb.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpshufbitqmb.c
new file mode 100644
index 0000000..52c84c8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpshufbitqmb.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bitalg -mavx512vl -mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "vpshufbitqmb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*k\[1-7\]\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpshufbitqmb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*k\[1-7\]\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpshufbitqmb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*k\[1-7\]\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpshufbitqmb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*k\[1-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpshufbitqmb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*k\[1-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpshufbitqmb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*k\[1-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i x128;
+volatile __m256i x256;
+volatile __m512i x512;
+
+volatile __mmask16 m16;
+volatile __mmask32 m32;
+volatile __mmask64 m64;
+
+void extern
+avx512vl_test (void)
+{
+ m16 = _mm_bitshuffle_epi64_mask (x128, x128);
+ m32 = _mm256_bitshuffle_epi64_mask (x256, x256);
+ m64 = _mm512_bitshuffle_epi64_mask (x512, x512);
+ m16 = _mm_mask_bitshuffle_epi64_mask (m16, x128, x128);
+ m32 = _mm256_mask_bitshuffle_epi64_mask (m32, x256, x256);
+ m64 = _mm512_mask_bitshuffle_epi64_mask (m64, x512, x512);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpopcntb-1.c b/gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpopcntb-1.c
new file mode 100644
index 0000000..a4e9d63
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpopcntb-1.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -mavx512bitalg" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bitalg } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bitalg-vpopcntb-1.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bitalg-vpopcntb-1.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpopcntw-1.c b/gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpopcntw-1.c
new file mode 100644
index 0000000..55fa811
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpopcntw-1.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -mavx512bitalg" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bitalg } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bitalg-vpopcntw-1.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bitalg-vpopcntw-1.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpshufbitqmb-1.c b/gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpshufbitqmb-1.c
new file mode 100644
index 0000000..497e369
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpshufbitqmb-1.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -mavx512bitalg" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bitalg } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bitalg-vpshufbitqmb-1.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bitalg-vpshufbitqmb-1.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntd-1.c b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntd-1.c
index d9faf0a..4fb949f 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntd-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntd-1.c
@@ -2,17 +2,17 @@
/* { dg-options "-O2 -mavx512vpopcntdq" } */
/* { dg-require-effective-target avx512vpopcntdq } */
-#define AVX512VPOPCNTDQ
#include "avx512f-helper.h"
+#define AVX512VPOPCNTDQ
#define SIZE (AVX512F_LEN / 32)
#include "avx512f-mask-type.h"
#define TYPE int
-static int
-compute_popcnt (TYPE v)
+int
+CALC (TYPE v)
{
int ret;
int i;
@@ -31,12 +31,12 @@ TEST (void)
UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3, src, src0;
MASK_TYPE mask = MASK_VALUE;
TYPE res_ref[SIZE];
- src.x = _mm512_set1_epi8 (0x3D);
+ src.x = INTRINSIC (_set1_epi8) (0x3D);
int i;
for (i = 0; i < SIZE; i++)
{
- res_ref[i] = compute_popcnt (src.a[i]);
+ res_ref[i] = CALC (src.a[i]);
src0.a[i] = DEFAULT_VALUE;
}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntd.c b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntd.c
index c55a05a..c70f226 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntd.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntd.c
@@ -1,19 +1,40 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -mavx512vpopcntdq" } */
+/* { dg-options "-O2 -mavx512vpopcntdq -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <x86intrin.h>
-extern __m512i z, z1;
+extern __m128i x, x_1;
+extern __m256i y, y_1;
+extern __m512i z, z_1;
int foo ()
{
__mmask16 msk;
+ __mmask8 msk8;
+ __m128i a = _mm_popcnt_epi32 (x);
+ asm volatile ("" : "+v" (a));
+ a = _mm_mask_popcnt_epi32 (x, msk8, x_1);
+ asm volatile ("" : "+v" (a));
+ a = _mm_maskz_popcnt_epi32 (msk8, x);
+ asm volatile ("" : "+v" (a));
+ __m256i b = _mm256_popcnt_epi32 (y);
+ asm volatile ("" : "+v" (b));
+ b = _mm256_mask_popcnt_epi32 (y, msk8, y_1);
+ asm volatile ("" : "+v" (b));
+ b = _mm256_maskz_popcnt_epi32 (msk8, y);
+ asm volatile ("" : "+v" (b));
__m512i c = _mm512_popcnt_epi32 (z);
asm volatile ("" : "+v" (c));
- c = _mm512_mask_popcnt_epi32 (z, msk, z1);
+ c = _mm512_mask_popcnt_epi32 (z, msk, z_1);
asm volatile ("" : "+v" (c));
c = _mm512_maskz_popcnt_epi32 (msk, z);
asm volatile ("" : "+v" (c));
diff --git a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntq-1.c b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntq-1.c
index 5a62821..cc0d8b8 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntq-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntq-1.c
@@ -2,17 +2,17 @@
/* { dg-options "-O2 -mavx512vpopcntdq" } */
/* { dg-require-effective-target avx512vpopcntdq } */
-#define AVX512VPOPCNTDQ
#include "avx512f-helper.h"
+#define AVX512VPOPCNTDQ
#define SIZE (AVX512F_LEN / 64)
#include "avx512f-mask-type.h"
#define TYPE long long
-static int
-compute_popcnt (TYPE v)
+int
+CALC (TYPE v)
{
int ret;
int i;
@@ -31,12 +31,12 @@ TEST (void)
UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3, src, src0;
MASK_TYPE mask = MASK_VALUE;
TYPE res_ref[SIZE];
- src.x = _mm512_set1_epi8 (0x3D);
+ src.x = INTRINSIC (_set1_epi8) (0x3D);
int i;
for (i = 0; i < SIZE; i++)
{
- res_ref[i] = compute_popcnt (src.a[i]);
+ res_ref[i] = CALC (src.a[i]);
src0.a[i] = DEFAULT_VALUE;
}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntq.c b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntq.c
index 2698ec3..9f400c0 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntq.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntq.c
@@ -1,20 +1,40 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -mavx512vpopcntdq" } */
+/* { dg-options "-O2 -mavx512vpopcntdq -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <x86intrin.h>
-extern __m512i z, z1;
+extern __m128i x, x_1;
+extern __m256i y, y_1;
+extern __m512i z, z_1;
int foo ()
{
__mmask8 msk;
+ __m128i a = _mm_popcnt_epi64 (x);
+ asm volatile ("" : "+v" (a));
+ a = _mm_mask_popcnt_epi64 (x, msk, x_1);
+ asm volatile ("" : "+v" (a));
+ a = _mm_maskz_popcnt_epi64 (msk, x);
+ asm volatile ("" : "+v" (a));
+ __m256i b = _mm256_popcnt_epi64 (y);
+ asm volatile ("" : "+v" (b));
+ b = _mm256_mask_popcnt_epi64 (y, msk, y_1);
+ asm volatile ("" : "+v" (b));
+ b = _mm256_maskz_popcnt_epi64 (msk, y);
+ asm volatile ("" : "+v" (b));
__m512i c = _mm512_popcnt_epi64 (z);
asm volatile ("" : "+v" (c));
- c = _mm512_mask_popcnt_epi64 (z, msk, z1);
+ c = _mm512_mask_popcnt_epi64 (z, msk, z_1);
asm volatile ("" : "+v" (c));
- c = _mm512_maskz_popcnt_epi64 (msk, z);
+ c = _mm512_maskz_popcnt_epi64 (msk, z);
asm volatile ("" : "+v" (c));
}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vpopcntdqvl-vpopcntd-1.c b/gcc/testsuite/gcc.target/i386/avx512vpopcntdqvl-vpopcntd-1.c
new file mode 100644
index 0000000..95e43ce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vpopcntdqvl-vpopcntd-1.c
@@ -0,0 +1,17 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vpopcntdq -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512vpopcntdq } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512vpopcntdq-vpopcntd-1.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512vpopcntdq-vpopcntd-1.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vpopcntdqvl-vpopcntq-1.c b/gcc/testsuite/gcc.target/i386/avx512vpopcntdqvl-vpopcntq-1.c
new file mode 100644
index 0000000..6e110e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vpopcntdqvl-vpopcntq-1.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -mavx512vpopcntdq" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512vpopcntdq } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512vpopcntdq-vpopcntq-1.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512vpopcntdq-vpopcntq-1.c"
diff --git a/gcc/testsuite/gcc.target/i386/i386.exp b/gcc/testsuite/gcc.target/i386/i386.exp
index 0b53023..79d97c3 100644
--- a/gcc/testsuite/gcc.target/i386/i386.exp
+++ b/gcc/testsuite/gcc.target/i386/i386.exp
@@ -483,6 +483,19 @@ proc check_effective_target_vpclmulqdq { } {
} "-mvpclmulqdq -mavx512vl" ]
}
+# Return 1 if avx512_bitalg instructions can be compiled.
+proc check_effective_target_avx512bitalg { } {
+ return [check_no_compiler_messages avx512bitalg object {
+ typedef int __v32hi __attribute__ ((__vector_size__ (64)));
+
+ __v32hi
+ _mm512_popcnt_epi16 (__v32hi __A)
+ {
+ return (__v32hi) __builtin_ia32_vpopcountd_v32hi ((__v32hi) __A);
+ }
+ } "-mavx512bitalg" ]
+}
+
# If a testcase doesn't have special options, use these.
global DEFAULT_CFLAGS
if ![info exists DEFAULT_CFLAGS] then {
diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c
index 82f5d3c..cd45096 100644
--- a/gcc/testsuite/gcc.target/i386/sse-12.c
+++ b/gcc/testsuite/gcc.target/i386/sse-12.c
@@ -3,7 +3,7 @@
popcntintrin.h gfniintrin.h and mm_malloc.h are usable
with -O -std=c89 -pedantic-errors. */
/* { dg-do compile } */
-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni" } */
+/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg" } */
#include <x86intrin.h>
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index 62f87f0..cc9d00a 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
index 3e64e29..99af58a 100644
--- a/gcc/testsuite/gcc.target/i386/sse-22.c
+++ b/gcc/testsuite/gcc.target/i386/sse-22.c
@@ -9,9 +9,9 @@
are defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h,
mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h,
tbmintrin.h, lwpintrin.h, popcntintrin.h, fmaintrin.h,
- avx5124fmapsintrin.h, avx5124vnniwintrin.h, avx512vpopcntdqintrin.h and
- mm_malloc.h that reference the proper builtin functions.
-
+ avx5124fmapsintrin.h, avx5124vnniwintrin.h, avx512vpopcntdqintrin.h,
+ avx512bitalgintrin.h and mm_malloc.h that reference the proper builtin
+ functions.
Defining away "extern" and "__inline" results in all of them being
compiled as proper functions. */
@@ -101,7 +101,7 @@
#ifndef DIFFERENT_PRAGMAS
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg")
#endif
/* Following intrinsics require immediate arguments. They
@@ -218,7 +218,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1)
/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
#ifdef DIFFERENT_PRAGMAS
-#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni")
+#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg")
#endif
#include <immintrin.h>
test_1 (_cvtss_sh, unsigned short, float, 1)
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index 65f6ccf..00d30ba 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -8,9 +8,9 @@
are defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h,
mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h,
tbmintrin.h, lwpintrin.h, popcntintrin.h, fmaintrin.h,
- avx5124fmapsintrin.h, avx5124vnniwintrin.h, avx512vpopcntdqintrin.h
- and mm_malloc.h that reference the proper builtin functions.
-
+ avx5124fmapsintrin.h, avx5124vnniwintrin.h, avx512vpopcntdqintrin.h,
+ avx512bitalgintrin.h and mm_malloc.h that reference the proper builtin
+ functions.
Defining away "extern" and "__inline" results in all of them being
compiled as proper functions. */
@@ -676,6 +676,6 @@
#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1)
#define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1)
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg")
#include <x86intrin.h>