aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/i386
diff options
context:
space:
mode:
authorJulia Koval <julia.koval@intel.com>2017-12-22 13:37:16 +0100
committerKirill Yukhin <kyukhin@gcc.gnu.org>2017-12-22 12:37:16 +0000
commite2a29465e91c75b337aabd5886af982653faf00e (patch)
treeb8fee74f68676ef891dd3ffc540bff331f528c36 /gcc/config/i386
parentfefab9536e9d986ed0ffbdeeb0ef851578385564 (diff)
downloadgcc-e2a29465e91c75b337aabd5886af982653faf00e.zip
gcc-e2a29465e91c75b337aabd5886af982653faf00e.tar.gz
gcc-e2a29465e91c75b337aabd5886af982653faf00e.tar.bz2
Enable AVX512BITALG
gcc/ * common/config/i386/i386-common.c (OPTION_MASK_ISA_AVX512BITALG_SET, OPTION_MASK_ISA_AVX512BITALG_UNSET): New. (ix86_handle_option): Handle -mavx512bitalg, fix 4VNNIW formatting. * config.gcc: Add avx512vpopcntdqvlintrin.h and avx512bitalgintrin.h. * config/i386/avx512bitalgintrin.h (_mm512_popcnt_epi8, _mm512_popcnt_epi16, _mm512_mask_popcnt_epi8, _mm512_maskz_popcnt_epi8, _mm512_mask_popcnt_epi16, _mm512_maskz_popcnt_epi16, _mm512_bitshuffle_epi64_mask, _mm256_popcnt_epi8, _mm512_mask_bitshuffle_epi64_mask, _mm256_mask_popcnt_epi8, _mm_popcnt_epi8, _mm256_maskz_popcnt_epi8, _mm_bitshuffle_epi64_mask, _mm256_popcnt_epi16, _mm_mask_bitshuffle_epi64_mask, _mm256_bitshuffle_epi64_mask, _mm256_mask_bitshuffle_epi64_mask, _mm_popcnt_epi16, _mm_maskz_popcnt_epi8, _mm256_mask_popcnt_epi16, _mm256_maskz_popcnt_epi16, _mm_mask_popcnt_epi8, _mm_mask_popcnt_epi16, _mm_maskz_popcnt_epi16): New intrinsics. * config/i386/avx512vpopcntdqvlintrin.h (_mm_popcnt_epi32, _mm_popcnt_epi64, _mm_mask_popcnt_epi32, _mm_maskz_popcnt_epi32, _mm256_popcnt_epi32, _mm256_mask_popcnt_epi32, _mm256_maskz_popcnt_epi32, _mm_mask_popcnt_epi64, _mm_maskz_popcnt_epi64, _mm256_popcnt_epi64, _mm256_mask_popcnt_epi64, _mm256_maskz_popcnt_epi64): New intrinsics. * config/i386/cpuid.h (bit_AVX512BITALG): New bit. * config/i386/driver-i386.c (host_detect_local_cpu): Detect -mavx512bitalg. * config/i386/i386-builtin-types.def (V64QI_FTYPE_V64QI, V64QI_FTYPE_V64QI, V4DI_FTYPE_V4DI, UHI_FTYPE_V2DI_V2DI_UHI, USI_FTYPE_V4DI_V4DI_USI, V4SI_FTYPE_V4SI_V4SI_UHI, V8SI_FTYPE_V8SI_V8SI_UHI): New types. * config/i386/i386-builtin.def (__builtin_ia32_vpopcountq_v4di, __builtin_ia32_vpopcountq_v4di_mask, __builtin_ia32_vpopcountq_v2di, __builtin_ia32_vpopcountq_v2di_mask, __builtin_ia32_vpopcountd_v4si, __builtin_ia32_vpopcountd_v4si_mask, __builtin_ia32_vpopcountd_v8si, __builtin_ia32_vpopcountd_v8si_mask, __builtin_ia32_vpopcountb_v64qi, __builtin_ia32_vpopcountb_v64qi_mask, __builtin_ia32_vpopcountb_v32qi, __builtin_ia32_vpopcountb_v32qi_mask, __builtin_ia32_vpopcountb_v16qi, __builtin_ia32_vpopcountb_v16qi_mask, __builtin_ia32_vpopcountw_v32hi, __builtin_ia32_vpopcountw_v32hi_mask, __builtin_ia32_vpopcountw_v16hi, __builtin_ia32_vpopcountw_v16hi_mask, __builtin_ia32_vpopcountw_v8hi, __builtin_ia32_vpopcountw_v8hi_mask, __builtin_ia32_vpshufbitqmb128_mask, __builtin_ia32_vpshufbitqmb256_mask, __builtin_ia32_vpshufbitqmb512_mask): New builtins. * config/i386/i386-c.c (__AVX512BITALG__): New. * config/i386/i386.c (isa2_opts): Add -mavx512bitalg. (ix86_valid_target_attribute_inner_p): Ditto. (ix86_expand_args_builtin): Handle new types. * config/i386/i386.h (TARGET_AVX512BITALG, TARGET_AVX512BITALG_P): New. * config/i386/i386.opt: Add -mavx512bitalg. * config/i386/immintrin.h: Add avx512vpopcntdqvlintrin.h and avx512bitalgintrin.h. * config/i386/sse.md (VI48_AVX512VLBW): New iterator. (vpopcount<mode><mask_name>): Add more types. (avx512vl_vpshufbitqmb<mode><mask_scalar_merge_name>): New. * doc/invoke.texi: Add -mavx512bitalg and -mavx512vpopcntdq. gcc/testsuite/ * g++.dg/other/i386-2.C: Add new options. * g++.dg/other/i386-3.C: Ditto. * gcc.target/i386/sse-12.c: Ditto. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-22.c: Ditto. * gcc.target/i386/sse-23.c: Ditto. * gcc.target/i386/avx512-check.h: Handle bit_AVX512BITALG. * gcc.target/i386/avx512bitalg-vpopcntb-1.c: New. * gcc.target/i386/avx512bitalg-vpopcntb.c: Ditto. * gcc.target/i386/avx512bitalg-vpopcntbvl.c: Ditto. * gcc.target/i386/avx512bitalg-vpopcntw-1.c: Ditto. * gcc.target/i386/avx512bitalg-vpopcntw.c: Ditto. * gcc.target/i386/avx512bitalg-vpopcntwvl.c: Ditto. * gcc.target/i386/avx512bitalg-vpshufbitqmb-1.c: Ditto. * gcc.target/i386/avx512bitalg-vpshufbitqmb.c: Ditto. * gcc.target/i386/avx512bitalgvl-vpopcntb-1.c: Ditto. * gcc.target/i386/avx512bitalgvl-vpopcntw-1.c: Ditto. * gcc.target/i386/avx512bitalgvl-vpshufbitqmb-1.c: Ditto. * gcc.target/i386/avx512vpopcntdqvl-vpopcntd-1.c: Ditto. * gcc.target/i386/avx512vpopcntdqvl-vpopcntq-1.c: Ditto. * gcc.target/i386/i386.exp (check_effective_target_avx512bitalg): New. * gcc.target/i386/avx512vpopcntdq-vpopcntd-1.c: Add more types. * gcc.target/i386/avx512vpopcntdq-vpopcntd.c: Handle new intrinsics. * gcc.target/i386/avx512vpopcntdq-vpopcntq-1.c: Ditto. * gcc.target/i386/avx512vpopcntdq-vpopcntq.c: Ditto. Co-Authored-By: Sebastian Peryt <sebastian.peryt@intel.com> From-SVN: r255975
Diffstat (limited to 'gcc/config/i386')
-rw-r--r--gcc/config/i386/avx512bitalgintrin.h282
-rw-r--r--gcc/config/i386/avx512vpopcntdqvlintrin.h147
-rw-r--r--gcc/config/i386/cpuid.h1
-rw-r--r--gcc/config/i386/driver-i386.c6
-rw-r--r--gcc/config/i386/i386-builtin-types.def9
-rw-r--r--gcc/config/i386/i386-builtin.def28
-rw-r--r--gcc/config/i386/i386-c.c2
-rw-r--r--gcc/config/i386/i386.c11
-rw-r--r--gcc/config/i386/i386.h2
-rw-r--r--gcc/config/i386/i386.opt4
-rw-r--r--gcc/config/i386/immintrin.h4
-rw-r--r--gcc/config/i386/sse.md31
12 files changed, 522 insertions, 5 deletions
diff --git a/gcc/config/i386/avx512bitalgintrin.h b/gcc/config/i386/avx512bitalgintrin.h
new file mode 100644
index 0000000..b507707
--- /dev/null
+++ b/gcc/config/i386/avx512bitalgintrin.h
@@ -0,0 +1,282 @@
+/* Copyright (C) 2017 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _IMMINTRIN_H_INCLUDED
+# error "Never use <avx512bitalgintrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef _AVX512BITALGINTRIN_H_INCLUDED
+#define _AVX512BITALGINTRIN_H_INCLUDED
+
+#ifndef __AVX512BITALG__
+#pragma GCC push_options
+#pragma GCC target("avx512bitalg")
+#define __DISABLE_AVX512BITALG__
+#endif /* __AVX512BITALG__ */
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_popcnt_epi8 (__m512i __A)
+{
+ return (__m512i) __builtin_ia32_vpopcountb_v64qi ((__v64qi) __A);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_popcnt_epi16 (__m512i __A)
+{
+ return (__m512i) __builtin_ia32_vpopcountw_v32hi ((__v32hi) __A);
+}
+
+#ifdef __DISABLE_AVX512BITALG__
+#undef __DISABLE_AVX512BITALG__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512BITALG__ */
+
+#if !defined(__AVX512BITALG__) || !defined(__AVX512BW__)
+#pragma GCC push_options
+#pragma GCC target("avx512bitalg,avx512bw")
+#define __DISABLE_AVX512BITALGBW__
+#endif /* __AVX512VLBW__ */
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_popcnt_epi8 (__m512i __A, __mmask64 __U, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vpopcountb_v64qi_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_popcnt_epi8 (__mmask64 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_vpopcountb_v64qi_mask ((__v64qi) __A,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __U);
+}
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_popcnt_epi16 (__m512i __A, __mmask32 __U, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vpopcountw_v32hi_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_popcnt_epi16 (__mmask32 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_vpopcountw_v32hi_mask ((__v32hi) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __mmask64
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_bitshuffle_epi64_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__mmask64) -1);
+}
+
+extern __inline __mmask64
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_bitshuffle_epi64_mask (__mmask8 __M, __m512i __A, __m512i __B)
+{
+ return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__mmask64) __M);
+}
+
+#ifdef __DISABLE_AVX512BITALGBW__
+#undef __DISABLE_AVX512BITALGBW__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512BITALGBW__ */
+
+#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__) || !defined(__AVX512BW__)
+#pragma GCC push_options
+#pragma GCC target("avx512bitalg,avx512vl,avx512bw")
+#define __DISABLE_AVX512BITALGVLBW__
+#endif /* __AVX512VLBW__ */
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_popcnt_epi8 (__m256i __A, __mmask32 __U, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_popcnt_epi8 (__mmask32 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+extern __inline __mmask16
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_bitshuffle_epi64_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_bitshuffle_epi64_mask (__mmask16 __M, __m128i __A, __m128i __B)
+{
+ return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__mmask16) __M);
+}
+
+extern __inline __mmask32
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_bitshuffle_epi64_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__mmask32) -1);
+}
+
+extern __inline __mmask32
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_bitshuffle_epi64_mask (__mmask32 __M, __m256i __A, __m256i __B)
+{
+ return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__mmask32) __M);
+}
+
+#ifdef __DISABLE_AVX512BITALGVLBW__
+#undef __DISABLE_AVX512BITALGVLBW__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512BITALGVLBW__ */
+
+
+#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__)
+#pragma GCC push_options
+#pragma GCC target("avx512bitalg,avx512vl")
+#define __DISABLE_AVX512BITALGVL__
+#endif /* __AVX512VLBW__ */
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_popcnt_epi8 (__m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcountb_v32qi ((__v32qi) __A);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_popcnt_epi16 (__m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcountw_v16hi ((__v16hi) __A);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_popcnt_epi8 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcountb_v16qi ((__v16qi) __A);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_popcnt_epi16 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcountw_v8hi ((__v8hi) __A);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_popcnt_epi16 (__m256i __A, __mmask16 __U, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_popcnt_epi16 (__mmask16 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_popcnt_epi8 (__m128i __A, __mmask16 __U, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_popcnt_epi8 (__mmask16 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_popcnt_epi16 (__m128i __A, __mmask8 __U, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_popcnt_epi16 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+#ifdef __DISABLE_AVX512BITALGVL__
+#undef __DISABLE_AVX512BITALGVL__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512BITALGBW__ */
+
+#endif /* _AVX512BITALGINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx512vpopcntdqvlintrin.h b/gcc/config/i386/avx512vpopcntdqvlintrin.h
new file mode 100644
index 0000000..c8f5717
--- /dev/null
+++ b/gcc/config/i386/avx512vpopcntdqvlintrin.h
@@ -0,0 +1,147 @@
+/* Copyright (C) 2017 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _IMMINTRIN_H_INCLUDED
+# error "Never use <avx512vpopcntdqvlintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED
+#define _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED
+
+#if !defined(__AVX512VPOPCNTDQ__) || !defined(__AVX512VL__)
+#pragma GCC push_options
+#pragma GCC target("avx512vpopcntdq,avx512vl")
+#define __DISABLE_AVX512VPOPCNTDQVL__
+#endif /* __AVX512VPOPCNTDQVL__ */
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_popcnt_epi32 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcountd_v4si ((__v4si) __A);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_popcnt_epi32 (__m128i __A, __mmask16 __U, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpopcountd_v4si_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_popcnt_epi32 (__mmask16 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcountd_v4si_mask ((__v4si) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_popcnt_epi32 (__m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcountd_v8si ((__v8si) __A);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_popcnt_epi32 (__m256i __A, __mmask16 __U, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpopcountd_v8si_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_popcnt_epi32 (__mmask16 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcountd_v8si_mask ((__v8si) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_popcnt_epi64 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcountq_v2di ((__v2di) __A);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_popcnt_epi64 (__m128i __A, __mmask8 __U, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpopcountq_v2di_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_popcnt_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcountq_v2di_mask ((__v2di) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_popcnt_epi64 (__m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcountq_v4di ((__v4di) __A);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_popcnt_epi64 (__m256i __A, __mmask8 __U, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpopcountq_v4di_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_popcnt_epi64 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcountq_v4di_mask ((__v4di) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+#ifdef __DISABLE_AVX512VPOPCNTDQVL__
+#undef __DISABLE_AVX512VPOPCNTDQVL__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512VPOPCNTDQVL__ */
+
+#endif /* _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED */
+
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index 37f3e1a..1660d26 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -103,6 +103,7 @@
#define bit_VAES (1 << 9)
#define bit_AVX512VNNI (1 << 11)
#define bit_VPCLMULQDQ (1 << 10)
+#define bit_AVX512BITALG (1 << 12)
#define bit_AVX512VPOPCNTDQ (1 << 14)
#define bit_RDPID (1 << 22)
diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
index 99826fd..1e06936 100644
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -418,6 +418,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
unsigned int has_mwaitx = 0, has_clzero = 0, has_pku = 0, has_rdpid = 0;
unsigned int has_avx5124fmaps = 0, has_avx5124vnniw = 0;
unsigned int has_gfni = 0, has_avx512vbmi2 = 0;
+ unsigned int has_avx512bitalg = 0;
unsigned int has_ibt = 0, has_shstk = 0;
unsigned int has_avx512vnni = 0, has_vaes = 0;
unsigned int has_vpclmulqdq = 0;
@@ -515,6 +516,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
has_gfni = ecx & bit_GFNI;
has_vaes = ecx & bit_VAES;
has_vpclmulqdq = ecx & bit_VPCLMULQDQ;
+ has_avx512bitalg = ecx & bit_AVX512BITALG;
has_avx5124vnniw = edx & bit_AVX5124VNNIW;
has_avx5124fmaps = edx & bit_AVX5124FMAPS;
@@ -1083,6 +1085,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
const char *shstk = has_shstk ? " -mshstk" : " -mno-shstk";
const char *vaes = has_vaes ? " -mvaes" : " -mno-vaes";
const char *vpclmulqdq = has_vpclmulqdq ? " -mvpclmulqdq" : " -mno-vpclmulqdq";
+ const char *avx512bitalg = has_avx512bitalg ? " -mavx512bitalg" : " -mno-avx512bitalg";
options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
sse4a, cx16, sahf, movbe, aes, sha, pclmul,
popcnt, abm, lwp, fma, fma4, xop, bmi, sgx, bmi2,
@@ -1093,7 +1096,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
xsavec, xsaves, avx512dq, avx512bw, avx512vl,
avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw,
clwb, mwaitx, clzero, pku, rdpid, gfni, ibt, shstk,
- avx512vbmi2, avx512vnni, vaes, vpclmulqdq, NULL);
+ avx512vbmi2, avx512vnni, vaes, vpclmulqdq,
+ avx512bitalg, NULL);
}
done:
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 1423f3e..9ecdcc0 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -309,6 +309,8 @@ DEF_FUNCTION_TYPE (V16SI, V4SI)
DEF_FUNCTION_TYPE (V16SI, V8SI)
DEF_FUNCTION_TYPE (V16SI, V16SF)
DEF_FUNCTION_TYPE (V16SI, V16SI)
+DEF_FUNCTION_TYPE (V32HI, V32HI)
+DEF_FUNCTION_TYPE (V64QI, V64QI)
DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, UHI)
DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, UQI)
DEF_FUNCTION_TYPE (V8DI, PV8DI)
@@ -1256,3 +1258,10 @@ DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, V4DI, INT)
DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI)
DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI, INT)
DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, V2DI, INT)
+
+# BITALG builtins
+DEF_FUNCTION_TYPE (UHI, V2DI, V2DI, UHI)
+DEF_FUNCTION_TYPE (USI, V4DI, V4DI, USI)
+DEF_FUNCTION_TYPE (V4DI, V4DI)
+DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, UHI)
+DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, UHI)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 3365cea..2c6ea3c 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -2788,6 +2788,16 @@ BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, CODE_FOR_vpopcountv16si_mask, "__builtin
BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, CODE_FOR_vpopcountv8di, "__builtin_ia32_vpopcountq_v8di", IX86_BUILTIN_VPOPCOUNTQV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI)
BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, CODE_FOR_vpopcountv8di_mask, "__builtin_ia32_vpopcountq_v8di_mask", IX86_BUILTIN_VPOPCOUNTQV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv4di, "__builtin_ia32_vpopcountq_v4di", IX86_BUILTIN_VPOPCOUNTQV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv4di_mask, "__builtin_ia32_vpopcountq_v4di_mask", IX86_BUILTIN_VPOPCOUNTQV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv2di, "__builtin_ia32_vpopcountq_v2di", IX86_BUILTIN_VPOPCOUNTQV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv2di_mask, "__builtin_ia32_vpopcountq_v2di_mask", IX86_BUILTIN_VPOPCOUNTQV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv4si, "__builtin_ia32_vpopcountd_v4si", IX86_BUILTIN_VPOPCOUNTDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv4si_mask, "__builtin_ia32_vpopcountd_v4si_mask", IX86_BUILTIN_VPOPCOUNTDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv8si, "__builtin_ia32_vpopcountd_v8si", IX86_BUILTIN_VPOPCOUNTDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv8si_mask, "__builtin_ia32_vpopcountd_v8si_mask", IX86_BUILTIN_VPOPCOUNTDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UHI)
+
+
/* RDPID */
BDESC (OPTION_MASK_ISA_RDPID, CODE_FOR_rdpid, "__builtin_ia32_rdpid", IX86_BUILTIN_RDPID, UNKNOWN, (int) UNSIGNED_FTYPE_VOID)
@@ -2805,6 +2815,24 @@ BDESC (OPTION_MASK_ISA_VAES, CODE_FOR_vaesenclast_v16qi, "__builtin_ia32_vaesenc
BDESC (OPTION_MASK_ISA_VAES, CODE_FOR_vaesenclast_v32qi, "__builtin_ia32_vaesenclast_v32qi", IX86_BUILTIN_VAESENCLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
BDESC (OPTION_MASK_ISA_VAES, CODE_FOR_vaesenclast_v64qi, "__builtin_ia32_vaesenclast_v64qi", IX86_BUILTIN_VAESENCLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
+/* BITALG */
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv64qi, "__builtin_ia32_vpopcountb_v64qi", IX86_BUILTIN_VPOPCOUNTBV64QI, UNKNOWN, (int) V64QI_FTYPE_V64QI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv64qi_mask, "__builtin_ia32_vpopcountb_v64qi_mask", IX86_BUILTIN_VPOPCOUNTBV64QI_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv32qi, "__builtin_ia32_vpopcountb_v32qi", IX86_BUILTIN_VPOPCOUNTBV32QI, UNKNOWN, (int) V32QI_FTYPE_V32QI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv32qi_mask, "__builtin_ia32_vpopcountb_v32qi_mask", IX86_BUILTIN_VPOPCOUNTBV32QI_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv16qi, "__builtin_ia32_vpopcountb_v16qi", IX86_BUILTIN_VPOPCOUNTBV16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv16qi_mask, "__builtin_ia32_vpopcountb_v16qi_mask", IX86_BUILTIN_VPOPCOUNTBV16QI_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI)
+
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv32hi, "__builtin_ia32_vpopcountw_v32hi", IX86_BUILTIN_VPOPCOUNTWV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv32hi_mask, "__builtin_ia32_vpopcountw_v32hi_mask", IX86_BUILTIN_VPOPCOUNTQV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv16hi, "__builtin_ia32_vpopcountw_v16hi", IX86_BUILTIN_VPOPCOUNTWV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv16hi_mask, "__builtin_ia32_vpopcountw_v16hi_mask", IX86_BUILTIN_VPOPCOUNTQV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv8hi, "__builtin_ia32_vpopcountw_v8hi", IX86_BUILTIN_VPOPCOUNTWV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv8hi_mask, "__builtin_ia32_vpopcountw_v8hi_mask", IX86_BUILTIN_VPOPCOUNTQV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI)
+
+BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpshufbitqmbv2di_mask, "__builtin_ia32_vpshufbitqmb128_mask", IX86_BUILTIN_VPSHUFBITQMB128_MASK, UNKNOWN, (int) UHI_FTYPE_V2DI_V2DI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpshufbitqmbv4di_mask, "__builtin_ia32_vpshufbitqmb256_mask", IX86_BUILTIN_VPSHUFBITQMB256_MASK, UNKNOWN, (int) USI_FTYPE_V4DI_V4DI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_avx512vl_vpshufbitqmbv8di_mask, "__builtin_ia32_vpshufbitqmb512_mask", IX86_BUILTIN_VPSHUFBITQMB512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI)
BDESC_END (ARGS2, MPX)
/* Builtins for MPX. */
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index d9de37b..dbd5f43 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -402,6 +402,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__SGX__");
if (isa_flag2 & OPTION_MASK_ISA_AVX5124FMAPS)
def_or_undef (parse_in, "__AVX5124FMAPS__");
+ if (isa_flag2 & OPTION_MASK_ISA_AVX512BITALG)
+ def_or_undef (parse_in, "__AVX512BITALG__");
if (isa_flag2 & OPTION_MASK_ISA_AVX512VPOPCNTDQ)
def_or_undef (parse_in, "__AVX512VPOPCNTDQ__");
if (isa_flag & OPTION_MASK_ISA_FMA)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 575e75a..7b055d1 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2763,7 +2763,8 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2,
{ "-mhle", OPTION_MASK_ISA_HLE },
{ "-mmovbe", OPTION_MASK_ISA_MOVBE },
{ "-mclzero", OPTION_MASK_ISA_CLZERO },
- { "-mmwaitx", OPTION_MASK_ISA_MWAITX }
+ { "-mmwaitx", OPTION_MASK_ISA_MWAITX },
+ { "-mavx512bitalg", OPTION_MASK_ISA_AVX512BITALG }
};
static struct ix86_target_opts isa_opts[] =
{
@@ -5266,6 +5267,7 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
IX86_ATTR_ISA ("avx512vpopcntdq", OPT_mavx512vpopcntdq),
IX86_ATTR_ISA ("avx512vbmi2", OPT_mavx512vbmi2),
IX86_ATTR_ISA ("avx512vnni", OPT_mavx512vnni),
+ IX86_ATTR_ISA ("avx512bitalg", OPT_mavx512bitalg),
IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
@@ -33536,12 +33538,15 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V16SI_FTYPE_V4SI:
case V16SI_FTYPE_V16SF:
case V16SI_FTYPE_V16SI:
+ case V64QI_FTYPE_V64QI:
+ case V32HI_FTYPE_V32HI:
case V16SF_FTYPE_V16SF:
case V8DI_FTYPE_UQI:
case V8DI_FTYPE_V8DI:
case V8DF_FTYPE_V4DF:
case V8DF_FTYPE_V2DF:
case V8DF_FTYPE_V8DF:
+ case V4DI_FTYPE_V4DI:
nargs = 1;
break;
case V4SF_FTYPE_V4SF_VEC_MERGE:
@@ -33918,6 +33923,10 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case HI_FTYPE_V16SF_INT_UHI:
case QI_FTYPE_V8SF_INT_UQI:
case QI_FTYPE_V4SF_INT_UQI:
+ case UHI_FTYPE_V2DI_V2DI_UHI:
+ case USI_FTYPE_V4DI_V4DI_USI:
+ case V4SI_FTYPE_V4SI_V4SI_UHI:
+ case V8SI_FTYPE_V8SI_V8SI_UHI:
nargs = 3;
mask_pos = 1;
nargs_constant = 1;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 7da8573..3b953de 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -91,6 +91,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define TARGET_AVX512VPOPCNTDQ_P(x) TARGET_ISA_AVX512VPOPCNTDQ_P(x)
#define TARGET_AVX512VNNI TARGET_ISA_AVX512VNNI
#define TARGET_AVX512VNNI_P(x) TARGET_ISA_AVX512VNNI_P(x)
+#define TARGET_AVX512BITALG TARGET_ISA_AVX512BITALG
+#define TARGET_AVX512BITALG_P(x) TARGET_ISA_AVX512BITALG_P(x)
#define TARGET_FMA TARGET_ISA_FMA
#define TARGET_FMA_P(x) TARGET_ISA_FMA_P(x)
#define TARGET_SSE4A TARGET_ISA_SSE4A
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 9e7bcce..01cdac8 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -745,6 +745,10 @@ mavx512vnni
Target Report Mask(ISA_AVX512VNNI) Var(ix86_isa_flags) Save
Support AVX512VNNI built-in functions and code generation.
+mavx512bitalg
+Target Report Mask(ISA_AVX512BITALG) Var(ix86_isa_flags2) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F and AVX512BITALG built-in functions and code generation.
+
mfma
Target Report Mask(ISA_FMA) Var(ix86_isa_flags) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and FMA built-in functions and code generation.
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index 7fcaa69..0a68501 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -82,6 +82,10 @@
#include <avx512vnnivlintrin.h>
+#include <avx512vpopcntdqvlintrin.h>
+
+#include <avx512bitalgintrin.h>
+
#include <shaintrin.h>
#include <lzcntintrin.h>
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 20e7b16..f4f68eb 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -181,6 +181,9 @@
;; For VPCLMULQDQ support
UNSPEC_VPCLMULQDQ
+
+ ;; For AVX512BITALG support
+ UNSPEC_VPSHUFBIT
])
(define_c_enum "unspecv" [
@@ -501,6 +504,10 @@
(V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
(V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
+(define_mode_iterator VI48_AVX512VLBW
+ [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX512VL")
+ (V2DI "TARGET_AVX512VL")])
+
(define_mode_attr avx512
[(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
(V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
@@ -20023,9 +20030,9 @@
(set_attr ("mode") ("TI"))])
(define_insn "vpopcount<mode><mask_name>"
- [(set (match_operand:VI48_512 0 "register_operand" "=v")
- (popcount:VI48_512
- (match_operand:VI48_512 1 "nonimmediate_operand" "vm")))]
+ [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
+ (popcount:VI48_AVX512VL
+ (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
"TARGET_AVX512VPOPCNTDQ"
"vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
@@ -20066,6 +20073,13 @@
"TARGET_SSE && TARGET_64BIT"
"jmp\t%P1")
+(define_insn "vpopcount<mode><mask_name>"
+ [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
+ (popcount:VI12_AVX512VL
+ (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX512BITALG"
+ "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
+
(define_insn "vgf2p8affineinvqb_<mode><mask_name>"
[(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
(unspec:VI1_AVX512F [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v")
@@ -20514,3 +20528,14 @@
"TARGET_VPCLMULQDQ"
"vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "mode" "DI")])
+
+(define_insn "avx512vl_vpshufbitqmb<mode><mask_scalar_merge_name>"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VI48_AVX512VLBW 1 "register_operand" "v")
+ (match_operand:VI48_AVX512VLBW 2 "nonimmediate_operand" "vm")]
+ UNSPEC_VPSHUFBIT))]
+ "TARGET_AVX512BITALG"
+ "vpshufbitqmb\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])