diff options
author | Julia Koval <julia.koval@intel.com> | 2017-12-22 13:37:16 +0100 |
---|---|---|
committer | Kirill Yukhin <kyukhin@gcc.gnu.org> | 2017-12-22 12:37:16 +0000 |
commit | e2a29465e91c75b337aabd5886af982653faf00e (patch) | |
tree | b8fee74f68676ef891dd3ffc540bff331f528c36 /gcc/config/i386 | |
parent | fefab9536e9d986ed0ffbdeeb0ef851578385564 (diff) | |
download | gcc-e2a29465e91c75b337aabd5886af982653faf00e.zip gcc-e2a29465e91c75b337aabd5886af982653faf00e.tar.gz gcc-e2a29465e91c75b337aabd5886af982653faf00e.tar.bz2 |
Enable AVX512BITALG
gcc/
* common/config/i386/i386-common.c (OPTION_MASK_ISA_AVX512BITALG_SET,
OPTION_MASK_ISA_AVX512BITALG_UNSET): New.
(ix86_handle_option): Handle -mavx512bitalg, fix 4VNNIW formatting.
* config.gcc: Add avx512vpopcntdqvlintrin.h and avx512bitalgintrin.h.
* config/i386/avx512bitalgintrin.h (_mm512_popcnt_epi8, _mm512_popcnt_epi16,
_mm512_mask_popcnt_epi8, _mm512_maskz_popcnt_epi8, _mm512_mask_popcnt_epi16,
_mm512_maskz_popcnt_epi16, _mm512_bitshuffle_epi64_mask, _mm256_popcnt_epi8,
_mm512_mask_bitshuffle_epi64_mask, _mm256_mask_popcnt_epi8, _mm_popcnt_epi8,
_mm256_maskz_popcnt_epi8, _mm_bitshuffle_epi64_mask, _mm256_popcnt_epi16,
_mm_mask_bitshuffle_epi64_mask, _mm256_bitshuffle_epi64_mask,
_mm256_mask_bitshuffle_epi64_mask, _mm_popcnt_epi16, _mm_maskz_popcnt_epi8,
_mm256_mask_popcnt_epi16, _mm256_maskz_popcnt_epi16, _mm_mask_popcnt_epi8,
_mm_mask_popcnt_epi16, _mm_maskz_popcnt_epi16): New intrinsics.
* config/i386/avx512vpopcntdqvlintrin.h (_mm_popcnt_epi32, _mm_popcnt_epi64,
_mm_mask_popcnt_epi32, _mm_maskz_popcnt_epi32, _mm256_popcnt_epi32,
_mm256_mask_popcnt_epi32, _mm256_maskz_popcnt_epi32, _mm_mask_popcnt_epi64,
_mm_maskz_popcnt_epi64, _mm256_popcnt_epi64, _mm256_mask_popcnt_epi64,
_mm256_maskz_popcnt_epi64): New intrinsics.
* config/i386/cpuid.h (bit_AVX512BITALG): New bit.
* config/i386/driver-i386.c (host_detect_local_cpu): Detect -mavx512bitalg.
* config/i386/i386-builtin-types.def (V64QI_FTYPE_V64QI, V64QI_FTYPE_V64QI,
V4DI_FTYPE_V4DI, UHI_FTYPE_V2DI_V2DI_UHI, USI_FTYPE_V4DI_V4DI_USI,
V4SI_FTYPE_V4SI_V4SI_UHI, V8SI_FTYPE_V8SI_V8SI_UHI): New types.
* config/i386/i386-builtin.def (__builtin_ia32_vpopcountq_v4di,
__builtin_ia32_vpopcountq_v4di_mask, __builtin_ia32_vpopcountq_v2di,
__builtin_ia32_vpopcountq_v2di_mask, __builtin_ia32_vpopcountd_v4si,
__builtin_ia32_vpopcountd_v4si_mask, __builtin_ia32_vpopcountd_v8si,
__builtin_ia32_vpopcountd_v8si_mask, __builtin_ia32_vpopcountb_v64qi,
__builtin_ia32_vpopcountb_v64qi_mask, __builtin_ia32_vpopcountb_v32qi,
__builtin_ia32_vpopcountb_v32qi_mask, __builtin_ia32_vpopcountb_v16qi,
__builtin_ia32_vpopcountb_v16qi_mask, __builtin_ia32_vpopcountw_v32hi,
__builtin_ia32_vpopcountw_v32hi_mask, __builtin_ia32_vpopcountw_v16hi,
__builtin_ia32_vpopcountw_v16hi_mask, __builtin_ia32_vpopcountw_v8hi,
__builtin_ia32_vpopcountw_v8hi_mask, __builtin_ia32_vpshufbitqmb128_mask,
__builtin_ia32_vpshufbitqmb256_mask,
__builtin_ia32_vpshufbitqmb512_mask): New builtins.
* config/i386/i386-c.c (__AVX512BITALG__): New.
* config/i386/i386.c (isa2_opts): Add -mavx512bitalg.
(ix86_valid_target_attribute_inner_p): Ditto.
(ix86_expand_args_builtin): Handle new types.
* config/i386/i386.h (TARGET_AVX512BITALG, TARGET_AVX512BITALG_P): New.
* config/i386/i386.opt: Add -mavx512bitalg.
* config/i386/immintrin.h: Add avx512vpopcntdqvlintrin.h and
avx512bitalgintrin.h.
* config/i386/sse.md (VI48_AVX512VLBW): New iterator.
(vpopcount<mode><mask_name>): Add more types.
(avx512vl_vpshufbitqmb<mode><mask_scalar_merge_name>): New.
* doc/invoke.texi: Add -mavx512bitalg and -mavx512vpopcntdq.
gcc/testsuite/
* g++.dg/other/i386-2.C: Add new options.
* g++.dg/other/i386-3.C: Ditto.
* gcc.target/i386/sse-12.c: Ditto.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-22.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/avx512-check.h: Handle bit_AVX512BITALG.
* gcc.target/i386/avx512bitalg-vpopcntb-1.c: New.
* gcc.target/i386/avx512bitalg-vpopcntb.c: Ditto.
* gcc.target/i386/avx512bitalg-vpopcntbvl.c: Ditto.
* gcc.target/i386/avx512bitalg-vpopcntw-1.c: Ditto.
* gcc.target/i386/avx512bitalg-vpopcntw.c: Ditto.
* gcc.target/i386/avx512bitalg-vpopcntwvl.c: Ditto.
* gcc.target/i386/avx512bitalg-vpshufbitqmb-1.c: Ditto.
* gcc.target/i386/avx512bitalg-vpshufbitqmb.c: Ditto.
* gcc.target/i386/avx512bitalgvl-vpopcntb-1.c: Ditto.
* gcc.target/i386/avx512bitalgvl-vpopcntw-1.c: Ditto.
* gcc.target/i386/avx512bitalgvl-vpshufbitqmb-1.c: Ditto.
* gcc.target/i386/avx512vpopcntdqvl-vpopcntd-1.c: Ditto.
* gcc.target/i386/avx512vpopcntdqvl-vpopcntq-1.c: Ditto.
* gcc.target/i386/i386.exp (check_effective_target_avx512bitalg): New.
* gcc.target/i386/avx512vpopcntdq-vpopcntd-1.c: Add more types.
* gcc.target/i386/avx512vpopcntdq-vpopcntd.c: Handle new intrinsics.
* gcc.target/i386/avx512vpopcntdq-vpopcntq-1.c: Ditto.
* gcc.target/i386/avx512vpopcntdq-vpopcntq.c: Ditto.
Co-Authored-By: Sebastian Peryt <sebastian.peryt@intel.com>
From-SVN: r255975
Diffstat (limited to 'gcc/config/i386')
-rw-r--r-- | gcc/config/i386/avx512bitalgintrin.h | 282 | ||||
-rw-r--r-- | gcc/config/i386/avx512vpopcntdqvlintrin.h | 147 | ||||
-rw-r--r-- | gcc/config/i386/cpuid.h | 1 | ||||
-rw-r--r-- | gcc/config/i386/driver-i386.c | 6 | ||||
-rw-r--r-- | gcc/config/i386/i386-builtin-types.def | 9 | ||||
-rw-r--r-- | gcc/config/i386/i386-builtin.def | 28 | ||||
-rw-r--r-- | gcc/config/i386/i386-c.c | 2 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 11 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 2 | ||||
-rw-r--r-- | gcc/config/i386/i386.opt | 4 | ||||
-rw-r--r-- | gcc/config/i386/immintrin.h | 4 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 31 |
12 files changed, 522 insertions, 5 deletions
diff --git a/gcc/config/i386/avx512bitalgintrin.h b/gcc/config/i386/avx512bitalgintrin.h new file mode 100644 index 0000000..b507707 --- /dev/null +++ b/gcc/config/i386/avx512bitalgintrin.h @@ -0,0 +1,282 @@ +/* Copyright (C) 2017 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#if !defined _IMMINTRIN_H_INCLUDED +# error "Never use <avx512bitalgintrin.h> directly; include <x86intrin.h> instead." +#endif + +#ifndef _AVX512BITALGINTRIN_H_INCLUDED +#define _AVX512BITALGINTRIN_H_INCLUDED + +#ifndef __AVX512BITALG__ +#pragma GCC push_options +#pragma GCC target("avx512bitalg") +#define __DISABLE_AVX512BITALG__ +#endif /* __AVX512BITALG__ */ + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_popcnt_epi8 (__m512i __A) +{ + return (__m512i) __builtin_ia32_vpopcountb_v64qi ((__v64qi) __A); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_popcnt_epi16 (__m512i __A) +{ + return (__m512i) __builtin_ia32_vpopcountw_v32hi ((__v32hi) __A); +} + +#ifdef __DISABLE_AVX512BITALG__ +#undef __DISABLE_AVX512BITALG__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX512BITALG__ */ + +#if !defined(__AVX512BITALG__) || !defined(__AVX512BW__) +#pragma GCC push_options +#pragma GCC target("avx512bitalg,avx512bw") +#define __DISABLE_AVX512BITALGBW__ +#endif /* __AVX512VLBW__ */ + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_popcnt_epi8 (__m512i __A, __mmask64 __U, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpopcountb_v64qi_mask ((__v64qi) __A, + (__v64qi) __B, + (__mmask64) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_popcnt_epi8 (__mmask64 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_vpopcountb_v64qi_mask ((__v64qi) __A, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) __U); +} +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_popcnt_epi16 (__m512i __A, __mmask32 __U, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpopcountw_v32hi_mask ((__v32hi) __A, + (__v32hi) __B, + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_popcnt_epi16 (__mmask32 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_vpopcountw_v32hi_mask ((__v32hi) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); +} + +extern __inline __mmask64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_bitshuffle_epi64_mask (__m512i __A, __m512i __B) +{ + return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask ((__v8di) __A, + (__v8di) __B, + (__mmask64) -1); +} + +extern __inline __mmask64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_bitshuffle_epi64_mask (__mmask8 __M, __m512i __A, __m512i __B) +{ + return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask ((__v8di) __A, + (__v8di) __B, + (__mmask64) __M); +} + +#ifdef __DISABLE_AVX512BITALGBW__ +#undef __DISABLE_AVX512BITALGBW__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX512BITALGBW__ */ + +#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__) || !defined(__AVX512BW__) +#pragma GCC push_options +#pragma GCC target("avx512bitalg,avx512vl,avx512bw") +#define __DISABLE_AVX512BITALGVLBW__ +#endif /* __AVX512VLBW__ */ + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_popcnt_epi8 (__m256i __A, __mmask32 __U, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A, + (__v32qi) __B, + (__mmask32) __U); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_popcnt_epi8 (__mmask32 __U, __m256i __A) +{ + return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A, + (__v32qi) + _mm256_setzero_si256 (), + (__mmask32) __U); +} +extern __inline __mmask16 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_bitshuffle_epi64_mask (__m128i __A, __m128i __B) +{ + return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask ((__v2di) __A, + (__v2di) __B, + (__mmask16) -1); +} + +extern __inline __mmask16 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_bitshuffle_epi64_mask (__mmask16 __M, __m128i __A, __m128i __B) +{ + return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask ((__v2di) __A, + (__v2di) __B, + (__mmask16) __M); +} + +extern __inline __mmask32 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_bitshuffle_epi64_mask (__m256i __A, __m256i __B) +{ + return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask ((__v4di) __A, + (__v4di) __B, + (__mmask32) -1); +} + +extern __inline __mmask32 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_bitshuffle_epi64_mask (__mmask32 __M, __m256i __A, __m256i __B) +{ + return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask ((__v4di) __A, + (__v4di) __B, + (__mmask32) __M); +} + +#ifdef __DISABLE_AVX512BITALGVLBW__ +#undef __DISABLE_AVX512BITALGVLBW__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX512BITALGVLBW__ */ + + +#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__) +#pragma GCC push_options +#pragma GCC target("avx512bitalg,avx512vl") +#define __DISABLE_AVX512BITALGVL__ +#endif /* __AVX512VLBW__ */ + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_popcnt_epi8 (__m256i __A) +{ + return (__m256i) __builtin_ia32_vpopcountb_v32qi ((__v32qi) __A); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_popcnt_epi16 (__m256i __A) +{ + return (__m256i) __builtin_ia32_vpopcountw_v16hi ((__v16hi) __A); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_popcnt_epi8 (__m128i __A) +{ + return (__m128i) __builtin_ia32_vpopcountb_v16qi ((__v16qi) __A); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_popcnt_epi16 (__m128i __A) +{ + return (__m128i) __builtin_ia32_vpopcountw_v8hi ((__v8hi) __A); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_popcnt_epi16 (__m256i __A, __mmask16 __U, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A, + (__v16hi) __B, + (__mmask16) __U); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_popcnt_epi16 (__mmask16 __U, __m256i __A) +{ + return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) __U); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_popcnt_epi8 (__m128i __A, __mmask16 __U, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A, + (__v16qi) __B, + (__mmask16) __U); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_popcnt_epi8 (__mmask16 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A, + (__v16qi) + _mm_setzero_si128 (), + (__mmask16) __U); +} +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_popcnt_epi16 (__m128i __A, __mmask8 __U, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A, + (__v8hi) __B, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_popcnt_epi16 (__mmask8 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __U); +} +#ifdef __DISABLE_AVX512BITALGVL__ +#undef __DISABLE_AVX512BITALGVL__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX512BITALGBW__ */ + +#endif /* _AVX512BITALGINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx512vpopcntdqvlintrin.h b/gcc/config/i386/avx512vpopcntdqvlintrin.h new file mode 100644 index 0000000..c8f5717 --- /dev/null +++ b/gcc/config/i386/avx512vpopcntdqvlintrin.h @@ -0,0 +1,147 @@ +/* Copyright (C) 2017 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#if !defined _IMMINTRIN_H_INCLUDED +# error "Never use <avx512vpopcntdqvlintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED +#define _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED + +#if !defined(__AVX512VPOPCNTDQ__) || !defined(__AVX512VL__) +#pragma GCC push_options +#pragma GCC target("avx512vpopcntdq,avx512vl") +#define __DISABLE_AVX512VPOPCNTDQVL__ +#endif /* __AVX512VPOPCNTDQVL__ */ + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_popcnt_epi32 (__m128i __A) +{ + return (__m128i) __builtin_ia32_vpopcountd_v4si ((__v4si) __A); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_popcnt_epi32 (__m128i __A, __mmask16 __U, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpopcountd_v4si_mask ((__v4si) __A, + (__v4si) __B, + (__mmask16) __U); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_popcnt_epi32 (__mmask16 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_vpopcountd_v4si_mask ((__v4si) __A, + (__v4si) + _mm_setzero_si128 (), + (__mmask16) __U); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_popcnt_epi32 (__m256i __A) +{ + return (__m256i) __builtin_ia32_vpopcountd_v8si ((__v8si) __A); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_popcnt_epi32 (__m256i __A, __mmask16 __U, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpopcountd_v8si_mask ((__v8si) __A, + (__v8si) __B, + (__mmask16) __U); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_popcnt_epi32 (__mmask16 __U, __m256i __A) +{ + return (__m256i) __builtin_ia32_vpopcountd_v8si_mask ((__v8si) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask16) __U); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_popcnt_epi64 (__m128i __A) +{ + return (__m128i) __builtin_ia32_vpopcountq_v2di ((__v2di) __A); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_popcnt_epi64 (__m128i __A, __mmask8 __U, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpopcountq_v2di_mask ((__v2di) __A, + (__v2di) __B, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_popcnt_epi64 (__mmask8 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_vpopcountq_v2di_mask ((__v2di) __A, + (__v2di) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_popcnt_epi64 (__m256i __A) +{ + return (__m256i) __builtin_ia32_vpopcountq_v4di ((__v4di) __A); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_popcnt_epi64 (__m256i __A, __mmask8 __U, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpopcountq_v4di_mask ((__v4di) __A, + (__v4di) __B, + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_popcnt_epi64 (__mmask8 __U, __m256i __A) +{ + return (__m256i) __builtin_ia32_vpopcountq_v4di_mask ((__v4di) __A, + (__v4di) + _mm256_setzero_si256 (), + (__mmask8) __U); +} + +#ifdef __DISABLE_AVX512VPOPCNTDQVL__ +#undef __DISABLE_AVX512VPOPCNTDQVL__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX512VPOPCNTDQVL__ */ + +#endif /* _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED */ + diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index 37f3e1a..1660d26 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -103,6 +103,7 @@ #define bit_VAES (1 << 9) #define bit_AVX512VNNI (1 << 11) #define bit_VPCLMULQDQ (1 << 10) +#define bit_AVX512BITALG (1 << 12) #define bit_AVX512VPOPCNTDQ (1 << 14) #define bit_RDPID (1 << 22) diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c index 99826fd..1e06936 100644 --- a/gcc/config/i386/driver-i386.c +++ b/gcc/config/i386/driver-i386.c @@ -418,6 +418,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) unsigned int has_mwaitx = 0, has_clzero = 0, has_pku = 0, has_rdpid = 0; unsigned int has_avx5124fmaps = 0, has_avx5124vnniw = 0; unsigned int has_gfni = 0, has_avx512vbmi2 = 0; + unsigned int has_avx512bitalg = 0; unsigned int has_ibt = 0, has_shstk = 0; unsigned int has_avx512vnni = 0, has_vaes = 0; unsigned int has_vpclmulqdq = 0; @@ -515,6 +516,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) has_gfni = ecx & bit_GFNI; has_vaes = ecx & bit_VAES; has_vpclmulqdq = ecx & bit_VPCLMULQDQ; + has_avx512bitalg = ecx & bit_AVX512BITALG; has_avx5124vnniw = edx & bit_AVX5124VNNIW; has_avx5124fmaps = edx & bit_AVX5124FMAPS; @@ -1083,6 +1085,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) const char *shstk = has_shstk ? " -mshstk" : " -mno-shstk"; const char *vaes = has_vaes ? " -mvaes" : " -mno-vaes"; const char *vpclmulqdq = has_vpclmulqdq ? " -mvpclmulqdq" : " -mno-vpclmulqdq"; + const char *avx512bitalg = has_avx512bitalg ? " -mavx512bitalg" : " -mno-avx512bitalg"; options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3, sse4a, cx16, sahf, movbe, aes, sha, pclmul, popcnt, abm, lwp, fma, fma4, xop, bmi, sgx, bmi2, @@ -1093,7 +1096,8 @@ const char *host_detect_local_cpu (int argc, const char **argv) xsavec, xsaves, avx512dq, avx512bw, avx512vl, avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw, clwb, mwaitx, clzero, pku, rdpid, gfni, ibt, shstk, - avx512vbmi2, avx512vnni, vaes, vpclmulqdq, NULL); + avx512vbmi2, avx512vnni, vaes, vpclmulqdq, + avx512bitalg, NULL); } done: diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def index 1423f3e..9ecdcc0 100644 --- a/gcc/config/i386/i386-builtin-types.def +++ b/gcc/config/i386/i386-builtin-types.def @@ -309,6 +309,8 @@ DEF_FUNCTION_TYPE (V16SI, V4SI) DEF_FUNCTION_TYPE (V16SI, V8SI) DEF_FUNCTION_TYPE (V16SI, V16SF) DEF_FUNCTION_TYPE (V16SI, V16SI) +DEF_FUNCTION_TYPE (V32HI, V32HI) +DEF_FUNCTION_TYPE (V64QI, V64QI) DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, UHI) DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, UQI) DEF_FUNCTION_TYPE (V8DI, PV8DI) @@ -1256,3 +1258,10 @@ DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, V4DI, INT) DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI) DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI, INT) DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, V2DI, INT) + +# BITALG builtins +DEF_FUNCTION_TYPE (UHI, V2DI, V2DI, UHI) +DEF_FUNCTION_TYPE (USI, V4DI, V4DI, USI) +DEF_FUNCTION_TYPE (V4DI, V4DI) +DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, UHI) +DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, UHI) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 3365cea..2c6ea3c 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -2788,6 +2788,16 @@ BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, CODE_FOR_vpopcountv16si_mask, "__builtin BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, CODE_FOR_vpopcountv8di, "__builtin_ia32_vpopcountq_v8di", IX86_BUILTIN_VPOPCOUNTQV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI) BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, CODE_FOR_vpopcountv8di_mask, "__builtin_ia32_vpopcountq_v8di_mask", IX86_BUILTIN_VPOPCOUNTQV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv4di, "__builtin_ia32_vpopcountq_v4di", IX86_BUILTIN_VPOPCOUNTQV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI) +BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv4di_mask, "__builtin_ia32_vpopcountq_v4di_mask", IX86_BUILTIN_VPOPCOUNTQV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv2di, "__builtin_ia32_vpopcountq_v2di", IX86_BUILTIN_VPOPCOUNTQV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI) +BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv2di_mask, "__builtin_ia32_vpopcountq_v2di_mask", IX86_BUILTIN_VPOPCOUNTQV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv4si, "__builtin_ia32_vpopcountd_v4si", IX86_BUILTIN_VPOPCOUNTDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI) +BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv4si_mask, "__builtin_ia32_vpopcountd_v4si_mask", IX86_BUILTIN_VPOPCOUNTDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv8si, "__builtin_ia32_vpopcountd_v8si", IX86_BUILTIN_VPOPCOUNTDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI) +BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv8si_mask, "__builtin_ia32_vpopcountd_v8si_mask", IX86_BUILTIN_VPOPCOUNTDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UHI) + + /* RDPID */ BDESC (OPTION_MASK_ISA_RDPID, CODE_FOR_rdpid, "__builtin_ia32_rdpid", IX86_BUILTIN_RDPID, UNKNOWN, (int) UNSIGNED_FTYPE_VOID) @@ -2805,6 +2815,24 @@ BDESC (OPTION_MASK_ISA_VAES, CODE_FOR_vaesenclast_v16qi, "__builtin_ia32_vaesenc BDESC (OPTION_MASK_ISA_VAES, CODE_FOR_vaesenclast_v32qi, "__builtin_ia32_vaesenclast_v32qi", IX86_BUILTIN_VAESENCLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) BDESC (OPTION_MASK_ISA_VAES, CODE_FOR_vaesenclast_v64qi, "__builtin_ia32_vaesenclast_v64qi", IX86_BUILTIN_VAESENCLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) +/* BITALG */ +BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv64qi, "__builtin_ia32_vpopcountb_v64qi", IX86_BUILTIN_VPOPCOUNTBV64QI, UNKNOWN, (int) V64QI_FTYPE_V64QI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv64qi_mask, "__builtin_ia32_vpopcountb_v64qi_mask", IX86_BUILTIN_VPOPCOUNTBV64QI_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv32qi, "__builtin_ia32_vpopcountb_v32qi", IX86_BUILTIN_VPOPCOUNTBV32QI, UNKNOWN, (int) V32QI_FTYPE_V32QI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv32qi_mask, "__builtin_ia32_vpopcountb_v32qi_mask", IX86_BUILTIN_VPOPCOUNTBV32QI_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv16qi, "__builtin_ia32_vpopcountb_v16qi", IX86_BUILTIN_VPOPCOUNTBV16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv16qi_mask, "__builtin_ia32_vpopcountb_v16qi_mask", IX86_BUILTIN_VPOPCOUNTBV16QI_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI) + +BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv32hi, "__builtin_ia32_vpopcountw_v32hi", IX86_BUILTIN_VPOPCOUNTWV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv32hi_mask, "__builtin_ia32_vpopcountw_v32hi_mask", IX86_BUILTIN_VPOPCOUNTQV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv16hi, "__builtin_ia32_vpopcountw_v16hi", IX86_BUILTIN_VPOPCOUNTWV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv16hi_mask, "__builtin_ia32_vpopcountw_v16hi_mask", IX86_BUILTIN_VPOPCOUNTQV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv8hi, "__builtin_ia32_vpopcountw_v8hi", IX86_BUILTIN_VPOPCOUNTWV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv8hi_mask, "__builtin_ia32_vpopcountw_v8hi_mask", IX86_BUILTIN_VPOPCOUNTQV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI) + +BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpshufbitqmbv2di_mask, "__builtin_ia32_vpshufbitqmb128_mask", IX86_BUILTIN_VPSHUFBITQMB128_MASK, UNKNOWN, (int) UHI_FTYPE_V2DI_V2DI_UHI) +BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpshufbitqmbv4di_mask, "__builtin_ia32_vpshufbitqmb256_mask", IX86_BUILTIN_VPSHUFBITQMB256_MASK, UNKNOWN, (int) USI_FTYPE_V4DI_V4DI_USI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_avx512vl_vpshufbitqmbv8di_mask, "__builtin_ia32_vpshufbitqmb512_mask", IX86_BUILTIN_VPSHUFBITQMB512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI) BDESC_END (ARGS2, MPX) /* Builtins for MPX. */ diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index d9de37b..dbd5f43 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c @@ -402,6 +402,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__SGX__"); if (isa_flag2 & OPTION_MASK_ISA_AVX5124FMAPS) def_or_undef (parse_in, "__AVX5124FMAPS__"); + if (isa_flag2 & OPTION_MASK_ISA_AVX512BITALG) + def_or_undef (parse_in, "__AVX512BITALG__"); if (isa_flag2 & OPTION_MASK_ISA_AVX512VPOPCNTDQ) def_or_undef (parse_in, "__AVX512VPOPCNTDQ__"); if (isa_flag & OPTION_MASK_ISA_FMA) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 575e75a..7b055d1 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -2763,7 +2763,8 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2, { "-mhle", OPTION_MASK_ISA_HLE }, { "-mmovbe", OPTION_MASK_ISA_MOVBE }, { "-mclzero", OPTION_MASK_ISA_CLZERO }, - { "-mmwaitx", OPTION_MASK_ISA_MWAITX } + { "-mmwaitx", OPTION_MASK_ISA_MWAITX }, + { "-mavx512bitalg", OPTION_MASK_ISA_AVX512BITALG } }; static struct ix86_target_opts isa_opts[] = { @@ -5266,6 +5267,7 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[], IX86_ATTR_ISA ("avx512vpopcntdq", OPT_mavx512vpopcntdq), IX86_ATTR_ISA ("avx512vbmi2", OPT_mavx512vbmi2), IX86_ATTR_ISA ("avx512vnni", OPT_mavx512vnni), + IX86_ATTR_ISA ("avx512bitalg", OPT_mavx512bitalg), IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi), IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma), @@ -33536,12 +33538,15 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V16SI_FTYPE_V4SI: case V16SI_FTYPE_V16SF: case V16SI_FTYPE_V16SI: + case V64QI_FTYPE_V64QI: + case V32HI_FTYPE_V32HI: case V16SF_FTYPE_V16SF: case V8DI_FTYPE_UQI: case V8DI_FTYPE_V8DI: case V8DF_FTYPE_V4DF: case V8DF_FTYPE_V2DF: case V8DF_FTYPE_V8DF: + case V4DI_FTYPE_V4DI: nargs = 1; break; case V4SF_FTYPE_V4SF_VEC_MERGE: @@ -33918,6 +33923,10 @@ ix86_expand_args_builtin (const struct builtin_description *d, case HI_FTYPE_V16SF_INT_UHI: case QI_FTYPE_V8SF_INT_UQI: case QI_FTYPE_V4SF_INT_UQI: + case UHI_FTYPE_V2DI_V2DI_UHI: + case USI_FTYPE_V4DI_V4DI_USI: + case V4SI_FTYPE_V4SI_V4SI_UHI: + case V8SI_FTYPE_V8SI_V8SI_UHI: nargs = 3; mask_pos = 1; nargs_constant = 1; diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 7da8573..3b953de 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -91,6 +91,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define TARGET_AVX512VPOPCNTDQ_P(x) TARGET_ISA_AVX512VPOPCNTDQ_P(x) #define TARGET_AVX512VNNI TARGET_ISA_AVX512VNNI #define TARGET_AVX512VNNI_P(x) TARGET_ISA_AVX512VNNI_P(x) +#define TARGET_AVX512BITALG TARGET_ISA_AVX512BITALG +#define TARGET_AVX512BITALG_P(x) TARGET_ISA_AVX512BITALG_P(x) #define TARGET_FMA TARGET_ISA_FMA #define TARGET_FMA_P(x) TARGET_ISA_FMA_P(x) #define TARGET_SSE4A TARGET_ISA_SSE4A diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 9e7bcce..01cdac8 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -745,6 +745,10 @@ mavx512vnni Target Report Mask(ISA_AVX512VNNI) Var(ix86_isa_flags) Save Support AVX512VNNI built-in functions and code generation. +mavx512bitalg +Target Report Mask(ISA_AVX512BITALG) Var(ix86_isa_flags2) Save +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F and AVX512BITALG built-in functions and code generation. + mfma Target Report Mask(ISA_FMA) Var(ix86_isa_flags) Save Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and FMA built-in functions and code generation. diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h index 7fcaa69..0a68501 100644 --- a/gcc/config/i386/immintrin.h +++ b/gcc/config/i386/immintrin.h @@ -82,6 +82,10 @@ #include <avx512vnnivlintrin.h> +#include <avx512vpopcntdqvlintrin.h> + +#include <avx512bitalgintrin.h> + #include <shaintrin.h> #include <lzcntintrin.h> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 20e7b16..f4f68eb 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -181,6 +181,9 @@ ;; For VPCLMULQDQ support UNSPEC_VPCLMULQDQ + + ;; For AVX512BITALG support + UNSPEC_VPSHUFBIT ]) (define_c_enum "unspecv" [ @@ -501,6 +504,10 @@ (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2") (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")]) +(define_mode_iterator VI48_AVX512VLBW + [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX512VL") + (V2DI "TARGET_AVX512VL")]) + (define_mode_attr avx512 [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw") (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw") @@ -20023,9 +20030,9 @@ (set_attr ("mode") ("TI"))]) (define_insn "vpopcount<mode><mask_name>" - [(set (match_operand:VI48_512 0 "register_operand" "=v") - (popcount:VI48_512 - (match_operand:VI48_512 1 "nonimmediate_operand" "vm")))] + [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") + (popcount:VI48_AVX512VL + (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))] "TARGET_AVX512VPOPCNTDQ" "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}") @@ -20066,6 +20073,13 @@ "TARGET_SSE && TARGET_64BIT" "jmp\t%P1") +(define_insn "vpopcount<mode><mask_name>" + [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") + (popcount:VI12_AVX512VL + (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")))] + "TARGET_AVX512BITALG" + "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}") + (define_insn "vgf2p8affineinvqb_<mode><mask_name>" [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v") (unspec:VI1_AVX512F [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v") @@ -20514,3 +20528,14 @@ "TARGET_VPCLMULQDQ" "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "mode" "DI")]) + +(define_insn "avx512vl_vpshufbitqmb<mode><mask_scalar_merge_name>" + [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") + (unspec:<avx512fmaskmode> + [(match_operand:VI48_AVX512VLBW 1 "register_operand" "v") + (match_operand:VI48_AVX512VLBW 2 "nonimmediate_operand" "vm")] + UNSPEC_VPSHUFBIT))] + "TARGET_AVX512BITALG" + "vpshufbitqmb\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}" + [(set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) |