-rw-r--r--  gcc/ChangeLog                             423
-rw-r--r--  gcc/config.gcc                              4
-rw-r--r--  gcc/config/i386/avx2intrin.h             1874
-rw-r--r--  gcc/config/i386/i386-builtin-types.def     78
-rw-r--r--  gcc/config/i386/i386.c                    562
-rw-r--r--  gcc/config/i386/i386.md                    11
-rw-r--r--  gcc/config/i386/immintrin.h                 4
-rw-r--r--  gcc/config/i386/predicates.md               8
-rw-r--r--  gcc/config/i386/sse.md                   2532
-rw-r--r--  gcc/doc/extend.texi                       178
10 files changed, 5415 insertions, 259 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 76154f2..15c4e3b 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,426 @@
+2011-08-22 Kirill Yukhin <kirill.yukhin@intel.com>
+
+ * config/i386/avx2intrin.h: New file.
+ * config/i386/i386-builtin-types.def (PCINT, PCINT64, PV4SI,
+ PV8SI, V32QI_FTYPE_V32QI, V32QI_FTYPE_V16QI, V16HI_FTYPE_V16HI,
+ V16HI_FTYPE_V8HI, V8SI_FTYPE_V8SI, V16HI_FTYPE_V16QI,
+ V8SI_FTYPE_V16QI, V4DI_FTYPE_V16QI, V8SI_FTYPE_V8HI,
+ V4DI_FTYPE_V8HI, V4DI_FTYPE_V4SI, V4DI_FTYPE_PV4DI,
+ V4DI_FTYPE_V2DI, V2DI_FTYPE_PCV2DI_V2DI, V4SI_FTYPE_PCV4SI_V4SI,
+ V32QI_FTYPE_V16HI_V16HI, V16HI_FTYPE_V8SI_V8SI,
+ V32QI_FTYPE_V32QI_V32QI, V16HI_FTYPE_V32QI_V32QI,
+ V16HI_FTYPE_V16HI_V8HI, V16HI_FTYPE_V16HI_V16HI,
+ V16HI_FTYPE_V16HI_INT, V16HI_FTYPE_V16HI_SI,
+ V16HI_FTYPE_V16HI_V16HI_INT, V32QI_FTYPE_V32QI_V32QI_INT,
+ V8SI_FTYPE_V8SI_V4SI, V8SI_FTYPE_V8SI_V8SI,
+ V8SI_FTYPE_V16HI_V16HI, V8SI_FTYPE_V8SI_INT, V8SI_FTYPE_V8SI_SI,
+ V8SI_FTYPE_PCV8SI_V8SI, V4DI_FTYPE_V4DI_V4DI,
+ V4DI_FTYPE_V8SI_V8SI, V4DI_FTYPE_V4DI_V2DI,
+ V4DI_FTYPE_PCV4DI_V4DI, V4DI_FTYPE_V4DI_INT,
+ V2DI_FTYPE_V4DI_INT, V4DI_FTYPE_V4DI_V4DI_INT,
+ V4DI_FTYPE_V4DI_V2DI_INT, VOID_FTYPE_PV2DI_V2DI_V2DI,
+ VOID_FTYPE_PV4DI_V4DI_V4DI, VOID_FTYPE_PV4SI_V4SI_V4SI,
+ VOID_FTYPE_PV8SI_V8SI_V8SI,
+ V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
+ V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
+ V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
+ V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
+ V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
+ V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
+ V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
+ V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
+ V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
+ V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
+ V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
+ V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
+ V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
+ V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
+ V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
+ V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
+ V16HI_FTYPE_V16HI_SI_COUNT, V16HI_FTYPE_V16HI_V8HI_COUNT,
+ V8SI_FTYPE_V8SI_SI_COUNT, V8SI_FTYPE_V8SI_V4SI_COUNT,
+ V4DI_FTYPE_V4DI_INT_COUNT, V4DI_FTYPE_V4DI_V2DI_COUNT,
+ V4DI_FTYPE_V4DI_INT_CONVERT,
+ V4DI_FTYPE_V4DI_V4DI_INT_CONVERT): New.
+ * config/i386/i386.c (ix86_builtins): Add IX86_BUILTIN_MPSADBW256,
+ IX86_BUILTIN_PABSB256, IX86_BUILTIN_PABSW256,
+ IX86_BUILTIN_PABSD256, IX86_BUILTIN_PACKSSDW256,
+ IX86_BUILTIN_PACKSSWB256, IX86_BUILTIN_PACKUSDW256,
+ IX86_BUILTIN_PACKUSWB256, IX86_BUILTIN_PADDB256,
+ IX86_BUILTIN_PADDW256, IX86_BUILTIN_PADDD256,
+ IX86_BUILTIN_PADDQ256, IX86_BUILTIN_PADDSB256,
+ IX86_BUILTIN_PADDSW256, IX86_BUILTIN_PADDUSB256,
+ IX86_BUILTIN_PADDUSW256, IX86_BUILTIN_PALIGNR256,
+ IX86_BUILTIN_AND256I, IX86_BUILTIN_ANDNOT256I,
+ IX86_BUILTIN_PAVGB256, IX86_BUILTIN_PAVGW256,
+ IX86_BUILTIN_PBLENDVB256, IX86_BUILTIN_PBLENDVW256,
+ IX86_BUILTIN_PCMPEQB256, IX86_BUILTIN_PCMPEQW256,
+ IX86_BUILTIN_PCMPEQD256, IX86_BUILTIN_PCMPEQQ256,
+ IX86_BUILTIN_PCMPGTB256, IX86_BUILTIN_PCMPGTW256,
+ IX86_BUILTIN_PCMPGTD256, IX86_BUILTIN_PCMPGTQ256,
+ IX86_BUILTIN_PHADDW256, IX86_BUILTIN_PHADDD256,
+ IX86_BUILTIN_PHADDSW256, IX86_BUILTIN_PHSUBW256,
+ IX86_BUILTIN_PHSUBD256, IX86_BUILTIN_PHSUBSW256,
+ IX86_BUILTIN_PMADDUBSW256, IX86_BUILTIN_PMADDWD256,
+ IX86_BUILTIN_PMAXSB256, IX86_BUILTIN_PMAXSW256,
+ IX86_BUILTIN_PMAXSD256, IX86_BUILTIN_PMAXUB256,
+ IX86_BUILTIN_PMAXUW256, IX86_BUILTIN_PMAXUD256,
+ IX86_BUILTIN_PMINSB256, IX86_BUILTIN_PMINSW256,
+ IX86_BUILTIN_PMINSD256, IX86_BUILTIN_PMINUB256,
+ IX86_BUILTIN_PMINUW256, IX86_BUILTIN_PMINUD256,
+ IX86_BUILTIN_PMOVMSKB256, IX86_BUILTIN_PMOVSXBW256,
+ IX86_BUILTIN_PMOVSXBD256, IX86_BUILTIN_PMOVSXBQ256,
+ IX86_BUILTIN_PMOVSXWD256, IX86_BUILTIN_PMOVSXWQ256,
+ IX86_BUILTIN_PMOVSXDQ256, IX86_BUILTIN_PMOVZXBW256,
+ IX86_BUILTIN_PMOVZXBD256, IX86_BUILTIN_PMOVZXBQ256,
+ IX86_BUILTIN_PMOVZXWD256, IX86_BUILTIN_PMOVZXWQ256,
+ IX86_BUILTIN_PMOVZXDQ256, IX86_BUILTIN_PMULDQ256,
+ IX86_BUILTIN_PMULHRSW256, IX86_BUILTIN_PMULHUW256,
+ IX86_BUILTIN_PMULHW256, IX86_BUILTIN_PMULLW256,
+ IX86_BUILTIN_PMULLD256, IX86_BUILTIN_PMULUDQ256,
+ IX86_BUILTIN_POR256, IX86_BUILTIN_PSADBW256,
+ IX86_BUILTIN_PSHUFB256, IX86_BUILTIN_PSHUFD256,
+ IX86_BUILTIN_PSHUFHW256, IX86_BUILTIN_PSHUFLW256,
+ IX86_BUILTIN_PSIGNB256, IX86_BUILTIN_PSIGNW256,
+ IX86_BUILTIN_PSIGND256, IX86_BUILTIN_PSLLDQI256,
+ IX86_BUILTIN_PSLLWI256, IX86_BUILTIN_PSLLW256,
+ IX86_BUILTIN_PSLLDI256, IX86_BUILTIN_PSLLD256,
+ IX86_BUILTIN_PSLLQI256, IX86_BUILTIN_PSLLQ256,
+ IX86_BUILTIN_PSRAWI256, IX86_BUILTIN_PSRAW256,
+ IX86_BUILTIN_PSRADI256, IX86_BUILTIN_PSRAD256,
+ IX86_BUILTIN_PSRLDQI256, IX86_BUILTIN_PSRLWI256,
+ IX86_BUILTIN_PSRLW256, IX86_BUILTIN_PSRLDI256,
+ IX86_BUILTIN_PSRLD256, IX86_BUILTIN_PSRLQI256,
+ IX86_BUILTIN_PSRLQ256, IX86_BUILTIN_PSUBB256,
+ IX86_BUILTIN_PSUBW256, IX86_BUILTIN_PSUBD256,
+ IX86_BUILTIN_PSUBQ256, IX86_BUILTIN_PSUBSB256,
+ IX86_BUILTIN_PSUBSW256, IX86_BUILTIN_PSUBUSB256,
+ IX86_BUILTIN_PSUBUSW256, IX86_BUILTIN_PUNPCKHBW256,
+ IX86_BUILTIN_PUNPCKHWD256, IX86_BUILTIN_PUNPCKHDQ256,
+ IX86_BUILTIN_PUNPCKHQDQ256, IX86_BUILTIN_PUNPCKLBW256,
+ IX86_BUILTIN_PUNPCKLWD256, IX86_BUILTIN_PUNPCKLDQ256,
+ IX86_BUILTIN_PUNPCKLQDQ256, IX86_BUILTIN_PXOR256,
+ IX86_BUILTIN_MOVNTDQA256, IX86_BUILTIN_VBROADCASTSS_PS,
+ IX86_BUILTIN_VBROADCASTSS_PS256,
+ IX86_BUILTIN_VBROADCASTSD_PD256,
+ IX86_BUILTIN_VBROADCASTSI256, IX86_BUILTIN_PBLENDD256,
+ IX86_BUILTIN_PBLENDD128, IX86_BUILTIN_PBROADCASTB256,
+ IX86_BUILTIN_PBROADCASTW256, IX86_BUILTIN_PBROADCASTD256,
+ IX86_BUILTIN_PBROADCASTQ256, IX86_BUILTIN_PBROADCASTB128,
+ IX86_BUILTIN_PBROADCASTW128, IX86_BUILTIN_PBROADCASTD128,
+ IX86_BUILTIN_PBROADCASTQ128, IX86_BUILTIN_VPERMVARSI256,
+ IX86_BUILTIN_VPERMDF256, IX86_BUILTIN_VPERMVARSF256,
+ IX86_BUILTIN_VPERMDI256, IX86_BUILTIN_VPERMTI256,
+ IX86_BUILTIN_VEXTRACT128I256, IX86_BUILTIN_VINSERT128I256,
+ IX86_BUILTIN_MASKLOADD, IX86_BUILTIN_MASKLOADQ,
+ IX86_BUILTIN_MASKLOADD256, IX86_BUILTIN_MASKLOADQ256,
+ IX86_BUILTIN_MASKSTORED, IX86_BUILTIN_MASKSTOREQ,
+ IX86_BUILTIN_MASKSTORED256, IX86_BUILTIN_MASKSTOREQ256,
+ IX86_BUILTIN_PSLLVV4DI, IX86_BUILTIN_PSLLVV2DI,
+ IX86_BUILTIN_PSLLVV8SI, IX86_BUILTIN_PSLLVV4SI,
+ IX86_BUILTIN_PSRAVV8SI, IX86_BUILTIN_PSRAVV4SI,
+ IX86_BUILTIN_PSRLVV4DI, IX86_BUILTIN_PSRLVV2DI,
+ IX86_BUILTIN_PSRLVV8SI, IX86_BUILTIN_PSRLVV4SI,
+ IX86_BUILTIN_GATHERSIV2DF, IX86_BUILTIN_GATHERSIV4DF,
+ IX86_BUILTIN_GATHERDIV2DF, IX86_BUILTIN_GATHERDIV4DF,
+ IX86_BUILTIN_GATHERSIV4SF, IX86_BUILTIN_GATHERSIV8SF,
+ IX86_BUILTIN_GATHERDIV4SF, IX86_BUILTIN_GATHERDIV8SF,
+ IX86_BUILTIN_GATHERSIV2DI, IX86_BUILTIN_GATHERSIV4DI,
+ IX86_BUILTIN_GATHERDIV2DI, IX86_BUILTIN_GATHERDIV4DI,
+ IX86_BUILTIN_GATHERSIV4SI, IX86_BUILTIN_GATHERSIV8SI,
+ IX86_BUILTIN_GATHERDIV4SI, IX86_BUILTIN_GATHERDIV8SI.
+ (bdesc_special_args): Add IX86_BUILTIN_MOVNTDQA256,
+ IX86_BUILTIN_MASKLOADD, IX86_BUILTIN_MASKLOADQ,
+ IX86_BUILTIN_MASKLOADD256, IX86_BUILTIN_MASKLOADQ256,
+ IX86_BUILTIN_MASKSTORED, IX86_BUILTIN_MASKSTOREQ,
+ IX86_BUILTIN_MASKSTORED256, IX86_BUILTIN_MASKSTOREQ256.
+ (bdesc_args): Add IX86_BUILTIN_MPSADBW256,
+ IX86_BUILTIN_PABSB256, IX86_BUILTIN_PABSW256,
+ IX86_BUILTIN_PABSD256, IX86_BUILTIN_PACKSSDW256,
+ IX86_BUILTIN_PACKSSWB256, IX86_BUILTIN_PACKUSDW256,
+ IX86_BUILTIN_PACKUSWB256, IX86_BUILTIN_PADDB256,
+ IX86_BUILTIN_PADDW256, IX86_BUILTIN_PADDD256,
+ IX86_BUILTIN_PADDQ256, IX86_BUILTIN_PADDSB256,
+ IX86_BUILTIN_PADDSW256, IX86_BUILTIN_PADDUSB256,
+ IX86_BUILTIN_PADDUSW256, IX86_BUILTIN_PALIGNR256,
+ IX86_BUILTIN_AND256I, IX86_BUILTIN_ANDNOT256I,
+ IX86_BUILTIN_PAVGB256, IX86_BUILTIN_PAVGW256,
+ IX86_BUILTIN_PBLENDVB256, IX86_BUILTIN_PBLENDVW256,
+ IX86_BUILTIN_PCMPEQB256, IX86_BUILTIN_PCMPEQW256,
+ IX86_BUILTIN_PCMPEQD256, IX86_BUILTIN_PCMPEQQ256,
+ IX86_BUILTIN_PCMPGTB256, IX86_BUILTIN_PCMPGTW256,
+ IX86_BUILTIN_PCMPGTD256, IX86_BUILTIN_PCMPGTQ256,
+ IX86_BUILTIN_PHADDW256, IX86_BUILTIN_PHADDD256,
+ IX86_BUILTIN_PHADDSW256, IX86_BUILTIN_PHSUBW256,
+ IX86_BUILTIN_PHSUBD256, IX86_BUILTIN_PHSUBSW256,
+ IX86_BUILTIN_PMADDUBSW256, IX86_BUILTIN_PMADDWD256,
+ IX86_BUILTIN_PMAXSB256, IX86_BUILTIN_PMAXSW256,
+ IX86_BUILTIN_PMAXSD256, IX86_BUILTIN_PMAXUB256,
+ IX86_BUILTIN_PMAXUW256, IX86_BUILTIN_PMAXUD256,
+ IX86_BUILTIN_PMINSB256, IX86_BUILTIN_PMINSW256,
+ IX86_BUILTIN_PMINSD256, IX86_BUILTIN_PMINUB256,
+ IX86_BUILTIN_PMINUW256, IX86_BUILTIN_PMINUD256,
+ IX86_BUILTIN_PMOVMSKB256, IX86_BUILTIN_PMOVSXBW256,
+ IX86_BUILTIN_PMOVSXBD256, IX86_BUILTIN_PMOVSXBQ256,
+ IX86_BUILTIN_PMOVSXWD256, IX86_BUILTIN_PMOVSXWQ256,
+ IX86_BUILTIN_PMOVSXDQ256, IX86_BUILTIN_PMOVZXBW256,
+ IX86_BUILTIN_PMOVZXBD256, IX86_BUILTIN_PMOVZXBQ256,
+ IX86_BUILTIN_PMOVZXWD256, IX86_BUILTIN_PMOVZXWQ256,
+ IX86_BUILTIN_PMOVZXDQ256, IX86_BUILTIN_PMULDQ256,
+ IX86_BUILTIN_PMULHRSW256, IX86_BUILTIN_PMULHUW256,
+ IX86_BUILTIN_PMULHW256, IX86_BUILTIN_PMULLW256,
+ IX86_BUILTIN_PMULLD256, IX86_BUILTIN_PMULUDQ256,
+ IX86_BUILTIN_POR256, IX86_BUILTIN_PSADBW256,
+ IX86_BUILTIN_PSHUFB256, IX86_BUILTIN_PSHUFD256,
+ IX86_BUILTIN_PSHUFHW256, IX86_BUILTIN_PSHUFLW256,
+ IX86_BUILTIN_PSIGNB256, IX86_BUILTIN_PSIGNW256,
+ IX86_BUILTIN_PSIGND256, IX86_BUILTIN_PSLLDQI256,
+ IX86_BUILTIN_PSLLWI256, IX86_BUILTIN_PSLLW256,
+ IX86_BUILTIN_PSLLDI256, IX86_BUILTIN_PSLLD256,
+ IX86_BUILTIN_PSLLQI256, IX86_BUILTIN_PSLLQ256,
+ IX86_BUILTIN_PSRAWI256, IX86_BUILTIN_PSRAW256,
+ IX86_BUILTIN_PSRADI256, IX86_BUILTIN_PSRAD256,
+ IX86_BUILTIN_PSRLDQI256, IX86_BUILTIN_PSRLWI256,
+ IX86_BUILTIN_PSRLW256, IX86_BUILTIN_PSRLDI256,
+ IX86_BUILTIN_PSRLD256, IX86_BUILTIN_PSRLQI256,
+ IX86_BUILTIN_PSRLQ256, IX86_BUILTIN_PSUBB256,
+ IX86_BUILTIN_PSUBW256, IX86_BUILTIN_PSUBD256,
+ IX86_BUILTIN_PSUBQ256, IX86_BUILTIN_PSUBSB256,
+ IX86_BUILTIN_PSUBSW256, IX86_BUILTIN_PSUBUSB256,
+ IX86_BUILTIN_PSUBUSW256, IX86_BUILTIN_PUNPCKHBW256,
+ IX86_BUILTIN_PUNPCKHWD256, IX86_BUILTIN_PUNPCKHDQ256,
+ IX86_BUILTIN_PUNPCKHQDQ256, IX86_BUILTIN_PUNPCKLBW256,
+ IX86_BUILTIN_PUNPCKLWD256, IX86_BUILTIN_PUNPCKLDQ256,
+ IX86_BUILTIN_PUNPCKLQDQ256, IX86_BUILTIN_PXOR256,
+ IX86_BUILTIN_VBROADCASTSS_PS, IX86_BUILTIN_VBROADCASTSS_PS256,
+ IX86_BUILTIN_VBROADCASTSD_PD256,
+ IX86_BUILTIN_VBROADCASTSI256, IX86_BUILTIN_PBLENDD256,
+ IX86_BUILTIN_PBLENDD128, IX86_BUILTIN_PBROADCASTB256,
+ IX86_BUILTIN_PBROADCASTW256, IX86_BUILTIN_PBROADCASTD256,
+ IX86_BUILTIN_PBROADCASTQ256, IX86_BUILTIN_PBROADCASTB128,
+ IX86_BUILTIN_PBROADCASTW128, IX86_BUILTIN_PBROADCASTD128,
+ IX86_BUILTIN_PBROADCASTQ128, IX86_BUILTIN_VPERMVARSI256,
+ IX86_BUILTIN_VPERMDF256, IX86_BUILTIN_VPERMVARSF256,
+ IX86_BUILTIN_VPERMDI256, IX86_BUILTIN_VPERMTI256,
+ IX86_BUILTIN_VEXTRACT128I256, IX86_BUILTIN_VINSERT128I256,
+ IX86_BUILTIN_PSLLVV4DI, IX86_BUILTIN_PSLLVV2DI,
+ IX86_BUILTIN_PSLLVV8SI, IX86_BUILTIN_PSLLVV4SI,
+ IX86_BUILTIN_PSRAVV8SI, IX86_BUILTIN_PSRAVV4SI,
+ IX86_BUILTIN_PSRLVV4DI, IX86_BUILTIN_PSRLVV2DI,
+ IX86_BUILTIN_PSRLVV8SI, IX86_BUILTIN_PSRLVV4SI.
+ (ix86_init_mmx_sse_builtins): Add IX86_BUILTIN_GATHERSIV2DF,
+ IX86_BUILTIN_GATHERSIV4DF, IX86_BUILTIN_GATHERDIV2DF,
+ IX86_BUILTIN_GATHERDIV4DF, IX86_BUILTIN_GATHERSIV4SF,
+ IX86_BUILTIN_GATHERSIV8SF, IX86_BUILTIN_GATHERDIV4SF,
+ IX86_BUILTIN_GATHERDIV8SF, IX86_BUILTIN_GATHERSIV2DI,
+ IX86_BUILTIN_GATHERSIV4DI, IX86_BUILTIN_GATHERDIV2DI,
+ IX86_BUILTIN_GATHERDIV4DI, IX86_BUILTIN_GATHERSIV4SI,
+ IX86_BUILTIN_GATHERSIV8SI, IX86_BUILTIN_GATHERDIV4SI,
+ IX86_BUILTIN_GATHERDIV8SI.
+ (ix86_preferred_simd_mode): Support AVX2 modes.
+ (ix86_expand_args_builtin): Support AVX2 built-ins.
+ (ix86_expand_special_args_builtin): Likewise.
+ (ix86_expand_builtin): Likewise.
+ * config/i386/i386.md (UNSPEC_VPERMSI): New.
+ (UNSPEC_VPERMDF): Likewise.
+ (UNSPEC_VPERMSF): Likewise.
+ (UNSPEC_VPERMDI): Likewise.
+ (UNSPEC_VPERMTI): Likewise.
+ (UNSPEC_GATHER): Likewise.
+ (ssemodesuffix): Extend.
+ * config/i386/immintrin.h: Include avx2intrin.h when __AVX2__
+ is defined.
+ * config/i386/predicates.md (const1248_operand): New.
+	* config/i386/sse.md (VI_AVX2): New.
+ (VI1_AVX2): Likewise.
+ (VI2_AVX2): Likewise.
+ (VI4_AVX2): Likewise.
+ (VI8_AVX2): Likewise.
+ (VIMAX_AVX2): Likewise.
+ (SSESCALARMODE): Likewise.
+ (VI12_AVX2): Likewise.
+ (VI24_AVX2): Likewise.
+	(VI124_AVX2): Likewise.
+ (VI248_AVX2): Likewise.
+ (VI48_AVX2): Likewise.
+ (VI4SD_AVX2): Likewise.
+ (V48_AVX2): Likewise.
+ (avx2modesuffix): Likewise.
+ (sse_avx2): Likewise.
+ (sse2_avx2): Likewise.
+ (ssse3_avx2): Likewise.
+ (sse4_1_avx2): Likewise.
+ (avx_avx2): Likewise.
+	(lshift)<code_iterator>: Likewise.
+ (lshift_insn): Likewise.
+ (lshift)<code_attr>: Likewise.
+ (SSESHORTMODE): Likewise.
+ (SSELONGMODE): Likewise.
+ (SSEBYTEMODE): Likewise.
+ (AVXTOSSEMODE): Likewise.
+ (shortmode): Likewise.
+ (ssescalarmodesuffix): Update.
+ (sseunpackmode): Likewise.
+ (ssepackmode): Likewise.
+ (AVX256MODEI): New.
+ (AVX256MODE124): Likewise.
+ (AVX256MODE1248): Likewise.
+ (AVX256MODE248): Likewise.
+ (AVXMODE48P_SI): Likewise.
+ (AVXMODE48P_SI): Likewise.
+ (AVXMODE48P_DI): Likewise.
+ (AVXMODE48P_DI): Likewise.
+ (gthrfirstp): Likewise.
+ (gthrlastp): Likewise.
+	(avx2): Likewise.
+ (ssevecsize): Likewise.
+ (ssedoublesizemode): Likewise.
+ (avxvecmode): Likewise.
+ (avxvecsize): Likewise.
+ (avxhalfvecmode): Likewise.
+ (avxscalarmode): Likewise.
+ (avxpermvecmode): Likewise.
+ (avxmodesuffixp): Likewise.
+ (avxmodesuffix): Likewise.
+ (avx2_vec_dupv4sf): New.
+ (avx2_vec_dupv8sf): Likewise.
+ (avx2_interleave_highv4di): Likewise.
+ (avx2_interleave_lowv4di): Likewise.
+ (<plusminus_insn><mode>3): Update.
+ (*<plusminus_insn><mode>3): Likewise.
+ (sse2_<plusminus_insn><mode>3): Rename to ...
+ ("<sse2_avx2>_<plusminus_insn><mode>3): ... this. updated.
+ (*sse2_<plusminus_insn><mode>3): Likewise.
+ (*<sse2_avx2>_<plusminus_insn><mode>3): Likewise.
+ (mulv8hi3): Likewise.
+ (mul<mode>3): Likewise.
+ (*mulv8hi3): Likewise.
+ (*mul<mode>3): Likewise.
+ (<s>mulv8hi3_highpart): Likewise.
+ (<s>mul<mode>3_highpart): Likewise.
+ (*<s>mulv8hi3_highpart): Likewise.
+ (*<s>mul<mode>3_highpart): Likewise.
+ (avx2_umulv4siv4di3): Likewise.
+ (*avx_umulv4siv4di3): Likewise.
+ (sse4_1_mulv2siv2di3): Likewise.
+ (<sse4_1_avx2>_mul<shortmode><mode>3): Likewise.
+ (*sse4_1_mulv2siv2di3): Likewise.
+ (*<sse4_1_avx2>_mulv2siv2di3): Likewise.
+ (avx2_pmaddwd): New.
+ (*avx2_pmaddwd): Likewise.
+ (mulv4si3): Rename to ...
+ (mul<mode>3): ... this. Update.
+ (*sse4_1_mulv4si3): Likewise.
+ (*<sse4_1_avx2>_mul<mode>3): Likewise.
+ (ashr<mode>3): Update.
+ (avx2_lshrqv4di3): New.
+ (lshr<mode>3): Update.
+ (avx2_lshlqv4di3): New.
+ (avx2_lshl<mode>3): Likewise.
+ (sse2_ashlv1ti3): Rename to ...
+ (<sse2_avx2>_ashl<mode>3): ... this. Update.
+ (avx2_<code><mode>3)<umaxmin>: New.
+ (*avx2_<code><mode>3)<umaxmin>: Likewise.
+ (avx2_<code><mode>3)<smaxmin>: New.
+ (*avx2_<code><mode>3)<smaxmin>: Likewise.
+ (avx2_eq<mode>3): Likewise.
+ (*avx2_eq<mode>3): Likewise.
+ (avx2_gt<mode>3): Likewise.
+ (sse2_andnot<mode>3): Rename to ...
+ (<sse2_avx2>_andnot<mode>3): ... this. Update.
+ (*andnot<mode>3): Update.
+ (<code><mode>3)<any_logic>: Update.
+ (*<code><mode>3)<any_logic>: Likewise.
+ (sse2_packsswb): Rename to ...
+ (<sse2_avx2>_packsswb): ... this. Update.
+ (sse2_packssdw): Likewise.
+ (<sse2_avx2>_packssdw): Likewise.
+ (sse2_packuswb): Likewise.
+ (<sse2_avx2>_packuswb): Likewise.
+ (avx2_interleave_highv32qi): New.
+ (avx2_interleave_lowv32qi): Likewise.
+ (avx2_interleave_highv16hi): Likewise.
+ (avx2_interleave_lowv16hi): Likewise.
+ (avx2_interleave_highv8si): Likewise.
+ (avx2_interleave_lowv8si): Likewise.
+	(avx2_pshufd): New.
+ (avx2_pshufd_1): Likewise.
+ (avx2_pshuflwv3): Likewise.
+ (avx2_pshuflw_1): Likewise.
+ (avx2_pshufhwv3): Likewise.
+ (avx2_pshufhw_1): Likewise.
+ (avx2_uavgv32qi3): Likewise.
+ (*avx2_uavgv32qi3): Likewise.
+ (avx2_uavgv16hi3): Likewise.
+ (*avx2_uavgv16hi3): Likewise.
+ (sse2_psadbw): Rename to ...
+ (<sse2_avx2>_psadbw): ... this. Update.
+ (avx2_pmovmskb): New.
+ (avx2_phaddwv16hi3): Likewise.
+ (avx2_phadddv8si3): Likewise.
+ (avx2_phaddswv16hi3): Likewise.
+ (avx2_phsubwv16hi3): Likewise.
+ (avx2_phsubdv8si3): Likewise.
+ (avx2_phsubswv16hi3): Likewise.
+ (avx2_pmaddubsw256): Likewise.
+ (avx2_umulhrswv16hi3): Likewise.
+ (*avx2_umulhrswv16hi3): Likewise.
+ (ssse3_pshufbv16qi3): Rename to ...
+ (<ssse3_avx2>_pshufb<mode>3): ... this. Update.
+ (ssse3_psign<mode>3): Likewise.
+ (<ssse3_avx2>_psign<mode>3): Likewise.
+ (ssse3_palignrti): Likewise.
+ (<ssse3_avx2>_palignr<mode>): Likewise.
+ (abs<mode>2): Likewise.
+ (sse4_1_movntdqa): Rename to ...
+ (<sse4_1_avx2>_movntdqa): ... this. Update.
+ (sse4_1_mpsadbw): Likewise.
+ (<sse4_1_avx2>_mpsadbw): Likewise.
+ (avx2_packusdw): New.
+ (sse4_1_pblendvb): Rename to ...
+ (<sse4_1_avx2>_pblendvb): ... this. Update.
+ (sse4_1_pblendw): Likewise.
+ (<sse4_1_avx2>_pblendw): Likewise.
+ (avx2_pblendd<mode>): New.
+ (avx2_<code>v16qiv16hi2): Likewise.
+ (avx2_<code>v8qiv8si2): Likewise.
+ (avx2_<code>v8hiv8si2): Likewise.
+ (avx2_<code>v4qiv4di2): Likewise.
+ (avx2_<code>v4hiv4di2): Likewise.
+ (avx2_<code>v4siv4di2): Likewise.
+ (avx2_pbroadcast<mode>): Likewise.
+ (avx2_permvarv8si): Likewise.
+ (avx2_permv4df): Likewise.
+ (avx2_permvarv8sf): Likewise.
+ (avx2_permv4di): Likewise.
+ (avx2_permv2ti): Likewise.
+ (avx2_vec_dupv4df): Likewise.
+ (avx2_vbroadcasti128_<mode>): Likewise.
+ (avx2_vec_set_lo_v4di): Likewise.
+ (avx2_vec_set_hi_v4di): Likewise.
+ (avx_maskload<ssemodesuffix><avxsizesuffix>): Rename to ...
+ (<avx_avx2>_maskload<avx2modesuffix><avxmodesuffix>): ... this.
+ Update.
+ (avx_maskstore<ssemodesuffix><avxsizesuffix>): Likewise.
+ (<avx_avx2>_maskstore<avx2modesuffix><avxmodesuffix>): Likewise.
+ (*avx2_maskmov<avx2modesuffix><avxmodesuffix>): New.
+ (avx2_extracti128): Likewise.
+ (avx2_inserti128): Likewise.
+ (avx2_ashrvv8si): Likewise.
+ (avx2_ashrvv4si): Likewise.
+ (avx2_<lshift>vv8si): Likewise.
+ (avx2_<lshift>v<mode>): Likewise.
+ (avx2_<lshift>vv2di): Likewise.
+ (avx2_gathersi<mode>): Likewise.
+ (*avx2_gathersi<mode>): Likewise.
+ (avx2_gatherdi<mode>): Likewise.
+ (*avx2_gatherdi<mode>): Likewise.
+ (avx2_gatherdi<mode>256): Likewise.
+ (*avx2_gatherdi<mode>256): Likewise.
+ * doc/extend.texi: Document AVX2 built-in functions.
+ * doc/invoke.texi: Document -mavx2.
+
2011-08-22 Matthias Klose <doko@debian.org>
Revert:
diff --git a/gcc/config.gcc b/gcc/config.gcc
index e8e0eeb..b8addaf 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -352,7 +352,7 @@ i[34567]86-*-*)
nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
immintrin.h x86intrin.h avxintrin.h xopintrin.h
ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
- lzcntintrin.h bmiintrin.h tbmintrin.h"
+ lzcntintrin.h bmiintrin.h tbmintrin.h avx2intrin.h"
;;
x86_64-*-*)
cpu_type=i386
@@ -364,7 +364,7 @@ x86_64-*-*)
nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
immintrin.h x86intrin.h avxintrin.h xopintrin.h
ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
- lzcntintrin.h bmiintrin.h tbmintrin.h"
+ lzcntintrin.h bmiintrin.h tbmintrin.h avx2intrin.h"
need_64bit_hwint=yes
;;
ia64-*-*)
diff --git a/gcc/config/i386/avx2intrin.h b/gcc/config/i386/avx2intrin.h
new file mode 100644
index 0000000..3c8f360
--- /dev/null
+++ b/gcc/config/i386/avx2intrin.h
@@ -0,0 +1,1874 @@
+/* Copyright (C) 2011
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+# error "Never use <avx2intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+/* Sum absolute 8-bit integer difference of adjacent groups of 4
+ byte integers in the first 2 operands. Starting offsets within
+ operands are determined by the 3rd mask operand. */
+#ifdef __OPTIMIZE__
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mpsadbw_epu8 (__m256i __X, __m256i __Y, const int __M)
+{
+ return (__m256i) __builtin_ia32_mpsadbw256 ((__v32qi)__X,
+ (__v32qi)__Y, __M);
+}
+#else
+#define _mm256_mpsadbw_epu8(X, Y, M) \
+ ((__m256i) __builtin_ia32_mpsadbw256 ((__v32qi)(__m256i)(X), \
+ (__v32qi)(__m256i)(Y), (int)(M)))
+#endif
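+
+/* Illustrative usage sketch, assuming compilation with -mavx2 and
+   hypothetical 32-byte source buffers src_a and src_b:
+
+     __m256i a = _mm256_loadu_si256 ((__m256i const *) src_a);
+     __m256i b = _mm256_loadu_si256 ((__m256i const *) src_b);
+     __m256i sad = _mm256_mpsadbw_epu8 (a, b, 0);
+
+   The mask operand must be a compile-time constant, which is why a
+   macro form is provided when __OPTIMIZE__ is not defined.  */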
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_abs_epi8 (__m256i __A)
+{
+ return (__m256i)__builtin_ia32_pabsb256 ((__v32qi)__A);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_abs_epi16 (__m256i __A)
+{
+ return (__m256i)__builtin_ia32_pabsw256 ((__v16hi)__A);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_abs_epi32 (__m256i __A)
+{
+ return (__m256i)__builtin_ia32_pabsd256 ((__v8si)__A);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_packs_epi32 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_packssdw256 ((__v8si)__A, (__v8si)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_packs_epi16 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_packsswb256 ((__v16hi)__A, (__v16hi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_packus_epi32 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_packusdw256 ((__v8si)__A, (__v8si)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_packus_epi16 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_packuswb256 ((__v16hi)__A, (__v16hi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_add_epi8 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_paddb256 ((__v32qi)__A, (__v32qi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_add_epi16 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_paddw256 ((__v16hi)__A, (__v16hi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_add_epi32 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_paddd256 ((__v8si)__A, (__v8si)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_add_epi64 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_paddq256 ((__v4di)__A, (__v4di)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_adds_epi8 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_paddsb256 ((__v32qi)__A, (__v32qi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_adds_epi16 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_paddsw256 ((__v16hi)__A, (__v16hi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_adds_epu8 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_paddusb256 ((__v32qi)__A, (__v32qi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_adds_epu16 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_paddusw256 ((__v16hi)__A, (__v16hi)__B);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_alignr_epi8 (__m256i __A, __m256i __B, const int __N)
+{
+ return (__m256i) __builtin_ia32_palignr256 ((__v4di)__A,
+ (__v4di)__B,
+ __N * 8);
+}
+#else
+/* In that case (__N * 8) would end up in a register, and the insn
+   would not be matched.  Use a macro instead.  */
+#define _mm256_alignr_epi8(A, B, N) \
+ ((__m256i) __builtin_ia32_palignr256 ((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), \
+ (int)(N) * 8))
+#endif
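+
+/* A minimal usage sketch, with hypothetical __m256i values a and b;
+   the byte-shift count must be a compile-time constant:
+
+     __m256i r = _mm256_alignr_epi8 (a, b, 5);  */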
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_and_si256 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_andsi256 ((__v4di)__A, (__v4di)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_andnot_si256 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_andnotsi256 ((__v4di)__A, (__v4di)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_avg_epu8 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_pavgb256 ((__v32qi)__A, (__v32qi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_avg_epu16 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_pavgw256 ((__v16hi)__A, (__v16hi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_blendv_epi8 (__m256i __X, __m256i __Y, __m256i __M)
+{
+ return (__m256i) __builtin_ia32_pblendvb256 ((__v32qi)__X,
+ (__v32qi)__Y,
+ (__v32qi)__M);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_blend_epi16 (__m256i __X, __m256i __Y, const int __M)
+{
+ return (__m256i) __builtin_ia32_pblendw256 ((__v16hi)__X,
+ (__v16hi)__Y,
+ __M);
+}
+#else
+#define _mm256_blend_epi16(X, Y, M) \
+ ((__m256i) __builtin_ia32_pblendw256 ((__v16hi)(__m256i)(X), \
+ (__v16hi)(__m256i)(Y), (int)(M)))
+#endif
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpeq_epi8 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_pcmpeqb256 ((__v32qi)__A, (__v32qi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpeq_epi16 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_pcmpeqw256 ((__v16hi)__A, (__v16hi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpeq_epi32 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_pcmpeqd256 ((__v8si)__A, (__v8si)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpeq_epi64 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_pcmpeqq256 ((__v4di)__A, (__v4di)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpgt_epi8 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_pcmpgtb256 ((__v32qi)__A,
+ (__v32qi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpgt_epi16 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_pcmpgtw256 ((__v16hi)__A,
+ (__v16hi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpgt_epi32 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_pcmpgtd256 ((__v8si)__A,
+ (__v8si)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpgt_epi64 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_pcmpgtq256 ((__v4di)__A, (__v4di)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_hadd_epi16 (__m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_phaddw256 ((__v16hi)__X,
+ (__v16hi)__Y);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_hadd_epi32 (__m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_phaddd256 ((__v8si)__X, (__v8si)__Y);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_hadds_epi16 (__m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_phaddsw256 ((__v16hi)__X,
+ (__v16hi)__Y);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_hsub_epi16 (__m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_phsubw256 ((__v16hi)__X,
+ (__v16hi)__Y);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_hsub_epi32 (__m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_phsubd256 ((__v8si)__X, (__v8si)__Y);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_hsubs_epi16 (__m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_phsubsw256 ((__v16hi)__X,
+ (__v16hi)__Y);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maddubs_epi16 (__m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_pmaddubsw256 ((__v32qi)__X,
+ (__v32qi)__Y);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_madd_epi16 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_pmaddwd256 ((__v16hi)__A,
+ (__v16hi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_max_epi8 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_pmaxsb256 ((__v32qi)__A, (__v32qi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_max_epi16 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_pmaxsw256 ((__v16hi)__A, (__v16hi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_max_epi32 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_pmaxsd256 ((__v8si)__A, (__v8si)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_max_epu8 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_pmaxub256 ((__v32qi)__A, (__v32qi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_max_epu16 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_pmaxuw256 ((__v16hi)__A, (__v16hi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_max_epu32 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_pmaxud256 ((__v8si)__A, (__v8si)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_min_epi8 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_pminsb256 ((__v32qi)__A, (__v32qi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_min_epi16 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_pminsw256 ((__v16hi)__A, (__v16hi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_min_epi32 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_pminsd256 ((__v8si)__A, (__v8si)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_min_epu8 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_pminub256 ((__v32qi)__A, (__v32qi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_min_epu16 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__A, (__v16hi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_min_epu32 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_pminud256 ((__v8si)__A, (__v8si)__B);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movemask_epi8 (__m256i __A)
+{
+ return __builtin_ia32_pmovmskb256 ((__v32qi)__A);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi8_epi16 (__m128i __X)
+{
+ return (__m256i) __builtin_ia32_pmovsxbw256 ((__v16qi)__X);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi8_epi32 (__m128i __X)
+{
+ return (__m256i) __builtin_ia32_pmovsxbd256 ((__v16qi)__X);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi8_epi64 (__m128i __X)
+{
+ return (__m256i) __builtin_ia32_pmovsxbq256 ((__v16qi)__X);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi16_epi32 (__m128i __X)
+{
+ return (__m256i) __builtin_ia32_pmovsxwd256 ((__v8hi)__X);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi16_epi64 (__m128i __X)
+{
+ return (__m256i) __builtin_ia32_pmovsxwq256 ((__v8hi)__X);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi32_epi64 (__m128i __X)
+{
+ return (__m256i) __builtin_ia32_pmovsxdq256 ((__v4si)__X);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepu8_epi16 (__m128i __X)
+{
+ return (__m256i) __builtin_ia32_pmovzxbw256 ((__v16qi)__X);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepu8_epi32 (__m128i __X)
+{
+ return (__m256i) __builtin_ia32_pmovzxbd256 ((__v16qi)__X);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepu8_epi64 (__m128i __X)
+{
+ return (__m256i) __builtin_ia32_pmovzxbq256 ((__v16qi)__X);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepu16_epi32 (__m128i __X)
+{
+ return (__m256i) __builtin_ia32_pmovzxwd256 ((__v8hi)__X);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepu16_epi64 (__m128i __X)
+{
+ return (__m256i) __builtin_ia32_pmovzxwq256 ((__v8hi)__X);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepu32_epi64 (__m128i __X)
+{
+ return (__m256i) __builtin_ia32_pmovzxdq256 ((__v4si)__X);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mul_epi32 (__m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_pmuldq256 ((__v8si)__X, (__v8si)__Y);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mulhrs_epi16 (__m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_pmulhrsw256 ((__v16hi)__X,
+ (__v16hi)__Y);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mulhi_epu16 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_pmulhuw256 ((__v16hi)__A, (__v16hi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mulhi_epi16 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_pmulhw256 ((__v16hi)__A, (__v16hi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mullo_epi16 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_pmullw256 ((__v16hi)__A, (__v16hi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mullo_epi32 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_pmulld256 ((__v8si)__A, (__v8si)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mul_epu32 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_pmuludq256 ((__v8si)__A, (__v8si)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_or_si256 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_por256 ((__v4di)__A, (__v4di)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sad_epu8 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_psadbw256 ((__v32qi)__A, (__v32qi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_shuffle_epi8 (__m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_pshufb256 ((__v32qi)__X,
+ (__v32qi)__Y);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_shuffle_epi32 (__m256i __A, const int __mask)
+{
+ return (__m256i)__builtin_ia32_pshufd256 ((__v8si)__A, __mask);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_shufflehi_epi16 (__m256i __A, const int __mask)
+{
+ return (__m256i)__builtin_ia32_pshufhw256 ((__v16hi)__A, __mask);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_shufflelo_epi16 (__m256i __A, const int __mask)
+{
+ return (__m256i)__builtin_ia32_pshuflw256 ((__v16hi)__A, __mask);
+}
+#else
+#define _mm256_shuffle_epi32(A, N) \
+ ((__m256i)__builtin_ia32_pshufd256 ((__v8si)(__m256i)(A), (int)(N)))
+#define _mm256_shufflehi_epi16(A, N) \
+ ((__m256i)__builtin_ia32_pshufhw256 ((__v16hi)(__m256i)(A), (int)(N)))
+#define _mm256_shufflelo_epi16(A, N) \
+ ((__m256i)__builtin_ia32_pshuflw256 ((__v16hi)(__m256i)(A), (int)(N)))
+#endif
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sign_epi8 (__m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psignb256 ((__v32qi)__X, (__v32qi)__Y);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sign_epi16 (__m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psignw256 ((__v16hi)__X, (__v16hi)__Y);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sign_epi32 (__m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psignd256 ((__v8si)__X, (__v8si)__Y);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_slli_si256 (__m256i __A, const int __N)
+{
+ return (__m256i)__builtin_ia32_pslldqi256 (__A, __N * 8);
+}
+#else
+#define _mm256_slli_si256(A, N) \
+ ((__m256i)__builtin_ia32_pslldqi256 ((__m256i)(A), (int)(N) * 8))
+#endif
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_slli_epi16 (__m256i __A, int __B)
+{
+ return (__m256i)__builtin_ia32_psllwi256 ((__v16hi)__A, __B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sll_epi16 (__m256i __A, __m128i __B)
+{
+ return (__m256i)__builtin_ia32_psllw256((__v16hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_slli_epi32 (__m256i __A, int __B)
+{
+ return (__m256i)__builtin_ia32_pslldi256 ((__v8si)__A, __B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sll_epi32 (__m256i __A, __m128i __B)
+{
+ return (__m256i)__builtin_ia32_pslld256((__v8si)__A, (__v4si)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_slli_epi64 (__m256i __A, int __B)
+{
+ return (__m256i)__builtin_ia32_psllqi256 ((__v4di)__A, __B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sll_epi64 (__m256i __A, __m128i __B)
+{
+ return (__m256i)__builtin_ia32_psllq256((__v4di)__A, (__v2di)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srai_epi16 (__m256i __A, int __B)
+{
+ return (__m256i)__builtin_ia32_psrawi256 ((__v16hi)__A, __B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sra_epi16 (__m256i __A, __m128i __B)
+{
+ return (__m256i)__builtin_ia32_psraw256 ((__v16hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srai_epi32 (__m256i __A, int __B)
+{
+ return (__m256i)__builtin_ia32_psradi256 ((__v8si)__A, __B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sra_epi32 (__m256i __A, __m128i __B)
+{
+ return (__m256i)__builtin_ia32_psrad256 ((__v8si)__A, (__v4si)__B);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srli_si256 (__m256i __A, const int __N)
+{
+ return (__m256i)__builtin_ia32_psrldqi256 (__A, __N * 8);
+}
+#else
+#define _mm256_srli_si256(A, N) \
+ ((__m256i)__builtin_ia32_psrldqi256 ((__m256i)(A), (int)(N) * 8))
+#endif
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srli_epi16 (__m256i __A, int __B)
+{
+ return (__m256i)__builtin_ia32_psrlwi256 ((__v16hi)__A, __B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srl_epi16 (__m256i __A, __m128i __B)
+{
+ return (__m256i)__builtin_ia32_psrlw256((__v16hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srli_epi32 (__m256i __A, int __B)
+{
+ return (__m256i)__builtin_ia32_psrldi256 ((__v8si)__A, __B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srl_epi32 (__m256i __A, __m128i __B)
+{
+ return (__m256i)__builtin_ia32_psrld256((__v8si)__A, (__v4si)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srli_epi64 (__m256i __A, int __B)
+{
+ return (__m256i)__builtin_ia32_psrlqi256 ((__v4di)__A, __B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srl_epi64 (__m256i __A, __m128i __B)
+{
+ return (__m256i)__builtin_ia32_psrlq256((__v4di)__A, (__v2di)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sub_epi8 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_psubb256 ((__v32qi)__A, (__v32qi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sub_epi16 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_psubw256 ((__v16hi)__A, (__v16hi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sub_epi32 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_psubd256 ((__v8si)__A, (__v8si)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sub_epi64 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_psubq256 ((__v4di)__A, (__v4di)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_subs_epi8 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_psubsb256 ((__v32qi)__A, (__v32qi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_subs_epi16 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_psubsw256 ((__v16hi)__A, (__v16hi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_subs_epu8 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_psubusb256 ((__v32qi)__A, (__v32qi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_subs_epu16 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_psubusw256 ((__v16hi)__A, (__v16hi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpackhi_epi8 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_punpckhbw256 ((__v32qi)__A, (__v32qi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpackhi_epi16 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_punpckhwd256 ((__v16hi)__A, (__v16hi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpackhi_epi32 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_punpckhdq256 ((__v8si)__A, (__v8si)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpackhi_epi64 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_punpckhqdq256 ((__v4di)__A, (__v4di)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpacklo_epi8 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_punpcklbw256 ((__v32qi)__A, (__v32qi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpacklo_epi16 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_punpcklwd256 ((__v16hi)__A, (__v16hi)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpacklo_epi32 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_punpckldq256 ((__v8si)__A, (__v8si)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpacklo_epi64 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_punpcklqdq256 ((__v4di)__A, (__v4di)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_xor_si256 (__m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_pxor256 ((__v4di)__A, (__v4di)__B);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_stream_load_si256 (__m256i const *__X)
+{
+ return (__m256i) __builtin_ia32_movntdqa256 ((__v4di *) __X);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_broadcastss_ps (__m128 __X)
+{
+ return (__m128) __builtin_ia32_vbroadcastss_ps ((__v4sf)__X);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcastss_ps (__m128 __X)
+{
+ return (__m256) __builtin_ia32_vbroadcastss_ps256 ((__v4sf)__X);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcastsd_pd (__m128d __X)
+{
+ return (__m256d) __builtin_ia32_vbroadcastsd_pd256 ((__v2df)__X);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_broadcastsi128_si256 (__m128i __X)
+{
+ return (__m256i) __builtin_ia32_vbroadcastsi256 ((__v2di)__X);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_blend_epi32 (__m128i __X, __m128i __Y, const int __M)
+{
+ return (__m128i) __builtin_ia32_pblendd128 ((__v4si)__X,
+ (__v4si)__Y,
+ __M);
+}
+#else
+#define _mm_blend_epi32(X, Y, M) \
+ ((__m128i) __builtin_ia32_pblendd128 ((__v4si)(__m128i)(X), \
+ (__v4si)(__m128i)(Y), (int)(M)))
+#endif
+
+#ifdef __OPTIMIZE__
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_blend_epi32 (__m256i __X, __m256i __Y, const int __M)
+{
+ return (__m256i) __builtin_ia32_pblendd256 ((__v8si)__X,
+ (__v8si)__Y,
+ __M);
+}
+#else
+#define _mm256_blend_epi32(X, Y, M) \
+ ((__m256i) __builtin_ia32_pblendd256 ((__v8si)(__m256i)(X), \
+ (__v8si)(__m256i)(Y), (int)(M)))
+#endif
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcastb_epi8 (__m128i __X)
+{
+ return (__m256i) __builtin_ia32_pbroadcastb256 ((__v16qi)__X);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcastw_epi16 (__m128i __X)
+{
+ return (__m256i) __builtin_ia32_pbroadcastw256 ((__v8hi)__X);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcastd_epi32 (__m128i __X)
+{
+ return (__m256i) __builtin_ia32_pbroadcastd256 ((__v4si)__X);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcastq_epi64 (__m128i __X)
+{
+ return (__m256i) __builtin_ia32_pbroadcastq256 ((__v2di)__X);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_broadcastb_epi8 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pbroadcastb128 ((__v16qi)__X);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_broadcastw_epi16 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pbroadcastw128 ((__v8hi)__X);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_broadcastd_epi32 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pbroadcastd128 ((__v4si)__X);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_broadcastq_epi64 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pbroadcastq128 ((__v2di)__X);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutevar8x32_epi32 (__m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_permvarsi256 ((__v8si)__X, (__v8si)__Y);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute4x64_pd (__m256d __X, const int __M)
+{
+ return (__m256d) __builtin_ia32_permdf256 ((__v4df)__X, __M);
+}
+#else
+#define _mm256_permute4x64_pd(X, M) \
+ ((__m256d) __builtin_ia32_permdf256 ((__v4df)(__m256d)(X), (int)(M)))
+#endif
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutevar8x32_ps (__m256 __X, __m256 __Y)
+{
+ return (__m256) __builtin_ia32_permvarsf256 ((__v8sf)__X,(__v8sf)__Y);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute4x64_epi64 (__m256i __X, const int __M)
+{
+ return (__m256i) __builtin_ia32_permdi256 ((__v4di)__X, __M);
+}
+#else
+#define _mm256_permute4x64_epi64(X, M) \
+ ((__m256i) __builtin_ia32_permdi256 ((__v4di)(__m256i)(X), (int)(M)))
+#endif
+
+
+#ifdef __OPTIMIZE__
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute2x128_si256 (__m256i __X, __m256i __Y, const int __M)
+{
+ return (__m256i) __builtin_ia32_permti256 ((__v4di)__X, (__v4di)__Y, __M);
+}
+#else
+#define _mm256_permute2x128_si256(X, Y, M) \
+ ((__m256i) __builtin_ia32_permti256 ((__v4di)(__m256i)(X), (__v4di)(__m256i)(Y), (int)(M)))
+#endif
+
+#ifdef __OPTIMIZE__
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extracti128_si256 (__m256i __X, const int __M)
+{
+ return (__m128i) __builtin_ia32_extract128i256 ((__v4di)__X, __M);
+}
+#else
+#define _mm256_extracti128_si256(X, M) \
+ ((__m128i) __builtin_ia32_extract128i256 ((__v4di)(__m256i)(X), (int)(M)))
+#endif
+
+#ifdef __OPTIMIZE__
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_inserti128_si256 (__m256i __X, __m128i __Y, const int __M)
+{
+ return (__m256i) __builtin_ia32_insert128i256 ((__v4di)__X, (__v2di)__Y, __M);
+}
+#else
+#define _mm256_inserti128_si256(X, Y, M) \
+ ((__m256i) __builtin_ia32_insert128i256 ((__v4di)(__m256i)(X), \
+ (__v2di)(__m128i)(Y), \
+ (int)(M)))
+#endif
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskload_epi32 (int const *__X, __m256i __M )
+{
+ return (__m256i) __builtin_ia32_maskloadd256 ((const __v8si *)__X,
+ (__v8si)__M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskload_epi64 (long long const *__X, __m256i __M )
+{
+ return (__m256i) __builtin_ia32_maskloadq256 ((const __v4di *)__X,
+ (__v4di)__M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskload_epi32 (int const *__X, __m128i __M )
+{
+ return (__m128i) __builtin_ia32_maskloadd ((const __v4si *)__X,
+ (__v4si)__M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskload_epi64 (long long const *__X, __m128i __M )
+{
+ return (__m128i) __builtin_ia32_maskloadq ((const __v2di *)__X,
+ (__v2di)__M);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskstore_epi32 (int *__X, __m256i __M, __m256i __Y )
+{
+ __builtin_ia32_maskstored256 ((__v8si *)__X, (__v8si)__M, (__v8si)__Y);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskstore_epi64 (long long *__X, __m256i __M, __m256i __Y )
+{
+ __builtin_ia32_maskstoreq256 ((__v4di *)__X, (__v4di)__M, (__v4di)__Y);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskstore_epi32 (int *__X, __m128i __M, __m128i __Y )
+{
+ __builtin_ia32_maskstored ((__v4si *)__X, (__v4si)__M, (__v4si)__Y);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskstore_epi64 (long long *__X, __m128i __M, __m128i __Y )
+{
+ __builtin_ia32_maskstoreq (( __v2di *)__X, (__v2di)__M, (__v2di)__Y);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sllv_epi32 (__m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psllv8si ((__v8si)__X, (__v8si)__Y);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sllv_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psllv4si ((__v4si)__X, (__v4si)__Y);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sllv_epi64 (__m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psllv4di ((__v4di)__X, (__v4di)__Y);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sllv_epi64 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psllv2di ((__v2di)__X, (__v2di)__Y);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srav_epi32 (__m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psrav8si ((__v8si)__X, (__v8si)__Y);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srav_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psrav4si ((__v4si)__X, (__v4si)__Y);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srlv_epi32 (__m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psrlv8si ((__v8si)__X, (__v8si)__Y);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srlv_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psrlv4si ((__v4si)__X, (__v4si)__Y);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srlv_epi64 (__m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psrlv4di ((__v4di)__X, (__v4di)__Y);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srlv_epi64 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psrlv2di ((__v2di)__X, (__v2di)__Y);
+}
+
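Unlike the classic psllw/pslld/psllq forms, which shift every lane by a single count, the sllv/srav/srlv intrinsics above take a per-lane count vector. A minimal usage sketch, not part of the patch (scale_by_lane is an illustrative name, -mavx2 assumed):

/* Editor's sketch (not part of the patch).  */
#include <immintrin.h>

__m256i
scale_by_lane (__m256i x)
{
  /* VPSLLVD: lane i is shifted left by the count in lane i of the
     second operand, so lane i becomes x[i] << i here.  */
  __m256i counts = _mm256_setr_epi32 (0, 1, 2, 3, 4, 5, 6, 7);
  return _mm256_sllv_epi32 (x, counts);
}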
+#ifdef __OPTIMIZE__
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i32gather_pd (double const *base, __m128i index, const int scale)
+{
+ __v2df src = _mm_setzero_pd ();
+ __v2df mask = _mm_cmpeq_pd (src, src);
+
+ return (__m128d) __builtin_ia32_gathersiv2df (src,
+ base,
+ (__v4si)index,
+ mask,
+ scale);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i32gather_pd (__m128d src, double const *base, __m128i index,
+ __m128d mask, const int scale)
+{
+ return (__m128d) __builtin_ia32_gathersiv2df ((__v2df)src,
+ base,
+ (__v4si)index,
+ (__v2df)mask,
+ scale);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i32gather_pd (double const *base, __m128i index, const int scale)
+{
+ __v4df src = _mm256_setzero_pd ();
+ __v4df mask = _mm256_set1_pd((double)(long long int) -1);
+
+ return (__m256d) __builtin_ia32_gathersiv4df (src,
+ base,
+ (__v4si)index,
+ mask,
+ scale);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i32gather_pd (__m256d src, double const *base,
+ __m128i index, __m256d mask, const int scale)
+{
+ return (__m256d) __builtin_ia32_gathersiv4df ((__v4df)src,
+ base,
+ (__v4si)index,
+ (__v4df)mask,
+ scale);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i64gather_pd (double const *base, __m128i index, const int scale)
+{
+ __v2df src = _mm_setzero_pd ();
+ __v2df mask = _mm_cmpeq_pd (src, src);
+
+ return (__m128d) __builtin_ia32_gatherdiv2df (src,
+ base,
+ (__v2di)index,
+ mask,
+ scale);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i64gather_pd (__m128d src, double const *base, __m128i index,
+ __m128d mask, const int scale)
+{
+ return (__m128d) __builtin_ia32_gatherdiv2df ((__v2df)src,
+ base,
+ (__v2di)index,
+ (__v2df)mask,
+ scale);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i64gather_pd (double const *base, __m256i index, const int scale)
+{
+ __v4df src = _mm256_setzero_pd ();
+ __v4df mask = _mm256_set1_pd((double)(long long int) -1);
+
+ return (__m256d) __builtin_ia32_gatherdiv4df (src,
+ base,
+ (__v4di)index,
+ mask,
+ scale);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i64gather_pd (__m256d src, double const *base,
+ __m256i index, __m256d mask, const int scale)
+{
+ return (__m256d) __builtin_ia32_gatherdiv4df ((__v4df)src,
+ base,
+ (__v4di)index,
+ (__v4df)mask,
+ scale);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i32gather_ps (float const *base, __m128i index, const int scale)
+{
+ __v4sf src = _mm_setzero_ps ();
+ __v4sf mask = _mm_cmpeq_ps (src, src);
+
+ return (__m128) __builtin_ia32_gathersiv4sf (src,
+ base,
+ (__v4si)index,
+ mask,
+ scale);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i32gather_ps (__m128 src, float const *base, __m128i index,
+ __m128 mask, const int scale)
+{
+ return (__m128) __builtin_ia32_gathersiv4sf ((__v4sf)src,
+ base,
+ (__v4si)index,
+ (__v4sf)mask,
+ scale);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i32gather_ps (float const *base, __m256i index, const int scale)
+{
+ __v8sf src = _mm256_setzero_ps ();
+ __v8sf mask = _mm256_set1_ps((float)(int) -1);
+
+ return (__m256) __builtin_ia32_gathersiv8sf (src,
+ base,
+ (__v8si)index,
+ mask,
+ scale);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i32gather_ps (__m256 src, float const *base,
+ __m256i index, __m256 mask, const int scale)
+{
+ return (__m256) __builtin_ia32_gathersiv8sf ((__v8sf)src,
+ base,
+ (__v8si)index,
+ (__v8sf)mask,
+ scale);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i64gather_ps (float const *base, __m128i index, const int scale)
+{
+ __v4sf src = _mm_setzero_ps ();
+ __v4sf mask = _mm_cmpeq_ps (src, src);
+
+ return (__m128) __builtin_ia32_gatherdiv4sf (src,
+ base,
+ (__v2di)index,
+ mask,
+ scale);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i64gather_ps (__m128 src, float const *base, __m128i index,
+ __m128 mask, const int scale)
+{
+ return (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf)src,
+ base,
+ (__v2di)index,
+ (__v4sf)mask,
+ scale);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i64gather_ps (float const *base, __m256i index, const int scale)
+{
+ __v4sf src = _mm_setzero_ps ();
+ __v4sf mask = _mm_cmpeq_ps (src, src);
+
+ return (__m128) __builtin_ia32_gatherdiv4sf256 (src,
+ base,
+ (__v4di)index,
+ mask,
+ scale);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i64gather_ps (__m128 src, float const *base,
+ __m256i index, __m128 mask, const int scale)
+{
+ return (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf)src,
+ base,
+ (__v4di)index,
+ (__v4sf)mask,
+ scale);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i32gather_epi64 (long long int const *base,
+ __m128i index, const int scale)
+{
+ __v2di src = __extension__ (__v2di){ 0, 0 };
+ __v2di mask = __extension__ (__v2di){ ~0, ~0 };
+
+ return (__m128i) __builtin_ia32_gathersiv2di (src,
+ base,
+ (__v4si)index,
+ mask,
+ scale);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i32gather_epi64 (__m128i src, long long int const *base,
+ __m128i index, __m128i mask, const int scale)
+{
+ return (__m128i) __builtin_ia32_gathersiv2di ((__v2di)src,
+ base,
+ (__v4si)index,
+ (__v2di)mask,
+ scale);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i32gather_epi64 (long long int const *base,
+ __m128i index, const int scale)
+{
+ __v4di src = __extension__ (__v4di){ 0, 0, 0, 0 };
+ __v4di mask = __extension__ (__v4di){ ~0, ~0, ~0, ~0 };
+
+ return (__m256i) __builtin_ia32_gathersiv4di (src,
+ base,
+ (__v4si)index,
+ mask,
+ scale);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i32gather_epi64 (__m256i src, long long int const *base,
+ __m128i index, __m256i mask, const int scale)
+{
+ return (__m256i) __builtin_ia32_gathersiv4di ((__v4di)src,
+ base,
+ (__v4si)index,
+ (__v4di)mask,
+ scale);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i64gather_epi64 (long long int const *base,
+ __m128i index, const int scale)
+{
+ __v2di src = __extension__ (__v2di){ 0, 0 };
+ __v2di mask = __extension__ (__v2di){ ~0, ~0 };
+
+ return (__m128i) __builtin_ia32_gatherdiv2di (src,
+ base,
+ (__v2di)index,
+ mask,
+ scale);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i64gather_epi64 (__m128i src, long long int const *base, __m128i index,
+ __m128i mask, const int scale)
+{
+ return (__m128i) __builtin_ia32_gatherdiv2di ((__v2di)src,
+ base,
+ (__v2di)index,
+ (__v2di)mask,
+ scale);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i64gather_epi64 (long long int const *base,
+ __m256i index, const int scale)
+{
+ __v4di src = __extension__ (__v4di){ 0, 0, 0, 0 };
+ __v4di mask = __extension__ (__v4di){ ~0, ~0, ~0, ~0 };
+
+ return (__m256i) __builtin_ia32_gatherdiv4di (src,
+ base,
+ (__v4di)index,
+ mask,
+ scale);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i64gather_epi64 (__m256i src, long long int const *base,
+ __m256i index, __m256i mask, const int scale)
+{
+ return (__m256i) __builtin_ia32_gatherdiv4di ((__v4di)src,
+ base,
+ (__v4di)index,
+ (__v4di)mask,
+ scale);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i32gather_epi32 (int const *base, __m128i index, const int scale)
+{
+ __v4si src = __extension__ (__v4si){ 0, 0, 0, 0 };
+ __v4si mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };
+
+ return (__m128i) __builtin_ia32_gathersiv4si (src,
+ base,
+ (__v4si)index,
+ mask,
+ scale);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i32gather_epi32 (__m128i src, int const *base, __m128i index,
+ __m128i mask, const int scale)
+{
+ return (__m128i) __builtin_ia32_gathersiv4si ((__v4si)src,
+ base,
+ (__v4si)index,
+ (__v4si)mask,
+ scale);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i32gather_epi32 (int const *base, __m256i index, const int scale)
+{
+ __v8si src = __extension__ (__v8si){ 0, 0, 0, 0, 0, 0, 0, 0 };
+ __v8si mask = __extension__ (__v8si){ ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
+
+ return (__m256i) __builtin_ia32_gathersiv8si (src,
+ base,
+ (__v8si)index,
+ mask,
+ scale);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i32gather_epi32 (__m256i src, int const *base,
+ __m256i index, __m256i mask, const int scale)
+{
+ return (__m256i) __builtin_ia32_gathersiv8si ((__v8si)src,
+ base,
+ (__v8si)index,
+ (__v8si)mask,
+ scale);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i64gather_epi32 (int const *base, __m128i index, const int scale)
+{
+ __v4si src = __extension__ (__v4si){ 0, 0, 0, 0 };
+ __v4si mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };
+
+ return (__m128i) __builtin_ia32_gatherdiv4si (src,
+ base,
+ (__v2di)index,
+ mask,
+ scale);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i64gather_epi32 (__m128i src, int const *base, __m128i index,
+ __m128i mask, const int scale)
+{
+ return (__m128i) __builtin_ia32_gatherdiv4si ((__v4si)src,
+ base,
+ (__v2di)index,
+ (__v4si)mask,
+ scale);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i64gather_epi32 (int const *base, __m256i index, const int scale)
+{
+ __v4si src = __extension__ (__v4si){ 0, 0, 0, 0 };
+ __v4si mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };
+
+ return (__m128i) __builtin_ia32_gatherdiv4si256 (src,
+ base,
+ (__v4di)index,
+ mask,
+ scale);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i64gather_epi32 (__m128i src, int const *base,
+ __m256i index, __m128i mask, const int scale)
+{
+ return (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si)src,
+ base,
+ (__v4di)index,
+ (__v4si)mask,
+ scale);
+}
+#else /* __OPTIMIZE__ */
+#define _mm_i32gather_pd(BASE, INDEX, SCALE) \
+ (__m128d) __builtin_ia32_gathersiv2df ((__v2df) _mm_setzero_pd (), \
+ (double const *)BASE, \
+ (__v4si)(__m128i)INDEX, \
+ (__v2df)_mm_set1_pd( \
+ (double)(long long int) -1), \
+ (int)SCALE)
+
+#define _mm_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE) \
+ (__m128d) __builtin_ia32_gathersiv2df ((__v2df)(__m128d)SRC, \
+ (double const *)BASE, \
+ (__v4si)(__m128i)INDEX, \
+ (__v2df)(__m128d)MASK, \
+ (int)SCALE)
+
+#define _mm256_i32gather_pd(BASE, INDEX, SCALE) \
+ (__m256d) __builtin_ia32_gathersiv4df ((__v4df) _mm256_setzero_pd (), \
+ (double const *)BASE, \
+ (__v4si)(__m128i)INDEX, \
+ (__v4df)_mm256_set1_pd( \
+ (double)(long long int) -1), \
+ (int)SCALE)
+
+#define _mm256_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE) \
+ (__m256d) __builtin_ia32_gathersiv4df ((__v4df)(__m256d)SRC, \
+ (double const *)BASE, \
+ (__v4si)(__m128i)INDEX, \
+ (__v4df)(__m256d)MASK, \
+ (int)SCALE)
+
+#define _mm_i64gather_pd(BASE, INDEX, SCALE) \
+ (__m128d) __builtin_ia32_gatherdiv2df ((__v2df) _mm_setzero_pd (), \
+ (double const *)BASE, \
+ (__v2di)(__m128i)INDEX, \
+ (__v2df)_mm_set1_pd( \
+ (double)(long long int) -1), \
+ (int)SCALE)
+
+#define _mm_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE) \
+ (__m128d) __builtin_ia32_gatherdiv2df ((__v2df)(__m128d)SRC, \
+ (double const *)BASE, \
+ (__v2di)(__m128i)INDEX, \
+ (__v2df)(__m128d)MASK, \
+ (int)SCALE)
+
+#define _mm256_i64gather_pd(BASE, INDEX, SCALE) \
+ (__m256d) __builtin_ia32_gatherdiv4df ((__v4df) _mm256_setzero_pd (), \
+ (double const *)BASE, \
+ (__v4di)(__m256i)INDEX, \
+ (__v4df)_mm256_set1_pd( \
+ (double)(long long int) -1), \
+ (int)SCALE)
+
+#define _mm256_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE) \
+ (__m256d) __builtin_ia32_gatherdiv4df ((__v4df)(__m256d)SRC, \
+ (double const *)BASE, \
+ (__v4di)(__m256i)INDEX, \
+ (__v4df)(__m256d)MASK, \
+ (int)SCALE)
+
+#define _mm_i32gather_ps(BASE, INDEX, SCALE) \
+ (__m128) __builtin_ia32_gathersiv4sf ((__v4sf) _mm_setzero_ps (), \
+ (float const *)BASE, \
+ (__v4si)(__m128i)INDEX, \
+ _mm_set1_ps ((float)(int) -1), \
+ (int)SCALE)
+
+#define _mm_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE) \
+  (__m128) __builtin_ia32_gathersiv4sf ((__v4sf)(__m128)SRC, \
+ (float const *)BASE, \
+ (__v4si)(__m128i)INDEX, \
+					 (__v4sf)(__m128)MASK, \
+ (int)SCALE)
+
+#define _mm256_i32gather_ps(BASE, INDEX, SCALE) \
+ (__m256) __builtin_ia32_gathersiv8sf ((__v8sf) _mm256_setzero_ps (), \
+ (float const *)BASE, \
+ (__v8si)(__m256i)INDEX, \
+ (__v8sf)_mm256_set1_ps ( \
+ (float)(int) -1), \
+ (int)SCALE)
+
+#define _mm256_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE) \
+ (__m256) __builtin_ia32_gathersiv8sf ((__v8sf)(__m256)SRC, \
+ (float const *)BASE, \
+ (__v8si)(__m256i)INDEX, \
+					 (__v8sf)(__m256)MASK, \
+ (int)SCALE)
+
+#define _mm_i64gather_ps(BASE, INDEX, SCALE) \
+  (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf) _mm_setzero_ps (), \
+ (float const *)BASE, \
+ (__v2di)(__m128i)INDEX, \
+ (__v4sf)_mm_set1_ps ( \
+ (float)(int) -1), \
+ (int)SCALE)
+
+#define _mm_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE) \
+ (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf)(__m128)SRC, \
+ (float const *)BASE, \
+ (__v2di)(__m128i)INDEX, \
+					 (__v4sf)(__m128)MASK, \
+ (int)SCALE)
+
+#define _mm256_i64gather_ps(BASE, INDEX, SCALE) \
+ (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf) _mm_setzero_ps (), \
+ (float const *)BASE, \
+ (__v4di)(__m256i)INDEX, \
+ (__v4sf)_mm_set1_ps( \
+ (float)(int) -1), \
+ (int)SCALE)
+
+#define _mm256_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE) \
+ (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf)(__m128)SRC, \
+ (float const *)BASE, \
+ (__v4di)(__m256i)INDEX, \
+ (__v4sf)(__m128)MASK, \
+ (int)SCALE)
+
+#define _mm_i32gather_epi64(BASE, INDEX, SCALE) \
+ (__m128i) __builtin_ia32_gathersiv2di ((__v2di) _mm_setzero_si128 (), \
+ (long long const *)BASE, \
+ (__v4si)(__m128i)INDEX, \
+ (__v2di)_mm_set1_epi64x (-1), \
+ (int)SCALE)
+
+#define _mm_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
+ (__m128i) __builtin_ia32_gathersiv2di ((__v2di)(__m128i)SRC, \
+ (long long const *)BASE, \
+ (__v4si)(__m128i)INDEX, \
+ (__v2di)(__m128i)MASK, \
+ (int)SCALE)
+
+#define _mm256_i32gather_epi64(BASE, INDEX, SCALE) \
+ (__m256i) __builtin_ia32_gathersiv4di ((__v4di) _mm256_setzero_si256 (), \
+ (long long const *)BASE, \
+ (__v4si)(__m128i)INDEX, \
+ (__v4di)_mm256_set1_epi64x (-1), \
+ (int)SCALE)
+
+#define _mm256_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
+ (__m256i) __builtin_ia32_gathersiv4di ((__v4di)(__m256i)SRC, \
+ (long long const *)BASE, \
+ (__v4si)(__m128i)INDEX, \
+ (__v4di)(__m256i)MASK, \
+ (int)SCALE)
+
+#define _mm_i64gather_epi64(BASE, INDEX, SCALE) \
+ (__m128i) __builtin_ia32_gatherdiv2di ((__v2di) _mm_setzero_si128 (), \
+ (long long const *)BASE, \
+ (__v2di)(__m128i)INDEX, \
+ (__v2di)_mm_set1_epi64x (-1), \
+ (int)SCALE)
+
+#define _mm_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
+ (__m128i) __builtin_ia32_gatherdiv2di ((__v2di)(__m128i)SRC, \
+ (long long const *)BASE, \
+ (__v2di)(__m128i)INDEX, \
+ (__v2di)(__m128i)MASK, \
+ (int)SCALE)
+
+#define _mm256_i64gather_epi64(BASE, INDEX, SCALE) \
+ (__m256i) __builtin_ia32_gatherdiv4di ((__v4di) _mm256_setzero_si256 (), \
+ (long long const *)BASE, \
+ (__v4di)(__m256i)INDEX, \
+ (__v4di)_mm256_set1_epi64x (-1), \
+ (int)SCALE)
+
+#define _mm256_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
+ (__m256i) __builtin_ia32_gatherdiv4di ((__v4di)(__m256i)SRC, \
+ (long long const *)BASE, \
+ (__v4di)(__m256i)INDEX, \
+ (__v4di)(__m256i)MASK, \
+ (int)SCALE)
+
+#define _mm_i32gather_epi32(BASE, INDEX, SCALE) \
+ (__m128i) __builtin_ia32_gathersiv4si ((__v4si) _mm_setzero_si128 (), \
+ (int const *)BASE, \
+ (__v4si)(__m128i)INDEX, \
+ (__v4si)_mm_set1_epi32 (-1), \
+ (int)SCALE)
+
+#define _mm_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
+ (__m128i) __builtin_ia32_gathersiv4si ((__v4si)(__m128i)SRC, \
+ (int const *)BASE, \
+ (__v4si)(__m128i)INDEX, \
+ (__v4si)(__m128i)MASK, \
+ (int)SCALE)
+
+#define _mm256_i32gather_epi32(BASE, INDEX, SCALE) \
+ (__m256i) __builtin_ia32_gathersiv8si ((__v8si) _mm256_setzero_si256 (), \
+ (int const *)BASE, \
+ (__v8si)(__m256i)INDEX, \
+ (__v8si)_mm256_set1_epi32 (-1), \
+ (int)SCALE)
+
+#define _mm256_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
+ (__m256i) __builtin_ia32_gathersiv8si ((__v8si)(__m256i)SRC, \
+ (int const *)BASE, \
+ (__v8si)(__m256i)INDEX, \
+ (__v8si)(__m256i)MASK, \
+ (int)SCALE)
+
+#define _mm_i64gather_epi32(BASE, INDEX, SCALE) \
+ (__m128i) __builtin_ia32_gatherdiv4si ((__v4si) _mm_setzero_si128 (), \
+ (int const *)BASE, \
+ (__v2di)(__m128i)INDEX, \
+ (__v4si)_mm_set1_epi32 (-1), \
+ (int)SCALE)
+
+#define _mm_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
+ (__m128i) __builtin_ia32_gatherdiv4si ((__v4si)(__m128i)SRC, \
+ (int const *)BASE, \
+ (__v2di)(__m128i)INDEX, \
+ (__v4si)(__m128i)MASK, \
+ (int)SCALE)
+
+#define _mm256_i64gather_epi32(BASE, INDEX, SCALE) \
+ (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si) _mm_setzero_si128 (), \
+ (int const *)BASE, \
+ (__v4di)(__m256i)INDEX, \
+ (__v4si)_mm_set1_epi32(-1), \
+ (int)SCALE)
+
+#define _mm256_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
+ (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si)(__m128i)SRC, \
+ (int const *)BASE, \
+ (__v4di)(__m256i)INDEX, \
+ (__v4si)(__m128i)MASK, \
+ (int)SCALE)
+#endif /* __OPTIMIZE__ */
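Both the inline (__OPTIMIZE__) and macro definitions above present the same gather interface: a base pointer, a vector of indices, and a scale that must be a compile-time constant of 1, 2, 4 or 8. A minimal usage sketch, not part of the patch (gather4 is an illustrative name, -mavx2 assumed):

/* Editor's sketch (not part of the patch): fetch table[idx[i]] for
   four 32-bit indices with VGATHERDPD; the scale 8 is sizeof (double).  */
#include <immintrin.h>

__m256d
gather4 (const double *table, __m128i idx)
{
  return _mm256_i32gather_pd (table, idx, 8);
}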
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index d4b3e82..c4070e4 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -102,6 +102,8 @@ DEF_VECTOR_TYPE (V32QI, QI)
DEF_POINTER_TYPE (PCCHAR, CHAR, CONST)
DEF_POINTER_TYPE (PCDOUBLE, DOUBLE, CONST)
DEF_POINTER_TYPE (PCFLOAT, FLOAT, CONST)
+DEF_POINTER_TYPE (PCINT, INT, CONST)
+DEF_POINTER_TYPE (PCINT64, INT64, CONST)
DEF_POINTER_TYPE (PCHAR, CHAR)
DEF_POINTER_TYPE (PCVOID, VOID, CONST)
DEF_POINTER_TYPE (PVOID, VOID)
@@ -119,6 +121,8 @@ DEF_POINTER_TYPE (PV4DF, V4DF)
DEF_POINTER_TYPE (PV4DI, V4DI)
DEF_POINTER_TYPE (PV4SF, V4SF)
DEF_POINTER_TYPE (PV8SF, V8SF)
+DEF_POINTER_TYPE (PV4SI, V4SI)
+DEF_POINTER_TYPE (PV8SI, V8SI)
DEF_POINTER_TYPE (PCV2DF, V2DF, CONST)
DEF_POINTER_TYPE (PCV2SF, V2SF, CONST)
@@ -126,6 +130,11 @@ DEF_POINTER_TYPE (PCV4DF, V4DF, CONST)
DEF_POINTER_TYPE (PCV4SF, V4SF, CONST)
DEF_POINTER_TYPE (PCV8SF, V8SF, CONST)
+DEF_POINTER_TYPE (PCV2DI, V2DI, CONST)
+DEF_POINTER_TYPE (PCV4SI, V4SI, CONST)
+DEF_POINTER_TYPE (PCV4DI, V4DI, CONST)
+DEF_POINTER_TYPE (PCV8SI, V8SI, CONST)
+
DEF_FUNCTION_TYPE (FLOAT128)
DEF_FUNCTION_TYPE (UINT64)
DEF_FUNCTION_TYPE (UNSIGNED)
@@ -141,6 +150,7 @@ DEF_FUNCTION_TYPE (INT, V4DF)
DEF_FUNCTION_TYPE (INT, V4SF)
DEF_FUNCTION_TYPE (INT, V8QI)
DEF_FUNCTION_TYPE (INT, V8SF)
+DEF_FUNCTION_TYPE (INT, V32QI)
DEF_FUNCTION_TYPE (INT64, INT64)
DEF_FUNCTION_TYPE (INT64, V2DF)
DEF_FUNCTION_TYPE (INT64, V4SF)
@@ -199,6 +209,11 @@ DEF_FUNCTION_TYPE (V8SF, V8SI)
DEF_FUNCTION_TYPE (V8SF, V8HI)
DEF_FUNCTION_TYPE (V8SI, V4SI)
DEF_FUNCTION_TYPE (V8SI, V8SF)
+DEF_FUNCTION_TYPE (V32QI, V32QI)
+DEF_FUNCTION_TYPE (V32QI, V16QI)
+DEF_FUNCTION_TYPE (V16HI, V16HI)
+DEF_FUNCTION_TYPE (V16HI, V8HI)
+DEF_FUNCTION_TYPE (V8SI, V8SI)
DEF_FUNCTION_TYPE (VOID, PCVOID)
DEF_FUNCTION_TYPE (VOID, PVOID)
DEF_FUNCTION_TYPE (VOID, UINT64)
@@ -206,6 +221,14 @@ DEF_FUNCTION_TYPE (VOID, UNSIGNED)
DEF_FUNCTION_TYPE (INT, PUSHORT)
DEF_FUNCTION_TYPE (INT, PUNSIGNED)
DEF_FUNCTION_TYPE (INT, PULONGLONG)
+DEF_FUNCTION_TYPE (V16HI, V16QI)
+DEF_FUNCTION_TYPE (V8SI, V16QI)
+DEF_FUNCTION_TYPE (V4DI, V16QI)
+DEF_FUNCTION_TYPE (V8SI, V8HI)
+DEF_FUNCTION_TYPE (V4DI, V8HI)
+DEF_FUNCTION_TYPE (V4DI, V4SI)
+DEF_FUNCTION_TYPE (V4DI, PV4DI)
+DEF_FUNCTION_TYPE (V4DI, V2DI)
DEF_FUNCTION_TYPE (DI, V2DI, INT)
DEF_FUNCTION_TYPE (DOUBLE, V2DF, INT)
@@ -252,6 +275,7 @@ DEF_FUNCTION_TYPE (V2DI, V2DI, SI)
DEF_FUNCTION_TYPE (V2DI, V2DI, V16QI)
DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI)
DEF_FUNCTION_TYPE (V2DI, V4SI, V4SI)
+DEF_FUNCTION_TYPE (V2DI, PCV2DI, V2DI)
DEF_FUNCTION_TYPE (V2SF, V2SF, V2SF)
DEF_FUNCTION_TYPE (V2SI, INT, INT)
DEF_FUNCTION_TYPE (V2SI, V2SF, V2SF)
@@ -284,6 +308,7 @@ DEF_FUNCTION_TYPE (V4SI, V4SI, SI)
DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI)
DEF_FUNCTION_TYPE (V4SI, V8HI, V8HI)
DEF_FUNCTION_TYPE (V4SI, V8SI, INT)
+DEF_FUNCTION_TYPE (V4SI, PCV4SI, V4SI)
DEF_FUNCTION_TYPE (V8HI, V16QI, V16QI)
DEF_FUNCTION_TYPE (V8HI, V4SI, V4SI)
DEF_FUNCTION_TYPE (V8HI, V8HI, INT)
@@ -297,6 +322,28 @@ DEF_FUNCTION_TYPE (V8SF, PCV8SF, V8SI)
DEF_FUNCTION_TYPE (V8SF, V8SF, INT)
DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF)
DEF_FUNCTION_TYPE (V8SF, V8SF, V8SI)
+DEF_FUNCTION_TYPE (V32QI, V16HI, V16HI)
+DEF_FUNCTION_TYPE (V16HI, V8SI, V8SI)
+DEF_FUNCTION_TYPE (V32QI, V32QI, V32QI)
+DEF_FUNCTION_TYPE (V16HI, V32QI, V32QI)
+DEF_FUNCTION_TYPE (V16HI, V16HI, V8HI)
+DEF_FUNCTION_TYPE (V16HI, V16HI, V16HI)
+DEF_FUNCTION_TYPE (V16HI, V16HI, INT)
+DEF_FUNCTION_TYPE (V16HI, V16HI, SI)
+DEF_FUNCTION_TYPE (V16HI, V16HI, V16HI, INT)
+DEF_FUNCTION_TYPE (V32QI, V32QI, V32QI, INT)
+DEF_FUNCTION_TYPE (V8SI, V8SI, V4SI)
+DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI)
+DEF_FUNCTION_TYPE (V8SI, V16HI, V16HI)
+DEF_FUNCTION_TYPE (V8SI, V8SI, INT)
+DEF_FUNCTION_TYPE (V8SI, V8SI, SI)
+DEF_FUNCTION_TYPE (V8SI, PCV8SI, V8SI)
+DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI)
+DEF_FUNCTION_TYPE (V4DI, V8SI, V8SI)
+DEF_FUNCTION_TYPE (V4DI, V4DI, V2DI)
+DEF_FUNCTION_TYPE (V4DI, PCV4DI, V4DI)
+DEF_FUNCTION_TYPE (V4DI, V4DI, INT)
+DEF_FUNCTION_TYPE (V2DI, V4DI, INT)
DEF_FUNCTION_TYPE (VOID, PCHAR, V16QI)
DEF_FUNCTION_TYPE (VOID, PCHAR, V32QI)
DEF_FUNCTION_TYPE (VOID, PDOUBLE, V2DF)
@@ -351,11 +398,17 @@ DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, V8SI, INT)
DEF_FUNCTION_TYPE (V8SI, V8SI, V4SI, INT)
DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, INT)
DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, V8SI)
+DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, INT)
+DEF_FUNCTION_TYPE (V4DI, V4DI, V2DI, INT)
DEF_FUNCTION_TYPE (VOID, PCVOID, UNSIGNED, UNSIGNED)
DEF_FUNCTION_TYPE (VOID, PV2DF, V2DI, V2DF)
DEF_FUNCTION_TYPE (VOID, PV4DF, V4DI, V4DF)
DEF_FUNCTION_TYPE (VOID, PV4SF, V4SI, V4SF)
DEF_FUNCTION_TYPE (VOID, PV8SF, V8SI, V8SF)
+DEF_FUNCTION_TYPE (VOID, PV2DI, V2DI, V2DI)
+DEF_FUNCTION_TYPE (VOID, PV4DI, V4DI, V4DI)
+DEF_FUNCTION_TYPE (VOID, PV4SI, V4SI, V4SI)
+DEF_FUNCTION_TYPE (VOID, PV8SI, V8SI, V8SI)
DEF_FUNCTION_TYPE (VOID, UINT, UINT, UINT)
DEF_FUNCTION_TYPE (VOID, UINT64, UINT, UINT)
DEF_FUNCTION_TYPE (VOID, V16QI, V16QI, PCHAR)
@@ -377,6 +430,23 @@ DEF_FUNCTION_TYPE (V16QI, V16QI, INT, V16QI, INT, INT)
DEF_FUNCTION_TYPE (V8QI, QI, QI, QI, QI, QI, QI, QI, QI)
+DEF_FUNCTION_TYPE (V2DF, V2DF, PCDOUBLE, V4SI, V2DF, INT)
+DEF_FUNCTION_TYPE (V4DF, V4DF, PCDOUBLE, V4SI, V4DF, INT)
+DEF_FUNCTION_TYPE (V2DF, V2DF, PCDOUBLE, V2DI, V2DF, INT)
+DEF_FUNCTION_TYPE (V4DF, V4DF, PCDOUBLE, V4DI, V4DF, INT)
+DEF_FUNCTION_TYPE (V4SF, V4SF, PCFLOAT, V4SI, V4SF, INT)
+DEF_FUNCTION_TYPE (V8SF, V8SF, PCFLOAT, V8SI, V8SF, INT)
+DEF_FUNCTION_TYPE (V4SF, V4SF, PCFLOAT, V2DI, V4SF, INT)
+DEF_FUNCTION_TYPE (V4SF, V4SF, PCFLOAT, V4DI, V4SF, INT)
+DEF_FUNCTION_TYPE (V2DI, V2DI, PCINT64, V4SI, V2DI, INT)
+DEF_FUNCTION_TYPE (V4DI, V4DI, PCINT64, V4SI, V4DI, INT)
+DEF_FUNCTION_TYPE (V2DI, V2DI, PCINT64, V2DI, V2DI, INT)
+DEF_FUNCTION_TYPE (V4DI, V4DI, PCINT64, V4DI, V4DI, INT)
+DEF_FUNCTION_TYPE (V4SI, V4SI, PCINT, V4SI, V4SI, INT)
+DEF_FUNCTION_TYPE (V8SI, V8SI, PCINT, V8SI, V8SI, INT)
+DEF_FUNCTION_TYPE (V4SI, V4SI, PCINT, V2DI, V4SI, INT)
+DEF_FUNCTION_TYPE (V4SI, V4SI, PCINT, V4DI, V4SI, INT)
+
DEF_FUNCTION_TYPE_ALIAS (V2DF_FTYPE_V2DF, ROUND)
DEF_FUNCTION_TYPE_ALIAS (V4DF_FTYPE_V4DF, ROUND)
DEF_FUNCTION_TYPE_ALIAS (V4SF_FTYPE_V4SF, ROUND)
@@ -404,11 +474,19 @@ DEF_FUNCTION_TYPE_ALIAS (V2SI_FTYPE_V2SI_V2SI, COUNT)
DEF_FUNCTION_TYPE_ALIAS (V4HI_FTYPE_V4HI_V4HI, COUNT)
DEF_FUNCTION_TYPE_ALIAS (V4SI_FTYPE_V4SI_V4SI, COUNT)
DEF_FUNCTION_TYPE_ALIAS (V8HI_FTYPE_V8HI_V8HI, COUNT)
+DEF_FUNCTION_TYPE_ALIAS (V16HI_FTYPE_V16HI_SI, COUNT)
+DEF_FUNCTION_TYPE_ALIAS (V16HI_FTYPE_V16HI_V8HI, COUNT)
+DEF_FUNCTION_TYPE_ALIAS (V8SI_FTYPE_V8SI_SI, COUNT)
+DEF_FUNCTION_TYPE_ALIAS (V8SI_FTYPE_V8SI_V4SI, COUNT)
+DEF_FUNCTION_TYPE_ALIAS (V4DI_FTYPE_V4DI_INT, COUNT)
+DEF_FUNCTION_TYPE_ALIAS (V4DI_FTYPE_V4DI_V2DI, COUNT)
DEF_FUNCTION_TYPE_ALIAS (V2DF_FTYPE_V2DF_V2DF, SWAP)
DEF_FUNCTION_TYPE_ALIAS (V4SF_FTYPE_V4SF_V4SF, SWAP)
+DEF_FUNCTION_TYPE_ALIAS (V4DI_FTYPE_V4DI_INT, CONVERT)
DEF_FUNCTION_TYPE_ALIAS (V2DI_FTYPE_V2DI_INT, CONVERT)
+DEF_FUNCTION_TYPE_ALIAS (V4DI_FTYPE_V4DI_V4DI_INT, CONVERT)
DEF_FUNCTION_TYPE_ALIAS (V2DI_FTYPE_V2DI_V2DI_INT, CONVERT)
DEF_FUNCTION_TYPE_ALIAS (V1DI_FTYPE_V1DI_V1DI_INT, CONVERT)
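Each DEF_FUNCTION_TYPE record added here becomes the prototype of a matching __builtin_ia32_* function registered in i386.c below; V8SI_FTYPE_V8SI_V8SI, for instance, types __builtin_ia32_paddd256. An illustrative sketch, not part of the patch, calling that builtin directly on GCC vector types (the typedef mirrors the one in the x86 headers, -mavx2 assumed):

/* Editor's sketch (not part of the patch).  */
typedef int __v8si __attribute__ ((__vector_size__ (32)));

__v8si
add_epi32 (__v8si a, __v8si b)
{
  /* Prototype comes from the V8SI_FTYPE_V8SI_V8SI record.  */
  return __builtin_ia32_paddd256 (a, b);
}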
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 7b7ac87..ef02673 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -23867,6 +23867,180 @@ enum ix86_builtins
IX86_BUILTIN_MOVMSKPD256,
IX86_BUILTIN_MOVMSKPS256,
+ /* AVX2 */
+ IX86_BUILTIN_MPSADBW256,
+ IX86_BUILTIN_PABSB256,
+ IX86_BUILTIN_PABSW256,
+ IX86_BUILTIN_PABSD256,
+ IX86_BUILTIN_PACKSSDW256,
+ IX86_BUILTIN_PACKSSWB256,
+ IX86_BUILTIN_PACKUSDW256,
+ IX86_BUILTIN_PACKUSWB256,
+ IX86_BUILTIN_PADDB256,
+ IX86_BUILTIN_PADDW256,
+ IX86_BUILTIN_PADDD256,
+ IX86_BUILTIN_PADDQ256,
+ IX86_BUILTIN_PADDSB256,
+ IX86_BUILTIN_PADDSW256,
+ IX86_BUILTIN_PADDUSB256,
+ IX86_BUILTIN_PADDUSW256,
+ IX86_BUILTIN_PALIGNR256,
+ IX86_BUILTIN_AND256I,
+ IX86_BUILTIN_ANDNOT256I,
+ IX86_BUILTIN_PAVGB256,
+ IX86_BUILTIN_PAVGW256,
+ IX86_BUILTIN_PBLENDVB256,
+ IX86_BUILTIN_PBLENDVW256,
+ IX86_BUILTIN_PCMPEQB256,
+ IX86_BUILTIN_PCMPEQW256,
+ IX86_BUILTIN_PCMPEQD256,
+ IX86_BUILTIN_PCMPEQQ256,
+ IX86_BUILTIN_PCMPGTB256,
+ IX86_BUILTIN_PCMPGTW256,
+ IX86_BUILTIN_PCMPGTD256,
+ IX86_BUILTIN_PCMPGTQ256,
+ IX86_BUILTIN_PHADDW256,
+ IX86_BUILTIN_PHADDD256,
+ IX86_BUILTIN_PHADDSW256,
+ IX86_BUILTIN_PHSUBW256,
+ IX86_BUILTIN_PHSUBD256,
+ IX86_BUILTIN_PHSUBSW256,
+ IX86_BUILTIN_PMADDUBSW256,
+ IX86_BUILTIN_PMADDWD256,
+ IX86_BUILTIN_PMAXSB256,
+ IX86_BUILTIN_PMAXSW256,
+ IX86_BUILTIN_PMAXSD256,
+ IX86_BUILTIN_PMAXUB256,
+ IX86_BUILTIN_PMAXUW256,
+ IX86_BUILTIN_PMAXUD256,
+ IX86_BUILTIN_PMINSB256,
+ IX86_BUILTIN_PMINSW256,
+ IX86_BUILTIN_PMINSD256,
+ IX86_BUILTIN_PMINUB256,
+ IX86_BUILTIN_PMINUW256,
+ IX86_BUILTIN_PMINUD256,
+ IX86_BUILTIN_PMOVMSKB256,
+ IX86_BUILTIN_PMOVSXBW256,
+ IX86_BUILTIN_PMOVSXBD256,
+ IX86_BUILTIN_PMOVSXBQ256,
+ IX86_BUILTIN_PMOVSXWD256,
+ IX86_BUILTIN_PMOVSXWQ256,
+ IX86_BUILTIN_PMOVSXDQ256,
+ IX86_BUILTIN_PMOVZXBW256,
+ IX86_BUILTIN_PMOVZXBD256,
+ IX86_BUILTIN_PMOVZXBQ256,
+ IX86_BUILTIN_PMOVZXWD256,
+ IX86_BUILTIN_PMOVZXWQ256,
+ IX86_BUILTIN_PMOVZXDQ256,
+ IX86_BUILTIN_PMULDQ256,
+ IX86_BUILTIN_PMULHRSW256,
+ IX86_BUILTIN_PMULHUW256,
+ IX86_BUILTIN_PMULHW256,
+ IX86_BUILTIN_PMULLW256,
+ IX86_BUILTIN_PMULLD256,
+ IX86_BUILTIN_PMULUDQ256,
+ IX86_BUILTIN_POR256,
+ IX86_BUILTIN_PSADBW256,
+ IX86_BUILTIN_PSHUFB256,
+ IX86_BUILTIN_PSHUFD256,
+ IX86_BUILTIN_PSHUFHW256,
+ IX86_BUILTIN_PSHUFLW256,
+ IX86_BUILTIN_PSIGNB256,
+ IX86_BUILTIN_PSIGNW256,
+ IX86_BUILTIN_PSIGND256,
+ IX86_BUILTIN_PSLLDQI256,
+ IX86_BUILTIN_PSLLWI256,
+ IX86_BUILTIN_PSLLW256,
+ IX86_BUILTIN_PSLLDI256,
+ IX86_BUILTIN_PSLLD256,
+ IX86_BUILTIN_PSLLQI256,
+ IX86_BUILTIN_PSLLQ256,
+ IX86_BUILTIN_PSRAWI256,
+ IX86_BUILTIN_PSRAW256,
+ IX86_BUILTIN_PSRADI256,
+ IX86_BUILTIN_PSRAD256,
+ IX86_BUILTIN_PSRLDQI256,
+ IX86_BUILTIN_PSRLWI256,
+ IX86_BUILTIN_PSRLW256,
+ IX86_BUILTIN_PSRLDI256,
+ IX86_BUILTIN_PSRLD256,
+ IX86_BUILTIN_PSRLQI256,
+ IX86_BUILTIN_PSRLQ256,
+ IX86_BUILTIN_PSUBB256,
+ IX86_BUILTIN_PSUBW256,
+ IX86_BUILTIN_PSUBD256,
+ IX86_BUILTIN_PSUBQ256,
+ IX86_BUILTIN_PSUBSB256,
+ IX86_BUILTIN_PSUBSW256,
+ IX86_BUILTIN_PSUBUSB256,
+ IX86_BUILTIN_PSUBUSW256,
+ IX86_BUILTIN_PUNPCKHBW256,
+ IX86_BUILTIN_PUNPCKHWD256,
+ IX86_BUILTIN_PUNPCKHDQ256,
+ IX86_BUILTIN_PUNPCKHQDQ256,
+ IX86_BUILTIN_PUNPCKLBW256,
+ IX86_BUILTIN_PUNPCKLWD256,
+ IX86_BUILTIN_PUNPCKLDQ256,
+ IX86_BUILTIN_PUNPCKLQDQ256,
+ IX86_BUILTIN_PXOR256,
+ IX86_BUILTIN_MOVNTDQA256,
+ IX86_BUILTIN_VBROADCASTSS_PS,
+ IX86_BUILTIN_VBROADCASTSS_PS256,
+ IX86_BUILTIN_VBROADCASTSD_PD256,
+ IX86_BUILTIN_VBROADCASTSI256,
+ IX86_BUILTIN_PBLENDD256,
+ IX86_BUILTIN_PBLENDD128,
+ IX86_BUILTIN_PBROADCASTB256,
+ IX86_BUILTIN_PBROADCASTW256,
+ IX86_BUILTIN_PBROADCASTD256,
+ IX86_BUILTIN_PBROADCASTQ256,
+ IX86_BUILTIN_PBROADCASTB128,
+ IX86_BUILTIN_PBROADCASTW128,
+ IX86_BUILTIN_PBROADCASTD128,
+ IX86_BUILTIN_PBROADCASTQ128,
+ IX86_BUILTIN_VPERMVARSI256,
+ IX86_BUILTIN_VPERMDF256,
+ IX86_BUILTIN_VPERMVARSF256,
+ IX86_BUILTIN_VPERMDI256,
+ IX86_BUILTIN_VPERMTI256,
+ IX86_BUILTIN_VEXTRACT128I256,
+ IX86_BUILTIN_VINSERT128I256,
+ IX86_BUILTIN_MASKLOADD,
+ IX86_BUILTIN_MASKLOADQ,
+ IX86_BUILTIN_MASKLOADD256,
+ IX86_BUILTIN_MASKLOADQ256,
+ IX86_BUILTIN_MASKSTORED,
+ IX86_BUILTIN_MASKSTOREQ,
+ IX86_BUILTIN_MASKSTORED256,
+ IX86_BUILTIN_MASKSTOREQ256,
+ IX86_BUILTIN_PSLLVV4DI,
+ IX86_BUILTIN_PSLLVV2DI,
+ IX86_BUILTIN_PSLLVV8SI,
+ IX86_BUILTIN_PSLLVV4SI,
+ IX86_BUILTIN_PSRAVV8SI,
+ IX86_BUILTIN_PSRAVV4SI,
+ IX86_BUILTIN_PSRLVV4DI,
+ IX86_BUILTIN_PSRLVV2DI,
+ IX86_BUILTIN_PSRLVV8SI,
+ IX86_BUILTIN_PSRLVV4SI,
+
+ IX86_BUILTIN_GATHERSIV2DF,
+ IX86_BUILTIN_GATHERSIV4DF,
+ IX86_BUILTIN_GATHERDIV2DF,
+ IX86_BUILTIN_GATHERDIV4DF,
+ IX86_BUILTIN_GATHERSIV4SF,
+ IX86_BUILTIN_GATHERSIV8SF,
+ IX86_BUILTIN_GATHERDIV4SF,
+ IX86_BUILTIN_GATHERDIV8SF,
+ IX86_BUILTIN_GATHERSIV2DI,
+ IX86_BUILTIN_GATHERSIV4DI,
+ IX86_BUILTIN_GATHERDIV2DI,
+ IX86_BUILTIN_GATHERDIV4DI,
+ IX86_BUILTIN_GATHERSIV4SI,
+ IX86_BUILTIN_GATHERSIV8SI,
+ IX86_BUILTIN_GATHERDIV4SI,
+ IX86_BUILTIN_GATHERDIV8SI,
+
/* TFmode support builtins. */
IX86_BUILTIN_INFQ,
IX86_BUILTIN_HUGE_VALQ,
@@ -24362,6 +24536,17 @@ static const struct builtin_description bdesc_special_args[] =
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
+ /* AVX2 */
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
+
{ OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
{ OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
{ OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
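The AVX2 rows above expose the VMOVNTDQA and VPMASKMOV patterns as builtins whose pointer operands use the new PV*/PCV* types; V4DI_FTYPE_PV4DI, for example, types the 256-bit streaming load. An illustrative sketch, not part of the patch (the typedef mirrors the x86 headers, -mavx2 assumed):

/* Editor's sketch (not part of the patch): non-temporal 32-byte load.  */
typedef long long __v4di __attribute__ ((__vector_size__ (32)));

__v4di
stream_load (__v4di *p)
{
  return __builtin_ia32_movntdqa256 (p);
}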
@@ -25026,6 +25211,154 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ /* AVX2 */
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv4di, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mulv4siv4di3 , "__builtin_ia32_pmuldq256" , IX86_BUILTIN_PMULDQ256 , UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulv4siv4di3 , "__builtin_ia32_pmuludq256" , IX86_BUILTIN_PMULUDQ256 , UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlqv4di3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrqv4di3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+
{ OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
/* BMI */
@@ -25415,6 +25748,71 @@ ix86_init_mmx_sse_builtins (void)
"__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
IX86_BUILTIN_RDRAND64_STEP);
+ /* AVX2 */
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
+ V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
+ IX86_BUILTIN_GATHERSIV2DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
+ V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
+ IX86_BUILTIN_GATHERSIV4DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
+ V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
+ IX86_BUILTIN_GATHERDIV2DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
+ V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
+ IX86_BUILTIN_GATHERDIV4DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
+ V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
+ IX86_BUILTIN_GATHERSIV4SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
+ V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
+ IX86_BUILTIN_GATHERSIV8SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
+ V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
+ IX86_BUILTIN_GATHERDIV4SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
+ V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
+ IX86_BUILTIN_GATHERDIV8SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
+ V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
+ IX86_BUILTIN_GATHERSIV2DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
+ V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
+ IX86_BUILTIN_GATHERSIV4DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
+ V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
+ IX86_BUILTIN_GATHERDIV2DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
+ V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
+ IX86_BUILTIN_GATHERDIV4DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
+ V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
+ IX86_BUILTIN_GATHERSIV4SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
+ V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
+ IX86_BUILTIN_GATHERSIV8SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
+ V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
+ IX86_BUILTIN_GATHERDIV4SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
+ V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
+ IX86_BUILTIN_GATHERDIV8SI);
+
/* MMX access to the vec_init patterns. */
def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
@@ -26364,6 +26762,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case INT_FTYPE_V4DF:
case INT_FTYPE_V4SF:
case INT_FTYPE_V2DF:
+ case INT_FTYPE_V32QI:
case V16QI_FTYPE_V16QI:
case V8SI_FTYPE_V8SF:
case V8SI_FTYPE_V4SI:
@@ -26407,6 +26806,18 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V2SI_FTYPE_V2DF:
case V2SF_FTYPE_V2SF:
case V2SF_FTYPE_V2SI:
+ case V32QI_FTYPE_V32QI:
+ case V32QI_FTYPE_V16QI:
+ case V16HI_FTYPE_V16HI:
+ case V16HI_FTYPE_V8HI:
+ case V8SI_FTYPE_V8SI:
+ case V16HI_FTYPE_V16QI:
+ case V8SI_FTYPE_V16QI:
+ case V4DI_FTYPE_V16QI:
+ case V8SI_FTYPE_V8HI:
+ case V4DI_FTYPE_V8HI:
+ case V4DI_FTYPE_V4SI:
+ case V4DI_FTYPE_V2DI:
nargs = 1;
break;
case V4SF_FTYPE_V4SF_VEC_MERGE:
@@ -26454,6 +26865,15 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V1DI_FTYPE_V1DI_V1DI:
case V1DI_FTYPE_V8QI_V8QI:
case V1DI_FTYPE_V2SI_V2SI:
+ case V32QI_FTYPE_V16HI_V16HI:
+ case V16HI_FTYPE_V8SI_V8SI:
+ case V32QI_FTYPE_V32QI_V32QI:
+ case V16HI_FTYPE_V32QI_V32QI:
+ case V16HI_FTYPE_V16HI_V16HI:
+ case V8SI_FTYPE_V8SI_V8SI:
+ case V8SI_FTYPE_V16HI_V16HI:
+ case V4DI_FTYPE_V4DI_V4DI:
+ case V4DI_FTYPE_V8SI_V8SI:
if (comparison == UNKNOWN)
return ix86_expand_binop_builtin (icode, exp, target);
nargs = 2;
@@ -26464,6 +26884,12 @@ ix86_expand_args_builtin (const struct builtin_description *d,
nargs = 2;
swap = true;
break;
+ case V16HI_FTYPE_V16HI_V8HI_COUNT:
+ case V16HI_FTYPE_V16HI_SI_COUNT:
+ case V8SI_FTYPE_V8SI_V4SI_COUNT:
+ case V8SI_FTYPE_V8SI_SI_COUNT:
+ case V4DI_FTYPE_V4DI_V2DI_COUNT:
+ case V4DI_FTYPE_V4DI_INT_COUNT:
case V8HI_FTYPE_V8HI_V8HI_COUNT:
case V8HI_FTYPE_V8HI_SI_COUNT:
case V4SI_FTYPE_V4SI_V4SI_COUNT:
@@ -26505,6 +26931,10 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V2DI_FTYPE_V2DI_INT:
case V2DF_FTYPE_V2DF_INT:
case V2DF_FTYPE_V4DF_INT:
+ case V16HI_FTYPE_V16HI_INT:
+ case V8SI_FTYPE_V8SI_INT:
+ case V4DI_FTYPE_V4DI_INT:
+ case V2DI_FTYPE_V4DI_INT:
nargs = 2;
nargs_constant = 1;
break;
@@ -26513,9 +26943,13 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V4DF_FTYPE_V4DF_V4DF_V4DF:
case V4SF_FTYPE_V4SF_V4SF_V4SF:
case V2DF_FTYPE_V2DF_V2DF_V2DF:
+ case V32QI_FTYPE_V32QI_V32QI_V32QI:
nargs = 3;
break;
+ case V32QI_FTYPE_V32QI_V32QI_INT:
+ case V16HI_FTYPE_V16HI_V16HI_INT:
case V16QI_FTYPE_V16QI_V16QI_INT:
+ case V4DI_FTYPE_V4DI_V4DI_INT:
case V8HI_FTYPE_V8HI_V8HI_INT:
case V8SI_FTYPE_V8SI_V8SI_INT:
case V8SI_FTYPE_V8SI_V4SI_INT:
@@ -26526,10 +26960,16 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V4DF_FTYPE_V4DF_V2DF_INT:
case V4SF_FTYPE_V4SF_V4SF_INT:
case V2DI_FTYPE_V2DI_V2DI_INT:
+ case V4DI_FTYPE_V4DI_V2DI_INT:
case V2DF_FTYPE_V2DF_V2DF_INT:
nargs = 3;
nargs_constant = 1;
break;
+ case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
+ nargs = 3;
+ rmode = V4DImode;
+ nargs_constant = 1;
+ break;
case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
nargs = 3;
rmode = V2DImode;
@@ -26606,6 +27046,11 @@ ix86_expand_args_builtin (const struct builtin_description *d,
if (!match)
switch (icode)
{
+ case CODE_FOR_avx2_inserti128:
+ case CODE_FOR_avx2_extracti128:
+ error ("the last argument must be an 1-bit immediate");
+ return const0_rtx;
+
case CODE_FOR_sse4_1_roundpd:
case CODE_FOR_sse4_1_roundps:
case CODE_FOR_sse4_1_roundsd:
@@ -26759,6 +27204,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
break;
case UINT64_FTYPE_PUNSIGNED:
case V2DI_FTYPE_PV2DI:
+ case V4DI_FTYPE_PV4DI:
case V32QI_FTYPE_PCCHAR:
case V16QI_FTYPE_PCCHAR:
case V8SF_FTYPE_PCV4SF:
@@ -26798,6 +27244,10 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
case V4DF_FTYPE_PCV4DF_V4DI:
case V4SF_FTYPE_PCV4SF_V4SI:
case V2DF_FTYPE_PCV2DF_V2DI:
+ case V8SI_FTYPE_PCV8SI_V8SI:
+ case V4DI_FTYPE_PCV4DI_V4DI:
+ case V4SI_FTYPE_PCV4SI_V4SI:
+ case V2DI_FTYPE_PCV2DI_V2DI:
nargs = 2;
klass = load;
memory = 0;
@@ -26806,6 +27256,10 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
case VOID_FTYPE_PV4DF_V4DI_V4DF:
case VOID_FTYPE_PV4SF_V4SI_V4SF:
case VOID_FTYPE_PV2DF_V2DI_V2DF:
+ case VOID_FTYPE_PV8SI_V8SI_V8SI:
+ case VOID_FTYPE_PV4DI_V4DI_V4DI:
+ case VOID_FTYPE_PV4SI_V4SI_V4SI:
+ case VOID_FTYPE_PV2DI_V2DI_V2DI:
nargs = 2;
klass = store;
/* Reserve memory operand for target. */
@@ -27062,9 +27516,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
size_t i;
enum insn_code icode;
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
- tree arg0, arg1, arg2;
- rtx op0, op1, op2, pat;
- enum machine_mode mode0, mode1, mode2;
+ tree arg0, arg1, arg2, arg3, arg4;
+ rtx op0, op1, op2, op3, op4, pat;
+ enum machine_mode mode0, mode1, mode2, mode3, mode4;
unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
/* Determine whether the builtin function is available under the current ISA.
@@ -27333,6 +27787,100 @@ rdrand_step:
gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
return target;
+ case IX86_BUILTIN_GATHERSIV2DF:
+ icode = CODE_FOR_avx2_gathersiv2df;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERSIV4DF:
+ icode = CODE_FOR_avx2_gathersiv4df;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERDIV2DF:
+ icode = CODE_FOR_avx2_gatherdiv2df;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERDIV4DF:
+ icode = CODE_FOR_avx2_gatherdiv4df;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERSIV4SF:
+ icode = CODE_FOR_avx2_gathersiv4sf;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERSIV8SF:
+ icode = CODE_FOR_avx2_gathersiv8sf;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERDIV4SF:
+ icode = CODE_FOR_avx2_gatherdiv4sf;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERDIV8SF:
+ icode = CODE_FOR_avx2_gatherdiv4sf256;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERSIV2DI:
+ icode = CODE_FOR_avx2_gathersiv2di;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERSIV4DI:
+ icode = CODE_FOR_avx2_gathersiv4di;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERDIV2DI:
+ icode = CODE_FOR_avx2_gatherdiv2di;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERDIV4DI:
+ icode = CODE_FOR_avx2_gatherdiv4di;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERSIV4SI:
+ icode = CODE_FOR_avx2_gathersiv4si;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERSIV8SI:
+ icode = CODE_FOR_avx2_gathersiv8si;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERDIV4SI:
+ icode = CODE_FOR_avx2_gatherdiv4si;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERDIV8SI:
+ icode = CODE_FOR_avx2_gatherdiv4si256;
+
+ gather_gen:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
+ arg3 = CALL_EXPR_ARG (exp, 3);
+ arg4 = CALL_EXPR_ARG (exp, 4);
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+ op3 = expand_normal (arg3);
+ op4 = expand_normal (arg4);
+ /* Note the arg order is different from the operand order. */
+ mode0 = insn_data[icode].operand[1].mode;
+ mode1 = insn_data[icode].operand[2].mode;
+ mode2 = insn_data[icode].operand[3].mode;
+ mode3 = insn_data[icode].operand[4].mode;
+ mode4 = insn_data[icode].operand[5].mode;
+
+ if (target == NULL_RTX)
+ target = gen_reg_rtx (insn_data[icode].operand[0].mode);
+
+ /* Force the memory operand to use only a base register here, but we
+ don't want to do that for the memory operands of other builtin
+ functions.  */
+ op1 = force_reg (Pmode, op1);
+ op1 = gen_rtx_MEM (mode1, op1);
+
+ if (!insn_data[icode].operand[1].predicate (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if (!insn_data[icode].operand[2].predicate (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+ if (!insn_data[icode].operand[3].predicate (op2, mode2))
+ op2 = copy_to_mode_reg (mode2, op2);
+ if (!insn_data[icode].operand[4].predicate (op3, mode3))
+ op3 = copy_to_mode_reg (mode3, op3);
+ if (!insn_data[icode].operand[5].predicate (op4, mode4))
+ {
+ error ("last argument must be scale 1, 2, 4, 8");
+ return const0_rtx;
+ }
+ pat = GEN_FCN (icode) (target, op0, op1, op2, op3, op4);
+ if (! pat)
+ return const0_rtx;
+ emit_insn (pat);
+ return target;
+
default:
break;
}
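A minimal usage sketch of the gather builtins registered and expanded above (assumption: the cast conventions mirror those used in the intrinsic headers, and the function name is illustrative). The argument order follows the builtin signature (source, base pointer, index vector, mask, scale), and the scale must be 1, 2, 4 or 8 or the expander rejects it:

#include <immintrin.h>

__m256d
gather4df_example (const double *base, __m128i idx, __m256d src, __m256d mask)
{
  /* __builtin_ia32_gathersiv4df: V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT.  */
  return (__m256d) __builtin_ia32_gathersiv4df ((__v4df) src, base,
                                                (__v4si) idx,
                                                (__v4df) mask, 8);
}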
@@ -35044,13 +35592,13 @@ ix86_preferred_simd_mode (enum machine_mode mode)
switch (mode)
{
case QImode:
- return V16QImode;
+ return TARGET_AVX2 ? V32QImode : V16QImode;
case HImode:
- return V8HImode;
+ return TARGET_AVX2 ? V16HImode : V8HImode;
case SImode:
- return V4SImode;
+ return TARGET_AVX2 ? V8SImode : V4SImode;
case DImode:
- return V2DImode;
+ return TARGET_AVX2 ? V4DImode : V2DImode;
case SFmode:
if (TARGET_AVX && !TARGET_PREFER_AVX128)
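The ix86_preferred_simd_mode change above lets the auto-vectorizer pick 256-bit integer vectors once TARGET_AVX2 is set. A hypothetical illustration (not part of the patch), compiled with -O3 -mavx2:

/* The vectorizer can now choose V8SImode (eight ints per vector) for
   this loop instead of V4SImode.  */
void
add_ints (int *restrict a, const int *restrict b, const int *restrict c, int n)
{
  int i;
  for (i = 0; i < n; i++)
    a[i] = b[i] + c[i];
}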
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index b73d46f..d343fc2 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -231,6 +231,14 @@
UNSPEC_VCVTPH2PS
UNSPEC_VCVTPS2PH
+ ;; For AVX2 support
+ UNSPEC_VPERMSI
+ UNSPEC_VPERMDF
+ UNSPEC_VPERMSF
+ UNSPEC_VPERMDI
+ UNSPEC_VPERMTI
+ UNSPEC_GATHER
+
;; For BMI support
UNSPEC_BEXTR
@@ -930,7 +938,8 @@
[(SF "ss") (DF "sd")
(V8SF "ps") (V4DF "pd")
(V4SF "ps") (V2DF "pd")
- (V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
+ (V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")
+ (V32QI "b") (V16HI "w") (V8SI "d") (V4DI "q")])
;; SSE vector suffix for floating point modes
(define_mode_attr ssevecmodesuffix [(SF "ps") (DF "pd")])
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index 11a1a4e..3704df7 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -56,6 +56,10 @@
#include <avxintrin.h>
#endif
+#ifdef __AVX2__
+#include <avx2intrin.h>
+#endif
+
#ifdef __RDRND__
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
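With avx2intrin.h chained into immintrin.h as above, user code only needs the umbrella header. A usage sketch (assumption: _mm256_add_epi32 is among the intrinsics the new header provides):

#include <immintrin.h>

__m256i
add_epi32_example (__m256i a, __m256i b)
{
  return _mm256_add_epi32 (a, b);   /* 256-bit vpaddd under -mavx2 */
}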
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index bc0a357..b4fa04e 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -611,6 +611,14 @@
return i == 2 || i == 4 || i == 8;
})
+;; Match 1, 2, 4, or 8
+(define_predicate "const1248_operand"
+ (match_code "const_int")
+{
+ HOST_WIDE_INT i = INTVAL (op);
+ return i == 1 || i == 2 || i == 4 || i == 8;
+})
+
;; Match 3, 5, or 9. Used for leal multiplicands.
(define_predicate "const359_operand"
(match_code "const_int")
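The new const1248_operand predicate guards the gather scale immediate; a plain C restatement (illustration only, hypothetical helper name):

static int
valid_gather_scale (long scale)
{
  return scale == 1 || scale == 2 || scale == 4 || scale == 8;
}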
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index e9f6c3d..5bc8586 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -81,9 +81,104 @@
(define_mode_iterator VI8
[(V4DI "TARGET_AVX") V2DI])
+(define_mode_iterator VI1_AVX2
+ [(V32QI "TARGET_AVX2") V16QI])
+
+(define_mode_iterator VI2_AVX2
+ [(V16HI "TARGET_AVX2") V8HI])
+
+(define_mode_iterator VI4_AVX2
+ [(V8SI "TARGET_AVX2") V4SI])
+
+(define_mode_iterator VI8_AVX2
+ [(V4DI "TARGET_AVX2") V2DI])
+
+(define_mode_iterator VIMAX_AVX2
+ [(V4DI "TARGET_AVX2") V1TI])
+
+(define_mode_iterator SSESCALARMODE
+ [(V4DI "TARGET_AVX2") TI])
+
+(define_mode_iterator VI12_AVX2
+ [(V32QI "TARGET_AVX2") V16QI
+ (V16HI "TARGET_AVX2") V8HI])
+
+(define_mode_iterator VI24_AVX2
+ [(V16HI "TARGET_AVX2") V8HI
+ (V8SI "TARGET_AVX2") V4SI])
+
+(define_mode_iterator VI124_AVX2
+ [(V32QI "TARGET_AVX2") V16QI
+ (V16HI "TARGET_AVX2") V8HI
+ (V8SI "TARGET_AVX2") V4SI])
+
+(define_mode_iterator VI248_AVX2
+ [(V16HI "TARGET_AVX2") V8HI
+ (V8SI "TARGET_AVX2") V4SI
+ (V4DI "TARGET_AVX2") V2DI])
+
+(define_mode_iterator VI48_AVX2
+ [V8SI V4SI V4DI V2DI])
+
+(define_mode_iterator VI4SD_AVX2
+ [V4SI V4DI])
+
+(define_mode_iterator V48_AVX2
+ [(V4SF "TARGET_SSE") (V2DF "TARGET_SSE2")
+ (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
+ (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
+ (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
+
+(define_mode_attr sse2_avx2
+ [(V16QI "sse2") (V32QI "avx2")
+ (V8HI "sse2") (V16HI "avx2")
+ (V4SI "sse2") (V8SI "avx2")
+ (V2DI "sse2") (V4DI "avx2")
+ (V1TI "sse2")])
+
+(define_mode_attr ssse3_avx2
+ [(V16QI "ssse3") (V32QI "avx2")
+ (V8HI "ssse3") (V16HI "avx2")
+ (V4SI "ssse3") (V8SI "avx2")
+ (V2DI "ssse3") (V4DI "avx2")
+ (TI "ssse3")])
+
+(define_mode_attr sse4_1_avx2
+ [(V16QI "sse4_1") (V32QI "avx2")
+ (V8HI "sse4_1") (V16HI "avx2")
+ (V4SI "sse4_1") (V8SI "avx2")
+ (V2DI "sse4_1") (V4DI "avx2")])
+
+(define_mode_attr avx_avx2
+ [(V4SF "avx") (V2DF "avx")
+ (V8SF "avx") (V4DF "avx")
+ (V4SI "avx2") (V2DI "avx2")
+ (V8SI "avx2") (V4DI "avx2")])
+
+;; Mapping of logic-shift operators
+(define_code_iterator lshift [lshiftrt ashift])
+
+;; Base name for define_insn
+(define_code_attr lshift_insn [(lshiftrt "srl") (ashift "sll")])
+
+;; Base name for insn mnemonic
+(define_code_attr lshift [(lshiftrt "lshr") (ashift "lshl")])
+
+(define_mode_attr ssedoublemode
+ [(V16HI "V16SI") (V8HI "V8SI")])
+
+(define_mode_attr ssebytemode
+ [(V4DI "V32QI") (V2DI "V16QI")])
+
+(define_mode_attr shortmode
+ [(V4DI "v4si") (V2DI "v2si")])
+
;; All 128bit vector integer modes
(define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
+;; All 256bit vector integer modes
+(define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
+
;; Random 128bit vector integer mode combinations
(define_mode_iterator VI12_128 [V16QI V8HI])
(define_mode_iterator VI14_128 [V16QI V4SI])
@@ -91,6 +186,11 @@
(define_mode_iterator VI24_128 [V8HI V4SI])
(define_mode_iterator VI248_128 [V8HI V4SI V2DI])
+;; Random 256bit vector integer mode combinations
+(define_mode_iterator VI124_256 [V32QI V16HI V8SI])
+(define_mode_iterator VI1248_256 [V32QI V16HI V8SI V4DI])
+(define_mode_iterator VI248_256 [V16HI V8SI V4DI])
+
;; Int-float size matches
(define_mode_iterator VI4F_128 [V4SI V4SF])
(define_mode_iterator VI8F_128 [V2DI V2DF])
@@ -125,12 +225,16 @@
[(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")
(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
(V8SF "V8SF") (V4DF "V4DF")
- (V4SF "V4SF") (V2DF "V2DF")])
+ (V4SF "V4SF") (V2DF "V2DF")
+ (TI "TI") (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")])
;; Mapping of vector float modes to an integer mode of the same size
(define_mode_attr sseintvecmode
[(V8SF "V8SI") (V4DF "V4DI")
- (V4SF "V4SI") (V2DF "V2DI")])
+ (V4SF "V4SI") (V2DF "V2DI")
+ (V4DF "V4DI") (V8SF "V8SI")
+ (V8SI "V8SI") (V4DI "V4DI")
+ (V4SI "V4SI") (V2DI "V2DI")])
;; Mapping of vector modes to a vector mode of double size
(define_mode_attr ssedoublevecmode
@@ -162,17 +266,20 @@
;; SSE scalar suffix for vector modes
(define_mode_attr ssescalarmodesuffix
- [(V8SF "ss") (V4DF "sd")
+ [(SF "ss") (DF "sd")
+ (V8SF "ss") (V4DF "sd")
(V4SF "ss") (V2DF "sd")
(V8SI "ss") (V4DI "sd")
(V4SI "d")])
;; Pack/unpack vector modes
(define_mode_attr sseunpackmode
- [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")])
+ [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
+ (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")])
(define_mode_attr ssepackmode
- [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")])
+ [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
+ (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])
;; Mapping of the max integer size for xop rotate immediate constraint
(define_mode_attr sserotatemax
@@ -184,11 +291,27 @@
;; Instruction suffix for sign and zero extensions.
(define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
-
-
;; Mix-n-match
(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
+(define_mode_iterator AVXMODE48P_DI
+ [V2DI V2DF V4DI V4DF V4SF V4SI])
+(define_mode_attr AVXMODE48P_DI
+ [(V2DI "V2DI") (V2DF "V2DI")
+ (V4DI "V4DI") (V4DF "V4DI")
+ (V4SI "V2DI") (V4SF "V2DI")
+ (V8SI "V4DI") (V8SF "V4DI")])
+(define_mode_attr gthrfirstp
+ [(V2DI "p") (V2DF "")
+ (V4DI "p") (V4DF "")
+ (V4SI "p") (V4SF "")
+ (V8SI "p") (V8SF "")])
+(define_mode_attr gthrlastp
+ [(V2DI "q") (V2DF "pd")
+ (V4DI "q") (V4DF "pd")
+ (V4SI "d") (V4SF "ps")
+ (V8SI "d") (V8SF "ps")])
+
(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
;; Mapping of immediate bits for blend instructions
@@ -229,7 +352,7 @@
case 1:
case 2:
switch (get_attr_mode (insn))
- {
+ {
case MODE_V8SF:
case MODE_V4SF:
if (TARGET_AVX
@@ -272,10 +395,10 @@
(set_attr "prefix" "maybe_vex")
(set (attr "mode")
(cond [(ne (symbol_ref "TARGET_AVX") (const_int 0))
- (const_string "<sseinsnmode>")
+ (const_string "<sseinsnmode>")
(ior (ior
- (ne (symbol_ref "optimize_function_for_size_p (cfun)")
- (const_int 0))
+ (ne (symbol_ref "optimize_function_for_size_p (cfun)")
+ (const_int 0))
(eq (symbol_ref "TARGET_SSE2") (const_int 0)))
(and (eq_attr "alternative" "2")
(ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
@@ -325,15 +448,15 @@
/* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
Assemble the 64-bit DImode value in an xmm register. */
emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
- gen_rtx_SUBREG (SImode, operands[1], 0)));
+ gen_rtx_SUBREG (SImode, operands[1], 0)));
emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
gen_rtx_SUBREG (SImode, operands[1], 4)));
emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
- operands[2]));
+ operands[2]));
}
else if (memory_operand (operands[1], DImode))
emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
- operands[1], const0_rtx));
+ operands[1], const0_rtx));
else
gcc_unreachable ();
})
@@ -1281,12 +1404,12 @@
(define_expand "vcond<mode>"
[(set (match_operand:VF 0 "register_operand" "")
- (if_then_else:VF
- (match_operator 3 ""
- [(match_operand:VF 4 "nonimmediate_operand" "")
- (match_operand:VF 5 "nonimmediate_operand" "")])
- (match_operand:VF 1 "general_operand" "")
- (match_operand:VF 2 "general_operand" "")))]
+ (if_then_else:VF
+ (match_operator 3 ""
+ [(match_operand:VF 4 "nonimmediate_operand" "")
+ (match_operand:VF 5 "nonimmediate_operand" "")])
+ (match_operand:VF 1 "general_operand" "")
+ (match_operand:VF 2 "general_operand" "")))]
"TARGET_SSE"
{
bool ok = ix86_expand_fp_vcond (operands);
@@ -2579,7 +2702,7 @@
(parallel [(const_int 2) (const_int 3)
(const_int 2) (const_int 3)])))
(set (match_operand:V2DF 0 "register_operand" "")
- (float:V2DF
+ (float:V2DF
(vec_select:V2SI
(match_dup 2)
(parallel [(const_int 0) (const_int 1)]))))]
@@ -2601,7 +2724,7 @@
(parallel [(const_int 4) (const_int 5)
(const_int 6) (const_int 7)])))
(set (match_operand:V4DF 0 "register_operand" "")
- (float:V4DF
+ (float:V4DF
(match_dup 2)))]
"TARGET_AVX"
"operands[2] = gen_reg_rtx (V4SImode);")
@@ -2622,7 +2745,7 @@
(parallel [(const_int 2) (const_int 3)
(const_int 2) (const_int 3)])))
(set (match_dup 6)
- (float:V2DF
+ (float:V2DF
(vec_select:V2SI
(match_dup 5)
(parallel [(const_int 0) (const_int 1)]))))
@@ -2728,8 +2851,8 @@
emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
- gen_lowpart (V2DImode, r1),
- gen_lowpart (V2DImode, r2)));
+ gen_lowpart (V2DImode, r1),
+ gen_lowpart (V2DImode, r2)));
DONE;
})
@@ -2747,8 +2870,8 @@
emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
- gen_lowpart (V2DImode, r1),
- gen_lowpart (V2DImode, r2)));
+ gen_lowpart (V2DImode, r1),
+ gen_lowpart (V2DImode, r2)));
DONE;
})
@@ -3290,6 +3413,18 @@
operands[1] = force_reg (SFmode, operands[1]);
})
+(define_insn "avx2_vec_dupv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_duplicate:V4SF
+ (vec_select:SF
+ (match_operand:V4SF 1 "register_operand" "x")
+ (parallel [(const_int 0)]))))]
+ "TARGET_AVX2"
+ "vbroadcastss\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
(define_insn "*vec_dupv4sf_avx"
[(set (match_operand:V4SF 0 "register_operand" "=x,x")
(vec_duplicate:V4SF
@@ -3304,6 +3439,18 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V4SF")])
+(define_insn "avx2_vec_dupv8sf"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (vec_duplicate:V8SF
+ (vec_select:SF
+ (match_operand:V4SF 1 "register_operand" "x")
+ (parallel [(const_int 0)]))))]
+ "TARGET_AVX2"
+ "vbroadcastss\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
(define_insn "*vec_dupv4sf"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_duplicate:V4SF
@@ -3899,7 +4046,7 @@
(match_dup 3)
(match_dup 4))
(parallel [(const_int 0) (const_int 1)
- (const_int 4) (const_int 5)])))]
+ (const_int 4) (const_int 5)])))]
"TARGET_AVX"
{
operands[3] = gen_reg_rtx (V4DFmode);
@@ -4059,6 +4206,21 @@
})
;; punpcklqdq and punpckhqdq are shorter than shufpd.
+(define_insn "avx2_interleave_highv4di"
+ [(set (match_operand:V4DI 0 "register_operand" "=x")
+ (vec_select:V4DI
+ (vec_concat:V8DI
+ (match_operand:V4DI 1 "register_operand" "x")
+ (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 1)
+ (const_int 5)
+ (const_int 3)
+ (const_int 7)])))]
+ "TARGET_AVX2"
+ "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
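The 256-bit punpck patterns select within each 128-bit lane, which is why the parallel above reads 1,5,3,7 rather than the cross-lane 2,6,3,7. A scalar reference sketch (illustration only):

void
unpackhi_epi64_ref (long long dst[4], const long long a[4], const long long b[4])
{
  dst[0] = a[1];  dst[1] = b[1];   /* low 128-bit lane  */
  dst[2] = a[3];  dst[3] = b[3];   /* high 128-bit lane */
}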
(define_insn "vec_interleave_highv2di"
[(set (match_operand:V2DI 0 "register_operand" "=x,x")
@@ -4078,6 +4240,22 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
+(define_insn "avx2_interleave_lowv4di"
+ [(set (match_operand:V4DI 0 "register_operand" "=x")
+ (vec_select:V4DI
+ (vec_concat:V8DI
+ (match_operand:V4DI 1 "register_operand" "x")
+ (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 4)
+ (const_int 2)
+ (const_int 6)])))]
+ "TARGET_AVX2"
+ "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_insn "vec_interleave_lowv2di"
[(set (match_operand:V2DI 0 "register_operand" "=x,x")
(vec_select:V2DI
@@ -4463,18 +4641,18 @@
"operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
(define_expand "<plusminus_insn><mode>3"
- [(set (match_operand:VI_128 0 "register_operand" "")
- (plusminus:VI_128
- (match_operand:VI_128 1 "nonimmediate_operand" "")
- (match_operand:VI_128 2 "nonimmediate_operand" "")))]
+ [(set (match_operand:VI 0 "register_operand" "")
+ (plusminus:VI
+ (match_operand:VI 1 "nonimmediate_operand" "")
+ (match_operand:VI 2 "nonimmediate_operand" "")))]
"TARGET_SSE2"
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*<plusminus_insn><mode>3"
- [(set (match_operand:VI_128 0 "register_operand" "=x,x")
- (plusminus:VI_128
- (match_operand:VI_128 1 "nonimmediate_operand" "<comm>0,x")
- (match_operand:VI_128 2 "nonimmediate_operand" "xm,xm")))]
+ [(set (match_operand:VI 0 "register_operand" "=x,x")
+ (plusminus:VI
+ (match_operand:VI 1 "nonimmediate_operand" "<comm>0,x")
+ (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
"TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
"@
p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
@@ -4483,21 +4661,21 @@
(set_attr "type" "sseiadd")
(set_attr "prefix_data16" "1,*")
(set_attr "prefix" "orig,vex")
- (set_attr "mode" "TI")])
+ (set_attr "mode" "<sseinsnmode>")])
-(define_expand "sse2_<plusminus_insn><mode>3"
- [(set (match_operand:VI12_128 0 "register_operand" "")
- (sat_plusminus:VI12_128
- (match_operand:VI12_128 1 "nonimmediate_operand" "")
- (match_operand:VI12_128 2 "nonimmediate_operand" "")))]
+(define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
+ [(set (match_operand:VI12_AVX2 0 "register_operand" "")
+ (sat_plusminus:VI12_AVX2
+ (match_operand:VI12_AVX2 1 "nonimmediate_operand" "")
+ (match_operand:VI12_AVX2 2 "nonimmediate_operand" "")))]
"TARGET_SSE2"
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
-(define_insn "*sse2_<plusminus_insn><mode>3"
- [(set (match_operand:VI12_128 0 "register_operand" "=x,x")
- (sat_plusminus:VI12_128
- (match_operand:VI12_128 1 "nonimmediate_operand" "<comm>0,x")
- (match_operand:VI12_128 2 "nonimmediate_operand" "xm,xm")))]
+(define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
+ [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
+ (sat_plusminus:VI12_AVX2
+ (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,x")
+ (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))]
"TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
"@
p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
@@ -4548,18 +4726,18 @@
DONE;
})
-(define_expand "mulv8hi3"
- [(set (match_operand:V8HI 0 "register_operand" "")
- (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
- (match_operand:V8HI 2 "nonimmediate_operand" "")))]
+(define_expand "mul<mode>3"
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "")
+ (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "")
+ (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))]
"TARGET_SSE2"
- "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
+ "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
-(define_insn "*mulv8hi3"
- [(set (match_operand:V8HI 0 "register_operand" "=x,x")
- (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
- (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
- "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+(define_insn "*mul<mode>3"
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
+ (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
+ (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
"@
pmullw\t{%2, %0|%0, %2}
vpmullw\t{%2, %1, %0|%0, %1, %2}"
@@ -4567,32 +4745,32 @@
(set_attr "type" "sseimul")
(set_attr "prefix_data16" "1,*")
(set_attr "prefix" "orig,vex")
- (set_attr "mode" "TI")])
+ (set_attr "mode" "<sseinsnmode>")])
-(define_expand "<s>mulv8hi3_highpart"
- [(set (match_operand:V8HI 0 "register_operand" "")
- (truncate:V8HI
- (lshiftrt:V8SI
- (mult:V8SI
- (any_extend:V8SI
- (match_operand:V8HI 1 "nonimmediate_operand" ""))
- (any_extend:V8SI
- (match_operand:V8HI 2 "nonimmediate_operand" "")))
- (const_int 16))))]
+(define_expand "<s>mul<mode>3_highpart"
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "")
+ (truncate:VI2_AVX2
+ (lshiftrt:<ssedoublemode>
+ (mult:<ssedoublemode>
+ (any_extend:<ssedoublemode>
+ (match_operand:VI2_AVX2 1 "nonimmediate_operand" ""))
+ (any_extend:<ssedoublemode>
+ (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))
+ (const_int 16))))]
"TARGET_SSE2"
"ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
-(define_insn "*<s>mulv8hi3_highpart"
- [(set (match_operand:V8HI 0 "register_operand" "=x,x")
- (truncate:V8HI
- (lshiftrt:V8SI
- (mult:V8SI
- (any_extend:V8SI
- (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
- (any_extend:V8SI
- (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
+(define_insn "*<s>mul<mode>3_highpart"
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
+ (truncate:VI2_AVX2
+ (lshiftrt:<ssedoublemode>
+ (mult:<ssedoublemode>
+ (any_extend:<ssedoublemode>
+ (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
+ (any_extend:<ssedoublemode>
+ (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
(const_int 16))))]
- "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
"@
pmulh<u>w\t{%2, %0|%0, %2}
vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
@@ -4600,7 +4778,42 @@
(set_attr "type" "sseimul")
(set_attr "prefix_data16" "1,*")
(set_attr "prefix" "orig,vex")
- (set_attr "mode" "TI")])
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_expand "avx2_umulv4siv4di3"
+ [(set (match_operand:V4DI 0 "register_operand" "")
+ (mult:V4DI
+ (zero_extend:V4DI
+ (vec_select:V4SI
+ (match_operand:V8SI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)])))
+ (zero_extend:V4DI
+ (vec_select:V4SI
+ (match_operand:V8SI 2 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)])))))]
+ "TARGET_AVX2"
+ "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
+
+(define_insn "*avx_umulv4siv4di3"
+ [(set (match_operand:V4DI 0 "register_operand" "=x")
+ (mult:V4DI
+ (zero_extend:V4DI
+ (vec_select:V4SI
+ (match_operand:V8SI 1 "nonimmediate_operand" "%x")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)])))
+ (zero_extend:V4DI
+ (vec_select:V4SI
+ (match_operand:V8SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)])))))]
+ "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
+ "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
(define_expand "sse2_umulv2siv2di3"
[(set (match_operand:V2DI 0 "register_operand" "")
@@ -4637,6 +4850,43 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
+(define_expand "avx2_mulv4siv4di3"
+ [(set (match_operand:V4DI 0 "register_operand" "")
+ (mult:V4DI
+ (sign_extend:V4DI
+ (vec_select:V4SI
+ (match_operand:V8SI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)])))
+ (sign_extend:V4DI
+ (vec_select:V4SI
+ (match_operand:V8SI 2 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)])))))]
+ "TARGET_AVX2"
+ "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
+
+(define_insn "*avx2_mulv4siv4di3"
+ [(set (match_operand:V4DI 0 "register_operand" "=x")
+ (mult:V4DI
+ (sign_extend:V4DI
+ (vec_select:V4SI
+ (match_operand:V8SI 1 "nonimmediate_operand" "x")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)])))
+ (sign_extend:V4DI
+ (vec_select:V4SI
+ (match_operand:V8SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)])))))]
+ "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
+ "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "avx")
+ (set_attr "type" "sseimul")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
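The vec_select/sign_extend RTL above multiplies only the even 32-bit elements of each source into 64-bit products. A scalar reference sketch (illustration only):

void
pmuldq_ref (long long dst[4], const int a[8], const int b[8])
{
  int i;
  for (i = 0; i < 4; i++)
    dst[i] = (long long) a[2 * i] * (long long) b[2 * i];
}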
(define_expand "sse4_1_mulv2siv2di3"
[(set (match_operand:V2DI 0 "register_operand" "")
(mult:V2DI
@@ -4673,6 +4923,56 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
+(define_expand "avx2_pmaddwd"
+ [(set (match_operand:V8SI 0 "register_operand" "")
+ (plus:V8SI
+ (mult:V8SI
+ (sign_extend:V8SI
+ (vec_select:V8HI
+ (match_operand:V16HI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)])))
+ (sign_extend:V8SI
+ (vec_select:V8HI
+ (match_operand:V16HI 2 "nonimmediate_operand" "")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)]))))
+ (mult:V8SI
+ (sign_extend:V8SI
+ (vec_select:V8HI (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)])))
+ (sign_extend:V8SI
+ (vec_select:V8HI (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)]))))))]
+ "TARGET_AVX2"
+ "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
+
(define_expand "sse2_pmaddwd"
[(set (match_operand:V4SI 0 "register_operand" "")
(plus:V4SI
@@ -4707,6 +5007,59 @@
"TARGET_SSE2"
"ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
+(define_insn "*avx2_pmaddwd"
+ [(set (match_operand:V8SI 0 "register_operand" "=x")
+ (plus:V8SI
+ (mult:V8SI
+ (sign_extend:V8SI
+ (vec_select:V8HI
+ (match_operand:V16HI 1 "nonimmediate_operand" "%x")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)])))
+ (sign_extend:V8SI
+ (vec_select:V8HI
+ (match_operand:V16HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)]))))
+ (mult:V8SI
+ (sign_extend:V8SI
+ (vec_select:V8HI (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)])))
+ (sign_extend:V8SI
+ (vec_select:V8HI (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)]))))))]
+ "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
+ "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
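The pmaddwd RTL forms sixteen signed 16-bit products and sums adjacent pairs into eight 32-bit results. A scalar reference sketch (illustration only):

void
pmaddwd_ref (int dst[8], const short a[16], const short b[16])
{
  int i;
  for (i = 0; i < 8; i++)
    dst[i] = (int) a[2 * i] * b[2 * i] + (int) a[2 * i + 1] * b[2 * i + 1];
}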
(define_insn "*sse2_pmaddwd"
[(set (match_operand:V4SI 0 "register_operand" "=x,x")
(plus:V4SI
@@ -4749,21 +5102,21 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
-(define_expand "mulv4si3"
- [(set (match_operand:V4SI 0 "register_operand" "")
- (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
- (match_operand:V4SI 2 "register_operand" "")))]
+(define_expand "mul<mode>3"
+ [(set (match_operand:VI4_AVX2 0 "register_operand" "")
+ (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "register_operand" "")
+ (match_operand:VI4_AVX2 2 "register_operand" "")))]
"TARGET_SSE2"
{
if (TARGET_SSE4_1 || TARGET_AVX)
- ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
+ ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
})
-(define_insn "*sse4_1_mulv4si3"
- [(set (match_operand:V4SI 0 "register_operand" "=x,x")
- (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
- (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")))]
- "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
+(define_insn "*<sse4_1_avx2>_mul<mode>3"
+ [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,x")
+ (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,x")
+ (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,xm")))]
+ "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
"@
pmulld\t{%2, %0|%0, %2}
vpmulld\t{%2, %1, %0|%0, %1, %2}"
@@ -4771,7 +5124,7 @@
(set_attr "type" "sseimul")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "orig,vex")
- (set_attr "mode" "TI")])
+ (set_attr "mode" "<sseinsnmode>")])
(define_insn_and_split "*sse2_mulv4si3"
[(set (match_operand:V4SI 0 "register_operand" "")
@@ -4885,7 +5238,7 @@
/* Multiply low parts. */
emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
- gen_lowpart (V4SImode, op2)));
+ gen_lowpart (V4SImode, op2)));
/* Shift input vectors left 32 bits so we can multiply high parts. */
emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
@@ -5119,9 +5472,9 @@
})
(define_insn "ashr<mode>3"
- [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
- (ashiftrt:VI24_128
- (match_operand:VI24_128 1 "register_operand" "0,x")
+ [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
+ (ashiftrt:VI24_AVX2
+ (match_operand:VI24_AVX2 1 "register_operand" "0,x")
(match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
"TARGET_SSE2"
"@
@@ -5135,12 +5488,27 @@
(const_string "0")))
(set_attr "prefix_data16" "1,*")
(set_attr "prefix" "orig,vex")
- (set_attr "mode" "TI")])
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx2_lshrqv4di3"
+ [(set (match_operand:V4DI 0 "register_operand" "=x")
+ (lshiftrt:V4DI
+ (match_operand:V4DI 1 "register_operand" "x")
+ (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
+ "TARGET_AVX2"
+{
+ operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
+ return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix" "vex")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "OI")])
(define_insn "lshr<mode>3"
- [(set (match_operand:VI248_128 0 "register_operand" "=x,x")
- (lshiftrt:VI248_128
- (match_operand:VI248_128 1 "register_operand" "0,x")
+ [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
+ (lshiftrt:VI248_AVX2
+ (match_operand:VI248_AVX2 1 "register_operand" "0,x")
(match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
"TARGET_SSE2"
"@
@@ -5154,7 +5522,36 @@
(const_string "0")))
(set_attr "prefix_data16" "1,*")
(set_attr "prefix" "orig,vex")
- (set_attr "mode" "TI")])
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx2_lshlqv4di3"
+ [(set (match_operand:V4DI 0 "register_operand" "=x")
+ (ashift:V4DI (match_operand:V4DI 1 "register_operand" "x")
+ (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
+ "TARGET_AVX2"
+{
+ operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
+ return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix" "vex")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "OI")])
+
+(define_insn "avx2_lshl<mode>3"
+ [(set (match_operand:VI248_256 0 "register_operand" "=x")
+ (ashift:VI248_256
+ (match_operand:VI248_256 1 "register_operand" "x")
+ (match_operand:SI 2 "nonmemory_operand" "xN")))]
+ "TARGET_AVX2"
+ "vpsll<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix" "vex")
+ (set (attr "length_immediate")
+ (if_then_else (match_operand 2 "const_int_operand" "")
+ (const_string "1")
+ (const_string "0")))
+ (set_attr "mode" "OI")])
(define_insn "ashl<mode>3"
[(set (match_operand:VI248_128 0 "register_operand" "=x,x")
@@ -5177,7 +5574,7 @@
(define_expand "vec_shl_<mode>"
[(set (match_operand:VI_128 0 "register_operand" "")
- (ashift:V1TI
+ (ashift:V1TI
(match_operand:VI_128 1 "register_operand" "")
(match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
"TARGET_SSE2"
@@ -5186,10 +5583,10 @@
operands[1] = gen_lowpart (V1TImode, operands[1]);
})
-(define_insn "sse2_ashlv1ti3"
- [(set (match_operand:V1TI 0 "register_operand" "=x,x")
- (ashift:V1TI
- (match_operand:V1TI 1 "register_operand" "0,x")
+(define_insn "<sse2_avx2>_ashl<mode>3"
+ [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
+ (ashift:VIMAX_AVX2
+ (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
(match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
"TARGET_SSE2"
{
@@ -5210,11 +5607,11 @@
(set_attr "length_immediate" "1")
(set_attr "prefix_data16" "1,*")
(set_attr "prefix" "orig,vex")
- (set_attr "mode" "TI")])
+ (set_attr "mode" "<sseinsnmode>")])
(define_expand "vec_shr_<mode>"
[(set (match_operand:VI_128 0 "register_operand" "")
- (lshiftrt:V1TI
+ (lshiftrt:V1TI
(match_operand:VI_128 1 "register_operand" "")
(match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
"TARGET_SSE2"
@@ -5223,9 +5620,29 @@
operands[1] = gen_lowpart (V1TImode, operands[1]);
})
+(define_expand "avx2_<code><mode>3"
+ [(set (match_operand:VI124_256 0 "register_operand" "")
+ (umaxmin:VI124_256
+ (match_operand:VI124_256 1 "nonimmediate_operand" "")
+ (match_operand:VI124_256 2 "nonimmediate_operand" "")))]
+ "TARGET_AVX2"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_insn "*avx2_<code><mode>3"
+ [(set (match_operand:VI124_256 0 "register_operand" "=x")
+ (umaxmin:VI124_256
+ (match_operand:VI124_256 1 "nonimmediate_operand" "%x")
+ (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_insn "sse2_lshrv1ti3"
[(set (match_operand:V1TI 0 "register_operand" "=x,x")
- (lshiftrt:V1TI
+ (lshiftrt:V1TI
(match_operand:V1TI 1 "register_operand" "0,x")
(match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
"TARGET_SSE2"
@@ -5250,6 +5667,26 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
+(define_expand "avx2_<code><mode>3"
+ [(set (match_operand:VI124_256 0 "register_operand" "")
+ (smaxmin:VI124_256
+ (match_operand:VI124_256 1 "nonimmediate_operand" "")
+ (match_operand:VI124_256 2 "nonimmediate_operand" "")))]
+ "TARGET_AVX2"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_insn "*avx2_<code><mode>3"
+ [(set (match_operand:VI124_256 0 "register_operand" "=x")
+ (smaxmin:VI124_256
+ (match_operand:VI124_256 1 "nonimmediate_operand" "%x")
+ (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_insn "*sse4_1_<code><mode>3"
[(set (match_operand:VI14_128 0 "register_operand" "=x,x")
(smaxmin:VI14_128
@@ -5533,6 +5970,26 @@
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(define_expand "avx2_eq<mode>3"
+ [(set (match_operand:VI1248_256 0 "register_operand" "")
+ (eq:VI1248_256
+ (match_operand:VI1248_256 1 "nonimmediate_operand" "")
+ (match_operand:VI1248_256 2 "nonimmediate_operand" "")))]
+ "TARGET_AVX2"
+ "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
+
+(define_insn "*avx2_eq<mode>3"
+ [(set (match_operand:VI1248_256 0 "register_operand" "=x")
+ (eq:VI1248_256
+ (match_operand:VI1248_256 1 "nonimmediate_operand" "%x")
+ (match_operand:VI1248_256 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
+ "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_insn "*sse4_1_eqv2di3"
[(set (match_operand:V2DI 0 "register_operand" "=x,x")
(eq:V2DI
@@ -5595,6 +6052,18 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
+(define_insn "avx2_gt<mode>3"
+ [(set (match_operand:VI1248_256 0 "register_operand" "=x")
+ (gt:VI1248_256
+ (match_operand:VI1248_256 1 "register_operand" "x")
+ (match_operand:VI1248_256 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX2"
+ "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_insn "sse2_gt<mode>3"
[(set (match_operand:VI124_128 0 "register_operand" "=x,x")
(gt:VI124_128
@@ -5612,12 +6081,12 @@
(define_expand "vcond<mode>"
[(set (match_operand:VI124_128 0 "register_operand" "")
- (if_then_else:VI124_128
- (match_operator 3 ""
- [(match_operand:VI124_128 4 "nonimmediate_operand" "")
- (match_operand:VI124_128 5 "nonimmediate_operand" "")])
- (match_operand:VI124_128 1 "general_operand" "")
- (match_operand:VI124_128 2 "general_operand" "")))]
+ (if_then_else:VI124_128
+ (match_operator 3 ""
+ [(match_operand:VI124_128 4 "nonimmediate_operand" "")
+ (match_operand:VI124_128 5 "nonimmediate_operand" "")])
+ (match_operand:VI124_128 1 "general_operand" "")
+ (match_operand:VI124_128 2 "general_operand" "")))]
"TARGET_SSE2"
{
bool ok = ix86_expand_int_vcond (operands);
@@ -5627,12 +6096,12 @@
(define_expand "vcondv2di"
[(set (match_operand:V2DI 0 "register_operand" "")
- (if_then_else:V2DI
- (match_operator 3 ""
- [(match_operand:V2DI 4 "nonimmediate_operand" "")
- (match_operand:V2DI 5 "nonimmediate_operand" "")])
- (match_operand:V2DI 1 "general_operand" "")
- (match_operand:V2DI 2 "general_operand" "")))]
+ (if_then_else:V2DI
+ (match_operator 3 ""
+ [(match_operand:V2DI 4 "nonimmediate_operand" "")
+ (match_operand:V2DI 5 "nonimmediate_operand" "")])
+ (match_operand:V2DI 1 "general_operand" "")
+ (match_operand:V2DI 2 "general_operand" "")))]
"TARGET_SSE4_2"
{
bool ok = ix86_expand_int_vcond (operands);
@@ -5642,12 +6111,12 @@
(define_expand "vcondu<mode>"
[(set (match_operand:VI124_128 0 "register_operand" "")
- (if_then_else:VI124_128
- (match_operator 3 ""
- [(match_operand:VI124_128 4 "nonimmediate_operand" "")
- (match_operand:VI124_128 5 "nonimmediate_operand" "")])
- (match_operand:VI124_128 1 "general_operand" "")
- (match_operand:VI124_128 2 "general_operand" "")))]
+ (if_then_else:VI124_128
+ (match_operator 3 ""
+ [(match_operand:VI124_128 4 "nonimmediate_operand" "")
+ (match_operand:VI124_128 5 "nonimmediate_operand" "")])
+ (match_operand:VI124_128 1 "general_operand" "")
+ (match_operand:VI124_128 2 "general_operand" "")))]
"TARGET_SSE2"
{
bool ok = ix86_expand_int_vcond (operands);
@@ -5657,12 +6126,12 @@
(define_expand "vconduv2di"
[(set (match_operand:V2DI 0 "register_operand" "")
- (if_then_else:V2DI
- (match_operator 3 ""
- [(match_operand:V2DI 4 "nonimmediate_operand" "")
- (match_operand:V2DI 5 "nonimmediate_operand" "")])
- (match_operand:V2DI 1 "general_operand" "")
- (match_operand:V2DI 2 "general_operand" "")))]
+ (if_then_else:V2DI
+ (match_operator 3 ""
+ [(match_operand:V2DI 4 "nonimmediate_operand" "")
+ (match_operand:V2DI 5 "nonimmediate_operand" "")])
+ (match_operand:V2DI 1 "general_operand" "")
+ (match_operand:V2DI 2 "general_operand" "")))]
"TARGET_SSE4_2"
{
bool ok = ix86_expand_int_vcond (operands);
@@ -5691,11 +6160,11 @@
operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
})
-(define_expand "sse2_andnot<mode>3"
- [(set (match_operand:VI_128 0 "register_operand" "")
- (and:VI_128
- (not:VI_128 (match_operand:VI_128 1 "register_operand" ""))
- (match_operand:VI_128 2 "nonimmediate_operand" "")))]
+(define_expand "<sse2_avx2>_andnot<mode>3"
+ [(set (match_operand:VI 0 "register_operand" "")
+ (and:VI
+ (not:VI (match_operand:VI 1 "register_operand" ""))
+ (match_operand:VI 2 "nonimmediate_operand" "")))]
"TARGET_SSE2")
(define_insn "*andnot<mode>3"
@@ -5708,7 +6177,8 @@
static char buf[32];
const char *ops;
const char *tmp
- = (get_attr_mode (insn) == MODE_TI) ? "pandn" : "andnps";
+ = ((get_attr_mode (insn) == MODE_TI) ||
+ (get_attr_mode (insn) == MODE_OI)) ? "pandn" : "andnps";
switch (which_alternative)
{
@@ -5739,6 +6209,8 @@
(const_string "V8SF")
(ne (symbol_ref "TARGET_SSE2") (const_int 0))
(const_string "TI")
+ (ne (symbol_ref "TARGET_AVX2") (const_int 0))
+ (const_string "OI")
]
(const_string "V4SF")))])
@@ -5761,7 +6233,8 @@
static char buf[32];
const char *ops;
const char *tmp
- = (get_attr_mode (insn) == MODE_TI) ? "p<logic>" : "<logic>ps";
+ = ((get_attr_mode (insn) == MODE_TI) ||
+ (get_attr_mode (insn) == MODE_OI)) ? "p<logic>" : "<logic>ps";
switch (which_alternative)
{
@@ -5792,6 +6265,8 @@
(const_string "V8SF")
(ne (symbol_ref "TARGET_SSE2") (const_int 0))
(const_string "TI")
+ (ne (symbol_ref "TARGET_AVX2") (const_int 0))
+ (const_string "OI")
]
(const_string "V4SF")))])
@@ -5852,13 +6327,13 @@
DONE;
})
-(define_insn "sse2_packsswb"
- [(set (match_operand:V16QI 0 "register_operand" "=x,x")
- (vec_concat:V16QI
- (ss_truncate:V8QI
- (match_operand:V8HI 1 "register_operand" "0,x"))
- (ss_truncate:V8QI
- (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))))]
+(define_insn "<sse2_avx2>_packsswb"
+ [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
+ (vec_concat:VI1_AVX2
+ (ss_truncate:<ssehalfvecmode>
+ (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
+ (ss_truncate:<ssehalfvecmode>
+ (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
"TARGET_SSE2"
"@
packsswb\t{%2, %0|%0, %2}
@@ -5867,15 +6342,15 @@
(set_attr "type" "sselog")
(set_attr "prefix_data16" "1,*")
(set_attr "prefix" "orig,vex")
- (set_attr "mode" "TI")])
+ (set_attr "mode" "<sseinsnmode>")])
-(define_insn "sse2_packssdw"
- [(set (match_operand:V8HI 0 "register_operand" "=x,x")
- (vec_concat:V8HI
- (ss_truncate:V4HI
- (match_operand:V4SI 1 "register_operand" "0,x"))
- (ss_truncate:V4HI
- (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
+(define_insn "<sse2_avx2>_packssdw"
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
+ (vec_concat:VI2_AVX2
+ (ss_truncate:<ssehalfvecmode>
+ (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
+ (ss_truncate:<ssehalfvecmode>
+ (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
"TARGET_SSE2"
"@
packssdw\t{%2, %0|%0, %2}
@@ -5884,15 +6359,15 @@
(set_attr "type" "sselog")
(set_attr "prefix_data16" "1,*")
(set_attr "prefix" "orig,vex")
- (set_attr "mode" "TI")])
+ (set_attr "mode" "<sseinsnmode>")])
-(define_insn "sse2_packuswb"
- [(set (match_operand:V16QI 0 "register_operand" "=x,x")
- (vec_concat:V16QI
- (us_truncate:V8QI
- (match_operand:V8HI 1 "register_operand" "0,x"))
- (us_truncate:V8QI
- (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))))]
+(define_insn "<sse2_avx2>_packuswb"
+ [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
+ (vec_concat:VI1_AVX2
+ (us_truncate:<ssehalfvecmode>
+ (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
+ (us_truncate:<ssehalfvecmode>
+ (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
"TARGET_SSE2"
"@
packuswb\t{%2, %0|%0, %2}
@@ -5901,7 +6376,36 @@
(set_attr "type" "sselog")
(set_attr "prefix_data16" "1,*")
(set_attr "prefix" "orig,vex")
- (set_attr "mode" "TI")])
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx2_interleave_highv32qi"
+ [(set (match_operand:V32QI 0 "register_operand" "=x")
+ (vec_select:V32QI
+ (vec_concat:V64QI
+ (match_operand:V32QI 1 "register_operand" "x")
+ (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 8) (const_int 40)
+ (const_int 9) (const_int 41)
+ (const_int 10) (const_int 42)
+ (const_int 11) (const_int 43)
+ (const_int 12) (const_int 44)
+ (const_int 13) (const_int 45)
+ (const_int 14) (const_int 46)
+ (const_int 15) (const_int 47)
+ (const_int 24) (const_int 56)
+ (const_int 25) (const_int 57)
+ (const_int 26) (const_int 58)
+ (const_int 27) (const_int 59)
+ (const_int 28) (const_int 60)
+ (const_int 29) (const_int 61)
+ (const_int 30) (const_int 62)
+ (const_int 31) (const_int 63)])))]
+ "TARGET_AVX2"
+ "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
(define_insn "vec_interleave_highv16qi"
[(set (match_operand:V16QI 0 "register_operand" "=x,x")
@@ -5927,6 +6431,35 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
+(define_insn "avx2_interleave_lowv32qi"
+ [(set (match_operand:V32QI 0 "register_operand" "=x")
+ (vec_select:V32QI
+ (vec_concat:V64QI
+ (match_operand:V32QI 1 "register_operand" "x")
+ (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 32)
+ (const_int 1) (const_int 33)
+ (const_int 2) (const_int 34)
+ (const_int 3) (const_int 35)
+ (const_int 4) (const_int 36)
+ (const_int 5) (const_int 37)
+ (const_int 6) (const_int 38)
+ (const_int 7) (const_int 39)
+ (const_int 16) (const_int 48)
+ (const_int 17) (const_int 49)
+ (const_int 18) (const_int 50)
+ (const_int 19) (const_int 51)
+ (const_int 20) (const_int 52)
+ (const_int 21) (const_int 53)
+ (const_int 22) (const_int 54)
+ (const_int 23) (const_int 55)])))]
+ "TARGET_AVX2"
+ "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_insn "vec_interleave_lowv16qi"
[(set (match_operand:V16QI 0 "register_operand" "=x,x")
(vec_select:V16QI
@@ -5951,6 +6484,26 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
+(define_insn "avx2_interleave_highv16hi"
+ [(set (match_operand:V16HI 0 "register_operand" "=x")
+ (vec_select:V16HI
+ (vec_concat:V32HI
+ (match_operand:V16HI 1 "register_operand" "x")
+ (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 4) (const_int 20)
+ (const_int 5) (const_int 21)
+ (const_int 6) (const_int 22)
+ (const_int 7) (const_int 23)
+ (const_int 12) (const_int 28)
+ (const_int 13) (const_int 29)
+ (const_int 14) (const_int 30)
+ (const_int 15) (const_int 31)])))]
+ "TARGET_AVX2"
+ "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_insn "vec_interleave_highv8hi"
[(set (match_operand:V8HI 0 "register_operand" "=x,x")
(vec_select:V8HI
@@ -5971,6 +6524,26 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
+(define_insn "avx2_interleave_lowv16hi"
+ [(set (match_operand:V16HI 0 "register_operand" "=x")
+ (vec_select:V16HI
+ (vec_concat:V32HI
+ (match_operand:V16HI 1 "register_operand" "x")
+ (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 16)
+ (const_int 1) (const_int 17)
+ (const_int 2) (const_int 18)
+ (const_int 3) (const_int 19)
+ (const_int 8) (const_int 24)
+ (const_int 9) (const_int 25)
+ (const_int 10) (const_int 26)
+ (const_int 11) (const_int 27)])))]
+ "TARGET_AVX2"
+ "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_insn "vec_interleave_lowv8hi"
[(set (match_operand:V8HI 0 "register_operand" "=x,x")
(vec_select:V8HI
@@ -5991,6 +6564,22 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
+(define_insn "avx2_interleave_highv8si"
+ [(set (match_operand:V8SI 0 "register_operand" "=x")
+ (vec_select:V8SI
+ (vec_concat:V16SI
+ (match_operand:V8SI 1 "register_operand" "x")
+ (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 2) (const_int 10)
+ (const_int 3) (const_int 11)
+ (const_int 6) (const_int 14)
+ (const_int 7) (const_int 15)])))]
+ "TARGET_AVX2"
+ "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_insn "vec_interleave_highv4si"
[(set (match_operand:V4SI 0 "register_operand" "=x,x")
(vec_select:V4SI
@@ -6009,6 +6598,22 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
+(define_insn "avx2_interleave_lowv8si"
+ [(set (match_operand:V8SI 0 "register_operand" "=x")
+ (vec_select:V8SI
+ (vec_concat:V16SI
+ (match_operand:V8SI 1 "register_operand" "x")
+ (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 1) (const_int 9)
+ (const_int 4) (const_int 12)
+ (const_int 5) (const_int 13)])))]
+ "TARGET_AVX2"
+ "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_insn "vec_interleave_lowv4si"
[(set (match_operand:V4SI 0 "register_operand" "=x,x")
(vec_select:V4SI
@@ -6055,13 +6660,13 @@
{
case 0:
if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
- return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
+ return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
/* FALLTHRU */
case 1:
return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
case 2:
if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
- return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
+ return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
/* FALLTHRU */
case 3:
return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
@@ -6189,6 +6794,49 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
+(define_expand "avx2_pshufdv3"
+ [(match_operand:V8SI 0 "register_operand" "")
+ (match_operand:V8SI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_0_to_255_operand" "")]
+ "TARGET_AVX2"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT ((mask >> 4) & 3),
+ GEN_INT ((mask >> 6) & 3)));
+ DONE;
+})
+
+(define_insn "avx2_pshufd_1"
+ [(set (match_operand:V8SI 0 "register_operand" "=x")
+ (vec_select:V8SI
+ (match_operand:V8SI 1 "nonimmediate_operand" "xm")
+ (parallel [(match_operand 2 "const_0_to_3_operand" "")
+ (match_operand 3 "const_0_to_3_operand" "")
+ (match_operand 4 "const_0_to_3_operand" "")
+ (match_operand 5 "const_0_to_3_operand" "")
+ (match_dup 2)
+ (match_dup 3)
+ (match_dup 4)
+ (match_dup 5)])))]
+ "TARGET_AVX2"
+{
+ int mask = 0;
+ mask |= INTVAL (operands[2]) << 0;
+ mask |= INTVAL (operands[3]) << 2;
+ mask |= INTVAL (operands[4]) << 4;
+ mask |= INTVAL (operands[5]) << 6;
+ operands[2] = GEN_INT (mask);
+
+ return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "OI")])
+
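The expander splits the 8-bit immediate into four 2-bit dword selectors and the insn re-encodes them; a plain C restatement of the decoding (illustration only):

static void
decode_pshufd_mask (int mask, int sel[4])
{
  int i;
  for (i = 0; i < 4; i++)
    sel[i] = (mask >> (2 * i)) & 3;
}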
(define_expand "sse2_pshufd"
[(match_operand:V4SI 0 "register_operand" "")
(match_operand:V4SI 1 "nonimmediate_operand" "")
@@ -6229,6 +6877,57 @@
(set_attr "length_immediate" "1")
(set_attr "mode" "TI")])
+(define_expand "avx2_pshuflwv3"
+ [(match_operand:V16HI 0 "register_operand" "")
+ (match_operand:V16HI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_0_to_255_operand" "")]
+ "TARGET_AVX2"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT ((mask >> 4) & 3),
+ GEN_INT ((mask >> 6) & 3)));
+ DONE;
+})
+
+(define_insn "avx2_pshuflw_1"
+ [(set (match_operand:V16HI 0 "register_operand" "=x")
+ (vec_select:V16HI
+ (match_operand:V16HI 1 "nonimmediate_operand" "xm")
+ (parallel [(match_operand 2 "const_0_to_3_operand" "")
+ (match_operand 3 "const_0_to_3_operand" "")
+ (match_operand 4 "const_0_to_3_operand" "")
+ (match_operand 5 "const_0_to_3_operand" "")
+ (const_int 4)
+ (const_int 5)
+ (const_int 6)
+ (const_int 7)
+ (match_dup 2)
+ (match_dup 3)
+ (match_dup 4)
+ (match_dup 5)
+ (const_int 12)
+ (const_int 13)
+ (const_int 14)
+ (const_int 15)])))]
+ "TARGET_AVX2"
+{
+ int mask = 0;
+ mask |= INTVAL (operands[2]) << 0;
+ mask |= INTVAL (operands[3]) << 2;
+ mask |= INTVAL (operands[4]) << 4;
+ mask |= INTVAL (operands[5]) << 6;
+ operands[2] = GEN_INT (mask);
+
+ return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "OI")])
+
(define_expand "sse2_pshuflw"
[(match_operand:V8HI 0 "register_operand" "")
(match_operand:V8HI 1 "nonimmediate_operand" "")
@@ -6274,6 +6973,57 @@
(set_attr "length_immediate" "1")
(set_attr "mode" "TI")])
+(define_expand "avx2_pshufhwv3"
+ [(match_operand:V16HI 0 "register_operand" "")
+ (match_operand:V16HI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_0_to_255_operand" "")]
+ "TARGET_AVX2"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
+ GEN_INT (((mask >> 0) & 3) + 4),
+ GEN_INT (((mask >> 2) & 3) + 4),
+ GEN_INT (((mask >> 4) & 3) + 4),
+ GEN_INT (((mask >> 6) & 3) + 4)));
+ DONE;
+})
+
+(define_insn "avx2_pshufhw_1"
+ [(set (match_operand:V16HI 0 "register_operand" "=x")
+ (vec_select:V16HI
+ (match_operand:V16HI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)
+ (match_operand 2 "const_4_to_7_operand" "")
+ (match_operand 3 "const_4_to_7_operand" "")
+ (match_operand 4 "const_4_to_7_operand" "")
+ (match_operand 5 "const_4_to_7_operand" "")
+ (const_int 8)
+ (const_int 9)
+ (const_int 10)
+ (const_int 11)
+ (match_dup 2)
+ (match_dup 3)
+ (match_dup 4)
+ (match_dup 5)])))]
+ "TARGET_AVX2"
+{
+ int mask = 0;
+ mask |= (INTVAL (operands[2]) - 4) << 0;
+ mask |= (INTVAL (operands[3]) - 4) << 2;
+ mask |= (INTVAL (operands[4]) - 4) << 4;
+ mask |= (INTVAL (operands[5]) - 4) << 6;
+ operands[2] = GEN_INT (mask);
+
+ return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "OI")])
+
(define_expand "sse2_pshufhw"
[(match_operand:V8HI 0 "register_operand" "")
(match_operand:V8HI 1 "nonimmediate_operand" "")
@@ -6665,6 +7415,36 @@
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(define_expand "avx2_uavgv32qi3"
+ [(set (match_operand:V32QI 0 "register_operand" "")
+ (truncate:V32QI
+ (lshiftrt:V32HI
+ (plus:V32HI
+ (plus:V32HI
+ (zero_extend:V32HI
+ (match_operand:V32QI 1 "nonimmediate_operand" ""))
+ (zero_extend:V32HI
+ (match_operand:V32QI 2 "nonimmediate_operand" "")))
+ (const_vector:V32QI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_AVX2"
+ "ix86_fixup_binary_operands_no_copy (PLUS, V32QImode, operands);")
+
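The const_vector of ones and the final right shift in the expander encode the usual rounding average. As plain arithmetic, each byte lane of vpavgb computes the following (a minimal C sketch, illustration only; the helper name is invented):

  #include <stdint.h>

  /* Rounding unsigned byte average, as spelled out by the RTL above:
     widen, add, add 1, shift right by one, truncate back to 8 bits.  */
  static inline uint8_t
  pavgb_model (uint8_t a, uint8_t b)
  {
    return (uint8_t) (((unsigned) a + (unsigned) b + 1) >> 1);
  }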
(define_expand "sse2_uavgv16qi3"
[(set (match_operand:V16QI 0 "register_operand" "")
(truncate:V16QI
@@ -6687,6 +7467,39 @@
"TARGET_SSE2"
"ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
+(define_insn "*avx2_uavgv32qi3"
+ [(set (match_operand:V32QI 0 "register_operand" "=x")
+ (truncate:V32QI
+ (lshiftrt:V32HI
+ (plus:V32HI
+ (plus:V32HI
+ (zero_extend:V32HI
+ (match_operand:V32QI 1 "nonimmediate_operand" "%x"))
+ (zero_extend:V32HI
+ (match_operand:V32QI 2 "nonimmediate_operand" "xm")))
+ (const_vector:V32QI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V32QImode, operands)"
+ "vpavgb\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_insn "*sse2_uavgv16qi3"
[(set (match_operand:V16QI 0 "register_operand" "=x,x")
(truncate:V16QI
@@ -6716,6 +7529,28 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
+(define_expand "avx2_uavgv16hi3"
+ [(set (match_operand:V16HI 0 "register_operand" "")
+ (truncate:V16HI
+ (lshiftrt:V16SI
+ (plus:V16SI
+ (plus:V16SI
+ (zero_extend:V16SI
+ (match_operand:V16HI 1 "nonimmediate_operand" ""))
+ (zero_extend:V16SI
+ (match_operand:V16HI 2 "nonimmediate_operand" "")))
+ (const_vector:V16HI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_AVX2"
+ "ix86_fixup_binary_operands_no_copy (PLUS, V16HImode, operands);")
+
(define_expand "sse2_uavgv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "")
(truncate:V8HI
@@ -6734,6 +7569,31 @@
"TARGET_SSE2"
"ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
+(define_insn "*avx2_uavgv16hi3"
+ [(set (match_operand:V16HI 0 "register_operand" "=x")
+ (truncate:V16HI
+ (lshiftrt:V16SI
+ (plus:V16SI
+ (plus:V16SI
+ (zero_extend:V16SI
+ (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
+ (zero_extend:V16SI
+ (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
+ (const_vector:V16HI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V16HImode, operands)"
+ "vpavgw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_insn "*sse2_uavgv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x,x")
(truncate:V8HI
@@ -6761,11 +7621,11 @@
;; The correct representation for this is absolutely enormous, and
;; surely not generally useful.
-(define_insn "sse2_psadbw"
- [(set (match_operand:V2DI 0 "register_operand" "=x,x")
- (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0,x")
- (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")]
- UNSPEC_PSADBW))]
+(define_insn "<sse2_avx2>_psadbw"
+ [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
+ (unspec:VI8_AVX2 [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
+ (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
+ UNSPEC_PSADBW))]
"TARGET_SSE2"
"@
psadbw\t{%2, %0|%0, %2}
@@ -6775,7 +7635,7 @@
(set_attr "atom_unit" "simul")
(set_attr "prefix_data16" "1,*")
(set_attr "prefix" "orig,vex")
- (set_attr "mode" "TI")])
+ (set_attr "mode" "<sseinsnmode>")])
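The comment above is why UNSPEC_PSADBW stays opaque: written out, every 64-bit result element is a sum of eight absolute byte differences. A rough scalar sketch of one such element (plain C, illustration only; the function name is invented):

  #include <stdint.h>

  /* One PSADBW lane: sum of absolute differences of eight unsigned bytes,
     zero-extended into a 64-bit result element.  The 128-bit form computes
     two such lanes, the AVX2 form four.  */
  static uint64_t
  psadbw_lane_model (const uint8_t a[8], const uint8_t b[8])
  {
    uint64_t sum = 0;
    for (int i = 0; i < 8; i++)
      sum += a[i] > b[i] ? a[i] - b[i] : b[i] - a[i];
    return sum;
  }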
(define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
[(set (match_operand:SI 0 "register_operand" "=r")
@@ -6788,6 +7648,16 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
+(define_insn "avx2_pmovmskb"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
+ UNSPEC_MOVMSK))]
+ "TARGET_AVX2"
+ "vpmovmskb\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "DI")])
+
(define_insn "sse2_pmovmskb"
[(set (match_operand:SI 0 "register_operand" "=r")
(unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
@@ -6947,6 +7817,82 @@
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(define_insn "avx2_phaddwv16hi3"
+ [(set (match_operand:V16HI 0 "register_operand" "=x")
+ (vec_concat:V16HI
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI
+ (match_operand:V16HI 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI
+ (match_operand:V16HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
+ "TARGET_AVX2"
+ "vphaddw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_insn "ssse3_phaddwv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x,x")
(vec_concat:V8HI
@@ -7025,6 +7971,50 @@
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
(set_attr "mode" "DI")])
+(define_insn "avx2_phadddv8si3"
+ [(set (match_operand:V8SI 0 "register_operand" "=x")
+ (vec_concat:V8SI
+ (vec_concat:V4SI
+ (vec_concat:V2SI
+ (plus:SI
+ (vec_select:SI
+ (match_operand:V8SI 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
+ (plus:SI
+ (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2SI
+ (plus:SI
+ (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
+ (plus:SI
+ (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4SI
+ (vec_concat:V2SI
+ (plus:SI
+ (vec_select:SI
+ (match_operand:V8SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
+ (plus:SI
+ (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2SI
+ (plus:SI
+ (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
+ (plus:SI
+ (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_AVX2"
+ "vphaddd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_insn "ssse3_phadddv4si3"
[(set (match_operand:V4SI 0 "register_operand" "=x,x")
(vec_concat:V4SI
@@ -7079,6 +8069,82 @@
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
(set_attr "mode" "DI")])
+(define_insn "avx2_phaddswv16hi3"
+ [(set (match_operand:V16HI 0 "register_operand" "=x")
+ (vec_concat:V16HI
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI
+ (match_operand:V16HI 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI
+ (match_operand:V16HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
+ "TARGET_AVX2"
+ "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_insn "ssse3_phaddswv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x,x")
(vec_concat:V8HI
@@ -7157,6 +8223,82 @@
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
(set_attr "mode" "DI")])
+(define_insn "avx2_phsubwv16hi3"
+ [(set (match_operand:V16HI 0 "register_operand" "=x")
+ (vec_concat:V16HI
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI
+ (match_operand:V16HI 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI
+ (match_operand:V16HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
+ "TARGET_AVX2"
+ "vphsubw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_insn "ssse3_phsubwv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x,x")
(vec_concat:V8HI
@@ -7235,6 +8377,50 @@
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
(set_attr "mode" "DI")])
+(define_insn "avx2_phsubdv8si3"
+ [(set (match_operand:V8SI 0 "register_operand" "=x")
+ (vec_concat:V8SI
+ (vec_concat:V4SI
+ (vec_concat:V2SI
+ (minus:SI
+ (vec_select:SI
+ (match_operand:V8SI 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
+ (minus:SI
+ (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2SI
+ (minus:SI
+ (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
+ (minus:SI
+ (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4SI
+ (vec_concat:V2SI
+ (minus:SI
+ (vec_select:SI
+ (match_operand:V8SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
+ (minus:SI
+ (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2SI
+ (minus:SI
+ (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
+ (minus:SI
+ (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_AVX2"
+ "vphsubd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_insn "ssse3_phsubdv4si3"
[(set (match_operand:V4SI 0 "register_operand" "=x,x")
(vec_concat:V4SI
@@ -7290,6 +8476,82 @@
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
(set_attr "mode" "DI")])
+(define_insn "avx2_phsubswv16hi3"
+ [(set (match_operand:V16HI 0 "register_operand" "=x")
+ (vec_concat:V16HI
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI
+ (match_operand:V16HI 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI
+ (match_operand:V16HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
+ "TARGET_AVX2"
+ "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_insn "ssse3_phsubswv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x,x")
(vec_concat:V8HI
@@ -7368,6 +8630,92 @@
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
(set_attr "mode" "DI")])
+(define_insn "avx2_pmaddubsw256"
+ [(set (match_operand:V16HI 0 "register_operand" "=x")
+ (ss_plus:V16HI
+ (mult:V16HI
+ (zero_extend:V16HI
+ (vec_select:V16QI
+ (match_operand:V32QI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)
+ (const_int 16)
+ (const_int 18)
+ (const_int 20)
+ (const_int 22)
+ (const_int 24)
+ (const_int 26)
+ (const_int 28)
+ (const_int 30)])))
+ (sign_extend:V16HI
+ (vec_select:V16QI
+ (match_operand:V32QI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)
+ (const_int 16)
+ (const_int 18)
+ (const_int 20)
+ (const_int 22)
+ (const_int 24)
+ (const_int 26)
+ (const_int 28)
+ (const_int 30)]))))
+ (mult:V16HI
+ (zero_extend:V16HI
+ (vec_select:V16QI (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)
+ (const_int 17)
+ (const_int 19)
+ (const_int 21)
+ (const_int 23)
+ (const_int 25)
+ (const_int 27)
+ (const_int 29)
+ (const_int 31)])))
+ (sign_extend:V16HI
+ (vec_select:V16QI (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)
+ (const_int 17)
+ (const_int 19)
+ (const_int 21)
+ (const_int 23)
+ (const_int 25)
+ (const_int 27)
+ (const_int 29)
+ (const_int 31)]))))))]
+ "TARGET_AVX2"
+ "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
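The even/odd vec_selects above spell out the multiply-and-add of adjacent byte pairs. A compact scalar model of a single result word (plain C, illustration only; the saturation is written out by hand):

  #include <stdint.h>

  /* One vpmaddubsw result word: unsigned bytes from the first operand times
     signed bytes from the second, adjacent products summed with signed
     16-bit saturation.  */
  static int16_t
  pmaddubsw_word_model (const uint8_t a[2], const int8_t b[2])
  {
    int32_t t = (int32_t) a[0] * b[0] + (int32_t) a[1] * b[1];
    if (t > 32767) t = 32767;
    if (t < -32768) t = -32768;
    return (int16_t) t;
  }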
(define_insn "ssse3_pmaddubsw128"
[(set (match_operand:V8HI 0 "register_operand" "=x,x")
(ss_plus:V8HI
@@ -7466,6 +8814,58 @@
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
(set_attr "mode" "DI")])
+(define_expand "avx2_umulhrswv16hi3"
+ [(set (match_operand:V16HI 0 "register_operand" "")
+ (truncate:V16HI
+ (lshiftrt:V16SI
+ (plus:V16SI
+ (lshiftrt:V16SI
+ (mult:V16SI
+ (sign_extend:V16SI
+ (match_operand:V16HI 1 "nonimmediate_operand" ""))
+ (sign_extend:V16SI
+ (match_operand:V16HI 2 "nonimmediate_operand" "")))
+ (const_int 14))
+ (const_vector:V16HI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_AVX2"
+ "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
+
+(define_insn "*avx2_umulhrswv16hi3"
+ [(set (match_operand:V16HI 0 "register_operand" "=x")
+ (truncate:V16HI
+ (lshiftrt:V16SI
+ (plus:V16SI
+ (lshiftrt:V16SI
+ (mult:V16SI
+ (sign_extend:V16SI
+ (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
+ (sign_extend:V16SI
+ (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
+ (const_int 14))
+ (const_vector:V16HI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
+ "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
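In the expander above, the shift by 14, the vector of ones and the final shift implement rounding to the high half. Per word the arithmetic is ((a * b >> 14) + 1) >> 1; a scalar sketch (plain C, illustration only; the helper name is invented):

  #include <stdint.h>

  /* One vpmulhrsw word: signed 16x16 multiply, keep bits 30..15 with
     rounding, i.e. ((a * b >> 14) + 1) >> 1 truncated to 16 bits.  */
  static int16_t
  pmulhrsw_word_model (int16_t a, int16_t b)
  {
    int32_t t = ((int32_t) a * b >> 14) + 1;
    return (int16_t) (t >> 1);
  }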
(define_expand "ssse3_pmulhrswv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "")
(truncate:V8HI
@@ -7554,11 +8954,11 @@
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
(set_attr "mode" "DI")])
-(define_insn "ssse3_pshufbv16qi3"
- [(set (match_operand:V16QI 0 "register_operand" "=x,x")
- (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,x")
- (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")]
- UNSPEC_PSHUFB))]
+(define_insn "<ssse3_avx2>_pshufb<mode>3"
+ [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
+ (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
+ (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
+ UNSPEC_PSHUFB))]
"TARGET_SSSE3"
"@
pshufb\t{%2, %0|%0, %2}
@@ -7568,7 +8968,7 @@
(set_attr "prefix_data16" "1,*")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "orig,vex")
- (set_attr "mode" "TI")])
+ (set_attr "mode" "<sseinsnmode>")])
(define_insn "ssse3_pshufbv8qi3"
[(set (match_operand:V8QI 0 "register_operand" "=y")
@@ -7582,11 +8982,11 @@
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
(set_attr "mode" "DI")])
-(define_insn "ssse3_psign<mode>3"
- [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
- (unspec:VI124_128
- [(match_operand:VI124_128 1 "register_operand" "0,x")
- (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")]
+(define_insn "<ssse3_avx2>_psign<mode>3"
+ [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
+ (unspec:VI124_AVX2
+ [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
+ (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
UNSPEC_PSIGN))]
"TARGET_SSSE3"
"@
@@ -7597,7 +8997,7 @@
(set_attr "prefix_data16" "1,*")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "orig,vex")
- (set_attr "mode" "TI")])
+ (set_attr "mode" "<sseinsnmode>")])
(define_insn "ssse3_psign<mode>3"
[(set (match_operand:MMXMODEI 0 "register_operand" "=y")
@@ -7612,12 +9012,12 @@
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
(set_attr "mode" "DI")])
-(define_insn "ssse3_palignrti"
- [(set (match_operand:TI 0 "register_operand" "=x,x")
- (unspec:TI [(match_operand:TI 1 "register_operand" "0,x")
- (match_operand:TI 2 "nonimmediate_operand" "xm,xm")
- (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
- UNSPEC_PALIGNR))]
+(define_insn "<ssse3_avx2>_palignr<mode>"
+ [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
+ (unspec:SSESCALARMODE [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
+ (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
+ (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
+ UNSPEC_PALIGNR))]
"TARGET_SSSE3"
{
operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
@@ -7639,7 +9039,7 @@
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set_attr "prefix" "orig,vex")
- (set_attr "mode" "TI")])
+ (set_attr "mode" "<sseinsnmode>")])
(define_insn "ssse3_palignrdi"
[(set (match_operand:DI 0 "register_operand" "=y")
@@ -7660,16 +9060,16 @@
(set_attr "mode" "DI")])
(define_insn "abs<mode>2"
- [(set (match_operand:VI124_128 0 "register_operand" "=x")
- (abs:VI124_128
- (match_operand:VI124_128 1 "nonimmediate_operand" "xm")))]
+ [(set (match_operand:VI124_AVX2 0 "register_operand" "=x")
+ (abs:VI124_AVX2
+ (match_operand:VI124_AVX2 1 "nonimmediate_operand" "xm")))]
"TARGET_SSSE3"
"%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
[(set_attr "type" "sselog1")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "maybe_vex")
- (set_attr "mode" "TI")])
+ (set_attr "mode" "<sseinsnmode>")])
(define_insn "abs<mode>2"
[(set (match_operand:MMXMODEI 0 "register_operand" "=y")
@@ -7693,7 +9093,7 @@
[(set (match_operand:MODEF 0 "memory_operand" "=m")
(unspec:MODEF
[(match_operand:MODEF 1 "register_operand" "x")]
- UNSPEC_MOVNT))]
+ UNSPEC_MOVNT))]
"TARGET_SSE4A"
"movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
@@ -7713,10 +9113,10 @@
(define_insn "sse4a_extrqi"
[(set (match_operand:V2DI 0 "register_operand" "=x")
- (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
- (match_operand 2 "const_0_to_255_operand" "")
- (match_operand 3 "const_0_to_255_operand" "")]
- UNSPEC_EXTRQI))]
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand 2 "const_0_to_255_operand" "")
+ (match_operand 3 "const_0_to_255_operand" "")]
+ UNSPEC_EXTRQI))]
"TARGET_SSE4A"
"extrq\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "sse")
@@ -7726,9 +9126,9 @@
(define_insn "sse4a_extrq"
[(set (match_operand:V2DI 0 "register_operand" "=x")
- (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
- (match_operand:V16QI 2 "register_operand" "x")]
- UNSPEC_EXTRQ))]
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "register_operand" "x")]
+ UNSPEC_EXTRQ))]
"TARGET_SSE4A"
"extrq\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")
@@ -7737,11 +9137,11 @@
(define_insn "sse4a_insertqi"
[(set (match_operand:V2DI 0 "register_operand" "=x")
- (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
- (match_operand:V2DI 2 "register_operand" "x")
- (match_operand 3 "const_0_to_255_operand" "")
- (match_operand 4 "const_0_to_255_operand" "")]
- UNSPEC_INSERTQI))]
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "register_operand" "x")
+ (match_operand 3 "const_0_to_255_operand" "")
+ (match_operand 4 "const_0_to_255_operand" "")]
+ UNSPEC_INSERTQI))]
"TARGET_SSE4A"
"insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
[(set_attr "type" "sseins")
@@ -7752,9 +9152,9 @@
(define_insn "sse4a_insertq"
[(set (match_operand:V2DI 0 "register_operand" "=x")
- (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
- (match_operand:V2DI 2 "register_operand" "x")]
- UNSPEC_INSERTQ))]
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "register_operand" "x")]
+ UNSPEC_INSERTQ))]
"TARGET_SSE4A"
"insertq\t{%2, %0|%0, %2}"
[(set_attr "type" "sseins")
@@ -7824,23 +9224,23 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "<MODE>")])
-(define_insn "sse4_1_movntdqa"
- [(set (match_operand:V2DI 0 "register_operand" "=x")
- (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
+(define_insn "<sse4_1_avx2>_movntdqa"
+ [(set (match_operand:VI8_AVX2 0 "register_operand" "=x")
+ (unspec:VI8_AVX2 [(match_operand:VI8_AVX2 1 "memory_operand" "m")]
UNSPEC_MOVNTDQA))]
"TARGET_SSE4_1"
"%vmovntdqa\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "maybe_vex")
- (set_attr "mode" "TI")])
+ (set_attr "mode" "<sseinsnmode>")])
-(define_insn "sse4_1_mpsadbw"
- [(set (match_operand:V16QI 0 "register_operand" "=x,x")
- (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,x")
- (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
- (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
- UNSPEC_MPSADBW))]
+(define_insn "<sse4_1_avx2>_mpsadbw"
+ [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
+ (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
+ (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
+ (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
+ UNSPEC_MPSADBW))]
"TARGET_SSE4_1"
"@
mpsadbw\t{%3, %2, %0|%0, %2, %3}
@@ -7850,7 +9250,21 @@
(set_attr "length_immediate" "1")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "orig,vex")
- (set_attr "mode" "TI")])
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx2_packusdw"
+ [(set (match_operand:V16HI 0 "register_operand" "=x")
+ (vec_concat:V16HI
+ (us_truncate:V8HI
+ (match_operand:V8SI 1 "register_operand" "x"))
+ (us_truncate:V8HI
+ (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
+ "TARGET_AVX2"
+ "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
(define_insn "sse4_1_packusdw"
[(set (match_operand:V8HI 0 "register_operand" "=x,x")
@@ -7869,12 +9283,12 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
-(define_insn "sse4_1_pblendvb"
- [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x,x")
- (unspec:V16QI
- [(match_operand:V16QI 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
- (match_operand:V16QI 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
- (match_operand:V16QI 3 "register_operand" "Yz,x")]
+(define_insn "<sse4_1_avx2>_pblendvb"
+ [(set (match_operand:VI1_AVX2 0 "reg_not_xmm0_operand" "=x,x")
+ (unspec:VI1_AVX2
+ [(match_operand:VI1_AVX2 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
+ (match_operand:VI1_AVX2 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
+ (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
UNSPEC_BLENDV))]
"TARGET_SSE4_1"
"@
@@ -7885,13 +9299,13 @@
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "*,1")
(set_attr "prefix" "orig,vex")
- (set_attr "mode" "TI")])
+ (set_attr "mode" "<sseinsnmode>")])
-(define_insn "sse4_1_pblendw"
- [(set (match_operand:V8HI 0 "register_operand" "=x,x")
- (vec_merge:V8HI
- (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
- (match_operand:V8HI 1 "register_operand" "0,x")
+(define_insn "<sse4_1_avx2>_pblendw"
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
+ (vec_merge:VI2_AVX2
+ (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")
+ (match_operand:VI2_AVX2 1 "register_operand" "0,x")
(match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
"TARGET_SSE4_1"
"@
@@ -7902,7 +9316,21 @@
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set_attr "prefix" "orig,vex")
- (set_attr "mode" "TI")])
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx2_pblendd<mode>"
+ [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
+ (vec_merge:VI4_AVX2
+ (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
+ (match_operand:VI4_AVX2 1 "register_operand" "x")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")))]
+ "TARGET_AVX2"
+ "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<sseinsnmode>")])
(define_insn "sse4_1_phminposuw"
[(set (match_operand:V8HI 0 "register_operand" "=x")
@@ -7915,6 +9343,17 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
+(define_insn "avx2_<code>v16qiv16hi2"
+ [(set (match_operand:V16HI 0 "register_operand" "=x")
+ (any_extend:V16HI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX2"
+ "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_insn "sse4_1_<code>v8qiv8hi2"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(any_extend:V8HI
@@ -7935,6 +9374,26 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
+(define_insn "avx2_<code>v8qiv8si2"
+ [(set (match_operand:V8SI 0 "register_operand" "=x")
+ (any_extend:V8SI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)
+ (const_int 4)
+ (const_int 5)
+ (const_int 6)
+ (const_int 7)]))))]
+ "TARGET_AVX2"
+ "vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_insn "sse4_1_<code>v4qiv4si2"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(any_extend:V4SI
@@ -7951,6 +9410,17 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
+(define_insn "avx2_<code>v8hiv8si2"
+ [(set (match_operand:V8SI 0 "register_operand" "=x")
+ (any_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX2"
+ "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_insn "sse4_1_<code>v4hiv4si2"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(any_extend:V4SI
@@ -7967,6 +9437,22 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
+(define_insn "avx2_<code>v4qiv4di2"
+ [(set (match_operand:V4DI 0 "register_operand" "=x")
+ (any_extend:V4DI
+ (vec_select:V4QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)]))))]
+ "TARGET_AVX2"
+ "vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_insn "sse4_1_<code>v2qiv2di2"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(any_extend:V2DI
@@ -7981,6 +9467,22 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
+(define_insn "avx2_<code>v4hiv4di2"
+ [(set (match_operand:V4DI 0 "register_operand" "=x")
+ (any_extend:V4DI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)]))))]
+ "TARGET_AVX2"
+ "vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_insn "sse4_1_<code>v2hiv2di2"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(any_extend:V2DI
@@ -7995,6 +9497,16 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
+(define_insn "avx2_<code>v4siv4di2"
+ [(set (match_operand:V4DI 0 "register_operand" "=x")
+ (any_extend:V4DI
+ (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX2"
+ "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "OI")])
+
(define_insn "sse4_1_<code>v2siv2di2"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(any_extend:V2DI
@@ -8360,7 +9872,7 @@
;; do not allow the value being added to be a memory operation.
(define_insn "xop_pmacsww"
[(set (match_operand:V8HI 0 "register_operand" "=x")
- (plus:V8HI
+ (plus:V8HI
(mult:V8HI
(match_operand:V8HI 1 "nonimmediate_operand" "%x")
(match_operand:V8HI 2 "nonimmediate_operand" "xm"))
@@ -8372,7 +9884,7 @@
(define_insn "xop_pmacssww"
[(set (match_operand:V8HI 0 "register_operand" "=x")
- (ss_plus:V8HI
+ (ss_plus:V8HI
(mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
(match_operand:V8HI 2 "nonimmediate_operand" "xm"))
(match_operand:V8HI 3 "nonimmediate_operand" "x")))]
@@ -8383,7 +9895,7 @@
(define_insn "xop_pmacsdd"
[(set (match_operand:V4SI 0 "register_operand" "=x")
- (plus:V4SI
+ (plus:V4SI
(mult:V4SI
(match_operand:V4SI 1 "nonimmediate_operand" "%x")
(match_operand:V4SI 2 "nonimmediate_operand" "xm"))
@@ -8395,7 +9907,7 @@
(define_insn "xop_pmacssdd"
[(set (match_operand:V4SI 0 "register_operand" "=x")
- (ss_plus:V4SI
+ (ss_plus:V4SI
(mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
(match_operand:V4SI 2 "nonimmediate_operand" "xm"))
(match_operand:V4SI 3 "nonimmediate_operand" "x")))]
@@ -9218,7 +10730,7 @@
int i;
if (GET_MODE (op2) != <ssescalarmode>mode)
- {
+ {
op2 = gen_reg_rtx (<ssescalarmode>mode);
convert_move (op2, operands[2], false);
}
@@ -9250,7 +10762,7 @@
int i;
if (GET_MODE (op2) != <ssescalarmode>mode)
- {
+ {
op2 = gen_reg_rtx (<ssescalarmode>mode);
convert_move (op2, operands[2], false);
}
@@ -9772,6 +11284,99 @@
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
+(define_mode_attr AVXTOSSEMODE
+ [(V4DI "V2DI") (V2DI "V2DI")
+ (V8SI "V4SI") (V4SI "V4SI")
+ (V16HI "V8HI") (V8HI "V8HI")
+ (V32QI "V16QI") (V16QI "V16QI")])
+
+(define_insn "avx2_pbroadcast<mode>"
+ [(set (match_operand:VI 0 "register_operand" "=x")
+ (vec_duplicate:VI
+ (vec_select:<ssescalarmode>
+ (match_operand:<AVXTOSSEMODE> 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))))]
+ "TARGET_AVX2"
+ "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx2_permvarv8si"
+ [(set (match_operand:V8SI 0 "register_operand" "=x")
+ (unspec:V8SI
+ [(match_operand:V8SI 1 "register_operand" "x")
+ (match_operand:V8SI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_VPERMSI))]
+ "TARGET_AVX2"
+ "vpermd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
+(define_insn "avx2_permv4df"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (unspec:V4DF
+ [(match_operand:V4DF 1 "register_operand" "xm")
+ (match_operand:SI 2 "const_0_to_255_operand" "n")]
+ UNSPEC_VPERMDF))]
+ "TARGET_AVX2"
+ "vpermpd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
+(define_insn "avx2_permvarv8sf"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (unspec:V8SF
+ [(match_operand:V8SF 1 "register_operand" "x")
+ (match_operand:V8SF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_VPERMSF))]
+ "TARGET_AVX2"
+ "vpermps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
+(define_insn "avx2_permv4di"
+ [(set (match_operand:V4DI 0 "register_operand" "=x")
+ (unspec:V4DI
+ [(match_operand:V4DI 1 "register_operand" "xm")
+ (match_operand:SI 2 "const_0_to_255_operand" "n")]
+ UNSPEC_VPERMDI))]
+ "TARGET_AVX2"
+ "vpermq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
+(define_insn "avx2_permv2ti"
+ [(set (match_operand:V4DI 0 "register_operand" "=x")
+ (unspec:V4DI
+ [(match_operand:V4DI 1 "register_operand" "x")
+ (match_operand:V4DI 2 "register_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_VPERMTI))]
+ "TARGET_AVX2"
+ "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
+(define_insn "avx2_vec_dupv4df"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (vec_duplicate:V4DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "register_operand" "x")
+ (parallel [(const_int 0)]))))]
+ "TARGET_AVX2"
+ "vbroadcastsd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
;; Modes handled by AVX vec_dup patterns.
(define_mode_iterator AVX_VEC_DUP_MODE
[V8SI V8SF V4DI V4DF])
@@ -9789,6 +11394,18 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
+(define_insn "avx2_vbroadcasti128_<mode>"
+ [(set (match_operand:VI_256 0 "register_operand" "=x")
+ (vec_concat:VI_256
+ (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
+ (match_dup 1)))]
+ "TARGET_AVX2"
+ "vbroadcasti128\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_split
[(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "")
(vec_duplicate:AVX_VEC_DUP_MODE
@@ -9880,7 +11497,7 @@
}
operands[1] = adjust_address_nv (op1, <ssescalarmode>mode,
- elt * GET_MODE_SIZE (<ssescalarmode>mode));
+ elt * GET_MODE_SIZE (<ssescalarmode>mode));
})
(define_expand "avx_vpermil<mode>"
@@ -10061,6 +11678,36 @@
DONE;
})
+(define_insn "avx2_vec_set_lo_v4di"
+ [(set (match_operand:V4DI 0 "register_operand" "=x")
+ (vec_concat:V4DI
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")
+ (vec_select:V2DI
+ (match_operand:V4DI 1 "register_operand" "x")
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_AVX2"
+ "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
+(define_insn "avx2_vec_set_hi_v4di"
+ [(set (match_operand:V4DI 0 "register_operand" "=x")
+ (vec_concat:V4DI
+ (vec_select:V2DI
+ (match_operand:V4DI 1 "register_operand" "x")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX2"
+ "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_insn "vec_set_lo_<mode>"
[(set (match_operand:VI8F_256 0 "register_operand" "=x")
(vec_concat:VI8F_256
@@ -10203,24 +11850,39 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
-(define_expand "avx_maskload<ssemodesuffix><avxsizesuffix>"
- [(set (match_operand:VF 0 "register_operand" "")
- (unspec:VF
+(define_expand "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
+ [(set (match_operand:V48_AVX2 0 "register_operand" "")
+ (unspec:V48_AVX2
[(match_operand:<sseintvecmode> 2 "register_operand" "")
- (match_operand:VF 1 "memory_operand" "")
+ (match_operand:V48_AVX2 1 "memory_operand" "")
(match_dup 0)]
UNSPEC_MASKMOV))]
"TARGET_AVX")
-(define_expand "avx_maskstore<ssemodesuffix><avxsizesuffix>"
- [(set (match_operand:VF 0 "memory_operand" "")
- (unspec:VF
+(define_expand "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
+ [(set (match_operand:V48_AVX2 0 "memory_operand" "")
+ (unspec:V48_AVX2
[(match_operand:<sseintvecmode> 1 "register_operand" "")
- (match_operand:VF 2 "register_operand" "")
+ (match_operand:V48_AVX2 2 "register_operand" "")
(match_dup 0)]
UNSPEC_MASKMOV))]
"TARGET_AVX")
+(define_insn "*avx2_maskmov<ssemodesuffix><avxsizesuffix>"
+ [(set (match_operand:VI48_AVX2 0 "nonimmediate_operand" "=x,m")
+ (unspec:VI48_AVX2
+ [(match_operand:<sseintvecmode> 1 "register_operand" "x,x")
+ (match_operand:VI48_AVX2 2 "nonimmediate_operand" "m,x")
+ (match_dup 0)]
+ UNSPEC_MASKMOV))]
+ "TARGET_AVX2
+ && (REG_P (operands[0]) == MEM_P (operands[2]))"
+ "vpmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn "*avx_maskmov<ssemodesuffix><avxsizesuffix>"
[(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
(unspec:VF
@@ -10265,6 +11927,286 @@
DONE;
})
+(define_insn "avx2_extracti128"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_select:V2DI
+ (match_operand:V4DI 1 "nonimmediate_operand" "xm")
+ (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
+ "TARGET_AVX2"
+ "vextracti128\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
+(define_expand "avx2_inserti128"
+ [(match_operand:V4DI 0 "register_operand" "")
+ (match_operand:V4DI 1 "register_operand" "")
+ (match_operand:V2DI 2 "nonimmediate_operand" "")
+ (match_operand:SI 3 "const_0_to_1_operand" "")]
+ "TARGET_AVX2"
+{
+ rtx (*insn)(rtx, rtx, rtx);
+
+ switch (INTVAL (operands[3]))
+ {
+ case 0:
+ insn = gen_avx2_vec_set_lo_v4di;
+ break;
+ case 1:
+ insn = gen_avx2_vec_set_hi_v4di;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ emit_insn (insn (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
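The expander dispatches purely on the immediate: 0 replaces the low 128-bit half, 1 the high half. At the intrinsic level this corresponds to the AVX2 intrinsic _mm256_inserti128_si256; a brief usage sketch (assumes an AVX2-enabled build; the helper name is invented):

  #include <immintrin.h>

  /* Replace the high 128-bit half of A with B; the constant 1 selects
     gen_avx2_vec_set_hi_v4di in the expander above, 0 the low half.  */
  static __m256i
  insert_high_model (__m256i a, __m128i b)
  {
    return _mm256_inserti128_si256 (a, b, 1);
  }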
+(define_insn "avx2_ashrvv8si"
+ [(set (match_operand:V8SI 0 "register_operand" "=x")
+ (vec_concat:V8SI
+ (vec_concat:V4SI
+ (vec_concat:V2SI
+ (ashiftrt:SI
+ (vec_select:SI
+ (match_operand:V8SI 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:SI
+ (match_operand:V8SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)])))
+ (ashiftrt:SI
+ (vec_select:SI
+ (match_dup 1)
+ (parallel [(const_int 1)]))
+ (vec_select:SI
+ (match_dup 2)
+ (parallel [(const_int 1)]))))
+ (vec_concat:V2SI
+ (ashiftrt:SI
+ (vec_select:SI
+ (match_dup 1)
+ (parallel [(const_int 2)]))
+ (vec_select:SI
+ (match_dup 2)
+ (parallel [(const_int 2)])))
+ (ashiftrt:SI
+ (vec_select:SI
+ (match_dup 1)
+ (parallel [(const_int 3)]))
+ (vec_select:SI
+ (match_dup 2)
+ (parallel [(const_int 3)])))))
+ (vec_concat:V4SI
+ (vec_concat:V2SI
+ (ashiftrt:SI
+ (vec_select:SI
+ (match_dup 1)
+ (parallel [(const_int 0)]))
+ (vec_select:SI
+ (match_dup 2)
+ (parallel [(const_int 0)])))
+ (ashiftrt:SI
+ (vec_select:SI
+ (match_dup 1)
+ (parallel [(const_int 1)]))
+ (vec_select:SI
+ (match_dup 2)
+ (parallel [(const_int 1)]))))
+ (vec_concat:V2SI
+ (ashiftrt:SI
+ (vec_select:SI
+ (match_dup 1)
+ (parallel [(const_int 2)]))
+ (vec_select:SI
+ (match_dup 2)
+ (parallel [(const_int 2)])))
+ (ashiftrt:SI
+ (vec_select:SI
+ (match_dup 1)
+ (parallel [(const_int 3)]))
+ (vec_select:SI
+ (match_dup 2)
+ (parallel [(const_int 3)])))))))]
+ "TARGET_AVX2"
+ "vpsravd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
+(define_insn "avx2_ashrvv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_concat:V4SI
+ (vec_concat:V2SI
+ (ashiftrt:SI
+ (vec_select:SI
+ (match_operand:V4SI 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)])))
+ (ashiftrt:SI
+ (vec_select:SI
+ (match_dup 1)
+ (parallel [(const_int 1)]))
+ (vec_select:SI
+ (match_dup 2)
+ (parallel [(const_int 1)]))))
+ (vec_concat:V2SI
+ (ashiftrt:SI
+ (vec_select:SI
+ (match_dup 1)
+ (parallel [(const_int 2)]))
+ (vec_select:SI
+ (match_dup 2)
+ (parallel [(const_int 2)])))
+ (ashiftrt:SI
+ (vec_select:SI
+ (match_dup 1)
+ (parallel [(const_int 3)]))
+ (vec_select:SI
+ (match_dup 2)
+ (parallel [(const_int 3)]))))))]
+ "TARGET_AVX2"
+ "vpsravd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx2_<lshift>vv8si"
+ [(set (match_operand:V8SI 0 "register_operand" "=x")
+ (vec_concat:V8SI
+ (vec_concat:V4SI
+ (vec_concat:V2SI
+ (lshift:SI
+ (vec_select:SI
+ (match_operand:V8SI 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:SI
+ (match_operand:V8SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)])))
+ (lshift:SI
+ (vec_select:SI
+ (match_dup 1)
+ (parallel [(const_int 1)]))
+ (vec_select:SI
+ (match_dup 2)
+ (parallel [(const_int 1)]))))
+ (vec_concat:V2SI
+ (lshift:SI
+ (vec_select:SI
+ (match_dup 1)
+ (parallel [(const_int 2)]))
+ (vec_select:SI
+ (match_dup 2)
+ (parallel [(const_int 2)])))
+ (lshift:SI
+ (vec_select:SI
+ (match_dup 1)
+ (parallel [(const_int 3)]))
+ (vec_select:SI
+ (match_dup 2)
+ (parallel [(const_int 3)])))))
+ (vec_concat:V4SI
+ (vec_concat:V2SI
+ (lshift:SI
+ (vec_select:SI
+ (match_dup 1)
+ (parallel [(const_int 0)]))
+ (vec_select:SI
+ (match_dup 2)
+ (parallel [(const_int 0)])))
+ (lshift:SI
+ (vec_select:SI
+ (match_dup 1)
+ (parallel [(const_int 1)]))
+ (vec_select:SI
+ (match_dup 2)
+ (parallel [(const_int 1)]))))
+ (vec_concat:V2SI
+ (lshift:SI
+ (vec_select:SI
+ (match_dup 1)
+ (parallel [(const_int 2)]))
+ (vec_select:SI
+ (match_dup 2)
+ (parallel [(const_int 2)])))
+ (lshift:SI
+ (vec_select:SI
+ (match_dup 1)
+ (parallel [(const_int 3)]))
+ (vec_select:SI
+ (match_dup 2)
+ (parallel [(const_int 3)])))))))]
+ "TARGET_AVX2"
+ "vp<lshift_insn>vd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
+(define_insn "avx2_<lshift>v<mode>"
+ [(set (match_operand:VI4SD_AVX2 0 "register_operand" "=x")
+ (vec_concat:VI4SD_AVX2
+ (vec_concat:<ssehalfvecmode>
+ (lshift:<ssescalarmode>
+ (vec_select:<ssescalarmode>
+ (match_operand:VI4SD_AVX2 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:<ssescalarmode>
+ (match_operand:VI4SD_AVX2 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)])))
+ (lshift:<ssescalarmode>
+ (vec_select:<ssescalarmode>
+ (match_dup 1)
+ (parallel [(const_int 1)]))
+ (vec_select:<ssescalarmode>
+ (match_dup 2)
+ (parallel [(const_int 1)]))))
+ (vec_concat:<ssehalfvecmode>
+ (lshift:<ssescalarmode>
+ (vec_select:<ssescalarmode>
+ (match_dup 1)
+ (parallel [(const_int 2)]))
+ (vec_select:<ssescalarmode>
+ (match_dup 2)
+ (parallel [(const_int 2)])))
+ (lshift:<ssescalarmode>
+ (vec_select:<ssescalarmode>
+ (match_dup 1)
+ (parallel [(const_int 3)]))
+ (vec_select:<ssescalarmode>
+ (match_dup 2)
+ (parallel [(const_int 3)]))))))]
+ "TARGET_AVX2"
+ "vp<lshift_insn>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx2_<lshift>vv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_concat:V2DI
+ (lshift:DI
+ (vec_select:DI
+ (match_operand:V2DI 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:DI
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)])))
+ (lshift:DI
+ (vec_select:DI
+ (match_dup 1)
+ (parallel [(const_int 1)]))
+ (vec_select:DI
+ (match_dup 2)
+ (parallel [(const_int 1)])))))]
+ "TARGET_AVX2"
+ "vp<lshift_insn>vq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
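All of the variable-shift patterns above (vpsravd, vpsllvd/vpsrlvd, vpsllvq/vpsrlvq) express the same per-element idea: each destination element is the matching first-operand element shifted by the matching second-operand element. A scalar model of the dword arithmetic case (plain C, illustration only; for counts of 32 or more the instruction fills with the sign bit):

  #include <stdint.h>

  /* Per-element model of vpsravd: each dword of SRC is shifted right
     arithmetically by the matching dword of CNT; counts of 32 or more
     yield all sign bits.  */
  static void
  vpsravd_model (int32_t dst[8], const int32_t src[8], const uint32_t cnt[8])
  {
    for (int i = 0; i < 8; i++)
      dst[i] = cnt[i] < 32 ? src[i] >> cnt[i] : (src[i] < 0 ? -1 : 0);
  }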
(define_insn "*vec_concat<mode>_avx"
[(set (match_operand:V_256 0 "register_operand" "=x,x")
(vec_concat:V_256
@@ -10278,7 +12220,7 @@
return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
case 1:
switch (get_attr_mode (insn))
- {
+ {
case MODE_V8SF:
return "vmovaps\t{%1, %x0|%x0, %1}";
case MODE_V4DF:
@@ -10373,3 +12315,95 @@
[(set_attr "type" "ssecvt")
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
+
+;; For gather* insn patterns
+(define_mode_iterator VEC_GATHER_MODE
+ [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
+(define_mode_attr VEC_GATHER_MODE
+ [(V2DI "V4SI") (V2DF "V4SI")
+ (V4DI "V4SI") (V4DF "V4SI")
+ (V4SI "V4SI") (V4SF "V4SI")
+ (V8SI "V8SI") (V8SF "V8SI")])
+
+(define_expand "avx2_gathersi<mode>"
+ [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
+ (unspec:VEC_GATHER_MODE
+ [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
+ (match_operand:<ssescalarmode> 2 "memory_operand" "")
+ (match_operand:<VEC_GATHER_MODE> 3 "register_operand" "")
+ (match_operand:VEC_GATHER_MODE 4 "register_operand" "")
+ (match_operand:SI 5 "const1248_operand " "")]
+ UNSPEC_GATHER))]
+ "TARGET_AVX2")
+
+(define_insn "*avx2_gathersi<mode>"
+ [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=x")
+ (unspec:VEC_GATHER_MODE
+ [(match_operand:VEC_GATHER_MODE 1 "register_operand" "0")
+ (mem:<ssescalarmode>
+ (match_operand:P 2 "register_operand" "r"))
+ (match_operand:<VEC_GATHER_MODE> 3 "register_operand" "x")
+ (match_operand:VEC_GATHER_MODE 4 "register_operand" "x")
+ (match_operand:SI 5 "const1248_operand" "n")]
+ UNSPEC_GATHER))]
+ "TARGET_AVX2"
+ "v<gthrfirstp>gatherd<gthrlastp>\t{%4, (%2, %3, %c5), %0|%0, (%2, %3, %c5), %4}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_expand "avx2_gatherdi<mode>"
+ [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
+ (unspec:VEC_GATHER_MODE
+ [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
+ (match_operand:<ssescalarmode> 2 "memory_operand" "")
+ (match_operand:<AVXMODE48P_DI> 3 "register_operand" "")
+ (match_operand:VEC_GATHER_MODE 4 "register_operand" "")
+ (match_operand:SI 5 "const1248_operand " "")]
+ UNSPEC_GATHER))]
+ "TARGET_AVX2")
+
+(define_insn "*avx2_gatherdi<mode>"
+ [(set (match_operand:AVXMODE48P_DI 0 "register_operand" "=x")
+ (unspec:AVXMODE48P_DI
+ [(match_operand:AVXMODE48P_DI 1 "register_operand" "0")
+ (mem:<ssescalarmode>
+ (match_operand:P 2 "register_operand" "r"))
+ (match_operand:<AVXMODE48P_DI> 3 "register_operand" "x")
+ (match_operand:AVXMODE48P_DI 4 "register_operand" "x")
+ (match_operand:SI 5 "const1248_operand" "n")]
+ UNSPEC_GATHER))]
+ "TARGET_AVX2"
+ "v<gthrfirstp>gatherq<gthrlastp>\t{%4, (%2, %3, %c5), %0|%0, (%2, %3, %c5), %4}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+;; Special handling for VEX.256 with float arguments,
+;; since the operands are still xmm registers
+(define_expand "avx2_gatherdi<mode>256"
+ [(set (match_operand:VI4F_128 0 "register_operand" "")
+ (unspec:VI4F_128
+ [(match_operand:VI4F_128 1 "register_operand" "")
+ (match_operand:<ssescalarmode> 2 "memory_operand" "")
+ (match_operand:V4DI 3 "register_operand" "")
+ (match_operand:VI4F_128 4 "register_operand" "")
+ (match_operand:SI 5 "const1248_operand " "")]
+ UNSPEC_GATHER))]
+ "TARGET_AVX2")
+
+(define_insn "*avx2_gatherdi<mode>256"
+ [(set (match_operand:VI4F_128 0 "register_operand" "=x")
+ (unspec:VI4F_128
+ [(match_operand:VI4F_128 1 "register_operand" "0")
+ (mem:<ssescalarmode>
+ (match_operand:P 2 "register_operand" "r"))
+ (match_operand:V4DI 3 "register_operand" "x")
+ (match_operand:VI4F_128 4 "register_operand" "x")
+ (match_operand:SI 5 "const1248_operand" "n")]
+ UNSPEC_GATHER))]
+ "TARGET_AVX2"
+ "v<gthrfirstp>gatherq<gthrlastp>\t{%4, (%2, %3, %c5), %0|%0, (%2, %3, %c5), %4}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<sseinsnmode>")])
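Editorial note, not part of the patch: the gather expands above only wrap the operands in UNSPEC_GATHER; the matching insns emit vgatherd*/vgatherq* with a (base, index, scale) address, a pass-through source (operand 1) and a per-element mask (operand 4), the scale being limited to 1, 2, 4 or 8 by const1248_operand. As a rough sketch under those assumptions (helper name hypothetical; the real instruction also clears the mask, which is omitted here), the dword-indexed integer form behaves like:

/* Editorial sketch, not part of the patch: element-wise model of a
   dword-indexed gather such as __builtin_ia32_gathersiv4si (vpgatherdd).
   The sign bit of each mask element selects between a gathered load and
   the corresponding pass-through element.  */
#include <stdint.h>

static void
gathersiv4si_ref (int32_t dst[4], const int32_t src[4], const void *base,
                  const int32_t idx[4], const int32_t mask[4], int scale)
{
  for (int i = 0; i < 4; i++)
    dst[i] = mask[i] < 0
             ? *(const int32_t *) ((const char *) base
                                   + (int64_t) idx[i] * scale)
             : src[i];
}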
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index cf7fdbf..29c02b8 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -9455,6 +9455,184 @@ v4df __builtin_ia32_xorpd256 (v4df,v4df)
v8sf __builtin_ia32_xorps256 (v8sf,v8sf)
@end smallexample
+The following built-in functions are available when @option{-mavx2} is
+used. All of them generate the machine instruction that is part of the
+name.
+
+@smallexample
+v16hi __builtin_ia32_mpsadbw256 (v32qi,v32qi,int)
+v32qi __builtin_ia32_pabsb256 (v32qi)
+v16hi __builtin_ia32_pabsw256 (v16hi)
+v8si __builtin_ia32_pabsd256 (v8si)
+v16hi __builtin_ia32_packssdw256 (v8si,v8si)
+v32qi __builtin_ia32_packsswb256 (v16hi,v16hi)
+v16hi __builtin_ia32_packusdw256 (v8si,v8si)
+v32qi __builtin_ia32_packuswb256 (v16hi,v16hi)
+v32qi __builtin_ia32_paddb256 (v32qi,v32qi)
+v16hi __builtin_ia32_paddw256 (v16hi,v16hi)
+v8si __builtin_ia32_paddd256 (v8si,v8si)
+v4di __builtin_ia32_paddq256 (v4di,v4di)
+v32qi __builtin_ia32_paddsb256 (v32qi,v32qi)
+v16hi __builtin_ia32_paddsw256 (v16hi,v16hi)
+v32qi __builtin_ia32_paddusb256 (v32qi,v32qi)
+v16hi __builtin_ia32_paddusw256 (v16hi,v16hi)
+v4di __builtin_ia32_palignr256 (v4di,v4di,int)
+v4di __builtin_ia32_andsi256 (v4di,v4di)
+v4di __builtin_ia32_andnotsi256 (v4di,v4di)
+v32qi __builtin_ia32_pavgb256 (v32qi,v32qi)
+v16hi __builtin_ia32_pavgw256 (v16hi,v16hi)
+v32qi __builtin_ia32_pblendvb256 (v32qi,v32qi,v32qi)
+v16hi __builtin_ia32_pblendw256 (v16hi,v16hi,int)
+v32qi __builtin_ia32_pcmpeqb256 (v32qi,v32qi)
+v16hi __builtin_ia32_pcmpeqw256 (v16hi,v16hi)
+v8si __builtin_ia32_pcmpeqd256 (v8si,v8si)
+v4di __builtin_ia32_pcmpeqq256 (v4di,v4di)
+v32qi __builtin_ia32_pcmpgtb256 (v32qi,v32qi)
+v16hi __builtin_ia32_pcmpgtw256 (v16hi,v16hi)
+v8si __builtin_ia32_pcmpgtd256 (v8si,v8si)
+v4di __builtin_ia32_pcmpgtq256 (v4di,v4di)
+v16hi __builtin_ia32_phaddw256 (v16hi,v16hi)
+v8si __builtin_ia32_phaddd256 (v8si,v8si)
+v16hi __builtin_ia32_phaddsw256 (v16hi,v16hi)
+v16hi __builtin_ia32_phsubw256 (v16hi,v16hi)
+v8si __builtin_ia32_phsubd256 (v8si,v8si)
+v16hi __builtin_ia32_phsubsw256 (v16hi,v16hi)
+v16hi __builtin_ia32_pmaddubsw256 (v32qi,v32qi)
+v8si __builtin_ia32_pmaddwd256 (v16hi,v16hi)
+v32qi __builtin_ia32_pmaxsb256 (v32qi,v32qi)
+v16hi __builtin_ia32_pmaxsw256 (v16hi,v16hi)
+v8si __builtin_ia32_pmaxsd256 (v8si,v8si)
+v32qi __builtin_ia32_pmaxub256 (v32qi,v32qi)
+v16hi __builtin_ia32_pmaxuw256 (v16hi,v16hi)
+v8si __builtin_ia32_pmaxud256 (v8si,v8si)
+v32qi __builtin_ia32_pminsb256 (v32qi,v32qi)
+v16hi __builtin_ia32_pminsw256 (v16hi,v16hi)
+v8si __builtin_ia32_pminsd256 (v8si,v8si)
+v32qi __builtin_ia32_pminub256 (v32qi,v32qi)
+v16hi __builtin_ia32_pminuw256 (v16hi,v16hi)
+v8si __builtin_ia32_pminud256 (v8si,v8si)
+int __builtin_ia32_pmovmskb256 (v32qi)
+v16hi __builtin_ia32_pmovsxbw256 (v16qi)
+v8si __builtin_ia32_pmovsxbd256 (v16qi)
+v4di __builtin_ia32_pmovsxbq256 (v16qi)
+v8si __builtin_ia32_pmovsxwd256 (v8hi)
+v4di __builtin_ia32_pmovsxwq256 (v8hi)
+v4di __builtin_ia32_pmovsxdq256 (v4si)
+v16hi __builtin_ia32_pmovzxbw256 (v16qi)
+v8si __builtin_ia32_pmovzxbd256 (v16qi)
+v4di __builtin_ia32_pmovzxbq256 (v16qi)
+v8si __builtin_ia32_pmovzxwd256 (v8hi)
+v4di __builtin_ia32_pmovzxwq256 (v8hi)
+v4di __builtin_ia32_pmovzxdq256 (v4si)
+v4di __builtin_ia32_pmuldq256 (v8si,v8si)
+v16hi __builtin_ia32_pmulhrsw256 (v16hi, v16hi)
+v16hi __builtin_ia32_pmulhuw256 (v16hi,v16hi)
+v16hi __builtin_ia32_pmulhw256 (v16hi,v16hi)
+v16hi __builtin_ia32_pmullw256 (v16hi,v16hi)
+v8si __builtin_ia32_pmulld256 (v8si,v8si)
+v4di __builtin_ia32_pmuludq256 (v8si,v8si)
+v4di __builtin_ia32_por256 (v4di,v4di)
+v16hi __builtin_ia32_psadbw256 (v32qi,v32qi)
+v32qi __builtin_ia32_pshufb256 (v32qi,v32qi)
+v8si __builtin_ia32_pshufd256 (v8si,int)
+v16hi __builtin_ia32_pshufhw256 (v16hi,int)
+v16hi __builtin_ia32_pshuflw256 (v16hi,int)
+v32qi __builtin_ia32_psignb256 (v32qi,v32qi)
+v16hi __builtin_ia32_psignw256 (v16hi,v16hi)
+v8si __builtin_ia32_psignd256 (v8si,v8si)
+v4di __builtin_ia32_pslldqi256 (v4di,int)
+v16hi __builtin_ia32_psllwi256 (v16hi,int)
+v16hi __builtin_ia32_psllw256 (v16hi,v8hi)
+v8si __builtin_ia32_pslldi256 (v8si,int)
+v8si __builtin_ia32_pslld256 (v8si,v4si)
+v4di __builtin_ia32_psllqi256 (v4di,int)
+v4di __builtin_ia32_psllq256 (v4di,v2di)
+v16hi __builtin_ia32_psrawi256 (v16hi,int)
+v16hi __builtin_ia32_psraw256 (v16hi,v8hi)
+v8si __builtin_ia32_psradi256 (v8si,int)
+v8si __builtin_ia32_psrad256 (v8si,v4si)
+v4di __builtin_ia32_psrldqi256 (v4di, int)
+v16hi __builtin_ia32_psrlwi256 (v16hi,int)
+v16hi __builtin_ia32_psrlw256 (v16hi,v8hi)
+v8si __builtin_ia32_psrldi256 (v8si,int)
+v8si __builtin_ia32_psrld256 (v8si,v4si)
+v4di __builtin_ia32_psrlqi256 (v4di,int)
+v4di __builtin_ia32_psrlq256 (v4di,v2di)
+v32qi __builtin_ia32_psubb256 (v32qi,v32qi)
+v16hi __builtin_ia32_psubw256 (v16hi,v16hi)
+v8si __builtin_ia32_psubd256 (v8si,v8si)
+v4di __builtin_ia32_psubq256 (v4di,v4di)
+v32qi __builtin_ia32_psubsb256 (v32qi,v32qi)
+v16hi __builtin_ia32_psubsw256 (v16hi,v16hi)
+v32qi __builtin_ia32_psubusb256 (v32qi,v32qi)
+v16hi __builtin_ia32_psubusw256 (v16hi,v16hi)
+v32qi __builtin_ia32_punpckhbw256 (v32qi,v32qi)
+v16hi __builtin_ia32_punpckhwd256 (v16hi,v16hi)
+v8si __builtin_ia32_punpckhdq256 (v8si,v8si)
+v4di __builtin_ia32_punpckhqdq256 (v4di,v4di)
+v32qi __builtin_ia32_punpcklbw256 (v32qi,v32qi)
+v16hi __builtin_ia32_punpcklwd256 (v16hi,v16hi)
+v8si __builtin_ia32_punpckldq256 (v8si,v8si)
+v4di __builtin_ia32_punpcklqdq256 (v4di,v4di)
+v4di __builtin_ia32_pxor256 (v4di,v4di)
+v4di __builtin_ia32_movntdqa256 (pv4di)
+v4sf __builtin_ia32_vbroadcastss_ps (v4sf)
+v8sf __builtin_ia32_vbroadcastss_ps256 (v4sf)
+v4df __builtin_ia32_vbroadcastsd_pd256 (v2df)
+v4di __builtin_ia32_vbroadcastsi256 (v2di)
+v4si __builtin_ia32_pblendd128 (v4si,v4si,int)
+v8si __builtin_ia32_pblendd256 (v8si,v8si,int)
+v32qi __builtin_ia32_pbroadcastb256 (v16qi)
+v16hi __builtin_ia32_pbroadcastw256 (v8hi)
+v8si __builtin_ia32_pbroadcastd256 (v4si)
+v4di __builtin_ia32_pbroadcastq256 (v2di)
+v16qi __builtin_ia32_pbroadcastb128 (v16qi)
+v8hi __builtin_ia32_pbroadcastw128 (v8hi)
+v4si __builtin_ia32_pbroadcastd128 (v4si)
+v2di __builtin_ia32_pbroadcastq128 (v2di)
+v8si __builtin_ia32_permvarsi256 (v8si,v8si)
+v4df __builtin_ia32_permdf256 (v4df,int)
+v8sf __builtin_ia32_permvarsf256 (v8sf,v8sf)
+v4di __builtin_ia32_permdi256 (v4di,int)
+v4di __builtin_ia32_permti256 (v4di,v4di,int)
+v2di __builtin_ia32_extract128i256 (v4di,int)
+v4di __builtin_ia32_insert128i256 (v4di,v2di,int)
+v8si __builtin_ia32_maskloadd256 (pcv8si,v8si)
+v4di __builtin_ia32_maskloadq256 (pcv4di,v4di)
+v4si __builtin_ia32_maskloadd (pcv4si,v4si)
+v2di __builtin_ia32_maskloadq (pcv2di,v2di)
+void __builtin_ia32_maskstored256 (pv8si,v8si,v8si)
+void __builtin_ia32_maskstoreq256 (pv4di,v4di,v4di)
+void __builtin_ia32_maskstored (pv4si,v4si,v4si)
+void __builtin_ia32_maskstoreq (pv2di,v2di,v2di)
+v8si __builtin_ia32_psllv8si (v8si,v8si)
+v4si __builtin_ia32_psllv4si (v4si,v4si)
+v4di __builtin_ia32_psllv4di (v4di,v4di)
+v2di __builtin_ia32_psllv2di (v2di,v2di)
+v8si __builtin_ia32_psrav8si (v8si,v8si)
+v4si __builtin_ia32_psrav4si (v4si,v4si)
+v8si __builtin_ia32_psrlv8si (v8si,v8si)
+v4si __builtin_ia32_psrlv4si (v4si,v4si)
+v4di __builtin_ia32_psrlv4di (v4di,v4di)
+v2di __builtin_ia32_psrlv2di (v2di,v2di)
+v2df __builtin_ia32_gathersiv2df (v2df, pcdouble,v4si,v2df,int)
+v4df __builtin_ia32_gathersiv4df (v4df, pcdouble,v4si,v4df,int)
+v2df __builtin_ia32_gatherdiv2df (v2df, pcdouble,v2di,v2df,int)
+v4df __builtin_ia32_gatherdiv4df (v4df, pcdouble,v4di,v4df,int)
+v4sf __builtin_ia32_gathersiv4sf (v4sf, pcfloat,v4si,v4sf,int)
+v8sf __builtin_ia32_gathersiv8sf (v8sf, pcfloat,v8si,v8sf,int)
+v4sf __builtin_ia32_gatherdiv4sf (v4sf, pcfloat,v2di,v4sf,int)
+v4sf __builtin_ia32_gatherdiv4sf256 (v4sf, pcfloat,v4di,v4sf,int)
+v2di __builtin_ia32_gathersiv2di (v2di, pcint64,v4si,v2di,int)
+v4di __builtin_ia32_gathersiv4di (v4di, pcint64,v4si,v4di,int)
+v2di __builtin_ia32_gatherdiv2di (v2di, pcint64,v2di,v2di,int)
+v4di __builtin_ia32_gatherdiv4di (v4di, pcint64,v4di,v4di,int)
+v4si __builtin_ia32_gathersiv4si (v4si, pcint,v4si,v4si,int)
+v8si __builtin_ia32_gathersiv8si (v8si, pcint,v8si,v8si,int)
+v4si __builtin_ia32_gatherdiv4si (v4si, pcint,v2di,v4si,int)
+v4si __builtin_ia32_gatherdiv4si256 (v4si, pcint,v4di,v4si,int)
+@end smallexample
+
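Editorial usage sketch, not part of the patch: these builtins operate on GCC generic vector types, so the v8si shorthand in the table corresponds to a 32-byte vector of int, and calls must be compiled with -mavx2. The helper name below is hypothetical; the builtin signatures are taken from the table above.

/* Editorial sketch, not part of the patch.  Compile with -mavx2.  */
typedef int v8si __attribute__ ((vector_size (32)));

v8si
add_then_shift (v8si a, v8si b, v8si counts)
{
  v8si sum = __builtin_ia32_paddd256 (a, b);     /* vpaddd  */
  return __builtin_ia32_psllv8si (sum, counts);  /* vpsllvd */
}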
The following built-in functions are available when @option{-maes} is
used. All of them generate the machine instruction that is part of the
name.