aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
authorHongtao Liu <hongtao.liu@intel.com>2019-06-26 04:46:29 +0000
committerHongtao Liu <liuhongt@gcc.gnu.org>2019-06-26 04:46:29 +0000
commite21b52afe92f2596a5b0963d6efb5be44ab3b227 (patch)
tree699ae022ef64b1f8800a1c563659e4cac0d64f24 /gcc/config
parentb48826985b8b0bba790688ebe717cf626019a415 (diff)
downloadgcc-e21b52afe92f2596a5b0963d6efb5be44ab3b227.zip
gcc-e21b52afe92f2596a5b0963d6efb5be44ab3b227.tar.gz
gcc-e21b52afe92f2596a5b0963d6efb5be44ab3b227.tar.bz2
Enable GCC support for AVX512_VP2INTERSECT which will be in tigerlaker.
There are two instructions for AVX512_VP2INTERSECT: VP2INTERSECTD and VP2INTERSECTQ. gcc/ 2019-06-05 Hongtao Liu <hongtao.liu@intel.com> * common/config/i386/i386-common.c (OPTION_MASK_ISA_AVX512VP2INTERSECT_SET, OPTION_MASK_ISA_AVX512VP2INTERSECT_UNSET): New macros. (OPTION_MASK_ISA2_AVX512F_UNSET): Add OPTION_MASK_ISA_AVX512VP2INTERSECT_UNSET. (ix86_handle_option): Handle -mavx512vp2intersect. * config/i386/avx512vp2intersectintrin.h: New. * config/i386/avx512vp2intersectvlintrin.h: New. * config/i386/cpuid.h (bit_AVX512VP2INTERSECT): New. * config/i386/driver-i386.c (host_detect_local_cpu): Detect AVX512VP2INTERSECT. * config/i386/i386-builtin-types.def: Add new types. * config/i386/i386-builtin.def: Add new builtins. * config/i386/i386-builtins.c: (enum processor_features): Add F_AVX512VP2INTERSECT. (static const _isa_names_table isa_names_table): Ditto. * config/i386/i386-c.c (ix86_target_macros_internal): Define __AVX512VP2INTERSECT__. * config/i386/i386-expand.c (ix86_expand_builtin): Expand IX86_BUILTIN_2INTERSECTD512, IX86_BUILTIN_2INTERSECTQ512, IX86_BUILTIN_2INTERSECTD256, IX86_BUILTIN_2INTERSECTQ256, IX86_BUILTIN_2INTERSECTD128, IX86_BUILTIN_2INTERSECTQ128. * config/i386/i386-modes.def (P2QI, P2HI): New modes. * config/i386/i386-options.c (ix86_target_string): Add -mavx512vp2intersect. (ix86_option_override_internal): Handle AVX512VP2INTERSECT. * config/i386/i386.c (ix86_hard_regno_nregs): Allocate two regs for P2HImode and P2QImode. (ix86_hard_regno_mode_ok): Register pair only starts at even hardreg number for P2QImode and P2HImode. (ix86_regmode_natural_size): New function. * config/i386/i386.h (TARGET_AVX512VP2INTERSECT, TARGET_AVX512VP2INTERSECT_P, PTA_AVX512VP2INTERSECT REGMODE_NATURAL_SIZE, MASK_PAIR_REGNO_P): New. * config/i386/i386-protos.h (ix86_regmode_natural_size): Declare * config/i386/i386.opt: Add -mavx512vp2intersect. * config/i386/immintrin.h: Include avx512vp2intersectintrin.h and avx512vp2intersectvlintrin.h. * config/i386/sse.md (define_c_enum "unspec"): Add UNSPEC_VP2INTERSECT. (define_mode_iterator VI48_AVX512VP2VL): New. (avx512vp2intersect_2intersect<mode>, avx512vp2intersect_2intersectv16si): New define_insn patterns. * config.gcc: Add avx512vp2intersectvlintrin.h and avx512vp2intersectintrin.h to extra_headers. * doc/invoke.texi: Document -mavx512vp2intersect. gcc/testsuite 2019-06-06 Hongtao Liu <hongtao.liu@intel.com> Olga Makhotina <olga.makhotina@intel.com> * gcc.target/i386/avx512-check.h: Handle bit_AVX512VP2INTERSECT. * gcc.target/i386/avx512vp2intersect-2intersect-1a.c: New test. * gcc.target/i386/avx512vp2intersect-2intersect-1b.c: Likewise. * gcc.target/i386/avx512vp2intersect-2intersectvl-1a.c: Likewise. * gcc.target/i386/avx512vp2intersect-2intersectvl-1b.c: Likewise. * gcc.target/i386/sse-12.c: Add -mavx512vp2intersect. * gcc.target/i386/sse-13.c: Likewsie. * gcc.target/i386/sse-14.c: Likewise. * gcc.target/i386/sse-22.c: Likewise. * gcc.target/i386/sse-23.c: Likewise. * g++.dg/other/i386-2.C: Likewise. * g++.dg/other/i386-3.C: Likewise. Co-Authored-By: H.J. Lu <hongjiu.lu@intel.com> Co-Authored-By: Olga Makhotina <olga.makhotina@intel.com> From-SVN: r272668
Diffstat (limited to 'gcc/config')
-rw-r--r--gcc/config/i386/avx512vp2intersectintrin.h35
-rw-r--r--gcc/config/i386/avx512vp2intersectvlintrin.h49
-rw-r--r--gcc/config/i386/cpuid.h1
-rw-r--r--gcc/config/i386/driver-i386.c5
-rw-r--r--gcc/config/i386/i386-builtin-types.def7
-rw-r--r--gcc/config/i386/i386-builtin.def8
-rw-r--r--gcc/config/i386/i386-builtins.c2
-rw-r--r--gcc/config/i386/i386-c.c2
-rw-r--r--gcc/config/i386/i386-expand.c73
-rw-r--r--gcc/config/i386/i386-modes.def6
-rw-r--r--gcc/config/i386/i386-options.c6
-rw-r--r--gcc/config/i386/i386-protos.h1
-rw-r--r--gcc/config/i386/i386.c31
-rw-r--r--gcc/config/i386/i386.h6
-rw-r--r--gcc/config/i386/i386.opt4
-rw-r--r--gcc/config/i386/immintrin.h4
-rw-r--r--gcc/config/i386/sse.md27
17 files changed, 261 insertions, 6 deletions
diff --git a/gcc/config/i386/avx512vp2intersectintrin.h b/gcc/config/i386/avx512vp2intersectintrin.h
new file mode 100644
index 0000000..60cb52c
--- /dev/null
+++ b/gcc/config/i386/avx512vp2intersectintrin.h
@@ -0,0 +1,35 @@
+#if !defined _IMMINTRIN_H_INCLUDED
+#error "Never use <avx512vp2intersectintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512VP2INTERSECTINTRIN_H_INCLUDED
+#define _AVX512VP2INTERSECTINTRIN_H_INCLUDED
+
+#if !defined(__AVX512VP2INTERSECT__)
+#pragma GCC push_options
+#pragma GCC target("avx512vp2intersect")
+#define __DISABLE_AVX512VP2INTERSECT__
+#endif /* __AVX512VP2INTERSECT__ */
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_2intersect_epi32 (__m512i __A, __m512i __B, __mmask16 *__U,
+ __mmask16 *__M)
+{
+ __builtin_ia32_2intersectd512 (__U, __M, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_2intersect_epi64 (__m512i __A, __m512i __B, __mmask8 *__U,
+ __mmask8 *__M)
+{
+ __builtin_ia32_2intersectq512 (__U, __M, (__v8di) __A, (__v8di) __B);
+}
+
+#ifdef __DISABLE_AVX512VP2INTERSECT__
+#undef __DISABLE_AVX512VP2INTERSECT__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512VP2INTERSECT__ */
+
+#endif /* _AVX512VP2INTERSECTINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx512vp2intersectvlintrin.h b/gcc/config/i386/avx512vp2intersectvlintrin.h
new file mode 100644
index 0000000..26eee36
--- /dev/null
+++ b/gcc/config/i386/avx512vp2intersectvlintrin.h
@@ -0,0 +1,49 @@
+#if !defined _IMMINTRIN_H_INCLUDED
+#error "Never use <avx512vp2intersectintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512VP2INTERSECTVLINTRIN_H_INCLUDED
+#define _AVX512VP2INTERSECTVLINTRIN_H_INCLUDED
+
+#if !defined(__AVX512VP2INTERSECT__) || !defined(__AVX512VL__)
+#pragma GCC push_options
+#pragma GCC target("avx512vp2intersect,avx512vl")
+#define __DISABLE_AVX512VP2INTERSECTVL__
+#endif /* __AVX512VP2INTERSECTVL__ */
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_2intersect_epi32 (__m128i __A, __m128i __B, __mmask8 *__U, __mmask8 *__M)
+{
+ __builtin_ia32_2intersectd128 (__U, __M, (__v4si) __A, (__v4si) __B);
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_2intersect_epi32 (__m256i __A, __m256i __B, __mmask8 *__U,
+ __mmask8 *__M)
+{
+ __builtin_ia32_2intersectd256 (__U, __M, (__v8si) __A, (__v8si) __B);
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_2intersect_epi64 (__m128i __A, __m128i __B, __mmask8 *__U, __mmask8 *__M)
+{
+ __builtin_ia32_2intersectq128 (__U, __M, (__v2di) __A, (__v2di) __B);
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_2intersect_epi64 (__m256i __A, __m256i __B, __mmask8 *__U,
+ __mmask8 *__M)
+{
+ __builtin_ia32_2intersectq256 (__U, __M, (__v4di) __A, (__v4di) __B);
+}
+
+#ifdef __DISABLE_AVX512VP2INTERSECTVL__
+#undef __DISABLE_AVX512VP2INTERSECTVL__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512VP2INTERSECTVL__ */
+
+#endif /* _AVX512VP2INTERSECTVLINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index 2fda7e7..6108577 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -119,6 +119,7 @@
/* %edx */
#define bit_AVX5124VNNIW (1 << 2)
#define bit_AVX5124FMAPS (1 << 3)
+#define bit_AVX512VP2INTERSECT (1 << 8)
#define bit_IBT (1 << 20)
#define bit_PCONFIG (1 << 18)
/* XFEATURE_ENABLED_MASK register bits (%eax == 13, %ecx == 0) */
diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
index d5c62c0..a9d8981 100644
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -423,6 +423,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
unsigned int has_shstk = 0;
unsigned int has_avx512vnni = 0, has_vaes = 0;
unsigned int has_vpclmulqdq = 0;
+ unsigned int has_avx512vp2intersect = 0;
unsigned int has_movdiri = 0, has_movdir64b = 0;
unsigned int has_enqcmd = 0;
unsigned int has_waitpkg = 0;
@@ -532,6 +533,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
has_avx5124vnniw = edx & bit_AVX5124VNNIW;
has_avx5124fmaps = edx & bit_AVX5124FMAPS;
+ has_avx512vp2intersect = edx & bit_AVX512VP2INTERSECT;
has_shstk = ecx & bit_SHSTK;
has_pconfig = edx & bit_PCONFIG;
@@ -1143,6 +1145,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
const char *shstk = has_shstk ? " -mshstk" : " -mno-shstk";
const char *vaes = has_vaes ? " -mvaes" : " -mno-vaes";
const char *vpclmulqdq = has_vpclmulqdq ? " -mvpclmulqdq" : " -mno-vpclmulqdq";
+ const char *avx512vp2intersect = has_avx512vp2intersect ? " -mavx512vp2intersect" : " -mno-avx512vp2intersect";
const char *avx512bitalg = has_avx512bitalg ? " -mavx512bitalg" : " -mno-avx512bitalg";
const char *movdiri = has_movdiri ? " -mmovdiri" : " -mno-movdiri";
const char *movdir64b = has_movdir64b ? " -mmovdir64b" : " -mno-movdir64b";
@@ -1165,7 +1168,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
clwb, mwaitx, clzero, pku, rdpid, gfni, shstk,
avx512vbmi2, avx512vnni, vaes, vpclmulqdq,
avx512bitalg, movdiri, movdir64b, waitpkg, cldemote,
- ptwrite, avx512bf16, enqcmd,
+ ptwrite, avx512bf16, enqcmd, avx512vp2intersect,
NULL);
}
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 4d19060..221c0e7 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -975,6 +975,13 @@ DEF_FUNCTION_TYPE (QI, V8SF, INT, UQI)
DEF_FUNCTION_TYPE (QI, V4SF, INT, UQI)
DEF_FUNCTION_TYPE (VOID, PV32QI, V32HI, USI)
+DEF_FUNCTION_TYPE (VOID, PUHI, PUHI, V16SI, V16SI)
+DEF_FUNCTION_TYPE (VOID, PUQI, PUQI, V8SI, V8SI)
+DEF_FUNCTION_TYPE (VOID, PUQI, PUQI, V4SI, V4SI)
+DEF_FUNCTION_TYPE (VOID, PUQI, PUQI, V8DI, V8DI)
+DEF_FUNCTION_TYPE (VOID, PUQI, PUQI, V4DI, V4DI)
+DEF_FUNCTION_TYPE (VOID, PUQI, PUQI, V2DI, V2DI)
+
DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, UINT, UINT)
DEF_FUNCTION_TYPE (V4HI, HI, HI, HI, HI)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index e547dda..aad62f3 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -288,6 +288,14 @@ BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_loadv64qi_mask, "__builtin
BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_storev32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PSHORT_V32HI_USI)
BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_storev64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PCHAR_V64QI_UDI)
+/* AVX512VP2INTERSECT */
+BDESC (0, OPTION_MASK_ISA_AVX512VP2INTERSECT, CODE_FOR_nothing, "__builtin_ia32_2intersectd512", IX86_BUILTIN_2INTERSECTD512, UNKNOWN, (int) VOID_FTYPE_PUHI_PUHI_V16SI_V16SI)
+BDESC (0, OPTION_MASK_ISA_AVX512VP2INTERSECT, CODE_FOR_nothing, "__builtin_ia32_2intersectq512", IX86_BUILTIN_2INTERSECTQ512, UNKNOWN, (int) VOID_FTYPE_PUQI_PUQI_V8DI_V8DI)
+BDESC (0, OPTION_MASK_ISA_AVX512VP2INTERSECT, CODE_FOR_nothing, "__builtin_ia32_2intersectd256", IX86_BUILTIN_2INTERSECTD256, UNKNOWN, (int) VOID_FTYPE_PUQI_PUQI_V8SI_V8SI)
+BDESC (0, OPTION_MASK_ISA_AVX512VP2INTERSECT, CODE_FOR_nothing, "__builtin_ia32_2intersectq256", IX86_BUILTIN_2INTERSECTQ256, UNKNOWN, (int) VOID_FTYPE_PUQI_PUQI_V4DI_V4DI)
+BDESC (0, OPTION_MASK_ISA_AVX512VP2INTERSECT, CODE_FOR_nothing, "__builtin_ia32_2intersectd128", IX86_BUILTIN_2INTERSECTD128, UNKNOWN, (int) VOID_FTYPE_PUQI_PUQI_V4SI_V4SI)
+BDESC (0, OPTION_MASK_ISA_AVX512VP2INTERSECT, CODE_FOR_nothing, "__builtin_ia32_2intersectq128", IX86_BUILTIN_2INTERSECTQ128, UNKNOWN, (int) VOID_FTYPE_PUQI_PUQI_V2DI_V2DI)
+
/* AVX512VL */
BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_loadv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCSHORT_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_loadv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCSHORT_V8HI_UQI)
diff --git a/gcc/config/i386/i386-builtins.c b/gcc/config/i386/i386-builtins.c
index 9484b3d..9b6a59d 100644
--- a/gcc/config/i386/i386-builtins.c
+++ b/gcc/config/i386/i386-builtins.c
@@ -1924,6 +1924,7 @@ enum processor_features
F_VPCLMULQDQ,
F_AVX512VNNI,
F_AVX512BITALG,
+ F_AVX512VP2INTERSECT,
F_AVX512BF16,
F_MAX
};
@@ -2070,6 +2071,7 @@ static const _isa_names_table isa_names_table[] =
{"vpclmulqdq", F_VPCLMULQDQ, P_ZERO},
{"avx512vnni", F_AVX512VNNI, P_ZERO},
{"avx512bitalg", F_AVX512BITALG, P_ZERO},
+ {"avx512vp2intersect",F_AVX512VP2INTERSECT, P_ZERO},
{"avx512bf16", F_AVX512BF16, P_ZERO}
};
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index 0438a42..ec5f15b 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -404,6 +404,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
if (isa_flag2 & OPTION_MASK_ISA_WBNOINVD)
def_or_undef (parse_in, "__WBNOINVD__");
+ if (isa_flag2 & OPTION_MASK_ISA_AVX512VP2INTERSECT)
+ def_or_undef (parse_in, "__AVX512VP2INTERSECT__");
if (isa_flag & OPTION_MASK_ISA_MMX)
def_or_undef (parse_in, "__MMX__");
if (isa_flag & OPTION_MASK_ISA_3DNOW)
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 72be1df..d50b811 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -11339,6 +11339,79 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
emit_move_insn (target, op0);
return target;
+ case IX86_BUILTIN_2INTERSECTD512:
+ case IX86_BUILTIN_2INTERSECTQ512:
+ case IX86_BUILTIN_2INTERSECTD256:
+ case IX86_BUILTIN_2INTERSECTQ256:
+ case IX86_BUILTIN_2INTERSECTD128:
+ case IX86_BUILTIN_2INTERSECTQ128:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
+ arg3 = CALL_EXPR_ARG (exp, 3);
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+ op3 = expand_normal (arg3);
+
+ if (!address_operand (op0, VOIDmode))
+ {
+ op0 = convert_memory_address (Pmode, op0);
+ op0 = copy_addr_to_reg (op0);
+ }
+ if (!address_operand (op1, VOIDmode))
+ {
+ op1 = convert_memory_address (Pmode, op1);
+ op1 = copy_addr_to_reg (op1);
+ }
+
+ switch (fcode)
+ {
+ case IX86_BUILTIN_2INTERSECTD512:
+ mode4 = P2HImode;
+ icode = CODE_FOR_avx512vp2intersect_2intersectv16si;
+ break;
+ case IX86_BUILTIN_2INTERSECTQ512:
+ mode4 = P2QImode;
+ icode = CODE_FOR_avx512vp2intersect_2intersectv8di;
+ break;
+ case IX86_BUILTIN_2INTERSECTD256:
+ mode4 = P2QImode;
+ icode = CODE_FOR_avx512vp2intersect_2intersectv8si;
+ break;
+ case IX86_BUILTIN_2INTERSECTQ256:
+ mode4 = P2QImode;
+ icode = CODE_FOR_avx512vp2intersect_2intersectv4di;
+ break;
+ case IX86_BUILTIN_2INTERSECTD128:
+ mode4 = P2QImode;
+ icode = CODE_FOR_avx512vp2intersect_2intersectv4si;
+ break;
+ case IX86_BUILTIN_2INTERSECTQ128:
+ mode4 = P2QImode;
+ icode = CODE_FOR_avx512vp2intersect_2intersectv2di;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ mode2 = insn_data[icode].operand[1].mode;
+ mode3 = insn_data[icode].operand[2].mode;
+ if (!insn_data[icode].operand[1].predicate (op2, mode2))
+ op2 = copy_to_mode_reg (mode2, op2);
+ if (!insn_data[icode].operand[2].predicate (op3, mode3))
+ op3 = copy_to_mode_reg (mode3, op3);
+
+ op4 = gen_reg_rtx (mode4);
+ emit_insn (GEN_FCN (icode) (op4, op2, op3));
+ mode0 = mode4 == P2HImode ? HImode : QImode;
+ emit_move_insn (gen_rtx_MEM (mode0, op0),
+ gen_lowpart (mode0, op4));
+ emit_move_insn (gen_rtx_MEM (mode0, op1),
+ gen_highpart (mode0, op4));
+
+ return 0;
+
case IX86_BUILTIN_RDPMC:
case IX86_BUILTIN_RDTSC:
case IX86_BUILTIN_RDTSCP:
diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def
index d0ae469..e1b86c8 100644
--- a/gcc/config/i386/i386-modes.def
+++ b/gcc/config/i386/i386-modes.def
@@ -101,6 +101,12 @@ VECTOR_MODE (INT, SI, 64); /* V64SI */
INT_MODE (OI, 32);
INT_MODE (XI, 64);
+/* Modes needs a consecutive register pair.
+ Note that Using PARTIAL_INT_MODE but not INT_MODE is to avoid mode promotion
+ issues. */
+PARTIAL_INT_MODE (HI, 16, P2QI);
+PARTIAL_INT_MODE (SI, 32, P2HI);
+
/* Keep the OI and XI modes from confusing the compiler into thinking
that these modes could actually be used for computation. They are
only holders for vectors during data movement. */
diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c
index a44b0d2..6f8851a 100644
--- a/gcc/config/i386/i386-options.c
+++ b/gcc/config/i386/i386-options.c
@@ -199,6 +199,7 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2,
{ "-mrdpid", OPTION_MASK_ISA_RDPID },
{ "-mpconfig", OPTION_MASK_ISA_PCONFIG },
{ "-mwbnoinvd", OPTION_MASK_ISA_WBNOINVD },
+ { "-mavx512vp2intersect", OPTION_MASK_ISA_AVX512VP2INTERSECT },
{ "-msgx", OPTION_MASK_ISA_SGX },
{ "-mavx5124vnniw", OPTION_MASK_ISA_AVX5124VNNIW },
{ "-mavx5124fmaps", OPTION_MASK_ISA_AVX5124FMAPS },
@@ -852,6 +853,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
IX86_ATTR_ISA ("avx512vbmi2", OPT_mavx512vbmi2),
IX86_ATTR_ISA ("avx512vnni", OPT_mavx512vnni),
IX86_ATTR_ISA ("avx512bitalg", OPT_mavx512bitalg),
+ IX86_ATTR_ISA ("avx512vp2intersect", OPT_mavx512vp2intersect),
IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
@@ -2027,6 +2029,10 @@ ix86_option_override_internal (bool main_args_p,
& OPTION_MASK_ISA_AVX512BITALG))
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BITALG;
+ if (((processor_alias_table[i].flags & PTA_AVX512VP2INTERSECT) != 0)
+ && !(opts->x_ix86_isa_flags2_explicit
+ & OPTION_MASK_ISA_AVX512VP2INTERSECT))
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX512VP2INTERSECT;
if (((processor_alias_table[i].flags & PTA_AVX5124VNNIW) != 0)
&& !(opts->x_ix86_isa_flags2_explicit
& OPTION_MASK_ISA_AVX5124VNNIW))
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 80b6ee7..2faa9f1 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -47,6 +47,7 @@ extern void ix86_reset_previous_fndecl (void);
extern bool ix86_using_red_zone (void);
+extern unsigned int ix86_regmode_natural_size (machine_mode);
#ifdef RTX_CODE
extern int standard_80387_constant_p (rtx);
extern const char *standard_80387_constant_opcode (rtx);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 649a7e4..1ca1712 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -18674,11 +18674,23 @@ ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
}
if (COMPLEX_MODE_P (mode))
return 2;
+ /* Register pair for mask registers. */
+ if (mode == P2QImode || mode == P2HImode)
+ return 2;
if (mode == V64SFmode || mode == V64SImode)
return 4;
return 1;
}
+/* Implement REGMODE_NATURAL_SIZE(MODE). */
+unsigned int
+ix86_regmode_natural_size (machine_mode mode)
+{
+ if (mode == P2HImode || mode == P2QImode)
+ return GET_MODE_SIZE (mode) / 2;
+ return UNITS_PER_WORD;
+}
+
/* Implement TARGET_HARD_REGNO_MODE_OK. */
static bool
@@ -18688,15 +18700,24 @@ ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
if (CC_REGNO_P (regno))
return GET_MODE_CLASS (mode) == MODE_CC;
if (GET_MODE_CLASS (mode) == MODE_CC
- || GET_MODE_CLASS (mode) == MODE_RANDOM
- || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
+ || GET_MODE_CLASS (mode) == MODE_RANDOM)
return false;
if (STACK_REGNO_P (regno))
return VALID_FP_MODE_P (mode);
if (MASK_REGNO_P (regno))
- return (VALID_MASK_REG_MODE (mode)
- || (TARGET_AVX512BW
- && VALID_MASK_AVX512BW_MODE (mode)));
+ {
+ /* Register pair only starts at even register number. */
+ if ((mode == P2QImode || mode == P2HImode))
+ return MASK_PAIR_REGNO_P(regno);
+
+ return (VALID_MASK_REG_MODE (mode)
+ || (TARGET_AVX512BW
+ && VALID_MASK_AVX512BW_MODE (mode)));
+ }
+
+ if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
+ return false;
+
if (SSE_REGNO_P (regno))
{
/* We implement the move patterns for all vector modes into and
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 04cf7a7..5e670b5 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -93,6 +93,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define TARGET_AVX512VNNI_P(x) TARGET_ISA_AVX512VNNI_P(x)
#define TARGET_AVX512BITALG TARGET_ISA_AVX512BITALG
#define TARGET_AVX512BITALG_P(x) TARGET_ISA_AVX512BITALG_P(x)
+#define TARGET_AVX512VP2INTERSECT TARGET_ISA_AVX512VP2INTERSECT
+#define TARGET_AVX512VP2INTERSECT_P(x) TARGET_ISA_AVX512VP2INTERSECT_P(x)
#define TARGET_FMA TARGET_ISA_FMA
#define TARGET_FMA_P(x) TARGET_ISA_FMA_P(x)
#define TARGET_SSE4A TARGET_ISA_SSE4A
@@ -1125,6 +1127,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
#define HARD_REGNO_NREGS_WITH_PADDING(REGNO, MODE) ((MODE) == XFmode ? 4 : 8)
+#define REGMODE_NATURAL_SIZE(MODE) ix86_regmode_natural_size (MODE)
+
#define VALID_AVX256_REG_MODE(MODE) \
((MODE) == V32QImode || (MODE) == V16HImode || (MODE) == V8SImode \
|| (MODE) == V4DImode || (MODE) == V2TImode || (MODE) == V8SFmode \
@@ -1509,6 +1513,7 @@ enum reg_class
#define MASK_REG_P(X) (REG_P (X) && MASK_REGNO_P (REGNO (X)))
#define MASK_REGNO_P(N) IN_RANGE ((N), FIRST_MASK_REG, LAST_MASK_REG)
+#define MASK_PAIR_REGNO_P(N) ((((N) - FIRST_MASK_REG) & 1) == 0)
#define MMX_REG_P(X) (REG_P (X) && MMX_REGNO_P (REGNO (X)))
#define MMX_REGNO_P(N) IN_RANGE ((N), FIRST_MMX_REG, LAST_MMX_REG)
@@ -2362,6 +2367,7 @@ const wide_int_bitmask PTA_AVX512BITALG (0, HOST_WIDE_INT_1U << 5);
const wide_int_bitmask PTA_RDPID (0, HOST_WIDE_INT_1U << 6);
const wide_int_bitmask PTA_PCONFIG (0, HOST_WIDE_INT_1U << 7);
const wide_int_bitmask PTA_WBNOINVD (0, HOST_WIDE_INT_1U << 8);
+const wide_int_bitmask PTA_AVX512VP2INTERSECT (0, HOST_WIDE_INT_1U << 9);
const wide_int_bitmask PTA_WAITPKG (0, HOST_WIDE_INT_1U << 9);
const wide_int_bitmask PTA_PTWRITE (0, HOST_WIDE_INT_1U << 10);
const wide_int_bitmask PTA_AVX512BF16 (0, HOST_WIDE_INT_1U << 11);
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index b6e751f..599e799 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -749,6 +749,10 @@ mavx512bitalg
Target Report Mask(ISA_AVX512BITALG) Var(ix86_isa_flags) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F and AVX512BITALG built-in functions and code generation.
+mavx512vp2intersect
+Target Report Mask(ISA_AVX512VP2INTERSECT) Var(ix86_isa_flags2) Save
+Support AVX512VP2INTERSECT built-in functions and code generation.
+
mfma
Target Report Mask(ISA_FMA) Var(ix86_isa_flags) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and FMA built-in functions and code generation.
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index db98820..743aa74 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -96,6 +96,10 @@
#include <avx512bitalgintrin.h>
+#include <avx512vp2intersectintrin.h>
+
+#include <avx512vp2intersectvlintrin.h>
+
#include <shaintrin.h>
#include <lzcntintrin.h>
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index a8d1fbf..abf7d98 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -188,6 +188,9 @@
;; For AVX512BITALG support
UNSPEC_VPSHUFBIT
+ ;; For VP2INTERSECT support
+ UNSPEC_VP2INTERSECT
+
;; For AVX512BF16 support
UNSPEC_VCVTNE2PS2BF16
UNSPEC_VCVTNEPS2BF16
@@ -22523,6 +22526,30 @@
[(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_mode_iterator VI48_AVX512VP2VL
+ [V8DI
+ (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
+ (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
+
+(define_insn "avx512vp2intersect_2intersect<mode>"
+ [(set (match_operand:P2QI 0 "register_operand" "=k")
+ (unspec:P2QI
+ [(match_operand:VI48_AVX512VP2VL 1 "register_operand" "v")
+ (match_operand:VI48_AVX512VP2VL 2 "vector_operand" "vm")]
+ UNSPEC_VP2INTERSECT))]
+ "TARGET_AVX512VP2INTERSECT"
+ "vp2intersect<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr ("prefix") ("evex"))])
+
+(define_insn "avx512vp2intersect_2intersectv16si"
+ [(set (match_operand:P2HI 0 "register_operand" "=k")
+ (unspec:P2HI [(match_operand:V16SI 1 "register_operand" "v")
+ (match_operand:V16SI 2 "vector_operand" "vm")]
+ UNSPEC_VP2INTERSECT))]
+ "TARGET_AVX512VP2INTERSECT"
+ "vp2intersectd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr ("prefix") ("evex"))])
+
(define_mode_iterator BF16 [V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
;; Converting from BF to SF
(define_mode_attr bf16_cvt_2sf