aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorliuhongt <hongtao.liu@intel.com>2021-05-21 09:48:18 +0800
committerliuhongt <hongtao.liu@intel.com>2021-06-28 09:15:40 +0800
commit28560c6d4043d8f6ac570f35fb84e952e9c719fe (patch)
tree197e95607f3aab986ded459debbe1b36ab644932
parenta766c79fc64bba66f16adfe1c8639b5857219f4d (diff)
downloadgcc-28560c6d4043d8f6ac570f35fb84e952e9c719fe.zip
gcc-28560c6d4043d8f6ac570f35fb84e952e9c719fe.tar.gz
gcc-28560c6d4043d8f6ac570f35fb84e952e9c719fe.tar.bz2
Fold blendv builtins into gimple.
Fold __builtin_ia32_pblendvb128 (a, b, c) as VEC_COND_EXPR (c < 0, b, a), similar for float version but with mask operand VIEW_CONVERT_EXPR to same sized integer vectype. gcc/ChangeLog: * config/i386/i386-builtin.def (IX86_BUILTIN_BLENDVPD256, IX86_BUILTIN_BLENDVPS256, IX86_BUILTIN_PBLENDVB256, IX86_BUILTIN_BLENDVPD, IX86_BUILTIN_BLENDVPS, IX86_BUILTIN_PBLENDVB128): Replace icode with CODE_FOR_nothing. * config/i386/i386.c (ix86_gimple_fold_builtin): Fold blendv builtins. * config/i386/sse.md (*<sse4_1_avx2>_pblendvb_lt_subreg_not): New pre_reload splitter. gcc/testsuite/ChangeLog: * gcc.target/i386/funcspec-8.c: Replace __builtin_ia32_blendvpd with __builtin_ia32_roundps_az. * gcc.target/i386/blendv-1.c: New test. * gcc.target/i386/blendv-2.c: New test.
-rw-r--r--gcc/config/i386/i386-builtin.def12
-rw-r--r--gcc/config/i386/i386.c37
-rw-r--r--gcc/config/i386/sse.md22
-rw-r--r--gcc/testsuite/gcc.target/i386/blendv-1.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/blendv-2.c41
-rw-r--r--gcc/testsuite/gcc.target/i386/funcspec-8.c16
6 files changed, 165 insertions, 14 deletions
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index ea79e0b..1cc0cc6 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -902,13 +902,13 @@ BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_palignrdi,
/* SSE4.1 */
BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
-BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF)
-BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF)
+BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_nothing, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF)
+BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_nothing, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF)
BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT)
-BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI)
+BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_nothing, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI)
BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT)
BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI)
@@ -1028,8 +1028,8 @@ BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpe
BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT)
BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT)
-BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF)
-BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF)
+BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF)
+BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF)
BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT)
BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT)
BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT)
@@ -1154,7 +1154,7 @@ BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX
BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI)
BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI)
-BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI)
+BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI)
BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT)
BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index c71c9e6..a93128f 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -17983,6 +17983,43 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
}
break;
+ case IX86_BUILTIN_PBLENDVB128:
+ case IX86_BUILTIN_PBLENDVB256:
+ case IX86_BUILTIN_BLENDVPS:
+ case IX86_BUILTIN_BLENDVPD:
+ case IX86_BUILTIN_BLENDVPS256:
+ case IX86_BUILTIN_BLENDVPD256:
+ gcc_assert (n_args == 3);
+ arg0 = gimple_call_arg (stmt, 0);
+ arg1 = gimple_call_arg (stmt, 1);
+ arg2 = gimple_call_arg (stmt, 2);
+ if (gimple_call_lhs (stmt))
+ {
+ location_t loc = gimple_location (stmt);
+ tree type = TREE_TYPE (arg2);
+ gimple_seq stmts = NULL;
+ if (VECTOR_FLOAT_TYPE_P (type))
+ {
+ tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
+ ? intSI_type_node : intDI_type_node;
+ type = get_same_sized_vectype (itype, type);
+ arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
+ }
+ tree zero_vec = build_zero_cst (type);
+ tree cmp_type = truth_type_for (type);
+ tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec);
+ gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+ gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
+ VEC_COND_EXPR, cmp,
+ arg1, arg0);
+ gimple_set_location (g, loc);
+ gsi_replace (gsi, g, false);
+ }
+ else
+ gsi_replace (gsi, gimple_build_nop (), false);
+ return true;
+
+
case IX86_BUILTIN_PCMPEQB128:
case IX86_BUILTIN_PCMPEQW128:
case IX86_BUILTIN_PCMPEQD128:
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index e4f01e6..3100635 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -17898,6 +17898,28 @@
(set_attr "btver2_decode" "vector,vector,vector")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn_and_split "*<sse4_1_avx2>_pblendvb_lt_subreg_not"
+ [(set (match_operand:VI1_AVX2 0 "register_operand")
+ (unspec:VI1_AVX2
+ [(match_operand:VI1_AVX2 2 "vector_operand")
+ (match_operand:VI1_AVX2 1 "register_operand")
+ (lt:VI1_AVX2
+ (subreg:VI1_AVX2
+ (not (match_operand 3 "register_operand")) 0)
+ (match_operand:VI1_AVX2 4 "const0_operand"))]
+ UNSPEC_BLENDV))]
+ "TARGET_SSE4_1
+ && GET_MODE_CLASS (GET_MODE (operands[3])) == MODE_VECTOR_INT
+ && GET_MODE_SIZE (GET_MODE (operands[3])) == <MODE_SIZE>
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:VI1_AVX2
+ [(match_dup 1) (match_dup 2)
+ (lt:VI1_AVX2 (match_dup 3) (match_dup 4))] UNSPEC_BLENDV))]
+ "operands[3] = gen_lowpart (<MODE>mode, operands[3]);")
+
(define_insn "sse4_1_pblendw"
[(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
(vec_merge:V8HI
diff --git a/gcc/testsuite/gcc.target/i386/blendv-1.c b/gcc/testsuite/gcc.target/i386/blendv-1.c
new file mode 100644
index 0000000..fcbbfb9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/blendv-1.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O2 -mno-avx512f" } */
+/* { dg-final { scan-assembler-times {pblendvb[\t ]*%xmm} 1 } } */
+/* { dg-final { scan-assembler-times {pblendvb[\t ]*%ymm} 1 } } */
+/* { dg-final { scan-assembler-times {blendvps[\t ]*%xmm} 1 } } */
+/* { dg-final { scan-assembler-times {blendvps[\t ]*%ymm} 1 } } */
+/* { dg-final { scan-assembler-times {blendvpd[\t ]*%xmm} 1 } } */
+/* { dg-final { scan-assembler-times {blendvpd[\t ]*%ymm} 1 } } */
+
+typedef float v4sf __attribute__ ((vector_size (16)));
+typedef float v8sf __attribute__ ((vector_size (32)));
+typedef double v2df __attribute__ ((vector_size (16)));
+typedef double v4df __attribute__ ((vector_size (32)));
+typedef char v16qi __attribute__ ((vector_size (16)));
+typedef char v32qi __attribute__ ((vector_size (32)));
+
+v4sf
+foo (v4sf a, v4sf b, v4sf c)
+{
+ return __builtin_ia32_blendvps (a, b, c);
+}
+
+v8sf
+foo2 (v8sf a, v8sf b, v8sf c)
+{
+ return __builtin_ia32_blendvps256 (a, b, c);
+}
+
+v2df
+foo3 (v2df a, v2df b, v2df c)
+{
+ return __builtin_ia32_blendvpd (a, b, c);
+}
+
+v4df
+foo4 (v4df a, v4df b, v4df c)
+{
+ return __builtin_ia32_blendvpd256 (a, b, c);
+}
+
+v16qi
+foo5 (v16qi a, v16qi b, v16qi c)
+{
+ return __builtin_ia32_pblendvb128 (a, b, c);
+}
+
+v32qi
+foo6 (v32qi a, v32qi b, v32qi c)
+{
+ return __builtin_ia32_pblendvb256 (a, b, c);
+}
diff --git a/gcc/testsuite/gcc.target/i386/blendv-2.c b/gcc/testsuite/gcc.target/i386/blendv-2.c
new file mode 100644
index 0000000..e61e023
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/blendv-2.c
@@ -0,0 +1,41 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O2 -mno-avx512f" } */
+/* { dg-final { scan-assembler-not {pblendv} } } */
+/* { dg-final { scan-assembler-not {blendvp} } } */
+
+#include <x86intrin.h>
+__m128
+foo (__m128 a, __m128 b)
+{
+ return _mm_blendv_ps (a, b, _mm_setzero_ps ());
+}
+
+__m256
+foo2 (__m256 a, __m256 b)
+{
+ return _mm256_blendv_ps (a, b, _mm256_set1_ps (-1.0));
+}
+
+__m128d
+foo3 (__m128d a, __m128d b, __m128d c)
+{
+ return _mm_blendv_pd (a, b, _mm_set1_pd (1.0));
+}
+
+__m256d
+foo4 (__m256d a, __m256d b, __m256d c)
+{
+ return _mm256_blendv_pd (a, b, _mm256_set1_pd (-134.3));
+}
+
+__m128i
+foo5 (__m128i a, __m128i b, __m128i c)
+{
+ return _mm_blendv_epi8 (a, b, _mm_set1_epi8 (3));
+}
+
+__m256i
+foo6 (__m256i a, __m256i b, __m256i c)
+{
+ return _mm256_blendv_epi8 (a, b, _mm256_set1_epi8 (-22));
+}
diff --git a/gcc/testsuite/gcc.target/i386/funcspec-8.c b/gcc/testsuite/gcc.target/i386/funcspec-8.c
index 0a6c709..f155411 100644
--- a/gcc/testsuite/gcc.target/i386/funcspec-8.c
+++ b/gcc/testsuite/gcc.target/i386/funcspec-8.c
@@ -52,19 +52,19 @@ generic_psignd128 (__m128w a, __m128w b)
#error "-msse4.1 should not be set for this test"
#endif
-__m128d sse4_1_blendvpd (__m128d a, __m128d b, __m128d c) __attribute__((__target__("sse4.1")));
-__m128d generic_blendvpd (__m128d a, __m128d b, __m128d c);
+__m128 sse4_1_roundv4sf2 (__m128 a) __attribute__((__target__("sse4.1")));
+__m128 generic_roundv4sf2 (__m128 a);
-__m128d
-sse4_1_blendvpd (__m128d a, __m128d b, __m128d c)
+__m128
+sse4_1_roundv4sf2 (__m128 a)
{
- return __builtin_ia32_blendvpd (a, b, c);
+ return __builtin_ia32_roundps_az (a);
}
-__m128d
-generic_blendvpd (__m128d a, __m128d b, __m128d c)
+__m128
+generic_blendvpd (__m128 a)
{
- return __builtin_ia32_blendvpd (a, b, c); /* { dg-error "needs isa option" } */
+ return __builtin_ia32_roundps_az (a); /* { dg-error "needs isa option" } */
}
#ifdef __SSE4_2__