aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorliuhongt <hongtao.liu@intel.com>2022-01-24 11:05:47 +0800
committerliuhongt <hongtao.liu@intel.com>2022-02-13 17:57:38 +0800
commit7e204bd2f189850cb940677c99d8d93eb7dd40cd (patch)
tree3d8eb6dd756e3aa0792cd257f77689c2450155ac
parent23756b70630d6576c9d498cc85ae1dde38a1d5d0 (diff)
downloadgcc-7e204bd2f189850cb940677c99d8d93eb7dd40cd.zip
gcc-7e204bd2f189850cb940677c99d8d93eb7dd40cd.tar.gz
gcc-7e204bd2f189850cb940677c99d8d93eb7dd40cd.tar.bz2
Add vect_recog_cond_expr_convert_pattern.
The pattern converts (cond (cmp a b) (convert c) (convert d)) to (convert (cond (cmp a b) c d)) when 1) types_match (c, d) 2) single_use for (convert c) and (convert d) 3) TYPE_PRECISION (TREE_TYPE (c)) == TYPE_PRECISION (TREE_TYPE (a)) 4) INTEGRAL_TYPE_P (TREE_TYPE (c)). The pattern can save packing of mask and data (partial for data, 2 vs 1). gcc/ChangeLog: PR target/103771 * match.pd (cond_expr_convert_p): New match. * tree-vect-patterns.cc (gimple_cond_expr_convert_p): Declare. (vect_recog_cond_expr_convert_pattern): New. gcc/testsuite/ChangeLog: * gcc.target/i386/pr103771-2.c: New test. * gcc.target/i386/pr103771-3.c: New test.
-rw-r--r--gcc/match.pd14
-rw-r--r--gcc/testsuite/gcc.target/i386/pr103771-2.c8
-rw-r--r--gcc/testsuite/gcc.target/i386/pr103771-3.c21
-rw-r--r--gcc/tree-vect-patterns.cc96
4 files changed, 139 insertions, 0 deletions
diff --git a/gcc/match.pd b/gcc/match.pd
index 10f6228..05a10ab 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -7686,3 +7686,17 @@ and,
to the number of trailing zeroes. */
(match (ctz_table_index @1 @2 @3)
(rshift (mult (bit_and:c (negate @1) @1) INTEGER_CST@2) INTEGER_CST@3))
+
+(match (cond_expr_convert_p @0 @2 @3 @6) /* Captures: a compared operand, both unconverted arms, the comparison. */
+ (cond (simple_comparison@6 @0 @1) (convert@4 @2) (convert@5 @3))
+ (if (INTEGRAL_TYPE_P (type)
+ && INTEGRAL_TYPE_P (TREE_TYPE (@2))
+ && INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ && INTEGRAL_TYPE_P (TREE_TYPE (@3))
+ && TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (@0)) /* Result precision differs from the compared operand's. */
+ && TYPE_PRECISION (TREE_TYPE (@0))
+ == TYPE_PRECISION (TREE_TYPE (@2)) /* First unconverted arm is as wide as the compared operand. */
+ && TYPE_PRECISION (TREE_TYPE (@0))
+ == TYPE_PRECISION (TREE_TYPE (@3)) /* Likewise for the second unconverted arm. */
+ && single_use (@4) /* Each conversion result must have a single use. */
+ && single_use (@5))))
diff --git a/gcc/testsuite/gcc.target/i386/pr103771-2.c b/gcc/testsuite/gcc.target/i386/pr103771-2.c
new file mode 100644
index 0000000..962a3a7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103771-2.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-march=cascadelake -O3" } */
+/* { dg-final { scan-assembler-not "kunpck" } } */
+/* { dg-final { scan-assembler-not "kand" } } */
+/* { dg-final { scan-assembler-not "kor" } } */
+/* { dg-final { scan-assembler-not "kshift" } } */
+
+#include "pr103771.c"
diff --git a/gcc/testsuite/gcc.target/i386/pr103771-3.c b/gcc/testsuite/gcc.target/i386/pr103771-3.c
new file mode 100644
index 0000000..ef379b2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103771-3.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=cascadelake -O3" } */
+/* { dg-final { scan-assembler-not "kunpck" } } */
+/* { dg-final { scan-assembler-not "kand" } } */
+/* { dg-final { scan-assembler-not "kor" } } */
+/* { dg-final { scan-assembler-not "kshift" } } */
+
+typedef unsigned char uint8_t;
+
+static uint8_t x264_clip_uint8 (int x, unsigned int y)
+{ /* Yield (-x) >> 31 when x has any bit set outside the low 8, else y; the selected value is converted to uint8_t on return. */
+ return x & (~255) ? (-x) >> 31 : y;
+}
+
+void
+mc_weight (uint8_t* __restrict dst, uint8_t* __restrict src,
+ int i_width,int i_scale, unsigned int* __restrict y)
+{ /* Only externally visible function in this test; fills dst[0 .. i_width-1] element by element. */
+ for(int x = 0; x < i_width; x++)
+ dst[x] = x264_clip_uint8 (src[x] * i_scale, y[x]); /* Scaled source byte is the value tested, y[x] the fallback. */
+}
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 2baf974..a8f96d5 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -924,6 +924,101 @@ vect_reassociating_reduction_p (vec_info *vinfo,
return true;
}
+/* match.pd function to match
+ (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
+ with conditions:
+ 1) @1, @2, c, d, a, b all have integral types.
+ 2) Both @1 and @2 have a single use.
+ 3) a, c and d have the same precision.
+ 4) c and @1 have different precisions.
+
+ On a match, a, c, d and @3 are recorded, in that order. */
+
+extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
+
+/* Function vect_recog_cond_expr_convert
+
+ Try to find the following pattern:
+
+ TYPE_AB A,B;
+ TYPE_CD C,D;
+ TYPE_E E;
+ TYPE_E op_true = (TYPE_E) A;
+ TYPE_E op_false = (TYPE_E) B;
+
+ E = C cmp D ? op_true : op_false;
+
+ where
+ TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
+ TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
+ single_use of op_true and op_false.
+ TYPE_AB could differ in sign.
+
+ Input:
+
+ * STMT_VINFO: The stmt from which the pattern search begins.
+ Here it starts with E = C cmp D ? op_true : op_false;
+
+ Output:
+
+ TYPE_AB E' = C cmp D ? A : B;
+ TYPE_E E = (TYPE_E) E';
+
+ There may be an extra nop_convert for A or B to handle different signedness.
+
+ * TYPE_OUT: The vector type of the output of this pattern.
+
+ * Return value: A new stmt that will be used to replace the sequence of
+ stmts that constitute the pattern. In this case it will be:
+ E = (TYPE_E) E';
+ E' = C cmp D ? A : B; is recorded in the pattern definition statements. */
+
+static gimple *
+vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
+ stmt_vec_info stmt_vinfo, tree *type_out)
+{ /* Rewrite E = C cmp D ? (TYPE_E) A : (TYPE_E) B as a cond built in A's type followed by a single conversion. */
+ gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
+ tree lhs, match[4], temp, type, new_lhs, op2;
+ gimple *cond_stmt;
+ gimple *pattern_stmt;
+
+ if (!last_stmt) /* Only assignment statements are considered. */
+ return NULL;
+
+ lhs = gimple_assign_lhs (last_stmt);
+
+ /* Find E = C cmp D ? (TYPE_E) A : (TYPE_E) B;
+ TYPE_PRECISION (A) == TYPE_PRECISION (C). */
+ if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
+ return NULL;
+
+ vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);
+
+ op2 = match[2]; /* B, the unconverted false arm. */
+ type = TREE_TYPE (match[1]); /* Type of A, the unconverted true arm; the new cond is built in it. */
+ if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
+ { /* Signs differ: convert B to A's type first and record that conversion as a pattern def stmt. */
+ op2 = vect_recog_temp_ssa_var (type, NULL);
+ gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
+ append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
+ get_vectype_for_scalar_type (vinfo, type));
+ }
+
+ temp = vect_recog_temp_ssa_var (type, NULL);
+ cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
+ match[1], op2)); /* E' = cmp ? A : B, using the original comparison match[3]. */
+ append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
+ get_vectype_for_scalar_type (vinfo, type));
+ new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
+ pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp); /* Single conversion of E' to the original result type. */
+ *type_out = STMT_VINFO_VECTYPE (stmt_vinfo); /* Output vector type is taken from the original statement. */
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "created pattern stmt: %G", pattern_stmt);
+ return pattern_stmt;
+}
+
/* Function vect_recog_dot_prod_pattern
Try to find the following pattern:
@@ -5492,6 +5587,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
/* Must come after over_widening, which narrows the shift as much as
possible beforehand. */
{ vect_recog_average_pattern, "average" },
+ { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
{ vect_recog_mulhs_pattern, "mult_high" },
{ vect_recog_cast_forwprop_pattern, "cast_forwprop" },
{ vect_recog_widen_mult_pattern, "widen_mult" },