author      Richard Sandiford <richard.sandiford@arm.com>   2018-07-03 10:03:44 +0000
committer   Richard Sandiford <rsandifo@gcc.gnu.org>        2018-07-03 10:03:44 +0000
commit      0267732baeb06ec100c1d610197bb88aae1c5123
tree        611c2aafe9c38ce62a4090cc7799136e0a16ad12 /gcc/tree-vect-patterns.c
parent      4ef79c960aa0967cf0298dc496a30a40d86ebd3c
[16/n] PR85694: Add detection of averaging operations
This patch adds detection of average instructions:
a = (((wide) b + (wide) c) >> 1);
--> a = (wide) .AVG_FLOOR (b, c);
a = (((wide) b + (wide) c + 1) >> 1);
--> a = (wide) .AVG_CEIL (b, c);
in cases where users of "a" need only the low half of the result,
making the cast to (wide) redundant. The heavy lifting was done by
earlier patches.
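As a concrete illustration (a sketch in the spirit of the new vect-avg tests
rather than a copy of them; the function names, element type and N below are
made up for the example), this is the kind of loop the pattern lets the
vectorizer handle:

#include <stdint.h>

#define N 256

/* Floor average: the int-sized additions feed only the >> 1, and only
   the low 8 bits of the shift result are stored, so the shift can now
   become an .AVG_FLOOR on unsigned char elements.  */
void
avg_floor (uint8_t *restrict res, uint8_t *restrict a, uint8_t *restrict b)
{
  for (int i = 0; i < N; ++i)
    res[i] = ((int) a[i] + (int) b[i]) >> 1;
}

/* Rounding average: the extra + 1 makes this an .AVG_CEIL candidate.  */
void
avg_ceil (uint8_t *restrict res, uint8_t *restrict a, uint8_t *restrict b)
{
  for (int i = 0; i < N; ++i)
    res[i] = ((int) a[i] + (int) b[i] + 1) >> 1;
}

Whether the internal functions are actually used depends on the target
providing the new avgM3_floor/uavgM3_floor and avgM3_ceil/uavgM3_ceil optabs.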
This showed up another problem in vectorizable_call: if the call is a
pattern definition statement rather than the main pattern statement,
the type of the vectorised call might be different from the type of the
original statement.
2018-07-03 Richard Sandiford <richard.sandiford@arm.com>
gcc/
PR tree-optimization/85694
* doc/md.texi (avgM3_floor, uavgM3_floor, avgM3_ceil)
(uavgM3_ceil): Document new optabs.
* doc/sourcebuild.texi (vect_avg_qi): Document new target selector.
* internal-fn.def (IFN_AVG_FLOOR, IFN_AVG_CEIL): New internal
functions.
* optabs.def (savg_floor_optab, uavg_floor_optab, savg_ceil_optab)
(uavg_ceil_optab): New optabs.
* tree-vect-patterns.c (vect_recog_average_pattern): New function.
(vect_vect_recog_func_ptrs): Add it.
* tree-vect-stmts.c (vectorizable_call): Get the type of the zero
constant directly from the associated lhs.
gcc/testsuite/
PR tree-optimization/85694
* lib/target-supports.exp (check_effective_target_vect_avg_qi): New
proc.
* gcc.dg/vect/vect-avg-1.c: New test.
* gcc.dg/vect/vect-avg-2.c: Likewise.
* gcc.dg/vect/vect-avg-3.c: Likewise.
* gcc.dg/vect/vect-avg-4.c: Likewise.
* gcc.dg/vect/vect-avg-5.c: Likewise.
* gcc.dg/vect/vect-avg-6.c: Likewise.
* gcc.dg/vect/vect-avg-7.c: Likewise.
* gcc.dg/vect/vect-avg-8.c: Likewise.
* gcc.dg/vect/vect-avg-9.c: Likewise.
* gcc.dg/vect/vect-avg-10.c: Likewise.
* gcc.dg/vect/vect-avg-11.c: Likewise.
* gcc.dg/vect/vect-avg-12.c: Likewise.
* gcc.dg/vect/vect-avg-13.c: Likewise.
* gcc.dg/vect/vect-avg-14.c: Likewise.
From-SVN: r262335
Diffstat (limited to 'gcc/tree-vect-patterns.c')
-rw-r--r--   gcc/tree-vect-patterns.c   150
1 file changed, 150 insertions, 0 deletions
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index a1649d8..51defa0 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -1721,6 +1721,153 @@ vect_recog_over_widening_pattern (vec<gimple *> *stmts, tree *type_out)
   return pattern_stmt;
 }
 
+/* Recognize the patterns:
+
+         ATYPE a;  // narrower than TYPE
+         BTYPE b;  // narrower than TYPE
+     (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
+  or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
+
+   where only the bottom half of avg is used.  Try to transform them into:
+
+     (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
+  or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
+
+   followed by:
+
+         TYPE avg = (TYPE) avg';
+
+   where NTYPE is no wider than half of TYPE.  Since only the bottom half
+   of avg is used, all or part of the cast of avg' should become redundant.  */
+
+static gimple *
+vect_recog_average_pattern (vec<gimple *> *stmts, tree *type_out)
+{
+  /* Check for a shift right by one bit.  */
+  gassign *last_stmt = dyn_cast <gassign *> (stmts->pop ());
+  if (!last_stmt
+      || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
+      || !integer_onep (gimple_assign_rhs2 (last_stmt)))
+    return NULL;
+
+  stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);
+  vec_info *vinfo = last_stmt_info->vinfo;
+
+  /* Check that the shift result is wider than the users of the
+     result need (i.e. that narrowing would be a natural choice).  */
+  tree lhs = gimple_assign_lhs (last_stmt);
+  tree type = TREE_TYPE (lhs);
+  unsigned int target_precision
+    = vect_element_precision (last_stmt_info->min_output_precision);
+  if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
+    return NULL;
+
+  /* Get the definition of the shift input.  */
+  tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
+  stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
+  if (!plus_stmt_info)
+    return NULL;
+
+  /* Check whether the shift input can be seen as a tree of additions on
+     2 or 3 widened inputs.
+
+     Note that the pattern should be a win even if the result of one or
+     more additions is reused elsewhere: if the pattern matches, we'd be
+     replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s.  */
+  internal_fn ifn = IFN_AVG_FLOOR;
+  vect_unpromoted_value unprom[3];
+  tree new_type;
+  unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
+                                            PLUS_EXPR, false, 3,
+                                            unprom, &new_type);
+  if (nops == 0)
+    return NULL;
+  if (nops == 3)
+    {
+      /* Check that one operand is 1.  */
+      unsigned int i;
+      for (i = 0; i < 3; ++i)
+        if (integer_onep (unprom[i].op))
+          break;
+      if (i == 3)
+        return NULL;
+      /* Throw away the 1 operand and keep the other two.  */
+      if (i < 2)
+        unprom[i] = unprom[2];
+      ifn = IFN_AVG_CEIL;
+    }
+
+  vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
+
+  /* We know that:
+
+     (a) the operation can be viewed as:
+
+           TYPE widened0 = (TYPE) UNPROM[0];
+           TYPE widened1 = (TYPE) UNPROM[1];
+           TYPE tmp1 = widened0 + widened1 {+ 1};
+           TYPE tmp2 = tmp1 >> 1;   // LAST_STMT_INFO
+
+     (b) the first two statements are equivalent to:
+
+           TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
+           TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
+
+     (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
+         where sensible;
+
+     (d) all the operations can be performed correctly at twice the width of
+         NEW_TYPE, due to the nature of the average operation; and
+
+     (e) users of the result of the right shift need only TARGET_PRECISION
+         bits, where TARGET_PRECISION is no more than half of TYPE's
+         precision.
+
+     Under these circumstances, the only situation in which NEW_TYPE
+     could be narrower than TARGET_PRECISION is if widened0, widened1
+     and an addition result are all used more than once.  Thus we can
+     treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
+     as "free", whereas widening the result of the average instruction
+     from NEW_TYPE to TARGET_PRECISION would be a new operation.  It's
+     therefore better not to go narrower than TARGET_PRECISION.  */
+  if (TYPE_PRECISION (new_type) < target_precision)
+    new_type = build_nonstandard_integer_type (target_precision,
+                                               TYPE_UNSIGNED (new_type));
+
+  /* Check for target support.  */
+  tree new_vectype = get_vectype_for_scalar_type (new_type);
+  if (!new_vectype
+      || !direct_internal_fn_supported_p (ifn, new_vectype,
+                                          OPTIMIZE_FOR_SPEED))
+    return NULL;
+
+  /* The IR requires a valid vector type for the cast result, even though
+     it's likely to be discarded.  */
+  *type_out = get_vectype_for_scalar_type (type);
+  if (!*type_out)
+    return NULL;
+
+  /* Generate the IFN_AVG* call.  */
+  tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
+  tree new_ops[2];
+  vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
+                       unprom, new_vectype);
+  gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
+                                                    new_ops[1]);
+  gimple_call_set_lhs (average_stmt, new_var);
+  gimple_set_location (average_stmt, gimple_location (last_stmt));
+
+  if (dump_enabled_p ())
+    {
+      dump_printf_loc (MSG_NOTE, vect_location,
+                       "created pattern stmt: ");
+      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, average_stmt, 0);
+    }
+
+  stmts->safe_push (last_stmt);
+  return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype);
+}
+
 /* Recognize cases in which the input to a cast is wider than its
    output, and the input is fed by a widening operation.  Fold this
    by removing the unnecessary intermediate widening.  E.g.:
@@ -4670,6 +4817,9 @@ struct vect_recog_func
    less comples onex (widen_sum only after dot_prod or sad for example).  */
 static vect_recog_func vect_vect_recog_func_ptrs[] = {
   { vect_recog_over_widening_pattern, "over_widening" },
+  /* Must come after over_widening, which narrows the shift as much as
+     possible beforehand.  */
+  { vect_recog_average_pattern, "average" },
   { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
   { vect_recog_widen_mult_pattern, "widen_mult" },
   { vect_recog_dot_prod_pattern, "dot_prod" },
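As a side note on point (d) in the comment above (that all the operations
remain correct at twice the width of NEW_TYPE): the floor and ceiling
averages of two narrow values always fit back in the narrow type, which is
what lets the wide intermediate disappear.  The standalone program below is
not part of the patch; it just double-checks that claim for all 8-bit input
pairs, comparing the widened form from the source pattern against the usual
overflow-free average identities, which never need more than 8 bits:

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  for (unsigned int a = 0; a < 256; ++a)
    for (unsigned int b = 0; b < 256; ++b)
      {
        /* Reference results, computed in a wider type exactly as the
           source pattern does.  */
        uint8_t floor_wide = (a + b) >> 1;
        uint8_t ceil_wide = (a + b + 1) >> 1;

        /* The same results from identities whose intermediate values
           all stay within 8 bits.  */
        uint8_t na = a, nb = b;
        uint8_t floor_narrow = (na & nb) + ((na ^ nb) >> 1);
        uint8_t ceil_narrow = (na | nb) - ((na ^ nb) >> 1);

        assert (floor_wide == floor_narrow);
        assert (ceil_wide == ceil_narrow);
      }
  return 0;
}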