Diffstat (limited to 'gcc/tree-vect-patterns.c')
-rw-r--r--  gcc/tree-vect-patterns.c  150
1 files changed, 150 insertions, 0 deletions
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index a1649d8..51defa0 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -1721,6 +1721,153 @@ vect_recog_over_widening_pattern (vec<gimple *> *stmts, tree *type_out)
return pattern_stmt;
}
+/* Recognize the patterns:
+
+ ATYPE a; // narrower than TYPE
+ BTYPE b; // narrower than TYPE
+ (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
+ or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
+
+ where only the bottom half of avg is used. Try to transform them into:
+
+ (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
+ or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
+
+ followed by:
+
+ TYPE avg = (TYPE) avg';
+
+ where NTYPE is no wider than half of TYPE. Since only the bottom half
+ of avg is used, all or part of the cast of avg' should become redundant. */
+
+static gimple *
+vect_recog_average_pattern (vec<gimple *> *stmts, tree *type_out)
+{
+ /* Check for a shift right by one bit. */
+ gassign *last_stmt = dyn_cast <gassign *> (stmts->pop ());
+ if (!last_stmt
+ || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
+ || !integer_onep (gimple_assign_rhs2 (last_stmt)))
+ return NULL;
+
+ stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);
+ vec_info *vinfo = last_stmt_info->vinfo;
+
+ /* Check that the shift result is wider than the users of the
+ result need (i.e. that narrowing would be a natural choice). */
+ tree lhs = gimple_assign_lhs (last_stmt);
+ tree type = TREE_TYPE (lhs);
+ unsigned int target_precision
+ = vect_element_precision (last_stmt_info->min_output_precision);
+ if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
+ return NULL;
+
+ /* Get the definition of the shift input. */
+ tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
+ stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
+ if (!plus_stmt_info)
+ return NULL;
+
+ /* Check whether the shift input can be seen as a tree of additions on
+ 2 or 3 widened inputs.
+
+ Note that the pattern should be a win even if the result of one or
+ more additions is reused elsewhere: if the pattern matches, we'd be
+ replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
+ internal_fn ifn = IFN_AVG_FLOOR;
+ vect_unpromoted_value unprom[3];
+ tree new_type;
+ unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
+ PLUS_EXPR, false, 3,
+ unprom, &new_type);
+ if (nops == 0)
+ return NULL;
+ if (nops == 3)
+ {
+ /* Check that one operand is 1. */
+ unsigned int i;
+ for (i = 0; i < 3; ++i)
+ if (integer_onep (unprom[i].op))
+ break;
+ if (i == 3)
+ return NULL;
+ /* Throw away the 1 operand and keep the other two. */
+ if (i < 2)
+ unprom[i] = unprom[2];
+ ifn = IFN_AVG_CEIL;
+ }
+
+ vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
+
+ /* We know that:
+
+ (a) the operation can be viewed as:
+
+ TYPE widened0 = (TYPE) UNPROM[0];
+ TYPE widened1 = (TYPE) UNPROM[1];
+ TYPE tmp1 = widened0 + widened1 {+ 1};
+ TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
+
+ (b) the first two statements are equivalent to:
+
+ TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
+ TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
+
+ (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
+ where sensible;
+
+ (d) all the operations can be performed correctly at twice the width of
+ NEW_TYPE, due to the nature of the average operation; and
+
+ (e) users of the result of the right shift need only TARGET_PRECISION
+ bits, where TARGET_PRECISION is no more than half of TYPE's
+ precision.
+
+ Under these circumstances, the only situation in which NEW_TYPE
+ could be narrower than TARGET_PRECISION is if widened0, widened1
+ and an addition result are all used more than once. Thus we can
+ treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
+ as "free", whereas widening the result of the average instruction
+ from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
+ therefore better not to go narrower than TARGET_PRECISION. */
+ if (TYPE_PRECISION (new_type) < target_precision)
+ new_type = build_nonstandard_integer_type (target_precision,
+ TYPE_UNSIGNED (new_type));
+
+ /* Check for target support. */
+ tree new_vectype = get_vectype_for_scalar_type (new_type);
+ if (!new_vectype
+ || !direct_internal_fn_supported_p (ifn, new_vectype,
+ OPTIMIZE_FOR_SPEED))
+ return NULL;
+
+ /* The IR requires a valid vector type for the cast result, even though
+ it's likely to be discarded. */
+ *type_out = get_vectype_for_scalar_type (type);
+ if (!*type_out)
+ return NULL;
+
+ /* Generate the IFN_AVG* call. */
+ tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
+ tree new_ops[2];
+ vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
+ unprom, new_vectype);
+ gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
+ new_ops[1]);
+ gimple_call_set_lhs (average_stmt, new_var);
+ gimple_set_location (average_stmt, gimple_location (last_stmt));
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "created pattern stmt: ");
+ dump_gimple_stmt (MSG_NOTE, TDF_SLIM, average_stmt, 0);
+ }
+
+ stmts->safe_push (last_stmt);
+ return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype);
+}
+
/* Recognize cases in which the input to a cast is wider than its
output, and the input is fed by a widening operation. Fold this
by removing the unnecessary intermediate widening. E.g.:
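As an illustrative sketch of what the new recognizer targets (the function and
array names below are invented, not taken from the patch or its testsuite): with
unsigned char operands, the C integer promotions make the addition and shift
happen in int, i.e. the wide TYPE of the comment above, while the store back to
unsigned char means only the bottom bits of the shift result are used.

    void
    avg_floor_u8 (unsigned char *restrict out, unsigned char *restrict a,
                  unsigned char *restrict b, int n)
    {
      for (int i = 0; i < n; ++i)
        /* ((int) a[i] + (int) b[i]) >> 1: candidate for IFN_AVG_FLOOR.  */
        out[i] = (a[i] + b[i]) >> 1;
    }

    void
    avg_ceil_u8 (unsigned char *restrict out, unsigned char *restrict a,
                 unsigned char *restrict b, int n)
    {
      for (int i = 0; i < n; ++i)
        /* The extra + 1 gives the rounding form: candidate for IFN_AVG_CEIL.  */
        out[i] = (a[i] + b[i] + 1) >> 1;
    }

Whether either loop actually ends up as an .AVG_FLOOR or .AVG_CEIL call still
depends on the direct_internal_fn_supported_p check above, i.e. on the target
providing the corresponding vector instruction for the narrow element type.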
@@ -4670,6 +4817,9 @@ struct vect_recog_func
   less complex ones (widen_sum only after dot_prod or sad for example).  */
static vect_recog_func vect_vect_recog_func_ptrs[] = {
{ vect_recog_over_widening_pattern, "over_widening" },
+ /* Must come after over_widening, which narrows the shift as much as
+ possible beforehand. */
+ { vect_recog_average_pattern, "average" },
{ vect_recog_cast_forwprop_pattern, "cast_forwprop" },
{ vect_recog_widen_mult_pattern, "widen_mult" },
{ vect_recog_dot_prod_pattern, "dot_prod" },
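The correctness claims (d) and (e) in the comment of vect_recog_average_pattern
come down to one arithmetic fact: the sum of two N-bit values plus one fits in
2N bits, and shifting it right by one yields a value that fits back in N bits,
so performing the operation at the narrow width and truncating afterwards cannot
change the bits that the users of the result consume.  A standalone sanity check
for the 8-bit IFN_AVG_CEIL case (purely illustrative; not part of the patch or
the GCC testsuite) could look like this:

    #include <assert.h>

    int
    main (void)
    {
      for (unsigned int a = 0; a < 256; ++a)
        for (unsigned int b = 0; b < 256; ++b)
          {
            /* Original form: compute in the wide type (int here) and use
               only the low 8 bits of the result.  */
            unsigned char ref = ((int) a + (int) b + 1) >> 1;

            /* Narrow form: the same operation at twice the 8-bit element
               width, as an 8-bit .AVG_CEIL would effectively perform it,
               followed by truncation to the element type.  */
            unsigned short wide2 = (unsigned short) (a + b + 1) >> 1;
            unsigned char narrow = (unsigned char) wide2;

            /* The average of two 8-bit values always fits in 8 bits, so
               the truncation is value-preserving and both forms agree.  */
            assert (wide2 <= 0xff);
            assert (narrow == ref);
          }
      return 0;
    }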