author     Yuliang Wang <yuliang.wang@arm.com>       2019-09-12 09:59:58 +0000
committer  Richard Sandiford <rsandifo@gcc.gnu.org>  2019-09-12 09:59:58 +0000
commit     58cc98767aa1d8136d36467b892dc4adaf427acc (patch)
tree       6b0b99d529e0034fbf28907e2a8dcc95ba22df37 /gcc/tree-vect-patterns.c
parent     8c58d9d837098d692d313a7116ed7d4a9e271287 (diff)
Vectorise multiply high with scaling operations (PR 89386)
2019-09-12  Yuliang Wang  <yuliang.wang@arm.com>

gcc/
	PR tree-optimization/89386
	* config/aarch64/aarch64-sve2.md (<su>mull<bt><Vwide>)
	(<r>shrnb<mode>, <r>shrnt<mode>): New SVE2 patterns.
	(<su>mulh<r>s<mode>3): New pattern for MULHRS.
	* config/aarch64/iterators.md (UNSPEC_SMULLB, UNSPEC_SMULLT)
	(UNSPEC_UMULLB, UNSPEC_UMULLT, UNSPEC_SHRNB, UNSPEC_SHRNT)
	(UNSPEC_RSHRNB, UNSPEC_RSHRNT, UNSPEC_SMULHS, UNSPEC_SMULHRS)
	(UNSPEC_UMULHS, UNSPEC_UMULHRS): New unspecs.
	(MULLBT, SHRNB, SHRNT, MULHRS): New int iterators.
	(su, r): Handle the unspecs above.
	(bt): New int attribute.
	* internal-fn.def (IFN_MULHS, IFN_MULHRS): New internal functions.
	* internal-fn.c (first_commutative_argument): Commutativity info
	for the above.
	* optabs.def (smulhs_optab, smulhrs_optab, umulhs_optab)
	(umulhrs_optab): New optabs.
	* doc/md.texi (smulhs@var{m3}, umulhs@var{m3})
	(smulhrs@var{m3}, umulhrs@var{m3}): Documentation for the above.
	* tree-vect-patterns.c (vect_recog_mulhs_pattern): New pattern
	function.
	(vect_vect_recog_func_ptrs): Add it.
	* testsuite/gcc.target/aarch64/sve2/mulhrs_1.c: New test.
	* testsuite/gcc.dg/vect/vect-mulhrs-1.c: As above.
	* testsuite/gcc.dg/vect/vect-mulhrs-2.c: As above.
	* testsuite/gcc.dg/vect/vect-mulhrs-3.c: As above.
	* testsuite/gcc.dg/vect/vect-mulhrs-4.c: As above.
	* doc/sourcebuild.texi (vect_mulhrs_hi): Document new target
	selector.
	* testsuite/lib/target-supports.exp
	(check_effective_target_vect_mulhrs_hi): Return true for AArch64
	with SVE2.

From-SVN: r275682
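
The scalar idiom being targeted is a fixed-point "multiply high" with
optional rounding. A minimal sketch of the two forms (function names,
types and shift amounts are illustrative, chosen to satisfy the
precision checks in vect_recog_mulhs_pattern; they are not taken from
the commit):

    /* Multiply high with scaling: res keeps only the low 16 bits of
       the shifted 32-bit product (a Q15 fixed-point multiply).  */
    void
    f_mulhs (short *restrict res, short *restrict a,
             short *restrict b, int n)
    {
      for (int i = 0; i < n; i++)
        res[i] = ((int) a[i] * (int) b[i]) >> 15;
    }

    /* As above, but rounding: shift by 14, add 1, then shift by 1.  */
    void
    f_mulhrs (short *restrict res, short *restrict a,
              short *restrict b, int n)
    {
      for (int i = 0; i < n; i++)
        res[i] = ((((int) a[i] * (int) b[i]) >> 14) + 1) >> 1;
    }

On AArch64 with SVE2, loops of this shape should now be vectorised via
the new smulhs/smulhrs optabs; the vect-mulhrs-* tests added by the
commit exercise the forms the pattern accepts.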
Diffstat (limited to 'gcc/tree-vect-patterns.c')
-rw-r--r--  gcc/tree-vect-patterns.c | 170
1 file changed, 170 insertions, 0 deletions
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index ccb2e1e..2f86f9e 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -1723,6 +1723,175 @@ vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out)
return pattern_stmt;
}
+/* Recognize the following patterns:
+
+ ATYPE a; // narrower than TYPE
+ BTYPE b; // narrower than TYPE
+
+ 1) Multiply high with scaling
+ TYPE res = ((TYPE) a * (TYPE) b) >> c;
+ 2) Multiply high with rounding and scaling
+ TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
+
+ where only the bottom half of res is used. */
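+
+/* Given the precision checks below, c must equal the precision of TYPE
+   minus the number of bits of res actually used, minus 1; d is the
+   same quantity minus 2.  */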
+
+static gimple *
+vect_recog_mulhs_pattern (stmt_vec_info last_stmt_info, tree *type_out)
+{
+ /* Check for a right shift. */
+ gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
+ if (!last_stmt
+ || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
+ return NULL;
+ vec_info *vinfo = last_stmt_info->vinfo;
+
+ /* Check that the shift result is wider than the users of the
+ result need (i.e. that narrowing would be a natural choice). */
+ tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
+ unsigned int target_precision
+ = vect_element_precision (last_stmt_info->min_output_precision);
+ if (!INTEGRAL_TYPE_P (lhs_type)
+ || target_precision >= TYPE_PRECISION (lhs_type))
+ return NULL;
+
+ /* Look through any change in sign on the outer shift input. */
+ vect_unpromoted_value unprom_rshift_input;
+ tree rshift_input = vect_look_through_possible_promotion
+ (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
+ if (!rshift_input
+ || TYPE_PRECISION (TREE_TYPE (rshift_input))
+ != TYPE_PRECISION (lhs_type))
+ return NULL;
+
+ /* Get the definition of the shift input. */
+ stmt_vec_info rshift_input_stmt_info
+ = vect_get_internal_def (vinfo, rshift_input);
+ if (!rshift_input_stmt_info)
+ return NULL;
+ gassign *rshift_input_stmt
+ = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
+ if (!rshift_input_stmt)
+ return NULL;
+
+ stmt_vec_info mulh_stmt_info;
+ tree scale_term;
+ internal_fn ifn;
+ unsigned int expect_offset;
+
+ /* Check for the presence of the rounding term. */
+ if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
+ {
+ /* Check that the outer shift was by 1. */
+ if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
+ return NULL;
+
+ /* Check that the second operand of the PLUS_EXPR is 1. */
+ if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
+ return NULL;
+
+ /* Look through any change in sign on the addition input. */
+ vect_unpromoted_value unprom_plus_input;
+ tree plus_input = vect_look_through_possible_promotion
+ (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
+ if (!plus_input
+ || TYPE_PRECISION (TREE_TYPE (plus_input))
+ != TYPE_PRECISION (TREE_TYPE (rshift_input)))
+ return NULL;
+
+ /* Get the definition of the multiply-high-scale part. */
+ stmt_vec_info plus_input_stmt_info
+ = vect_get_internal_def (vinfo, plus_input);
+ if (!plus_input_stmt_info)
+ return NULL;
+ gassign *plus_input_stmt
+ = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
+ if (!plus_input_stmt
+ || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
+ return NULL;
+
+ /* Look through any change in sign on the scaling input. */
+ vect_unpromoted_value unprom_scale_input;
+ tree scale_input = vect_look_through_possible_promotion
+ (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
+ if (!scale_input
+ || TYPE_PRECISION (TREE_TYPE (scale_input))
+ != TYPE_PRECISION (TREE_TYPE (plus_input)))
+ return NULL;
+
+ /* Get the definition of the multiply-high part. */
+ mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
+ if (!mulh_stmt_info)
+ return NULL;
+
+ /* Get the scaling term. */
+ scale_term = gimple_assign_rhs2 (plus_input_stmt);
+
+ expect_offset = target_precision + 2;
+ ifn = IFN_MULHRS;
+ }
+ else
+ {
+ mulh_stmt_info = rshift_input_stmt_info;
+ scale_term = gimple_assign_rhs2 (last_stmt);
+
+ expect_offset = target_precision + 1;
+ ifn = IFN_MULHS;
+ }
+
+ /* Check that the scaling factor is correct. */
+ if (TREE_CODE (scale_term) != INTEGER_CST
+ || wi::to_widest (scale_term) + expect_offset
+ != TYPE_PRECISION (lhs_type))
+ return NULL;
+
+ /* Check whether the scaling input term can be seen as two widened
+ inputs multiplied together. */
+ vect_unpromoted_value unprom_mult[2];
+ tree new_type;
+ unsigned int nops
+ = vect_widened_op_tree (mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
+ false, 2, unprom_mult, &new_type);
+ if (nops != 2)
+ return NULL;
+
+ vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
+
+ /* Adjust output precision. */
+ if (TYPE_PRECISION (new_type) < target_precision)
+ new_type = build_nonstandard_integer_type
+ (target_precision, TYPE_UNSIGNED (new_type));
+
+ /* Check for target support. */
+ tree new_vectype = get_vectype_for_scalar_type (new_type);
+ if (!new_vectype
+ || !direct_internal_fn_supported_p
+ (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
+ return NULL;
+
+ /* The IR requires a valid vector type for the cast result, even though
+ it's likely to be discarded. */
+ *type_out = get_vectype_for_scalar_type (lhs_type);
+ if (!*type_out)
+ return NULL;
+
+ /* Generate the IFN_MULHS or IFN_MULHRS call. */
+ tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
+ tree new_ops[2];
+ vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
+ unprom_mult, new_vectype);
+ gcall *mulhrs_stmt
+ = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
+ gimple_call_set_lhs (mulhrs_stmt, new_var);
+ gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "created pattern stmt: %G", mulhrs_stmt);
+
+ return vect_convert_output (last_stmt_info, lhs_type,
+ mulhrs_stmt, new_vectype);
+}
+
/* Recognize the patterns:
ATYPE a; // narrower than TYPE
@@ -4713,6 +4882,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
/* Must come after over_widening, which narrows the shift as much as
possible beforehand. */
{ vect_recog_average_pattern, "average" },
+ { vect_recog_mulhs_pattern, "mult_high" },
{ vect_recog_cast_forwprop_pattern, "cast_forwprop" },
{ vect_recog_widen_mult_pattern, "widen_mult" },
{ vect_recog_dot_prod_pattern, "dot_prod" },
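
As a quick check that the pattern fires, compiling one of the loops
above with -O3 -march=armv8.5-a+sve2 -fdump-tree-vect-details should
leave a "vect_recog_mulhs_pattern: detected" note in the vect dump
(the message comes from the vect_pattern_detected call above; the
exact option spelling for enabling SVE2 may differ between GCC
versions).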