author     Yuliang Wang <yuliang.wang@arm.com>        2019-09-12 09:59:58 +0000
committer  Richard Sandiford <rsandifo@gcc.gnu.org>   2019-09-12 09:59:58 +0000
commit     58cc98767aa1d8136d36467b892dc4adaf427acc (patch)
tree       6b0b99d529e0034fbf28907e2a8dcc95ba22df37 /gcc/tree-vect-patterns.c
parent     8c58d9d837098d692d313a7116ed7d4a9e271287 (diff)
Vectorise multiply high with scaling operations (PR 89386)
2019-09-12 Yuliang Wang <yuliang.wang@arm.com>
gcc/
PR tree-optimization/89386
* config/aarch64/aarch64-sve2.md (<su>mull<bt><Vwide>)
(<r>shrnb<mode>, <r>shrnt<mode>): New SVE2 patterns.
(<su>mulh<r>s<mode>3): New pattern for MULHRS.
* config/aarch64/iterators.md (UNSPEC_SMULLB, UNSPEC_SMULLT)
(UNSPEC_UMULLB, UNSPEC_UMULLT, UNSPEC_SHRNB, UNSPEC_SHRNT)
(UNSPEC_RSHRNB, UNSPEC_RSHRNT, UNSPEC_SMULHS, UNSPEC_SMULHRS)
(UNSPEC_UMULHS, UNSPEC_UMULHRS): New unspecs.
(MULLBT, SHRNB, SHRNT, MULHRS): New int iterators.
(su, r): Handle the unspecs above.
(bt): New int attribute.
* internal-fn.def (IFN_MULHS, IFN_MULHRS): New internal functions.
* internal-fn.c (first_commutative_argument): Commutativity info for
above.
* optabs.def (smulhs_optab, smulhrs_optab, umulhs_optab)
(umulhrs_optab): New optabs.
* doc/md.texi (smulhs@var{m3}, umulhs@var{m3})
(smulhrs@var{m3}, umulhrs@var{m3}): Documentation for the above.
* tree-vect-patterns.c (vect_recog_mulhs_pattern): New pattern
function.
(vect_vect_recog_func_ptrs): Add it.
* testsuite/gcc.target/aarch64/sve2/mulhrs_1.c: New test.
* testsuite/gcc.dg/vect/vect-mulhrs-1.c: As above.
* testsuite/gcc.dg/vect/vect-mulhrs-2.c: As above.
* testsuite/gcc.dg/vect/vect-mulhrs-3.c: As above.
* testsuite/gcc.dg/vect/vect-mulhrs-4.c: As above.
* doc/sourcebuild.texi (vect_mulhrs_hi): Document new target selector.
* testsuite/lib/target-supports.exp
(check_effective_target_vect_mulhrs_hi): Return true for AArch64
with SVE2.
From-SVN: r275682
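
The idiom this patch teaches the vectoriser to recognise is a fixed-point
"multiply high with scaling", optionally with rounding. As a rough guide to
the arithmetic: for 16-bit inputs whose product is computed in 32 bits and
then narrowed back to 16 bits, the scale-factor check in
vect_recog_mulhs_pattern (in the diff below) requires a shift of
32 - 16 - 1 = 15 for the plain form (IFN_MULHS) and 32 - 16 - 2 = 14 for the
rounding form (IFN_MULHRS). A minimal sketch of the kind of scalar loops
involved; the function and variable names here are illustrative, not taken
from the new tests:

/* Form 1: multiply high with scaling; only the low 16 bits of the
   shifted 32-bit product are kept, so the pattern can use IFN_MULHS.  */
void
mulhs_loop (short *restrict res, short *restrict a, short *restrict b, int n)
{
  for (int i = 0; i < n; ++i)
    res[i] = ((int) a[i] * (int) b[i]) >> 15;
}

/* Form 2: the same with rounding before the final shift (IFN_MULHRS).  */
void
mulhrs_loop (short *restrict res, short *restrict a, short *restrict b, int n)
{
  for (int i = 0; i < n; ++i)
    res[i] = ((((int) a[i] * (int) b[i]) >> 14) + 1) >> 1;
}

On AArch64 with SVE2, the smulhs/smulhrs/umulhs/umulhrs optabs listed above
are provided by the new <su>mulh<r>s<mode>3 expanders in aarch64-sve2.md.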
Diffstat (limited to 'gcc/tree-vect-patterns.c')
-rw-r--r--   gcc/tree-vect-patterns.c   170
1 file changed, 170 insertions, 0 deletions
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index ccb2e1e..2f86f9e 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -1723,6 +1723,175 @@ vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out)
   return pattern_stmt;
 }
 
+/* Recognize the following patterns:
+
+     ATYPE a;  // narrower than TYPE
+     BTYPE b;  // narrower than TYPE
+
+   1) Multiply high with scaling
+     TYPE res = ((TYPE) a * (TYPE) b) >> c;
+   2) ... or also with rounding
+     TYPE res = (((TYPE) a * (TYPE) b) >> d + 1) >> 1;
+
+   where only the bottom half of res is used.  */
+
+static gimple *
+vect_recog_mulhs_pattern (stmt_vec_info last_stmt_info, tree *type_out)
+{
+  /* Check for a right shift.  */
+  gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
+  if (!last_stmt
+      || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
+    return NULL;
+  vec_info *vinfo = last_stmt_info->vinfo;
+
+  /* Check that the shift result is wider than the users of the
+     result need (i.e. that narrowing would be a natural choice).  */
+  tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
+  unsigned int target_precision
+    = vect_element_precision (last_stmt_info->min_output_precision);
+  if (!INTEGRAL_TYPE_P (lhs_type)
+      || target_precision >= TYPE_PRECISION (lhs_type))
+    return NULL;
+
+  /* Look through any change in sign on the outer shift input.  */
+  vect_unpromoted_value unprom_rshift_input;
+  tree rshift_input = vect_look_through_possible_promotion
+    (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
+  if (!rshift_input
+      || TYPE_PRECISION (TREE_TYPE (rshift_input))
+         != TYPE_PRECISION (lhs_type))
+    return NULL;
+
+  /* Get the definition of the shift input.  */
+  stmt_vec_info rshift_input_stmt_info
+    = vect_get_internal_def (vinfo, rshift_input);
+  if (!rshift_input_stmt_info)
+    return NULL;
+  gassign *rshift_input_stmt
+    = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
+  if (!rshift_input_stmt)
+    return NULL;
+
+  stmt_vec_info mulh_stmt_info;
+  tree scale_term;
+  internal_fn ifn;
+  unsigned int expect_offset;
+
+  /* Check for the presence of the rounding term.  */
+  if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
+    {
+      /* Check that the outer shift was by 1.  */
+      if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
+	return NULL;
+
+      /* Check that the second operand of the PLUS_EXPR is 1.  */
+      if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
+	return NULL;
+
+      /* Look through any change in sign on the addition input.  */
+      vect_unpromoted_value unprom_plus_input;
+      tree plus_input = vect_look_through_possible_promotion
+	(vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
+      if (!plus_input
+	  || TYPE_PRECISION (TREE_TYPE (plus_input))
+	     != TYPE_PRECISION (TREE_TYPE (rshift_input)))
+	return NULL;
+
+      /* Get the definition of the multiply-high-scale part.  */
+      stmt_vec_info plus_input_stmt_info
+	= vect_get_internal_def (vinfo, plus_input);
+      if (!plus_input_stmt_info)
+	return NULL;
+      gassign *plus_input_stmt
+	= dyn_cast <gassign *> (plus_input_stmt_info->stmt);
+      if (!plus_input_stmt
+	  || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
+	return NULL;
+
+      /* Look through any change in sign on the scaling input.  */
+      vect_unpromoted_value unprom_scale_input;
+      tree scale_input = vect_look_through_possible_promotion
+	(vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
+      if (!scale_input
+	  || TYPE_PRECISION (TREE_TYPE (scale_input))
+	     != TYPE_PRECISION (TREE_TYPE (plus_input)))
+	return NULL;
+
+      /* Get the definition of the multiply-high part.  */
+      mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
+      if (!mulh_stmt_info)
+	return NULL;
+
+      /* Get the scaling term.  */
+      scale_term = gimple_assign_rhs2 (plus_input_stmt);
+
+      expect_offset = target_precision + 2;
+      ifn = IFN_MULHRS;
+    }
+  else
+    {
+      mulh_stmt_info = rshift_input_stmt_info;
+      scale_term = gimple_assign_rhs2 (last_stmt);
+
+      expect_offset = target_precision + 1;
+      ifn = IFN_MULHS;
+    }
+
+  /* Check that the scaling factor is correct.  */
+  if (TREE_CODE (scale_term) != INTEGER_CST
+      || wi::to_widest (scale_term) + expect_offset
+	 != TYPE_PRECISION (lhs_type))
+    return NULL;
+
+  /* Check whether the scaling input term can be seen as two widened
+     inputs multiplied together.  */
+  vect_unpromoted_value unprom_mult[2];
+  tree new_type;
+  unsigned int nops
+    = vect_widened_op_tree (mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
+			    false, 2, unprom_mult, &new_type);
+  if (nops != 2)
+    return NULL;
+
+  vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
+
+  /* Adjust output precision.  */
+  if (TYPE_PRECISION (new_type) < target_precision)
+    new_type = build_nonstandard_integer_type
+      (target_precision, TYPE_UNSIGNED (new_type));
+
+  /* Check for target support.  */
+  tree new_vectype = get_vectype_for_scalar_type (new_type);
+  if (!new_vectype
+      || !direct_internal_fn_supported_p
+	    (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
+    return NULL;
+
+  /* The IR requires a valid vector type for the cast result, even though
+     it's likely to be discarded.  */
+  *type_out = get_vectype_for_scalar_type (lhs_type);
+  if (!*type_out)
+    return NULL;
+
+  /* Generate the IFN_MULHRS call.  */
+  tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
+  tree new_ops[2];
+  vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
+		       unprom_mult, new_vectype);
+  gcall *mulhrs_stmt
+    = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
+  gimple_call_set_lhs (mulhrs_stmt, new_var);
+  gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
+
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+		     "created pattern stmt: %G", mulhrs_stmt);
+
+  return vect_convert_output (last_stmt_info, lhs_type,
+			      mulhrs_stmt, new_vectype);
+}
+
 /* Recognize the patterns:
 
      ATYPE a;  // narrower than TYPE
@@ -4713,6 +4882,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
   /* Must come after over_widening, which narrows the shift as much
      as possible beforehand.  */
   { vect_recog_average_pattern, "average" },
+  { vect_recog_mulhs_pattern, "mult_high" },
   { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
   { vect_recog_widen_mult_pattern, "widen_mult" },
   { vect_recog_dot_prod_pattern, "dot_prod" },
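
When the pattern matches, the multiply/add/shift sequence is replaced by a
single internal-function call whose result vect_convert_output then converts
back to the original type. As a rough sketch only (the SSA names are invented
and the exact dump depends on the surrounding loop), the rounding form of the
idiom ends up as something like:

  patt_h = .MULHRS (a_h, b_h);   /* computed in the narrow type, new_type  */
  patt_w = (int) patt_h;         /* cast back to lhs_type; usually dropped
                                    again when the stored value is narrowed  */

Vectorisation of the loop then relies on the smulhrs optab (umulhrs for
unsigned inputs, smulhs/umulhs for the non-rounding form), which is why the
pattern first checks direct_internal_fn_supported_p for the chosen vector
type.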