author     Richard Sandiford <richard.sandiford@arm.com>   2021-11-30 09:52:24 +0000
committer  Richard Sandiford <richard.sandiford@arm.com>   2021-11-30 09:52:24 +0000
commit     30213ae9a2eb53f6bc0913919457ceae2572b019
tree       3df652eaa52f2cd2ccb9a6e44c19ca8ffc130824 /gcc/tree-vect-stmts.c
parent     0c1fb64d961eb760aba2601870f19be2b5533bd3
vect: Make reduction code handle calls
This patch extends the reduction code to handle calls. So far
it's a structural change only; a later patch adds support for
specific function reductions.
Most of the patch consists of using code_helper and gimple_match_op
to describe the reduction operations. The other main change is that
vectorizable_call now needs to handle fully-predicated reductions.
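
For readers new to these classes: code_helper is, roughly, a discriminated
union that can carry either a tree_code (such as PLUS_EXPR) or a function
code (such as IFN_FMAX), so the reduction code can pass both kinds of
operation through a single interface.  The following is a minimal,
self-contained C++ sketch of that idea, not GCC's actual implementation;
the enum values and the bias scheme below are invented for illustration:

  #include <cassert>

  /* Stand-ins for GCC's real enums; the values are illustrative only.  */
  enum tree_code { PLUS_EXPR, MULT_EXPR, MAX_TREE_CODES };
  enum internal_fn { IFN_FMAX, IFN_FMIN, IFN_LAST };

  /* One integer encodes either kind of operation code, with function
     codes biased past the tree_code range.  */
  class code_helper
  {
  public:
    code_helper (tree_code code) : rep ((int) code) {}
    code_helper (internal_fn fn) : rep (MAX_TREE_CODES + (int) fn) {}

    bool is_tree_code () const { return rep < MAX_TREE_CODES; }
    bool is_internal_fn () const { return !is_tree_code (); }

    tree_code as_tree_code () const
    {
      assert (is_tree_code ());
      return (tree_code) rep;
    }

    internal_fn as_internal_fn () const
    {
      assert (is_internal_fn ());
      return (internal_fn) (rep - MAX_TREE_CODES);
    }

  private:
    int rep;
  };

  int main ()
  {
    /* Both kinds of operation flow through the same parameter type.  */
    code_helper plus (PLUS_EXPR);
    code_helper fmax (IFN_FMAX);
    assert (plus.is_tree_code () && plus.as_tree_code () == PLUS_EXPR);
    assert (fmax.is_internal_fn () && fmax.as_internal_fn () == IFN_FMAX);
    return 0;
  }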
There are some new functions that are provided for ABI completeness
and aren't currently used:
first_commutative_argument
commutative_ternary_op_p
1- and 3-argument forms of gimple_build
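
To give a flavour of the new classification hooks: commutative_binary_fn_p
answers whether swapping an internal function's two operands preserves its
result, and associative_binary_fn_p answers whether the function can be
reassociated, which is the property that makes a call usable as a
vectorizable reduction.  The sketch below is hedged: the switch bodies are
illustrative guesses, not the real internal-fn.c tables:

  #include <cassert>

  /* Illustrative stand-in for GCC's internal_fn enum.  */
  enum internal_fn { IFN_FMAX, IFN_FMIN, IFN_FMA, IFN_LAST };

  /* Sketch of commutative_binary_fn_p: true if FN (A, B) == FN (B, A).  */
  static bool
  commutative_binary_fn_p (internal_fn fn)
  {
    switch (fn)
      {
      case IFN_FMAX:
      case IFN_FMIN:
        return true;
      default:
        return false;
      }
  }

  /* Sketch of associative_binary_fn_p: true if FN (FN (A, B), C)
     == FN (A, FN (B, C)), the property a reduction needs.  */
  static bool
  associative_binary_fn_p (internal_fn fn)
  {
    switch (fn)
      {
      case IFN_FMAX:
      case IFN_FMIN:
        return true;
      default:
        return false;
      }
  }

  int main ()
  {
    assert (commutative_binary_fn_p (IFN_FMAX));
    assert (associative_binary_fn_p (IFN_FMIN));
    assert (!commutative_binary_fn_p (IFN_FMA)); /* ternary, not binary  */
    return 0;
  }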
gcc/
* builtins.h (associated_internal_fn): Declare overload that
takes a (combined_cfn, return type) pair.
* builtins.c (associated_internal_fn): Split new overload out
of original fndecl version. Also provide an overload that takes
a (combined_cfn, return type) pair.
* internal-fn.h (commutative_binary_fn_p): Declare.
(commutative_ternary_fn_p): Likewise.
(associative_binary_fn_p): Likewise.
* internal-fn.c (commutative_binary_fn_p, commutative_ternary_fn_p):
New functions, split out from...
(first_commutative_argument): ...here.
(associative_binary_fn_p): New function.
* gimple-match.h (code_helper): Add a constructor that takes
internal functions.
(commutative_binary_op_p): Declare.
(commutative_ternary_op_p): Likewise.
(first_commutative_argument): Likewise.
(associative_binary_op_p): Likewise.
(canonicalize_code): Likewise.
(directly_supported_p): Likewise.
(get_conditional_internal_fn): Likewise.
(gimple_build): New overloads that take a code_helper.
* gimple-fold.c (gimple_build): Likewise.
* gimple-match-head.c (commutative_binary_op_p): New function.
(commutative_ternary_op_p): Likewise.
(first_commutative_argument): Likewise.
(associative_binary_op_p): Likewise.
(canonicalize_code): Likewise.
(directly_supported_p): Likewise.
(get_conditional_internal_fn): Likewise.
* tree-vectorizer.h: Include gimple-match.h.
(neutral_op_for_reduction): Take a code_helper instead of a tree_code.
(needs_fold_left_reduction_p): Likewise.
(reduction_fn_for_scalar_code): Likewise.
(vect_can_vectorize_without_simd_p): Declare a new overload that
takes a code_helper.
* tree-vect-loop.c: Include case-cfn-macros.h.
(fold_left_reduction_fn): Take a code_helper instead of a tree_code.
(reduction_fn_for_scalar_code): Likewise.
(neutral_op_for_reduction): Likewise.
(needs_fold_left_reduction_p): Likewise.
(use_mask_by_cond_expr_p): Likewise.
(build_vect_cond_expr): Likewise.
(vect_create_partial_epilog): Likewise. Use gimple_build rather
than gimple_build_assign.
(check_reduction_path): Handle calls and operate on code_helpers
rather than tree_codes.
(vect_is_simple_reduction): Likewise.
(vect_model_reduction_cost): Likewise.
(vect_find_reusable_accumulator): Likewise.
(vect_create_epilog_for_reduction): Likewise.
(vect_transform_cycle_phi): Likewise.
(vectorizable_reduction): Likewise. Make more use of
lane_reduc_code_p.
(vect_transform_reduction): Use gimple_extract_op but expect
a tree_code for now.
(vect_can_vectorize_without_simd_p): New overload that takes
a code_helper.
* tree-vect-stmts.c (vectorizable_call): Handle reductions in
fully-masked loops.
* tree-vect-patterns.c (vect_mark_pattern_stmts): Use
gimple_extract_op when updating STMT_VINFO_REDUC_IDX.
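
Several of the gimple-match-head.c additions above exist so that the
vectorizer can ask a single question regardless of whether the reduction
operation is a tree code or a call.  directly_supported_p is the clearest
example: for a tree code the real implementation consults the target's
optabs, while for a function code it uses direct_internal_fn_supported_p.
A toy sketch of that dispatch follows; every type and field here is an
invented stand-in, not a GCC definition:

  #include <cassert>

  enum tree_code { PLUS_EXPR, MAX_TREE_CODES };
  enum internal_fn { IFN_FMAX, IFN_LAST };

  struct vec_type
  {
    bool has_add_optab; /* stands in for an optab_handler query  */
    bool has_fmax_fn;   /* stands in for direct_internal_fn_supported_p  */
  };

  struct code_helper
  {
    code_helper (tree_code c) : rep ((int) c) {}
    code_helper (internal_fn f) : rep (MAX_TREE_CODES + (int) f) {}
    bool is_tree_code () const { return rep < MAX_TREE_CODES; }
    int rep;
  };

  /* Sketch of directly_supported_p: dispatch on what the code_helper
     holds, so callers never test "assignment or call?" themselves.  */
  static bool
  directly_supported_p (code_helper code, const vec_type &vectype)
  {
    if (code.is_tree_code ())
      return vectype.has_add_optab;
    return vectype.has_fmax_fn;
  }

  int main ()
  {
    vec_type v4sf = { true, false };
    assert (directly_supported_p (PLUS_EXPR, v4sf));
    assert (!directly_supported_p (IFN_FMAX, v4sf));
    return 0;
  }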
Diffstat (limited to 'gcc/tree-vect-stmts.c')
gcc/tree-vect-stmts.c | 66
1 file changed, 54 insertions(+), 12 deletions(-)
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 2284ad0..101f61f 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -3202,7 +3202,6 @@ vectorizable_call (vec_info *vinfo,
   int ndts = ARRAY_SIZE (dt);
   int ncopies, j;
   auto_vec<tree, 8> vargs;
-  auto_vec<tree, 8> orig_vargs;
   enum { NARROW, NONE, WIDEN } modifier;
   size_t i, nargs;
   tree lhs;
@@ -3426,6 +3425,8 @@ vectorizable_call (vec_info *vinfo,
      needs to be generated.  */
   gcc_assert (ncopies >= 1);
 
+  int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
+  internal_fn cond_fn = get_conditional_internal_fn (ifn);
   vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
   if (!vec_stmt) /* transformation not required.  */
     {
@@ -3446,14 +3447,33 @@ vectorizable_call (vec_info *vinfo,
           record_stmt_cost (cost_vec, ncopies / 2,
                             vec_promote_demote, stmt_info, 0, vect_body);
 
-      if (loop_vinfo && mask_opno >= 0)
+      if (loop_vinfo
+          && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
+          && (reduc_idx >= 0 || mask_opno >= 0))
         {
-          unsigned int nvectors = (slp_node
-                                   ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
-                                   : ncopies);
-          tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
-          vect_record_loop_mask (loop_vinfo, masks, nvectors,
-                                 vectype_out, scalar_mask);
+          if (reduc_idx >= 0
+              && (cond_fn == IFN_LAST
+                  || !direct_internal_fn_supported_p (cond_fn, vectype_out,
+                                                      OPTIMIZE_FOR_SPEED)))
+            {
+              if (dump_enabled_p ())
+                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                 "can't use a fully-masked loop because no"
+                                 " conditional operation is available.\n");
+              LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
+            }
+          else
+            {
+              unsigned int nvectors
+                = (slp_node
+                   ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
+                   : ncopies);
+              tree scalar_mask = NULL_TREE;
+              if (mask_opno >= 0)
+                scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
+              vect_record_loop_mask (loop_vinfo, masks, nvectors,
+                                     vectype_out, scalar_mask);
+            }
         }
       return true;
     }
@@ -3468,12 +3488,17 @@ vectorizable_call (vec_info *vinfo,
   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
 
   bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
+  unsigned int vect_nargs = nargs;
+  if (masked_loop_p && reduc_idx >= 0)
+    {
+      ifn = cond_fn;
+      vect_nargs += 2;
+    }
 
   if (modifier == NONE || ifn != IFN_LAST)
     {
       tree prev_res = NULL_TREE;
-      vargs.safe_grow (nargs, true);
-      orig_vargs.safe_grow (nargs, true);
+      vargs.safe_grow (vect_nargs, true);
       auto_vec<vec<tree> > vec_defs (nargs);
       for (j = 0; j < ncopies; ++j)
         {
@@ -3488,12 +3513,23 @@ vectorizable_call (vec_info *vinfo,
           /* Arguments are ready.  Create the new vector stmt.  */
           FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
             {
+              int varg = 0;
+              if (masked_loop_p && reduc_idx >= 0)
+                {
+                  unsigned int vec_num = vec_oprnds0.length ();
+                  /* Always true for SLP.  */
+                  gcc_assert (ncopies == 1);
+                  vargs[varg++] = vect_get_loop_mask (gsi, masks, vec_num,
+                                                      vectype_out, i);
+                }
               size_t k;
               for (k = 0; k < nargs; k++)
                 {
                   vec<tree> vec_oprndsk = vec_defs[k];
-                  vargs[k] = vec_oprndsk[i];
+                  vargs[varg++] = vec_oprndsk[i];
                 }
+              if (masked_loop_p && reduc_idx >= 0)
+                vargs[varg++] = vargs[reduc_idx + 1];
               gimple *new_stmt;
               if (modifier == NARROW)
                 {
@@ -3546,6 +3582,10 @@ vectorizable_call (vec_info *vinfo,
               continue;
             }
 
+          int varg = 0;
+          if (masked_loop_p && reduc_idx >= 0)
+            vargs[varg++] = vect_get_loop_mask (gsi, masks, ncopies,
+                                                vectype_out, j);
           for (i = 0; i < nargs; i++)
             {
               op = gimple_call_arg (stmt, i);
@@ -3556,8 +3596,10 @@ vectorizable_call (vec_info *vinfo,
                                      op, &vec_defs[i],
                                      vectypes[i]);
                 }
-              orig_vargs[i] = vargs[i] = vec_defs[i][j];
+              vargs[varg++] = vec_defs[i][j];
             }
+          if (masked_loop_p && reduc_idx >= 0)
+            vargs[varg++] = vargs[reduc_idx + 1];
 
           if (mask_opno >= 0 && masked_loop_p)
             {
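
The net effect of the vectorizable_call hunks above is an argument relayout
for masked reduction calls: the loop mask is prepended, the original vector
operands follow, and the reduction accumulator is appended again as the
"else" value, so inactive lanes carry the previous result through (hence
vect_nargs += 2, and hence vargs[reduc_idx + 1]: the prepended mask shifts
every index up by one).  For example, res = FMAX (res, x) in a fully-masked
loop becomes a conditional call of the shape .COND_FMAX (loop_mask, res, x,
res).  The following is a toy model of that layout with invented names; in
GCC the elements are `tree' operands and the target is the IFN_COND_*
counterpart of the original function:

  #include <cassert>
  #include <string>
  #include <vector>

  /* Toy model of the vargs layout built for a masked reduction call.  */
  static std::vector<std::string>
  build_masked_call_args (const std::vector<std::string> &args,
                          int reduc_idx, const std::string &loop_mask)
  {
    std::vector<std::string> vargs;
    vargs.push_back (loop_mask);            /* mask first...  */
    for (const std::string &arg : args)
      vargs.push_back (arg);                /* ...then the original args...  */
    vargs.push_back (vargs[reduc_idx + 1]); /* ...then the accumulator as
                                               the "else" value; +1 skips
                                               the prepended mask.  */
    return vargs;
  }

  int main ()
  {
    /* res = FMAX (res, x), accumulator in argument 0, becomes
       res = .COND_FMAX (mask, res, x, res) in a fully-masked loop.  */
    std::vector<std::string> vargs
      = build_masked_call_args ({"res", "x"}, 0, "mask");
    std::vector<std::string> expected = {"mask", "res", "x", "res"};
    assert (vargs == expected);
    return 0;
  }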