From 30213ae9a2eb53f6bc0913919457ceae2572b019 Mon Sep 17 00:00:00 2001
From: Richard Sandiford
Date: Tue, 30 Nov 2021 09:52:24 +0000
Subject: vect: Make reduction code handle calls

This patch extends the reduction code to handle calls.  So far it's
a structural change only; a later patch adds support for specific
function reductions.

Most of the patch consists of using code_helper and gimple_match_op
to describe the reduction operations.  The other main change is that
vectorizable_call now needs to handle fully-predicated reductions.

There are some new functions that are provided for ABI completeness
and aren't currently used:

  first_commutative_argument
  commutative_ternary_op_p
  1- and 3-argument forms of gimple_build

gcc/
	* builtins.h (associated_internal_fn): Declare overload that
	takes a (combined_cfn, return type) pair.
	* builtins.c (associated_internal_fn): Split new overload out
	of original fndecl version.  Also provide an overload that takes
	a (combined_cfn, return type) pair.
	* internal-fn.h (commutative_binary_fn_p): Declare.
	(commutative_ternary_fn_p): Likewise.
	(associative_binary_fn_p): Likewise.
	* internal-fn.c (commutative_binary_fn_p, commutative_ternary_fn_p):
	New functions, split out from...
	(first_commutative_argument): ...here.
	(associative_binary_fn_p): New function.
	* gimple-match.h (code_helper): Add a constructor that takes
	internal functions.
	(commutative_binary_op_p): Declare.
	(commutative_ternary_op_p): Likewise.
	(first_commutative_argument): Likewise.
	(associative_binary_op_p): Likewise.
	(canonicalize_code): Likewise.
	(directly_supported_p): Likewise.
	(get_conditional_internal_fn): Likewise.
	(gimple_build): New overloads that take a code_helper.
	* gimple-fold.c (gimple_build): Likewise.
	* gimple-match-head.c (commutative_binary_op_p): New function.
	(commutative_ternary_op_p): Likewise.
	(first_commutative_argument): Likewise.
	(associative_binary_op_p): Likewise.
	(canonicalize_code): Likewise.
	(directly_supported_p): Likewise.
	(get_conditional_internal_fn): Likewise.
	* tree-vectorizer.h: Include gimple-match.h.
	(neutral_op_for_reduction): Take a code_helper instead of a
	tree_code.
	(needs_fold_left_reduction_p): Likewise.
	(reduction_fn_for_scalar_code): Likewise.
	(vect_can_vectorize_without_simd_p): Declare a new overload that
	takes a code_helper.
	* tree-vect-loop.c: Include case-cfn-macros.h.
	(fold_left_reduction_fn): Take a code_helper instead of a
	tree_code.
	(reduction_fn_for_scalar_code): Likewise.
	(neutral_op_for_reduction): Likewise.
	(needs_fold_left_reduction_p): Likewise.
	(use_mask_by_cond_expr_p): Likewise.
	(build_vect_cond_expr): Likewise.
	(vect_create_partial_epilog): Likewise.  Use gimple_build rather
	than gimple_build_assign.
	(check_reduction_path): Handle calls and operate on code_helpers
	rather than tree_codes.
	(vect_is_simple_reduction): Likewise.
	(vect_model_reduction_cost): Likewise.
	(vect_find_reusable_accumulator): Likewise.
	(vect_create_epilog_for_reduction): Likewise.
	(vect_transform_cycle_phi): Likewise.
	(vectorizable_reduction): Likewise.  Make more use of
	lane_reduc_code_p.
	(vect_transform_reduction): Use gimple_extract_op but expect
	a tree_code for now.
	(vect_can_vectorize_without_simd_p): New overload that takes
	a code_helper.
	* tree-vect-stmts.c (vectorizable_call): Handle reductions in
	fully-masked loops.
	* tree-vect-patterns.c (vect_mark_pattern_stmts): Use
	gimple_extract_op when updating STMT_VINFO_REDUC_IDX.
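As a rough sketch of what the vectorizable_call change is for (using
__builtin_fmax purely as a stand-in for the function reductions that a
later patch wires up; the fmax_reduc example below is not taken from the
patch), consider a scalar call reduction and its fully-masked vector form:

  /* Scalar reduction implemented through a call.  */
  double
  fmax_reduc (const double *a, int n)
  {
    double res = -__builtin_inf ();
    for (int i = 0; i < n; ++i)
      res = __builtin_fmax (res, a[i]);
    return res;
  }

  /* In a fully-masked loop the call is emitted through its conditional
     internal function: the loop mask is prepended and the reduction
     accumulator is repeated as the trailing "else" value, which is why
     the masked case uses nargs + 2 arguments.  Roughly (pseudo-GIMPLE,
     illustrative only):

       vect_res = .COND_FMAX (loop_mask, vect_res, vect_a, vect_res);

     so inactive lanes simply keep the old accumulator value.  */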
---
 gcc/tree-vect-stmts.c | 66 +++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 54 insertions(+), 12 deletions(-)

diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 2284ad0..101f61f 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -3202,7 +3202,6 @@ vectorizable_call (vec_info *vinfo,
   int ndts = ARRAY_SIZE (dt);
   int ncopies, j;
   auto_vec<tree, 8> vargs;
-  auto_vec<tree, 8> orig_vargs;
   enum { NARROW, NONE, WIDEN } modifier;
   size_t i, nargs;
   tree lhs;
@@ -3426,6 +3425,8 @@ vectorizable_call (vec_info *vinfo,
      needs to be generated.  */
   gcc_assert (ncopies >= 1);
 
+  int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
+  internal_fn cond_fn = get_conditional_internal_fn (ifn);
   vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
   if (!vec_stmt) /* transformation not required.  */
     {
@@ -3446,14 +3447,33 @@ vectorizable_call (vec_info *vinfo,
 	record_stmt_cost (cost_vec, ncopies / 2,
 			  vec_promote_demote, stmt_info, 0, vect_body);
 
-      if (loop_vinfo && mask_opno >= 0)
+      if (loop_vinfo
+	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
+	  && (reduc_idx >= 0 || mask_opno >= 0))
 	{
-	  unsigned int nvectors = (slp_node
-				   ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
-				   : ncopies);
-	  tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
-	  vect_record_loop_mask (loop_vinfo, masks, nvectors,
-				 vectype_out, scalar_mask);
+	  if (reduc_idx >= 0
+	      && (cond_fn == IFN_LAST
+		  || !direct_internal_fn_supported_p (cond_fn, vectype_out,
+						      OPTIMIZE_FOR_SPEED)))
+	    {
+	      if (dump_enabled_p ())
+		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				 "can't use a fully-masked loop because no"
+				 " conditional operation is available.\n");
+	      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
+	    }
+	  else
+	    {
+	      unsigned int nvectors
+		= (slp_node
+		   ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
+		   : ncopies);
+	      tree scalar_mask = NULL_TREE;
+	      if (mask_opno >= 0)
+		scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
+	      vect_record_loop_mask (loop_vinfo, masks, nvectors,
+				     vectype_out, scalar_mask);
+	    }
 	}
       return true;
     }
@@ -3468,12 +3488,17 @@ vectorizable_call (vec_info *vinfo,
   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
 
   bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
+  unsigned int vect_nargs = nargs;
+  if (masked_loop_p && reduc_idx >= 0)
+    {
+      ifn = cond_fn;
+      vect_nargs += 2;
+    }
 
   if (modifier == NONE || ifn != IFN_LAST)
     {
       tree prev_res = NULL_TREE;
-      vargs.safe_grow (nargs, true);
-      orig_vargs.safe_grow (nargs, true);
+      vargs.safe_grow (vect_nargs, true);
       auto_vec<vec<tree> > vec_defs (nargs);
       for (j = 0; j < ncopies; ++j)
 	{
@@ -3488,12 +3513,23 @@ vectorizable_call (vec_info *vinfo,
 	  /* Arguments are ready.  Create the new vector stmt.  */
 	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
 	    {
+	      int varg = 0;
+	      if (masked_loop_p && reduc_idx >= 0)
+		{
+		  unsigned int vec_num = vec_oprnds0.length ();
+		  /* Always true for SLP.  */
+		  gcc_assert (ncopies == 1);
+		  vargs[varg++] = vect_get_loop_mask (gsi, masks, vec_num,
+						      vectype_out, i);
+		}
 	      size_t k;
 	      for (k = 0; k < nargs; k++)
 		{
 		  vec<tree> vec_oprndsk = vec_defs[k];
-		  vargs[k] = vec_oprndsk[i];
+		  vargs[varg++] = vec_oprndsk[i];
 		}
+	      if (masked_loop_p && reduc_idx >= 0)
+		vargs[varg++] = vargs[reduc_idx + 1];
 	      gimple *new_stmt;
 	      if (modifier == NARROW)
 		{
@@ -3546,6 +3582,10 @@ vectorizable_call (vec_info *vinfo,
 	      continue;
 	    }
 
+	  int varg = 0;
+	  if (masked_loop_p && reduc_idx >= 0)
+	    vargs[varg++] = vect_get_loop_mask (gsi, masks, ncopies,
+						vectype_out, j);
 	  for (i = 0; i < nargs; i++)
 	    {
 	      op = gimple_call_arg (stmt, i);
@@ -3556,8 +3596,10 @@ vectorizable_call (vec_info *vinfo,
 						 op, &vec_defs[i],
 						 vectypes[i]);
 		}
-	      orig_vargs[i] = vargs[i] = vec_defs[i][j];
+	      vargs[varg++] = vec_defs[i][j];
 	    }
+	  if (masked_loop_p && reduc_idx >= 0)
+	    vargs[varg++] = vargs[reduc_idx + 1];
 
 	  if (mask_opno >= 0 && masked_loop_p)
 	    {
-- 
cgit v1.1