path: root/gcc/tree-vect-stmts.c
author    Richard Sandiford <richard.sandiford@arm.com>  2021-11-30 09:52:24 +0000
committer Richard Sandiford <richard.sandiford@arm.com>  2021-11-30 09:52:24 +0000
commit    30213ae9a2eb53f6bc0913919457ceae2572b019 (patch)
tree      3df652eaa52f2cd2ccb9a6e44c19ca8ffc130824 /gcc/tree-vect-stmts.c
parent    0c1fb64d961eb760aba2601870f19be2b5533bd3 (diff)
vect: Make reduction code handle calls
This patch extends the reduction code to handle calls.  So far it's
a structural change only; a later patch adds support for specific
function reductions.

Most of the patch consists of using code_helper and gimple_match_op
to describe the reduction operations.  The other main change is that
vectorizable_call now needs to handle fully-predicated reductions.

There are some new functions that are provided for ABI completeness
and aren't currently used:

	first_commutative_argument
	commutative_ternary_op_p
	1- and 3-argument forms of gimple_build

gcc/
	* builtins.h (associated_internal_fn): Declare overload that
	takes a (combined_cfn, return type) pair.
	* builtins.c (associated_internal_fn): Split new overload out
	of original fndecl version.  Also provide an overload that takes
	a (combined_cfn, return type) pair.
	* internal-fn.h (commutative_binary_fn_p): Declare.
	(commutative_ternary_fn_p): Likewise.
	(associative_binary_fn_p): Likewise.
	* internal-fn.c (commutative_binary_fn_p, commutative_ternary_fn_p):
	New functions, split out from...
	(first_commutative_argument): ...here.
	(associative_binary_fn_p): New function.
	* gimple-match.h (code_helper): Add a constructor that takes
	internal functions.
	(commutative_binary_op_p): Declare.
	(commutative_ternary_op_p): Likewise.
	(first_commutative_argument): Likewise.
	(associative_binary_op_p): Likewise.
	(canonicalize_code): Likewise.
	(directly_supported_p): Likewise.
	(get_conditional_internal_fn): Likewise.
	(gimple_build): New overloads that take a code_helper.
	* gimple-fold.c (gimple_build): Likewise.
	* gimple-match-head.c (commutative_binary_op_p): New function.
	(commutative_ternary_op_p): Likewise.
	(first_commutative_argument): Likewise.
	(associative_binary_op_p): Likewise.
	(canonicalize_code): Likewise.
	(directly_supported_p): Likewise.
	(get_conditional_internal_fn): Likewise.
	* tree-vectorizer.h: Include gimple-match.h.
	(neutral_op_for_reduction): Take a code_helper instead of a
	tree_code.
	(needs_fold_left_reduction_p): Likewise.
	(reduction_fn_for_scalar_code): Likewise.
	(vect_can_vectorize_without_simd_p): Declare a new overload
	that takes a code_helper.
	* tree-vect-loop.c: Include case-cfn-macros.h.
	(fold_left_reduction_fn): Take a code_helper instead of a
	tree_code.
	(reduction_fn_for_scalar_code): Likewise.
	(neutral_op_for_reduction): Likewise.
	(needs_fold_left_reduction_p): Likewise.
	(use_mask_by_cond_expr_p): Likewise.
	(build_vect_cond_expr): Likewise.
	(vect_create_partial_epilog): Likewise.  Use gimple_build rather
	than gimple_build_assign.
	(check_reduction_path): Handle calls and operate on code_helpers
	rather than tree_codes.
	(vect_is_simple_reduction): Likewise.
	(vect_model_reduction_cost): Likewise.
	(vect_find_reusable_accumulator): Likewise.
	(vect_create_epilog_for_reduction): Likewise.
	(vect_transform_cycle_phi): Likewise.
	(vectorizable_reduction): Likewise.  Make more use of
	lane_reduc_code_p.
	(vect_transform_reduction): Use gimple_extract_op but expect
	a tree_code for now.
	(vect_can_vectorize_without_simd_p): New overload that takes
	a code_helper.
	* tree-vect-stmts.c (vectorizable_call): Handle reductions in
	fully-masked loops.
	* tree-vect-patterns.c (vect_mark_pattern_stmts): Use
	gimple_extract_op when updating STMT_VINFO_REDUC_IDX.
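For reference, "fully-predicated" here means that each vector operation
takes a loop mask and an "else" operand: active lanes compute the
operation, inactive lanes pass the "else" operand through unchanged.
A minimal sketch of those lane-wise semantics in plain C (names and
element types are illustrative, not GCC internals):

    /* Sketch of .COND_ADD (mask, a, b, fallthru) semantics, lane by lane.  */
    void
    cond_add_semantics (int nunits, const _Bool *mask, const double *a,
                        const double *b, const double *fallthru, double *res)
    {
      for (int i = 0; i < nunits; i++)
        res[i] = mask[i] ? a[i] + b[i] : fallthru[i];
    }

For a reduction, the fallthrough operand is the accumulator itself, so
masked-off iterations leave the running result untouched.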
Diffstat (limited to 'gcc/tree-vect-stmts.c')
-rw-r--r--  gcc/tree-vect-stmts.c  66
1 file changed, 54 insertions, 12 deletions
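A hedged sketch of the argument layout that the hunks below build for a
masked reduction call (identifiers illustrative): given a scalar
reduction res = FN (x, acc) with reduc_idx == 1, the fully-masked
vector form becomes

    res = COND_FN (loop_mask, x, acc, acc);

The loop mask is prepended, the original arguments follow, and the
accumulator is repeated as the trailing fallthrough operand.  That also
explains the vargs[reduc_idx + 1] reads below: once the mask occupies
vargs[0], every original argument sits one slot higher than its scalar
index.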
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 2284ad0..101f61f 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -3202,7 +3202,6 @@ vectorizable_call (vec_info *vinfo,
int ndts = ARRAY_SIZE (dt);
int ncopies, j;
auto_vec<tree, 8> vargs;
- auto_vec<tree, 8> orig_vargs;
enum { NARROW, NONE, WIDEN } modifier;
size_t i, nargs;
tree lhs;
@@ -3426,6 +3425,8 @@ vectorizable_call (vec_info *vinfo,
needs to be generated. */
gcc_assert (ncopies >= 1);
+ int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
+ internal_fn cond_fn = get_conditional_internal_fn (ifn);
vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
if (!vec_stmt) /* transformation not required. */
{
@@ -3446,14 +3447,33 @@ vectorizable_call (vec_info *vinfo,
record_stmt_cost (cost_vec, ncopies / 2,
vec_promote_demote, stmt_info, 0, vect_body);
- if (loop_vinfo && mask_opno >= 0)
+ if (loop_vinfo
+ && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
+ && (reduc_idx >= 0 || mask_opno >= 0))
{
- unsigned int nvectors = (slp_node
- ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
- : ncopies);
- tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
- vect_record_loop_mask (loop_vinfo, masks, nvectors,
- vectype_out, scalar_mask);
+ if (reduc_idx >= 0
+ && (cond_fn == IFN_LAST
+ || !direct_internal_fn_supported_p (cond_fn, vectype_out,
+ OPTIMIZE_FOR_SPEED)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "can't use a fully-masked loop because no"
+ " conditional operation is available.\n");
+ LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
+ }
+ else
+ {
+ unsigned int nvectors
+ = (slp_node
+ ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
+ : ncopies);
+ tree scalar_mask = NULL_TREE;
+ if (mask_opno >= 0)
+ scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
+ vect_record_loop_mask (loop_vinfo, masks, nvectors,
+ vectype_out, scalar_mask);
+ }
}
return true;
}
@@ -3468,12 +3488,17 @@ vectorizable_call (vec_info *vinfo,
vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
+ unsigned int vect_nargs = nargs;
+ if (masked_loop_p && reduc_idx >= 0)
+ {
+ ifn = cond_fn;
+ vect_nargs += 2;
+ }
if (modifier == NONE || ifn != IFN_LAST)
{
tree prev_res = NULL_TREE;
- vargs.safe_grow (nargs, true);
- orig_vargs.safe_grow (nargs, true);
+ vargs.safe_grow (vect_nargs, true);
auto_vec<vec<tree> > vec_defs (nargs);
for (j = 0; j < ncopies; ++j)
{
@@ -3488,12 +3513,23 @@ vectorizable_call (vec_info *vinfo,
/* Arguments are ready. Create the new vector stmt. */
FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
{
+ int varg = 0;
+ if (masked_loop_p && reduc_idx >= 0)
+ {
+ unsigned int vec_num = vec_oprnds0.length ();
+ /* Always true for SLP. */
+ gcc_assert (ncopies == 1);
+ vargs[varg++] = vect_get_loop_mask (gsi, masks, vec_num,
+ vectype_out, i);
+ }
size_t k;
for (k = 0; k < nargs; k++)
{
vec<tree> vec_oprndsk = vec_defs[k];
- vargs[k] = vec_oprndsk[i];
+ vargs[varg++] = vec_oprndsk[i];
}
+ if (masked_loop_p && reduc_idx >= 0)
+ vargs[varg++] = vargs[reduc_idx + 1];
gimple *new_stmt;
if (modifier == NARROW)
{
@@ -3546,6 +3582,10 @@ vectorizable_call (vec_info *vinfo,
continue;
}
+ int varg = 0;
+ if (masked_loop_p && reduc_idx >= 0)
+ vargs[varg++] = vect_get_loop_mask (gsi, masks, ncopies,
+ vectype_out, j);
for (i = 0; i < nargs; i++)
{
op = gimple_call_arg (stmt, i);
@@ -3556,8 +3596,10 @@ vectorizable_call (vec_info *vinfo,
op, &vec_defs[i],
vectypes[i]);
}
- orig_vargs[i] = vargs[i] = vec_defs[i][j];
+ vargs[varg++] = vec_defs[i][j];
}
+ if (masked_loop_p && reduc_idx >= 0)
+ vargs[varg++] = vargs[reduc_idx + 1];
if (mask_opno >= 0 && masked_loop_p)
{
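Taken together with the analysis-time check above, the transformation
gives fully-masked loops a conditional form of the reduction call.
A before/after sketch in gimple-style pseudocode (statement and SSA
names illustrative; COND_FN stands for whatever conditional variant the
target supports):

    /* Before, scalar loop body:
         acc_2 = FN (x_1, acc_1);

       After, fully-masked vector loop body:
         vect_acc_2 = .COND_FN (loop_mask_3, vect_x_1, vect_acc_1, vect_acc_1);

       Inactive lanes copy vect_acc_1 forward, so the final reduction of
       the accumulator is unaffected by masked-off iterations.  */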