aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-vect-loop.c
diff options
context:
space:
mode:
authorRichard Sandiford <richard.sandiford@arm.com>2021-07-13 10:17:43 +0100
committerRichard Sandiford <richard.sandiford@arm.com>2021-07-13 10:17:43 +0100
commit1583b8bff0be7e41aa721dde79f90ca0763bd4e2 (patch)
treed2f95d2ac56f2508877f03cd54c3be109b1fed4d /gcc/tree-vect-loop.c
parent7670b6633e51afbbc4b3c8a5775accf7f2d887af (diff)
downloadgcc-1583b8bff0be7e41aa721dde79f90ca0763bd4e2.zip
gcc-1583b8bff0be7e41aa721dde79f90ca0763bd4e2.tar.gz
gcc-1583b8bff0be7e41aa721dde79f90ca0763bd4e2.tar.bz2
vect: Reuse reduction accumulators between loops
This patch adds support for reusing a main loop's reduction accumulator in an epilogue loop. This in turn lets the loops share a single piece of vector->scalar reduction code. The patch has the following restrictions: (1) The epilogue reduction can only operate on a single vector (e.g. ncopies must be 1 for non-SLP reductions, and the group size must be <= the element count for SLP reductions). (2) Both loops must use the same vector mode for their accumulators. This means that the patch is restricted to targets that support --param vect-partial-vector-usage=1. (3) The reduction must be a standard “tree code” reduction. However, these restrictions could be lifted in future. For example, if the main loop operates on 128-bit vectors and the epilogue loop operates on 64-bit vectors, we could in future reduce the 128-bit vector by one stage and use the 64-bit result as the starting point for the epilogue result. The patch tries to handle chained SLP reductions, unchained SLP reductions and non-SLP reductions. It also handles cases in which the epilogue loop is entered directly (rather than via the main loop) and cases in which the epilogue loop can be skipped. vect_get_main_loop_result is a bit more general than the current patch needs. gcc/ * tree-vectorizer.h (vect_reusable_accumulator): New structure. (_loop_vec_info::main_loop_edge): New field. (_loop_vec_info::skip_main_loop_edge): Likewise. (_loop_vec_info::skip_this_loop_edge): Likewise. (_loop_vec_info::reusable_accumulators): Likewise. (_stmt_vec_info::reduc_scalar_results): Likewise. (_stmt_vec_info::reused_accumulator): Likewise. (vect_get_main_loop_result): Declare. * tree-vectorizer.c (vec_info::new_stmt_vec_info): Initialize reduc_scalar_inputs. (vec_info::free_stmt_vec_info): Free reduc_scalar_inputs. * tree-vect-loop-manip.c (vect_get_main_loop_result): New function. (vect_do_peeling): Fill an epilogue loop's main_loop_edge, skip_main_loop_edge and skip_this_loop_edge fields. * tree-vect-loop.c (INCLUDE_ALGORITHM): Define. (vect_emit_reduction_init_stmts): New function. (get_initial_def_for_reduction): Use it. (get_initial_defs_for_reduction): Likewise. Change the vinfo parameter to a loop_vec_info. (vect_create_epilog_for_reduction): Store the scalar results in the reduc_info. If an epilogue loop is reusing an accumulator from the main loop, and if the epilogue loop can also be skipped, try to place the reduction code in the join block. Record accumulators that could potentially be reused by epilogue loops. (vect_transform_cycle_phi): When vectorizing epilogue loops, try to reuse accumulators from the main loop. Record the initial value in reduc_info for non-SLP reductions too. gcc/testsuite/ * gcc.target/aarch64/sve/reduc_9.c: New test. * gcc.target/aarch64/sve/reduc_9_run.c: Likewise. * gcc.target/aarch64/sve/reduc_10.c: Likewise. * gcc.target/aarch64/sve/reduc_10_run.c: Likewise. * gcc.target/aarch64/sve/reduc_11.c: Likewise. * gcc.target/aarch64/sve/reduc_11_run.c: Likewise. * gcc.target/aarch64/sve/reduc_12.c: Likewise. * gcc.target/aarch64/sve/reduc_12_run.c: Likewise. * gcc.target/aarch64/sve/reduc_13.c: Likewise. * gcc.target/aarch64/sve/reduc_13_run.c: Likewise. * gcc.target/aarch64/sve/reduc_14.c: Likewise. * gcc.target/aarch64/sve/reduc_14_run.c: Likewise. * gcc.target/aarch64/sve/reduc_15.c: Likewise. * gcc.target/aarch64/sve/reduc_15_run.c: Likewise.
Diffstat (limited to 'gcc/tree-vect-loop.c')
-rw-r--r--gcc/tree-vect-loop.c307
1 files changed, 249 insertions, 58 deletions
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index fe7e73f..8c27d75 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -19,6 +19,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+#define INCLUDE_ALGORITHM
#include "config.h"
#include "system.h"
#include "coretypes.h"
@@ -823,6 +824,10 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
th (0),
versioning_threshold (0),
vectorization_factor (0),
+ main_loop_edge (nullptr),
+ skip_main_loop_edge (nullptr),
+ skip_this_loop_edge (nullptr),
+ reusable_accumulators (),
max_vectorization_factor (0),
mask_skip_niters (NULL_TREE),
rgroup_compare_type (NULL_TREE),
@@ -4607,7 +4612,32 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo,
prologue_cost, epilogue_cost);
}
+/* SEQ is a sequence of instructions that initialize the reduction
+ described by REDUC_INFO. Emit them in the appropriate place. */
+static void
+vect_emit_reduction_init_stmts (loop_vec_info loop_vinfo,
+ stmt_vec_info reduc_info, gimple *seq)
+{
+ if (reduc_info->reused_accumulator)
+ {
+ /* When reusing an accumulator from the main loop, we only need
+ initialization instructions if the main loop can be skipped.
+ In that case, emit the initialization instructions at the end
+ of the guard block that does the skip. */
+ edge skip_edge = loop_vinfo->skip_main_loop_edge;
+ gcc_assert (skip_edge);
+ gimple_stmt_iterator gsi = gsi_last_bb (skip_edge->src);
+ gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
+ }
+ else
+ {
+ /* The normal case: emit the initialization instructions on the
+ preheader edge. */
+ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), seq);
+ }
+}
/* Function get_initial_def_for_reduction
@@ -4675,36 +4705,30 @@ get_initial_def_for_reduction (loop_vec_info loop_vinfo,
}
if (stmts)
- gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
+ vect_emit_reduction_init_stmts (loop_vinfo, reduc_info, stmts);
return init_def;
}
-/* Get at the initial defs for the reduction PHIs for REDUC_INFO, whose
- associated SLP node is SLP_NODE. NUMBER_OF_VECTORS is the number of vector
- defs to create. If NEUTRAL_OP is nonnull, introducing extra elements of
- that value will not change the result. */
+/* Get at the initial defs for the reduction PHIs for REDUC_INFO,
+ which performs a reduction involving GROUP_SIZE scalar statements.
+ NUMBER_OF_VECTORS is the number of vector defs to create. If NEUTRAL_OP
+ is nonnull, introducing extra elements of that value will not change the
+ result. */
static void
-get_initial_defs_for_reduction (vec_info *vinfo,
+get_initial_defs_for_reduction (loop_vec_info loop_vinfo,
stmt_vec_info reduc_info,
- slp_tree slp_node,
vec<tree> *vec_oprnds,
unsigned int number_of_vectors,
- bool reduc_chain, tree neutral_op)
+ unsigned int group_size, tree neutral_op)
{
- vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
+ vec<tree> &initial_values = reduc_info->reduc_initial_values;
unsigned HOST_WIDE_INT nunits;
unsigned j, number_of_places_left_in_vector;
tree vector_type = STMT_VINFO_VECTYPE (reduc_info);
- unsigned int group_size = stmts.length ();
unsigned int i;
- class loop *loop;
-
- loop = (gimple_bb (reduc_info->stmt))->loop_father;
- gcc_assert (loop);
- edge pe = loop_preheader_edge (loop);
- gcc_assert (!reduc_chain || neutral_op);
+ gcc_assert (group_size == initial_values.length () || neutral_op);
/* NUMBER_OF_COPIES is the number of times we need to use the same values in
created vectors. It is greater than 1 if unrolling is performed.
@@ -4734,18 +4758,13 @@ get_initial_defs_for_reduction (vec_info *vinfo,
{
tree op;
i = j % group_size;
- stmt_vec_info stmt_vinfo = stmts[i];
/* Get the def before the loop. In reduction chain we have only
one initial value. Else we have as many as PHIs in the group. */
- if (reduc_chain)
- op = j != 0 ? neutral_op : vect_phi_initial_value (stmt_vinfo);
- else if (((vec_oprnds->length () + 1) * nunits
- - number_of_places_left_in_vector >= group_size)
- && neutral_op)
+ if (i >= initial_values.length () || (j > i && neutral_op))
op = neutral_op;
else
- op = vect_phi_initial_value (stmt_vinfo);
+ op = initial_values[i];
/* Create 'vect_ = {op0,op1,...,opn}'. */
number_of_places_left_in_vector--;
@@ -4781,8 +4800,8 @@ get_initial_defs_for_reduction (vec_info *vinfo,
{
/* First time round, duplicate ELTS to fill the
required number of vectors. */
- duplicate_and_interleave (vinfo, &ctor_seq, vector_type, elts,
- number_of_vectors, *vec_oprnds);
+ duplicate_and_interleave (loop_vinfo, &ctor_seq, vector_type,
+ elts, number_of_vectors, *vec_oprnds);
break;
}
vec_oprnds->quick_push (init);
@@ -4794,7 +4813,7 @@ get_initial_defs_for_reduction (vec_info *vinfo,
}
}
if (ctor_seq != NULL)
- gsi_insert_seq_on_edge_immediate (pe, ctor_seq);
+ vect_emit_reduction_init_stmts (loop_vinfo, reduc_info, ctor_seq);
}
/* For a statement STMT_INFO taking part in a reduction operation return
@@ -4823,6 +4842,99 @@ info_for_reduction (vec_info *vinfo, stmt_vec_info stmt_info)
return stmt_info;
}
+/* See if LOOP_VINFO is an epilogue loop whose main loop had a reduction that
+ REDUC_INFO can build on. Adjust REDUC_INFO and return true if so, otherwise
+ return false. */
+
+static bool
+vect_find_reusable_accumulator (loop_vec_info loop_vinfo,
+ stmt_vec_info reduc_info)
+{
+ loop_vec_info main_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
+ if (!main_loop_vinfo)
+ return false;
+
+ if (STMT_VINFO_REDUC_TYPE (reduc_info) != TREE_CODE_REDUCTION)
+ return false;
+
+ unsigned int num_phis = reduc_info->reduc_initial_values.length ();
+ auto_vec<tree, 16> main_loop_results (num_phis);
+ auto_vec<tree, 16> initial_values (num_phis);
+ if (edge main_loop_edge = loop_vinfo->main_loop_edge)
+ {
+ /* The epilogue loop can be entered either from the main loop or
+ from an earlier guard block. */
+ edge skip_edge = loop_vinfo->skip_main_loop_edge;
+ for (tree incoming_value : reduc_info->reduc_initial_values)
+ {
+ /* Look for:
+
+ INCOMING_VALUE = phi<MAIN_LOOP_RESULT(main loop),
+ INITIAL_VALUE(guard block)>. */
+ gcc_assert (TREE_CODE (incoming_value) == SSA_NAME);
+
+ gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (incoming_value));
+ gcc_assert (gimple_bb (phi) == main_loop_edge->dest);
+
+ tree from_main_loop = PHI_ARG_DEF_FROM_EDGE (phi, main_loop_edge);
+ tree from_skip = PHI_ARG_DEF_FROM_EDGE (phi, skip_edge);
+
+ main_loop_results.quick_push (from_main_loop);
+ initial_values.quick_push (from_skip);
+ }
+ }
+ else
+ /* The main loop dominates the epilogue loop. */
+ main_loop_results.splice (reduc_info->reduc_initial_values);
+
+ /* See if the main loop has the kind of accumulator we need. */
+ vect_reusable_accumulator *accumulator
+ = main_loop_vinfo->reusable_accumulators.get (main_loop_results[0]);
+ if (!accumulator
+ || num_phis != accumulator->reduc_info->reduc_scalar_results.length ()
+ || !std::equal (main_loop_results.begin (), main_loop_results.end (),
+ accumulator->reduc_info->reduc_scalar_results.begin ()))
+ return false;
+
+ /* For now, only handle the case in which both loops are operating on the
+ same vector types. In future we could reduce wider vectors to narrower
+ ones as well. */
+ tree vectype = STMT_VINFO_VECTYPE (reduc_info);
+ tree old_vectype = TREE_TYPE (accumulator->reduc_input);
+ if (!useless_type_conversion_p (old_vectype, vectype))
+ return false;
+
+ /* Non-SLP reductions might apply an adjustment after the reduction
+ operation, in order to simplify the initialization of the accumulator.
+ If the epilogue loop carries on from where the main loop left off,
+ it should apply the same adjustment to the final reduction result.
+
+ If the epilogue loop can also be entered directly (rather than via
+ the main loop), we need to be able to handle that case in the same way,
+ with the same adjustment. (In principle we could add a PHI node
+ to select the correct adjustment, but in practice that shouldn't be
+ necessary.) */
+ tree main_adjustment
+ = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (accumulator->reduc_info);
+ if (loop_vinfo->main_loop_edge && main_adjustment)
+ {
+ gcc_assert (num_phis == 1);
+ tree initial_value = initial_values[0];
+ /* Check that we can use INITIAL_VALUE as the adjustment and
+ initialize the accumulator with a neutral value instead. */
+ if (!operand_equal_p (initial_value, main_adjustment))
+ return false;
+ tree_code code = STMT_VINFO_REDUC_CODE (reduc_info);
+ initial_values[0] = neutral_op_for_reduction (TREE_TYPE (initial_value),
+ code, initial_value);
+ }
+ STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info) = main_adjustment;
+ reduc_info->reduc_initial_values.truncate (0);
+ reduc_info->reduc_initial_values.splice (initial_values);
+ reduc_info->reused_accumulator = accumulator;
+ return true;
+}
+
/* Function vect_create_epilog_for_reduction
Create code at the loop-epilog to finalize the result of a reduction
@@ -4915,7 +5027,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
gimple *use_stmt;
auto_vec<tree> reduc_inputs;
int j, i;
- auto_vec<tree> scalar_results;
+ vec<tree> &scalar_results = reduc_info->reduc_scalar_results;
unsigned int group_size = 1, k;
auto_vec<gimple *> phis;
/* SLP reduction without reduction chain, e.g.,
@@ -4941,16 +5053,12 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
gcc_assert (vectype);
mode = TYPE_MODE (vectype);
- tree initial_def = NULL;
tree induc_val = NULL_TREE;
tree adjustment_def = NULL;
if (slp_node)
;
else
{
- /* Get at the scalar def before the loop, that defines the initial value
- of the reduction variable. */
- initial_def = vect_phi_initial_value (reduc_def_stmt);
/* Optimize: for induction condition reduction, if we can't use zero
for induc_val, use initial_def. */
if (STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
@@ -5196,6 +5304,37 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
reduc_inputs.safe_push (single_input);
}
+ tree orig_reduc_input = reduc_inputs[0];
+
+ /* If this loop is an epilogue loop that can be skipped after the
+ main loop, we can only share a reduction operation between the
+ main loop and the epilogue if we put it at the target of the
+ skip edge.
+
+ We can still reuse accumulators if this check fails. Doing so has
+ the minor(?) benefit of making the epilogue loop's scalar result
+ independent of the main loop's scalar result. */
+ bool unify_with_main_loop_p = false;
+ if (reduc_info->reused_accumulator
+ && loop_vinfo->skip_this_loop_edge
+ && single_succ_p (exit_bb)
+ && single_succ (exit_bb) == loop_vinfo->skip_this_loop_edge->dest)
+ {
+ unify_with_main_loop_p = true;
+
+ basic_block reduc_block = loop_vinfo->skip_this_loop_edge->dest;
+ reduc_inputs[0] = make_ssa_name (vectype);
+ gphi *new_phi = create_phi_node (reduc_inputs[0], reduc_block);
+ add_phi_arg (new_phi, orig_reduc_input, single_succ_edge (exit_bb),
+ UNKNOWN_LOCATION);
+ add_phi_arg (new_phi, reduc_info->reused_accumulator->reduc_input,
+ loop_vinfo->skip_this_loop_edge, UNKNOWN_LOCATION);
+ exit_gsi = gsi_after_labels (reduc_block);
+ }
+
+ /* Shouldn't be used beyond this point. */
+ exit_bb = nullptr;
+
if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION
&& reduc_fn != IFN_LAST)
{
@@ -5405,6 +5544,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
the same as initial_def already. */
tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp,
induc_val);
+ tree initial_def = reduc_info->reduc_initial_values[0];
tmp = make_ssa_name (new_scalar_dest);
epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare,
@@ -5425,9 +5565,6 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
gcc_assert (reduc_inputs.length () == 1);
gcc_assert (pow2p_hwi (group_size));
- slp_tree orig_phis_slp_node = slp_node_instance->reduc_phis;
- vec<stmt_vec_info> orig_phis
- = SLP_TREE_SCALAR_STMTS (orig_phis_slp_node);
gimple_seq seq = NULL;
/* Build a vector {0, 1, 2, ...}, with the same number of elements
@@ -5452,7 +5589,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
{
tree initial_value = NULL_TREE;
if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
- initial_value = vect_phi_initial_value (orig_phis[0]);
+ initial_value = reduc_info->reduc_initial_values[0];
neutral_op = neutral_op_for_reduction (TREE_TYPE (vectype), code,
initial_value);
}
@@ -5466,7 +5603,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
for MIN and MAX reduction, for example. */
if (!neutral_op)
{
- tree scalar_value = vect_phi_initial_value (orig_phis[i]);
+ tree scalar_value = reduc_info->reduc_initial_values[i];
scalar_value = gimple_convert (&seq, TREE_TYPE (vectype),
scalar_value);
vector_identity = gimple_build_vector_from_val (&seq, vectype,
@@ -5780,6 +5917,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
the same as initial_def already. */
tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp,
induc_val);
+ tree initial_def = reduc_info->reduc_initial_values[0];
tree tmp = make_ssa_name (new_scalar_dest);
epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare,
@@ -5819,6 +5957,11 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
scalar_results[0] = new_temp;
}
+ /* Record this operation if it could be reused by the epilogue loop. */
+ if (STMT_VINFO_REDUC_TYPE (reduc_info) == TREE_CODE_REDUCTION)
+ loop_vinfo->reusable_accumulators.put (scalar_results[0],
+ { orig_reduc_input, reduc_info });
+
if (double_reduc)
loop = outer_loop;
@@ -5886,6 +6029,17 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
{
/* Replace the uses: */
orig_name = PHI_RESULT (exit_phi);
+
+ /* Look for a single use at the target of the skip edge. */
+ if (unify_with_main_loop_p)
+ {
+ use_operand_p use_p;
+ gimple *user;
+ if (!single_imm_use (orig_name, &use_p, &user))
+ gcc_unreachable ();
+ orig_name = gimple_get_lhs (user);
+ }
+
scalar_result = scalar_results[k];
FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
{
@@ -7421,16 +7575,32 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
else
{
gcc_assert (slp_node == slp_node_instance->reduc_phis);
- tree initial_value = NULL_TREE;
+ vec<tree> &initial_values = reduc_info->reduc_initial_values;
+ vec<stmt_vec_info> &stmts = SLP_TREE_SCALAR_STMTS (slp_node);
+
+ unsigned int num_phis = stmts.length ();
if (REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info))
- initial_value = vect_phi_initial_value (phi);
- tree_code code = STMT_VINFO_REDUC_CODE (reduc_info);
- tree neutral_op = neutral_op_for_reduction (TREE_TYPE (vectype_out),
- code, initial_value);
- get_initial_defs_for_reduction (loop_vinfo, reduc_info,
- slp_node_instance->reduc_phis,
- &vec_initial_defs, vec_num,
- initial_value != NULL, neutral_op);
+ num_phis = 1;
+ initial_values.reserve (num_phis);
+ for (unsigned int i = 0; i < num_phis; ++i)
+ {
+ gphi *this_phi = as_a<gphi *> (stmts[i]->stmt);
+ initial_values.quick_push (vect_phi_initial_value (this_phi));
+ }
+ if (vec_num == 1)
+ vect_find_reusable_accumulator (loop_vinfo, reduc_info);
+ if (!initial_values.is_empty ())
+ {
+ tree initial_value
+ = (num_phis == 1 ? initial_values[0] : NULL_TREE);
+ tree_code code = STMT_VINFO_REDUC_CODE (reduc_info);
+ tree neutral_op
+ = neutral_op_for_reduction (TREE_TYPE (vectype_out),
+ code, initial_value);
+ get_initial_defs_for_reduction (loop_vinfo, reduc_info,
+ &vec_initial_defs, vec_num,
+ stmts.length (), neutral_op);
+ }
}
}
else
@@ -7438,6 +7608,7 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
/* Get at the scalar def before the loop, that defines the initial
value of the reduction variable. */
tree initial_def = vect_phi_initial_value (phi);
+ reduc_info->reduc_initial_values.safe_push (initial_def);
/* Optimize: if initial_def is for REDUC_MAX smaller than the base
and we can't use zero for induc_val, use initial_def. Similarly
for REDUC_MIN and initial_def larger than the base. */
@@ -7474,21 +7645,30 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
initial_def, initial_def);
else
{
- enum tree_code code = STMT_VINFO_REDUC_CODE (reduc_info);
- tree neutral_op = neutral_op_for_reduction (TREE_TYPE (initial_def),
- code, initial_def);
- gcc_assert (neutral_op);
- /* Try to simplify the vector initialization by applying an
- adjustment after the reduction has been performed. */
- if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
- && !operand_equal_p (neutral_op, initial_def))
+ if (ncopies == 1)
+ vect_find_reusable_accumulator (loop_vinfo, reduc_info);
+ if (!reduc_info->reduc_initial_values.is_empty ())
{
- STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info) = initial_def;
- initial_def = neutral_op;
+ initial_def = reduc_info->reduc_initial_values[0];
+ enum tree_code code = STMT_VINFO_REDUC_CODE (reduc_info);
+ tree neutral_op
+ = neutral_op_for_reduction (TREE_TYPE (initial_def),
+ code, initial_def);
+ gcc_assert (neutral_op);
+ /* Try to simplify the vector initialization by applying an
+ adjustment after the reduction has been performed. */
+ if (!reduc_info->reused_accumulator
+ && STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
+ && !operand_equal_p (neutral_op, initial_def))
+ {
+ STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info)
+ = initial_def;
+ initial_def = neutral_op;
+ }
+ vec_initial_def
+ = get_initial_def_for_reduction (loop_vinfo, reduc_info,
+ initial_def, neutral_op);
}
- vec_initial_def
- = get_initial_def_for_reduction (loop_vinfo, reduc_info,
- initial_def, neutral_op);
}
}
@@ -7499,6 +7679,17 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
vec_initial_defs.quick_push (vec_initial_def);
}
+ if (auto *accumulator = reduc_info->reused_accumulator)
+ {
+ if (loop_vinfo->main_loop_edge)
+ vec_initial_defs[0]
+ = vect_get_main_loop_result (loop_vinfo, accumulator->reduc_input,
+ vec_initial_defs[0]);
+ else
+ vec_initial_defs.safe_push (accumulator->reduc_input);
+ gcc_assert (vec_initial_defs.length () == 1);
+ }
+
/* Generate the reduction PHIs upfront. */
for (i = 0; i < vec_num; i++)
{