aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-vect-loop-manip.c
diff options
context:
space:
mode:
authorAndre Vieira <andre.simoesdiasvieira@arm.com>2019-10-29 13:15:46 +0000
committerAndre Vieira <avieira@gcc.gnu.org>2019-10-29 13:15:46 +0000
commit97c146036750e7cb3966d292572ec158a78f356e (patch)
treef8bd6e942ea43e0643932c1975603c427d76bb64 /gcc/tree-vect-loop-manip.c
parent3ab768774318cff1ecc7951a59bc9f66ceb015ff (diff)
downloadgcc-97c146036750e7cb3966d292572ec158a78f356e.zip
gcc-97c146036750e7cb3966d292572ec158a78f356e.tar.gz
gcc-97c146036750e7cb3966d292572ec158a78f356e.tar.bz2
[vect]PR 88915: Vectorize epilogues when versioning loops
gcc/ChangeLog: 2019-10-29 Andre Vieira <andre.simoesdiasvieira@arm.com> PR 88915 * tree-ssa-loop-niter.h (simplify_replace_tree): Change declaration. * tree-ssa-loop-niter.c (simplify_replace_tree): Add context parameter and make the valueize function pointer also take a void pointer. * gcc/tree-ssa-sccvn.c (vn_valueize_wrapper): New function to wrap around vn_valueize, to call it without a context. (process_bb): Use vn_valueize_wrapper instead of vn_valueize. * tree-vect-loop.c (_loop_vec_info): Initialize epilogue_vinfos. (~_loop_vec_info): Release epilogue_vinfos. (vect_analyze_loop_costing): Use knowledge of main VF to estimate number of iterations of epilogue. (vect_analyze_loop_2): Adapt to analyse main loop for all supported vector sizes when vect-epilogues-nomask=1. Also keep track of lowest versioning threshold needed for main loop. (vect_analyze_loop): Likewise. (find_in_mapping): New helper function. (update_epilogue_loop_vinfo): New function. (vect_transform_loop): When vectorizing epilogues re-use analysis done on main loop and call update_epilogue_loop_vinfo to update it. * tree-vect-loop-manip.c (vect_update_inits_of_drs): No longer insert stmts on loop preheader edge. (vect_do_peeling): Enable skip-vectors when doing loop versioning if we decided to vectorize epilogues. Update epilogues NITERS and construct ADVANCE to update epilogues data references where needed. * tree-vectorizer.h (_loop_vec_info): Add epilogue_vinfos. (vect_do_peeling, vect_update_inits_of_drs, determine_peel_for_niter, vect_analyze_loop): Add or update declarations. * tree-vectorizer.c (try_vectorize_loop_1): Make sure to use already created loop_vec_info's for epilogues when available. Otherwise analyse epilogue separately. From-SVN: r277569
Diffstat (limited to 'gcc/tree-vect-loop-manip.c')
-rw-r--r--gcc/tree-vect-loop-manip.c203
1 files changed, 176 insertions, 27 deletions
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index b3246bc..dffb40e 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -1726,7 +1726,7 @@ vect_update_init_of_dr (struct data_reference *dr, tree niters, tree_code code)
Apply vect_update_inits_of_dr to all accesses in LOOP_VINFO.
CODE and NITERS are as for vect_update_inits_of_dr. */
-static void
+void
vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters,
tree_code code)
{
@@ -1736,21 +1736,12 @@ vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters,
DUMP_VECT_SCOPE ("vect_update_inits_of_dr");
- /* Adjust niters to sizetype and insert stmts on loop preheader edge. */
+ /* Adjust niters to sizetype. We used to insert the stmts on loop preheader
+ here, but since we might use these niters to update the epilogues niters
+ and data references we can't insert them here as this definition might not
+ always dominate its uses. */
if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
- {
- gimple_seq seq;
- edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
- tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
-
- niters = fold_convert (sizetype, niters);
- niters = force_gimple_operand (niters, &seq, false, var);
- if (seq)
- {
- basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
- gcc_assert (!new_bb);
- }
- }
+ niters = fold_convert (sizetype, niters);
FOR_EACH_VEC_ELT (datarefs, i, dr)
{
@@ -2393,7 +2384,22 @@ slpeel_update_phi_nodes_for_lcssa (class loop *epilog)
Note this function peels prolog and epilog only if it's necessary,
as well as guards.
- Returns created epilogue or NULL.
+ This function returns the epilogue loop if a decision was made to vectorize
+ it, otherwise NULL.
+
+ The analysis resulting in this epilogue loop's loop_vec_info was performed
+ in the same vect_analyze_loop call as the main loop's. At that time
+ vect_analyze_loop constructs a list of accepted loop_vec_info's for lower
+ vectorization factors than the main loop. This list is stored in the main
+ loop's loop_vec_info in the 'epilogue_vinfos' member. Everytime we decide to
+ vectorize the epilogue loop for a lower vectorization factor, the
+ loop_vec_info sitting at the top of the epilogue_vinfos list is removed,
+ updated and linked to the epilogue loop. This is later used to vectorize
+ the epilogue. The reason the loop_vec_info needs updating is that it was
+ constructed based on the original main loop, and the epilogue loop is a
+ copy of this loop, so all links pointing to statements in the original loop
+ need updating. Furthermore, these loop_vec_infos share the
+ data_reference's records, which will also need to be updated.
TODO: Guard for prefer_scalar_loop should be emitted along with
versioning conditions if loop versioning is needed. */
@@ -2403,7 +2409,8 @@ class loop *
vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
tree *niters_vector, tree *step_vector,
tree *niters_vector_mult_vf_var, int th,
- bool check_profitability, bool niters_no_overflow)
+ bool check_profitability, bool niters_no_overflow,
+ tree *advance, drs_init_vec &orig_drs_init)
{
edge e, guard_e;
tree type = TREE_TYPE (niters), guard_cond;
@@ -2411,6 +2418,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
profile_probability prob_prolog, prob_vector, prob_epilog;
int estimated_vf;
int prolog_peeling = 0;
+ bool vect_epilogues = loop_vinfo->epilogue_vinfos.length () > 0;
/* We currently do not support prolog peeling if the target alignment is not
known at compile time. 'vect_gen_prolog_loop_niters' depends on the
target alignment being constant. */
@@ -2464,19 +2472,73 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
int bound_prolog = 0;
if (prolog_peeling)
niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
- &bound_prolog);
+ &bound_prolog);
else
niters_prolog = build_int_cst (type, 0);
+ loop_vec_info epilogue_vinfo = NULL;
+ if (vect_epilogues)
+ {
+ epilogue_vinfo = loop_vinfo->epilogue_vinfos[0];
+ loop_vinfo->epilogue_vinfos.ordered_remove (0);
+ }
+
+ tree niters_vector_mult_vf = NULL_TREE;
+ /* Saving NITERs before the loop, as this may be changed by prologue. */
+ tree before_loop_niters = LOOP_VINFO_NITERS (loop_vinfo);
+ edge update_e = NULL, skip_e = NULL;
+ unsigned int lowest_vf = constant_lower_bound (vf);
+ /* If we know the number of scalar iterations for the main loop we should
+ check whether after the main loop there are enough iterations left over
+ for the epilogue. */
+ if (vect_epilogues
+ && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ && prolog_peeling >= 0
+ && known_eq (vf, lowest_vf))
+ {
+ unsigned HOST_WIDE_INT eiters
+ = (LOOP_VINFO_INT_NITERS (loop_vinfo)
+ - LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
+
+ eiters -= prolog_peeling;
+ eiters
+ = eiters % lowest_vf + LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
+
+ unsigned int ratio;
+ while (!(constant_multiple_p (loop_vinfo->vector_size,
+ epilogue_vinfo->vector_size, &ratio)
+ && eiters >= lowest_vf / ratio))
+ {
+ delete epilogue_vinfo;
+ epilogue_vinfo = NULL;
+ if (loop_vinfo->epilogue_vinfos.length () == 0)
+ {
+ vect_epilogues = false;
+ break;
+ }
+ epilogue_vinfo = loop_vinfo->epilogue_vinfos[0];
+ loop_vinfo->epilogue_vinfos.ordered_remove (0);
+ }
+ }
/* Prolog loop may be skipped. */
bool skip_prolog = (prolog_peeling != 0);
- /* Skip to epilog if scalar loop may be preferred. It's only needed
- when we peel for epilog loop and when it hasn't been checked with
- loop versioning. */
+ /* Skip this loop to epilog when there are not enough iterations to enter this
+ vectorized loop. If true we should perform runtime checks on the NITERS
+ to check whether we should skip the current vectorized loop. If we know
+ the number of scalar iterations we may choose to add a runtime check if
+ this number "maybe" smaller than the number of iterations required
+ when we know the number of scalar iterations may potentially
+ be smaller than the number of iterations required to enter this loop, for
+ this we use the upper bounds on the prolog and epilog peeling. When we
+ don't know the number of iterations and don't require versioning it is
+ because we have asserted that there are enough scalar iterations to enter
+ the main loop, so this skip is not necessary. When we are versioning then
+ we only add such a skip if we have chosen to vectorize the epilogue. */
bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo),
bound_prolog + bound_epilog)
- : !LOOP_REQUIRES_VERSIONING (loop_vinfo));
+ : (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
+ || vect_epilogues));
/* Epilog loop must be executed if the number of iterations for epilog
loop is known at compile time, otherwise we need to add a check at
the end of vector loop and skip to the end of epilog loop. */
@@ -2506,6 +2568,12 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
dump_user_location_t loop_loc = find_loop_location (loop);
class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
+ if (vect_epilogues)
+ /* Make sure to set the epilogue's epilogue scalar loop, such that we can
+ use the original scalar loop as remaining epilogue if necessary. */
+ LOOP_VINFO_SCALAR_LOOP (epilogue_vinfo)
+ = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
+
if (prolog_peeling)
{
e = loop_preheader_edge (loop);
@@ -2552,6 +2620,15 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
scale_loop_profile (prolog, prob_prolog, bound_prolog);
}
+
+ /* Save original inits for each data_reference before advancing them with
+ NITERS_PROLOG. */
+ unsigned int i;
+ struct data_reference *dr;
+ vec<data_reference_p> datarefs = loop_vinfo->shared->datarefs;
+ FOR_EACH_VEC_ELT (datarefs, i, dr)
+ orig_drs_init.safe_push (std::make_pair (dr, DR_OFFSET (dr)));
+
/* Update init address of DRs. */
vect_update_inits_of_drs (loop_vinfo, niters_prolog, PLUS_EXPR);
/* Update niters for vector loop. */
@@ -2586,8 +2663,15 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
"loop can't be duplicated to exit edge.\n");
gcc_unreachable ();
}
- /* Peel epilog and put it on exit edge of loop. */
- epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
+ /* Peel epilog and put it on exit edge of loop. If we are vectorizing
+ said epilog then we should use a copy of the main loop as a starting
+ point. This loop may have already had some preliminary transformations
+ to allow for more optimal vectorization, for example if-conversion.
+ If we are not vectorizing the epilog then we should use the scalar loop
+ as the transformations mentioned above make less or no sense when not
+ vectorizing. */
+ epilog = vect_epilogues ? get_loop_copy (loop) : scalar_loop;
+ epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, epilog, e);
if (!epilog)
{
dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
@@ -2616,6 +2700,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
guard_to, guard_bb,
prob_vector.invert (),
irred_flag);
+ skip_e = guard_e;
e = EDGE_PRED (guard_to, 0);
e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
@@ -2637,7 +2722,6 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
}
basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
- tree niters_vector_mult_vf;
/* If loop is peeled for non-zero constant times, now niters refers to
orig_niters - prolog_peeling, it won't overflow even the orig_niters
overflows. */
@@ -2660,7 +2744,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
/* Update IVs of original loop as if they were advanced by
niters_vector_mult_vf steps. */
gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
- edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
+ update_e = skip_vector ? e : loop_preheader_edge (epilog);
vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
update_e);
@@ -2701,10 +2785,75 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
adjust_vec_debug_stmts ();
scev_reset ();
}
+
+ if (vect_epilogues)
+ {
+ epilog->aux = epilogue_vinfo;
+ LOOP_VINFO_LOOP (epilogue_vinfo) = epilog;
+
+ loop_constraint_clear (epilog, LOOP_C_INFINITE);
+
+ /* We now must calculate the number of NITERS performed by the previous
+ loop and EPILOGUE_NITERS to be performed by the epilogue. */
+ tree niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters_vector_mult_vf),
+ niters_prolog, niters_vector_mult_vf);
+
+ /* If skip_vector we may skip the previous loop, we insert a phi-node to
+ determine whether we are coming from the previous vectorized loop
+ using the update_e edge or the skip_vector basic block using the
+ skip_e edge. */
+ if (skip_vector)
+ {
+ gcc_assert (update_e != NULL && skip_e != NULL);
+ gphi *new_phi = create_phi_node (make_ssa_name (TREE_TYPE (niters)),
+ update_e->dest);
+ tree new_ssa = make_ssa_name (TREE_TYPE (niters));
+ gimple *stmt = gimple_build_assign (new_ssa, niters);
+ gimple_stmt_iterator gsi;
+ if (TREE_CODE (niters_vector_mult_vf) == SSA_NAME
+ && SSA_NAME_DEF_STMT (niters_vector_mult_vf)->bb != NULL)
+ {
+ gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (niters_vector_mult_vf));
+ gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
+ }
+ else
+ {
+ gsi = gsi_last_bb (update_e->src);
+ gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
+ }
+
+ niters = new_ssa;
+ add_phi_arg (new_phi, niters, update_e, UNKNOWN_LOCATION);
+ add_phi_arg (new_phi, build_zero_cst (TREE_TYPE (niters)), skip_e,
+ UNKNOWN_LOCATION);
+ niters = PHI_RESULT (new_phi);
+ }
+
+ /* Subtract the number of iterations performed by the vectorized loop
+ from the number of total iterations. */
+ tree epilogue_niters = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
+ before_loop_niters,
+ niters);
+
+ LOOP_VINFO_NITERS (epilogue_vinfo) = epilogue_niters;
+ LOOP_VINFO_NITERSM1 (epilogue_vinfo)
+ = fold_build2 (MINUS_EXPR, TREE_TYPE (epilogue_niters),
+ epilogue_niters,
+ build_one_cst (TREE_TYPE (epilogue_niters)));
+
+ /* Set ADVANCE to the number of iterations performed by the previous
+ loop and its prologue. */
+ *advance = niters;
+
+ /* Redo the peeling for niter analysis as the NITERs and alignment
+ may have been updated to take the main loop into account. */
+ determine_peel_for_niter (epilogue_vinfo);
+ }
+
adjust_vec.release ();
free_original_copy_tables ();
- return epilog;
+ return vect_epilogues ? epilog : NULL;
}
/* Function vect_create_cond_for_niters_checks.