diff options
author | Andre Vieira <andre.simoesdiasvieira@arm.com> | 2019-10-29 13:15:46 +0000 |
---|---|---|
committer | Andre Vieira <avieira@gcc.gnu.org> | 2019-10-29 13:15:46 +0000 |
commit | 97c146036750e7cb3966d292572ec158a78f356e (patch) | |
tree | f8bd6e942ea43e0643932c1975603c427d76bb64 /gcc/tree-vect-loop-manip.c | |
parent | 3ab768774318cff1ecc7951a59bc9f66ceb015ff (diff) | |
download | gcc-97c146036750e7cb3966d292572ec158a78f356e.zip gcc-97c146036750e7cb3966d292572ec158a78f356e.tar.gz gcc-97c146036750e7cb3966d292572ec158a78f356e.tar.bz2 |
[vect]PR 88915: Vectorize epilogues when versioning loops
gcc/ChangeLog:
2019-10-29 Andre Vieira <andre.simoesdiasvieira@arm.com>
PR 88915
* tree-ssa-loop-niter.h (simplify_replace_tree): Change declaration.
* tree-ssa-loop-niter.c (simplify_replace_tree): Add context parameter
and make the valueize function pointer also take a void pointer.
* gcc/tree-ssa-sccvn.c (vn_valueize_wrapper): New function to wrap
around vn_valueize, to call it without a context.
(process_bb): Use vn_valueize_wrapper instead of vn_valueize.
* tree-vect-loop.c (_loop_vec_info): Initialize epilogue_vinfos.
(~_loop_vec_info): Release epilogue_vinfos.
(vect_analyze_loop_costing): Use knowledge of main VF to estimate
number of iterations of epilogue.
(vect_analyze_loop_2): Adapt to analyse main loop for all supported
vector sizes when vect-epilogues-nomask=1. Also keep track of lowest
versioning threshold needed for main loop.
(vect_analyze_loop): Likewise.
(find_in_mapping): New helper function.
(update_epilogue_loop_vinfo): New function.
(vect_transform_loop): When vectorizing epilogues re-use analysis done
on main loop and call update_epilogue_loop_vinfo to update it.
* tree-vect-loop-manip.c (vect_update_inits_of_drs): No longer insert
stmts on loop preheader edge.
(vect_do_peeling): Enable skip-vectors when doing loop versioning if
we decided to vectorize epilogues. Update epilogues NITERS and
construct ADVANCE to update epilogues data references where needed.
* tree-vectorizer.h (_loop_vec_info): Add epilogue_vinfos.
(vect_do_peeling, vect_update_inits_of_drs,
determine_peel_for_niter, vect_analyze_loop): Add or update
declarations.
* tree-vectorizer.c (try_vectorize_loop_1): Make sure to use already
created loop_vec_info's for epilogues when available. Otherwise analyse
epilogue separately.
From-SVN: r277569
Diffstat (limited to 'gcc/tree-vect-loop-manip.c')
-rw-r--r-- | gcc/tree-vect-loop-manip.c | 203 |
1 files changed, 176 insertions, 27 deletions
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c index b3246bc..dffb40e 100644 --- a/gcc/tree-vect-loop-manip.c +++ b/gcc/tree-vect-loop-manip.c @@ -1726,7 +1726,7 @@ vect_update_init_of_dr (struct data_reference *dr, tree niters, tree_code code) Apply vect_update_inits_of_dr to all accesses in LOOP_VINFO. CODE and NITERS are as for vect_update_inits_of_dr. */ -static void +void vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters, tree_code code) { @@ -1736,21 +1736,12 @@ vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters, DUMP_VECT_SCOPE ("vect_update_inits_of_dr"); - /* Adjust niters to sizetype and insert stmts on loop preheader edge. */ + /* Adjust niters to sizetype. We used to insert the stmts on loop preheader + here, but since we might use these niters to update the epilogues niters + and data references we can't insert them here as this definition might not + always dominate its uses. */ if (!types_compatible_p (sizetype, TREE_TYPE (niters))) - { - gimple_seq seq; - edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); - tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters"); - - niters = fold_convert (sizetype, niters); - niters = force_gimple_operand (niters, &seq, false, var); - if (seq) - { - basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq); - gcc_assert (!new_bb); - } - } + niters = fold_convert (sizetype, niters); FOR_EACH_VEC_ELT (datarefs, i, dr) { @@ -2393,7 +2384,22 @@ slpeel_update_phi_nodes_for_lcssa (class loop *epilog) Note this function peels prolog and epilog only if it's necessary, as well as guards. - Returns created epilogue or NULL. + This function returns the epilogue loop if a decision was made to vectorize + it, otherwise NULL. + + The analysis resulting in this epilogue loop's loop_vec_info was performed + in the same vect_analyze_loop call as the main loop's. At that time + vect_analyze_loop constructs a list of accepted loop_vec_info's for lower + vectorization factors than the main loop. This list is stored in the main + loop's loop_vec_info in the 'epilogue_vinfos' member. Everytime we decide to + vectorize the epilogue loop for a lower vectorization factor, the + loop_vec_info sitting at the top of the epilogue_vinfos list is removed, + updated and linked to the epilogue loop. This is later used to vectorize + the epilogue. The reason the loop_vec_info needs updating is that it was + constructed based on the original main loop, and the epilogue loop is a + copy of this loop, so all links pointing to statements in the original loop + need updating. Furthermore, these loop_vec_infos share the + data_reference's records, which will also need to be updated. TODO: Guard for prefer_scalar_loop should be emitted along with versioning conditions if loop versioning is needed. */ @@ -2403,7 +2409,8 @@ class loop * vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, tree *niters_vector, tree *step_vector, tree *niters_vector_mult_vf_var, int th, - bool check_profitability, bool niters_no_overflow) + bool check_profitability, bool niters_no_overflow, + tree *advance, drs_init_vec &orig_drs_init) { edge e, guard_e; tree type = TREE_TYPE (niters), guard_cond; @@ -2411,6 +2418,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, profile_probability prob_prolog, prob_vector, prob_epilog; int estimated_vf; int prolog_peeling = 0; + bool vect_epilogues = loop_vinfo->epilogue_vinfos.length () > 0; /* We currently do not support prolog peeling if the target alignment is not known at compile time. 'vect_gen_prolog_loop_niters' depends on the target alignment being constant. */ @@ -2464,19 +2472,73 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, int bound_prolog = 0; if (prolog_peeling) niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor, - &bound_prolog); + &bound_prolog); else niters_prolog = build_int_cst (type, 0); + loop_vec_info epilogue_vinfo = NULL; + if (vect_epilogues) + { + epilogue_vinfo = loop_vinfo->epilogue_vinfos[0]; + loop_vinfo->epilogue_vinfos.ordered_remove (0); + } + + tree niters_vector_mult_vf = NULL_TREE; + /* Saving NITERs before the loop, as this may be changed by prologue. */ + tree before_loop_niters = LOOP_VINFO_NITERS (loop_vinfo); + edge update_e = NULL, skip_e = NULL; + unsigned int lowest_vf = constant_lower_bound (vf); + /* If we know the number of scalar iterations for the main loop we should + check whether after the main loop there are enough iterations left over + for the epilogue. */ + if (vect_epilogues + && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) + && prolog_peeling >= 0 + && known_eq (vf, lowest_vf)) + { + unsigned HOST_WIDE_INT eiters + = (LOOP_VINFO_INT_NITERS (loop_vinfo) + - LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)); + + eiters -= prolog_peeling; + eiters + = eiters % lowest_vf + LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo); + + unsigned int ratio; + while (!(constant_multiple_p (loop_vinfo->vector_size, + epilogue_vinfo->vector_size, &ratio) + && eiters >= lowest_vf / ratio)) + { + delete epilogue_vinfo; + epilogue_vinfo = NULL; + if (loop_vinfo->epilogue_vinfos.length () == 0) + { + vect_epilogues = false; + break; + } + epilogue_vinfo = loop_vinfo->epilogue_vinfos[0]; + loop_vinfo->epilogue_vinfos.ordered_remove (0); + } + } /* Prolog loop may be skipped. */ bool skip_prolog = (prolog_peeling != 0); - /* Skip to epilog if scalar loop may be preferred. It's only needed - when we peel for epilog loop and when it hasn't been checked with - loop versioning. */ + /* Skip this loop to epilog when there are not enough iterations to enter this + vectorized loop. If true we should perform runtime checks on the NITERS + to check whether we should skip the current vectorized loop. If we know + the number of scalar iterations we may choose to add a runtime check if + this number "maybe" smaller than the number of iterations required + when we know the number of scalar iterations may potentially + be smaller than the number of iterations required to enter this loop, for + this we use the upper bounds on the prolog and epilog peeling. When we + don't know the number of iterations and don't require versioning it is + because we have asserted that there are enough scalar iterations to enter + the main loop, so this skip is not necessary. When we are versioning then + we only add such a skip if we have chosen to vectorize the epilogue. */ bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) ? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo), bound_prolog + bound_epilog) - : !LOOP_REQUIRES_VERSIONING (loop_vinfo)); + : (!LOOP_REQUIRES_VERSIONING (loop_vinfo) + || vect_epilogues)); /* Epilog loop must be executed if the number of iterations for epilog loop is known at compile time, otherwise we need to add a check at the end of vector loop and skip to the end of epilog loop. */ @@ -2506,6 +2568,12 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, dump_user_location_t loop_loc = find_loop_location (loop); class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo); + if (vect_epilogues) + /* Make sure to set the epilogue's epilogue scalar loop, such that we can + use the original scalar loop as remaining epilogue if necessary. */ + LOOP_VINFO_SCALAR_LOOP (epilogue_vinfo) + = LOOP_VINFO_SCALAR_LOOP (loop_vinfo); + if (prolog_peeling) { e = loop_preheader_edge (loop); @@ -2552,6 +2620,15 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog); scale_loop_profile (prolog, prob_prolog, bound_prolog); } + + /* Save original inits for each data_reference before advancing them with + NITERS_PROLOG. */ + unsigned int i; + struct data_reference *dr; + vec<data_reference_p> datarefs = loop_vinfo->shared->datarefs; + FOR_EACH_VEC_ELT (datarefs, i, dr) + orig_drs_init.safe_push (std::make_pair (dr, DR_OFFSET (dr))); + /* Update init address of DRs. */ vect_update_inits_of_drs (loop_vinfo, niters_prolog, PLUS_EXPR); /* Update niters for vector loop. */ @@ -2586,8 +2663,15 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, "loop can't be duplicated to exit edge.\n"); gcc_unreachable (); } - /* Peel epilog and put it on exit edge of loop. */ - epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e); + /* Peel epilog and put it on exit edge of loop. If we are vectorizing + said epilog then we should use a copy of the main loop as a starting + point. This loop may have already had some preliminary transformations + to allow for more optimal vectorization, for example if-conversion. + If we are not vectorizing the epilog then we should use the scalar loop + as the transformations mentioned above make less or no sense when not + vectorizing. */ + epilog = vect_epilogues ? get_loop_copy (loop) : scalar_loop; + epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, epilog, e); if (!epilog) { dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc, @@ -2616,6 +2700,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, guard_to, guard_bb, prob_vector.invert (), irred_flag); + skip_e = guard_e; e = EDGE_PRED (guard_to, 0); e = (e != guard_e ? e : EDGE_PRED (guard_to, 1)); slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e); @@ -2637,7 +2722,6 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, } basic_block bb_before_epilog = loop_preheader_edge (epilog)->src; - tree niters_vector_mult_vf; /* If loop is peeled for non-zero constant times, now niters refers to orig_niters - prolog_peeling, it won't overflow even the orig_niters overflows. */ @@ -2660,7 +2744,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, /* Update IVs of original loop as if they were advanced by niters_vector_mult_vf steps. */ gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo)); - edge update_e = skip_vector ? e : loop_preheader_edge (epilog); + update_e = skip_vector ? e : loop_preheader_edge (epilog); vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf, update_e); @@ -2701,10 +2785,75 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, adjust_vec_debug_stmts (); scev_reset (); } + + if (vect_epilogues) + { + epilog->aux = epilogue_vinfo; + LOOP_VINFO_LOOP (epilogue_vinfo) = epilog; + + loop_constraint_clear (epilog, LOOP_C_INFINITE); + + /* We now must calculate the number of NITERS performed by the previous + loop and EPILOGUE_NITERS to be performed by the epilogue. */ + tree niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters_vector_mult_vf), + niters_prolog, niters_vector_mult_vf); + + /* If skip_vector we may skip the previous loop, we insert a phi-node to + determine whether we are coming from the previous vectorized loop + using the update_e edge or the skip_vector basic block using the + skip_e edge. */ + if (skip_vector) + { + gcc_assert (update_e != NULL && skip_e != NULL); + gphi *new_phi = create_phi_node (make_ssa_name (TREE_TYPE (niters)), + update_e->dest); + tree new_ssa = make_ssa_name (TREE_TYPE (niters)); + gimple *stmt = gimple_build_assign (new_ssa, niters); + gimple_stmt_iterator gsi; + if (TREE_CODE (niters_vector_mult_vf) == SSA_NAME + && SSA_NAME_DEF_STMT (niters_vector_mult_vf)->bb != NULL) + { + gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (niters_vector_mult_vf)); + gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); + } + else + { + gsi = gsi_last_bb (update_e->src); + gsi_insert_before (&gsi, stmt, GSI_NEW_STMT); + } + + niters = new_ssa; + add_phi_arg (new_phi, niters, update_e, UNKNOWN_LOCATION); + add_phi_arg (new_phi, build_zero_cst (TREE_TYPE (niters)), skip_e, + UNKNOWN_LOCATION); + niters = PHI_RESULT (new_phi); + } + + /* Subtract the number of iterations performed by the vectorized loop + from the number of total iterations. */ + tree epilogue_niters = fold_build2 (MINUS_EXPR, TREE_TYPE (niters), + before_loop_niters, + niters); + + LOOP_VINFO_NITERS (epilogue_vinfo) = epilogue_niters; + LOOP_VINFO_NITERSM1 (epilogue_vinfo) + = fold_build2 (MINUS_EXPR, TREE_TYPE (epilogue_niters), + epilogue_niters, + build_one_cst (TREE_TYPE (epilogue_niters))); + + /* Set ADVANCE to the number of iterations performed by the previous + loop and its prologue. */ + *advance = niters; + + /* Redo the peeling for niter analysis as the NITERs and alignment + may have been updated to take the main loop into account. */ + determine_peel_for_niter (epilogue_vinfo); + } + adjust_vec.release (); free_original_copy_tables (); - return epilog; + return vect_epilogues ? epilog : NULL; } /* Function vect_create_cond_for_niters_checks. |