diff options
Diffstat (limited to 'gcc/tree-vect-loop-manip.c')
-rw-r--r-- | gcc/tree-vect-loop-manip.c | 74 |
1 files changed, 45 insertions, 29 deletions
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c index 098b428..c8ee229 100644 --- a/gcc/tree-vect-loop-manip.c +++ b/gcc/tree-vect-loop-manip.c @@ -1234,8 +1234,9 @@ vect_build_loop_niters (loop_vec_info loop_vinfo, bool *new_var_p) static tree vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog, - int bound_prolog, int vfm1, int th, - int *bound_scalar, bool check_profitability) + int bound_prolog, poly_int64 vfm1, int th, + poly_uint64 *bound_scalar, + bool check_profitability) { tree type = TREE_TYPE (niters_prolog); tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog, @@ -1250,21 +1251,23 @@ vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog, /* Peeling for constant times. */ if (int_niters_prolog >= 0) { - *bound_scalar = (int_niters_prolog + vfm1 < th - ? th - : vfm1 + int_niters_prolog); + *bound_scalar = upper_bound (int_niters_prolog + vfm1, th); return build_int_cst (type, *bound_scalar); } /* Peeling for unknown times. Note BOUND_PROLOG is the upper bound (inlcuded) of niters of prolog loop. */ - if (th >= vfm1 + bound_prolog) + if (known_ge (th, vfm1 + bound_prolog)) { *bound_scalar = th; return build_int_cst (type, th); } - /* Need to do runtime comparison, but BOUND_SCALAR remains the same. */ - else if (th > vfm1) - return fold_build2 (MAX_EXPR, type, build_int_cst (type, th), niters); + /* Need to do runtime comparison. */ + else if (maybe_gt (th, vfm1)) + { + *bound_scalar = upper_bound (*bound_scalar, th); + return fold_build2 (MAX_EXPR, type, + build_int_cst (type, th), niters); + } } return niters; } @@ -1292,7 +1295,7 @@ vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters, { tree ni_minus_gap, var; tree niters_vector, step_vector, type = TREE_TYPE (niters); - int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); tree log_vf = NULL_TREE; @@ -1315,14 +1318,15 @@ vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters, else ni_minus_gap = niters; - if (1) + unsigned HOST_WIDE_INT const_vf; + if (vf.is_constant (&const_vf)) { /* Create: niters >> log2(vf) */ /* If it's known that niters == number of latch executions + 1 doesn't overflow, we can generate niters >> log2(vf); otherwise we generate (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio will be at least one. */ - log_vf = build_int_cst (type, exact_log2 (vf)); + log_vf = build_int_cst (type, exact_log2 (const_vf)); if (niters_no_overflow) niters_vector = fold_build2 (RSHIFT_EXPR, type, ni_minus_gap, log_vf); else @@ -1373,7 +1377,8 @@ vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo, tree niters_vector, tree *niters_vector_mult_vf_ptr) { - int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + /* We should be using a step_vector of VF if VF is variable. */ + int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant (); struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); tree type = TREE_TYPE (niters_vector); tree log_vf = build_int_cst (type, exact_log2 (vf)); @@ -1790,8 +1795,9 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, tree type = TREE_TYPE (niters), guard_cond; basic_block guard_bb, guard_to; profile_probability prob_prolog, prob_vector, prob_epilog; - int bound_prolog = 0, bound_scalar = 0, bound = 0; - int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + int bound_prolog = 0; + poly_uint64 bound_scalar = 0; + int estimated_vf; int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)); @@ -1800,11 +1806,12 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, return NULL; prob_vector = profile_probability::guessed_always ().apply_scale (9, 10); - if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2) - vf = 3; + estimated_vf = vect_vf_for_cost (loop_vinfo); + if (estimated_vf == 2) + estimated_vf = 3; prob_prolog = prob_epilog = profile_probability::guessed_always () - .apply_scale (vf - 1, vf); - vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + .apply_scale (estimated_vf - 1, estimated_vf); + poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo); struct loop *first_loop = loop; @@ -1824,13 +1831,15 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, /* Skip to epilog if scalar loop may be preferred. It's only needed when we peel for epilog loop and when it hasn't been checked with loop versioning. */ - bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) - && !LOOP_REQUIRES_VERSIONING (loop_vinfo)); + bool skip_vector = ((!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) + && !LOOP_REQUIRES_VERSIONING (loop_vinfo)) + || !vf.is_constant ()); /* Epilog loop must be executed if the number of iterations for epilog loop is known at compile time, otherwise we need to add a check at the end of vector loop and skip to the end of epilog loop. */ bool skip_epilog = (prolog_peeling < 0 - || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)); + || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) + || !vf.is_constant ()); /* PEELING_FOR_GAPS is special because epilog loop must be executed. */ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) skip_epilog = false; @@ -1849,8 +1858,10 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, needs to be scaled back later. */ basic_block bb_before_loop = loop_preheader_edge (loop)->src; if (prob_vector.initialized_p ()) - scale_bbs_frequencies (&bb_before_loop, 1, prob_vector); - scale_loop_profile (loop, prob_vector, bound); + { + scale_bbs_frequencies (&bb_before_loop, 1, prob_vector); + scale_loop_profile (loop, prob_vector, 0); + } } tree niters_prolog = build_int_cst (type, 0); @@ -2036,15 +2047,20 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog); } - scale_loop_profile (epilog, prob_epilog, bound); + scale_loop_profile (epilog, prob_epilog, 0); } else slpeel_update_phi_nodes_for_lcssa (epilog); - bound = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? vf - 1 : vf - 2; - /* We share epilog loop with scalar version loop. */ - bound = MAX (bound, bound_scalar - 1); - record_niter_bound (epilog, bound, false, true); + unsigned HOST_WIDE_INT bound1, bound2; + if (vf.is_constant (&bound1) && bound_scalar.is_constant (&bound2)) + { + bound1 -= LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? 1 : 2; + if (bound2) + /* We share epilog loop with scalar version loop. */ + bound1 = MAX (bound1, bound2 - 1); + record_niter_bound (epilog, bound1, false, true); + } delete_update_ssa (); adjust_vec_debug_stmts (); |