aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-vect-loop-manip.c
diff options
context:
space:
mode:
author: Richard Sandiford <richard.sandiford@linaro.org> 2018-01-03 07:14:07 +0000
committer: Richard Sandiford <rsandifo@gcc.gnu.org> 2018-01-03 07:14:07 +0000
commit: d9f21f6acb3aa615834e855e16b6311cd18c5668 (patch)
tree: 8200866be9cf8f8f2c40f1a190b6dc611888dd8b /gcc/tree-vect-loop-manip.c
parent: fba05d9e9a0321c812ddbda7b4caa3977e1db4ef (diff)
download: gcc-d9f21f6acb3aa615834e855e16b6311cd18c5668.zip
gcc-d9f21f6acb3aa615834e855e16b6311cd18c5668.tar.gz
gcc-d9f21f6acb3aa615834e855e16b6311cd18c5668.tar.bz2
poly_int: vectoriser vf and uf
This patch changes the type of the vectorisation factor and SLP
unrolling factor to poly_uint64. This in turn required some knock-on
changes in signedness elsewhere.

Cost decisions are generally based on estimated_poly_value,
which for VF is wrapped up as vect_vf_for_cost.

The patch doesn't on its own enable variable-length vectorisation.
It just makes the minimum changes necessary for the code to build
with the new VF and UF types. Later patches also make the
vectoriser cope with variable TYPE_VECTOR_SUBPARTS and variable
GET_MODE_NUNITS, at which point the code really does handle
variable-length vectors.

The patch also changes MAX_VECTORIZATION_FACTOR to INT_MAX,
to avoid hard-coding a particular architectural limit.

The patch includes a new test because a development version of the patch
accidentally used file print routines instead of dump_*, which would
fail with -fopt-info.

2018-01-03 Richard Sandiford <richard.sandiford@linaro.org>
	Alan Hayward <alan.hayward@arm.com>
	David Sherwood <david.sherwood@arm.com>

gcc/
	* tree-vectorizer.h (_slp_instance::unrolling_factor): Change
	from an unsigned int to a poly_uint64.
	(_loop_vec_info::slp_unrolling_factor): Likewise.
	(_loop_vec_info::vectorization_factor): Change from an int
	to a poly_uint64.
	(MAX_VECTORIZATION_FACTOR): Bump from 64 to INT_MAX.
	(vect_get_num_vectors): New function.
	(vect_update_max_nunits, vect_vf_for_cost): Likewise.
	(vect_get_num_copies): Use vect_get_num_vectors.
	(vect_analyze_data_ref_dependences): Change max_vf from an int *
	to an unsigned int *.
	(vect_analyze_data_refs): Change min_vf from an int * to a
	poly_uint64 *.
	(vect_transform_slp_perm_load): Take the vf as a poly_uint64 rather
	than an unsigned HOST_WIDE_INT.
	* tree-vect-data-refs.c (vect_analyze_possibly_independent_ddr)
	(vect_analyze_data_ref_dependence): Change max_vf from an int *
	to an unsigned int *.
	(vect_analyze_data_ref_dependences): Likewise.
	(vect_compute_data_ref_alignment): Handle polynomial vf.
	(vect_enhance_data_refs_alignment): Likewise.
	(vect_prune_runtime_alias_test_list): Likewise.
	(vect_shift_permute_load_chain): Likewise.
	(vect_supportable_dr_alignment): Likewise.
	(dependence_distance_ge_vf): Take the vectorization factor as a
	poly_uint64 rather than an unsigned HOST_WIDE_INT.
	(vect_analyze_data_refs): Change min_vf from an int * to a
	poly_uint64 *.
	* tree-vect-loop-manip.c (vect_gen_scalar_loop_niters): Take
	vfm1 as a poly_uint64 rather than an int. Make the same change
	for the returned bound_scalar.
	(vect_gen_vector_loop_niters): Handle polynomial vf.
	(vect_do_peeling): Likewise. Update call to
	vect_gen_scalar_loop_niters and handle polynomial bound_scalars.
	(vect_gen_vector_loop_niters_mult_vf): Assert that the vf must
	be constant.
	* tree-vect-loop.c (vect_determine_vectorization_factor)
	(vect_update_vf_for_slp, vect_analyze_loop_2): Handle polynomial vf.
	(vect_get_known_peeling_cost): Likewise.
	(vect_estimate_min_profitable_iters, vectorizable_reduction): Likewise.
	(vect_worthwhile_without_simd_p, vectorizable_induction): Likewise.
	(vect_transform_loop): Likewise. Use the lowest possible VF when
	updating the upper bounds of the loop.
	(vect_min_worthwhile_factor): Make static. Return an unsigned int
	rather than an int.
	* tree-vect-slp.c (vect_attempt_slp_rearrange_stmts): Cope with
	polynomial unroll factors.
	(vect_analyze_slp_cost_1, vect_analyze_slp_instance): Likewise.
	(vect_make_slp_decision): Likewise.
	(vect_supported_load_permutation_p): Likewise, and polynomial vf too.
	(vect_analyze_slp_cost): Handle polynomial vf.
	(vect_slp_analyze_node_operations): Likewise.
	(vect_slp_analyze_bb_1): Likewise.
	(vect_transform_slp_perm_load): Take the vf as a poly_uint64 rather
	than an unsigned HOST_WIDE_INT.
	* tree-vect-stmts.c (vectorizable_simd_clone_call, vectorizable_store)
	(vectorizable_load): Handle polynomial vf.
	* tree-vectorizer.c (simduid_to_vf::vf): Change from an int to
	a poly_uint64.
	(adjust_simduid_builtins, shrink_simd_arrays): Update accordingly.

gcc/testsuite/
	* gcc.dg/vect-opt-info-1.c: New test.

Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>

From-SVN: r256126
Diffstat (limited to 'gcc/tree-vect-loop-manip.c')
-rw-r--r-- gcc/tree-vect-loop-manip.c 74
1 file changed, 45 insertions, 29 deletions
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index 098b428..c8ee229 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -1234,8 +1234,9 @@ vect_build_loop_niters (loop_vec_info loop_vinfo, bool *new_var_p)
static tree
vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
- int bound_prolog, int vfm1, int th,
- int *bound_scalar, bool check_profitability)
+ int bound_prolog, poly_int64 vfm1, int th,
+ poly_uint64 *bound_scalar,
+ bool check_profitability)
{
tree type = TREE_TYPE (niters_prolog);
tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
@@ -1250,21 +1251,23 @@ vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
/* Peeling for constant times. */
if (int_niters_prolog >= 0)
{
- *bound_scalar = (int_niters_prolog + vfm1 < th
- ? th
- : vfm1 + int_niters_prolog);
+ *bound_scalar = upper_bound (int_niters_prolog + vfm1, th);
return build_int_cst (type, *bound_scalar);
}
/* Peeling for unknown times. Note BOUND_PROLOG is the upper
bound (inlcuded) of niters of prolog loop. */
- if (th >= vfm1 + bound_prolog)
+ if (known_ge (th, vfm1 + bound_prolog))
{
*bound_scalar = th;
return build_int_cst (type, th);
}
- /* Need to do runtime comparison, but BOUND_SCALAR remains the same. */
- else if (th > vfm1)
- return fold_build2 (MAX_EXPR, type, build_int_cst (type, th), niters);
+ /* Need to do runtime comparison. */
+ else if (maybe_gt (th, vfm1))
+ {
+ *bound_scalar = upper_bound (*bound_scalar, th);
+ return fold_build2 (MAX_EXPR, type,
+ build_int_cst (type, th), niters);
+ }
}
return niters;
}
@@ -1292,7 +1295,7 @@ vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
{
tree ni_minus_gap, var;
tree niters_vector, step_vector, type = TREE_TYPE (niters);
- int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
tree log_vf = NULL_TREE;
@@ -1315,14 +1318,15 @@ vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
else
ni_minus_gap = niters;
- if (1)
+ unsigned HOST_WIDE_INT const_vf;
+ if (vf.is_constant (&const_vf))
{
/* Create: niters >> log2(vf) */
/* If it's known that niters == number of latch executions + 1 doesn't
overflow, we can generate niters >> log2(vf); otherwise we generate
(niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
will be at least one. */
- log_vf = build_int_cst (type, exact_log2 (vf));
+ log_vf = build_int_cst (type, exact_log2 (const_vf));
if (niters_no_overflow)
niters_vector = fold_build2 (RSHIFT_EXPR, type, ni_minus_gap, log_vf);
else
@@ -1373,7 +1377,8 @@ vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
tree niters_vector,
tree *niters_vector_mult_vf_ptr)
{
- int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ /* We should be using a step_vector of VF if VF is variable. */
+ int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ();
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree type = TREE_TYPE (niters_vector);
tree log_vf = build_int_cst (type, exact_log2 (vf));
@@ -1790,8 +1795,9 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
tree type = TREE_TYPE (niters), guard_cond;
basic_block guard_bb, guard_to;
profile_probability prob_prolog, prob_vector, prob_epilog;
- int bound_prolog = 0, bound_scalar = 0, bound = 0;
- int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ int bound_prolog = 0;
+ poly_uint64 bound_scalar = 0;
+ int estimated_vf;
int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
|| LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
@@ -1800,11 +1806,12 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
return NULL;
prob_vector = profile_probability::guessed_always ().apply_scale (9, 10);
- if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2)
- vf = 3;
+ estimated_vf = vect_vf_for_cost (loop_vinfo);
+ if (estimated_vf == 2)
+ estimated_vf = 3;
prob_prolog = prob_epilog = profile_probability::guessed_always ()
- .apply_scale (vf - 1, vf);
- vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ .apply_scale (estimated_vf - 1, estimated_vf);
+ poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
struct loop *first_loop = loop;
@@ -1824,13 +1831,15 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
/* Skip to epilog if scalar loop may be preferred. It's only needed
when we peel for epilog loop and when it hasn't been checked with
loop versioning. */
- bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
- && !LOOP_REQUIRES_VERSIONING (loop_vinfo));
+ bool skip_vector = ((!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ && !LOOP_REQUIRES_VERSIONING (loop_vinfo))
+ || !vf.is_constant ());
/* Epilog loop must be executed if the number of iterations for epilog
loop is known at compile time, otherwise we need to add a check at
the end of vector loop and skip to the end of epilog loop. */
bool skip_epilog = (prolog_peeling < 0
- || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
+ || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ || !vf.is_constant ());
/* PEELING_FOR_GAPS is special because epilog loop must be executed. */
if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
skip_epilog = false;
@@ -1849,8 +1858,10 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
needs to be scaled back later. */
basic_block bb_before_loop = loop_preheader_edge (loop)->src;
if (prob_vector.initialized_p ())
- scale_bbs_frequencies (&bb_before_loop, 1, prob_vector);
- scale_loop_profile (loop, prob_vector, bound);
+ {
+ scale_bbs_frequencies (&bb_before_loop, 1, prob_vector);
+ scale_loop_profile (loop, prob_vector, 0);
+ }
}
tree niters_prolog = build_int_cst (type, 0);
@@ -2036,15 +2047,20 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog);
}
- scale_loop_profile (epilog, prob_epilog, bound);
+ scale_loop_profile (epilog, prob_epilog, 0);
}
else
slpeel_update_phi_nodes_for_lcssa (epilog);
- bound = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? vf - 1 : vf - 2;
- /* We share epilog loop with scalar version loop. */
- bound = MAX (bound, bound_scalar - 1);
- record_niter_bound (epilog, bound, false, true);
+ unsigned HOST_WIDE_INT bound1, bound2;
+ if (vf.is_constant (&bound1) && bound_scalar.is_constant (&bound2))
+ {
+ bound1 -= LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? 1 : 2;
+ if (bound2)
+ /* We share epilog loop with scalar version loop. */
+ bound1 = MAX (bound1, bound2 - 1);
+ record_niter_bound (epilog, bound1, false, true);
+ }
delete_update_ssa ();
adjust_vec_debug_stmts ();