about | summary | refs | log | tree | commit | diff
path: root/gcc/tree-vect-stmts.c
diff options
context:
space:
mode:
author: Richard Sandiford <richard.sandiford@linaro.org> 2018-01-03 07:14:07 +0000
committer: Richard Sandiford <rsandifo@gcc.gnu.org> 2018-01-03 07:14:07 +0000
commit: d9f21f6acb3aa615834e855e16b6311cd18c5668 (patch)
tree: 8200866be9cf8f8f2c40f1a190b6dc611888dd8b /gcc/tree-vect-stmts.c
parent: fba05d9e9a0321c812ddbda7b4caa3977e1db4ef (diff)
download: gcc-d9f21f6acb3aa615834e855e16b6311cd18c5668.zip
gcc-d9f21f6acb3aa615834e855e16b6311cd18c5668.tar.gz
gcc-d9f21f6acb3aa615834e855e16b6311cd18c5668.tar.bz2
poly_int: vectoriser vf and uf
This patch changes the type of the vectorisation factor and SLP
unrolling factor to poly_uint64. This in turn required some knock-on
changes in signedness elsewhere.

Cost decisions are generally based on estimated_poly_value,
which for VF is wrapped up as vect_vf_for_cost.

The patch doesn't on its own enable variable-length vectorisation.
It just makes the minimum changes necessary for the code to build
with the new VF and UF types. Later patches also make the
vectoriser cope with variable TYPE_VECTOR_SUBPARTS and variable
GET_MODE_NUNITS, at which point the code really does handle
variable-length vectors.

The patch also changes MAX_VECTORIZATION_FACTOR to INT_MAX,
to avoid hard-coding a particular architectural limit.

The patch includes a new test because a development version of the
patch accidentally used file print routines instead of dump_*, which
would fail with -fopt-info.

2018-01-03  Richard Sandiford  <richard.sandiford@linaro.org>
            Alan Hayward  <alan.hayward@arm.com>
            David Sherwood  <david.sherwood@arm.com>

gcc/
    * tree-vectorizer.h (_slp_instance::unrolling_factor): Change
    from an unsigned int to a poly_uint64.
    (_loop_vec_info::slp_unrolling_factor): Likewise.
    (_loop_vec_info::vectorization_factor): Change from an int
    to a poly_uint64.
    (MAX_VECTORIZATION_FACTOR): Bump from 64 to INT_MAX.
    (vect_get_num_vectors): New function.
    (vect_update_max_nunits, vect_vf_for_cost): Likewise.
    (vect_get_num_copies): Use vect_get_num_vectors.
    (vect_analyze_data_ref_dependences): Change max_vf from an int *
    to an unsigned int *.
    (vect_analyze_data_refs): Change min_vf from an int * to a
    poly_uint64 *.
    (vect_transform_slp_perm_load): Take the vf as a poly_uint64 rather
    than an unsigned HOST_WIDE_INT.
    * tree-vect-data-refs.c (vect_analyze_possibly_independent_ddr)
    (vect_analyze_data_ref_dependence): Change max_vf from an int *
    to an unsigned int *.
    (vect_analyze_data_ref_dependences): Likewise.
    (vect_compute_data_ref_alignment): Handle polynomial vf.
    (vect_enhance_data_refs_alignment): Likewise.
    (vect_prune_runtime_alias_test_list): Likewise.
    (vect_shift_permute_load_chain): Likewise.
    (vect_supportable_dr_alignment): Likewise.
    (dependence_distance_ge_vf): Take the vectorization factor as a
    poly_uint64 rather than an unsigned HOST_WIDE_INT.
    (vect_analyze_data_refs): Change min_vf from an int * to a
    poly_uint64 *.
    * tree-vect-loop-manip.c (vect_gen_scalar_loop_niters): Take
    vfm1 as a poly_uint64 rather than an int. Make the same change
    for the returned bound_scalar.
    (vect_gen_vector_loop_niters): Handle polynomial vf.
    (vect_do_peeling): Likewise. Update call to
    vect_gen_scalar_loop_niters and handle polynomial bound_scalars.
    (vect_gen_vector_loop_niters_mult_vf): Assert that the vf must
    be constant.
    * tree-vect-loop.c (vect_determine_vectorization_factor)
    (vect_update_vf_for_slp, vect_analyze_loop_2): Handle polynomial vf.
    (vect_get_known_peeling_cost): Likewise.
    (vect_estimate_min_profitable_iters, vectorizable_reduction): Likewise.
    (vect_worthwhile_without_simd_p, vectorizable_induction): Likewise.
    (vect_transform_loop): Likewise. Use the lowest possible VF when
    updating the upper bounds of the loop.
    (vect_min_worthwhile_factor): Make static. Return an unsigned int
    rather than an int.
    * tree-vect-slp.c (vect_attempt_slp_rearrange_stmts): Cope with
    polynomial unroll factors.
    (vect_analyze_slp_cost_1, vect_analyze_slp_instance): Likewise.
    (vect_make_slp_decision): Likewise.
    (vect_supported_load_permutation_p): Likewise, and polynomial vf too.
    (vect_analyze_slp_cost): Handle polynomial vf.
    (vect_slp_analyze_node_operations): Likewise.
    (vect_slp_analyze_bb_1): Likewise.
    (vect_transform_slp_perm_load): Take the vf as a poly_uint64 rather
    than an unsigned HOST_WIDE_INT.
    * tree-vect-stmts.c (vectorizable_simd_clone_call, vectorizable_store)
    (vectorizable_load): Handle polynomial vf.
    * tree-vectorizer.c (simduid_to_vf::vf): Change from an int to
    a poly_uint64.
    (adjust_simduid_builtins, shrink_simd_arrays): Update accordingly.

gcc/testsuite/
    * gcc.dg/vect-opt-info-1.c: New test.

Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>

From-SVN: r256126
Diffstat (limited to 'gcc/tree-vect-stmts.c')
-rw-r--r-- gcc/tree-vect-stmts.c 62
1 file changed, 40 insertions, 22 deletions
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 8d1dc04..bae72d09 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -3361,6 +3361,16 @@ vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
arginfo.quick_push (thisarginfo);
}
+ unsigned HOST_WIDE_INT vf;
+ if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not considering SIMD clones; not yet supported"
+ " for variable-width vectors.\n");
+ return NULL;
+ }
+
unsigned int badness = 0;
struct cgraph_node *bestn = NULL;
if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
@@ -3370,13 +3380,11 @@ vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
n = n->simdclone->next_clone)
{
unsigned int this_badness = 0;
- if (n->simdclone->simdlen
- > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
+ if (n->simdclone->simdlen > vf
|| n->simdclone->nargs != nargs)
continue;
- if (n->simdclone->simdlen
- < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
- this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
+ if (n->simdclone->simdlen < vf)
+ this_badness += (exact_log2 (vf)
- exact_log2 (n->simdclone->simdlen)) * 1024;
if (n->simdclone->inbranch)
this_badness += 2048;
@@ -3465,7 +3473,7 @@ vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
fndecl = bestn->decl;
nunits = bestn->simdclone->simdlen;
- ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
+ ncopies = vf / nunits;
/* If the function isn't const, only allow it in simd loops where user
has asserted that at least nunits consecutive iterations can be
@@ -5694,7 +5702,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
gather_scatter_info gs_info;
enum vect_def_type scatter_src_dt = vect_unknown_def_type;
gimple *new_stmt;
- int vf;
+ poly_uint64 vf;
vec_load_store_type vls_type;
tree ref_type;
@@ -6664,7 +6672,8 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
tree dataref_offset = NULL_TREE;
gimple *ptr_incr = NULL;
int ncopies;
- int i, j, group_size, group_gap_adj;
+ int i, j, group_size;
+ poly_int64 group_gap_adj;
tree msq = NULL_TREE, lsq;
tree offset = NULL_TREE;
tree byte_offset = NULL_TREE;
@@ -6682,7 +6691,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
bool slp_perm = false;
enum tree_code code;
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
- int vf;
+ poly_uint64 vf;
tree aggr_type;
gather_scatter_info gs_info;
vec_info *vinfo = stmt_info->vinfo;
@@ -6752,8 +6761,8 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
on the unrolled body effectively re-orders stmts. */
if (ncopies > 1
&& STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
- && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
- > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
+ && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
+ STMT_VINFO_MIN_NEG_DIST (stmt_info)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -6793,8 +6802,8 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
on the unrolled body effectively re-orders stmts. */
if (!PURE_SLP_STMT (stmt_info)
&& STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
- && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
- > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
+ && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
+ STMT_VINFO_MIN_NEG_DIST (stmt_info)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -7156,7 +7165,10 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
fits in. */
if (slp_perm)
{
- ncopies = (group_size * vf + nunits - 1) / nunits;
+ /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
+ variable VF. */
+ unsigned int const_vf = vf.to_constant ();
+ ncopies = (group_size * const_vf + nunits - 1) / nunits;
dr_chain.create (ncopies);
}
else
@@ -7274,7 +7286,10 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
fits in. */
if (slp_perm)
{
- vec_num = (group_size * vf + nunits - 1) / nunits;
+ /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
+ variable VF. */
+ unsigned int const_vf = vf.to_constant ();
+ vec_num = (group_size * const_vf + nunits - 1) / nunits;
group_gap_adj = vf * group_size - nunits * vec_num;
}
else
@@ -7740,11 +7755,13 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
we need to skip the gaps after we manage to fully load
all elements. group_gap_adj is GROUP_SIZE here. */
group_elt += nunits;
- if (group_gap_adj != 0 && ! slp_perm
- && group_elt == group_size - group_gap_adj)
+ if (maybe_ne (group_gap_adj, 0U)
+ && !slp_perm
+ && known_eq (group_elt, group_size - group_gap_adj))
{
- wide_int bump_val = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
- * group_gap_adj);
+ poly_wide_int bump_val
+ = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
+ * group_gap_adj);
tree bump = wide_int_to_tree (sizetype, bump_val);
dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
stmt, bump);
@@ -7753,10 +7770,11 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
}
/* Bump the vector pointer to account for a gap or for excess
elements loaded for a permuted SLP load. */
- if (group_gap_adj != 0 && slp_perm)
+ if (maybe_ne (group_gap_adj, 0U) && slp_perm)
{
- wide_int bump_val = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
- * group_gap_adj);
+ poly_wide_int bump_val
+ = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
+ * group_gap_adj);
tree bump = wide_int_to_tree (sizetype, bump_val);
dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
stmt, bump);