diff options
author | Richard Sandiford <richard.sandiford@linaro.org> | 2018-01-03 07:14:07 +0000 |
---|---|---|
committer | Richard Sandiford <rsandifo@gcc.gnu.org> | 2018-01-03 07:14:07 +0000 |
commit | d9f21f6acb3aa615834e855e16b6311cd18c5668 (patch) | |
tree | 8200866be9cf8f8f2c40f1a190b6dc611888dd8b /gcc/tree-vectorizer.h | |
parent | fba05d9e9a0321c812ddbda7b4caa3977e1db4ef (diff) | |
download | gcc-d9f21f6acb3aa615834e855e16b6311cd18c5668.zip gcc-d9f21f6acb3aa615834e855e16b6311cd18c5668.tar.gz gcc-d9f21f6acb3aa615834e855e16b6311cd18c5668.tar.bz2 |
poly_int: vectoriser vf and uf
This patch changes the type of the vectorisation factor and SLP
unrolling factor to poly_uint64. This in turn required some knock-on
changes in signedness elsewhere.
Cost decisions are generally based on estimated_poly_value,
which for VF is wrapped up as vect_vf_for_cost.
The patch doesn't on its own enable variable-length vectorisation.
It just makes the minimum changes necessary for the code to build
with the new VF and UF types. Later patches also make the
vectoriser cope with variable TYPE_VECTOR_SUBPARTS and variable
GET_MODE_NUNITS, at which point the code really does handle
variable-length vectors.
The patch also changes MAX_VECTORIZATION_FACTOR to INT_MAX,
to avoid hard-coding a particular architectural limit.
The patch includes a new test because a development version of the patch
accidentally used file print routines instead of dump_*, which would
fail with -fopt-info.
2018-01-03 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* tree-vectorizer.h (_slp_instance::unrolling_factor): Change
from an unsigned int to a poly_uint64.
(_loop_vec_info::slp_unrolling_factor): Likewise.
(_loop_vec_info::vectorization_factor): Change from an int
to a poly_uint64.
(MAX_VECTORIZATION_FACTOR): Bump from 64 to INT_MAX.
(vect_get_num_vectors): New function.
(vect_update_max_nunits, vect_vf_for_cost): Likewise.
(vect_get_num_copies): Use vect_get_num_vectors.
(vect_analyze_data_ref_dependences): Change max_vf from an int *
to an unsigned int *.
(vect_analyze_data_refs): Change min_vf from an int * to a
poly_uint64 *.
(vect_transform_slp_perm_load): Take the vf as a poly_uint64 rather
than an unsigned HOST_WIDE_INT.
* tree-vect-data-refs.c (vect_analyze_possibly_independent_ddr)
(vect_analyze_data_ref_dependence): Change max_vf from an int *
to an unsigned int *.
(vect_analyze_data_ref_dependences): Likewise.
(vect_compute_data_ref_alignment): Handle polynomial vf.
(vect_enhance_data_refs_alignment): Likewise.
(vect_prune_runtime_alias_test_list): Likewise.
(vect_shift_permute_load_chain): Likewise.
(vect_supportable_dr_alignment): Likewise.
(dependence_distance_ge_vf): Take the vectorization factor as a
poly_uint64 rather than an unsigned HOST_WIDE_INT.
(vect_analyze_data_refs): Change min_vf from an int * to a
poly_uint64 *.
* tree-vect-loop-manip.c (vect_gen_scalar_loop_niters): Take
vfm1 as a poly_uint64 rather than an int. Make the same change
for the returned bound_scalar.
(vect_gen_vector_loop_niters): Handle polynomial vf.
(vect_do_peeling): Likewise. Update call to
vect_gen_scalar_loop_niters and handle polynomial bound_scalars.
(vect_gen_vector_loop_niters_mult_vf): Assert that the vf must
be constant.
* tree-vect-loop.c (vect_determine_vectorization_factor)
(vect_update_vf_for_slp, vect_analyze_loop_2): Handle polynomial vf.
(vect_get_known_peeling_cost): Likewise.
(vect_estimate_min_profitable_iters, vectorizable_reduction): Likewise.
(vect_worthwhile_without_simd_p, vectorizable_induction): Likewise.
(vect_transform_loop): Likewise. Use the lowest possible VF when
updating the upper bounds of the loop.
(vect_min_worthwhile_factor): Make static. Return an unsigned int
rather than an int.
* tree-vect-slp.c (vect_attempt_slp_rearrange_stmts): Cope with
polynomial unroll factors.
(vect_analyze_slp_cost_1, vect_analyze_slp_instance): Likewise.
(vect_make_slp_decision): Likewise.
(vect_supported_load_permutation_p): Likewise, and polynomial
vf too.
(vect_analyze_slp_cost): Handle polynomial vf.
(vect_slp_analyze_node_operations): Likewise.
(vect_slp_analyze_bb_1): Likewise.
(vect_transform_slp_perm_load): Take the vf as a poly_uint64 rather
than an unsigned HOST_WIDE_INT.
* tree-vect-stmts.c (vectorizable_simd_clone_call, vectorizable_store)
(vectorizable_load): Handle polynomial vf.
* tree-vectorizer.c (simduid_to_vf::vf): Change from an int to
a poly_uint64.
(adjust_simduid_builtins, shrink_simd_arrays): Update accordingly.
gcc/testsuite/
* gcc.dg/vect-opt-info-1.c: New test.
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r256126
Diffstat (limited to 'gcc/tree-vectorizer.h')
-rw-r--r-- | gcc/tree-vectorizer.h | 57 |
1 files changed, 44 insertions, 13 deletions
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 22bbc9a..9619286 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -129,7 +129,7 @@ typedef struct _slp_instance {
   unsigned int group_size;
 
   /* The unrolling factor required to vectorized this SLP instance. */
-  unsigned int unrolling_factor;
+  poly_uint64 unrolling_factor;
 
   /* The group of nodes that contain loads of this SLP instance. */
   vec<slp_tree> loads;
@@ -245,7 +245,7 @@ typedef struct _loop_vec_info : public vec_info {
   poly_uint64 versioning_threshold;
 
   /* Unrolling factor */
-  int vectorization_factor;
+  poly_uint64 vectorization_factor;
 
   /* Maximum runtime vectorization factor, or MAX_VECTORIZATION_FACTOR
      if there is no particular limit. */
@@ -297,7 +297,7 @@ typedef struct _loop_vec_info : public vec_info {
 
   /* The unrolling factor needed to SLP the loop. In case of that pure SLP is
      applied to the loop, i.e., no unrolling is needed, this is 1. */
-  unsigned slp_unrolling_factor;
+  poly_uint64 slp_unrolling_factor;
 
   /* Cost of a single scalar iteration. */
   int single_scalar_iteration_cost;
@@ -815,8 +815,7 @@ struct dataref_aux {
    conversion. */
 #define MAX_INTERM_CVT_STEPS 3
 
-/* The maximum vectorization factor supported by any target (V64QI). */
-#define MAX_VECTORIZATION_FACTOR 64
+#define MAX_VECTORIZATION_FACTOR INT_MAX
 
 /* Nonzero if TYPE represents a (scalar) boolean type or type
    in the middle-end compatible with it (unsigned precision 1 integral
@@ -1109,6 +1108,16 @@ unlimited_cost_model (loop_p loop)
   return (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED);
 }
 
+/* Return the number of vectors of type VECTYPE that are needed to get
+   NUNITS elements. NUNITS should be based on the vectorization factor,
+   so it is always a known multiple of the number of elements in VECTYPE. */
+
+static inline unsigned int
+vect_get_num_vectors (poly_uint64 nunits, tree vectype)
+{
+  return exact_div (nunits, TYPE_VECTOR_SUBPARTS (vectype)).to_constant ();
+}
+
 /* Return the number of copies needed for loop vectorization when
    a statement operates on vectors of type VECTYPE. This is the
    vectorization factor divided by the number of elements in
@@ -1117,10 +1126,32 @@ unlimited_cost_model (loop_p loop)
 static inline unsigned int
 vect_get_num_copies (loop_vec_info loop_vinfo, tree vectype)
 {
-  gcc_checking_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
-                       % TYPE_VECTOR_SUBPARTS (vectype) == 0);
-  return (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
-          / TYPE_VECTOR_SUBPARTS (vectype));
+  return vect_get_num_vectors (LOOP_VINFO_VECT_FACTOR (loop_vinfo), vectype);
+}
+
+/* Update maximum unit count *MAX_NUNITS so that it accounts for
+   the number of units in vector type VECTYPE. *MAX_NUNITS can be 1
+   if we haven't yet recorded any vector types. */
+
+static inline void
+vect_update_max_nunits (poly_uint64 *max_nunits, tree vectype)
+{
+  /* All unit counts have the form current_vector_size * X for some
+     rational X, so two unit sizes must have a common multiple.
+     Everything is a multiple of the initial value of 1. */
+  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  *max_nunits = force_common_multiple (*max_nunits, nunits);
+}
+
+/* Return the vectorization factor that should be used for costing
+   purposes while vectorizing the loop described by LOOP_VINFO.
+   Pick a reasonable estimate if the vectorization factor isn't
+   known at compile time. */
+
+static inline unsigned int
+vect_vf_for_cost (loop_vec_info loop_vinfo)
+{
+  return estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
 }
 
 /* Return the size of the value accessed by unvectorized data reference DR.
@@ -1223,7 +1254,7 @@ extern enum dr_alignment_support vect_supportable_dr_alignment
                                            (struct data_reference *, bool);
 extern tree vect_get_smallest_scalar_type (gimple *, HOST_WIDE_INT *,
                                            HOST_WIDE_INT *);
-extern bool vect_analyze_data_ref_dependences (loop_vec_info, int *);
+extern bool vect_analyze_data_ref_dependences (loop_vec_info, unsigned int *);
 extern bool vect_slp_analyze_instance_dependence (slp_instance);
 extern bool vect_enhance_data_refs_alignment (loop_vec_info);
 extern bool vect_analyze_data_refs_alignment (loop_vec_info);
@@ -1233,7 +1264,7 @@ extern bool vect_analyze_data_ref_accesses (vec_info *);
 extern bool vect_prune_runtime_alias_test_list (loop_vec_info);
 extern bool vect_check_gather_scatter (gimple *, loop_vec_info,
                                        gather_scatter_info *);
-extern bool vect_analyze_data_refs (vec_info *, int *);
+extern bool vect_analyze_data_refs (vec_info *, poly_uint64 *);
 extern void vect_record_base_alignments (vec_info *);
 extern tree vect_create_data_ref_ptr (gimple *, tree, struct loop *, tree,
                                       tree *, gimple_stmt_iterator *,
@@ -1291,8 +1322,8 @@ extern int vect_get_known_peeling_cost (loop_vec_info, int, int *,
 /* In tree-vect-slp.c. */
 extern void vect_free_slp_instance (slp_instance);
 extern bool vect_transform_slp_perm_load (slp_tree, vec<tree> ,
-                                          gimple_stmt_iterator *, int,
-                                          slp_instance, bool, unsigned *);
+                                          gimple_stmt_iterator *, poly_uint64,
+                                          slp_instance, bool, unsigned *);
 extern bool vect_slp_analyze_operations (vec_info *);
 extern bool vect_schedule_slp (vec_info *);
 extern bool vect_analyze_slp (vec_info *, unsigned);