aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-vectorizer.h
diff options
context:
space:
mode:
authorKewen Lin <linkw@linux.ibm.com>2020-07-19 20:40:10 -0500
committerKewen Lin <linkw@linux.ibm.com>2020-07-19 21:13:28 -0500
commit9fb832ce382d649b7687426e6bc4e5d3715cb78a (patch)
tree3b2bbe610050f8c47da747491c465de3889f1138 /gcc/tree-vectorizer.h
parent3ca6f6698c70c4fa0c98822c73364063fa13ceea (diff)
downloadgcc-9fb832ce382d649b7687426e6bc4e5d3715cb78a.zip
gcc-9fb832ce382d649b7687426e6bc4e5d3715cb78a.tar.gz
gcc-9fb832ce382d649b7687426e6bc4e5d3715cb78a.tar.bz2
vect: Support length-based partial vectors approach
Power9 supports the vector load/store instructions lxvl/stxvl, which allow us to operate on partial vectors with a specific length. This patch extends some of the current mask-based partial vectors support code for the length-based approach, and also adds some length-specific support code. So far it assumes that we can only have one partial vectors approach at a time; it disables the use of partial vectors if both approaches co-exist. As in the description of the optabs len_load/len_store, the length-based approach can have two flavors: one is length in bytes, the other is length in lanes. This patch is mainly implemented and tested for length in bytes, but as Richard S. suggested, most of the code takes both flavors into account. This also introduces one parameter, vect-partial-vector-usage, to allow users to control when the loop vectorizer considers using partial vectors as an alternative to falling back to scalar code. gcc/ChangeLog: * config/rs6000/rs6000.c (rs6000_option_override_internal): Set param_vect_partial_vector_usage to 0 explicitly. * doc/invoke.texi (vect-partial-vector-usage): Document new option. * optabs-query.c (get_len_load_store_mode): New function. * optabs-query.h (get_len_load_store_mode): New declare. * params.opt (vect-partial-vector-usage): New. * tree-vect-loop-manip.c (vect_set_loop_controls_directly): Add the handlings for vectorization using length-based partial vectors, call vect_gen_len for length generation, and rename some variables with items instead of scalars. (vect_set_loop_condition_partial_vectors): Add the handlings for vectorization using length-based partial vectors. (vect_do_peeling): Allow remaining eiters less than epilogue vf for LOOP_VINFO_USING_PARTIAL_VECTORS_P. * tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Init epil_using_partial_vectors_p. (_loop_vec_info::~_loop_vec_info): Call release_vec_loop_controls for lengths destruction. (vect_verify_loop_lens): New function. 
(vect_analyze_loop): Add handlings for epilogue of loop when it's marked to use vectorization using partial vectors. (vect_analyze_loop_2): Add the check to allow only one vectorization approach using partial vectorization at the same time. Check param vect-partial-vector-usage for partial vectors decision. Mark LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P if the epilogue is considerable to use partial vectors. Call release_vec_loop_controls for lengths destruction. (vect_estimate_min_profitable_iters): Adjust for loop vectorization using length-based partial vectors. (vect_record_loop_mask): Init factor to 1 for vectorization using mask-based partial vectors. (vect_record_loop_len): New function. (vect_get_loop_len): Likewise. * tree-vect-stmts.c (check_load_store_for_partial_vectors): Add checks for vectorization using length-based partial vectors. Factor some code to lambda function get_valid_nvectors. (vectorizable_store): Add handlings when using length-based partial vectors. (vectorizable_load): Likewise. (vect_gen_len): New function. * tree-vectorizer.h (struct rgroup_controls): Add field factor mainly for length-based partial vectors. (vec_loop_lens): New typedef. (_loop_vec_info): Add lens and epil_using_partial_vectors_p. (LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P): New macro. (LOOP_VINFO_LENS): Likewise. (LOOP_VINFO_FULLY_WITH_LENGTH_P): Likewise. (vect_record_loop_len): New declare. (vect_get_loop_len): Likewise. (vect_gen_len): Likewise.
Diffstat (limited to 'gcc/tree-vectorizer.h')
-rw-r--r--gcc/tree-vectorizer.h35
1 files changed, 32 insertions, 3 deletions
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 91d3291..5466c78 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -480,14 +480,21 @@ is_a_helper <_bb_vec_info *>::test (vec_info *i)
first level being indexed by nV - 1 (since nV == 0 doesn't exist) and
the second being indexed by the mask index 0 <= i < nV. */
-/* The controls (like masks) needed by rgroups with nV vectors,
+/* The controls (like masks or lengths) needed by rgroups with nV vectors,
according to the description above. */
struct rgroup_controls {
/* The largest nS for all rgroups that use these controls. */
unsigned int max_nscalars_per_iter;
- /* The type of control to use, based on the highest nS recorded above.
- For mask-based approach, it's used for mask_type. */
+ /* For the largest nS recorded above, the loop controls divide each scalar
+ into FACTOR equal-sized pieces. This is useful if we need to split
+ element-based accesses into byte-based accesses. */
+ unsigned int factor;
+
+ /* This is a vector type with MAX_NSCALARS_PER_ITER * VF / nV elements.
+ For mask-based controls, it is the type of the masks in CONTROLS.
+ For length-based controls, it can be any vector type that has the
+ specified number of elements; the type of the elements doesn't matter. */
tree type;
/* A vector of nV controls, in iteration order. */
@@ -496,6 +503,8 @@ struct rgroup_controls {
typedef auto_vec<rgroup_controls> vec_loop_masks;
+typedef auto_vec<rgroup_controls> vec_loop_lens;
+
typedef auto_vec<std::pair<data_reference*, tree> > drs_init_vec;
/*-----------------------------------------------------------------*/
@@ -543,6 +552,10 @@ public:
on inactive scalars. */
vec_loop_masks masks;
+ /* The lengths that a loop with length should use to avoid operating
+ on inactive scalars. */
+ vec_loop_lens lens;
+
/* Set of scalar conditions that have loop mask applied. */
scalar_cond_masked_set_type scalar_cond_masked_set;
@@ -651,6 +664,10 @@ public:
the vector loop can handle fewer than VF scalars. */
bool using_partial_vectors_p;
+ /* True if we've decided to use partially-populated vectors for the
+ epilogue of loop. */
+ bool epil_using_partial_vectors_p;
+
/* When we have grouped data accesses with gaps, we may introduce invalid
memory accesses. We peel the last iteration of the loop to prevent
this. */
@@ -714,9 +731,12 @@ public:
#define LOOP_VINFO_VECTORIZABLE_P(L) (L)->vectorizable
#define LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P(L) (L)->can_use_partial_vectors_p
#define LOOP_VINFO_USING_PARTIAL_VECTORS_P(L) (L)->using_partial_vectors_p
+#define LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P(L) \
+ (L)->epil_using_partial_vectors_p
#define LOOP_VINFO_VECT_FACTOR(L) (L)->vectorization_factor
#define LOOP_VINFO_MAX_VECT_FACTOR(L) (L)->max_vectorization_factor
#define LOOP_VINFO_MASKS(L) (L)->masks
+#define LOOP_VINFO_LENS(L) (L)->lens
#define LOOP_VINFO_MASK_SKIP_NITERS(L) (L)->mask_skip_niters
#define LOOP_VINFO_RGROUP_COMPARE_TYPE(L) (L)->rgroup_compare_type
#define LOOP_VINFO_RGROUP_IV_TYPE(L) (L)->rgroup_iv_type
@@ -754,6 +774,10 @@ public:
(LOOP_VINFO_USING_PARTIAL_VECTORS_P (L) \
&& !LOOP_VINFO_MASKS (L).is_empty ())
+#define LOOP_VINFO_FULLY_WITH_LENGTH_P(L) \
+ (LOOP_VINFO_USING_PARTIAL_VECTORS_P (L) \
+ && !LOOP_VINFO_LENS (L).is_empty ())
+
#define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
((L)->may_misalign_stmts.length () > 0)
#define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L) \
@@ -1953,6 +1977,11 @@ extern void vect_record_loop_mask (loop_vec_info, vec_loop_masks *,
unsigned int, tree, tree);
extern tree vect_get_loop_mask (gimple_stmt_iterator *, vec_loop_masks *,
unsigned int, tree, unsigned int);
+extern void vect_record_loop_len (loop_vec_info, vec_loop_lens *, unsigned int,
+ tree, unsigned int);
+extern tree vect_get_loop_len (loop_vec_info, vec_loop_lens *, unsigned int,
+ unsigned int);
+extern gimple_seq vect_gen_len (tree, tree, tree, tree);
extern stmt_vec_info info_for_reduction (vec_info *, stmt_vec_info);
/* Drive for loop transformation stage. */