author    Richard Sandiford <richard.sandiford@linaro.org>  2018-01-13 17:59:32 +0000
committer Richard Sandiford <rsandifo@gcc.gnu.org>          2018-01-13 17:59:32 +0000
commit    535e7c114a7ad2ad7a6a0def88cf9448fcd5f029 (patch)
tree      1c9a22e58ae70f7dd6784c23c21a355f50625864 /gcc/tree-vect-loop.c
parent    c2700f7466bac153def05a0e070aa78cd2ffc0ae (diff)
Handle peeling for alignment with masking
This patch adds support for aligning vectors by using a partial
first iteration.  E.g. if the start pointer is 3 elements beyond
an aligned address, the first iteration will have a mask in which
the first three elements are false.

On SVE, the optimisation is only useful for vector-length-specific
code.  Vector-length-agnostic code doesn't try to align vectors
since the vector length might not be a power of 2.

2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
	    Alan Hayward  <alan.hayward@arm.com>
	    David Sherwood  <david.sherwood@arm.com>

gcc/
	* tree-vectorizer.h (_loop_vec_info::mask_skip_niters): New field.
	(LOOP_VINFO_MASK_SKIP_NITERS): New macro.
	(vect_use_loop_mask_for_alignment_p): New function.
	(vect_prepare_for_masked_peels, vect_gen_while_not): Declare.
	* tree-vect-loop-manip.c (vect_set_loop_masks_directly): Add an
	niters_skip argument.  Make sure that the first niters_skip elements
	of the first iteration are inactive.
	(vect_set_loop_condition_masked): Handle LOOP_VINFO_MASK_SKIP_NITERS.
	Update call to vect_set_loop_masks_directly.
	(get_misalign_in_elems): New function, split out from...
	(vect_gen_prolog_loop_niters): ...here.
	(vect_update_init_of_dr): Take a code argument that specifies whether
	the adjustment should be added or subtracted.
	(vect_update_init_of_drs): Likewise.
	(vect_prepare_for_masked_peels): New function.
	(vect_do_peeling): Skip prologue peeling if we're using a mask
	instead.  Update call to vect_update_inits_of_drs.
	* tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Initialize
	mask_skip_niters.
	(vect_analyze_loop_2): Allow fully-masked loops with peeling for
	alignment.  Do not include the number of peeled iterations in
	the minimum threshold in that case.
	(vectorizable_induction): Adjust the start value down by
	LOOP_VINFO_MASK_SKIP_NITERS iterations.
	(vect_transform_loop): Call vect_prepare_for_masked_peels.
	Take the number of skipped iterations into account when calculating
	the loop bounds.
	* tree-vect-stmts.c (vect_gen_while_not): New function.

gcc/testsuite/
	* gcc.target/aarch64/sve/nopeel_1.c: New test.
	* gcc.target/aarch64/sve/peel_ind_1.c: Likewise.
	* gcc.target/aarch64/sve/peel_ind_1_run.c: Likewise.
	* gcc.target/aarch64/sve/peel_ind_2.c: Likewise.
	* gcc.target/aarch64/sve/peel_ind_2_run.c: Likewise.
	* gcc.target/aarch64/sve/peel_ind_3.c: Likewise.
	* gcc.target/aarch64/sve/peel_ind_3_run.c: Likewise.
	* gcc.target/aarch64/sve/peel_ind_4.c: Likewise.
	* gcc.target/aarch64/sve/peel_ind_4_run.c: Likewise.

Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>

From-SVN: r256630
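To make the scheme concrete, here is a minimal scalar C sketch (illustrative
only, not GCC code; VF, skip and masked_sum are made-up names) of a
fully-masked loop whose first iteration masks off the lanes that fall before
the true start of the data:

#include <stddef.h>

#define VF 8	/* assumed vector length in elements */

/* Scalar emulation of a fully-masked vector loop summing the n
   elements that start `skip` lanes beyond an aligned base address.
   A lane is active iff its index lies in [skip, skip + n): the lower
   bound is the first-iteration mask described above (lane >= skip),
   the upper bound stands in for the usual fully-masked tail.  */
static int
masked_sum (const int *aligned_base, size_t skip, size_t n)
{
  int sum = 0;
  for (size_t i = 0; i < skip + n; i += VF)	/* one vector iteration */
    for (size_t lane = 0; lane < VF; ++lane)	/* one lane of the vector */
      if (i + lane >= skip && i + lane < skip + n)
	sum += aligned_base[i + lane];
  return sum;
}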
Diffstat (limited to 'gcc/tree-vect-loop.c')
-rw-r--r--  gcc/tree-vect-loop.c | 87
1 file changed, 58 insertions(+), 29 deletions(-)
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 1666332..d7cc12f 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -1121,6 +1121,7 @@ _loop_vec_info::_loop_vec_info (struct loop *loop_in)
versioning_threshold (0),
vectorization_factor (0),
max_vectorization_factor (0),
+ mask_skip_niters (NULL_TREE),
mask_compare_type (NULL_TREE),
unaligned_dr (NULL),
peeling_for_alignment (0),
@@ -2269,16 +2270,6 @@ start_over:
" gaps is required.\n");
}
- if (LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
- && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
- {
- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "can't use a fully-masked loop because peeling for"
- " alignment is required.\n");
- }
-
/* Decide whether to use a fully-masked loop for this vectorization
factor. */
LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
@@ -2379,18 +2370,21 @@ start_over:
increase threshold for this case if necessary. */
if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
{
- poly_uint64 niters_th;
+ poly_uint64 niters_th = 0;
- /* Niters for peeled prolog loop. */
- if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
+ if (!vect_use_loop_mask_for_alignment_p (loop_vinfo))
{
- struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
- tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
-
- niters_th = TYPE_VECTOR_SUBPARTS (vectype) - 1;
+ /* Niters for peeled prolog loop. */
+ if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
+ {
+ struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
+ tree vectype
+ = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
+ niters_th += TYPE_VECTOR_SUBPARTS (vectype) - 1;
+ }
+ else
+ niters_th += LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
}
- else
- niters_th = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
/* Niters for at least one iteration of vectorized loop. */
if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
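As an illustration of the threshold logic in this hunk (a sketch, not GCC
code; prologue_niters is a made-up helper), the prologue contribution to
niters_th is zero when the first-iteration mask absorbs the misalignment,
vf - 1 when the peel amount is unknown (encoded as a negative value, as in
the test above), and the known peel count otherwise:

#include <stdbool.h>

/* Illustrative helper, not GCC code: the prologue contribution to
   the versioning threshold.  Following GCC's convention, a negative
   known_peel means the peel amount is unknown at compile time.  */
static unsigned
prologue_niters (bool mask_handles_alignment, int known_peel, unsigned vf)
{
  if (mask_handles_alignment)
    return 0;			/* misalignment absorbed by the first mask */
  if (known_peel < 0)
    return vf - 1;		/* worst case for an unknown peel amount */
  return (unsigned) known_peel;
}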
@@ -7336,9 +7330,28 @@ vectorizable_induction (gimple *phi,
init_expr = PHI_ARG_DEF_FROM_EDGE (phi,
loop_preheader_edge (iv_loop));
- /* Convert the step to the desired type. */
+ /* Convert the initial value and step to the desired type. */
stmts = NULL;
+ init_expr = gimple_convert (&stmts, TREE_TYPE (vectype), init_expr);
step_expr = gimple_convert (&stmts, TREE_TYPE (vectype), step_expr);
+
+ /* If we are using the loop mask to "peel" for alignment then we need
+ to adjust the start value here. */
+ tree skip_niters = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
+ if (skip_niters != NULL_TREE)
+ {
+ if (FLOAT_TYPE_P (vectype))
+ skip_niters = gimple_build (&stmts, FLOAT_EXPR, TREE_TYPE (vectype),
+ skip_niters);
+ else
+ skip_niters = gimple_convert (&stmts, TREE_TYPE (vectype),
+ skip_niters);
+ tree skip_step = gimple_build (&stmts, MULT_EXPR, TREE_TYPE (vectype),
+ skip_niters, step_expr);
+ init_expr = gimple_build (&stmts, MINUS_EXPR, TREE_TYPE (vectype),
+ init_expr, skip_step);
+ }
+
if (stmts)
{
new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
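The effect of the MINUS_EXPR adjustment above can be checked with a small
standalone sketch (the concrete numbers are illustrative): biasing the start
value down by skip * step makes lane number skip, the first active lane,
produce exactly the original initial value.

#include <stdio.h>

int
main (void)
{
  int init = 100, step = 2, skip = 3, vf = 8;
  int init_adj = init - skip * step;	/* the MINUS_EXPR above: 94 */
  for (int lane = 0; lane < vf; ++lane)
    printf ("lane %d: %d%s\n", lane, init_adj + lane * step,
	    lane < skip ? "  (masked off)" : "");
  /* lane 3, the first active lane, prints exactly init (100).  */
  return 0;
}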
@@ -8209,6 +8222,11 @@ vect_transform_loop (loop_vec_info loop_vinfo)
split_edge (loop_preheader_edge (loop));
+ if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+ && vect_use_loop_mask_for_alignment_p (loop_vinfo))
+ /* This will deal with any possible peeling. */
+ vect_prepare_for_masked_peels (loop_vinfo);
+
/* FORNOW: the vectorizer supports only loops which body consist
of one basic block (header + empty latch). When the vectorizer will
support more involved loop forms, the order by which the BBs are
@@ -8488,29 +8506,40 @@ vect_transform_loop (loop_vec_info loop_vinfo)
/* +1 to convert latch counts to loop iteration counts,
-min_epilogue_iters to remove iterations that cannot be performed
by the vector code. */
- int bias = 1 - min_epilogue_iters;
+ int bias_for_lowest = 1 - min_epilogue_iters;
+ int bias_for_assumed = bias_for_lowest;
+ int alignment_npeels = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
+ if (alignment_npeels && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+ {
+ /* When the amount of peeling is known at compile time, the first
+ iteration will have exactly alignment_npeels active elements.
+ In the worst case it will have at least one. */
+ int min_first_active = (alignment_npeels > 0 ? alignment_npeels : 1);
+ bias_for_lowest += lowest_vf - min_first_active;
+ bias_for_assumed += assumed_vf - min_first_active;
+ }
/* In these calculations the "- 1" converts loop iteration counts
back to latch counts. */
if (loop->any_upper_bound)
loop->nb_iterations_upper_bound
= (final_iter_may_be_partial
- ? wi::udiv_ceil (loop->nb_iterations_upper_bound + bias,
+ ? wi::udiv_ceil (loop->nb_iterations_upper_bound + bias_for_lowest,
lowest_vf) - 1
- : wi::udiv_floor (loop->nb_iterations_upper_bound + bias,
+ : wi::udiv_floor (loop->nb_iterations_upper_bound + bias_for_lowest,
lowest_vf) - 1);
if (loop->any_likely_upper_bound)
loop->nb_iterations_likely_upper_bound
= (final_iter_may_be_partial
- ? wi::udiv_ceil (loop->nb_iterations_likely_upper_bound + bias,
- lowest_vf) - 1
- : wi::udiv_floor (loop->nb_iterations_likely_upper_bound + bias,
- lowest_vf) - 1);
+ ? wi::udiv_ceil (loop->nb_iterations_likely_upper_bound
+ + bias_for_lowest, lowest_vf) - 1
+ : wi::udiv_floor (loop->nb_iterations_likely_upper_bound
+ + bias_for_lowest, lowest_vf) - 1);
if (loop->any_estimate)
loop->nb_iterations_estimate
= (final_iter_may_be_partial
- ? wi::udiv_ceil (loop->nb_iterations_estimate + bias,
+ ? wi::udiv_ceil (loop->nb_iterations_estimate + bias_for_assumed,
assumed_vf) - 1
- : wi::udiv_floor (loop->nb_iterations_estimate + bias,
+ : wi::udiv_floor (loop->nb_iterations_estimate + bias_for_assumed,
assumed_vf) - 1);
if (dump_enabled_p ())
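A worked example of the bias arithmetic above, assuming a fully-masked loop
(so min_epilogue_iters is 0) and a known peel amount; vector_latch_bound is a
made-up helper, not GCC code.  With 10 scalar iterations, a VF of 4 and 3
active lanes in the first iteration, the loop runs 3 vector iterations
covering 3 + 4 + 3 elements, and the formula gives the matching latch bound
of 2:

#include <assert.h>

/* Sketch of the latch-count conversion above: plain unsigned
   arithmetic stands in for wi::udiv_ceil.  */
static unsigned
vector_latch_bound (unsigned scalar_latch_bound, unsigned vf,
		    unsigned min_first_active)
{
  unsigned bias = 1			/* latch count -> iteration count */
		  + (vf - min_first_active);	/* partial first iteration */
  return (scalar_latch_bound + bias + vf - 1) / vf - 1;	/* udiv_ceil */
}

int
main (void)
{
  /* 10 scalar iterations, VF 4, 3 active lanes in the first iteration:
     3 + 4 + 3 elements in 3 vector iterations, so the latch bound is 2.  */
  assert (vector_latch_bound (10 - 1, 4, 3) == 2);
  return 0;
}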