author     Richard Sandiford <richard.sandiford@linaro.org>  2018-01-13 18:00:41 +0000
committer  Richard Sandiford <rsandifo@gcc.gnu.org>  2018-01-13 18:00:41 +0000
commit     d1d20a49a788bdb82f09ada6377d932ceac07934
tree       da9369ee0298c56a7f3c618a641a99026cd3d33c /gcc/tree-vect-loop.c
parent     4aa157e8d2aec2e4f9e97dcee86068135e0dcb2f
Use single-iteration epilogues when peeling for gaps
This patch adds support for fully-masking loops that require peeling for
gaps.  It peels exactly one scalar iteration and uses the masked loop to
handle the rest.  Previously we would fall back on using a standard
unmasked loop instead.

2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
            Alan Hayward  <alan.hayward@arm.com>
            David Sherwood  <david.sherwood@arm.com>

gcc/
	* tree-vect-loop-manip.c (vect_gen_scalar_loop_niters): Replace
	vfm1 with a bound_epilog parameter.
	(vect_do_peeling): Update calls accordingly, and move the prologue
	call earlier in the function.  Treat the base bound_epilog as 0 for
	fully-masked loops and retain vf - 1 for other loops.  Add 1 to
	this base when peeling for gaps.
	* tree-vect-loop.c (vect_analyze_loop_2): Allow peeling for gaps
	with fully-masked loops.
	(vect_estimate_min_profitable_iters): Handle the single peeled
	iteration in that case.

gcc/testsuite/
	* gcc.target/aarch64/sve/struct_vect_18.c: Check the number of
	branches.
	* gcc.target/aarch64/sve/struct_vect_19.c: Likewise.
	* gcc.target/aarch64/sve/struct_vect_20.c: New test.
	* gcc.target/aarch64/sve/struct_vect_20_run.c: Likewise.
	* gcc.target/aarch64/sve/struct_vect_21.c: Likewise.
	* gcc.target/aarch64/sve/struct_vect_21_run.c: Likewise.
	* gcc.target/aarch64/sve/struct_vect_22.c: Likewise.
	* gcc.target/aarch64/sve/struct_vect_22_run.c: Likewise.
	* gcc.target/aarch64/sve/struct_vect_23.c: Likewise.
	* gcc.target/aarch64/sve/struct_vect_23_run.c: Likewise.

Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>

From-SVN: r256635
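
As a rough illustration (hand-written, not taken from the patch or its
testsuite; the function name is invented), the kind of loop that needs
peeling for gaps is one whose grouped accesses leave part of each access
group unused, for example a stride-two read, or equivalently reading one
field of a two-field struct:

/* Illustrative only.  Only the even-indexed elements of `b' are read,
   so a grouped vector load that fetches pairs of elements would touch
   b[2 * (n - 1) + 1], which the scalar loop never accesses and which
   might not be valid memory.  The vectorizer therefore peels the last
   scalar iteration ("peeling for gaps"); with this patch the remaining
   iterations can still be handled by a fully-masked loop.  */
void
read_even (int *restrict out, int *restrict b, int n)
{
  for (int i = 0; i < n; ++i)
    out[i] = b[2 * i];
}
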
Diffstat (limited to 'gcc/tree-vect-loop.c')
-rw-r--r--  gcc/tree-vect-loop.c  27
1 file changed, 17 insertions, 10 deletions
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index b3779e7..4b9226f 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -2260,16 +2260,6 @@ start_over:
return false;
}
- if (LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
- && LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
- {
- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "can't use a fully-masked loop because peeling for"
- " gaps is required.\n");
- }
-
/* Decide whether to use a fully-masked loop for this vectorization
factor. */
LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
@@ -3714,6 +3704,23 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
{
peel_iters_prologue = 0;
peel_iters_epilogue = 0;
+
+ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
+ {
+ /* We need to peel exactly one iteration. */
+ peel_iters_epilogue += 1;
+ stmt_info_for_cost *si;
+ int j;
+ FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
+ j, si)
+ {
+ struct _stmt_vec_info *stmt_info
+ = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
+ (void) add_stmt_cost (target_cost_data, si->count,
+ si->kind, stmt_info, si->misalign,
+ vect_epilogue);
+ }
+ }
}
else if (npeel < 0)
{