aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-vect-loop-manip.cc
diff options
context:
space:
mode:
authorJan Hubicka <jh@suse.cz>2023-08-06 21:23:31 +0200
committerJan Hubicka <jh@suse.cz>2023-08-06 21:23:31 +0200
commit838237aeeba578fc2cf42bfd3ecb9d9a4fb7a2b4 (patch)
tree2539c5a2f161c15c5d4f109dcdcdef65d0c70374 /gcc/tree-vect-loop-manip.cc
parent3802297528685aa16c4f43bb1f0cedbdbf22923d (diff)
downloadgcc-838237aeeba578fc2cf42bfd3ecb9d9a4fb7a2b4.zip
gcc-838237aeeba578fc2cf42bfd3ecb9d9a4fb7a2b4.tar.gz
gcc-838237aeeba578fc2cf42bfd3ecb9d9a4fb7a2b4.tar.bz2
Fix profile update after peeled epilogues
Epilogue peeling expects the scalar loop to have the same number of executions as the vector loop, which is true at the beginning of vectorization. However, if the epilogues are vectorized, this is no longer the case. In this situation the loop preheader is replaced by new guard code with the correct profile, but the loop body is left unscaled. This leads to a loop that exits more often than it is entered. This patch adds logic to scale the frequencies down and also to fix the profile of the original preheader where necessary. Bootstrapped/regtested x86_64-linux, committed. gcc/ChangeLog: * tree-vect-loop-manip.cc (vect_do_peeling): Fix profile update of peeled epilogues. gcc/testsuite/ChangeLog: * gcc.dg/vect/vect-bitfield-read-1.c: Check profile consistency. * gcc.dg/vect/vect-bitfield-read-2.c: Check profile consistency. * gcc.dg/vect/vect-bitfield-read-3.c: Check profile consistency. * gcc.dg/vect/vect-bitfield-read-4.c: Check profile consistency. * gcc.dg/vect/vect-bitfield-read-5.c: Check profile consistency. * gcc.dg/vect/vect-bitfield-read-6.c: Check profile consistency. * gcc.dg/vect/vect-bitfield-read-7.c: Check profile consistency. * gcc.dg/vect/vect-bitfield-write-1.c: Check profile consistency. * gcc.dg/vect/vect-bitfield-write-2.c: Check profile consistency. * gcc.dg/vect/vect-bitfield-write-3.c: Check profile consistency. * gcc.dg/vect/vect-bitfield-write-4.c: Check profile consistency. * gcc.dg/vect/vect-bitfield-write-5.c: Check profile consistency. * gcc.dg/vect/vect-epilogues-2.c: Check profile consistency. * gcc.dg/vect/vect-epilogues.c: Check profile consistency. * gcc.dg/vect/vect-mask-store-move-1.c: Check profile consistency.
Diffstat (limited to 'gcc/tree-vect-loop-manip.cc')
-rw-r--r--gcc/tree-vect-loop-manip.cc13
1 files changed, 11 insertions, 2 deletions
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 9de897d..0e7e223 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -3271,6 +3271,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
adjust_vec_debug_stmts ();
scev_reset ();
}
+ basic_block bb_before_epilog = NULL;
if (epilog_peeling)
{
@@ -3290,6 +3291,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
epilog->force_vectorize = false;
slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
+ bb_before_epilog = loop_preheader_edge (epilog)->src;
/* Scalar version loop may be preferred. In this case, add guard
and skip to epilog. Note this only happens when the number of
@@ -3317,6 +3319,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
/* Simply propagate profile info from guard_bb to guard_to which is
a merge point of control flow. */
+ profile_count old_count = guard_to->count;
guard_to->count = guard_bb->count;
/* Restore the counts of the epilog loop if we didn't use the scalar loop. */
@@ -3332,9 +3335,15 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
free (bbs);
free (original_bbs);
}
- }
+ else
+ scale_loop_profile (epilog, guard_to->count.probability_in (old_count), -1);
- basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
+ /* Only need to handle basic block before epilog loop if it's not
+ the guard_bb, which is the case when skip_vector is true. */
+ if (guard_bb != bb_before_epilog)
+ bb_before_epilog->count = single_pred_edge (bb_before_epilog)->count ();
+ bb_before_epilog = loop_preheader_edge (epilog)->src;
+ }
/* If loop is peeled for non-zero constant times, now niters refers to
orig_niters - prolog_peeling, it won't overflow even the orig_niters
overflows. */