aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorJan Hubicka <jh@suse.cz>2023-08-06 21:23:31 +0200
committerJan Hubicka <jh@suse.cz>2023-08-06 21:23:31 +0200
commit838237aeeba578fc2cf42bfd3ecb9d9a4fb7a2b4 (patch)
tree2539c5a2f161c15c5d4f109dcdcdef65d0c70374 /gcc
parent3802297528685aa16c4f43bb1f0cedbdbf22923d (diff)
downloadgcc-838237aeeba578fc2cf42bfd3ecb9d9a4fb7a2b4.zip
gcc-838237aeeba578fc2cf42bfd3ecb9d9a4fb7a2b4.tar.gz
gcc-838237aeeba578fc2cf42bfd3ecb9d9a4fb7a2b4.tar.bz2
Fix profile update after peeled epilogues
Epilogue peeling expects the scalar loop to have same number of executions as the vector loop which is true at the beggining of vectorization. However if the epilogues are vectorized, this is no longer the case. In this situation the loop preheader is replaced by new guard code with correct profile, however loop body is left unscaled. This leads to loop that exists more often then it is entered. This patch add slogic to scale the frequencies down and also to fix profile of original preheader where necesary. Bootstrapped/regtested x86_64-linux, comitted. gcc/ChangeLog: * tree-vect-loop-manip.cc (vect_do_peeling): Fix profile update of peeled epilogues. gcc/testsuite/ChangeLog: * gcc.dg/vect/vect-bitfield-read-1.c: Check profile consistency. * gcc.dg/vect/vect-bitfield-read-2.c: Check profile consistency. * gcc.dg/vect/vect-bitfield-read-3.c: Check profile consistency. * gcc.dg/vect/vect-bitfield-read-4.c: Check profile consistency. * gcc.dg/vect/vect-bitfield-read-5.c: Check profile consistency. * gcc.dg/vect/vect-bitfield-read-6.c: Check profile consistency. * gcc.dg/vect/vect-bitfield-read-7.c: Check profile consistency. * gcc.dg/vect/vect-bitfield-write-1.c: Check profile consistency. * gcc.dg/vect/vect-bitfield-write-2.c: Check profile consistency. * gcc.dg/vect/vect-bitfield-write-3.c: Check profile consistency. * gcc.dg/vect/vect-bitfield-write-4.c: Check profile consistency. * gcc.dg/vect/vect-bitfield-write-5.c: Check profile consistency. * gcc.dg/vect/vect-epilogues-2.c: Check profile consistency. * gcc.dg/vect/vect-epilogues.c: Check profile consistency. * gcc.dg/vect/vect-mask-store-move-1.c: Check profile consistency.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-bitfield-read-3.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-bitfield-read-4.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-bitfield-read-5.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-bitfield-read-6.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-bitfield-read-7.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-bitfield-write-1.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-bitfield-write-2.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-bitfield-write-3.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-bitfield-write-4.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-bitfield-write-5.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-epilogues-2.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-epilogues.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-mask-store-move-1.c2
-rw-r--r--gcc/tree-vect-loop-manip.cc13
16 files changed, 41 insertions, 2 deletions
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1.c
index 42e50d9..147c959 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1.c
@@ -1,3 +1,4 @@
+/* { dg-additional-options "-fdump-tree-optimized-details-blocks" } */
/* { dg-require-effective-target vect_int } */
/* { dg-require-effective-target vect_shift } */
@@ -39,3 +40,4 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2.c
index a9aeefc..982e6a7 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2.c
@@ -1,3 +1,4 @@
+/* { dg-additional-options "-fdump-tree-optimized-details-blocks" } */
/* { dg-require-effective-target vect_shift } */
/* { dg-require-effective-target vect_long_long } */
@@ -42,3 +43,4 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-3.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-3.c
index c7d0fd2..f2a43c3 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-3.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-3.c
@@ -1,3 +1,4 @@
+/* { dg-additional-options "-fdump-tree-optimized-details-blocks" } */
/* { dg-require-effective-target vect_int } */
/* { dg-require-effective-target vect_shift } */
@@ -43,3 +44,4 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-4.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-4.c
index 6a3ed8c..9f6f022 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-4.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-4.c
@@ -1,3 +1,4 @@
+/* { dg-additional-options "-fdump-tree-optimized-details-blocks" } */
/* { dg-require-effective-target vect_shift } */
/* { dg-require-effective-target vect_long_long } */
@@ -44,3 +45,4 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-5.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-5.c
index b2889df..662aed1 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-5.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-5.c
@@ -1,3 +1,4 @@
+/* { dg-additional-options "-fdump-tree-optimized-details-blocks" } */
/* { dg-require-effective-target vect_int } */
/* { dg-require-effective-target vect_shift } */
@@ -41,3 +42,4 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-6.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-6.c
index 2445f53..9b315d6 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-6.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-6.c
@@ -1,3 +1,4 @@
+/* { dg-additional-options "-fdump-tree-optimized-details-blocks" } */
/* { dg-require-effective-target vect_int } */
/* { dg-require-effective-target vect_shift } */
@@ -41,3 +42,4 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-7.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-7.c
index 4b1ec8a..6d1043d 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-7.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-7.c
@@ -1,3 +1,4 @@
+/* { dg-additional-options "-fdump-tree-optimized-details-blocks" } */
/* { dg-require-effective-target vect_int } */
/* { dg-require-effective-target vect_shift } */
@@ -42,3 +43,4 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-1.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-1.c
index 22e6235..7c710cf 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-1.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-1.c
@@ -1,3 +1,4 @@
+/* { dg-additional-options "-fdump-tree-optimized-details-blocks" } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
@@ -38,3 +39,4 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-2.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-2.c
index 0c8291c..3b609183 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-2.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-2.c
@@ -1,3 +1,4 @@
+/* { dg-additional-options "-fdump-tree-optimized-details-blocks" } */
/* { dg-require-effective-target vect_int } */
/* { dg-require-effective-target vect_long_long } */
@@ -42,3 +43,4 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-3.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-3.c
index 46fcb02..e96da82 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-3.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-3.c
@@ -1,3 +1,4 @@
+/* { dg-additional-options "-fdump-tree-optimized-details-blocks" } */
/* { dg-require-effective-target vect_int } */
/* { dg-require-effective-target vect_long_long } */
@@ -43,3 +44,4 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-4.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-4.c
index 5a7227a..6644221 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-4.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-4.c
@@ -1,3 +1,4 @@
+/* { dg-additional-options "-fdump-tree-optimized-details-blocks" } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
@@ -41,3 +42,4 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-5.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-5.c
index e0b36e4..386de50 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-5.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-5.c
@@ -1,3 +1,4 @@
+/* { dg-additional-options "-fdump-tree-optimized-details-blocks" } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
@@ -41,3 +42,4 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-epilogues-2.c b/gcc/testsuite/gcc.dg/vect/vect-epilogues-2.c
index b251e1f..63c5e23 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-epilogues-2.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-epilogues-2.c
@@ -1,3 +1,4 @@
+/* { dg-additional-options "-fdump-tree-optimized-details-blocks" } */
/* { dg-do compile } */
int
@@ -55,3 +56,4 @@ f6 (int *x, int a)
x[a] += 1;
return res;
}
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-epilogues.c b/gcc/testsuite/gcc.dg/vect/vect-epilogues.c
index ab7e8a1..11b8c83 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-epilogues.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-epilogues.c
@@ -1,3 +1,4 @@
+/* { dg-additional-options "-fdump-tree-optimized-details-blocks" } */
/* { dg-do compile } */
/* Copied from PR 88915. */
@@ -17,3 +18,4 @@ void pixel_avg( unsigned char *dst, int i_dst_stride,
}
/* { dg-final { scan-tree-dump "LOOP EPILOGUE VECTORIZED" "vect" { target vect_multiple_sizes xfail { { arm32 && be } || vect_partial_vectors_usage_2 } } } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-mask-store-move-1.c b/gcc/testsuite/gcc.dg/vect/vect-mask-store-move-1.c
index 1e06b58..700adf9 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-mask-store-move-1.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-mask-store-move-1.c
@@ -1,3 +1,4 @@
+/* { dg-additional-options "-fdump-tree-optimized-details-blocks" } */
/* { dg-do compile } */
/* { dg-additional-options "-mavx2" { target { i?86-*-* x86_64-*-* } } } */
@@ -16,3 +17,4 @@ void foo (int n)
}
/* { dg-final { scan-tree-dump-times "Move stmt to created bb" 4 "vect" { target { i?86-*-* x86_64-*-* } xfail { i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 9de897d..0e7e223 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -3271,6 +3271,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
adjust_vec_debug_stmts ();
scev_reset ();
}
+ basic_block bb_before_epilog = NULL;
if (epilog_peeling)
{
@@ -3290,6 +3291,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
epilog->force_vectorize = false;
slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
+ bb_before_epilog = loop_preheader_edge (epilog)->src;
/* Scalar version loop may be preferred. In this case, add guard
and skip to epilog. Note this only happens when the number of
@@ -3317,6 +3319,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
/* Simply propagate profile info from guard_bb to guard_to which is
a merge point of control flow. */
+ profile_count old_count = guard_to->count;
guard_to->count = guard_bb->count;
/* Restore the counts of the epilog loop if we didn't use the scalar loop. */
@@ -3332,9 +3335,15 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
free (bbs);
free (original_bbs);
}
- }
+ else
+ scale_loop_profile (epilog, guard_to->count.probability_in (old_count), -1);
- basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
+ /* Only need to handle basic block before epilog loop if it's not
+ the guard_bb, which is the case when skip_vector is true. */
+ if (guard_bb != bb_before_epilog)
+ bb_before_epilog->count = single_pred_edge (bb_before_epilog)->count ();
+ bb_before_epilog = loop_preheader_edge (epilog)->src;
+ }
/* If loop is peeled for non-zero constant times, now niters refers to
orig_niters - prolog_peeling, it won't overflow even the orig_niters
overflows. */