aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJan Hubicka <jh@suse.cz>2023-07-09 15:14:54 +0200
committerJan Hubicka <jh@suse.cz>2023-07-09 15:14:54 +0200
commitd6c1d7c4009bfe759719675ce3bc03ca503b9bf4 (patch)
tree0e054a53897d69c35a7f773012bf6752b6380250
parent95b712928b479f7a65910cf1c550ed67b8976617 (diff)
downloadgcc-d6c1d7c4009bfe759719675ce3bc03ca503b9bf4.zip
gcc-d6c1d7c4009bfe759719675ce3bc03ca503b9bf4.tar.gz
gcc-d6c1d7c4009bfe759719675ce3bc03ca503b9bf4.tar.bz2
Improve dumping of profile_count
Dumps of profile_counts are quite hard to interpret since they are 64bit fixed point values. In many cases one looks at a single function and it is better to think of basic block frequency, that is, how many times it is executed per invocation. This patch makes CFG dumps also print this info. For example: main() { for (int i = 0; i < 10; i++) t(); } the -fdump-tree-optimized-blocks-details now prints: int main () { unsigned int ivtmp_1; unsigned int ivtmp_2; ;; basic block 2, loop depth 0, count 97603128 (estimated locally, freq 1.0000), maybe hot ;; prev block 0, next block 3, flags: (NEW, VISITED) ;; pred: ENTRY [always] count:97603128 (estimated locally, freq 1.0000) (FALLTHRU,EXECUTABLE) ;; succ: 3 [always] count:97603128 (estimated locally, freq 1.0000) (FALLTHRU,EXECUTABLE) ;; basic block 3, loop depth 1, count 976138697 (estimated locally, freq 10.0011), maybe hot ;; prev block 2, next block 4, flags: (NEW, VISITED) ;; pred: 3 [90.0% (guessed)] count:878535568 (estimated locally, freq 9.0011) (TRUE_VALUE,EXECUTABLE) ;; 2 [always] count:97603128 (estimated locally, freq 1.0000) (FALLTHRU,EXECUTABLE) # ivtmp_2 = PHI <ivtmp_1(3), 10(2)> t (); ivtmp_1 = ivtmp_2 + 4294967295; if (ivtmp_1 != 0) goto <bb 3>; [90.00%] else goto <bb 4>; [10.00%] ;; succ: 3 [90.0% (guessed)] count:878535568 (estimated locally, freq 9.0011) (TRUE_VALUE,EXECUTABLE) ;; 4 [10.0% (guessed)] count:97603129 (estimated locally, freq 1.0000) (FALSE_VALUE,EXECUTABLE) ;; basic block 4, loop depth 0, count 97603128 (estimated locally, freq 1.0000), maybe hot ;; prev block 3, next block 1, flags: (NEW, VISITED) ;; pred: 3 [10.0% (guessed)] count:97603129 (estimated locally, freq 1.0000) (FALSE_VALUE,EXECUTABLE) return 0; ;; succ: EXIT [always] count:97603128 (estimated locally, freq 1.0000) (EXECUTABLE) } Which makes it easier to see that the inner bb is executed 10 times per invocation. gcc/ChangeLog: * cfg.cc (check_bb_profile): Dump counts with relative frequency. (dump_edge_info): Likewise. 
(dump_bb_info): Likewise. * profile-count.cc (profile_count::dump): Add comma between quality and freq. gcc/testsuite/ChangeLog: * gcc.dg/predict-22.c: Update template.
-rw-r--r--gcc/cfg.cc8
-rw-r--r--gcc/profile-count.cc2
-rw-r--r--gcc/testsuite/gcc.dg/predict-22.c2
3 files changed, 6 insertions, 6 deletions
diff --git a/gcc/cfg.cc b/gcc/cfg.cc
index 740d4f3..0de6d6b 100644
--- a/gcc/cfg.cc
+++ b/gcc/cfg.cc
@@ -475,9 +475,9 @@ check_bb_profile (basic_block bb, FILE * file, int indent)
{
fprintf (file, ";; %sInvalid sum of incoming counts ",
s_indent);
- sum.dump (file);
+ sum.dump (file, fun);
fprintf (file, ", should be ");
- bb->count.dump (file);
+ bb->count.dump (file, fun);
fprintf (file, "\n");
}
}
@@ -525,7 +525,7 @@ dump_edge_info (FILE *file, edge e, dump_flags_t flags, int do_succ)
if (e->count ().initialized_p () && do_details)
{
fputs (" count:", file);
- e->count ().dump (file);
+ e->count ().dump (file, cfun);
}
if (e->flags && do_details)
@@ -808,7 +808,7 @@ dump_bb_info (FILE *outf, basic_block bb, int indent, dump_flags_t flags,
if (bb->count.initialized_p ())
{
fputs (", count ", outf);
- bb->count.dump (outf);
+ bb->count.dump (outf, cfun);
}
if (maybe_hot_bb_p (fun, bb))
fputs (", maybe hot", outf);
diff --git a/gcc/profile-count.cc b/gcc/profile-count.cc
index 6bf9700..2c07ebc 100644
--- a/gcc/profile-count.cc
+++ b/gcc/profile-count.cc
@@ -94,7 +94,7 @@ profile_count::dump (char *buffer, struct function *fun) const
else if (fun && initialized_p ()
&& fun->cfg
&& ENTRY_BLOCK_PTR_FOR_FN (fun)->count.initialized_p ())
- sprintf (buffer, "%" PRId64 " (%s freq %.4f)", m_val,
+ sprintf (buffer, "%" PRId64 " (%s, freq %.4f)", m_val,
profile_quality_display_names[m_quality],
to_sreal_scale (ENTRY_BLOCK_PTR_FOR_FN (fun)->count).to_double ());
else
diff --git a/gcc/testsuite/gcc.dg/predict-22.c b/gcc/testsuite/gcc.dg/predict-22.c
index f14c2b6..1aed03f 100644
--- a/gcc/testsuite/gcc.dg/predict-22.c
+++ b/gcc/testsuite/gcc.dg/predict-22.c
@@ -55,5 +55,5 @@ foo (int x, int y, int z)
baz (&f);
}
/* { dg-final { scan-tree-dump-times "Invalid sum" 0 "optimized"} } */
-/* { dg-final { scan-tree-dump-times "count 0 .precise.," 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "count 0 .precise" 1 "optimized"} } */
/* { dg-final { scan-rtl-dump-times "COLD_PARTITION" 1 "bbpart"} } */