aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJan Hubicka <hubicka@ucw.cz>2025-09-04 17:23:20 +0200
committerJan Hubicka <hubicka@ucw.cz>2025-09-04 17:25:26 +0200
commit1da3c4d90e678af0fed89c5638c97a41e5e04547 (patch)
treedac6ecc0880281a7a583ea017a5869211065da38
parent640fd2f0ccdce4a74f239af818dee409ea7f5587 (diff)
downloadgcc-1da3c4d90e678af0fed89c5638c97a41e5e04547.zip
gcc-1da3c4d90e678af0fed89c5638c97a41e5e04547.tar.gz
gcc-1da3c4d90e678af0fed89c5638c97a41e5e04547.tar.bz2
Fix scaling of auto-fdo profiles in inliner
With auto-fdo it is possible that a function bar with a non-zero profile is inlined into foo with a zero profile, and foo is the only caller of it. In this case we currently scale bar to also have a zero profile, which makes it optimized for size. With normal profiles this does not happen, since basic blocks with a non-zero count must have some way to be reached. This patch makes the inliner scale the caller in this case, which mitigates the problem (to some degree). Bootstrapped/regtested x86_64-linux, plan to commit it shortly. gcc/ChangeLog: * ipa-inline-transform.cc (inline_call): If function with AFDO profile is inlined into function with GUESSED_GLOBAL0_AFDO or GUESSED_GLOBAL0_ADJUSTED, scale caller to AFDO profile. * profile-count.h (profile_count::apply_scale): If num is AFDO and den is not GUESSED, make result AFDO rather than GUESSED.
-rw-r--r--gcc/ipa-inline-transform.cc34
-rw-r--r--gcc/profile-count.h14
2 files changed, 46 insertions, 2 deletions
diff --git a/gcc/ipa-inline-transform.cc b/gcc/ipa-inline-transform.cc
index 9d759d2..5c244bc 100644
--- a/gcc/ipa-inline-transform.cc
+++ b/gcc/ipa-inline-transform.cc
@@ -344,6 +344,40 @@ inline_call (struct cgraph_edge *e, bool update_original,
to = e->caller;
if (to->inlined_to)
to = to->inlined_to;
+
+ /* In case callee has AFDO profile but caller has GLOBAL0 we need
+ to re-scale it so it can have non-zero AFDO profile. */
+ if (callee->count.quality () == AFDO
+ && e->count.nonzero_p ()
+ && (to->count.quality () == GUESSED_GLOBAL0_AFDO
+ || to->count.quality () == GUESSED_GLOBAL0_ADJUSTED))
+ {
+ profile_count num = callee->count;
+ profile_count den = e->count;
+ profile_count::adjust_for_ipa_scaling (&num, &den);
+ if (dump_file)
+ {
+ fprintf (dump_file, "Rescalling profile of caller %s "
+ "to allow non-zero AFDO counts:",
+ to->dump_name ());
+ den.dump (dump_file);
+ fprintf (dump_file, " -> ");
+ num.dump (dump_file);
+ fprintf (dump_file, "\n");
+ }
+ to->apply_scale (num, den);
+ to->frequency = std::max (to->frequency, callee->frequency);
+ /* Do not update original, so possible additional calls of callee
+ are handled reasonably well. */
+ update_original = false;
+ gcc_checking_assert (to->count.quality () == AFDO);
+ if (dump_file)
+ {
+ fprintf (dump_file, "Scaled profile of %s: ", to->dump_name ());
+ to->count.dump (dump_file);
+ fprintf (dump_file, "\n");
+ }
+ }
if (to->thunk)
{
struct cgraph_node *target = to->callees->callee;
diff --git a/gcc/profile-count.h b/gcc/profile-count.h
index c893aec..65c4596 100644
--- a/gcc/profile-count.h
+++ b/gcc/profile-count.h
@@ -1212,8 +1212,18 @@ public:
/* Be sure that ret is not local if num is global.
Also ensure that ret is not global0 when num is global. */
if (num.ipa_p ())
- ret.m_quality = MAX (ret.m_quality,
- num == num.ipa () ? GUESSED : num.m_quality);
+ {
+ /* This is common case of AFDO scaling when we upgrade
+ GLOBAL0_AFDO function to AFDO. Be sure that result
+ is AFDO and not GUESSED (which is unnecesarily low). */
+ if (num.m_quality == AFDO
+ && (ret.m_quality != GUESSED
+ && ret.m_quality != GUESSED_LOCAL))
+ ret.m_quality = AFDO;
+ else
+ ret.m_quality = MAX (ret.m_quality,
+ num == num.ipa () ? GUESSED : num.m_quality);
+ }
return ret;
}