diff options
author | Jan Hubicka <hubicka@ucw.cz> | 2025-09-04 17:23:20 +0200 |
---|---|---|
committer | Jan Hubicka <hubicka@ucw.cz> | 2025-09-04 17:25:26 +0200 |
commit | 1da3c4d90e678af0fed89c5638c97a41e5e04547 (patch) | |
tree | dac6ecc0880281a7a583ea017a5869211065da38 /gcc | |
parent | 640fd2f0ccdce4a74f239af818dee409ea7f5587 (diff) | |
download | gcc-1da3c4d90e678af0fed89c5638c97a41e5e04547.zip gcc-1da3c4d90e678af0fed89c5638c97a41e5e04547.tar.gz gcc-1da3c4d90e678af0fed89c5638c97a41e5e04547.tar.bz2 |
Fix scaling of auto-fdo profiles in inliner
With auto-fdo it is possible that function bar with a non-zero profile is inlined
into foo with a zero profile, and foo is the only caller of it. In this case
we currently scale bar to also have a zero profile, which makes it optimized
for size. With normal profiles this does not happen, since basic blocks with
a non-zero count must have some way to be reached.
This patch makes the inliner scale the caller in this case, which mitigates the
problem (to some degree).
Bootstrapped/regtested x86_64-linux, plan to commit it shortly.
gcc/ChangeLog:
* ipa-inline-transform.cc (inline_call): If function with
AFDO profile is inlined into function with
GUESSED_GLOBAL0_AFDO or GUESSED_GLOBAL0_ADJUSTED, scale
caller to AFDO profile.
* profile-count.h (profile_count::apply_scale): If num is AFDO
and den is not GUESSED, make result AFDO rather than GUESSED.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ipa-inline-transform.cc | 34 | ||||
-rw-r--r-- | gcc/profile-count.h | 14 |
2 files changed, 46 insertions, 2 deletions
diff --git a/gcc/ipa-inline-transform.cc b/gcc/ipa-inline-transform.cc index 9d759d2..5c244bc 100644 --- a/gcc/ipa-inline-transform.cc +++ b/gcc/ipa-inline-transform.cc @@ -344,6 +344,40 @@ inline_call (struct cgraph_edge *e, bool update_original, to = e->caller; if (to->inlined_to) to = to->inlined_to; + + /* In case callee has AFDO profile but caller has GLOBAL0 we need + to re-scale it so it can have non-zero AFDO profile. */ + if (callee->count.quality () == AFDO + && e->count.nonzero_p () + && (to->count.quality () == GUESSED_GLOBAL0_AFDO + || to->count.quality () == GUESSED_GLOBAL0_ADJUSTED)) + { + profile_count num = callee->count; + profile_count den = e->count; + profile_count::adjust_for_ipa_scaling (&num, &den); + if (dump_file) + { + fprintf (dump_file, "Rescalling profile of caller %s " + "to allow non-zero AFDO counts:", + to->dump_name ()); + den.dump (dump_file); + fprintf (dump_file, " -> "); + num.dump (dump_file); + fprintf (dump_file, "\n"); + } + to->apply_scale (num, den); + to->frequency = std::max (to->frequency, callee->frequency); + /* Do not update original, so possible additional calls of callee + are handled reasonably well. */ + update_original = false; + gcc_checking_assert (to->count.quality () == AFDO); + if (dump_file) + { + fprintf (dump_file, "Scaled profile of %s: ", to->dump_name ()); + to->count.dump (dump_file); + fprintf (dump_file, "\n"); + } + } if (to->thunk) { struct cgraph_node *target = to->callees->callee; diff --git a/gcc/profile-count.h b/gcc/profile-count.h index c893aec..65c4596 100644 --- a/gcc/profile-count.h +++ b/gcc/profile-count.h @@ -1212,8 +1212,18 @@ public: /* Be sure that ret is not local if num is global. Also ensure that ret is not global0 when num is global. */ if (num.ipa_p ()) - ret.m_quality = MAX (ret.m_quality, - num == num.ipa () ? GUESSED : num.m_quality); + { + /* This is common case of AFDO scaling when we upgrade + GLOBAL0_AFDO function to AFDO. 
Be sure that result + is AFDO and not GUESSED (which is unnecesarily low). */ + if (num.m_quality == AFDO + && (ret.m_quality != GUESSED + && ret.m_quality != GUESSED_LOCAL)) + ret.m_quality = AFDO; + else + ret.m_quality = MAX (ret.m_quality, + num == num.ipa () ? GUESSED : num.m_quality); + } return ret; } |