diff options
author | Mingming Liu <mingmingl@google.com> | 2024-06-29 23:21:33 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-29 23:21:33 -0700 |
commit | 1518b260ce2cbd9286365709642dc749e542d683 (patch) | |
tree | d7493659aa4433b46fc2430f67de8d4cccb0aa67 /llvm/lib/Transforms/Utils/InlineFunction.cpp | |
parent | 1d27348e537e1b25d727ec6f4d5d820a4dd8a789 (diff) | |
download | llvm-1518b260ce2cbd9286365709642dc749e542d683.zip llvm-1518b260ce2cbd9286365709642dc749e542d683.tar.gz llvm-1518b260ce2cbd9286365709642dc749e542d683.tar.bz2 |
[TypeProf][InstrFDO] Implement more efficient comparison sequence for indirect-call-promotion with vtable profiles. (#81442)
Clang's `-fwhole-program-vtables` is required for this optimization to
take place. If `-fwhole-program-vtables` is not enabled, this change is
a no-op.
* Function-comparison (before):
```
%vtable = load ptr, ptr %obj
%vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
%func = load ptr, ptr %vfn
%cond = icmp eq ptr %func, @callee
br i1 %cond, label bb1, label bb2
bb1:
call @callee
bb2:
call %func
```
* VTable-comparison (after):
```
%vtable = load ptr, ptr %obj
%cond = icmp eq ptr %vtable, @vtable-address-point
br i1 %cond, label bb1, label bb2
bb1:
call @callee
bb2:
%vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
%func = load ptr, ptr %vfn
call %func
```
Key changes:
1. Find virtual calls and the vtables they come from.
- The ICP pass relies on the type intrinsic `llvm.type.test` to find
virtual calls and their compatible vtables, and relies on type metadata
to find the address point for comparison.
2. The ICP pass does a cost-benefit analysis and compares vtables only when
the number of vtables for a function candidate is within an
option-specified threshold.
3. Sink the function-addressing and vtable-load instructions into the
indirect fallback.
- The sink helper functions are simplified versions of
`InstCombinerImpl::tryToSinkInstruction`. Currently debug intrinsics are
not handled. Ideally `InstCombinerImpl::tryToSinkInstructionDbgValues`
and `InstCombinerImpl::tryToSinkInstructionDbgVariableRecords` could be
moved into Transforms/Utils/Local.cpp (or another util cpp file) to
handle debug intrinsics when moving instructions across basic blocks.
4. Keep value profiles updated.
1) Update vtable value profiles after inlining.
2) For either function-based comparison or vtable-based comparison,
update both vtable and indirect call value profiles.
Diffstat (limited to 'llvm/lib/Transforms/Utils/InlineFunction.cpp')
-rw-r--r-- | llvm/lib/Transforms/Utils/InlineFunction.cpp | 26 |
1 files changed, 22 insertions, 4 deletions
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index f2130e4..0725add 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/IndirectCallVisitor.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryProfileInfo.h" #include "llvm/Analysis/ObjCARCAnalysisUtils.h" @@ -56,6 +57,7 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/ProfDataUtils.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" @@ -1976,16 +1978,28 @@ void llvm::updateProfileCallee( ? 0 : PriorEntryCount + EntryDelta; + auto updateVTableProfWeight = [](CallBase *CB, const uint64_t NewEntryCount, + const uint64_t PriorEntryCount) { + Instruction *VPtr = PGOIndirectCallVisitor::tryGetVTableInstruction(CB); + if (VPtr) + scaleProfData(*VPtr, NewEntryCount, PriorEntryCount); + }; + // During inlining ? if (VMap) { uint64_t CloneEntryCount = PriorEntryCount - NewEntryCount; for (auto Entry : *VMap) { if (isa<CallInst>(Entry.first)) - if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second)) + if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second)) { CI->updateProfWeight(CloneEntryCount, PriorEntryCount); + updateVTableProfWeight(CI, CloneEntryCount, PriorEntryCount); + } + if (isa<InvokeInst>(Entry.first)) - if (auto *II = dyn_cast_or_null<InvokeInst>(Entry.second)) + if (auto *II = dyn_cast_or_null<InvokeInst>(Entry.second)) { II->updateProfWeight(CloneEntryCount, PriorEntryCount); + updateVTableProfWeight(II, CloneEntryCount, PriorEntryCount); + } } } @@ -1996,10 +2010,14 @@ void llvm::updateProfileCallee( // No need to update the callsite if it is pruned during inlining. 
if (!VMap || VMap->count(&BB)) for (Instruction &I : BB) { - if (CallInst *CI = dyn_cast<CallInst>(&I)) + if (CallInst *CI = dyn_cast<CallInst>(&I)) { CI->updateProfWeight(NewEntryCount, PriorEntryCount); - if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) + updateVTableProfWeight(CI, NewEntryCount, PriorEntryCount); + } + if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) { II->updateProfWeight(NewEntryCount, PriorEntryCount); + updateVTableProfWeight(II, NewEntryCount, PriorEntryCount); + } } } } |