aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Utils/InlineFunction.cpp
diff options
context:
space:
mode:
author: Mingming Liu <mingmingl@google.com> 2024-06-29 23:21:33 -0700
committer: GitHub <noreply@github.com> 2024-06-29 23:21:33 -0700
commit: 1518b260ce2cbd9286365709642dc749e542d683 (patch)
tree: d7493659aa4433b46fc2430f67de8d4cccb0aa67 /llvm/lib/Transforms/Utils/InlineFunction.cpp
parent: 1d27348e537e1b25d727ec6f4d5d820a4dd8a789 (diff)
downloadllvm-1518b260ce2cbd9286365709642dc749e542d683.zip
llvm-1518b260ce2cbd9286365709642dc749e542d683.tar.gz
llvm-1518b260ce2cbd9286365709642dc749e542d683.tar.bz2
[TypeProf][InstrFDO] Implement a more efficient comparison sequence for indirect-call-promotion with vtable profiles. (#81442)
Clang's `-fwhole-program-vtables` is required for this optimization to take place. If `-fwhole-program-vtables` is not enabled, this change is a no-op.

* Function-comparison (before):
```
%vtable = load ptr, ptr %obj
%vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
%func = load ptr, ptr %vfn
%cond = icmp eq ptr %func, @callee
br i1 %cond, label bb1, label bb2:

bb1:
  call @callee

bb2:
  call %func
```

* VTable-comparison (after):
```
%vtable = load ptr, ptr %obj
%cond = icmp eq ptr %vtable, @vtable-address-point
br i1 %cond, label bb1, label bb2:

bb1:
  call @callee

bb2:
  %vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
  %func = load ptr, ptr %vfn
  call %func
```

Key changes:
1. Find out virtual calls and the vtables they come from.
   - The ICP relies on the type intrinsic `llvm.type.test` to find out virtual calls and the compatible vtables, and relies on type metadata to find the address point for comparison.
2. The ICP pass does cost-benefit analysis and compares vtables only when the number of vtables for a function candidate is within an (option-specified) threshold.
3. Sink the function addressing and vtable load instructions to the indirect fallback.
   - The sink helper functions are simplified versions of `InstCombinerImpl::tryToSinkInstruction`. Currently debug intrinsics are not handled. Ideally `InstCombinerImpl::tryToSinkInstructionDbgValues` and `InstCombinerImpl::tryToSinkInstructionDbgVariableRecords` could be moved into Transforms/Utils/Local.cpp (or another util cpp file) to handle debug intrinsics when moving instructions across basic blocks.
4. Keep value profiles updated.
   1) Update vtable value profiles after inlining.
   2) For either function-based comparison or vtable-based comparison, update both vtable and indirect call value profiles.
Diffstat (limited to 'llvm/lib/Transforms/Utils/InlineFunction.cpp')
-rw-r--r-- llvm/lib/Transforms/Utils/InlineFunction.cpp 26
1 files changed, 22 insertions, 4 deletions
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index f2130e4..0725add 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -23,6 +23,7 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/IndirectCallVisitor.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
@@ -56,6 +57,7 @@
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
@@ -1976,16 +1978,28 @@ void llvm::updateProfileCallee(
? 0
: PriorEntryCount + EntryDelta;
+ auto updateVTableProfWeight = [](CallBase *CB, const uint64_t NewEntryCount,
+ const uint64_t PriorEntryCount) {
+ Instruction *VPtr = PGOIndirectCallVisitor::tryGetVTableInstruction(CB);
+ if (VPtr)
+ scaleProfData(*VPtr, NewEntryCount, PriorEntryCount);
+ };
+
// During inlining ?
if (VMap) {
uint64_t CloneEntryCount = PriorEntryCount - NewEntryCount;
for (auto Entry : *VMap) {
if (isa<CallInst>(Entry.first))
- if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second))
+ if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second)) {
CI->updateProfWeight(CloneEntryCount, PriorEntryCount);
+ updateVTableProfWeight(CI, CloneEntryCount, PriorEntryCount);
+ }
+
if (isa<InvokeInst>(Entry.first))
- if (auto *II = dyn_cast_or_null<InvokeInst>(Entry.second))
+ if (auto *II = dyn_cast_or_null<InvokeInst>(Entry.second)) {
II->updateProfWeight(CloneEntryCount, PriorEntryCount);
+ updateVTableProfWeight(II, CloneEntryCount, PriorEntryCount);
+ }
}
}
@@ -1996,10 +2010,14 @@ void llvm::updateProfileCallee(
// No need to update the callsite if it is pruned during inlining.
if (!VMap || VMap->count(&BB))
for (Instruction &I : BB) {
- if (CallInst *CI = dyn_cast<CallInst>(&I))
+ if (CallInst *CI = dyn_cast<CallInst>(&I)) {
CI->updateProfWeight(NewEntryCount, PriorEntryCount);
- if (InvokeInst *II = dyn_cast<InvokeInst>(&I))
+ updateVTableProfWeight(CI, NewEntryCount, PriorEntryCount);
+ }
+ if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
II->updateProfWeight(NewEntryCount, PriorEntryCount);
+ updateVTableProfWeight(II, NewEntryCount, PriorEntryCount);
+ }
}
}
}