diff options
author | Alexandros Lamprineas <alexandros.lamprineas@arm.com> | 2023-05-12 00:07:49 +0100 |
---|---|---|
committer | Alexandros Lamprineas <alexandros.lamprineas@arm.com> | 2023-05-24 11:40:12 +0100 |
commit | ced90d1ff64a89a13479a37a3b17a411a3259f9f (patch) | |
tree | 562b4c470249174e57e523fca797e391feeeea79 /llvm/lib/Transforms/IPO/FunctionSpecialization.cpp | |
parent | 4447b82b897ad5dda3a7a4178520731bcc6f22ec (diff) | |
download | llvm-ced90d1ff64a89a13479a37a3b17a411a3259f9f.zip llvm-ced90d1ff64a89a13479a37a3b17a411a3259f9f.tar.gz llvm-ced90d1ff64a89a13479a37a3b17a411a3259f9f.tar.bz2 |
[FuncSpec] Improve the accuracy of the cost model.
Instead of blindly traversing the use-def chain of constant arguments,
compute known constants along the way. Stop as soon as a user cannot
be replaced by a constant. Keep it light-weight by handling some basic
instruction types.
Differential Revision: https://reviews.llvm.org/D150464
Diffstat (limited to 'llvm/lib/Transforms/IPO/FunctionSpecialization.cpp')
-rw-r--r-- | llvm/lib/Transforms/IPO/FunctionSpecialization.cpp | 260 |
1 files changed, 219 insertions, 41 deletions
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp index 87cc0f6..a970253 100644 --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -48,11 +48,14 @@ #include "llvm/Transforms/IPO/FunctionSpecialization.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueLattice.h" #include "llvm/Analysis/ValueLatticeUtils.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/ConstantFold.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Transforms/Scalar/SCCP.h" #include "llvm/Transforms/Utils/Cloning.h" @@ -94,6 +97,210 @@ static cl::opt<bool> SpecializeLiteralConstant( "Enable specialization of functions that take a literal constant as an " "argument")); +// Estimates the instruction cost of all the basic blocks in \p WorkList. +// The successors of such blocks are added to the list as long as they are +// executable and they have a unique predecessor. \p WorkList represents +// the basic blocks of a specialization which become dead once we replace +// instructions that are known to be constants. The aim here is to estimate +// the combination of size and latency savings in comparison to the non +// specialized version of the function. +static Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList, + ConstMap &KnownConstants, SCCPSolver &Solver, + BlockFrequencyInfo &BFI, + TargetTransformInfo &TTI) { + Cost Bonus = 0; + + // Accumulate the instruction cost of each basic block weighted by frequency. + while (!WorkList.empty()) { + BasicBlock *BB = WorkList.pop_back_val(); + + uint64_t Weight = BFI.getBlockFreq(BB).getFrequency() / + BFI.getEntryFreq(); + if (!Weight) + continue; + + for (Instruction &I : *BB) { + // Disregard SSA copies. + if (auto *II = dyn_cast<IntrinsicInst>(&I)) + if (II->getIntrinsicID() == Intrinsic::ssa_copy) + continue; + // If it's a known constant we have already accounted for it. + if (KnownConstants.contains(&I)) + continue; + + Bonus += Weight * + TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency); + + LLVM_DEBUG(dbgs() << "FnSpecialization: Bonus " << Bonus + << " after user " << I << "\n"); + } + + // Keep adding dead successors to the list as long as they are + // executable and they have a unique predecessor. + for (BasicBlock *SuccBB : successors(BB)) + if (Solver.isBlockExecutable(SuccBB) && + SuccBB->getUniquePredecessor() == BB) + WorkList.push_back(SuccBB); + } + return Bonus; +} + +static Constant *findConstantFor(Value *V, ConstMap &KnownConstants) { + if (auto It = KnownConstants.find(V); It != KnownConstants.end()) + return It->second; + return nullptr; +} + +Cost InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) { + // Cache the iterator before visiting. + LastVisited = KnownConstants.insert({Use, C}).first; + + if (auto *I = dyn_cast<SwitchInst>(User)) + return estimateSwitchInst(*I); + + if (auto *I = dyn_cast<BranchInst>(User)) + return estimateBranchInst(*I); + + C = visit(*User); + if (!C) + return 0; + + KnownConstants.insert({User, C}); + + uint64_t Weight = BFI.getBlockFreq(User->getParent()).getFrequency() / + BFI.getEntryFreq(); + if (!Weight) + return 0; + + Cost Bonus = Weight * + TTI.getInstructionCost(User, TargetTransformInfo::TCK_SizeAndLatency); + + LLVM_DEBUG(dbgs() << "FnSpecialization: Bonus " << Bonus + << " for user " << *User << "\n"); + + for (auto *U : User->users()) + if (auto *UI = dyn_cast<Instruction>(U)) + if (Solver.isBlockExecutable(UI->getParent())) + Bonus += getUserBonus(UI, User, C); + + return Bonus; +} + +Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) { + if (I.getCondition() != LastVisited->first) + return 0; + + auto *C = cast<ConstantInt>(LastVisited->second); + BasicBlock *Succ = I.findCaseValue(C)->getCaseSuccessor(); + // Initialize the worklist with the dead basic blocks. These are the + // destination labels which are different from the one corresponding + // to \p C. They should be executable and have a unique predecessor. + SmallVector<BasicBlock *> WorkList; + for (const auto &Case : I.cases()) { + BasicBlock *BB = Case.getCaseSuccessor(); + if (BB == Succ || !Solver.isBlockExecutable(BB) || + BB->getUniquePredecessor() != I.getParent()) + continue; + WorkList.push_back(BB); + } + + return estimateBasicBlocks(WorkList, KnownConstants, Solver, BFI, TTI); +} + +Cost InstCostVisitor::estimateBranchInst(BranchInst &I) { + if (I.getCondition() != LastVisited->first) + return 0; + + BasicBlock *Succ = I.getSuccessor(LastVisited->second->isOneValue()); + // Initialize the worklist with the dead successor as long as + // it is executable and has a unique predecessor. + SmallVector<BasicBlock *> WorkList; + if (Solver.isBlockExecutable(Succ) && + Succ->getUniquePredecessor() == I.getParent()) + WorkList.push_back(Succ); + + return estimateBasicBlocks(WorkList, KnownConstants, Solver, BFI, TTI); +} + +Constant *InstCostVisitor::visitLoadInst(LoadInst &I) { + if (isa<ConstantPointerNull>(LastVisited->second)) + return nullptr; + return ConstantFoldLoadFromConstPtr(LastVisited->second, I.getType(), DL); +} + +Constant *InstCostVisitor::visitGetElementPtrInst(GetElementPtrInst &I) { + SmallVector<Value *, 8> Operands; + Operands.reserve(I.getNumOperands()); + + for (unsigned Idx = 0, E = I.getNumOperands(); Idx != E; ++Idx) { + Value *V = I.getOperand(Idx); + auto *C = dyn_cast<Constant>(V); + if (!C) + C = findConstantFor(V, KnownConstants); + if (!C) + return nullptr; + Operands.push_back(C); + } + + auto *Ptr = cast<Constant>(Operands[0]); + auto Ops = ArrayRef(Operands.begin() + 1, Operands.end()); + return ConstantFoldGetElementPtr(I.getSourceElementType(), Ptr, + I.isInBounds(), std::nullopt, Ops); +} + +Constant *InstCostVisitor::visitSelectInst(SelectInst &I) { + if (I.getCondition() != LastVisited->first) + return nullptr; + + Value *V = LastVisited->second->isZeroValue() ? I.getFalseValue() + : I.getTrueValue(); + auto *C = dyn_cast<Constant>(V); + if (!C) + C = findConstantFor(V, KnownConstants); + return C; +} + +Constant *InstCostVisitor::visitCastInst(CastInst &I) { + return ConstantFoldCastOperand(I.getOpcode(), LastVisited->second, + I.getType(), DL); +} + +Constant *InstCostVisitor::visitCmpInst(CmpInst &I) { + bool Swap = I.getOperand(1) == LastVisited->first; + Value *V = Swap ? I.getOperand(0) : I.getOperand(1); + auto *Other = dyn_cast<Constant>(V); + if (!Other) + Other = findConstantFor(V, KnownConstants); + + if (!Other) + return nullptr; + + Constant *Const = LastVisited->second; + return Swap ? + ConstantFoldCompareInstOperands(I.getPredicate(), Other, Const, DL) + : ConstantFoldCompareInstOperands(I.getPredicate(), Const, Other, DL); +} + +Constant *InstCostVisitor::visitUnaryOperator(UnaryOperator &I) { + return ConstantFoldUnaryOpOperand(I.getOpcode(), LastVisited->second, DL); +} + +Constant *InstCostVisitor::visitBinaryOperator(BinaryOperator &I) { + bool Swap = I.getOperand(1) == LastVisited->first; + Value *V = Swap ? I.getOperand(0) : I.getOperand(1); + auto *Other = dyn_cast<Constant>(V); + if (!Other) + Other = findConstantFor(V, KnownConstants); + + if (!Other) + return nullptr; + + Constant *Const = LastVisited->second; + return dyn_cast_or_null<Constant>(Swap ? + simplifyBinOp(I.getOpcode(), Other, Const, SimplifyQuery(DL)) + : simplifyBinOp(I.getOpcode(), Const, Other, SimplifyQuery(DL))); +} + Constant *FunctionSpecializer::getPromotableAlloca(AllocaInst *Alloca, CallInst *Call) { Value *StoreValue = nullptr; @@ -412,10 +619,6 @@ CodeMetrics &FunctionSpecializer::analyzeFunction(Function *F) { CodeMetrics::collectEphemeralValues(F, &(GetAC)(*F), EphValues); for (BasicBlock &BB : *F) Metrics.analyzeBasicBlock(&BB, (GetTTI)(*F), EphValues); - - LLVM_DEBUG(dbgs() << "FnSpecialization: Code size of function " - << F->getName() << " is " << Metrics.NumInsts - << " instructions\n"); } return Metrics; } @@ -496,8 +699,9 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost, } else { // Calculate the specialisation gain. Cost Score = 0 - SpecCost; + InstCostVisitor Visitor = getInstCostVisitorFor(F); for (ArgInfo &A : S.Args) - Score += getSpecializationBonus(A.Formal, A.Actual); + Score += getSpecializationBonus(A.Formal, A.Actual, Visitor); // Discard unprofitable specialisations. if (!ForceSpecialization && Score <= 0) @@ -584,49 +788,23 @@ Cost FunctionSpecializer::getSpecializationCost(Function *F) { // Otherwise, set the specialization cost to be the cost of all the // instructions in the function. - return Metrics.NumInsts * InlineConstants::getInstrCost(); -} - -static Cost getUserBonus(User *U, TargetTransformInfo &TTI, - BlockFrequencyInfo &BFI) { - auto *I = dyn_cast_or_null<Instruction>(U); - // If not an instruction we do not know how to evaluate. - // Keep minimum possible cost for now so that it doesnt affect - // specialization. - if (!I) - return 0; - - uint64_t Weight = BFI.getBlockFreq(I->getParent()).getFrequency() / - BFI.getEntryFreq(); - if (!Weight) - return 0; - - Cost Bonus = Weight * - TTI.getInstructionCost(U, TargetTransformInfo::TCK_SizeAndLatency); - - // Traverse recursively if there are more uses. - // TODO: Any other instructions to be added here? - if (I->mayReadFromMemory() || I->isCast()) - for (auto *User : I->users()) - Bonus += getUserBonus(User, TTI, BFI); - - return Bonus; + return Metrics.NumInsts; } /// Compute a bonus for replacing argument \p A with constant \p C. -Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C) { - Function *F = A->getParent(); - auto &TTI = (GetTTI)(*F); - auto &BFI = (GetBFI)(*F); +Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C, + InstCostVisitor &Visitor) { LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: " << C->getNameOrAsOperand() << "\n"); Cost TotalCost = 0; - for (auto *U : A->users()) { - TotalCost += getUserBonus(U, TTI, BFI); - LLVM_DEBUG(dbgs() << "FnSpecialization: User cost "; - TotalCost.print(dbgs()); dbgs() << " for: " << *U << "\n"); - } + for (auto *U : A->users()) + if (auto *UI = dyn_cast<Instruction>(U)) + if (Solver.isBlockExecutable(UI->getParent())) + TotalCost += Visitor.getUserBonus(UI, A, C); + + LLVM_DEBUG(dbgs() << "FnSpecialization: Accumulated user bonus " + << TotalCost << " for argument " << *A << "\n"); // The below heuristic is only concerned with exposing inlining // opportunities via indirect call promotion. If the argument is not a |