aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
diff options
context:
space:
mode:
authorAlexandros Lamprineas <alexandros.lamprineas@arm.com>2023-08-07 13:34:48 +0100
committerAlexandros Lamprineas <alexandros.lamprineas@arm.com>2023-08-09 10:28:46 +0100
commitd1b376fd7bf73bca557f3c174d4c129ed4d45ae5 (patch)
treef5abc9aa8f7b3da0b00ddb1388a575102cb9a5a7 /llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
parent7ec88f06d5833dfb4c7029c7645ae6cb89520504 (diff)
downloadllvm-d1b376fd7bf73bca557f3c174d4c129ed4d45ae5.zip
llvm-d1b376fd7bf73bca557f3c174d4c129ed4d45ae5.tar.gz
llvm-d1b376fd7bf73bca557f3c174d4c129ed4d45ae5.tar.bz2
[FuncSpec] Rework the discardment logic for unprofitable specializations.
Currently we make an arbitrary comparison between codesize and latency in order to decide whether to keep a specialization or not. Sometimes the latency savings are biased in favor of loops because of imprecise block frequencies, therefore this metric contains a lot of noise. This patch tries to address the problem as follows: * Reject specializations whose codesize savings are less than X% of the original function size. * Reject specializations whose latency savings are less than Y% of the original function size. * Reject specializations whose inlining bonus is less than Z% of the original function size. I am not saying this is super precise, but at least X, Y and Z are configurable, allowing us to tweak the cost model. Moreover, it lets us prioritize codesize over latency, which is a less noisy metric. I am also increasing the minimum size a function should have to be considered a candidate for specialization. Initially the cost of a function was calculated as CodeMetrics::NumInsts * InlineConstants::getInstrCost() which later in D150464 was altered into CodeMetrics::NumInsts since the metric is supposed to model TargetTransformInfo::TCK_CodeSize. However, we omitted adjusting MinFunctionSize in that commit. Differential Revision: https://reviews.llvm.org/D157123
Diffstat (limited to 'llvm/lib/Transforms/IPO/FunctionSpecialization.cpp')
-rw-r--r--llvm/lib/Transforms/IPO/FunctionSpecialization.cpp105
1 files changed, 70 insertions, 35 deletions
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index dd27519..1582e87 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -89,10 +89,25 @@ static cl::opt<unsigned> MaxBlockPredecessors(
"considered during the estimation of dead code"));
static cl::opt<unsigned> MinFunctionSize(
- "funcspec-min-function-size", cl::init(100), cl::Hidden, cl::desc(
+ "funcspec-min-function-size", cl::init(300), cl::Hidden, cl::desc(
"Don't specialize functions that have less than this number of "
"instructions"));
+static cl::opt<unsigned> MinCodeSizeSavings(
+ "funcspec-min-codesize-savings", cl::init(20), cl::Hidden, cl::desc(
+ "Reject specializations whose codesize savings are less than this"
+ "much percent of the original function size"));
+
+static cl::opt<unsigned> MinLatencySavings(
+ "funcspec-min-latency-savings", cl::init(70), cl::Hidden, cl::desc(
+ "Reject specializations whose latency savings are less than this"
+ "much percent of the original function size"));
+
+static cl::opt<unsigned> MinInliningBonus(
+ "funcspec-min-inlining-bonus", cl::init(300), cl::Hidden, cl::desc(
+ "Reject specializations whose inlining bonus is less than this"
+ "much percent of the original function size"));
+
static cl::opt<bool> SpecializeOnAddress(
"funcspec-on-address", cl::init(false), cl::Hidden, cl::desc(
"Enable function specialization on the address of global values"));
@@ -180,6 +195,22 @@ Bonus InstCostVisitor::getBonusFromPendingPHIs() {
return B;
}
+/// Compute a bonus for replacing argument \p A with constant \p C.
+Bonus InstCostVisitor::getSpecializationBonus(Argument *A, Constant *C) {
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: "
+ << C->getNameOrAsOperand() << "\n");
+ Bonus B;
+ for (auto *U : A->users())
+ if (auto *UI = dyn_cast<Instruction>(U))
+ if (isBlockExecutable(UI->getParent()))
+ B += getUserBonus(UI, A, C);
+
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Accumulated bonus {CodeSize = "
+ << B.CodeSize << ", Latency = " << B.Latency
+ << "} for argument " << *A << "\n");
+ return B;
+}
+
Bonus InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) {
// We have already propagated a constant for this user.
if (KnownConstants.contains(User))
@@ -589,15 +620,15 @@ bool FunctionSpecializer::run() {
int64_t Sz = *Metrics.NumInsts.getValue();
assert(Sz > 0 && "CodeSize should be positive");
// It is safe to down cast from int64_t, NumInsts is always positive.
- unsigned SpecCost = static_cast<unsigned>(Sz);
+ unsigned FuncSize = static_cast<unsigned>(Sz);
LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for "
- << F.getName() << " is " << SpecCost << "\n");
+ << F.getName() << " is " << FuncSize << "\n");
if (Inserted && Metrics.isRecursive)
promoteConstantStackValues(&F);
- if (!findSpecializations(&F, SpecCost, AllSpecs, SM)) {
+ if (!findSpecializations(&F, FuncSize, AllSpecs, SM)) {
LLVM_DEBUG(
dbgs() << "FnSpecialization: No possible specializations found for "
<< F.getName() << "\n");
@@ -732,7 +763,7 @@ static Function *cloneCandidateFunction(Function *F) {
return Clone;
}
-bool FunctionSpecializer::findSpecializations(Function *F, unsigned SpecCost,
+bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
SmallVectorImpl<Spec> &AllSpecs,
SpecMap &SM) {
// A mapping from a specialisation signature to the index of the respective
@@ -799,21 +830,42 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned SpecCost,
} else {
// Calculate the specialisation gain.
Bonus B;
+ unsigned Score = 0;
InstCostVisitor Visitor = getInstCostVisitorFor(F);
- for (ArgInfo &A : S.Args)
- B += getSpecializationBonus(A.Formal, A.Actual, Visitor);
+ for (ArgInfo &A : S.Args) {
+ B += Visitor.getSpecializationBonus(A.Formal, A.Actual);
+ Score += getInliningBonus(A.Formal, A.Actual);
+ }
B += Visitor.getBonusFromPendingPHIs();
- LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization score {CodeSize = "
+
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization bonus {CodeSize = "
<< B.CodeSize << ", Latency = " << B.Latency
- << "}\n");
+ << ", Inlining = " << Score << "}\n");
+
+ auto IsProfitable = [&FuncSize](Bonus &B, unsigned Score) -> bool {
+ // No check required.
+ if (ForceSpecialization)
+ return true;
+ // Minimum inlining bonus.
+ if (Score > MinInliningBonus * FuncSize / 100)
+ return true;
+ // Minimum codesize savings.
+ if (B.CodeSize < MinCodeSizeSavings * FuncSize / 100)
+ return false;
+ // Minimum latency savings.
+ if (B.Latency < MinLatencySavings * FuncSize / 100)
+ return false;
+ return true;
+ };
// Discard unprofitable specialisations.
- if (!ForceSpecialization && B.Latency <= SpecCost - B.CodeSize)
+ if (!IsProfitable(B, Score))
continue;
// Create a new specialisation entry.
- auto &Spec = AllSpecs.emplace_back(F, S, B.Latency);
+ Score += std::max(B.CodeSize, B.Latency);
+ auto &Spec = AllSpecs.emplace_back(F, S, Score);
if (CS.getFunction() != F)
Spec.CallSites.push_back(&CS);
const unsigned Index = AllSpecs.size() - 1;
@@ -879,31 +931,14 @@ Function *FunctionSpecializer::createSpecialization(Function *F,
return Clone;
}
-/// Compute a bonus for replacing argument \p A with constant \p C.
-Bonus FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
- InstCostVisitor &Visitor) {
- LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: "
- << C->getNameOrAsOperand() << "\n");
-
- Bonus B;
- for (auto *U : A->users())
- if (auto *UI = dyn_cast<Instruction>(U))
- if (Visitor.isBlockExecutable(UI->getParent()))
- B += Visitor.getUserBonus(UI, A, C);
-
- LLVM_DEBUG(dbgs() << "FnSpecialization: Accumulated bonus {CodeSize = "
- << B.CodeSize << ", Latency = " << B.Latency
- << "} for argument " << *A << "\n");
-
- // The below heuristic is only concerned with exposing inlining
- // opportunities via indirect call promotion. If the argument is not a
- // (potentially casted) function pointer, give up.
- //
- // TODO: Perhaps we should consider checking such inlining opportunities
- // while traversing the users of the specialization arguments ?
+/// Compute the inlining bonus for replacing argument \p A with constant \p C.
+/// The below heuristic is only concerned with exposing inlining
+/// opportunities via indirect call promotion. If the argument is not a
+/// (potentially casted) function pointer, give up.
+unsigned FunctionSpecializer::getInliningBonus(Argument *A, Constant *C) {
Function *CalledFunction = dyn_cast<Function>(C->stripPointerCasts());
if (!CalledFunction)
- return B;
+ return 0;
// Get TTI for the called function (used for the inline cost).
auto &CalleeTTI = (GetTTI)(*CalledFunction);
@@ -948,7 +983,7 @@ Bonus FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
<< " for user " << *U << "\n");
}
- return B += {0, InliningBonus};
+ return InliningBonus > 0 ? static_cast<unsigned>(InliningBonus) : 0;
}
/// Determine if it is possible to specialise the function for constant values