author     Alexandros Lamprineas <alexandros.lamprineas@arm.com>  2023-07-31 12:00:48 +0100
committer  Alexandros Lamprineas <alexandros.lamprineas@arm.com>  2023-08-02 12:41:13 +0100
commit     5bfefff1c44fd992b673e1ff9c9f1865f9d81af1 (patch)
tree       7db7476afcc4b8dce1037a6502057e1bd0093a35
parent     d1d0e135a16431001684ffd480da9ec6796c6659 (diff)
Reland [FuncSpec] Split the specialization bonus into CodeSize and Latency.
Currently we use a combined metric, TargetTransformInfo::TCK_SizeAndLatency, when estimating the specialization bonus. This is suboptimal, and in some cases erroneous. For example, we shouldn't be weighting the codesize decrease attributed to constant propagation by the block frequency of the dead code. Instead, only the latency savings should be weighted by block frequency. The total codesize savings from all the specialization arguments should be deducted from the specialization cost.

Differential Revision: https://reviews.llvm.org/D155103
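To make the new cost model concrete, below is a minimal, self-contained C++ sketch of the scoring scheme described above. It is not the code from this patch; the helper names (estimateUserBonus, isProfitable) and the frequency parameters are illustrative assumptions. It only shows the shape of the check: latency savings are weighted by block frequency, codesize savings are not, and the accumulated codesize is deducted from the specialization cost before comparing against the latency bonus.

// Minimal sketch of the split CodeSize/Latency bonus model (illustrative only,
// not the LLVM implementation in this patch).
#include <cassert>
#include <cstdint>
#include <iostream>

struct Bonus {
  uint64_t CodeSize = 0; // estimated instructions removed by specialization
  uint64_t Latency = 0;  // estimated cycles saved, weighted by block frequency

  Bonus &operator+=(const Bonus &RHS) {
    CodeSize += RHS.CodeSize;
    Latency += RHS.Latency;
    return *this;
  }
};

// Hypothetical per-instruction estimate: latency savings are scaled by how
// often the containing block runs relative to the entry block; codesize
// savings are counted regardless of frequency.
Bonus estimateUserBonus(uint64_t InstCodeSize, uint64_t InstLatency,
                        uint64_t BlockFreq, uint64_t EntryFreq) {
  assert(EntryFreq && "entry frequency must be non-zero");
  return {InstCodeSize, (BlockFreq / EntryFreq) * InstLatency};
}

// The new profitability check: a specialization is worthwhile when its latency
// bonus exceeds the specialization cost minus the codesize it saves.
bool isProfitable(const Bonus &B, uint64_t SpecCost) {
  // Guard the unsigned subtraction: if the codesize savings already cover the
  // whole specialization cost, treat the candidate as profitable.
  if (B.CodeSize >= SpecCost)
    return true;
  return B.Latency > SpecCost - B.CodeSize;
}

int main() {
  Bonus B;
  // Two folded instructions: one on a hot path, one on a cold (soon-dead) path.
  B += estimateUserBonus(/*InstCodeSize=*/1, /*InstLatency=*/4,
                         /*BlockFreq=*/800, /*EntryFreq=*/100); // hot block
  B += estimateUserBonus(/*InstCodeSize=*/3, /*InstLatency=*/2,
                         /*BlockFreq=*/0, /*EntryFreq=*/100);   // dead block
  std::cout << "CodeSize = " << B.CodeSize << ", Latency = " << B.Latency
            << ", profitable(cost=20) = " << isProfitable(B, 20) << "\n";
}

Note how the dead-block instruction still contributes its full codesize saving even though its weighted latency saving is zero, which is exactly the distinction the old combined TCK_SizeAndLatency metric could not express.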
-rw-r--r--  llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h    |  50
-rw-r--r--  llvm/lib/Transforms/IPO/FunctionSpecialization.cpp            | 137
-rw-r--r--  llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp  | 134
3 files changed, 185 insertions, 136 deletions
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index 5efb3cb..8bcb928 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -105,17 +105,49 @@ struct Spec {
SpecSig Sig;
// Profitability of the specialization.
- Cost Score;
+ unsigned Score;
// List of call sites, matching this specialization.
SmallVector<CallBase *> CallSites;
- Spec(Function *F, const SpecSig &S, Cost Score)
+ Spec(Function *F, const SpecSig &S, unsigned Score)
: F(F), Sig(S), Score(Score) {}
- Spec(Function *F, const SpecSig &&S, Cost Score)
+ Spec(Function *F, const SpecSig &&S, unsigned Score)
: F(F), Sig(S), Score(Score) {}
};
+struct Bonus {
+ unsigned CodeSize = 0;
+ unsigned Latency = 0;
+
+ Bonus() = default;
+
+ Bonus(Cost CodeSize, Cost Latency) {
+ int64_t Sz = *CodeSize.getValue();
+ int64_t Ltc = *Latency.getValue();
+
+ assert(Sz >= 0 && Ltc >= 0 && "CodeSize and Latency cannot be negative");
+ // It is safe to down cast since we know the arguments
+ // cannot be negative and Cost is of type int64_t.
+ this->CodeSize = static_cast<unsigned>(Sz);
+ this->Latency = static_cast<unsigned>(Ltc);
+ }
+
+ Bonus &operator+=(const Bonus RHS) {
+ CodeSize += RHS.CodeSize;
+ Latency += RHS.Latency;
+ return *this;
+ }
+
+ Bonus operator+(const Bonus RHS) const {
+ return Bonus(CodeSize + RHS.CodeSize, Latency + RHS.Latency);
+ }
+
+ bool operator==(const Bonus RHS) const {
+ return CodeSize == RHS.CodeSize && Latency == RHS.Latency;
+ }
+};
+
class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
const DataLayout &DL;
BlockFrequencyInfo &BFI;
@@ -144,10 +176,10 @@ public:
return Solver.isBlockExecutable(BB) && !DeadBlocks.contains(BB);
}
- Cost getUserBonus(Instruction *User, Value *Use = nullptr,
- Constant *C = nullptr);
+ Bonus getUserBonus(Instruction *User, Value *Use = nullptr,
+ Constant *C = nullptr);
- Cost getBonusFromPendingPHIs();
+ Bonus getBonusFromPendingPHIs();
private:
friend class InstVisitor<InstCostVisitor, Constant *>;
@@ -209,8 +241,8 @@ public:
}
/// Compute a bonus for replacing argument \p A with constant \p C.
- Cost getSpecializationBonus(Argument *A, Constant *C,
- InstCostVisitor &Visitor);
+ Bonus getSpecializationBonus(Argument *A, Constant *C,
+ InstCostVisitor &Visitor);
private:
Constant *getPromotableAlloca(AllocaInst *Alloca, CallInst *Call);
@@ -237,7 +269,7 @@ private:
/// @param AllSpecs A vector to add potential specializations to.
/// @param SM A map for a function's specialisation range
/// @return True, if any potential specializations were found
- bool findSpecializations(Function *F, Cost SpecCost,
+ bool findSpecializations(Function *F, unsigned SpecCost,
SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM);
bool isCandidateFunction(Function *F);
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index d917342..cc02f94 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -101,29 +101,21 @@ static cl::opt<bool> SpecializeLiteralConstant(
"Enable specialization of functions that take a literal constant as an "
"argument"));
-// Estimates the instruction cost of all the basic blocks in \p WorkList.
-// The successors of such blocks are added to the list as long as they are
-// executable and they have a unique predecessor. \p WorkList represents
-// the basic blocks of a specialization which become dead once we replace
-// instructions that are known to be constants. The aim here is to estimate
-// the combination of size and latency savings in comparison to the non
-// specialized version of the function.
+// Estimates the codesize savings due to dead code after constant propagation.
+// \p WorkList represents the basic blocks of a specialization which will
+// eventually become dead once we replace instructions that are known to be
+// constants. The successors of such blocks are added to the list as long as
+// the \p Solver found they were executable prior to specialization, and only
+// if they have a unique predecessor.
static Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList,
DenseSet<BasicBlock *> &DeadBlocks,
ConstMap &KnownConstants, SCCPSolver &Solver,
- BlockFrequencyInfo &BFI,
TargetTransformInfo &TTI) {
- Cost Bonus = 0;
-
+ Cost CodeSize = 0;
// Accumulate the instruction cost of each basic block weighted by frequency.
while (!WorkList.empty()) {
BasicBlock *BB = WorkList.pop_back_val();
- uint64_t Weight = BFI.getBlockFreq(BB).getFrequency() /
- BFI.getEntryFreq();
- if (!Weight)
- continue;
-
// These blocks are considered dead as far as the InstCostVisitor
// is concerned. They haven't been proven dead yet by the Solver,
// but may become if we propagate the specialization arguments.
@@ -139,11 +131,11 @@ static Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList,
if (KnownConstants.contains(&I))
continue;
- Bonus += Weight *
- TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
+ Cost C = TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
- LLVM_DEBUG(dbgs() << "FnSpecialization: Bonus " << Bonus
- << " after user " << I << "\n");
+ LLVM_DEBUG(dbgs() << "FnSpecialization: CodeSize " << C
+ << " for user " << I << "\n");
+ CodeSize += C;
}
// Keep adding dead successors to the list as long as they are
@@ -153,7 +145,7 @@ static Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList,
SuccBB->getUniquePredecessor() == BB)
WorkList.push_back(SuccBB);
}
- return Bonus;
+ return CodeSize;
}
static Constant *findConstantFor(Value *V, ConstMap &KnownConstants) {
@@ -164,55 +156,57 @@ static Constant *findConstantFor(Value *V, ConstMap &KnownConstants) {
return nullptr;
}
-Cost InstCostVisitor::getBonusFromPendingPHIs() {
- Cost Bonus = 0;
+Bonus InstCostVisitor::getBonusFromPendingPHIs() {
+ Bonus B;
while (!PendingPHIs.empty()) {
Instruction *Phi = PendingPHIs.pop_back_val();
// The pending PHIs could have been proven dead by now.
if (isBlockExecutable(Phi->getParent()))
- Bonus += getUserBonus(Phi);
+ B += getUserBonus(Phi);
}
- return Bonus;
+ return B;
}
-Cost InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) {
+Bonus InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) {
// We have already propagated a constant for this user.
if (KnownConstants.contains(User))
- return 0;
+ return {0, 0};
// Cache the iterator before visiting.
LastVisited = Use ? KnownConstants.insert({Use, C}).first
: KnownConstants.end();
- if (auto *I = dyn_cast<SwitchInst>(User))
- return estimateSwitchInst(*I);
-
- if (auto *I = dyn_cast<BranchInst>(User))
- return estimateBranchInst(*I);
-
- C = visit(*User);
- if (!C)
- return 0;
+ Cost CodeSize = 0;
+ if (auto *I = dyn_cast<SwitchInst>(User)) {
+ CodeSize = estimateSwitchInst(*I);
+ } else if (auto *I = dyn_cast<BranchInst>(User)) {
+ CodeSize = estimateBranchInst(*I);
+ } else {
+ C = visit(*User);
+ if (!C)
+ return {0, 0};
+ KnownConstants.insert({User, C});
+ }
- KnownConstants.insert({User, C});
+ CodeSize += TTI.getInstructionCost(User, TargetTransformInfo::TCK_CodeSize);
uint64_t Weight = BFI.getBlockFreq(User->getParent()).getFrequency() /
BFI.getEntryFreq();
- if (!Weight)
- return 0;
- Cost Bonus = Weight *
- TTI.getInstructionCost(User, TargetTransformInfo::TCK_SizeAndLatency);
+ Cost Latency = Weight *
+ TTI.getInstructionCost(User, TargetTransformInfo::TCK_Latency);
- LLVM_DEBUG(dbgs() << "FnSpecialization: Bonus " << Bonus
- << " for user " << *User << "\n");
+ LLVM_DEBUG(dbgs() << "FnSpecialization: {CodeSize = " << CodeSize
+ << ", Latency = " << Latency << "} for user "
+ << *User << "\n");
+ Bonus B(CodeSize, Latency);
for (auto *U : User->users())
if (auto *UI = dyn_cast<Instruction>(U))
if (UI != User && isBlockExecutable(UI->getParent()))
- Bonus += getUserBonus(UI, User, C);
+ B += getUserBonus(UI, User, C);
- return Bonus;
+ return B;
}
Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) {
@@ -238,8 +232,7 @@ Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) {
WorkList.push_back(BB);
}
- return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, BFI,
- TTI);
+ return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, TTI);
}
Cost InstCostVisitor::estimateBranchInst(BranchInst &I) {
@@ -256,8 +249,7 @@ Cost InstCostVisitor::estimateBranchInst(BranchInst &I) {
Succ->getUniquePredecessor() == I.getParent())
WorkList.push_back(Succ);
- return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, BFI,
- TTI);
+ return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, TTI);
}
Constant *InstCostVisitor::visitPHINode(PHINode &I) {
@@ -578,13 +570,18 @@ bool FunctionSpecializer::run() {
if (!Inserted && !Metrics.isRecursive && !SpecializeLiteralConstant)
continue;
+ int64_t Sz = *Metrics.NumInsts.getValue();
+ assert(Sz > 0 && "CodeSize should be positive");
+ // It is safe to down cast from int64_t, NumInsts is always positive.
+ unsigned SpecCost = static_cast<unsigned>(Sz);
+
LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for "
- << F.getName() << " is " << Metrics.NumInsts << "\n");
+ << F.getName() << " is " << SpecCost << "\n");
if (Inserted && Metrics.isRecursive)
promoteConstantStackValues(&F);
- if (!findSpecializations(&F, Metrics.NumInsts, AllSpecs, SM)) {
+ if (!findSpecializations(&F, SpecCost, AllSpecs, SM)) {
LLVM_DEBUG(
dbgs() << "FnSpecialization: No possible specializations found for "
<< F.getName() << "\n");
@@ -719,7 +716,7 @@ static Function *cloneCandidateFunction(Function *F) {
return Clone;
}
-bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
+bool FunctionSpecializer::findSpecializations(Function *F, unsigned SpecCost,
SmallVectorImpl<Spec> &AllSpecs,
SpecMap &SM) {
// A mapping from a specialisation signature to the index of the respective
@@ -785,21 +782,22 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
AllSpecs[Index].CallSites.push_back(&CS);
} else {
// Calculate the specialisation gain.
- Cost Score = 0;
+ Bonus B;
InstCostVisitor Visitor = getInstCostVisitorFor(F);
for (ArgInfo &A : S.Args)
- Score += getSpecializationBonus(A.Formal, A.Actual, Visitor);
- Score += Visitor.getBonusFromPendingPHIs();
+ B += getSpecializationBonus(A.Formal, A.Actual, Visitor);
+ B += Visitor.getBonusFromPendingPHIs();
- LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization score = "
- << Score << "\n");
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization score {CodeSize = "
+ << B.CodeSize << ", Latency = " << B.Latency
+ << "}\n");
// Discard unprofitable specialisations.
- if (!ForceSpecialization && Score <= SpecCost)
+ if (!ForceSpecialization && B.Latency <= SpecCost - B.CodeSize)
continue;
// Create a new specialisation entry.
- auto &Spec = AllSpecs.emplace_back(F, S, Score);
+ auto &Spec = AllSpecs.emplace_back(F, S, B.Latency);
if (CS.getFunction() != F)
Spec.CallSites.push_back(&CS);
const unsigned Index = AllSpecs.size() - 1;
@@ -866,19 +864,20 @@ Function *FunctionSpecializer::createSpecialization(Function *F,
}
/// Compute a bonus for replacing argument \p A with constant \p C.
-Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
+Bonus FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
InstCostVisitor &Visitor) {
LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: "
<< C->getNameOrAsOperand() << "\n");
- Cost TotalCost = 0;
+ Bonus B;
for (auto *U : A->users())
if (auto *UI = dyn_cast<Instruction>(U))
if (Visitor.isBlockExecutable(UI->getParent()))
- TotalCost += Visitor.getUserBonus(UI, A, C);
+ B += Visitor.getUserBonus(UI, A, C);
- LLVM_DEBUG(dbgs() << "FnSpecialization: Accumulated user bonus "
- << TotalCost << " for argument " << *A << "\n");
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Accumulated bonus {CodeSize = "
+ << B.CodeSize << ", Latency = " << B.Latency
+ << "} for argument " << *A << "\n");
// The below heuristic is only concerned with exposing inlining
// opportunities via indirect call promotion. If the argument is not a
@@ -888,7 +887,7 @@ Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
// while traversing the users of the specialization arguments ?
Function *CalledFunction = dyn_cast<Function>(C->stripPointerCasts());
if (!CalledFunction)
- return TotalCost;
+ return B;
// Get TTI for the called function (used for the inline cost).
auto &CalleeTTI = (GetTTI)(*CalledFunction);
@@ -898,7 +897,7 @@ Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
// calls to be promoted to direct calls. If the indirect call promotion
// would likely enable the called function to be inlined, specializing is a
// good idea.
- int Bonus = 0;
+ int InliningBonus = 0;
for (User *U : A->users()) {
if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
continue;
@@ -925,15 +924,15 @@ Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
// We clamp the bonus for this call to be between zero and the default
// threshold.
if (IC.isAlways())
- Bonus += Params.DefaultThreshold;
+ InliningBonus += Params.DefaultThreshold;
else if (IC.isVariable() && IC.getCostDelta() > 0)
- Bonus += IC.getCostDelta();
+ InliningBonus += IC.getCostDelta();
- LLVM_DEBUG(dbgs() << "FnSpecialization: Inlining bonus " << Bonus
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Inlining bonus " << InliningBonus
<< " for user " << *U << "\n");
}
- return TotalCost + Bonus;
+ return B += {0, InliningBonus};
}
/// Determine if it is possible to specialise the function for constant values
diff --git a/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp b/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp
index 6018263..f9bbbcff 100644
--- a/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp
+++ b/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp
@@ -96,12 +96,18 @@ protected:
GetAC);
}
- Cost getInstCost(Instruction &I) {
+ Bonus getInstCost(Instruction &I, bool SizeOnly = false) {
auto &TTI = FAM.getResult<TargetIRAnalysis>(*I.getFunction());
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*I.getFunction());
- return BFI.getBlockFreq(I.getParent()).getFrequency() / BFI.getEntryFreq() *
- TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
+ Cost CodeSize =
+ TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
+
+ Cost Latency = SizeOnly ? 0 :
+ BFI.getBlockFreq(I.getParent()).getFrequency() / BFI.getEntryFreq() *
+ TTI.getInstructionCost(&I, TargetTransformInfo::TCK_Latency);
+
+ return {CodeSize, Latency};
}
};
@@ -145,12 +151,13 @@ TEST_F(FunctionSpecializationTest, SwitchInst) {
Constant *One = ConstantInt::get(IntegerType::getInt32Ty(M.getContext()), 1);
auto FuncIter = F->begin();
- ++FuncIter;
+ BasicBlock &Loop = *++FuncIter;
BasicBlock &Case1 = *++FuncIter;
BasicBlock &Case2 = *++FuncIter;
BasicBlock &BB1 = *++FuncIter;
BasicBlock &BB2 = *++FuncIter;
+ Instruction &Switch = Loop.front();
Instruction &Mul = Case1.front();
Instruction &And = Case2.front();
Instruction &Sdiv = *++Case2.begin();
@@ -160,22 +167,25 @@ TEST_F(FunctionSpecializationTest, SwitchInst) {
Instruction &BrLoop = BB2.back();
// mul
- Cost Ref = getInstCost(Mul);
- Cost Bonus = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
- EXPECT_EQ(Bonus, Ref);
- EXPECT_TRUE(Bonus > 0);
+ Bonus Ref = getInstCost(Mul);
+ Bonus Test = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
+ EXPECT_EQ(Test, Ref);
+ EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
// and + or + add
Ref = getInstCost(And) + getInstCost(Or) + getInstCost(Add);
- Bonus = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor);
- EXPECT_EQ(Bonus, Ref);
- EXPECT_TRUE(Bonus > 0);
-
- // sdiv + br + br
- Ref = getInstCost(Sdiv) + getInstCost(BrBB2) + getInstCost(BrLoop);
- Bonus = Specializer.getSpecializationBonus(F->getArg(2), One, Visitor);
- EXPECT_EQ(Bonus, Ref);
- EXPECT_TRUE(Bonus > 0);
+ Test = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor);
+ EXPECT_EQ(Test, Ref);
+ EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
+
+ // switch + sdiv + br + br
+ Ref = getInstCost(Switch) +
+ getInstCost(Sdiv, /*SizeOnly =*/ true) +
+ getInstCost(BrBB2, /*SizeOnly =*/ true) +
+ getInstCost(BrLoop, /*SizeOnly =*/ true);
+ Test = Specializer.getSpecializationBonus(F->getArg(2), One, Visitor);
+ EXPECT_EQ(Test, Ref);
+ EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
}
TEST_F(FunctionSpecializationTest, BranchInst) {
@@ -207,10 +217,11 @@ TEST_F(FunctionSpecializationTest, BranchInst) {
Constant *False = ConstantInt::getFalse(M.getContext());
auto FuncIter = F->begin();
- ++FuncIter;
+ BasicBlock &Loop = *++FuncIter;
BasicBlock &BB0 = *++FuncIter;
BasicBlock &BB1 = *++FuncIter;
+ Instruction &Branch = Loop.front();
Instruction &Mul = BB0.front();
Instruction &Sub = *++BB0.begin();
Instruction &BrBB1 = BB0.back();
@@ -219,23 +230,26 @@ TEST_F(FunctionSpecializationTest, BranchInst) {
Instruction &BrLoop = BB1.back();
// mul
- Cost Ref = getInstCost(Mul);
- Cost Bonus = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
- EXPECT_EQ(Bonus, Ref);
- EXPECT_TRUE(Bonus > 0);
+ Bonus Ref = getInstCost(Mul);
+ Bonus Test = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
+ EXPECT_EQ(Test, Ref);
+ EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
// add
Ref = getInstCost(Add);
- Bonus = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor);
- EXPECT_EQ(Bonus, Ref);
- EXPECT_TRUE(Bonus > 0);
-
- // sub + br + sdiv + br
- Ref = getInstCost(Sub) + getInstCost(BrBB1) + getInstCost(Sdiv) +
- getInstCost(BrLoop);
- Bonus = Specializer.getSpecializationBonus(F->getArg(2), False, Visitor);
- EXPECT_EQ(Bonus, Ref);
- EXPECT_TRUE(Bonus > 0);
+ Test = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor);
+ EXPECT_EQ(Test, Ref);
+ EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
+
+ // branch + sub + br + sdiv + br
+ Ref = getInstCost(Branch) +
+ getInstCost(Sub, /*SizeOnly =*/ true) +
+ getInstCost(BrBB1, /*SizeOnly =*/ true) +
+ getInstCost(Sdiv, /*SizeOnly =*/ true) +
+ getInstCost(BrLoop, /*SizeOnly =*/ true);
+ Test = Specializer.getSpecializationBonus(F->getArg(2), False, Visitor);
+ EXPECT_EQ(Test, Ref);
+ EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
}
TEST_F(FunctionSpecializationTest, Misc) {
@@ -281,26 +295,26 @@ TEST_F(FunctionSpecializationTest, Misc) {
Instruction &Smax = *BlockIter++;
// icmp + zext
- Cost Ref = getInstCost(Icmp) + getInstCost(Zext);
- Cost Bonus = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
- EXPECT_EQ(Bonus, Ref);
- EXPECT_TRUE(Bonus > 0);
+ Bonus Ref = getInstCost(Icmp) + getInstCost(Zext);
+ Bonus Test = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
+ EXPECT_EQ(Test, Ref);
+ EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
// select
Ref = getInstCost(Select);
- Bonus = Specializer.getSpecializationBonus(F->getArg(1), True, Visitor);
- EXPECT_EQ(Bonus, Ref);
- EXPECT_TRUE(Bonus > 0);
+ Test = Specializer.getSpecializationBonus(F->getArg(1), True, Visitor);
+ EXPECT_EQ(Test, Ref);
+ EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
// gep + load + freeze + smax
Ref = getInstCost(Gep) + getInstCost(Load) + getInstCost(Freeze) +
getInstCost(Smax);
- Bonus = Specializer.getSpecializationBonus(F->getArg(2), GV, Visitor);
- EXPECT_EQ(Bonus, Ref);
- EXPECT_TRUE(Bonus > 0);
+ Test = Specializer.getSpecializationBonus(F->getArg(2), GV, Visitor);
+ EXPECT_EQ(Test, Ref);
+ EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
- Bonus = Specializer.getSpecializationBonus(F->getArg(3), Undef, Visitor);
- EXPECT_TRUE(Bonus == 0);
+ Test = Specializer.getSpecializationBonus(F->getArg(3), Undef, Visitor);
+ EXPECT_TRUE(Test.CodeSize == 0 && Test.Latency == 0);
}
TEST_F(FunctionSpecializationTest, PhiNode) {
@@ -342,29 +356,33 @@ TEST_F(FunctionSpecializationTest, PhiNode) {
BasicBlock &BB = *++FuncIter;
Instruction &PhiLoop = Loop.front();
+ Instruction &Switch = Loop.back();
Instruction &Add = Case1.front();
Instruction &PhiCase2 = Case2.front();
Instruction &BrBB = Case2.back();
Instruction &PhiBB = BB.front();
Instruction &Icmp = *++BB.begin();
+ Instruction &Branch = BB.back();
- Cost Bonus = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
- EXPECT_EQ(Bonus, 0);
+ Bonus Test = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor);
+ EXPECT_TRUE(Test.CodeSize == 0 && Test.Latency == 0);
- Bonus = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor);
- EXPECT_EQ(Bonus, 0);
+ Test = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor);
+ EXPECT_TRUE(Test.CodeSize == 0 && Test.Latency == 0);
- // phi + br
- Cost Ref = getInstCost(PhiCase2) + getInstCost(BrBB);
- Bonus = Specializer.getSpecializationBonus(F->getArg(2), One, Visitor);
- EXPECT_EQ(Bonus, Ref);
- EXPECT_TRUE(Bonus > 0);
+ // switch + phi + br
+ Bonus Ref = getInstCost(Switch) +
+ getInstCost(PhiCase2, /*SizeOnly =*/ true) +
+ getInstCost(BrBB, /*SizeOnly =*/ true);
+ Test = Specializer.getSpecializationBonus(F->getArg(2), One, Visitor);
+ EXPECT_EQ(Test, Ref);
+ EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
- // phi + phi + add + icmp
+ // phi + phi + add + icmp + branch
Ref = getInstCost(PhiBB) + getInstCost(PhiLoop) + getInstCost(Add) +
- getInstCost(Icmp);
- Bonus = Visitor.getBonusFromPendingPHIs();
- EXPECT_EQ(Bonus, Ref);
- EXPECT_TRUE(Bonus > 0);
+ getInstCost(Icmp) + getInstCost(Branch);
+ Test = Visitor.getBonusFromPendingPHIs();
+ EXPECT_EQ(Test, Ref);
+ EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);
}