1 files changed, 171 insertions, 28 deletions
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index 27e70c5..ba0ac01 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -81,6 +81,10 @@ static cl::opt<bool> DisableAdvancedPeeling(
     cl::desc(
         "Disable advance peeling. Issues for convergent targets (D134803)."));
 
+static cl::opt<bool> EnablePeelingForIV(
+    "enable-peeling-for-iv", cl::init(false), cl::Hidden,
+    cl::desc("Enable peeling to convert Phi nodes into IVs"));
+
 static const char *PeeledCountMetaData = "llvm.loop.peeled.count";
 
 // Check whether we are capable of peeling this loop.
@@ -155,45 +159,170 @@ namespace {
 // corresponding calls to g are determined and the code for computing
 // x, y, and a can be removed.
 //
+// Similarly, there are cases where peeling makes Phi nodes loop-inductions
+// (i.e., the value is increased or decreased by a fixed amount on every
+// iteration). For example, consider the following function.
+//
+//   #define N 100
+//   void f(int a[], int b[]) {
+//     int im = N - 1;
+//     for (int i = 0; i < N; i++) {
+//       a[i] = b[i] + b[im];
+//       im = i;
+//     }
+//   }
+//
+// The IR of the loop will look something like the following.
+//
+//   %i = phi i32 [ 0, %entry ], [ %i.next, %for.body ]
+//   %im = phi i32 [ 99, %entry ], [ %i, %for.body ]
+//   ...
+//   %i.next = add nuw nsw i32 %i, 1
+//   ...
+//
+// In this case, %im becomes a loop-induction variable by peeling 1 iteration,
+// because %i is a loop-induction one. The peeling count can be determined by
+// the same algorithm with loop-invariant case. Such peeling is profitable for
+// loop-vectorization.
+//
 // The PhiAnalyzer class calculates how many times a loop should be
 // peeled based on the above analysis of the phi nodes in the loop while
 // respecting the maximum specified.
 class PhiAnalyzer {
 public:
-  PhiAnalyzer(const Loop &L, unsigned MaxIterations);
+  PhiAnalyzer(const Loop &L, unsigned MaxIterations, bool PeelForIV);
 
   // Calculate the sufficient minimum number of iterations of the loop to peel
   // such that phi instructions become determined (subject to allowable limits)
   std::optional<unsigned> calculateIterationsToPeel();
 
 protected:
-  using PeelCounter = std::optional<unsigned>;
+  enum class PeelCounterType {
+    Invariant,
+    Induction,
+  };
+
+  using PeelCounterValue = std::pair<unsigned, PeelCounterType>;
+  using PeelCounter = std::optional<PeelCounterValue>;
   const PeelCounter Unknown = std::nullopt;
 
   // Add 1 respecting Unknown and return Unknown if result over MaxIterations
   PeelCounter addOne(PeelCounter PC) const {
     if (PC == Unknown)
       return Unknown;
-    return (*PC + 1 <= MaxIterations) ? PeelCounter{*PC + 1} : Unknown;
+    auto [Val, Ty] = *PC;
+    return (Val + 1 <= MaxIterations) ? PeelCounter({Val + 1, Ty}) : Unknown;
   }
 
-  // Calculate the number of iterations after which the given value
-  // becomes an invariant.
+  // Return a value representing zero for the given counter type.
+  PeelCounter makeZero(PeelCounterType Ty) const {
+    return PeelCounter({0, Ty});
+  }
+
+  // Calculate the number of iterations after which the given value becomes an
+  // invariant or an induction.
   PeelCounter calculate(const Value &);
 
+  // Auxiliary function to calculate the number of iterations for a comparison
+  // instruction or a binary operator.
+  PeelCounter mergeTwoCounter(const Instruction &CmpOrBinaryOp,
+                              const PeelCounterValue &LHS,
+                              const PeelCounterValue &RHS) const;
+
+  // Returns true if the \p Phi is an induction in the target loop. This is a
+  // lightweight check and possible to detect an IV in some cases.
+  bool isInductionPHI(const PHINode *Phi) const;
+
   const Loop &L;
   const unsigned MaxIterations;
+  const bool PeelForIV;
 
-  // Map of Values to number of iterations to invariance
-  SmallDenseMap<const Value *, PeelCounter> IterationsToInvariance;
+  // Map of Values to number of iterations to invariance or induction
+  SmallDenseMap<const Value *, PeelCounter> IterationsToInvarianceOrInduction;
 };
 
-PhiAnalyzer::PhiAnalyzer(const Loop &L, unsigned MaxIterations)
-    : L(L), MaxIterations(MaxIterations) {
+PhiAnalyzer::PhiAnalyzer(const Loop &L, unsigned MaxIterations, bool PeelForIV)
+    : L(L), MaxIterations(MaxIterations), PeelForIV(PeelForIV) {
   assert(canPeel(&L) && "loop is not suitable for peeling");
   assert(MaxIterations > 0 && "no peeling is allowed?");
 }
 
+/// Test whether \p Phi is an induction variable. Although this can be
+/// determined using SCEV analysis, it is expensive to compute here. Instead,
+/// we perform cheaper checks that may not detect complex cases but are
+/// sufficient for some situations.
+bool PhiAnalyzer::isInductionPHI(const PHINode *Phi) const {
+  // Currently we only support a loop that has single latch.
+  BasicBlock *Latch = L.getLoopLatch();
+  if (Latch == nullptr)
+    return false;
+
+  Value *Cur = Phi->getIncomingValueForBlock(Latch);
+  SmallPtrSet<Value *, 4> Visited;
+  bool VisitBinOp = false;
+
+  // Starting from the incoming value of the Phi, we follow the use-def chain.
+  // We consider Phi to be an IV if we can reach it again by traversing only
+  // add, sub, or cast instructions.
+  while (true) {
+    if (Cur == Phi)
+      break;
+
+    // Avoid infinite loop.
+    if (Visited.contains(Cur))
+      return false;
+
+    auto *I = dyn_cast<Instruction>(Cur);
+    if (!I || !L.contains(I))
+      return false;
+
+    Visited.insert(Cur);
+
+    if (auto *Cast = dyn_cast<CastInst>(I)) {
+      Cur = Cast->getOperand(0);
+    } else if (auto *BinOp = dyn_cast<BinaryOperator>(I)) {
+      if (BinOp->getOpcode() != Instruction::Add &&
+          BinOp->getOpcode() != Instruction::Sub)
+        return false;
+      if (!isa<ConstantInt>(BinOp->getOperand(1)))
+        return false;
+
+      VisitBinOp = true;
+      Cur = BinOp->getOperand(0);
+    } else {
+      return false;
+    }
+  }
+
+  // Ignore cases where no binary operations are visited.
+  return VisitBinOp;
+}
+
+/// When either \p LHS or \p RHS is an IV, the result of \p CmpOrBinaryOp is
+/// considered an IV only if it is an addition or a subtraction. Otherwise the
+/// result can be a value that is neither an loop-invariant nor an IV.
+///
+/// If both \p LHS and \p RHS are loop-invariants, then the result of
+/// \CmpOrBinaryOp is also a loop-invariant.
+PhiAnalyzer::PeelCounter
+PhiAnalyzer::mergeTwoCounter(const Instruction &CmpOrBinaryOp,
+                             const PeelCounterValue &LHS,
+                             const PeelCounterValue &RHS) const {
+  auto &[LVal, LTy] = LHS;
+  auto &[RVal, RTy] = RHS;
+  unsigned NewVal = std::max(LVal, RVal);
+
+  if (LTy == PeelCounterType::Induction || RTy == PeelCounterType::Induction) {
+    if (const auto *BinOp = dyn_cast<BinaryOperator>(&CmpOrBinaryOp)) {
+      if (BinOp->getOpcode() == Instruction::Add ||
+          BinOp->getOpcode() == Instruction::Sub)
+        return PeelCounter({NewVal, PeelCounterType::Induction});
+    }
+    return Unknown;
+  }
+  return PeelCounter({NewVal, PeelCounterType::Invariant});
+}
+
 // This function calculates the number of iterations after which the value
 // becomes an invariant. The pre-calculated values are memorized in a map.
 // N.B. This number will be Unknown or <= MaxIterations.
@@ -212,25 +341,34 @@ PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) {
   // If we already know the answer, take it from the map.
   // Otherwise, place Unknown to map to avoid infinite recursion. Such
   // cycles can never stop on an invariant.
-  auto [I, Inserted] = IterationsToInvariance.try_emplace(&V, Unknown);
+  auto [I, Inserted] =
+      IterationsToInvarianceOrInduction.try_emplace(&V, Unknown);
   if (!Inserted)
     return I->second;
 
   if (L.isLoopInvariant(&V))
     // Loop invariant so known at start.
-    return (IterationsToInvariance[&V] = 0);
+    return (IterationsToInvarianceOrInduction[&V] =
+                makeZero(PeelCounterType::Invariant));
   if (const PHINode *Phi = dyn_cast<PHINode>(&V)) {
     if (Phi->getParent() != L.getHeader()) {
       // Phi is not in header block so Unknown.
-      assert(IterationsToInvariance[&V] == Unknown && "unexpected value saved");
+      assert(IterationsToInvarianceOrInduction[&V] == Unknown &&
+             "unexpected value saved");
       return Unknown;
     }
+
+    // If Phi is an induction, register it as a starting point.
+    if (PeelForIV && isInductionPHI(Phi))
+      return (IterationsToInvarianceOrInduction[&V] =
+                  makeZero(PeelCounterType::Induction));
+
     // We need to analyze the input from the back edge and add 1.
     Value *Input = Phi->getIncomingValueForBlock(L.getLoopLatch());
     PeelCounter Iterations = calculate(*Input);
-    assert(IterationsToInvariance[Input] == Iterations &&
+    assert(IterationsToInvarianceOrInduction[Input] == Iterations &&
            "unexpected value saved");
-    return (IterationsToInvariance[Phi] = addOne(Iterations));
+    return (IterationsToInvarianceOrInduction[Phi] = addOne(Iterations));
   }
   if (const Instruction *I = dyn_cast<Instruction>(&V)) {
     if (isa<CmpInst>(I) || I->isBinaryOp()) {
@@ -241,26 +379,30 @@ PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) {
       PeelCounter RHS = calculate(*I->getOperand(1));
       if (RHS == Unknown)
         return Unknown;
-      return (IterationsToInvariance[I] = {std::max(*LHS, *RHS)});
+      return (IterationsToInvarianceOrInduction[I] =
+                  mergeTwoCounter(*I, *LHS, *RHS));
     }
     if (I->isCast())
       // Cast instructions get the value of the operand.
-      return (IterationsToInvariance[I] = calculate(*I->getOperand(0)));
+      return (IterationsToInvarianceOrInduction[I] =
+                  calculate(*I->getOperand(0)));
   }
   // TODO: handle more expressions
 
   // Everything else is Unknown.
-  assert(IterationsToInvariance[&V] == Unknown && "unexpected value saved");
+  assert(IterationsToInvarianceOrInduction[&V] == Unknown &&
+         "unexpected value saved");
   return Unknown;
 }
 
 std::optional<unsigned> PhiAnalyzer::calculateIterationsToPeel() {
   unsigned Iterations = 0;
   for (auto &PHI : L.getHeader()->phis()) {
-    PeelCounter ToInvariance = calculate(PHI);
-    if (ToInvariance != Unknown) {
-      assert(*ToInvariance <= MaxIterations && "bad result in phi analysis");
-      Iterations = std::max(Iterations, *ToInvariance);
+    PeelCounter ToInvarianceOrInduction = calculate(PHI);
+    if (ToInvarianceOrInduction != Unknown) {
+      unsigned Val = ToInvarianceOrInduction->first;
+      assert(Val <= MaxIterations && "bad result in phi analysis");
+      Iterations = std::max(Iterations, Val);
       if (Iterations == MaxIterations)
         break;
     }
@@ -654,14 +796,15 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
   // in TTI.getPeelingPreferences or by the flag -unroll-peel-count.
   unsigned DesiredPeelCount = TargetPeelCount;
 
-  // Here we try to get rid of Phis which become invariants after 1, 2, ..., N
-  // iterations of the loop. For this we compute the number for iterations after
-  // which every Phi is guaranteed to become an invariant, and try to peel the
-  // maximum number of iterations among these values, thus turning all those
-  // Phis into invariants.
+  // Here we try to get rid of Phis which become invariants or inductions after
+  // 1, 2, ..., N iterations of the loop. For this we compute the number for
+  // iterations after which every Phi is guaranteed to become an invariant or an
+  // induction, and try to peel the maximum number of iterations among these
+  // values, thus turning all those Phis into invariants or inductions.
   if (MaxPeelCount > DesiredPeelCount) {
     // Check how many iterations are useful for resolving Phis
-    auto NumPeels = PhiAnalyzer(*L, MaxPeelCount).calculateIterationsToPeel();
+    auto NumPeels = PhiAnalyzer(*L, MaxPeelCount, EnablePeelingForIV)
+                        .calculateIterationsToPeel();
     if (NumPeels)
       DesiredPeelCount = std::max(DesiredPeelCount, *NumPeels);
   }
@@ -680,7 +823,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
     if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) {
       LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
                         << " iteration(s) to turn"
-                        << " some Phis into invariants.\n");
+                        << " some Phis into invariants or inductions.\n");
       PP.PeelCount = DesiredPeelCount;
       PP.PeelProfiledIterations = false;
       PP.PeelLast = false;