diff options
author | Florian Hahn <flo@fhahn.com> | 2021-10-12 11:10:40 +0100 |
---|---|---|
committer | Florian Hahn <flo@fhahn.com> | 2021-10-12 11:42:28 +0100 |
commit | cd0ba9dc58c5806f4e3cc9635ab1f64af6973a83 (patch) | |
tree | 88ea8e04a6a590ee63042160f3f5ea01ad268eb3 /llvm/lib/Transforms/Utils/LoopPeel.cpp | |
parent | 6a1f50b84ae8f8a8087fcdbe5f27dae8c76878f1 (diff) | |
download | llvm-cd0ba9dc58c5806f4e3cc9635ab1f64af6973a83.zip llvm-cd0ba9dc58c5806f4e3cc9635ab1f64af6973a83.tar.gz llvm-cd0ba9dc58c5806f4e3cc9635ab1f64af6973a83.tar.bz2 |
[LoopPeel] Peel if it turns invariant loads dereferenceable.
This patch adds a new cost heuristic that allows peeling a single
iteration off read-only loops, if the loop contains a load that
1. is feeding an exit condition,
2. dominates the latch,
3. is not already known to be dereferenceable,
4. and has a loop invariant address.
If all non-latch exits are terminated with unreachable, such loads
in the loop are guaranteed to be dereferenceable after peeling,
enabling hoisting/CSE'ing them.
This enables vectorization of loops with certain runtime-checks, like
multiple calls to `std::vector::at` if the vector is passed as pointer.
Reviewed By: mkazantsev
Differential Revision: https://reviews.llvm.org/D108114
Diffstat (limited to 'llvm/lib/Transforms/Utils/LoopPeel.cpp')
-rw-r--r-- | llvm/lib/Transforms/Utils/LoopPeel.cpp | 68 |
1 files changed, 66 insertions, 2 deletions
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp index a6cdce1..e9f6af8 100644 --- a/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -14,6 +14,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -165,6 +166,66 @@ static unsigned calculateIterationsToInvariance( return ToInvariance; } +// Try to find any invariant memory reads that will become dereferenceable in +// the remainder loop after peeling. The load must also be used (transitively) +// by an exit condition. Returns the number of iterations to peel off (at the +// moment either 0 or 1). +static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L, + DominatorTree &DT) { + // Skip loops with a single exiting block, because there should be no benefit + // for the heuristic below. + if (L.getExitingBlock()) + return 0; + + // All non-latch exit blocks must have an UnreachableInst terminator. + // Otherwise the heuristic below may not be profitable. + SmallVector<BasicBlock *, 4> Exits; + L.getUniqueNonLatchExitBlocks(Exits); + if (any_of(Exits, [](const BasicBlock *BB) { + return !isa<UnreachableInst>(BB->getTerminator()); + })) + return 0; + + // Now look for invariant loads that dominate the latch and are not known to + // be dereferenceable. If there are such loads and no writes, they will become + // dereferenceable in the loop if the first iteration is peeled off. Also + // collect the set of instructions controlled by such loads. Only peel if an + // exit condition uses (transitively) such a load. + BasicBlock *Header = L.getHeader(); + BasicBlock *Latch = L.getLoopLatch(); + SmallPtrSet<Value *, 8> LoadUsers; + const DataLayout &DL = L.getHeader()->getModule()->getDataLayout(); + for (BasicBlock *BB : L.blocks()) { + for (Instruction &I : *BB) { + if (I.mayWriteToMemory()) + return 0; + + auto Iter = LoadUsers.find(&I); + if (Iter != LoadUsers.end()) { + for (Value *U : I.users()) + LoadUsers.insert(U); + } + // Do not look for reads in the header; they can already be hoisted + // without peeling. + if (BB == Header) + continue; + if (auto *LI = dyn_cast<LoadInst>(&I)) { + Value *Ptr = LI->getPointerOperand(); + if (DT.dominates(BB, Latch) && L.isLoopInvariant(Ptr) && + !isDereferenceablePointer(Ptr, LI->getType(), DL, LI, &DT)) + for (Value *U : I.users()) + LoadUsers.insert(U); + } + } + } + SmallVector<BasicBlock *> ExitingBlocks; + L.getExitingBlocks(ExitingBlocks); + for (BasicBlock *Exiting : ExitingBlocks) + if (LoadUsers.find(Exiting->getTerminator()) != LoadUsers.end()) + return 1; + return 0; +} + // Return the number of iterations to peel off that make conditions in the // body true/false. For example, if we peel 2 iterations off the loop below, // the condition i < 2 can be evaluated at compile time. @@ -280,8 +341,8 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, // Return the number of iterations we want to peel off. void llvm::computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, - unsigned &TripCount, ScalarEvolution &SE, - unsigned Threshold) { + unsigned &TripCount, DominatorTree &DT, + ScalarEvolution &SE, unsigned Threshold) { assert(LoopSize > 0 && "Zero loop size is not allowed!"); // Save the PP.PeelCount value set by the target in // TTI.getPeelingPreferences or by the flag -unroll-peel-count. @@ -348,6 +409,9 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, DesiredPeelCount = std::max(DesiredPeelCount, countToEliminateCompares(*L, MaxPeelCount, SE)); + if (DesiredPeelCount == 0) + DesiredPeelCount = peelToTurnInvariantLoadsDerefencebale(*L, DT); + if (DesiredPeelCount > 0) { DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount); // Consider max peel count limitation. |