aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Scalar/IndVarSimplify.cpp')
-rw-r--r--llvm/lib/Transforms/Scalar/IndVarSimplify.cpp54
1 files changed, 52 insertions, 2 deletions
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index c327311..7ebcc21 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -53,6 +53,7 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
@@ -117,6 +118,10 @@ static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
cl::desc("Predicate conditions in read only loops"));
+static cl::opt<bool> LoopPredicationTraps(
+ "indvars-predicate-loop-traps", cl::Hidden, cl::init(true),
+ cl::desc("Predicate conditions that trap in loops with only local writes"));
+
static cl::opt<bool>
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
cl::desc("Allow widening of indvars to eliminate s/zext"));
@@ -1704,6 +1709,24 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
return Changed;
}
+static bool crashingBBWithoutEffect(const BasicBlock &BB) {
+ return llvm::all_of(BB, [](const Instruction &I) {
+ // TODO: for now this is overly restrictive, to make sure nothing in this
+ // BB can depend on the loop body.
+ // It's not enough to check for !I.mayHaveSideEffects(), because e.g. a
+ // load does not have a side effect, but we could have
+ // %a = load ptr, ptr %ptr
+ // %b = load i32, ptr %a
+ // Now if the loop stored a non-nullptr to %a, we could cause a nullptr
+ // dereference by skipping over loop iterations.
+ if (const auto *CB = dyn_cast<CallBase>(&I)) {
+ if (CB->onlyAccessesInaccessibleMemory())
+ return true;
+ }
+ return isa<UnreachableInst>(I);
+ });
+}
+
bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
SmallVector<BasicBlock*, 16> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
@@ -1816,11 +1839,25 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
// suggestions on how to improve this? I can obviously bail out for outer
// loops, but that seems less than ideal. MemorySSA can find memory writes,
// is that enough for *all* side effects?
+ bool HasThreadLocalSideEffects = false;
for (BasicBlock *BB : L->blocks())
for (auto &I : *BB)
// TODO:isGuaranteedToTransfer
- if (I.mayHaveSideEffects())
- return false;
+ if (I.mayHaveSideEffects()) {
+ if (!LoopPredicationTraps)
+ return false;
+ HasThreadLocalSideEffects = true;
+ if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+ // Simple stores cannot be observed by other threads.
+ // If HasThreadLocalSideEffects is set, we check
+ // crashingBBWithoutEffect to make sure that the crashing BB cannot
+ // observe them either.
+ if (!SI->isSimple())
+ return false;
+ } else {
+ return false;
+ }
+ }
bool Changed = false;
// Finally, do the actual predication for all predicatable blocks. A couple
@@ -1840,6 +1877,19 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
+ if (HasThreadLocalSideEffects) {
+ const BasicBlock *Unreachable = nullptr;
+ for (const BasicBlock *Succ : BI->successors()) {
+ if (isa<UnreachableInst>(Succ->getTerminator()))
+ Unreachable = Succ;
+ }
+ // Exit BB which have one branch back into the loop and another one to
+ // a trap can still be optimized, because local side effects cannot
+ // be observed in the exit case (the trap). We could be smarter about
+ // this, but for now lets pattern match common cases that directly trap.
+ if (Unreachable == nullptr || !crashingBBWithoutEffect(*Unreachable))
+ return Changed;
+ }
Value *NewCond;
if (ExitCount == ExactBTC) {
NewCond = L->contains(BI->getSuccessor(0)) ?