diff options
Diffstat (limited to 'llvm/lib/CodeGen/CodeGenPrepare.cpp')
-rw-r--r-- | llvm/lib/CodeGen/CodeGenPrepare.cpp | 37 |
1 files changed, 36 insertions, 1 deletions
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 2b6dbde..492d3d0 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -1284,7 +1284,34 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, Value *Arg1, CmpInst *Cmp, Intrinsic::ID IID) { - if (BO->getParent() != Cmp->getParent()) { + auto isIVIncrement = [this, &Cmp](BinaryOperator *BO) { + auto *PN = dyn_cast<PHINode>(BO->getOperand(0)); + if (!PN) + return false; + const Loop *L = LI->getLoopFor(BO->getParent()); + if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch()) + return false; + const BasicBlock *Latch = L->getLoopLatch(); + if (PN->getIncomingValueForBlock(Latch) != BO) + return false; + if (auto *Step = dyn_cast<Instruction>(BO->getOperand(1))) + if (L->contains(Step->getParent())) + return false; + // IV increment may have other users than the IV. We do not want to make + // dominance queries to analyze the legality of moving it towards the cmp, + // so just check that there are no other users. + if (!BO->hasOneUse()) + return false; + // Do not risk moving the increment into a child loop. + if (LI->getLoopFor(Cmp->getParent()) != L) + return false; + // Ultimately, the insertion point must dominate latch. This should be a + // cheap check because no CFG changes & dom tree recomputation happen + // during the transform. + Function *F = BO->getParent()->getParent(); + return getDT(*F).dominates(Cmp->getParent(), Latch); + }; + if (BO->getParent() != Cmp->getParent() && !isIVIncrement(BO)) { // We used to use a dominator tree here to allow multi-block optimization. // But that was problematic because: // 1. It could cause a perf regression by hoisting the math op into the @@ -1295,6 +1322,14 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO, // This is because we recompute the DT on every change in the main CGP // run-loop. 
The recomputing is probably unnecessary in many cases, so if // that was fixed, using a DT here would be ok. + // + // There is one important particular case we still want to handle: if BO is + // the IV increment. Important properties that make it profitable: + // - We can speculate IV increment anywhere in the loop (as long as the + // indvar Phi is its only user); + // - Upon computing Cmp, we effectively compute something equivalent to the + // IV increment (despite it looking different in the IR). So moving it up + // to the cmp point does not really increase register pressure. return false; } |