Return "[Codegenprepare][X86] Use usub with overflow opt for IV increment"

The original patch did not account for one corner case in which the cmp does not dominate the loop latch. This patch adds that check. It should be cheap because the CFG does not change during the transform, so DT queries should execute quickly. If you see a compile-time slowdown from this, please revert. Differential Revision: https://reviews.llvm.org/D96119
author: Max Kazantsev <mkazantsev@azul.com> 2021-02-11 19:25:30 +0700
committer: Max Kazantsev <mkazantsev@azul.com> 2021-02-11 19:49:23 +0700
commit: 418c218efa950245ba075b9bb3a53505b807c5df (patch)
tree: 8c7ce9db31cb1e3ca6084f50daf847311c34f1d0 /llvm/lib/CodeGen/CodeGenPrepare.cpp
parent: 78717f56ba4e777db02d80cb86db89282bd9bd8f (diff)
download: llvm-418c218efa950245ba075b9bb3a53505b807c5df.zip
llvm-418c218efa950245ba075b9bb3a53505b807c5df.tar.gz
llvm-418c218efa950245ba075b9bb3a53505b807c5df.tar.bz2
1 files changed, 36 insertions, 1 deletions
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 2b6dbde..492d3d0 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1284,7 +1284,34 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
                                                  Value *Arg0, Value *Arg1,
                                                  CmpInst *Cmp,
                                                  Intrinsic::ID IID) {
-  if (BO->getParent() != Cmp->getParent()) {
+  auto isIVIncrement = [this, &Cmp](BinaryOperator *BO) {
+    auto *PN = dyn_cast<PHINode>(BO->getOperand(0));
+    if (!PN)
+      return false;
+    const Loop *L = LI->getLoopFor(BO->getParent());
+    if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch())
+      return false;
+    const BasicBlock *Latch = L->getLoopLatch();
+    if (PN->getIncomingValueForBlock(Latch) != BO)
+      return false;
+    if (auto *Step = dyn_cast<Instruction>(BO->getOperand(1)))
+      if (L->contains(Step->getParent()))
+        return false;
+    // The IV increment may have users other than the IV phi. We do not want to
+    // make dominance queries to analyze the legality of moving it towards the
+    // cmp, so just check that there are no other users.
+    if (!BO->hasOneUse())
+      return false;
+    // Do not risk moving the increment into a child loop.
+    if (LI->getLoopFor(Cmp->getParent()) != L)
+      return false;
+    // Ultimately, the insertion point must dominate the latch. This should be a
+    // cheap check because no CFG changes or dom tree recomputations happen
+    // during the transform.
+    Function *F = BO->getParent()->getParent();
+    return getDT(*F).dominates(Cmp->getParent(), Latch);
+  };
+  if (BO->getParent() != Cmp->getParent() && !isIVIncrement(BO)) {
     // We used to use a dominator tree here to allow multi-block optimization.
     // But that was problematic because:
     // 1. It could cause a perf regression by hoisting the math op into the
@@ -1295,6 +1322,14 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
     //    This is because we recompute the DT on every change in the main CGP
     //    run-loop. The recomputing is probably unnecessary in many cases, so if
     //    that was fixed, using a DT here would be ok.
+    //
+    // There is one important particular case we still want to handle: if BO is
+    // the IV increment. Important properties that make it profitable:
+    // - We can speculate IV increment anywhere in the loop (as long as the
+    //   indvar Phi is its only user);
+    // - Upon computing Cmp, we effectively compute something equivalent to the
+    //   IV increment (despite looking different in the IR). So moving it up
+    //   to the cmp point does not really increase register pressure.
     return false;
   }
author	Max Kazantsev <mkazantsev@azul.com>	2021-02-11 19:25:30 +0700
committer	Max Kazantsev <mkazantsev@azul.com>	2021-02-11 19:49:23 +0700
commit	418c218efa950245ba075b9bb3a53505b807c5df (patch)
tree	8c7ce9db31cb1e3ca6084f50daf847311c34f1d0 /llvm/lib/CodeGen/CodeGenPrepare.cpp
parent	78717f56ba4e777db02d80cb86db89282bd9bd8f (diff)
download	llvm-418c218efa950245ba075b9bb3a53505b807c5df.zip llvm-418c218efa950245ba075b9bb3a53505b807c5df.tar.gz llvm-418c218efa950245ba075b9bb3a53505b807c5df.tar.bz2