aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Stellard <tstellar@redhat.com>2018-05-30 18:50:22 +0000
committerTom Stellard <tstellar@redhat.com>2018-05-30 18:50:22 +0000
commit699cfaec64872933f92e3985c45139342ed99f59 (patch)
tree249f0a4bdbc2d54ac2416d950469fff62f9d5420
parent8d6927666a2f9faff6570b48a92f7a43da0db734 (diff)
downloadllvm-699cfaec64872933f92e3985c45139342ed99f59.zip
llvm-699cfaec64872933f92e3985c45139342ed99f59.tar.gz
llvm-699cfaec64872933f92e3985c45139342ed99f59.tar.bz2
Merging r328798:
------------------------------------------------------------------------ r328798 | haicheng | 2018-03-29 09:01:26 -0700 (Thu, 29 Mar 2018) | 37 lines [JumpThreading] Don't select an edge that we know we can't thread In r312664 (D36404), JumpThreading stopped threading edges into loop headers. Unfortunately, I observed a significant performance regression as a result of this change. Upon further investigation, the problematic pattern looked something like this (after many high level optimizations): while (true) { bool cond = ...; if (!cond) { <body> } if (cond) break; } Now, naturally we want jump threading to essentially eliminate the second if check and hook up the edges appropriately. However, the above mentioned change, prevented it from doing this because it would have to thread an edge into the loop header. Upon further investigation, what is happening is that since both branches are threadable, JumpThreading picks one of them at arbitrarily. In my case, because of the way that the IR ended up, it tended to pick the one to the loop header, bailing out immediately after. However, if it had picked the one to the exit block, everything would have worked out fine (because the only remaining branch would then be folded, not thraded which is acceptable). Thus, to fix this problem, we can simply eliminate loop headers from consideration as possible threading targets earlier, to make sure that if there are multiple eligible branches, we can still thread one of the ones that don't target a loop header. Patch by Keno Fischer! Differential Revision: https://reviews.llvm.org/D42260 ------------------------------------------------------------------------ llvm-svn: 333577
-rw-r--r--llvm/lib/Transforms/Scalar/JumpThreading.cpp17
-rw-r--r--llvm/test/Transforms/JumpThreading/header-succ.ll99
2 files changed, 115 insertions, 1 deletions
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 141c993..2f16454 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -1454,6 +1454,9 @@ FindMostPopularDest(BasicBlock *BB,
if (PredToDest.second)
DestPopularity[PredToDest.second]++;
+ if (DestPopularity.empty())
+ return nullptr;
+
// Find the most popular dest.
DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin();
BasicBlock *MostPopularDest = DPI->first;
@@ -1629,8 +1632,20 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
// threadable destination (the common case) we can avoid this.
BasicBlock *MostPopularDest = OnlyDest;
- if (MostPopularDest == MultipleDestSentinel)
+ if (MostPopularDest == MultipleDestSentinel) {
+ // Remove any loop headers from the Dest list, ThreadEdge conservatively
+ // won't process them, but we might have other destination that are eligible
+ // and we still want to process.
+ erase_if(PredToDestList,
+ [&](const std::pair<BasicBlock *, BasicBlock *> &PredToDest) {
+ return LoopHeaders.count(PredToDest.second) != 0;
+ });
+
+ if (PredToDestList.empty())
+ return false;
+
MostPopularDest = FindMostPopularDest(BB, PredToDestList);
+ }
// Now that we know what the most popular destination is, factor all
// predecessors that will jump to it into a single predecessor.
diff --git a/llvm/test/Transforms/JumpThreading/header-succ.ll b/llvm/test/Transforms/JumpThreading/header-succ.ll
new file mode 100644
index 0000000..859d44cf
--- /dev/null
+++ b/llvm/test/Transforms/JumpThreading/header-succ.ll
@@ -0,0 +1,99 @@
+; RUN: opt -S -jump-threading < %s | FileCheck %s
+
+; Check that the heuristic for avoiding accidental introduction of irreducible
+; loops doesn't also prevent us from threading simple constructs where this
+; isn't a problem.
+
+declare void @opaque_body()
+
+define void @jump_threading_loopheader() {
+; CHECK-LABEL: @jump_threading_loopheader
+top:
+ br label %entry
+
+entry:
+ %ind = phi i32 [0, %top], [%nextind, %latch]
+ %nextind = add i32 %ind, 1
+ %cmp = icmp ule i32 %ind, 10
+; CHECK: br i1 %cmp, label %latch, label %exit
+ br i1 %cmp, label %body, label %latch
+
+body:
+ call void @opaque_body()
+; CHECK: br label %entry
+ br label %latch
+
+latch:
+ %cond = phi i2 [1, %entry], [2, %body]
+ switch i2 %cond, label %unreach [
+ i2 2, label %entry
+ i2 1, label %exit
+ ]
+
+unreach:
+ unreachable
+
+exit:
+ ret void
+}
+
+; We also need to check the opposite order of the branches, in the switch
+; instruction because jump-threading relies on that to decide which edge to
+; try to thread first.
+define void @jump_threading_loopheader2() {
+; CHECK-LABEL: @jump_threading_loopheader2
+top:
+ br label %entry
+
+entry:
+ %ind = phi i32 [0, %top], [%nextind, %latch]
+ %nextind = add i32 %ind, 1
+ %cmp = icmp ule i32 %ind, 10
+; CHECK: br i1 %cmp, label %exit, label %latch
+ br i1 %cmp, label %body, label %latch
+
+body:
+ call void @opaque_body()
+; CHECK: br label %entry
+ br label %latch
+
+latch:
+ %cond = phi i2 [1, %entry], [2, %body]
+ switch i2 %cond, label %unreach [
+ i2 1, label %entry
+ i2 2, label %exit
+ ]
+
+unreach:
+ unreachable
+
+exit:
+ ret void
+}
+
+; Check if we can handle undef branch condition.
+define void @jump_threading_loopheader3() {
+; CHECK-LABEL: @jump_threading_loopheader3
+top:
+ br label %entry
+
+entry:
+ %ind = phi i32 [0, %top], [%nextind, %latch]
+ %nextind = add i32 %ind, 1
+ %cmp = icmp ule i32 %ind, 10
+; CHECK: br i1 %cmp, label %latch, label %exit
+ br i1 %cmp, label %body, label %latch
+
+body:
+ call void @opaque_body()
+; CHECK: br label %entry
+ br label %latch
+
+latch:
+ %phi = phi i32 [undef, %entry], [0, %body]
+ %cmp1 = icmp eq i32 %phi, 0
+ br i1 %cmp1, label %entry, label %exit
+
+exit:
+ ret void
+}