aboutsummaryrefslogtreecommitdiff
path: root/polly
diff options
context:
space:
mode:
authorSiddharth Bhat <siddu.druid@gmail.com>2017-07-14 10:00:25 +0000
committerSiddharth Bhat <siddu.druid@gmail.com>2017-07-14 10:00:25 +0000
commit03346c270129145e8d3123cffdbddc3297a1052a (patch)
tree75080680be61bf6da27adf93652ffc78829cbfe1 /polly
parent87a70679831b185cc21f9c9dca07d98ac95b1207 (diff)
downloadllvm-03346c270129145e8d3123cffdbddc3297a1052a.zip
llvm-03346c270129145e8d3123cffdbddc3297a1052a.tar.gz
llvm-03346c270129145e8d3123cffdbddc3297a1052a.tar.bz2
[PPCGCodeGeneration] Fix runtime check adjustments since they make assumptions about BB layout.
- There is a conditional branch that is used to switch between the old and new versions of the code. - If we detect that the build was unsuccessful, `PPCGCodeGeneration` will change the runtime check to be always set to false. - To actually *reach* this runtime check instruction, `PPCGCodeGeneration` was using assumptions about the layout of the BBs. - However, invariant load hoisting violates this assumption by inserting an extra basic block in the middle. - Fix the assumption on the layout by having `executeScopConditionally` return the conditional branch instruction. - Use this reference to set the branch condition to always-false. llvm-svn: 308010
Diffstat (limited to 'polly')
-rw-r--r--polly/include/polly/CodeGen/Utils.h14
-rw-r--r--polly/lib/CodeGen/CodeGeneration.cpp2
-rw-r--r--polly/lib/CodeGen/PPCGCodeGeneration.cpp10
-rw-r--r--polly/lib/CodeGen/Utils.cpp11
-rw-r--r--polly/test/GPGPU/invariant-load-hoisting-with-failing-scop.ll84
5 files changed, 108 insertions, 13 deletions
diff --git a/polly/include/polly/CodeGen/Utils.h b/polly/include/polly/CodeGen/Utils.h
index 2fa3ccf..9bce03d6 100644
--- a/polly/include/polly/CodeGen/Utils.h
+++ b/polly/include/polly/CodeGen/Utils.h
@@ -22,6 +22,7 @@ class BasicBlock;
class DominatorTree;
class RegionInfo;
class LoopInfo;
+class BranchInst;
} // namespace llvm
namespace polly {
@@ -60,9 +61,14 @@ using BBPair = std::pair<llvm::BasicBlock *, llvm::BasicBlock *>;
/// @param P A reference to the pass calling this function.
/// @param RTC The runtime condition checked before executing the new SCoP.
///
-/// @return The 'StartBlock' to which new code can be added.
-BBPair executeScopConditionally(Scop &S, llvm::Value *RTC,
- llvm::DominatorTree &DT, llvm::RegionInfo &RI,
- llvm::LoopInfo &LI);
+/// @return An std::pair:
+/// - The first element is a BBPair of (StartBlock, EndBlock).
+/// - The second element is the BranchInst which conditionally
+/// branches to the SCoP based on the RTC.
+///
+std::pair<BBPair, llvm::BranchInst *>
+executeScopConditionally(Scop &S, llvm::Value *RTC, llvm::DominatorTree &DT,
+ llvm::RegionInfo &RI, llvm::LoopInfo &LI);
+
} // namespace polly
#endif
diff --git a/polly/lib/CodeGen/CodeGeneration.cpp b/polly/lib/CodeGen/CodeGeneration.cpp
index 8617e1c..5e845f1 100644
--- a/polly/lib/CodeGen/CodeGeneration.cpp
+++ b/polly/lib/CodeGen/CodeGeneration.cpp
@@ -176,7 +176,7 @@ static bool CodeGen(Scop &S, IslAstInfo &AI, LoopInfo &LI, DominatorTree &DT,
// which may introduce scalar dependences that prevent us from correctly
// code generating this scop.
BBPair StartExitBlocks =
- executeScopConditionally(S, Builder.getTrue(), DT, RI, LI);
+ std::get<0>(executeScopConditionally(S, Builder.getTrue(), DT, RI, LI));
BasicBlock *StartBlock = std::get<0>(StartExitBlocks);
BasicBlock *ExitBlock = std::get<1>(StartExitBlocks);
diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
index 7b096a1..6ccf787 100644
--- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp
+++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
@@ -2935,10 +2935,14 @@ public:
// the SCEVExpander may introduce while code generating the parameters and
// which may introduce scalar dependences that prevent us from correctly
// code generating this scop.
- BBPair StartExitBlocks =
+ BBPair StartExitBlocks;
+ BranchInst *CondBr = nullptr;
+ std::tie(StartExitBlocks, CondBr) =
executeScopConditionally(*S, Builder.getTrue(), *DT, *RI, *LI);
BasicBlock *StartBlock = std::get<0>(StartExitBlocks);
+ assert(CondBr && "CondBr not initialized by executeScopConditionally");
+
GPUNodeBuilder NodeBuilder(Builder, Annotator, *DL, *LI, *SE, *DT, *S,
StartBlock, Prog, Runtime, Architecture);
@@ -2966,10 +2970,10 @@ public:
/// kernel, the SCoP is probably mostly sequential. Hence, there is no
/// point in running it on a GPU.
if (NodeBuilder.DeepestSequential > NodeBuilder.DeepestParallel)
- SplitBlock->getTerminator()->setOperand(0, Builder.getFalse());
+ CondBr->setOperand(0, Builder.getFalse());
if (!NodeBuilder.BuildSuccessful)
- SplitBlock->getTerminator()->setOperand(0, Builder.getFalse());
+ CondBr->setOperand(0, Builder.getFalse());
}
bool runOnScop(Scop &CurrentScop) override {
diff --git a/polly/lib/CodeGen/Utils.cpp b/polly/lib/CodeGen/Utils.cpp
index 9c2cfd03..4d595fb 100644
--- a/polly/lib/CodeGen/Utils.cpp
+++ b/polly/lib/CodeGen/Utils.cpp
@@ -76,9 +76,9 @@ static BasicBlock *splitEdge(BasicBlock *Prev, BasicBlock *Succ,
return MiddleBlock;
}
-polly::BBPair polly::executeScopConditionally(Scop &S, Value *RTC,
- DominatorTree &DT, RegionInfo &RI,
- LoopInfo &LI) {
+std::pair<polly::BBPair, BranchInst *>
+polly::executeScopConditionally(Scop &S, Value *RTC, DominatorTree &DT,
+ RegionInfo &RI, LoopInfo &LI) {
Region &R = S.getRegion();
PollyIRBuilder Builder(S.getEntry());
@@ -148,7 +148,8 @@ polly::BBPair polly::executeScopConditionally(Scop &S, Value *RTC,
BasicBlock::Create(F->getContext(), "polly.exiting", F);
SplitBlock->getTerminator()->eraseFromParent();
Builder.SetInsertPoint(SplitBlock);
- Builder.CreateCondBr(RTC, StartBlock, S.getEntry());
+ BranchInst *CondBr = Builder.CreateCondBr(RTC, StartBlock, S.getEntry());
+
if (Loop *L = LI.getLoopFor(SplitBlock)) {
L->addBasicBlockToLoop(StartBlock, LI);
L->addBasicBlockToLoop(ExitingBlock, LI);
@@ -216,5 +217,5 @@ polly::BBPair polly::executeScopConditionally(Scop &S, Value *RTC,
// ExitBB //
// / \ //
- return std::make_pair(StartBlock, ExitingBlock);
+ return std::make_pair(std::make_pair(StartBlock, ExitingBlock), CondBr);
}
diff --git a/polly/test/GPGPU/invariant-load-hoisting-with-failing-scop.ll b/polly/test/GPGPU/invariant-load-hoisting-with-failing-scop.ll
new file mode 100644
index 0000000..7e83aa9
--- /dev/null
+++ b/polly/test/GPGPU/invariant-load-hoisting-with-failing-scop.ll
@@ -0,0 +1,84 @@
+; RUN: opt %loadPolly -analyze -polly-use-llvm-names -polly-scops \
+; RUN: -polly-invariant-load-hoisting < %s | FileCheck %s -check-prefix=SCOP
+
+; RUN: opt %loadPolly -S -polly-use-llvm-names -polly-codegen-ppcg \
+; RUN: -polly-invariant-load-hoisting < %s | FileCheck %s -check-prefix=HOST-IR
+
+; REQUIRES: pollyacc
+
+; SCOP: Function: f
+; SCOP-NEXT: Region: %entry.split---%for.end
+; SCOP-NEXT: Max Loop Depth: 1
+; SCOP-NEXT: Invariant Accesses: {
+; SCOP-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; SCOP-NEXT: [tmp, tmp1] -> { Stmt_if_end[i0] -> MemRef_end[0] };
+; SCOP-NEXT: Execution Context: [tmp, tmp1] -> { : }
+; SCOP-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; SCOP-NEXT: [tmp, tmp1] -> { Stmt_for_body[i0] -> MemRef_control[0] };
+; SCOP-NEXT: Execution Context: [tmp, tmp1] -> { : tmp > 0 }
+; SCOP-NEXT: }
+
+; Check that we generate a correct "always false" branch.
+; HOST-IR: br i1 false, label %polly.start, label %entry.split.pre_entry_bb
+
+; This test case checks that we generate correct code if PPCGCodeGeneration
+; decides a build is unsuccessful with invariant load hoisting enabled.
+;
+; There is a conditional branch which switches between the original code and
+; the new code. We try to set this conditional branch to branch on false.
+; However, invariant load hoisting changes the structure of the scop, so we
+; need to change the way we *locate* this instruction.
+;
+; void f(const int *end, int *arr, const int *control, const int *readarr) {
+; for (int i = 0; i < *end; i++) {
+; int t = 0;
+; if (*control > 3) {
+; t += readarr[i];
+; }
+; arr[i] = t;
+; }
+; }
+;
+
+target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.12.0"
+
+define void @f(i32* %end, i32* %arr, i32* %control, i32* %readarr) {
+entry:
+ br label %entry.split
+
+entry.split: ; preds = %entry
+ %tmp3 = load i32, i32* %end, align 4
+ %cmp4 = icmp sgt i32 %tmp3, 0
+ br i1 %cmp4, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry.split
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %if.end
+ %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %if.end ]
+ %tmp1 = load i32, i32* %control, align 4
+ %cmp1 = icmp sgt i32 %tmp1, 3
+ br i1 %cmp1, label %if.then, label %if.end
+
+if.then: ; preds = %for.body
+ %arrayidx = getelementptr inbounds i32, i32* %readarr, i32 %i.05
+ %tmp2 = load i32, i32* %arrayidx, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %for.body
+ %t.0 = phi i32 [ %tmp2, %if.then ], [ 0, %for.body ]
+ %arrayidx2 = getelementptr inbounds i32, i32* %arr, i32 %i.05
+ store i32 %t.0, i32* %arrayidx2, align 4
+ %inc = add nuw nsw i32 %i.05, 1
+ %tmp = load i32, i32* %end, align 4
+ %cmp = icmp slt i32 %inc, %tmp
+ br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge: ; preds = %if.end
+ br label %for.end
+
+for.end: ; preds = %for.cond.for.end_crit_edge, %entry.split
+ ret void
+}
+