diff options
author | Siddharth Bhat <siddu.druid@gmail.com> | 2017-07-14 10:00:25 +0000 |
---|---|---|
committer | Siddharth Bhat <siddu.druid@gmail.com> | 2017-07-14 10:00:25 +0000 |
commit | 03346c270129145e8d3123cffdbddc3297a1052a (patch) | |
tree | 75080680be61bf6da27adf93652ffc78829cbfe1 /polly | |
parent | 87a70679831b185cc21f9c9dca07d98ac95b1207 (diff) | |
download | llvm-03346c270129145e8d3123cffdbddc3297a1052a.zip llvm-03346c270129145e8d3123cffdbddc3297a1052a.tar.gz llvm-03346c270129145e8d3123cffdbddc3297a1052a.tar.bz2 |
[PPCGCodeGeneration] Fix runtime check adjustments since they make assumptions about BB layout.
- There is a conditional branch that is used to switch between the old
and new versions of the code.
- If we detect that the build was unsuccessful, `PPCGCodeGeneration` will
change the runtime check to be always set to false.
- To actually *reach* this runtime check instruction, `PPCGCodeGeneration`
was using assumptions about the layout of the BBs.
- However, invariant load hoisting violates this assumption by inserting
an extra basic block in the middle.
- Fix the assumption on the layout by having `executeScopConditionally`
return the conditional branch instruction.
- Use this reference to set the runtime check to always-false.
llvm-svn: 308010
Diffstat (limited to 'polly')
-rw-r--r-- | polly/include/polly/CodeGen/Utils.h | 14 | ||||
-rw-r--r-- | polly/lib/CodeGen/CodeGeneration.cpp | 2 | ||||
-rw-r--r-- | polly/lib/CodeGen/PPCGCodeGeneration.cpp | 10 | ||||
-rw-r--r-- | polly/lib/CodeGen/Utils.cpp | 11 | ||||
-rw-r--r-- | polly/test/GPGPU/invariant-load-hoisting-with-failing-scop.ll | 84 |
5 files changed, 108 insertions, 13 deletions
diff --git a/polly/include/polly/CodeGen/Utils.h b/polly/include/polly/CodeGen/Utils.h index 2fa3ccf..9bce03d6 100644 --- a/polly/include/polly/CodeGen/Utils.h +++ b/polly/include/polly/CodeGen/Utils.h @@ -22,6 +22,7 @@ class BasicBlock; class DominatorTree; class RegionInfo; class LoopInfo; +class BranchInst; } // namespace llvm namespace polly { @@ -60,9 +61,14 @@ using BBPair = std::pair<llvm::BasicBlock *, llvm::BasicBlock *>; /// @param P A reference to the pass calling this function. /// @param RTC The runtime condition checked before executing the new SCoP. /// -/// @return The 'StartBlock' to which new code can be added. -BBPair executeScopConditionally(Scop &S, llvm::Value *RTC, - llvm::DominatorTree &DT, llvm::RegionInfo &RI, - llvm::LoopInfo &LI); +/// @return An std::pair: +/// - The first element is a BBPair of (StartBlock, EndBlock). +/// - The second element is the BranchInst which conditionally +/// branches to the SCoP based on the RTC. +/// +std::pair<BBPair, llvm::BranchInst *> +executeScopConditionally(Scop &S, llvm::Value *RTC, llvm::DominatorTree &DT, + llvm::RegionInfo &RI, llvm::LoopInfo &LI); + } // namespace polly #endif diff --git a/polly/lib/CodeGen/CodeGeneration.cpp b/polly/lib/CodeGen/CodeGeneration.cpp index 8617e1c..5e845f1 100644 --- a/polly/lib/CodeGen/CodeGeneration.cpp +++ b/polly/lib/CodeGen/CodeGeneration.cpp @@ -176,7 +176,7 @@ static bool CodeGen(Scop &S, IslAstInfo &AI, LoopInfo &LI, DominatorTree &DT, // which may introduce scalar dependences that prevent us from correctly // code generating this scop. 
BBPair StartExitBlocks = - executeScopConditionally(S, Builder.getTrue(), DT, RI, LI); + std::get<0>(executeScopConditionally(S, Builder.getTrue(), DT, RI, LI)); BasicBlock *StartBlock = std::get<0>(StartExitBlocks); BasicBlock *ExitBlock = std::get<1>(StartExitBlocks); diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp index 7b096a1..6ccf787 100644 --- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp +++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp @@ -2935,10 +2935,14 @@ public: // the SCEVExpander may introduce while code generating the parameters and // which may introduce scalar dependences that prevent us from correctly // code generating this scop. - BBPair StartExitBlocks = + BBPair StartExitBlocks; + BranchInst *CondBr = nullptr; + std::tie(StartExitBlocks, CondBr) = executeScopConditionally(*S, Builder.getTrue(), *DT, *RI, *LI); BasicBlock *StartBlock = std::get<0>(StartExitBlocks); + assert(CondBr && "CondBr not initialized by executeScopConditionally"); + GPUNodeBuilder NodeBuilder(Builder, Annotator, *DL, *LI, *SE, *DT, *S, StartBlock, Prog, Runtime, Architecture); @@ -2966,10 +2970,10 @@ public: /// kernel, the SCoP is probably mostly sequential. Hence, there is no /// point in running it on a GPU. 
if (NodeBuilder.DeepestSequential > NodeBuilder.DeepestParallel) - SplitBlock->getTerminator()->setOperand(0, Builder.getFalse()); + CondBr->setOperand(0, Builder.getFalse()); if (!NodeBuilder.BuildSuccessful) - SplitBlock->getTerminator()->setOperand(0, Builder.getFalse()); + CondBr->setOperand(0, Builder.getFalse()); } bool runOnScop(Scop &CurrentScop) override { diff --git a/polly/lib/CodeGen/Utils.cpp b/polly/lib/CodeGen/Utils.cpp index 9c2cfd03..4d595fb 100644 --- a/polly/lib/CodeGen/Utils.cpp +++ b/polly/lib/CodeGen/Utils.cpp @@ -76,9 +76,9 @@ static BasicBlock *splitEdge(BasicBlock *Prev, BasicBlock *Succ, return MiddleBlock; } -polly::BBPair polly::executeScopConditionally(Scop &S, Value *RTC, - DominatorTree &DT, RegionInfo &RI, - LoopInfo &LI) { +std::pair<polly::BBPair, BranchInst *> +polly::executeScopConditionally(Scop &S, Value *RTC, DominatorTree &DT, + RegionInfo &RI, LoopInfo &LI) { Region &R = S.getRegion(); PollyIRBuilder Builder(S.getEntry()); @@ -148,7 +148,8 @@ polly::BBPair polly::executeScopConditionally(Scop &S, Value *RTC, BasicBlock::Create(F->getContext(), "polly.exiting", F); SplitBlock->getTerminator()->eraseFromParent(); Builder.SetInsertPoint(SplitBlock); - Builder.CreateCondBr(RTC, StartBlock, S.getEntry()); + BranchInst *CondBr = Builder.CreateCondBr(RTC, StartBlock, S.getEntry()); + if (Loop *L = LI.getLoopFor(SplitBlock)) { L->addBasicBlockToLoop(StartBlock, LI); L->addBasicBlockToLoop(ExitingBlock, LI); @@ -216,5 +217,5 @@ polly::BBPair polly::executeScopConditionally(Scop &S, Value *RTC, // ExitBB // // / \ // - return std::make_pair(StartBlock, ExitingBlock); + return std::make_pair(std::make_pair(StartBlock, ExitingBlock), CondBr); } diff --git a/polly/test/GPGPU/invariant-load-hoisting-with-failing-scop.ll b/polly/test/GPGPU/invariant-load-hoisting-with-failing-scop.ll new file mode 100644 index 0000000..7e83aa9 --- /dev/null +++ b/polly/test/GPGPU/invariant-load-hoisting-with-failing-scop.ll @@ -0,0 +1,84 @@ +; RUN: opt 
%loadPolly -analyze -polly-use-llvm-names -polly-scops \ +; RUN: -polly-invariant-load-hoisting < %s | FileCheck %s -check-prefix=SCOP + +; RUN: opt %loadPolly -S -polly-use-llvm-names -polly-codegen-ppcg \ +; RUN: -polly-invariant-load-hoisting < %s | FileCheck %s -check-prefix=HOST-IR + +; REQUIRES: pollyacc + +; SCOP: Function: f +; SCOP-NEXT: Region: %entry.split---%for.end +; SCOP-NEXT: Max Loop Depth: 1 +; SCOP-NEXT: Invariant Accesses: { +; SCOP-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; SCOP-NEXT: [tmp, tmp1] -> { Stmt_if_end[i0] -> MemRef_end[0] }; +; SCOP-NEXT: Execution Context: [tmp, tmp1] -> { : } +; SCOP-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; SCOP-NEXT: [tmp, tmp1] -> { Stmt_for_body[i0] -> MemRef_control[0] }; +; SCOP-NEXT: Execution Context: [tmp, tmp1] -> { : tmp > 0 } +; SCOP-NEXT: } + +; Check that we generate a correct "always false" branch. +; HOST-IR: br i1 false, label %polly.start, label %entry.split.pre_entry_bb + +; This test case checks that we generate correct code if PPCGCodeGeneration +; decides a build is unsuccessful with invariant load hoisting enabled. +; +; There is a conditional branch which switches between the original code and +; the new code. We try to set this conditional branch to branch on false. +; However, invariant load hoisting changes the structure of the scop, so we +; need to change the way we *locate* this instruction. 
+; +; void f(const int *end, int *arr, const int *control, const int *readarr) { +; for (int i = 0; i < *end; i++) { +; int t = 0; +; if (*control > 3) { +; t += readarr[i]; +; } +; arr[i] = t; +; } +; } +; + +target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128" +target triple = "i386-apple-macosx10.12.0" + +define void @f(i32* %end, i32* %arr, i32* %control, i32* %readarr) { +entry: + br label %entry.split + +entry.split: ; preds = %entry + %tmp3 = load i32, i32* %end, align 4 + %cmp4 = icmp sgt i32 %tmp3, 0 + br i1 %cmp4, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry.split + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %if.end + %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %if.end ] + %tmp1 = load i32, i32* %control, align 4 + %cmp1 = icmp sgt i32 %tmp1, 3 + br i1 %cmp1, label %if.then, label %if.end + +if.then: ; preds = %for.body + %arrayidx = getelementptr inbounds i32, i32* %readarr, i32 %i.05 + %tmp2 = load i32, i32* %arrayidx, align 4 + br label %if.end + +if.end: ; preds = %if.then, %for.body + %t.0 = phi i32 [ %tmp2, %if.then ], [ 0, %for.body ] + %arrayidx2 = getelementptr inbounds i32, i32* %arr, i32 %i.05 + store i32 %t.0, i32* %arrayidx2, align 4 + %inc = add nuw nsw i32 %i.05, 1 + %tmp = load i32, i32* %end, align 4 + %cmp = icmp slt i32 %inc, %tmp + br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge + +for.cond.for.end_crit_edge: ; preds = %if.end + br label %for.end + +for.end: ; preds = %for.cond.for.end_crit_edge, %entry.split + ret void +} + |