author    Siddharth Bhat <siddu.druid@gmail.com>  2017-07-06 13:42:42 +0000
committer Siddharth Bhat <siddu.druid@gmail.com>  2017-07-06 13:42:42 +0000
commit    761e5b9310c8ca101c26909a24c440bfc69bf827 (patch)
tree      8bd0dc28eeabdcbb2618c8651c4b78ed98ac4a96 /polly
parent    9c3e2eac6a5e48962250342c72989e321e889607 (diff)
[Polly] [PPCGCodeGeneration] Teach `must_kills` to kill scalars that are local to the scop.
- By definition, we can pass something as a `kill` to PPCG if we know that no data can flow across a kill.
- This is useful for more complex examples where we have scalars that are local to a scop.
- If the local is only used within a scop, we are free to kill it.

Differential Revision: https://reviews.llvm.org/D35045

llvm-svn: 307260
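To make the criterion concrete, here is a small C-style illustration (not part of the patch; the function and variable names are made up). Every use of `x` sits inside the scop, so no value of `x` can flow across a kill and it is safe to add it to the must-kills; `sum` is still read after the scop, so it escapes and must not be killed:

    /* Hypothetical illustration only; names are assumptions, not from the patch. */
    void example(int A[], int B[], int n, int *out) {
      int x;       /* every use of x is inside the scop -> may be killed      */
      int sum = 0; /* sum is still read after the scop  -> must not be killed */
    #pragma scop
      for (int i = 0; i < n; i++) {
        x = A[i] + 1; /* x is redefined in every iteration ...          */
        B[i] = x * 2; /* ... and consumed before the iteration ends     */
        sum += B[i];  /* sum accumulates a value that outlives the scop */
      }
    #pragma endscop
      *out = sum;     /* this use outside the scop keeps sum alive      */
    }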
Diffstat (limited to 'polly')
-rw-r--r--  polly/lib/CodeGen/PPCGCodeGeneration.cpp          |  30
-rw-r--r--  polly/test/GPGPU/add-scalars-in-scop-to-kills.ll  |  71
-rw-r--r--  polly/test/GPGPU/phi-nodes-in-kernel.ll           |  25
3 files changed, 117 insertions(+), 9 deletions(-)
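Stripped of Polly's ScopArrayInfo plumbing, the new helper in the patch below is a use-containment test on the scalar's base value: walk all LLVM users and require that each one is an instruction contained in the scop's region. A rough standalone sketch of the same idea, assuming the standard llvm::Value::users() and llvm::Region::contains() APIs (this is a simplified restatement, not the patch itself):

    #include "llvm/Analysis/RegionInfo.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/Support/Casting.h"

    // Simplified sketch of the use-containment idea (not the actual patch).
    // Returns true iff every user of V is an instruction inside region R,
    // i.e. no value of V can be observed outside the scop, so killing it
    // cannot lose data.
    static bool allUsesInsideRegion(const llvm::Region &R, const llvm::Value *V) {
      for (const llvm::User *U : V->users()) {
        const auto *I = llvm::dyn_cast<llvm::Instruction>(U);
        if (!I || !R.contains(I))
          return false; // a non-instruction user or an out-of-scop use blocks the kill
      }
      return true;
    }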
diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
index 4e49d1c..d2e4b2b 100644
--- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp
+++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
@@ -138,6 +138,25 @@ struct MustKillsInfo {
MustKillsInfo() : KillsSchedule(nullptr), TaggedMustKills(nullptr){};
};
+/// Check if SAI's uses are entirely contained within Scop S.
+/// If a scalar is used only within a Scop, we are free to kill it, as no data
+/// can flow in/out of the value any more.
+/// @see computeMustKillsInfo
+static bool isScalarUsesContainedInScop(const Scop &S,
+ const ScopArrayInfo *SAI) {
+ assert(SAI->isValueKind() && "this function only deals with scalars."
+ " Dealing with arrays required alias analysis");
+
+ const Region &R = S.getRegion();
+ for (User *U : SAI->getBasePtr()->users()) {
+ Instruction *I = dyn_cast<Instruction>(U);
+ assert(I && "invalid user of scop array info");
+ if (!R.contains(I))
+ return false;
+ }
+ return true;
+}
+
/// Compute must-kills needed to enable live range reordering with PPCG.
///
/// @params S The Scop to compute live range reordering information
@@ -147,13 +166,14 @@ static MustKillsInfo computeMustKillsInfo(const Scop &S) {
const isl::space ParamSpace(isl::manage(S.getParamSpace()));
MustKillsInfo Info;
- // 1. Collect phi nodes in scop.
+ // 1. Collect all ScopArrayInfo that satisfy *any* of the criteria:
+ // 1.1 phi nodes in scop.
+ // 1.2 scalars that are only used within the scop
SmallVector<isl::id, 4> KillMemIds;
for (ScopArrayInfo *SAI : S.arrays()) {
- if (!SAI->isPHIKind())
- continue;
-
- KillMemIds.push_back(isl::manage(SAI->getBasePtrId()));
+ if (SAI->isPHIKind() ||
+ (SAI->isValueKind() && isScalarUsesContainedInScop(S, SAI)))
+ KillMemIds.push_back(isl::manage(SAI->getBasePtrId()));
}
Info.TaggedMustKills = isl::union_map::empty(isl::space(ParamSpace));
diff --git a/polly/test/GPGPU/add-scalars-in-scop-to-kills.ll b/polly/test/GPGPU/add-scalars-in-scop-to-kills.ll
new file mode 100644
index 0000000..f3272de
--- /dev/null
+++ b/polly/test/GPGPU/add-scalars-in-scop-to-kills.ll
@@ -0,0 +1,71 @@
+; RUN: opt %loadPolly -analyze -polly-scops < %s | FileCheck %s -check-prefix=SCOP
+; RUN: opt %loadPolly -S -polly-codegen-ppcg < %s | FileCheck %s -check-prefix=HOST-IR
+
+; REQUIRES: pollyacc
+
+; Check that we detect a scop.
+; SCOP: Function: checkScalarKill
+; SCOP-NEXT: Region: %XLoopInit---%for.end
+; SCOP-NEXT: Max Loop Depth: 1
+
+; Check that we have a scalar that is not a phi node in the scop.
+; SCOP: i32 MemRef_x_0; // Element size 4
+
+; Check that a kernel launch is generated in the host IR.
+; The declare would not be generated unless a call to a kernel exists.
+; HOST-IR: declare void @polly_launchKernel(i8*, i32, i32, i32, i32, i32, i8*)
+
+; Check that we add variables that are local to a scop into the kills that we
+; pass to PPCG. This should enable PPCG to codegen this example.
+; void checkScalarKill(int A[], int B[], int C[], const int control1, int control2) {
+; int x;
+; #pragma scop
+; for(int i = 0; i < 1000; i++) {
+; XLoopInit: x = 0;
+;
+; if (control1 > 2)
+; C1Add: x += 10;
+; if (control2 > 3)
+; C2Add: x += A[i];
+;
+; BLoopAccumX: B[i] += x;
+; }
+;
+; #pragma endscop
+; }
+; ModuleID = 'test.ll'
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @checkScalarKill(i32* %A, i32* %B, i32* %C, i32 %control1, i32 %control2) {
+entry:
+ br label %entry.split
+
+entry.split: ; preds = %entry
+ br label %XLoopInit
+
+XLoopInit: ; preds = %entry.split, %BLoopAccumX
+ %indvars.iv = phi i64 [ 0, %entry.split ], [ %indvars.iv.next, %BLoopAccumX ]
+ %cmp1 = icmp sgt i32 %control1, 2
+ %x.0 = select i1 %cmp1, i32 10, i32 0
+ %cmp2 = icmp sgt i32 %control2, 3
+ br i1 %cmp2, label %C2Add, label %BLoopAccumX
+
+C2Add: ; preds = %XLoopInit
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %tmp6 = load i32, i32* %arrayidx, align 4
+ %add4 = add nsw i32 %tmp6, %x.0
+ br label %BLoopAccumX
+
+BLoopAccumX: ; preds = %XLoopInit, %C2Add
+ %x.1 = phi i32 [ %add4, %C2Add ], [ %x.0, %XLoopInit ]
+ %arrayidx7 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+ %tmp11 = load i32, i32* %arrayidx7, align 4
+ %add8 = add nsw i32 %tmp11, %x.1
+ store i32 %add8, i32* %arrayidx7, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp ne i64 %indvars.iv.next, 1000
+ br i1 %exitcond, label %XLoopInit, label %for.end
+
+for.end: ; preds = %BLoopAccumX
+ ret void
+}
diff --git a/polly/test/GPGPU/phi-nodes-in-kernel.ll b/polly/test/GPGPU/phi-nodes-in-kernel.ll
index eb32030..5519e89 100644
--- a/polly/test/GPGPU/phi-nodes-in-kernel.ll
+++ b/polly/test/GPGPU/phi-nodes-in-kernel.ll
@@ -11,6 +11,24 @@
; REQUIRES: pollyacc
+; Approximate C source:
+; void kernel_dynprog(int c[50]) {
+; int iter = 0;
+; int outl = 0;
+;
+; while(1) {
+; for(int indvar = 1 ; indvar <= 49; indvar++) {
+; c[indvar] = undef;
+; }
+; add78 = c[49] + outl;
+; inc80 = iter + 1;
+;
+; if (true) break;
+;
+; outl = add78;
+; iter = inc80;
+; }
+;}
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@@ -24,8 +42,7 @@ target triple = "x86_64-unknown-linux-gnu"
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
-; CODE: cudaCheckReturn(cudaMemcpy(&MemRef_out_l_055, dev_MemRef_out_l_055, sizeof(i32), cudaMemcpyDeviceToHost));
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_c, dev_MemRef_c, (50) * sizeof(i32), cudaMemcpyDeviceToHost));
+; CODE: cudaCheckReturn(cudaMemcpy(MemRef_c, dev_MemRef_c, (50) * sizeof(i32), cudaMemcpyDeviceToHost));
; CODE-NEXT: }
; CODE: # kernel0
@@ -40,8 +57,8 @@ target triple = "x86_64-unknown-linux-gnu"
; IR: [[REGA:%.+]] = bitcast i32* %out_l.055.phiops to i8*
; IR-NEXT: call void @polly_copyFromHostToDevice(i8* [[REGA]], i8* %p_dev_array_MemRef_out_l_055__phi, i64 4)
-; IR: [[REGC:%.+]] = bitcast i32* %out_l.055.s2a to i8*
-; IR-NEXT: call void @polly_copyFromDeviceToHost(i8* %p_dev_array_MemRef_out_l_055, i8* [[REGC]], i64 4)
+; IR: [[REGC:%.+]] = bitcast i32* %38 to i8*
+; IR-NEXT: call void @polly_copyFromDeviceToHost(i8* %p_dev_array_MemRef_c, i8* [[REGC]], i64 196)
; KERNEL-IR: entry:
; KERNEL-IR-NEXT: %out_l.055.s2a = alloca i32