diff options
| author | Tobias Grosser <tobias@grosser.es> | 2016-07-19 15:56:25 +0000 |
|---|---|---|
| committer | Tobias Grosser <tobias@grosser.es> | 2016-07-19 15:56:25 +0000 |
| commit | 2d58a64e7ffd76a3c8cfd586aa72b171f10a38a9 (patch) | |
| tree | c9fe19b974acf76dab5a8187903f897bfc856616 | |
| parent | ccf48a2732ca453ed6f849ae8600a2deed15dee2 (diff) | |
| download | llvm-2d58a64e7ffd76a3c8cfd586aa72b171f10a38a9.zip llvm-2d58a64e7ffd76a3c8cfd586aa72b171f10a38a9.tar.gz llvm-2d58a64e7ffd76a3c8cfd586aa72b171f10a38a9.tar.bz2 | |
GPGPU: Bail out of scops with hoisted invariant loads
This is currently not supported; support will be added later. Also update the
test cases to ensure no invariant code hoisting is applied.
llvm-svn: 275987
| -rw-r--r-- | polly/include/polly/ScopInfo.h | 3 | ||||
| -rw-r--r-- | polly/lib/CodeGen/PPCGCodeGeneration.cpp | 4 | ||||
| -rw-r--r-- | polly/test/GPGPU/host-statement.ll | 12 |
3 files changed, 14 insertions, 5 deletions
diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h index b8b6679..aecab88 100644 --- a/polly/include/polly/ScopInfo.h +++ b/polly/include/polly/ScopInfo.h @@ -1939,6 +1939,9 @@ public: return InvariantEquivClasses; } + /// @brief Check if the scop has any invariant access. + bool hasInvariantAccesses() { return !InvariantEquivClasses.empty(); } + /// @brief Mark the SCoP as optimized by the scheduler. void markAsOptimized() { IsOptimized = true; } diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp index 050a71e..1621252b 100644 --- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp +++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp @@ -1013,6 +1013,10 @@ public: DL = &S->getRegion().getEntry()->getParent()->getParent()->getDataLayout(); RI = &getAnalysis<RegionInfoPass>().getRegionInfo(); + // We currently do not support scops with invariant loads. + if (S->hasInvariantAccesses()) + return false; + auto PPCGScop = createPPCGScop(); auto PPCGProg = createPPCGProg(PPCGScop); auto PPCGGen = generateGPU(PPCGScop, PPCGProg); diff --git a/polly/test/GPGPU/host-statement.ll b/polly/test/GPGPU/host-statement.ll index ded1d58..73b0ea5 100644 --- a/polly/test/GPGPU/host-statement.ll +++ b/polly/test/GPGPU/host-statement.ll @@ -1,8 +1,10 @@ ; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \ +; RUN: -polly-invariant-load-hoisting=false \ ; RUN: -disable-output < %s | \ ; RUN: FileCheck -check-prefix=CODE %s ; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \ +; RUN: -polly-invariant-load-hoisting=false \ ; RUN: -disable-output < %s | \ ; RUN: FileCheck -check-prefix=KERNEL-IR %s @@ -16,15 +18,16 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) #0 ; This test case tests that we can correctly handle a ScopStmt that is ; scheduled on the host, instead of within a kernel. 
-; CODE: Code +; CODE-LABEL: Code ; CODE-NEXT: ==== ; CODE-NEXT: # host ; CODE-NEXT: { ; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (512) * (512) * sizeof(double), cudaMemcpyHostToDevice)); +; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_R, MemRef_R, (p_0 + 1) * (512) * sizeof(double), cudaMemcpyHostToDevice)); ; CODE-NEXT: { ; CODE-NEXT: dim3 k0_dimBlock(32); ; CODE-NEXT: dim3 k0_dimGrid(16); -; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, dev_MemRef_Q, p_0, p_1); +; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, dev_MemRef_R, dev_MemRef_Q, p_0, p_1); ; CODE-NEXT: cudaCheckKernel(); ; CODE-NEXT: } @@ -39,14 +42,13 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) #0 ; CODE: { ; CODE-NEXT: dim3 k2_dimBlock(16, 32); ; CODE-NEXT: dim3 k2_dimGrid(16, p_1 <= -7650 ? 256 : -p_1 + floord(31 * p_1 + 30, 32) + 16); -; CODE-NEXT: kernel2 <<<k2_dimGrid, k2_dimBlock>>> (dev_MemRef_A, dev_MemRef_R, dev_MemRef_Q, p_0, p_1); +; CODE-NEXT: kernel2 <<<k2_dimGrid, k2_dimBlock>>> (dev_MemRef_A, dev_MemRef_R, dev_MemRef_Q, p_0, p_1); ; CODE-NEXT: cudaCheckKernel(); ; CODE-NEXT: } ; CODE: } ; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (512) * (512) * sizeof(double), cudaMemcpyDeviceToHost)); -; CODE-NEXT: if (p_0 <= 510 && p_1 <= 510) -; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_R, dev_MemRef_R, (p_0 + 1) * (512) * sizeof(double), cudaMemcpyDeviceToHost)); +; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_R, dev_MemRef_R, (p_0 + 1) * (512) * sizeof(double), cudaMemcpyDeviceToHost)); ; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_Q, dev_MemRef_Q, (512) * (512) * sizeof(double), cudaMemcpyDeviceToHost)); ; CODE-NEXT: Stmt_for_cond33_preheader(); |
