AMDGPU: Split SILowerControlFlow into two pieces

Do most of the lowering in a pre-RA pass. Keep the skip jump insertion late, plus a few other things that require more work to move out.

One concern I have is that there may now be COPY instructions which do not have the necessary implicit exec uses if they will be lowered to v_mov_b32.

This has a positive effect on SGPR usage in shader-db.

llvm-svn: 279464
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2016-08-22 19:33:16 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2016-08-22 19:33:16 +0000
commit: 78fc9daf8d1825d32f170f8e60f9158550f93e93 (patch)
tree: 0fabfdfbe326321516366f98a99cf7de6c0b2e38 /llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
parent: 88d7da01ca7af18ed6bd446d388999bf9668a3cf (diff)
download: llvm-78fc9daf8d1825d32f170f8e60f9158550f93e93.zip
llvm-78fc9daf8d1825d32f170f8e60f9158550f93e93.tar.gz
llvm-78fc9daf8d1825d32f170f8e60f9158550f93e93.tar.bz2
1 file changed, 14 insertions, 8 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index f782ea3..c8900ff 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -80,6 +80,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
   initializeSIInsertWaitsPass(*PR);
   initializeSIWholeQuadModePass(*PR);
   initializeSILowerControlFlowPass(*PR);
+  initializeSIInsertSkipsPass(*PR);
   initializeSIDebuggerInsertNopsPass(*PR);
 }
 
@@ -532,13 +533,6 @@ bool GCNPassConfig::addGlobalInstructionSelect() {
 #endif
 
 void GCNPassConfig::addPreRegAlloc() {
-  // This needs to be run directly before register allocation because
-  // earlier passes might recompute live intervals.
-  // TODO: handle CodeGenOpt::None; fast RA ignores spill weights set by the pass
-  if (getOptLevel() > CodeGenOpt::None) {
-    insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);
-  }
-
   if (getOptLevel() > CodeGenOpt::None) {
     // Don't do this with no optimizations since it throws away debug info by
     // merging nonadjacent loads.
@@ -556,10 +550,22 @@ void GCNPassConfig::addPreRegAlloc() {
 }
 
 void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
+  // FIXME: We have to disable the verifier here because of PHIElimination +
+  // TwoAddressInstructions disabling it.
+  insertPass(&TwoAddressInstructionPassID, &SILowerControlFlowID, false);
+
   TargetPassConfig::addFastRegAlloc(RegAllocPass);
 }
 
 void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
+  // This needs to be run directly before register allocation because earlier
+  // passes might recompute live intervals.
+  insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);
+
+  // TODO: It might be better to run this right after phi elimination, but for
+  // now that would require not running the verifier.
+  insertPass(&RenameIndependentSubregsID, &SILowerControlFlowID);
+
   TargetPassConfig::addOptimizedRegAlloc(RegAllocPass);
 }
 
@@ -579,7 +585,7 @@ void GCNPassConfig::addPreEmitPass() {
 
   addPass(createSIInsertWaitsPass());
   addPass(createSIShrinkInstructionsPass());
-  addPass(createSILowerControlFlowPass());
+  addPass(&SIInsertSkipsPassID);
   addPass(createSIDebuggerInsertNopsPass());
 }
author	Matt Arsenault <Matthew.Arsenault@amd.com>	2016-08-22 19:33:16 +0000
committer	Matt Arsenault <Matthew.Arsenault@amd.com>	2016-08-22 19:33:16 +0000
commit	78fc9daf8d1825d32f170f8e60f9158550f93e93 (patch)
tree	0fabfdfbe326321516366f98a99cf7de6c0b2e38 /llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
parent	88d7da01ca7af18ed6bd446d388999bf9668a3cf (diff)
download	llvm-78fc9daf8d1825d32f170f8e60f9158550f93e93.zip llvm-78fc9daf8d1825d32f170f8e60f9158550f93e93.tar.gz llvm-78fc9daf8d1825d32f170f8e60f9158550f93e93.tar.bz2