aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
diff options
context:
space:
mode:
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2016-08-22 19:33:16 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2016-08-22 19:33:16 +0000
commit: 78fc9daf8d1825d32f170f8e60f9158550f93e93 (patch)
tree: 0fabfdfbe326321516366f98a99cf7de6c0b2e38 /llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
parent: 88d7da01ca7af18ed6bd446d388999bf9668a3cf (diff)
download: llvm-78fc9daf8d1825d32f170f8e60f9158550f93e93.zip
llvm-78fc9daf8d1825d32f170f8e60f9158550f93e93.tar.gz
llvm-78fc9daf8d1825d32f170f8e60f9158550f93e93.tar.bz2
AMDGPU: Split SILowerControlFlow into two pieces
Do most of the lowering in a pre-RA pass. Keep the skip jump insertion late, along with a few other things that require more work to move out. One concern is that there may now be COPY instructions that lack the necessary implicit exec uses if they are later lowered to v_mov_b32. This has a positive effect on SGPR usage in shader-db. llvm-svn: 279464
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 22
1 files changed, 14 insertions, 8 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index f782ea3..c8900ff 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -80,6 +80,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
initializeSIInsertWaitsPass(*PR);
initializeSIWholeQuadModePass(*PR);
initializeSILowerControlFlowPass(*PR);
+ initializeSIInsertSkipsPass(*PR);
initializeSIDebuggerInsertNopsPass(*PR);
}
@@ -532,13 +533,6 @@ bool GCNPassConfig::addGlobalInstructionSelect() {
#endif
void GCNPassConfig::addPreRegAlloc() {
- // This needs to be run directly before register allocation because
- // earlier passes might recompute live intervals.
- // TODO: handle CodeGenOpt::None; fast RA ignores spill weights set by the pass
- if (getOptLevel() > CodeGenOpt::None) {
- insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);
- }
-
if (getOptLevel() > CodeGenOpt::None) {
// Don't do this with no optimizations since it throws away debug info by
// merging nonadjacent loads.
@@ -556,10 +550,22 @@ void GCNPassConfig::addPreRegAlloc() {
}
void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
+ // FIXME: We have to disable the verifier here because of PHIElimination +
+ // TwoAddressInstructions disabling it.
+ insertPass(&TwoAddressInstructionPassID, &SILowerControlFlowID, false);
+
TargetPassConfig::addFastRegAlloc(RegAllocPass);
}
void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
+ // This needs to be run directly before register allocation because earlier
+ // passes might recompute live intervals.
+ insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);
+
+ // TODO: It might be better to run this right after phi elimination, but for
+ // now that would require not running the verifier.
+ insertPass(&RenameIndependentSubregsID, &SILowerControlFlowID);
+
TargetPassConfig::addOptimizedRegAlloc(RegAllocPass);
}
@@ -579,7 +585,7 @@ void GCNPassConfig::addPreEmitPass() {
addPass(createSIInsertWaitsPass());
addPass(createSIShrinkInstructionsPass());
- addPass(createSILowerControlFlowPass());
+ addPass(&SIInsertSkipsPassID);
addPass(createSIDebuggerInsertNopsPass());
}