diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-08-22 19:33:16 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-08-22 19:33:16 +0000 |
commit | 78fc9daf8d1825d32f170f8e60f9158550f93e93 (patch) | |
tree | 0fabfdfbe326321516366f98a99cf7de6c0b2e38 /llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | |
parent | 88d7da01ca7af18ed6bd446d388999bf9668a3cf (diff) | |
download | llvm-78fc9daf8d1825d32f170f8e60f9158550f93e93.zip llvm-78fc9daf8d1825d32f170f8e60f9158550f93e93.tar.gz llvm-78fc9daf8d1825d32f170f8e60f9158550f93e93.tar.bz2 |
AMDGPU: Split SILowerControlFlow into two pieces
Do most of the lowering in a pre-RA pass. Keep the skip jump
insertion late, plus a few other things that require more
work to move out.
One concern I have is that there may now be COPY instructions
that lack the necessary implicit exec uses if they are later
lowered to v_mov_b32.
This has a positive effect on SGPR usage in shader-db.
llvm-svn: 279464
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 22 |
1 files changed, 14 insertions, 8 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index f782ea3..c8900ff 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -80,6 +80,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
   initializeSIInsertWaitsPass(*PR);
   initializeSIWholeQuadModePass(*PR);
   initializeSILowerControlFlowPass(*PR);
+  initializeSIInsertSkipsPass(*PR);
   initializeSIDebuggerInsertNopsPass(*PR);
 }
 
@@ -532,13 +533,6 @@ bool GCNPassConfig::addGlobalInstructionSelect() {
 #endif
 
 void GCNPassConfig::addPreRegAlloc() {
-  // This needs to be run directly before register allocation because
-  // earlier passes might recompute live intervals.
-  // TODO: handle CodeGenOpt::None; fast RA ignores spill weights set by the pass
-  if (getOptLevel() > CodeGenOpt::None) {
-    insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);
-  }
-
   if (getOptLevel() > CodeGenOpt::None) {
     // Don't do this with no optimizations since it throws away debug info by
     // merging nonadjacent loads.
@@ -556,10 +550,22 @@ void GCNPassConfig::addPreRegAlloc() {
 }
 
 void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
+  // FIXME: We have to disable the verifier here because of PHIElimination +
+  // TwoAddressInstructions disabling it.
+  insertPass(&TwoAddressInstructionPassID, &SILowerControlFlowID, false);
+
   TargetPassConfig::addFastRegAlloc(RegAllocPass);
 }
 
 void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
+  // This needs to be run directly before register allocation because earlier
+  // passes might recompute live intervals.
+  insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);
+
+  // TODO: It might be better to run this right after phi elimination, but for
+  // now that would require not running the verifier.
+  insertPass(&RenameIndependentSubregsID, &SILowerControlFlowID);
+
   TargetPassConfig::addOptimizedRegAlloc(RegAllocPass);
 }
 
@@ -579,7 +585,7 @@ void GCNPassConfig::addPreEmitPass() {
 
   addPass(createSIInsertWaitsPass());
   addPass(createSIShrinkInstructionsPass());
-  addPass(createSILowerControlFlowPass());
+  addPass(&SIInsertSkipsPassID);
   addPass(createSIDebuggerInsertNopsPass());
 }
 |