aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2016-08-29 19:15:22 +0000
committerTom Stellard <thomas.stellard@amd.com>2016-08-29 19:15:22 +0000
commitc2ff0eb69762f0c87545b74c89d99cfdbb0913e9 (patch)
tree23670976c67603007a5f4b62773205d746b8b9dc /llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
parentc10c33444e9c02124eb39d1521646ee1bc8a5525 (diff)
downloadllvm-c2ff0eb69762f0c87545b74c89d99cfdbb0913e9.zip
llvm-c2ff0eb69762f0c87545b74c89d99cfdbb0913e9.tar.gz
llvm-c2ff0eb69762f0c87545b74c89d99cfdbb0913e9.tar.bz2
AMDGPU/SI: Improve SILoadStoreOptimizer and run it before the scheduler
Summary: The SILoadStoreOptimizer can now look ahead more than one instruction when looking for instructions to merge, which greatly improves the number of loads/stores that we are able to merge. Moving the pass before scheduling avoids increasing register pressure after the scheduler, so that the scheduler's register pressure estimates will be more accurate. It also gives more consistent results, since it is no longer affected by minor scheduling changes. Reviewers: arsenm Subscribers: arsenm, kzhuravl, llvm-commits Differential Revision: https://reviews.llvm.org/D23814 llvm-svn: 279991
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp12
1 files changed, 1 insertions, 11 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index c8900ff..a86603a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -497,6 +497,7 @@ void GCNPassConfig::addMachineSSAOptimization() {
// XXX - Can we get away without running DeadMachineInstructionElim again?
addPass(&SIFoldOperandsID);
addPass(&DeadMachineInstructionElimID);
+ addPass(&SILoadStoreOptimizerID);
}
void GCNPassConfig::addIRPasses() {
@@ -533,17 +534,6 @@ bool GCNPassConfig::addGlobalInstructionSelect() {
#endif
void GCNPassConfig::addPreRegAlloc() {
- if (getOptLevel() > CodeGenOpt::None) {
- // Don't do this with no optimizations since it throws away debug info by
- // merging nonadjacent loads.
-
- // This should be run after scheduling, but before register allocation. It
- // also need extra copies to the address operand to be eliminated.
-
- // FIXME: Move pre-RA and remove extra reg coalescer run.
- insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
- insertPass(&MachineSchedulerID, &RegisterCoalescerID);
- }
addPass(createSIShrinkInstructionsPass());
addPass(createSIWholeQuadModePass());