AMDGPU/SI: Improve SILoadStoreOptimizer and run it before the scheduler

Summary: The SILoadStoreOptimizer can now look ahead more than one instruction when looking for instructions to merge, which greatly improves the number of loads/stores that we are able to merge. Moving the pass before scheduling avoids increasing register pressure after the scheduler, so that the scheduler's register pressure estimates will be more accurate. It also gives more consistent results, since it is no longer affected by minor scheduling changes. Reviewers: arsenm Subscribers: arsenm, kzhuravl, llvm-commits Differential Revision: https://reviews.llvm.org/D23814 llvm-svn: 279991
author: Tom Stellard <thomas.stellard@amd.com> 2016-08-29 19:15:22 +0000
committer: Tom Stellard <thomas.stellard@amd.com> 2016-08-29 19:15:22 +0000
commit: c2ff0eb69762f0c87545b74c89d99cfdbb0913e9 (patch)
tree: 23670976c67603007a5f4b62773205d746b8b9dc /llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
parent: c10c33444e9c02124eb39d1521646ee1bc8a5525 (diff)
download: llvm-c2ff0eb69762f0c87545b74c89d99cfdbb0913e9.zip
llvm-c2ff0eb69762f0c87545b74c89d99cfdbb0913e9.tar.gz
llvm-c2ff0eb69762f0c87545b74c89d99cfdbb0913e9.tar.bz2
1 files changed, 1 insertions, 11 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index c8900ff..a86603a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -497,6 +497,7 @@ void GCNPassConfig::addMachineSSAOptimization() {
   // XXX - Can we get away without running DeadMachineInstructionElim again?
   addPass(&SIFoldOperandsID);
   addPass(&DeadMachineInstructionElimID);
+  addPass(&SILoadStoreOptimizerID);
 }
 
 void GCNPassConfig::addIRPasses() {
@@ -533,17 +534,6 @@ bool GCNPassConfig::addGlobalInstructionSelect() {
 #endif
 
 void GCNPassConfig::addPreRegAlloc() {
-  if (getOptLevel() > CodeGenOpt::None) {
-    // Don't do this with no optimizations since it throws away debug info by
-    // merging nonadjacent loads.
-
-    // This should be run after scheduling, but before register allocation. It
-    // also need extra copies to the address operand to be eliminated.
-
-    // FIXME: Move pre-RA and remove extra reg coalescer run.
-    insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
-    insertPass(&MachineSchedulerID, &RegisterCoalescerID);
-  }
 
   addPass(createSIShrinkInstructionsPass());
   addPass(createSIWholeQuadModePass());
author	Tom Stellard <thomas.stellard@amd.com>	2016-08-29 19:15:22 +0000
committer	Tom Stellard <thomas.stellard@amd.com>	2016-08-29 19:15:22 +0000
commit	c2ff0eb69762f0c87545b74c89d99cfdbb0913e9 (patch)
tree	23670976c67603007a5f4b62773205d746b8b9dc /llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
parent	c10c33444e9c02124eb39d1521646ee1bc8a5525 (diff)
download	llvm-c2ff0eb69762f0c87545b74c89d99cfdbb0913e9.zip llvm-c2ff0eb69762f0c87545b74c89d99cfdbb0913e9.tar.gz llvm-c2ff0eb69762f0c87545b74c89d99cfdbb0913e9.tar.bz2