diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2016-08-29 19:42:52 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2016-08-29 19:42:52 +0000 |
commit | 0d23ebe8883af4b280897af751614c7b433e00f7 (patch) | |
tree | 451e879c6008fb9fa90460d4fe9d22640f3ea01b /llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | |
parent | 9b1669ae353e02c0fdbbe1ac181f18a453e98b35 (diff) | |
download | llvm-0d23ebe8883af4b280897af751614c7b433e00f7.zip llvm-0d23ebe8883af4b280897af751614c7b433e00f7.tar.gz llvm-0d23ebe8883af4b280897af751614c7b433e00f7.tar.bz2 |
AMDGPU/SI: Implement a custom MachineSchedStrategy
Summary:
GCNSchedStrategy re-uses most of GenericScheduler; it just uses
a different method to compute the excess and critical register
pressure limits.
It's not enabled by default; to enable it you need to pass -misched=gcn
to llc.
Shader DB stats:
32464 shaders in 17874 tests
Totals:
SGPRS: 1542846 -> 1643125 (6.50 %)
VGPRS: 1005595 -> 904653 (-10.04 %)
Spilled SGPRs: 29929 -> 27745 (-7.30 %)
Spilled VGPRs: 334 -> 352 (5.39 %)
Scratch VGPRs: 1612 -> 1624 (0.74 %) dwords per thread
Code Size: 36688188 -> 37034900 (0.95 %) bytes
LDS: 1913 -> 1913 (0.00 %) blocks
Max Waves: 254101 -> 265125 (4.34 %)
Wait states: 0 -> 0 (0.00 %)
Totals from affected shaders:
SGPRS: 1338220 -> 1438499 (7.49 %)
VGPRS: 886221 -> 785279 (-11.39 %)
Spilled SGPRs: 29869 -> 27685 (-7.31 %)
Spilled VGPRs: 334 -> 352 (5.39 %)
Scratch VGPRs: 1612 -> 1624 (0.74 %) dwords per thread
Code Size: 34315716 -> 34662428 (1.01 %) bytes
LDS: 1551 -> 1551 (0.00 %) blocks
Max Waves: 188127 -> 199151 (5.86 %)
Wait states: 0 -> 0 (0.00 %)
Reviewers: arsenm, mareko, nhaehnle, MatzeB, atrick
Subscribers: arsenm, kzhuravl, llvm-commits
Differential Revision: https://reviews.llvm.org/D23688
llvm-svn: 279995
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 16 |
1 files changed, 15 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index a86603a..f144ce2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -18,6 +18,7 @@ #include "AMDGPUCallLowering.h" #include "AMDGPUTargetObjectFile.h" #include "AMDGPUTargetTransformInfo.h" +#include "GCNSchedStrategy.h" #include "R600ISelLowering.h" #include "R600InstrInfo.h" #include "R600MachineScheduler.h" @@ -96,6 +97,14 @@ static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) { return new SIScheduleDAGMI(C); } +static ScheduleDAGInstrs * +createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) { + ScheduleDAGMILive *DAG = + new ScheduleDAGMILive(C, make_unique<GCNMaxOccupancySchedStrategy>(C)); + DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); + return DAG; +} + static MachineSchedRegistry R600SchedRegistry("r600", "Run R600's custom scheduler", createR600MachineScheduler); @@ -104,6 +113,11 @@ static MachineSchedRegistry SISchedRegistry("si", "Run SI's custom scheduler", createSIMachineScheduler); +static MachineSchedRegistry +GCNMaxOccupancySchedRegistry("gcn-max-occupancy", + "Run GCN scheduler to maximize occupancy", + createGCNMaxOccupancyMachineScheduler); + static StringRef computeDataLayout(const Triple &TT) { if (TT.getArch() == Triple::r600) { // 32-bit pointers. @@ -467,7 +481,7 @@ ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler( const SISubtarget &ST = C->MF->getSubtarget<SISubtarget>(); if (ST.enableSIScheduler()) return createSIMachineScheduler(C); - return nullptr; + return createGCNMaxOccupancyMachineScheduler(C); } bool GCNPassConfig::addPreISel() { |