aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | 42
1 file changed, 41 insertions, 1 deletion
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 2a3271b..851346c 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -79,7 +79,8 @@ enum InstClassEnum {
MIMG,
TBUFFER_LOAD,
TBUFFER_STORE,
- GLOBAL_LOAD
+ GLOBAL_LOAD,
+ GLOBAL_LOAD_SADDR
};
struct AddressRegs {
@@ -87,6 +88,7 @@ struct AddressRegs {
bool SBase = false;
bool SRsrc = false;
bool SOffset = false;
+ bool SAddr = false;
bool VAddr = false;
bool Addr = false;
bool SSamp = false;
@@ -305,14 +307,18 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
switch (Opc) {
case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
case AMDGPU::GLOBAL_LOAD_DWORD:
+ case AMDGPU::GLOBAL_LOAD_DWORD_SADDR:
return 1;
case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
case AMDGPU::GLOBAL_LOAD_DWORDX2:
+ case AMDGPU::GLOBAL_LOAD_DWORDX2_SADDR:
return 2;
case AMDGPU::GLOBAL_LOAD_DWORDX3:
+ case AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR:
return 3;
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
case AMDGPU::GLOBAL_LOAD_DWORDX4:
+ case AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR:
return 4;
case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
return 8;
@@ -402,6 +408,11 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
case AMDGPU::GLOBAL_LOAD_DWORDX3:
case AMDGPU::GLOBAL_LOAD_DWORDX4:
return GLOBAL_LOAD;
+ case AMDGPU::GLOBAL_LOAD_DWORD_SADDR:
+ case AMDGPU::GLOBAL_LOAD_DWORDX2_SADDR:
+ case AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR:
+ case AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR:
+ return GLOBAL_LOAD_SADDR;
}
}
@@ -440,6 +451,11 @@ static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) {
case AMDGPU::GLOBAL_LOAD_DWORDX3:
case AMDGPU::GLOBAL_LOAD_DWORDX4:
return AMDGPU::GLOBAL_LOAD_DWORD;
+ case AMDGPU::GLOBAL_LOAD_DWORD_SADDR:
+ case AMDGPU::GLOBAL_LOAD_DWORDX2_SADDR:
+ case AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR:
+ case AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR:
+ return AMDGPU::GLOBAL_LOAD_DWORD_SADDR;
}
}
@@ -502,6 +518,12 @@ static AddressRegs getRegs(unsigned Opc, const SIInstrInfo &TII) {
case AMDGPU::DS_WRITE_B64_gfx9:
Result.Addr = true;
return Result;
+ case AMDGPU::GLOBAL_LOAD_DWORD_SADDR:
+ case AMDGPU::GLOBAL_LOAD_DWORDX2_SADDR:
+ case AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR:
+ case AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR:
+ Result.SAddr = true;
+ LLVM_FALLTHROUGH;
case AMDGPU::GLOBAL_LOAD_DWORD:
case AMDGPU::GLOBAL_LOAD_DWORDX2:
case AMDGPU::GLOBAL_LOAD_DWORDX3:
@@ -579,6 +601,9 @@ void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI,
if (Regs.SOffset)
AddrIdx[NumAddresses++] =
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset);
+ if (Regs.SAddr)
+ AddrIdx[NumAddresses++] =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr);
if (Regs.VAddr)
AddrIdx[NumAddresses++] =
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
@@ -1402,6 +1427,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeGlobalLoadPair(
auto MIB = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode), DestReg);
+ if (auto *SAddr = TII->getNamedOperand(*CI.I, AMDGPU::OpName::saddr))
+ MIB.add(*SAddr);
+
const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
@@ -1471,6 +1499,17 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI,
case 4:
return AMDGPU::GLOBAL_LOAD_DWORDX4;
}
+ case GLOBAL_LOAD_SADDR:
+ switch (Width) {
+ default:
+ return 0;
+ case 2:
+ return AMDGPU::GLOBAL_LOAD_DWORDX2_SADDR;
+ case 3:
+ return AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR;
+ case 4:
+ return AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR;
+ }
case MIMG:
assert((countPopulation(CI.DMask | Paired.DMask) == Width) &&
"No overlaps");
@@ -2115,6 +2154,7 @@ SILoadStoreOptimizer::optimizeInstsWithSameBaseAddr(
OptimizeListAgain |= CI.Width + Paired.Width < 4;
break;
case GLOBAL_LOAD:
+ case GLOBAL_LOAD_SADDR:
NewMI = mergeGlobalLoadPair(CI, Paired, Where->I);
OptimizeListAgain |= CI.Width + Paired.Width < 4;
break;