aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
diff options
context:
space:
mode:
author Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> 2025-08-18 14:31:41 -0700
committer GitHub <noreply@github.com> 2025-08-18 14:31:41 -0700
commit 668e6492b833fc3f329d3e772ab7c52a4d3fec93 (patch)
tree 505ed59834f829bfd60c49e023181ea30201b0ef /llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
parent 13dd65096b5311c01aa67ed34f85d4b03b57426b (diff)
download llvm-668e6492b833fc3f329d3e772ab7c52a4d3fec93.zip
llvm-668e6492b833fc3f329d3e772ab7c52a4d3fec93.tar.gz
llvm-668e6492b833fc3f329d3e772ab7c52a4d3fec93.tar.bz2
[AMDGPU] Support merging of flat GVS ops (#154200)
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp 62
1 files changed, 62 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index b49c5a9..e204d6b 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -87,6 +87,8 @@ enum InstClassEnum {
GLOBAL_STORE_SADDR,
FLAT_LOAD,
FLAT_STORE,
+ FLAT_LOAD_SADDR,
+ FLAT_STORE_SADDR,
GLOBAL_LOAD, // GLOBAL_LOAD/GLOBAL_STORE are never used as the InstClass of
GLOBAL_STORE // any CombineInfo, they are only ever returned by
// getCommonInstClass.
@@ -354,6 +356,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
case AMDGPU::GLOBAL_STORE_DWORD_SADDR:
case AMDGPU::FLAT_LOAD_DWORD:
case AMDGPU::FLAT_STORE_DWORD:
+ case AMDGPU::FLAT_LOAD_DWORD_SADDR:
+ case AMDGPU::FLAT_STORE_DWORD_SADDR:
return 1;
case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM:
@@ -367,6 +371,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR:
case AMDGPU::FLAT_LOAD_DWORDX2:
case AMDGPU::FLAT_STORE_DWORDX2:
+ case AMDGPU::FLAT_LOAD_DWORDX2_SADDR:
+ case AMDGPU::FLAT_STORE_DWORDX2_SADDR:
return 2;
case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM:
@@ -380,6 +386,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
case AMDGPU::FLAT_LOAD_DWORDX3:
case AMDGPU::FLAT_STORE_DWORDX3:
+ case AMDGPU::FLAT_LOAD_DWORDX3_SADDR:
+ case AMDGPU::FLAT_STORE_DWORDX3_SADDR:
return 3;
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM:
@@ -393,6 +401,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
case AMDGPU::FLAT_LOAD_DWORDX4:
case AMDGPU::FLAT_STORE_DWORDX4:
+ case AMDGPU::FLAT_LOAD_DWORDX4_SADDR:
+ case AMDGPU::FLAT_STORE_DWORDX4_SADDR:
return 4;
case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM:
@@ -575,6 +585,16 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
return GLOBAL_STORE_SADDR;
+ case AMDGPU::FLAT_LOAD_DWORD_SADDR:
+ case AMDGPU::FLAT_LOAD_DWORDX2_SADDR:
+ case AMDGPU::FLAT_LOAD_DWORDX3_SADDR:
+ case AMDGPU::FLAT_LOAD_DWORDX4_SADDR:
+ return FLAT_LOAD_SADDR;
+ case AMDGPU::FLAT_STORE_DWORD_SADDR:
+ case AMDGPU::FLAT_STORE_DWORDX2_SADDR:
+ case AMDGPU::FLAT_STORE_DWORDX3_SADDR:
+ case AMDGPU::FLAT_STORE_DWORDX4_SADDR:
+ return FLAT_STORE_SADDR;
}
}
@@ -661,6 +681,16 @@ static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) {
case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
return AMDGPU::GLOBAL_STORE_DWORD_SADDR;
+ case AMDGPU::FLAT_LOAD_DWORD_SADDR:
+ case AMDGPU::FLAT_LOAD_DWORDX2_SADDR:
+ case AMDGPU::FLAT_LOAD_DWORDX3_SADDR:
+ case AMDGPU::FLAT_LOAD_DWORDX4_SADDR:
+ return AMDGPU::FLAT_LOAD_DWORD_SADDR;
+ case AMDGPU::FLAT_STORE_DWORD_SADDR:
+ case AMDGPU::FLAT_STORE_DWORDX2_SADDR:
+ case AMDGPU::FLAT_STORE_DWORDX3_SADDR:
+ case AMDGPU::FLAT_STORE_DWORDX4_SADDR:
+ return AMDGPU::FLAT_STORE_DWORD_SADDR;
}
}
@@ -776,6 +806,14 @@ static AddressRegs getRegs(unsigned Opc, const SIInstrInfo &TII) {
case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR:
case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
+ case AMDGPU::FLAT_LOAD_DWORD_SADDR:
+ case AMDGPU::FLAT_LOAD_DWORDX2_SADDR:
+ case AMDGPU::FLAT_LOAD_DWORDX3_SADDR:
+ case AMDGPU::FLAT_LOAD_DWORDX4_SADDR:
+ case AMDGPU::FLAT_STORE_DWORD_SADDR:
+ case AMDGPU::FLAT_STORE_DWORDX2_SADDR:
+ case AMDGPU::FLAT_STORE_DWORDX3_SADDR:
+ case AMDGPU::FLAT_STORE_DWORDX4_SADDR:
Result.SAddr = true;
[[fallthrough]];
case AMDGPU::GLOBAL_LOAD_DWORD:
@@ -1875,6 +1913,28 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI,
case 4:
return AMDGPU::FLAT_STORE_DWORDX4;
}
+ case FLAT_LOAD_SADDR:
+ switch (Width) {
+ default:
+ return 0;
+ case 2:
+ return AMDGPU::FLAT_LOAD_DWORDX2_SADDR;
+ case 3:
+ return AMDGPU::FLAT_LOAD_DWORDX3_SADDR;
+ case 4:
+ return AMDGPU::FLAT_LOAD_DWORDX4_SADDR;
+ }
+ case FLAT_STORE_SADDR:
+ switch (Width) {
+ default:
+ return 0;
+ case 2:
+ return AMDGPU::FLAT_STORE_DWORDX2_SADDR;
+ case 3:
+ return AMDGPU::FLAT_STORE_DWORDX3_SADDR;
+ case 4:
+ return AMDGPU::FLAT_STORE_DWORDX4_SADDR;
+ }
case MIMG:
assert(((unsigned)llvm::popcount(CI.DMask | Paired.DMask) == Width) &&
"No overlaps");
@@ -2508,12 +2568,14 @@ SILoadStoreOptimizer::optimizeInstsWithSameBaseAddr(
OptimizeListAgain |= CI.Width + Paired.Width < 4;
break;
case FLAT_LOAD:
+ case FLAT_LOAD_SADDR:
case GLOBAL_LOAD:
case GLOBAL_LOAD_SADDR:
NewMI = mergeFlatLoadPair(CI, Paired, Where->I);
OptimizeListAgain |= CI.Width + Paired.Width < 4;
break;
case FLAT_STORE:
+ case FLAT_STORE_SADDR:
case GLOBAL_STORE:
case GLOBAL_STORE_SADDR:
NewMI = mergeFlatStorePair(CI, Paired, Where->I);