diff options
author | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2025-08-18 14:31:41 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-08-18 14:31:41 -0700 |
commit | 668e6492b833fc3f329d3e772ab7c52a4d3fec93 (patch) | |
tree | 505ed59834f829bfd60c49e023181ea30201b0ef /llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | |
parent | 13dd65096b5311c01aa67ed34f85d4b03b57426b (diff) | |
download | llvm-668e6492b833fc3f329d3e772ab7c52a4d3fec93.zip llvm-668e6492b833fc3f329d3e772ab7c52a4d3fec93.tar.gz llvm-668e6492b833fc3f329d3e772ab7c52a4d3fec93.tar.bz2 |
[AMDGPU] Support merging of flat GVS ops (#154200)
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | 62 |
1 file changed, 62 insertions, 0 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index b49c5a9..e204d6b 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -87,6 +87,8 @@ enum InstClassEnum {
   GLOBAL_STORE_SADDR,
   FLAT_LOAD,
   FLAT_STORE,
+  FLAT_LOAD_SADDR,
+  FLAT_STORE_SADDR,
   GLOBAL_LOAD, // GLOBAL_LOAD/GLOBAL_STORE are never used as the InstClass of
   GLOBAL_STORE // any CombineInfo, they are only ever returned by
                // getCommonInstClass.
@@ -354,6 +356,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
   case AMDGPU::GLOBAL_STORE_DWORD_SADDR:
   case AMDGPU::FLAT_LOAD_DWORD:
   case AMDGPU::FLAT_STORE_DWORD:
+  case AMDGPU::FLAT_LOAD_DWORD_SADDR:
+  case AMDGPU::FLAT_STORE_DWORD_SADDR:
     return 1;
   case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM:
@@ -367,6 +371,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
   case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR:
   case AMDGPU::FLAT_LOAD_DWORDX2:
   case AMDGPU::FLAT_STORE_DWORDX2:
+  case AMDGPU::FLAT_LOAD_DWORDX2_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX2_SADDR:
     return 2;
   case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM:
@@ -380,6 +386,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
   case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
   case AMDGPU::FLAT_LOAD_DWORDX3:
   case AMDGPU::FLAT_STORE_DWORDX3:
+  case AMDGPU::FLAT_LOAD_DWORDX3_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX3_SADDR:
     return 3;
   case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM:
@@ -393,6 +401,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
   case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
   case AMDGPU::FLAT_LOAD_DWORDX4:
   case AMDGPU::FLAT_STORE_DWORDX4:
+  case AMDGPU::FLAT_LOAD_DWORDX4_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX4_SADDR:
     return 4;
   case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM:
@@ -575,6 +585,16 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
   case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
   case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
     return GLOBAL_STORE_SADDR;
+  case AMDGPU::FLAT_LOAD_DWORD_SADDR:
+  case AMDGPU::FLAT_LOAD_DWORDX2_SADDR:
+  case AMDGPU::FLAT_LOAD_DWORDX3_SADDR:
+  case AMDGPU::FLAT_LOAD_DWORDX4_SADDR:
+    return FLAT_LOAD_SADDR;
+  case AMDGPU::FLAT_STORE_DWORD_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX2_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX3_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX4_SADDR:
+    return FLAT_STORE_SADDR;
   }
 }
 
@@ -661,6 +681,16 @@ static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) {
   case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
   case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
     return AMDGPU::GLOBAL_STORE_DWORD_SADDR;
+  case AMDGPU::FLAT_LOAD_DWORD_SADDR:
+  case AMDGPU::FLAT_LOAD_DWORDX2_SADDR:
+  case AMDGPU::FLAT_LOAD_DWORDX3_SADDR:
+  case AMDGPU::FLAT_LOAD_DWORDX4_SADDR:
+    return AMDGPU::FLAT_LOAD_DWORD_SADDR;
+  case AMDGPU::FLAT_STORE_DWORD_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX2_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX3_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX4_SADDR:
+    return AMDGPU::FLAT_STORE_DWORD_SADDR;
   }
 }
 
@@ -776,6 +806,14 @@ static AddressRegs getRegs(unsigned Opc, const SIInstrInfo &TII) {
   case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR:
   case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
   case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
+  case AMDGPU::FLAT_LOAD_DWORD_SADDR:
+  case AMDGPU::FLAT_LOAD_DWORDX2_SADDR:
+  case AMDGPU::FLAT_LOAD_DWORDX3_SADDR:
+  case AMDGPU::FLAT_LOAD_DWORDX4_SADDR:
+  case AMDGPU::FLAT_STORE_DWORD_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX2_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX3_SADDR:
+  case AMDGPU::FLAT_STORE_DWORDX4_SADDR:
     Result.SAddr = true;
     [[fallthrough]];
   case AMDGPU::GLOBAL_LOAD_DWORD:
@@ -1875,6 +1913,28 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI,
     case 4:
       return AMDGPU::FLAT_STORE_DWORDX4;
     }
+  case FLAT_LOAD_SADDR:
+    switch (Width) {
+    default:
+      return 0;
+    case 2:
+      return AMDGPU::FLAT_LOAD_DWORDX2_SADDR;
+    case 3:
+      return AMDGPU::FLAT_LOAD_DWORDX3_SADDR;
+    case 4:
+      return AMDGPU::FLAT_LOAD_DWORDX4_SADDR;
+    }
+  case FLAT_STORE_SADDR:
+    switch (Width) {
+    default:
+      return 0;
+    case 2:
+      return AMDGPU::FLAT_STORE_DWORDX2_SADDR;
+    case 3:
+      return AMDGPU::FLAT_STORE_DWORDX3_SADDR;
+    case 4:
+      return AMDGPU::FLAT_STORE_DWORDX4_SADDR;
+    }
   case MIMG:
     assert(((unsigned)llvm::popcount(CI.DMask | Paired.DMask) == Width) &&
            "No overlaps");
@@ -2508,12 +2568,14 @@ SILoadStoreOptimizer::optimizeInstsWithSameBaseAddr(
       OptimizeListAgain |= CI.Width + Paired.Width < 4;
       break;
     case FLAT_LOAD:
+    case FLAT_LOAD_SADDR:
     case GLOBAL_LOAD:
     case GLOBAL_LOAD_SADDR:
       NewMI = mergeFlatLoadPair(CI, Paired, Where->I);
       OptimizeListAgain |= CI.Width + Paired.Width < 4;
       break;
     case FLAT_STORE:
+    case FLAT_STORE_SADDR:
     case GLOBAL_STORE:
     case GLOBAL_STORE_SADDR:
       NewMI = mergeFlatStorePair(CI, Paired, Where->I);
```