diff options
author | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2022-02-24 11:32:21 -0800 |
---|---|---|
committer | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2022-03-09 10:04:37 -0800 |
commit | 33fb23f728146cd9a1b8a0bf74c666fb2eeffbb5 (patch) | |
tree | 7ba23db4c240f84986d1cdb9a2f75febc2eb3751 /llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | |
parent | c7218164c48bf69054e67c66bd9e99a077fbd3c4 (diff) | |
download | llvm-33fb23f728146cd9a1b8a0bf74c666fb2eeffbb5.zip llvm-33fb23f728146cd9a1b8a0bf74c666fb2eeffbb5.tar.gz llvm-33fb23f728146cd9a1b8a0bf74c666fb2eeffbb5.tar.bz2 |
[AMDGPU] Merge flat with global in the SILoadStoreOptimizer
Flat can be merged with flat global since address cast is a no-op.
A combined memory operation needs to be promoted to flat.
Differential Revision: https://reviews.llvm.org/D120431
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | 83 |
1 files changed, 53 insertions, 30 deletions
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index 4fc8d3d..6d4e1d2 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -79,12 +79,13 @@ enum InstClassEnum { MIMG, TBUFFER_LOAD, TBUFFER_STORE, - GLOBAL_LOAD, GLOBAL_LOAD_SADDR, - GLOBAL_STORE, GLOBAL_STORE_SADDR, FLAT_LOAD, - FLAT_STORE + FLAT_STORE, + GLOBAL_LOAD, // GLOBAL_LOAD/GLOBAL_STORE are never used as the InstClass of + GLOBAL_STORE // any CombineInfo, they are only ever returned by + // getCommonInstClass. }; struct AddressRegs { @@ -275,6 +276,9 @@ private: static MachineMemOperand *combineKnownAdjacentMMOs(const CombineInfo &CI, const CombineInfo &Paired); + static InstClassEnum getCommonInstClass(const CombineInfo &CI, + const CombineInfo &Paired); + public: static char ID; @@ -438,7 +442,11 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) { case AMDGPU::GLOBAL_LOAD_DWORDX2: case AMDGPU::GLOBAL_LOAD_DWORDX3: case AMDGPU::GLOBAL_LOAD_DWORDX4: - return GLOBAL_LOAD; + case AMDGPU::FLAT_LOAD_DWORD: + case AMDGPU::FLAT_LOAD_DWORDX2: + case AMDGPU::FLAT_LOAD_DWORDX3: + case AMDGPU::FLAT_LOAD_DWORDX4: + return FLAT_LOAD; case AMDGPU::GLOBAL_LOAD_DWORD_SADDR: case AMDGPU::GLOBAL_LOAD_DWORDX2_SADDR: case AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR: @@ -448,22 +456,16 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) { case AMDGPU::GLOBAL_STORE_DWORDX2: case AMDGPU::GLOBAL_STORE_DWORDX3: case AMDGPU::GLOBAL_STORE_DWORDX4: - return GLOBAL_STORE; - case AMDGPU::GLOBAL_STORE_DWORD_SADDR: - case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR: - case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR: - case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR: - return GLOBAL_STORE_SADDR; - case AMDGPU::FLAT_LOAD_DWORD: - case AMDGPU::FLAT_LOAD_DWORDX2: - case AMDGPU::FLAT_LOAD_DWORDX3: - case AMDGPU::FLAT_LOAD_DWORDX4: - return FLAT_LOAD; case AMDGPU::FLAT_STORE_DWORD: case AMDGPU::FLAT_STORE_DWORDX2: case AMDGPU::FLAT_STORE_DWORDX3: case AMDGPU::FLAT_STORE_DWORDX4: return FLAT_STORE; + case AMDGPU::GLOBAL_STORE_DWORD_SADDR: + case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR: + case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR: + case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR: + return GLOBAL_STORE_SADDR; } } @@ -501,7 +503,11 @@ static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) { case AMDGPU::GLOBAL_LOAD_DWORDX2: case AMDGPU::GLOBAL_LOAD_DWORDX3: case AMDGPU::GLOBAL_LOAD_DWORDX4: - return AMDGPU::GLOBAL_LOAD_DWORD; + case AMDGPU::FLAT_LOAD_DWORD: + case AMDGPU::FLAT_LOAD_DWORDX2: + case AMDGPU::FLAT_LOAD_DWORDX3: + case AMDGPU::FLAT_LOAD_DWORDX4: + return AMDGPU::FLAT_LOAD_DWORD; case AMDGPU::GLOBAL_LOAD_DWORD_SADDR: case AMDGPU::GLOBAL_LOAD_DWORDX2_SADDR: case AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR: @@ -511,25 +517,37 @@ static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) { case AMDGPU::GLOBAL_STORE_DWORDX2: case AMDGPU::GLOBAL_STORE_DWORDX3: case AMDGPU::GLOBAL_STORE_DWORDX4: - return AMDGPU::GLOBAL_STORE_DWORD; - case AMDGPU::GLOBAL_STORE_DWORD_SADDR: - case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR: - case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR: - case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR: - return AMDGPU::GLOBAL_STORE_DWORD_SADDR; - case AMDGPU::FLAT_LOAD_DWORD: - case AMDGPU::FLAT_LOAD_DWORDX2: - case AMDGPU::FLAT_LOAD_DWORDX3: - case AMDGPU::FLAT_LOAD_DWORDX4: - return AMDGPU::FLAT_LOAD_DWORD; case AMDGPU::FLAT_STORE_DWORD: case AMDGPU::FLAT_STORE_DWORDX2: case AMDGPU::FLAT_STORE_DWORDX3: case AMDGPU::FLAT_STORE_DWORDX4: return AMDGPU::FLAT_STORE_DWORD; + case AMDGPU::GLOBAL_STORE_DWORD_SADDR: + case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR: + case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR: + case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR: + return AMDGPU::GLOBAL_STORE_DWORD_SADDR; } } +// GLOBAL loads and stores are classified as FLAT initially. If both combined +// instructions are FLAT GLOBAL adjust the class to GLOBAL_LOAD or GLOBAL_STORE. +// If either or both instructions are non segment specific FLAT the resulting +// combined operation will be FLAT, potentially promoting one of the GLOBAL +// operations to FLAT. +// For other instructions return the original unmodified class. +InstClassEnum +SILoadStoreOptimizer::getCommonInstClass(const CombineInfo &CI, + const CombineInfo &Paired) { + assert(CI.InstClass == Paired.InstClass); + + if ((CI.InstClass == FLAT_LOAD || CI.InstClass == FLAT_STORE) && + SIInstrInfo::isFLATGlobal(*CI.I) && SIInstrInfo::isFLATGlobal(*Paired.I)) + return (CI.InstClass == FLAT_STORE) ? GLOBAL_STORE : GLOBAL_LOAD; + + return CI.InstClass; +} + static AddressRegs getRegs(unsigned Opc, const SIInstrInfo &TII) { AddressRegs Result; @@ -762,10 +780,15 @@ SILoadStoreOptimizer::combineKnownAdjacentMMOs(const CombineInfo &CI, // A base pointer for the combined operation is the same as the leading // operation's pointer. if (Paired < CI) - MMOa = MMOb; + std::swap(MMOa, MMOb); + + MachinePointerInfo PtrInfo(MMOa->getPointerInfo()); + // If merging FLAT and GLOBAL set address space to FLAT. + if (MMOb->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS) + PtrInfo.AddrSpace = AMDGPUAS::FLAT_ADDRESS; MachineFunction *MF = CI.I->getMF(); - return MF->getMachineMemOperand(MMOa, MMOa->getPointerInfo(), Size); + return MF->getMachineMemOperand(MMOa, PtrInfo, Size); } bool SILoadStoreOptimizer::dmasksCanBeCombined(const CombineInfo &CI, @@ -1576,7 +1599,7 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI, const CombineInfo &Paired) { const unsigned Width = CI.Width + Paired.Width; - switch (CI.InstClass) { + switch (getCommonInstClass(CI, Paired)) { default: assert(CI.InstClass == BUFFER_LOAD || CI.InstClass == BUFFER_STORE); // FIXME: Handle d16 correctly |