author    Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>  2022-02-24 11:32:21 -0800
committer Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>  2022-03-09 10:04:37 -0800
commit    33fb23f728146cd9a1b8a0bf74c666fb2eeffbb5 (patch)
tree      7ba23db4c240f84986d1cdb9a2f75febc2eb3751 /llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
parent    c7218164c48bf69054e67c66bd9e99a077fbd3c4 (diff)
[AMDGPU] Merge flat with global in the SILoadStoreOptimizer
Flat loads and stores can be merged with flat-global ones, since the address-space cast between global and flat is a no-op. The combined memory operation then needs to be promoted to flat.

Differential Revision: https://reviews.llvm.org/D120431
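To make the effect concrete, here is a hedged sketch of the pairings the pass can now handle (the instruction pairs are illustrative and assumed adjacent and otherwise mergeable; only the mixed case is new in this patch):

    FLAT_LOAD_DWORD   + FLAT_LOAD_DWORD   -> FLAT_LOAD_DWORDX2
    GLOBAL_LOAD_DWORD + GLOBAL_LOAD_DWORD -> GLOBAL_LOAD_DWORDX2
    GLOBAL_LOAD_DWORD + FLAT_LOAD_DWORD   -> FLAT_LOAD_DWORDX2   (promoted to flat)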
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp')
-rw-r--r--  llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | 83
1 file changed, 53 insertions(+), 30 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 4fc8d3d..6d4e1d2 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -79,12 +79,13 @@ enum InstClassEnum {
MIMG,
TBUFFER_LOAD,
TBUFFER_STORE,
- GLOBAL_LOAD,
GLOBAL_LOAD_SADDR,
- GLOBAL_STORE,
GLOBAL_STORE_SADDR,
FLAT_LOAD,
- FLAT_STORE
+ FLAT_STORE,
+ GLOBAL_LOAD, // GLOBAL_LOAD/GLOBAL_STORE are never used as the InstClass of
+ GLOBAL_STORE // any CombineInfo, they are only ever returned by
+ // getCommonInstClass.
};
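A minimal sketch of the invariant the trailing comment describes (the helper below is hypothetical, not part of the patch): getInstClass() never hands out the two trailing enumerators, so any class stored in a CombineInfo satisfies this check.

static void checkCombineInfoClass(InstClassEnum C) {
  // GLOBAL_LOAD/GLOBAL_STORE only ever come out of getCommonInstClass().
  assert(C != GLOBAL_LOAD && C != GLOBAL_STORE &&
         "never stored in a CombineInfo");
}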
struct AddressRegs {
@@ -275,6 +276,9 @@ private:
static MachineMemOperand *combineKnownAdjacentMMOs(const CombineInfo &CI,
const CombineInfo &Paired);
+ static InstClassEnum getCommonInstClass(const CombineInfo &CI,
+ const CombineInfo &Paired);
+
public:
static char ID;
@@ -438,7 +442,11 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
case AMDGPU::GLOBAL_LOAD_DWORDX2:
case AMDGPU::GLOBAL_LOAD_DWORDX3:
case AMDGPU::GLOBAL_LOAD_DWORDX4:
- return GLOBAL_LOAD;
+ case AMDGPU::FLAT_LOAD_DWORD:
+ case AMDGPU::FLAT_LOAD_DWORDX2:
+ case AMDGPU::FLAT_LOAD_DWORDX3:
+ case AMDGPU::FLAT_LOAD_DWORDX4:
+ return FLAT_LOAD;
case AMDGPU::GLOBAL_LOAD_DWORD_SADDR:
case AMDGPU::GLOBAL_LOAD_DWORDX2_SADDR:
case AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR:
@@ -448,22 +456,16 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
case AMDGPU::GLOBAL_STORE_DWORDX2:
case AMDGPU::GLOBAL_STORE_DWORDX3:
case AMDGPU::GLOBAL_STORE_DWORDX4:
- return GLOBAL_STORE;
- case AMDGPU::GLOBAL_STORE_DWORD_SADDR:
- case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR:
- case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
- case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
- return GLOBAL_STORE_SADDR;
- case AMDGPU::FLAT_LOAD_DWORD:
- case AMDGPU::FLAT_LOAD_DWORDX2:
- case AMDGPU::FLAT_LOAD_DWORDX3:
- case AMDGPU::FLAT_LOAD_DWORDX4:
- return FLAT_LOAD;
case AMDGPU::FLAT_STORE_DWORD:
case AMDGPU::FLAT_STORE_DWORDX2:
case AMDGPU::FLAT_STORE_DWORDX3:
case AMDGPU::FLAT_STORE_DWORDX4:
return FLAT_STORE;
+ case AMDGPU::GLOBAL_STORE_DWORD_SADDR:
+ case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR:
+ case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
+ case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
+ return GLOBAL_STORE_SADDR;
}
}
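The net effect of the reshuffled cases, as a hedged sketch (assuming a SIInstrInfo reference is in scope): flat and vaddr-form global opcodes now classify into the same bucket, which is what makes a mixed flat/global pair a merge candidate at all, while the SADDR global forms keep their own class.

static void illustrateInstClass(const SIInstrInfo &TII) {
  // Illustrative only; follows directly from the switch above.
  assert(getInstClass(AMDGPU::FLAT_LOAD_DWORD, TII) == FLAT_LOAD);
  assert(getInstClass(AMDGPU::GLOBAL_LOAD_DWORD, TII) == FLAT_LOAD);
  assert(getInstClass(AMDGPU::GLOBAL_LOAD_DWORD_SADDR, TII) ==
         GLOBAL_LOAD_SADDR);
}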
@@ -501,7 +503,11 @@ static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) {
case AMDGPU::GLOBAL_LOAD_DWORDX2:
case AMDGPU::GLOBAL_LOAD_DWORDX3:
case AMDGPU::GLOBAL_LOAD_DWORDX4:
- return AMDGPU::GLOBAL_LOAD_DWORD;
+ case AMDGPU::FLAT_LOAD_DWORD:
+ case AMDGPU::FLAT_LOAD_DWORDX2:
+ case AMDGPU::FLAT_LOAD_DWORDX3:
+ case AMDGPU::FLAT_LOAD_DWORDX4:
+ return AMDGPU::FLAT_LOAD_DWORD;
case AMDGPU::GLOBAL_LOAD_DWORD_SADDR:
case AMDGPU::GLOBAL_LOAD_DWORDX2_SADDR:
case AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR:
@@ -511,25 +517,37 @@ static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) {
case AMDGPU::GLOBAL_STORE_DWORDX2:
case AMDGPU::GLOBAL_STORE_DWORDX3:
case AMDGPU::GLOBAL_STORE_DWORDX4:
- return AMDGPU::GLOBAL_STORE_DWORD;
- case AMDGPU::GLOBAL_STORE_DWORD_SADDR:
- case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR:
- case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
- case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
- return AMDGPU::GLOBAL_STORE_DWORD_SADDR;
- case AMDGPU::FLAT_LOAD_DWORD:
- case AMDGPU::FLAT_LOAD_DWORDX2:
- case AMDGPU::FLAT_LOAD_DWORDX3:
- case AMDGPU::FLAT_LOAD_DWORDX4:
- return AMDGPU::FLAT_LOAD_DWORD;
case AMDGPU::FLAT_STORE_DWORD:
case AMDGPU::FLAT_STORE_DWORDX2:
case AMDGPU::FLAT_STORE_DWORDX3:
case AMDGPU::FLAT_STORE_DWORDX4:
return AMDGPU::FLAT_STORE_DWORD;
+ case AMDGPU::GLOBAL_STORE_DWORD_SADDR:
+ case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR:
+ case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR:
+ case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR:
+ return AMDGPU::GLOBAL_STORE_DWORD_SADDR;
}
}
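The same collapsing happens for the subclass key, sketched below under the same assumptions: every flat and vaddr-form global load now reports the canonical FLAT_LOAD_DWORD key, so the subclass comparison no longer separates the two instruction families.

static void illustrateInstSubclass(const SIInstrInfo &TII) {
  // Illustrative only; follows directly from the switch above.
  assert(getInstSubclass(AMDGPU::GLOBAL_LOAD_DWORDX2, TII) ==
         AMDGPU::FLAT_LOAD_DWORD);
  assert(getInstSubclass(AMDGPU::FLAT_LOAD_DWORDX4, TII) ==
         AMDGPU::FLAT_LOAD_DWORD);
}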
+// GLOBAL loads and stores are classified as FLAT initially. If both combined
+// instructions are FLAT GLOBAL adjust the class to GLOBAL_LOAD or GLOBAL_STORE.
+// If either or both instructions are non segment specific FLAT the resulting
+// combined operation will be FLAT, potentially promoting one of the GLOBAL
+// operations to FLAT.
+// For other instructions return the original unmodified class.
+InstClassEnum
+SILoadStoreOptimizer::getCommonInstClass(const CombineInfo &CI,
+ const CombineInfo &Paired) {
+ assert(CI.InstClass == Paired.InstClass);
+
+ if ((CI.InstClass == FLAT_LOAD || CI.InstClass == FLAT_STORE) &&
+ SIInstrInfo::isFLATGlobal(*CI.I) && SIInstrInfo::isFLATGlobal(*Paired.I))
+ return (CI.InstClass == FLAT_STORE) ? GLOBAL_STORE : GLOBAL_LOAD;
+
+ return CI.InstClass;
+}
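Condensed into a hypothetical standalone model (FirstIsGlobal/SecondIsGlobal stand for SIInstrInfo::isFLATGlobal() on each instruction): the pair only narrows back to a GLOBAL_* class when both members are segment-specific global accesses.

static InstClassEnum commonLoadClassModel(bool FirstIsGlobal,
                                          bool SecondIsGlobal) {
  // Any genuinely flat member keeps the merged operation flat.
  return (FirstIsGlobal && SecondIsGlobal) ? GLOBAL_LOAD : FLAT_LOAD;
}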
+
static AddressRegs getRegs(unsigned Opc, const SIInstrInfo &TII) {
AddressRegs Result;
@@ -762,10 +780,15 @@ SILoadStoreOptimizer::combineKnownAdjacentMMOs(const CombineInfo &CI,
// A base pointer for the combined operation is the same as the leading
// operation's pointer.
if (Paired < CI)
- MMOa = MMOb;
+ std::swap(MMOa, MMOb);
+
+ MachinePointerInfo PtrInfo(MMOa->getPointerInfo());
+ // If merging FLAT and GLOBAL set address space to FLAT.
+ if (MMOb->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS)
+ PtrInfo.AddrSpace = AMDGPUAS::FLAT_ADDRESS;
MachineFunction *MF = CI.I->getMF();
- return MF->getMachineMemOperand(MMOa, MMOa->getPointerInfo(), Size);
+ return MF->getMachineMemOperand(MMOa, PtrInfo, Size);
}
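The address-space handling above can be summarized by a hypothetical helper (A is the leading operand's address space; the real code reaches the same result by seeding PtrInfo from MMOa and then checking MMOb): FLAT, being the superset segment, wins whenever either side is flat.

static unsigned mergedAddrSpace(unsigned A, unsigned B) {
  // Flat subsumes global, so a single flat operand widens the result.
  return (A == AMDGPUAS::FLAT_ADDRESS || B == AMDGPUAS::FLAT_ADDRESS)
             ? AMDGPUAS::FLAT_ADDRESS
             : A;
}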
bool SILoadStoreOptimizer::dmasksCanBeCombined(const CombineInfo &CI,
@@ -1576,7 +1599,7 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI,
const CombineInfo &Paired) {
const unsigned Width = CI.Width + Paired.Width;
- switch (CI.InstClass) {
+ switch (getCommonInstClass(CI, Paired)) {
default:
assert(CI.InstClass == BUFFER_LOAD || CI.InstClass == BUFFER_STORE);
// FIXME: Handle d16 correctly
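Downstream, getNewOpcode() now switches on the common class rather than CI.InstClass, so a mixed pair picks a FLAT_* merged opcode while a pure global pair can keep the GLOBAL_* form. A hedged width-2 sketch (the helper is hypothetical; the real function handles all widths and classes):

static unsigned mergedDwordx2LoadOpcode(InstClassEnum Common) {
  switch (Common) {
  case GLOBAL_LOAD:
    return AMDGPU::GLOBAL_LOAD_DWORDX2;
  case FLAT_LOAD:
    return AMDGPU::FLAT_LOAD_DWORDX2;
  default:
    llvm_unreachable("sketch only models the two load classes");
  }
}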