aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
diff options
context:
space:
mode:
authorStanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>2022-02-22 16:33:06 -0800
committerStanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>2022-02-24 10:47:57 -0800
commitcefa1c5ca93d3a130623a5ccb90216832f3c0a03 (patch)
tree80a1032d1ed683c33004333f51b9e7b6410d1c43 /llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
parente38fc14c43b00f0606ad31a6df9dad1c54413afc (diff)
downloadllvm-cefa1c5ca93d3a130623a5ccb90216832f3c0a03.zip
llvm-cefa1c5ca93d3a130623a5ccb90216832f3c0a03.tar.gz
llvm-cefa1c5ca93d3a130623a5ccb90216832f3c0a03.tar.bz2
[AMDGPU] Fix combined MMO in load-store merge
Loads and stores can be out of order in the SILoadStoreOptimizer. When combining the MachineMemOperands of two instructions, the operands are sent to combineKnownAdjacentMMOs in IR order. At the moment it picks the first operand and just replaces its offset and size. This essentially loses alignment information and may generally result in an incorrect base pointer being used. Use the base pointer that comes first in memory address order instead, and only adjust the size. Differential Revision: https://reviews.llvm.org/D120370
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp90
1 files changed, 36 insertions, 54 deletions
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 851346c..a38edaf 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -163,6 +163,11 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
}
void setMI(MachineBasicBlock::iterator MI, const SILoadStoreOptimizer &LSO);
+
+ // Compare by pointer order.
+ bool operator<(const CombineInfo& Other) const {
+ return (InstClass == MIMG) ? DMask < Other.DMask : Offset < Other.Offset;
+ }
};
struct BaseRegisters {
@@ -260,6 +265,9 @@ private:
MemInfoMap &Visited, SmallPtrSet<MachineInstr *, 4> &AnchorList,
std::list<std::list<CombineInfo>> &MergeableInsts) const;
+ static MachineMemOperand *combineKnownAdjacentMMOs(const CombineInfo &CI,
+ const CombineInfo &Paired);
+
public:
static char ID;
@@ -662,19 +670,23 @@ bool SILoadStoreOptimizer::canSwapInstructions(
return true;
}
-// This function assumes that \p A and \p B have are identical except for
-// size and offset, and they reference adjacent memory.
-static MachineMemOperand *combineKnownAdjacentMMOs(MachineFunction &MF,
- const MachineMemOperand *A,
- const MachineMemOperand *B) {
- unsigned MinOffset = std::min(A->getOffset(), B->getOffset());
- unsigned Size = A->getSize() + B->getSize();
- // This function adds the offset parameter to the existing offset for A,
- // so we pass 0 here as the offset and then manually set it to the correct
- // value after the call.
- MachineMemOperand *MMO = MF.getMachineMemOperand(A, 0, Size);
- MMO->setOffset(MinOffset);
- return MMO;
+// Given that \p CI and \p Paired are adjacent memory operations produce a new
+// MMO for the combined operation with a new access size.
+MachineMemOperand *
+SILoadStoreOptimizer::combineKnownAdjacentMMOs(const CombineInfo &CI,
+ const CombineInfo &Paired) {
+ const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
+ const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
+
+ unsigned Size = MMOa->getSize() + MMOb->getSize();
+
+ // A base pointer for the combined operation is the same as the leading
+ // operation's pointer.
+ if (Paired < CI)
+ MMOa = MMOb;
+
+ MachineFunction *MF = CI.I->getMF();
+ return MF->getMachineMemOperand(MMOa, MMOa->getPointerInfo(), Size);
}
bool SILoadStoreOptimizer::dmasksCanBeCombined(const CombineInfo &CI,
@@ -1157,10 +1169,7 @@ SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired,
// will return true if this is the case.
assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
- const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
- const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
-
- MachineInstr *New = MIB.addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+ MachineInstr *New = MIB.addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
unsigned SubRegIdx0, SubRegIdx1;
std::tie(SubRegIdx0, SubRegIdx1) = getSubRegIdxs(CI, Paired);
@@ -1199,16 +1208,12 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSBufferLoadImmPair(
// will return true if this is the case.
assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
- const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
- const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
-
MachineInstr *New =
BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode), DestReg)
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase))
.addImm(MergedOffset) // offset
.addImm(CI.CPol) // cpol
- .addMemOperand(
- combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+ .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
@@ -1257,9 +1262,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
// will return true if this is the case.
assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
- const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
- const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
-
MachineInstr *New =
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
@@ -1267,7 +1269,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
.addImm(CI.CPol) // cpol
.addImm(0) // tfe
.addImm(0) // swz
- .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+ .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
@@ -1319,9 +1321,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair(
// will return true if this is the case.
assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
- const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
- const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
-
MachineInstr *New =
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
@@ -1330,8 +1329,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair(
.addImm(CI.CPol) // cpol
.addImm(0) // tfe
.addImm(0) // swz
- .addMemOperand(
- combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+ .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
@@ -1395,9 +1393,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferStorePair(
// will return true if this is the case.
assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
- const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
- const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
-
MachineInstr *New =
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
@@ -1406,8 +1401,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferStorePair(
.addImm(CI.CPol) // cpol
.addImm(0) // tfe
.addImm(0) // swz
- .addMemOperand(
- combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+ .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1430,14 +1424,11 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeGlobalLoadPair(
if (auto *SAddr = TII->getNamedOperand(*CI.I, AMDGPU::OpName::saddr))
MIB.add(*SAddr);
- const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
- const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
-
MachineInstr *New =
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr))
.addImm(std::min(CI.Offset, Paired.Offset))
.addImm(CI.CPol)
- .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+ .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
@@ -1520,15 +1511,9 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI,
std::pair<unsigned, unsigned>
SILoadStoreOptimizer::getSubRegIdxs(const CombineInfo &CI,
const CombineInfo &Paired) {
- bool ReverseOrder;
- if (CI.InstClass == MIMG) {
- assert(
- (countPopulation(CI.DMask | Paired.DMask) == CI.Width + Paired.Width) &&
- "No overlaps");
- ReverseOrder = CI.DMask > Paired.DMask;
- } else {
- ReverseOrder = CI.Offset > Paired.Offset;
- }
+ assert((CI.InstClass != MIMG || (countPopulation(CI.DMask | Paired.DMask) ==
+ CI.Width + Paired.Width)) &&
+ "No overlaps");
unsigned Idx0;
unsigned Idx1;
@@ -1544,7 +1529,7 @@ SILoadStoreOptimizer::getSubRegIdxs(const CombineInfo &CI,
assert(CI.Width >= 1 && CI.Width <= 4);
assert(Paired.Width >= 1 && Paired.Width <= 4);
- if (ReverseOrder) {
+ if (Paired < CI) {
Idx1 = Idxs[0][Paired.Width - 1];
Idx0 = Idxs[Paired.Width][CI.Width - 1];
} else {
@@ -1618,9 +1603,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair(
// will return true if this is the case.
assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
- const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
- const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
-
MachineInstr *New =
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
@@ -1628,7 +1610,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair(
.addImm(CI.CPol) // cpol
.addImm(0) // tfe
.addImm(0) // swz
- .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+ .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
CI.I->eraseFromParent();
Paired.I->eraseFromParent();