Diffstat (limited to 'llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp')
-rw-r--r--  llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | 90
1 file changed, 36 insertions(+), 54 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 851346c..a38edaf 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -163,6 +163,11 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
}
void setMI(MachineBasicBlock::iterator MI, const SILoadStoreOptimizer &LSO);
+
+ // Compare by pointer order.
+ bool operator<(const CombineInfo& Other) const {
+ return (InstClass == MIMG) ? DMask < Other.DMask : Offset < Other.Offset;
+ }
};
struct BaseRegisters {
@@ -260,6 +265,9 @@ private:
MemInfoMap &Visited, SmallPtrSet<MachineInstr *, 4> &AnchorList,
std::list<std::list<CombineInfo>> &MergeableInsts) const;
+ static MachineMemOperand *combineKnownAdjacentMMOs(const CombineInfo &CI,
+ const CombineInfo &Paired);
+
public:
static char ID;
@@ -662,19 +670,23 @@ bool SILoadStoreOptimizer::canSwapInstructions(
return true;
}
-// This function assumes that \p A and \p B have are identical except for
-// size and offset, and they reference adjacent memory.
-static MachineMemOperand *combineKnownAdjacentMMOs(MachineFunction &MF,
- const MachineMemOperand *A,
- const MachineMemOperand *B) {
- unsigned MinOffset = std::min(A->getOffset(), B->getOffset());
- unsigned Size = A->getSize() + B->getSize();
- // This function adds the offset parameter to the existing offset for A,
- // so we pass 0 here as the offset and then manually set it to the correct
- // value after the call.
- MachineMemOperand *MMO = MF.getMachineMemOperand(A, 0, Size);
- MMO->setOffset(MinOffset);
- return MMO;
+// Given that \p CI and \p Paired are adjacent memory operations, produce a new
+// MMO for the combined operation with a new access size.
+MachineMemOperand *
+SILoadStoreOptimizer::combineKnownAdjacentMMOs(const CombineInfo &CI,
+ const CombineInfo &Paired) {
+ const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
+ const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
+
+ unsigned Size = MMOa->getSize() + MMOb->getSize();
+
+ // A base pointer for the combined operation is the same as the leading
+ // operation's pointer.
+ if (Paired < CI)
+ MMOa = MMOb;
+
+ MachineFunction *MF = CI.I->getMF();
+ return MF->getMachineMemOperand(MMOa, MMOa->getPointerInfo(), Size);
}
bool SILoadStoreOptimizer::dmasksCanBeCombined(const CombineInfo &CI,
@@ -1157,10 +1169,7 @@ SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired,
// will return true if this is the case.
assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
- const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
- const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
-
- MachineInstr *New = MIB.addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+ MachineInstr *New = MIB.addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
unsigned SubRegIdx0, SubRegIdx1;
std::tie(SubRegIdx0, SubRegIdx1) = getSubRegIdxs(CI, Paired);
@@ -1199,16 +1208,12 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSBufferLoadImmPair(
// will return true if this is the case.
assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
- const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
- const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
-
MachineInstr *New =
BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode), DestReg)
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase))
.addImm(MergedOffset) // offset
.addImm(CI.CPol) // cpol
- .addMemOperand(
- combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+ .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
@@ -1257,9 +1262,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
// will return true if this is the case.
assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
- const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
- const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
-
MachineInstr *New =
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
@@ -1267,7 +1269,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
.addImm(CI.CPol) // cpol
.addImm(0) // tfe
.addImm(0) // swz
- .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+ .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
@@ -1319,9 +1321,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair(
// will return true if this is the case.
assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
- const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
- const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
-
MachineInstr *New =
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
@@ -1330,8 +1329,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair(
.addImm(CI.CPol) // cpol
.addImm(0) // tfe
.addImm(0) // swz
- .addMemOperand(
- combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+ .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
@@ -1395,9 +1393,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferStorePair(
// will return true if this is the case.
assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
- const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
- const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
-
MachineInstr *New =
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
@@ -1406,8 +1401,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferStorePair(
.addImm(CI.CPol) // cpol
.addImm(0) // tfe
.addImm(0) // swz
- .addMemOperand(
- combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+ .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
@@ -1430,14 +1424,11 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeGlobalLoadPair(
if (auto *SAddr = TII->getNamedOperand(*CI.I, AMDGPU::OpName::saddr))
MIB.add(*SAddr);
- const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
- const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
-
MachineInstr *New =
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr))
.addImm(std::min(CI.Offset, Paired.Offset))
.addImm(CI.CPol)
- .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+ .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
@@ -1520,15 +1511,9 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI,
std::pair<unsigned, unsigned>
SILoadStoreOptimizer::getSubRegIdxs(const CombineInfo &CI,
const CombineInfo &Paired) {
- bool ReverseOrder;
- if (CI.InstClass == MIMG) {
- assert(
- (countPopulation(CI.DMask | Paired.DMask) == CI.Width + Paired.Width) &&
- "No overlaps");
- ReverseOrder = CI.DMask > Paired.DMask;
- } else {
- ReverseOrder = CI.Offset > Paired.Offset;
- }
+ assert((CI.InstClass != MIMG || (countPopulation(CI.DMask | Paired.DMask) ==
+ CI.Width + Paired.Width)) &&
+ "No overlaps");
unsigned Idx0;
unsigned Idx1;
@@ -1544,7 +1529,7 @@ SILoadStoreOptimizer::getSubRegIdxs(const CombineInfo &CI,
assert(CI.Width >= 1 && CI.Width <= 4);
assert(Paired.Width >= 1 && Paired.Width <= 4);
- if (ReverseOrder) {
+ if (Paired < CI) {
Idx1 = Idxs[0][Paired.Width - 1];
Idx0 = Idxs[Paired.Width][CI.Width - 1];
} else {
@@ -1618,9 +1603,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair(
// will return true if this is the case.
assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
- const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
- const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
-
MachineInstr *New =
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
@@ -1628,7 +1610,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair(
.addImm(CI.CPol) // cpol
.addImm(0) // tfe
.addImm(0) // swz
- .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+ .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
CI.I->eraseFromParent();
Paired.I->eraseFromParent();
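
For context, here is a minimal standalone sketch (not part of the patch) of the ordering rule the new operator< encodes and of how the merged memory operand is formed: MIMG pairs are ordered by DMask, everything else by Offset, and the leading (lower-ordered) operation supplies the base pointer while the sizes are summed. The CombineInfoSketch struct and combineAdjacent helper below are simplified stand-ins, not the real SILoadStoreOptimizer types, which operate on MachineInstr and MachineMemOperand.

```cpp
// Illustration only: simplified stand-ins for CombineInfo and
// combineKnownAdjacentMMOs from SILoadStoreOptimizer.cpp.
#include <cstdint>
#include <iostream>

enum InstClassEnum { MIMG, BUFFER };

struct CombineInfoSketch {
  InstClassEnum InstClass;
  unsigned DMask;   // image channel mask (meaningful for MIMG only)
  unsigned Offset;  // byte offset (non-MIMG)
  uint64_t Size;    // access size in bytes
  uint64_t BasePtr; // stand-in for the MMO's pointer info

  // Mirrors the new operator<: MIMG instructions are ordered by DMask,
  // everything else by offset.
  bool operator<(const CombineInfoSketch &Other) const {
    return (InstClass == MIMG) ? DMask < Other.DMask : Offset < Other.Offset;
  }
};

// Mirrors the intent of combineKnownAdjacentMMOs: the merged access covers
// both sizes and inherits the leading operation's base pointer and offset.
CombineInfoSketch combineAdjacent(const CombineInfoSketch &CI,
                                  const CombineInfoSketch &Paired) {
  const CombineInfoSketch &Lead = (Paired < CI) ? Paired : CI;
  CombineInfoSketch Merged = Lead;
  Merged.Size = CI.Size + Paired.Size;
  return Merged;
}

int main() {
  CombineInfoSketch A{BUFFER, 0, 16, 4, 0x1000}; // dword at offset 16
  CombineInfoSketch B{BUFFER, 0, 12, 4, 0x1000}; // dword at offset 12
  CombineInfoSketch M = combineAdjacent(A, B);
  // Prints "merged offset 12, size 8": the lower-offset access leads.
  std::cout << "merged offset " << M.Offset << ", size " << M.Size << "\n";
  return 0;
}
```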