author    | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2022-02-22 16:33:06 -0800
committer | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2022-02-24 10:47:57 -0800
commit    | cefa1c5ca93d3a130623a5ccb90216832f3c0a03 (patch)
tree      | 80a1032d1ed683c33004333f51b9e7b6410d1c43 /llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
parent    | e38fc14c43b00f0606ad31a6df9dad1c54413afc (diff)
[AMDGPU] Fix combined MMO in load-store merge
Loads and stores can be out of order in the SILoadStoreOptimizer.
When the MachineMemOperands of two instructions are combined, the
operands are passed to combineKnownAdjacentMMOs in IR order. At the
moment it picks the first operand and just replaces its offset and
size. This loses alignment information and can result in an incorrect
base pointer being used.

Use the base pointer of the operation that comes first in memory
address order instead, and only adjust the size.
Differential Revision: https://reviews.llvm.org/D120370
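
As an illustration of the intent (not the pass itself): below is a minimal
standalone C++ sketch in which MemOp, combineOld and combineNew are
hypothetical stand-ins for the relevant MachineMemOperand state. It shows how
widening the first-in-IR-order operand can keep the wrong base/alignment when
the lower-address access comes second in the IR, and how taking the operand
that leads in memory-address order (as the patch below does) avoids that.

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <iostream>

    // Hypothetical stand-in for the MMO state relevant here: a base pointer
    // (modelled as an address), an offset from it, an access size, and the
    // alignment known for that base+offset.
    struct MemOp {
      uint64_t Base;
      int64_t Offset;
      uint64_t Size;
      uint64_t Align;
    };

    // Old behaviour (simplified): start from the first-in-IR-order operand,
    // patch its offset and widen its size; its alignment is kept even when
    // the paired access is the one at the lower address.
    MemOp combineOld(const MemOp &A, const MemOp &B) {
      MemOp R = A;
      R.Offset = std::min(A.Offset, B.Offset);
      R.Size = A.Size + B.Size;
      return R;
    }

    // New behaviour (simplified): use the operand that leads in memory
    // address order as the base of the combined access and only grow the
    // size, so base and alignment describe the start of the merged access.
    MemOp combineNew(const MemOp &A, const MemOp &B) {
      const MemOp &Lead = (B.Offset < A.Offset) ? B : A;
      MemOp R = Lead;
      R.Size = A.Size + B.Size;
      return R;
    }

    int main() {
      // The access at offset 4 comes first in the IR; the one at offset 0,
      // which actually leads in memory, comes second.
      MemOp FirstInIR{/*Base=*/0x1000, /*Offset=*/4, /*Size=*/4, /*Align=*/4};
      MemOp SecondInIR{/*Base=*/0x1000, /*Offset=*/0, /*Size=*/4, /*Align=*/8};

      MemOp Old = combineOld(FirstInIR, SecondInIR);
      MemOp New = combineNew(FirstInIR, SecondInIR);

      // The merged 8-byte access starts at offset 0, where alignment 8 is
      // known; the old scheme keeps the 4-byte alignment of the later access.
      std::cout << "old align " << Old.Align << ", new align " << New.Align << '\n';
      assert(New.Offset == 0 && New.Align == 8);
      return 0;
    }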
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | 90
1 file changed, 36 insertions, 54 deletions
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 851346c..a38edaf 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -163,6 +163,11 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
     }
 
     void setMI(MachineBasicBlock::iterator MI, const SILoadStoreOptimizer &LSO);
+
+    // Compare by pointer order.
+    bool operator<(const CombineInfo& Other) const {
+      return (InstClass == MIMG) ? DMask < Other.DMask : Offset < Other.Offset;
+    }
   };
 
   struct BaseRegisters {
@@ -260,6 +265,9 @@ private:
                              MemInfoMap &Visited,
                              SmallPtrSet<MachineInstr *, 4> &AnchorList,
                              std::list<std::list<CombineInfo>> &MergeableInsts) const;
 
+  static MachineMemOperand *combineKnownAdjacentMMOs(const CombineInfo &CI,
+                                                     const CombineInfo &Paired);
+
 public:
   static char ID;
@@ -662,19 +670,23 @@ bool SILoadStoreOptimizer::canSwapInstructions(
   return true;
 }
 
-// This function assumes that \p A and \p B have are identical except for
-// size and offset, and they reference adjacent memory.
-static MachineMemOperand *combineKnownAdjacentMMOs(MachineFunction &MF,
-                                                   const MachineMemOperand *A,
-                                                   const MachineMemOperand *B) {
-  unsigned MinOffset = std::min(A->getOffset(), B->getOffset());
-  unsigned Size = A->getSize() + B->getSize();
-  // This function adds the offset parameter to the existing offset for A,
-  // so we pass 0 here as the offset and then manually set it to the correct
-  // value after the call.
-  MachineMemOperand *MMO = MF.getMachineMemOperand(A, 0, Size);
-  MMO->setOffset(MinOffset);
-  return MMO;
+// Given that \p CI and \p Paired are adjacent memory operations produce a new
+// MMO for the combined operation with a new access size.
+MachineMemOperand *
+SILoadStoreOptimizer::combineKnownAdjacentMMOs(const CombineInfo &CI,
+                                               const CombineInfo &Paired) {
+  const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
+  const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
+
+  unsigned Size = MMOa->getSize() + MMOb->getSize();
+
+  // A base pointer for the combined operation is the same as the leading
+  // operation's pointer.
+  if (Paired < CI)
+    MMOa = MMOb;
+
+  MachineFunction *MF = CI.I->getMF();
+  return MF->getMachineMemOperand(MMOa, MMOa->getPointerInfo(), Size);
 }
 
 bool SILoadStoreOptimizer::dmasksCanBeCombined(const CombineInfo &CI,
@@ -1157,10 +1169,7 @@ SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired,
   // will return true if this is the case.
   assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
 
-  const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
-  const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
-
-  MachineInstr *New = MIB.addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+  MachineInstr *New = MIB.addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
 
   unsigned SubRegIdx0, SubRegIdx1;
   std::tie(SubRegIdx0, SubRegIdx1) = getSubRegIdxs(CI, Paired);
@@ -1199,16 +1208,12 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSBufferLoadImmPair(
   // will return true if this is the case.
   assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
 
-  const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
-  const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
-
   MachineInstr *New =
       BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode), DestReg)
           .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase))
           .addImm(MergedOffset) // offset
           .addImm(CI.CPol)      // cpol
-          .addMemOperand(
-              combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+          .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
 
   std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
   const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
@@ -1257,9 +1262,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
   // will return true if this is the case.
   assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
 
-  const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
-  const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
-
   MachineInstr *New =
       MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
           .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
@@ -1267,7 +1269,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
           .addImm(CI.CPol)      // cpol
           .addImm(0)            // tfe
           .addImm(0)            // swz
-          .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+          .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
 
   std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
   const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
@@ -1319,9 +1321,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair(
   // will return true if this is the case.
   assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
 
-  const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
-  const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
-
   MachineInstr *New =
       MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
           .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
@@ -1330,8 +1329,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair(
           .addImm(CI.CPol)      // cpol
          .addImm(0)            // tfe
          .addImm(0)            // swz
-          .addMemOperand(
-              combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+          .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
 
   std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
   const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
@@ -1395,9 +1393,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferStorePair(
   // will return true if this is the case.
   assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
 
-  const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
-  const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
-
   MachineInstr *New =
       MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
          .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
@@ -1406,8 +1401,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferStorePair(
          .addImm(CI.CPol)      // cpol
          .addImm(0)            // tfe
          .addImm(0)            // swz
-          .addMemOperand(
-              combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+          .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
 
   CI.I->eraseFromParent();
   Paired.I->eraseFromParent();
@@ -1430,14 +1424,11 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeGlobalLoadPair(
   if (auto *SAddr = TII->getNamedOperand(*CI.I, AMDGPU::OpName::saddr))
     MIB.add(*SAddr);
 
-  const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
-  const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
-
   MachineInstr *New =
       MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr))
           .addImm(std::min(CI.Offset, Paired.Offset))
           .addImm(CI.CPol)
-          .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+          .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
 
   std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
   const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
@@ -1520,15 +1511,9 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI,
 std::pair<unsigned, unsigned>
 SILoadStoreOptimizer::getSubRegIdxs(const CombineInfo &CI,
                                     const CombineInfo &Paired) {
-  bool ReverseOrder;
-  if (CI.InstClass == MIMG) {
-    assert(
-        (countPopulation(CI.DMask | Paired.DMask) == CI.Width + Paired.Width) &&
-        "No overlaps");
-    ReverseOrder = CI.DMask > Paired.DMask;
-  } else {
-    ReverseOrder = CI.Offset > Paired.Offset;
-  }
+  assert((CI.InstClass != MIMG || (countPopulation(CI.DMask | Paired.DMask) ==
+                                   CI.Width + Paired.Width)) &&
+         "No overlaps");
 
   unsigned Idx0;
   unsigned Idx1;
@@ -1544,7 +1529,7 @@ SILoadStoreOptimizer::getSubRegIdxs(const CombineInfo &CI,
   assert(CI.Width >= 1 && CI.Width <= 4);
   assert(Paired.Width >= 1 && Paired.Width <= 4);
 
-  if (ReverseOrder) {
+  if (Paired < CI) {
     Idx1 = Idxs[0][Paired.Width - 1];
     Idx0 = Idxs[Paired.Width][CI.Width - 1];
   } else {
@@ -1618,9 +1603,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair(
   // will return true if this is the case.
   assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand());
 
-  const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
-  const MachineMemOperand *MMOb = *Paired.I->memoperands_begin();
-
   MachineInstr *New =
       MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
          .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
@@ -1628,7 +1610,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair(
          .addImm(CI.CPol)      // cpol
          .addImm(0)            // tfe
          .addImm(0)            // swz
-          .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+          .addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
 
   CI.I->eraseFromParent();
   Paired.I->eraseFromParent();