diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | 90 |
1 files changed, 36 insertions, 54 deletions
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index 851346c..a38edaf 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -163,6 +163,11 @@ class SILoadStoreOptimizer : public MachineFunctionPass { } void setMI(MachineBasicBlock::iterator MI, const SILoadStoreOptimizer &LSO); + + // Compare by pointer order. + bool operator<(const CombineInfo& Other) const { + return (InstClass == MIMG) ? DMask < Other.DMask : Offset < Other.Offset; + } }; struct BaseRegisters { @@ -260,6 +265,9 @@ private: MemInfoMap &Visited, SmallPtrSet<MachineInstr *, 4> &AnchorList, std::list<std::list<CombineInfo>> &MergeableInsts) const; + static MachineMemOperand *combineKnownAdjacentMMOs(const CombineInfo &CI, + const CombineInfo &Paired); + public: static char ID; @@ -662,19 +670,23 @@ bool SILoadStoreOptimizer::canSwapInstructions( return true; } -// This function assumes that \p A and \p B have are identical except for -// size and offset, and they reference adjacent memory. -static MachineMemOperand *combineKnownAdjacentMMOs(MachineFunction &MF, - const MachineMemOperand *A, - const MachineMemOperand *B) { - unsigned MinOffset = std::min(A->getOffset(), B->getOffset()); - unsigned Size = A->getSize() + B->getSize(); - // This function adds the offset parameter to the existing offset for A, - // so we pass 0 here as the offset and then manually set it to the correct - // value after the call. - MachineMemOperand *MMO = MF.getMachineMemOperand(A, 0, Size); - MMO->setOffset(MinOffset); - return MMO; +// Given that \p CI and \p Paired are adjacent memory operations produce a new +// MMO for the combined operation with a new access size. +MachineMemOperand * +SILoadStoreOptimizer::combineKnownAdjacentMMOs(const CombineInfo &CI, + const CombineInfo &Paired) { + const MachineMemOperand *MMOa = *CI.I->memoperands_begin(); + const MachineMemOperand *MMOb = *Paired.I->memoperands_begin(); + + unsigned Size = MMOa->getSize() + MMOb->getSize(); + + // A base pointer for the combined operation is the same as the leading + // operation's pointer. + if (Paired < CI) + MMOa = MMOb; + + MachineFunction *MF = CI.I->getMF(); + return MF->getMachineMemOperand(MMOa, MMOa->getPointerInfo(), Size); } bool SILoadStoreOptimizer::dmasksCanBeCombined(const CombineInfo &CI, @@ -1157,10 +1169,7 @@ SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI, CombineInfo &Paired, // will return true if this is the case. assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand()); - const MachineMemOperand *MMOa = *CI.I->memoperands_begin(); - const MachineMemOperand *MMOb = *Paired.I->memoperands_begin(); - - MachineInstr *New = MIB.addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); + MachineInstr *New = MIB.addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); unsigned SubRegIdx0, SubRegIdx1; std::tie(SubRegIdx0, SubRegIdx1) = getSubRegIdxs(CI, Paired); @@ -1199,16 +1208,12 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSBufferLoadImmPair( // will return true if this is the case. assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand()); - const MachineMemOperand *MMOa = *CI.I->memoperands_begin(); - const MachineMemOperand *MMOb = *Paired.I->memoperands_begin(); - MachineInstr *New = BuildMI(*MBB, InsertBefore, DL, TII->get(Opcode), DestReg) .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase)) .addImm(MergedOffset) // offset .addImm(CI.CPol) // cpol - .addMemOperand( - combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); + .addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired); const unsigned SubRegIdx0 = std::get<0>(SubRegIdx); @@ -1257,9 +1262,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair( // will return true if this is the case. assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand()); - const MachineMemOperand *MMOa = *CI.I->memoperands_begin(); - const MachineMemOperand *MMOb = *Paired.I->memoperands_begin(); - MachineInstr *New = MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc)) .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset)) @@ -1267,7 +1269,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair( .addImm(CI.CPol) // cpol .addImm(0) // tfe .addImm(0) // swz - .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); + .addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired); const unsigned SubRegIdx0 = std::get<0>(SubRegIdx); @@ -1319,9 +1321,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair( // will return true if this is the case. assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand()); - const MachineMemOperand *MMOa = *CI.I->memoperands_begin(); - const MachineMemOperand *MMOb = *Paired.I->memoperands_begin(); - MachineInstr *New = MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc)) .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset)) @@ -1330,8 +1329,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair( .addImm(CI.CPol) // cpol .addImm(0) // tfe .addImm(0) // swz - .addMemOperand( - combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); + .addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired); const unsigned SubRegIdx0 = std::get<0>(SubRegIdx); @@ -1395,9 +1393,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferStorePair( // will return true if this is the case. assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand()); - const MachineMemOperand *MMOa = *CI.I->memoperands_begin(); - const MachineMemOperand *MMOb = *Paired.I->memoperands_begin(); - MachineInstr *New = MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc)) .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset)) @@ -1406,8 +1401,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferStorePair( .addImm(CI.CPol) // cpol .addImm(0) // tfe .addImm(0) // swz - .addMemOperand( - combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); + .addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1430,14 +1424,11 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeGlobalLoadPair( if (auto *SAddr = TII->getNamedOperand(*CI.I, AMDGPU::OpName::saddr)) MIB.add(*SAddr); - const MachineMemOperand *MMOa = *CI.I->memoperands_begin(); - const MachineMemOperand *MMOb = *Paired.I->memoperands_begin(); - MachineInstr *New = MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr)) .addImm(std::min(CI.Offset, Paired.Offset)) .addImm(CI.CPol) - .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); + .addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired); const unsigned SubRegIdx0 = std::get<0>(SubRegIdx); @@ -1520,15 +1511,9 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI, std::pair<unsigned, unsigned> SILoadStoreOptimizer::getSubRegIdxs(const CombineInfo &CI, const CombineInfo &Paired) { - bool ReverseOrder; - if (CI.InstClass == MIMG) { - assert( - (countPopulation(CI.DMask | Paired.DMask) == CI.Width + Paired.Width) && - "No overlaps"); - ReverseOrder = CI.DMask > Paired.DMask; - } else { - ReverseOrder = CI.Offset > Paired.Offset; - } + assert((CI.InstClass != MIMG || (countPopulation(CI.DMask | Paired.DMask) == + CI.Width + Paired.Width)) && + "No overlaps"); unsigned Idx0; unsigned Idx1; @@ -1544,7 +1529,7 @@ SILoadStoreOptimizer::getSubRegIdxs(const CombineInfo &CI, assert(CI.Width >= 1 && CI.Width <= 4); assert(Paired.Width >= 1 && Paired.Width <= 4); - if (ReverseOrder) { + if (Paired < CI) { Idx1 = Idxs[0][Paired.Width - 1]; Idx0 = Idxs[Paired.Width][CI.Width - 1]; } else { @@ -1618,9 +1603,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair( // will return true if this is the case. assert(CI.I->hasOneMemOperand() && Paired.I->hasOneMemOperand()); - const MachineMemOperand *MMOa = *CI.I->memoperands_begin(); - const MachineMemOperand *MMOb = *Paired.I->memoperands_begin(); - MachineInstr *New = MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc)) .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset)) @@ -1628,7 +1610,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair( .addImm(CI.CPol) // cpol .addImm(0) // tfe .addImm(0) // swz - .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); + .addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); |