diff options
Diffstat (limited to 'llvm/lib/CodeGen/GlobalISel')
-rw-r--r-- | llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 69 | ||||
-rw-r--r-- | llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp | 3 |
2 files changed, 70 insertions, 2 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 0ebee2c..906d62a3 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -1215,7 +1215,7 @@ bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const { LLT MemTy = LdSt.getMMO().getMemoryType(); SmallVector<LegalityQuery::MemDesc, 2> MemDescrs( {{MemTy, MemTy.getSizeInBits().getKnownMinValue(), - AtomicOrdering::NotAtomic}}); + AtomicOrdering::NotAtomic, AtomicOrdering::NotAtomic}}); unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode()); SmallVector<LLT> OpTys; if (IndexedOpc == TargetOpcode::G_INDEXED_STORE) @@ -6745,6 +6745,73 @@ bool CombinerHelper::matchCombineFMinMaxNaN(MachineInstr &MI, return MatchNaN(1) || MatchNaN(2); } +// Combine multiple FDIVs with the same divisor into multiple FMULs by the +// reciprocal. +// E.g., (a / Y; b / Y;) -> (recip = 1.0 / Y; a * recip; b * recip) +bool CombinerHelper::matchRepeatedFPDivisor( + MachineInstr &MI, SmallVector<MachineInstr *> &MatchInfo) const { + assert(MI.getOpcode() == TargetOpcode::G_FDIV); + + Register X = MI.getOperand(1).getReg(); + Register Y = MI.getOperand(2).getReg(); + + if (!MI.getFlag(MachineInstr::MIFlag::FmArcp)) + return false; + + // Skip if current node is a reciprocal/fneg-reciprocal. + auto N0CFP = isConstantOrConstantSplatVectorFP(*MRI.getVRegDef(X), MRI); + if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0))) + return false; + + // Exit early if the target does not want this transform or if there can't + // possibly be enough uses of the divisor to make the transform worthwhile. + unsigned MinUses = getTargetLowering().combineRepeatedFPDivisors(); + if (!MinUses) + return false; + + // Find all FDIV users of the same divisor. For the moment we limit all + // instructions to a single BB and use the first Instr in MatchInfo as the + // dominating position. + MatchInfo.push_back(&MI); + for (auto &U : MRI.use_nodbg_instructions(Y)) { + if (&U == &MI || U.getParent() != MI.getParent()) + continue; + if (U.getOpcode() == TargetOpcode::G_FDIV && + U.getOperand(2).getReg() == Y && U.getOperand(1).getReg() != Y) { + // This division is eligible for optimization only if global unsafe math + // is enabled or if this division allows reciprocal formation. + if (U.getFlag(MachineInstr::MIFlag::FmArcp)) { + MatchInfo.push_back(&U); + if (dominates(U, *MatchInfo[0])) + std::swap(MatchInfo[0], MatchInfo.back()); + } + } + } + + // Now that we have the actual number of divisor uses, make sure it meets + // the minimum threshold specified by the target. + return MatchInfo.size() >= MinUses; +} + +void CombinerHelper::applyRepeatedFPDivisor( + SmallVector<MachineInstr *> &MatchInfo) const { + // Generate the new div at the position of the first instruction, that we have + // ensured will dominate all other instructions. + Builder.setInsertPt(*MatchInfo[0]->getParent(), MatchInfo[0]); + LLT Ty = MRI.getType(MatchInfo[0]->getOperand(0).getReg()); + auto Div = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0), + MatchInfo[0]->getOperand(2).getReg(), + MatchInfo[0]->getFlags()); + + // Replace all found div's with fmul instructions. + for (MachineInstr *MI : MatchInfo) { + Builder.setInsertPt(*MI->getParent(), MI); + Builder.buildFMul(MI->getOperand(0).getReg(), MI->getOperand(1).getReg(), + Div->getOperand(0).getReg(), MI->getFlags()); + MI->eraseFromParent(); + } +} + bool CombinerHelper::matchAddSubSameReg(MachineInstr &MI, Register &Src) const { assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD"); Register LHS = MI.getOperand(1).getReg(); diff --git a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp index b2f8435..cdc1f64 100644 --- a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp @@ -958,7 +958,8 @@ void LoadStoreOpt::initializeStoreMergeTargetInfo(unsigned AddrSpace) { for (unsigned Size = 2; Size <= MaxStoreSizeToForm; Size *= 2) { LLT Ty = LLT::scalar(Size); SmallVector<LegalityQuery::MemDesc, 2> MemDescrs( - {{Ty, Ty.getSizeInBits(), AtomicOrdering::NotAtomic}}); + {{Ty, Ty.getSizeInBits(), AtomicOrdering::NotAtomic, + AtomicOrdering::NotAtomic}}); SmallVector<LLT> StoreTys({Ty, PtrTy}); LegalityQuery Q(TargetOpcode::G_STORE, StoreTys, MemDescrs); LegalizeActionStep ActionStep = LI.getAction(Q); |