diff options
Diffstat (limited to 'llvm/lib/CodeGen')
| -rw-r--r-- | llvm/lib/CodeGen/CodeGenPrepare.cpp | 42 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 86 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/RegAllocFast.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/RegisterUsageInfo.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 6 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 12 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/TargetOptionsImpl.cpp | 2 |
7 files changed, 107 insertions, 45 deletions
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 9e78ec9..8ea1326 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -4030,7 +4030,6 @@ bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const { /// if it is simplified. class SimplificationTracker { DenseMap<Value *, Value *> Storage; - const SimplifyQuery &SQ; // Tracks newly created Phi nodes. The elements are iterated by insertion // order. PhiNodeSet AllPhiNodes; @@ -4038,8 +4037,6 @@ class SimplificationTracker { SmallPtrSet<SelectInst *, 32> AllSelectNodes; public: - SimplificationTracker(const SimplifyQuery &sq) : SQ(sq) {} - Value *Get(Value *V) { do { auto SV = Storage.find(V); @@ -4049,30 +4046,6 @@ public: } while (true); } - Value *Simplify(Value *Val) { - SmallVector<Value *, 32> WorkList; - SmallPtrSet<Value *, 32> Visited; - WorkList.push_back(Val); - while (!WorkList.empty()) { - auto *P = WorkList.pop_back_val(); - if (!Visited.insert(P).second) - continue; - if (auto *PI = dyn_cast<Instruction>(P)) - if (Value *V = simplifyInstruction(cast<Instruction>(PI), SQ)) { - for (auto *U : PI->users()) - WorkList.push_back(cast<Value>(U)); - Put(PI, V); - PI->replaceAllUsesWith(V); - if (auto *PHI = dyn_cast<PHINode>(PI)) - AllPhiNodes.erase(PHI); - if (auto *Select = dyn_cast<SelectInst>(PI)) - AllSelectNodes.erase(Select); - PI->eraseFromParent(); - } - } - return Get(Val); - } - void Put(Value *From, Value *To) { Storage.insert({From, To}); } void ReplacePhi(PHINode *From, PHINode *To) { @@ -4133,8 +4106,7 @@ private: /// Common Type for all different fields in addressing modes. Type *CommonType = nullptr; - /// SimplifyQuery for simplifyInstruction utility. - const SimplifyQuery &SQ; + const DataLayout &DL; /// Original Address. 
Value *Original; @@ -4143,8 +4115,8 @@ private: Value *CommonValue = nullptr; public: - AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue) - : SQ(_SQ), Original(OriginalValue) {} + AddressingModeCombiner(const DataLayout &DL, Value *OriginalValue) + : DL(DL), Original(OriginalValue) {} ~AddressingModeCombiner() { eraseCommonValueIfDead(); } @@ -4256,7 +4228,7 @@ private: // Keep track of keys where the value is null. We will need to replace it // with constant null when we know the common type. SmallVector<Value *, 2> NullValue; - Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType()); + Type *IntPtrTy = DL.getIntPtrType(AddrModes[0].OriginalValue->getType()); for (auto &AM : AddrModes) { Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy); if (DV) { @@ -4306,7 +4278,7 @@ private: // simplification is possible only if original phi/selects were not // simplified yet. // Using this mapping we can find the current value in AddrToBase. - SimplificationTracker ST(SQ); + SimplificationTracker ST; // First step, DFS to create PHI nodes for all intermediate blocks. // Also fill traverse order for the second step. @@ -4465,7 +4437,6 @@ private: PHI->addIncoming(ST.Get(Map[PV]), B); } } - Map[Current] = ST.Simplify(V); } } @@ -5856,8 +5827,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // the graph are compatible. 
bool PhiOrSelectSeen = false; SmallVector<Instruction *, 16> AddrModeInsts; - const SimplifyQuery SQ(*DL, TLInfo); - AddressingModeCombiner AddrModes(SQ, Addr); + AddressingModeCombiner AddrModes(*DL, Addr); TypePromotionTransaction TPT(RemovedInsts); TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index f9d27b0..52c43a4 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -4748,6 +4748,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { case G_FMINIMUMNUM: case G_FMAXIMUMNUM: return lowerFMinNumMaxNum(MI); + case G_FMINIMUM: + case G_FMAXIMUM: + return lowerFMinimumMaximum(MI); case G_MERGE_VALUES: return lowerMergeValues(MI); case G_UNMERGE_VALUES: @@ -8777,6 +8780,77 @@ LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) { return Legalized; } +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerFMinimumMaximum(MachineInstr &MI) { + unsigned Opc = MI.getOpcode(); + auto [Dst, Src0, Src1] = MI.getFirst3Regs(); + LLT Ty = MRI.getType(Dst); + LLT CmpTy = Ty.changeElementSize(1); + + bool IsMax = (Opc == TargetOpcode::G_FMAXIMUM); + unsigned OpcIeee = + IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE; + unsigned OpcNonIeee = + IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM; + bool MinMaxMustRespectOrderedZero = false; + Register Res; + + // IEEE variants don't need canonicalization + if (LI.isLegalOrCustom({OpcIeee, Ty})) { + Res = MIRBuilder.buildInstr(OpcIeee, {Ty}, {Src0, Src1}).getReg(0); + MinMaxMustRespectOrderedZero = true; + } else if (LI.isLegalOrCustom({OpcNonIeee, Ty})) { + Res = MIRBuilder.buildInstr(OpcNonIeee, {Ty}, {Src0, Src1}).getReg(0); + } else { + auto Compare = MIRBuilder.buildFCmp( + IsMax ? 
CmpInst::FCMP_OGT : CmpInst::FCMP_OLT, CmpTy, Src0, Src1); + Res = MIRBuilder.buildSelect(Ty, Compare, Src0, Src1).getReg(0); + } + + // Propagate a NaN from either operand. The fixup select below may only be + // skipped when both inputs are known to never be NaN (or nnan is set). + if (!MI.getFlag(MachineInstr::FmNoNans) && + (!isKnownNeverNaN(Src0, MRI) || !isKnownNeverNaN(Src1, MRI))) { + auto IsOrdered = MIRBuilder.buildFCmp(CmpInst::FCMP_ORD, CmpTy, Src0, Src1); + + LLT ElementTy = Ty.isScalar() ? Ty : Ty.getElementType(); + APFloat NaNValue = APFloat::getNaN(getFltSemanticForLLT(ElementTy)); + Register NaN = MIRBuilder.buildFConstant(ElementTy, NaNValue).getReg(0); + if (Ty.isVector()) + NaN = MIRBuilder.buildSplatBuildVector(Ty, NaN).getReg(0); + + Res = MIRBuilder.buildSelect(Ty, IsOrdered, Res, NaN).getReg(0); + } + + // fminimum/fmaximum requires -0.0 less than +0.0 + if (!MinMaxMustRespectOrderedZero && !MI.getFlag(MachineInstr::FmNsz)) { + GISelValueTracking VT(MIRBuilder.getMF()); + KnownFPClass Src0Info = VT.computeKnownFPClass(Src0, fcZero); + KnownFPClass Src1Info = VT.computeKnownFPClass(Src1, fcZero); + + if (!Src0Info.isKnownNeverZero() && !Src1Info.isKnownNeverZero()) { + const unsigned Flags = MI.getFlags(); + Register Zero = MIRBuilder.buildFConstant(Ty, 0.0).getReg(0); + auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_OEQ, CmpTy, Res, Zero); + + unsigned TestClass = IsMax ? 
fcPosZero : fcNegZero; + + auto LHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src0, TestClass); + auto LHSSelect = + MIRBuilder.buildSelect(Ty, LHSTestZero, Src0, Res, Flags); + + auto RHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src1, TestClass); + auto RHSSelect = + MIRBuilder.buildSelect(Ty, RHSTestZero, Src1, LHSSelect, Flags); + + Res = MIRBuilder.buildSelect(Ty, IsZero, RHSSelect, Res, Flags).getReg(0); + } + } + + MIRBuilder.buildCopy(Dst, Res); + MI.eraseFromParent(); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) { // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c Register DstReg = MI.getOperand(0).getReg(); @@ -9010,6 +9084,8 @@ LegalizerHelper::lowerShuffleVector(MachineInstr &MI) { SmallVector<Register, 32> BuildVec; LLT EltTy = DstTy.getScalarType(); + DenseMap<unsigned, Register> CachedExtract; + for (int Idx : Mask) { if (Idx < 0) { if (!Undef.isValid()) @@ -9023,9 +9099,13 @@ LegalizerHelper::lowerShuffleVector(MachineInstr &MI) { int NumElts = Src0Ty.getNumElements(); Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg; int ExtractIdx = Idx < NumElts ? 
Idx : Idx - NumElts; - auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx); - auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK); - BuildVec.push_back(Extract.getReg(0)); + auto [It, Inserted] = CachedExtract.try_emplace(Idx); + if (Inserted) { + auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx); + It->second = + MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK).getReg(0); + } + BuildVec.push_back(It->second); } assert(DstTy.isVector() && "Unexpected scalar G_SHUFFLE_VECTOR"); diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index 72b364c..697b779 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -211,7 +211,7 @@ private: unsigned getSparseSetIndex() const { return VirtReg.virtRegIndex(); } }; - using LiveRegMap = SparseSet<LiveReg, unsigned, identity_cxx20, uint16_t>; + using LiveRegMap = SparseSet<LiveReg, unsigned, identity, uint16_t>; /// This map contains entries for each virtual register that is currently /// available in a physical register. 
LiveRegMap LiveVirtRegs; diff --git a/llvm/lib/CodeGen/RegisterUsageInfo.cpp b/llvm/lib/CodeGen/RegisterUsageInfo.cpp index 7a4628a..2ef380f 100644 --- a/llvm/lib/CodeGen/RegisterUsageInfo.cpp +++ b/llvm/lib/CodeGen/RegisterUsageInfo.cpp @@ -44,7 +44,7 @@ void PhysicalRegisterUsageInfo::setTargetMachine(const TargetMachine &TM) { } bool PhysicalRegisterUsageInfo::doInitialization(Module &M) { - RegMasks.grow(M.size()); + RegMasks.reserve(M.size()); return false; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index dee0909..a522650 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2015,9 +2015,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { Register InReg = FuncInfo.InitializeRegForValue(Inst); std::optional<CallingConv::ID> CallConv; - auto *CI = dyn_cast<CallInst>(Inst); - if (CI && !CI->isInlineAsm()) - CallConv = CI->getCallingConv(); + auto *CB = dyn_cast<CallBase>(Inst); + if (CB && !CB->isInlineAsm()) + CallConv = CB->getCallingConv(); RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg, Inst->getType(), CallConv); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 920dff9..da4e409 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -9899,6 +9899,18 @@ SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const { // Use a rotate by 8. This can be further expanded if necessary. return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); case MVT::i32: + // This is meant for ARM specifically, which has ROTR but no ROTL. 
+ if (isOperationLegalOrCustom(ISD::ROTR, VT)) { + SDValue Mask = DAG.getConstant(0x00FF00FF, dl, VT); + // (x & 0x00FF00FF) rotr 8 | (x rotl 8) & 0x00FF00FF + SDValue And = DAG.getNode(ISD::AND, dl, VT, Op, Mask); + SDValue Rotr = + DAG.getNode(ISD::ROTR, dl, VT, And, DAG.getConstant(8, dl, SHVT)); + SDValue Rotl = + DAG.getNode(ISD::ROTR, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); + SDValue And2 = DAG.getNode(ISD::AND, dl, VT, Rotl, Mask); + return DAG.getNode(ISD::OR, dl, VT, Rotr, And2); + } Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT)); diff --git a/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/llvm/lib/CodeGen/TargetOptionsImpl.cpp index 049efe8..c33bf8b 100644 --- a/llvm/lib/CodeGen/TargetOptionsImpl.cpp +++ b/llvm/lib/CodeGen/TargetOptionsImpl.cpp @@ -44,7 +44,7 @@ bool TargetOptions::FramePointerIsReserved(const MachineFunction &MF) const { return false; return StringSwitch<bool>(FPAttr.getValueAsString()) - .Cases("all", "non-leaf", "reserved", true) + .Cases({"all", "non-leaf", "reserved"}, true) .Case("none", false); } |
