diff options
Diffstat (limited to 'llvm/lib/CodeGen')
| -rw-r--r-- | llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 58 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 48 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 100 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 7 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/MIRParser/MIParser.cpp | 3 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/MachineVerifier.cpp | 16 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/RegAllocFast.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 95 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 12 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/TargetLoweringBase.cpp | 5 |
12 files changed, 249 insertions, 105 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index b425b95..1f10478 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -391,19 +391,6 @@ void CombinerHelper::applyCombineConcatVectors( MI.eraseFromParent(); } -bool CombinerHelper::matchCombineShuffleToBuildVector(MachineInstr &MI) const { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR && - "Invalid instruction"); - auto &Shuffle = cast<GShuffleVector>(MI); - - Register SrcVec1 = Shuffle.getSrc1Reg(); - Register SrcVec2 = Shuffle.getSrc2Reg(); - - LLT SrcVec1Type = MRI.getType(SrcVec1); - LLT SrcVec2Type = MRI.getType(SrcVec2); - return SrcVec1Type.isVector() && SrcVec2Type.isVector(); -} - void CombinerHelper::applyCombineShuffleToBuildVector(MachineInstr &MI) const { auto &Shuffle = cast<GShuffleVector>(MI); @@ -535,11 +522,9 @@ bool CombinerHelper::matchCombineShuffleVector( LLT DstType = MRI.getType(MI.getOperand(0).getReg()); Register Src1 = MI.getOperand(1).getReg(); LLT SrcType = MRI.getType(Src1); - // As bizarre as it may look, shuffle vector can actually produce - // scalar! This is because at the IR level a <1 x ty> shuffle - // vector is perfectly valid. - unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1; - unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1; + + unsigned DstNumElts = DstType.getNumElements(); + unsigned SrcNumElts = SrcType.getNumElements(); // If the resulting vector is smaller than the size of the source // vectors being concatenated, we won't be able to replace the @@ -556,7 +541,7 @@ bool CombinerHelper::matchCombineShuffleVector( // // TODO: If the size between the source and destination don't match // we could still emit an extract vector element in that case. - if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1) + if (DstNumElts < 2 * SrcNumElts) return false; // Check that the shuffle mask can be broken evenly between the @@ -619,39 +604,6 @@ void CombinerHelper::applyCombineShuffleVector( MI.eraseFromParent(); } -bool CombinerHelper::matchShuffleToExtract(MachineInstr &MI) const { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR && - "Invalid instruction kind"); - - ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); - return Mask.size() == 1; -} - -void CombinerHelper::applyShuffleToExtract(MachineInstr &MI) const { - Register DstReg = MI.getOperand(0).getReg(); - Builder.setInsertPt(*MI.getParent(), MI); - - int I = MI.getOperand(3).getShuffleMask()[0]; - Register Src1 = MI.getOperand(1).getReg(); - LLT Src1Ty = MRI.getType(Src1); - int Src1NumElts = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1; - Register SrcReg; - if (I >= Src1NumElts) { - SrcReg = MI.getOperand(2).getReg(); - I -= Src1NumElts; - } else if (I >= 0) - SrcReg = Src1; - - if (I < 0) - Builder.buildUndef(DstReg); - else if (!MRI.getType(SrcReg).isVector()) - Builder.buildCopy(DstReg, SrcReg); - else - Builder.buildExtractVectorElementConstant(DstReg, SrcReg, I); - - MI.eraseFromParent(); -} - namespace { /// Select a preference between two uses. CurrentUse is the current preference @@ -8369,7 +8321,7 @@ bool CombinerHelper::matchShuffleDisjointMask(MachineInstr &MI, return false; ArrayRef<int> Mask = Shuffle.getMask(); - const unsigned NumSrcElems = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1; + const unsigned NumSrcElems = Src1Ty.getNumElements(); bool TouchesSrc1 = false; bool TouchesSrc2 = false; diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp index 04d9309..d6f23b6 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp @@ -602,6 +602,8 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known, Depth + 1); computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts, Depth + 1); + OffsetKnown = OffsetKnown.sext(BitWidth); + WidthKnown = WidthKnown.sext(BitWidth); Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown); // Sign extend the extracted value using shift left and arithmetic shift // right. diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index b49040b..1fc90d0 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -3359,6 +3359,54 @@ bool IRTranslator::translateShuffleVector(const User &U, Mask = SVI->getShuffleMask(); else Mask = cast<ConstantExpr>(U).getShuffleMask(); + + // As GISel does not represent <1 x > vectors as a separate type from scalars, + // we transform shuffle_vector with a scalar output to an + // ExtractVectorElement. If the input type is also scalar it becomes a Copy. + unsigned DstElts = cast<FixedVectorType>(U.getType())->getNumElements(); + unsigned SrcElts = + cast<FixedVectorType>(U.getOperand(0)->getType())->getNumElements(); + if (DstElts == 1) { + unsigned M = Mask[0]; + if (SrcElts == 1) { + if (M == 0 || M == 1) + return translateCopy(U, *U.getOperand(M), MIRBuilder); + MIRBuilder.buildUndef(getOrCreateVReg(U)); + } else { + Register Dst = getOrCreateVReg(U); + if (M < SrcElts) { + MIRBuilder.buildExtractVectorElementConstant( + Dst, getOrCreateVReg(*U.getOperand(0)), M); + } else if (M < SrcElts * 2) { + MIRBuilder.buildExtractVectorElementConstant( + Dst, getOrCreateVReg(*U.getOperand(1)), M - SrcElts); + } else { + MIRBuilder.buildUndef(Dst); + } + } + return true; + } + + // A single element src is transformed to a build_vector. + if (SrcElts == 1) { + SmallVector<Register> Ops; + Register Undef; + for (int M : Mask) { + LLT SrcTy = getLLTForType(*U.getOperand(0)->getType(), *DL); + if (M == 0 || M == 1) { + Ops.push_back(getOrCreateVReg(*U.getOperand(M))); + } else { + if (!Undef.isValid()) { + Undef = MRI->createGenericVirtualRegister(SrcTy); + MIRBuilder.buildUndef(Undef); + } + Ops.push_back(Undef); + } + } + MIRBuilder.buildBuildVector(getOrCreateVReg(U), Ops); + return true; + } + ArrayRef<int> MaskAlloc = MF->allocateShuffleMask(Mask); MIRBuilder .buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {getOrCreateVReg(U)}, diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 38ec83f..178529f 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -4748,6 +4748,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { case G_FMINIMUMNUM: case G_FMAXIMUMNUM: return lowerFMinNumMaxNum(MI); + case G_FMINIMUM: + case G_FMAXIMUM: + return lowerFMinimumMaximum(MI); case G_MERGE_VALUES: return lowerMergeValues(MI); case G_UNMERGE_VALUES: @@ -5819,6 +5822,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle( } else if (InputUsed[0] == -1U) { // No input vectors were used! The result is undefined. Output = MIRBuilder.buildUndef(NarrowTy).getReg(0); + } else if (NewElts == 1) { + Output = MIRBuilder.buildCopy(NarrowTy, Inputs[InputUsed[0]]).getReg(0); } else { Register Op0 = Inputs[InputUsed[0]]; // If only one input was used, use an undefined vector for the other. @@ -8775,6 +8780,77 @@ LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) { return Legalized; } +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerFMinimumMaximum(MachineInstr &MI) { + unsigned Opc = MI.getOpcode(); + auto [Dst, Src0, Src1] = MI.getFirst3Regs(); + LLT Ty = MRI.getType(Dst); + LLT CmpTy = Ty.changeElementSize(1); + + bool IsMax = (Opc == TargetOpcode::G_FMAXIMUM); + unsigned OpcIeee = + IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE; + unsigned OpcNonIeee = + IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM; + bool MinMaxMustRespectOrderedZero = false; + Register Res; + + // IEEE variants don't need canonicalization + if (LI.isLegalOrCustom({OpcIeee, Ty})) { + Res = MIRBuilder.buildInstr(OpcIeee, {Ty}, {Src0, Src1}).getReg(0); + MinMaxMustRespectOrderedZero = true; + } else if (LI.isLegalOrCustom({OpcNonIeee, Ty})) { + Res = MIRBuilder.buildInstr(OpcNonIeee, {Ty}, {Src0, Src1}).getReg(0); + } else { + auto Compare = MIRBuilder.buildFCmp( + IsMax ? CmpInst::FCMP_OGT : CmpInst::FCMP_OLT, CmpTy, Src0, Src1); + Res = MIRBuilder.buildSelect(Ty, Compare, Src0, Src1).getReg(0); + } + + // Propagate any NaN of both operands + if (!MI.getFlag(MachineInstr::FmNoNans) && + (!isKnownNeverNaN(Src0, MRI) || isKnownNeverNaN(Src1, MRI))) { + auto IsOrdered = MIRBuilder.buildFCmp(CmpInst::FCMP_ORD, CmpTy, Src0, Src1); + + LLT ElementTy = Ty.isScalar() ? Ty : Ty.getElementType(); + APFloat NaNValue = APFloat::getNaN(getFltSemanticForLLT(ElementTy)); + Register NaN = MIRBuilder.buildFConstant(ElementTy, NaNValue).getReg(0); + if (Ty.isVector()) + NaN = MIRBuilder.buildSplatBuildVector(Ty, NaN).getReg(0); + + Res = MIRBuilder.buildSelect(Ty, IsOrdered, Res, NaN).getReg(0); + } + + // fminimum/fmaximum requires -0.0 less than +0.0 + if (!MinMaxMustRespectOrderedZero && !MI.getFlag(MachineInstr::FmNsz)) { + GISelValueTracking VT(MIRBuilder.getMF()); + KnownFPClass Src0Info = VT.computeKnownFPClass(Src0, fcZero); + KnownFPClass Src1Info = VT.computeKnownFPClass(Src1, fcZero); + + if (!Src0Info.isKnownNeverZero() && !Src1Info.isKnownNeverZero()) { + const unsigned Flags = MI.getFlags(); + Register Zero = MIRBuilder.buildFConstant(Ty, 0.0).getReg(0); + auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_OEQ, CmpTy, Res, Zero); + + unsigned TestClass = IsMax ? fcPosZero : fcNegZero; + + auto LHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src0, TestClass); + auto LHSSelect = + MIRBuilder.buildSelect(Ty, LHSTestZero, Src0, Res, Flags); + + auto RHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src1, TestClass); + auto RHSSelect = + MIRBuilder.buildSelect(Ty, RHSTestZero, Src1, LHSSelect, Flags); + + Res = MIRBuilder.buildSelect(Ty, IsZero, RHSSelect, Res, Flags).getReg(0); + } + } + + MIRBuilder.buildCopy(Dst, Res); + MI.eraseFromParent(); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) { // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c Register DstReg = MI.getOperand(0).getReg(); @@ -9016,22 +9092,18 @@ LegalizerHelper::lowerShuffleVector(MachineInstr &MI) { continue; } - if (Src0Ty.isScalar()) { - BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg); - } else { - int NumElts = Src0Ty.getNumElements(); - Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg; - int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts; - auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx); - auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK); - BuildVec.push_back(Extract.getReg(0)); - } + assert(!Src0Ty.isScalar() && "Unexpected scalar G_SHUFFLE_VECTOR"); + + int NumElts = Src0Ty.getNumElements(); + Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg; + int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts; + auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx); + auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK); + BuildVec.push_back(Extract.getReg(0)); } - if (DstTy.isVector()) - MIRBuilder.buildBuildVector(DstReg, BuildVec); - else - MIRBuilder.buildCopy(DstReg, BuildVec[0]); + assert(DstTy.isVector() && "Unexpected scalar G_SHUFFLE_VECTOR"); + MIRBuilder.buildBuildVector(DstReg, BuildVec); MI.eraseFromParent(); return Legalized; } diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 27df7e3..4b4df98 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -800,10 +800,11 @@ MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res, LLT DstTy = Res.getLLTTy(*getMRI()); LLT Src1Ty = Src1.getLLTTy(*getMRI()); LLT Src2Ty = Src2.getLLTTy(*getMRI()); - const LLT DstElemTy = DstTy.isVector() ? DstTy.getElementType() : DstTy; - const LLT ElemTy1 = Src1Ty.isVector() ? Src1Ty.getElementType() : Src1Ty; - const LLT ElemTy2 = Src2Ty.isVector() ? Src2Ty.getElementType() : Src2Ty; + const LLT DstElemTy = DstTy.getScalarType(); + const LLT ElemTy1 = Src1Ty.getScalarType(); + const LLT ElemTy2 = Src2Ty.getScalarType(); assert(DstElemTy == ElemTy1 && DstElemTy == ElemTy2); + assert(Mask.size() > 1 && "Scalar G_SHUFFLE_VECTOR are not supported"); (void)DstElemTy; (void)ElemTy1; (void)ElemTy2; diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 6a464d9..4795d81 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -2788,6 +2788,9 @@ bool MIParser::parseShuffleMaskOperand(MachineOperand &Dest) { if (expectAndConsume(MIToken::rparen)) return error("shufflemask should be terminated by ')'."); + if (ShufMask.size() < 2) + return error("shufflemask should have > 1 element"); + ArrayRef<int> MaskAlloc = MF.allocateShuffleMask(ShufMask); Dest = MachineOperand::CreateShuffleMask(MaskAlloc); return false; diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index 1154855..c0710c4 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -1924,13 +1924,23 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { if (Src0Ty != Src1Ty) report("Source operands must be the same type", MI); - if (Src0Ty.getScalarType() != DstTy.getScalarType()) + if (Src0Ty.getScalarType() != DstTy.getScalarType()) { report("G_SHUFFLE_VECTOR cannot change element type", MI); + break; + } + if (!Src0Ty.isVector()) { + report("G_SHUFFLE_VECTOR must have vector src", MI); + break; + } + if (!DstTy.isVector()) { + report("G_SHUFFLE_VECTOR must have vector dst", MI); + break; + } // Don't check that all operands are vector because scalars are used in // place of 1 element vectors. - int SrcNumElts = Src0Ty.isVector() ? Src0Ty.getNumElements() : 1; - int DstNumElts = DstTy.isVector() ? DstTy.getNumElements() : 1; + int SrcNumElts = Src0Ty.getNumElements(); + int DstNumElts = DstTy.getNumElements(); ArrayRef<int> MaskIdxes = MaskOp.getShuffleMask(); diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index 72b364c..697b779 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -211,7 +211,7 @@ private: unsigned getSparseSetIndex() const { return VirtReg.virtRegIndex(); } }; - using LiveRegMap = SparseSet<LiveReg, unsigned, identity_cxx20, uint16_t>; + using LiveRegMap = SparseSet<LiveReg, unsigned, identity, uint16_t>; /// This map contains entries for each virtual register that is currently /// available in a physical register. LiveRegMap LiveVirtRegs; diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d2ea652..8676060 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -19993,8 +19993,12 @@ static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad, // nor a successor of N. Otherwise, if Op is folded that would // create a cycle. unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps(); - for (SDNode *Op : Ptr->users()) { + for (SDUse &U : Ptr->uses()) { + if (U.getResNo() != Ptr.getResNo()) + continue; + // Check for #1. + SDNode *Op = U.getUser(); if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI)) continue; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index bfa566a..dee0909 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1162,6 +1162,43 @@ SDValue SelectionDAGBuilder::getMemoryRoot() { return updateRoot(PendingLoads); } +SDValue SelectionDAGBuilder::getFPOperationRoot(fp::ExceptionBehavior EB) { + // If the new exception behavior differs from that of the pending + // ones, chain up them and update the root. + switch (EB) { + case fp::ExceptionBehavior::ebMayTrap: + case fp::ExceptionBehavior::ebIgnore: + // Floating-point exceptions produced by such operations are not intended + // to be observed, so the sequence of these operations does not need to be + // preserved. + // + // They however must not be mixed with the instructions that have strict + // exception behavior. Placing an operation with 'ebIgnore' behavior between + // 'ebStrict' operations could distort the observed exception behavior. + if (!PendingConstrainedFPStrict.empty()) { + assert(PendingConstrainedFP.empty()); + updateRoot(PendingConstrainedFPStrict); + } + break; + case fp::ExceptionBehavior::ebStrict: + // Floating-point exception produced by these operations may be observed, so + // they must be correctly chained. If trapping on FP exceptions is + // disabled, the exceptions can be observed only by functions that read + // exception flags, like 'llvm.get_fpenv' or 'fetestexcept'. It means that + // the order of operations is not significant between barriers. + // + // If trapping is enabled, each operation becomes an implicit observation + // point, so the operations must be sequenced according their original + // source order. + if (!PendingConstrainedFP.empty()) { + assert(PendingConstrainedFPStrict.empty()); + updateRoot(PendingConstrainedFP); + } + // TODO: Add support for trapping-enabled scenarios. + } + return DAG.getRoot(); +} + SDValue SelectionDAGBuilder::getRoot() { // Chain up all pending constrained intrinsics together with all // pending loads, by simply appending them to PendingLoads and @@ -8298,6 +8335,30 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } } +void SelectionDAGBuilder::pushFPOpOutChain(SDValue Result, + fp::ExceptionBehavior EB) { + assert(Result.getNode()->getNumValues() == 2); + SDValue OutChain = Result.getValue(1); + assert(OutChain.getValueType() == MVT::Other); + + // Instead of updating the root immediately, push the produced chain to the + // appropriate list, deferring the update until the root is requested. In this + // case, the nodes from the lists are chained using TokenFactor, indicating + // that the operations are independent. + // + // In particular, the root is updated before any call that might access the + // floating-point environment, except for constrained intrinsics. + switch (EB) { + case fp::ExceptionBehavior::ebMayTrap: + case fp::ExceptionBehavior::ebIgnore: + PendingConstrainedFP.push_back(OutChain); + break; + case fp::ExceptionBehavior::ebStrict: + PendingConstrainedFPStrict.push_back(OutChain); + break; + } +} + void SelectionDAGBuilder::visitConstrainedFPIntrinsic( const ConstrainedFPIntrinsic &FPI) { SDLoc sdl = getCurSDLoc(); @@ -8305,42 +8366,16 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( // We do not need to serialize constrained FP intrinsics against // each other or against (nonvolatile) loads, so they can be // chained like loads. - SDValue Chain = DAG.getRoot(); + fp::ExceptionBehavior EB = *FPI.getExceptionBehavior(); + SDValue Chain = getFPOperationRoot(EB); SmallVector<SDValue, 4> Opers; Opers.push_back(Chain); for (unsigned I = 0, E = FPI.getNonMetadataArgCount(); I != E; ++I) Opers.push_back(getValue(FPI.getArgOperand(I))); - auto pushOutChain = [this](SDValue Result, fp::ExceptionBehavior EB) { - assert(Result.getNode()->getNumValues() == 2); - - // Push node to the appropriate list so that future instructions can be - // chained up correctly. - SDValue OutChain = Result.getValue(1); - switch (EB) { - case fp::ExceptionBehavior::ebIgnore: - // The only reason why ebIgnore nodes still need to be chained is that - // they might depend on the current rounding mode, and therefore must - // not be moved across instruction that may change that mode. - [[fallthrough]]; - case fp::ExceptionBehavior::ebMayTrap: - // These must not be moved across calls or instructions that may change - // floating-point exception masks. - PendingConstrainedFP.push_back(OutChain); - break; - case fp::ExceptionBehavior::ebStrict: - // These must not be moved across calls or instructions that may change - // floating-point exception masks or read floating-point exception flags. - // In addition, they cannot be optimized out even if unused. - PendingConstrainedFPStrict.push_back(OutChain); - break; - } - }; - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), FPI.getType()); SDVTList VTs = DAG.getVTList(VT, MVT::Other); - fp::ExceptionBehavior EB = *FPI.getExceptionBehavior(); SDNodeFlags Flags; if (EB == fp::ExceptionBehavior::ebIgnore) @@ -8364,7 +8399,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( !TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) { Opers.pop_back(); SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, sdl, VTs, Opers, Flags); - pushOutChain(Mul, EB); + pushFPOpOutChain(Mul, EB); Opcode = ISD::STRICT_FADD; Opers.clear(); Opers.push_back(Mul.getValue(1)); @@ -8395,7 +8430,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( } SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers, Flags); - pushOutChain(Result, EB); + pushFPOpOutChain(Result, EB); SDValue FPResult = Result.getValue(0); setValue(&FPI, FPResult); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index c7577fa..47e19f7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -195,6 +195,11 @@ private: /// Update root to include all chains from the Pending list. SDValue updateRoot(SmallVectorImpl<SDValue> &Pending); + /// Given a node representing a floating-point operation and its specified + /// exception behavior, this either updates the root or stores the node in + /// a list to be added to chains latter. + void pushFPOpOutChain(SDValue Result, fp::ExceptionBehavior EB); + /// A unique monotonically increasing number used to order the SDNodes we /// create. unsigned SDNodeOrder; @@ -300,6 +305,13 @@ public: /// memory node that may need to be ordered after any prior load instructions. SDValue getMemoryRoot(); + /// Return the current virtual root of the Selection DAG, flushing + /// PendingConstrainedFP or PendingConstrainedFPStrict items if the new + /// exception behavior (specified by \p EB) differs from that of the pending + /// instructions. This must be done before emitting constrained FP operation + /// call. + SDValue getFPOperationRoot(fp::ExceptionBehavior EB); + /// Similar to getMemoryRoot, but also flushes PendingConstrainedFP(Strict) /// items. This must be done before emitting any call other any other node /// that may need to be ordered after FP instructions due to other side diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 060b1dd..59798b3 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -2097,6 +2097,11 @@ Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const { } Function *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const { + // MSVC CRT has a function to validate security cookie. + RTLIB::LibcallImpl SecurityCheckCookieLibcall = + getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE); + if (SecurityCheckCookieLibcall != RTLIB::Unsupported) + return M.getFunction(getLibcallImplName(SecurityCheckCookieLibcall)); return nullptr; } |
