diff options
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 204 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 26 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 10 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 70 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 17 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 1 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 32 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 1 |
9 files changed, 234 insertions, 129 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index c5c3866..c97300d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -509,6 +509,7 @@ namespace { SDValue visitFMUL(SDNode *N); template <class MatchContextClass> SDValue visitFMA(SDNode *N); SDValue visitFMAD(SDNode *N); + SDValue visitFMULADD(SDNode *N); SDValue visitFDIV(SDNode *N); SDValue visitFREM(SDNode *N); SDValue visitFSQRT(SDNode *N); @@ -657,13 +658,13 @@ namespace { bool InexpensiveOnly = false, std::optional<EVT> OutVT = std::nullopt); SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags); - SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags); - SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags); - SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip); + SDValue buildRsqrtEstimate(SDValue Op); + SDValue buildSqrtEstimate(SDValue Op); + SDValue buildSqrtEstimateImpl(SDValue Op, bool Recip); SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations, - SDNodeFlags Flags, bool Reciprocal); + bool Reciprocal); SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations, - SDNodeFlags Flags, bool Reciprocal); + bool Reciprocal); SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); @@ -1991,6 +1992,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FMUL: return visitFMUL(N); case ISD::FMA: return visitFMA<EmptyMatchContext>(N); case ISD::FMAD: return visitFMAD(N); + case ISD::FMULADD: return visitFMULADD(N); case ISD::FDIV: return visitFDIV(N); case ISD::FREM: return visitFREM(N); case ISD::FSQRT: return visitFSQRT(N); @@ -5042,7 +5044,6 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) { unsigned Opc = N->getOpcode(); bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc); - ConstantSDNode *N1C = isConstOrConstSplat(N1); // X / undef -> undef // X % undef -> undef @@ -5074,7 +5075,7 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) { // division-by-zero or remainder-by-zero, so assume the divisor is 1. // TODO: Similarly, if we're zero-extending a boolean divisor, then assume // it's a 1. - if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1)) + if (isOneOrOneSplat(N1) || (VT.getScalarType() == MVT::i1)) return IsDiv ? N0 : DAG.getConstant(0, DL, VT); return SDValue(); @@ -10628,7 +10629,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // folding this will increase the total number of instructions. if (N0.getOpcode() == ISD::SRL && (N0.getOperand(1) == N1 || N0.hasOneUse()) && - TLI.shouldFoldConstantShiftPairToMask(N, Level)) { + TLI.shouldFoldConstantShiftPairToMask(N)) { if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount, /*AllowUndefs*/ false, /*AllowTypeMismatch*/ true)) { @@ -11207,7 +11208,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) or // (and (srl x, (sub c2, c1), MASK) if ((N0.getOperand(1) == N1 || N0->hasOneUse()) && - TLI.shouldFoldConstantShiftPairToMask(N, Level)) { + TLI.shouldFoldConstantShiftPairToMask(N)) { auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS, ConstantSDNode *RHS) { const APInt &LHSC = LHS->getAPIntValue(); @@ -17086,11 +17087,6 @@ static bool isContractableFMUL(const TargetOptions &Options, SDValue N) { N->getFlags().hasAllowContract(); } -// Returns true if `N` can assume no infinities involved in its computation. -static bool hasNoInfs(const TargetOptions &Options, SDValue N) { - return Options.NoInfsFPMath || N->getFlags().hasNoInfs(); -} - /// Try to perform FMA combining on a given FADD node. template <class MatchContextClass> SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { @@ -17666,7 +17662,7 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { // The transforms below are incorrect when x == 0 and y == inf, because the // intermediate multiplication produces a nan. SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1; - if (!hasNoInfs(Options, FAdd)) + if (!FAdd->getFlags().hasNoInfs()) return SDValue(); // Floating-point multiply-add without intermediate rounding. @@ -17763,7 +17759,6 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1); EVT VT = N->getValueType(0); SDLoc DL(N); - const TargetOptions &Options = DAG.getTarget().Options; SDNodeFlags Flags = N->getFlags(); SelectionDAG::FlagInserter FlagsInserter(DAG, N); @@ -17829,7 +17824,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { bool AllowNewConst = (Level < AfterLegalizeDAG); // If nnan is enabled, fold lots of things. - if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) { + if (Flags.hasNoNaNs() && AllowNewConst) { // If allowed, fold (fadd (fneg x), x) -> 0.0 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) return DAG.getConstantFP(0.0, DL, VT); @@ -17978,7 +17973,6 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true); EVT VT = N->getValueType(0); SDLoc DL(N); - const TargetOptions &Options = DAG.getTarget().Options; const SDNodeFlags Flags = N->getFlags(); SelectionDAG::FlagInserter FlagsInserter(DAG, N); @@ -18006,7 +18000,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (N0 == N1) { // (fsub x, x) -> 0.0 - if (Options.NoNaNsFPMath || Flags.hasNoNaNs()) + if (Flags.hasNoNaNs()) return DAG.getConstantFP(0.0f, DL, VT); } @@ -18317,7 +18311,6 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) { ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2); EVT VT = N->getValueType(0); SDLoc DL(N); - const TargetOptions &Options = DAG.getTarget().Options; // FMA nodes have flags that propagate to the created nodes. SelectionDAG::FlagInserter FlagsInserter(DAG, N); MatchContextClass matcher(DAG, TLI, N); @@ -18343,8 +18336,7 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) { return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2); } - if ((Options.NoNaNsFPMath && Options.NoInfsFPMath) || - (N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs())) { + if (N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs()) { if (N->getFlags().hasNoSignedZeros() || (N2CFP && !N2CFP->isExactlyValue(-0.0))) { if (N0CFP && N0CFP->isZero()) @@ -18449,6 +18441,21 @@ SDValue DAGCombiner::visitFMAD(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFMULADD(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + EVT VT = N->getValueType(0); + SDLoc DL(N); + + // Constant fold FMULADD. + if (SDValue C = + DAG.FoldConstantArithmetic(ISD::FMULADD, DL, VT, {N0, N1, N2})) + return C; + + return SDValue(); +} + // Combine multiple FDIVs with the same divisor into multiple FMULs by the // reciprocal. // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) @@ -18533,7 +18540,6 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); SDLoc DL(N); - const TargetOptions &Options = DAG.getTarget().Options; SDNodeFlags Flags = N->getFlags(); SelectionDAG::FlagInserter FlagsInserter(DAG, N); @@ -18580,20 +18586,18 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // If this FDIV is part of a reciprocal square root, it may be folded // into a target-specific square root estimate instruction. if (N1.getOpcode() == ISD::FSQRT) { - if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0))) return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); } else if (N1.getOpcode() == ISD::FP_EXTEND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = - buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) { + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV); AddToWorklist(RV.getNode()); return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); } } else if (N1.getOpcode() == ISD::FP_ROUND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = - buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) { + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1)); AddToWorklist(RV.getNode()); return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); @@ -18625,7 +18629,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A); SDValue AAZ = DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0)); - if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags)) + if (SDValue Rsqrt = buildRsqrtEstimate(AAZ)) return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt); // Estimate creation failed. Clean up speculatively created nodes. @@ -18635,7 +18639,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // We found a FSQRT, so try to make this fold: // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y) - if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) { + if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0))) { SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y); AddToWorklist(Div.getNode()); return DAG.getNode(ISD::FMUL, DL, VT, N0, Div); @@ -18644,7 +18648,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { } // Fold into a reciprocal estimate and multiply instead of a real divide. - if (Options.NoInfsFPMath || Flags.hasNoInfs()) + if (Flags.hasNoInfs()) if (SDValue RV = BuildDivEstimate(N0, N1, Flags)) return RV; } @@ -18721,12 +18725,10 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { SDValue DAGCombiner::visitFSQRT(SDNode *N) { SDNodeFlags Flags = N->getFlags(); - const TargetOptions &Options = DAG.getTarget().Options; // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as: // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN - if (!Flags.hasApproximateFuncs() || - (!Options.NoInfsFPMath && !Flags.hasNoInfs())) + if (!Flags.hasApproximateFuncs() || !Flags.hasNoInfs()) return SDValue(); SDValue N0 = N->getOperand(0); @@ -18734,11 +18736,12 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { return SDValue(); // FSQRT nodes have flags that propagate to the created nodes. + SelectionDAG::FlagInserter FlagInserter(DAG, Flags); // TODO: If this is N0/sqrt(N0), and we reach this node before trying to // transform the fdiv, we may produce a sub-optimal estimate sequence // because the reciprocal calculation may not have to filter out a // 0.0 input. - return buildSqrtEstimate(N0, Flags); + return buildSqrtEstimate(N0); } /// copysign(x, fp_extend(y)) -> copysign(x, y) @@ -18870,27 +18873,38 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) { static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI) { - // We only do this if the target has legal ftrunc. Otherwise, we'd likely be - // replacing casts with a libcall. We also must be allowed to ignore -0.0 - // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer - // conversions would return +0.0. + // We can fold the fpto[us]i -> [us]itofp pattern into a single ftrunc. + // If NoSignedZerosFPMath is enabled, this is a direct replacement. + // Otherwise, for strict math, we must handle edge cases: + // 1. For unsigned conversions, use FABS to handle negative cases. Take -0.0 + // as example, it first becomes integer 0, and is converted back to +0.0. + // FTRUNC on its own could produce -0.0. + // FIXME: We should be able to use node-level FMF here. - // TODO: If strict math, should we use FABS (+ range check for signed cast)? EVT VT = N->getValueType(0); - if (!TLI.isOperationLegal(ISD::FTRUNC, VT) || - !DAG.getTarget().Options.NoSignedZerosFPMath) + if (!TLI.isOperationLegal(ISD::FTRUNC, VT)) return SDValue(); // fptosi/fptoui round towards zero, so converting from FP to integer and // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X SDValue N0 = N->getOperand(0); if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT && - N0.getOperand(0).getValueType() == VT) - return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0)); + N0.getOperand(0).getValueType() == VT) { + if (DAG.getTarget().Options.NoSignedZerosFPMath) + return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0)); + } if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT && - N0.getOperand(0).getValueType() == VT) - return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0)); + N0.getOperand(0).getValueType() == VT) { + if (DAG.getTarget().Options.NoSignedZerosFPMath) + return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0)); + + // Strict math: use FABS to handle negative inputs correctly. + if (TLI.isFAbsFree(VT)) { + SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, N0.getOperand(0)); + return DAG.getNode(ISD::FTRUNC, DL, VT, Abs); + } + } return SDValue(); } @@ -19340,8 +19354,10 @@ SDValue DAGCombiner::visitFMinMax(SDNode *N) { EVT VT = N->getValueType(0); const SDNodeFlags Flags = N->getFlags(); unsigned Opc = N->getOpcode(); - bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM; - bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM; + bool PropAllNaNsToQNaNs = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM; + bool PropOnlySNaNsToQNaNs = Opc == ISD::FMINNUM || Opc == ISD::FMAXNUM; + bool IsMin = + Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM || Opc == ISD::FMINIMUMNUM; SelectionDAG::FlagInserter FlagsInserter(DAG, N); // Constant fold. @@ -19356,34 +19372,53 @@ SDValue DAGCombiner::visitFMinMax(SDNode *N) { if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) { const APFloat &AF = N1CFP->getValueAPF(); - // minnum(X, nan) -> X - // maxnum(X, nan) -> X - // minimum(X, nan) -> nan - // maximum(X, nan) -> nan - if (AF.isNaN()) - return PropagatesNaN ? N->getOperand(1) : N->getOperand(0); + // minnum(X, qnan) -> X + // maxnum(X, qnan) -> X + // minnum(X, snan) -> qnan + // maxnum(X, snan) -> qnan + // minimum(X, nan) -> qnan + // maximum(X, nan) -> qnan + // minimumnum(X, nan) -> X + // maximumnum(X, nan) -> X + if (AF.isNaN()) { + if (PropAllNaNsToQNaNs || (AF.isSignaling() && PropOnlySNaNsToQNaNs)) { + if (AF.isSignaling()) + return DAG.getConstantFP(AF.makeQuiet(), SDLoc(N), VT); + return N->getOperand(1); + } + return N->getOperand(0); + } // In the following folds, inf can be replaced with the largest finite // float, if the ninf flag is set. if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) { - // minnum(X, -inf) -> -inf - // maxnum(X, +inf) -> +inf + // minnum(X, -inf) -> -inf (ignoring sNaN -> qNaN propagation) + // maxnum(X, +inf) -> +inf (ignoring sNaN -> qNaN propagation) // minimum(X, -inf) -> -inf if nnan // maximum(X, +inf) -> +inf if nnan - if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs())) + // minimumnum(X, -inf) -> -inf + // maximumnum(X, +inf) -> +inf + if (IsMin == AF.isNegative() && + (!PropAllNaNsToQNaNs || Flags.hasNoNaNs())) return N->getOperand(1); // minnum(X, +inf) -> X if nnan // maxnum(X, -inf) -> X if nnan - // minimum(X, +inf) -> X - // maximum(X, -inf) -> X - if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs())) + // minimum(X, +inf) -> X (ignoring quieting of sNaNs) + // maximum(X, -inf) -> X (ignoring quieting of sNaNs) + // minimumnum(X, +inf) -> X if nnan + // maximumnum(X, -inf) -> X if nnan + if (IsMin != AF.isNegative() && (PropAllNaNsToQNaNs || Flags.hasNoNaNs())) return N->getOperand(0); } } + // There are no VECREDUCE variants of FMINIMUMNUM or FMAXIMUMNUM + if (Opc == ISD::FMINIMUMNUM || Opc == ISD::FMAXIMUMNUM) + return SDValue(); + if (SDValue SD = reassociateReduction( - PropagatesNaN + PropAllNaNsToQNaNs ? (IsMin ? ISD::VECREDUCE_FMINIMUM : ISD::VECREDUCE_FMAXIMUM) : (IsMin ? ISD::VECREDUCE_FMIN : ISD::VECREDUCE_FMAX), Opc, SDLoc(N), VT, N0, N1, Flags)) @@ -29703,28 +29738,27 @@ SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op, /// X_{i+1} = X_i (1.5 - A X_i^2 / 2) /// As a result, we precompute A/2 prior to the iteration loop. SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, - unsigned Iterations, - SDNodeFlags Flags, bool Reciprocal) { + unsigned Iterations, bool Reciprocal) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT); // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that // this entire sequence requires only one FP constant. - SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags); - HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags); + SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg); + HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg); // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) for (unsigned i = 0; i < Iterations; ++i) { - SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); - NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags); - NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est); + NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst); + NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst); } // If non-reciprocal square root is requested, multiply the result by Arg. if (!Reciprocal) - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg); return Est; } @@ -29735,8 +29769,7 @@ SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, /// => /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, - unsigned Iterations, - SDNodeFlags Flags, bool Reciprocal) { + unsigned Iterations, bool Reciprocal) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT); @@ -29749,9 +29782,9 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, // Newton iterations for reciprocal square root: // E = (E * -0.5) * ((A * E) * E + -3.0) for (unsigned i = 0; i < Iterations; ++i) { - SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags); - SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags); - SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags); + SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est); + SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est); + SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree); // When calculating a square root at the last iteration build: // S = ((A * E) * -0.5) * ((A * E) * E + -3.0) @@ -29759,13 +29792,13 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, SDValue LHS; if (Reciprocal || (i + 1) < Iterations) { // RSQRT: LHS = (E * -0.5) - LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags); + LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf); } else { // SQRT: LHS = (A * E) * -0.5 - LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags); + LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf); } - Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags); + Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS); } return Est; @@ -29774,8 +29807,7 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if /// Op can be zero. -SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, - bool Reciprocal) { +SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, bool Reciprocal) { if (LegalDAG) return SDValue(); @@ -29803,8 +29835,8 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, if (Iterations > 0) Est = UseOneConstNR - ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal) - : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal); + ? buildSqrtNROneConst(Op, Est, Iterations, Reciprocal) + : buildSqrtNRTwoConst(Op, Est, Iterations, Reciprocal); if (!Reciprocal) { SDLoc DL(Op); // Try the target specific test first. @@ -29822,12 +29854,12 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, return SDValue(); } -SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) { - return buildSqrtEstimateImpl(Op, Flags, true); +SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op) { + return buildSqrtEstimateImpl(Op, true); } -SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) { - return buildSqrtEstimateImpl(Op, Flags, false); +SDValue DAGCombiner::buildSqrtEstimate(SDValue Op) { + return buildSqrtEstimateImpl(Op, false); } /// Return true if there is any possibility that the two addresses overlap. diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index b5f8a61..437d0f4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -3313,7 +3313,6 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { case ISD::FP_ROUND: R = SoftPromoteHalfRes_FP_ROUND(N); break; // Unary FP Operations - case ISD::FABS: case ISD::FACOS: case ISD::FASIN: case ISD::FATAN: @@ -3329,7 +3328,6 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { case ISD::FLOG2: case ISD::FLOG10: case ISD::FNEARBYINT: - case ISD::FNEG: case ISD::FREEZE: case ISD::FRINT: case ISD::FROUND: @@ -3341,6 +3339,12 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { case ISD::FTAN: case ISD::FTANH: case ISD::FCANONICALIZE: R = SoftPromoteHalfRes_UnaryOp(N); break; + case ISD::FABS: + R = SoftPromoteHalfRes_FABS(N); + break; + case ISD::FNEG: + R = SoftPromoteHalfRes_FNEG(N); + break; case ISD::AssertNoFPClass: R = SoftPromoteHalfRes_AssertNoFPClass(N); break; @@ -3670,6 +3674,24 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_UnaryOp(SDNode *N) { return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res); } +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FABS(SDNode *N) { + SDValue Op = GetSoftPromotedHalf(N->getOperand(0)); + SDLoc dl(N); + + // Clear the sign bit. + return DAG.getNode(ISD::AND, dl, MVT::i16, Op, + DAG.getConstant(0x7fff, dl, MVT::i16)); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FNEG(SDNode *N) { + SDValue Op = GetSoftPromotedHalf(N->getOperand(0)); + SDLoc dl(N); + + // Invert the sign bit. + return DAG.getNode(ISD::XOR, dl, MVT::i16, Op, + DAG.getConstant(0x8000, dl, MVT::i16)); +} + SDValue DAGTypeLegalizer::SoftPromoteHalfRes_AssertNoFPClass(SDNode *N) { return GetSoftPromotedHalf(N->getOperand(0)); } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index d580ce0..603dc34 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -832,6 +832,8 @@ private: SDValue SoftPromoteHalfRes_SELECT(SDNode *N); SDValue SoftPromoteHalfRes_SELECT_CC(SDNode *N); SDValue SoftPromoteHalfRes_UnaryOp(SDNode *N); + SDValue SoftPromoteHalfRes_FABS(SDNode *N); + SDValue SoftPromoteHalfRes_FNEG(SDNode *N); SDValue SoftPromoteHalfRes_AssertNoFPClass(SDNode *N); SDValue SoftPromoteHalfRes_XINT_TO_FP(SDNode *N); SDValue SoftPromoteHalfRes_UNDEF(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 87d5453..3b5f83f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -3416,7 +3416,7 @@ void DAGTypeLegalizer::SplitVecRes_PARTIAL_REDUCE_MLA(SDNode *N, SDValue &Lo, SDValue Input2 = N->getOperand(2); SDValue AccLo, AccHi; - std::tie(AccLo, AccHi) = DAG.SplitVector(Acc, DL); + GetSplitVector(Acc, AccLo, AccHi); unsigned Opcode = N->getOpcode(); // If the input types don't need splitting, just accumulate into the @@ -3429,8 +3429,8 @@ void DAGTypeLegalizer::SplitVecRes_PARTIAL_REDUCE_MLA(SDNode *N, SDValue &Lo, SDValue Input1Lo, Input1Hi; SDValue Input2Lo, Input2Hi; - std::tie(Input1Lo, Input1Hi) = DAG.SplitVector(Input1, DL); - std::tie(Input2Lo, Input2Hi) = DAG.SplitVector(Input2, DL); + GetSplitVector(Input1, Input1Lo, Input1Hi); + GetSplitVector(Input2, Input2Lo, Input2Hi); EVT ResultVT = AccLo.getValueType(); Lo = DAG.getNode(Opcode, DL, ResultVT, AccLo, Input1Lo, Input2Lo); @@ -4761,8 +4761,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_PARTIAL_REDUCE_MLA(SDNode *N) { SDLoc DL(N); SDValue Input1Lo, Input1Hi, Input2Lo, Input2Hi; - std::tie(Input1Lo, Input1Hi) = DAG.SplitVector(N->getOperand(1), DL); - std::tie(Input2Lo, Input2Hi) = DAG.SplitVector(N->getOperand(2), DL); + GetSplitVector(N->getOperand(1), Input1Lo, Input1Hi); + GetSplitVector(N->getOperand(2), Input2Lo, Input2Hi); unsigned Opcode = N->getOpcode(); EVT ResultVT = Acc.getValueType(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 6ea2e27..90edaf3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5063,8 +5063,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, break; case ISD::ADD: case ISD::ADDC: - // Add can have at most one carry bit. Thus we know that the output - // is, at worst, one more bit than the inputs. + // TODO: Move Operand 1 check before Operand 0 check Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); if (Tmp == 1) return 1; // Early out. @@ -5088,6 +5087,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); if (Tmp2 == 1) return 1; // Early out. + + // Add can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. return std::min(Tmp, Tmp2) - 1; case ISD::SUB: Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); @@ -5767,11 +5769,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, // even if the nonan flag is dropped somewhere. unsigned CCOp = Opcode == ISD::SETCC ? 2 : 4; ISD::CondCode CCCode = cast<CondCodeSDNode>(Op.getOperand(CCOp))->get(); - if (((unsigned)CCCode & 0x10U)) - return true; - - const TargetOptions &Options = getTarget().Options; - return Options.NoNaNsFPMath || Options.NoInfsFPMath; + return (unsigned)CCCode & 0x10U; } case ISD::OR: @@ -5790,6 +5788,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, case ISD::FCOPYSIGN: case ISD::FMA: case ISD::FMAD: + case ISD::FMULADD: case ISD::FP_EXTEND: case ISD::FP_TO_SINT_SAT: case ISD::FP_TO_UINT_SAT: @@ -5908,6 +5907,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, case ISD::FCOSH: case ISD::FTANH: case ISD::FMA: + case ISD::FMULADD: case ISD::FMAD: { if (SNaN) return true; @@ -6405,8 +6405,9 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT, if (VT.isScalableVector()) return SDValue(); - // A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be - // simplified to one big BUILD_VECTOR. + // A CONCAT_VECTOR of scalar sources, such as UNDEF, BUILD_VECTOR and + // single-element INSERT_VECTOR_ELT operands can be simplified to one big + // BUILD_VECTOR. // FIXME: Add support for SCALAR_TO_VECTOR as well. EVT SVT = VT.getScalarType(); SmallVector<SDValue, 16> Elts; @@ -6416,6 +6417,10 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT, Elts.append(OpVT.getVectorNumElements(), DAG.getUNDEF(SVT)); else if (Op.getOpcode() == ISD::BUILD_VECTOR) Elts.append(Op->op_begin(), Op->op_end()); + else if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && + OpVT.getVectorNumElements() == 1 && + isNullConstant(Op.getOperand(2))) + Elts.push_back(Op.getOperand(1)); else return SDValue(); } @@ -7235,7 +7240,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, } // Handle fma/fmad special cases. - if (Opcode == ISD::FMA || Opcode == ISD::FMAD) { + if (Opcode == ISD::FMA || Opcode == ISD::FMAD || Opcode == ISD::FMULADD) { assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); assert(Ops[0].getValueType() == VT && Ops[1].getValueType() == VT && Ops[2].getValueType() == VT && "FMA types must match!"); @@ -7246,7 +7251,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, APFloat V1 = C1->getValueAPF(); const APFloat &V2 = C2->getValueAPF(); const APFloat &V3 = C3->getValueAPF(); - if (Opcode == ISD::FMAD) { + if (Opcode == ISD::FMAD || Opcode == ISD::FMULADD) { V1.multiply(V2, APFloat::rmNearestTiesToEven); V1.add(V3, APFloat::rmNearestTiesToEven); } else @@ -8785,7 +8790,7 @@ static SDValue getMemcpyLoadsAndStores( if (Value.getNode()) { Store = DAG.getStore( Chain, dl, Value, - DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl), + DAG.getObjectPtrOffset(dl, Dst, TypeSize::getFixed(DstOff)), DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo); OutChains.push_back(Store); } @@ -8801,7 +8806,7 @@ static SDValue getMemcpyLoadsAndStores( assert(NVT.bitsGE(VT)); bool isDereferenceable = - SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); + SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); MachineMemOperand::Flags SrcMMOFlags = MMOFlags; if (isDereferenceable) SrcMMOFlags |= MachineMemOperand::MODereferenceable; @@ -8810,14 +8815,14 @@ static SDValue getMemcpyLoadsAndStores( Value = DAG.getExtLoad( ISD::EXTLOAD, dl, NVT, Chain, - DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl), + DAG.getObjectPtrOffset(dl, Src, TypeSize::getFixed(SrcOff)), SrcPtrInfo.getWithOffset(SrcOff), VT, commonAlignment(*SrcAlign, SrcOff), SrcMMOFlags, NewAAInfo); OutLoadChains.push_back(Value.getValue(1)); Store = DAG.getTruncStore( Chain, dl, Value, - DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl), + DAG.getObjectPtrOffset(dl, Dst, TypeSize::getFixed(DstOff)), DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags, NewAAInfo); OutStoreChains.push_back(Store); } @@ -8947,14 +8952,14 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, SDValue Value; bool isDereferenceable = - SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); + SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); MachineMemOperand::Flags SrcMMOFlags = MMOFlags; if (isDereferenceable) SrcMMOFlags |= MachineMemOperand::MODereferenceable; Value = DAG.getLoad( VT, dl, Chain, - DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl), + DAG.getObjectPtrOffset(dl, Src, TypeSize::getFixed(SrcOff)), SrcPtrInfo.getWithOffset(SrcOff), *SrcAlign, SrcMMOFlags, NewAAInfo); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); @@ -8969,7 +8974,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, Store = DAG.getStore( Chain, dl, LoadValues[i], - DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl), + DAG.getObjectPtrOffset(dl, Dst, TypeSize::getFixed(DstOff)), DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo); OutChains.push_back(Store); DstOff += VTSize; @@ -9101,7 +9106,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, assert(Value.getValueType() == VT && "Value with wrong type."); SDValue Store = DAG.getStore( Chain, dl, Value, - DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl), + DAG.getObjectPtrOffset(dl, Dst, TypeSize::getFixed(DstOff)), DstPtrInfo.getWithOffset(DstOff), Alignment, isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone, NewAAInfo); @@ -11848,25 +11853,38 @@ SDValue SelectionDAG::getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, /// getNodeIfExists - Get the specified node if it's already available, or /// else return NULL. SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, - ArrayRef<SDValue> Ops) { + ArrayRef<SDValue> Ops, + bool AllowCommute) { SDNodeFlags Flags; if (Inserter) Flags = Inserter->getFlags(); - return getNodeIfExists(Opcode, VTList, Ops, Flags); + return getNodeIfExists(Opcode, VTList, Ops, Flags, AllowCommute); } SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef<SDValue> Ops, - const SDNodeFlags Flags) { - if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) { + const SDNodeFlags Flags, + bool AllowCommute) { + if (VTList.VTs[VTList.NumVTs - 1] == MVT::Glue) + return nullptr; + + auto Lookup = [&](ArrayRef<SDValue> LookupOps) -> SDNode * { FoldingSetNodeID ID; - AddNodeIDNode(ID, Opcode, VTList, Ops); + AddNodeIDNode(ID, Opcode, VTList, LookupOps); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, SDLoc(), IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) { E->intersectFlagsWith(Flags); return E; } - } + return nullptr; + }; + + if (SDNode *Existing = Lookup(Ops)) + return Existing; + + if (AllowCommute && TLI->isCommutativeBinOp(Opcode)) + return Lookup({Ops[1], Ops[0]}); + return nullptr; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index c21890a..cb0038c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3971,8 +3971,14 @@ void SelectionDAGBuilder::visitSIToFP(const User &I) { } void SelectionDAGBuilder::visitPtrToAddr(const User &I) { - // FIXME: this is not correct for pointers with addr width != pointer width - visitPtrToInt(I); + SDValue N = getValue(I.getOperand(0)); + // By definition the type of the ptrtoaddr must be equal to the address type. + const auto &TLI = DAG.getTargetLoweringInfo(); + EVT AddrVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + // The address width must be smaller or equal to the pointer representation + // width, so we lower ptrtoaddr as a truncate (possibly folded to a no-op). + N = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), AddrVT, N); + setValue(&I, N); } void SelectionDAGBuilder::visitPtrToInt(const User &I) { @@ -6996,6 +7002,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)), Flags)); + } else if (TLI.isOperationLegalOrCustom(ISD::FMULADD, VT)) { + // TODO: Support splitting the vector. + setValue(&I, DAG.getNode(ISD::FMULADD, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)), Flags)); } else { // TODO: Intrinsic calls should have fast-math-flags. SDValue Mul = DAG.getNode( diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index fcfbfe6..39cbfad 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -310,6 +310,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FMA: return "fma"; case ISD::STRICT_FMA: return "strict_fma"; case ISD::FMAD: return "fmad"; + case ISD::FMULADD: return "fmuladd"; case ISD::FREM: return "frem"; case ISD::STRICT_FREM: return "strict_frem"; case ISD::FCOPYSIGN: return "fcopysign"; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 175753f..6c11c5b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -234,6 +234,19 @@ static bool dontUseFastISelFor(const Function &Fn) { }); } +static bool maintainPGOProfile(const TargetMachine &TM, + CodeGenOptLevel OptLevel) { + if (OptLevel != CodeGenOptLevel::None) + return true; + if (TM.getPGOOption()) { + const PGOOptions &Options = *TM.getPGOOption(); + return Options.Action == PGOOptions::PGOAction::IRUse || + Options.Action == PGOOptions::PGOAction::SampleUse || + Options.CSAction == PGOOptions::CSPGOAction::CSIRUse; + } + return false; +} + namespace llvm { //===--------------------------------------------------------------------===// @@ -395,6 +408,7 @@ SelectionDAGISel::~SelectionDAGISel() { delete CurDAG; } void SelectionDAGISelLegacy::getAnalysisUsage(AnalysisUsage &AU) const { CodeGenOptLevel OptLevel = Selector->OptLevel; + bool RegisterPGOPasses = maintainPGOProfile(Selector->TM, Selector->OptLevel); if (OptLevel != CodeGenOptLevel::None) AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<GCModuleInfo>(); @@ -403,15 +417,15 @@ void SelectionDAGISelLegacy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addRequired<TargetTransformInfoWrapperPass>(); AU.addRequired<AssumptionCacheTracker>(); - if (UseMBPI && OptLevel != CodeGenOptLevel::None) - AU.addRequired<BranchProbabilityInfoWrapperPass>(); + if (UseMBPI && RegisterPGOPasses) + AU.addRequired<BranchProbabilityInfoWrapperPass>(); AU.addRequired<ProfileSummaryInfoWrapperPass>(); // AssignmentTrackingAnalysis only runs if assignment tracking is enabled for // the module. AU.addRequired<AssignmentTrackingAnalysis>(); AU.addPreserved<AssignmentTrackingAnalysis>(); - if (OptLevel != CodeGenOptLevel::None) - LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); + if (RegisterPGOPasses) + LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); } @@ -464,6 +478,7 @@ void SelectionDAGISel::initializeAnalysisResults( (void)MatchFilterFuncName; #endif + bool RegisterPGOPasses = maintainPGOProfile(TM, OptLevel); TII = MF->getSubtarget().getInstrInfo(); TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); @@ -474,7 +489,7 @@ void SelectionDAGISel::initializeAnalysisResults( auto *PSI = MAMP.getCachedResult<ProfileSummaryAnalysis>(*Fn.getParent()); BlockFrequencyInfo *BFI = nullptr; FAM.getResult<BlockFrequencyAnalysis>(Fn); - if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOptLevel::None) + if (PSI && PSI->hasProfileSummary() && RegisterPGOPasses) BFI = &FAM.getResult<BlockFrequencyAnalysis>(Fn); FunctionVarLocs const *FnVarLocs = nullptr; @@ -492,7 +507,7 @@ void SelectionDAGISel::initializeAnalysisResults( // into account). That's unfortunate but OK because it just means we won't // ask for passes that have been required anyway. - if (UseMBPI && OptLevel != CodeGenOptLevel::None) + if (UseMBPI && RegisterPGOPasses) FuncInfo->BPI = &FAM.getResult<BranchProbabilityAnalysis>(Fn); else FuncInfo->BPI = nullptr; @@ -518,6 +533,7 @@ void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) { (void)MatchFilterFuncName; #endif + bool RegisterPGOPasses = maintainPGOProfile(TM, OptLevel); TII = MF->getSubtarget().getInstrInfo(); TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); @@ -528,7 +544,7 @@ void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) { AC = &MFP.getAnalysis<AssumptionCacheTracker>().getAssumptionCache(Fn); auto *PSI = &MFP.getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); BlockFrequencyInfo *BFI = nullptr; - if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOptLevel::None) + if (PSI && PSI->hasProfileSummary() && RegisterPGOPasses) BFI = &MFP.getAnalysis<LazyBlockFrequencyInfoPass>().getBFI(); FunctionVarLocs const *FnVarLocs = nullptr; @@ -549,7 +565,7 @@ void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) { // into account). That's unfortunate but OK because it just means we won't // ask for passes that have been required anyway. - if (UseMBPI && OptLevel != CodeGenOptLevel::None) + if (UseMBPI && RegisterPGOPasses) FuncInfo->BPI = &MFP.getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); else diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index cc503d3..920dff9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -7676,6 +7676,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, break; } case ISD::FMA: + case ISD::FMULADD: case ISD::FMAD: { if (!Flags.hasNoSignedZeros()) break; |