diff options
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 92 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 64 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 7 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 1 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 1 |
5 files changed, 104 insertions, 61 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b1accdd..358e060 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -509,6 +509,7 @@ namespace { SDValue visitFMUL(SDNode *N); template <class MatchContextClass> SDValue visitFMA(SDNode *N); SDValue visitFMAD(SDNode *N); + SDValue visitFMULADD(SDNode *N); SDValue visitFDIV(SDNode *N); SDValue visitFREM(SDNode *N); SDValue visitFSQRT(SDNode *N); @@ -657,13 +658,13 @@ namespace { bool InexpensiveOnly = false, std::optional<EVT> OutVT = std::nullopt); SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags); - SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags); - SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags); - SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip); + SDValue buildRsqrtEstimate(SDValue Op); + SDValue buildSqrtEstimate(SDValue Op); + SDValue buildSqrtEstimateImpl(SDValue Op, bool Recip); SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations, - SDNodeFlags Flags, bool Reciprocal); + bool Reciprocal); SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations, - SDNodeFlags Flags, bool Reciprocal); + bool Reciprocal); SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); @@ -1991,6 +1992,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FMUL: return visitFMUL(N); case ISD::FMA: return visitFMA<EmptyMatchContext>(N); case ISD::FMAD: return visitFMAD(N); + case ISD::FMULADD: return visitFMULADD(N); case ISD::FDIV: return visitFDIV(N); case ISD::FREM: return visitFREM(N); case ISD::FSQRT: return visitFSQRT(N); @@ -5042,7 +5044,6 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) { unsigned Opc = N->getOpcode(); bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc); - ConstantSDNode *N1C = isConstOrConstSplat(N1); // X / undef -> undef // X % undef -> undef @@ -5074,7 +5075,7 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) { // division-by-zero or remainder-by-zero, so assume the divisor is 1. // TODO: Similarly, if we're zero-extending a boolean divisor, then assume // it's a 1. - if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1)) + if (isOneOrOneSplat(N1) || (VT.getScalarType() == MVT::i1)) return IsDiv ? N0 : DAG.getConstant(0, DL, VT); return SDValue(); @@ -18444,6 +18445,21 @@ SDValue DAGCombiner::visitFMAD(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFMULADD(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + EVT VT = N->getValueType(0); + SDLoc DL(N); + + // Constant fold FMULADD. + if (SDValue C = + DAG.FoldConstantArithmetic(ISD::FMULADD, DL, VT, {N0, N1, N2})) + return C; + + return SDValue(); +} + // Combine multiple FDIVs with the same divisor into multiple FMULs by the // reciprocal. // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) @@ -18574,20 +18590,18 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // If this FDIV is part of a reciprocal square root, it may be folded // into a target-specific square root estimate instruction. if (N1.getOpcode() == ISD::FSQRT) { - if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0))) return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); } else if (N1.getOpcode() == ISD::FP_EXTEND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = - buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) { + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV); AddToWorklist(RV.getNode()); return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); } } else if (N1.getOpcode() == ISD::FP_ROUND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = - buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) { + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1)); AddToWorklist(RV.getNode()); return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); @@ -18619,7 +18633,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A); SDValue AAZ = DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0)); - if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags)) + if (SDValue Rsqrt = buildRsqrtEstimate(AAZ)) return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt); // Estimate creation failed. Clean up speculatively created nodes. @@ -18629,7 +18643,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // We found a FSQRT, so try to make this fold: // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y) - if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) { + if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0))) { SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y); AddToWorklist(Div.getNode()); return DAG.getNode(ISD::FMUL, DL, VT, N0, Div); @@ -18726,11 +18740,12 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { return SDValue(); // FSQRT nodes have flags that propagate to the created nodes. + SelectionDAG::FlagInserter FlagInserter(DAG, Flags); // TODO: If this is N0/sqrt(N0), and we reach this node before trying to // transform the fdiv, we may produce a sub-optimal estimate sequence // because the reciprocal calculation may not have to filter out a // 0.0 input. - return buildSqrtEstimate(N0, Flags); + return buildSqrtEstimate(N0); } /// copysign(x, fp_extend(y)) -> copysign(x, y) @@ -29727,28 +29742,27 @@ SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op, /// X_{i+1} = X_i (1.5 - A X_i^2 / 2) /// As a result, we precompute A/2 prior to the iteration loop. SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, - unsigned Iterations, - SDNodeFlags Flags, bool Reciprocal) { + unsigned Iterations, bool Reciprocal) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT); // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that // this entire sequence requires only one FP constant. - SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags); - HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags); + SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg); + HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg); // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) for (unsigned i = 0; i < Iterations; ++i) { - SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); - NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags); - NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est); + NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst); + NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst); } // If non-reciprocal square root is requested, multiply the result by Arg. if (!Reciprocal) - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg); return Est; } @@ -29759,8 +29773,7 @@ SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, /// => /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, - unsigned Iterations, - SDNodeFlags Flags, bool Reciprocal) { + unsigned Iterations, bool Reciprocal) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT); @@ -29773,9 +29786,9 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, // Newton iterations for reciprocal square root: // E = (E * -0.5) * ((A * E) * E + -3.0) for (unsigned i = 0; i < Iterations; ++i) { - SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags); - SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags); - SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags); + SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est); + SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est); + SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree); // When calculating a square root at the last iteration build: // S = ((A * E) * -0.5) * ((A * E) * E + -3.0) @@ -29783,13 +29796,13 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, SDValue LHS; if (Reciprocal || (i + 1) < Iterations) { // RSQRT: LHS = (E * -0.5) - LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags); + LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf); } else { // SQRT: LHS = (A * E) * -0.5 - LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags); + LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf); } - Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags); + Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS); } return Est; @@ -29798,8 +29811,7 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if /// Op can be zero. -SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, - bool Reciprocal) { +SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, bool Reciprocal) { if (LegalDAG) return SDValue(); @@ -29827,8 +29839,8 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, if (Iterations > 0) Est = UseOneConstNR - ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal) - : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal); + ? buildSqrtNROneConst(Op, Est, Iterations, Reciprocal) + : buildSqrtNRTwoConst(Op, Est, Iterations, Reciprocal); if (!Reciprocal) { SDLoc DL(Op); // Try the target specific test first. @@ -29846,12 +29858,12 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, return SDValue(); } -SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) { - return buildSqrtEstimateImpl(Op, Flags, true); +SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op) { + return buildSqrtEstimateImpl(Op, true); } -SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) { - return buildSqrtEstimateImpl(Op, Flags, false); +SDValue DAGCombiner::buildSqrtEstimate(SDValue Op) { + return buildSqrtEstimateImpl(Op, false); } /// Return true if there is any possibility that the two addresses overlap. diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 08af74c..90edaf3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5063,8 +5063,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, break; case ISD::ADD: case ISD::ADDC: - // Add can have at most one carry bit. Thus we know that the output - // is, at worst, one more bit than the inputs. + // TODO: Move Operand 1 check before Operand 0 check Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); if (Tmp == 1) return 1; // Early out. @@ -5088,6 +5087,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); if (Tmp2 == 1) return 1; // Early out. + + // Add can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. return std::min(Tmp, Tmp2) - 1; case ISD::SUB: Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); @@ -5786,6 +5788,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, case ISD::FCOPYSIGN: case ISD::FMA: case ISD::FMAD: + case ISD::FMULADD: case ISD::FP_EXTEND: case ISD::FP_TO_SINT_SAT: case ISD::FP_TO_UINT_SAT: @@ -5904,6 +5907,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, case ISD::FCOSH: case ISD::FTANH: case ISD::FMA: + case ISD::FMULADD: case ISD::FMAD: { if (SNaN) return true; @@ -6401,8 +6405,9 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT, if (VT.isScalableVector()) return SDValue(); - // A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be - // simplified to one big BUILD_VECTOR. + // A CONCAT_VECTOR of scalar sources, such as UNDEF, BUILD_VECTOR and + // single-element INSERT_VECTOR_ELT operands can be simplified to one big + // BUILD_VECTOR. // FIXME: Add support for SCALAR_TO_VECTOR as well. EVT SVT = VT.getScalarType(); SmallVector<SDValue, 16> Elts; @@ -6412,6 +6417,10 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT, Elts.append(OpVT.getVectorNumElements(), DAG.getUNDEF(SVT)); else if (Op.getOpcode() == ISD::BUILD_VECTOR) Elts.append(Op->op_begin(), Op->op_end()); + else if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && + OpVT.getVectorNumElements() == 1 && + isNullConstant(Op.getOperand(2))) + Elts.push_back(Op.getOperand(1)); else return SDValue(); } @@ -7231,7 +7240,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, } // Handle fma/fmad special cases. - if (Opcode == ISD::FMA || Opcode == ISD::FMAD) { + if (Opcode == ISD::FMA || Opcode == ISD::FMAD || Opcode == ISD::FMULADD) { assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); assert(Ops[0].getValueType() == VT && Ops[1].getValueType() == VT && Ops[2].getValueType() == VT && "FMA types must match!"); @@ -7242,7 +7251,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, APFloat V1 = C1->getValueAPF(); const APFloat &V2 = C2->getValueAPF(); const APFloat &V3 = C3->getValueAPF(); - if (Opcode == ISD::FMAD) { + if (Opcode == ISD::FMAD || Opcode == ISD::FMULADD) { V1.multiply(V2, APFloat::rmNearestTiesToEven); V1.add(V3, APFloat::rmNearestTiesToEven); } else @@ -8781,7 +8790,7 @@ static SDValue getMemcpyLoadsAndStores( if (Value.getNode()) { Store = DAG.getStore( Chain, dl, Value, - DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl), + DAG.getObjectPtrOffset(dl, Dst, TypeSize::getFixed(DstOff)), DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo); OutChains.push_back(Store); } @@ -8797,7 +8806,7 @@ static SDValue getMemcpyLoadsAndStores( assert(NVT.bitsGE(VT)); bool isDereferenceable = - SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); + SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); MachineMemOperand::Flags SrcMMOFlags = MMOFlags; if (isDereferenceable) SrcMMOFlags |= MachineMemOperand::MODereferenceable; @@ -8806,14 +8815,14 @@ static SDValue getMemcpyLoadsAndStores( Value = DAG.getExtLoad( ISD::EXTLOAD, dl, NVT, Chain, - DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl), + DAG.getObjectPtrOffset(dl, Src, TypeSize::getFixed(SrcOff)), SrcPtrInfo.getWithOffset(SrcOff), VT, commonAlignment(*SrcAlign, SrcOff), SrcMMOFlags, NewAAInfo); OutLoadChains.push_back(Value.getValue(1)); Store = DAG.getTruncStore( Chain, dl, Value, - DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl), + DAG.getObjectPtrOffset(dl, Dst, TypeSize::getFixed(DstOff)), DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags, NewAAInfo); OutStoreChains.push_back(Store); } @@ -8943,14 +8952,14 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, SDValue Value; bool isDereferenceable = - SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); + SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); MachineMemOperand::Flags SrcMMOFlags = MMOFlags; if (isDereferenceable) SrcMMOFlags |= MachineMemOperand::MODereferenceable; Value = DAG.getLoad( VT, dl, Chain, - DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl), + DAG.getObjectPtrOffset(dl, Src, TypeSize::getFixed(SrcOff)), SrcPtrInfo.getWithOffset(SrcOff), *SrcAlign, SrcMMOFlags, NewAAInfo); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); @@ -8965,7 +8974,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, Store = DAG.getStore( Chain, dl, LoadValues[i], - DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl), + DAG.getObjectPtrOffset(dl, Dst, TypeSize::getFixed(DstOff)), DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo); OutChains.push_back(Store); DstOff += VTSize; @@ -9097,7 +9106,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, assert(Value.getValueType() == VT && "Value with wrong type."); SDValue Store = DAG.getStore( Chain, dl, Value, - DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl), + DAG.getObjectPtrOffset(dl, Dst, TypeSize::getFixed(DstOff)), DstPtrInfo.getWithOffset(DstOff), Alignment, isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone, NewAAInfo); @@ -11844,25 +11853,38 @@ SDValue SelectionDAG::getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, /// getNodeIfExists - Get the specified node if it's already available, or /// else return NULL. SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, - ArrayRef<SDValue> Ops) { + ArrayRef<SDValue> Ops, + bool AllowCommute) { SDNodeFlags Flags; if (Inserter) Flags = Inserter->getFlags(); - return getNodeIfExists(Opcode, VTList, Ops, Flags); + return getNodeIfExists(Opcode, VTList, Ops, Flags, AllowCommute); } SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef<SDValue> Ops, - const SDNodeFlags Flags) { - if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) { + const SDNodeFlags Flags, + bool AllowCommute) { + if (VTList.VTs[VTList.NumVTs - 1] == MVT::Glue) + return nullptr; + + auto Lookup = [&](ArrayRef<SDValue> LookupOps) -> SDNode * { FoldingSetNodeID ID; - AddNodeIDNode(ID, Opcode, VTList, Ops); + AddNodeIDNode(ID, Opcode, VTList, LookupOps); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, SDLoc(), IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) { E->intersectFlagsWith(Flags); return E; } - } + return nullptr; + }; + + if (SDNode *Existing = Lookup(Ops)) + return Existing; + + if (AllowCommute && TLI->isCommutativeBinOp(Opcode)) + return Lookup({Ops[1], Ops[0]}); + return nullptr; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index c21890a..0f2b518 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6996,6 +6996,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)), Flags)); + } else if (TLI.isOperationLegalOrCustom(ISD::FMULADD, VT)) { + // TODO: Support splitting the vector. + setValue(&I, DAG.getNode(ISD::FMULADD, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)), Flags)); } else { // TODO: Intrinsic calls should have fast-math-flags. SDValue Mul = DAG.getNode( diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index fcfbfe6..39cbfad 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -310,6 +310,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FMA: return "fma"; case ISD::STRICT_FMA: return "strict_fma"; case ISD::FMAD: return "fmad"; + case ISD::FMULADD: return "fmuladd"; case ISD::FREM: return "frem"; case ISD::STRICT_FREM: return "strict_frem"; case ISD::FCOPYSIGN: return "fcopysign"; diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index cc503d3..920dff9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -7676,6 +7676,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, break; } case ISD::FMA: + case ISD::FMULADD: case ISD::FMAD: { if (!Flags.hasNoSignedZeros()) break; |