diff options
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG')
7 files changed, 114 insertions, 55 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 6cbfef2..da3c834 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -141,7 +141,7 @@ static cl::opt<bool> EnableReduceLoadOpStoreWidth( static cl::opt<bool> ReduceLoadOpStoreWidthForceNarrowingProfitable( "combiner-reduce-load-op-store-width-force-narrowing-profitable", cl::Hidden, cl::init(false), - cl::desc("DAG combiner force override the narrowing profitable check when" + cl::desc("DAG combiner force override the narrowing profitable check when " "reducing the width of load/op/store sequences")); static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore( @@ -3949,6 +3949,23 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true)) return Result; + // Similar to the previous rule, but this time targeting an expanded abs. + // (sub 0, (max X, (sub 0, X))) --> (min X, (sub 0, X)) + // as well as + // (sub 0, (min X, (sub 0, X))) --> (max X, (sub 0, X)) + // Note that these two are applicable to both signed and unsigned min/max. + SDValue X; + SDValue S0; + auto NegPat = m_AllOf(m_Neg(m_Deferred(X)), m_Value(S0)); + if (sd_match(N1, m_OneUse(m_AnyOf(m_SMax(m_Value(X), NegPat), + m_UMax(m_Value(X), NegPat), + m_SMin(m_Value(X), NegPat), + m_UMin(m_Value(X), NegPat))))) { + unsigned NewOpc = ISD::getInverseMinMaxOpcode(N1->getOpcode()); + if (hasOperation(NewOpc, VT)) + return DAG.getNode(NewOpc, DL, VT, X, S0); + } + // Fold neg(splat(neg(x)) -> splat(x) if (VT.isVector()) { SDValue N1S = DAG.getSplatValue(N1, true); @@ -20438,10 +20455,8 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { Value.hasOneUse()) { LoadSDNode *LD = cast<LoadSDNode>(Value); EVT VT = LD->getMemoryVT(); - if (!VT.isFloatingPoint() || - VT != ST->getMemoryVT() || - LD->isNonTemporal() || - ST->isNonTemporal() || + if (!VT.isSimple() || !VT.isFloatingPoint() || VT != ST->getMemoryVT() || + LD->isNonTemporal() || ST->isNonTemporal() || LD->getPointerInfo().getAddrSpace() != 0 || ST->getPointerInfo().getAddrSpace() != 0) return SDValue(); @@ -23088,8 +23103,11 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger()) return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT); + // TODO: Add support for SCALAR_TO_VECTOR implicit truncation. if (LegalTypes && BCSrc.getValueType().isInteger() && - BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) { + BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR && + BCSrc.getScalarValueSizeInBits() == + BCSrc.getOperand(0).getScalarValueSizeInBits()) { // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt --> // trunc i64 X to i32 SDValue X = BCSrc.getOperand(0); @@ -24288,8 +24306,8 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits()); // Keep track of what we encounter. - bool AnyInteger = false; - bool AnyFP = false; + EVT AnyFPVT; + for (const SDValue &Op : N->ops()) { if (ISD::BITCAST == Op.getOpcode() && !Op.getOperand(0).getValueType().isVector()) @@ -24303,27 +24321,23 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { // If it's neither, bail out, it could be something weird like x86mmx. EVT LastOpVT = Ops.back().getValueType(); if (LastOpVT.isFloatingPoint()) - AnyFP = true; - else if (LastOpVT.isInteger()) - AnyInteger = true; - else + AnyFPVT = LastOpVT; + else if (!LastOpVT.isInteger()) return SDValue(); } // If any of the operands is a floating point scalar bitcast to a vector, // use floating point types throughout, and bitcast everything. // Replace UNDEFs by another scalar UNDEF node, of the final desired type. - if (AnyFP) { - SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits()); - if (AnyInteger) { - for (SDValue &Op : Ops) { - if (Op.getValueType() == SVT) - continue; - if (Op.isUndef()) - Op = DAG.getNode(ISD::UNDEF, DL, SVT); - else - Op = DAG.getBitcast(SVT, Op); - } + if (AnyFPVT != EVT()) { + SVT = AnyFPVT; + for (SDValue &Op : Ops) { + if (Op.getValueType() == SVT) + continue; + if (Op.isUndef()) + Op = DAG.getNode(ISD::UNDEF, DL, SVT); + else + Op = DAG.getBitcast(SVT, Op); } } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index db21e70..89a00c5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -402,6 +402,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FMAXNUM_IEEE: case ISD::FMINIMUM: case ISD::FMAXIMUM: + case ISD::FMINIMUMNUM: + case ISD::FMAXIMUMNUM: case ISD::FCOPYSIGN: case ISD::FSQRT: case ISD::FSIN: @@ -1081,6 +1083,10 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { case ISD::FMAXIMUM: Results.push_back(TLI.expandFMINIMUM_FMAXIMUM(Node, DAG)); return; + case ISD::FMINIMUMNUM: + case ISD::FMAXIMUMNUM: + Results.push_back(TLI.expandFMINIMUMNUM_FMAXIMUMNUM(Node, DAG)); + return; case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: @@ -1738,7 +1744,8 @@ void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node, bool IsStrict = Node->isStrictFPOpcode(); unsigned OpNo = IsStrict ? 1 : 0; SDValue Src = Node->getOperand(OpNo); - EVT VT = Src.getValueType(); + EVT SrcVT = Src.getValueType(); + EVT DstVT = Node->getValueType(0); SDLoc DL(Node); // Attempt to expand using TargetLowering. @@ -1752,11 +1759,11 @@ void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node, } // Make sure that the SINT_TO_FP and SRL instructions are available. - if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, VT) == + if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Expand) || - (IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, VT) == + (IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, SrcVT) == TargetLowering::Expand)) || - TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) { + TLI.getOperationAction(ISD::SRL, SrcVT) == TargetLowering::Expand) { if (IsStrict) { UnrollStrictFPOp(Node, Results); return; @@ -1766,37 +1773,59 @@ void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node, return; } - unsigned BW = VT.getScalarSizeInBits(); + unsigned BW = SrcVT.getScalarSizeInBits(); assert((BW == 64 || BW == 32) && "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide"); - SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT); + // If STRICT_/FMUL is not supported by the target (in case of f16) replace the + // UINT_TO_FP with a larger float and round to the smaller type + if ((!IsStrict && !TLI.isOperationLegalOrCustom(ISD::FMUL, DstVT)) || + (IsStrict && !TLI.isOperationLegalOrCustom(ISD::STRICT_FMUL, DstVT))) { + EVT FPVT = BW == 32 ? MVT::f32 : MVT::f64; + SDValue UIToFP; + SDValue Result; + SDValue TargetZero = DAG.getIntPtrConstant(0, DL, /*isTarget=*/true); + EVT FloatVecVT = SrcVT.changeVectorElementType(FPVT); + if (IsStrict) { + UIToFP = DAG.getNode(ISD::STRICT_UINT_TO_FP, DL, {FloatVecVT, MVT::Other}, + {Node->getOperand(0), Src}); + Result = DAG.getNode(ISD::STRICT_FP_ROUND, DL, {DstVT, MVT::Other}, + {Node->getOperand(0), UIToFP, TargetZero}); + Results.push_back(Result); + Results.push_back(Result.getValue(1)); + } else { + UIToFP = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVecVT, Src); + Result = DAG.getNode(ISD::FP_ROUND, DL, DstVT, UIToFP, TargetZero); + Results.push_back(Result); + } + + return; + } + + SDValue HalfWord = DAG.getConstant(BW / 2, DL, SrcVT); // Constants to clear the upper part of the word. // Notice that we can also use SHL+SHR, but using a constant is slightly // faster on x86. uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF; - SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT); + SDValue HalfWordMask = DAG.getConstant(HWMask, DL, SrcVT); // Two to the power of half-word-size. - SDValue TWOHW = - DAG.getConstantFP(1ULL << (BW / 2), DL, Node->getValueType(0)); + SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, DstVT); // Clear upper part of LO, lower HI - SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Src, HalfWord); - SDValue LO = DAG.getNode(ISD::AND, DL, VT, Src, HalfWordMask); + SDValue HI = DAG.getNode(ISD::SRL, DL, SrcVT, Src, HalfWord); + SDValue LO = DAG.getNode(ISD::AND, DL, SrcVT, Src, HalfWordMask); if (IsStrict) { // Convert hi and lo to floats // Convert the hi part back to the upper values // TODO: Can any fast-math-flags be set on these nodes? - SDValue fHI = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, - {Node->getValueType(0), MVT::Other}, + SDValue fHI = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {DstVT, MVT::Other}, {Node->getOperand(0), HI}); - fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {Node->getValueType(0), MVT::Other}, + fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {DstVT, MVT::Other}, {fHI.getValue(1), fHI, TWOHW}); - SDValue fLO = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, - {Node->getValueType(0), MVT::Other}, + SDValue fLO = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {DstVT, MVT::Other}, {Node->getOperand(0), LO}); SDValue TF = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, fHI.getValue(1), @@ -1804,8 +1833,7 @@ void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node, // Add the two halves SDValue Result = - DAG.getNode(ISD::STRICT_FADD, DL, {Node->getValueType(0), MVT::Other}, - {TF, fHI, fLO}); + DAG.getNode(ISD::STRICT_FADD, DL, {DstVT, MVT::Other}, {TF, fHI, fLO}); Results.push_back(Result); Results.push_back(Result.getValue(1)); @@ -1815,13 +1843,12 @@ void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node, // Convert hi and lo to floats // Convert the hi part back to the upper values // TODO: Can any fast-math-flags be set on these nodes? - SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), HI); - fHI = DAG.getNode(ISD::FMUL, DL, Node->getValueType(0), fHI, TWOHW); - SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), LO); + SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, DstVT, HI); + fHI = DAG.getNode(ISD::FMUL, DL, DstVT, fHI, TWOHW); + SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, DstVT, LO); // Add the two halves - Results.push_back( - DAG.getNode(ISD::FADD, DL, Node->getValueType(0), fHI, fLO)); + Results.push_back(DAG.getNode(ISD::FADD, DL, DstVT, fHI, fLO)); } SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) { @@ -2246,11 +2273,13 @@ SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) { DAG.getVectorIdxConstant(i, dl)); SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, DAG.getVectorIdxConstant(i, dl)); + // FIXME: We should use i1 setcc + boolext here, but it causes regressions. Ops[i] = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), TmpEltVT), LHSElem, RHSElem, CC); - Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], DAG.getAllOnesConstant(dl, EltVT), + Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], + DAG.getBoolConstant(true, dl, EltVT, VT), DAG.getConstant(0, dl, EltVT)); } return DAG.getBuildVector(VT, dl, Ops); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 107454a..780eba1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -149,6 +149,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FMAXNUM_IEEE: case ISD::FMINIMUM: case ISD::FMAXIMUM: + case ISD::FMINIMUMNUM: + case ISD::FMAXIMUMNUM: case ISD::FLDEXP: case ISD::ABDS: case ISD::ABDU: diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 9e5867c..51ee3cc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -125,9 +125,9 @@ static cl::opt<int> MaxReorderWindow( cl::desc("Number of instructions to allow ahead of the critical path " "in sched=list-ilp")); -static cl::opt<unsigned> AvgIPC( - "sched-avg-ipc", cl::Hidden, cl::init(1), - cl::desc("Average inst/cycle whan no target itinerary exists.")); +static cl::opt<unsigned> + AvgIPC("sched-avg-ipc", cl::Hidden, cl::init(1), + cl::desc("Average inst/cycle when no target itinerary exists.")); namespace { diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 26fc75c..dff7243 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -43,9 +43,9 @@ STATISTIC(LoadsClustered, "Number of loads clustered together"); // without a target itinerary. The choice of number here has more to do with // balancing scheduler heuristics than with the actual machine latency. static cl::opt<int> HighLatencyCycles( - "sched-high-latency-cycles", cl::Hidden, cl::init(10), - cl::desc("Roughly estimate the number of cycles that 'long latency'" - "instructions take for targets with no itinerary")); + "sched-high-latency-cycles", cl::Hidden, cl::init(10), + cl::desc("Roughly estimate the number of cycles that 'long latency' " + "instructions take for targets with no itinerary")); ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) : ScheduleDAG(mf), InstrItins(mf.getSubtarget().getInstrItineraryData()) {} diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 10e8ba9..0dfd030 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -430,6 +430,21 @@ bool ISD::matchBinaryPredicate( return true; } +ISD::NodeType ISD::getInverseMinMaxOpcode(unsigned MinMaxOpc) { + switch (MinMaxOpc) { + default: + llvm_unreachable("unrecognized opcode"); + case ISD::UMIN: + return ISD::UMAX; + case ISD::UMAX: + return ISD::UMIN; + case ISD::SMIN: + return ISD::SMAX; + case ISD::SMAX: + return ISD::SMIN; + } +} + ISD::NodeType ISD::getVecReduceBaseOpcode(unsigned VecReduceOpcode) { switch (VecReduceOpcode) { default: diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index e87d809..9f57884 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -8435,7 +8435,6 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, return false; SDLoc dl(SDValue(Node, 0)); - EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout()); // Implementation of unsigned i64 to f64 following the algorithm in // __floatundidf in compiler_rt. This implementation performs rounding @@ -8448,7 +8447,7 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT); SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT); SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT); - SDValue HiShift = DAG.getConstant(32, dl, ShiftVT); + SDValue HiShift = DAG.getShiftAmountConstant(32, SrcVT, dl); SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask); SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift); |