Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG')
17 files changed, 1597 insertions, 892 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index bdd6bf0..77346cb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1065,8 +1065,9 @@ static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) { // Determines if it is a constant integer or a splat/build vector of constant // integers (and undefs). -// Do not permit build vector implicit truncation. -static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) { +// Do not permit build vector implicit truncation unless AllowTruncation is set. +static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false, + bool AllowTruncation = false) { if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N)) return !(Const->isOpaque() && NoOpaques); if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR) @@ -1076,8 +1077,13 @@ static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) { if (Op.isUndef()) continue; ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op); - if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth || - (Const->isOpaque() && NoOpaques)) + if (!Const || (Const->isOpaque() && NoOpaques)) + return false; + // When AllowTruncation is true, allow constants that have been promoted + // during type legalization as long as the value fits in the target type. + if ((AllowTruncation && + Const->getAPIntValue().getActiveBits() > BitWidth) || + (!AllowTruncation && Const->getAPIntValue().getBitWidth() != BitWidth)) return false; } return true; @@ -2042,6 +2048,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::PARTIAL_REDUCE_SMLA: case ISD::PARTIAL_REDUCE_UMLA: case ISD::PARTIAL_REDUCE_SUMLA: + case ISD::PARTIAL_REDUCE_FMLA: return visitPARTIAL_REDUCE_MLA(N); case ISD::VECTOR_COMPRESS: return visitVECTOR_COMPRESS(N); case ISD::LIFETIME_END: return visitLIFETIME_END(N); @@ -3287,6 +3294,9 @@ static SDValue getAsCarry(const TargetLowering &TLI, SDValue V, // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization. while (true) { + if (ForceCarryReconstruction && V.getValueType() == MVT::i1) + return V; + if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) { V = V.getOperand(0); continue; @@ -3301,9 +3311,6 @@ static SDValue getAsCarry(const TargetLowering &TLI, SDValue V, continue; } - if (ForceCarryReconstruction && V.getValueType() == MVT::i1) - return V; - break; } @@ -4046,6 +4053,8 @@ static SDValue foldSubCtlzNot(SDNode *N, SelectionDAG &DAG) { m_ConstInt(AndMask)))) { // Type Legalisation Pattern: // (sub (ctlz (and (xor Op XorMask) AndMask)) BitWidthDiff) + if (BitWidthDiff.getZExtValue() >= BitWidth) + return SDValue(); unsigned AndMaskWidth = BitWidth - BitWidthDiff.getZExtValue(); if (!(AndMask.isMask(AndMaskWidth) && XorMask.countr_one() >= AndMaskWidth)) return SDValue(); @@ -4879,8 +4888,8 @@ template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) { // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) if (sd_context_match(N0, Matcher, m_Opc(ISD::ADD)) && - DAG.isConstantIntBuildVectorOrConstantInt(N1) && - DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) && + isConstantOrConstantVector(N1) && + isConstantOrConstantVector(N0.getOperand(1)) && isMulAddWithConstProfitable(N, N0, N1)) return Matcher.getNode( ISD::ADD, DL, VT, @@ -4980,7 +4989,7 @@ static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, case MVT::i128: LC= isSigned ? 
RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; } - return TLI.getLibcallName(LC) != nullptr; + return TLI.getLibcallImpl(LC) != RTLIB::Unsupported; } /// Issue divrem if both quotient and remainder are needed. @@ -5177,7 +5186,8 @@ static bool isDivisorPowerOfTwo(SDValue Divisor) { return false; }; - return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo); + return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo, /*AllowUndefs=*/false, + /*AllowTruncation=*/true); } SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) { @@ -5241,7 +5251,8 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) { // alternate sequence. Targets may check function attributes for size/speed // trade-offs. AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); - if (isConstantOrConstantVector(N1) && + if (isConstantOrConstantVector(N1, /*NoOpaques=*/false, + /*AllowTruncation=*/true) && !TLI.isIntDivCheap(N->getValueType(0), Attr)) if (SDValue Op = BuildSDIV(N)) return Op; @@ -5319,7 +5330,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) { EVT VT = N->getValueType(0); // fold (udiv x, (1 << c)) -> x >>u c - if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) { + if (isConstantOrConstantVector(N1, /*NoOpaques=*/true, + /*AllowTruncation=*/true)) { if (SDValue LogBase2 = BuildLogBase2(N1, DL)) { AddToWorklist(LogBase2.getNode()); @@ -5333,7 +5345,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) { // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 if (N1.getOpcode() == ISD::SHL) { SDValue N10 = N1.getOperand(0); - if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) { + if (isConstantOrConstantVector(N10, /*NoOpaques=*/true, + /*AllowTruncation=*/true)) { if (SDValue LogBase2 = BuildLogBase2(N10, DL)) { AddToWorklist(LogBase2.getNode()); @@ -5349,7 +5362,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) { // fold (udiv x, c) -> alternate AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); - if (isConstantOrConstantVector(N1) && + if (isConstantOrConstantVector(N1, /*NoOpaques=*/false, + /*AllowTruncation=*/true) && !TLI.isIntDivCheap(N->getValueType(0), Attr)) if (SDValue Op = BuildUDIV(N)) return Op; @@ -5577,7 +5591,8 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { return DAG.getConstant(0, DL, VT); // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c) - if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && + if (isConstantOrConstantVector(N1, /*NoOpaques=*/true, + /*AllowTruncation=*/true) && hasOperation(ISD::SRL, VT)) { if (SDValue LogBase2 = BuildLogBase2(N1, DL)) { unsigned NumEltBits = VT.getScalarSizeInBits(); @@ -9374,7 +9389,7 @@ static unsigned bigEndianByteAt(unsigned BW, unsigned i) { // Check if the bytes offsets we are looking at match with either big or // little endian value loaded. Return true for big endian, false for little // endian, and std::nullopt if match failed. -static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets, +static std::optional<bool> isBigEndian(ArrayRef<int64_t> ByteOffsets, int64_t FirstOffset) { // The endian can be decided only when it is 2 bytes at least. 
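// (With a single byte the big- and little-endian layouts coincide, so
// there is nothing to decide.)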
unsigned Width = ByteOffsets.size(); @@ -10880,15 +10895,14 @@ static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, // Combine to mulh if mulh is legal/custom for the narrow type on the target // or if it is a vector type then we could transform to an acceptable type and // rely on legalization to split/combine the result. + EVT TransformVT = NarrowVT; if (NarrowVT.isVector()) { - EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), NarrowVT); - if (TransformVT.getVectorElementType() != NarrowVT.getVectorElementType() || - !TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT)) - return SDValue(); - } else { - if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT)) + TransformVT = TLI.getLegalTypeToTransformTo(*DAG.getContext(), NarrowVT); + if (TransformVT.getScalarType() != NarrowVT.getScalarType()) return SDValue(); } + if (!TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT)) + return SDValue(); SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp); @@ -10985,6 +10999,22 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { } } + // fold (sra (xor (sra x, c1), -1), c2) -> (xor (sra x, c3), -1) + // This allows merging two arithmetic shifts even when there's a NOT in + // between. + SDValue X; + APInt C1; + if (N1C && sd_match(N0, m_OneUse(m_Not( + m_OneUse(m_Sra(m_Value(X), m_ConstInt(C1))))))) { + APInt C2 = N1C->getAPIntValue(); + zeroExtendToMatch(C1, C2, 1 /* Overflow Bit */); + APInt Sum = C1 + C2; + unsigned ShiftSum = Sum.getLimitedValue(OpSizeInBits - 1); + SDValue NewShift = DAG.getNode( + ISD::SRA, DL, VT, X, DAG.getShiftAmountConstant(ShiftSum, VT, DL)); + return DAG.getNOT(DL, NewShift, VT); + } + // fold (sra (shl X, m), (sub result_size, n)) // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for // result_size - n != m. 
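The new visitSRA fold above merges two arithmetic shifts through a bitwise NOT, which is sound because NOT commutes with SRA: every bit of the shifted value is simply complemented, including the replicated sign bits. A small self-contained sanity check of the identity, assuming only APInt (checkSraThroughNot is a hypothetical helper, not part of the patch):

#include "llvm/ADT/APInt.h"
#include <algorithm>
#include <cassert>

// Verifies sra(not(sra(x, c1)), c2) == not(sra(x, c1 + c2)), with the same
// clamp the combine applies via getLimitedValue: arithmetically shifting by
// more than BitWidth - 1 only replicates the sign bit anyway.
static void checkSraThroughNot(const llvm::APInt &X, unsigned C1, unsigned C2) {
  unsigned BW = X.getBitWidth();
  assert(C1 < BW && C2 < BW && "DAG shift amounts are in range");
  unsigned Sum = std::min(C1 + C2, BW - 1);
  llvm::APInt Folded = ~X.ashr(Sum);
  llvm::APInt Unfolded = (~X.ashr(C1)).ashr(C2);
  assert(Folded == Unfolded && "NOT commutes with arithmetic shifts");
}

The one-use constraints in the matcher matter here: without them the rewrite could duplicate the inner shift while the original NOT lives on for its other users.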
@@ -11744,12 +11774,12 @@ SDValue DAGCombiner::visitBITREVERSE(SDNode *N) { // fold (bitreverse (lshr (bitreverse x), y)) -> (shl x, y) if ((!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) && - sd_match(N, m_BitReverse(m_Srl(m_BitReverse(m_Value(X)), m_Value(Y))))) + sd_match(N0, m_Srl(m_BitReverse(m_Value(X)), m_Value(Y)))) return DAG.getNode(ISD::SHL, DL, VT, X, Y); // fold (bitreverse (shl (bitreverse x), y)) -> (lshr x, y) if ((!LegalOperations || TLI.isOperationLegal(ISD::SRL, VT)) && - sd_match(N, m_BitReverse(m_Shl(m_BitReverse(m_Value(X)), m_Value(Y))))) + sd_match(N0, m_Shl(m_BitReverse(m_Value(X)), m_Value(Y)))) return DAG.getNode(ISD::SRL, DL, VT, X, Y); return SDValue(); @@ -12999,20 +13029,35 @@ SDValue DAGCombiner::visitPARTIAL_REDUCE_MLA(SDNode *N) { return SDValue(); } -// partial_reduce_*mla(acc, mul(ext(a), ext(b)), splat(1)) +// partial_reduce_*mla(acc, mul(*ext(a), *ext(b)), splat(1)) // -> partial_reduce_*mla(acc, a, b) // -// partial_reduce_*mla(acc, mul(ext(x), splat(C)), splat(1)) -// -> partial_reduce_*mla(acc, x, C) +// partial_reduce_*mla(acc, mul(*ext(x), splat(C)), splat(1)) +// -> partial_reduce_*mla(acc, x, splat(C)) +// +// partial_reduce_*mla(acc, sel(p, mul(*ext(a), *ext(b)), splat(0)), splat(1)) +// -> partial_reduce_*mla(acc, sel(p, a, splat(0)), b) +// +// partial_reduce_*mla(acc, sel(p, mul(*ext(a), splat(C)), splat(0)), splat(1)) +// -> partial_reduce_*mla(acc, sel(p, a, splat(0)), splat(C)) SDValue DAGCombiner::foldPartialReduceMLAMulOp(SDNode *N) { SDLoc DL(N); auto *Context = DAG.getContext(); SDValue Acc = N->getOperand(0); SDValue Op1 = N->getOperand(1); SDValue Op2 = N->getOperand(2); - unsigned Opc = Op1->getOpcode(); - if (Opc != ISD::MUL && Opc != ISD::SHL) + + // Handle predication by moving the SELECT into the operand of the MUL. + SDValue Pred; + if (Opc == ISD::VSELECT && (isZeroOrZeroSplat(Op1->getOperand(2)) || + isZeroOrZeroSplatFP(Op1->getOperand(2)))) { + Pred = Op1->getOperand(0); + Op1 = Op1->getOperand(1); + Opc = Op1->getOpcode(); + } + + if (Opc != ISD::MUL && Opc != ISD::FMUL && Opc != ISD::SHL) return SDValue(); SDValue LHS = Op1->getOperand(0); @@ -13031,20 +13076,37 @@ SDValue DAGCombiner::foldPartialReduceMLAMulOp(SDNode *N) { Opc = ISD::MUL; } - APInt C; - if (Opc != ISD::MUL || !ISD::isConstantSplatVector(Op2.getNode(), C) || - !C.isOne()) + if (!(Opc == ISD::MUL && llvm::isOneOrOneSplat(Op2)) && + !(Opc == ISD::FMUL && llvm::isOneOrOneSplatFP(Op2))) return SDValue(); + auto IsIntOrFPExtOpcode = [](unsigned int Opcode) { + return (ISD::isExtOpcode(Opcode) || Opcode == ISD::FP_EXTEND); + }; + unsigned LHSOpcode = LHS->getOpcode(); - if (!ISD::isExtOpcode(LHSOpcode)) + if (!IsIntOrFPExtOpcode(LHSOpcode)) return SDValue(); SDValue LHSExtOp = LHS->getOperand(0); EVT LHSExtOpVT = LHSExtOp.getValueType(); + // When Pred is non-zero, set Op = select(Pred, Op, splat(0)) and freeze + // OtherOp to keep the same semantics when moving the selects into the MUL + // operands. + auto ApplyPredicate = [&](SDValue &Op, SDValue &OtherOp) { + if (Pred) { + EVT OpVT = Op.getValueType(); + SDValue Zero = OpVT.isFloatingPoint() ? 
DAG.getConstantFP(0.0, DL, OpVT) + : DAG.getConstant(0, DL, OpVT); + Op = DAG.getSelect(DL, OpVT, Pred, Op, Zero); + OtherOp = DAG.getFreeze(OtherOp); + } + }; + // partial_reduce_*mla(acc, mul(ext(x), splat(C)), splat(1)) // -> partial_reduce_*mla(acc, x, C) + APInt C; if (ISD::isConstantSplatVector(RHS.getNode(), C)) { // TODO: Make use of partial_reduce_sumla here APInt CTrunc = C.trunc(LHSExtOpVT.getScalarSizeInBits()); @@ -13064,12 +13126,13 @@ SDValue DAGCombiner::foldPartialReduceMLAMulOp(SDNode *N) { TLI.getTypeToTransformTo(*Context, LHSExtOpVT))) return SDValue(); - return DAG.getNode(NewOpcode, DL, N->getValueType(0), Acc, LHSExtOp, - DAG.getConstant(CTrunc, DL, LHSExtOpVT)); + SDValue C = DAG.getConstant(CTrunc, DL, LHSExtOpVT); + ApplyPredicate(C, LHSExtOp); + return DAG.getNode(NewOpcode, DL, N->getValueType(0), Acc, LHSExtOp, C); } unsigned RHSOpcode = RHS->getOpcode(); - if (!ISD::isExtOpcode(RHSOpcode)) + if (!IsIntOrFPExtOpcode(RHSOpcode)) return SDValue(); SDValue RHSExtOp = RHS->getOperand(0); @@ -13086,6 +13149,8 @@ SDValue DAGCombiner::foldPartialReduceMLAMulOp(SDNode *N) { else if (LHSOpcode == ISD::ZERO_EXTEND && RHSOpcode == ISD::SIGN_EXTEND) { NewOpc = ISD::PARTIAL_REDUCE_SUMLA; std::swap(LHSExtOp, RHSExtOp); + } else if (LHSOpcode == ISD::FP_EXTEND && RHSOpcode == ISD::FP_EXTEND) { + NewOpc = ISD::PARTIAL_REDUCE_FMLA; } else return SDValue(); // For a 2-stage extend the signedness of both of the extends must match @@ -13104,39 +13169,50 @@ SDValue DAGCombiner::foldPartialReduceMLAMulOp(SDNode *N) { TLI.getTypeToTransformTo(*Context, LHSExtOpVT))) return SDValue(); + ApplyPredicate(RHSExtOp, LHSExtOp); return DAG.getNode(NewOpc, DL, N->getValueType(0), Acc, LHSExtOp, RHSExtOp); } -// partial.reduce.umla(acc, zext(op), splat(1)) -// -> partial.reduce.umla(acc, op, splat(trunc(1))) -// partial.reduce.smla(acc, sext(op), splat(1)) -// -> partial.reduce.smla(acc, op, splat(trunc(1))) +// partial.reduce.*mla(acc, *ext(op), splat(1)) +// -> partial.reduce.*mla(acc, op, splat(trunc(1))) // partial.reduce.sumla(acc, sext(op), splat(1)) // -> partial.reduce.smla(acc, op, splat(trunc(1))) +// +// partial.reduce.*mla(acc, sel(p, *ext(op), splat(0)), splat(1)) +// -> partial.reduce.*mla(acc, sel(p, op, splat(0)), splat(trunc(1))) SDValue DAGCombiner::foldPartialReduceAdd(SDNode *N) { SDLoc DL(N); SDValue Acc = N->getOperand(0); SDValue Op1 = N->getOperand(1); SDValue Op2 = N->getOperand(2); - APInt ConstantOne; - if (!ISD::isConstantSplatVector(Op2.getNode(), ConstantOne) || - !ConstantOne.isOne()) + if (!llvm::isOneOrOneSplat(Op2) && !llvm::isOneOrOneSplatFP(Op2)) return SDValue(); + SDValue Pred; unsigned Op1Opcode = Op1.getOpcode(); - if (!ISD::isExtOpcode(Op1Opcode)) + if (Op1Opcode == ISD::VSELECT && (isZeroOrZeroSplat(Op1->getOperand(2)) || + isZeroOrZeroSplatFP(Op1->getOperand(2)))) { + Pred = Op1->getOperand(0); + Op1 = Op1->getOperand(1); + Op1Opcode = Op1->getOpcode(); + } + + if (!ISD::isExtOpcode(Op1Opcode) && Op1Opcode != ISD::FP_EXTEND) return SDValue(); - bool Op1IsSigned = Op1Opcode == ISD::SIGN_EXTEND; + bool Op1IsSigned = + Op1Opcode == ISD::SIGN_EXTEND || Op1Opcode == ISD::FP_EXTEND; bool NodeIsSigned = N->getOpcode() != ISD::PARTIAL_REDUCE_UMLA; EVT AccElemVT = Acc.getValueType().getVectorElementType(); if (Op1IsSigned != NodeIsSigned && Op1.getValueType().getVectorElementType() != AccElemVT) return SDValue(); - unsigned NewOpcode = - Op1IsSigned ? 
ISD::PARTIAL_REDUCE_SMLA : ISD::PARTIAL_REDUCE_UMLA; + unsigned NewOpcode = N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA + ? ISD::PARTIAL_REDUCE_FMLA + : Op1IsSigned ? ISD::PARTIAL_REDUCE_SMLA + : ISD::PARTIAL_REDUCE_UMLA; SDValue UnextOp1 = Op1.getOperand(0); EVT UnextOp1VT = UnextOp1.getValueType(); @@ -13146,8 +13222,18 @@ SDValue DAGCombiner::foldPartialReduceAdd(SDNode *N) { TLI.getTypeToTransformTo(*Context, UnextOp1VT))) return SDValue(); + SDValue Constant = N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA + ? DAG.getConstantFP(1, DL, UnextOp1VT) + : DAG.getConstant(1, DL, UnextOp1VT); + + if (Pred) { + SDValue Zero = N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA + ? DAG.getConstantFP(0, DL, UnextOp1VT) + : DAG.getConstant(0, DL, UnextOp1VT); + Constant = DAG.getSelect(DL, UnextOp1VT, Pred, Constant, Zero); + } return DAG.getNode(NewOpcode, DL, N->getValueType(0), Acc, UnextOp1, - DAG.getConstant(1, DL, UnextOp1VT)); + Constant); } SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) { @@ -16734,38 +16820,51 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { } // fold (conv (load x)) -> (load (conv*)x) + // fold (conv (freeze (load x))) -> (freeze (load (conv*)x)) // If the resultant load doesn't need a higher alignment than the original! - if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && - // Do not remove the cast if the types differ in endian layout. - TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) == - TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) && - // If the load is volatile, we only want to change the load type if the - // resulting load is legal. Otherwise we might increase the number of - // memory accesses. We don't care if the original type was legal or not - // as we assume software couldn't rely on the number of accesses of an - // illegal type. - ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) || - TLI.isOperationLegal(ISD::LOAD, VT))) { - LoadSDNode *LN0 = cast<LoadSDNode>(N0); + auto CastLoad = [this, &VT](SDValue N0, const SDLoc &DL) { + if (!ISD::isNormalLoad(N0.getNode()) || !N0.hasOneUse()) + return SDValue(); - if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG, - *LN0->getMemOperand())) { - // If the range metadata type does not match the new memory - // operation type, remove the range metadata. - if (const MDNode *MD = LN0->getRanges()) { - ConstantInt *Lower = mdconst::extract<ConstantInt>(MD->getOperand(0)); - if (Lower->getBitWidth() != VT.getScalarSizeInBits() || - !VT.isInteger()) { - LN0->getMemOperand()->clearRanges(); - } + // Do not remove the cast if the types differ in endian layout. + if (TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) != + TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout())) + return SDValue(); + + // If the load is volatile, we only want to change the load type if the + // resulting load is legal. Otherwise we might increase the number of + // memory accesses. We don't care if the original type was legal or not + // as we assume software couldn't rely on the number of accesses of an + // illegal type. + auto *LN0 = cast<LoadSDNode>(N0); + if ((LegalOperations || !LN0->isSimple()) && + !TLI.isOperationLegal(ISD::LOAD, VT)) + return SDValue(); + + if (!TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG, + *LN0->getMemOperand())) + return SDValue(); + + // If the range metadata type does not match the new memory + // operation type, remove the range metadata. 
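// (An integer range computed for the old load type says nothing about the
// same bytes reinterpreted as the new type, so the metadata is dropped
// rather than transferred.)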
+ if (const MDNode *MD = LN0->getRanges()) { + ConstantInt *Lower = mdconst::extract<ConstantInt>(MD->getOperand(0)); + if (Lower->getBitWidth() != VT.getScalarSizeInBits() || !VT.isInteger()) { + LN0->getMemOperand()->clearRanges(); } - SDValue Load = - DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(), - LN0->getMemOperand()); - DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); - return Load; } - } + SDValue Load = DAG.getLoad(VT, DL, LN0->getChain(), LN0->getBasePtr(), + LN0->getMemOperand()); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); + return Load; + }; + + if (SDValue NewLd = CastLoad(N0, SDLoc(N))) + return NewLd; + + if (N0.getOpcode() == ISD::FREEZE && N0.hasOneUse()) + if (SDValue NewLd = CastLoad(N0.getOperand(0), SDLoc(N))) + return DAG.getFreeze(NewLd); if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI)) return V; @@ -17821,7 +17920,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math) ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true); if (N1C && N1C->isZero()) - if (N1C->isNegative() || Flags.hasNoSignedZeros()) + if (N1C->isNegative() || Flags.hasNoSignedZeros() || + DAG.canIgnoreSignBitOfZero(SDValue(N, 0))) return N0; if (SDValue NewSel = foldBinOpIntoSelect(N)) @@ -18033,7 +18133,8 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // (fsub A, 0) -> A if (N1CFP && N1CFP->isZero()) { - if (!N1CFP->isNegative() || Flags.hasNoSignedZeros()) { + if (!N1CFP->isNegative() || Flags.hasNoSignedZeros() || + DAG.canIgnoreSignBitOfZero(SDValue(N, 0))) { return N0; } } @@ -18046,7 +18147,8 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // (fsub -0.0, N1) -> -N1 if (N0CFP && N0CFP->isZero()) { - if (N0CFP->isNegative() || Flags.hasNoSignedZeros()) { + if (N0CFP->isNegative() || Flags.hasNoSignedZeros() || + DAG.canIgnoreSignBitOfZero(SDValue(N, 0))) { // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are // flushed to zero, unless all users treat denorms as zero (DAZ). // FIXME: This transform will change the sign of a NaN and the behavior @@ -18625,11 +18727,13 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (Flags.hasAllowReciprocal()) { // If this FDIV is part of a reciprocal square root, it may be folded // into a target-specific square root estimate instruction. 
+ bool N1AllowReciprocal = N1->getFlags().hasAllowReciprocal(); if (N1.getOpcode() == ISD::FSQRT) { if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0))) return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); } else if (N1.getOpcode() == ISD::FP_EXTEND && - N1.getOperand(0).getOpcode() == ISD::FSQRT) { + N1.getOperand(0).getOpcode() == ISD::FSQRT && + N1AllowReciprocal) { if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV); AddToWorklist(RV.getNode()); @@ -18694,7 +18798,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { } // Fold X/Sqrt(X) -> Sqrt(X) - if (Flags.hasNoSignedZeros() && Flags.hasAllowReassociation()) + if ((Flags.hasNoSignedZeros() || DAG.canIgnoreSignBitOfZero(SDValue(N, 0))) && + Flags.hasAllowReassociation()) if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0)) return N1; @@ -18745,8 +18850,9 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::FDIV, VT) && TLI.isOperationLegalOrCustom(ISD::FTRUNC, VT) && DAG.isKnownToBeAPowerOfTwoFP(N1)) { - bool NeedsCopySign = - !Flags.hasNoSignedZeros() && !DAG.cannotBeOrderedNegativeFP(N0); + bool NeedsCopySign = !Flags.hasNoSignedZeros() && + !DAG.canIgnoreSignBitOfZero(SDValue(N, 0)) && + !DAG.cannotBeOrderedNegativeFP(N0); SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1); SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div); SDValue MLA; @@ -18831,6 +18937,26 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); + if (VT != N1.getValueType()) + return SDValue(); + + // If this is equivalent to a disjoint or, replace it with one. This can + // happen if the sign operand is a sign mask (i.e., x << sign_bit_position). + if (DAG.SignBitIsZeroFP(N0) && + DAG.computeKnownBits(N1).Zero.isMaxSignedValue()) { + // TODO: Just directly match the shift pattern. computeKnownBits is heavy + // for a such a narrowly targeted case. + EVT IntVT = VT.changeTypeToInteger(); + // TODO: It appears to be profitable in some situations to unconditionally + // emit a fabs(n0) to perform this combine. 
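// Example with f32: if x is known non-negative and only the sign bit of y
// can be nonzero, then copysign(x, y) is exactly (bits(x) | bits(y)), and
// the OR is disjoint because no bit position can be set in both inputs.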
+ SDValue CastSrc0 = DAG.getNode(ISD::BITCAST, DL, IntVT, N0); + SDValue CastSrc1 = DAG.getNode(ISD::BITCAST, DL, IntVT, N1); + + SDValue SignOr = DAG.getNode(ISD::OR, DL, IntVT, CastSrc0, CastSrc1, + SDNodeFlags::Disjoint); + return DAG.getNode(ISD::BITCAST, DL, VT, SignOr); + } + return SDValue(); } @@ -19395,7 +19521,8 @@ SDValue DAGCombiner::visitFMinMax(SDNode *N) { const SDNodeFlags Flags = N->getFlags(); unsigned Opc = N->getOpcode(); bool PropAllNaNsToQNaNs = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM; - bool PropOnlySNaNsToQNaNs = Opc == ISD::FMINNUM || Opc == ISD::FMAXNUM; + bool ReturnsOtherForAllNaNs = + Opc == ISD::FMINIMUMNUM || Opc == ISD::FMAXIMUMNUM; bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM || Opc == ISD::FMINIMUMNUM; SelectionDAG::FlagInserter FlagsInserter(DAG, N); @@ -19414,32 +19541,30 @@ SDValue DAGCombiner::visitFMinMax(SDNode *N) { // minnum(X, qnan) -> X // maxnum(X, qnan) -> X - // minnum(X, snan) -> qnan - // maxnum(X, snan) -> qnan // minimum(X, nan) -> qnan // maximum(X, nan) -> qnan // minimumnum(X, nan) -> X // maximumnum(X, nan) -> X if (AF.isNaN()) { - if (PropAllNaNsToQNaNs || (AF.isSignaling() && PropOnlySNaNsToQNaNs)) { + if (PropAllNaNsToQNaNs) { if (AF.isSignaling()) return DAG.getConstantFP(AF.makeQuiet(), SDLoc(N), VT); return N->getOperand(1); + } else if (ReturnsOtherForAllNaNs || !AF.isSignaling()) { + return N->getOperand(0); } - return N->getOperand(0); + return SDValue(); } // In the following folds, inf can be replaced with the largest finite // float, if the ninf flag is set. if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) { - // minnum(X, -inf) -> -inf (ignoring sNaN -> qNaN propagation) - // maxnum(X, +inf) -> +inf (ignoring sNaN -> qNaN propagation) // minimum(X, -inf) -> -inf if nnan // maximum(X, +inf) -> +inf if nnan // minimumnum(X, -inf) -> -inf // maximumnum(X, +inf) -> +inf if (IsMin == AF.isNegative() && - (!PropAllNaNsToQNaNs || Flags.hasNoNaNs())) + (ReturnsOtherForAllNaNs || Flags.hasNoNaNs())) return N->getOperand(1); // minnum(X, +inf) -> X if nnan @@ -23359,6 +23484,10 @@ SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) { EVT SubVecVT = SubVec.getValueType(); EVT VT = DestVec.getValueType(); unsigned NumSrcElts = SubVecVT.getVectorNumElements(); + // Bail out if the inserted value is larger than the vector element, as + // insert_vector_elt performs an implicit truncation in this case. + if (InsertVal.getValueType() != VT.getVectorElementType()) + return SDValue(); // If the source only has a single vector element, the cost of creating adding // it to a vector is likely to exceed the cost of a insert_vector_elt. if (NumSrcElts == 1) @@ -25395,7 +25524,7 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { !Op.getOperand(0).getValueType().isVector()) Ops.push_back(Op.getOperand(0)); else if (Op.isUndef()) - Ops.push_back(DAG.getNode(ISD::UNDEF, DL, SVT)); + Ops.push_back(DAG.getNode(Op.getOpcode(), DL, SVT)); else return SDValue(); @@ -25417,7 +25546,7 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { if (Op.getValueType() == SVT) continue; if (Op.isUndef()) - Op = DAG.getNode(ISD::UNDEF, DL, SVT); + Op = DAG.getNode(Op.getOpcode(), DL, SVT); else Op = DAG.getBitcast(SVT, Op); } @@ -28963,9 +29092,9 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, // over-conservative. It would be beneficial to be able to remember // both potential memory locations. 
Since we are discarding // src value info, don't do the transformation if the memory - // locations are not in the default address space. - LLD->getPointerInfo().getAddrSpace() != 0 || - RLD->getPointerInfo().getAddrSpace() != 0 || + // locations are not in the same address space. + LLD->getPointerInfo().getAddrSpace() != + RLD->getPointerInfo().getAddrSpace() || // We can't produce a CMOV of a TargetFrameIndex since we won't // generate the address generation required. LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex || @@ -29047,6 +29176,9 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, // but the new load must be the minimum (most restrictive) alignment of the // inputs. Align Alignment = std::min(LLD->getAlign(), RLD->getAlign()); + unsigned AddrSpace = LLD->getAddressSpace(); + assert(AddrSpace == RLD->getAddressSpace()); + MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags(); if (!RLD->isInvariant()) MMOFlags &= ~MachineMemOperand::MOInvariant; @@ -29055,15 +29187,16 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { // FIXME: Discards pointer and AA info. Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect), - LLD->getChain(), Addr, MachinePointerInfo(), Alignment, - MMOFlags); + LLD->getChain(), Addr, MachinePointerInfo(AddrSpace), + Alignment, MMOFlags); } else { // FIXME: Discards pointer and AA info. Load = DAG.getExtLoad( LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType() : LLD->getExtensionType(), SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr, - MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags); + MachinePointerInfo(AddrSpace), LLD->getMemoryVT(), Alignment, + MMOFlags); } // Users of the select now use the result of the load. @@ -29707,7 +29840,8 @@ static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT, return false; }; - if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo)) { + if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo, /*AllowUndefs=*/false, + /*AllowTruncation=*/true)) { if (!VT.isVector()) return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT); // We need to create a build vector diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 507b2d6..5c84059 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1965,7 +1965,7 @@ Register FastISel::createResultReg(const TargetRegisterClass *RC) { Register FastISel::constrainOperandRegClass(const MCInstrDesc &II, Register Op, unsigned OpNum) { if (Op.isVirtual()) { - const TargetRegisterClass *RegClass = TII.getRegClass(II, OpNum, &TRI); + const TargetRegisterClass *RegClass = TII.getRegClass(II, OpNum); if (!MRI.constrainRegClass(Op, RegClass)) { // If it's not legal to COPY between the register classes, something // has gone very wrong before we got here. 
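One point in the SimplifySelectOps hunk above is worth spelling out. The fold rewrites select(cond, load p, load q) into a load of select(cond, p, q), so the two pointers must live in a common address space for the select to make sense; requiring the spaces to match, rather than both being addrspace 0, is the general form of that condition, and the merged pointer info can now carry the shared space instead of implying the default one. A condensed sketch of the guard, reusing the LLD/RLD load nodes from the surrounding code:

// Reject only genuinely mixed address spaces; a shared non-zero space is
// fine now that the rewritten load's pointer info records it.
unsigned AddrSpace = LLD->getAddressSpace();
if (AddrSpace != RLD->getAddressSpace())
  return false;
// The combined load still discards the original pointer values (and with
// them the AA info), but keeps the address space.
MachinePointerInfo PtrInfo(AddrSpace);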
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index bb10cf6..4ad721b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -15,10 +15,12 @@ #include "InstrEmitter.h" #include "SDNodeDbgValue.h" #include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -61,6 +63,8 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses, unsigned N = Node->getNumOperands(); while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue) --N; + if (N && Node->getOperand(N - 1).getOpcode() == ISD::DEACTIVATION_SYMBOL) + --N; // Ignore deactivation symbol if it exists. if (N && Node->getOperand(N - 1).getValueType() == MVT::Other) --N; // Ignore chain if it exists. @@ -125,7 +129,7 @@ void InstrEmitter::EmitCopyFromReg(SDValue Op, bool IsClone, Register SrcReg, const TargetRegisterClass *RC = nullptr; if (i + II.getNumDefs() < II.getNumOperands()) { RC = TRI->getAllocatableClass( - TII->getRegClass(II, i + II.getNumDefs(), TRI)); + TII->getRegClass(II, i + II.getNumDefs())); } if (!UseRC) UseRC = RC; @@ -197,7 +201,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, // register instead of creating a new vreg. Register VRBase; const TargetRegisterClass *RC = - TRI->getAllocatableClass(TII->getRegClass(II, i, TRI)); + TRI->getAllocatableClass(TII->getRegClass(II, i)); // Always let the value type influence the used register class. The // constraints on the instruction may be too lax to represent the value // type correctly. For example, a 64-bit float (X86::FR64) can't live in @@ -330,7 +334,7 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, if (II) { const TargetRegisterClass *OpRC = nullptr; if (IIOpNum < II->getNumOperands()) - OpRC = TII->getRegClass(*II, IIOpNum, TRI); + OpRC = TII->getRegClass(*II, IIOpNum); if (OpRC) { unsigned MinNumRegs = MinRCSize; @@ -409,8 +413,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, SDValue Op, Register VReg = R->getReg(); MVT OpVT = Op.getSimpleValueType(); const TargetRegisterClass *IIRC = - II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI)) - : nullptr; + II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum)) : nullptr; const TargetRegisterClass *OpRC = TLI->isTypeLegal(OpVT) ? TLI->getRegClassFor(OpVT, @@ -733,6 +736,8 @@ MachineOperand GetMOForConstDbgOp(const SDDbgOperand &Op) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { if (CI->getBitWidth() > 64) return MachineOperand::CreateCImm(CI); + if (CI->getBitWidth() == 1) + return MachineOperand::CreateImm(CI->getZExtValue()); return MachineOperand::CreateImm(CI->getSExtValue()); } if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) @@ -1221,15 +1226,23 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, } } - if (SDNode *GluedNode = Node->getGluedNode()) { - // FIXME: Possibly iterate over multiple glue nodes? 
- if (GluedNode->getOpcode() == - ~(unsigned)TargetOpcode::CONVERGENCECTRL_GLUE) { - Register VReg = getVR(GluedNode->getOperand(0), VRBaseMap); - MachineOperand MO = MachineOperand::CreateReg(VReg, /*isDef=*/false, - /*isImp=*/true); - MIB->addOperand(MO); - } + unsigned Op = Node->getNumOperands(); + if (Op != 0 && Node->getOperand(Op - 1)->getOpcode() == + ~(unsigned)TargetOpcode::CONVERGENCECTRL_GLUE) { + Register VReg = getVR(Node->getOperand(Op - 1)->getOperand(0), VRBaseMap); + MachineOperand MO = MachineOperand::CreateReg(VReg, /*isDef=*/false, + /*isImp=*/true); + MIB->addOperand(MO); + Op--; + } + + if (Op != 0 && + Node->getOperand(Op - 1)->getOpcode() == ISD::DEACTIVATION_SYMBOL) { + MI->setDeactivationSymbol( + *MF, const_cast<GlobalValue *>( + cast<DeactivationSymbolSDNode>(Node->getOperand(Op - 1)) + ->getGlobal())); + Op--; } // Run post-isel target hook to adjust this instruction if needed. @@ -1250,7 +1263,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, llvm_unreachable("This target-independent node should have been selected!"); case ISD::EntryToken: case ISD::MERGE_VALUES: - case ISD::TokenFactor: // fall thru + case ISD::TokenFactor: + case ISD::DEACTIVATION_SYMBOL: break; case ISD::CopyToReg: { Register DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); @@ -1415,13 +1429,6 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, } } - // Add rounding control registers as implicit def for inline asm. - if (MF->getFunction().hasFnAttribute(Attribute::StrictFP)) { - ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters(); - for (MCPhysReg Reg : RCRegs) - MIB.addReg(Reg, RegState::ImplicitDefine); - } - // GCC inline assembly allows input operands to also be early-clobber // output operands (so long as the operand is written only after it's // used), but this does not match the semantics of our early-clobber flag. @@ -1442,6 +1449,13 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, if (MD) MIB.addMetadata(MD); + // Add rounding control registers as implicit def for inline asm. 
+ if (MF->getFunction().hasFnAttribute(Attribute::StrictFP)) { + ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters(); + for (MCPhysReg Reg : RCRegs) + MIB.addReg(Reg, RegState::ImplicitDefine); + } + MBB->insert(InsertPos, MIB); break; } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 431a810..7606bc8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -163,6 +163,8 @@ private: RTLIB::Libcall CallI128); void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results); + SDValue ExpandSincosStretLibCall(SDNode *Node) const; + SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, const SDLoc &dl); SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, @@ -2125,10 +2127,11 @@ SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool IsSigned, EVT RetVT) { EVT CodePtrTy = TLI.getPointerTy(DAG.getDataLayout()); SDValue Callee; - if (const char *LibcallName = TLI.getLibcallName(LC)) - Callee = DAG.getExternalSymbol(LibcallName, CodePtrTy); + RTLIB::LibcallImpl LCImpl = TLI.getLibcallImpl(LC); + if (LCImpl != RTLIB::Unsupported) + Callee = DAG.getExternalSymbol(LCImpl, CodePtrTy); else { - Callee = DAG.getUNDEF(CodePtrTy); + Callee = DAG.getPOISON(CodePtrTy); DAG.getContext()->emitError(Twine("no libcall available for ") + Node->getOperationName(&DAG)); } @@ -2155,7 +2158,7 @@ SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool signExtend = TLI.shouldSignExtendTypeInLibCall(RetTy, IsSigned); CLI.setDebugLoc(SDLoc(Node)) .setChain(InChain) - .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, + .setLibCallee(TLI.getLibcallImplCallingConv(LCImpl), RetTy, Callee, std::move(Args)) .setTailCall(isTailCall) .setSExtResult(signExtend) @@ -2379,8 +2382,18 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, Entry.IsZExt = !isSigned; Args.push_back(Entry); - SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), - TLI.getPointerTy(DAG.getDataLayout())); + RTLIB::LibcallImpl LibcallImpl = TLI.getLibcallImpl(LC); + if (LibcallImpl == RTLIB::Unsupported) { + DAG.getContext()->emitError(Twine("no libcall available for ") + + Node->getOperationName(&DAG)); + SDValue Poison = DAG.getPOISON(RetVT); + Results.push_back(Poison); + Results.push_back(Poison); + return; + } + + SDValue Callee = + DAG.getExternalSymbol(LibcallImpl, TLI.getPointerTy(DAG.getDataLayout())); SDLoc dl(Node); TargetLowering::CallLoweringInfo CLI(DAG); @@ -2394,8 +2407,11 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); // Remainder is loaded back from the stack frame. - SDValue Rem = - DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr, MachinePointerInfo()); + int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex(); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); + + SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr, PtrInfo); Results.push_back(CallInfo.first); Results.push_back(Rem); } @@ -2423,6 +2439,100 @@ static bool useSinCos(SDNode *Node) { return false; } +SDValue SelectionDAGLegalize::ExpandSincosStretLibCall(SDNode *Node) const { + // For iOS, we want to call an alternative entry point: __sincos_stret, + // which returns the values in two S / D registers. 
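// The three cases below correspond to real ABIs: e.g. AArch64 returns the
// two fields directly in registers, x86-64 Darwin packs both floats of
// __sincosf_stret into a single vector return, and 32-bit ARM returns the
// pair indirectly through sret.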
+ SDLoc dl(Node); + SDValue Arg = Node->getOperand(0); + EVT ArgVT = Arg.getValueType(); + RTLIB::Libcall LC = RTLIB::getSINCOS_STRET(ArgVT); + RTLIB::LibcallImpl SincosStret = TLI.getLibcallImpl(LC); + if (SincosStret == RTLIB::Unsupported) + return SDValue(); + + /// There are 3 different ABI cases to handle: + /// - Direct return of separate fields in registers + /// - Single return as vector elements + /// - sret struct + + const RTLIB::RuntimeLibcallsInfo &CallsInfo = TLI.getRuntimeLibcallsInfo(); + + const DataLayout &DL = DAG.getDataLayout(); + + auto [FuncTy, FuncAttrs] = CallsInfo.getFunctionTy( + *DAG.getContext(), TM.getTargetTriple(), DL, SincosStret); + + Type *SincosStretRetTy = FuncTy->getReturnType(); + CallingConv::ID CallConv = CallsInfo.getLibcallImplCallingConv(SincosStret); + + SDValue Callee = + DAG.getExternalSymbol(SincosStret, TLI.getProgramPointerTy(DL)); + + TargetLowering::ArgListTy Args; + SDValue SRet; + + int FrameIdx; + if (FuncTy->getParamType(0)->isPointerTy()) { + // Uses sret + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + + AttributeSet PtrAttrs = FuncAttrs.getParamAttrs(0); + Type *StructTy = PtrAttrs.getStructRetType(); + const uint64_t ByteSize = DL.getTypeAllocSize(StructTy); + const Align StackAlign = DL.getPrefTypeAlign(StructTy); + + FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false); + SRet = DAG.getFrameIndex(FrameIdx, TLI.getFrameIndexTy(DL)); + + TargetLowering::ArgListEntry Entry(SRet, FuncTy->getParamType(0)); + Entry.IsSRet = true; + Entry.IndirectType = StructTy; + Entry.Alignment = StackAlign; + + Args.push_back(Entry); + Args.emplace_back(Arg, FuncTy->getParamType(1)); + } else { + Args.emplace_back(Arg, FuncTy->getParamType(0)); + } + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(DAG.getEntryNode()) + .setLibCallee(CallConv, SincosStretRetTy, Callee, std::move(Args)) + .setIsPostTypeLegalization(); + + std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); + + if (SRet) { + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); + SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet, PtrInfo); + + TypeSize StoreSize = ArgVT.getStoreSize(); + + // Address of cos field. + SDValue Add = DAG.getObjectPtrOffset(dl, SRet, StoreSize); + SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, + PtrInfo.getWithOffset(StoreSize)); + + SDVTList Tys = DAG.getVTList(ArgVT, ArgVT); + return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, LoadSin.getValue(0), + LoadCos.getValue(0)); + } + + if (!CallResult.first.getValueType().isVector()) + return CallResult.first; + + SDValue SinVal = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT, CallResult.first, + DAG.getVectorIdxConstant(0, dl)); + SDValue CosVal = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT, CallResult.first, + DAG.getVectorIdxConstant(1, dl)); + SDVTList Tys = DAG.getVTList(ArgVT, ArgVT); + return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal); +} + SDValue SelectionDAGLegalize::expandLdexp(SDNode *Node) const { SDLoc dl(Node); EVT VT = Node->getValueType(0); @@ -3770,7 +3880,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { RTLIB::Libcall LC = RTLIB::getLDEXP(VT); // Use the LibCall instead, it is very likely faster // FIXME: Use separate LibCall action. 
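// (Querying the resolved RTLIB::LibcallImpl makes availability an enum
// compare against RTLIB::Unsupported instead of a null C-string check on
// the libcall name.)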
- if (TLI.getLibcallName(LC)) + if (TLI.getLibcallImpl(LC) != RTLIB::Unsupported) break; if (SDValue Expanded = expandLdexp(Node)) { @@ -3785,7 +3895,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { RTLIB::Libcall LC = RTLIB::getFREXP(Node->getValueType(0)); // Use the LibCall instead, it is very likely faster // FIXME: Use separate LibCall action. - if (TLI.getLibcallName(LC)) + if (TLI.getLibcallImpl(LC) != RTLIB::Unsupported) break; if (SDValue Expanded = expandFrexp(Node)) { @@ -4587,7 +4697,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::Libcall LC = RTLIB::getOUTLINE_ATOMIC(Opc, Order, VT); EVT RetVT = Node->getValueType(0); SmallVector<SDValue, 4> Ops; - if (TLI.getLibcallName(LC)) { + if (TLI.getLibcallImpl(LC) != RTLIB::Unsupported) { // If outline atomic available, prepare its arguments and expand. Ops.append(Node->op_begin() + 2, Node->op_end()); Ops.push_back(Node->getOperand(1)); @@ -4730,12 +4840,30 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { case ISD::FSINCOS: case ISD::FSINCOSPI: { EVT VT = Node->getValueType(0); + + if (Node->getOpcode() == ISD::FSINCOS) { + RTLIB::Libcall SincosStret = RTLIB::getSINCOS_STRET(VT); + if (SincosStret != RTLIB::UNKNOWN_LIBCALL) { + if (SDValue Expanded = ExpandSincosStretLibCall(Node)) { + Results.push_back(Expanded); + Results.push_back(Expanded.getValue(1)); + break; + } + } + } + RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS ? RTLIB::getSINCOS(VT) : RTLIB::getSINCOSPI(VT); - bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results); - if (!Expanded) - llvm_unreachable("Expected scalar FSINCOS[PI] to expand to libcall!"); + bool Expanded = TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results); + if (!Expanded) { + DAG.getContext()->emitError(Twine("no libcall available for ") + + Node->getOperationName(&DAG)); + SDValue Poison = DAG.getPOISON(VT); + Results.push_back(Poison); + Results.push_back(Poison); + } + break; } case ISD::FLOG: @@ -4825,7 +4953,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { EVT VT = Node->getValueType(0); RTLIB::Libcall LC = Node->getOpcode() == ISD::FMODF ? RTLIB::getMODF(VT) : RTLIB::getFREXP(VT); - bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results, + bool Expanded = TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results, /*CallRetResNo=*/0); if (!Expanded) llvm_unreachable("Expected scalar FFREXP/FMODF to expand to libcall!"); @@ -4835,7 +4963,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { case ISD::STRICT_FPOWI: { RTLIB::Libcall LC = RTLIB::getPOWI(Node->getSimpleValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fpowi."); - if (!TLI.getLibcallName(LC)) { + if (TLI.getLibcallImpl(LC) == RTLIB::Unsupported) { // Some targets don't have a powi libcall; use pow instead. if (Node->isStrictFPOpcode()) { SDValue Exponent = @@ -4866,7 +4994,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { // If the exponent does not match with sizeof(int) a libcall to // RTLIB::POWI would use the wrong type for the argument. 
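// (The powi libcalls take the exponent as a C 'int'; passing a differently
// sized integer where the callee expects int would misread the argument.)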
DAG.getContext()->emitError("POWI exponent does not match sizeof(int)"); - Results.push_back(DAG.getUNDEF(Node->getValueType(0))); + Results.push_back(DAG.getPOISON(Node->getValueType(0))); break; } ExpandFPLibCall(Node, LC, Results); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 58983cb..545b7f5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -717,7 +717,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_ExpOp(SDNode *N) { RTLIB::Libcall LC = IsPowI ? RTLIB::getPOWI(N->getValueType(0)) : RTLIB::getLDEXP(N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fpowi."); - if (!TLI.getLibcallName(LC)) { + if (TLI.getLibcallImpl(LC) == RTLIB::Unsupported) { // Some targets don't have a powi libcall; use pow instead. // FIXME: Implement this if some target needs it. DAG.getContext()->emitError("do not know how to soften fpowi to fpow"); @@ -802,7 +802,8 @@ bool DAGTypeLegalizer::SoftenFloatRes_UnaryWithTwoFPResults( assert(VT == N->getValueType(1) && "expected both return values to have the same type"); - if (!TLI.getLibcallName(LC)) + RTLIB::LibcallImpl LCImpl = TLI.getLibcallImpl(LC); + if (LCImpl == RTLIB::Unsupported) return false; EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); @@ -831,8 +832,9 @@ bool DAGTypeLegalizer::SoftenFloatRes_UnaryWithTwoFPResults( CallOptions.setTypeListBeforeSoften({OpsVT}, VT) .setOpsTypeOverrides(CallOpsTypeOverrides); - auto [ReturnVal, Chain] = TLI.makeLibCall(DAG, LC, NVT, Ops, CallOptions, DL, - /*Chain=*/SDValue()); + auto [ReturnVal, Chain] = + TLI.makeLibCall(DAG, LCImpl, NVT, Ops, CallOptions, DL, + /*Chain=*/SDValue()); auto CreateStackLoad = [&, Chain = Chain](SDValue StackSlot) { int FrameIdx = cast<FrameIndexSDNode>(StackSlot)->getIndex(); @@ -862,7 +864,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSINCOS(SDNode *N) { RTLIB::Libcall CosLC = RTLIB::getCOS(VT); SDValue SoftSin, SoftCos; - if (!TLI.getLibcallName(SinLC) || !TLI.getLibcallName(CosLC)) { + if (TLI.getLibcallImpl(SinLC) == RTLIB::Unsupported || + TLI.getLibcallImpl(CosLC) == RTLIB::Unsupported) { DAG.getContext()->emitError("do not know how to soften fsincos"); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); @@ -1726,7 +1729,7 @@ void DAGTypeLegalizer::ExpandFloatRes_UnaryWithTwoFPResults( SDNode *N, RTLIB::Libcall LC, std::optional<unsigned> CallRetResNo) { assert(!N->isStrictFPOpcode() && "strictfp not implemented"); SmallVector<SDValue> Results; - DAG.expandMultipleResultFPLibCall(LC, N, Results, CallRetResNo); + TLI.expandMultipleResultFPLibCall(DAG, LC, N, Results, CallRetResNo); for (auto [ResNo, Res] : enumerate(Results)) { SDValue Lo, Hi; GetPairElements(Res, Lo, Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 44e5a18..b9377fa 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -145,7 +145,6 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { break; case ISD::SPLAT_VECTOR: case ISD::SCALAR_TO_VECTOR: - case ISD::EXPERIMENTAL_VP_SPLAT: Res = PromoteIntRes_ScalarOp(N); break; case ISD::STEP_VECTOR: Res = PromoteIntRes_STEP_VECTOR(N); break; @@ -2008,7 +2007,6 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { break; case ISD::SPLAT_VECTOR: case ISD::SCALAR_TO_VECTOR: - case 
ISD::EXPERIMENTAL_VP_SPLAT: Res = PromoteIntOp_ScalarOp(N); break; case ISD::VSELECT: @@ -2363,9 +2361,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, SDValue DAGTypeLegalizer::PromoteIntOp_ScalarOp(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); - if (N->getOpcode() == ISD::EXPERIMENTAL_VP_SPLAT) - return SDValue( - DAG.UpdateNodeOperands(N, Op, N->getOperand(1), N->getOperand(2)), 0); // Integer SPLAT_VECTOR/SCALAR_TO_VECTOR operands are implicitly truncated, // so just promote the operand in place. @@ -2692,7 +2687,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ExpOp(SDNode *N) { RTLIB::Libcall LC = IsPowI ? RTLIB::getPOWI(N->getValueType(0)) : RTLIB::getLDEXP(N->getValueType(0)); - if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) { + RTLIB::LibcallImpl LCImpl = TLI.getLibcallImpl(LC); + if (LCImpl == RTLIB::Unsupported) { // Scalarize vector FPOWI instead of promoting the type. This allows the // scalar FPOWIs to be visited and converted to libcalls before promoting // the type. @@ -2719,7 +2715,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ExpOp(SDNode *N) { CallOptions.setIsSigned(true); SDValue Ops[2] = {N->getOperand(0 + OpOffset), N->getOperand(1 + OpOffset)}; std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall( - DAG, LC, N->getValueType(0), Ops, CallOptions, SDLoc(N), Chain); + DAG, LCImpl, N->getValueType(0), Ops, CallOptions, SDLoc(N), Chain); ReplaceValueWith(SDValue(N, 0), Tmp.first); if (IsStrict) ReplaceValueWith(SDValue(N, 1), Tmp.second); @@ -3128,7 +3124,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::USHLSAT: ExpandIntRes_SHLSAT(N, Lo, Hi); break; case ISD::AVGCEILS: - case ISD::AVGCEILU: + case ISD::AVGCEILU: case ISD::AVGFLOORS: case ISD::AVGFLOORU: ExpandIntRes_AVG(N, Lo, Hi); break; @@ -3187,7 +3183,9 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { EVT RetVT = Node->getValueType(0); TargetLowering::MakeLibCallOptions CallOptions; SmallVector<SDValue, 4> Ops; - if (TLI.getLibcallName(LC)) { + + RTLIB::LibcallImpl LCImpl = TLI.getLibcallImpl(LC); + if (LCImpl != RTLIB::Unsupported) { Ops.append(Node->op_begin() + 2, Node->op_end()); Ops.push_back(Node->getOperand(1)); } else { @@ -3195,8 +3193,9 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); Ops.append(Node->op_begin() + 1, Node->op_end()); + LCImpl = TLI.getLibcallImpl(LC); } - return TLI.makeLibCall(DAG, LC, RetVT, Ops, CallOptions, SDLoc(Node), + return TLI.makeLibCall(DAG, LCImpl, RetVT, Ops, CallOptions, SDLoc(Node), Node->getOperand(0)); } @@ -4097,21 +4096,21 @@ void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc DL(N); if (TLI.getOperationAction(ISD::CTPOP, VT) == TargetLoweringBase::LibCall) { - RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; - if (VT == MVT::i32) - LC = RTLIB::CTPOP_I32; - else if (VT == MVT::i64) - LC = RTLIB::CTPOP_I64; - else if (VT == MVT::i128) - LC = RTLIB::CTPOP_I128; - assert(LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC) && + RTLIB::Libcall LC = RTLIB::getCTPOP(VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "LibCall explicitly requested, but not available"); - TargetLowering::MakeLibCallOptions CallOptions; - EVT IntVT = - EVT::getIntegerVT(*DAG.getContext(), DAG.getLibInfo().getIntSize()); - SDValue Res = TLI.makeLibCall(DAG, LC, IntVT, Op, CallOptions, DL).first; - SplitInteger(DAG.getSExtOrTrunc(Res, DL, VT), Lo, Hi); 
- return; + + if (RTLIB::LibcallImpl LCImpl = TLI.getLibcallImpl(LC)) { + TargetLowering::MakeLibCallOptions CallOptions; + EVT IntVT = + EVT::getIntegerVT(*DAG.getContext(), DAG.getLibInfo().getIntSize()); + SDValue Res = + TLI.makeLibCall(DAG, LCImpl, IntVT, Op, CallOptions, DL).first; + SplitInteger(DAG.getSExtOrTrunc(Res, DL, VT), Lo, Hi); + return; + } + + // If the function is not available, fall back on the expansion. } // ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo) @@ -4236,55 +4235,19 @@ void DAGTypeLegalizer::ExpandIntRes_XROUND_XRINT(SDNode *N, SDValue &Lo, RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (N->getOpcode() == ISD::LROUND || N->getOpcode() == ISD::STRICT_LROUND) { - if (VT == MVT::f32) - LC = RTLIB::LROUND_F32; - else if (VT == MVT::f64) - LC = RTLIB::LROUND_F64; - else if (VT == MVT::f80) - LC = RTLIB::LROUND_F80; - else if (VT == MVT::f128) - LC = RTLIB::LROUND_F128; - else if (VT == MVT::ppcf128) - LC = RTLIB::LROUND_PPCF128; + LC = RTLIB::getLROUND(VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected lround input type!"); } else if (N->getOpcode() == ISD::LRINT || N->getOpcode() == ISD::STRICT_LRINT) { - if (VT == MVT::f32) - LC = RTLIB::LRINT_F32; - else if (VT == MVT::f64) - LC = RTLIB::LRINT_F64; - else if (VT == MVT::f80) - LC = RTLIB::LRINT_F80; - else if (VT == MVT::f128) - LC = RTLIB::LRINT_F128; - else if (VT == MVT::ppcf128) - LC = RTLIB::LRINT_PPCF128; + LC = RTLIB::getLRINT(VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected lrint input type!"); } else if (N->getOpcode() == ISD::LLROUND || N->getOpcode() == ISD::STRICT_LLROUND) { - if (VT == MVT::f32) - LC = RTLIB::LLROUND_F32; - else if (VT == MVT::f64) - LC = RTLIB::LLROUND_F64; - else if (VT == MVT::f80) - LC = RTLIB::LLROUND_F80; - else if (VT == MVT::f128) - LC = RTLIB::LLROUND_F128; - else if (VT == MVT::ppcf128) - LC = RTLIB::LLROUND_PPCF128; + LC = RTLIB::getLLROUND(VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llround input type!"); } else if (N->getOpcode() == ISD::LLRINT || N->getOpcode() == ISD::STRICT_LLRINT) { - if (VT == MVT::f32) - LC = RTLIB::LLRINT_F32; - else if (VT == MVT::f64) - LC = RTLIB::LLRINT_F64; - else if (VT == MVT::f80) - LC = RTLIB::LLRINT_F80; - else if (VT == MVT::f128) - LC = RTLIB::LLRINT_F128; - else if (VT == MVT::ppcf128) - LC = RTLIB::LLRINT_PPCF128; + LC = RTLIB::getLLRINT(VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llrint input type!"); } else llvm_unreachable("Unexpected opcode!"); @@ -4444,17 +4407,9 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, return; // If nothing else, we can make a libcall. - RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; - if (VT == MVT::i16) - LC = RTLIB::MUL_I16; - else if (VT == MVT::i32) - LC = RTLIB::MUL_I32; - else if (VT == MVT::i64) - LC = RTLIB::MUL_I64; - else if (VT == MVT::i128) - LC = RTLIB::MUL_I128; - - if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) { + RTLIB::Libcall LC = RTLIB::getMUL(VT); + RTLIB::LibcallImpl LCImpl = TLI.getLibcallImpl(LC); + if (LCImpl == RTLIB::Unsupported) { // Perform a wide multiplication where the wide type is the original VT and // the 4 parts are the split arguments. 
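// That is the classic schoolbook expansion: with H the half bit width,
// (Hi:Lo) = LL*RL + ((LL*RH + LH*RL) << H), computed modulo 2^(2*H).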
TLI.forceExpandMultiply(DAG, dl, /*Signed=*/false, Lo, Hi, LL, RL, LH, RH); @@ -4466,8 +4421,8 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setIsSigned(true); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, - Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LCImpl, VT, Ops, CallOptions, dl).first, Lo, + Hi); } void DAGTypeLegalizer::ExpandIntRes_READCOUNTER(SDNode *N, SDValue &Lo, @@ -4824,15 +4779,7 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, return; } - RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; - if (VT == MVT::i16) - LC = RTLIB::SDIV_I16; - else if (VT == MVT::i32) - LC = RTLIB::SDIV_I32; - else if (VT == MVT::i64) - LC = RTLIB::SDIV_I64; - else if (VT == MVT::i128) - LC = RTLIB::SDIV_I128; + RTLIB::Libcall LC = RTLIB::getSDIV(VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); TargetLowering::MakeLibCallOptions CallOptions; @@ -5039,45 +4986,26 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, bool isSigned; if (Opc == ISD::SHL) { isSigned = false; /*sign irrelevant*/ - if (VT == MVT::i16) - LC = RTLIB::SHL_I16; - else if (VT == MVT::i32) - LC = RTLIB::SHL_I32; - else if (VT == MVT::i64) - LC = RTLIB::SHL_I64; - else if (VT == MVT::i128) - LC = RTLIB::SHL_I128; + LC = RTLIB::getSHL(VT); } else if (Opc == ISD::SRL) { isSigned = false; - if (VT == MVT::i16) - LC = RTLIB::SRL_I16; - else if (VT == MVT::i32) - LC = RTLIB::SRL_I32; - else if (VT == MVT::i64) - LC = RTLIB::SRL_I64; - else if (VT == MVT::i128) - LC = RTLIB::SRL_I128; + LC = RTLIB::getSRL(VT); } else { assert(Opc == ISD::SRA && "Unknown shift!"); isSigned = true; - if (VT == MVT::i16) - LC = RTLIB::SRA_I16; - else if (VT == MVT::i32) - LC = RTLIB::SRA_I32; - else if (VT == MVT::i64) - LC = RTLIB::SRA_I64; - else if (VT == MVT::i128) - LC = RTLIB::SRA_I128; + LC = RTLIB::getSRA(VT); } - if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) { + if (RTLIB::LibcallImpl LibcallImpl = TLI.getLibcallImpl(LC)) { EVT ShAmtTy = EVT::getIntegerVT(*DAG.getContext(), DAG.getLibInfo().getIntSize()); SDValue ShAmt = DAG.getZExtOrTrunc(N->getOperand(1), dl, ShAmtTy); SDValue Ops[2] = {N->getOperand(0), ShAmt}; TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setIsSigned(isSigned); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); + SplitInteger( + TLI.makeLibCall(DAG, LibcallImpl, VT, Ops, CallOptions, dl).first, Lo, + Hi); return; } @@ -5153,15 +5081,7 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, return; } - RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; - if (VT == MVT::i16) - LC = RTLIB::SREM_I16; - else if (VT == MVT::i32) - LC = RTLIB::SREM_I32; - else if (VT == MVT::i64) - LC = RTLIB::SREM_I64; - else if (VT == MVT::i128) - LC = RTLIB::SREM_I128; + RTLIB::Libcall LC = RTLIB::getSREM(VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); TargetLowering::MakeLibCallOptions CallOptions; @@ -5244,18 +5164,13 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext()); // Replace this with a libcall that will check overflow. 
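// (compiler-rt's __mulosi4/__mulodi4/__muloti4, which return the product
// and report overflow through an int* out-parameter.)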
- RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; - if (VT == MVT::i32) - LC = RTLIB::MULO_I32; - else if (VT == MVT::i64) - LC = RTLIB::MULO_I64; - else if (VT == MVT::i128) - LC = RTLIB::MULO_I128; + RTLIB::Libcall LC = RTLIB::getMULO(VT); + RTLIB::LibcallImpl LCImpl = TLI.getLibcallImpl(LC); // If we don't have the libcall or if the function we are compiling is the // implementation of the expected libcall (avoid inf-loop), expand inline. - if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC) || - TLI.getLibcallName(LC) == DAG.getMachineFunction().getName()) { + if (LCImpl == RTLIB::Unsupported || + TLI.getLibcallImplName(LCImpl) == DAG.getMachineFunction().getName()) { // FIXME: This is not an optimal expansion, but better than crashing. SDValue MulLo, MulHi; TLI.forceExpandWideMUL(DAG, dl, /*Signed=*/true, N->getOperand(0), @@ -5293,12 +5208,13 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, Entry.IsZExt = false; Args.push_back(Entry); - SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT); + SDValue Func = DAG.getExternalSymbol(LCImpl, PtrVT); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl) .setChain(Chain) - .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, std::move(Args)) + .setLibCallee(TLI.getLibcallImplCallingConv(LCImpl), RetTy, Func, + std::move(Args)) .setSExtResult(); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); @@ -5341,15 +5257,7 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, } } - RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; - if (VT == MVT::i16) - LC = RTLIB::UDIV_I16; - else if (VT == MVT::i32) - LC = RTLIB::UDIV_I32; - else if (VT == MVT::i64) - LC = RTLIB::UDIV_I64; - else if (VT == MVT::i128) - LC = RTLIB::UDIV_I128; + RTLIB::Libcall LC = RTLIB::getUDIV(VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); TargetLowering::MakeLibCallOptions CallOptions; @@ -5384,15 +5292,7 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, } } - RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; - if (VT == MVT::i16) - LC = RTLIB::UREM_I16; - else if (VT == MVT::i32) - LC = RTLIB::UREM_I32; - else if (VT == MVT::i64) - LC = RTLIB::UREM_I64; - else if (VT == MVT::i128) - LC = RTLIB::UREM_I128; + RTLIB::Libcall LC = RTLIB::getUREM(VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); TargetLowering::MakeLibCallOptions CallOptions; @@ -5551,7 +5451,6 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { break; case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break; case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break; - case ISD::EXPERIMENTAL_VP_SPLAT: case ISD::SPLAT_VECTOR: Res = ExpandIntOp_SPLAT_VECTOR(N); break; case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break; case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break; @@ -6195,10 +6094,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ScalarOp(SDNode *N) { EVT NOutElemVT = NOutVT.getVectorElementType(); SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutElemVT, N->getOperand(0)); - if (N->isVPOpcode()) - return DAG.getNode(N->getOpcode(), dl, NOutVT, Op, N->getOperand(1), - N->getOperand(2)); - return DAG.getNode(N->getOpcode(), dl, NOutVT, Op); } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index ede522e..79384de 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -990,7 +990,6 @@ private: bool SplitSETCC = false); void SplitVecRes_VECTOR_COMPRESS(SDNode *N, 
SDValue &Lo, SDValue &Hi); void SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_VP_SPLAT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -1143,7 +1142,6 @@ private: SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo); SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo); SDValue WidenVecOp_VP_SCATTER(SDNode* N, unsigned OpNo); - SDValue WidenVecOp_VP_SPLAT(SDNode *N, unsigned OpNo); SDValue WidenVecOp_SETCC(SDNode* N); SDValue WidenVecOp_STRICT_FSETCC(SDNode* N); SDValue WidenVecOp_VSELECT(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 8e423c4..e8d9bce 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -534,6 +534,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::PARTIAL_REDUCE_UMLA: case ISD::PARTIAL_REDUCE_SMLA: case ISD::PARTIAL_REDUCE_SUMLA: + case ISD::PARTIAL_REDUCE_FMLA: Action = TLI.getPartialReduceMLAAction(Op.getOpcode(), Node->getValueType(0), Node->getOperand(1).getValueType()); @@ -1243,6 +1244,7 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { case ISD::PARTIAL_REDUCE_UMLA: case ISD::PARTIAL_REDUCE_SMLA: case ISD::PARTIAL_REDUCE_SUMLA: + case ISD::PARTIAL_REDUCE_FMLA: Results.push_back(TLI.expandPartialReduceMLA(Node, DAG)); return; case ISD::VECREDUCE_SEQ_FADD: @@ -1268,18 +1270,23 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { break; case ISD::FSINCOS: case ISD::FSINCOSPI: { - EVT VT = Node->getValueType(0).getVectorElementType(); + EVT VT = Node->getValueType(0); RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS ? RTLIB::getSINCOS(VT) : RTLIB::getSINCOSPI(VT); - if (DAG.expandMultipleResultFPLibCall(LC, Node, Results)) + if (LC != RTLIB::UNKNOWN_LIBCALL && + TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results)) return; + + // TODO: Try to see if there's a narrower call available to use before + // scalarizing. break; } case ISD::FMODF: { - RTLIB::Libcall LC = - RTLIB::getMODF(Node->getValueType(0).getVectorElementType()); - if (DAG.expandMultipleResultFPLibCall(LC, Node, Results, + EVT VT = Node->getValueType(0); + RTLIB::Libcall LC = RTLIB::getMODF(VT); + if (LC != RTLIB::UNKNOWN_LIBCALL && + TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results, /*CallRetResNo=*/0)) return; break; @@ -1822,7 +1829,7 @@ SDValue VectorLegalizer::ExpandLOOP_DEPENDENCE_MASK(SDNode *N) { // If the difference is positive then some elements may alias EVT CmpVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), Diff.getValueType()); - SDValue Zero = DAG.getTargetConstant(0, DL, PtrVT); + SDValue Zero = DAG.getConstant(0, DL, PtrVT); SDValue Cmp = DAG.getSetCC(DL, CmpVT, Diff, Zero, IsReadAfterWrite ? 
ISD::SETEQ : ISD::SETLE); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index bb4a8d9..da3102d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -413,7 +413,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOOP_DEPENDENCE_MASK(SDNode *N) { SDValue Diff = DAG.getNode(ISD::SUB, DL, PtrVT, SinkValue, SourceValue); EVT CmpVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), Diff.getValueType()); - SDValue Zero = DAG.getTargetConstant(0, DL, PtrVT); + SDValue Zero = DAG.getConstant(0, DL, PtrVT); return DAG.getNode(ISD::OR, DL, CmpVT, DAG.getSetCC(DL, CmpVT, Diff, EltSize, ISD::SETGE), DAG.getSetCC(DL, CmpVT, Diff, Zero, ISD::SETEQ)); @@ -1091,14 +1091,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) { return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); } -SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N, +SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N, unsigned OpNo) { assert(OpNo == 1 && "Wrong operand for scalarization!"); SDValue Elt = GetScalarizedVector(N->getOperand(1)); - SDValue Res = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N), - { N->getValueType(0).getVectorElementType(), - MVT::Other }, - { N->getOperand(0), Elt, N->getOperand(2) }); + SDValue Res = + DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N), + {N->getValueType(0).getVectorElementType(), MVT::Other}, + {N->getOperand(0), Elt, N->getOperand(2)}); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -1217,7 +1217,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FCOPYSIGN: SplitVecRes_FPOp_MultiType(N, Lo, Hi); break; case ISD::IS_FPCLASS: SplitVecRes_IS_FPCLASS(N, Lo, Hi); break; case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; - case ISD::EXPERIMENTAL_VP_SPLAT: SplitVecRes_VP_SPLAT(N, Lo, Hi); break; case ISD::SPLAT_VECTOR: case ISD::SCALAR_TO_VECTOR: SplitVecRes_ScalarOp(N, Lo, Hi); @@ -1474,6 +1473,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::PARTIAL_REDUCE_UMLA: case ISD::PARTIAL_REDUCE_SMLA: case ISD::PARTIAL_REDUCE_SUMLA: + case ISD::PARTIAL_REDUCE_FMLA: SplitVecRes_PARTIAL_REDUCE_MLA(N, Lo, Hi); break; case ISD::GET_ACTIVE_LANE_MASK: @@ -1701,10 +1701,8 @@ void DAGTypeLegalizer::SplitVecRes_LOOP_DEPENDENCE_MASK(SDNode *N, SDValue &Lo, Lo = DAG.getNode(N->getOpcode(), DL, LoVT, PtrA, PtrB, N->getOperand(2)); unsigned EltSize = N->getConstantOperandVal(2); - unsigned Offset = EltSize * HiVT.getVectorMinNumElements(); - SDValue Addend = HiVT.isScalableVT() - ? 
DAG.getVScale(DL, MVT::i64, APInt(64, Offset)) - : DAG.getConstant(Offset, DL, MVT::i64); + ElementCount Offset = HiVT.getVectorElementCount() * EltSize; + SDValue Addend = DAG.getElementCount(DL, MVT::i64, Offset); PtrA = DAG.getNode(ISD::ADD, DL, MVT::i64, PtrA, Addend); Hi = DAG.getNode(N->getOpcode(), DL, HiVT, PtrA, PtrB, N->getOperand(2)); @@ -2185,23 +2183,13 @@ void DAGTypeLegalizer::SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = DAG.getNode(N->getOpcode(), dl, LoVT, N->getOperand(0)); if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) { - Hi = DAG.getUNDEF(HiVT); + Hi = DAG.getPOISON(HiVT); } else { assert(N->getOpcode() == ISD::SPLAT_VECTOR && "Unexpected opcode"); Hi = Lo; } } -void DAGTypeLegalizer::SplitVecRes_VP_SPLAT(SDNode *N, SDValue &Lo, - SDValue &Hi) { - SDLoc dl(N); - auto [LoVT, HiVT] = DAG.GetSplitDestVTs(N->getValueType(0)); - auto [MaskLo, MaskHi] = SplitMask(N->getOperand(1)); - auto [EVLLo, EVLHi] = DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl); - Lo = DAG.getNode(N->getOpcode(), dl, LoVT, N->getOperand(0), MaskLo, EVLLo); - Hi = DAG.getNode(N->getOpcode(), dl, HiVT, N->getOperand(0), MaskHi, EVLHi); -} - void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi) { assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!"); @@ -2362,7 +2350,7 @@ void DAGTypeLegalizer::SplitVecRes_VP_LOAD_FF(VPLoadFFSDNode *LD, SDValue &Lo, Lo = DAG.getLoadFFVP(LoVT, dl, Ch, Ptr, MaskLo, EVLLo, MMO); // Fill the upper half with poison. - Hi = DAG.getUNDEF(HiVT); + Hi = DAG.getPOISON(HiVT); ReplaceValueWith(SDValue(LD, 1), Lo.getValue(1)); ReplaceValueWith(SDValue(LD, 2), Lo.getValue(2)); @@ -2464,6 +2452,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue PassThru = MLD->getPassThru(); Align Alignment = MLD->getBaseAlign(); ISD::LoadExtType ExtType = MLD->getExtensionType(); + MachineMemOperand::Flags MMOFlags = MLD->getMemOperand()->getFlags(); // Split Mask operand SDValue MaskLo, MaskHi; @@ -2489,9 +2478,8 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MLD->getPointerInfo(), MachineMemOperand::MOLoad, - LocationSize::beforeOrAfterPointer(), Alignment, MLD->getAAInfo(), - MLD->getRanges()); + MLD->getPointerInfo(), MMOFlags, LocationSize::beforeOrAfterPointer(), + Alignment, MLD->getAAInfo(), MLD->getRanges()); Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, Offset, MaskLo, PassThruLo, LoMemVT, MMO, MLD->getAddressingMode(), ExtType, @@ -2514,8 +2502,8 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, LoMemVT.getStoreSize().getFixedValue()); MMO = DAG.getMachineFunction().getMachineMemOperand( - MPI, MachineMemOperand::MOLoad, LocationSize::beforeOrAfterPointer(), - Alignment, MLD->getAAInfo(), MLD->getRanges()); + MPI, MMOFlags, LocationSize::beforeOrAfterPointer(), Alignment, + MLD->getAAInfo(), MLD->getRanges()); Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi, HiMemVT, MMO, MLD->getAddressingMode(), ExtType, @@ -2921,7 +2909,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, Input2->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector node."); EVT EltVT = NewVT.getVectorElementType(); - SmallVector<SDValue> Ops(NewElts, DAG.getUNDEF(EltVT)); + SmallVector<SDValue> Ops(NewElts, DAG.getPOISON(EltVT)); for (unsigned I = 0; 
I < NewElts; ++I) { if (Mask[I] == PoisonMaskElem) continue; @@ -3689,6 +3677,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::PARTIAL_REDUCE_UMLA: case ISD::PARTIAL_REDUCE_SMLA: case ISD::PARTIAL_REDUCE_SUMLA: + case ISD::PARTIAL_REDUCE_FMLA: Res = SplitVecOp_PARTIAL_REDUCE_MLA(N); break; } @@ -3840,16 +3829,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { InVT.getVectorElementCount()); if (N->isStrictFPOpcode()) { - Lo = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other }, - { N->getOperand(0), Lo }); - Hi = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other }, - { N->getOperand(0), Hi }); + Lo = DAG.getNode(N->getOpcode(), dl, {OutVT, MVT::Other}, + {N->getOperand(0), Lo}); + Hi = DAG.getNode(N->getOpcode(), dl, {OutVT, MVT::Other}, + {N->getOperand(0), Hi}); // Build a factor node to remember that this operation is independent // of the other one. SDValue Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); - + // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Ch); @@ -3938,43 +3927,55 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { GetSplitVector(N->getOperand(0), Lo, Hi); - uint64_t LoEltsMin = Lo.getValueType().getVectorMinNumElements(); - uint64_t IdxVal = Idx->getAsZExtVal(); + ElementCount LoElts = Lo.getValueType().getVectorElementCount(); + // Note: For scalable vectors, the index is scaled by vscale. + ElementCount IdxVal = + ElementCount::get(Idx->getAsZExtVal(), SubVT.isScalableVector()); + uint64_t IdxValMin = IdxVal.getKnownMinValue(); - unsigned NumResultElts = SubVT.getVectorMinNumElements(); + EVT SrcVT = N->getOperand(0).getValueType(); + ElementCount NumResultElts = SubVT.getVectorElementCount(); - if (IdxVal < LoEltsMin) { - // If the extracted elements are all in the low half, do a simple extract. - if (IdxVal + NumResultElts <= LoEltsMin) - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx); + // If the extracted elements are all in the low half, do a simple extract. + if (ElementCount::isKnownLE(IdxVal + NumResultElts, LoElts)) + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx); + unsigned LoEltsMin = LoElts.getKnownMinValue(); + if (IdxValMin < LoEltsMin && SubVT.isFixedLengthVector() && + SrcVT.isFixedLengthVector()) { // Extracted subvector crosses vector split, so we need to blend the two // halves. // TODO: May be able to emit partial extract_subvector. SmallVector<SDValue, 8> Elts; - Elts.reserve(NumResultElts); + Elts.reserve(NumResultElts.getFixedValue()); - DAG.ExtractVectorElements(Lo, Elts, /*Start=*/IdxVal, - /*Count=*/LoEltsMin - IdxVal); + // This is not valid for scalable vectors. If SubVT is scalable, this is the + // same as unrolling a scalable dimension (invalid). If SrcVT is scalable, + // `Lo[LoEltsMin]` may not be the last element of `Lo`.
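// Concretely: if SrcVT is <vscale x 4 x i32>, Lo is <vscale x 2 x i32> and
// LoEltsMin is 2, but at run time Lo holds 2*vscale elements, so element
// LoEltsMin marks the split boundary only when vscale == 1.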
+ DAG.ExtractVectorElements(Lo, Elts, /*Start=*/IdxValMin, + /*Count=*/LoEltsMin - IdxValMin); DAG.ExtractVectorElements(Hi, Elts, /*Start=*/0, /*Count=*/SubVT.getVectorNumElements() - Elts.size()); return DAG.getBuildVector(SubVT, dl, Elts); } - EVT SrcVT = N->getOperand(0).getValueType(); if (SubVT.isScalableVector() == SrcVT.isScalableVector()) { - uint64_t ExtractIdx = IdxVal - LoEltsMin; - if (ExtractIdx % NumResultElts == 0) - return DAG.getExtractSubvector(dl, SubVT, Hi, ExtractIdx); + ElementCount ExtractIdx = IdxVal - LoElts; + if (ExtractIdx.isKnownMultipleOf(NumResultElts)) + return DAG.getExtractSubvector(dl, SubVT, Hi, + ExtractIdx.getKnownMinValue()); - // We cannot create an extract_subvector that isn't a multiple of the result - // size, which may go out of bounds for the last elements. Shuffle the - // desired elements down to 0 and do a simple 0 extract. EVT HiVT = Hi.getValueType(); + assert(HiVT.isFixedLengthVector() && + "Only fixed-vector extracts are supported in this case"); + + // We cannot create an extract_subvector that isn't a multiple of the + // result size, which may go out of bounds for the last elements. Shuffle + // the desired elements down to 0 and do a simple 0 extract. SmallVector<int, 8> Mask(HiVT.getVectorNumElements(), -1); - for (int I = 0; I != static_cast<int>(NumResultElts); ++I) - Mask[I] = ExtractIdx + I; + for (int I = 0; I != int(NumResultElts.getFixedValue()); ++I) + Mask[I] = int(ExtractIdx.getFixedValue()) + I; SDValue Shuffle = DAG.getVectorShuffle(HiVT, dl, Hi, DAG.getPOISON(HiVT), Mask); @@ -4636,13 +4637,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) { InVT.getVectorElementCount()); if (N->isStrictFPOpcode()) { - Lo = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other }, - { N->getOperand(0), Lo, N->getOperand(2) }); - Hi = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other }, - { N->getOperand(0), Hi, N->getOperand(2) }); + Lo = DAG.getNode(N->getOpcode(), DL, {OutVT, MVT::Other}, + {N->getOperand(0), Lo, N->getOperand(2)}); + Hi = DAG.getNode(N->getOpcode(), DL, {OutVT, MVT::Other}, + {N->getOperand(0), Hi, N->getOperand(2)}); // Legalize the chain result - switch anything that used the old chain to // use the new one. - SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), Hi.getValue(1)); ReplaceValueWith(SDValue(N, 1), NewChain); } else if (N->getOpcode() == ISD::VP_FP_ROUND) { @@ -4863,7 +4864,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::STEP_VECTOR: case ISD::SPLAT_VECTOR: case ISD::SCALAR_TO_VECTOR: - case ISD::EXPERIMENTAL_VP_SPLAT: Res = WidenVecRes_ScalarOp(N); break; case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break; @@ -5515,7 +5515,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) { EOps.push_back(Op); } - EVT WidenVT[] = {WidenEltVT, MVT::Other}; + EVT WidenVT[] = {WidenEltVT, MVT::Other}; SDValue Oper = DAG.getNode(Opcode, dl, WidenVT, EOps); ConcatOps[ConcatEnd++] = Oper; Chains.push_back(Oper.getValue(1)); @@ -5652,7 +5652,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { // Widen the input and call convert on the widened input vector. 
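// For example, widening a v2f32 -> v2i32 convert when the legal result is
// v8i32: NumConcat is 4, the input becomes concat(In, poison, poison,
// poison) : v8f32, a single v8f32 -> v8i32 convert is emitted, and only the
// low two lanes carry defined values.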
unsigned NumConcat = WidenEC.getKnownMinValue() / InVTEC.getKnownMinValue(); - SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT)); + SmallVector<SDValue, 16> Ops(NumConcat, DAG.getPOISON(InVT)); Ops[0] = InOp; SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops); if (N->getNumOperands() == 1) @@ -5671,7 +5671,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { // Otherwise unroll into some nasty scalar code and rebuild the vector. EVT EltVT = WidenVT.getVectorElementType(); - SmallVector<SDValue, 16> Ops(WidenEC.getFixedValue(), DAG.getUNDEF(EltVT)); + SmallVector<SDValue, 16> Ops(WidenEC.getFixedValue(), DAG.getPOISON(EltVT)); // Use the original element count so we don't do more scalar opts than // necessary. unsigned MinElts = N->getValueType(0).getVectorNumElements(); @@ -5754,7 +5754,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) { // Otherwise unroll into some nasty scalar code and rebuild the vector. EVT EltVT = WidenVT.getVectorElementType(); std::array<EVT, 2> EltVTs = {{EltVT, MVT::Other}}; - SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT)); + SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getPOISON(EltVT)); SmallVector<SDValue, 32> OpChains; // Use the original element count so we don't do more scalar opts than // necessary. @@ -5817,7 +5817,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) { } while (Ops.size() != WidenNumElts) - Ops.push_back(DAG.getUNDEF(WidenSVT)); + Ops.push_back(DAG.getPOISON(WidenSVT)); return DAG.getBuildVector(WidenVT, DL, Ops); } @@ -6024,7 +6024,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { // input and then widening it. To avoid this, we widen the input only if // it results in a legal type. if (WidenSize % InSize == 0) { - SmallVector<SDValue, 16> Ops(NewNumParts, DAG.getUNDEF(InVT)); + SmallVector<SDValue, 16> Ops(NewNumParts, DAG.getPOISON(InVT)); Ops[0] = InOp; NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops); @@ -6032,7 +6032,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { SmallVector<SDValue, 16> Ops; DAG.ExtractVectorElements(InOp, Ops); Ops.append(WidenSize / InScalarSize - Ops.size(), - DAG.getUNDEF(InVT.getVectorElementType())); + DAG.getPOISON(InVT.getVectorElementType())); NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops); } @@ -6055,11 +6055,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOOP_DEPENDENCE_MASK(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) { SDLoc dl(N); - // Build a vector with undefined for the new nodes. + // Build a vector with poison for the new nodes. EVT VT = N->getValueType(0); // Integer BUILD_VECTOR operands may be larger than the node's vector element - // type. The UNDEFs need to have the same type as the existing operands. + // type. The POISONs need to have the same type as the existing operands. EVT EltVT = N->getOperand(0).getValueType(); unsigned NumElts = VT.getVectorNumElements(); @@ -6068,7 +6068,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) { SmallVector<SDValue, 16> NewOps(N->ops()); assert(WidenNumElts >= NumElts && "Shrinking vector instead of widening!"); - NewOps.append(WidenNumElts - NumElts, DAG.getUNDEF(EltVT)); + NewOps.append(WidenNumElts - NumElts, DAG.getPOISON(EltVT)); return DAG.getBuildVector(WidenVT, dl, NewOps); } @@ -6086,7 +6086,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { if (WidenNumElts % NumInElts == 0) { // Add undef vectors to widen to correct length. 
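// (The padding operands are now poison rather than undef: the extra lanes
// are never read, and poison gives later combines strictly more freedom.)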
unsigned NumConcat = WidenNumElts / NumInElts; - SDValue UndefVal = DAG.getUNDEF(InVT); + SDValue UndefVal = DAG.getPOISON(InVT); SmallVector<SDValue, 16> Ops(NumConcat); for (unsigned i=0; i < NumOperands; ++i) Ops[i] = N->getOperand(i); @@ -6144,7 +6144,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { for (unsigned j = 0; j < NumInElts; ++j) Ops[Idx++] = DAG.getExtractVectorElt(dl, EltVT, InOp, j); } - SDValue UndefVal = DAG.getUNDEF(EltVT); + SDValue UndefVal = DAG.getPOISON(EltVT); for (; Idx < WidenNumElts; ++Idx) Ops[Idx] = UndefVal; return DAG.getBuildVector(WidenVT, dl, Ops); @@ -6211,13 +6211,38 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { Parts.push_back( DAG.getExtractSubvector(dl, PartVT, InOp, IdxVal + I * GCD)); for (; I < WidenNumElts / GCD; ++I) - Parts.push_back(DAG.getUNDEF(PartVT)); + Parts.push_back(DAG.getPOISON(PartVT)); return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts); } - report_fatal_error("Don't know how to widen the result of " - "EXTRACT_SUBVECTOR for scalable vectors"); + // Fallback to extracting through memory. + + Align Alignment = DAG.getReducedAlign(InVT, /*UseABI=*/false); + SDValue StackPtr = DAG.CreateStackTemporary(InVT.getStoreSize(), Alignment); + MachineFunction &MF = DAG.getMachineFunction(); + int FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); + + MachineMemOperand *StoreMMO = MF.getMachineMemOperand( + PtrInfo, MachineMemOperand::MOStore, + LocationSize::beforeOrAfterPointer(), Alignment); + MachineMemOperand *LoadMMO = MF.getMachineMemOperand( + PtrInfo, MachineMemOperand::MOLoad, + LocationSize::beforeOrAfterPointer(), Alignment); + + // Write out the input vector. + SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, StoreMMO); + + // Build a mask to match the length of the non-widened result. + SDValue Mask = + DAG.getMaskFromElementCount(dl, WidenVT, VT.getVectorElementCount()); + + // Read back the sub-vector setting the remaining lanes to poison. + StackPtr = TLI.getVectorSubVecPointer(DAG, StackPtr, InVT, VT, Idx); + return DAG.getMaskedLoad( + WidenVT, dl, Ch, StackPtr, DAG.getUNDEF(StackPtr.getValueType()), Mask, + DAG.getPOISON(WidenVT), VT, LoadMMO, ISD::UNINDEXED, ISD::NON_EXTLOAD); } // We could try widening the input to the right length but for now, extract @@ -6227,7 +6252,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { for (i = 0; i < VTNumElts; ++i) Ops[i] = DAG.getExtractVectorElt(dl, EltVT, InOp, IdxVal + i); - SDValue UndefVal = DAG.getUNDEF(EltVT); + SDValue UndefVal = DAG.getPOISON(EltVT); for (; i < WidenNumElts; ++i) Ops[i] = UndefVal; return DAG.getBuildVector(WidenVT, dl, Ops); @@ -6321,11 +6346,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { if (VT.isVector()) { // If all else fails replace the load with a wide masked load. 
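// getMaskFromElementCount (defined later in SelectionDAG.cpp) builds
// GET_ACTIVE_LANE_MASK(0, EC); e.g. a v3i32 load widened to v4i32 gets the
// mask <1,1,1,0>, so only the bytes the narrow load would touch are read.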
SDLoc DL(N); - EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); - - SDValue Len = DAG.getElementCount(DL, IdxVT, VT.getVectorElementCount()); - SDValue Mask = DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, DL, WideMaskVT, - DAG.getConstant(0, DL, IdxVT), Len); + SDValue Mask = + DAG.getMaskFromElementCount(DL, WideVT, VT.getVectorElementCount()); SDValue NewLoad = DAG.getMaskedLoad( WideVT, DL, LD->getChain(), LD->getBasePtr(), LD->getOffset(), Mask, @@ -6553,9 +6575,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_VP_GATHER(VPGatherSDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_ScalarOp(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - if (N->isVPOpcode()) - return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, N->getOperand(0), - N->getOperand(1), N->getOperand(2)); return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, N->getOperand(0)); } @@ -6901,7 +6920,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_REVERSE(SDNode *N) { Parts.push_back( DAG.getExtractSubvector(dl, PartVT, ReverseVal, IdxVal + i * GCD)); for (; i < WidenNumElts / GCD; ++i) - Parts.push_back(DAG.getUNDEF(PartVT)); + Parts.push_back(DAG.getPOISON(PartVT)); return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts); } @@ -6990,7 +7009,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_STRICT_FSETCC(SDNode *N) { EVT TmpEltVT = LHS.getValueType().getVectorElementType(); // Fully unroll and reassemble. - SmallVector<SDValue, 8> Scalars(WidenNumElts, DAG.getUNDEF(EltVT)); + SmallVector<SDValue, 8> Scalars(WidenNumElts, DAG.getPOISON(EltVT)); SmallVector<SDValue, 8> Chains(NumElts); for (unsigned i = 0; i != NumElts; ++i) { SDValue LHSElem = DAG.getExtractVectorElt(dl, TmpEltVT, LHS, i); @@ -7098,10 +7117,6 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { Res = WidenVecOp_FP_TO_XINT_SAT(N); break; - case ISD::EXPERIMENTAL_VP_SPLAT: - Res = WidenVecOp_VP_SPLAT(N, OpNo); - break; - case ISD::VECREDUCE_FADD: case ISD::VECREDUCE_FMUL: case ISD::VECREDUCE_ADD: @@ -7462,9 +7477,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) { SDValue InVec = N->getOperand(0); EVT OrigVT = SubVec.getValueType(); - if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector) - SubVec = GetWidenedVector(SubVec); - + SubVec = GetWidenedVector(SubVec); EVT SubVT = SubVec.getValueType(); // Whether or not all the elements of the widened SubVec will be inserted into @@ -7486,17 +7499,52 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) { } } + if (!IndicesValid) + report_fatal_error( + "Don't know how to widen the operands for INSERT_SUBVECTOR"); + SDLoc DL(N); // We need to make sure that the indices are still valid, otherwise we might // widen what was previously well-defined to something undefined. - if (IndicesValid && InVec.isUndef() && N->getConstantOperandVal(2) == 0) + if (InVec.isUndef() && N->getConstantOperandVal(2) == 0) return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, InVec, SubVec, N->getOperand(2)); - if (!IndicesValid || OrigVT.isScalableVector()) - report_fatal_error( - "Don't know how to widen the operands for INSERT_SUBVECTOR"); + if (OrigVT.isScalableVector()) { + // Fallback to inserting through memory. 
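// A scalable sub-vector insert at an offset that is itself scaled by vscale
// cannot be expanded with shuffles, so the sequence below spills the wide
// vector to a stack slot, masked-stores the sub-vector over it, and reloads
// the result.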
+ + Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false); + SDValue StackPtr = DAG.CreateStackTemporary(VT.getStoreSize(), Alignment); + MachineFunction &MF = DAG.getMachineFunction(); + int FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); + + MachineMemOperand *StoreMMO = MF.getMachineMemOperand( + PtrInfo, MachineMemOperand::MOStore, + LocationSize::beforeOrAfterPointer(), Alignment); + MachineMemOperand *LoadMMO = MF.getMachineMemOperand( + PtrInfo, MachineMemOperand::MOLoad, + LocationSize::beforeOrAfterPointer(), Alignment); + + // Write out the vector being inserted into. + SDValue Ch = + DAG.getStore(DAG.getEntryNode(), DL, InVec, StackPtr, StoreMMO); + + // Build a mask to match the length of the sub-vector. + SDValue Mask = + DAG.getMaskFromElementCount(DL, SubVT, OrigVT.getVectorElementCount()); + + // Overwrite the sub-vector at the required offset. + SDValue SubVecPtr = + TLI.getVectorSubVecPointer(DAG, StackPtr, VT, OrigVT, N->getOperand(2)); + Ch = DAG.getMaskedStore(Ch, DL, SubVec, SubVecPtr, + DAG.getUNDEF(SubVecPtr.getValueType()), Mask, VT, + StoreMMO, ISD::UNINDEXED, ISD::NON_EXTLOAD); + + // Read back the result. + return DAG.getLoad(VT, DL, Ch, StackPtr, LoadMMO); + } // If the operands can't be widened legally, just replace the INSERT_SUBVECTOR // with a series of INSERT_VECTOR_ELT @@ -7575,12 +7623,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { if (StVT.isVector()) { // If all else fails replace the store with a wide masked store. SDLoc DL(N); - EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); - SDValue WideStVal = GetWidenedVector(StVal); - SDValue Len = DAG.getElementCount(DL, IdxVT, StVT.getVectorElementCount()); - SDValue Mask = DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, DL, WideMaskVT, - DAG.getConstant(0, DL, IdxVT), Len); + SDValue Mask = + DAG.getMaskFromElementCount(DL, WideVT, StVT.getVectorElementCount()); return DAG.getMaskedStore(ST->getChain(), DL, WideStVal, ST->getBasePtr(), ST->getOffset(), Mask, ST->getMemoryVT(), @@ -7591,13 +7636,6 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { report_fatal_error("Unable to widen vector store"); } -SDValue DAGTypeLegalizer::WidenVecOp_VP_SPLAT(SDNode *N, unsigned OpNo) { - assert(OpNo == 1 && "Can widen only mask operand of vp_splat"); - return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), - N->getOperand(0), GetWidenedVector(N->getOperand(1)), - N->getOperand(2)); -} - SDValue DAGTypeLegalizer::WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo) { assert((OpNo == 1 || OpNo == 3) && "Can widen only data or mask operand of vp_store"); diff --git a/llvm/lib/CodeGen/SelectionDAG/SDNodeInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/SDNodeInfo.cpp index e3f6c98..da763df 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SDNodeInfo.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SDNodeInfo.cpp @@ -7,7 +7,10 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/SDNodeInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" using namespace llvm; @@ -40,6 +43,32 @@ static void checkOperandType(const SelectionDAG &DAG, const SDNode *N, ExpectedVT.getEVTString() + ", got " + ActualVT.getEVTString()); } +namespace { + +/// Similar to SDValue, but also records whether it is a result or an operand +/// of a node so we can provide more precise
diagnostics. +class SDNodeValue { + const SDNode *N; + unsigned Idx; + bool IsRes; + +public: + SDNodeValue(const SDNode *N, unsigned Idx, bool IsRes) + : N(N), Idx(Idx), IsRes(IsRes) {} + + SDValue getValue() const { + return IsRes ? SDValue(const_cast<SDNode *>(N), Idx) : N->getOperand(Idx); + } + + EVT getValueType() const { return getValue().getValueType(); } + + friend raw_ostream &operator<<(raw_ostream &OS, const SDNodeValue &Op) { + return OS << (Op.IsRes ? "result" : "operand") << " #" << Op.Idx; + } +}; + +} // namespace + void SDNodeInfo::verifyNode(const SelectionDAG &DAG, const SDNode *N) const { const SDNodeDesc &Desc = getDesc(N->getOpcode()); bool HasChain = Desc.hasProperty(SDNPHasChain); @@ -125,4 +154,91 @@ void SDNodeInfo::verifyNode(const SelectionDAG &DAG, const SDNode *N) const { " must be Register or RegisterMask"); } } + + unsigned VTHwMode = + DAG.getSubtarget().getHwMode(MCSubtargetInfo::HwMode_ValueType); + + // Returns a constrained or constraining value (result or operand) of a node. + // ValIdx is the index of a node's value, as defined by SDTypeConstraint; + // that is, it indexes a node's operands after its results and ignores + // chain/glue values. + auto GetConstraintValue = [&](unsigned ValIdx) { + if (ValIdx < Desc.NumResults) + return SDNodeValue(N, ValIdx, /*IsRes=*/true); + return SDNodeValue(N, HasChain + (ValIdx - Desc.NumResults), + /*IsRes=*/false); + }; + + auto GetConstraintVT = [&](const SDTypeConstraint &C) { + if (!C.NumHwModes) + return static_cast<MVT::SimpleValueType>(C.VT); + for (auto [Mode, VT] : ArrayRef(&VTByHwModeTable[C.VT], C.NumHwModes)) + if (Mode == VTHwMode) + return VT; + llvm_unreachable("No value type for this HW mode"); + }; + + SmallString<128> ES; + raw_svector_ostream SS(ES); + + for (const SDTypeConstraint &C : getConstraints(N->getOpcode())) { + SDNodeValue Val = GetConstraintValue(C.ConstrainedValIdx); + EVT VT = Val.getValueType(); + + switch (C.Kind) { + case SDTCisVT: { + EVT ExpectedVT = GetConstraintVT(C); + + bool IsPtr = ExpectedVT == MVT::iPTR; + if (IsPtr) + ExpectedVT = + DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); + + if (VT != ExpectedVT) { + SS << Val << " must have type " << ExpectedVT; + if (IsPtr) + SS << " (iPTR)"; + SS << ", but has type " << VT; + reportNodeError(DAG, N, SS.str()); + } + break; + } + case SDTCisPtrTy: + break; + case SDTCisInt: + break; + case SDTCisFP: + break; + case SDTCisVec: + break; + case SDTCisSameAs: + break; + case SDTCisVTSmallerThanOp: + break; + case SDTCisOpSmallerThanOp: + break; + case SDTCisEltOfVec: + break; + case SDTCisSubVecOfVec: + break; + case SDTCVecEltisVT: { + EVT ExpectedVT = GetConstraintVT(C); + + if (!VT.isVector()) { + SS << Val << " must have vector type"; + reportNodeError(DAG, N, SS.str()); + } + if (VT.getVectorElementType() != ExpectedVT) { + SS << Val << " must have " << ExpectedVT << " element type, but has " + << VT.getVectorElementType() << " element type"; + reportNodeError(DAG, N, SS.str()); + } + break; + } + case SDTCisSameNumEltsAs: + break; + case SDTCisSameSizeAs: + break; + } + } } diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index f70b6cd..12fc26d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -340,7 +340,7 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, unsigned Idx = RegDefPos.GetIdx(); const MCInstrDesc &Desc = TII->get(Opcode); - 
const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI); + const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx); assert(RC && "Not a valid register class"); RegClass = RC->getID(); // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 379242e..cbe3236 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -76,7 +76,6 @@ #include <cstdlib> #include <limits> #include <optional> -#include <set> #include <string> #include <utility> #include <vector> @@ -1917,6 +1916,21 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, return SDValue(N, 0); } +SDValue SelectionDAG::getDeactivationSymbol(const GlobalValue *GV) { + SDVTList VTs = getVTList(MVT::Untyped); + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::DEACTIVATION_SYMBOL, VTs, {}); + ID.AddPointer(GV); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, SDLoc(), IP)) + return SDValue(E, 0); + + auto *N = newSDNode<DeactivationSymbolSDNode>(GV, VTs); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex; SDVTList VTs = getVTList(VT); @@ -2052,6 +2066,11 @@ SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) { return SDValue(N, 0); } +SDValue SelectionDAG::getExternalSymbol(RTLIB::LibcallImpl Libcall, EVT VT) { + StringRef SymName = TLI->getLibcallImplName(Libcall); + return getExternalSymbol(SymName.data(), VT); +} + SDValue SelectionDAG::getMCSymbol(MCSymbol *Sym, EVT VT) { SDNode *&N = MCSymbols[Sym]; if (N) @@ -2084,32 +2103,51 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) { return SDValue(CondCodeNodes[Cond], 0); } -SDValue SelectionDAG::getVScale(const SDLoc &DL, EVT VT, APInt MulImm, - bool ConstantFold) { +SDValue SelectionDAG::getVScale(const SDLoc &DL, EVT VT, APInt MulImm) { assert(MulImm.getBitWidth() == VT.getSizeInBits() && "APInt size does not match type size!"); if (MulImm == 0) return getConstant(0, DL, VT); - if (ConstantFold) { - const MachineFunction &MF = getMachineFunction(); - const Function &F = MF.getFunction(); - ConstantRange CR = getVScaleRange(&F, 64); - if (const APInt *C = CR.getSingleElement()) - return getConstant(MulImm * C->getZExtValue(), DL, VT); - } + const MachineFunction &MF = getMachineFunction(); + const Function &F = MF.getFunction(); + ConstantRange CR = getVScaleRange(&F, 64); + if (const APInt *C = CR.getSingleElement()) + return getConstant(MulImm * C->getZExtValue(), DL, VT); return getNode(ISD::VSCALE, DL, VT, getConstant(MulImm, DL, VT)); } -SDValue SelectionDAG::getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, - bool ConstantFold) { - if (EC.isScalable()) - return getVScale(DL, VT, - APInt(VT.getSizeInBits(), EC.getKnownMinValue())); +/// \returns a value of type \p VT that represents the runtime value of \p +/// Quantity, i.e. scaled by vscale if it's scalable, or a fixed constant +/// otherwise. Quantity should be a FixedOrScalableQuantity, i.e. ElementCount +/// or TypeSize. 
+template <typename Ty> +static SDValue getFixedOrScalableQuantity(SelectionDAG &DAG, const SDLoc &DL, + EVT VT, Ty Quantity) { + if (Quantity.isScalable()) + return DAG.getVScale( + DL, VT, APInt(VT.getSizeInBits(), Quantity.getKnownMinValue())); + + return DAG.getConstant(Quantity.getKnownMinValue(), DL, VT); +} + +SDValue SelectionDAG::getElementCount(const SDLoc &DL, EVT VT, + ElementCount EC) { + return getFixedOrScalableQuantity(*this, DL, VT, EC); +} + +SDValue SelectionDAG::getTypeSize(const SDLoc &DL, EVT VT, TypeSize TS) { + return getFixedOrScalableQuantity(*this, DL, VT, TS); +} - return getConstant(EC.getKnownMinValue(), DL, VT); +SDValue SelectionDAG::getMaskFromElementCount(const SDLoc &DL, EVT DataVT, + ElementCount EC) { + EVT IdxVT = TLI->getVectorIdxTy(getDataLayout()); + EVT MaskVT = TLI->getSetCCResultType(getDataLayout(), *getContext(), DataVT); + return getNode(ISD::GET_ACTIVE_LANE_MASK, DL, MaskVT, + getConstant(0, DL, IdxVT), getElementCount(DL, IdxVT, EC)); } SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT) { @@ -2468,180 +2506,6 @@ SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { return getZExtOrTrunc(Op, SDLoc(Op), ShTy); } -/// Given a store node \p StoreNode, return true if it is safe to fold that node -/// into \p FPNode, which expands to a library call with output pointers. -static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode, - SDNode *FPNode) { - SmallVector<const SDNode *, 8> Worklist; - SmallVector<const SDNode *, 8> DeferredNodes; - SmallPtrSet<const SDNode *, 16> Visited; - - // Skip FPNode use by StoreNode (that's the use we want to fold into FPNode). - for (SDValue Op : StoreNode->ops()) - if (Op.getNode() != FPNode) - Worklist.push_back(Op.getNode()); - - unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps(); - while (!Worklist.empty()) { - const SDNode *Node = Worklist.pop_back_val(); - auto [_, Inserted] = Visited.insert(Node); - if (!Inserted) - continue; - - if (MaxSteps > 0 && Visited.size() >= MaxSteps) - return false; - - // Reached the FPNode (would result in a cycle). - // OR Reached CALLSEQ_START (would result in nested call sequences). - if (Node == FPNode || Node->getOpcode() == ISD::CALLSEQ_START) - return false; - - if (Node->getOpcode() == ISD::CALLSEQ_END) { - // Defer looking into call sequences (so we can check we're outside one). - // We still need to look through these for the predecessor check. - DeferredNodes.push_back(Node); - continue; - } - - for (SDValue Op : Node->ops()) - Worklist.push_back(Op.getNode()); - } - - // True if we're outside a call sequence and don't have the FPNode as a - // predecessor. No cycles or nested call sequences possible. - return !SDNode::hasPredecessorHelper(FPNode, Visited, DeferredNodes, - MaxSteps); -} - -bool SelectionDAG::expandMultipleResultFPLibCall( - RTLIB::Libcall LC, SDNode *Node, SmallVectorImpl<SDValue> &Results, - std::optional<unsigned> CallRetResNo) { - LLVMContext &Ctx = *getContext(); - EVT VT = Node->getValueType(0); - unsigned NumResults = Node->getNumValues(); - - if (LC == RTLIB::UNKNOWN_LIBCALL) - return false; - - const char *LCName = TLI->getLibcallName(LC); - if (!LCName) - return false; - - auto getVecDesc = [&]() -> VecDesc const * { - for (bool Masked : {false, true}) { - if (VecDesc const *VD = getLibInfo().getVectorMappingInfo( - LCName, VT.getVectorElementCount(), Masked)) { - return VD; - } - } - return nullptr; - }; - - // For vector types, we must find a vector mapping for the libcall. 
- VecDesc const *VD = nullptr; - if (VT.isVector() && !(VD = getVecDesc())) - return false; - - // Find users of the node that store the results (and share input chains). The - // destination pointers can be used instead of creating stack allocations. - SDValue StoresInChain; - SmallVector<StoreSDNode *, 2> ResultStores(NumResults); - for (SDNode *User : Node->users()) { - if (!ISD::isNormalStore(User)) - continue; - auto *ST = cast<StoreSDNode>(User); - SDValue StoreValue = ST->getValue(); - unsigned ResNo = StoreValue.getResNo(); - // Ensure the store corresponds to an output pointer. - if (CallRetResNo == ResNo) - continue; - // Ensure the store to the default address space and not atomic or volatile. - if (!ST->isSimple() || ST->getAddressSpace() != 0) - continue; - // Ensure all store chains are the same (so they don't alias). - if (StoresInChain && ST->getChain() != StoresInChain) - continue; - // Ensure the store is properly aligned. - Type *StoreType = StoreValue.getValueType().getTypeForEVT(Ctx); - if (ST->getAlign() < - getDataLayout().getABITypeAlign(StoreType->getScalarType())) - continue; - // Avoid: - // 1. Creating cyclic dependencies. - // 2. Expanding the node to a call within a call sequence. - if (!canFoldStoreIntoLibCallOutputPointers(ST, Node)) - continue; - ResultStores[ResNo] = ST; - StoresInChain = ST->getChain(); - } - - TargetLowering::ArgListTy Args; - - // Pass the arguments. - for (const SDValue &Op : Node->op_values()) { - EVT ArgVT = Op.getValueType(); - Type *ArgTy = ArgVT.getTypeForEVT(Ctx); - Args.emplace_back(Op, ArgTy); - } - - // Pass the output pointers. - SmallVector<SDValue, 2> ResultPtrs(NumResults); - Type *PointerTy = PointerType::getUnqual(Ctx); - for (auto [ResNo, ST] : llvm::enumerate(ResultStores)) { - if (ResNo == CallRetResNo) - continue; - EVT ResVT = Node->getValueType(ResNo); - SDValue ResultPtr = ST ? ST->getBasePtr() : CreateStackTemporary(ResVT); - ResultPtrs[ResNo] = ResultPtr; - Args.emplace_back(ResultPtr, PointerTy); - } - - SDLoc DL(Node); - - // Pass the vector mask (if required). - if (VD && VD->isMasked()) { - EVT MaskVT = TLI->getSetCCResultType(getDataLayout(), Ctx, VT); - SDValue Mask = getBoolConstant(true, DL, MaskVT, VT); - Args.emplace_back(Mask, MaskVT.getTypeForEVT(Ctx)); - } - - Type *RetType = CallRetResNo.has_value() - ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx) - : Type::getVoidTy(Ctx); - SDValue InChain = StoresInChain ? StoresInChain : getEntryNode(); - SDValue Callee = getExternalSymbol(VD ? VD->getVectorFnName().data() : LCName, - TLI->getPointerTy(getDataLayout())); - TargetLowering::CallLoweringInfo CLI(*this); - CLI.setDebugLoc(DL).setChain(InChain).setLibCallee( - TLI->getLibcallCallingConv(LC), RetType, Callee, std::move(Args)); - - auto [Call, CallChain] = TLI->LowerCallTo(CLI); - - for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) { - if (ResNo == CallRetResNo) { - Results.push_back(Call); - continue; - } - MachinePointerInfo PtrInfo; - SDValue LoadResult = - getLoad(Node->getValueType(ResNo), DL, CallChain, ResultPtr, PtrInfo); - SDValue OutChain = LoadResult.getValue(1); - - if (StoreSDNode *ST = ResultStores[ResNo]) { - // Replace store with the library call. 
- ReplaceAllUsesOfValueWith(SDValue(ST, 0), OutChain); - PtrInfo = ST->getPointerInfo(); - } else { - PtrInfo = MachinePointerInfo::getFixedStack( - getMachineFunction(), cast<FrameIndexSDNode>(ResultPtr)->getIndex()); - } - - Results.push_back(LoadResult); - } - - return true; -} - SDValue SelectionDAG::expandVAArg(SDNode *Node) { SDLoc dl(Node); const TargetLowering &TLI = getTargetLoweringInfo(); @@ -2921,6 +2785,34 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { return MaskedValueIsZero(Op, APInt::getSignMask(BitWidth), Depth); } +bool SelectionDAG::SignBitIsZeroFP(SDValue Op, unsigned Depth) const { + if (Depth >= MaxRecursionDepth) + return false; // Limit search depth. + + unsigned Opc = Op.getOpcode(); + switch (Opc) { + case ISD::FABS: + return true; + case ISD::AssertNoFPClass: { + FPClassTest NoFPClass = + static_cast<FPClassTest>(Op.getConstantOperandVal(1)); + + const FPClassTest TestMask = fcNan | fcNegative; + return (NoFPClass & TestMask) == TestMask; + } + case ISD::ARITH_FENCE: + return SignBitIsZeroFP(Op, Depth + 1); + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FEXP10: + return Op->getFlags().hasNoNaNs(); + default: + return false; + } + + llvm_unreachable("covered opcode switch"); +} + /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use /// this predicate to simplify operations downstream. Mask is known to be zero /// for bits that V cannot have. @@ -4122,6 +4014,25 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.One.clearLowBits(LogOfAlign); break; } + case ISD::AssertNoFPClass: { + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + + FPClassTest NoFPClass = + static_cast<FPClassTest>(Op.getConstantOperandVal(1)); + const FPClassTest NegativeTestMask = fcNan | fcNegative; + if ((NoFPClass & NegativeTestMask) == NegativeTestMask) { + // Cannot be negative. + Known.makeNonNegative(); + } + + const FPClassTest PositiveTestMask = fcNan | fcPositive; + if ((NoFPClass & PositiveTestMask) == PositiveTestMask) { + // Cannot be positive. + Known.makeNegative(); + } + + break; + } case ISD::FGETSIGN: // All bits are zero except the low bit. Known.Zero.setBitsFrom(1); @@ -5830,6 +5741,9 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, return false; } + case ISD::VECTOR_COMPRESS: + return false; + default: // Allow the target to implement this method for its nodes. if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN || @@ -6233,7 +6147,57 @@ bool SelectionDAG::cannotBeOrderedNegativeFP(SDValue Op) const { if (ConstantFPSDNode *C1 = isConstOrConstSplatFP(Op, true)) return !C1->isNegative(); - return Op.getOpcode() == ISD::FABS; + switch (Op.getOpcode()) { + case ISD::FABS: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FEXP10: + return true; + default: + return false; + } + + llvm_unreachable("covered opcode switch"); +} + +bool SelectionDAG::canIgnoreSignBitOfZero(const SDUse &Use) const { + assert(Use.getValueType().isFloatingPoint()); + const SDNode *User = Use.getUser(); + unsigned OperandNo = Use.getOperandNo(); + // Check if this use is insensitive to the sign of zero + switch (User->getOpcode()) { + case ISD::SETCC: + // Comparisons: IEEE-754 specifies +0.0 == -0.0. + case ISD::FABS: + // fabs always produces +0.0. + return true; + case ISD::FCOPYSIGN: + // copysign overwrites the sign bit of the first operand. 
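// e.g. copysign(-0.0, Y) == copysign(+0.0, Y), so the sign of a zero
// feeding operand 0 can never be observed.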
+ return OperandNo == 0; + case ISD::FADD: + case ISD::FSUB: { + // Arithmetic with non-zero constants fixes the uncertainty around the + // sign bit. + SDValue Other = User->getOperand(1 - OperandNo); + return isKnownNeverZeroFloat(Other); + } + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + // fp-to-int conversions normalize signed zeros. + return true; + default: + return false; + } +} + +bool SelectionDAG::canIgnoreSignBitOfZero(SDValue Op) const { + // FIXME: Limit the amount of checked uses to not introduce a compile-time + // regression. Ideally, this should be implemented as a demanded-bits + // optimization that stems from the users. + if (Op->use_size() > 2) + return false; + return all_of(Op->uses(), + [&](const SDUse &Use) { return canIgnoreSignBitOfZero(Use); }); } bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { @@ -7471,8 +7435,12 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL, C1.copySign(C2); return getConstantFP(C1, DL, VT); case ISD::FMINNUM: + if (C1.isSignaling() || C2.isSignaling()) + return SDValue(); return getConstantFP(minnum(C1, C2), DL, VT); case ISD::FMAXNUM: + if (C1.isSignaling() || C2.isSignaling()) + return SDValue(); return getConstantFP(maxnum(C1, C2), DL, VT); case ISD::FMINIMUM: return getConstantFP(minimum(C1, C2), DL, VT); @@ -7733,6 +7701,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, N1.getValueType() == VT && "Binary operator types must match!"); if (VT.getScalarType() == MVT::i1) return getNode(ISD::AND, DL, VT, N1, N2); + if (N2CV && N2CV->isZero()) + return N2; if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) { const APInt &MulImm = N1->getConstantOperandAPInt(0); const APInt &N2CImm = N2C->getAPIntValue(); @@ -8404,7 +8374,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } case ISD::PARTIAL_REDUCE_UMLA: case ISD::PARTIAL_REDUCE_SMLA: - case ISD::PARTIAL_REDUCE_SUMLA: { + case ISD::PARTIAL_REDUCE_SUMLA: + case ISD::PARTIAL_REDUCE_FMLA: { [[maybe_unused]] EVT AccVT = N1.getValueType(); [[maybe_unused]] EVT Input1VT = N2.getValueType(); [[maybe_unused]] EVT Input2VT = N3.getValueType(); @@ -8599,16 +8570,7 @@ static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG, SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags) { - EVT VT = Base.getValueType(); - SDValue Index; - - if (Offset.isScalable()) - Index = getVScale(DL, Base.getValueType(), - APInt(Base.getValueSizeInBits().getFixedValue(), - Offset.getKnownMinValue())); - else - Index = getConstant(Offset.getFixedValue(), DL, VT); - + SDValue Index = getTypeSize(DL, Base.getValueType(), Offset); return getMemBasePlusOffset(Base, Index, DL, Flags); } @@ -9146,8 +9108,8 @@ static bool isInTailCallPositionWrapper(const CallInst *CI, std::pair<SDValue, SDValue> SelectionDAG::getMemcmp(SDValue Chain, const SDLoc &dl, SDValue Mem0, SDValue Mem1, SDValue Size, const CallInst *CI) { - const char *LibCallName = TLI->getLibcallName(RTLIB::MEMCMP); - if (!LibCallName) + RTLIB::LibcallImpl MemcmpImpl = TLI->getLibcallImpl(RTLIB::MEMCMP); + if (MemcmpImpl == RTLIB::Unsupported) return {}; PointerType *PT = PointerType::getUnqual(*getContext()); @@ -9160,13 +9122,14 @@ SelectionDAG::getMemcmp(SDValue Chain, const SDLoc &dl, SDValue Mem0, bool IsTailCall = isInTailCallPositionWrapper(CI, this, /*AllowReturnsFirstArg*/ true); + StringRef LibCallName = TLI->getLibcallImplName(MemcmpImpl); CLI.setDebugLoc(dl) 
.setChain(Chain) - .setLibCallee( - TLI->getLibcallCallingConv(RTLIB::MEMCMP), - Type::getInt32Ty(*getContext()), - getExternalSymbol(LibCallName, TLI->getPointerTy(getDataLayout())), - std::move(Args)) + .setLibCallee(TLI->getLibcallImplCallingConv(MemcmpImpl), + Type::getInt32Ty(*getContext()), + getExternalSymbol(LibCallName.data(), + TLI->getPointerTy(getDataLayout())), + std::move(Args)) .setTailCall(IsTailCall); return TLI->LowerCallTo(CLI); @@ -9176,8 +9139,8 @@ std::pair<SDValue, SDValue> SelectionDAG::getStrlen(SDValue Chain, const SDLoc &dl, SDValue Src, const CallInst *CI) { - const char *LibCallName = TLI->getLibcallName(RTLIB::STRLEN); - if (!LibCallName) + RTLIB::LibcallImpl StrlenImpl = TLI->getLibcallImpl(RTLIB::STRLEN); + if (StrlenImpl == RTLIB::Unsupported) return {}; // Emit a library call. @@ -9187,13 +9150,15 @@ std::pair<SDValue, SDValue> SelectionDAG::getStrlen(SDValue Chain, TargetLowering::CallLoweringInfo CLI(*this); bool IsTailCall = isInTailCallPositionWrapper(CI, this, /*AllowReturnsFirstArg*/ true); + StringRef LibcallName = TLI->getLibcallImplName(StrlenImpl); CLI.setDebugLoc(dl) .setChain(Chain) - .setLibCallee(TLI->getLibcallCallingConv(RTLIB::STRLEN), CI->getType(), - getExternalSymbol( - LibCallName, TLI->getProgramPointerTy(getDataLayout())), - std::move(Args)) + .setLibCallee( + TLI->getLibcallImplCallingConv(StrlenImpl), CI->getType(), + getExternalSymbol(LibcallName.data(), + TLI->getProgramPointerTy(getDataLayout())), + std::move(Args)) .setTailCall(IsTailCall); return TLI->LowerCallTo(CLI); @@ -9257,21 +9222,22 @@ SDValue SelectionDAG::getMemcpy( // FIXME: pass in SDLoc TargetLowering::CallLoweringInfo CLI(*this); bool IsTailCall = false; - const char *MemCpyName = TLI->getMemcpyName(); + RTLIB::LibcallImpl MemCpyImpl = TLI->getMemcpyImpl(); if (OverrideTailCall.has_value()) { IsTailCall = *OverrideTailCall; } else { - bool LowersToMemcpy = StringRef(MemCpyName) == StringRef("memcpy"); + bool LowersToMemcpy = MemCpyImpl == RTLIB::impl_memcpy; IsTailCall = isInTailCallPositionWrapper(CI, this, LowersToMemcpy); } CLI.setDebugLoc(dl) .setChain(Chain) .setLibCallee( - TLI->getLibcallCallingConv(RTLIB::MEMCPY), + TLI->getLibcallImplCallingConv(MemCpyImpl), Dst.getValueType().getTypeForEVT(*getContext()), - getExternalSymbol(MemCpyName, TLI->getPointerTy(getDataLayout())), + getExternalSymbol(TLI->getLibcallImplName(MemCpyImpl).data(), + TLI->getPointerTy(getDataLayout())), std::move(Args)) .setDiscardResult() .setTailCall(IsTailCall); @@ -9295,17 +9261,19 @@ SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl, RTLIB::Libcall LibraryCall = RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElemSz); - if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) + RTLIB::LibcallImpl LibcallImpl = TLI->getLibcallImpl(LibraryCall); + if (LibcallImpl == RTLIB::Unsupported) report_fatal_error("Unsupported element size"); TargetLowering::CallLoweringInfo CLI(*this); CLI.setDebugLoc(dl) .setChain(Chain) - .setLibCallee(TLI->getLibcallCallingConv(LibraryCall), - Type::getVoidTy(*getContext()), - getExternalSymbol(TLI->getLibcallName(LibraryCall), - TLI->getPointerTy(getDataLayout())), - std::move(Args)) + .setLibCallee( + TLI->getLibcallImplCallingConv(LibcallImpl), + Type::getVoidTy(*getContext()), + getExternalSymbol(TLI->getLibcallImplName(LibcallImpl).data(), + TLI->getPointerTy(getDataLayout())), + std::move(Args)) .setDiscardResult() .setTailCall(isTailCall); @@ -9361,22 +9329,24 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, // 
FIXME: pass in SDLoc TargetLowering::CallLoweringInfo CLI(*this); + RTLIB::LibcallImpl MemmoveImpl = TLI->getLibcallImpl(RTLIB::MEMMOVE); + bool IsTailCall = false; if (OverrideTailCall.has_value()) { IsTailCall = *OverrideTailCall; } else { - bool LowersToMemmove = - TLI->getLibcallName(RTLIB::MEMMOVE) == StringRef("memmove"); + bool LowersToMemmove = MemmoveImpl == RTLIB::impl_memmove; IsTailCall = isInTailCallPositionWrapper(CI, this, LowersToMemmove); } CLI.setDebugLoc(dl) .setChain(Chain) - .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE), - Dst.getValueType().getTypeForEVT(*getContext()), - getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), - TLI->getPointerTy(getDataLayout())), - std::move(Args)) + .setLibCallee( + TLI->getLibcallImplCallingConv(MemmoveImpl), + Dst.getValueType().getTypeForEVT(*getContext()), + getExternalSymbol(TLI->getLibcallImplName(MemmoveImpl).data(), + TLI->getPointerTy(getDataLayout())), + std::move(Args)) .setDiscardResult() .setTailCall(IsTailCall); @@ -9399,17 +9369,19 @@ SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl, RTLIB::Libcall LibraryCall = RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElemSz); - if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) + RTLIB::LibcallImpl LibcallImpl = TLI->getLibcallImpl(LibraryCall); + if (LibcallImpl == RTLIB::Unsupported) report_fatal_error("Unsupported element size"); TargetLowering::CallLoweringInfo CLI(*this); CLI.setDebugLoc(dl) .setChain(Chain) - .setLibCallee(TLI->getLibcallCallingConv(LibraryCall), - Type::getVoidTy(*getContext()), - getExternalSymbol(TLI->getLibcallName(LibraryCall), - TLI->getPointerTy(getDataLayout())), - std::move(Args)) + .setLibCallee( + TLI->getLibcallImplCallingConv(LibcallImpl), + Type::getVoidTy(*getContext()), + getExternalSymbol(TLI->getLibcallImplName(LibcallImpl).data(), + TLI->getPointerTy(getDataLayout())), + std::move(Args)) .setDiscardResult() .setTailCall(isTailCall); @@ -9470,30 +9442,37 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, // FIXME: pass in SDLoc CLI.setDebugLoc(dl).setChain(Chain); - const char *BzeroName = getTargetLoweringInfo().getLibcallName(RTLIB::BZERO); + RTLIB::LibcallImpl BzeroImpl = TLI->getLibcallImpl(RTLIB::BZERO); + bool UseBZero = BzeroImpl != RTLIB::Unsupported && isNullConstant(Src); - bool UseBZero = isNullConstant(Src) && BzeroName; // If zeroing out and bzero is present, use it. 
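// bzero(dst, n) behaves like memset(dst, 0, n) but returns void, which is
// why the tail-call logic below has to treat it differently from memset.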
if (UseBZero) { TargetLowering::ArgListTy Args; Args.emplace_back(Dst, PointerType::getUnqual(Ctx)); Args.emplace_back(Size, DL.getIntPtrType(Ctx)); CLI.setLibCallee( - TLI->getLibcallCallingConv(RTLIB::BZERO), Type::getVoidTy(Ctx), - getExternalSymbol(BzeroName, TLI->getPointerTy(DL)), std::move(Args)); + TLI->getLibcallImplCallingConv(BzeroImpl), Type::getVoidTy(Ctx), + getExternalSymbol(TLI->getLibcallImplName(BzeroImpl).data(), + TLI->getPointerTy(DL)), + std::move(Args)); } else { + RTLIB::LibcallImpl MemsetImpl = TLI->getLibcallImpl(RTLIB::MEMSET); + TargetLowering::ArgListTy Args; Args.emplace_back(Dst, PointerType::getUnqual(Ctx)); Args.emplace_back(Src, Src.getValueType().getTypeForEVT(Ctx)); Args.emplace_back(Size, DL.getIntPtrType(Ctx)); - CLI.setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET), - Dst.getValueType().getTypeForEVT(Ctx), - getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), - TLI->getPointerTy(DL)), - std::move(Args)); - } - bool LowersToMemset = - TLI->getLibcallName(RTLIB::MEMSET) == StringRef("memset"); + CLI.setLibCallee( + TLI->getLibcallImplCallingConv(MemsetImpl), + Dst.getValueType().getTypeForEVT(Ctx), + getExternalSymbol(TLI->getLibcallImplName(MemsetImpl).data(), + TLI->getPointerTy(DL)), + std::move(Args)); + } + + RTLIB::LibcallImpl MemsetImpl = TLI->getLibcallImpl(RTLIB::MEMSET); + bool LowersToMemset = MemsetImpl == RTLIB::impl_memset; + // If we're going to use bzero, make sure not to tail call unless the // subsequent return doesn't need a value, as bzero doesn't return the first // arg unlike memset. @@ -9520,17 +9499,19 @@ SDValue SelectionDAG::getAtomicMemset(SDValue Chain, const SDLoc &dl, RTLIB::Libcall LibraryCall = RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElemSz); - if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) + RTLIB::LibcallImpl LibcallImpl = TLI->getLibcallImpl(LibraryCall); + if (LibcallImpl == RTLIB::Unsupported) report_fatal_error("Unsupported element size"); TargetLowering::CallLoweringInfo CLI(*this); CLI.setDebugLoc(dl) .setChain(Chain) - .setLibCallee(TLI->getLibcallCallingConv(LibraryCall), - Type::getVoidTy(*getContext()), - getExternalSymbol(TLI->getLibcallName(LibraryCall), - TLI->getPointerTy(getDataLayout())), - std::move(Args)) + .setLibCallee( + TLI->getLibcallImplCallingConv(LibcallImpl), + Type::getVoidTy(*getContext()), + getExternalSymbol(TLI->getLibcallImplName(LibcallImpl).data(), + TLI->getPointerTy(getDataLayout())), + std::move(Args)) .setDiscardResult() .setTailCall(isTailCall); @@ -10025,8 +10006,6 @@ SDValue SelectionDAG::getLoadVP( MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges, bool IsExpanding) { - assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); - MMOFlags |= MachineMemOperand::MOLoad; assert((MMOFlags & MachineMemOperand::MOStore) == 0); // If we don't have a PtrInfo, infer the trivial frame index case to simplify @@ -10048,6 +10027,11 @@ SDValue SelectionDAG::getLoadVP(ISD::MemIndexedMode AM, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding) { + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); + assert(Mask.getValueType().getVectorElementCount() == + VT.getVectorElementCount() && + "Vector width mismatch between mask and data"); + bool Indexed = AM != ISD::UNINDEXED; assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!"); @@ -10143,6 +10127,10 @@ SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, 
SDValue Val, ISD::MemIndexedMode AM, bool IsTruncating, bool IsCompressing) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); + assert(Mask.getValueType().getVectorElementCount() == + Val.getValueType().getVectorElementCount() && + "Vector width mismatch between mask and data"); + bool Indexed = AM != ISD::UNINDEXED; assert((Indexed || Offset.isUndef()) && "Unindexed vp_store with an offset!"); SDVTList VTs = Indexed ? getVTList(Ptr.getValueType(), MVT::Other) @@ -12741,6 +12729,10 @@ void SelectionDAG::getTopologicallyOrderedNodes( for (unsigned i = 0U; i < SortedNodes.size(); ++i) { const SDNode *N = SortedNodes[i]; for (const SDNode *U : N->users()) { + // HandleSDNode is never part of a DAG and therefore has no entry in + // RemainingOperands. + if (U->getOpcode() == ISD::HANDLENODE) + continue; unsigned &NumRemOperands = RemainingOperands[U]; assert(NumRemOperands && "Invalid number of remaining operands"); --NumRemOperands; @@ -12754,8 +12746,6 @@ void SelectionDAG::getTopologicallyOrderedNodes( "First node in topological sort is not the entry token"); assert(SortedNodes.front()->getNumOperands() == 0 && "First node in topological sort has operands"); - assert(SortedNodes.back()->use_empty() && - "Last node in topologic sort has users"); } /// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the @@ -13057,6 +13047,11 @@ bool llvm::isOneOrOneSplat(SDValue N, bool AllowUndefs) { return C && C->isOne(); } +bool llvm::isOneOrOneSplatFP(SDValue N, bool AllowUndefs) { + ConstantFPSDNode *C = isConstOrConstSplatFP(N, AllowUndefs); + return C && C->isExactlyValue(1.0); +} + bool llvm::isAllOnesOrAllOnesSplat(SDValue N, bool AllowUndefs) { N = peekThroughBitcasts(N); unsigned BitWidth = N.getScalarValueSizeInBits(); @@ -13076,6 +13071,11 @@ bool llvm::isZeroOrZeroSplat(SDValue N, bool AllowUndefs) { return C && C->isZero(); } +bool llvm::isZeroOrZeroSplatFP(SDValue N, bool AllowUndefs) { + ConstantFPSDNode *C = isConstOrConstSplatFP(N, AllowUndefs); + return C && C->isZero(); +} + HandleSDNode::~HandleSDNode() { DropOperands(); } @@ -13660,11 +13660,8 @@ std::pair<SDValue, SDValue> SelectionDAG::SplitEVL(SDValue N, EVT VecVT, EVT VT = N.getValueType(); assert(VecVT.getVectorElementCount().isKnownEven() && "Expecting the mask to be an evenly-sized vector"); - unsigned HalfMinNumElts = VecVT.getVectorMinNumElements() / 2; - SDValue HalfNumElts = - VecVT.isFixedLengthVector() - ? 
getConstant(HalfMinNumElts, DL, VT) - : getVScale(DL, VT, APInt(VT.getScalarSizeInBits(), HalfMinNumElts)); + SDValue HalfNumElts = getElementCount( + DL, VT, VecVT.getVectorElementCount().divideCoefficientBy(2)); SDValue Lo = getNode(ISD::UMIN, DL, VT, N, HalfNumElts); SDValue Hi = getNode(ISD::USUBSAT, DL, VT, N, HalfNumElts); return std::make_pair(Lo, Hi); @@ -14241,13 +14238,18 @@ SDValue SelectionDAG::makeStateFunctionCall(unsigned LibFunc, SDValue Ptr, assert(InChain.getValueType() == MVT::Other && "Expected token chain"); TargetLowering::ArgListTy Args; Args.emplace_back(Ptr, Ptr.getValueType().getTypeForEVT(*getContext())); - RTLIB::Libcall LC = static_cast<RTLIB::Libcall>(LibFunc); - SDValue Callee = getExternalSymbol(TLI->getLibcallName(LC), - TLI->getPointerTy(getDataLayout())); + RTLIB::LibcallImpl LibcallImpl = + TLI->getLibcallImpl(static_cast<RTLIB::Libcall>(LibFunc)); + if (LibcallImpl == RTLIB::Unsupported) + reportFatalUsageError("emitting call to unsupported libcall"); + + SDValue Callee = + getExternalSymbol(TLI->getLibcallImplName(LibcallImpl).data(), + TLI->getPointerTy(getDataLayout())); TargetLowering::CallLoweringInfo CLI(*this); CLI.setDebugLoc(DLoc).setChain(InChain).setLibCallee( - TLI->getLibcallCallingConv(LC), Type::getVoidTy(*getContext()), Callee, - std::move(Args)); + TLI->getLibcallImplCallingConv(LibcallImpl), + Type::getVoidTy(*getContext()), Callee, std::move(Args)); return TLI->LowerCallTo(CLI).second; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index a522650..7134550 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -45,6 +45,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/SwiftErrorValueTracking.h" @@ -1097,14 +1098,15 @@ RegsForValue::getRegsAndSizes() const { } void SelectionDAGBuilder::init(GCFunctionInfo *gfi, BatchAAResults *aa, - AssumptionCache *ac, - const TargetLibraryInfo *li) { + AssumptionCache *ac, const TargetLibraryInfo *li, + const TargetTransformInfo &TTI) { BatchAA = aa; AC = ac; GFI = gfi; LibInfo = li; Context = DAG.getContext(); LPadToCallSiteMap.clear(); + this->TTI = &TTI; SL->init(DAG.getTargetLoweringInfo(), TM, DAG.getDataLayout()); AssignmentTrackingEnabled = isAssignmentTrackingEnabled( *DAG.getMachineFunction().getFunction().getParent()); @@ -2589,10 +2591,6 @@ bool SelectionDAGBuilder::shouldKeepJumpConditionsTogether( if (!LhsDeps.contains(RhsI)) RhsDeps.try_emplace(RhsI, false); - const auto &TLI = DAG.getTargetLoweringInfo(); - const auto &TTI = - TLI.getTargetMachine().getTargetTransformInfo(*I.getFunction()); - InstructionCost CostOfIncluding = 0; // See if this instruction will need to computed independently of whether RHS // is. @@ -2632,8 +2630,8 @@ bool SelectionDAGBuilder::shouldKeepJumpConditionsTogether( // RHS condition. Use latency because we are essentially trying to calculate // the cost of the dependency chain. // Possible TODO: We could try to estimate ILP and make this more precise. 
- CostOfIncluding += - TTI.getInstructionCost(InsPair.first, TargetTransformInfo::TCK_Latency); + CostOfIncluding += TTI->getInstructionCost( + InsPair.first, TargetTransformInfo::TCK_Latency); if (CostOfIncluding > CostThresh) return false; @@ -3507,16 +3505,46 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { DAG.getBasicBlock(Return))); } +/// The intrinsics currently supported by callbr are implicit control flow +/// intrinsics such as amdgcn.kill. +/// - they should be called (no "dontcall-" attributes) +/// - they do not touch memory on the target (= !TLI.getTgtMemIntrinsic()) +/// - they do not need custom argument handling (no +/// TLI.CollectTargetIntrinsicOperands()) +void SelectionDAGBuilder::visitCallBrIntrinsic(const CallBrInst &I) { + TargetLowering::IntrinsicInfo Info; + assert(!DAG.getTargetLoweringInfo().getTgtMemIntrinsic( + Info, I, DAG.getMachineFunction(), I.getIntrinsicID()) && + "Intrinsic touches memory"); + + auto [HasChain, OnlyLoad] = getTargetIntrinsicCallProperties(I); + + SmallVector<SDValue, 8> Ops = + getTargetIntrinsicOperands(I, HasChain, OnlyLoad); + SDVTList VTs = getTargetIntrinsicVTList(I, HasChain); + + // Create the node. + SDValue Result = + getTargetNonMemIntrinsicNode(*I.getType(), HasChain, Ops, VTs); + Result = handleTargetIntrinsicRet(I, HasChain, OnlyLoad, Result); + + setValue(&I, Result); +} + void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) { MachineBasicBlock *CallBrMBB = FuncInfo.MBB; - // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't - // have to do anything here to lower funclet bundles. - failForInvalidBundles(I, "callbrs", - {LLVMContext::OB_deopt, LLVMContext::OB_funclet}); - - assert(I.isInlineAsm() && "Only know how to handle inlineasm callbr"); - visitInlineAsm(I); + if (I.isInlineAsm()) { + // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't + // have to do anything here to lower funclet bundles. + failForInvalidBundles(I, "callbrs", + {LLVMContext::OB_deopt, LLVMContext::OB_funclet}); + visitInlineAsm(I); + } else { + assert(!I.hasOperandBundles() && + "Can't have operand bundles for intrinsics"); + visitCallBrIntrinsic(I); + } CopyToExportRegsIfNeeded(&I); // Retrieve successors. @@ -3526,19 +3554,25 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) { // Update successor info. addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne()); - for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) { - BasicBlock *Dest = I.getIndirectDest(i); - MachineBasicBlock *Target = FuncInfo.getMBB(Dest); - Target->setIsInlineAsmBrIndirectTarget(); - // If we introduce a type of asm goto statement that is permitted to use an - // indirect call instruction to jump to its labels, then we should add a - // call to Target->setMachineBlockAddressTaken() here, to mark the target - // block as requiring a BTI. - - Target->setLabelMustBeEmitted(); - // Don't add duplicate machine successors. - if (Dests.insert(Dest).second) - addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero()); + // TODO: In most cases where there is an intrinsic callbr, we have + // exactly one indirect target, which will be unreachable. As soon as + // this changes, we might need to enhance + // Target->setIsInlineAsmBrIndirectTarget or add something similar for + // intrinsic indirect branches.
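// [Editor's illustration, hypothetical IR] The non-inline-asm path added
// above handles callbr on implicit-control-flow intrinsics; following the
// amdgcn.kill case named in the comment, with invented block names:
//   callbr void @llvm.amdgcn.kill(i1 %cond)
//           to label %cont [label %kill.dead]
// where %kill.dead, the single indirect destination, is currently expected
// to be unreachable (see the TODO above).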
+ if (I.isInlineAsm()) { + for (BasicBlock *Dest : I.getIndirectDests()) { + MachineBasicBlock *Target = FuncInfo.getMBB(Dest); + Target->setIsInlineAsmBrIndirectTarget(); + // If we introduce a type of asm goto statement that is permitted to use + // an indirect call instruction to jump to its labels, then we should add + // a call to Target->setMachineBlockAddressTaken() here, to mark the + // target block as requiring a BTI. + + Target->setLabelMustBeEmitted(); + // Don't add duplicate machine successors. + if (Dests.insert(Dest).second) + addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero()); + } } CallBrMBB->normalizeSuccProbs(); @@ -3977,7 +4011,10 @@ void SelectionDAGBuilder::visitFPExt(const User &I) { SDValue N = getValue(I.getOperand(0)); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); - setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N)); + SDNodeFlags Flags; + if (auto *TruncInst = dyn_cast<FPMathOperator>(&I)) + Flags.copyFMF(*TruncInst); + setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N, Flags)); } void SelectionDAGBuilder::visitFPToUI(const User &I) { @@ -4584,17 +4621,9 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { if (AllocSize.getValueType() != IntPtr) AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr); - if (TySize.isScalable()) - AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize, - DAG.getVScale(dl, IntPtr, - APInt(IntPtr.getScalarSizeInBits(), - TySize.getKnownMinValue()))); - else { - SDValue TySizeValue = - DAG.getConstant(TySize.getFixedValue(), dl, MVT::getIntegerVT(64)); - AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize, - DAG.getZExtOrTrunc(TySizeValue, dl, IntPtr)); - } + AllocSize = DAG.getNode( + ISD::MUL, dl, IntPtr, AllocSize, + DAG.getZExtOrTrunc(DAG.getTypeSize(dl, MVT::i64, TySize), dl, IntPtr)); // Handle alignment. If the requested alignment is less than or equal to // the stack alignment, ignore it. 
If the size is greater than or equal to @@ -4639,6 +4668,12 @@ static std::optional<ConstantRange> getRange(const Instruction &I) { return std::nullopt; } +static FPClassTest getNoFPClass(const Instruction &I) { + if (const auto *CB = dyn_cast<CallBase>(&I)) + return CB->getRetNoFPClass(); + return fcNone; +} + void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (I.isAtomic()) return visitAtomicLoad(I); @@ -4759,7 +4794,7 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) { SmallVector<uint64_t, 4> Offsets; const Value *SrcV = I.getOperand(0); ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), - SrcV->getType(), ValueVTs, &Offsets, 0); + SrcV->getType(), ValueVTs, /*MemVTs=*/nullptr, &Offsets, 0); assert(ValueVTs.size() == 1 && Offsets[0] == 0 && "expect a single EVT for swifterror"); @@ -4795,7 +4830,7 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty, - ValueVTs, &Offsets, 0); + ValueVTs, /*MemVTs=*/nullptr, &Offsets, 0); assert(ValueVTs.size() == 1 && Offsets[0] == 0 && "expect a single EVT for swifterror"); @@ -4907,10 +4942,9 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, LocationSize::beforeOrAfterPointer(), Alignment, I.getAAMetadata()); const auto &TLI = DAG.getTargetLoweringInfo(); - const auto &TTI = - TLI.getTargetMachine().getTargetTransformInfo(*I.getFunction()); + SDValue StoreNode = - !IsCompressing && TTI.hasConditionalLoadStoreForType( + !IsCompressing && TTI->hasConditionalLoadStoreForType( I.getArgOperand(0)->getType(), /*IsStore=*/true) ? TLI.visitMaskedStore(DAG, sdl, getMemoryRoot(), MMO, Ptr, Src0, Mask) @@ -5059,20 +5093,22 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { auto MMOFlags = MachineMemOperand::MOLoad; if (I.hasMetadata(LLVMContext::MD_nontemporal)) MMOFlags |= MachineMemOperand::MONonTemporal; + if (I.hasMetadata(LLVMContext::MD_invariant_load)) + MMOFlags |= MachineMemOperand::MOInvariant; MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(PtrOperand), MMOFlags, LocationSize::beforeOrAfterPointer(), Alignment, AAInfo, Ranges); const auto &TLI = DAG.getTargetLoweringInfo(); - const auto &TTI = - TLI.getTargetMachine().getTargetTransformInfo(*I.getFunction()); + // The Load/Res may point to different values and both of them are output // variables. SDValue Load; SDValue Res; - if (!IsExpanding && TTI.hasConditionalLoadStoreForType(Src0Operand->getType(), - /*IsStore=*/false)) + if (!IsExpanding && + TTI->hasConditionalLoadStoreForType(Src0Operand->getType(), + /*IsStore=*/false)) Res = TLI.visitMaskedLoad(DAG, sdl, InChain, MMO, Load, Ptr, Src0, Mask); else Res = Load = @@ -5313,18 +5349,26 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { DAG.setRoot(OutChain); } -/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC -/// node. -void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, - unsigned Intrinsic) { - // Ignore the callsite's attributes. A specific call site may be marked with - // readnone, but the lowering code will expect the chain based on the - // definition. +/// Check if this intrinsic call depends on the chain (1st return value) +/// and if it only *loads* memory. +/// Ignore the callsite's attributes. 
A specific call site may be marked with +/// readnone, but the lowering code will expect the chain based on the +/// definition. +std::pair<bool, bool> +SelectionDAGBuilder::getTargetIntrinsicCallProperties(const CallBase &I) { const Function *F = I.getCalledFunction(); bool HasChain = !F->doesNotAccessMemory(); bool OnlyLoad = HasChain && F->onlyReadsMemory() && F->willReturn() && F->doesNotThrow(); + return {HasChain, OnlyLoad}; +} + +SmallVector<SDValue, 8> SelectionDAGBuilder::getTargetIntrinsicOperands( + const CallBase &I, bool HasChain, bool OnlyLoad, + TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + // Build the operand list. SmallVector<SDValue, 8> Ops; if (HasChain) { // If this intrinsic has side-effects, chainify it. @@ -5336,17 +5380,10 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, } } - // Info is set by getTgtMemIntrinsic - TargetLowering::IntrinsicInfo Info; - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, - DAG.getMachineFunction(), - Intrinsic); - // Add the intrinsic ID as an integer operand if it's not a target intrinsic. - if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || - Info.opc == ISD::INTRINSIC_W_CHAIN) - Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(), + if (!TgtMemIntrinsicInfo || TgtMemIntrinsicInfo->opc == ISD::INTRINSIC_VOID || + TgtMemIntrinsicInfo->opc == ISD::INTRINSIC_W_CHAIN) + Ops.push_back(DAG.getTargetConstant(I.getIntrinsicID(), getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); // Add all operands of the call to the operand list. @@ -5369,13 +5406,93 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, } } + if (std::optional<OperandBundleUse> Bundle = + I.getOperandBundle(LLVMContext::OB_deactivation_symbol)) { + auto *Sym = Bundle->Inputs[0].get(); + SDValue SDSym = getValue(Sym); + SDSym = DAG.getDeactivationSymbol(cast<GlobalValue>(Sym)); + Ops.push_back(SDSym); + } + + if (std::optional<OperandBundleUse> Bundle = + I.getOperandBundle(LLVMContext::OB_convergencectrl)) { + Value *Token = Bundle->Inputs[0].get(); + SDValue ConvControlToken = getValue(Token); + assert(Ops.back().getValueType() != MVT::Glue && + "Did not expect another glue node here."); + ConvControlToken = + DAG.getNode(ISD::CONVERGENCECTRL_GLUE, {}, MVT::Glue, ConvControlToken); + Ops.push_back(ConvControlToken); + } + + return Ops; +} + +SDVTList SelectionDAGBuilder::getTargetIntrinsicVTList(const CallBase &I, + bool HasChain) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs); if (HasChain) ValueVTs.push_back(MVT::Other); - SDVTList VTs = DAG.getVTList(ValueVTs); + return DAG.getVTList(ValueVTs); +} + +/// Get an INTRINSIC node for a target intrinsic which does not touch memory. +SDValue SelectionDAGBuilder::getTargetNonMemIntrinsicNode( + const Type &IntrinsicVT, bool HasChain, ArrayRef<SDValue> Ops, + const SDVTList &VTs) { + if (!HasChain) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); + if (!IntrinsicVT.isVoidTy()) + return DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops); + return DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops); +} + +/// Set root, convert return type if necessary and check alignment. 
+SDValue SelectionDAGBuilder::handleTargetIntrinsicRet(const CallBase &I, + bool HasChain, + bool OnlyLoad, + SDValue Result) { + if (HasChain) { + SDValue Chain = Result.getValue(Result.getNode()->getNumValues() - 1); + if (OnlyLoad) + PendingLoads.push_back(Chain); + else + DAG.setRoot(Chain); + } + + if (I.getType()->isVoidTy()) + return Result; + + if (MaybeAlign Alignment = I.getRetAlign(); InsertAssertAlign && Alignment) { + // Insert `assertalign` node if there's an alignment. + Result = DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne()); + } else if (!isa<VectorType>(I.getType())) { + Result = lowerRangeToAssertZExt(DAG, I, Result); + } + + return Result; +} + +/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC +/// node. +void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, + unsigned Intrinsic) { + auto [HasChain, OnlyLoad] = getTargetIntrinsicCallProperties(I); + + // Info is set by getTgtMemIntrinsic + TargetLowering::IntrinsicInfo Info; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + bool IsTgtMemIntrinsic = + TLI.getTgtMemIntrinsic(Info, I, DAG.getMachineFunction(), Intrinsic); + + SmallVector<SDValue, 8> Ops = getTargetIntrinsicOperands( + I, HasChain, OnlyLoad, IsTgtMemIntrinsic ? &Info : nullptr); + SDVTList VTs = getTargetIntrinsicVTList(I, HasChain); // Propagate fast-math-flags from IR to node(s). SDNodeFlags Flags; @@ -5386,19 +5503,9 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Create the node. SDValue Result; - if (auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl)) { - auto *Token = Bundle->Inputs[0].get(); - SDValue ConvControlToken = getValue(Token); - assert(Ops.back().getValueType() != MVT::Glue && - "Did not expected another glue node here."); - ConvControlToken = - DAG.getNode(ISD::CONVERGENCECTRL_GLUE, {}, MVT::Glue, ConvControlToken); - Ops.push_back(ConvControlToken); - } - // In some cases, custom collection of operands from CallInst I may be needed. TLI.CollectTargetIntrinsicOperands(I, Ops, DAG); - if (IsTgtIntrinsic) { + if (IsTgtMemIntrinsic) { // This is target intrinsic that touches memory // // TODO: We currently just fallback to address space 0 if getTgtMemIntrinsic @@ -5418,34 +5525,11 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, Info.ssid, Info.order, Info.failureOrder); Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, MemVT, MMO); - } else if (!HasChain) { - Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); - } else if (!I.getType()->isVoidTy()) { - Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops); } else { - Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops); + Result = getTargetNonMemIntrinsicNode(*I.getType(), HasChain, Ops, VTs); } - if (HasChain) { - SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1); - if (OnlyLoad) - PendingLoads.push_back(Chain); - else - DAG.setRoot(Chain); - } - - if (!I.getType()->isVoidTy()) { - if (!isa<VectorType>(I.getType())) - Result = lowerRangeToAssertZExt(DAG, I, Result); - - MaybeAlign Alignment = I.getRetAlign(); - - // Insert `assertalign` node if there's an alignment. 
- if (InsertAssertAlign && Alignment) { - Result = - DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne()); - } - } + Result = handleTargetIntrinsicRet(I, HasChain, OnlyLoad, Result); setValue(&I, Result); } @@ -7772,6 +7856,17 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, return; } + case Intrinsic::reloc_none: { + Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(0))->getMetadata(); + StringRef SymbolName = cast<MDString>(MD)->getString(); + SDValue Ops[2] = { + getRoot(), + DAG.getTargetExternalSymbol( + SymbolName.data(), TLI.getProgramPointerTy(DAG.getDataLayout()))}; + DAG.setRoot(DAG.getNode(ISD::RELOC_NONE, sdl, MVT::Other, Ops)); + return; + } + case Intrinsic::eh_exceptionpointer: case Intrinsic::eh_exceptioncode: { // Get the exception pointer vreg, copy from it, and resize it to fit. @@ -8137,6 +8232,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, Input, DAG.getConstant(1, sdl, Input.getValueType()))); return; } + case Intrinsic::vector_partial_reduce_fadd: { + SDValue Acc = getValue(I.getOperand(0)); + SDValue Input = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode( + ISD::PARTIAL_REDUCE_FMLA, sdl, Acc.getValueType(), Acc, + Input, DAG.getConstantFP(1.0, sdl, Input.getValueType()))); + return; + } case Intrinsic::experimental_cttz_elts: { auto DL = getCurSDLoc(); SDValue Op = getValue(I.getOperand(0)); @@ -8958,9 +9061,8 @@ bool SelectionDAGBuilder::canTailCall(const CallBase &CB) const { // Avoid emitting tail calls in functions with the disable-tail-calls // attribute. const Function *Caller = CB.getParent()->getParent(); - if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() == - "true" && - !isMustTailCall) + if (!isMustTailCall && + Caller->getFnAttribute("disable-tail-calls").getValueAsBool()) return false; // We can't tail call inside a function with a swifterror argument. Lowering @@ -9052,6 +9154,11 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee, ConvControlToken = getValue(Token); } + GlobalValue *DeactivationSymbol = nullptr; + if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_deactivation_symbol)) { + DeactivationSymbol = cast<GlobalValue>(Bundle->Inputs[0].get()); + } + TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(getCurSDLoc()) .setChain(getRoot()) @@ -9061,7 +9168,8 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee, .setIsPreallocated( CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0) .setCFIType(CFIType) - .setConvergenceControlToken(ConvControlToken); + .setConvergenceControlToken(ConvControlToken) + .setDeactivationSymbol(DeactivationSymbol); // Set the pointer authentication info if we have it. if (PAI) { @@ -9075,6 +9183,7 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee, if (Result.first.getNode()) { Result.first = lowerRangeToAssertZExt(DAG, CB, Result.first); + Result.first = lowerNoFPClassToAssertNoFPClass(DAG, CB, Result.first); setValue(&CB, Result.first); } @@ -9392,7 +9501,9 @@ bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) { bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, unsigned Opcode) { // We already checked this call's prototype; verify it doesn't modify errno. - if (!I.onlyReadsMemory()) + // Do not perform optimizations for call sites that require strict + // floating-point semantics. 
+ if (!I.onlyReadsMemory() || I.isStrictFP()) return false; SDNodeFlags Flags; @@ -9412,7 +9523,9 @@ bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I, unsigned Opcode) { // We already checked this call's prototype; verify it doesn't modify errno. - if (!I.onlyReadsMemory()) + // Do not perform optimizations for call sites that require strict + // floating-point semantics. + if (!I.onlyReadsMemory() || I.isStrictFP()) return false; SDNodeFlags Flags; @@ -9445,11 +9558,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { // Check for well-known libc/libm calls. If the function is internal, it // can't be a library call. Don't do the check if marked as nobuiltin for - // some reason or the call site requires strict floating point semantics. + // some reason. + // This code should not handle libcalls that are already canonicalized to + // intrinsics by the middle-end. LibFunc Func; - if (!I.isNoBuiltin() && !I.isStrictFP() && !F->hasLocalLinkage() && - F->hasName() && LibInfo->getLibFunc(*F, Func) && - LibInfo->hasOptimizedCodeGen(Func)) { + if (!I.isNoBuiltin() && !F->hasLocalLinkage() && F->hasName() && + LibInfo->getLibFunc(*F, Func) && LibInfo->hasOptimizedCodeGen(Func)) { switch (Func) { default: break; case LibFunc_bcmp: @@ -9472,30 +9586,35 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { case LibFunc_fabs: case LibFunc_fabsf: case LibFunc_fabsl: + // TODO: Remove this, already canonicalized by the middle-end. if (visitUnaryFloatCall(I, ISD::FABS)) return; break; case LibFunc_fmin: case LibFunc_fminf: case LibFunc_fminl: + // TODO: Remove this, already canonicalized by the middle-end. if (visitBinaryFloatCall(I, ISD::FMINNUM)) return; break; case LibFunc_fmax: case LibFunc_fmaxf: case LibFunc_fmaxl: + // TODO: Remove this, already canonicalized by the middle-end. if (visitBinaryFloatCall(I, ISD::FMAXNUM)) return; break; case LibFunc_fminimum_num: case LibFunc_fminimum_numf: case LibFunc_fminimum_numl: + // TODO: Remove this, already canonicalized by the middle-end. if (visitBinaryFloatCall(I, ISD::FMINIMUMNUM)) return; break; case LibFunc_fmaximum_num: case LibFunc_fmaximum_numf: case LibFunc_fmaximum_numl: + // TODO: Remove this, already canonicalized by the middle-end. if (visitBinaryFloatCall(I, ISD::FMAXIMUMNUM)) return; break; @@ -9571,36 +9690,35 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { case LibFunc_floor: case LibFunc_floorf: case LibFunc_floorl: + // TODO: Remove this, already canonicalized by the middle-end. if (visitUnaryFloatCall(I, ISD::FFLOOR)) return; break; - case LibFunc_nearbyint: - case LibFunc_nearbyintf: - case LibFunc_nearbyintl: - if (visitUnaryFloatCall(I, ISD::FNEARBYINT)) - return; - break; case LibFunc_ceil: case LibFunc_ceilf: case LibFunc_ceill: + // TODO: Remove this, already canonicalized by the middle-end. if (visitUnaryFloatCall(I, ISD::FCEIL)) return; break; case LibFunc_rint: case LibFunc_rintf: case LibFunc_rintl: + // TODO: Remove this, already canonicalized by the middle-end. if (visitUnaryFloatCall(I, ISD::FRINT)) return; break; case LibFunc_round: case LibFunc_roundf: case LibFunc_roundl: + // TODO: Remove this, already canonicalized by the middle-end. if (visitUnaryFloatCall(I, ISD::FROUND)) return; break; case LibFunc_trunc: case LibFunc_truncf: case LibFunc_truncl: + // TODO: Remove this, already canonicalized by the middle-end. 
if (visitUnaryFloatCall(I, ISD::FTRUNC)) return; break; @@ -9677,7 +9795,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { {LLVMContext::OB_deopt, LLVMContext::OB_funclet, LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated, LLVMContext::OB_clang_arc_attachedcall, LLVMContext::OB_kcfi, - LLVMContext::OB_convergencectrl}); + LLVMContext::OB_convergencectrl, LLVMContext::OB_deactivation_symbol}); SDValue Callee = getValue(I.getCalledOperand()); @@ -10661,6 +10779,30 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG, return DAG.getMergeValues(Ops, SL); } +SDValue SelectionDAGBuilder::lowerNoFPClassToAssertNoFPClass( + SelectionDAG &DAG, const Instruction &I, SDValue Op) { + FPClassTest Classes = getNoFPClass(I); + if (Classes == fcNone) + return Op; + + SDLoc SL = getCurSDLoc(); + SDValue TestConst = DAG.getTargetConstant(Classes, SDLoc(), MVT::i32); + + if (Op.getOpcode() != ISD::MERGE_VALUES) { + return DAG.getNode(ISD::AssertNoFPClass, SL, Op.getValueType(), Op, + TestConst); + } + + SmallVector<SDValue, 8> Ops(Op.getNumOperands()); + for (unsigned I = 0, E = Ops.size(); I != E; ++I) { + SDValue MergeOp = Op.getOperand(I); + Ops[I] = DAG.getNode(ISD::AssertNoFPClass, SL, MergeOp.getValueType(), + MergeOp, TestConst); + } + + return DAG.getMergeValues(Ops, SL); +} + /// Populate a CallLoweringInfo (into \p CLI) based on the properties of /// the call being lowered. /// diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 47e19f7..6f3e7a6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -232,6 +232,7 @@ public: BatchAAResults *BatchAA = nullptr; AssumptionCache *AC = nullptr; const TargetLibraryInfo *LibInfo = nullptr; + const TargetTransformInfo *TTI = nullptr; class SDAGSwitchLowering : public SwitchCG::SwitchLowering { public: @@ -285,7 +286,7 @@ public: FuncInfo(funcinfo), SwiftError(swifterror) {} void init(GCFunctionInfo *gfi, BatchAAResults *BatchAA, AssumptionCache *AC, - const TargetLibraryInfo *li); + const TargetLibraryInfo *li, const TargetTransformInfo &TTI); /// Clear out the current SelectionDAG and the associated state and prepare /// this SelectionDAGBuilder object to be used for a new block. This doesn't @@ -429,6 +430,10 @@ public: SDValue lowerRangeToAssertZExt(SelectionDAG &DAG, const Instruction &I, SDValue Op); + // Lower nofpclass attributes to AssertNoFPClass + SDValue lowerNoFPClassToAssertNoFPClass(SelectionDAG &DAG, + const Instruction &I, SDValue Op); + void populateCallLoweringInfo(TargetLowering::CallLoweringInfo &CLI, const CallBase *Call, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, @@ -551,10 +556,12 @@ public: private: // These all get lowered before this pass.
void visitInvoke(const InvokeInst &I); - void visitCallBr(const CallBrInst &I); void visitCallBrLandingPad(const CallInst &I); void visitResume(const ResumeInst &I); + void visitCallBr(const CallBrInst &I); + void visitCallBrIntrinsic(const CallBrInst &I); + void visitUnary(const User &I, unsigned Opcode); void visitFNeg(const User &I) { visitUnary(I, ISD::FNEG); } @@ -727,6 +734,17 @@ private: MCSymbol *&BeginLabel); SDValue lowerEndEH(SDValue Chain, const InvokeInst *II, const BasicBlock *EHPadBB, MCSymbol *BeginLabel); + + std::pair<bool, bool> getTargetIntrinsicCallProperties(const CallBase &I); + SmallVector<SDValue, 8> getTargetIntrinsicOperands( + const CallBase &I, bool HasChain, bool OnlyLoad, + TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo = nullptr); + SDVTList getTargetIntrinsicVTList(const CallBase &I, bool HasChain); + SDValue getTargetNonMemIntrinsicNode(const Type &IntrinsicVT, bool HasChain, + ArrayRef<SDValue> Ops, + const SDVTList &VTs); + SDValue handleTargetIntrinsicRet(const CallBase &I, bool HasChain, + bool OnlyLoad, SDValue Result); }; /// This struct represents the registers (physical or virtual) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 77377d3..ec5edd5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -472,6 +472,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::LIFETIME_END: return "lifetime.end"; case ISD::FAKE_USE: return "fake_use"; + case ISD::RELOC_NONE: + return "reloc_none"; case ISD::PSEUDO_PROBE: return "pseudoprobe"; case ISD::GC_TRANSITION_START: return "gc_transition.start"; @@ -588,6 +590,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { return "partial_reduce_smla"; case ISD::PARTIAL_REDUCE_SUMLA: return "partial_reduce_sumla"; + case ISD::PARTIAL_REDUCE_FMLA: + return "partial_reduce_fmla"; case ISD::LOOP_DEPENDENCE_WAR_MASK: return "loop_dep_war"; case ISD::LOOP_DEPENDENCE_RAW_MASK: diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 6c11c5b..dd8f18d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -519,9 +519,7 @@ void SelectionDAGISel::initializeAnalysisResults( SP = &FAM.getResult<SSPLayoutAnalysis>(Fn); -#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS TTI = &FAM.getResult<TargetIRAnalysis>(Fn); -#endif } void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) { @@ -578,9 +576,7 @@ void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) { SP = &MFP.getAnalysis<StackProtector>().getLayoutInfo(); -#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS TTI = &MFP.getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn); -#endif } bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { @@ -593,7 +589,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { ISEL_DUMP(dbgs() << "\n\n\n=== " << FuncName << '\n'); - SDB->init(GFI, getBatchAA(), AC, LibInfo); + SDB->init(GFI, getBatchAA(), AC, LibInfo, *TTI); MF->setHasInlineAsm(false); @@ -2448,7 +2444,7 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, // a cycle in the scheduling graph. // If the node has glue, walk down the graph to the "lowest" node in the - // glueged set. + // glued set. 
EVT VT = Root->getValueType(Root->getNumValues()-1); while (VT == MVT::Glue) { SDNode *GU = Root->getGluedUser(); @@ -2550,6 +2546,11 @@ void SelectionDAGISel::Select_FAKE_USE(SDNode *N) { N->getOperand(1), N->getOperand(0)); } +void SelectionDAGISel::Select_RELOC_NONE(SDNode *N) { + CurDAG->SelectNodeTo(N, TargetOpcode::RELOC_NONE, N->getValueType(0), + N->getOperand(1), N->getOperand(0)); +} + void SelectionDAGISel::Select_FREEZE(SDNode *N) { // TODO: We don't have FREEZE pseudo-instruction in MachineInstr-level now. // If FREEZE instruction is added later, the code below must be changed as @@ -2777,8 +2778,8 @@ void SelectionDAGISel::UpdateChains( /// induce cycles in the DAG) and if so, creating a TokenFactor node that will /// be used as the input node chain for the generated nodes. static SDValue -HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched, - SelectionDAG *CurDAG) { +HandleMergeInputChains(const SmallVectorImpl<SDNode *> &ChainNodesMatched, + SDValue InputGlue, SelectionDAG *CurDAG) { SmallPtrSet<const SDNode *, 16> Visited; SmallVector<const SDNode *, 8> Worklist; @@ -2821,8 +2822,16 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched, // node that is both the predecessor and successor of the // to-be-merged nodes. Fail. Visited.clear(); - for (SDValue V : InputChains) + for (SDValue V : InputChains) { + // If we need to create a TokenFactor, and any of the input chain nodes will + // also be glued to the output, we cannot merge the chains. The TokenFactor + // would prevent the glue from being honored. + if (InputChains.size() != 1 && + V->getValueType(V->getNumValues() - 1) == MVT::Glue && + InputGlue.getNode() == V.getNode()) + return SDValue(); Worklist.push_back(V.getNode()); + } for (auto *N : ChainNodesMatched) if (SDNode::hasPredecessorHelper(N, Visited, Worklist, Max, true)) @@ -3299,6 +3308,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, case ISD::LIFETIME_START: case ISD::LIFETIME_END: case ISD::PSEUDO_PROBE: + case ISD::DEACTIVATION_SYMBOL: NodeToMatch->setNodeId(-1); // Mark selected. return; case ISD::AssertSext: @@ -3325,6 +3335,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, case ISD::FAKE_USE: Select_FAKE_USE(NodeToMatch); return; + case ISD::RELOC_NONE: + Select_RELOC_NONE(NodeToMatch); + return; case ISD::FREEZE: Select_FREEZE(NodeToMatch); return; @@ -3377,7 +3390,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, // These are the current input chain and glue for use when generating nodes. // Various Emit operations change these. For example, emitting a copytoreg // uses and updates these. - SDValue InputChain, InputGlue; + SDValue InputChain, InputGlue, DeactivationSymbol; // ChainNodesMatched - If a pattern matches nodes that have input/output // chains, the OPC_EmitMergeInputChains operation is emitted which indicates @@ -3530,6 +3543,15 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, InputGlue = N->getOperand(N->getNumOperands()-1); continue; + case OPC_CaptureDeactivationSymbol: + // If the current node has a deactivation symbol, capture it in + // DeactivationSymbol.
+ if (N->getNumOperands() != 0 && + N->getOperand(N->getNumOperands() - 1).getOpcode() == + ISD::DEACTIVATION_SYMBOL) + DeactivationSymbol = N->getOperand(N->getNumOperands() - 1); + continue; + case OPC_MoveChild: { unsigned ChildNo = MatcherTable[MatcherIndex++]; if (ChildNo >= N.getNumOperands()) @@ -3981,7 +4003,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, } // Merge the input chains if they are not intra-pattern references. - InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); + InputChain = HandleMergeInputChains(ChainNodesMatched, InputGlue, CurDAG); if (!InputChain.getNode()) break; // Failed to merge. @@ -4025,7 +4047,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, break; // Merge the input chains if they are not intra-pattern references. - InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); + InputChain = HandleMergeInputChains(ChainNodesMatched, InputGlue, CurDAG); if (!InputChain.getNode()) break; // Failed to merge. @@ -4211,6 +4233,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, // If this has chain/glue inputs, add them. if (EmitNodeInfo & OPFL_Chain) Ops.push_back(InputChain); + if (DeactivationSymbol.getNode() != nullptr) + Ops.push_back(DeactivationSymbol); if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != nullptr) Ops.push_back(InputGlue); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 9bdf822..c65ddc6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -41,8 +41,9 @@ using namespace llvm; using namespace llvm::SDPatternMatch; /// NOTE: The TargetMachine owns TLOF. -TargetLowering::TargetLowering(const TargetMachine &tm) - : TargetLoweringBase(tm) {} +TargetLowering::TargetLowering(const TargetMachine &tm, + const TargetSubtargetInfo &STI) + : TargetLoweringBase(tm, STI) {} // Define the virtual destructor out-of-line for build efficiency. TargetLowering::~TargetLowering() = default; @@ -151,11 +152,13 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call, /// Generate a libcall taking the given operands as arguments and returning a /// result of type RetVT. 
std::pair<SDValue, SDValue> -TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, - ArrayRef<SDValue> Ops, - MakeLibCallOptions CallOptions, - const SDLoc &dl, +TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, + EVT RetVT, ArrayRef<SDValue> Ops, + MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue InChain) const { + if (LibcallImpl == RTLIB::Unsupported) + reportFatalInternalError("unsupported library call operation"); + if (!InChain) InChain = DAG.getEntryNode(); @@ -184,12 +187,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, Args.push_back(Entry); } - const char *LibcallName = getLibcallName(LC); - if (LC == RTLIB::UNKNOWN_LIBCALL || !LibcallName) - reportFatalInternalError("unsupported library call operation"); - SDValue Callee = - DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout())); + DAG.getExternalSymbol(LibcallImpl, getPointerTy(DAG.getDataLayout())); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); Type *OrigRetTy = RetTy; @@ -205,8 +204,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, CLI.setDebugLoc(dl) .setChain(InChain) - .setLibCallee(getLibcallCallingConv(LC), RetTy, OrigRetTy, Callee, - std::move(Args)) + .setLibCallee(getLibcallImplCallingConv(LibcallImpl), RetTy, OrigRetTy, + Callee, std::move(Args)) .setNoReturn(CallOptions.DoesNotReturn) .setDiscardResult(!CallOptions.IsReturnValueUsed) .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization) @@ -6344,7 +6343,6 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); EVT VT = N->getValueType(0); - EVT SVT = VT.getScalarType(); EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); EVT ShSVT = ShVT.getScalarType(); @@ -6354,6 +6352,8 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, auto BuildSDIVPattern = [&](ConstantSDNode *C) { if (C->isZero()) return false; + + EVT CT = C->getValueType(0); APInt Divisor = C->getAPIntValue(); unsigned Shift = Divisor.countr_zero(); if (Shift) { @@ -6362,12 +6362,13 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, } APInt Factor = Divisor.multiplicativeInverse(); Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT)); - Factors.push_back(DAG.getConstant(Factor, dl, SVT)); + Factors.push_back(DAG.getConstant(Factor, dl, CT)); return true; }; // Collect all magic values from the build vector. - if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern)) + if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern, /*AllowUndefs=*/false, + /*AllowTruncation=*/true)) return SDValue(); SDValue Shift, Factor; @@ -6402,7 +6403,6 @@ static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl<SDNode *> &Created) { EVT VT = N->getValueType(0); - EVT SVT = VT.getScalarType(); EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); EVT ShSVT = ShVT.getScalarType(); @@ -6412,6 +6412,8 @@ static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N, auto BuildUDIVPattern = [&](ConstantSDNode *C) { if (C->isZero()) return false; + + EVT CT = C->getValueType(0); APInt Divisor = C->getAPIntValue(); unsigned Shift = Divisor.countr_zero(); if (Shift) { @@ -6421,14 +6423,15 @@ static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N, // Calculate the multiplicative inverse modulo BW. 
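// [Worked example, editor's illustration] The exact-division rewrite built
// here, for i32 and an exact sdiv by 6 (6 = 2 * 3):
//   Shift  = countr_zero(6) = 1
//   Factor = 3^-1 (mod 2^32) = 0xAAAAAAAB   (3 * 0xAAAAAAAB == 2^33 + 1)
// so  x / 6  ==>  (x >> 1) * 0xAAAAAAAB  whenever the division is exact.
// E.g. x = 12: (12 >> 1) * 0xAAAAAAAB = 6 * 0xAAAAAAAB = 0x400000002,
// which truncates to 2 in i32, matching 12 / 6.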
APInt Factor = Divisor.multiplicativeInverse(); Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT)); - Factors.push_back(DAG.getConstant(Factor, dl, SVT)); + Factors.push_back(DAG.getConstant(Factor, dl, CT)); return true; }; SDValue Op1 = N->getOperand(1); // Collect all magic values from the build vector. - if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern)) + if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern, /*AllowUndefs=*/false, + /*AllowTruncation=*/true)) return SDValue(); SDValue Shift, Factor; @@ -6561,8 +6564,9 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, auto BuildSDIVPattern = [&](ConstantSDNode *C) { if (C->isZero()) return false; - - const APInt &Divisor = C->getAPIntValue(); + // Truncate the divisor to the target scalar type in case it was promoted + // during type legalization. + APInt Divisor = C->getAPIntValue().trunc(EltBits); SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor); int NumeratorFactor = 0; int ShiftMask = -1; @@ -6592,7 +6596,8 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, SDValue N1 = N->getOperand(1); // Collect the shifts / magic values from each element. - if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern)) + if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern, /*AllowUndefs=*/false, + /*AllowTruncation=*/true)) return SDValue(); SDValue MagicFactor, Factor, Shift, ShiftMask; @@ -6737,7 +6742,9 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, auto BuildUDIVPattern = [&](ConstantSDNode *C) { if (C->isZero()) return false; - const APInt& Divisor = C->getAPIntValue(); + // Truncate the divisor to the target scalar type in case it was promoted + // during type legalization. + APInt Divisor = C->getAPIntValue().trunc(EltBits); SDValue PreShift, MagicFactor, NPQFactor, PostShift; @@ -6778,7 +6785,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, }; // Collect the shifts/magic values from each element. - if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern)) + if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern, /*AllowUndefs=*/false, + /*AllowTruncation=*/true)) return SDValue(); SDValue PreShift, PostShift, MagicFactor, NPQFactor; @@ -8851,6 +8859,7 @@ SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node, RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO); } + // Always prefer RHS if equal. SDValue MinMax = DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT); @@ -8865,13 +8874,19 @@ SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node, DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32); SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax, DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ); - SDValue LCmp = DAG.getSelect( - DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS, + EVT IntVT = VT.changeTypeToInteger(); + EVT FloatVT = VT.changeElementType(MVT::f32); + SDValue LHSTrunc = LHS; + if (!isTypeLegal(IntVT) && !isOperationLegalOrCustom(ISD::IS_FPCLASS, VT)) { + LHSTrunc = DAG.getNode(ISD::FP_ROUND, DL, FloatVT, LHS, + DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); + } + // It's OK to select from LHS and MinMax, with only one ISD::IS_FPCLASS, as + // we preferred RHS when generating MinMax, if the operands are equal.
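// [Illustrative note, editor's addition] Why the zero-class fixup exists:
// fmaximum_num(+0.0, -0.0) must return +0.0, but SETGT treats +0 and -0 as
// equal, so MinMax alone may yield either zero. When MinMax compares equal
// to 0.0, the single IS_FPCLASS test (fcPosZero for max, fcNegZero for min)
// picks LHS exactly when LHS is the zero that must be returned; otherwise
// MinMax, which preferred RHS on equality, is already correct.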
+ SDValue RetZero = DAG.getSelect( + DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHSTrunc, TestZero), LHS, MinMax, Flags); - SDValue RCmp = DAG.getSelect( - DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS, LCmp, - Flags); - return DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags); + return DAG.getSelect(DL, VT, IsZero, RetZero, MinMax, Flags); } /// Returns a true value if this FPClassTest can be performed with an ordered @@ -10606,30 +10621,29 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask, assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() && "Incompatible types of Data and Mask"); if (IsCompressedMemory) { - if (DataVT.isScalableVector()) - report_fatal_error( - "Cannot currently handle compressed memory with scalable vectors"); // Incrementing the pointer according to number of '1's in the mask. - EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits()); - SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask); - if (MaskIntVT.getSizeInBits() < 32) { - MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg); - MaskIntVT = MVT::i32; + if (DataVT.isScalableVector()) { + EVT MaskExtVT = MaskVT.changeElementType(MVT::i32); + SDValue MaskExt = DAG.getNode(ISD::ZERO_EXTEND, DL, MaskExtVT, Mask); + Increment = DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i32, MaskExt); + } else { + EVT MaskIntVT = + EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits()); + SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask); + if (MaskIntVT.getSizeInBits() < 32) { + MaskInIntReg = + DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg); + MaskIntVT = MVT::i32; + } + Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg); } - - // Count '1's with POPCNT. - Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg); - Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT); // Scale is an element size in bytes.
SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL, AddrVT); + Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT); Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale); - } else if (DataVT.isScalableVector()) { - Increment = DAG.getVScale(DL, AddrVT, - APInt(AddrVT.getFixedSizeInBits(), - DataVT.getStoreSize().getKnownMinValue())); } else - Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT); + Increment = DAG.getTypeSize(DL, AddrVT, DataVT.getStoreSize()); return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment); } @@ -11121,7 +11135,8 @@ void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, else if (WideVT == MVT::i128) LC = RTLIB::MUL_I128; - if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) { + RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(LC); + if (LibcallImpl == RTLIB::Unsupported) { forceExpandMultiply(DAG, dl, Signed, Lo, Hi, LHS, RHS); return; } @@ -11922,10 +11937,8 @@ SDValue TargetLowering::expandVectorSplice(SDNode *Node, // Store the lo part of CONCAT_VECTORS(V1, V2) SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo); // Store the hi part of CONCAT_VECTORS(V1, V2) - SDValue OffsetToV2 = DAG.getVScale( - DL, PtrVT, - APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinValue())); - SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2); + SDValue VTBytes = DAG.getTypeSize(DL, PtrVT, VT.getStoreSize()); + SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, VTBytes); SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo); if (Imm >= 0) { @@ -11944,13 +11957,8 @@ SDValue TargetLowering::expandVectorSplice(SDNode *Node, SDValue TrailingBytes = DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT); - if (TrailingElts > VT.getVectorMinNumElements()) { - SDValue VLBytes = - DAG.getVScale(DL, PtrVT, - APInt(PtrVT.getFixedSizeInBits(), - VT.getStoreSize().getKnownMinValue())); - TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes); - } + if (TrailingElts > VT.getVectorMinNumElements()) + TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VTBytes); // Calculate the start address of the spliced result. StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes); @@ -12074,22 +12082,32 @@ SDValue TargetLowering::expandPartialReduceMLA(SDNode *N, EVT::getVectorVT(*DAG.getContext(), AccVT.getVectorElementType(), MulOpVT.getVectorElementCount()); - unsigned ExtOpcLHS = N->getOpcode() == ISD::PARTIAL_REDUCE_UMLA - ? ISD::ZERO_EXTEND - : ISD::SIGN_EXTEND; - unsigned ExtOpcRHS = N->getOpcode() == ISD::PARTIAL_REDUCE_SMLA - ? 
ISD::SIGN_EXTEND - : ISD::ZERO_EXTEND; + unsigned ExtOpcLHS, ExtOpcRHS; + switch (N->getOpcode()) { + default: + llvm_unreachable("Unexpected opcode"); + case ISD::PARTIAL_REDUCE_UMLA: + ExtOpcLHS = ExtOpcRHS = ISD::ZERO_EXTEND; + break; + case ISD::PARTIAL_REDUCE_SMLA: + ExtOpcLHS = ExtOpcRHS = ISD::SIGN_EXTEND; + break; + case ISD::PARTIAL_REDUCE_FMLA: + ExtOpcLHS = ExtOpcRHS = ISD::FP_EXTEND; + break; + } if (ExtMulOpVT != MulOpVT) { MulLHS = DAG.getNode(ExtOpcLHS, DL, ExtMulOpVT, MulLHS); MulRHS = DAG.getNode(ExtOpcRHS, DL, ExtMulOpVT, MulRHS); } SDValue Input = MulLHS; - APInt ConstantOne; - if (!ISD::isConstantSplatVector(MulRHS.getNode(), ConstantOne) || - !ConstantOne.isOne()) + if (N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA) { + if (!llvm::isOneOrOneSplatFP(MulRHS)) + Input = DAG.getNode(ISD::FMUL, DL, ExtMulOpVT, MulLHS, MulRHS); + } else if (!llvm::isOneOrOneSplat(MulRHS)) { Input = DAG.getNode(ISD::MUL, DL, ExtMulOpVT, MulLHS, MulRHS); + } unsigned Stride = AccVT.getVectorMinNumElements(); unsigned ScaleFactor = MulOpVT.getVectorMinNumElements() / Stride; @@ -12099,10 +12117,13 @@ SDValue TargetLowering::expandPartialReduceMLA(SDNode *N, for (unsigned I = 0; I < ScaleFactor; I++) Subvectors.push_back(DAG.getExtractSubvector(DL, AccVT, Input, I * Stride)); + unsigned FlatNode = + N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA ? ISD::FADD : ISD::ADD; + // Flatten the subvector tree while (Subvectors.size() > 1) { Subvectors.push_back( - DAG.getNode(ISD::ADD, DL, AccVT, {Subvectors[0], Subvectors[1]})); + DAG.getNode(FlatNode, DL, AccVT, {Subvectors[0], Subvectors[1]})); Subvectors.pop_front(); Subvectors.pop_front(); } @@ -12113,6 +12134,167 @@ SDValue TargetLowering::expandPartialReduceMLA(SDNode *N, return Subvectors[0]; } +/// Given a store node \p StoreNode, return true if it is safe to fold that node +/// into \p FPNode, which expands to a library call with output pointers. +static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode, + SDNode *FPNode) { + SmallVector<const SDNode *, 8> Worklist; + SmallVector<const SDNode *, 8> DeferredNodes; + SmallPtrSet<const SDNode *, 16> Visited; + + // Skip FPNode use by StoreNode (that's the use we want to fold into FPNode). + for (SDValue Op : StoreNode->ops()) + if (Op.getNode() != FPNode) + Worklist.push_back(Op.getNode()); + + unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps(); + while (!Worklist.empty()) { + const SDNode *Node = Worklist.pop_back_val(); + auto [_, Inserted] = Visited.insert(Node); + if (!Inserted) + continue; + + if (MaxSteps > 0 && Visited.size() >= MaxSteps) + return false; + + // Reached the FPNode (would result in a cycle). + // OR Reached CALLSEQ_START (would result in nested call sequences). + if (Node == FPNode || Node->getOpcode() == ISD::CALLSEQ_START) + return false; + + if (Node->getOpcode() == ISD::CALLSEQ_END) { + // Defer looking into call sequences (so we can check we're outside one). + // We still need to look through these for the predecessor check. + DeferredNodes.push_back(Node); + continue; + } + + for (SDValue Op : Node->ops()) + Worklist.push_back(Op.getNode()); + } + + // True if we're outside a call sequence and don't have the FPNode as a + // predecessor. No cycles or nested call sequences possible. 
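// [Illustrative note, editor's addition] Example of why the walk above is
// needed: if the candidate store's chain reaches back into FPNode, folding
// the store into the call would create a cycle, e.g.
//   t0 = fsincos x            (FPNode)
//   st = store t0#0 -> p      (StoreNode, chain transitively depends on t0)
// The worklist starts at StoreNode's non-FPNode operands; hitting FPNode or
// an enclosing CALLSEQ_START rejects the fold, while CALLSEQ_ENDs are
// deferred so the final hasPredecessorHelper query still sees them.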
+ return !SDNode::hasPredecessorHelper(FPNode, Visited, DeferredNodes, + MaxSteps); +} + +bool TargetLowering::expandMultipleResultFPLibCall( + SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node, + SmallVectorImpl<SDValue> &Results, + std::optional<unsigned> CallRetResNo) const { + if (LC == RTLIB::UNKNOWN_LIBCALL) + return false; + + RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(LC); + if (LibcallImpl == RTLIB::Unsupported) + return false; + + LLVMContext &Ctx = *DAG.getContext(); + EVT VT = Node->getValueType(0); + unsigned NumResults = Node->getNumValues(); + + // Find users of the node that store the results (and share input chains). The + // destination pointers can be used instead of creating stack allocations. + SDValue StoresInChain; + SmallVector<StoreSDNode *, 2> ResultStores(NumResults); + for (SDNode *User : Node->users()) { + if (!ISD::isNormalStore(User)) + continue; + auto *ST = cast<StoreSDNode>(User); + SDValue StoreValue = ST->getValue(); + unsigned ResNo = StoreValue.getResNo(); + // Ensure the store corresponds to an output pointer. + if (CallRetResNo == ResNo) + continue; + // Ensure the store to the default address space and not atomic or volatile. + if (!ST->isSimple() || ST->getAddressSpace() != 0) + continue; + // Ensure all store chains are the same (so they don't alias). + if (StoresInChain && ST->getChain() != StoresInChain) + continue; + // Ensure the store is properly aligned. + Type *StoreType = StoreValue.getValueType().getTypeForEVT(Ctx); + if (ST->getAlign() < + DAG.getDataLayout().getABITypeAlign(StoreType->getScalarType())) + continue; + // Avoid: + // 1. Creating cyclic dependencies. + // 2. Expanding the node to a call within a call sequence. + if (!canFoldStoreIntoLibCallOutputPointers(ST, Node)) + continue; + ResultStores[ResNo] = ST; + StoresInChain = ST->getChain(); + } + + ArgListTy Args; + + // Pass the arguments. + for (const SDValue &Op : Node->op_values()) { + EVT ArgVT = Op.getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(Ctx); + Args.emplace_back(Op, ArgTy); + } + + // Pass the output pointers. + SmallVector<SDValue, 2> ResultPtrs(NumResults); + Type *PointerTy = PointerType::getUnqual(Ctx); + for (auto [ResNo, ST] : llvm::enumerate(ResultStores)) { + if (ResNo == CallRetResNo) + continue; + EVT ResVT = Node->getValueType(ResNo); + SDValue ResultPtr = ST ? ST->getBasePtr() : DAG.CreateStackTemporary(ResVT); + ResultPtrs[ResNo] = ResultPtr; + Args.emplace_back(ResultPtr, PointerTy); + } + + SDLoc DL(Node); + + if (RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(LibcallImpl)) { + // Pass the vector mask (if required). + EVT MaskVT = getSetCCResultType(DAG.getDataLayout(), Ctx, VT); + SDValue Mask = DAG.getBoolConstant(true, DL, MaskVT, VT); + Args.emplace_back(Mask, MaskVT.getTypeForEVT(Ctx)); + } + + Type *RetType = CallRetResNo.has_value() + ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx) + : Type::getVoidTy(Ctx); + SDValue InChain = StoresInChain ? 
StoresInChain : DAG.getEntryNode(); + SDValue Callee = + DAG.getExternalSymbol(LibcallImpl, getPointerTy(DAG.getDataLayout())); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(DL).setChain(InChain).setLibCallee( + getLibcallImplCallingConv(LibcallImpl), RetType, Callee, std::move(Args)); + + auto [Call, CallChain] = LowerCallTo(CLI); + + for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) { + if (ResNo == CallRetResNo) { + Results.push_back(Call); + continue; + } + MachinePointerInfo PtrInfo; + SDValue LoadResult = DAG.getLoad(Node->getValueType(ResNo), DL, CallChain, + ResultPtr, PtrInfo); + SDValue OutChain = LoadResult.getValue(1); + + if (StoreSDNode *ST = ResultStores[ResNo]) { + // Replace store with the library call. + DAG.ReplaceAllUsesOfValueWith(SDValue(ST, 0), OutChain); + PtrInfo = ST->getPointerInfo(); + } else { + PtrInfo = MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), + cast<FrameIndexSDNode>(ResultPtr)->getIndex()); + } + + Results.push_back(LoadResult); + } + + return true; +} + bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, |
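
For intuition, the PARTIAL_REDUCE_*MLA expansion above can be modelled outside the SelectionDAG API. The standalone C++ sketch below uses plain containers and integer arithmetic instead of SDNodes; the function and variable names are illustrative only, not LLVM API. It mirrors the same three steps: extend-and-multiply the inputs, extract accumulator-width subvectors of the product, and flatten the accumulator plus subvectors with the pairwise pop/push loop.

#include <cstdio>
#include <deque>
#include <vector>

// Reference model: Acc has Stride elements; MulLHS/MulRHS have
// Stride * ScaleFactor elements. Result lane I accumulates every product
// whose index is congruent to I modulo Stride.
std::vector<long> partialReduceMLARef(std::vector<long> Acc,
                                      const std::vector<long> &MulLHS,
                                      const std::vector<long> &MulRHS) {
  size_t Stride = Acc.size();
  size_t ScaleFactor = MulLHS.size() / Stride;

  // "Extend and multiply": form the element-wise product at full width.
  // (The real code emits MUL or FMUL, and skips the multiply entirely when
  // MulRHS is a splat of one.)
  std::vector<long> Input(MulLHS.size());
  for (size_t I = 0; I < Input.size(); ++I)
    Input[I] = MulLHS[I] * MulRHS[I];

  // Extract ScaleFactor subvectors of Stride elements each, seeded with Acc.
  std::deque<std::vector<long>> Subvectors = {Acc};
  for (size_t I = 0; I < ScaleFactor; ++I)
    Subvectors.emplace_back(Input.begin() + I * Stride,
                            Input.begin() + (I + 1) * Stride);

  // Flatten the subvector tree with the same pairwise loop as the diff
  // (ADD or FADD in the real code, selected by FlatNode).
  while (Subvectors.size() > 1) {
    std::vector<long> Sum(Stride);
    for (size_t I = 0; I < Stride; ++I)
      Sum[I] = Subvectors[0][I] + Subvectors[1][I];
    Subvectors.push_back(Sum);
    Subvectors.pop_front();
    Subvectors.pop_front();
  }
  return Subvectors.front();
}

int main() {
  // A 2-element accumulator with 4-element inputs gives ScaleFactor = 2.
  auto R = partialReduceMLARef({10, 20}, {1, 2, 3, 4}, {1, 1, 1, 1});
  std::printf("%ld %ld\n", R[0], R[1]); // prints "14 26"
}

Popping pairs from the front while pushing partial sums to the back builds a balanced tree of adds rather than a serial chain, which keeps the critical path logarithmic in ScaleFactor.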

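The safety walk in canFoldStoreIntoLibCallOutputPointers can be illustrated the same way. The sketch below uses toy nodes rather than SDNodes, and it deliberately omits parts of the real helper (the deferred CALLSEQ_END handling, the MaxSteps limit, and the closing SDNode::hasPredecessorHelper call); all names are illustrative. It keeps only the core rule: starting from the store's other operands, walk operand edges and reject the fold if the walk reaches the FP node itself (folding would create a cycle) or enters a call sequence.

#include <cstdio>
#include <vector>

// Toy DAG node: operand edges point from a use to the values it consumes.
struct Node {
  const char *Name;
  bool IsCallSeqStart = false;
  std::vector<Node *> Ops;
};

// Returns true if folding Store into FPNode's libcall output pointers is safe.
static bool canFoldStore(const Node *Store, const Node *FPNode) {
  std::vector<const Node *> Worklist;
  std::vector<const Node *> Visited;

  // Skip the FPNode operand itself: that is the use we want to fold.
  for (const Node *Op : Store->Ops)
    if (Op != FPNode)
      Worklist.push_back(Op);

  auto Seen = [&](const Node *N) {
    for (const Node *V : Visited)
      if (V == N)
        return true;
    return false;
  };

  while (!Worklist.empty()) {
    const Node *N = Worklist.back();
    Worklist.pop_back();
    if (Seen(N))
      continue;
    Visited.push_back(N);

    // Reaching FPNode through another operand means the folded store would
    // feed the call that produces it (a cycle); reaching a call-sequence
    // start would nest call sequences.
    if (N == FPNode || N->IsCallSeqStart)
      return false;

    for (const Node *Op : N->Ops)
      Worklist.push_back(Op);
  }
  return true;
}

int main() {
  Node X{"x"};
  Node FP{"fsincos", false, {&X}};
  Node GoodAddr{"frameindex"};
  Node GoodStore{"store", false, {&FP, &GoodAddr}};
  Node BadAddr{"addr-derived-from-result", false, {&FP}};
  Node BadStore{"store", false, {&FP, &BadAddr}};
  std::printf("%d %d\n", canFoldStore(&GoodStore, &FP), // 1: safe to fold
              canFoldStore(&BadStore, &FP));            // 0: would cycle
}

In the real helper the deferred CALLSEQ_END nodes and the final hasPredecessorHelper call preserve bounded, chain-aware reachability; the toy walk above captures only the two reject conditions.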