Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG')
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 336
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 58
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 162
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 15
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 205
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 19
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 254
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SDNodeInfo.cpp | 116
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 542
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 382
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 22
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 48
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 320
17 files changed, 1597 insertions(+), 892 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index bdd6bf0..77346cb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1065,8 +1065,9 @@ static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
// Determines if it is a constant integer or a splat/build vector of constant
// integers (and undefs).
-// Do not permit build vector implicit truncation.
-static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
+// Do not permit build vector implicit truncation unless AllowTruncation is set.
+static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false,
+ bool AllowTruncation = false) {
if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
return !(Const->isOpaque() && NoOpaques);
if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
@@ -1076,8 +1077,13 @@ static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
if (Op.isUndef())
continue;
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
- if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
- (Const->isOpaque() && NoOpaques))
+ if (!Const || (Const->isOpaque() && NoOpaques))
+ return false;
+ // When AllowTruncation is true, allow constants that have been promoted
+ // during type legalization as long as the value fits in the target type.
+ if ((AllowTruncation &&
+ Const->getAPIntValue().getActiveBits() > BitWidth) ||
+ (!AllowTruncation && Const->getAPIntValue().getBitWidth() != BitWidth))
return false;
}
return true;
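
The relaxed check is easy to exercise in isolation. A minimal standalone sketch of the new acceptance rule, with plain integers standing in for APInt (function names here are illustrative, not LLVM API):

  #include <cassert>
  #include <cstdint>

  // Analogue of APInt::getActiveBits(): bits needed to represent V.
  static unsigned activeBits(uint64_t V) {
    unsigned N = 0;
    for (; V; V >>= 1)
      ++N;
    return N;
  }

  // Mirrors the new logic: with AllowTruncation, a constant carried at a wider
  // width still matches a narrower element type if its value fits; without it,
  // the bit widths must match exactly.
  static bool acceptConstant(uint64_t V, unsigned ConstWidth,
                             unsigned TargetWidth, bool AllowTruncation) {
    if (AllowTruncation)
      return activeBits(V) <= TargetWidth;
    return ConstWidth == TargetWidth;
  }

  int main() {
    // An i8 constant 7 that type legalization promoted to i32:
    assert(!acceptConstant(7, 32, 8, /*AllowTruncation=*/false));
    assert(acceptConstant(7, 32, 8, /*AllowTruncation=*/true));
    // 300 does not fit in 8 bits, so it is rejected either way.
    assert(!acceptConstant(300, 32, 8, /*AllowTruncation=*/true));
  }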
@@ -2042,6 +2048,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::PARTIAL_REDUCE_SMLA:
case ISD::PARTIAL_REDUCE_UMLA:
case ISD::PARTIAL_REDUCE_SUMLA:
+ case ISD::PARTIAL_REDUCE_FMLA:
return visitPARTIAL_REDUCE_MLA(N);
case ISD::VECTOR_COMPRESS: return visitVECTOR_COMPRESS(N);
case ISD::LIFETIME_END: return visitLIFETIME_END(N);
@@ -3287,6 +3294,9 @@ static SDValue getAsCarry(const TargetLowering &TLI, SDValue V,
// First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
while (true) {
+ if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
+ return V;
+
if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
V = V.getOperand(0);
continue;
@@ -3301,9 +3311,6 @@ static SDValue getAsCarry(const TargetLowering &TLI, SDValue V,
continue;
}
- if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
- return V;
-
break;
}
@@ -4046,6 +4053,8 @@ static SDValue foldSubCtlzNot(SDNode *N, SelectionDAG &DAG) {
m_ConstInt(AndMask)))) {
// Type Legalisation Pattern:
// (sub (ctlz (and (xor Op XorMask) AndMask)) BitWidthDiff)
+ if (BitWidthDiff.getZExtValue() >= BitWidth)
+ return SDValue();
unsigned AndMaskWidth = BitWidth - BitWidthDiff.getZExtValue();
if (!(AndMask.isMask(AndMaskWidth) && XorMask.countr_one() >= AndMaskWidth))
return SDValue();
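
The added guard matters because AndMaskWidth is computed in unsigned arithmetic: a BitWidthDiff of BitWidth or more would wrap around rather than go negative. A tiny illustration of the hazard (values are made up):

  #include <cstdio>

  int main() {
    unsigned BitWidth = 32, BitWidthDiff = 40;
    // Without the guard, the subtraction wraps to a huge mask width.
    unsigned AndMaskWidth = BitWidth - BitWidthDiff;
    std::printf("%u\n", AndMaskWidth); // prints 4294967288, not -8
  }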
@@ -4879,8 +4888,8 @@ template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
// fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
if (sd_context_match(N0, Matcher, m_Opc(ISD::ADD)) &&
- DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
- DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
+ isConstantOrConstantVector(N1) &&
+ isConstantOrConstantVector(N0.getOperand(1)) &&
isMulAddWithConstProfitable(N, N0, N1))
return Matcher.getNode(
ISD::ADD, DL, VT,
@@ -4980,7 +4989,7 @@ static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
}
- return TLI.getLibcallName(LC) != nullptr;
+ return TLI.getLibcallImpl(LC) != RTLIB::Unsupported;
}
/// Issue divrem if both quotient and remainder are needed.
@@ -5177,7 +5186,8 @@ static bool isDivisorPowerOfTwo(SDValue Divisor) {
return false;
};
- return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
+ return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo, /*AllowUndefs=*/false,
+ /*AllowTruncation=*/true);
}
SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
@@ -5241,7 +5251,8 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
// alternate sequence. Targets may check function attributes for size/speed
// trade-offs.
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
- if (isConstantOrConstantVector(N1) &&
+ if (isConstantOrConstantVector(N1, /*NoOpaques=*/false,
+ /*AllowTruncation=*/true) &&
!TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue Op = BuildSDIV(N))
return Op;
@@ -5319,7 +5330,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
EVT VT = N->getValueType(0);
// fold (udiv x, (1 << c)) -> x >>u c
- if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
+ if (isConstantOrConstantVector(N1, /*NoOpaques=*/true,
+ /*AllowTruncation=*/true)) {
if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
AddToWorklist(LogBase2.getNode());
@@ -5333,7 +5345,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
if (N1.getOpcode() == ISD::SHL) {
SDValue N10 = N1.getOperand(0);
- if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
+ if (isConstantOrConstantVector(N10, /*NoOpaques=*/true,
+ /*AllowTruncation=*/true)) {
if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
AddToWorklist(LogBase2.getNode());
@@ -5349,7 +5362,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
// fold (udiv x, c) -> alternate
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
- if (isConstantOrConstantVector(N1) &&
+ if (isConstantOrConstantVector(N1, /*NoOpaques=*/false,
+ /*AllowTruncation=*/true) &&
!TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue Op = BuildUDIV(N))
return Op;
@@ -5577,7 +5591,8 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
return DAG.getConstant(0, DL, VT);
// fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
- if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
+ if (isConstantOrConstantVector(N1, /*NoOpaques=*/true,
+ /*AllowTruncation=*/true) &&
hasOperation(ISD::SRL, VT)) {
if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
unsigned NumEltBits = VT.getScalarSizeInBits();
@@ -9374,7 +9389,7 @@ static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
// Check if the bytes offsets we are looking at match with either big or
// little endian value loaded. Return true for big endian, false for little
// endian, and std::nullopt if match failed.
-static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
+static std::optional<bool> isBigEndian(ArrayRef<int64_t> ByteOffsets,
int64_t FirstOffset) {
// The endian can be decided only when it is 2 bytes at least.
unsigned Width = ByteOffsets.size();
@@ -10880,15 +10895,14 @@ static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
// Combine to mulh if mulh is legal/custom for the narrow type on the target
// or if it is a vector type then we could transform to an acceptable type and
// rely on legalization to split/combine the result.
+ EVT TransformVT = NarrowVT;
if (NarrowVT.isVector()) {
- EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), NarrowVT);
- if (TransformVT.getVectorElementType() != NarrowVT.getVectorElementType() ||
- !TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT))
- return SDValue();
- } else {
- if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
+ TransformVT = TLI.getLegalTypeToTransformTo(*DAG.getContext(), NarrowVT);
+ if (TransformVT.getScalarType() != NarrowVT.getScalarType())
return SDValue();
}
+ if (!TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT))
+ return SDValue();
SDValue Result =
DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
@@ -10985,6 +10999,22 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
}
}
+ // fold (sra (xor (sra x, c1), -1), c2) -> (xor (sra x, c3), -1)
+ // This allows merging two arithmetic shifts even when there's a NOT in
+ // between.
+ SDValue X;
+ APInt C1;
+ if (N1C && sd_match(N0, m_OneUse(m_Not(
+ m_OneUse(m_Sra(m_Value(X), m_ConstInt(C1))))))) {
+ APInt C2 = N1C->getAPIntValue();
+ zeroExtendToMatch(C1, C2, 1 /* Overflow Bit */);
+ APInt Sum = C1 + C2;
+ unsigned ShiftSum = Sum.getLimitedValue(OpSizeInBits - 1);
+ SDValue NewShift = DAG.getNode(
+ ISD::SRA, DL, VT, X, DAG.getShiftAmountConstant(ShiftSum, VT, DL));
+ return DAG.getNOT(DL, NewShift, VT);
+ }
+
// fold (sra (shl X, m), (sub result_size, n))
// -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
// result_size - n != m.
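
The new sra-of-not fold above is sound because arithmetic right shift commutes with bitwise NOT, and stacked arithmetic shifts saturate at BitWidth - 1. A brute-force 8-bit check of the identity (standalone sketch, not the DAG code):

  #include <cassert>
  #include <cstdint>

  // Arithmetic shift right on int8_t (arithmetic on all mainstream compilers;
  // defined as arithmetic since C++20).
  static int8_t sra(int8_t X, unsigned C) { return (int8_t)(X >> C); }

  int main() {
    for (int V = -128; V < 128; ++V)
      for (unsigned C1 = 0; C1 < 8; ++C1)
        for (unsigned C2 = 0; C2 < 8; ++C2) {
          unsigned Sum = C1 + C2;
          unsigned C3 = Sum > 7 ? 7 : Sum; // getLimitedValue(OpSizeInBits - 1)
          int8_t Merged = (int8_t)~sra((int8_t)V, C3);
          int8_t Split = sra((int8_t)~sra((int8_t)V, C1), C2);
          assert(Merged == Split);
        }
  }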
@@ -11744,12 +11774,12 @@ SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
// fold (bitreverse (lshr (bitreverse x), y)) -> (shl x, y)
if ((!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
- sd_match(N, m_BitReverse(m_Srl(m_BitReverse(m_Value(X)), m_Value(Y)))))
+ sd_match(N0, m_Srl(m_BitReverse(m_Value(X)), m_Value(Y))))
return DAG.getNode(ISD::SHL, DL, VT, X, Y);
// fold (bitreverse (shl (bitreverse x), y)) -> (lshr x, y)
if ((!LegalOperations || TLI.isOperationLegal(ISD::SRL, VT)) &&
- sd_match(N, m_BitReverse(m_Shl(m_BitReverse(m_Value(X)), m_Value(Y)))))
+ sd_match(N0, m_Shl(m_BitReverse(m_Value(X)), m_Value(Y))))
return DAG.getNode(ISD::SRL, DL, VT, X, Y);
return SDValue();
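
Both folds in this hunk rest on bit reversal exchanging the two logical shift directions; the change itself only tightens the matching to N0. A brute-force 8-bit verification of the identities (standalone sketch):

  #include <cassert>
  #include <cstdint>

  static uint8_t brev(uint8_t X) {
    uint8_t R = 0;
    for (int I = 0; I < 8; ++I)
      R |= (uint8_t)(((X >> I) & 1u) << (7 - I));
    return R;
  }

  int main() {
    for (unsigned V = 0; V < 256; ++V)
      for (unsigned Y = 0; Y < 8; ++Y) {
        // bitreverse(lshr(bitreverse(x), y)) == shl(x, y)
        assert(brev((uint8_t)(brev((uint8_t)V) >> Y)) == (uint8_t)(V << Y));
        // bitreverse(shl(bitreverse(x), y)) == lshr(x, y)
        assert(brev((uint8_t)(brev((uint8_t)V) << Y)) == (uint8_t)(V >> Y));
      }
  }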
@@ -12999,20 +13029,35 @@ SDValue DAGCombiner::visitPARTIAL_REDUCE_MLA(SDNode *N) {
return SDValue();
}
-// partial_reduce_*mla(acc, mul(ext(a), ext(b)), splat(1))
+// partial_reduce_*mla(acc, mul(*ext(a), *ext(b)), splat(1))
// -> partial_reduce_*mla(acc, a, b)
//
-// partial_reduce_*mla(acc, mul(ext(x), splat(C)), splat(1))
-// -> partial_reduce_*mla(acc, x, C)
+// partial_reduce_*mla(acc, mul(*ext(x), splat(C)), splat(1))
+// -> partial_reduce_*mla(acc, x, splat(C))
+//
+// partial_reduce_*mla(acc, sel(p, mul(*ext(a), *ext(b)), splat(0)), splat(1))
+// -> partial_reduce_*mla(acc, sel(p, a, splat(0)), b)
+//
+// partial_reduce_*mla(acc, sel(p, mul(*ext(a), splat(C)), splat(0)), splat(1))
+// -> partial_reduce_*mla(acc, sel(p, a, splat(0)), splat(C))
SDValue DAGCombiner::foldPartialReduceMLAMulOp(SDNode *N) {
SDLoc DL(N);
auto *Context = DAG.getContext();
SDValue Acc = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
SDValue Op2 = N->getOperand(2);
-
unsigned Opc = Op1->getOpcode();
- if (Opc != ISD::MUL && Opc != ISD::SHL)
+
+ // Handle predication by moving the SELECT into the operand of the MUL.
+ SDValue Pred;
+ if (Opc == ISD::VSELECT && (isZeroOrZeroSplat(Op1->getOperand(2)) ||
+ isZeroOrZeroSplatFP(Op1->getOperand(2)))) {
+ Pred = Op1->getOperand(0);
+ Op1 = Op1->getOperand(1);
+ Opc = Op1->getOpcode();
+ }
+
+ if (Opc != ISD::MUL && Opc != ISD::FMUL && Opc != ISD::SHL)
return SDValue();
SDValue LHS = Op1->getOperand(0);
@@ -13031,20 +13076,37 @@ SDValue DAGCombiner::foldPartialReduceMLAMulOp(SDNode *N) {
Opc = ISD::MUL;
}
- APInt C;
- if (Opc != ISD::MUL || !ISD::isConstantSplatVector(Op2.getNode(), C) ||
- !C.isOne())
+ if (!(Opc == ISD::MUL && llvm::isOneOrOneSplat(Op2)) &&
+ !(Opc == ISD::FMUL && llvm::isOneOrOneSplatFP(Op2)))
return SDValue();
+ auto IsIntOrFPExtOpcode = [](unsigned int Opcode) {
+ return (ISD::isExtOpcode(Opcode) || Opcode == ISD::FP_EXTEND);
+ };
+
unsigned LHSOpcode = LHS->getOpcode();
- if (!ISD::isExtOpcode(LHSOpcode))
+ if (!IsIntOrFPExtOpcode(LHSOpcode))
return SDValue();
SDValue LHSExtOp = LHS->getOperand(0);
EVT LHSExtOpVT = LHSExtOp.getValueType();
+ // When Pred is non-zero, set Op = select(Pred, Op, splat(0)) and freeze
+ // OtherOp to keep the same semantics when moving the selects into the MUL
+ // operands.
+ auto ApplyPredicate = [&](SDValue &Op, SDValue &OtherOp) {
+ if (Pred) {
+ EVT OpVT = Op.getValueType();
+ SDValue Zero = OpVT.isFloatingPoint() ? DAG.getConstantFP(0.0, DL, OpVT)
+ : DAG.getConstant(0, DL, OpVT);
+ Op = DAG.getSelect(DL, OpVT, Pred, Op, Zero);
+ OtherOp = DAG.getFreeze(OtherOp);
+ }
+ };
+
// partial_reduce_*mla(acc, mul(ext(x), splat(C)), splat(1))
// -> partial_reduce_*mla(acc, x, C)
+ APInt C;
if (ISD::isConstantSplatVector(RHS.getNode(), C)) {
// TODO: Make use of partial_reduce_sumla here
APInt CTrunc = C.trunc(LHSExtOpVT.getScalarSizeInBits());
@@ -13064,12 +13126,13 @@ SDValue DAGCombiner::foldPartialReduceMLAMulOp(SDNode *N) {
TLI.getTypeToTransformTo(*Context, LHSExtOpVT)))
return SDValue();
- return DAG.getNode(NewOpcode, DL, N->getValueType(0), Acc, LHSExtOp,
- DAG.getConstant(CTrunc, DL, LHSExtOpVT));
+ SDValue C = DAG.getConstant(CTrunc, DL, LHSExtOpVT);
+ ApplyPredicate(C, LHSExtOp);
+ return DAG.getNode(NewOpcode, DL, N->getValueType(0), Acc, LHSExtOp, C);
}
unsigned RHSOpcode = RHS->getOpcode();
- if (!ISD::isExtOpcode(RHSOpcode))
+ if (!IsIntOrFPExtOpcode(RHSOpcode))
return SDValue();
SDValue RHSExtOp = RHS->getOperand(0);
@@ -13086,6 +13149,8 @@ SDValue DAGCombiner::foldPartialReduceMLAMulOp(SDNode *N) {
else if (LHSOpcode == ISD::ZERO_EXTEND && RHSOpcode == ISD::SIGN_EXTEND) {
NewOpc = ISD::PARTIAL_REDUCE_SUMLA;
std::swap(LHSExtOp, RHSExtOp);
+ } else if (LHSOpcode == ISD::FP_EXTEND && RHSOpcode == ISD::FP_EXTEND) {
+ NewOpc = ISD::PARTIAL_REDUCE_FMLA;
} else
return SDValue();
// For a 2-stage extend the signedness of both of the extends must match
@@ -13104,39 +13169,50 @@ SDValue DAGCombiner::foldPartialReduceMLAMulOp(SDNode *N) {
TLI.getTypeToTransformTo(*Context, LHSExtOpVT)))
return SDValue();
+ ApplyPredicate(RHSExtOp, LHSExtOp);
return DAG.getNode(NewOpc, DL, N->getValueType(0), Acc, LHSExtOp, RHSExtOp);
}
-// partial.reduce.umla(acc, zext(op), splat(1))
-// -> partial.reduce.umla(acc, op, splat(trunc(1)))
-// partial.reduce.smla(acc, sext(op), splat(1))
-// -> partial.reduce.smla(acc, op, splat(trunc(1)))
+// partial.reduce.*mla(acc, *ext(op), splat(1))
+// -> partial.reduce.*mla(acc, op, splat(trunc(1)))
// partial.reduce.sumla(acc, sext(op), splat(1))
// -> partial.reduce.smla(acc, op, splat(trunc(1)))
+//
+// partial.reduce.*mla(acc, sel(p, *ext(op), splat(0)), splat(1))
+// -> partial.reduce.*mla(acc, sel(p, op, splat(0)), splat(trunc(1)))
SDValue DAGCombiner::foldPartialReduceAdd(SDNode *N) {
SDLoc DL(N);
SDValue Acc = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
SDValue Op2 = N->getOperand(2);
- APInt ConstantOne;
- if (!ISD::isConstantSplatVector(Op2.getNode(), ConstantOne) ||
- !ConstantOne.isOne())
+ if (!llvm::isOneOrOneSplat(Op2) && !llvm::isOneOrOneSplatFP(Op2))
return SDValue();
+ SDValue Pred;
unsigned Op1Opcode = Op1.getOpcode();
- if (!ISD::isExtOpcode(Op1Opcode))
+ if (Op1Opcode == ISD::VSELECT && (isZeroOrZeroSplat(Op1->getOperand(2)) ||
+ isZeroOrZeroSplatFP(Op1->getOperand(2)))) {
+ Pred = Op1->getOperand(0);
+ Op1 = Op1->getOperand(1);
+ Op1Opcode = Op1->getOpcode();
+ }
+
+ if (!ISD::isExtOpcode(Op1Opcode) && Op1Opcode != ISD::FP_EXTEND)
return SDValue();
- bool Op1IsSigned = Op1Opcode == ISD::SIGN_EXTEND;
+ bool Op1IsSigned =
+ Op1Opcode == ISD::SIGN_EXTEND || Op1Opcode == ISD::FP_EXTEND;
bool NodeIsSigned = N->getOpcode() != ISD::PARTIAL_REDUCE_UMLA;
EVT AccElemVT = Acc.getValueType().getVectorElementType();
if (Op1IsSigned != NodeIsSigned &&
Op1.getValueType().getVectorElementType() != AccElemVT)
return SDValue();
- unsigned NewOpcode =
- Op1IsSigned ? ISD::PARTIAL_REDUCE_SMLA : ISD::PARTIAL_REDUCE_UMLA;
+ unsigned NewOpcode = N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA
+ ? ISD::PARTIAL_REDUCE_FMLA
+ : Op1IsSigned ? ISD::PARTIAL_REDUCE_SMLA
+ : ISD::PARTIAL_REDUCE_UMLA;
SDValue UnextOp1 = Op1.getOperand(0);
EVT UnextOp1VT = UnextOp1.getValueType();
@@ -13146,8 +13222,18 @@ SDValue DAGCombiner::foldPartialReduceAdd(SDNode *N) {
TLI.getTypeToTransformTo(*Context, UnextOp1VT)))
return SDValue();
+ SDValue Constant = N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA
+ ? DAG.getConstantFP(1, DL, UnextOp1VT)
+ : DAG.getConstant(1, DL, UnextOp1VT);
+
+ if (Pred) {
+ SDValue Zero = N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA
+ ? DAG.getConstantFP(0, DL, UnextOp1VT)
+ : DAG.getConstant(0, DL, UnextOp1VT);
+ Constant = DAG.getSelect(DL, UnextOp1VT, Pred, Constant, Zero);
+ }
return DAG.getNode(NewOpcode, DL, N->getValueType(0), Acc, UnextOp1,
- DAG.getConstant(1, DL, UnextOp1VT));
+ Constant);
}
SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
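
Both predication rewrites above move a zero-select from after the multiply onto one of its operands, which preserves the value because x * 0 == 0; the freeze exists only to stop poison propagating through the untouched operand and has no plain-C++ analogue. A scalar spot-check of the arithmetic (illustrative):

  #include <cassert>
  #include <cstdint>

  int main() {
    for (int P = 0; P <= 1; ++P)
      for (int32_t A = -3; A <= 3; ++A)
        for (int32_t B = -3; B <= 3; ++B) {
          int32_t SelAfter = P ? A * B : 0;    // sel(p, mul(a, b), 0)
          int32_t SelBefore = (P ? A : 0) * B; // mul(sel(p, a, 0), b)
          assert(SelAfter == SelBefore);
        }
  }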
@@ -16734,38 +16820,51 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
}
// fold (conv (load x)) -> (load (conv*)x)
+ // fold (conv (freeze (load x))) -> (freeze (load (conv*)x))
// If the resultant load doesn't need a higher alignment than the original!
- if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
- // Do not remove the cast if the types differ in endian layout.
- TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
- TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
- // If the load is volatile, we only want to change the load type if the
- // resulting load is legal. Otherwise we might increase the number of
- // memory accesses. We don't care if the original type was legal or not
- // as we assume software couldn't rely on the number of accesses of an
- // illegal type.
- ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
- TLI.isOperationLegal(ISD::LOAD, VT))) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ auto CastLoad = [this, &VT](SDValue N0, const SDLoc &DL) {
+ if (!ISD::isNormalLoad(N0.getNode()) || !N0.hasOneUse())
+ return SDValue();
- if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
- *LN0->getMemOperand())) {
- // If the range metadata type does not match the new memory
- // operation type, remove the range metadata.
- if (const MDNode *MD = LN0->getRanges()) {
- ConstantInt *Lower = mdconst::extract<ConstantInt>(MD->getOperand(0));
- if (Lower->getBitWidth() != VT.getScalarSizeInBits() ||
- !VT.isInteger()) {
- LN0->getMemOperand()->clearRanges();
- }
+ // Do not remove the cast if the types differ in endian layout.
+ if (TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) !=
+ TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()))
+ return SDValue();
+
+ // If the load is volatile, we only want to change the load type if the
+ // resulting load is legal. Otherwise we might increase the number of
+ // memory accesses. We don't care if the original type was legal or not
+ // as we assume software couldn't rely on the number of accesses of an
+ // illegal type.
+ auto *LN0 = cast<LoadSDNode>(N0);
+ if ((LegalOperations || !LN0->isSimple()) &&
+ !TLI.isOperationLegal(ISD::LOAD, VT))
+ return SDValue();
+
+ if (!TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
+ *LN0->getMemOperand()))
+ return SDValue();
+
+ // If the range metadata type does not match the new memory
+ // operation type, remove the range metadata.
+ if (const MDNode *MD = LN0->getRanges()) {
+ ConstantInt *Lower = mdconst::extract<ConstantInt>(MD->getOperand(0));
+ if (Lower->getBitWidth() != VT.getScalarSizeInBits() || !VT.isInteger()) {
+ LN0->getMemOperand()->clearRanges();
}
- SDValue Load =
- DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
- LN0->getMemOperand());
- DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
- return Load;
}
- }
+ SDValue Load = DAG.getLoad(VT, DL, LN0->getChain(), LN0->getBasePtr(),
+ LN0->getMemOperand());
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
+ return Load;
+ };
+
+ if (SDValue NewLd = CastLoad(N0, SDLoc(N)))
+ return NewLd;
+
+ if (N0.getOpcode() == ISD::FREEZE && N0.hasOneUse())
+ if (SDValue NewLd = CastLoad(N0.getOperand(0), SDLoc(N)))
+ return DAG.getFreeze(NewLd);
if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
return V;
@@ -17821,7 +17920,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
if (N1C && N1C->isZero())
- if (N1C->isNegative() || Flags.hasNoSignedZeros())
+ if (N1C->isNegative() || Flags.hasNoSignedZeros() ||
+ DAG.canIgnoreSignBitOfZero(SDValue(N, 0)))
return N0;
if (SDValue NewSel = foldBinOpIntoSelect(N))
@@ -18033,7 +18133,8 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// (fsub A, 0) -> A
if (N1CFP && N1CFP->isZero()) {
- if (!N1CFP->isNegative() || Flags.hasNoSignedZeros()) {
+ if (!N1CFP->isNegative() || Flags.hasNoSignedZeros() ||
+ DAG.canIgnoreSignBitOfZero(SDValue(N, 0))) {
return N0;
}
}
@@ -18046,7 +18147,8 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// (fsub -0.0, N1) -> -N1
if (N0CFP && N0CFP->isZero()) {
- if (N0CFP->isNegative() || Flags.hasNoSignedZeros()) {
+ if (N0CFP->isNegative() || Flags.hasNoSignedZeros() ||
+ DAG.canIgnoreSignBitOfZero(SDValue(N, 0))) {
// We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
// flushed to zero, unless all users treat denorms as zero (DAZ).
// FIXME: This transform will change the sign of a NaN and the behavior
@@ -18625,11 +18727,13 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (Flags.hasAllowReciprocal()) {
// If this FDIV is part of a reciprocal square root, it may be folded
// into a target-specific square root estimate instruction.
+ bool N1AllowReciprocal = N1->getFlags().hasAllowReciprocal();
if (N1.getOpcode() == ISD::FSQRT) {
if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0)))
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
} else if (N1.getOpcode() == ISD::FP_EXTEND &&
- N1.getOperand(0).getOpcode() == ISD::FSQRT) {
+ N1.getOperand(0).getOpcode() == ISD::FSQRT &&
+ N1AllowReciprocal) {
if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
AddToWorklist(RV.getNode());
@@ -18694,7 +18798,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
}
// Fold X/Sqrt(X) -> Sqrt(X)
- if (Flags.hasNoSignedZeros() && Flags.hasAllowReassociation())
+ if ((Flags.hasNoSignedZeros() || DAG.canIgnoreSignBitOfZero(SDValue(N, 0))) &&
+ Flags.hasAllowReassociation())
if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
return N1;
@@ -18745,8 +18850,9 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
TLI.isOperationLegalOrCustom(ISD::FDIV, VT) &&
TLI.isOperationLegalOrCustom(ISD::FTRUNC, VT) &&
DAG.isKnownToBeAPowerOfTwoFP(N1)) {
- bool NeedsCopySign =
- !Flags.hasNoSignedZeros() && !DAG.cannotBeOrderedNegativeFP(N0);
+ bool NeedsCopySign = !Flags.hasNoSignedZeros() &&
+ !DAG.canIgnoreSignBitOfZero(SDValue(N, 0)) &&
+ !DAG.cannotBeOrderedNegativeFP(N0);
SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
SDValue MLA;
@@ -18831,6 +18937,26 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
+ if (VT != N1.getValueType())
+ return SDValue();
+
+ // If this is equivalent to a disjoint or, replace it with one. This can
+ // happen if the sign operand is a sign mask (i.e., x << sign_bit_position).
+ if (DAG.SignBitIsZeroFP(N0) &&
+ DAG.computeKnownBits(N1).Zero.isMaxSignedValue()) {
+ // TODO: Just directly match the shift pattern. computeKnownBits is heavy
+ // for such a narrowly targeted case.
+ EVT IntVT = VT.changeTypeToInteger();
+ // TODO: It appears to be profitable in some situations to unconditionally
+ // emit a fabs(n0) to perform this combine.
+ SDValue CastSrc0 = DAG.getNode(ISD::BITCAST, DL, IntVT, N0);
+ SDValue CastSrc1 = DAG.getNode(ISD::BITCAST, DL, IntVT, N1);
+
+ SDValue SignOr = DAG.getNode(ISD::OR, DL, IntVT, CastSrc0, CastSrc1,
+ SDNodeFlags::Disjoint);
+ return DAG.getNode(ISD::BITCAST, DL, VT, SignOr);
+ }
+
return SDValue();
}
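
The disjoint-or rewrite uses the bit-level meaning of copysign: when the magnitude operand's sign bit is known zero and the sign operand can have nothing but its sign bit set, the two bit patterns share no set bits, so an OR reproduces copysign exactly. A float-sized sketch (needs C++20 for std::bit_cast):

  #include <bit>
  #include <cassert>
  #include <cmath>
  #include <cstdint>

  int main() {
    float Mag = 2.5f; // magnitude operand: sign bit known zero
    const uint32_t SignMask = 0x80000000u;
    for (uint32_t S : {0u, SignMask}) {
      float Sign = std::bit_cast<float>(S);
      // copysign reduces to a disjoint OR of the raw bit patterns.
      uint32_t Or = std::bit_cast<uint32_t>(Mag) | S;
      assert(std::bit_cast<float>(Or) == std::copysign(Mag, Sign));
    }
  }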
@@ -19395,7 +19521,8 @@ SDValue DAGCombiner::visitFMinMax(SDNode *N) {
const SDNodeFlags Flags = N->getFlags();
unsigned Opc = N->getOpcode();
bool PropAllNaNsToQNaNs = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
- bool PropOnlySNaNsToQNaNs = Opc == ISD::FMINNUM || Opc == ISD::FMAXNUM;
+ bool ReturnsOtherForAllNaNs =
+ Opc == ISD::FMINIMUMNUM || Opc == ISD::FMAXIMUMNUM;
bool IsMin =
Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM || Opc == ISD::FMINIMUMNUM;
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
@@ -19414,32 +19541,30 @@ SDValue DAGCombiner::visitFMinMax(SDNode *N) {
// minnum(X, qnan) -> X
// maxnum(X, qnan) -> X
- // minnum(X, snan) -> qnan
- // maxnum(X, snan) -> qnan
// minimum(X, nan) -> qnan
// maximum(X, nan) -> qnan
// minimumnum(X, nan) -> X
// maximumnum(X, nan) -> X
if (AF.isNaN()) {
- if (PropAllNaNsToQNaNs || (AF.isSignaling() && PropOnlySNaNsToQNaNs)) {
+ if (PropAllNaNsToQNaNs) {
if (AF.isSignaling())
return DAG.getConstantFP(AF.makeQuiet(), SDLoc(N), VT);
return N->getOperand(1);
+ } else if (ReturnsOtherForAllNaNs || !AF.isSignaling()) {
+ return N->getOperand(0);
}
- return N->getOperand(0);
+ return SDValue();
}
// In the following folds, inf can be replaced with the largest finite
// float, if the ninf flag is set.
if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
- // minnum(X, -inf) -> -inf (ignoring sNaN -> qNaN propagation)
- // maxnum(X, +inf) -> +inf (ignoring sNaN -> qNaN propagation)
// minimum(X, -inf) -> -inf if nnan
// maximum(X, +inf) -> +inf if nnan
// minimumnum(X, -inf) -> -inf
// maximumnum(X, +inf) -> +inf
if (IsMin == AF.isNegative() &&
- (!PropAllNaNsToQNaNs || Flags.hasNoNaNs()))
+ (ReturnsOtherForAllNaNs || Flags.hasNoNaNs()))
return N->getOperand(1);
// minnum(X, +inf) -> X if nnan
@@ -23359,6 +23484,10 @@ SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
EVT SubVecVT = SubVec.getValueType();
EVT VT = DestVec.getValueType();
unsigned NumSrcElts = SubVecVT.getVectorNumElements();
+ // Bail out if the inserted value is larger than the vector element, as
+ // insert_vector_elt performs an implicit truncation in this case.
+ if (InsertVal.getValueType() != VT.getVectorElementType())
+ return SDValue();
// If the source only has a single vector element, the cost of creating and
// adding it to a vector is likely to exceed the cost of an insert_vector_elt.
if (NumSrcElts == 1)
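
A scalar illustration of the implicit truncation the new bail-out guards against (values are made up):

  #include <cassert>
  #include <cstdint>

  int main() {
    uint32_t InsertVal = 0x1234;      // scalar wider than the vector element
    uint8_t Elt = (uint8_t)InsertVal; // insert_vector_elt keeps low bits only
    assert(Elt == 0x34);              // high bits are dropped, so rewriting to
                                      // a shuffle would change the result
  }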
@@ -25395,7 +25524,7 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
!Op.getOperand(0).getValueType().isVector())
Ops.push_back(Op.getOperand(0));
else if (Op.isUndef())
- Ops.push_back(DAG.getNode(ISD::UNDEF, DL, SVT));
+ Ops.push_back(DAG.getNode(Op.getOpcode(), DL, SVT));
else
return SDValue();
@@ -25417,7 +25546,7 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
if (Op.getValueType() == SVT)
continue;
if (Op.isUndef())
- Op = DAG.getNode(ISD::UNDEF, DL, SVT);
+ Op = DAG.getNode(Op.getOpcode(), DL, SVT);
else
Op = DAG.getBitcast(SVT, Op);
}
@@ -28963,9 +29092,9 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
// over-conservative. It would be beneficial to be able to remember
// both potential memory locations. Since we are discarding
// src value info, don't do the transformation if the memory
- // locations are not in the default address space.
- LLD->getPointerInfo().getAddrSpace() != 0 ||
- RLD->getPointerInfo().getAddrSpace() != 0 ||
+ // locations are not in the same address space.
+ LLD->getPointerInfo().getAddrSpace() !=
+ RLD->getPointerInfo().getAddrSpace() ||
// We can't produce a CMOV of a TargetFrameIndex since we won't
// generate the address generation required.
LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
@@ -29047,6 +29176,9 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
// but the new load must be the minimum (most restrictive) alignment of the
// inputs.
Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
+ unsigned AddrSpace = LLD->getAddressSpace();
+ assert(AddrSpace == RLD->getAddressSpace());
+
MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
if (!RLD->isInvariant())
MMOFlags &= ~MachineMemOperand::MOInvariant;
@@ -29055,15 +29187,16 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
// FIXME: Discards pointer and AA info.
Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
- LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
- MMOFlags);
+ LLD->getChain(), Addr, MachinePointerInfo(AddrSpace),
+ Alignment, MMOFlags);
} else {
// FIXME: Discards pointer and AA info.
Load = DAG.getExtLoad(
LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
: LLD->getExtensionType(),
SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
- MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
+ MachinePointerInfo(AddrSpace), LLD->getMemoryVT(), Alignment,
+ MMOFlags);
}
// Users of the select now use the result of the load.
@@ -29707,7 +29840,8 @@ static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
return false;
};
- if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo)) {
+ if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo, /*AllowUndefs=*/false,
+ /*AllowTruncation=*/true)) {
if (!VT.isVector())
return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
// We need to create a build vector
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 507b2d6..5c84059 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1965,7 +1965,7 @@ Register FastISel::createResultReg(const TargetRegisterClass *RC) {
Register FastISel::constrainOperandRegClass(const MCInstrDesc &II, Register Op,
unsigned OpNum) {
if (Op.isVirtual()) {
- const TargetRegisterClass *RegClass = TII.getRegClass(II, OpNum, &TRI);
+ const TargetRegisterClass *RegClass = TII.getRegClass(II, OpNum);
if (!MRI.constrainRegClass(Op, RegClass)) {
// If it's not legal to COPY between the register classes, something
// has gone very wrong before we got here.
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index bb10cf6..4ad721b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -15,10 +15,12 @@
#include "InstrEmitter.h"
#include "SDNodeDbgValue.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -61,6 +63,8 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses,
unsigned N = Node->getNumOperands();
while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
--N;
+ if (N && Node->getOperand(N - 1).getOpcode() == ISD::DEACTIVATION_SYMBOL)
+ --N; // Ignore deactivation symbol if it exists.
if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
--N; // Ignore chain if it exists.
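
countOperands now peels trailing special operands in a fixed order: any glue, then an optional deactivation symbol, then an optional chain. A standalone sketch of that peeling order, with an enum standing in for SDValue kinds (illustrative only):

  #include <cassert>
  #include <vector>

  enum Kind { Normal, Glue, DeactivationSymbol, Chain };

  // Mirrors countOperands: strip trailing glue, then an optional deactivation
  // symbol, then an optional chain.
  static unsigned countRealOperands(const std::vector<Kind> &Ops) {
    unsigned N = (unsigned)Ops.size();
    while (N && Ops[N - 1] == Glue)
      --N;
    if (N && Ops[N - 1] == DeactivationSymbol)
      --N;
    if (N && Ops[N - 1] == Chain)
      --N;
    return N;
  }

  int main() {
    assert(countRealOperands({Normal, Normal, Chain, Glue}) == 2);
    assert(countRealOperands({Normal, DeactivationSymbol}) == 1);
    assert(countRealOperands({Normal, Chain, DeactivationSymbol, Glue}) == 1);
  }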
@@ -125,7 +129,7 @@ void InstrEmitter::EmitCopyFromReg(SDValue Op, bool IsClone, Register SrcReg,
const TargetRegisterClass *RC = nullptr;
if (i + II.getNumDefs() < II.getNumOperands()) {
RC = TRI->getAllocatableClass(
- TII->getRegClass(II, i + II.getNumDefs(), TRI));
+ TII->getRegClass(II, i + II.getNumDefs()));
}
if (!UseRC)
UseRC = RC;
@@ -197,7 +201,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
// register instead of creating a new vreg.
Register VRBase;
const TargetRegisterClass *RC =
- TRI->getAllocatableClass(TII->getRegClass(II, i, TRI));
+ TRI->getAllocatableClass(TII->getRegClass(II, i));
// Always let the value type influence the used register class. The
// constraints on the instruction may be too lax to represent the value
// type correctly. For example, a 64-bit float (X86::FR64) can't live in
@@ -330,7 +334,7 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB,
if (II) {
const TargetRegisterClass *OpRC = nullptr;
if (IIOpNum < II->getNumOperands())
- OpRC = TII->getRegClass(*II, IIOpNum, TRI);
+ OpRC = TII->getRegClass(*II, IIOpNum);
if (OpRC) {
unsigned MinNumRegs = MinRCSize;
@@ -409,8 +413,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, SDValue Op,
Register VReg = R->getReg();
MVT OpVT = Op.getSimpleValueType();
const TargetRegisterClass *IIRC =
- II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI))
- : nullptr;
+ II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum)) : nullptr;
const TargetRegisterClass *OpRC =
TLI->isTypeLegal(OpVT)
? TLI->getRegClassFor(OpVT,
@@ -733,6 +736,8 @@ MachineOperand GetMOForConstDbgOp(const SDDbgOperand &Op) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
if (CI->getBitWidth() > 64)
return MachineOperand::CreateCImm(CI);
+ if (CI->getBitWidth() == 1)
+ return MachineOperand::CreateImm(CI->getZExtValue());
return MachineOperand::CreateImm(CI->getSExtValue());
}
if (const ConstantFP *CF = dyn_cast<ConstantFP>(V))
@@ -1221,15 +1226,23 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
}
}
- if (SDNode *GluedNode = Node->getGluedNode()) {
- // FIXME: Possibly iterate over multiple glue nodes?
- if (GluedNode->getOpcode() ==
- ~(unsigned)TargetOpcode::CONVERGENCECTRL_GLUE) {
- Register VReg = getVR(GluedNode->getOperand(0), VRBaseMap);
- MachineOperand MO = MachineOperand::CreateReg(VReg, /*isDef=*/false,
- /*isImp=*/true);
- MIB->addOperand(MO);
- }
+ unsigned Op = Node->getNumOperands();
+ if (Op != 0 && Node->getOperand(Op - 1)->getOpcode() ==
+ ~(unsigned)TargetOpcode::CONVERGENCECTRL_GLUE) {
+ Register VReg = getVR(Node->getOperand(Op - 1)->getOperand(0), VRBaseMap);
+ MachineOperand MO = MachineOperand::CreateReg(VReg, /*isDef=*/false,
+ /*isImp=*/true);
+ MIB->addOperand(MO);
+ Op--;
+ }
+
+ if (Op != 0 &&
+ Node->getOperand(Op - 1)->getOpcode() == ISD::DEACTIVATION_SYMBOL) {
+ MI->setDeactivationSymbol(
+ *MF, const_cast<GlobalValue *>(
+ cast<DeactivationSymbolSDNode>(Node->getOperand(Op - 1))
+ ->getGlobal()));
+ Op--;
}
// Run post-isel target hook to adjust this instruction if needed.
@@ -1250,7 +1263,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
llvm_unreachable("This target-independent node should have been selected!");
case ISD::EntryToken:
case ISD::MERGE_VALUES:
- case ISD::TokenFactor: // fall thru
+ case ISD::TokenFactor:
+ case ISD::DEACTIVATION_SYMBOL:
break;
case ISD::CopyToReg: {
Register DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
@@ -1415,13 +1429,6 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
}
}
- // Add rounding control registers as implicit def for inline asm.
- if (MF->getFunction().hasFnAttribute(Attribute::StrictFP)) {
- ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters();
- for (MCPhysReg Reg : RCRegs)
- MIB.addReg(Reg, RegState::ImplicitDefine);
- }
-
// GCC inline assembly allows input operands to also be early-clobber
// output operands (so long as the operand is written only after it's
// used), but this does not match the semantics of our early-clobber flag.
@@ -1442,6 +1449,13 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
if (MD)
MIB.addMetadata(MD);
+ // Add rounding control registers as implicit def for inline asm.
+ if (MF->getFunction().hasFnAttribute(Attribute::StrictFP)) {
+ ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters();
+ for (MCPhysReg Reg : RCRegs)
+ MIB.addReg(Reg, RegState::ImplicitDefine);
+ }
+
MBB->insert(InsertPos, MIB);
break;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 431a810..7606bc8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -163,6 +163,8 @@ private:
RTLIB::Libcall CallI128);
void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ SDValue ExpandSincosStretLibCall(SDNode *Node) const;
+
SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT,
const SDLoc &dl);
SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT,
@@ -2125,10 +2127,11 @@ SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
bool IsSigned, EVT RetVT) {
EVT CodePtrTy = TLI.getPointerTy(DAG.getDataLayout());
SDValue Callee;
- if (const char *LibcallName = TLI.getLibcallName(LC))
- Callee = DAG.getExternalSymbol(LibcallName, CodePtrTy);
+ RTLIB::LibcallImpl LCImpl = TLI.getLibcallImpl(LC);
+ if (LCImpl != RTLIB::Unsupported)
+ Callee = DAG.getExternalSymbol(LCImpl, CodePtrTy);
else {
- Callee = DAG.getUNDEF(CodePtrTy);
+ Callee = DAG.getPOISON(CodePtrTy);
DAG.getContext()->emitError(Twine("no libcall available for ") +
Node->getOperationName(&DAG));
}
@@ -2155,7 +2158,7 @@ SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
bool signExtend = TLI.shouldSignExtendTypeInLibCall(RetTy, IsSigned);
CLI.setDebugLoc(SDLoc(Node))
.setChain(InChain)
- .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
+ .setLibCallee(TLI.getLibcallImplCallingConv(LCImpl), RetTy, Callee,
std::move(Args))
.setTailCall(isTailCall)
.setSExtResult(signExtend)
@@ -2379,8 +2382,18 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
Entry.IsZExt = !isSigned;
Args.push_back(Entry);
- SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
- TLI.getPointerTy(DAG.getDataLayout()));
+ RTLIB::LibcallImpl LibcallImpl = TLI.getLibcallImpl(LC);
+ if (LibcallImpl == RTLIB::Unsupported) {
+ DAG.getContext()->emitError(Twine("no libcall available for ") +
+ Node->getOperationName(&DAG));
+ SDValue Poison = DAG.getPOISON(RetVT);
+ Results.push_back(Poison);
+ Results.push_back(Poison);
+ return;
+ }
+
+ SDValue Callee =
+ DAG.getExternalSymbol(LibcallImpl, TLI.getPointerTy(DAG.getDataLayout()));
SDLoc dl(Node);
TargetLowering::CallLoweringInfo CLI(DAG);
@@ -2394,8 +2407,11 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
// Remainder is loaded back from the stack frame.
- SDValue Rem =
- DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr, MachinePointerInfo());
+ int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
+
+ SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr, PtrInfo);
Results.push_back(CallInfo.first);
Results.push_back(Rem);
}
@@ -2423,6 +2439,100 @@ static bool useSinCos(SDNode *Node) {
return false;
}
+SDValue SelectionDAGLegalize::ExpandSincosStretLibCall(SDNode *Node) const {
+ // For iOS, we want to call an alternative entry point: __sincos_stret,
+ // which returns the values in two S / D registers.
+ SDLoc dl(Node);
+ SDValue Arg = Node->getOperand(0);
+ EVT ArgVT = Arg.getValueType();
+ RTLIB::Libcall LC = RTLIB::getSINCOS_STRET(ArgVT);
+ RTLIB::LibcallImpl SincosStret = TLI.getLibcallImpl(LC);
+ if (SincosStret == RTLIB::Unsupported)
+ return SDValue();
+
+ /// There are 3 different ABI cases to handle:
+ /// - Direct return of separate fields in registers
+ /// - Single return as vector elements
+ /// - sret struct
+
+ const RTLIB::RuntimeLibcallsInfo &CallsInfo = TLI.getRuntimeLibcallsInfo();
+
+ const DataLayout &DL = DAG.getDataLayout();
+
+ auto [FuncTy, FuncAttrs] = CallsInfo.getFunctionTy(
+ *DAG.getContext(), TM.getTargetTriple(), DL, SincosStret);
+
+ Type *SincosStretRetTy = FuncTy->getReturnType();
+ CallingConv::ID CallConv = CallsInfo.getLibcallImplCallingConv(SincosStret);
+
+ SDValue Callee =
+ DAG.getExternalSymbol(SincosStret, TLI.getProgramPointerTy(DL));
+
+ TargetLowering::ArgListTy Args;
+ SDValue SRet;
+
+ int FrameIdx;
+ if (FuncTy->getParamType(0)->isPointerTy()) {
+ // Uses sret
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+
+ AttributeSet PtrAttrs = FuncAttrs.getParamAttrs(0);
+ Type *StructTy = PtrAttrs.getStructRetType();
+ const uint64_t ByteSize = DL.getTypeAllocSize(StructTy);
+ const Align StackAlign = DL.getPrefTypeAlign(StructTy);
+
+ FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
+ SRet = DAG.getFrameIndex(FrameIdx, TLI.getFrameIndexTy(DL));
+
+ TargetLowering::ArgListEntry Entry(SRet, FuncTy->getParamType(0));
+ Entry.IsSRet = true;
+ Entry.IndirectType = StructTy;
+ Entry.Alignment = StackAlign;
+
+ Args.push_back(Entry);
+ Args.emplace_back(Arg, FuncTy->getParamType(1));
+ } else {
+ Args.emplace_back(Arg, FuncTy->getParamType(0));
+ }
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl)
+ .setChain(DAG.getEntryNode())
+ .setLibCallee(CallConv, SincosStretRetTy, Callee, std::move(Args))
+ .setIsPostTypeLegalization();
+
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+
+ if (SRet) {
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
+ SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet, PtrInfo);
+
+ TypeSize StoreSize = ArgVT.getStoreSize();
+
+ // Address of cos field.
+ SDValue Add = DAG.getObjectPtrOffset(dl, SRet, StoreSize);
+ SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add,
+ PtrInfo.getWithOffset(StoreSize));
+
+ SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, LoadSin.getValue(0),
+ LoadCos.getValue(0));
+ }
+
+ if (!CallResult.first.getValueType().isVector())
+ return CallResult.first;
+
+ SDValue SinVal =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT, CallResult.first,
+ DAG.getVectorIdxConstant(0, dl));
+ SDValue CosVal =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT, CallResult.first,
+ DAG.getVectorIdxConstant(1, dl));
+ SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal);
+}
+
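
In the sret case above, the two results are read back from a stack slot laid out as sin followed by cos, the second load offset by the value type's store size. A plain-C++ analogue of that layout contract (illustrative; the real __sincos_stret ABI is per-target and this stand-in only shows the field order):

  #include <cassert>
  #include <cmath>

  // Stand-in for the __sincos_stret sret form: results land in a two-field
  // struct, sin at offset 0 and cos at offset sizeof(double).
  struct SinCos { double Sin, Cos; };

  static void sincosStret(SinCos *Ret, double X) {
    Ret->Sin = std::sin(X);
    Ret->Cos = std::cos(X);
  }

  int main() {
    SinCos R;
    sincosStret(&R, 0.0);
    assert(R.Sin == 0.0 && R.Cos == 1.0);
  }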
SDValue SelectionDAGLegalize::expandLdexp(SDNode *Node) const {
SDLoc dl(Node);
EVT VT = Node->getValueType(0);
@@ -3770,7 +3880,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
RTLIB::Libcall LC = RTLIB::getLDEXP(VT);
// Use the LibCall instead, it is very likely faster
// FIXME: Use separate LibCall action.
- if (TLI.getLibcallName(LC))
+ if (TLI.getLibcallImpl(LC) != RTLIB::Unsupported)
break;
if (SDValue Expanded = expandLdexp(Node)) {
@@ -3785,7 +3895,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
RTLIB::Libcall LC = RTLIB::getFREXP(Node->getValueType(0));
// Use the LibCall instead, it is very likely faster
// FIXME: Use separate LibCall action.
- if (TLI.getLibcallName(LC))
+ if (TLI.getLibcallImpl(LC) != RTLIB::Unsupported)
break;
if (SDValue Expanded = expandFrexp(Node)) {
@@ -4587,7 +4697,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::Libcall LC = RTLIB::getOUTLINE_ATOMIC(Opc, Order, VT);
EVT RetVT = Node->getValueType(0);
SmallVector<SDValue, 4> Ops;
- if (TLI.getLibcallName(LC)) {
+ if (TLI.getLibcallImpl(LC) != RTLIB::Unsupported) {
// If outline atomic available, prepare its arguments and expand.
Ops.append(Node->op_begin() + 2, Node->op_end());
Ops.push_back(Node->getOperand(1));
@@ -4730,12 +4840,30 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
case ISD::FSINCOS:
case ISD::FSINCOSPI: {
EVT VT = Node->getValueType(0);
+
+ if (Node->getOpcode() == ISD::FSINCOS) {
+ RTLIB::Libcall SincosStret = RTLIB::getSINCOS_STRET(VT);
+ if (SincosStret != RTLIB::UNKNOWN_LIBCALL) {
+ if (SDValue Expanded = ExpandSincosStretLibCall(Node)) {
+ Results.push_back(Expanded);
+ Results.push_back(Expanded.getValue(1));
+ break;
+ }
+ }
+ }
+
RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS
? RTLIB::getSINCOS(VT)
: RTLIB::getSINCOSPI(VT);
- bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results);
- if (!Expanded)
- llvm_unreachable("Expected scalar FSINCOS[PI] to expand to libcall!");
+ bool Expanded = TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results);
+ if (!Expanded) {
+ DAG.getContext()->emitError(Twine("no libcall available for ") +
+ Node->getOperationName(&DAG));
+ SDValue Poison = DAG.getPOISON(VT);
+ Results.push_back(Poison);
+ Results.push_back(Poison);
+ }
+
break;
}
case ISD::FLOG:
@@ -4825,7 +4953,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
EVT VT = Node->getValueType(0);
RTLIB::Libcall LC = Node->getOpcode() == ISD::FMODF ? RTLIB::getMODF(VT)
: RTLIB::getFREXP(VT);
- bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results,
+ bool Expanded = TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results,
/*CallRetResNo=*/0);
if (!Expanded)
llvm_unreachable("Expected scalar FFREXP/FMODF to expand to libcall!");
@@ -4835,7 +4963,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
case ISD::STRICT_FPOWI: {
RTLIB::Libcall LC = RTLIB::getPOWI(Node->getSimpleValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fpowi.");
- if (!TLI.getLibcallName(LC)) {
+ if (TLI.getLibcallImpl(LC) == RTLIB::Unsupported) {
// Some targets don't have a powi libcall; use pow instead.
if (Node->isStrictFPOpcode()) {
SDValue Exponent =
@@ -4866,7 +4994,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
// If the exponent does not match with sizeof(int) a libcall to
// RTLIB::POWI would use the wrong type for the argument.
DAG.getContext()->emitError("POWI exponent does not match sizeof(int)");
- Results.push_back(DAG.getUNDEF(Node->getValueType(0)));
+ Results.push_back(DAG.getPOISON(Node->getValueType(0)));
break;
}
ExpandFPLibCall(Node, LC, Results);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 58983cb..545b7f5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -717,7 +717,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_ExpOp(SDNode *N) {
RTLIB::Libcall LC = IsPowI ? RTLIB::getPOWI(N->getValueType(0))
: RTLIB::getLDEXP(N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fpowi.");
- if (!TLI.getLibcallName(LC)) {
+ if (TLI.getLibcallImpl(LC) == RTLIB::Unsupported) {
// Some targets don't have a powi libcall; use pow instead.
// FIXME: Implement this if some target needs it.
DAG.getContext()->emitError("do not know how to soften fpowi to fpow");
@@ -802,7 +802,8 @@ bool DAGTypeLegalizer::SoftenFloatRes_UnaryWithTwoFPResults(
assert(VT == N->getValueType(1) &&
"expected both return values to have the same type");
- if (!TLI.getLibcallName(LC))
+ RTLIB::LibcallImpl LCImpl = TLI.getLibcallImpl(LC);
+ if (LCImpl == RTLIB::Unsupported)
return false;
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
@@ -831,8 +832,9 @@ bool DAGTypeLegalizer::SoftenFloatRes_UnaryWithTwoFPResults(
CallOptions.setTypeListBeforeSoften({OpsVT}, VT)
.setOpsTypeOverrides(CallOpsTypeOverrides);
- auto [ReturnVal, Chain] = TLI.makeLibCall(DAG, LC, NVT, Ops, CallOptions, DL,
- /*Chain=*/SDValue());
+ auto [ReturnVal, Chain] =
+ TLI.makeLibCall(DAG, LCImpl, NVT, Ops, CallOptions, DL,
+ /*Chain=*/SDValue());
auto CreateStackLoad = [&, Chain = Chain](SDValue StackSlot) {
int FrameIdx = cast<FrameIndexSDNode>(StackSlot)->getIndex();
@@ -862,7 +864,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSINCOS(SDNode *N) {
RTLIB::Libcall CosLC = RTLIB::getCOS(VT);
SDValue SoftSin, SoftCos;
- if (!TLI.getLibcallName(SinLC) || !TLI.getLibcallName(CosLC)) {
+ if (TLI.getLibcallImpl(SinLC) == RTLIB::Unsupported ||
+ TLI.getLibcallImpl(CosLC) == RTLIB::Unsupported) {
DAG.getContext()->emitError("do not know how to soften fsincos");
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
@@ -1726,7 +1729,7 @@ void DAGTypeLegalizer::ExpandFloatRes_UnaryWithTwoFPResults(
SDNode *N, RTLIB::Libcall LC, std::optional<unsigned> CallRetResNo) {
assert(!N->isStrictFPOpcode() && "strictfp not implemented");
SmallVector<SDValue> Results;
- DAG.expandMultipleResultFPLibCall(LC, N, Results, CallRetResNo);
+ TLI.expandMultipleResultFPLibCall(DAG, LC, N, Results, CallRetResNo);
for (auto [ResNo, Res] : enumerate(Results)) {
SDValue Lo, Hi;
GetPairElements(Res, Lo, Hi);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 44e5a18..b9377fa 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -145,7 +145,6 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
break;
case ISD::SPLAT_VECTOR:
case ISD::SCALAR_TO_VECTOR:
- case ISD::EXPERIMENTAL_VP_SPLAT:
Res = PromoteIntRes_ScalarOp(N);
break;
case ISD::STEP_VECTOR: Res = PromoteIntRes_STEP_VECTOR(N); break;
@@ -2008,7 +2007,6 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
break;
case ISD::SPLAT_VECTOR:
case ISD::SCALAR_TO_VECTOR:
- case ISD::EXPERIMENTAL_VP_SPLAT:
Res = PromoteIntOp_ScalarOp(N);
break;
case ISD::VSELECT:
@@ -2363,9 +2361,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
SDValue DAGTypeLegalizer::PromoteIntOp_ScalarOp(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
- if (N->getOpcode() == ISD::EXPERIMENTAL_VP_SPLAT)
- return SDValue(
- DAG.UpdateNodeOperands(N, Op, N->getOperand(1), N->getOperand(2)), 0);
// Integer SPLAT_VECTOR/SCALAR_TO_VECTOR operands are implicitly truncated,
// so just promote the operand in place.
@@ -2692,7 +2687,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ExpOp(SDNode *N) {
RTLIB::Libcall LC = IsPowI ? RTLIB::getPOWI(N->getValueType(0))
: RTLIB::getLDEXP(N->getValueType(0));
- if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) {
+ RTLIB::LibcallImpl LCImpl = TLI.getLibcallImpl(LC);
+ if (LCImpl == RTLIB::Unsupported) {
// Scalarize vector FPOWI instead of promoting the type. This allows the
// scalar FPOWIs to be visited and converted to libcalls before promoting
// the type.
@@ -2719,7 +2715,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ExpOp(SDNode *N) {
CallOptions.setIsSigned(true);
SDValue Ops[2] = {N->getOperand(0 + OpOffset), N->getOperand(1 + OpOffset)};
std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(
- DAG, LC, N->getValueType(0), Ops, CallOptions, SDLoc(N), Chain);
+ DAG, LCImpl, N->getValueType(0), Ops, CallOptions, SDLoc(N), Chain);
ReplaceValueWith(SDValue(N, 0), Tmp.first);
if (IsStrict)
ReplaceValueWith(SDValue(N, 1), Tmp.second);
@@ -3128,7 +3124,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::USHLSAT: ExpandIntRes_SHLSAT(N, Lo, Hi); break;
case ISD::AVGCEILS:
- case ISD::AVGCEILU:
+ case ISD::AVGCEILU:
case ISD::AVGFLOORS:
case ISD::AVGFLOORU: ExpandIntRes_AVG(N, Lo, Hi); break;
@@ -3187,7 +3183,9 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
EVT RetVT = Node->getValueType(0);
TargetLowering::MakeLibCallOptions CallOptions;
SmallVector<SDValue, 4> Ops;
- if (TLI.getLibcallName(LC)) {
+
+ RTLIB::LibcallImpl LCImpl = TLI.getLibcallImpl(LC);
+ if (LCImpl != RTLIB::Unsupported) {
Ops.append(Node->op_begin() + 2, Node->op_end());
Ops.push_back(Node->getOperand(1));
} else {
@@ -3195,8 +3193,9 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Unexpected atomic op or value type!");
Ops.append(Node->op_begin() + 1, Node->op_end());
+ LCImpl = TLI.getLibcallImpl(LC);
}
- return TLI.makeLibCall(DAG, LC, RetVT, Ops, CallOptions, SDLoc(Node),
+ return TLI.makeLibCall(DAG, LCImpl, RetVT, Ops, CallOptions, SDLoc(Node),
Node->getOperand(0));
}
@@ -4097,21 +4096,21 @@ void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDLoc DL(N);
if (TLI.getOperationAction(ISD::CTPOP, VT) == TargetLoweringBase::LibCall) {
- RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
- if (VT == MVT::i32)
- LC = RTLIB::CTPOP_I32;
- else if (VT == MVT::i64)
- LC = RTLIB::CTPOP_I64;
- else if (VT == MVT::i128)
- LC = RTLIB::CTPOP_I128;
- assert(LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC) &&
+ RTLIB::Libcall LC = RTLIB::getCTPOP(VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"LibCall explicitly requested, but not available");
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT IntVT =
- EVT::getIntegerVT(*DAG.getContext(), DAG.getLibInfo().getIntSize());
- SDValue Res = TLI.makeLibCall(DAG, LC, IntVT, Op, CallOptions, DL).first;
- SplitInteger(DAG.getSExtOrTrunc(Res, DL, VT), Lo, Hi);
- return;
+
+ if (RTLIB::LibcallImpl LCImpl = TLI.getLibcallImpl(LC)) {
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT IntVT =
+ EVT::getIntegerVT(*DAG.getContext(), DAG.getLibInfo().getIntSize());
+ SDValue Res =
+ TLI.makeLibCall(DAG, LCImpl, IntVT, Op, CallOptions, DL).first;
+ SplitInteger(DAG.getSExtOrTrunc(Res, DL, VT), Lo, Hi);
+ return;
+ }
+
+ // If the function is not available, fall back on the expansion.
}
// ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo)
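
The expansion that follows is valid because population count distributes over concatenated halves. A brute-force 16-bit check (standalone sketch):

  #include <bitset>
  #include <cassert>
  #include <cstdint>

  int main() {
    for (uint32_t V = 0; V <= 0xFFFF; ++V) {
      uint8_t Lo = (uint8_t)(V & 0xFF), Hi = (uint8_t)(V >> 8);
      // ctpop(HiLo) == ctpop(Hi) + ctpop(Lo)
      assert(std::bitset<16>(V).count() ==
             std::bitset<8>(Lo).count() + std::bitset<8>(Hi).count());
    }
  }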
@@ -4236,55 +4235,19 @@ void DAGTypeLegalizer::ExpandIntRes_XROUND_XRINT(SDNode *N, SDValue &Lo,
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (N->getOpcode() == ISD::LROUND ||
N->getOpcode() == ISD::STRICT_LROUND) {
- if (VT == MVT::f32)
- LC = RTLIB::LROUND_F32;
- else if (VT == MVT::f64)
- LC = RTLIB::LROUND_F64;
- else if (VT == MVT::f80)
- LC = RTLIB::LROUND_F80;
- else if (VT == MVT::f128)
- LC = RTLIB::LROUND_F128;
- else if (VT == MVT::ppcf128)
- LC = RTLIB::LROUND_PPCF128;
+ LC = RTLIB::getLROUND(VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected lround input type!");
} else if (N->getOpcode() == ISD::LRINT ||
N->getOpcode() == ISD::STRICT_LRINT) {
- if (VT == MVT::f32)
- LC = RTLIB::LRINT_F32;
- else if (VT == MVT::f64)
- LC = RTLIB::LRINT_F64;
- else if (VT == MVT::f80)
- LC = RTLIB::LRINT_F80;
- else if (VT == MVT::f128)
- LC = RTLIB::LRINT_F128;
- else if (VT == MVT::ppcf128)
- LC = RTLIB::LRINT_PPCF128;
+ LC = RTLIB::getLRINT(VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected lrint input type!");
} else if (N->getOpcode() == ISD::LLROUND ||
N->getOpcode() == ISD::STRICT_LLROUND) {
- if (VT == MVT::f32)
- LC = RTLIB::LLROUND_F32;
- else if (VT == MVT::f64)
- LC = RTLIB::LLROUND_F64;
- else if (VT == MVT::f80)
- LC = RTLIB::LLROUND_F80;
- else if (VT == MVT::f128)
- LC = RTLIB::LLROUND_F128;
- else if (VT == MVT::ppcf128)
- LC = RTLIB::LLROUND_PPCF128;
+ LC = RTLIB::getLLROUND(VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llround input type!");
} else if (N->getOpcode() == ISD::LLRINT ||
N->getOpcode() == ISD::STRICT_LLRINT) {
- if (VT == MVT::f32)
- LC = RTLIB::LLRINT_F32;
- else if (VT == MVT::f64)
- LC = RTLIB::LLRINT_F64;
- else if (VT == MVT::f80)
- LC = RTLIB::LLRINT_F80;
- else if (VT == MVT::f128)
- LC = RTLIB::LLRINT_F128;
- else if (VT == MVT::ppcf128)
- LC = RTLIB::LLRINT_PPCF128;
+ LC = RTLIB::getLLRINT(VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llrint input type!");
} else
llvm_unreachable("Unexpected opcode!");
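
Each of the four ladders above now delegates to a per-operation RTLIB getter. A simplified sketch of the shape such a helper takes (hypothetical simplified types; the real helpers live in RuntimeLibcalls and key off an EVT):

  #include <cassert>

  // Hypothetical, simplified mirror of an RTLIB mapping helper: a single
  // switch on the value type replaces the open-coded if/else ladder that
  // each call site used to repeat.
  enum Libcall {
    LROUND_F32, LROUND_F64, LROUND_F80, LROUND_F128, LROUND_PPCF128,
    UNKNOWN_LIBCALL
  };
  enum SimpleVT { f32, f64, f80, f128, ppcf128, other };

  static Libcall getLROUND(SimpleVT VT) {
    switch (VT) {
    case f32:     return LROUND_F32;
    case f64:     return LROUND_F64;
    case f80:     return LROUND_F80;
    case f128:    return LROUND_F128;
    case ppcf128: return LROUND_PPCF128;
    default:      return UNKNOWN_LIBCALL;
    }
  }

  int main() {
    assert(getLROUND(f64) == LROUND_F64);
    assert(getLROUND(other) == UNKNOWN_LIBCALL);
  }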
@@ -4444,17 +4407,9 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
return;
// If nothing else, we can make a libcall.
- RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
- if (VT == MVT::i16)
- LC = RTLIB::MUL_I16;
- else if (VT == MVT::i32)
- LC = RTLIB::MUL_I32;
- else if (VT == MVT::i64)
- LC = RTLIB::MUL_I64;
- else if (VT == MVT::i128)
- LC = RTLIB::MUL_I128;
-
- if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) {
+ RTLIB::Libcall LC = RTLIB::getMUL(VT);
+ RTLIB::LibcallImpl LCImpl = TLI.getLibcallImpl(LC);
+ if (LCImpl == RTLIB::Unsupported) {
// Perform a wide multiplication where the wide type is the original VT and
// the 4 parts are the split arguments.
TLI.forceExpandMultiply(DAG, dl, /*Signed=*/false, Lo, Hi, LL, RL, LH, RH);
@@ -4466,8 +4421,8 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setIsSigned(true);
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first,
- Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LCImpl, VT, Ops, CallOptions, dl).first, Lo,
+ Hi);
}
void DAGTypeLegalizer::ExpandIntRes_READCOUNTER(SDNode *N, SDValue &Lo,
@@ -4824,15 +4779,7 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
return;
}
- RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
- if (VT == MVT::i16)
- LC = RTLIB::SDIV_I16;
- else if (VT == MVT::i32)
- LC = RTLIB::SDIV_I32;
- else if (VT == MVT::i64)
- LC = RTLIB::SDIV_I64;
- else if (VT == MVT::i128)
- LC = RTLIB::SDIV_I128;
+ RTLIB::Libcall LC = RTLIB::getSDIV(VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
TargetLowering::MakeLibCallOptions CallOptions;
@@ -5039,45 +4986,26 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
bool isSigned;
if (Opc == ISD::SHL) {
isSigned = false; /*sign irrelevant*/
- if (VT == MVT::i16)
- LC = RTLIB::SHL_I16;
- else if (VT == MVT::i32)
- LC = RTLIB::SHL_I32;
- else if (VT == MVT::i64)
- LC = RTLIB::SHL_I64;
- else if (VT == MVT::i128)
- LC = RTLIB::SHL_I128;
+ LC = RTLIB::getSHL(VT);
} else if (Opc == ISD::SRL) {
isSigned = false;
- if (VT == MVT::i16)
- LC = RTLIB::SRL_I16;
- else if (VT == MVT::i32)
- LC = RTLIB::SRL_I32;
- else if (VT == MVT::i64)
- LC = RTLIB::SRL_I64;
- else if (VT == MVT::i128)
- LC = RTLIB::SRL_I128;
+ LC = RTLIB::getSRL(VT);
} else {
assert(Opc == ISD::SRA && "Unknown shift!");
isSigned = true;
- if (VT == MVT::i16)
- LC = RTLIB::SRA_I16;
- else if (VT == MVT::i32)
- LC = RTLIB::SRA_I32;
- else if (VT == MVT::i64)
- LC = RTLIB::SRA_I64;
- else if (VT == MVT::i128)
- LC = RTLIB::SRA_I128;
+ LC = RTLIB::getSRA(VT);
}
- if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
+ if (RTLIB::LibcallImpl LibcallImpl = TLI.getLibcallImpl(LC)) {
EVT ShAmtTy =
EVT::getIntegerVT(*DAG.getContext(), DAG.getLibInfo().getIntSize());
SDValue ShAmt = DAG.getZExtOrTrunc(N->getOperand(1), dl, ShAmtTy);
SDValue Ops[2] = {N->getOperand(0), ShAmt};
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setIsSigned(isSigned);
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
+ SplitInteger(
+ TLI.makeLibCall(DAG, LibcallImpl, VT, Ops, CallOptions, dl).first, Lo,
+ Hi);
return;
}
@@ -5153,15 +5081,7 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
return;
}
- RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
- if (VT == MVT::i16)
- LC = RTLIB::SREM_I16;
- else if (VT == MVT::i32)
- LC = RTLIB::SREM_I32;
- else if (VT == MVT::i64)
- LC = RTLIB::SREM_I64;
- else if (VT == MVT::i128)
- LC = RTLIB::SREM_I128;
+ RTLIB::Libcall LC = RTLIB::getSREM(VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
TargetLowering::MakeLibCallOptions CallOptions;
@@ -5244,18 +5164,13 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext());
// Replace this with a libcall that will check overflow.
- RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
- if (VT == MVT::i32)
- LC = RTLIB::MULO_I32;
- else if (VT == MVT::i64)
- LC = RTLIB::MULO_I64;
- else if (VT == MVT::i128)
- LC = RTLIB::MULO_I128;
+ RTLIB::Libcall LC = RTLIB::getMULO(VT);
+ RTLIB::LibcallImpl LCImpl = TLI.getLibcallImpl(LC);
// If we don't have the libcall or if the function we are compiling is the
// implementation of the expected libcall (avoid inf-loop), expand inline.
- if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC) ||
- TLI.getLibcallName(LC) == DAG.getMachineFunction().getName()) {
+ if (LCImpl == RTLIB::Unsupported ||
+ TLI.getLibcallImplName(LCImpl) == DAG.getMachineFunction().getName()) {
// FIXME: This is not an optimal expansion, but better than crashing.
SDValue MulLo, MulHi;
TLI.forceExpandWideMUL(DAG, dl, /*Signed=*/true, N->getOperand(0),
@@ -5293,12 +5208,13 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
Entry.IsZExt = false;
Args.push_back(Entry);
- SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT);
+ SDValue Func = DAG.getExternalSymbol(LCImpl, PtrVT);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(Chain)
- .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, std::move(Args))
+ .setLibCallee(TLI.getLibcallImplCallingConv(LCImpl), RetTy, Func,
+ std::move(Args))
.setSExtResult();
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -5341,15 +5257,7 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
}
}
- RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
- if (VT == MVT::i16)
- LC = RTLIB::UDIV_I16;
- else if (VT == MVT::i32)
- LC = RTLIB::UDIV_I32;
- else if (VT == MVT::i64)
- LC = RTLIB::UDIV_I64;
- else if (VT == MVT::i128)
- LC = RTLIB::UDIV_I128;
+ RTLIB::Libcall LC = RTLIB::getUDIV(VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!");
TargetLowering::MakeLibCallOptions CallOptions;
@@ -5384,15 +5292,7 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
}
}
- RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
- if (VT == MVT::i16)
- LC = RTLIB::UREM_I16;
- else if (VT == MVT::i32)
- LC = RTLIB::UREM_I32;
- else if (VT == MVT::i64)
- LC = RTLIB::UREM_I64;
- else if (VT == MVT::i128)
- LC = RTLIB::UREM_I128;
+ RTLIB::Libcall LC = RTLIB::getUREM(VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!");
TargetLowering::MakeLibCallOptions CallOptions;
@@ -5551,7 +5451,6 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
break;
case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break;
case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break;
- case ISD::EXPERIMENTAL_VP_SPLAT:
case ISD::SPLAT_VECTOR: Res = ExpandIntOp_SPLAT_VECTOR(N); break;
case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break;
case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break;
@@ -6195,10 +6094,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ScalarOp(SDNode *N) {
EVT NOutElemVT = NOutVT.getVectorElementType();
SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutElemVT, N->getOperand(0));
- if (N->isVPOpcode())
- return DAG.getNode(N->getOpcode(), dl, NOutVT, Op, N->getOperand(1),
- N->getOperand(2));
-
return DAG.getNode(N->getOpcode(), dl, NOutVT, Op);
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index ede522e..79384de 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -990,7 +990,6 @@ private:
bool SplitSETCC = false);
void SplitVecRes_VECTOR_COMPRESS(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_VP_SPLAT(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -1143,7 +1142,6 @@ private:
SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_VP_SCATTER(SDNode* N, unsigned OpNo);
- SDValue WidenVecOp_VP_SPLAT(SDNode *N, unsigned OpNo);
SDValue WidenVecOp_SETCC(SDNode* N);
SDValue WidenVecOp_STRICT_FSETCC(SDNode* N);
SDValue WidenVecOp_VSELECT(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 8e423c4..e8d9bce 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -534,6 +534,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::PARTIAL_REDUCE_UMLA:
case ISD::PARTIAL_REDUCE_SMLA:
case ISD::PARTIAL_REDUCE_SUMLA:
+ case ISD::PARTIAL_REDUCE_FMLA:
Action =
TLI.getPartialReduceMLAAction(Op.getOpcode(), Node->getValueType(0),
Node->getOperand(1).getValueType());
@@ -1243,6 +1244,7 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
case ISD::PARTIAL_REDUCE_UMLA:
case ISD::PARTIAL_REDUCE_SMLA:
case ISD::PARTIAL_REDUCE_SUMLA:
+ case ISD::PARTIAL_REDUCE_FMLA:
Results.push_back(TLI.expandPartialReduceMLA(Node, DAG));
return;
case ISD::VECREDUCE_SEQ_FADD:
@@ -1268,18 +1270,23 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
break;
case ISD::FSINCOS:
case ISD::FSINCOSPI: {
- EVT VT = Node->getValueType(0).getVectorElementType();
+ EVT VT = Node->getValueType(0);
RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS
? RTLIB::getSINCOS(VT)
: RTLIB::getSINCOSPI(VT);
- if (DAG.expandMultipleResultFPLibCall(LC, Node, Results))
+ if (LC != RTLIB::UNKNOWN_LIBCALL &&
+ TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results))
return;
+
+ // TODO: Try to see if there's a narrower call available to use before
+ // scalarizing.
break;
}
case ISD::FMODF: {
- RTLIB::Libcall LC =
- RTLIB::getMODF(Node->getValueType(0).getVectorElementType());
- if (DAG.expandMultipleResultFPLibCall(LC, Node, Results,
+ EVT VT = Node->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::getMODF(VT);
+ if (LC != RTLIB::UNKNOWN_LIBCALL &&
+ TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results,
/*CallRetResNo=*/0))
return;
break;
@@ -1822,7 +1829,7 @@ SDValue VectorLegalizer::ExpandLOOP_DEPENDENCE_MASK(SDNode *N) {
// If the difference is positive then some elements may alias
EVT CmpVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
Diff.getValueType());
- SDValue Zero = DAG.getTargetConstant(0, DL, PtrVT);
+ SDValue Zero = DAG.getConstant(0, DL, PtrVT);
SDValue Cmp = DAG.getSetCC(DL, CmpVT, Diff, Zero,
IsReadAfterWrite ? ISD::SETEQ : ISD::SETLE);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index bb4a8d9..da3102d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -413,7 +413,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOOP_DEPENDENCE_MASK(SDNode *N) {
SDValue Diff = DAG.getNode(ISD::SUB, DL, PtrVT, SinkValue, SourceValue);
EVT CmpVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
Diff.getValueType());
- SDValue Zero = DAG.getTargetConstant(0, DL, PtrVT);
+ SDValue Zero = DAG.getConstant(0, DL, PtrVT);
return DAG.getNode(ISD::OR, DL, CmpVT,
DAG.getSetCC(DL, CmpVT, Diff, EltSize, ISD::SETGE),
DAG.getSetCC(DL, CmpVT, Diff, Zero, ISD::SETEQ));
@@ -1091,14 +1091,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) {
return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
}
-SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N,
+SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N,
unsigned OpNo) {
assert(OpNo == 1 && "Wrong operand for scalarization!");
SDValue Elt = GetScalarizedVector(N->getOperand(1));
- SDValue Res = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N),
- { N->getValueType(0).getVectorElementType(),
- MVT::Other },
- { N->getOperand(0), Elt, N->getOperand(2) });
+ SDValue Res =
+ DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N),
+ {N->getValueType(0).getVectorElementType(), MVT::Other},
+ {N->getOperand(0), Elt, N->getOperand(2)});
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -1217,7 +1217,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FCOPYSIGN: SplitVecRes_FPOp_MultiType(N, Lo, Hi); break;
case ISD::IS_FPCLASS: SplitVecRes_IS_FPCLASS(N, Lo, Hi); break;
case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
- case ISD::EXPERIMENTAL_VP_SPLAT: SplitVecRes_VP_SPLAT(N, Lo, Hi); break;
case ISD::SPLAT_VECTOR:
case ISD::SCALAR_TO_VECTOR:
SplitVecRes_ScalarOp(N, Lo, Hi);
@@ -1474,6 +1473,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::PARTIAL_REDUCE_UMLA:
case ISD::PARTIAL_REDUCE_SMLA:
case ISD::PARTIAL_REDUCE_SUMLA:
+ case ISD::PARTIAL_REDUCE_FMLA:
SplitVecRes_PARTIAL_REDUCE_MLA(N, Lo, Hi);
break;
case ISD::GET_ACTIVE_LANE_MASK:
@@ -1701,10 +1701,8 @@ void DAGTypeLegalizer::SplitVecRes_LOOP_DEPENDENCE_MASK(SDNode *N, SDValue &Lo,
Lo = DAG.getNode(N->getOpcode(), DL, LoVT, PtrA, PtrB, N->getOperand(2));
unsigned EltSize = N->getConstantOperandVal(2);
- unsigned Offset = EltSize * HiVT.getVectorMinNumElements();
- SDValue Addend = HiVT.isScalableVT()
- ? DAG.getVScale(DL, MVT::i64, APInt(64, Offset))
- : DAG.getConstant(Offset, DL, MVT::i64);
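+  // getElementCount emits a plain constant for fixed-length types and a
+  // VSCALE multiple for scalable ones, so both cases share one path.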
+ ElementCount Offset = HiVT.getVectorElementCount() * EltSize;
+ SDValue Addend = DAG.getElementCount(DL, MVT::i64, Offset);
PtrA = DAG.getNode(ISD::ADD, DL, MVT::i64, PtrA, Addend);
Hi = DAG.getNode(N->getOpcode(), DL, HiVT, PtrA, PtrB, N->getOperand(2));
@@ -2185,23 +2183,13 @@ void DAGTypeLegalizer::SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo,
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
Lo = DAG.getNode(N->getOpcode(), dl, LoVT, N->getOperand(0));
if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
- Hi = DAG.getUNDEF(HiVT);
+ Hi = DAG.getPOISON(HiVT);
} else {
assert(N->getOpcode() == ISD::SPLAT_VECTOR && "Unexpected opcode");
Hi = Lo;
}
}
-void DAGTypeLegalizer::SplitVecRes_VP_SPLAT(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
- SDLoc dl(N);
- auto [LoVT, HiVT] = DAG.GetSplitDestVTs(N->getValueType(0));
- auto [MaskLo, MaskHi] = SplitMask(N->getOperand(1));
- auto [EVLLo, EVLHi] = DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl);
- Lo = DAG.getNode(N->getOpcode(), dl, LoVT, N->getOperand(0), MaskLo, EVLLo);
- Hi = DAG.getNode(N->getOpcode(), dl, HiVT, N->getOperand(0), MaskHi, EVLHi);
-}
-
void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
SDValue &Hi) {
assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
@@ -2362,7 +2350,7 @@ void DAGTypeLegalizer::SplitVecRes_VP_LOAD_FF(VPLoadFFSDNode *LD, SDValue &Lo,
Lo = DAG.getLoadFFVP(LoVT, dl, Ch, Ptr, MaskLo, EVLLo, MMO);
// Fill the upper half with poison.
- Hi = DAG.getUNDEF(HiVT);
+ Hi = DAG.getPOISON(HiVT);
ReplaceValueWith(SDValue(LD, 1), Lo.getValue(1));
ReplaceValueWith(SDValue(LD, 2), Lo.getValue(2));
@@ -2464,6 +2452,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
SDValue PassThru = MLD->getPassThru();
Align Alignment = MLD->getBaseAlign();
ISD::LoadExtType ExtType = MLD->getExtensionType();
+ MachineMemOperand::Flags MMOFlags = MLD->getMemOperand()->getFlags();
// Split Mask operand
SDValue MaskLo, MaskHi;
@@ -2489,9 +2478,8 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MLD->getPointerInfo(), MachineMemOperand::MOLoad,
- LocationSize::beforeOrAfterPointer(), Alignment, MLD->getAAInfo(),
- MLD->getRanges());
+ MLD->getPointerInfo(), MMOFlags, LocationSize::beforeOrAfterPointer(),
+ Alignment, MLD->getAAInfo(), MLD->getRanges());
Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, Offset, MaskLo, PassThruLo, LoMemVT,
MMO, MLD->getAddressingMode(), ExtType,
@@ -2514,8 +2502,8 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
LoMemVT.getStoreSize().getFixedValue());
MMO = DAG.getMachineFunction().getMachineMemOperand(
- MPI, MachineMemOperand::MOLoad, LocationSize::beforeOrAfterPointer(),
- Alignment, MLD->getAAInfo(), MLD->getRanges());
+ MPI, MMOFlags, LocationSize::beforeOrAfterPointer(), Alignment,
+ MLD->getAAInfo(), MLD->getRanges());
Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi,
HiMemVT, MMO, MLD->getAddressingMode(), ExtType,
@@ -2921,7 +2909,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
Input2->getOpcode() == ISD::BUILD_VECTOR &&
"Expected build vector node.");
EVT EltVT = NewVT.getVectorElementType();
- SmallVector<SDValue> Ops(NewElts, DAG.getUNDEF(EltVT));
+ SmallVector<SDValue> Ops(NewElts, DAG.getPOISON(EltVT));
for (unsigned I = 0; I < NewElts; ++I) {
if (Mask[I] == PoisonMaskElem)
continue;
@@ -3689,6 +3677,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::PARTIAL_REDUCE_UMLA:
case ISD::PARTIAL_REDUCE_SMLA:
case ISD::PARTIAL_REDUCE_SUMLA:
+ case ISD::PARTIAL_REDUCE_FMLA:
Res = SplitVecOp_PARTIAL_REDUCE_MLA(N);
break;
}
@@ -3840,16 +3829,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
InVT.getVectorElementCount());
if (N->isStrictFPOpcode()) {
- Lo = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other },
- { N->getOperand(0), Lo });
- Hi = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other },
- { N->getOperand(0), Hi });
+ Lo = DAG.getNode(N->getOpcode(), dl, {OutVT, MVT::Other},
+ {N->getOperand(0), Lo});
+ Hi = DAG.getNode(N->getOpcode(), dl, {OutVT, MVT::Other},
+ {N->getOperand(0), Hi});
// Build a factor node to remember that this operation is independent
// of the other one.
SDValue Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
-
+
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Ch);
@@ -3938,43 +3927,55 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
GetSplitVector(N->getOperand(0), Lo, Hi);
- uint64_t LoEltsMin = Lo.getValueType().getVectorMinNumElements();
- uint64_t IdxVal = Idx->getAsZExtVal();
+ ElementCount LoElts = Lo.getValueType().getVectorElementCount();
+ // Note: For scalable vectors, the index is scaled by vscale.
+ ElementCount IdxVal =
+ ElementCount::get(Idx->getAsZExtVal(), SubVT.isScalableVector());
+ uint64_t IdxValMin = IdxVal.getKnownMinValue();
- unsigned NumResultElts = SubVT.getVectorMinNumElements();
+ EVT SrcVT = N->getOperand(0).getValueType();
+ ElementCount NumResultElts = SubVT.getVectorElementCount();
- if (IdxVal < LoEltsMin) {
- // If the extracted elements are all in the low half, do a simple extract.
- if (IdxVal + NumResultElts <= LoEltsMin)
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
+ // If the extracted elements are all in the low half, do a simple extract.
+ if (ElementCount::isKnownLE(IdxVal + NumResultElts, LoElts))
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
+ unsigned LoEltsMin = LoElts.getKnownMinValue();
+ if (IdxValMin < LoEltsMin && SubVT.isFixedLengthVector() &&
+ SrcVT.isFixedLengthVector()) {
// Extracted subvector crosses vector split, so we need to blend the two
// halves.
// TODO: May be able to emit partial extract_subvector.
SmallVector<SDValue, 8> Elts;
- Elts.reserve(NumResultElts);
+ Elts.reserve(NumResultElts.getFixedValue());
- DAG.ExtractVectorElements(Lo, Elts, /*Start=*/IdxVal,
- /*Count=*/LoEltsMin - IdxVal);
+ // This is not valid for scalable vectors. If SubVT is scalable, this is the
+    // same as unrolling a scalable dimension (invalid). If SrcVT is scalable,
+ // `Lo[LoEltsMin]` may not be the last element of `Lo`.
+ DAG.ExtractVectorElements(Lo, Elts, /*Start=*/IdxValMin,
+ /*Count=*/LoEltsMin - IdxValMin);
DAG.ExtractVectorElements(Hi, Elts, /*Start=*/0,
/*Count=*/SubVT.getVectorNumElements() -
Elts.size());
return DAG.getBuildVector(SubVT, dl, Elts);
}
- EVT SrcVT = N->getOperand(0).getValueType();
if (SubVT.isScalableVector() == SrcVT.isScalableVector()) {
- uint64_t ExtractIdx = IdxVal - LoEltsMin;
- if (ExtractIdx % NumResultElts == 0)
- return DAG.getExtractSubvector(dl, SubVT, Hi, ExtractIdx);
+ ElementCount ExtractIdx = IdxVal - LoElts;
+ if (ExtractIdx.isKnownMultipleOf(NumResultElts))
+ return DAG.getExtractSubvector(dl, SubVT, Hi,
+ ExtractIdx.getKnownMinValue());
- // We cannot create an extract_subvector that isn't a multiple of the result
- // size, which may go out of bounds for the last elements. Shuffle the
- // desired elements down to 0 and do a simple 0 extract.
EVT HiVT = Hi.getValueType();
+ assert(HiVT.isFixedLengthVector() &&
+ "Only fixed-vector extracts are supported in this case");
+
+ // We cannot create an extract_subvector that isn't a multiple of the
+ // result size, which may go out of bounds for the last elements. Shuffle
+ // the desired elements down to 0 and do a simple 0 extract.
SmallVector<int, 8> Mask(HiVT.getVectorNumElements(), -1);
- for (int I = 0; I != static_cast<int>(NumResultElts); ++I)
- Mask[I] = ExtractIdx + I;
+ for (int I = 0; I != int(NumResultElts.getFixedValue()); ++I)
+ Mask[I] = int(ExtractIdx.getFixedValue()) + I;
SDValue Shuffle =
DAG.getVectorShuffle(HiVT, dl, Hi, DAG.getPOISON(HiVT), Mask);
@@ -4636,13 +4637,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
InVT.getVectorElementCount());
if (N->isStrictFPOpcode()) {
- Lo = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other },
- { N->getOperand(0), Lo, N->getOperand(2) });
- Hi = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other },
- { N->getOperand(0), Hi, N->getOperand(2) });
+ Lo = DAG.getNode(N->getOpcode(), DL, {OutVT, MVT::Other},
+ {N->getOperand(0), Lo, N->getOperand(2)});
+ Hi = DAG.getNode(N->getOpcode(), DL, {OutVT, MVT::Other},
+ {N->getOperand(0), Hi, N->getOperand(2)});
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
- SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
Lo.getValue(1), Hi.getValue(1));
ReplaceValueWith(SDValue(N, 1), NewChain);
} else if (N->getOpcode() == ISD::VP_FP_ROUND) {
@@ -4863,7 +4864,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::STEP_VECTOR:
case ISD::SPLAT_VECTOR:
case ISD::SCALAR_TO_VECTOR:
- case ISD::EXPERIMENTAL_VP_SPLAT:
Res = WidenVecRes_ScalarOp(N);
break;
case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break;
@@ -5515,7 +5515,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
EOps.push_back(Op);
}
- EVT WidenVT[] = {WidenEltVT, MVT::Other};
+ EVT WidenVT[] = {WidenEltVT, MVT::Other};
SDValue Oper = DAG.getNode(Opcode, dl, WidenVT, EOps);
ConcatOps[ConcatEnd++] = Oper;
Chains.push_back(Oper.getValue(1));
@@ -5652,7 +5652,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
// Widen the input and call convert on the widened input vector.
unsigned NumConcat =
WidenEC.getKnownMinValue() / InVTEC.getKnownMinValue();
- SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
+ SmallVector<SDValue, 16> Ops(NumConcat, DAG.getPOISON(InVT));
Ops[0] = InOp;
SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
if (N->getNumOperands() == 1)
@@ -5671,7 +5671,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
// Otherwise unroll into some nasty scalar code and rebuild the vector.
EVT EltVT = WidenVT.getVectorElementType();
- SmallVector<SDValue, 16> Ops(WidenEC.getFixedValue(), DAG.getUNDEF(EltVT));
+ SmallVector<SDValue, 16> Ops(WidenEC.getFixedValue(), DAG.getPOISON(EltVT));
// Use the original element count so we don't do more scalar opts than
// necessary.
unsigned MinElts = N->getValueType(0).getVectorNumElements();
@@ -5754,7 +5754,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
// Otherwise unroll into some nasty scalar code and rebuild the vector.
EVT EltVT = WidenVT.getVectorElementType();
std::array<EVT, 2> EltVTs = {{EltVT, MVT::Other}};
- SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
+ SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getPOISON(EltVT));
SmallVector<SDValue, 32> OpChains;
// Use the original element count so we don't do more scalar opts than
// necessary.
@@ -5817,7 +5817,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
}
while (Ops.size() != WidenNumElts)
- Ops.push_back(DAG.getUNDEF(WidenSVT));
+ Ops.push_back(DAG.getPOISON(WidenSVT));
return DAG.getBuildVector(WidenVT, DL, Ops);
}
@@ -6024,7 +6024,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
// input and then widening it. To avoid this, we widen the input only if
// it results in a legal type.
if (WidenSize % InSize == 0) {
- SmallVector<SDValue, 16> Ops(NewNumParts, DAG.getUNDEF(InVT));
+ SmallVector<SDValue, 16> Ops(NewNumParts, DAG.getPOISON(InVT));
Ops[0] = InOp;
NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops);
@@ -6032,7 +6032,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
SmallVector<SDValue, 16> Ops;
DAG.ExtractVectorElements(InOp, Ops);
Ops.append(WidenSize / InScalarSize - Ops.size(),
- DAG.getUNDEF(InVT.getVectorElementType()));
+ DAG.getPOISON(InVT.getVectorElementType()));
NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops);
}
@@ -6055,11 +6055,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOOP_DEPENDENCE_MASK(SDNode *N) {
SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
SDLoc dl(N);
- // Build a vector with undefined for the new nodes.
+ // Build a vector with poison for the new nodes.
EVT VT = N->getValueType(0);
// Integer BUILD_VECTOR operands may be larger than the node's vector element
- // type. The UNDEFs need to have the same type as the existing operands.
+ // type. The POISONs need to have the same type as the existing operands.
EVT EltVT = N->getOperand(0).getValueType();
unsigned NumElts = VT.getVectorNumElements();
@@ -6068,7 +6068,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
SmallVector<SDValue, 16> NewOps(N->ops());
assert(WidenNumElts >= NumElts && "Shrinking vector instead of widening!");
- NewOps.append(WidenNumElts - NumElts, DAG.getUNDEF(EltVT));
+ NewOps.append(WidenNumElts - NumElts, DAG.getPOISON(EltVT));
return DAG.getBuildVector(WidenVT, dl, NewOps);
}
@@ -6086,7 +6086,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
if (WidenNumElts % NumInElts == 0) {
// Add undef vectors to widen to correct length.
unsigned NumConcat = WidenNumElts / NumInElts;
- SDValue UndefVal = DAG.getUNDEF(InVT);
+ SDValue UndefVal = DAG.getPOISON(InVT);
SmallVector<SDValue, 16> Ops(NumConcat);
for (unsigned i=0; i < NumOperands; ++i)
Ops[i] = N->getOperand(i);
@@ -6144,7 +6144,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
for (unsigned j = 0; j < NumInElts; ++j)
Ops[Idx++] = DAG.getExtractVectorElt(dl, EltVT, InOp, j);
}
- SDValue UndefVal = DAG.getUNDEF(EltVT);
+ SDValue UndefVal = DAG.getPOISON(EltVT);
for (; Idx < WidenNumElts; ++Idx)
Ops[Idx] = UndefVal;
return DAG.getBuildVector(WidenVT, dl, Ops);
@@ -6211,13 +6211,38 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
Parts.push_back(
DAG.getExtractSubvector(dl, PartVT, InOp, IdxVal + I * GCD));
for (; I < WidenNumElts / GCD; ++I)
- Parts.push_back(DAG.getUNDEF(PartVT));
+ Parts.push_back(DAG.getPOISON(PartVT));
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
}
- report_fatal_error("Don't know how to widen the result of "
- "EXTRACT_SUBVECTOR for scalable vectors");
+ // Fallback to extracting through memory.
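+  // Spill the input vector to a stack slot and reload only the requested
+  // sub-vector with a masked load, leaving the widened tail lanes poison.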
+
+ Align Alignment = DAG.getReducedAlign(InVT, /*UseABI=*/false);
+ SDValue StackPtr = DAG.CreateStackTemporary(InVT.getStoreSize(), Alignment);
+ MachineFunction &MF = DAG.getMachineFunction();
+ int FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+
+ MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
+ PtrInfo, MachineMemOperand::MOStore,
+ LocationSize::beforeOrAfterPointer(), Alignment);
+ MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
+ PtrInfo, MachineMemOperand::MOLoad,
+ LocationSize::beforeOrAfterPointer(), Alignment);
+
+ // Write out the input vector.
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, StoreMMO);
+
+ // Build a mask to match the length of the non-widened result.
+ SDValue Mask =
+ DAG.getMaskFromElementCount(dl, WidenVT, VT.getVectorElementCount());
+
+  // Read back the sub-vector, setting the remaining lanes to poison.
+ StackPtr = TLI.getVectorSubVecPointer(DAG, StackPtr, InVT, VT, Idx);
+ return DAG.getMaskedLoad(
+ WidenVT, dl, Ch, StackPtr, DAG.getUNDEF(StackPtr.getValueType()), Mask,
+ DAG.getPOISON(WidenVT), VT, LoadMMO, ISD::UNINDEXED, ISD::NON_EXTLOAD);
}
// We could try widening the input to the right length but for now, extract
@@ -6227,7 +6252,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
for (i = 0; i < VTNumElts; ++i)
Ops[i] = DAG.getExtractVectorElt(dl, EltVT, InOp, IdxVal + i);
- SDValue UndefVal = DAG.getUNDEF(EltVT);
+ SDValue UndefVal = DAG.getPOISON(EltVT);
for (; i < WidenNumElts; ++i)
Ops[i] = UndefVal;
return DAG.getBuildVector(WidenVT, dl, Ops);
@@ -6321,11 +6346,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
if (VT.isVector()) {
// If all else fails replace the load with a wide masked load.
SDLoc DL(N);
- EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
-
- SDValue Len = DAG.getElementCount(DL, IdxVT, VT.getVectorElementCount());
- SDValue Mask = DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, DL, WideMaskVT,
- DAG.getConstant(0, DL, IdxVT), Len);
+ SDValue Mask =
+ DAG.getMaskFromElementCount(DL, WideVT, VT.getVectorElementCount());
SDValue NewLoad = DAG.getMaskedLoad(
WideVT, DL, LD->getChain(), LD->getBasePtr(), LD->getOffset(), Mask,
@@ -6553,9 +6575,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_VP_GATHER(VPGatherSDNode *N) {
SDValue DAGTypeLegalizer::WidenVecRes_ScalarOp(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- if (N->isVPOpcode())
- return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, N->getOperand(0),
- N->getOperand(1), N->getOperand(2));
return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, N->getOperand(0));
}
@@ -6901,7 +6920,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_REVERSE(SDNode *N) {
Parts.push_back(
DAG.getExtractSubvector(dl, PartVT, ReverseVal, IdxVal + i * GCD));
for (; i < WidenNumElts / GCD; ++i)
- Parts.push_back(DAG.getUNDEF(PartVT));
+ Parts.push_back(DAG.getPOISON(PartVT));
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
}
@@ -6990,7 +7009,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_STRICT_FSETCC(SDNode *N) {
EVT TmpEltVT = LHS.getValueType().getVectorElementType();
// Fully unroll and reassemble.
- SmallVector<SDValue, 8> Scalars(WidenNumElts, DAG.getUNDEF(EltVT));
+ SmallVector<SDValue, 8> Scalars(WidenNumElts, DAG.getPOISON(EltVT));
SmallVector<SDValue, 8> Chains(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
SDValue LHSElem = DAG.getExtractVectorElt(dl, TmpEltVT, LHS, i);
@@ -7098,10 +7117,6 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
Res = WidenVecOp_FP_TO_XINT_SAT(N);
break;
- case ISD::EXPERIMENTAL_VP_SPLAT:
- Res = WidenVecOp_VP_SPLAT(N, OpNo);
- break;
-
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_ADD:
@@ -7462,9 +7477,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) {
SDValue InVec = N->getOperand(0);
EVT OrigVT = SubVec.getValueType();
- if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector)
- SubVec = GetWidenedVector(SubVec);
-
+ SubVec = GetWidenedVector(SubVec);
EVT SubVT = SubVec.getValueType();
// Whether or not all the elements of the widened SubVec will be inserted into
@@ -7486,17 +7499,52 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) {
}
}
+ if (!IndicesValid)
+ report_fatal_error(
+ "Don't know how to widen the operands for INSERT_SUBVECTOR");
+
SDLoc DL(N);
// We need to make sure that the indices are still valid, otherwise we might
// widen what was previously well-defined to something undefined.
- if (IndicesValid && InVec.isUndef() && N->getConstantOperandVal(2) == 0)
+ if (InVec.isUndef() && N->getConstantOperandVal(2) == 0)
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, InVec, SubVec,
N->getOperand(2));
- if (!IndicesValid || OrigVT.isScalableVector())
- report_fatal_error(
- "Don't know how to widen the operands for INSERT_SUBVECTOR");
+ if (OrigVT.isScalableVector()) {
+ // Fallback to inserting through memory.
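+    // Spill the destination vector, masked-store the sub-vector over it at
+    // the insert offset, then reload the combined result.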
+
+ Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
+ SDValue StackPtr = DAG.CreateStackTemporary(VT.getStoreSize(), Alignment);
+ MachineFunction &MF = DAG.getMachineFunction();
+ int FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+
+ MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
+ PtrInfo, MachineMemOperand::MOStore,
+ LocationSize::beforeOrAfterPointer(), Alignment);
+ MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
+ PtrInfo, MachineMemOperand::MOLoad,
+ LocationSize::beforeOrAfterPointer(), Alignment);
+
+    // Write out the vector being inserted into.
+ SDValue Ch =
+ DAG.getStore(DAG.getEntryNode(), DL, InVec, StackPtr, StoreMMO);
+
+ // Build a mask to match the length of the sub-vector.
+ SDValue Mask =
+ DAG.getMaskFromElementCount(DL, SubVT, OrigVT.getVectorElementCount());
+
+ // Overwrite the sub-vector at the required offset.
+ SDValue SubVecPtr =
+ TLI.getVectorSubVecPointer(DAG, StackPtr, VT, OrigVT, N->getOperand(2));
+ Ch = DAG.getMaskedStore(Ch, DL, SubVec, SubVecPtr,
+ DAG.getUNDEF(SubVecPtr.getValueType()), Mask, VT,
+ StoreMMO, ISD::UNINDEXED, ISD::NON_EXTLOAD);
+
+ // Read back the result.
+ return DAG.getLoad(VT, DL, Ch, StackPtr, LoadMMO);
+ }
// If the operands can't be widened legally, just replace the INSERT_SUBVECTOR
// with a series of INSERT_VECTOR_ELT
@@ -7575,12 +7623,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
if (StVT.isVector()) {
// If all else fails replace the store with a wide masked store.
SDLoc DL(N);
- EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
-
SDValue WideStVal = GetWidenedVector(StVal);
- SDValue Len = DAG.getElementCount(DL, IdxVT, StVT.getVectorElementCount());
- SDValue Mask = DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, DL, WideMaskVT,
- DAG.getConstant(0, DL, IdxVT), Len);
+ SDValue Mask =
+ DAG.getMaskFromElementCount(DL, WideVT, StVT.getVectorElementCount());
return DAG.getMaskedStore(ST->getChain(), DL, WideStVal, ST->getBasePtr(),
ST->getOffset(), Mask, ST->getMemoryVT(),
@@ -7591,13 +7636,6 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
report_fatal_error("Unable to widen vector store");
}
-SDValue DAGTypeLegalizer::WidenVecOp_VP_SPLAT(SDNode *N, unsigned OpNo) {
- assert(OpNo == 1 && "Can widen only mask operand of vp_splat");
- return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
- N->getOperand(0), GetWidenedVector(N->getOperand(1)),
- N->getOperand(2));
-}
-
SDValue DAGTypeLegalizer::WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo) {
assert((OpNo == 1 || OpNo == 3) &&
"Can widen only data or mask operand of vp_store");
diff --git a/llvm/lib/CodeGen/SelectionDAG/SDNodeInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/SDNodeInfo.cpp
index e3f6c98..da763df 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SDNodeInfo.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SDNodeInfo.cpp
@@ -7,7 +7,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/SDNodeInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
using namespace llvm;
@@ -40,6 +43,32 @@ static void checkOperandType(const SelectionDAG &DAG, const SDNode *N,
ExpectedVT.getEVTString() + ", got " + ActualVT.getEVTString());
}
+namespace {
+
+/// Similar to SDValue, but also records whether it is a result or an operand
+/// of a node so we can provide more precise diagnostics.
+class SDNodeValue {
+ const SDNode *N;
+ unsigned Idx;
+ bool IsRes;
+
+public:
+ SDNodeValue(const SDNode *N, unsigned Idx, bool IsRes)
+ : N(N), Idx(Idx), IsRes(IsRes) {}
+
+ SDValue getValue() const {
+ return IsRes ? SDValue(const_cast<SDNode *>(N), Idx) : N->getOperand(Idx);
+ }
+
+ EVT getValueType() const { return getValue().getValueType(); }
+
+ friend raw_ostream &operator<<(raw_ostream &OS, const SDNodeValue &Op) {
+ return OS << (Op.IsRes ? "result" : "operand") << " #" << Op.Idx;
+ }
+};
+
+} // namespace
+
void SDNodeInfo::verifyNode(const SelectionDAG &DAG, const SDNode *N) const {
const SDNodeDesc &Desc = getDesc(N->getOpcode());
bool HasChain = Desc.hasProperty(SDNPHasChain);
@@ -125,4 +154,91 @@ void SDNodeInfo::verifyNode(const SelectionDAG &DAG, const SDNode *N) const {
" must be Register or RegisterMask");
}
}
+
+ unsigned VTHwMode =
+ DAG.getSubtarget().getHwMode(MCSubtargetInfo::HwMode_ValueType);
+
+ // Returns a constrained or constraining value (result or operand) of a node.
+ // ValIdx is the index of a node's value, as defined by SDTypeConstraint;
+ // that is, it indexes a node's operands after its results and ignores
+ // chain/glue values.
+ auto GetConstraintValue = [&](unsigned ValIdx) {
+ if (ValIdx < Desc.NumResults)
+ return SDNodeValue(N, ValIdx, /*IsRes=*/true);
+ return SDNodeValue(N, HasChain + (ValIdx - Desc.NumResults),
+ /*IsRes=*/false);
+ };
+
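+  // Resolve the constraint's value type, selecting the entry for the active
+  // HW mode when the constraint carries per-mode types.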
+ auto GetConstraintVT = [&](const SDTypeConstraint &C) {
+ if (!C.NumHwModes)
+ return static_cast<MVT::SimpleValueType>(C.VT);
+ for (auto [Mode, VT] : ArrayRef(&VTByHwModeTable[C.VT], C.NumHwModes))
+ if (Mode == VTHwMode)
+ return VT;
+ llvm_unreachable("No value type for this HW mode");
+ };
+
+ SmallString<128> ES;
+ raw_svector_ostream SS(ES);
+
+ for (const SDTypeConstraint &C : getConstraints(N->getOpcode())) {
+ SDNodeValue Val = GetConstraintValue(C.ConstrainedValIdx);
+ EVT VT = Val.getValueType();
+
+ switch (C.Kind) {
+ case SDTCisVT: {
+ EVT ExpectedVT = GetConstraintVT(C);
+
+ bool IsPtr = ExpectedVT == MVT::iPTR;
+ if (IsPtr)
+ ExpectedVT =
+ DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+
+ if (VT != ExpectedVT) {
+ SS << Val << " must have type " << ExpectedVT;
+ if (IsPtr)
+ SS << " (iPTR)";
+ SS << ", but has type " << VT;
+ reportNodeError(DAG, N, SS.str());
+ }
+ break;
+ }
+ case SDTCisPtrTy:
+ break;
+ case SDTCisInt:
+ break;
+ case SDTCisFP:
+ break;
+ case SDTCisVec:
+ break;
+ case SDTCisSameAs:
+ break;
+ case SDTCisVTSmallerThanOp:
+ break;
+ case SDTCisOpSmallerThanOp:
+ break;
+ case SDTCisEltOfVec:
+ break;
+ case SDTCisSubVecOfVec:
+ break;
+ case SDTCVecEltisVT: {
+ EVT ExpectedVT = GetConstraintVT(C);
+
+ if (!VT.isVector()) {
+ SS << Val << " must have vector type";
+ reportNodeError(DAG, N, SS.str());
+ }
+ if (VT.getVectorElementType() != ExpectedVT) {
+ SS << Val << " must have " << ExpectedVT << " element type, but has "
+ << VT.getVectorElementType() << " element type";
+ reportNodeError(DAG, N, SS.str());
+ }
+ break;
+ }
+ case SDTCisSameNumEltsAs:
+ break;
+ case SDTCisSameSizeAs:
+ break;
+ }
+ }
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index f70b6cd..12fc26d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -340,7 +340,7 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
unsigned Idx = RegDefPos.GetIdx();
const MCInstrDesc &Desc = TII->get(Opcode);
- const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI);
+ const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx);
assert(RC && "Not a valid register class");
RegClass = RC->getID();
// FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 379242e..cbe3236 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -76,7 +76,6 @@
#include <cstdlib>
#include <limits>
#include <optional>
-#include <set>
#include <string>
#include <utility>
#include <vector>
@@ -1917,6 +1916,21 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL,
return SDValue(N, 0);
}
+SDValue SelectionDAG::getDeactivationSymbol(const GlobalValue *GV) {
+ SDVTList VTs = getVTList(MVT::Untyped);
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::DEACTIVATION_SYMBOL, VTs, {});
+ ID.AddPointer(GV);
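+  // The node is uniqued on its GlobalValue, so an existing node for the same
+  // GV is returned instead of creating a duplicate.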
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, SDLoc(), IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<DeactivationSymbolSDNode>(GV, VTs);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) {
unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex;
SDVTList VTs = getVTList(VT);
@@ -2052,6 +2066,11 @@ SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) {
return SDValue(N, 0);
}
+SDValue SelectionDAG::getExternalSymbol(RTLIB::LibcallImpl Libcall, EVT VT) {
+ StringRef SymName = TLI->getLibcallImplName(Libcall);
+ return getExternalSymbol(SymName.data(), VT);
+}
+
SDValue SelectionDAG::getMCSymbol(MCSymbol *Sym, EVT VT) {
SDNode *&N = MCSymbols[Sym];
if (N)
@@ -2084,32 +2103,51 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
return SDValue(CondCodeNodes[Cond], 0);
}
-SDValue SelectionDAG::getVScale(const SDLoc &DL, EVT VT, APInt MulImm,
- bool ConstantFold) {
+SDValue SelectionDAG::getVScale(const SDLoc &DL, EVT VT, APInt MulImm) {
assert(MulImm.getBitWidth() == VT.getSizeInBits() &&
"APInt size does not match type size!");
if (MulImm == 0)
return getConstant(0, DL, VT);
- if (ConstantFold) {
- const MachineFunction &MF = getMachineFunction();
- const Function &F = MF.getFunction();
- ConstantRange CR = getVScaleRange(&F, 64);
- if (const APInt *C = CR.getSingleElement())
- return getConstant(MulImm * C->getZExtValue(), DL, VT);
- }
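+  // If vscale_range pins vscale to a single value, fold VSCALE to a plain
+  // constant.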
+ const MachineFunction &MF = getMachineFunction();
+ const Function &F = MF.getFunction();
+ ConstantRange CR = getVScaleRange(&F, 64);
+ if (const APInt *C = CR.getSingleElement())
+ return getConstant(MulImm * C->getZExtValue(), DL, VT);
return getNode(ISD::VSCALE, DL, VT, getConstant(MulImm, DL, VT));
}
-SDValue SelectionDAG::getElementCount(const SDLoc &DL, EVT VT, ElementCount EC,
- bool ConstantFold) {
- if (EC.isScalable())
- return getVScale(DL, VT,
- APInt(VT.getSizeInBits(), EC.getKnownMinValue()));
+/// \returns a value of type \p VT that represents the runtime value of \p
+/// Quantity, i.e. scaled by vscale if it's scalable, or a fixed constant
+/// otherwise. Quantity should be a FixedOrScalableQuantity, i.e. ElementCount
+/// or TypeSize.
+template <typename Ty>
+static SDValue getFixedOrScalableQuantity(SelectionDAG &DAG, const SDLoc &DL,
+ EVT VT, Ty Quantity) {
+ if (Quantity.isScalable())
+ return DAG.getVScale(
+ DL, VT, APInt(VT.getSizeInBits(), Quantity.getKnownMinValue()));
+
+ return DAG.getConstant(Quantity.getKnownMinValue(), DL, VT);
+}
+
+SDValue SelectionDAG::getElementCount(const SDLoc &DL, EVT VT,
+ ElementCount EC) {
+ return getFixedOrScalableQuantity(*this, DL, VT, EC);
+}
+
+SDValue SelectionDAG::getTypeSize(const SDLoc &DL, EVT VT, TypeSize TS) {
+ return getFixedOrScalableQuantity(*this, DL, VT, TS);
+}

-  return getConstant(EC.getKnownMinValue(), DL, VT);
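+/// Return a mask of \p DataVT's setcc result type whose first \p EC lanes are
+/// active, built as GET_ACTIVE_LANE_MASK(0, EC).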
+SDValue SelectionDAG::getMaskFromElementCount(const SDLoc &DL, EVT DataVT,
+ ElementCount EC) {
+ EVT IdxVT = TLI->getVectorIdxTy(getDataLayout());
+ EVT MaskVT = TLI->getSetCCResultType(getDataLayout(), *getContext(), DataVT);
+ return getNode(ISD::GET_ACTIVE_LANE_MASK, DL, MaskVT,
+ getConstant(0, DL, IdxVT), getElementCount(DL, IdxVT, EC));
}
SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT) {
@@ -2468,180 +2506,6 @@ SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) {
return getZExtOrTrunc(Op, SDLoc(Op), ShTy);
}
-/// Given a store node \p StoreNode, return true if it is safe to fold that node
-/// into \p FPNode, which expands to a library call with output pointers.
-static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode,
- SDNode *FPNode) {
- SmallVector<const SDNode *, 8> Worklist;
- SmallVector<const SDNode *, 8> DeferredNodes;
- SmallPtrSet<const SDNode *, 16> Visited;
-
- // Skip FPNode use by StoreNode (that's the use we want to fold into FPNode).
- for (SDValue Op : StoreNode->ops())
- if (Op.getNode() != FPNode)
- Worklist.push_back(Op.getNode());
-
- unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps();
- while (!Worklist.empty()) {
- const SDNode *Node = Worklist.pop_back_val();
- auto [_, Inserted] = Visited.insert(Node);
- if (!Inserted)
- continue;
-
- if (MaxSteps > 0 && Visited.size() >= MaxSteps)
- return false;
-
- // Reached the FPNode (would result in a cycle).
- // OR Reached CALLSEQ_START (would result in nested call sequences).
- if (Node == FPNode || Node->getOpcode() == ISD::CALLSEQ_START)
- return false;
-
- if (Node->getOpcode() == ISD::CALLSEQ_END) {
- // Defer looking into call sequences (so we can check we're outside one).
- // We still need to look through these for the predecessor check.
- DeferredNodes.push_back(Node);
- continue;
- }
-
- for (SDValue Op : Node->ops())
- Worklist.push_back(Op.getNode());
- }
-
- // True if we're outside a call sequence and don't have the FPNode as a
- // predecessor. No cycles or nested call sequences possible.
- return !SDNode::hasPredecessorHelper(FPNode, Visited, DeferredNodes,
- MaxSteps);
-}
-
-bool SelectionDAG::expandMultipleResultFPLibCall(
- RTLIB::Libcall LC, SDNode *Node, SmallVectorImpl<SDValue> &Results,
- std::optional<unsigned> CallRetResNo) {
- LLVMContext &Ctx = *getContext();
- EVT VT = Node->getValueType(0);
- unsigned NumResults = Node->getNumValues();
-
- if (LC == RTLIB::UNKNOWN_LIBCALL)
- return false;
-
- const char *LCName = TLI->getLibcallName(LC);
- if (!LCName)
- return false;
-
- auto getVecDesc = [&]() -> VecDesc const * {
- for (bool Masked : {false, true}) {
- if (VecDesc const *VD = getLibInfo().getVectorMappingInfo(
- LCName, VT.getVectorElementCount(), Masked)) {
- return VD;
- }
- }
- return nullptr;
- };
-
- // For vector types, we must find a vector mapping for the libcall.
- VecDesc const *VD = nullptr;
- if (VT.isVector() && !(VD = getVecDesc()))
- return false;
-
- // Find users of the node that store the results (and share input chains). The
- // destination pointers can be used instead of creating stack allocations.
- SDValue StoresInChain;
- SmallVector<StoreSDNode *, 2> ResultStores(NumResults);
- for (SDNode *User : Node->users()) {
- if (!ISD::isNormalStore(User))
- continue;
- auto *ST = cast<StoreSDNode>(User);
- SDValue StoreValue = ST->getValue();
- unsigned ResNo = StoreValue.getResNo();
- // Ensure the store corresponds to an output pointer.
- if (CallRetResNo == ResNo)
- continue;
- // Ensure the store to the default address space and not atomic or volatile.
- if (!ST->isSimple() || ST->getAddressSpace() != 0)
- continue;
- // Ensure all store chains are the same (so they don't alias).
- if (StoresInChain && ST->getChain() != StoresInChain)
- continue;
- // Ensure the store is properly aligned.
- Type *StoreType = StoreValue.getValueType().getTypeForEVT(Ctx);
- if (ST->getAlign() <
- getDataLayout().getABITypeAlign(StoreType->getScalarType()))
- continue;
- // Avoid:
- // 1. Creating cyclic dependencies.
- // 2. Expanding the node to a call within a call sequence.
- if (!canFoldStoreIntoLibCallOutputPointers(ST, Node))
- continue;
- ResultStores[ResNo] = ST;
- StoresInChain = ST->getChain();
- }
-
- TargetLowering::ArgListTy Args;
-
- // Pass the arguments.
- for (const SDValue &Op : Node->op_values()) {
- EVT ArgVT = Op.getValueType();
- Type *ArgTy = ArgVT.getTypeForEVT(Ctx);
- Args.emplace_back(Op, ArgTy);
- }
-
- // Pass the output pointers.
- SmallVector<SDValue, 2> ResultPtrs(NumResults);
- Type *PointerTy = PointerType::getUnqual(Ctx);
- for (auto [ResNo, ST] : llvm::enumerate(ResultStores)) {
- if (ResNo == CallRetResNo)
- continue;
- EVT ResVT = Node->getValueType(ResNo);
- SDValue ResultPtr = ST ? ST->getBasePtr() : CreateStackTemporary(ResVT);
- ResultPtrs[ResNo] = ResultPtr;
- Args.emplace_back(ResultPtr, PointerTy);
- }
-
- SDLoc DL(Node);
-
- // Pass the vector mask (if required).
- if (VD && VD->isMasked()) {
- EVT MaskVT = TLI->getSetCCResultType(getDataLayout(), Ctx, VT);
- SDValue Mask = getBoolConstant(true, DL, MaskVT, VT);
- Args.emplace_back(Mask, MaskVT.getTypeForEVT(Ctx));
- }
-
- Type *RetType = CallRetResNo.has_value()
- ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx)
- : Type::getVoidTy(Ctx);
- SDValue InChain = StoresInChain ? StoresInChain : getEntryNode();
- SDValue Callee = getExternalSymbol(VD ? VD->getVectorFnName().data() : LCName,
- TLI->getPointerTy(getDataLayout()));
- TargetLowering::CallLoweringInfo CLI(*this);
- CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(
- TLI->getLibcallCallingConv(LC), RetType, Callee, std::move(Args));
-
- auto [Call, CallChain] = TLI->LowerCallTo(CLI);
-
- for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) {
- if (ResNo == CallRetResNo) {
- Results.push_back(Call);
- continue;
- }
- MachinePointerInfo PtrInfo;
- SDValue LoadResult =
- getLoad(Node->getValueType(ResNo), DL, CallChain, ResultPtr, PtrInfo);
- SDValue OutChain = LoadResult.getValue(1);
-
- if (StoreSDNode *ST = ResultStores[ResNo]) {
- // Replace store with the library call.
- ReplaceAllUsesOfValueWith(SDValue(ST, 0), OutChain);
- PtrInfo = ST->getPointerInfo();
- } else {
- PtrInfo = MachinePointerInfo::getFixedStack(
- getMachineFunction(), cast<FrameIndexSDNode>(ResultPtr)->getIndex());
- }
-
- Results.push_back(LoadResult);
- }
-
- return true;
-}
-
SDValue SelectionDAG::expandVAArg(SDNode *Node) {
SDLoc dl(Node);
const TargetLowering &TLI = getTargetLoweringInfo();
@@ -2921,6 +2785,34 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
return MaskedValueIsZero(Op, APInt::getSignMask(BitWidth), Depth);
}
+bool SelectionDAG::SignBitIsZeroFP(SDValue Op, unsigned Depth) const {
+ if (Depth >= MaxRecursionDepth)
+ return false; // Limit search depth.
+
+ unsigned Opc = Op.getOpcode();
+ switch (Opc) {
+ case ISD::FABS:
+ return true;
+ case ISD::AssertNoFPClass: {
+ FPClassTest NoFPClass =
+ static_cast<FPClassTest>(Op.getConstantOperandVal(1));
+
+ const FPClassTest TestMask = fcNan | fcNegative;
+ return (NoFPClass & TestMask) == TestMask;
+ }
+ case ISD::ARITH_FENCE:
+    return SignBitIsZeroFP(Op.getOperand(0), Depth + 1);
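+  // exp(x) is positive for any non-NaN input (exp(-inf) == +0), so only a
+  // NaN result can have its sign bit set.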
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FEXP10:
+ return Op->getFlags().hasNoNaNs();
+ default:
+ return false;
+ }
+
+ llvm_unreachable("covered opcode switch");
+}
+
/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
/// this predicate to simplify operations downstream. Mask is known to be zero
/// for bits that V cannot have.
@@ -4122,6 +4014,25 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.One.clearLowBits(LogOfAlign);
break;
}
+ case ISD::AssertNoFPClass: {
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
+ FPClassTest NoFPClass =
+ static_cast<FPClassTest>(Op.getConstantOperandVal(1));
+ const FPClassTest NegativeTestMask = fcNan | fcNegative;
+ if ((NoFPClass & NegativeTestMask) == NegativeTestMask) {
+ // Cannot be negative.
+ Known.makeNonNegative();
+ }
+
+ const FPClassTest PositiveTestMask = fcNan | fcPositive;
+ if ((NoFPClass & PositiveTestMask) == PositiveTestMask) {
+ // Cannot be positive.
+ Known.makeNegative();
+ }
+
+ break;
+ }
case ISD::FGETSIGN:
// All bits are zero except the low bit.
Known.Zero.setBitsFrom(1);
@@ -5830,6 +5741,9 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
return false;
}
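+  // Every result element is taken from the source vector or the passthru
+  // operand, so the node cannot introduce new undef/poison.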
+ case ISD::VECTOR_COMPRESS:
+ return false;
+
default:
// Allow the target to implement this method for its nodes.
if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN ||
@@ -6233,7 +6147,57 @@ bool SelectionDAG::cannotBeOrderedNegativeFP(SDValue Op) const {
if (ConstantFPSDNode *C1 = isConstOrConstSplatFP(Op, true))
return !C1->isNegative();
- return Op.getOpcode() == ISD::FABS;
+ switch (Op.getOpcode()) {
+ case ISD::FABS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FEXP10:
+ return true;
+ default:
+ return false;
+ }
+
+ llvm_unreachable("covered opcode switch");
+}
+
+bool SelectionDAG::canIgnoreSignBitOfZero(const SDUse &Use) const {
+ assert(Use.getValueType().isFloatingPoint());
+ const SDNode *User = Use.getUser();
+ unsigned OperandNo = Use.getOperandNo();
+ // Check if this use is insensitive to the sign of zero
+  // Check if this use is insensitive to the sign of zero.
+ case ISD::SETCC:
+ // Comparisons: IEEE-754 specifies +0.0 == -0.0.
+ case ISD::FABS:
+ // fabs always produces +0.0.
+ return true;
+ case ISD::FCOPYSIGN:
+ // copysign overwrites the sign bit of the first operand.
+ return OperandNo == 0;
+ case ISD::FADD:
+ case ISD::FSUB: {
+ // Arithmetic with non-zero constants fixes the uncertainty around the
+ // sign bit.
+ SDValue Other = User->getOperand(1 - OperandNo);
+ return isKnownNeverZeroFloat(Other);
+ }
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ // fp-to-int conversions normalize signed zeros.
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool SelectionDAG::canIgnoreSignBitOfZero(SDValue Op) const {
+ // FIXME: Limit the amount of checked uses to not introduce a compile-time
+ // regression. Ideally, this should be implemented as a demanded-bits
+ // optimization that stems from the users.
+ if (Op->use_size() > 2)
+ return false;
+ return all_of(Op->uses(),
+ [&](const SDUse &Use) { return canIgnoreSignBitOfZero(Use); });
}
bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
@@ -7471,8 +7435,12 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
C1.copySign(C2);
return getConstantFP(C1, DL, VT);
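+  // Folding minnum/maxnum with a signaling NaN operand would not respect the
+  // node's sNaN semantics, so such constants are left unfolded.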
case ISD::FMINNUM:
+ if (C1.isSignaling() || C2.isSignaling())
+ return SDValue();
return getConstantFP(minnum(C1, C2), DL, VT);
case ISD::FMAXNUM:
+ if (C1.isSignaling() || C2.isSignaling())
+ return SDValue();
return getConstantFP(maxnum(C1, C2), DL, VT);
case ISD::FMINIMUM:
return getConstantFP(minimum(C1, C2), DL, VT);
@@ -7733,6 +7701,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
N1.getValueType() == VT && "Binary operator types must match!");
if (VT.getScalarType() == MVT::i1)
return getNode(ISD::AND, DL, VT, N1, N2);
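+    // fold (mul x, 0) -> 0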
+ if (N2CV && N2CV->isZero())
+ return N2;
if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) {
const APInt &MulImm = N1->getConstantOperandAPInt(0);
const APInt &N2CImm = N2C->getAPIntValue();
@@ -8404,7 +8374,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
case ISD::PARTIAL_REDUCE_UMLA:
case ISD::PARTIAL_REDUCE_SMLA:
- case ISD::PARTIAL_REDUCE_SUMLA: {
+ case ISD::PARTIAL_REDUCE_SUMLA:
+ case ISD::PARTIAL_REDUCE_FMLA: {
[[maybe_unused]] EVT AccVT = N1.getValueType();
[[maybe_unused]] EVT Input1VT = N2.getValueType();
[[maybe_unused]] EVT Input2VT = N3.getValueType();
@@ -8599,16 +8570,7 @@ static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG,
SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, TypeSize Offset,
const SDLoc &DL,
const SDNodeFlags Flags) {
- EVT VT = Base.getValueType();
- SDValue Index;
-
- if (Offset.isScalable())
- Index = getVScale(DL, Base.getValueType(),
- APInt(Base.getValueSizeInBits().getFixedValue(),
- Offset.getKnownMinValue()));
- else
- Index = getConstant(Offset.getFixedValue(), DL, VT);
-
+ SDValue Index = getTypeSize(DL, Base.getValueType(), Offset);
return getMemBasePlusOffset(Base, Index, DL, Flags);
}
@@ -9146,8 +9108,8 @@ static bool isInTailCallPositionWrapper(const CallInst *CI,
std::pair<SDValue, SDValue>
SelectionDAG::getMemcmp(SDValue Chain, const SDLoc &dl, SDValue Mem0,
SDValue Mem1, SDValue Size, const CallInst *CI) {
- const char *LibCallName = TLI->getLibcallName(RTLIB::MEMCMP);
- if (!LibCallName)
+ RTLIB::LibcallImpl MemcmpImpl = TLI->getLibcallImpl(RTLIB::MEMCMP);
+ if (MemcmpImpl == RTLIB::Unsupported)
return {};
PointerType *PT = PointerType::getUnqual(*getContext());
@@ -9160,13 +9122,14 @@ SelectionDAG::getMemcmp(SDValue Chain, const SDLoc &dl, SDValue Mem0,
bool IsTailCall =
isInTailCallPositionWrapper(CI, this, /*AllowReturnsFirstArg*/ true);
+ StringRef LibCallName = TLI->getLibcallImplName(MemcmpImpl);
CLI.setDebugLoc(dl)
.setChain(Chain)
- .setLibCallee(
- TLI->getLibcallCallingConv(RTLIB::MEMCMP),
- Type::getInt32Ty(*getContext()),
- getExternalSymbol(LibCallName, TLI->getPointerTy(getDataLayout())),
- std::move(Args))
+ .setLibCallee(TLI->getLibcallImplCallingConv(MemcmpImpl),
+ Type::getInt32Ty(*getContext()),
+ getExternalSymbol(LibCallName.data(),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
.setTailCall(IsTailCall);
return TLI->LowerCallTo(CLI);
@@ -9176,8 +9139,8 @@ std::pair<SDValue, SDValue> SelectionDAG::getStrlen(SDValue Chain,
const SDLoc &dl,
SDValue Src,
const CallInst *CI) {
- const char *LibCallName = TLI->getLibcallName(RTLIB::STRLEN);
- if (!LibCallName)
+ RTLIB::LibcallImpl StrlenImpl = TLI->getLibcallImpl(RTLIB::STRLEN);
+ if (StrlenImpl == RTLIB::Unsupported)
return {};
// Emit a library call.
@@ -9187,13 +9150,15 @@ std::pair<SDValue, SDValue> SelectionDAG::getStrlen(SDValue Chain,
TargetLowering::CallLoweringInfo CLI(*this);
bool IsTailCall =
isInTailCallPositionWrapper(CI, this, /*AllowReturnsFirstArg*/ true);
+ StringRef LibcallName = TLI->getLibcallImplName(StrlenImpl);
CLI.setDebugLoc(dl)
.setChain(Chain)
- .setLibCallee(TLI->getLibcallCallingConv(RTLIB::STRLEN), CI->getType(),
- getExternalSymbol(
- LibCallName, TLI->getProgramPointerTy(getDataLayout())),
- std::move(Args))
+ .setLibCallee(
+ TLI->getLibcallImplCallingConv(StrlenImpl), CI->getType(),
+ getExternalSymbol(LibcallName.data(),
+ TLI->getProgramPointerTy(getDataLayout())),
+ std::move(Args))
.setTailCall(IsTailCall);
return TLI->LowerCallTo(CLI);
@@ -9257,21 +9222,22 @@ SDValue SelectionDAG::getMemcpy(
// FIXME: pass in SDLoc
TargetLowering::CallLoweringInfo CLI(*this);
bool IsTailCall = false;
- const char *MemCpyName = TLI->getMemcpyName();
+ RTLIB::LibcallImpl MemCpyImpl = TLI->getMemcpyImpl();
if (OverrideTailCall.has_value()) {
IsTailCall = *OverrideTailCall;
} else {
- bool LowersToMemcpy = StringRef(MemCpyName) == StringRef("memcpy");
+ bool LowersToMemcpy = MemCpyImpl == RTLIB::impl_memcpy;
IsTailCall = isInTailCallPositionWrapper(CI, this, LowersToMemcpy);
}
CLI.setDebugLoc(dl)
.setChain(Chain)
.setLibCallee(
- TLI->getLibcallCallingConv(RTLIB::MEMCPY),
+ TLI->getLibcallImplCallingConv(MemCpyImpl),
Dst.getValueType().getTypeForEVT(*getContext()),
- getExternalSymbol(MemCpyName, TLI->getPointerTy(getDataLayout())),
+ getExternalSymbol(TLI->getLibcallImplName(MemCpyImpl).data(),
+ TLI->getPointerTy(getDataLayout())),
std::move(Args))
.setDiscardResult()
.setTailCall(IsTailCall);
@@ -9295,17 +9261,19 @@ SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl,
RTLIB::Libcall LibraryCall =
RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElemSz);
- if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
+ RTLIB::LibcallImpl LibcallImpl = TLI->getLibcallImpl(LibraryCall);
+ if (LibcallImpl == RTLIB::Unsupported)
report_fatal_error("Unsupported element size");
TargetLowering::CallLoweringInfo CLI(*this);
CLI.setDebugLoc(dl)
.setChain(Chain)
- .setLibCallee(TLI->getLibcallCallingConv(LibraryCall),
- Type::getVoidTy(*getContext()),
- getExternalSymbol(TLI->getLibcallName(LibraryCall),
- TLI->getPointerTy(getDataLayout())),
- std::move(Args))
+ .setLibCallee(
+ TLI->getLibcallImplCallingConv(LibcallImpl),
+ Type::getVoidTy(*getContext()),
+ getExternalSymbol(TLI->getLibcallImplName(LibcallImpl).data(),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
.setDiscardResult()
.setTailCall(isTailCall);
@@ -9361,22 +9329,24 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
// FIXME: pass in SDLoc
TargetLowering::CallLoweringInfo CLI(*this);
+ RTLIB::LibcallImpl MemmoveImpl = TLI->getLibcallImpl(RTLIB::MEMMOVE);
+
bool IsTailCall = false;
if (OverrideTailCall.has_value()) {
IsTailCall = *OverrideTailCall;
} else {
- bool LowersToMemmove =
- TLI->getLibcallName(RTLIB::MEMMOVE) == StringRef("memmove");
+ bool LowersToMemmove = MemmoveImpl == RTLIB::impl_memmove;
IsTailCall = isInTailCallPositionWrapper(CI, this, LowersToMemmove);
}
CLI.setDebugLoc(dl)
.setChain(Chain)
- .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE),
- Dst.getValueType().getTypeForEVT(*getContext()),
- getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE),
- TLI->getPointerTy(getDataLayout())),
- std::move(Args))
+ .setLibCallee(
+ TLI->getLibcallImplCallingConv(MemmoveImpl),
+ Dst.getValueType().getTypeForEVT(*getContext()),
+ getExternalSymbol(TLI->getLibcallImplName(MemmoveImpl).data(),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
.setDiscardResult()
.setTailCall(IsTailCall);
@@ -9399,17 +9369,19 @@ SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl,
RTLIB::Libcall LibraryCall =
RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElemSz);
- if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
+ RTLIB::LibcallImpl LibcallImpl = TLI->getLibcallImpl(LibraryCall);
+ if (LibcallImpl == RTLIB::Unsupported)
report_fatal_error("Unsupported element size");
TargetLowering::CallLoweringInfo CLI(*this);
CLI.setDebugLoc(dl)
.setChain(Chain)
- .setLibCallee(TLI->getLibcallCallingConv(LibraryCall),
- Type::getVoidTy(*getContext()),
- getExternalSymbol(TLI->getLibcallName(LibraryCall),
- TLI->getPointerTy(getDataLayout())),
- std::move(Args))
+ .setLibCallee(
+ TLI->getLibcallImplCallingConv(LibcallImpl),
+ Type::getVoidTy(*getContext()),
+ getExternalSymbol(TLI->getLibcallImplName(LibcallImpl).data(),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
.setDiscardResult()
.setTailCall(isTailCall);
@@ -9470,30 +9442,37 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
// FIXME: pass in SDLoc
CLI.setDebugLoc(dl).setChain(Chain);
- const char *BzeroName = getTargetLoweringInfo().getLibcallName(RTLIB::BZERO);
- bool UseBZero = isNullConstant(Src) && BzeroName;
+ RTLIB::LibcallImpl BzeroImpl = TLI->getLibcallImpl(RTLIB::BZERO);
+ bool UseBZero = BzeroImpl != RTLIB::Unsupported && isNullConstant(Src);
// If zeroing out and bzero is present, use it.
if (UseBZero) {
TargetLowering::ArgListTy Args;
Args.emplace_back(Dst, PointerType::getUnqual(Ctx));
Args.emplace_back(Size, DL.getIntPtrType(Ctx));
CLI.setLibCallee(
- TLI->getLibcallCallingConv(RTLIB::BZERO), Type::getVoidTy(Ctx),
- getExternalSymbol(BzeroName, TLI->getPointerTy(DL)), std::move(Args));
+ TLI->getLibcallImplCallingConv(BzeroImpl), Type::getVoidTy(Ctx),
+ getExternalSymbol(TLI->getLibcallImplName(BzeroImpl).data(),
+ TLI->getPointerTy(DL)),
+ std::move(Args));
} else {
+ RTLIB::LibcallImpl MemsetImpl = TLI->getLibcallImpl(RTLIB::MEMSET);
+
TargetLowering::ArgListTy Args;
Args.emplace_back(Dst, PointerType::getUnqual(Ctx));
Args.emplace_back(Src, Src.getValueType().getTypeForEVT(Ctx));
Args.emplace_back(Size, DL.getIntPtrType(Ctx));
- CLI.setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET),
- Dst.getValueType().getTypeForEVT(Ctx),
- getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),
- TLI->getPointerTy(DL)),
- std::move(Args));
- }
- bool LowersToMemset =
- TLI->getLibcallName(RTLIB::MEMSET) == StringRef("memset");
+ CLI.setLibCallee(
+ TLI->getLibcallImplCallingConv(MemsetImpl),
+ Dst.getValueType().getTypeForEVT(Ctx),
+ getExternalSymbol(TLI->getLibcallImplName(MemsetImpl).data(),
+ TLI->getPointerTy(DL)),
+ std::move(Args));
+ }
+
+ RTLIB::LibcallImpl MemsetImpl = TLI->getLibcallImpl(RTLIB::MEMSET);
+ bool LowersToMemset = MemsetImpl == RTLIB::impl_memset;
+
// If we're going to use bzero, make sure not to tail call unless the
// subsequent return doesn't need a value, as bzero doesn't return the first
// arg unlike memset.
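At the C level the constraint in the comment above looks like this (a sketch; bzero is POSIX and appears only in commentary):

#include <cstring>

void *zeroAndReturn(void *P, std::size_t N) {
  // Fine as a tail call: memset returns its first argument, so the caller's
  // expected return value (P) ends up in the return register anyway.
  return memset(P, 0, N);
  // A bzero-based version returns void, so it can only be tail-called when
  // the caller's own return value is unused -- exactly what is checked here.
}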
@@ -9520,17 +9499,19 @@ SDValue SelectionDAG::getAtomicMemset(SDValue Chain, const SDLoc &dl,
RTLIB::Libcall LibraryCall =
RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElemSz);
- if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
+ RTLIB::LibcallImpl LibcallImpl = TLI->getLibcallImpl(LibraryCall);
+ if (LibcallImpl == RTLIB::Unsupported)
report_fatal_error("Unsupported element size");
TargetLowering::CallLoweringInfo CLI(*this);
CLI.setDebugLoc(dl)
.setChain(Chain)
- .setLibCallee(TLI->getLibcallCallingConv(LibraryCall),
- Type::getVoidTy(*getContext()),
- getExternalSymbol(TLI->getLibcallName(LibraryCall),
- TLI->getPointerTy(getDataLayout())),
- std::move(Args))
+ .setLibCallee(
+ TLI->getLibcallImplCallingConv(LibcallImpl),
+ Type::getVoidTy(*getContext()),
+ getExternalSymbol(TLI->getLibcallImplName(LibcallImpl).data(),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
.setDiscardResult()
.setTailCall(isTailCall);
@@ -10025,8 +10006,6 @@ SDValue SelectionDAG::getLoadVP(
MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment,
MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
const MDNode *Ranges, bool IsExpanding) {
- assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
-
MMOFlags |= MachineMemOperand::MOLoad;
assert((MMOFlags & MachineMemOperand::MOStore) == 0);
// If we don't have a PtrInfo, infer the trivial frame index case to simplify
@@ -10048,6 +10027,11 @@ SDValue SelectionDAG::getLoadVP(ISD::MemIndexedMode AM,
SDValue Offset, SDValue Mask, SDValue EVL,
EVT MemVT, MachineMemOperand *MMO,
bool IsExpanding) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+ assert(Mask.getValueType().getVectorElementCount() ==
+ VT.getVectorElementCount() &&
+ "Vector width mismatch between mask and data");
+
bool Indexed = AM != ISD::UNINDEXED;
assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!");
@@ -10143,6 +10127,10 @@ SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val,
ISD::MemIndexedMode AM, bool IsTruncating,
bool IsCompressing) {
assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+ assert(Mask.getValueType().getVectorElementCount() ==
+ Val.getValueType().getVectorElementCount() &&
+ "Vector width mismatch between mask and data");
+
bool Indexed = AM != ISD::UNINDEXED;
assert((Indexed || Offset.isUndef()) && "Unindexed vp_store with an offset!");
SDVTList VTs = Indexed ? getVTList(Ptr.getValueType(), MVT::Other)
@@ -12741,6 +12729,10 @@ void SelectionDAG::getTopologicallyOrderedNodes(
for (unsigned i = 0U; i < SortedNodes.size(); ++i) {
const SDNode *N = SortedNodes[i];
for (const SDNode *U : N->users()) {
+ // HandleSDNode is never part of a DAG and therefore has no entry in
+ // RemainingOperands.
+ if (U->getOpcode() == ISD::HANDLENODE)
+ continue;
unsigned &NumRemOperands = RemainingOperands[U];
assert(NumRemOperands && "Invalid number of remaining operands");
--NumRemOperands;
@@ -12754,8 +12746,6 @@ void SelectionDAG::getTopologicallyOrderedNodes(
"First node in topological sort is not the entry token");
assert(SortedNodes.front()->getNumOperands() == 0 &&
"First node in topological sort has operands");
- assert(SortedNodes.back()->use_empty() &&
- "Last node in topologic sort has users");
}
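The trailing assert is removed presumably because, once HandleSDNode users are tolerated, the last node in the order may still have a handle as a user. For reference, the shape of the algorithm with the skip, as a standalone Kahn's-algorithm sketch: nodes whose users are not registered in the in-degree map (the HandleSDNode analogue) must simply be skipped during the decrement.

#include <queue>
#include <unordered_map>
#include <utility>
#include <vector>

std::vector<int> topoSort(int N, const std::vector<std::pair<int, int>> &Edges) {
  std::unordered_map<int, int> InDeg;
  std::unordered_map<int, std::vector<int>> Succ;
  for (auto [A, B] : Edges) { ++InDeg[B]; Succ[A].push_back(B); }
  std::queue<int> Ready;
  for (int V = 0; V < N; ++V)
    if (!InDeg.count(V)) Ready.push(V);   // no operands: ready immediately
  std::vector<int> Order;
  while (!Ready.empty()) {
    int V = Ready.front(); Ready.pop();
    Order.push_back(V);
    for (int U : Succ[V])                 // users not in InDeg never appear here
      if (--InDeg[U] == 0) Ready.push(U); // all operands emitted: U is ready
  }
  return Order;
}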
/// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the
@@ -13057,6 +13047,11 @@ bool llvm::isOneOrOneSplat(SDValue N, bool AllowUndefs) {
return C && C->isOne();
}
+bool llvm::isOneOrOneSplatFP(SDValue N, bool AllowUndefs) {
+ ConstantFPSDNode *C = isConstOrConstSplatFP(N, AllowUndefs);
+ return C && C->isExactlyValue(1.0);
+}
+
bool llvm::isAllOnesOrAllOnesSplat(SDValue N, bool AllowUndefs) {
N = peekThroughBitcasts(N);
unsigned BitWidth = N.getScalarValueSizeInBits();
@@ -13076,6 +13071,11 @@ bool llvm::isZeroOrZeroSplat(SDValue N, bool AllowUndefs) {
return C && C->isZero();
}
+bool llvm::isZeroOrZeroSplatFP(SDValue N, bool AllowUndefs) {
+ ConstantFPSDNode *C = isConstOrConstSplatFP(N, AllowUndefs);
+ return C && C->isZero();
+}
+
HandleSDNode::~HandleSDNode() {
DropOperands();
}
@@ -13660,11 +13660,8 @@ std::pair<SDValue, SDValue> SelectionDAG::SplitEVL(SDValue N, EVT VecVT,
EVT VT = N.getValueType();
assert(VecVT.getVectorElementCount().isKnownEven() &&
"Expecting the mask to be an evenly-sized vector");
- unsigned HalfMinNumElts = VecVT.getVectorMinNumElements() / 2;
- SDValue HalfNumElts =
- VecVT.isFixedLengthVector()
- ? getConstant(HalfMinNumElts, DL, VT)
- : getVScale(DL, VT, APInt(VT.getScalarSizeInBits(), HalfMinNumElts));
+ SDValue HalfNumElts = getElementCount(
+ DL, VT, VecVT.getVectorElementCount().divideCoefficientBy(2));
SDValue Lo = getNode(ISD::UMIN, DL, VT, N, HalfNumElts);
SDValue Hi = getNode(ISD::USUBSAT, DL, VT, N, HalfNumElts);
return std::make_pair(Lo, Hi);
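A worked instance of the EVL split above: an explicit vector length of 6 for v8i32, split into two v4i32 halves. UMIN clamps the low half's length and USUBSAT leaves the remainder, saturating to 0 instead of going negative when EVL is at most half:

#include <algorithm>

void splitEVLExample() {
  unsigned EVL = 6, HalfNumElts = 4; // vscale-scaled for scalable types
  unsigned LoEVL = std::min(EVL, HalfNumElts);                // UMIN    -> 4
  unsigned HiEVL = EVL > HalfNumElts ? EVL - HalfNumElts : 0; // USUBSAT -> 2
  (void)LoEVL; (void)HiEVL;
}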
@@ -14241,13 +14238,18 @@ SDValue SelectionDAG::makeStateFunctionCall(unsigned LibFunc, SDValue Ptr,
assert(InChain.getValueType() == MVT::Other && "Expected token chain");
TargetLowering::ArgListTy Args;
Args.emplace_back(Ptr, Ptr.getValueType().getTypeForEVT(*getContext()));
- RTLIB::Libcall LC = static_cast<RTLIB::Libcall>(LibFunc);
- SDValue Callee = getExternalSymbol(TLI->getLibcallName(LC),
- TLI->getPointerTy(getDataLayout()));
+ RTLIB::LibcallImpl LibcallImpl =
+ TLI->getLibcallImpl(static_cast<RTLIB::Libcall>(LibFunc));
+ if (LibcallImpl == RTLIB::Unsupported)
+ reportFatalUsageError("emitting call to unsupported libcall");
+
+ SDValue Callee =
+ getExternalSymbol(TLI->getLibcallImplName(LibcallImpl).data(),
+ TLI->getPointerTy(getDataLayout()));
TargetLowering::CallLoweringInfo CLI(*this);
CLI.setDebugLoc(DLoc).setChain(InChain).setLibCallee(
- TLI->getLibcallCallingConv(LC), Type::getVoidTy(*getContext()), Callee,
- std::move(Args));
+ TLI->getLibcallImplCallingConv(LibcallImpl),
+ Type::getVoidTy(*getContext()), Callee, std::move(Args));
return TLI->LowerCallTo(CLI).second;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index a522650..7134550 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -45,6 +45,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/SwiftErrorValueTracking.h"
@@ -1097,14 +1098,15 @@ RegsForValue::getRegsAndSizes() const {
}
void SelectionDAGBuilder::init(GCFunctionInfo *gfi, BatchAAResults *aa,
- AssumptionCache *ac,
- const TargetLibraryInfo *li) {
+ AssumptionCache *ac, const TargetLibraryInfo *li,
+ const TargetTransformInfo &TTI) {
BatchAA = aa;
AC = ac;
GFI = gfi;
LibInfo = li;
Context = DAG.getContext();
LPadToCallSiteMap.clear();
+ this->TTI = &TTI;
SL->init(DAG.getTargetLoweringInfo(), TM, DAG.getDataLayout());
AssignmentTrackingEnabled = isAssignmentTrackingEnabled(
*DAG.getMachineFunction().getFunction().getParent());
@@ -2589,10 +2591,6 @@ bool SelectionDAGBuilder::shouldKeepJumpConditionsTogether(
if (!LhsDeps.contains(RhsI))
RhsDeps.try_emplace(RhsI, false);
- const auto &TLI = DAG.getTargetLoweringInfo();
- const auto &TTI =
- TLI.getTargetMachine().getTargetTransformInfo(*I.getFunction());
-
InstructionCost CostOfIncluding = 0;
// See if this instruction will need to computed independently of whether RHS
// is.
@@ -2632,8 +2630,8 @@ bool SelectionDAGBuilder::shouldKeepJumpConditionsTogether(
// RHS condition. Use latency because we are essentially trying to calculate
// the cost of the dependency chain.
// Possible TODO: We could try to estimate ILP and make this more precise.
- CostOfIncluding +=
- TTI.getInstructionCost(InsPair.first, TargetTransformInfo::TCK_Latency);
+ CostOfIncluding += TTI->getInstructionCost(
+ InsPair.first, TargetTransformInfo::TCK_Latency);
if (CostOfIncluding > CostThresh)
return false;
@@ -3507,16 +3505,46 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
DAG.getBasicBlock(Return)));
}
+/// The intrinsics currently supported by callbr are implicit control-flow
+/// intrinsics such as amdgcn.kill. They must satisfy the following:
+/// - they are expected to be called (no "dontcall-" attributes)
+/// - they do not touch memory on the target (i.e. !TLI.getTgtMemIntrinsic())
+/// - they do not need custom argument handling (no
+///   TLI.CollectTargetIntrinsicOperands())
+void SelectionDAGBuilder::visitCallBrIntrinsic(const CallBrInst &I) {
+ TargetLowering::IntrinsicInfo Info;
+ assert(!DAG.getTargetLoweringInfo().getTgtMemIntrinsic(
+ Info, I, DAG.getMachineFunction(), I.getIntrinsicID()) &&
+ "Intrinsic touches memory");
+
+ auto [HasChain, OnlyLoad] = getTargetIntrinsicCallProperties(I);
+
+ SmallVector<SDValue, 8> Ops =
+ getTargetIntrinsicOperands(I, HasChain, OnlyLoad);
+ SDVTList VTs = getTargetIntrinsicVTList(I, HasChain);
+
+ // Create the node.
+ SDValue Result =
+ getTargetNonMemIntrinsicNode(*I.getType(), HasChain, Ops, VTs);
+ Result = handleTargetIntrinsicRet(I, HasChain, OnlyLoad, Result);
+
+ setValue(&I, Result);
+}
+
void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
MachineBasicBlock *CallBrMBB = FuncInfo.MBB;
- // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
- // have to do anything here to lower funclet bundles.
- failForInvalidBundles(I, "callbrs",
- {LLVMContext::OB_deopt, LLVMContext::OB_funclet});
-
- assert(I.isInlineAsm() && "Only know how to handle inlineasm callbr");
- visitInlineAsm(I);
+ if (I.isInlineAsm()) {
+ // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
+ // have to do anything here to lower funclet bundles.
+ failForInvalidBundles(I, "callbrs",
+ {LLVMContext::OB_deopt, LLVMContext::OB_funclet});
+ visitInlineAsm(I);
+ } else {
+ assert(!I.hasOperandBundles() &&
+ "Can't have operand bundles for intrinsics");
+ visitCallBrIntrinsic(I);
+ }
CopyToExportRegsIfNeeded(&I);
// Retrieve successors.
@@ -3526,19 +3554,25 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
// Update successor info.
addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
- for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) {
- BasicBlock *Dest = I.getIndirectDest(i);
- MachineBasicBlock *Target = FuncInfo.getMBB(Dest);
- Target->setIsInlineAsmBrIndirectTarget();
- // If we introduce a type of asm goto statement that is permitted to use an
- // indirect call instruction to jump to its labels, then we should add a
- // call to Target->setMachineBlockAddressTaken() here, to mark the target
- // block as requiring a BTI.
-
- Target->setLabelMustBeEmitted();
- // Don't add duplicate machine successors.
- if (Dests.insert(Dest).second)
- addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
+ // TODO: In most cases where there is an intrinsic callbr, there is exactly
+ // one indirect target, which will be unreachable. As soon as this changes,
+ // we might need to enhance Target->setIsInlineAsmBrIndirectTarget or add
+ // something similar for intrinsic indirect branches.
+ if (I.isInlineAsm()) {
+ for (BasicBlock *Dest : I.getIndirectDests()) {
+ MachineBasicBlock *Target = FuncInfo.getMBB(Dest);
+ Target->setIsInlineAsmBrIndirectTarget();
+ // If we introduce a type of asm goto statement that is permitted to use
+ // an indirect call instruction to jump to its labels, then we should add
+ // a call to Target->setMachineBlockAddressTaken() here, to mark the
+ // target block as requiring a BTI.
+
+ Target->setLabelMustBeEmitted();
+ // Don't add duplicate machine successors.
+ if (Dests.insert(Dest).second)
+ addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
+ }
}
CallBrMBB->normalizeSuccProbs();
@@ -3977,7 +4011,10 @@ void SelectionDAGBuilder::visitFPExt(const User &I) {
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
- setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N));
+ SDNodeFlags Flags;
+ if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
+ Flags.copyFMF(*FPOp);
+ setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N, Flags));
}
void SelectionDAGBuilder::visitFPToUI(const User &I) {
@@ -4584,17 +4621,9 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
if (AllocSize.getValueType() != IntPtr)
AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);
- if (TySize.isScalable())
- AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize,
- DAG.getVScale(dl, IntPtr,
- APInt(IntPtr.getScalarSizeInBits(),
- TySize.getKnownMinValue())));
- else {
- SDValue TySizeValue =
- DAG.getConstant(TySize.getFixedValue(), dl, MVT::getIntegerVT(64));
- AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize,
- DAG.getZExtOrTrunc(TySizeValue, dl, IntPtr));
- }
+ AllocSize = DAG.getNode(
+ ISD::MUL, dl, IntPtr, AllocSize,
+ DAG.getZExtOrTrunc(DAG.getTypeSize(dl, MVT::i64, TySize), dl, IntPtr));
// Handle alignment. If the requested alignment is less than or equal to
// the stack alignment, ignore it. If the size is greater than or equal to
@@ -4639,6 +4668,12 @@ static std::optional<ConstantRange> getRange(const Instruction &I) {
return std::nullopt;
}
+static FPClassTest getNoFPClass(const Instruction &I) {
+ if (const auto *CB = dyn_cast<CallBase>(&I))
+ return CB->getRetNoFPClass();
+ return fcNone;
+}
+
void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (I.isAtomic())
return visitAtomicLoad(I);
@@ -4759,7 +4794,7 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
SmallVector<uint64_t, 4> Offsets;
const Value *SrcV = I.getOperand(0);
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
- SrcV->getType(), ValueVTs, &Offsets, 0);
+ SrcV->getType(), ValueVTs, /*MemVTs=*/nullptr, &Offsets, 0);
assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
"expect a single EVT for swifterror");
@@ -4795,7 +4830,7 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty,
- ValueVTs, &Offsets, 0);
+ ValueVTs, /*MemVTs=*/nullptr, &Offsets, 0);
assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
"expect a single EVT for swifterror");
@@ -4907,10 +4942,9 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
LocationSize::beforeOrAfterPointer(), Alignment, I.getAAMetadata());
const auto &TLI = DAG.getTargetLoweringInfo();
- const auto &TTI =
- TLI.getTargetMachine().getTargetTransformInfo(*I.getFunction());
+
SDValue StoreNode =
- !IsCompressing && TTI.hasConditionalLoadStoreForType(
+ !IsCompressing && TTI->hasConditionalLoadStoreForType(
I.getArgOperand(0)->getType(), /*IsStore=*/true)
? TLI.visitMaskedStore(DAG, sdl, getMemoryRoot(), MMO, Ptr, Src0,
Mask)
@@ -5059,20 +5093,22 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
auto MMOFlags = MachineMemOperand::MOLoad;
if (I.hasMetadata(LLVMContext::MD_nontemporal))
MMOFlags |= MachineMemOperand::MONonTemporal;
+ if (I.hasMetadata(LLVMContext::MD_invariant_load))
+ MMOFlags |= MachineMemOperand::MOInvariant;
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MMOFlags,
LocationSize::beforeOrAfterPointer(), Alignment, AAInfo, Ranges);
const auto &TLI = DAG.getTargetLoweringInfo();
- const auto &TTI =
- TLI.getTargetMachine().getTargetTransformInfo(*I.getFunction());
+
// The Load/Res may point to different values and both of them are output
// variables.
SDValue Load;
SDValue Res;
- if (!IsExpanding && TTI.hasConditionalLoadStoreForType(Src0Operand->getType(),
- /*IsStore=*/false))
+ if (!IsExpanding &&
+ TTI->hasConditionalLoadStoreForType(Src0Operand->getType(),
+ /*IsStore=*/false))
Res = TLI.visitMaskedLoad(DAG, sdl, InChain, MMO, Load, Ptr, Src0, Mask);
else
Res = Load =
@@ -5313,18 +5349,26 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
DAG.setRoot(OutChain);
}
-/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
-/// node.
-void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
- unsigned Intrinsic) {
- // Ignore the callsite's attributes. A specific call site may be marked with
- // readnone, but the lowering code will expect the chain based on the
- // definition.
+/// Check if this intrinsic call depends on the chain (1st return value)
+/// and if it only *loads* memory.
+/// Ignore the callsite's attributes. A specific call site may be marked with
+/// readnone, but the lowering code will expect the chain based on the
+/// definition.
+std::pair<bool, bool>
+SelectionDAGBuilder::getTargetIntrinsicCallProperties(const CallBase &I) {
const Function *F = I.getCalledFunction();
bool HasChain = !F->doesNotAccessMemory();
bool OnlyLoad =
HasChain && F->onlyReadsMemory() && F->willReturn() && F->doesNotThrow();
+ return {HasChain, OnlyLoad};
+}
+
+SmallVector<SDValue, 8> SelectionDAGBuilder::getTargetIntrinsicOperands(
+ const CallBase &I, bool HasChain, bool OnlyLoad,
+ TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
// Build the operand list.
SmallVector<SDValue, 8> Ops;
if (HasChain) { // If this intrinsic has side-effects, chainify it.
@@ -5336,17 +5380,10 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
}
}
- // Info is set by getTgtMemIntrinsic
- TargetLowering::IntrinsicInfo Info;
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
- DAG.getMachineFunction(),
- Intrinsic);
-
// Add the intrinsic ID as an integer operand if it's not a target intrinsic.
- if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
- Info.opc == ISD::INTRINSIC_W_CHAIN)
- Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(),
+ if (!TgtMemIntrinsicInfo || TgtMemIntrinsicInfo->opc == ISD::INTRINSIC_VOID ||
+ TgtMemIntrinsicInfo->opc == ISD::INTRINSIC_W_CHAIN)
+ Ops.push_back(DAG.getTargetConstant(I.getIntrinsicID(), getCurSDLoc(),
TLI.getPointerTy(DAG.getDataLayout())));
// Add all operands of the call to the operand list.
@@ -5369,13 +5406,93 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
}
}
+ if (std::optional<OperandBundleUse> Bundle =
+ I.getOperandBundle(LLVMContext::OB_deactivation_symbol)) {
+ auto *Sym = Bundle->Inputs[0].get();
+ SDValue SDSym = DAG.getDeactivationSymbol(cast<GlobalValue>(Sym));
+ Ops.push_back(SDSym);
+ }
+
+ if (std::optional<OperandBundleUse> Bundle =
+ I.getOperandBundle(LLVMContext::OB_convergencectrl)) {
+ Value *Token = Bundle->Inputs[0].get();
+ SDValue ConvControlToken = getValue(Token);
+ assert(Ops.back().getValueType() != MVT::Glue &&
+ "Did not expect another glue node here.");
+ ConvControlToken =
+ DAG.getNode(ISD::CONVERGENCECTRL_GLUE, {}, MVT::Glue, ConvControlToken);
+ Ops.push_back(ConvControlToken);
+ }
+
+ return Ops;
+}
+
+SDVTList SelectionDAGBuilder::getTargetIntrinsicVTList(const CallBase &I,
+ bool HasChain) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
if (HasChain)
ValueVTs.push_back(MVT::Other);
- SDVTList VTs = DAG.getVTList(ValueVTs);
+ return DAG.getVTList(ValueVTs);
+}
+
+/// Get an INTRINSIC node for a target intrinsic which does not touch memory.
+SDValue SelectionDAGBuilder::getTargetNonMemIntrinsicNode(
+ const Type &IntrinsicVT, bool HasChain, ArrayRef<SDValue> Ops,
+ const SDVTList &VTs) {
+ if (!HasChain)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
+ if (!IntrinsicVT.isVoidTy())
+ return DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
+ return DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
+}
+
+/// Set root, convert return type if necessary and check alignment.
+SDValue SelectionDAGBuilder::handleTargetIntrinsicRet(const CallBase &I,
+ bool HasChain,
+ bool OnlyLoad,
+ SDValue Result) {
+ if (HasChain) {
+ SDValue Chain = Result.getValue(Result.getNode()->getNumValues() - 1);
+ if (OnlyLoad)
+ PendingLoads.push_back(Chain);
+ else
+ DAG.setRoot(Chain);
+ }
+
+ if (I.getType()->isVoidTy())
+ return Result;
+
+ if (MaybeAlign Alignment = I.getRetAlign(); InsertAssertAlign && Alignment) {
+ // Insert `assertalign` node if there's an alignment.
+ Result = DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne());
+ } else if (!isa<VectorType>(I.getType())) {
+ Result = lowerRangeToAssertZExt(DAG, I, Result);
+ }
+
+ return Result;
+}
+
+/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
+/// node.
+void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
+ unsigned Intrinsic) {
+ auto [HasChain, OnlyLoad] = getTargetIntrinsicCallProperties(I);
+
+ // Info is set by getTgtMemIntrinsic
+ TargetLowering::IntrinsicInfo Info;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ bool IsTgtMemIntrinsic =
+ TLI.getTgtMemIntrinsic(Info, I, DAG.getMachineFunction(), Intrinsic);
+
+ SmallVector<SDValue, 8> Ops = getTargetIntrinsicOperands(
+ I, HasChain, OnlyLoad, IsTgtMemIntrinsic ? &Info : nullptr);
+ SDVTList VTs = getTargetIntrinsicVTList(I, HasChain);
// Propagate fast-math-flags from IR to node(s).
SDNodeFlags Flags;
@@ -5386,19 +5503,9 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// Create the node.
SDValue Result;
- if (auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl)) {
- auto *Token = Bundle->Inputs[0].get();
- SDValue ConvControlToken = getValue(Token);
- assert(Ops.back().getValueType() != MVT::Glue &&
- "Did not expected another glue node here.");
- ConvControlToken =
- DAG.getNode(ISD::CONVERGENCECTRL_GLUE, {}, MVT::Glue, ConvControlToken);
- Ops.push_back(ConvControlToken);
- }
-
// In some cases, custom collection of operands from CallInst I may be needed.
TLI.CollectTargetIntrinsicOperands(I, Ops, DAG);
- if (IsTgtIntrinsic) {
+ if (IsTgtMemIntrinsic) {
// This is target intrinsic that touches memory
//
// TODO: We currently just fallback to address space 0 if getTgtMemIntrinsic
@@ -5418,34 +5525,11 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
Info.ssid, Info.order, Info.failureOrder);
Result =
DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, MemVT, MMO);
- } else if (!HasChain) {
- Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
- } else if (!I.getType()->isVoidTy()) {
- Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
} else {
- Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
+ Result = getTargetNonMemIntrinsicNode(*I.getType(), HasChain, Ops, VTs);
}
- if (HasChain) {
- SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
- if (OnlyLoad)
- PendingLoads.push_back(Chain);
- else
- DAG.setRoot(Chain);
- }
-
- if (!I.getType()->isVoidTy()) {
- if (!isa<VectorType>(I.getType()))
- Result = lowerRangeToAssertZExt(DAG, I, Result);
-
- MaybeAlign Alignment = I.getRetAlign();
-
- // Insert `assertalign` node if there's an alignment.
- if (InsertAssertAlign && Alignment) {
- Result =
- DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne());
- }
- }
+ Result = handleTargetIntrinsicRet(I, HasChain, OnlyLoad, Result);
setValue(&I, Result);
}
@@ -7772,6 +7856,17 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
return;
}
+ case Intrinsic::reloc_none: {
+ Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(0))->getMetadata();
+ StringRef SymbolName = cast<MDString>(MD)->getString();
+ SDValue Ops[2] = {
+ getRoot(),
+ DAG.getTargetExternalSymbol(
+ SymbolName.data(), TLI.getProgramPointerTy(DAG.getDataLayout()))};
+ DAG.setRoot(DAG.getNode(ISD::RELOC_NONE, sdl, MVT::Other, Ops));
+ return;
+ }
+
case Intrinsic::eh_exceptionpointer:
case Intrinsic::eh_exceptioncode: {
// Get the exception pointer vreg, copy from it, and resize it to fit.
@@ -8137,6 +8232,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
Input, DAG.getConstant(1, sdl, Input.getValueType())));
return;
}
+ case Intrinsic::vector_partial_reduce_fadd: {
+ SDValue Acc = getValue(I.getOperand(0));
+ SDValue Input = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(
+ ISD::PARTIAL_REDUCE_FMLA, sdl, Acc.getValueType(), Acc,
+ Input, DAG.getConstantFP(1.0, sdl, Input.getValueType())));
+ return;
+ }
case Intrinsic::experimental_cttz_elts: {
auto DL = getCurSDLoc();
SDValue Op = getValue(I.getOperand(0));
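Back to the vector_partial_reduce_fadd case above: the new PARTIAL_REDUCE_FMLA node folds a wide input vector into a narrower accumulator. Which input lane lands in which accumulator lane is not fixed by the intrinsic (only the value after a final full reduction is), so any strided assignment is a valid scalar model; a round-robin one, for illustration:

#include <cstddef>
#include <vector>

std::vector<float> partialReduceFAdd(std::vector<float> Acc,
                                     const std::vector<float> &In) {
  // One valid lane assignment: distribute input lanes round-robin over the
  // shorter accumulator and add.
  for (std::size_t J = 0; J < In.size(); ++J)
    Acc[J % Acc.size()] += In[J];
  return Acc;
}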
@@ -8958,9 +9061,8 @@ bool SelectionDAGBuilder::canTailCall(const CallBase &CB) const {
// Avoid emitting tail calls in functions with the disable-tail-calls
// attribute.
const Function *Caller = CB.getParent()->getParent();
- if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() ==
- "true" &&
- !isMustTailCall)
+ if (!isMustTailCall &&
+ Caller->getFnAttribute("disable-tail-calls").getValueAsBool())
return false;
// We can't tail call inside a function with a swifterror argument. Lowering
@@ -9052,6 +9154,11 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
ConvControlToken = getValue(Token);
}
+ GlobalValue *DeactivationSymbol = nullptr;
+ if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_deactivation_symbol)) {
+ DeactivationSymbol = cast<GlobalValue>(Bundle->Inputs[0].get());
+ }
+
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc())
.setChain(getRoot())
@@ -9061,7 +9168,8 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
.setIsPreallocated(
CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0)
.setCFIType(CFIType)
- .setConvergenceControlToken(ConvControlToken);
+ .setConvergenceControlToken(ConvControlToken)
+ .setDeactivationSymbol(DeactivationSymbol);
// Set the pointer authentication info if we have it.
if (PAI) {
@@ -9075,6 +9183,7 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
if (Result.first.getNode()) {
Result.first = lowerRangeToAssertZExt(DAG, CB, Result.first);
+ Result.first = lowerNoFPClassToAssertNoFPClass(DAG, CB, Result.first);
setValue(&CB, Result.first);
}
@@ -9392,7 +9501,9 @@ bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
unsigned Opcode) {
// We already checked this call's prototype; verify it doesn't modify errno.
- if (!I.onlyReadsMemory())
+ // Do not perform optimizations for call sites that require strict
+ // floating-point semantics.
+ if (!I.onlyReadsMemory() || I.isStrictFP())
return false;
SDNodeFlags Flags;
@@ -9412,7 +9523,9 @@ bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I,
unsigned Opcode) {
// We already checked this call's prototype; verify it doesn't modify errno.
- if (!I.onlyReadsMemory())
+ // Do not perform optimizations for call sites that require strict
+ // floating-point semantics.
+ if (!I.onlyReadsMemory() || I.isStrictFP())
return false;
SDNodeFlags Flags;
@@ -9445,11 +9558,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
// Check for well-known libc/libm calls. If the function is internal, it
// can't be a library call. Don't do the check if marked as nobuiltin for
- // some reason or the call site requires strict floating point semantics.
+ // some reason.
+ // This code should not handle libcalls that are already canonicalized to
+ // intrinsics by the middle-end.
LibFunc Func;
- if (!I.isNoBuiltin() && !I.isStrictFP() && !F->hasLocalLinkage() &&
- F->hasName() && LibInfo->getLibFunc(*F, Func) &&
- LibInfo->hasOptimizedCodeGen(Func)) {
+ if (!I.isNoBuiltin() && !F->hasLocalLinkage() && F->hasName() &&
+ LibInfo->getLibFunc(*F, Func) && LibInfo->hasOptimizedCodeGen(Func)) {
switch (Func) {
default: break;
case LibFunc_bcmp:
@@ -9472,30 +9586,35 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
case LibFunc_fabs:
case LibFunc_fabsf:
case LibFunc_fabsl:
+ // TODO: Remove this, already canonicalized by the middle-end.
if (visitUnaryFloatCall(I, ISD::FABS))
return;
break;
case LibFunc_fmin:
case LibFunc_fminf:
case LibFunc_fminl:
+ // TODO: Remove this, already canonicalized by the middle-end.
if (visitBinaryFloatCall(I, ISD::FMINNUM))
return;
break;
case LibFunc_fmax:
case LibFunc_fmaxf:
case LibFunc_fmaxl:
+ // TODO: Remove this, already canonicalized by the middle-end.
if (visitBinaryFloatCall(I, ISD::FMAXNUM))
return;
break;
case LibFunc_fminimum_num:
case LibFunc_fminimum_numf:
case LibFunc_fminimum_numl:
+ // TODO: Remove this, already canonicalized by the middle-end.
if (visitBinaryFloatCall(I, ISD::FMINIMUMNUM))
return;
break;
case LibFunc_fmaximum_num:
case LibFunc_fmaximum_numf:
case LibFunc_fmaximum_numl:
+ // TODO: Remove this, already canonicalized by the middle-end.
if (visitBinaryFloatCall(I, ISD::FMAXIMUMNUM))
return;
break;
@@ -9571,36 +9690,35 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
case LibFunc_floor:
case LibFunc_floorf:
case LibFunc_floorl:
+ // TODO: Remove this, already canonicalized by the middle-end.
if (visitUnaryFloatCall(I, ISD::FFLOOR))
return;
break;
- case LibFunc_nearbyint:
- case LibFunc_nearbyintf:
- case LibFunc_nearbyintl:
- if (visitUnaryFloatCall(I, ISD::FNEARBYINT))
- return;
- break;
case LibFunc_ceil:
case LibFunc_ceilf:
case LibFunc_ceill:
+ // TODO: Remove this, already canonicalized by the middle-end.
if (visitUnaryFloatCall(I, ISD::FCEIL))
return;
break;
case LibFunc_rint:
case LibFunc_rintf:
case LibFunc_rintl:
+ // TODO: Remove this, already canonicalized by the middle-end.
if (visitUnaryFloatCall(I, ISD::FRINT))
return;
break;
case LibFunc_round:
case LibFunc_roundf:
case LibFunc_roundl:
+ // TODO: Remove this, already canonicalized by the middle-end.
if (visitUnaryFloatCall(I, ISD::FROUND))
return;
break;
case LibFunc_trunc:
case LibFunc_truncf:
case LibFunc_truncl:
+ // TODO: Remove this, already canonicalized by the middle-end.
if (visitUnaryFloatCall(I, ISD::FTRUNC))
return;
break;
@@ -9677,7 +9795,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
{LLVMContext::OB_deopt, LLVMContext::OB_funclet,
LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated,
LLVMContext::OB_clang_arc_attachedcall, LLVMContext::OB_kcfi,
- LLVMContext::OB_convergencectrl});
+ LLVMContext::OB_convergencectrl, LLVMContext::OB_deactivation_symbol});
SDValue Callee = getValue(I.getCalledOperand());
@@ -10661,6 +10779,30 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
return DAG.getMergeValues(Ops, SL);
}
+SDValue SelectionDAGBuilder::lowerNoFPClassToAssertNoFPClass(
+ SelectionDAG &DAG, const Instruction &I, SDValue Op) {
+ FPClassTest Classes = getNoFPClass(I);
+ if (Classes == fcNone)
+ return Op;
+
+ SDLoc SL = getCurSDLoc();
+ SDValue TestConst = DAG.getTargetConstant(Classes, SL, MVT::i32);
+
+ if (Op.getOpcode() != ISD::MERGE_VALUES) {
+ return DAG.getNode(ISD::AssertNoFPClass, SL, Op.getValueType(), Op,
+ TestConst);
+ }
+
+ SmallVector<SDValue, 8> Ops(Op.getNumOperands());
+ for (unsigned I = 0, E = Ops.size(); I != E; ++I) {
+ SDValue MergeOp = Op.getOperand(I);
+ Ops[I] = DAG.getNode(ISD::AssertNoFPClass, SL, MergeOp.getValueType(),
+ MergeOp, TestConst);
+ }
+
+ return DAG.getMergeValues(Ops, SL);
+}
+
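For intuition about what the new hook records: nofpclass(...) on a call's return rules out whole floating-point classes, and lowerNoFPClassToAssertNoFPClass wraps the call result (or each MERGE_VALUES member) in an AssertNoFPClass node carrying that mask. A standalone model of such a class mask, with bit values that are assumptions of this sketch rather than LLVM's actual FPClassTest constants:

#include <cmath>

enum FPClass { fcNone = 0, fcNan = 1, fcInf = 2, fcZero = 4,
               fcSubnormal = 8, fcNormal = 16 }; // illustrative values only

int classify(double X) {
  if (std::isnan(X)) return fcNan;
  if (std::isinf(X)) return fcInf;
  if (X == 0.0) return fcZero;
  return std::fpclassify(X) == FP_SUBNORMAL ? fcSubnormal : fcNormal;
}

// A call marked nofpclass(nan inf) guarantees
//   (classify(result) & (fcNan | fcInf)) == 0,
// which is the fact AssertNoFPClass records for later combines to use.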
/// Populate a CallLoweringInfo (into \p CLI) based on the properties of
/// the call being lowered.
///
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 47e19f7..6f3e7a6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -232,6 +232,7 @@ public:
BatchAAResults *BatchAA = nullptr;
AssumptionCache *AC = nullptr;
const TargetLibraryInfo *LibInfo = nullptr;
+ const TargetTransformInfo *TTI = nullptr;
class SDAGSwitchLowering : public SwitchCG::SwitchLowering {
public:
@@ -285,7 +286,7 @@ public:
FuncInfo(funcinfo), SwiftError(swifterror) {}
void init(GCFunctionInfo *gfi, BatchAAResults *BatchAA, AssumptionCache *AC,
- const TargetLibraryInfo *li);
+ const TargetLibraryInfo *li, const TargetTransformInfo &TTI);
/// Clear out the current SelectionDAG and the associated state and prepare
/// this SelectionDAGBuilder object to be used for a new block. This doesn't
@@ -429,6 +430,10 @@ public:
SDValue lowerRangeToAssertZExt(SelectionDAG &DAG, const Instruction &I,
SDValue Op);
+ // Lower nofpclass attributes to AssertNoFPClass
+ SDValue lowerNoFPClassToAssertNoFPClass(SelectionDAG &DAG,
+ const Instruction &I, SDValue Op);
+
void populateCallLoweringInfo(TargetLowering::CallLoweringInfo &CLI,
const CallBase *Call, unsigned ArgIdx,
unsigned NumArgs, SDValue Callee,
@@ -551,10 +556,12 @@ public:
private:
// These all get lowered before this pass.
void visitInvoke(const InvokeInst &I);
- void visitCallBr(const CallBrInst &I);
void visitCallBrLandingPad(const CallInst &I);
void visitResume(const ResumeInst &I);
+ void visitCallBr(const CallBrInst &I);
+ void visitCallBrIntrinsic(const CallBrInst &I);
+
void visitUnary(const User &I, unsigned Opcode);
void visitFNeg(const User &I) { visitUnary(I, ISD::FNEG); }
@@ -727,6 +734,17 @@ private:
MCSymbol *&BeginLabel);
SDValue lowerEndEH(SDValue Chain, const InvokeInst *II,
const BasicBlock *EHPadBB, MCSymbol *BeginLabel);
+
+ std::pair<bool, bool> getTargetIntrinsicCallProperties(const CallBase &I);
+ SmallVector<SDValue, 8> getTargetIntrinsicOperands(
+ const CallBase &I, bool HasChain, bool OnlyLoad,
+ TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo = nullptr);
+ SDVTList getTargetIntrinsicVTList(const CallBase &I, bool HasChain);
+ SDValue getTargetNonMemIntrinsicNode(const Type &IntrinsicVT, bool HasChain,
+ ArrayRef<SDValue> Ops,
+ const SDVTList &VTs);
+ SDValue handleTargetIntrinsicRet(const CallBase &I, bool HasChain,
+ bool OnlyLoad, SDValue Result);
};
/// This struct represents the registers (physical or virtual)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 77377d3..ec5edd5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -472,6 +472,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::LIFETIME_END: return "lifetime.end";
case ISD::FAKE_USE:
return "fake_use";
+ case ISD::RELOC_NONE:
+ return "reloc_none";
case ISD::PSEUDO_PROBE:
return "pseudoprobe";
case ISD::GC_TRANSITION_START: return "gc_transition.start";
@@ -588,6 +590,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
return "partial_reduce_smla";
case ISD::PARTIAL_REDUCE_SUMLA:
return "partial_reduce_sumla";
+ case ISD::PARTIAL_REDUCE_FMLA:
+ return "partial_reduce_fmla";
case ISD::LOOP_DEPENDENCE_WAR_MASK:
return "loop_dep_war";
case ISD::LOOP_DEPENDENCE_RAW_MASK:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 6c11c5b..dd8f18d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -519,9 +519,7 @@ void SelectionDAGISel::initializeAnalysisResults(
SP = &FAM.getResult<SSPLayoutAnalysis>(Fn);
-#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS
TTI = &FAM.getResult<TargetIRAnalysis>(Fn);
-#endif
}
void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) {
@@ -578,9 +576,7 @@ void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) {
SP = &MFP.getAnalysis<StackProtector>().getLayoutInfo();
-#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS
TTI = &MFP.getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn);
-#endif
}
bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
@@ -593,7 +589,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
ISEL_DUMP(dbgs() << "\n\n\n=== " << FuncName << '\n');
- SDB->init(GFI, getBatchAA(), AC, LibInfo);
+ SDB->init(GFI, getBatchAA(), AC, LibInfo, *TTI);
MF->setHasInlineAsm(false);
@@ -2448,7 +2444,7 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
// a cycle in the scheduling graph.
// If the node has glue, walk down the graph to the "lowest" node in the
- // glueged set.
+ // glued set.
EVT VT = Root->getValueType(Root->getNumValues()-1);
while (VT == MVT::Glue) {
SDNode *GU = Root->getGluedUser();
@@ -2550,6 +2546,11 @@ void SelectionDAGISel::Select_FAKE_USE(SDNode *N) {
N->getOperand(1), N->getOperand(0));
}
+void SelectionDAGISel::Select_RELOC_NONE(SDNode *N) {
+ CurDAG->SelectNodeTo(N, TargetOpcode::RELOC_NONE, N->getValueType(0),
+ N->getOperand(1), N->getOperand(0));
+}
+
void SelectionDAGISel::Select_FREEZE(SDNode *N) {
// TODO: We don't have FREEZE pseudo-instruction in MachineInstr-level now.
// If FREEZE instruction is added later, the code below must be changed as
@@ -2777,8 +2778,8 @@ void SelectionDAGISel::UpdateChains(
/// induce cycles in the DAG) and if so, creating a TokenFactor node. that will
/// be used as the input node chain for the generated nodes.
static SDValue
-HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
- SelectionDAG *CurDAG) {
+HandleMergeInputChains(const SmallVectorImpl<SDNode *> &ChainNodesMatched,
+ SDValue InputGlue, SelectionDAG *CurDAG) {
SmallPtrSet<const SDNode *, 16> Visited;
SmallVector<const SDNode *, 8> Worklist;
@@ -2821,8 +2822,16 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
// node that is both the predecessor and successor of the
// to-be-merged nodes. Fail.
Visited.clear();
- for (SDValue V : InputChains)
+ for (SDValue V : InputChains) {
+ // If we need to create a TokenFactor, and any of the input chain nodes will
+ // also be glued to the output, we cannot merge the chains. The TokenFactor
+ // would prevent the glue from being honored.
+ if (InputChains.size() != 1 &&
+ V->getValueType(V->getNumValues() - 1) == MVT::Glue &&
+ InputGlue.getNode() == V.getNode())
+ return SDValue();
Worklist.push_back(V.getNode());
+ }
for (auto *N : ChainNodesMatched)
if (SDNode::hasPredecessorHelper(N, Visited, Worklist, Max, true))
@@ -3299,6 +3308,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
case ISD::LIFETIME_START:
case ISD::LIFETIME_END:
case ISD::PSEUDO_PROBE:
+ case ISD::DEACTIVATION_SYMBOL:
NodeToMatch->setNodeId(-1); // Mark selected.
return;
case ISD::AssertSext:
@@ -3325,6 +3335,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
case ISD::FAKE_USE:
Select_FAKE_USE(NodeToMatch);
return;
+ case ISD::RELOC_NONE:
+ Select_RELOC_NONE(NodeToMatch);
+ return;
case ISD::FREEZE:
Select_FREEZE(NodeToMatch);
return;
@@ -3377,7 +3390,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// These are the current input chain and glue for use when generating nodes.
// Various Emit operations change these. For example, emitting a copytoreg
// uses and updates these.
- SDValue InputChain, InputGlue;
+ SDValue InputChain, InputGlue, DeactivationSymbol;
// ChainNodesMatched - If a pattern matches nodes that have input/output
// chains, the OPC_EmitMergeInputChains operation is emitted which indicates
@@ -3530,6 +3543,15 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
InputGlue = N->getOperand(N->getNumOperands()-1);
continue;
+ case OPC_CaptureDeactivationSymbol:
+ // If the current node has a deactivation symbol, capture it in
+ // DeactivationSymbol.
+ if (N->getNumOperands() != 0 &&
+ N->getOperand(N->getNumOperands() - 1).getOpcode() ==
+ ISD::DEACTIVATION_SYMBOL)
+ DeactivationSymbol = N->getOperand(N->getNumOperands() - 1);
+ continue;
+
case OPC_MoveChild: {
unsigned ChildNo = MatcherTable[MatcherIndex++];
if (ChildNo >= N.getNumOperands())
@@ -3981,7 +4003,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
}
// Merge the input chains if they are not intra-pattern references.
- InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
+ InputChain = HandleMergeInputChains(ChainNodesMatched, InputGlue, CurDAG);
if (!InputChain.getNode())
break; // Failed to merge.
@@ -4025,7 +4047,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
break;
// Merge the input chains if they are not intra-pattern references.
- InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
+ InputChain = HandleMergeInputChains(ChainNodesMatched, InputGlue, CurDAG);
if (!InputChain.getNode())
break; // Failed to merge.
@@ -4211,6 +4233,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// If this has chain/glue inputs, add them.
if (EmitNodeInfo & OPFL_Chain)
Ops.push_back(InputChain);
+ if (DeactivationSymbol.getNode() != nullptr)
+ Ops.push_back(DeactivationSymbol);
if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != nullptr)
Ops.push_back(InputGlue);
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 9bdf822..c65ddc6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -41,8 +41,9 @@ using namespace llvm;
using namespace llvm::SDPatternMatch;
/// NOTE: The TargetMachine owns TLOF.
-TargetLowering::TargetLowering(const TargetMachine &tm)
- : TargetLoweringBase(tm) {}
+TargetLowering::TargetLowering(const TargetMachine &tm,
+ const TargetSubtargetInfo &STI)
+ : TargetLoweringBase(tm, STI) {}
// Define the virtual destructor out-of-line for build efficiency.
TargetLowering::~TargetLowering() = default;
@@ -151,11 +152,13 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
std::pair<SDValue, SDValue>
-TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
- ArrayRef<SDValue> Ops,
- MakeLibCallOptions CallOptions,
- const SDLoc &dl,
+TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl,
+ EVT RetVT, ArrayRef<SDValue> Ops,
+ MakeLibCallOptions CallOptions, const SDLoc &dl,
SDValue InChain) const {
+ if (LibcallImpl == RTLIB::Unsupported)
+ reportFatalInternalError("unsupported library call operation");
+
if (!InChain)
InChain = DAG.getEntryNode();
@@ -184,12 +187,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
Args.push_back(Entry);
}
- const char *LibcallName = getLibcallName(LC);
- if (LC == RTLIB::UNKNOWN_LIBCALL || !LibcallName)
- reportFatalInternalError("unsupported library call operation");
-
SDValue Callee =
- DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
+ DAG.getExternalSymbol(LibcallImpl, getPointerTy(DAG.getDataLayout()));
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
Type *OrigRetTy = RetTy;
@@ -205,8 +204,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
CLI.setDebugLoc(dl)
.setChain(InChain)
- .setLibCallee(getLibcallCallingConv(LC), RetTy, OrigRetTy, Callee,
- std::move(Args))
+ .setLibCallee(getLibcallImplCallingConv(LibcallImpl), RetTy, OrigRetTy,
+ Callee, std::move(Args))
.setNoReturn(CallOptions.DoesNotReturn)
.setDiscardResult(!CallOptions.IsReturnValueUsed)
.setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
@@ -6344,7 +6343,6 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
EVT VT = N->getValueType(0);
- EVT SVT = VT.getScalarType();
EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
EVT ShSVT = ShVT.getScalarType();
@@ -6354,6 +6352,8 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
auto BuildSDIVPattern = [&](ConstantSDNode *C) {
if (C->isZero())
return false;
+
+ EVT CT = C->getValueType(0);
APInt Divisor = C->getAPIntValue();
unsigned Shift = Divisor.countr_zero();
if (Shift) {
@@ -6362,12 +6362,13 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
}
APInt Factor = Divisor.multiplicativeInverse();
Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
- Factors.push_back(DAG.getConstant(Factor, dl, SVT));
+ Factors.push_back(DAG.getConstant(Factor, dl, CT));
return true;
};
// Collect all magic values from the build vector.
- if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
+ if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern, /*AllowUndefs=*/false,
+ /*AllowTruncation=*/true))
return SDValue();
SDValue Shift, Factor;
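Before the next hunk, a concrete instance of the transform this lambda builds. For a division known to be exact, dividing by 6 becomes a right shift by one (stripping the power-of-two factor) followed by a multiply with the modular inverse of the remaining odd factor 3:

#include <cassert>
#include <cstdint>

// 3 * 0xAAAAAAAB == 1 (mod 2^32), so multiplying by 0xAAAAAAAB undoes a
// multiplication by 3. Valid only when the division is exact, as
// BuildExactSDIV/BuildExactUDIV require.
uint32_t exactDiv6(uint32_t X) {
  uint32_t OddPart = X >> 1;     // divide out the power-of-two factor
  return OddPart * 0xAAAAAAABu;  // multiply by inverse of 3 mod 2^32
}

void exactDiv6Check() {
  assert(exactDiv6(42) == 7 && exactDiv6(6000) == 1000);
}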
@@ -6402,7 +6403,6 @@ static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) {
EVT VT = N->getValueType(0);
- EVT SVT = VT.getScalarType();
EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
EVT ShSVT = ShVT.getScalarType();
@@ -6412,6 +6412,8 @@ static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
auto BuildUDIVPattern = [&](ConstantSDNode *C) {
if (C->isZero())
return false;
+
+ EVT CT = C->getValueType(0);
APInt Divisor = C->getAPIntValue();
unsigned Shift = Divisor.countr_zero();
if (Shift) {
@@ -6421,14 +6423,15 @@ static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
// Calculate the multiplicative inverse modulo BW.
APInt Factor = Divisor.multiplicativeInverse();
Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
- Factors.push_back(DAG.getConstant(Factor, dl, SVT));
+ Factors.push_back(DAG.getConstant(Factor, dl, CT));
return true;
};
SDValue Op1 = N->getOperand(1);
// Collect all magic values from the build vector.
- if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern))
+ if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern, /*AllowUndefs=*/false,
+ /*AllowTruncation=*/true))
return SDValue();
SDValue Shift, Factor;
@@ -6561,8 +6564,9 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
auto BuildSDIVPattern = [&](ConstantSDNode *C) {
if (C->isZero())
return false;
-
- const APInt &Divisor = C->getAPIntValue();
+ // Truncate the divisor to the target scalar type in case it was promoted
+ // during type legalization.
+ APInt Divisor = C->getAPIntValue().trunc(EltBits);
SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);
int NumeratorFactor = 0;
int ShiftMask = -1;
@@ -6592,7 +6596,8 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
SDValue N1 = N->getOperand(1);
// Collect the shifts / magic values from each element.
- if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
+ if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern, /*AllowUndefs=*/false,
+ /*AllowTruncation=*/true))
return SDValue();
SDValue MagicFactor, Factor, Shift, ShiftMask;
@@ -6737,7 +6742,9 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
auto BuildUDIVPattern = [&](ConstantSDNode *C) {
if (C->isZero())
return false;
- const APInt& Divisor = C->getAPIntValue();
+ // Truncate the divisor to the target scalar type in case it was promoted
+ // during type legalization.
+ APInt Divisor = C->getAPIntValue().trunc(EltBits);
SDValue PreShift, MagicFactor, NPQFactor, PostShift;
@@ -6778,7 +6785,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
};
// Collect the shifts/magic values from each element.
- if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
+ if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern, /*AllowUndefs=*/false,
+ /*AllowTruncation=*/true))
return SDValue();
SDValue PreShift, PostShift, MagicFactor, NPQFactor;
@@ -8851,6 +8859,7 @@ SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO);
}
+ // Always prefer RHS if equal.
SDValue MinMax =
DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
@@ -8865,13 +8874,19 @@ SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
- SDValue LCmp = DAG.getSelect(
- DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
+ EVT IntVT = VT.changeTypeToInteger();
+ EVT FloatVT = VT.changeElementType(MVT::f32);
+ SDValue LHSTrunc = LHS;
+ if (!isTypeLegal(IntVT) && !isOperationLegalOrCustom(ISD::IS_FPCLASS, VT)) {
+ LHSTrunc = DAG.getNode(ISD::FP_ROUND, DL, FloatVT, LHS,
+ DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
+ }
+ // It's OK to select between LHS and MinMax with only one ISD::IS_FPCLASS, as
+ // we preferred RHS when generating MinMax if the operands are equal.
+ SDValue RetZero = DAG.getSelect(
+ DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHSTrunc, TestZero), LHS,
MinMax, Flags);
- SDValue RCmp = DAG.getSelect(
- DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS, LCmp,
- Flags);
- return DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
+ return DAG.getSelect(DL, VT, IsZero, RetZero, MinMax, Flags);
}
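The zero fixup in this expansion exists because -0.0 and +0.0 compare equal, so the compare-and-select above cannot tell them apart, yet fmaximum_num must return +0.0 (and fminimum_num -0.0) when both zeros appear. A minimal standalone demonstration (C's fmax only recommends, not requires, the +0.0 result, which is why the expansion cannot rely on a plain select):

#include <cmath>
#include <cstdio>

int main() {
  // -0.0 == +0.0 under ordinary comparison; only a class test or sign-bit
  // inspection distinguishes them, hence the ISD::IS_FPCLASS re-select.
  std::printf("%g\n", std::fmax(-0.0, +0.0));
  return 0;
}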
/// Returns a true value if this FPClassTest can be performed with an ordered
@@ -10606,30 +10621,29 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
"Incompatible types of Data and Mask");
if (IsCompressedMemory) {
- if (DataVT.isScalableVector())
- report_fatal_error(
- "Cannot currently handle compressed memory with scalable vectors");
// Incrementing the pointer according to number of '1's in the mask.
- EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
- SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
- if (MaskIntVT.getSizeInBits() < 32) {
- MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
- MaskIntVT = MVT::i32;
+ if (DataVT.isScalableVector()) {
+ EVT MaskExtVT = MaskVT.changeElementType(MVT::i32);
+ SDValue MaskExt = DAG.getNode(ISD::ZERO_EXTEND, DL, MaskExtVT, Mask);
+ Increment = DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i32, MaskExt);
+ } else {
+ EVT MaskIntVT =
+ EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
+ SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
+ if (MaskIntVT.getSizeInBits() < 32) {
+ MaskInIntReg =
+ DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
+ MaskIntVT = MVT::i32;
+ }
+ Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
}
-
- // Count '1's with POPCNT.
- Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
- Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
// Scale is an element size in bytes.
SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
AddrVT);
+ Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
- } else if (DataVT.isScalableVector()) {
- Increment = DAG.getVScale(DL, AddrVT,
- APInt(AddrVT.getFixedSizeInBits(),
- DataVT.getStoreSize().getKnownMinValue()));
} else
- Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
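+    // getTypeSize lowers a scalable store size to VSCALE * MinSize, so this
+    // handles both fixed and scalable data types.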
+ Increment = DAG.getTypeSize(DL, AddrVT, DataVT.getStoreSize());
return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
}
@@ -11121,7 +11135,8 @@ void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
else if (WideVT == MVT::i128)
LC = RTLIB::MUL_I128;
- if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) {
+ RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(LC);
+ if (LibcallImpl == RTLIB::Unsupported) {
forceExpandMultiply(DAG, dl, Signed, Lo, Hi, LHS, RHS);
return;
}
@@ -11922,10 +11937,8 @@ SDValue TargetLowering::expandVectorSplice(SDNode *Node,
// Store the lo part of CONCAT_VECTORS(V1, V2)
SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
// Store the hi part of CONCAT_VECTORS(V1, V2)
- SDValue OffsetToV2 = DAG.getVScale(
- DL, PtrVT,
- APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinValue()));
- SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
+ SDValue VTBytes = DAG.getTypeSize(DL, PtrVT, VT.getStoreSize());
+ SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, VTBytes);
SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
if (Imm >= 0) {
@@ -11944,13 +11957,8 @@ SDValue TargetLowering::expandVectorSplice(SDNode *Node,
SDValue TrailingBytes =
DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);
- if (TrailingElts > VT.getVectorMinNumElements()) {
- SDValue VLBytes =
- DAG.getVScale(DL, PtrVT,
- APInt(PtrVT.getFixedSizeInBits(),
- VT.getStoreSize().getKnownMinValue()));
- TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
- }
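+  // Clamp the trailing byte amount to the runtime size of the vector so the
+  // spliced start address cannot precede the start of the stack temporary.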
+ if (TrailingElts > VT.getVectorMinNumElements())
+ TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VTBytes);
// Calculate the start address of the spliced result.
StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
@@ -12074,22 +12082,32 @@ SDValue TargetLowering::expandPartialReduceMLA(SDNode *N,
EVT::getVectorVT(*DAG.getContext(), AccVT.getVectorElementType(),
MulOpVT.getVectorElementCount());
- unsigned ExtOpcLHS = N->getOpcode() == ISD::PARTIAL_REDUCE_UMLA
- ? ISD::ZERO_EXTEND
- : ISD::SIGN_EXTEND;
- unsigned ExtOpcRHS = N->getOpcode() == ISD::PARTIAL_REDUCE_SMLA
- ? ISD::SIGN_EXTEND
- : ISD::ZERO_EXTEND;
+ unsigned ExtOpcLHS, ExtOpcRHS;
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode");
+ case ISD::PARTIAL_REDUCE_UMLA:
+ ExtOpcLHS = ExtOpcRHS = ISD::ZERO_EXTEND;
+ break;
+ case ISD::PARTIAL_REDUCE_SMLA:
+ ExtOpcLHS = ExtOpcRHS = ISD::SIGN_EXTEND;
+ break;
+ case ISD::PARTIAL_REDUCE_FMLA:
+ ExtOpcLHS = ExtOpcRHS = ISD::FP_EXTEND;
+ break;
+ }
if (ExtMulOpVT != MulOpVT) {
MulLHS = DAG.getNode(ExtOpcLHS, DL, ExtMulOpVT, MulLHS);
MulRHS = DAG.getNode(ExtOpcRHS, DL, ExtMulOpVT, MulRHS);
}
SDValue Input = MulLHS;
- APInt ConstantOne;
- if (!ISD::isConstantSplatVector(MulRHS.getNode(), ConstantOne) ||
- !ConstantOne.isOne())
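+  // Elide the multiply when RHS is a splat of one (1.0 for the FP variant),
+  // since x * 1 == x.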
+ if (N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA) {
+ if (!llvm::isOneOrOneSplatFP(MulRHS))
+ Input = DAG.getNode(ISD::FMUL, DL, ExtMulOpVT, MulLHS, MulRHS);
+ } else if (!llvm::isOneOrOneSplat(MulRHS)) {
Input = DAG.getNode(ISD::MUL, DL, ExtMulOpVT, MulLHS, MulRHS);
+ }
unsigned Stride = AccVT.getVectorMinNumElements();
unsigned ScaleFactor = MulOpVT.getVectorMinNumElements() / Stride;
@@ -12099,10 +12117,13 @@ SDValue TargetLowering::expandPartialReduceMLA(SDNode *N,
for (unsigned I = 0; I < ScaleFactor; I++)
Subvectors.push_back(DAG.getExtractSubvector(DL, AccVT, Input, I * Stride));
+ unsigned FlatNode =
+ N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA ? ISD::FADD : ISD::ADD;
+
// Flatten the subvector tree
while (Subvectors.size() > 1) {
Subvectors.push_back(
- DAG.getNode(ISD::ADD, DL, AccVT, {Subvectors[0], Subvectors[1]}));
+ DAG.getNode(FlatNode, DL, AccVT, {Subvectors[0], Subvectors[1]}));
Subvectors.pop_front();
Subvectors.pop_front();
}
@@ -12113,6 +12134,167 @@ SDValue TargetLowering::expandPartialReduceMLA(SDNode *N,
return Subvectors[0];
}
+/// Given a store node \p StoreNode, return true if it is safe to fold that node
+/// into \p FPNode, which expands to a library call with output pointers.
+static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode,
+ SDNode *FPNode) {
+ SmallVector<const SDNode *, 8> Worklist;
+ SmallVector<const SDNode *, 8> DeferredNodes;
+ SmallPtrSet<const SDNode *, 16> Visited;
+
+  // Skip the use of FPNode by StoreNode (that's the use we want to fold into
+  // FPNode).
+ for (SDValue Op : StoreNode->ops())
+ if (Op.getNode() != FPNode)
+ Worklist.push_back(Op.getNode());
+
+ unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps();
+ while (!Worklist.empty()) {
+ const SDNode *Node = Worklist.pop_back_val();
+ auto [_, Inserted] = Visited.insert(Node);
+ if (!Inserted)
+ continue;
+
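+    // Conservatively give up once the walk exceeds the predecessor-search
+    // budget; treating the fold as unsafe is always correct.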
+ if (MaxSteps > 0 && Visited.size() >= MaxSteps)
+ return false;
+
+    // Reached the FPNode (which would result in a cycle) or a CALLSEQ_START
+    // (which would result in nested call sequences).
+ if (Node == FPNode || Node->getOpcode() == ISD::CALLSEQ_START)
+ return false;
+
+ if (Node->getOpcode() == ISD::CALLSEQ_END) {
+ // Defer looking into call sequences (so we can check we're outside one).
+ // We still need to look through these for the predecessor check.
+ DeferredNodes.push_back(Node);
+ continue;
+ }
+
+ for (SDValue Op : Node->ops())
+ Worklist.push_back(Op.getNode());
+ }
+
+ // True if we're outside a call sequence and don't have the FPNode as a
+ // predecessor. No cycles or nested call sequences possible.
+ return !SDNode::hasPredecessorHelper(FPNode, Visited, DeferredNodes,
+ MaxSteps);
+}
+
+bool TargetLowering::expandMultipleResultFPLibCall(
+ SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node,
+ SmallVectorImpl<SDValue> &Results,
+ std::optional<unsigned> CallRetResNo) const {
+ if (LC == RTLIB::UNKNOWN_LIBCALL)
+ return false;
+
+ RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(LC);
+ if (LibcallImpl == RTLIB::Unsupported)
+ return false;
+
+ LLVMContext &Ctx = *DAG.getContext();
+ EVT VT = Node->getValueType(0);
+ unsigned NumResults = Node->getNumValues();
+
+ // Find users of the node that store the results (and share input chains). The
+ // destination pointers can be used instead of creating stack allocations.
+ SDValue StoresInChain;
+ SmallVector<StoreSDNode *, 2> ResultStores(NumResults);
+ for (SDNode *User : Node->users()) {
+ if (!ISD::isNormalStore(User))
+ continue;
+ auto *ST = cast<StoreSDNode>(User);
+ SDValue StoreValue = ST->getValue();
+ unsigned ResNo = StoreValue.getResNo();
+ // Ensure the store corresponds to an output pointer.
+ if (CallRetResNo == ResNo)
+ continue;
+    // Ensure the store is to the default address space and is neither atomic
+    // nor volatile.
+ if (!ST->isSimple() || ST->getAddressSpace() != 0)
+ continue;
+ // Ensure all store chains are the same (so they don't alias).
+ if (StoresInChain && ST->getChain() != StoresInChain)
+ continue;
+ // Ensure the store is properly aligned.
+ Type *StoreType = StoreValue.getValueType().getTypeForEVT(Ctx);
+ if (ST->getAlign() <
+ DAG.getDataLayout().getABITypeAlign(StoreType->getScalarType()))
+ continue;
+ // Avoid:
+ // 1. Creating cyclic dependencies.
+ // 2. Expanding the node to a call within a call sequence.
+ if (!canFoldStoreIntoLibCallOutputPointers(ST, Node))
+ continue;
+ ResultStores[ResNo] = ST;
+ StoresInChain = ST->getChain();
+ }
+
+ ArgListTy Args;
+
+ // Pass the arguments.
+ for (const SDValue &Op : Node->op_values()) {
+ EVT ArgVT = Op.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(Ctx);
+ Args.emplace_back(Op, ArgTy);
+ }
+
+ // Pass the output pointers.
+ SmallVector<SDValue, 2> ResultPtrs(NumResults);
+ Type *PointerTy = PointerType::getUnqual(Ctx);
+ for (auto [ResNo, ST] : llvm::enumerate(ResultStores)) {
+ if (ResNo == CallRetResNo)
+ continue;
+ EVT ResVT = Node->getValueType(ResNo);
+ SDValue ResultPtr = ST ? ST->getBasePtr() : DAG.CreateStackTemporary(ResVT);
+ ResultPtrs[ResNo] = ResultPtr;
+ Args.emplace_back(ResultPtr, PointerTy);
+ }
+
+ SDLoc DL(Node);
+
+ if (RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(LibcallImpl)) {
+ // Pass the vector mask (if required).
+ EVT MaskVT = getSetCCResultType(DAG.getDataLayout(), Ctx, VT);
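+    // Every result element is wanted, so the mask is a splat of true.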
+ SDValue Mask = DAG.getBoolConstant(true, DL, MaskVT, VT);
+ Args.emplace_back(Mask, MaskVT.getTypeForEVT(Ctx));
+ }
+
+ Type *RetType = CallRetResNo.has_value()
+ ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx)
+ : Type::getVoidTy(Ctx);
+ SDValue InChain = StoresInChain ? StoresInChain : DAG.getEntryNode();
+ SDValue Callee =
+ DAG.getExternalSymbol(LibcallImpl, getPointerTy(DAG.getDataLayout()));
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(
+ getLibcallImplCallingConv(LibcallImpl), RetType, Callee, std::move(Args));
+
+ auto [Call, CallChain] = LowerCallTo(CLI);
+
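+  // Gather the results: the call's return value for CallRetResNo, and for
+  // every other result a load from its output pointer, chained after the
+  // call so the loads see the call's writes.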
+ for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) {
+ if (ResNo == CallRetResNo) {
+ Results.push_back(Call);
+ continue;
+ }
+    StoreSDNode *ST = ResultStores[ResNo];
+    MachinePointerInfo PtrInfo =
+        ST ? ST->getPointerInfo()
+           : MachinePointerInfo::getFixedStack(
+                 DAG.getMachineFunction(),
+                 cast<FrameIndexSDNode>(ResultPtr)->getIndex());
+    SDValue LoadResult = DAG.getLoad(Node->getValueType(ResNo), DL, CallChain,
+                                     ResultPtr, PtrInfo);
+    SDValue OutChain = LoadResult.getValue(1);
+
+    if (ST) {
+      // Replace the folded store with the library call's output chain.
+      DAG.ReplaceAllUsesOfValueWith(SDValue(ST, 0), OutChain);
+    }
+
+    Results.push_back(LoadResult);
+ }
+
+ return true;
+}
+
bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
SDValue &LHS, SDValue &RHS,
SDValue &CC, SDValue Mask,