Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG')
17 files changed, 1233 insertions, 625 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a43020e..bed3c424 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -331,6 +331,11 @@ namespace {
       return CombineTo(N, To, 2, AddTo);
     }
 
+    SDValue CombineTo(SDNode *N, SmallVectorImpl<SDValue> *To,
+                      bool AddTo = true) {
+      return CombineTo(N, To->data(), To->size(), AddTo);
+    }
+
     void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
 
   private:
@@ -396,7 +401,7 @@ namespace {
     SDValue PromoteExtend(SDValue Op);
     bool PromoteLoad(SDValue Op);
 
-    SDValue foldShiftToAvg(SDNode *N);
+    SDValue foldShiftToAvg(SDNode *N, const SDLoc &DL);
     // Fold `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`
     SDValue foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT);
 
@@ -541,6 +546,7 @@ namespace {
     SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
     SDValue visitBUILD_VECTOR(SDNode *N);
     SDValue visitCONCAT_VECTORS(SDNode *N);
+    SDValue visitVECTOR_INTERLEAVE(SDNode *N);
     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
     SDValue visitVECTOR_SHUFFLE(SDNode *N);
     SDValue visitSCALAR_TO_VECTOR(SDNode *N);
@@ -2021,6 +2027,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
   case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
   case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
+  case ISD::VECTOR_INTERLEAVE:  return visitVECTOR_INTERLEAVE(N);
   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
   case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
@@ -4068,18 +4075,11 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
   unsigned BitWidth = VT.getScalarSizeInBits();
   SDLoc DL(N);
 
-  auto PeekThroughFreeze = [](SDValue N) {
-    if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
-      return N->getOperand(0);
-    return N;
-  };
-
   if (SDValue V = foldSubCtlzNot<EmptyMatchContext>(N, DAG))
     return V;
 
   // fold (sub x, x) -> 0
-  // FIXME: Refactor this and xor and other similar operations together.
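The new CombineTo overload above lets a visit function hand back a whole set of replacement values in one call. A minimal usage sketch in C++, assuming the surrounding DAGCombiner context; it mirrors the visitVECTOR_INTERLEAVE hunk later in this diff:

    // Replace all results of N with the values collected in Ops.
    SmallVector<SDValue, 4> Ops(N->op_values().begin(), N->op_values().end());
    return CombineTo(N, &Ops); // forwards to CombineTo(N, Ops.data(), Ops.size())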
-  if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
+  if (N0 == N1)
     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
 
   // fold (sub c1, c2) -> c3
@@ -4100,18 +4100,17 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
   // (sub x, ([v]select (uge x, y), y, 0)) -> (umin x, (sub x, y))
   if (N1.hasOneUse() && hasUMin(VT)) {
     SDValue Y;
-    if (sd_match(N1, m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
-                                      m_SpecificCondCode(ISD::SETULT)),
-                              m_Zero(), m_Deferred(Y))) ||
-        sd_match(N1, m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
-                                      m_SpecificCondCode(ISD::SETUGE)),
-                              m_Deferred(Y), m_Zero())) ||
-        sd_match(N1, m_VSelect(m_SetCC(m_Specific(N0), m_Value(Y),
-                                       m_SpecificCondCode(ISD::SETULT)),
-                               m_Zero(), m_Deferred(Y))) ||
-        sd_match(N1, m_VSelect(m_SetCC(m_Specific(N0), m_Value(Y),
-                                       m_SpecificCondCode(ISD::SETUGE)),
-                               m_Deferred(Y), m_Zero())))
+    auto MS0 = m_Specific(N0);
+    auto MVY = m_Value(Y);
+    auto MZ = m_Zero();
+    auto MCC1 = m_SpecificCondCode(ISD::SETULT);
+    auto MCC2 = m_SpecificCondCode(ISD::SETUGE);
+
+    if (sd_match(N1, m_SelectCCLike(MS0, MVY, MZ, m_Deferred(Y), MCC1)) ||
+        sd_match(N1, m_SelectCCLike(MS0, MVY, m_Deferred(Y), MZ, MCC2)) ||
+        sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC1), MZ, m_Deferred(Y))) ||
+        sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC2), m_Deferred(Y), MZ)))
+
       return DAG.getNode(ISD::UMIN, DL, VT, N0,
                          DAG.getNode(ISD::SUB, DL, VT, N0, Y));
   }
@@ -4711,7 +4710,10 @@ template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
     if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
       EVT ShiftVT = getShiftAmountTy(N0.getValueType());
       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
-      return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc);
+      SDNodeFlags Flags;
+      Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap());
+      // TODO: Preserve setNoSignedWrap if LogBase2 isn't BitWidth - 1.
+      return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc, Flags);
     }
   }
 
@@ -6071,6 +6073,16 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
   if (N0 == N1)
     return N0;
 
+  // Fold operation with vscale operands.
+  if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
+    uint64_t C0 = N0->getConstantOperandVal(0);
+    uint64_t C1 = N1->getConstantOperandVal(0);
+    if (Opcode == ISD::UMAX)
+      return C0 > C1 ? N0 : N1;
+    else if (Opcode == ISD::UMIN)
+      return C0 > C1 ? N1 : N0;
+  }
+
   // canonicalize constant to RHS
   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
@@ -6493,19 +6505,21 @@ static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
   // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
   // are non NaN values.
   if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
-      ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND)))
+      ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND))) {
     return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
                    isFMAXNUMFMINNUM_IEEE
                ? ISD::FMINNUM_IEEE
                : ISD::DELETED_NODE;
-  else if (((CC == ISD::SETGT || CC == ISD::SETGE) &&
-            (OrAndOpcode == ISD::OR)) ||
-           ((CC == ISD::SETLT || CC == ISD::SETLE) &&
-            (OrAndOpcode == ISD::AND)))
+  }
+
+  if (((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::OR)) ||
+      ((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::AND))) {
     return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
                    isFMAXNUMFMINNUM_IEEE
                ? ISD::FMAXNUM_IEEE
               : ISD::DELETED_NODE;
+  }
+
   // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
   // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
   // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
@@ -6515,24 +6529,24 @@ static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
   // we can prove that we do not have any sNaNs, then we can do the
   // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
   // cases.
-  else if (((CC == ISD::SETOLT || CC == ISD::SETOLE) &&
-            (OrAndOpcode == ISD::OR)) ||
-           ((CC == ISD::SETUGT || CC == ISD::SETUGE) &&
-            (OrAndOpcode == ISD::AND)))
+  if (((CC == ISD::SETOLT || CC == ISD::SETOLE) && (OrAndOpcode == ISD::OR)) ||
+      ((CC == ISD::SETUGT || CC == ISD::SETUGE) && (OrAndOpcode == ISD::AND))) {
     return isFMAXNUMFMINNUM ? ISD::FMINNUM
-                            : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
-                                      isFMAXNUMFMINNUM_IEEE
-                                  ? ISD::FMINNUM_IEEE
-                                  : ISD::DELETED_NODE;
-  else if (((CC == ISD::SETOGT || CC == ISD::SETOGE) &&
-            (OrAndOpcode == ISD::OR)) ||
-           ((CC == ISD::SETULT || CC == ISD::SETULE) &&
-            (OrAndOpcode == ISD::AND)))
+           : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
+                   isFMAXNUMFMINNUM_IEEE
+               ? ISD::FMINNUM_IEEE
+               : ISD::DELETED_NODE;
+  }
+
+  if (((CC == ISD::SETOGT || CC == ISD::SETOGE) && (OrAndOpcode == ISD::OR)) ||
+      ((CC == ISD::SETULT || CC == ISD::SETULE) && (OrAndOpcode == ISD::AND))) {
     return isFMAXNUMFMINNUM ? ISD::FMAXNUM
-                            : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
-                                      isFMAXNUMFMINNUM_IEEE
-                                  ? ISD::FMAXNUM_IEEE
-                                  : ISD::DELETED_NODE;
+           : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
+                   isFMAXNUMFMINNUM_IEEE
+               ? ISD::FMAXNUM_IEEE
+               : ISD::DELETED_NODE;
+  }
+
+  return ISD::DELETED_NODE;
 }
 
@@ -9389,8 +9403,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
   LLVMContext &Context = *DAG.getContext();
   unsigned NumStores = Stores.size();
   unsigned WideNumBits = NumStores * NarrowNumBits;
-  EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
-  if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
+  if (WideNumBits != 16 && WideNumBits != 32 && WideNumBits != 64)
     return SDValue();
 
   // Check if all bytes of the source value that we are looking at are stored
@@ -9444,7 +9457,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
       SourceValue = WideVal;
 
     // Give up if the source value type is smaller than the store size.
-    if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
+    if (SourceValue.getScalarValueSizeInBits() < WideNumBits)
       return SDValue();
   }
 
@@ -9468,6 +9481,8 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
     OffsetMap[Offset] = ByteOffsetFromBase;
   }
 
+  EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
+
   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
   assert(FirstStore && "First store must be set");
 
@@ -10616,6 +10631,19 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
       return DAG.getVScale(DL, VT, C0 << C1);
   }
 
+  SDValue X;
+  APInt VS0;
+
+  // fold (shl (X * vscale(VS0)), C1) -> (X * vscale(VS0 << C1))
+  if (N1C && sd_match(N0, m_Mul(m_Value(X), m_VScale(m_ConstInt(VS0))))) {
+    SDNodeFlags Flags;
+    Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap() &&
+                            N0->getFlags().hasNoUnsignedWrap());
+
+    SDValue VScale = DAG.getVScale(DL, VT, VS0 << N1C->getAPIntValue());
+    return DAG.getNode(ISD::MUL, DL, VT, X, VScale, Flags);
+  }
+
   // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
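Both the vscale fold added above and the step_vector fold that continues below lean on the same algebra: multiplying by a scaled constant and then shifting equals multiplying by the shifted constant. A scalar sanity check of that identity (plain C++, not DAG code; S stands in for the runtime vscale/step factor, the identity holds modulo 2^64 for in-range shift amounts, and the nuw bookkeeping in the hunk guards the no-overflow variant):

    #include <cstdint>
    uint64_t beforeFold(uint64_t X, uint64_t S, uint64_t C0, uint64_t C1) {
      return (X * (S * C0)) << C1; // (shl (mul X, vscale * C0), C1)
    }
    uint64_t afterFold(uint64_t X, uint64_t S, uint64_t C0, uint64_t C1) {
      return X * (S * (C0 << C1)); // (mul X, vscale * (C0 << C1))
    }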
   APInt ShlVal;
   if (N0.getOpcode() == ISD::STEP_VECTOR &&
@@ -10968,7 +10996,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
   if (SDValue NarrowLoad = reduceLoadWidth(N))
     return NarrowLoad;
 
-  if (SDValue AVG = foldShiftToAvg(N))
+  if (SDValue AVG = foldShiftToAvg(N, DL))
     return AVG;
 
   return SDValue();
@@ -11064,38 +11092,43 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
     }
   }
 
-  // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) or
-  //                               (and (srl x, (sub c2, c1), MASK)
-  if (N0.getOpcode() == ISD::SHL &&
-      (N0.getOperand(1) == N1 || N0->hasOneUse()) &&
-      TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
-    auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
-                                           ConstantSDNode *RHS) {
-      const APInt &LHSC = LHS->getAPIntValue();
-      const APInt &RHSC = RHS->getAPIntValue();
-      return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
-             LHSC.getZExtValue() <= RHSC.getZExtValue();
-    };
-    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
-                                  /*AllowUndefs*/ false,
-                                  /*AllowTypeMismatch*/ true)) {
-      SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
-      SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
-      SDValue Mask = DAG.getAllOnesConstant(DL, VT);
-      Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
-      Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
-      SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
-      return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
-    }
-    if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
-                                  /*AllowUndefs*/ false,
-                                  /*AllowTypeMismatch*/ true)) {
-      SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
-      SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
-      SDValue Mask = DAG.getAllOnesConstant(DL, VT);
-      Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
-      SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
-      return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
+  if (N0.getOpcode() == ISD::SHL) {
+    // fold (srl (shl nuw x, c), c) -> x
+    if (N0.getOperand(1) == N1 && N0->getFlags().hasNoUnsignedWrap())
+      return N0.getOperand(0);
+
+    // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) or
+    //                               (and (srl x, (sub c2, c1), MASK)
+    if ((N0.getOperand(1) == N1 || N0->hasOneUse()) &&
+        TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
+      auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
+                                             ConstantSDNode *RHS) {
+        const APInt &LHSC = LHS->getAPIntValue();
+        const APInt &RHSC = RHS->getAPIntValue();
+        return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
+               LHSC.getZExtValue() <= RHSC.getZExtValue();
+      };
+      if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
+                                    /*AllowUndefs*/ false,
+                                    /*AllowTypeMismatch*/ true)) {
+        SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
+        SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
+        SDValue Mask = DAG.getAllOnesConstant(DL, VT);
+        Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
+        Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
+        SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
+        return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
+      }
+      if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
+                                    /*AllowUndefs*/ false,
+                                    /*AllowTypeMismatch*/ true)) {
+        SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
+        SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
+        SDValue Mask = DAG.getAllOnesConstant(DL, VT);
+        Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
+        SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
+        return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
+      }
     }
   }
 
@@ -11241,7 +11274,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
   if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
     return MULH;
 
-  if (SDValue AVG = foldShiftToAvg(N))
+  if (SDValue AVG = foldShiftToAvg(N, DL))
     return AVG;
 
   return SDValue();
@@ -11256,6 +11289,11 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
   unsigned BitWidth = VT.getScalarSizeInBits();
   SDLoc DL(N);
 
+  // fold (fshl/fshr C0, C1, C2) -> C3
+  if (SDValue C =
+          DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1, N2}))
+    return C;
+
   // fold (fshl N0, N1, 0) -> N0
   // fold (fshr N0, N1, 0) -> N1
   if (isPowerOf2_32(BitWidth))
@@ -11757,51 +11795,36 @@ static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
     }
   }
 
-SDValue DAGCombiner::foldShiftToAvg(SDNode *N) {
+// Convert (sr[al] (add n[su]w x, y)) -> (avgfloor[su] x, y)
+SDValue DAGCombiner::foldShiftToAvg(SDNode *N, const SDLoc &DL) {
   const unsigned Opcode = N->getOpcode();
-
-  // Convert (sr[al] (add n[su]w x, y)) -> (avgfloor[su] x, y)
   if (Opcode != ISD::SRA && Opcode != ISD::SRL)
     return SDValue();
 
-  unsigned FloorISD = 0;
-  auto VT = N->getValueType(0);
-  bool IsUnsigned = false;
-
-  // Decide wether signed or unsigned.
-  switch (Opcode) {
-  case ISD::SRA:
-    if (!hasOperation(ISD::AVGFLOORS, VT))
-      return SDValue();
-    FloorISD = ISD::AVGFLOORS;
-    break;
-  case ISD::SRL:
-    IsUnsigned = true;
-    if (!hasOperation(ISD::AVGFLOORU, VT))
-      return SDValue();
-    FloorISD = ISD::AVGFLOORU;
-    break;
-  default:
-    return SDValue();
-  }
+  EVT VT = N->getValueType(0);
+  bool IsUnsigned = Opcode == ISD::SRL;
 
   // Captured values.
   SDValue A, B, Add;
 
   // Match floor average as it is common to both floor/ceil avgs.
-  if (!sd_match(N, m_BinOp(Opcode,
-                           m_AllOf(m_Value(Add), m_Add(m_Value(A), m_Value(B))),
-                           m_One())))
-    return SDValue();
+  if (sd_match(N, m_BinOp(Opcode,
+                          m_AllOf(m_Value(Add), m_Add(m_Value(A), m_Value(B))),
+                          m_One()))) {
+    // Decide whether signed or unsigned.
+    unsigned FloorISD = IsUnsigned ? ISD::AVGFLOORU : ISD::AVGFLOORS;
+    if (!hasOperation(FloorISD, VT))
+      return SDValue();
 
-  // Can't optimize adds that may wrap.
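The wrap check that continues below is load-bearing for the (x + y) >> 1 --> avgfloor rewrite, and a two-line scalar model shows why: with i8 x = y = 128, the wrapping add gives (x + y) >> 1 == 0, while AVGFLOORU is defined over a wider add and yields 128 (illustrative C++, not the DAG code):

    #include <cstdint>
    uint8_t shiftForm(uint8_t X, uint8_t Y) {
      return uint8_t(X + Y) >> 1;                       // add wraps at 8 bits
    }
    uint8_t avgFloorUModel(uint8_t X, uint8_t Y) {
      return uint8_t((uint16_t(X) + uint16_t(Y)) >> 1); // wide add never wraps
    }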
+    if ((IsUnsigned && !Add->getFlags().hasNoUnsignedWrap()) ||
+        (!IsUnsigned && !Add->getFlags().hasNoSignedWrap()))
+      return SDValue();
 
-  if (!IsUnsigned && !Add->getFlags().hasNoSignedWrap())
-    return SDValue();
+    return DAG.getNode(FloorISD, DL, N->getValueType(0), {A, B});
+  }
 
-  return DAG.getNode(FloorISD, SDLoc(N), N->getValueType(0), {A, B});
+  return SDValue();
 }
 
 SDValue DAGCombiner::foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT) {
@@ -12828,22 +12851,21 @@ SDValue DAGCombiner::visitMHISTOGRAM(SDNode *N) {
   SDLoc DL(HG);
 
   EVT MemVT = HG->getMemoryVT();
+  EVT DataVT = Index.getValueType();
   MachineMemOperand *MMO = HG->getMemOperand();
   ISD::MemIndexType IndexType = HG->getIndexType();
 
   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
     return Chain;
 
-  SDValue Ops[] = {Chain, Inc, Mask, BasePtr, Index,
-                   HG->getScale(), HG->getIntID()};
-  if (refineUniformBase(BasePtr, Index, HG->isIndexScaled(), DAG, DL))
+  if (refineUniformBase(BasePtr, Index, HG->isIndexScaled(), DAG, DL) ||
+      refineIndexType(Index, IndexType, DataVT, DAG)) {
+    SDValue Ops[] = {Chain, Inc, Mask, BasePtr, Index,
+                     HG->getScale(), HG->getIntID()};
     return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, Ops,
                                   MMO, IndexType);
+  }
 
-  EVT DataVT = Index.getValueType();
-  if (refineIndexType(Index, IndexType, DataVT, DAG))
-    return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, Ops,
-                                  MMO, IndexType);
   return SDValue();
 }
 
@@ -13165,14 +13187,14 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal,
 
   // select Cond, -1, x → or Cond, x
   if (IsTAllOne) {
-    SDValue X = DAG.getBitcast(CondVT, FVal);
+    SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
     SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, X);
     return DAG.getBitcast(VT, Or);
   }
 
   // select Cond, x, 0 → and Cond, x
   if (IsFAllZero) {
-    SDValue X = DAG.getBitcast(CondVT, TVal);
+    SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(TVal));
     SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, X);
     return DAG.getBitcast(VT, And);
   }
@@ -13180,7 +13202,7 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal,
   // select Cond, 0, x -> and not(Cond), x
   if (IsTAllZero &&
       (isBitwiseNot(peekThroughBitcasts(Cond)) || TLI.hasAndNot(Cond))) {
-    SDValue X = DAG.getBitcast(CondVT, FVal);
+    SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
     SDValue And =
         DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT), X);
     return DAG.getBitcast(VT, And);
@@ -15123,7 +15145,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
       return foldedExt;
   } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
              ISD::isUNINDEXEDLoad(N0.getNode()) &&
-             TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
+             TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
     bool DoXform = true;
     SmallVector<SDNode *, 4> SetCCs;
     if (!N0.hasOneUse())
@@ -16328,6 +16350,42 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
                                  DAG, DL);
     }
     break;
+  case ISD::AVGFLOORS:
+  case ISD::AVGFLOORU:
+  case ISD::AVGCEILS:
+  case ISD::AVGCEILU:
+  case ISD::ABDS:
+  case ISD::ABDU:
+    // (trunc (avg a, b)) -> (avg (trunc a), (trunc b))
+    // (trunc (abdu/abds a, b)) -> (abdu/abds (trunc a), (trunc b))
+    if (!LegalOperations && N0.hasOneUse() &&
+        TLI.isOperationLegal(N0.getOpcode(), VT)) {
+      EVT TruncVT = VT;
+      unsigned SrcBits = SrcVT.getScalarSizeInBits();
+      unsigned TruncBits = TruncVT.getScalarSizeInBits();
+
+      SDValue A = N0.getOperand(0);
+      SDValue B = N0.getOperand(1);
+      bool CanFold = false;
+
+      if (N0.getOpcode() == ISD::AVGFLOORU || N0.getOpcode() == ISD::AVGCEILU ||
+          N0.getOpcode() == ISD::ABDU) {
+        APInt UpperBits = APInt::getBitsSetFrom(SrcBits, TruncBits);
+        CanFold = DAG.MaskedValueIsZero(B, UpperBits) &&
+                  DAG.MaskedValueIsZero(A, UpperBits);
+      } else {
+        unsigned NeededBits = SrcBits - TruncBits;
+        CanFold = DAG.ComputeNumSignBits(B) > NeededBits &&
+                  DAG.ComputeNumSignBits(A) > NeededBits;
+      }
+
+      if (CanFold) {
+        SDValue NewA = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, A);
+        SDValue NewB = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, B);
+        return DAG.getNode(N0.getOpcode(), DL, TruncVT, NewA, NewB);
+      }
+    }
+    break;
   }
 
   return SDValue();
@@ -16735,6 +16793,19 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
   if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
     return N0;
 
+  // If we have frozen and unfrozen users of N0, update so everything uses N.
+  if (!N0.isUndef() && !N0.hasOneUse()) {
+    SDValue FrozenN0(N, 0);
+    // Unfreeze all uses of N to avoid double deleting N from the CSE map.
+    DAG.ReplaceAllUsesOfValueWith(FrozenN0, N0);
+    DAG.ReplaceAllUsesOfValueWith(N0, FrozenN0);
+    // ReplaceAllUsesOfValueWith will have also updated the use in N, thus
+    // creating a cycle in a DAG. Let's undo that by mutating the freeze.
+    assert(N->getOperand(0) == FrozenN0 && "Expected cycle in DAG");
+    DAG.UpdateNodeOperands(N, N0);
+    return FrozenN0;
+  }
+
   // We currently avoid folding freeze over SRA/SRL, due to the problems seen
   // with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
   // example https://reviews.llvm.org/D136529#4120959.
@@ -16788,8 +16859,7 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
   SmallSet<SDValue, 8> MaybePoisonOperands;
   SmallVector<unsigned, 8> MaybePoisonOperandNumbers;
   for (auto [OpNo, Op] : enumerate(N0->ops())) {
-    if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
-                                             /*Depth*/ 1))
+    if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly=*/false))
      continue;
     bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
     bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op).second;
@@ -16974,8 +17044,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
   // fadd (G, (fma A, B, (fma (C, D, (fmul (E, F)))))) -->
   // fma A, B, (fma C, D, fma (E, F, G)).
   // This requires reassociation because it changes the order of operations.
-  bool CanReassociate =
-      Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
+  bool CanReassociate = N->getFlags().hasAllowReassociation();
   if (CanReassociate) {
     SDValue FMA, E;
     if (isFusedOp(N0) && N0.hasOneUse()) {
@@ -17641,7 +17710,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
   // If 'unsafe math' or reassoc and nsz, fold lots of things.
   // TODO: break out portions of the transformations below for which Unsafe is
   //       considered and which do not require both nsz and reassoc
-  if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
+  if ((Options.NoSignedZerosFPMath ||
        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
       AllowNewConst) {
     // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
@@ -17728,7 +17797,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
     }
   } // enable-unsafe-fp-math && AllowNewConst
 
-  if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
+  if ((Options.NoSignedZerosFPMath ||
       (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))) {
     // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
     if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
@@ -17831,7 +17900,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
     }
   }
 
-  if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
+  if ((Options.NoSignedZerosFPMath ||
       (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
      N1.getOpcode() == ISD::FADD) {
    // X - (X + Y) -> -Y
@@ -17971,7 +18040,6 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
   EVT VT = N->getValueType(0);
   SDLoc DL(N);
-  const TargetOptions &Options = DAG.getTarget().Options;
   const SDNodeFlags Flags = N->getFlags();
   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
 
@@ -17995,7 +18063,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
   if (SDValue NewSel = foldBinOpIntoSelect(N))
     return NewSel;
 
-  if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
+  if (Flags.hasAllowReassociation()) {
     // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
     if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
         N0.getOpcode() == ISD::FMUL) {
@@ -18172,8 +18240,7 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
       !DAG.isConstantFPBuildVectorOrConstantFP(N1))
     return matcher.getNode(ISD::FMA, DL, VT, N1, N0, N2);
 
-  bool CanReassociate =
-      Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
+  bool CanReassociate = N->getFlags().hasAllowReassociation();
   if (CanReassociate) {
     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
     if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
@@ -18268,9 +18335,8 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
   // TODO: Limit this transform based on optsize/minsize - it always creates at
   //       least 1 extra instruction. But the perf win may be substantial enough
   //       that only minsize should restrict this.
-  bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
   const SDNodeFlags Flags = N->getFlags();
-  if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
+  if (LegalDAG || !Flags.hasAllowReciprocal())
     return SDValue();
 
   // Skip if current node is a reciprocal/fneg-reciprocal.
@@ -18307,7 +18373,7 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
       // This division is eligible for optimization only if global unsafe math
       // is enabled or if this division allows reciprocal formation.
-      if (UnsafeMath || U->getFlags().hasAllowReciprocal())
+      if (U->getFlags().hasAllowReciprocal())
         Users.insert(U);
     }
   }
@@ -18932,7 +18998,9 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
     // single-step fp_round we want to fold to.
     // In other words, double rounding isn't the same as rounding.
     // Also, this is a value preserving truncation iff both fp_round's are.
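The change just below drops the global UnsafeFPMath test in favor of per-node contract flags, because collapsing two fp_rounds into one is only value-preserving when double rounding cannot bite: rounding f64 to f32 and then to f16 can differ from rounding f64 to f16 directly. A host-side probe of the phenomenon (a sketch; assumes a toolchain with _Float16 support):

    // True when rounding twice differs from rounding once for d, e.g. for
    // values that the f32 step rounds onto an f16 tie point.
    bool doubleRoundingDiffers(double d) {
      _Float16 once = static_cast<_Float16>(d);
      _Float16 twice = static_cast<_Float16>(static_cast<float>(d));
      return once != twice;
    }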
-    if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc)
+    if ((N->getFlags().hasAllowContract() &&
+         N0->getFlags().hasAllowContract()) ||
+        N0IsTrunc)
       return DAG.getNode(
           ISD::FP_ROUND, DL, VT, N0.getOperand(0),
           DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
@@ -22515,6 +22583,56 @@ SDValue DAGCombiner::visitATOMIC_STORE(SDNode *N) {
   return SDValue();
 }
 
+static SDValue foldToMaskedStore(StoreSDNode *Store, SelectionDAG &DAG,
+                                 const SDLoc &Dl) {
+  if (!Store->isSimple() || !ISD::isNormalStore(Store))
+    return SDValue();
+
+  SDValue StoredVal = Store->getValue();
+  SDValue StorePtr = Store->getBasePtr();
+  SDValue StoreOffset = Store->getOffset();
+  EVT VT = Store->getMemoryVT();
+  unsigned AddrSpace = Store->getAddressSpace();
+  Align Alignment = Store->getAlign();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  if (!TLI.isOperationLegalOrCustom(ISD::MSTORE, VT) ||
+      !TLI.allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment))
+    return SDValue();
+
+  SDValue Mask, OtherVec, LoadCh;
+  unsigned LoadPos;
+  if (sd_match(StoredVal,
+               m_VSelect(m_Value(Mask), m_Value(OtherVec),
+                         m_Load(m_Value(LoadCh), m_Specific(StorePtr),
+                                m_Specific(StoreOffset))))) {
+    LoadPos = 2;
+  } else if (sd_match(StoredVal,
+                      m_VSelect(m_Value(Mask),
+                                m_Load(m_Value(LoadCh), m_Specific(StorePtr),
+                                       m_Specific(StoreOffset)),
+                                m_Value(OtherVec)))) {
+    LoadPos = 1;
+  } else {
+    return SDValue();
+  }
+
+  auto *Load = cast<LoadSDNode>(StoredVal.getOperand(LoadPos));
+  if (!Load->isSimple() || !ISD::isNormalLoad(Load) ||
+      Load->getAddressSpace() != AddrSpace)
+    return SDValue();
+
+  if (!Store->getChain().reachesChainWithoutSideEffects(LoadCh))
+    return SDValue();
+
+  if (LoadPos == 1)
+    Mask = DAG.getNOT(Dl, Mask, Mask.getValueType());
+
+  return DAG.getMaskedStore(Store->getChain(), Dl, OtherVec, StorePtr,
+                            StoreOffset, Mask, VT, Store->getMemOperand(),
+                            Store->getAddressingMode());
+}
+
 SDValue DAGCombiner::visitSTORE(SDNode *N) {
   StoreSDNode *ST = cast<StoreSDNode>(N);
   SDValue Chain = ST->getChain();
@@ -22749,6 +22867,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
   if (SDValue NewSt = splitMergedValStore(ST))
     return NewSt;
 
+  if (SDValue MaskedStore = foldToMaskedStore(ST, DAG, SDLoc(N)))
+    return MaskedStore;
+
   return ReduceLoadOpStoreWidth(N);
 }
 
@@ -25282,6 +25403,28 @@ static SDValue combineConcatVectorOfShuffleAndItsOperands(
   return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
 }
 
+static SDValue combineConcatVectorOfSplats(SDNode *N, SelectionDAG &DAG,
+                                           const TargetLowering &TLI,
+                                           bool LegalTypes,
+                                           bool LegalOperations) {
+  EVT VT = N->getValueType(0);
+
+  // Post-legalization we can only create wider SPLAT_VECTOR operations if both
+  // the type and the operation are legal. The Hexagon target has custom
+  // legalization for SPLAT_VECTOR that splits the operation into two parts and
+  // concatenates them. Therefore, custom lowering must also be rejected in
+  // order to avoid an infinite loop.
+  if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
+      (LegalOperations && !TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT)))
+    return SDValue();
+
+  SDValue Op0 = N->getOperand(0);
+  if (!llvm::all_equal(N->op_values()) || Op0.getOpcode() != ISD::SPLAT_VECTOR)
+    return SDValue();
+
+  return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, Op0.getOperand(0));
+}
+
 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
   // If we only have one input vector, we don't need to do any concatenation.
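The combineConcatVectorOfSplats helper added above rests on a simple fact: concatenating identical splats is itself a splat of the wider type. A small fixed-width model of that equivalence (plain C++ standing in for SPLAT_VECTOR/CONCAT_VECTORS semantics):

    #include <array>
    template <size_t N> std::array<int, N> splat(int X) {
      std::array<int, N> V{};
      V.fill(X);
      return V;
    }
    template <size_t N>
    std::array<int, 2 * N> concat(std::array<int, N> A, std::array<int, N> B) {
      std::array<int, 2 * N> R{};
      for (size_t I = 0; I < N; ++I) { R[I] = A[I]; R[N + I] = B[I]; }
      return R;
    }
    // concat(splat<4>(X), splat<4>(X)) == splat<8>(X); the fold emits the RHS.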
   if (N->getNumOperands() == 1)
@@ -25405,6 +25548,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
     return DAG.getBuildVector(VT, SDLoc(N), Opnds);
   }
 
+  if (SDValue V =
+          combineConcatVectorOfSplats(N, DAG, TLI, LegalTypes, LegalOperations))
+    return V;
+
   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
   // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
@@ -25473,6 +25620,21 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
   return SDValue();
 }
 
+SDValue DAGCombiner::visitVECTOR_INTERLEAVE(SDNode *N) {
+  // Check to see if all operands are identical.
+  if (!llvm::all_equal(N->op_values()))
+    return SDValue();
+
+  // Check to see if the identical operand is a splat.
+  if (!DAG.isSplatValue(N->getOperand(0)))
+    return SDValue();
+
+  // interleave splat(X), splat(X).... --> splat(X), splat(X)....
+  SmallVector<SDValue, 4> Ops;
+  Ops.append(N->op_values().begin(), N->op_values().end());
+  return CombineTo(N, &Ops);
+}
+
 // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
 // if the subvector can be sourced for free.
 static SDValue getSubVectorSrc(SDValue V, unsigned Index, EVT SubVT) {
@@ -25868,7 +26030,10 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
   // Combine an extract of an extract into a single extract_subvector.
   // ext (ext X, C), 0 --> ext X, C
   if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
-    if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
+    // The index has to be a multiple of the new result type's known minimum
+    // vector length.
+    if (V.getConstantOperandVal(1) % NVT.getVectorMinNumElements() == 0 &&
+        TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
                                     V.getConstantOperandVal(1)) &&
         TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, V.getOperand(0),
@@ -25955,8 +26120,6 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
     EVT ConcatSrcVT = V.getOperand(0).getValueType();
     assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
            "Concat and extract subvector do not change element type");
-    assert((ExtIdx % ExtNumElts) == 0 &&
-           "Extract index is not a multiple of the input vector length.");
 
     unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
     unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
@@ -28965,13 +29128,27 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
       ((N1C->isAllOnes() && CC == ISD::SETGT) ||
        (N1C->isZero() && CC == ISD::SETLT)) &&
       !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
-    SDValue ASR = DAG.getNode(
-        ISD::SRA, DL, CmpOpVT, N0,
-        DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
-    return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
+    SDValue ASHR =
+        DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
+                    DAG.getShiftAmountConstant(
+                        CmpOpVT.getScalarSizeInBits() - 1, CmpOpVT, DL));
+    return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASHR, DL, VT),
                        DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
   }
 
+  // Fold sign pattern select_cc setgt X, -1, 1, -1 -> or (ashr X, BW-1), 1
+  if (CC == ISD::SETGT && N1C && N2C && N3C && N1C->isAllOnes() &&
+      N2C->isOne() && N3C->isAllOnes() &&
+      !TLI.shouldAvoidTransformToShift(CmpOpVT,
+                                       CmpOpVT.getScalarSizeInBits() - 1)) {
+    SDValue ASHR =
+        DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
+                    DAG.getShiftAmountConstant(
+                        CmpOpVT.getScalarSizeInBits() - 1, CmpOpVT, DL));
+    return DAG.getNode(ISD::OR, DL, VT, DAG.getSExtOrTrunc(ASHR, DL, VT),
+                       DAG.getConstant(1, DL, VT));
+  }
+
   if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
     return S;
   if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index fb9eff9..9467ba1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -729,9 +729,7 @@ bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx,
 
     assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
 
-    ArgListEntry Entry;
-    Entry.Val = V;
-    Entry.Ty = V->getType();
+    ArgListEntry Entry(V);
     Entry.setAttributes(CI, ArgI);
     Args.push_back(Entry);
   }
@@ -978,9 +976,7 @@ bool FastISel::lowerCallTo(const CallInst *CI, MCSymbol *Symbol,
 
     assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
 
-    ArgListEntry Entry;
-    Entry.Val = V;
-    Entry.Ty = V->getType();
+    ArgListEntry Entry(V);
     Entry.setAttributes(CI, ArgI);
     Args.push_back(Entry);
   }
@@ -1012,17 +1008,16 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
     MVT RegisterVT = TLI.getRegisterType(CLI.RetTy->getContext(), VT);
     unsigned NumRegs = TLI.getNumRegisters(CLI.RetTy->getContext(), VT);
     for (unsigned i = 0; i != NumRegs; ++i) {
-      ISD::InputArg MyFlags;
-      MyFlags.VT = RegisterVT;
-      MyFlags.ArgVT = VT;
-      MyFlags.Used = CLI.IsReturnValueUsed;
+      ISD::ArgFlagsTy Flags;
       if (CLI.RetSExt)
-        MyFlags.Flags.setSExt();
+        Flags.setSExt();
       if (CLI.RetZExt)
-        MyFlags.Flags.setZExt();
+        Flags.setZExt();
       if (CLI.IsInReg)
-        MyFlags.Flags.setInReg();
-      CLI.Ins.push_back(MyFlags);
+        Flags.setInReg();
+      ISD::InputArg Ret(Flags, RegisterVT, VT, CLI.RetTy, CLI.IsReturnValueUsed,
+                        ISD::InputArg::NoArgIndex, 0);
+      CLI.Ins.push_back(Ret);
     }
   }
 
@@ -1117,7 +1112,6 @@ bool FastISel::lowerCall(const CallInst *CI) {
   Type *RetTy = CI->getType();
 
   ArgListTy Args;
-  ArgListEntry Entry;
   Args.reserve(CI->arg_size());
 
   for (auto i = CI->arg_begin(), e = CI->arg_end(); i != e; ++i) {
 
     if (V->getType()->isEmptyTy())
       continue;
 
-    Entry.Val = V;
-    Entry.Ty = V->getType();
-
+    ArgListEntry Entry(V);
     // Skip the first return-type Attribute to get to params.
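From the SimplifySelectCC hunk above: the new sign-pattern fold replaces a compare-and-select with a shift and an or. A scalar check of the identity on i32 (a sketch; it relies on arithmetic right shift of a signed value, which is what ISD::SRA provides):

    #include <cstdint>
    int32_t selectForm(int32_t X) { return X > -1 ? 1 : -1; }
    int32_t shiftForm(int32_t X) {
      return (X >> 31) | 1; // ashr spreads the sign bit: 0|1 = 1, -1|1 = -1
    }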
     Entry.setAttributes(CI, i - CI->arg_begin());
     Args.push_back(Entry);
   }
 
@@ -1148,9 +1140,12 @@ bool FastISel::lowerCall(const CallInst *CI) {
   CLI.setCallee(RetTy, FuncTy, CI->getCalledOperand(), std::move(Args), *CI)
       .setTailCall(IsTailCall);
 
-  diagnoseDontCall(*CI);
+  if (lowerCallTo(CLI)) {
+    diagnoseDontCall(*CI);
+    return true;
+  }
 
-  return lowerCallTo(CLI);
+  return false;
 }
 
 bool FastISel::selectCall(const User *I) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 8c8daef..861f76e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -81,12 +81,11 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses,
 
 /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an
 /// implicit physical register output.
-void InstrEmitter::EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone,
-                                   Register SrcReg, VRBaseMapType &VRBaseMap) {
+void InstrEmitter::EmitCopyFromReg(SDValue Op, bool IsClone, Register SrcReg,
+                                   VRBaseMapType &VRBaseMap) {
   Register VRBase;
   if (SrcReg.isVirtual()) {
     // Just use the input register directly!
-    SDValue Op(Node, ResNo);
     if (IsClone)
       VRBaseMap.erase(Op);
     bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second;
@@ -99,17 +98,15 @@ void InstrEmitter::EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone,
   // the CopyToReg'd destination register instead of creating a new vreg.
   bool MatchReg = true;
   const TargetRegisterClass *UseRC = nullptr;
-  MVT VT = Node->getSimpleValueType(ResNo);
+  MVT VT = Op.getSimpleValueType();
 
   // Stick to the preferred register classes for legal types.
   if (TLI->isTypeLegal(VT))
-    UseRC = TLI->getRegClassFor(VT, Node->isDivergent());
+    UseRC = TLI->getRegClassFor(VT, Op->isDivergent());
 
-  for (SDNode *User : Node->users()) {
+  for (SDNode *User : Op->users()) {
     bool Match = true;
-    if (User->getOpcode() == ISD::CopyToReg &&
-        User->getOperand(2).getNode() == Node &&
-        User->getOperand(2).getResNo() == ResNo) {
+    if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2) == Op) {
       Register DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
       if (DestReg.isVirtual()) {
         VRBase = DestReg;
@@ -118,10 +115,8 @@ void InstrEmitter::EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone,
         Match = false;
       } else {
         for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
-          SDValue Op = User->getOperand(i);
-          if (Op.getNode() != Node || Op.getResNo() != ResNo)
+          if (User->getOperand(i) != Op)
             continue;
-          MVT VT = Node->getSimpleValueType(Op.getResNo());
           if (VT == MVT::Other || VT == MVT::Glue)
             continue;
           Match = false;
@@ -170,11 +165,11 @@ void InstrEmitter::EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone,
   } else {
     // Create the reg, emit the copy.
     VRBase = MRI->createVirtualRegister(DstRC);
-    BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
-            VRBase).addReg(SrcReg);
+    BuildMI(*MBB, InsertPos, Op.getDebugLoc(), TII->get(TargetOpcode::COPY),
+            VRBase)
+        .addReg(SrcReg);
   }
 
-  SDValue Op(Node, ResNo);
   if (IsClone)
     VRBaseMap.erase(Op);
   bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
@@ -243,7 +238,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
 
     // Create the result registers for this node and add the result regs to
     // the machine instruction.
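The FastISel changes above (and the LegalizeDAG changes below) stop mutating one reused ArgListEntry and instead construct each entry directly via the value/type constructors this patch series introduces. The resulting pattern, as a representative excerpt of the LegalizeDAG hunks (assuming the surrounding function's Node, TLI, DAG, and isSigned):

    TargetLowering::ArgListTy Args;
    for (const SDValue &Op : Node->op_values()) {
      Type *ArgTy = Op.getValueType().getTypeForEVT(*DAG.getContext());
      TargetLowering::ArgListEntry Entry(Op, ArgTy); // ctor replaces field writes
      Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgTy, isSigned);
      Entry.IsZExt = !Entry.IsSExt;
      Args.push_back(Entry);
    }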
-    if (VRBase == 0) {
+    if (!VRBase) {
       assert(RC && "Isn't a register operand!");
       VRBase = MRI->createVirtualRegister(RC);
       MIB.addReg(VRBase, RegState::Define);
     }
@@ -1170,7 +1165,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
         continue;
       // This implicitly defined physreg has a use.
       UsedRegs.push_back(Reg);
-      EmitCopyFromReg(Node, i, IsClone, Reg, VRBaseMap);
+      EmitCopyFromReg(SDValue(Node, i), IsClone, Reg, VRBaseMap);
     }
   }
 
@@ -1178,7 +1173,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
   if (Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
     for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) {
       if (F->getOpcode() == ISD::CopyFromReg) {
-        UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
+        Register Reg = cast<RegisterSDNode>(F->getOperand(1))->getReg();
+        if (Reg.isPhysical())
+          UsedRegs.push_back(Reg);
         continue;
       } else if (F->getOpcode() == ISD::CopyToReg) {
         // Skip CopyToReg nodes that are internal to the glue chain.
@@ -1281,7 +1278,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
   }
   case ISD::CopyFromReg: {
     Register SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
-    EmitCopyFromReg(Node, 0, IsClone, SrcReg, VRBaseMap);
+    EmitCopyFromReg(SDValue(Node, 0), IsClone, SrcReg, VRBaseMap);
     break;
   }
   case ISD::EH_LABEL:
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
index 16d754c..b465de8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -48,8 +48,8 @@ private:
 
   /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an
   /// implicit physical register output.
-  void EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone,
-                       Register SrcReg, VRBaseMapType &VRBaseMap);
+  void EmitCopyFromReg(SDValue Op, bool IsClone, Register SrcReg,
+                       VRBaseMapType &VRBaseMap);
 
   void CreateVirtualRegisters(SDNode *Node,
                               MachineInstrBuilder &MIB,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index ba0ab23..bcfc2c5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2181,12 +2181,10 @@ SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
 std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
                                                                 bool isSigned) {
   TargetLowering::ArgListTy Args;
-  TargetLowering::ArgListEntry Entry;
   for (const SDValue &Op : Node->op_values()) {
     EVT ArgVT = Op.getValueType();
     Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
-    Entry.Node = Op;
-    Entry.Ty = ArgTy;
+    TargetLowering::ArgListEntry Entry(Op, ArgTy);
     Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgTy, isSigned);
     Entry.IsZExt = !Entry.IsSExt;
     Args.push_back(Entry);
@@ -2325,11 +2323,9 @@ SDValue SelectionDAGLegalize::ExpandBitCountingLibCall(
   EVT IntVT =
       EVT::getIntegerVT(*DAG.getContext(), DAG.getLibInfo().getIntSize());
 
-  TargetLowering::ArgListEntry Arg;
   EVT ArgVT = Op.getValueType();
   Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
-  Arg.Node = Op;
-  Arg.Ty = ArgTy;
+  TargetLowering::ArgListEntry Arg(Op, ArgTy);
   Arg.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgTy, /*IsSigned=*/false);
   Arg.IsZExt = !Arg.IsSExt;
 
@@ -2370,12 +2366,10 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
   Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
 
   TargetLowering::ArgListTy Args;
-  TargetLowering::ArgListEntry Entry;
   for (const SDValue &Op : Node->op_values()) {
     EVT ArgVT = Op.getValueType();
     Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
-    Entry.Node = Op;
-    Entry.Ty = ArgTy;
+    TargetLowering::ArgListEntry Entry(Op, ArgTy);
     Entry.IsSExt = isSigned;
     Entry.IsZExt = !isSigned;
     Args.push_back(Entry);
@@ -2383,8 +2377,8 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
 
   // Also pass the return address of the remainder.
   SDValue FIPtr = DAG.CreateStackTemporary(RetVT);
-  Entry.Node = FIPtr;
-  Entry.Ty = PointerType::getUnqual(RetTy->getContext());
+  TargetLowering::ArgListEntry Entry(
+      FIPtr, PointerType::getUnqual(RetTy->getContext()));
   Entry.IsSExt = isSigned;
   Entry.IsZExt = !isSigned;
   Args.push_back(Entry);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 2cad36e..83bb1df 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -197,7 +197,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_Unary(SDNode *N, RTLIB::Libcall LC) {
   SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
   TargetLowering::MakeLibCallOptions CallOptions;
   EVT OpVT = N->getOperand(0 + Offset).getValueType();
-  CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
+  CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0));
   std::pair<SDValue, SDValue> Tmp =
       TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N), Chain);
@@ -218,7 +218,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC) {
   TargetLowering::MakeLibCallOptions CallOptions;
   EVT OpsVT[2] = { N->getOperand(0 + Offset).getValueType(),
                    N->getOperand(1 + Offset).getValueType() };
-  CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+  CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0));
   std::pair<SDValue, SDValue> Tmp =
       TLI.makeLibCall(DAG, LC, NVT, Ops, CallOptions, SDLoc(N), Chain);
@@ -558,7 +558,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
   EVT OpsVT[3] = { N->getOperand(0 + Offset).getValueType(),
                    N->getOperand(1 + Offset).getValueType(),
                    N->getOperand(2 + Offset).getValueType() };
-  CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+  CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0));
   std::pair<SDValue, SDValue> Tmp =
       TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FMA_F32,
@@ -642,7 +642,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
   assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
   TargetLowering::MakeLibCallOptions CallOptions;
   EVT OpVT = N->getOperand(IsStrict ? 1 : 0).getValueType();
-  CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
+  CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0));
   std::pair<SDValue, SDValue> Tmp =
       TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N), Chain);
@@ -658,7 +658,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) {
   SDValue Op = N->getOperand(0);
   TargetLowering::MakeLibCallOptions CallOptions;
   EVT OpsVT[1] = { N->getOperand(0).getValueType() };
-  CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+  CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0));
   SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, Op,
                                   CallOptions, SDLoc(N)).first;
   if (N->getValueType(0) == MVT::f32)
@@ -694,7 +694,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
   assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!");
   TargetLowering::MakeLibCallOptions CallOptions;
   EVT OpVT = N->getOperand(IsStrict ? 1 : 0).getValueType();
-  CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
+  CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0));
   std::pair<SDValue, SDValue> Tmp =
       TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N), Chain);
@@ -742,7 +742,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_ExpOp(SDNode *N) {
   TargetLowering::MakeLibCallOptions CallOptions;
   EVT OpsVT[2] = { N->getOperand(0 + Offset).getValueType(),
                    N->getOperand(1 + Offset).getValueType() };
-  CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+  CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0));
   std::pair<SDValue, SDValue> Tmp =
       TLI.makeLibCall(DAG, LC, NVT, Ops, CallOptions, SDLoc(N), Chain);
@@ -779,7 +779,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFREXP(SDNode *N) {
 
   // TODO: setTypeListBeforeSoften can't properly express multiple return types,
   // but we only really need to handle the 0th one for softening anyway.
-  CallOptions.setTypeListBeforeSoften({OpsVT}, VT0, true)
+  CallOptions.setTypeListBeforeSoften({OpsVT}, VT0)
       .setOpsTypeOverrides(CallOpsTypeOverrides);
 
   auto [ReturnVal, Chain] = TLI.makeLibCall(DAG, LC, NVT0, Ops, CallOptions, DL,
@@ -828,7 +828,7 @@ bool DAGTypeLegalizer::SoftenFloatRes_UnaryWithTwoFPResults(
   TargetLowering::MakeLibCallOptions CallOptions;
   // TODO: setTypeListBeforeSoften can't properly express multiple return types,
   // but since both returns have the same type it should be okay.
-  CallOptions.setTypeListBeforeSoften({OpsVT}, VT, true)
+  CallOptions.setTypeListBeforeSoften({OpsVT}, VT)
       .setOpsTypeOverrides(CallOpsTypeOverrides);
 
   auto [ReturnVal, Chain] = TLI.makeLibCall(DAG, LC, NVT, Ops, CallOptions, DL,
@@ -1100,7 +1100,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
                 NVT, N->getOperand(IsStrict ? 1 : 0));
   TargetLowering::MakeLibCallOptions CallOptions;
   CallOptions.setIsSigned(Signed);
-  CallOptions.setTypeListBeforeSoften(SVT, RVT, true);
+  CallOptions.setTypeListBeforeSoften(SVT, RVT);
   std::pair<SDValue, SDValue> Tmp =
       TLI.makeLibCall(DAG, LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
                       Op, CallOptions, dl, Chain);
@@ -1222,7 +1222,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
   SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
   Op = GetSoftenedFloat(Op);
   TargetLowering::MakeLibCallOptions CallOptions;
-  CallOptions.setTypeListBeforeSoften(SVT, RVT, true);
+  CallOptions.setTypeListBeforeSoften(SVT, RVT);
   std::pair<SDValue, SDValue> Tmp =
       TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N), Chain);
@@ -1298,7 +1298,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) {
   Op = GetSoftenedFloat(Op);
   SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
   TargetLowering::MakeLibCallOptions CallOptions;
-  CallOptions.setTypeListBeforeSoften(SVT, RVT, true);
+  CallOptions.setTypeListBeforeSoften(SVT, RVT);
   std::pair<SDValue, SDValue> Tmp =
       TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, dl, Chain);
@@ -1453,7 +1453,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_Unary(SDNode *N, RTLIB::Libcall LC) {
   SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
   TargetLowering::MakeLibCallOptions CallOptions;
   EVT OpVT = N->getOperand(0 + Offset).getValueType();
-  CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
+  CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0));
   std::pair<SDValue, SDValue> Tmp =
       TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N), Chain);
@@ -1551,6 +1551,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
   case ISD::VAARG:      ExpandRes_VAARG(N, Lo, Hi); break;
 
   case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break;
+  case ISD::AssertNoFPClass: ExpandFloatRes_AssertNoFPClass(N, Lo, Hi); break;
   case ISD::FABS:       ExpandFloatRes_FABS(N, Lo, Hi); break;
   case ISD::STRICT_FMINNUM:
   case ISD::FMINNUM:    ExpandFloatRes_FMINNUM(N, Lo, Hi); break;
@@ -1966,6 +1967,13 @@ void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo,
   Hi = DAG.getNode(ISD::FNEG, dl, Hi.getValueType(), Hi);
 }
 
+void DAGTypeLegalizer::ExpandFloatRes_AssertNoFPClass(SDNode *N, SDValue &Lo,
+                                                      SDValue &Hi) {
+  // TODO: Handle ppcf128 by preserving AssertNoFPClass for one of the halves.
+  SDLoc dl(N);
+  GetExpandedFloat(N->getOperand(0), Lo, Hi);
+}
+
 void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo,
                                                 SDValue &Hi) {
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
@@ -3559,7 +3567,7 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FP_ROUND(SDNode *N) {
   SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
   Op = GetSoftenedFloat(Op);
   TargetLowering::MakeLibCallOptions CallOptions;
-  CallOptions.setTypeListBeforeSoften(SVT, RVT, true);
+  CallOptions.setTypeListBeforeSoften(SVT, RVT);
   std::pair<SDValue, SDValue> Tmp =
       TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N), Chain);
   if (IsStrict)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 583a85a..90d62e6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2217,8 +2217,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
 
   switch (getTypeAction(InVT)) {
   case TargetLowering::TypePromoteInteger: {
-    // TODO: Handle big endian
-    if (OutVT.isVector() && DAG.getDataLayout().isLittleEndian()) {
+    // TODO: Handle big endian & vector input type.
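Threaded through all of the LegalizeFloatTypes hunks above: setTypeListBeforeSoften no longer takes a trailing bool, since listing the pre-softening types is now implied by calling it at all. Before and after at a typical call site (sketch):

    TargetLowering::MakeLibCallOptions CallOptions;
    // Before this patch:
    //   CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
    // After (the flag is gone):
    CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0));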
+    if (OutVT.isVector() && !InVT.isVector() &&
+        DAG.getDataLayout().isLittleEndian()) {
       EVT EltVT = OutVT.getVectorElementType();
       TypeSize EltSize = EltVT.getSizeInBits();
       TypeSize NInSize = NInVT.getSizeInBits();
@@ -5259,20 +5260,18 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
                MachinePointerInfo());
 
   TargetLowering::ArgListTy Args;
-  TargetLowering::ArgListEntry Entry;
   for (const SDValue &Op : N->op_values()) {
     EVT ArgVT = Op.getValueType();
     Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
-    Entry.Node = Op;
-    Entry.Ty = ArgTy;
+    TargetLowering::ArgListEntry Entry(Op, ArgTy);
     Entry.IsSExt = true;
     Entry.IsZExt = false;
     Args.push_back(Entry);
   }
 
   // Also pass the address of the overflow check.
-  Entry.Node = Temp;
-  Entry.Ty = PointerType::getUnqual(PtrTy->getContext());
+  TargetLowering::ArgListEntry Entry(
+      Temp, PointerType::getUnqual(PtrTy->getContext()));
   Entry.IsSExt = true;
   Entry.IsZExt = false;
   Args.push_back(Entry);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 2e13b18..65fd863 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -681,6 +681,7 @@ private:
       SDNode *N, RTLIB::Libcall LC, std::optional<unsigned> CallRetResNo = {});
   // clang-format off
+  void ExpandFloatRes_AssertNoFPClass(SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FABS      (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FACOS     (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FASIN     (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -908,6 +909,7 @@ private:
   SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
   SDValue ScalarizeVecOp_VSELECT(SDNode *N);
   SDValue ScalarizeVecOp_VSETCC(SDNode *N);
+  SDValue ScalarizeVecOp_VSTRICT_FSETCC(SDNode *N, unsigned OpNo);
   SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
   SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo);
   SDValue ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N, unsigned OpNo);
@@ -971,6 +973,7 @@ private:
   void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_VP_LOAD_FF(VPLoadFFSDNode *LD, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo,
                                    SDValue &Hi);
   void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi);
@@ -1075,6 +1078,7 @@ private:
   SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
   SDValue WidenVecRes_LOAD(SDNode* N);
   SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
+  SDValue WidenVecRes_VP_LOAD_FF(VPLoadFFSDNode *N);
   SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N);
   SDValue WidenVecRes_VECTOR_COMPRESS(SDNode *N);
   SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index d2ecc133..2ca9895 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -2223,17 +2223,13 @@ bool VectorLegalizer::tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC,
   SDLoc DL(Node);
 
   TargetLowering::ArgListTy Args;
-  TargetLowering::ArgListEntry Entry;
-  Entry.IsSExt = false;
-  Entry.IsZExt = false;
   unsigned OpNum = 0;
 
   for (auto &VFParam : OptVFInfo->Shape.Parameters) {
     if (VFParam.ParamKind == VFParamKind::GlobalPredicate) {
       EVT MaskVT = TLI.getSetCCResultType(DAG.getDataLayout(), *Ctx, VT);
-      Entry.Node = DAG.getBoolConstant(true, DL, MaskVT, VT);
-      Entry.Ty = MaskVT.getTypeForEVT(*Ctx);
-      Args.push_back(Entry);
+      Args.emplace_back(DAG.getBoolConstant(true, DL, MaskVT, VT),
+                        MaskVT.getTypeForEVT(*Ctx));
       continue;
     }
 
@@ -2241,9 +2237,7 @@ bool VectorLegalizer::tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC,
     if (VFParam.ParamKind != VFParamKind::Vector)
       return false;
 
-    Entry.Node = Node->getOperand(OpNum++);
-    Entry.Ty = Ty;
-    Args.push_back(Entry);
+    Args.emplace_back(Node->getOperand(OpNum++), Ty);
   }
 
   // Emit a call to the vector function.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 1661814..e8f6167 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -789,6 +789,10 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
   case ISD::SETCC:
     Res = ScalarizeVecOp_VSETCC(N);
     break;
+  case ISD::STRICT_FSETCC:
+  case ISD::STRICT_FSETCCS:
+    Res = ScalarizeVecOp_VSTRICT_FSETCC(N, OpNo);
+    break;
   case ISD::STORE:
     Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
     break;
@@ -985,6 +989,43 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_VSETCC(SDNode *N) {
   return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Res);
 }
 
+// Similar to ScalarizeVecOp_VSETCC, with added logic to update chains.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_VSTRICT_FSETCC(SDNode *N,
+                                                        unsigned OpNo) {
+  assert(OpNo == 1 && "Wrong operand for scalarization!");
+  assert(N->getValueType(0).isVector() &&
+         N->getOperand(1).getValueType().isVector() &&
+         "Operand types must be vectors");
+  assert(N->getValueType(0) == MVT::v1i1 && "Expected v1i1 type");
+
+  EVT VT = N->getValueType(0);
+  SDValue Ch = N->getOperand(0);
+  SDValue LHS = GetScalarizedVector(N->getOperand(1));
+  SDValue RHS = GetScalarizedVector(N->getOperand(2));
+  SDValue CC = N->getOperand(3);
+
+  EVT OpVT = N->getOperand(1).getValueType();
+  EVT NVT = VT.getVectorElementType();
+  SDLoc DL(N);
+  SDValue Res = DAG.getNode(N->getOpcode(), DL, {MVT::i1, MVT::Other},
+                            {Ch, LHS, RHS, CC});
+
+  // Legalize the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+
+  ISD::NodeType ExtendCode =
+      TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
+
+  Res = DAG.getNode(ExtendCode, DL, NVT, Res);
+  Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Res);
+
+  // Do our own replacement and return SDValue() to tell the caller that we
+  // handled all replacements since caller can only handle a single result.
+  ReplaceValueWith(SDValue(N, 0), Res);
+  return SDValue();
+}
+
 /// If the value to store is a vector that needs to be scalarized, it must be
 /// <1 x ty>. Just store the element.
 SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
@@ -1152,6 +1193,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::VP_LOAD:
     SplitVecRes_VP_LOAD(cast<VPLoadSDNode>(N), Lo, Hi);
     break;
+  case ISD::VP_LOAD_FF:
+    SplitVecRes_VP_LOAD_FF(cast<VPLoadFFSDNode>(N), Lo, Hi);
+    break;
   case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
     SplitVecRes_VP_STRIDED_LOAD(cast<VPStridedLoadSDNode>(N), Lo, Hi);
     break;
@@ -2227,6 +2271,45 @@ void DAGTypeLegalizer::SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo,
   ReplaceValueWith(SDValue(LD, 1), Ch);
 }
 
+void DAGTypeLegalizer::SplitVecRes_VP_LOAD_FF(VPLoadFFSDNode *LD, SDValue &Lo,
+                                              SDValue &Hi) {
+  SDLoc dl(LD);
+  auto [LoVT, HiVT] = DAG.GetSplitDestVTs(LD->getValueType(0));
+
+  SDValue Ch = LD->getChain();
+  SDValue Ptr = LD->getBasePtr();
+  Align Alignment = LD->getBaseAlign();
+  SDValue Mask = LD->getMask();
+  SDValue EVL = LD->getVectorLength();
+
+  // Split Mask operand
+  SDValue MaskLo, MaskHi;
+  if (Mask.getOpcode() == ISD::SETCC) {
+    SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+  } else {
+    if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+      GetSplitVector(Mask, MaskLo, MaskHi);
+    else
+      std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+  }
+
+  // Split EVL operand
+  auto [EVLLo, EVLHi] = DAG.SplitEVL(EVL, LD->getValueType(0), dl);
+
+  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+      LD->getPointerInfo(), MachineMemOperand::MOLoad,
+      LocationSize::beforeOrAfterPointer(), Alignment, LD->getAAInfo(),
+      LD->getRanges());
+
+  Lo = DAG.getLoadFFVP(LoVT, dl, Ch, Ptr, MaskLo, EVLLo, MMO);
+
+  // Fill the upper half with poison.
+  Hi = DAG.getUNDEF(HiVT);
+
+  ReplaceValueWith(SDValue(LD, 1), Lo.getValue(1));
+  ReplaceValueWith(SDValue(LD, 2), Lo.getValue(2));
+}
+
 void DAGTypeLegalizer::SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD,
                                                    SDValue &Lo, SDValue &Hi) {
   assert(SLD->isUnindexed() &&
@@ -2434,10 +2517,10 @@ void DAGTypeLegalizer::SplitVecRes_Gather(MemSDNode *N, SDValue &Lo,
   else
     std::tie(IndexLo, IndexHi) = DAG.SplitVector(Ops.Index, dl);
 
+  MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
-      N->getPointerInfo(), MachineMemOperand::MOLoad,
-      LocationSize::beforeOrAfterPointer(), Alignment, N->getAAInfo(),
-      N->getRanges());
+      N->getPointerInfo(), MMOFlags, LocationSize::beforeOrAfterPointer(),
+      Alignment, N->getAAInfo(), N->getRanges());
 
   if (auto *MGT = dyn_cast<MaskedGatherSDNode>(N)) {
     SDValue PassThru = MGT->getPassThru();
@@ -3800,13 +3883,45 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
   uint64_t LoEltsMin = Lo.getValueType().getVectorMinNumElements();
   uint64_t IdxVal = Idx->getAsZExtVal();
 
+  unsigned NumResultElts = SubVT.getVectorMinNumElements();
+
   if (IdxVal < LoEltsMin) {
-    assert(IdxVal + SubVT.getVectorMinNumElements() <= LoEltsMin &&
-           "Extracted subvector crosses vector split!");
-    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
-  } else if (SubVT.isScalableVector() ==
-             N->getOperand(0).getValueType().isScalableVector())
-    return DAG.getExtractSubvector(dl, SubVT, Hi, IdxVal - LoEltsMin);
+    // If the extracted elements are all in the low half, do a simple extract.
+    if (IdxVal + NumResultElts <= LoEltsMin)
+      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
+
+    // Extracted subvector crosses vector split, so we need to blend the two
+    // halves.
+    // TODO: May be able to emit partial extract_subvector.
+    SmallVector<SDValue, 8> Elts;
+    Elts.reserve(NumResultElts);
+
+    DAG.ExtractVectorElements(Lo, Elts, /*Start=*/IdxVal,
+                              /*Count=*/LoEltsMin - IdxVal);
+    DAG.ExtractVectorElements(Hi, Elts, /*Start=*/0,
+                              /*Count=*/SubVT.getVectorNumElements() -
+                                  Elts.size());
+    return DAG.getBuildVector(SubVT, dl, Elts);
+  }
+
+  EVT SrcVT = N->getOperand(0).getValueType();
+  if (SubVT.isScalableVector() == SrcVT.isScalableVector()) {
+    uint64_t ExtractIdx = IdxVal - LoEltsMin;
+    if (ExtractIdx % NumResultElts == 0)
+      return DAG.getExtractSubvector(dl, SubVT, Hi, ExtractIdx);
+
+    // We cannot create an extract_subvector that isn't a multiple of the result
+    // size, which may go out of bounds for the last elements. Shuffle the
+    // desired elements down to 0 and do a simple 0 extract.
+    EVT HiVT = Hi.getValueType();
+    SmallVector<int, 8> Mask(HiVT.getVectorNumElements(), -1);
+    for (int I = 0; I != static_cast<int>(NumResultElts); ++I)
+      Mask[I] = ExtractIdx + I;
+
+    SDValue Shuffle =
+        DAG.getVectorShuffle(HiVT, dl, Hi, DAG.getPOISON(HiVT), Mask);
+    return DAG.getExtractSubvector(dl, SubVT, Shuffle, 0);
+  }
 
   // After this point the DAG node only permits extracting fixed-width
   // subvectors from scalable vectors.
@@ -4206,10 +4321,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_Scatter(MemSDNode *N, unsigned OpNo) {
     std::tie(IndexLo, IndexHi) = DAG.SplitVector(Ops.Index, DL);
 
   SDValue Lo;
+  MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
-      N->getPointerInfo(), MachineMemOperand::MOStore,
-      LocationSize::beforeOrAfterPointer(), Alignment, N->getAAInfo(),
-      N->getRanges());
+      N->getPointerInfo(), MMOFlags, LocationSize::beforeOrAfterPointer(),
+      Alignment, N->getAAInfo(), N->getRanges());
 
   if (auto *MSC = dyn_cast<MaskedScatterSDNode>(N)) {
     SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Ops.Scale};
@@ -4707,6 +4822,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::VP_LOAD:
     Res = WidenVecRes_VP_LOAD(cast<VPLoadSDNode>(N));
     break;
+  case ISD::VP_LOAD_FF:
+    Res = WidenVecRes_VP_LOAD_FF(cast<VPLoadFFSDNode>(N));
+    break;
   case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
     Res = WidenVecRes_VP_STRIDED_LOAD(cast<VPStridedLoadSDNode>(N));
     break;
@@ -6163,6 +6281,29 @@ SDValue DAGTypeLegalizer::WidenVecRes_VP_LOAD(VPLoadSDNode *N) {
   return Res;
 }
 
+SDValue DAGTypeLegalizer::WidenVecRes_VP_LOAD_FF(VPLoadFFSDNode *N) {
+  EVT WidenVT =
+      TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Mask = N->getMask();
+  SDValue EVL = N->getVectorLength();
+  SDLoc dl(N);
+
+  // The mask should be widened as well
+  assert(getTypeAction(Mask.getValueType()) ==
+             TargetLowering::TypeWidenVector &&
+         "Unable to widen binary VP op");
+  Mask = GetWidenedVector(Mask);
+  assert(Mask.getValueType().getVectorElementCount() ==
+             TLI.getTypeToTransformTo(*DAG.getContext(), Mask.getValueType())
+                 .getVectorElementCount() &&
+         "Unable to widen vector load");
+
+  SDValue Res = DAG.getLoadFFVP(WidenVT, dl, N->getChain(), N->getBasePtr(),
+                                Mask, EVL, N->getMemOperand());
+  ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+  ReplaceValueWith(SDValue(N, 2), Res.getValue(2));
+  return Res;
+}
+
 SDValue DAGTypeLegalizer::WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N) {
   SDLoc DL(N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index 0a449fd..72ea089 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -63,6 +63,8 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
   HorizontalVerticalBalance = 0;
 }
 
+ResourcePriorityQueue::~ResourcePriorityQueue() = default;
+
 unsigned
 ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
   unsigned NumberDeps = 0;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index f41b6eb..9668d25 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -837,6 +837,14 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
     ID.AddInteger(ELD->getMemOperand()->getFlags());
     break;
   }
+  case ISD::VP_LOAD_FF: {
+    const auto *LD = cast<VPLoadFFSDNode>(N);
+    ID.AddInteger(LD->getMemoryVT().getRawBits());
+    ID.AddInteger(LD->getRawSubclassData());
+    ID.AddInteger(LD->getPointerInfo().getAddrSpace());
+    ID.AddInteger(LD->getMemOperand()->getFlags());
+    break;
+  }
   case ISD::VP_STORE: {
     const VPStoreSDNode *EST = cast<VPStoreSDNode>(N);
     ID.AddInteger(EST->getMemoryVT().getRawBits());
@@ -1363,7 +1371,7 @@ void SelectionDAG::init(MachineFunction &NewMF,
                         const TargetLibraryInfo *LibraryInfo,
                         UniformityInfo *NewUA, ProfileSummaryInfo *PSIin,
                         BlockFrequencyInfo *BFIin, MachineModuleInfo &MMIin,
-                        FunctionVarLocs const *VarLocs, bool HasDivergency) {
+                        FunctionVarLocs const *VarLocs) {
   MF = &NewMF;
   SDAGISelPass = PassPtr;
   ORE = &NewORE;
@@ -1376,7 +1384,6 @@ void SelectionDAG::init(MachineFunction &NewMF,
   BFI = BFIin;
   MMI = &MMIin;
   FnVarLocs = VarLocs;
-  DivergentTarget = HasDivergency;
 }
 
 SelectionDAG::~SelectionDAG() {
@@ -2323,8 +2330,7 @@ SDValue SelectionDAG::getRegister(Register Reg, EVT VT) {
     return SDValue(E, 0);
 
   auto *N = newSDNode<RegisterSDNode>(Reg, VTs);
-  N->SDNodeBits.IsDivergent =
-      DivergentTarget && TLI->isSDNodeSourceOfDivergence(N, FLI, UA);
+  N->SDNodeBits.IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, UA);
   CSEMap.InsertNode(N, IP);
   InsertNode(N);
   return SDValue(N, 0);
@@ -2570,18 +2576,12 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
   }
 
   TargetLowering::ArgListTy Args;
-  auto AddArgListEntry = [&](SDValue Node, Type *Ty) {
-    TargetLowering::ArgListEntry Entry{};
-    Entry.Ty = Ty;
-    Entry.Node = Node;
-    Args.push_back(Entry);
-  };
 
   // Pass the arguments.
   for (const SDValue &Op : Node->op_values()) {
     EVT ArgVT = Op.getValueType();
     Type *ArgTy = ArgVT.getTypeForEVT(Ctx);
-    AddArgListEntry(Op, ArgTy);
+    Args.emplace_back(Op, ArgTy);
   }
 
   // Pass the output pointers.
@@ -2593,7 +2593,7 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
     EVT ResVT = Node->getValueType(ResNo);
     SDValue ResultPtr = ST ?
ST->getBasePtr() : CreateStackTemporary(ResVT); ResultPtrs[ResNo] = ResultPtr; - AddArgListEntry(ResultPtr, PointerTy); + Args.emplace_back(ResultPtr, PointerTy); } SDLoc DL(Node); @@ -2602,7 +2602,7 @@ bool SelectionDAG::expandMultipleResultFPLibCall( if (VD && VD->isMasked()) { EVT MaskVT = TLI->getSetCCResultType(getDataLayout(), Ctx, VT); SDValue Mask = getBoolConstant(true, DL, MaskVT, VT); - AddArgListEntry(Mask, MaskVT.getTypeForEVT(Ctx)); + Args.emplace_back(Mask, MaskVT.getTypeForEVT(Ctx)); } Type *RetType = CallRetResNo.has_value() @@ -3299,7 +3299,7 @@ SelectionDAG::getValidShiftAmountRange(SDValue V, const APInt &DemandedElts, return std::nullopt; } -std::optional<uint64_t> +std::optional<unsigned> SelectionDAG::getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth) const { assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL || @@ -3312,7 +3312,7 @@ SelectionDAG::getValidShiftAmount(SDValue V, const APInt &DemandedElts, return std::nullopt; } -std::optional<uint64_t> +std::optional<unsigned> SelectionDAG::getValidShiftAmount(SDValue V, unsigned Depth) const { EVT VT = V.getValueType(); APInt DemandedElts = VT.isFixedLengthVector() @@ -3321,7 +3321,7 @@ SelectionDAG::getValidShiftAmount(SDValue V, unsigned Depth) const { return getValidShiftAmount(V, DemandedElts, Depth); } -std::optional<uint64_t> +std::optional<unsigned> SelectionDAG::getValidMinimumShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth) const { assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL || @@ -3333,7 +3333,7 @@ SelectionDAG::getValidMinimumShiftAmount(SDValue V, const APInt &DemandedElts, return std::nullopt; } -std::optional<uint64_t> +std::optional<unsigned> SelectionDAG::getValidMinimumShiftAmount(SDValue V, unsigned Depth) const { EVT VT = V.getValueType(); APInt DemandedElts = VT.isFixedLengthVector() @@ -3342,7 +3342,7 @@ SelectionDAG::getValidMinimumShiftAmount(SDValue V, unsigned Depth) const { return getValidMinimumShiftAmount(V, DemandedElts, Depth); } -std::optional<uint64_t> +std::optional<unsigned> SelectionDAG::getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth) const { assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL || @@ -3354,7 +3354,7 @@ SelectionDAG::getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, return std::nullopt; } -std::optional<uint64_t> +std::optional<unsigned> SelectionDAG::getValidMaximumShiftAmount(SDValue V, unsigned Depth) const { EVT VT = V.getValueType(); APInt DemandedElts = VT.isFixedLengthVector() @@ -3828,7 +3828,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = KnownBits::shl(Known, Known2, NUW, NSW, ShAmtNonZero); // Minimum shift low bits are known zero. - if (std::optional<uint64_t> ShMinAmt = + if (std::optional<unsigned> ShMinAmt = getValidMinimumShiftAmount(Op, DemandedElts, Depth + 1)) Known.Zero.setLowBits(*ShMinAmt); break; @@ -3840,7 +3840,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Op->getFlags().hasExact()); // Minimum shift high bits are known zero. 
- if (std::optional<uint64_t> ShMinAmt = + if (std::optional<unsigned> ShMinAmt = getValidMinimumShiftAmount(Op, DemandedElts, Depth + 1)) Known.Zero.setHighBits(*ShMinAmt); break; @@ -3850,6 +3850,22 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = KnownBits::ashr(Known, Known2, /*ShAmtNonZero=*/false, Op->getFlags().hasExact()); break; + case ISD::ROTL: + case ISD::ROTR: + if (ConstantSDNode *C = + isConstOrConstSplat(Op.getOperand(1), DemandedElts)) { + unsigned Amt = C->getAPIntValue().urem(BitWidth); + + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + + // Canonicalize to ROTR. + if (Opcode == ISD::ROTL && Amt != 0) + Amt = BitWidth - Amt; + + Known.Zero = Known.Zero.rotr(Amt); + Known.One = Known.One.rotr(Amt); + } + break; case ISD::FSHL: case ISD::FSHR: if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(2), DemandedElts)) { @@ -3868,15 +3884,11 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); if (Opcode == ISD::FSHL) { - Known.One <<= Amt; - Known.Zero <<= Amt; - Known2.One.lshrInPlace(BitWidth - Amt); - Known2.Zero.lshrInPlace(BitWidth - Amt); + Known <<= Amt; + Known2 >>= BitWidth - Amt; } else { - Known.One <<= BitWidth - Amt; - Known.Zero <<= BitWidth - Amt; - Known2.One.lshrInPlace(Amt); - Known2.Zero.lshrInPlace(Amt); + Known <<= BitWidth - Amt; + Known2 >>= Amt; } Known = Known.unionWith(Known2); } @@ -4875,15 +4887,15 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, case ISD::SRA: Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); // SRA X, C -> adds C sign bits. - if (std::optional<uint64_t> ShAmt = + if (std::optional<unsigned> ShAmt = getValidMinimumShiftAmount(Op, DemandedElts, Depth + 1)) - Tmp = std::min<uint64_t>(Tmp + *ShAmt, VTBits); + Tmp = std::min(Tmp + *ShAmt, VTBits); return Tmp; case ISD::SHL: if (std::optional<ConstantRange> ShAmtRange = getValidShiftAmountRange(Op, DemandedElts, Depth + 1)) { - uint64_t MaxShAmt = ShAmtRange->getUnsignedMax().getZExtValue(); - uint64_t MinShAmt = ShAmtRange->getUnsignedMin().getZExtValue(); + unsigned MaxShAmt = ShAmtRange->getUnsignedMax().getZExtValue(); + unsigned MinShAmt = ShAmtRange->getUnsignedMin().getZExtValue(); // Try to look through ZERO/SIGN/ANY_EXTEND. If all extended bits are // shifted out, then we can compute the number of sign bits for the // operand being extended. A future improvement could be to pass along the @@ -4894,7 +4906,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, EVT ExtVT = Ext.getValueType(); SDValue Extendee = Ext.getOperand(0); EVT ExtendeeVT = Extendee.getValueType(); - uint64_t SizeDifference = + unsigned SizeDifference = ExtVT.getScalarSizeInBits() - ExtendeeVT.getScalarSizeInBits(); if (SizeDifference <= MinShAmt) { Tmp = SizeDifference + @@ -5127,7 +5139,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // If the sign portion ends in our element the subtraction gives correct // result. 
Otherwise it gives either negative or > bitwidth result - return std::clamp(KnownSign - rIndex * BitWidth, 0, BitWidth); + return std::clamp(KnownSign - rIndex * BitWidth, 1, BitWidth); } case ISD::INSERT_VECTOR_ELT: { if (VT.isScalableVector()) @@ -5454,6 +5466,83 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, } return true; + case ISD::EXTRACT_SUBVECTOR: { + SDValue Src = Op.getOperand(0); + if (Src.getValueType().isScalableVector()) + break; + uint64_t Idx = Op.getConstantOperandVal(1); + unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx); + return isGuaranteedNotToBeUndefOrPoison(Src, DemandedSrcElts, PoisonOnly, + Depth + 1); + } + + case ISD::INSERT_SUBVECTOR: { + if (Op.getValueType().isScalableVector()) + break; + SDValue Src = Op.getOperand(0); + SDValue Sub = Op.getOperand(1); + uint64_t Idx = Op.getConstantOperandVal(2); + unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); + APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); + APInt DemandedSrcElts = DemandedElts; + DemandedSrcElts.clearBits(Idx, Idx + NumSubElts); + + if (!!DemandedSubElts && !isGuaranteedNotToBeUndefOrPoison( + Sub, DemandedSubElts, PoisonOnly, Depth + 1)) + return false; + if (!!DemandedSrcElts && !isGuaranteedNotToBeUndefOrPoison( + Src, DemandedSrcElts, PoisonOnly, Depth + 1)) + return false; + return true; + } + + case ISD::EXTRACT_VECTOR_ELT: { + SDValue Src = Op.getOperand(0); + auto *IndexC = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + EVT SrcVT = Src.getValueType(); + if (SrcVT.isFixedLengthVector() && IndexC && + IndexC->getAPIntValue().ult(SrcVT.getVectorNumElements())) { + APInt DemandedSrcElts = APInt::getOneBitSet(SrcVT.getVectorNumElements(), + IndexC->getZExtValue()); + return isGuaranteedNotToBeUndefOrPoison(Src, DemandedSrcElts, PoisonOnly, + Depth + 1); + } + break; + } + + case ISD::INSERT_VECTOR_ELT: { + SDValue InVec = Op.getOperand(0); + SDValue InVal = Op.getOperand(1); + SDValue EltNo = Op.getOperand(2); + EVT VT = InVec.getValueType(); + auto *IndexC = dyn_cast<ConstantSDNode>(EltNo); + if (IndexC && VT.isFixedLengthVector() && + IndexC->getAPIntValue().ult(VT.getVectorNumElements())) { + if (DemandedElts[IndexC->getZExtValue()] && + !isGuaranteedNotToBeUndefOrPoison(InVal, PoisonOnly, Depth + 1)) + return false; + APInt InVecDemandedElts = DemandedElts; + InVecDemandedElts.clearBit(IndexC->getZExtValue()); + if (!!InVecDemandedElts && + !isGuaranteedNotToBeUndefOrPoison(InVec, InVecDemandedElts, + PoisonOnly, Depth + 1)) + return false; + return true; + } + break; + } + + case ISD::SCALAR_TO_VECTOR: + // Check upper (known undef) elements. + if (DemandedElts.ugt(1) && !PoisonOnly) + return false; + // Check element zero. + if (DemandedElts[0] && !isGuaranteedNotToBeUndefOrPoison( + Op.getOperand(0), PoisonOnly, Depth + 1)) + return false; + return true; + case ISD::SPLAT_VECTOR: return isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), PoisonOnly, Depth + 1); @@ -5476,6 +5565,52 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, return true; } + case ISD::SHL: + case ISD::SRL: + case ISD::SRA: + // Shift amount operand is checked by canCreateUndefOrPoison. So it is + // enough to check operand 0 if Op can't create undef/poison. 
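(A toy model of that reasoning — std::optional standing in for poison, with an invented 32-bit shift in which an out-of-range amount is the only way the shift itself introduces poison; wrap flags are ignored here — shows why the two conditions together suffice:)

#include <cstdint>
#include <cstdio>
#include <optional>

using Val = std::optional<uint32_t>; // nullopt models poison

// shl produces poison when the amount is out of range or the input is poison.
Val shl(Val X, uint32_t Amt) {
  if (!X || Amt >= 32)
    return std::nullopt;
  return *X << Amt;
}

// Mirrors the check: not-poison iff the shift cannot create poison (amount in
// range) and the value operand is itself not poison.
bool notPoison(Val X, uint32_t Amt) { return Amt < 32 && X.has_value(); }

int main() {
  std::printf("%d %d\n", (bool)shl(Val{5}, 3), notPoison(Val{5}, 3));   // 1 1
  std::printf("%d %d\n", (bool)shl(Val{5}, 40), notPoison(Val{5}, 40)); // 0 0
  std::printf("%d %d\n", (bool)shl(std::nullopt, 3),
               notPoison(std::nullopt, 3));                             // 0 0
}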
+ return !canCreateUndefOrPoison(Op, DemandedElts, PoisonOnly, + /*ConsiderFlags*/ true, Depth) && + isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), DemandedElts, + PoisonOnly, Depth + 1); + + case ISD::BSWAP: + case ISD::CTPOP: + case ISD::BITREVERSE: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::SADDSAT: + case ISD::UADDSAT: + case ISD::SSUBSAT: + case ISD::USUBSAT: + case ISD::SSHLSAT: + case ISD::USHLSAT: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ANY_EXTEND: + case ISD::TRUNCATE: + case ISD::VSELECT: { + // If Op can't create undef/poison and none of its operands are undef/poison + // then Op is never undef/poison. A difference from the more common check + // below, outside the switch, is that we handle elementwise operations for + // which the DemandedElts mask is valid for all operands here. + return !canCreateUndefOrPoison(Op, DemandedElts, PoisonOnly, + /*ConsiderFlags*/ true, Depth) && + all_of(Op->ops(), [&](SDValue V) { + return isGuaranteedNotToBeUndefOrPoison(V, DemandedElts, + PoisonOnly, Depth + 1); + }); + } + // TODO: Search for noundef attributes from library functions. // TODO: Pointers dereferenced by ISD::LOAD/STORE ops are noundef. @@ -5541,8 +5676,10 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, case ISD::ABDS: case ISD::SMIN: case ISD::SMAX: + case ISD::SCMP: case ISD::UMIN: case ISD::UMAX: + case ISD::UCMP: case ISD::AND: case ISD::XOR: case ISD::ROTL: @@ -5622,6 +5759,11 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, case ISD::FDIV: case ISD::FREM: case ISD::FCOPYSIGN: + case ISD::FMA: + case ISD::FMAD: + case ISD::FP_EXTEND: + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: // No poison except from flags (which is handled above) return false; @@ -6351,8 +6493,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, break; case ISD::FREEZE: assert(VT == N1.getValueType() && "Unexpected VT!"); - if (isGuaranteedNotToBeUndefOrPoison(N1, /*PoisonOnly*/ false, - /*Depth*/ 1)) + if (isGuaranteedNotToBeUndefOrPoison(N1, /*PoisonOnly=*/false)) return N1; break; case ISD::TokenFactor: @@ -6415,6 +6556,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (N1.isUndef()) // sext(undef) = 0, because the top bits will all be the same. return getConstant(0, DL, VT); + + // Skip unnecessary sext_inreg pattern: + // (sext (trunc x)) -> x iff the upper bits are all signbits. + if (OpOpcode == ISD::TRUNCATE) { + SDValue OpOp = N1.getOperand(0); + if (OpOp.getValueType() == VT) { + unsigned NumSignExtBits = + VT.getScalarSizeInBits() - N1.getScalarValueSizeInBits(); + if (ComputeNumSignBits(OpOp) > NumSignExtBits) { + transferDbgValues(N1, OpOp); + return OpOp; + } + } + } break; case ISD::ZERO_EXTEND: assert(VT.isInteger() && N1.getValueType().isInteger() && @@ -7032,6 +7187,45 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, } } + // Handle fshl/fshr special cases. 
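(The funnel-shift semantics being folded below, sketched with plain 8-bit integers rather than APInt — fshl takes the high lane of the a:b concatenation shifted left, fshr the low lane shifted right, with the amount taken modulo the bit width:)

#include <cstdint>
#include <cstdio>

uint8_t fshl8(uint8_t A, uint8_t B, unsigned S) {
  S %= 8; // amount is modulo the bit width
  return S ? (uint8_t)((A << S) | (B >> (8 - S))) : A;
}
uint8_t fshr8(uint8_t A, uint8_t B, unsigned S) {
  S %= 8;
  return S ? (uint8_t)((B >> S) | (A << (8 - S))) : B;
}

int main() {
  std::printf("%02x\n", fshl8(0x81, 0x40, 1)); // 02: 0x81's top bit shifts out
  std::printf("%02x\n", fshr8(0x81, 0x40, 1)); // a0: 0x81's low bit enters high
}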
+ if (Opcode == ISD::FSHL || Opcode == ISD::FSHR) { + auto *C1 = dyn_cast<ConstantSDNode>(Ops[0]); + auto *C2 = dyn_cast<ConstantSDNode>(Ops[1]); + auto *C3 = dyn_cast<ConstantSDNode>(Ops[2]); + + if (C1 && C2 && C3) { + if (C1->isOpaque() || C2->isOpaque() || C3->isOpaque()) + return SDValue(); + const APInt &V1 = C1->getAPIntValue(), &V2 = C2->getAPIntValue(), + &V3 = C3->getAPIntValue(); + + APInt FoldedVal = Opcode == ISD::FSHL ? APIntOps::fshl(V1, V2, V3) + : APIntOps::fshr(V1, V2, V3); + return getConstant(FoldedVal, DL, VT); + } + } + + // Handle fma/fmad special cases. + if (Opcode == ISD::FMA || Opcode == ISD::FMAD) { + assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); + assert(Ops[0].getValueType() == VT && Ops[1].getValueType() == VT && + Ops[2].getValueType() == VT && "FMA types must match!"); + ConstantFPSDNode *C1 = dyn_cast<ConstantFPSDNode>(Ops[0]); + ConstantFPSDNode *C2 = dyn_cast<ConstantFPSDNode>(Ops[1]); + ConstantFPSDNode *C3 = dyn_cast<ConstantFPSDNode>(Ops[2]); + if (C1 && C2 && C3) { + APFloat V1 = C1->getValueAPF(); + const APFloat &V2 = C2->getValueAPF(); + const APFloat &V3 = C3->getValueAPF(); + if (Opcode == ISD::FMAD) { + V1.multiply(V2, APFloat::rmNearestTiesToEven); + V1.add(V3, APFloat::rmNearestTiesToEven); + } else + V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven); + return getConstantFP(V1, DL, VT); + } + } + // This is for vector folding only from here on. if (!VT.isVector()) return SDValue(); @@ -7817,6 +8011,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(N2C->getAPIntValue().getBitWidth() == TLI->getVectorIdxWidth(getDataLayout()) && "Constant index for EXTRACT_SUBVECTOR has an invalid size"); + assert(N2C->getZExtValue() % VT.getVectorMinNumElements() == 0 && + "Extract index is not a multiple of the output vector length"); // Trivial extraction. if (VT == N1VT) @@ -7992,27 +8188,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, "Operand is DELETED_NODE!"); // Perform various simplifications. switch (Opcode) { - case ISD::FMA: - case ISD::FMAD: { - assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); - assert(N1.getValueType() == VT && N2.getValueType() == VT && - N3.getValueType() == VT && "FMA types must match!"); - ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); - ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2); - ConstantFPSDNode *N3CFP = dyn_cast<ConstantFPSDNode>(N3); - if (N1CFP && N2CFP && N3CFP) { - APFloat V1 = N1CFP->getValueAPF(); - const APFloat &V2 = N2CFP->getValueAPF(); - const APFloat &V3 = N3CFP->getValueAPF(); - if (Opcode == ISD::FMAD) { - V1.multiply(V2, APFloat::rmNearestTiesToEven); - V1.add(V3, APFloat::rmNearestTiesToEven); - } else - V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven); - return getConstantFP(V1, DL, VT); - } - break; - } case ISD::BUILD_VECTOR: { // Attempt to simplify BUILD_VECTOR. SDValue Ops[] = {N1, N2, N3}; @@ -8038,12 +8213,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // Use FoldSetCC to simplify SETCC's. if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL)) return V; - // Vector constant folding. 
- SDValue Ops[] = {N1, N2, N3}; - if (SDValue V = FoldConstantArithmetic(Opcode, DL, VT, Ops)) { - NewSDValueDbgMsg(V, "New node vector constant folding: ", this); - return V; - } break; } case ISD::SELECT: @@ -8179,6 +8348,19 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } } + // Perform trivial constant folding for arithmetic operators. + switch (Opcode) { + case ISD::FMA: + case ISD::FMAD: + case ISD::SETCC: + case ISD::FSHL: + case ISD::FSHR: + if (SDValue SV = + FoldConstantArithmetic(Opcode, DL, VT, {N1, N2, N3}, Flags)) + return SV; + break; + } + // Memoize node if it doesn't produce a glue result. SDNode *N; SDVTList VTs = getVTList(VT); @@ -8867,6 +9049,37 @@ static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI, } } +std::pair<SDValue, SDValue> +SelectionDAG::getMemcmp(SDValue Chain, const SDLoc &dl, SDValue Mem0, + SDValue Mem1, SDValue Size, const CallInst *CI) { + const char *LibCallName = TLI->getLibcallName(RTLIB::MEMCMP); + if (!LibCallName) + return {}; + + PointerType *PT = PointerType::getUnqual(*getContext()); + TargetLowering::ArgListTy Args = { + {Mem0, PT}, + {Mem1, PT}, + {Size, getDataLayout().getIntPtrType(*getContext())}}; + + TargetLowering::CallLoweringInfo CLI(*this); + bool IsTailCall = false; + bool ReturnsFirstArg = CI && funcReturnsFirstArgOfCall(*CI); + IsTailCall = CI && CI->isTailCall() && + isInTailCallPosition(*CI, getTarget(), ReturnsFirstArg); + + CLI.setDebugLoc(dl) + .setChain(Chain) + .setLibCallee( + TLI->getLibcallCallingConv(RTLIB::MEMCMP), + Type::getInt32Ty(*getContext()), + getExternalSymbol(LibCallName, TLI->getPointerTy(getDataLayout())), + std::move(Args)) + .setTailCall(IsTailCall); + + return TLI->LowerCallTo(CLI); +} + SDValue SelectionDAG::getMemcpy( SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, @@ -8918,13 +9131,10 @@ SDValue SelectionDAG::getMemcpy( // Emit a library call. TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Ty = PointerType::getUnqual(*getContext()); - Entry.Node = Dst; Args.push_back(Entry); - Entry.Node = Src; Args.push_back(Entry); - - Entry.Ty = getDataLayout().getIntPtrType(*getContext()); - Entry.Node = Size; Args.push_back(Entry); + Type *PtrTy = PointerType::getUnqual(*getContext()); + Args.emplace_back(Dst, PtrTy); + Args.emplace_back(Src, PtrTy); + Args.emplace_back(Size, getDataLayout().getIntPtrType(*getContext())); // FIXME: pass in SDLoc TargetLowering::CallLoweringInfo CLI(*this); bool IsTailCall = false; @@ -8962,17 +9172,10 @@ SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl, MachinePointerInfo SrcPtrInfo) { // Emit a library call. TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Ty = getDataLayout().getIntPtrType(*getContext()); - Entry.Node = Dst; - Args.push_back(Entry); - - Entry.Node = Src; - Args.push_back(Entry); - - Entry.Ty = SizeTy; - Entry.Node = Size; - Args.push_back(Entry); + Type *ArgTy = getDataLayout().getIntPtrType(*getContext()); + Args.emplace_back(Dst, ArgTy); + Args.emplace_back(Src, ArgTy); + Args.emplace_back(Size, SizeTy); RTLIB::Libcall LibraryCall = RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElemSz); @@ -9035,13 +9238,10 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, // Emit a library call. 
TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Ty = PointerType::getUnqual(*getContext()); - Entry.Node = Dst; Args.push_back(Entry); - Entry.Node = Src; Args.push_back(Entry); - - Entry.Ty = getDataLayout().getIntPtrType(*getContext()); - Entry.Node = Size; Args.push_back(Entry); + Type *PtrTy = PointerType::getUnqual(*getContext()); + Args.emplace_back(Dst, PtrTy); + Args.emplace_back(Src, PtrTy); + Args.emplace_back(Size, getDataLayout().getIntPtrType(*getContext())); // FIXME: pass in SDLoc TargetLowering::CallLoweringInfo CLI(*this); @@ -9079,17 +9279,10 @@ SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl, MachinePointerInfo SrcPtrInfo) { // Emit a library call. TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Ty = getDataLayout().getIntPtrType(*getContext()); - Entry.Node = Dst; - Args.push_back(Entry); - - Entry.Node = Src; - Args.push_back(Entry); - - Entry.Ty = SizeTy; - Entry.Node = Size; - Args.push_back(Entry); + Type *IntPtrTy = getDataLayout().getIntPtrType(*getContext()); + Args.emplace_back(Dst, IntPtrTy); + Args.emplace_back(Src, IntPtrTy); + Args.emplace_back(Size, SizeTy); RTLIB::Libcall LibraryCall = RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElemSz); @@ -9166,28 +9359,20 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, const char *BzeroName = getTargetLoweringInfo().getLibcallName(RTLIB::BZERO); - // Helper function to create an Entry from Node and Type. - const auto CreateEntry = [](SDValue Node, Type *Ty) { - TargetLowering::ArgListEntry Entry; - Entry.Node = Node; - Entry.Ty = Ty; - return Entry; - }; - bool UseBZero = isNullConstant(Src) && BzeroName; // If zeroing out and bzero is present, use it. if (UseBZero) { TargetLowering::ArgListTy Args; - Args.push_back(CreateEntry(Dst, PointerType::getUnqual(Ctx))); - Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx))); + Args.emplace_back(Dst, PointerType::getUnqual(Ctx)); + Args.emplace_back(Size, DL.getIntPtrType(Ctx)); CLI.setLibCallee( TLI->getLibcallCallingConv(RTLIB::BZERO), Type::getVoidTy(Ctx), getExternalSymbol(BzeroName, TLI->getPointerTy(DL)), std::move(Args)); } else { TargetLowering::ArgListTy Args; - Args.push_back(CreateEntry(Dst, PointerType::getUnqual(Ctx))); - Args.push_back(CreateEntry(Src, Src.getValueType().getTypeForEVT(Ctx))); - Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx))); + Args.emplace_back(Dst, PointerType::getUnqual(Ctx)); + Args.emplace_back(Src, Src.getValueType().getTypeForEVT(Ctx)); + Args.emplace_back(Size, DL.getIntPtrType(Ctx)); CLI.setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET), Dst.getValueType().getTypeForEVT(Ctx), getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), @@ -9216,18 +9401,9 @@ SDValue SelectionDAG::getAtomicMemset(SDValue Chain, const SDLoc &dl, MachinePointerInfo DstPtrInfo) { // Emit a library call. 
TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Ty = getDataLayout().getIntPtrType(*getContext()); - Entry.Node = Dst; - Args.push_back(Entry); - - Entry.Ty = Type::getInt8Ty(*getContext()); - Entry.Node = Value; - Args.push_back(Entry); - - Entry.Ty = SizeTy; - Entry.Node = Size; - Args.push_back(Entry); + Args.emplace_back(Dst, getDataLayout().getIntPtrType(*getContext())); + Args.emplace_back(Value, Type::getInt8Ty(*getContext())); + Args.emplace_back(Size, SizeTy); RTLIB::Libcall LibraryCall = RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElemSz); @@ -10434,6 +10610,34 @@ SDValue SelectionDAG::getMaskedHistogram(SDVTList VTs, EVT MemVT, return V; } +SDValue SelectionDAG::getLoadFFVP(EVT VT, const SDLoc &DL, SDValue Chain, + SDValue Ptr, SDValue Mask, SDValue EVL, + MachineMemOperand *MMO) { + SDVTList VTs = getVTList(VT, EVL.getValueType(), MVT::Other); + SDValue Ops[] = {Chain, Ptr, Mask, EVL}; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::VP_LOAD_FF, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(getSyntheticNodeSubclassData<VPLoadFFSDNode>(DL.getIROrder(), + VTs, VT, MMO)); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + ID.AddInteger(MMO->getFlags()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { + cast<VPLoadFFSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + auto *N = newSDNode<VPLoadFFSDNode>(DL.getIROrder(), DL.getDebugLoc(), VTs, + VT, MMO); + createOperands(N, Ops); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + SDValue SelectionDAG::getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); @@ -12176,8 +12380,6 @@ static bool gluePropagatesDivergence(const SDNode *Node) { } bool SelectionDAG::calculateDivergence(SDNode *N) { - if (!DivergentTarget) - return false; if (TLI->isSDNodeAlwaysUniform(N)) { assert(!TLI->isSDNodeSourceOfDivergence(N, FLI, UA) && "Conflicting divergence information!"); @@ -12197,8 +12399,6 @@ bool SelectionDAG::calculateDivergence(SDNode *N) { } void SelectionDAG::updateDivergence(SDNode *N) { - if (!DivergentTarget) - return; SmallVector<SDNode *, 16> Worklist(1, N); do { N = Worklist.pop_back_val(); @@ -13759,20 +13959,16 @@ void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) { Ops[I].setInitial(Vals[I]); EVT VT = Ops[I].getValueType(); - // Take care of the Node's operands iff target has divergence // Skip Chain. It does not carry divergence. - if (DivergentTarget && VT != MVT::Other && + if (VT != MVT::Other && (VT != MVT::Glue || gluePropagatesDivergence(Ops[I].getNode())) && Ops[I].getNode()->isDivergent()) { - // Node is going to be divergent if at least one of its operand is - // divergent, unless it belongs to the "AlwaysUniform" exemptions. IsDivergent = true; } } Node->NumOperands = Vals.size(); Node->OperandList = Ops; - // Check the divergence of the Node itself. 
- if (DivergentTarget && !TLI->isSDNodeAlwaysUniform(Node)) { + if (!TLI->isSDNodeAlwaysUniform(Node)) { IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, UA); Node->SDNodeBits.IsDivergent = IsDivergent; } @@ -13862,10 +14058,7 @@ SDValue SelectionDAG::makeStateFunctionCall(unsigned LibFunc, SDValue Ptr, const SDLoc &DLoc) { assert(InChain.getValueType() == MVT::Other && "Expected token chain"); TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Node = Ptr; - Entry.Ty = Ptr.getValueType().getTypeForEVT(*getContext()); - Args.push_back(Entry); + Args.emplace_back(Ptr, Ptr.getValueType().getTypeForEVT(*getContext())); RTLIB::Libcall LC = static_cast<RTLIB::Libcall>(LibFunc); SDValue Callee = getExternalSymbol(TLI->getLibcallName(LC), TLI->getPointerTy(getDataLayout())); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 306e068..5ccd58c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -17,7 +17,6 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" @@ -1837,11 +1836,8 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { getValue(CPA->getDiscriminator())); } - if (isa<ConstantPointerNull>(C)) { - unsigned AS = V->getType()->getPointerAddressSpace(); - return DAG.getConstant(0, getCurSDLoc(), - TLI.getPointerTy(DAG.getDataLayout(), AS)); - } + if (isa<ConstantPointerNull>(C)) + return DAG.getConstant(0, getCurSDLoc(), VT); if (match(C, m_VScale())) return DAG.getVScale(getCurSDLoc(), VT, APInt(VT.getSizeInBits(), 1)); @@ -2211,9 +2207,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, Chains); } else if (I.getNumOperands() != 0) { - SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs); - unsigned NumValues = ValueVTs.size(); + SmallVector<Type *, 4> Types; + ComputeValueTypes(DL, I.getOperand(0)->getType(), Types); + unsigned NumValues = Types.size(); if (NumValues) { SDValue RetOp = getValue(I.getOperand(0)); @@ -2233,7 +2229,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { bool RetInReg = F->getAttributes().hasRetAttr(Attribute::InReg); for (unsigned j = 0; j != NumValues; ++j) { - EVT VT = ValueVTs[j]; + EVT VT = TLI.getValueType(DL, Types[j]); if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind); @@ -2275,7 +2271,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { for (unsigned i = 0; i < NumParts; ++i) { Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType().getSimpleVT(), - VT, /*isfixed=*/true, 0, 0)); + VT, Types[j], 0, 0)); OutVals.push_back(Parts[i]); } } @@ -2291,9 +2287,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { assert(SwiftError.getFunctionArg() && "Need a swift error argument"); ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); Flags.setSwiftError(); - Outs.push_back(ISD::OutputArg( - Flags, /*vt=*/TLI.getPointerTy(DL), /*argvt=*/EVT(TLI.getPointerTy(DL)), - /*isfixed=*/true, /*origidx=*/1, /*partOffs=*/0)); + Outs.push_back(ISD::OutputArg(Flags, /*vt=*/TLI.getPointerTy(DL), + /*argvt=*/EVT(TLI.getPointerTy(DL)), + PointerType::getUnqual(*DAG.getContext()), + /*origidx=*/1, /*partOffs=*/0)); // 
Create SDNode for the swifterror virtual register. OutVals.push_back( DAG.getRegister(SwiftError.getOrCreateVRegUseAt( @@ -3108,9 +3105,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, assert(FnTy->getNumParams() == 1 && "Invalid function signature"); TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Node = GuardVal; - Entry.Ty = FnTy->getParamType(0); + TargetLowering::ArgListEntry Entry(GuardVal, FnTy->getParamType(0)); if (GuardCheckFn->hasParamAttribute(0, Attribute::AttrKind::InReg)) Entry.IsInReg = true; Args.push_back(Entry); @@ -3207,9 +3202,7 @@ void SelectionDAGBuilder::visitSPDescriptorFailure( assert(FnTy->getNumParams() == 1 && "Invalid function signature"); TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Node = GuardVal; - Entry.Ty = FnTy->getParamType(0); + TargetLowering::ArgListEntry Entry(GuardVal, FnTy->getParamType(0)); if (GuardCheckFn->hasParamAttribute(0, Attribute::AttrKind::InReg)) Entry.IsInReg = true; Args.push_back(Entry); @@ -3579,7 +3572,7 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB; // Update machine-CFG edges with unique successors. - SmallSet<BasicBlock*, 32> Done; + SmallPtrSet<BasicBlock *, 32> Done; for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) { BasicBlock *BB = I.getSuccessor(i); bool Inserted = Done.insert(BB).second; @@ -3978,6 +3971,11 @@ void SelectionDAGBuilder::visitSIToFP(const User &I) { setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N)); } +void SelectionDAGBuilder::visitPtrToAddr(const User &I) { + // FIXME: this is not correct for pointers with addr width != pointer width + visitPtrToInt(I); +} + void SelectionDAGBuilder::visitPtrToInt(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. @@ -4903,9 +4901,8 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, // extract the splat value and use it as a uniform base. // In all other cases the function returns 'false'. 
static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index, - ISD::MemIndexType &IndexType, SDValue &Scale, - SelectionDAGBuilder *SDB, const BasicBlock *CurBB, - uint64_t ElemSize) { + SDValue &Scale, SelectionDAGBuilder *SDB, + const BasicBlock *CurBB, uint64_t ElemSize) { SelectionDAG& DAG = SDB->DAG; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const DataLayout &DL = DAG.getDataLayout(); @@ -4923,7 +4920,6 @@ static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index, ElementCount NumElts = cast<VectorType>(Ptr->getType())->getElementCount(); EVT VT = EVT::getVectorVT(*DAG.getContext(), TLI.getPointerTy(DL), NumElts); Index = DAG.getConstant(0, SDB->getCurSDLoc(), VT); - IndexType = ISD::SIGNED_SCALED; Scale = DAG.getTargetConstant(1, SDB->getCurSDLoc(), TLI.getPointerTy(DL)); return true; } @@ -4953,7 +4949,6 @@ static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index, Base = SDB->getValue(BasePtr); Index = SDB->getValue(IndexVal); - IndexType = ISD::SIGNED_SCALED; Scale = DAG.getTargetConstant(ScaleVal, SDB->getCurSDLoc(), TLI.getPointerTy(DL)); @@ -4975,9 +4970,8 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { SDValue Base; SDValue Index; - ISD::MemIndexType IndexType; SDValue Scale; - bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this, + bool UniformBase = getUniformBase(Ptr, Base, Index, Scale, this, I.getParent(), VT.getScalarStoreSize()); unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace(); @@ -4987,8 +4981,8 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); - IndexType = ISD::SIGNED_SCALED; - Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); + Scale = + DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } EVT IdxVT = Index.getValueType(); @@ -5000,7 +4994,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { SDValue Ops[] = { getMemoryRoot(), Src0, Mask, Base, Index, Scale }; SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl, - Ops, MMO, IndexType, false); + Ops, MMO, ISD::SIGNED_SCALED, false); DAG.setRoot(Scatter); setValue(&I, Scatter); } @@ -5093,9 +5087,8 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { SDValue Root = DAG.getRoot(); SDValue Base; SDValue Index; - ISD::MemIndexType IndexType; SDValue Scale; - bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this, + bool UniformBase = getUniformBase(Ptr, Base, Index, Scale, this, I.getParent(), VT.getScalarStoreSize()); unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( @@ -5106,8 +5099,8 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); - IndexType = ISD::SIGNED_SCALED; - Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); + Scale = + DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } EVT IdxVT = Index.getValueType(); @@ -5118,8 +5111,9 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { } SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale }; - SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl, - Ops, MMO, IndexType, 
ISD::NON_EXTLOAD); + SDValue Gather = + DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl, Ops, MMO, + ISD::SIGNED_SCALED, ISD::NON_EXTLOAD); PendingLoads.push_back(Gather.getValue(1)); setValue(&I, Gather); @@ -6432,9 +6426,8 @@ void SelectionDAGBuilder::visitVectorHistogram(const CallInst &I, SDValue Root = DAG.getRoot(); SDValue Base; SDValue Index; - ISD::MemIndexType IndexType; SDValue Scale; - bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this, + bool UniformBase = getUniformBase(Ptr, Base, Index, Scale, this, I.getParent(), VT.getScalarStoreSize()); unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace(); @@ -6447,7 +6440,6 @@ void SelectionDAGBuilder::visitVectorHistogram(const CallInst &I, if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); - IndexType = ISD::SIGNED_SCALED; Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } @@ -6463,7 +6455,7 @@ void SelectionDAGBuilder::visitVectorHistogram(const CallInst &I, SDValue Ops[] = {Root, Inc, Mask, Base, Index, Scale, ID}; SDValue Histogram = DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), VT, sdl, - Ops, MMO, IndexType); + Ops, MMO, ISD::SIGNED_SCALED); setValue(&I, Histogram); DAG.setRoot(Histogram); @@ -7515,10 +7507,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } TargetLowering::ArgListTy Args; if (Intrinsic == Intrinsic::ubsantrap) { - Args.push_back(TargetLoweringBase::ArgListEntry()); - Args[0].Val = I.getArgOperand(0); - Args[0].Node = getValue(Args[0].Val); - Args[0].Ty = Args[0].Val->getType(); + Value *Arg = I.getArgOperand(0); + Args.emplace_back(Arg, getValue(Arg)); } TargetLowering::CallLoweringInfo CLI(DAG); @@ -7598,7 +7588,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, if (TM.getOptLevel() == CodeGenOptLevel::None) return; - const AllocaInst *LifetimeObject = cast<AllocaInst>(I.getArgOperand(1)); + const AllocaInst *LifetimeObject = dyn_cast<AllocaInst>(I.getArgOperand(0)); + if (!LifetimeObject) + return; // First check that the Alloca is static, otherwise it won't have a // valid frame index. @@ -7945,9 +7937,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, Args.reserve(3); for (unsigned Idx : {2, 3, 1}) { - TargetLowering::ArgListEntry Arg; - Arg.Node = getValue(I.getOperand(Idx)); - Arg.Ty = I.getOperand(Idx)->getType(); + TargetLowering::ArgListEntry Arg(getValue(I.getOperand(Idx)), + I.getOperand(Idx)->getType()); Arg.setAttributes(&I, Idx); Args.push_back(Arg); } @@ -7958,9 +7949,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // Forward the flags and any additional arguments. for (unsigned Idx = 4; Idx < I.arg_size(); ++Idx) { - TargetLowering::ArgListEntry Arg; - Arg.Node = getValue(I.getOperand(Idx)); - Arg.Ty = I.getOperand(Idx)->getType(); + TargetLowering::ArgListEntry Arg(getValue(I.getOperand(Idx)), + I.getOperand(Idx)->getType()); Arg.setAttributes(&I, Idx); Args.push_back(Arg); } @@ -7982,6 +7972,42 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, HasTailCall = true; return; } + case Intrinsic::amdgcn_call_whole_wave: { + TargetLowering::ArgListTy Args; + + // The first argument is the callee. Skip it when assembling the call args. 
+ for (unsigned Idx = 1; Idx < I.arg_size(); ++Idx) { + TargetLowering::ArgListEntry Arg(getValue(I.getArgOperand(Idx)), + I.getArgOperand(Idx)->getType()); + Arg.setAttributes(&I, Idx); + Args.push_back(Arg); + } + + SDValue ConvControlToken; + if (auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl)) { + auto *Token = Bundle->Inputs[0].get(); + ConvControlToken = getValue(Token); + } + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(getCurSDLoc()) + .setChain(getRoot()) + .setCallee(CallingConv::AMDGPU_Gfx_WholeWave, I.getType(), + getValue(I.getArgOperand(0)), std::move(Args)) + .setTailCall(false) + .setIsPreallocated( + I.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0) + .setConvergent(I.isConvergent()) + .setConvergenceControlToken(ConvControlToken); + CLI.CB = &I; + + std::pair<SDValue, SDValue> Result = + lowerInvokable(CLI, /*EHPadBB=*/nullptr); + + if (Result.first.getNode()) + setValue(&I, Result.first); + return; + } case Intrinsic::ptrmask: { SDValue Ptr = getValue(I.getOperand(0)); SDValue Mask = getValue(I.getOperand(1)); @@ -8430,8 +8456,11 @@ void SelectionDAGBuilder::visitVPLoad( MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo); bool AddToChain = !BatchAA || !BatchAA->pointsToConstantMemory(ML); SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MachineMemOperand::Flags MMOFlags = + TLI.getVPIntrinsicMemOperandFlags(VPIntrin); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad, + MachinePointerInfo(PtrOperand), MMOFlags, LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges); LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2], MMO, false /*IsExpanding */); @@ -8440,6 +8469,34 @@ void SelectionDAGBuilder::visitVPLoad( setValue(&VPIntrin, LD); } +void SelectionDAGBuilder::visitVPLoadFF( + const VPIntrinsic &VPIntrin, EVT VT, EVT EVLVT, + const SmallVectorImpl<SDValue> &OpValues) { + assert(OpValues.size() == 3 && "Unexpected number of operands"); + SDLoc DL = getCurSDLoc(); + Value *PtrOperand = VPIntrin.getArgOperand(0); + MaybeAlign Alignment = VPIntrin.getPointerAlignment(); + AAMDNodes AAInfo = VPIntrin.getAAMetadata(); + const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range); + SDValue LD; + // Do not serialize variable-length loads of constant memory with + // anything. + if (!Alignment) + Alignment = DAG.getEVTAlign(VT); + MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo); + bool AddToChain = !BatchAA || !BatchAA->pointsToConstantMemory(ML); + SDValue InChain = AddToChain ? 
DAG.getRoot() : DAG.getEntryNode(); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad, + LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges); + LD = DAG.getLoadFFVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2], + MMO); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, EVLVT, LD.getValue(1)); + if (AddToChain) + PendingLoads.push_back(LD.getValue(2)); + setValue(&VPIntrin, DAG.getMergeValues({LD.getValue(0), Trunc}, DL)); +} + void SelectionDAGBuilder::visitVPGather( const VPIntrinsic &VPIntrin, EVT VT, const SmallVectorImpl<SDValue> &OpValues) { @@ -8454,18 +8511,18 @@ void SelectionDAGBuilder::visitVPGather( Alignment = DAG.getEVTAlign(VT.getScalarType()); unsigned AS = PtrOperand->getType()->getScalarType()->getPointerAddressSpace(); + MachineMemOperand::Flags MMOFlags = + TLI.getVPIntrinsicMemOperandFlags(VPIntrin); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MachinePointerInfo(AS), MachineMemOperand::MOLoad, - LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges); + MachinePointerInfo(AS), MMOFlags, LocationSize::beforeOrAfterPointer(), + *Alignment, AAInfo, Ranges); SDValue Base, Index, Scale; - ISD::MemIndexType IndexType; - bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale, - this, VPIntrin.getParent(), - VT.getScalarStoreSize()); + bool UniformBase = + getUniformBase(PtrOperand, Base, Index, Scale, this, VPIntrin.getParent(), + VT.getScalarStoreSize()); if (!UniformBase) { Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(PtrOperand); - IndexType = ISD::SIGNED_SCALED; Scale = DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())); } EVT IdxVT = Index.getValueType(); @@ -8477,7 +8534,7 @@ void SelectionDAGBuilder::visitVPGather( LD = DAG.getGatherVP( DAG.getVTList(VT, MVT::Other), VT, DL, {DAG.getRoot(), Base, Index, Scale, OpValues[1], OpValues[2]}, MMO, - IndexType); + ISD::SIGNED_SCALED); PendingLoads.push_back(LD.getValue(1)); setValue(&VPIntrin, LD); } @@ -8494,8 +8551,11 @@ void SelectionDAGBuilder::visitVPStore( Alignment = DAG.getEVTAlign(VT); SDValue Ptr = OpValues[1]; SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MachineMemOperand::Flags MMOFlags = + TLI.getVPIntrinsicMemOperandFlags(VPIntrin); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore, + MachinePointerInfo(PtrOperand), MMOFlags, LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo); ST = DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], Ptr, Offset, OpValues[2], OpValues[3], VT, MMO, ISD::UNINDEXED, @@ -8517,20 +8577,19 @@ void SelectionDAGBuilder::visitVPScatter( Alignment = DAG.getEVTAlign(VT.getScalarType()); unsigned AS = PtrOperand->getType()->getScalarType()->getPointerAddressSpace(); + MachineMemOperand::Flags MMOFlags = + TLI.getVPIntrinsicMemOperandFlags(VPIntrin); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MachinePointerInfo(AS), MachineMemOperand::MOStore, - LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo); + MachinePointerInfo(AS), MMOFlags, LocationSize::beforeOrAfterPointer(), + *Alignment, AAInfo); SDValue Base, Index, Scale; - ISD::MemIndexType IndexType; - bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale, - this, VPIntrin.getParent(), - VT.getScalarStoreSize()); 
+ bool UniformBase = + getUniformBase(PtrOperand, Base, Index, Scale, this, VPIntrin.getParent(), + VT.getScalarStoreSize()); if (!UniformBase) { Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(PtrOperand); - IndexType = ISD::SIGNED_SCALED; - Scale = - DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())); + Scale = DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())); } EVT IdxVT = Index.getValueType(); EVT EltTy = IdxVT.getVectorElementType(); @@ -8541,7 +8600,7 @@ void SelectionDAGBuilder::visitVPScatter( ST = DAG.getScatterVP(DAG.getVTList(MVT::Other), VT, DL, {getMemoryRoot(), OpValues[0], Base, Index, Scale, OpValues[2], OpValues[3]}, - MMO, IndexType); + MMO, ISD::SIGNED_SCALED); DAG.setRoot(ST); setValue(&VPIntrin, ST); } @@ -8560,9 +8619,12 @@ void SelectionDAGBuilder::visitVPStridedLoad( bool AddToChain = !BatchAA || !BatchAA->pointsToConstantMemory(ML); SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); unsigned AS = PtrOperand->getType()->getPointerAddressSpace(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MachineMemOperand::Flags MMOFlags = + TLI.getVPIntrinsicMemOperandFlags(VPIntrin); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MachinePointerInfo(AS), MachineMemOperand::MOLoad, - LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges); + MachinePointerInfo(AS), MMOFlags, LocationSize::beforeOrAfterPointer(), + *Alignment, AAInfo, Ranges); SDValue LD = DAG.getStridedLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2], OpValues[3], MMO, @@ -8583,9 +8645,12 @@ void SelectionDAGBuilder::visitVPStridedStore( Alignment = DAG.getEVTAlign(VT.getScalarType()); AAMDNodes AAInfo = VPIntrin.getAAMetadata(); unsigned AS = PtrOperand->getType()->getPointerAddressSpace(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MachineMemOperand::Flags MMOFlags = + TLI.getVPIntrinsicMemOperandFlags(VPIntrin); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MachinePointerInfo(AS), MachineMemOperand::MOStore, - LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo); + MachinePointerInfo(AS), MMOFlags, LocationSize::beforeOrAfterPointer(), + *Alignment, AAInfo); SDValue ST = DAG.getStridedStoreVP( getMemoryRoot(), DL, OpValues[0], OpValues[1], @@ -8673,6 +8738,9 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic( case ISD::VP_LOAD: visitVPLoad(VPIntrin, ValueVTs[0], OpValues); break; + case ISD::VP_LOAD_FF: + visitVPLoadFF(VPIntrin, ValueVTs[0], ValueVTs[1], OpValues); + break; case ISD::VP_GATHER: visitVPGather(VPIntrin, ValueVTs[0], OpValues); break; @@ -8880,7 +8948,6 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee, } for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) { - TargetLowering::ArgListEntry Entry; const Value *V = *I; // Skip empty types @@ -8888,8 +8955,7 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee, continue; SDValue ArgNode = getValue(V); - Entry.Node = ArgNode; Entry.Ty = V->getType(); - + TargetLowering::ArgListEntry Entry(ArgNode, V->getType()); Entry.setAttributes(&CB, I - CB.arg_begin()); // Use swifterror virtual register as input to the call. @@ -8913,11 +8979,8 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee, // If call site has a cfguardtarget operand bundle, create and add an // additional ArgListEntry. 
if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_cfguardtarget)) { - TargetLowering::ArgListEntry Entry; Value *V = Bundle->Inputs[0]; - SDValue ArgNode = getValue(V); - Entry.Node = ArgNode; - Entry.Ty = V->getType(); + TargetLowering::ArgListEntry Entry(V, getValue(V)); Entry.IsCFGuardTarget = true; Args.push_back(Entry); } @@ -9058,7 +9121,7 @@ bool SelectionDAGBuilder::visitMemCmpBCmpCall(const CallInst &I) { const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemcmp( DAG, getCurSDLoc(), DAG.getRoot(), getValue(LHS), getValue(RHS), - getValue(Size), MachinePointerInfo(LHS), MachinePointerInfo(RHS)); + getValue(Size), &I); if (Res.first.getNode()) { processIntegerCallValue(I, Res.first, true); PendingLoads.push_back(Res.second); @@ -10580,9 +10643,7 @@ void SelectionDAGBuilder::populateCallLoweringInfo( assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic."); - TargetLowering::ArgListEntry Entry; - Entry.Node = getValue(V); - Entry.Ty = V->getType(); + TargetLowering::ArgListEntry Entry(getValue(V), V->getType()); Entry.setAttributes(Call, ArgI); Args.push_back(Entry); } @@ -10942,27 +11003,42 @@ static AttributeList getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) { /// migrated to using LowerCall, this hook should be integrated into SDISel. std::pair<SDValue, SDValue> TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { + LLVMContext &Context = CLI.RetTy->getContext(); + // Handle the incoming return values from the call. CLI.Ins.clear(); - SmallVector<EVT, 4> RetTys; + SmallVector<Type *, 4> RetOrigTys; SmallVector<TypeSize, 4> Offsets; auto &DL = CLI.DAG.getDataLayout(); - ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets); + ComputeValueTypes(DL, CLI.OrigRetTy, RetOrigTys, &Offsets); + + SmallVector<EVT, 4> RetVTs; + if (CLI.RetTy != CLI.OrigRetTy) { + assert(RetOrigTys.size() == 1 && + "Only supported for non-aggregate returns"); + RetVTs.push_back(getValueType(DL, CLI.RetTy)); + } else { + for (Type *Ty : RetOrigTys) + RetVTs.push_back(getValueType(DL, Ty)); + } if (CLI.IsPostTypeLegalization) { // If we are lowering a libcall after legalization, split the return type. 
- SmallVector<EVT, 4> OldRetTys; + SmallVector<Type *, 4> OldRetOrigTys; + SmallVector<EVT, 4> OldRetVTs; SmallVector<TypeSize, 4> OldOffsets; - RetTys.swap(OldRetTys); + RetOrigTys.swap(OldRetOrigTys); + RetVTs.swap(OldRetVTs); Offsets.swap(OldOffsets); - for (size_t i = 0, e = OldRetTys.size(); i != e; ++i) { - EVT RetVT = OldRetTys[i]; + for (size_t i = 0, e = OldRetVTs.size(); i != e; ++i) { + EVT RetVT = OldRetVTs[i]; uint64_t Offset = OldOffsets[i]; - MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT); - unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT); + MVT RegisterVT = getRegisterType(Context, RetVT); + unsigned NumRegs = getNumRegisters(Context, RetVT); unsigned RegisterVTByteSZ = RegisterVT.getSizeInBits() / 8; - RetTys.append(NumRegs, RegisterVT); + RetOrigTys.append(NumRegs, OldRetOrigTys[i]); + RetVTs.append(NumRegs, RegisterVT); for (unsigned j = 0; j != NumRegs; ++j) Offsets.push_back(TypeSize::getFixed(Offset + j * RegisterVTByteSZ)); } @@ -10973,7 +11049,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { bool CanLowerReturn = this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(), - CLI.IsVarArg, Outs, CLI.RetTy->getContext(), CLI.RetTy); + CLI.IsVarArg, Outs, Context, CLI.RetTy); SDValue DemoteStackSlot; int DemoteStackIdx = -100; @@ -10986,30 +11062,16 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { MachineFunction &MF = CLI.DAG.getMachineFunction(); DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Alignment, false); - Type *StackSlotPtrType = - PointerType::get(CLI.RetTy->getContext(), DL.getAllocaAddrSpace()); + Type *StackSlotPtrType = PointerType::get(Context, DL.getAllocaAddrSpace()); DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL)); - ArgListEntry Entry; - Entry.Node = DemoteStackSlot; - Entry.Ty = StackSlotPtrType; - Entry.IsSExt = false; - Entry.IsZExt = false; - Entry.IsInReg = false; + ArgListEntry Entry(DemoteStackSlot, StackSlotPtrType); Entry.IsSRet = true; - Entry.IsNest = false; - Entry.IsByVal = false; - Entry.IsByRef = false; - Entry.IsReturned = false; - Entry.IsSwiftSelf = false; - Entry.IsSwiftAsync = false; - Entry.IsSwiftError = false; - Entry.IsCFGuardTarget = false; Entry.Alignment = Alignment; CLI.getArgs().insert(CLI.getArgs().begin(), Entry); CLI.NumFixedArgs += 1; CLI.getArgs()[0].IndirectType = CLI.RetTy; - CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); + CLI.RetTy = CLI.OrigRetTy = Type::getVoidTy(Context); // sret demotion isn't compatible with tail-calls, since the sret argument // points into the callers stack frame. 
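(For the post-type-legalization splitting above, a rough standalone model — register width and sizes are invented here, not queried from a real target — of how one wide return value becomes NumRegs register-sized pieces, each with its byte offset:)

#include <cstdint>
#include <cstdio>
#include <vector>

struct Part {
  unsigned Bits;
  uint64_t ByteOffset;
};

// Split a RetBits-wide value into 64-bit register parts, mirroring the
// RegisterVT / NumRegs / Offsets bookkeeping in the loop above.
std::vector<Part> splitReturn(unsigned RetBits, uint64_t BaseOffset) {
  const unsigned RegBits = 64;
  const unsigned NumRegs = (RetBits + RegBits - 1) / RegBits;
  std::vector<Part> Parts;
  for (unsigned J = 0; J != NumRegs; ++J)
    Parts.push_back({RegBits, BaseOffset + J * (RegBits / 8)});
  return Parts;
}

int main() {
  // An i128 return at offset 0 becomes two i64 parts at byte offsets 0 and 8.
  for (const Part &P : splitReturn(128, 0))
    std::printf("i%u @ byte %llu\n", P.Bits, (unsigned long long)P.ByteOffset);
}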
@@ -11017,36 +11079,32 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } else { bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters( CLI.RetTy, CLI.CallConv, CLI.IsVarArg, DL); - for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + for (unsigned I = 0, E = RetVTs.size(); I != E; ++I) { ISD::ArgFlagsTy Flags; if (NeedsRegBlock) { Flags.setInConsecutiveRegs(); - if (I == RetTys.size() - 1) + if (I == RetVTs.size() - 1) Flags.setInConsecutiveRegsLast(); } - EVT VT = RetTys[I]; - MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), - CLI.CallConv, VT); - unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(), - CLI.CallConv, VT); + EVT VT = RetVTs[I]; + MVT RegisterVT = getRegisterTypeForCallingConv(Context, CLI.CallConv, VT); + unsigned NumRegs = + getNumRegistersForCallingConv(Context, CLI.CallConv, VT); for (unsigned i = 0; i != NumRegs; ++i) { - ISD::InputArg MyFlags; - MyFlags.Flags = Flags; - MyFlags.VT = RegisterVT; - MyFlags.ArgVT = VT; - MyFlags.Used = CLI.IsReturnValueUsed; + ISD::InputArg Ret(Flags, RegisterVT, VT, RetOrigTys[I], + CLI.IsReturnValueUsed, ISD::InputArg::NoArgIndex, 0); if (CLI.RetTy->isPointerTy()) { - MyFlags.Flags.setPointer(); - MyFlags.Flags.setPointerAddrSpace( + Ret.Flags.setPointer(); + Ret.Flags.setPointerAddrSpace( cast<PointerType>(CLI.RetTy)->getAddressSpace()); } if (CLI.RetSExt) - MyFlags.Flags.setSExt(); + Ret.Flags.setSExt(); if (CLI.RetZExt) - MyFlags.Flags.setZExt(); + Ret.Flags.setZExt(); if (CLI.IsInReg) - MyFlags.Flags.setInReg(); - CLI.Ins.push_back(MyFlags); + Ret.Flags.setInReg(); + CLI.Ins.push_back(Ret); } } } @@ -11056,11 +11114,12 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { if (supportSwiftError()) { for (const ArgListEntry &Arg : Args) { if (Arg.IsSwiftError) { - ISD::InputArg MyFlags; - MyFlags.VT = getPointerTy(DL); - MyFlags.ArgVT = EVT(getPointerTy(DL)); - MyFlags.Flags.setSwiftError(); - CLI.Ins.push_back(MyFlags); + ISD::ArgFlagsTy Flags; + Flags.setSwiftError(); + ISD::InputArg Ret(Flags, getPointerTy(DL), EVT(getPointerTy(DL)), + PointerType::getUnqual(Context), + /*Used=*/true, ISD::InputArg::NoArgIndex, 0); + CLI.Ins.push_back(Ret); } } } @@ -11069,18 +11128,24 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { CLI.Outs.clear(); CLI.OutVals.clear(); for (unsigned i = 0, e = Args.size(); i != e; ++i) { - SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs); + SmallVector<Type *, 4> OrigArgTys; + ComputeValueTypes(DL, Args[i].OrigTy, OrigArgTys); // FIXME: Split arguments if CLI.IsPostTypeLegalization Type *FinalType = Args[i].Ty; if (Args[i].IsByVal) FinalType = Args[i].IndirectType; bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters( FinalType, CLI.CallConv, CLI.IsVarArg, DL); - for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; + for (unsigned Value = 0, NumValues = OrigArgTys.size(); Value != NumValues; ++Value) { - EVT VT = ValueVTs[Value]; - Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext()); + Type *OrigArgTy = OrigArgTys[Value]; + Type *ArgTy = OrigArgTy; + if (Args[i].Ty != Args[i].OrigTy) { + assert(Value == 0 && "Only supported for non-aggregate arguments"); + ArgTy = Args[i].Ty; + } + + EVT VT = getValueType(DL, ArgTy); SDValue Op = SDValue(Args[i].Node.getNode(), Args[i].Node.getResNo() + Value); ISD::ArgFlagsTy Flags; @@ -11091,10 +11156,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) 
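The RetSExt/RetZExt flags propagated above record how a narrow return value is extended to register width, which is what the AssertSext/AssertZext nodes rely on later. A concrete illustration for an i8 value living in a 32-bit register:

#include <cassert>
#include <cstdint>

int main() {
  int8_t Ret = -1; // the IR-level i8 return value
  // signext: the register holds the sign-extended bits.
  uint32_t SExtReg = (uint32_t)(int32_t)Ret;
  // zeroext: the register holds the zero-extended bits.
  uint32_t ZExtReg = (uint32_t)(uint8_t)Ret;
  assert(SExtReg == 0xFFFFFFFFu && ZExtReg == 0xFFu);
}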
const { const Align OriginalAlignment(getABIAlignmentForCallingConv(ArgTy, DL)); Flags.setOrigAlign(OriginalAlignment); - if (Args[i].Ty->isPointerTy()) { + if (i >= CLI.NumFixedArgs) + Flags.setVarArg(); + if (ArgTy->isPointerTy()) { Flags.setPointer(); - Flags.setPointerAddrSpace( - cast<PointerType>(Args[i].Ty)->getAddressSpace()); + Flags.setPointerAddrSpace(cast<PointerType>(ArgTy)->getAddressSpace()); } if (Args[i].IsZExt) Flags.setZExt(); @@ -11168,10 +11234,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { if (NeedsRegBlock) Flags.setInConsecutiveRegs(); - MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), - CLI.CallConv, VT); - unsigned NumParts = getNumRegistersForCallingConv(CLI.RetTy->getContext(), - CLI.CallConv, VT); + MVT PartVT = getRegisterTypeForCallingConv(Context, CLI.CallConv, VT); + unsigned NumParts = + getNumRegistersForCallingConv(Context, CLI.CallConv, VT); SmallVector<SDValue, 4> Parts(NumParts); ISD::NodeType ExtendKind = ISD::ANY_EXTEND; @@ -11188,7 +11253,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { (CLI.RetTy->isPointerTy() && Args[i].Ty->isPointerTy() && CLI.RetTy->getPointerAddressSpace() == Args[i].Ty->getPointerAddressSpace())) && - RetTys.size() == NumValues && "unexpected use of 'returned'"); + RetVTs.size() == NumValues && "unexpected use of 'returned'"); // Before passing 'returned' to the target lowering code, ensure that // either the register MVT and the actual EVT are the same size or that // the return value and argument are extended in the same way; in these @@ -11213,8 +11278,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // For scalable vectors the scalable part is currently handled // by individual targets, so we just use the known minimum size here. ISD::OutputArg MyFlags( - Flags, Parts[j].getValueType().getSimpleVT(), VT, - i < CLI.NumFixedArgs, i, + Flags, Parts[j].getValueType().getSimpleVT(), VT, OrigArgTy, i, j * Parts[j].getValueType().getStoreSize().getKnownMinValue()); if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); @@ -11270,7 +11334,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // hidden sret parameter. 
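In the outgoing-argument loop above, each value is broadcast into NumParts register-sized pieces, and every OutputArg records its byte offset within the original value. A small model of that arithmetic, assuming a hypothetical 128-bit argument split into two 64-bit parts (the real sizes come from getRegisterTypeForCallingConv/getNumRegistersForCallingConv):

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  unsigned NumParts = 2;      // e.g. an i128 argument in two registers
  unsigned PartStoreSize = 8; // bytes per part, e.g. 64-bit registers
  struct OutPiece { bool IsSplit; uint64_t ByteOffset; };
  std::vector<OutPiece> Outs;
  for (unsigned j = 0; j != NumParts; ++j)
    // Mirrors `j * Parts[j].getValueType().getStoreSize()`; only the
    // first piece of a split value gets the Split flag.
    Outs.push_back({NumParts > 1 && j == 0, (uint64_t)j * PartStoreSize});
  assert(Outs[0].IsSplit && Outs[0].ByteOffset == 0);
  assert(!Outs[1].IsSplit && Outs[1].ByteOffset == 8);
}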
MVT PtrVT = getPointerTy(DL, DL.getAllocaAddrSpace()); - unsigned NumValues = RetTys.size(); + unsigned NumValues = RetVTs.size(); ReturnValues.resize(NumValues); SmallVector<SDValue, 4> Chains(NumValues); @@ -11283,7 +11347,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { DemoteStackSlot, CLI.DAG.getConstant(Offsets[i], CLI.DL, PtrVT), CLI.DL, SDNodeFlags::NoUnsignedWrap); SDValue L = CLI.DAG.getLoad( - RetTys[i], CLI.DL, CLI.Chain, Add, + RetVTs[i], CLI.DL, CLI.Chain, Add, MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(), DemoteStackIdx, Offsets[i]), HiddenSRetAlign); @@ -11301,11 +11365,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { else if (CLI.RetZExt) AssertOp = ISD::AssertZext; unsigned CurReg = 0; - for (EVT VT : RetTys) { - MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), - CLI.CallConv, VT); - unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(), - CLI.CallConv, VT); + for (EVT VT : RetVTs) { + MVT RegisterVT = getRegisterTypeForCallingConv(Context, CLI.CallConv, VT); + unsigned NumRegs = + getNumRegistersForCallingConv(Context, CLI.CallConv, VT); ReturnValues.push_back(getCopyFromParts( CLI.DAG, CLI.DL, &InVals[CurReg], NumRegs, RegisterVT, VT, nullptr, @@ -11321,7 +11384,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL, - CLI.DAG.getVTList(RetTys), ReturnValues); + CLI.DAG.getVTList(RetVTs), ReturnValues); return std::make_pair(Res, CLI.Chain); } @@ -11592,7 +11655,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { ISD::ArgFlagsTy Flags; Flags.setSRet(); MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVT); - ISD::InputArg RetArg(Flags, RegisterVT, ValueVT, true, + ISD::InputArg RetArg(Flags, RegisterVT, ValueVT, F.getReturnType(), true, ISD::InputArg::NoArgIndex, 0); Ins.push_back(RetArg); } @@ -11607,8 +11670,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Set up the incoming argument description vector. for (const Argument &Arg : F.args()) { unsigned ArgNo = Arg.getArgNo(); - SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs); + SmallVector<Type *, 4> Types; + ComputeValueTypes(DAG.getDataLayout(), Arg.getType(), Types); bool isArgValueUsed = !Arg.use_empty(); unsigned PartBase = 0; Type *FinalType = Arg.getType(); @@ -11616,17 +11679,15 @@ void SelectionDAGISel::LowerArguments(const Function &F) { FinalType = Arg.getParamByValType(); bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters( FinalType, F.getCallingConv(), F.isVarArg(), DL); - for (unsigned Value = 0, NumValues = ValueVTs.size(); - Value != NumValues; ++Value) { - EVT VT = ValueVTs[Value]; - Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); + for (unsigned Value = 0, NumValues = Types.size(); Value != NumValues; + ++Value) { + Type *ArgTy = Types[Value]; + EVT VT = TLI->getValueType(DL, ArgTy); ISD::ArgFlagsTy Flags; - - if (Arg.getType()->isPointerTy()) { + if (ArgTy->isPointerTy()) { Flags.setPointer(); - Flags.setPointerAddrSpace( - cast<PointerType>(Arg.getType())->getAddressSpace()); + Flags.setPointerAddrSpace(cast<PointerType>(ArgTy)->getAddressSpace()); } if (Arg.hasAttribute(Attribute::ZExt)) Flags.setZExt(); @@ -11730,7 +11791,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // are responsible for handling scalable vector arguments and // return values. 
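In the loop above, getCopyFromParts glues NumRegs register-sized pieces back into one value of the original EVT. A simplified model for a 64-bit value returned in two 32-bit parts, assuming the low part comes first (the real part ordering is target-dependent):

#include <cassert>
#include <cstdint>

// Hypothetical helper: rebuild a 64-bit value from two 32-bit parts.
uint64_t getCopyFromPartsModel(const uint32_t Parts[2]) {
  return (uint64_t)Parts[0] | ((uint64_t)Parts[1] << 32);
}

int main() {
  uint32_t Parts[2] = {0xDDCCBBAAu, 0x11223344u};
  assert(getCopyFromPartsModel(Parts) == 0x11223344DDCCBBAAull);
}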
ISD::InputArg MyFlags( - Flags, RegisterVT, VT, isArgValueUsed, ArgNo, + Flags, RegisterVT, VT, ArgTy, isArgValueUsed, ArgNo, PartBase + i * RegisterVT.getStoreSize().getKnownMinValue()); if (NumRegs > 1 && i == 0) MyFlags.Flags.setSplit(); @@ -12704,17 +12765,22 @@ static Register FollowCopyChain(MachineRegisterInfo &MRI, Register Reg) { assert(MI->getOpcode() == TargetOpcode::COPY && "start of copy chain MUST be COPY"); Reg = MI->getOperand(1).getReg(); + + // The copied register in the first copy must be virtual. + assert(Reg.isVirtual() && "expected COPY of virtual register"); MI = MRI.def_begin(Reg)->getParent(); + // There may be an optional second copy. if (MI->getOpcode() == TargetOpcode::COPY) { assert(Reg.isVirtual() && "expected COPY of virtual register"); Reg = MI->getOperand(1).getReg(); assert(Reg.isPhysical() && "expected COPY of physical register"); - MI = MRI.def_begin(Reg)->getParent(); + } else { + // The start of the chain must be an INLINEASM_BR. + assert(MI->getOpcode() == TargetOpcode::INLINEASM_BR && + "end of copy chain MUST be INLINEASM_BR"); } - // The start of the chain must be an INLINEASM_BR. - assert(MI->getOpcode() == TargetOpcode::INLINEASM_BR && + "end of copy chain MUST be INLINEASM_BR"); + return Reg; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 1c27807..e0835e6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -574,6 +574,7 @@ private: void visitFPToSI(const User &I); void visitUIToFP(const User &I); void visitSIToFP(const User &I); + void visitPtrToAddr(const User &I); void visitPtrToInt(const User &I); void visitIntToPtr(const User &I); void visitBitCast(const User &I); @@ -631,6 +632,8 @@ private: void visitVectorExtractLastActive(const CallInst &I, unsigned Intrinsic); void visitVPLoad(const VPIntrinsic &VPIntrin, EVT VT, const SmallVectorImpl<SDValue> &OpValues); + void visitVPLoadFF(const VPIntrinsic &VPIntrin, EVT VT, EVT EVLVT, + const SmallVectorImpl<SDValue> &OpValues); void visitVPStore(const VPIntrinsic &VPIntrin, const SmallVectorImpl<SDValue> &OpValues); void visitVPGather(const VPIntrinsic &VPIntrin, EVT VT, diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 26071ed..ece50ed 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -480,10 +480,7 @@ void SelectionDAGISel::initializeAnalysisResults( MachineModuleInfo &MMI = MAMP.getCachedResult<MachineModuleAnalysis>(*Fn.getParent())->getMMI(); - TTI = &FAM.getResult<TargetIRAnalysis>(Fn); - - CurDAG->init(*MF, *ORE, MFAM, LibInfo, UA, PSI, BFI, MMI, FnVarLocs, - TTI->hasBranchDivergence(&Fn)); + CurDAG->init(*MF, *ORE, MFAM, LibInfo, UA, PSI, BFI, MMI, FnVarLocs); // Now get the optional analyses if we want to.
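The FollowCopyChain restructuring above makes the accepted shapes explicit: the first COPY must read a virtual register, and that register is either defined by a second COPY reading a physical register or directly by the INLINEASM_BR. A simplified model of the walk, with invented types:

#include <cassert>

enum Opcode { COPY, INLINEASM_BR };

struct Instr {
  Opcode Opc;
  bool SrcIsVirtual;   // class of the register this COPY reads
  const Instr *SrcDef; // defining instruction of that register
};

// Hypothetical model of the repaired chain walk.
bool followCopyChainModel(const Instr &First) {
  assert(First.Opc == COPY && First.SrcIsVirtual);
  const Instr *Def = First.SrcDef;
  if (Def->Opc == COPY) {
    // Optional second copy: must read a physical register.
    return !Def->SrcIsVirtual;
  }
  // No second copy: the chain must end at the INLINEASM_BR.
  return Def->Opc == INLINEASM_BR;
}

int main() {
  Instr Asm{INLINEASM_BR, false, nullptr};
  Instr Second{COPY, false, &Asm};  // COPY from a physical register
  Instr First{COPY, true, &Second}; // COPY from a virtual register
  assert(followCopyChainModel(First));
  Instr Direct{COPY, true, &Asm};   // chain with no second copy
  assert(followCopyChainModel(Direct));
}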
// This is based on the possibly changed OptLevel (after optnone is taken @@ -501,6 +498,10 @@ void SelectionDAGISel::initializeAnalysisResults( BatchAA = std::nullopt; SP = &FAM.getResult<SSPLayoutAnalysis>(Fn); + +#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS + TTI = &FAM.getResult<TargetIRAnalysis>(Fn); +#endif } void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) { @@ -536,10 +537,7 @@ void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) { MachineModuleInfo &MMI = MFP.getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); - TTI = &MFP.getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn); - - CurDAG->init(*MF, *ORE, &MFP, LibInfo, UA, PSI, BFI, MMI, FnVarLocs, - TTI->hasBranchDivergence(&Fn)); + CurDAG->init(*MF, *ORE, &MFP, LibInfo, UA, PSI, BFI, MMI, FnVarLocs); // Now get the optional analyses if we want to. // This is based on the possibly changed OptLevel (after optnone is taken @@ -558,6 +556,10 @@ void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) { BatchAA = std::nullopt; SP = &MFP.getAnalysis<StackProtector>().getLayoutInfo(); + +#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS + TTI = &MFP.getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn); +#endif } bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 80aeefe..46a5e44 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -1258,7 +1258,7 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { if (Record.type == RecordType::Spill) { unsigned Index = Record.payload.FI; - SDValue SpillSlot = DAG.getTargetFrameIndex(Index, getFrameIndexTy()); + SDValue SpillSlot = DAG.getFrameIndex(Index, getFrameIndexTy()); // All the reloads are independent and are reading memory only modified by // statepoints (i.e. no other aliasing stores); informing SelectionDAG of diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 48d6b99..a8c7c16e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -162,14 +162,17 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, TargetLowering::ArgListTy Args; Args.reserve(Ops.size()); - TargetLowering::ArgListEntry Entry; ArrayRef<Type *> OpsTypeOverrides = CallOptions.OpsTypeOverrides; for (unsigned i = 0; i < Ops.size(); ++i) { SDValue NewOp = Ops[i]; - Entry.Node = NewOp; - Entry.Ty = i < OpsTypeOverrides.size() && OpsTypeOverrides[i] + Type *Ty = i < OpsTypeOverrides.size() && OpsTypeOverrides[i] ?
OpsTypeOverrides[i] - : Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); + : NewOp.getValueType().getTypeForEVT(*DAG.getContext()); + TargetLowering::ArgListEntry Entry(NewOp, Ty); + if (CallOptions.IsSoften) + Entry.OrigTy = + CallOptions.OpsVTBeforeSoften[i].getTypeForEVT(*DAG.getContext()); + Entry.IsSExt = shouldSignExtendTypeInLibCall(Entry.Ty, CallOptions.IsSigned); Entry.IsZExt = !Entry.IsSExt; @@ -189,18 +192,21 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout())); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + Type *OrigRetTy = RetTy; TargetLowering::CallLoweringInfo CLI(DAG); bool signExtend = shouldSignExtendTypeInLibCall(RetTy, CallOptions.IsSigned); bool zeroExtend = !signExtend; - if (CallOptions.IsSoften && - !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) { - signExtend = zeroExtend = false; + if (CallOptions.IsSoften) { + OrigRetTy = CallOptions.RetVTBeforeSoften.getTypeForEVT(*DAG.getContext()); + if (!shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) + signExtend = zeroExtend = false; } CLI.setDebugLoc(dl) .setChain(InChain) - .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) + .setLibCallee(getLibcallCallingConv(LC), RetTy, OrigRetTy, Callee, + std::move(Args)) .setNoReturn(CallOptions.DoesNotReturn) .setDiscardResult(!CallOptions.IsReturnValueUsed) .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization) @@ -420,7 +426,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, TargetLowering::MakeLibCallOptions CallOptions; EVT OpsVT[2] = { OldLHS.getValueType(), OldRHS.getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true); + CallOptions.setTypeListBeforeSoften(OpsVT, RetVT); auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain); NewLHS = Call.first; NewRHS = DAG.getConstant(0, dl, RetVT); @@ -775,13 +781,6 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( break; } - case ISD::FREEZE: { - SDValue N0 = Op.getOperand(0); - if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts, - /*PoisonOnly=*/false, Depth + 1)) - return N0; - break; - } case ISD::AND: { LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); @@ -833,7 +832,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( case ISD::SHL: { // If we are only demanding sign bits then we can use the shift source // directly. - if (std::optional<uint64_t> MaxSA = + if (std::optional<unsigned> MaxSA = DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) { SDValue Op0 = Op.getOperand(0); unsigned ShAmt = *MaxSA; @@ -848,7 +847,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( case ISD::SRL: { // If we are only demanding sign bits then we can use the shift source // directly. 
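The OrigTy/OrigRetTy bookkeeping in makeLibCall above matters for softened libcalls: once an f32 has been softened, it travels as an i32 bit pattern, and the pre-soften types recorded in RetVTBeforeSoften/OpsVTBeforeSoften are what extension decisions should be based on. The value-level transformation itself is just a bit reinterpretation, sketched here (the actual softening is performed by type legalization, not by this code):

#include <bit> // std::bit_cast (C++20)
#include <cassert>
#include <cstdint>

int main() {
  float F = 1.0f;
  // Softened form: the same 32 bits, now of integer type. Ty would be
  // i32 here, while OrigTy remembers the value started life as f32.
  uint32_t Soft = std::bit_cast<uint32_t>(F);
  assert(Soft == 0x3F800000u);
  assert(std::bit_cast<float>(Soft) == 1.0f);
}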
- if (std::optional<uint64_t> MaxSA = + if (std::optional<unsigned> MaxSA = DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) { SDValue Op0 = Op.getOperand(0); unsigned ShAmt = *MaxSA; @@ -1781,7 +1780,7 @@ bool TargetLowering::SimplifyDemandedBits( SDValue Op1 = Op.getOperand(1); EVT ShiftVT = Op1.getValueType(); - if (std::optional<uint64_t> KnownSA = + if (std::optional<unsigned> KnownSA = TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) { unsigned ShAmt = *KnownSA; if (ShAmt == 0) @@ -1793,7 +1792,7 @@ bool TargetLowering::SimplifyDemandedBits( // TODO - support non-uniform vector amounts. if (Op0.getOpcode() == ISD::SRL) { if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) { - if (std::optional<uint64_t> InnerSA = + if (std::optional<unsigned> InnerSA = TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) { unsigned C1 = *InnerSA; unsigned Opc = ISD::SHL; @@ -1833,7 +1832,7 @@ bool TargetLowering::SimplifyDemandedBits( // TODO - support non-uniform vector amounts. if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() && InnerOp.hasOneUse()) { - if (std::optional<uint64_t> SA2 = TLO.DAG.getValidShiftAmount( + if (std::optional<unsigned> SA2 = TLO.DAG.getValidShiftAmount( InnerOp, DemandedElts, Depth + 2)) { unsigned InnerShAmt = *SA2; if (InnerShAmt < ShAmt && InnerShAmt < InnerBits && @@ -1859,8 +1858,7 @@ bool TargetLowering::SimplifyDemandedBits( Op->dropFlags(SDNodeFlags::NoWrap); return true; } - Known.Zero <<= ShAmt; - Known.One <<= ShAmt; + Known <<= ShAmt; // low bits known zero. Known.Zero.setLowBits(ShAmt); @@ -1951,7 +1949,7 @@ bool TargetLowering::SimplifyDemandedBits( // If we are only demanding sign bits then we can use the shift source // directly. - if (std::optional<uint64_t> MaxSA = + if (std::optional<unsigned> MaxSA = TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) { unsigned ShAmt = *MaxSA; unsigned NumSignBits = @@ -1967,7 +1965,7 @@ bool TargetLowering::SimplifyDemandedBits( SDValue Op1 = Op.getOperand(1); EVT ShiftVT = Op1.getValueType(); - if (std::optional<uint64_t> KnownSA = + if (std::optional<unsigned> KnownSA = TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) { unsigned ShAmt = *KnownSA; if (ShAmt == 0) @@ -1979,7 +1977,7 @@ bool TargetLowering::SimplifyDemandedBits( // TODO - support non-uniform vector amounts. if (Op0.getOpcode() == ISD::SHL) { if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) { - if (std::optional<uint64_t> InnerSA = + if (std::optional<unsigned> InnerSA = TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) { unsigned C1 = *InnerSA; unsigned Opc = ISD::SRL; @@ -1999,7 +1997,7 @@ bool TargetLowering::SimplifyDemandedBits( // single sra. We can do this if the top bits are never demanded. if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) { if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) { - if (std::optional<uint64_t> InnerSA = + if (std::optional<unsigned> InnerSA = TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) { unsigned C1 = *InnerSA; // Clamp the combined shift amount if it exceeds the bit width. @@ -2043,8 +2041,7 @@ bool TargetLowering::SimplifyDemandedBits( if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, Depth + 1)) return true; - Known.Zero.lshrInPlace(ShAmt); - Known.One.lshrInPlace(ShAmt); + Known >>= ShAmt; // High bits known zero. 
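The `Known <<= ShAmt` form the cleanups above switch to shifts the Zero and One masks in lockstep; the caller then marks the freshly shifted-in low bits as known zero. A toy 32-bit version of that bookkeeping (llvm::KnownBits does the same on APInt and handles many more cases):

#include <cassert>
#include <cstdint>

// Toy stand-in for llvm::KnownBits on a 32-bit value.
struct Known32 {
  uint32_t Zero = 0; // bits known to be 0
  uint32_t One = 0;  // bits known to be 1
  // Mirrors `Known <<= ShAmt`: both masks shift together.
  Known32 &operator<<=(unsigned S) {
    Zero <<= S;
    One <<= S;
    return *this;
  }
};

int main() {
  Known32 K;
  K.One = 0x1;             // low bit known one
  K.Zero = 0x2;            // next bit known zero
  K <<= 4;
  K.Zero |= (1u << 4) - 1; // mirrors Known.Zero.setLowBits(ShAmt)
  assert(K.One == 0x10 && K.Zero == 0x2F);
}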
Known.Zero.setHighBits(ShAmt); @@ -2065,7 +2062,7 @@ bool TargetLowering::SimplifyDemandedBits( // If we are only demanding sign bits then we can use the shift source // directly. - if (std::optional<uint64_t> MaxSA = + if (std::optional<unsigned> MaxSA = TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) { unsigned ShAmt = *MaxSA; // Must already be signbits in DemandedBits bounds, and can't demand any @@ -2104,7 +2101,7 @@ bool TargetLowering::SimplifyDemandedBits( if (DemandedBits.isOne()) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1)); - if (std::optional<uint64_t> KnownSA = + if (std::optional<unsigned> KnownSA = TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) { unsigned ShAmt = *KnownSA; if (ShAmt == 0) @@ -2113,7 +2110,7 @@ bool TargetLowering::SimplifyDemandedBits( // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target // supports sext_inreg. if (Op0.getOpcode() == ISD::SHL) { - if (std::optional<uint64_t> InnerSA = + if (std::optional<unsigned> InnerSA = TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) { unsigned LowBits = BitWidth - ShAmt; EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits); @@ -2154,8 +2151,7 @@ bool TargetLowering::SimplifyDemandedBits( if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, Depth + 1)) return true; - Known.Zero.lshrInPlace(ShAmt); - Known.One.lshrInPlace(ShAmt); + Known >>= ShAmt; // If the input sign bit is known to be zero, or if none of the top bits // are demanded, turn this into an unsigned shift right. @@ -2226,10 +2222,8 @@ bool TargetLowering::SimplifyDemandedBits( Depth + 1)) return true; - Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt)); - Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt)); - Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt); - Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt); + Known2 <<= (IsFSHL ? Amt : (BitWidth - Amt)); + Known >>= (IsFSHL ? (BitWidth - Amt) : Amt); Known = Known.unionWith(Known2); // Attempt to avoid multi-use ops if we don't need anything from them. @@ -2364,8 +2358,7 @@ bool TargetLowering::SimplifyDemandedBits( if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO, Depth + 1)) return true; - Known.One = Known2.One.reverseBits(); - Known.Zero = Known2.Zero.reverseBits(); + Known = Known2.reverseBits(); break; } case ISD::BSWAP: { @@ -2398,8 +2391,7 @@ bool TargetLowering::SimplifyDemandedBits( if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO, Depth + 1)) return true; - Known.One = Known2.One.byteSwap(); - Known.Zero = Known2.Zero.byteSwap(); + Known = Known2.byteSwap(); break; } case ISD::CTPOP: { @@ -2665,11 +2657,11 @@ bool TargetLowering::SimplifyDemandedBits( break; } - std::optional<uint64_t> ShAmtC = + std::optional<unsigned> ShAmtC = TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2); if (!ShAmtC || *ShAmtC >= BitWidth) break; - uint64_t ShVal = *ShAmtC; + unsigned ShVal = *ShAmtC; APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth); @@ -5125,6 +5117,21 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, Cond == ISD::SETEQ ? 
ISD::SETLT : ISD::SETGE); } + // fold (setcc (trunc x) c) -> (setcc x c) + if (N0.getOpcode() == ISD::TRUNCATE && + ((N0->getFlags().hasNoUnsignedWrap() && !ISD::isSignedIntSetCC(Cond)) || + (N0->getFlags().hasNoSignedWrap() && + !ISD::isUnsignedIntSetCC(Cond))) && + isTypeDesirableForOp(ISD::SETCC, N0.getOperand(0).getValueType())) { + EVT NewVT = N0.getOperand(0).getValueType(); + SDValue NewConst = DAG.getConstant( + (N0->getFlags().hasNoSignedWrap() && !ISD::isUnsignedIntSetCC(Cond)) + ? C1.sext(NewVT.getSizeInBits()) + : C1.zext(NewVT.getSizeInBits()), + dl, NewVT); + return DAG.getSetCC(dl, VT, N0.getOperand(0), NewConst, Cond); + } + if (SDValue V = optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl)) return V; @@ -5363,10 +5370,25 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) { unsigned ShiftBits = AndRHSC.countr_zero(); if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) { + // If using an unsigned shift doesn't yield a legal compare + // immediate, try using sra instead. + APInt NewC = C1.lshr(ShiftBits); + if (NewC.getSignificantBits() <= 64 && + !isLegalICmpImmediate(NewC.getSExtValue())) { + APInt SignedC = C1.ashr(ShiftBits); + if (SignedC.getSignificantBits() <= 64 && + isLegalICmpImmediate(SignedC.getSExtValue())) { + SDValue Shift = DAG.getNode( + ISD::SRA, dl, ShValTy, N0.getOperand(0), + DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl)); + SDValue CmpRHS = DAG.getConstant(SignedC, dl, ShValTy); + return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond); + } + } SDValue Shift = DAG.getNode( ISD::SRL, dl, ShValTy, N0.getOperand(0), DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl)); - SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy); + SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy); return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond); } } @@ -5646,6 +5668,17 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return N0; } + // Fold (setcc (trunc x) (trunc y)) -> (setcc x y) + if (N0.getOpcode() == ISD::TRUNCATE && N1.getOpcode() == ISD::TRUNCATE && + N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() && + ((!ISD::isSignedIntSetCC(Cond) && N0->getFlags().hasNoUnsignedWrap() && + N1->getFlags().hasNoUnsignedWrap()) || + (!ISD::isUnsignedIntSetCC(Cond) && N0->getFlags().hasNoSignedWrap() && + N1->getFlags().hasNoSignedWrap())) && + isTypeDesirableForOp(ISD::SETCC, N0.getOperand(0).getValueType())) { + return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond); + } + // Could not fold it. return SDValue(); } @@ -6482,8 +6515,8 @@ SDValue TargetLowering::buildSDIVPow2WithCMov( Created.push_back(CMov.getNode()); // Divide by pow2. - SDValue SRA = - DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, VT)); + SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, CMov, + DAG.getShiftAmountConstant(Lg2, VT, DL)); // If we're dividing by a positive value, we're done. Otherwise, we must // negate the result. 
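A worked instance of the sra fallback added in SimplifySetCC above, for a target with small legal compare immediates: with Mask = C1 = 0xFFFFF000 (a negated power of two with twelve trailing zeros), the srl form compares against 0xFFFFF, while the sra form compares against -1. The two are interchangeable because C1 is a subset of the mask:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Mask = 0xFFFFF000u; // negated power of two
  const uint32_t C1 = 0xFFFFF000u;   // C1.isSubsetOf(Mask)
  for (uint64_t v = 0; v <= 0xFFFFFFFFull; v += 0x10001ull) {
    uint32_t x = (uint32_t)v;
    bool AndForm = (x & Mask) == C1;
    bool SrlForm = (x >> 12) == (C1 >> 12); // compares vs 0xFFFFF
    // Relies on arithmetic right shift of signed values (the usual
    // behavior, and guaranteed since C++20); compares vs -1.
    bool SraForm = ((int32_t)x >> 12) == ((int32_t)C1 >> 12);
    assert(AndForm == SrlForm && SrlForm == SraForm);
  }
}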
@@ -9700,8 +9733,8 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG, SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const { SDLoc dl(N); EVT VT = N->getValueType(0); - SDValue LHS = DAG.getFreeze(N->getOperand(0)); - SDValue RHS = DAG.getFreeze(N->getOperand(1)); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); bool IsSigned = N->getOpcode() == ISD::ABDS; // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs)) @@ -9709,34 +9742,37 @@ SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const { unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX; unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN; if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) { + LHS = DAG.getFreeze(LHS); + RHS = DAG.getFreeze(RHS); SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS); SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS); return DAG.getNode(ISD::SUB, dl, VT, Max, Min); } // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs)) - if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT)) + if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT)) { + LHS = DAG.getFreeze(LHS); + RHS = DAG.getFreeze(RHS); return DAG.getNode(ISD::OR, dl, VT, DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS), DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS)); + } // If the subtract doesn't overflow then just use abs(sub()) - // NOTE: don't use frozen operands for value tracking. - bool IsNonNegative = DAG.SignBitIsZero(N->getOperand(1)) && - DAG.SignBitIsZero(N->getOperand(0)); + bool IsNonNegative = DAG.SignBitIsZero(LHS) && DAG.SignBitIsZero(RHS); - if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, N->getOperand(0), - N->getOperand(1))) + if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, LHS, RHS)) return DAG.getNode(ISD::ABS, dl, VT, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS)); - if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, N->getOperand(1), - N->getOperand(0))) + if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, RHS, LHS)) return DAG.getNode(ISD::ABS, dl, VT, DAG.getNode(ISD::SUB, dl, VT, RHS, LHS)); EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT; + LHS = DAG.getFreeze(LHS); + RHS = DAG.getFreeze(RHS); SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC); // Branchless expansion iff cmp result is allbits: @@ -10679,7 +10715,6 @@ SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SDLoc dl(GA); ArgListTy Args; - ArgListEntry Entry; const GlobalValue *GV = cast<GlobalValue>(GA->getGlobal()->stripPointerCastsAndAliases()); SmallString<32> NameString("__emutls_v."); @@ -10688,9 +10723,7 @@ SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, const GlobalVariable *EmuTlsVar = GV->getParent()->getNamedGlobal(EmuTlsVarName); assert(EmuTlsVar && "Cannot find EmuTlsVar "); - Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT); - Entry.Ty = VoidPtrType; - Args.push_back(Entry); + Args.emplace_back(DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT), VoidPtrType); SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
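A worked check of the usubsat expansion kept in expandABD above: for unsigned operands, usubsat(a, b) is zero whenever a <= b, so exactly one of the two saturating differences is nonzero and their OR equals |a - b|. The patch also delays the getFreeze calls so operands are frozen only on the expansion paths that repeat them. A scalar uint8_t model (ISD::USUBSAT acts per element on vectors):

#include <cassert>
#include <cstdint>

// Saturating unsigned subtract, as ISD::USUBSAT computes per element.
uint8_t usubsat(uint8_t a, uint8_t b) { return a > b ? a - b : 0; }

int main() {
  for (unsigned a = 0; a < 256; ++a)
    for (unsigned b = 0; b < 256; ++b) {
      uint8_t abdu = usubsat(a, b) | usubsat(b, a);
      assert(abdu == (a > b ? a - b : b - a));
    }
}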