Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 844
1 file changed, 645 insertions(+), 199 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2970cf4..fbd875a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -130,7 +130,7 @@ static cl::opt<bool> MulConstantOptimization(
X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
const X86Subtarget &STI)
- : TargetLowering(TM), Subtarget(STI) {
+ : TargetLowering(TM, STI), Subtarget(STI) {
bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
@@ -635,6 +635,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FROUNDEVEN, VT, Action);
setOperationAction(ISD::FTRUNC, VT, Action);
setOperationAction(ISD::FLDEXP, VT, Action);
+ setOperationAction(ISD::FSINCOSPI, VT, Action);
};
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
@@ -2072,8 +2073,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget.hasVBMI2()) {
for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
- setOperationAction(ISD::FSHL, VT, Custom);
- setOperationAction(ISD::FSHR, VT, Custom);
+ setOperationAction(ISD::FSHL, VT, Legal);
+ setOperationAction(ISD::FSHR, VT, Legal);
}
setOperationAction(ISD::ROTL, MVT::v32i16, Custom);
@@ -2088,8 +2089,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (!Subtarget.useSoftFloat() && Subtarget.hasVBMI2()) {
for (auto VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v16i16, MVT::v8i32,
MVT::v4i64}) {
- setOperationAction(ISD::FSHL, VT, Custom);
- setOperationAction(ISD::FSHR, VT, Custom);
+ setOperationAction(ISD::FSHL, VT, Subtarget.hasVLX() ? Legal : Custom);
+ setOperationAction(ISD::FSHR, VT, Subtarget.hasVLX() ? Legal : Custom);
}
}
@@ -2097,9 +2098,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// pre-AVX512 equivalents. Without VLX we use 512-bit operations for
// narrower widths.
if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
+ for (MVT VT : {MVT::f16, MVT::f32, MVT::f64, MVT::v8f16, MVT::v4f32,
+ MVT::v2f64, MVT::v16f16, MVT::v8f32, MVT::v4f64, MVT::v32f16,
+ MVT::v16f32, MVT::v8f64})
+ setOperationAction(ISD::FLDEXP, VT, Custom);
+
// These operations are handled on non-VLX by artificially widening in
// isel patterns.
-
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom);
@@ -2150,6 +2155,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
if (Subtarget.hasCDI()) {
+ for (auto VT : {MVT::i256, MVT::i512}) {
+ if (VT == MVT::i512 && !Subtarget.useAVX512Regs())
+ continue;
+ setOperationAction(ISD::CTLZ, VT, Custom);
+ setOperationAction(ISD::CTTZ, VT, Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
+ }
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
setOperationAction(ISD::CTLZ, VT, Legal);
}
@@ -2572,8 +2585,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
// Combine sin / cos into _sincos_stret if it is available.
- setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
- setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
if (Subtarget.isTargetWin64()) {
setOperationAction(ISD::SDIV, MVT::i128, Custom);
@@ -2655,6 +2668,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
ISD::AVGFLOORU,
ISD::BITREVERSE,
ISD::ADD,
+ ISD::SADDSAT,
+ ISD::SSUBSAT,
ISD::FADD,
ISD::FSUB,
ISD::FNEG,
@@ -2694,6 +2709,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
ISD::STRICT_FP_EXTEND,
ISD::FP_ROUND,
ISD::STRICT_FP_ROUND,
+ ISD::FSHL,
+ ISD::FSHR,
ISD::INTRINSIC_VOID,
ISD::INTRINSIC_WO_CHAIN,
ISD::INTRINSIC_W_CHAIN});
@@ -2871,6 +2888,8 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::VPERMV:
case X86ISD::VPERMV3:
case X86ISD::VZEXT_MOVL:
+ case X86ISD::COMPRESS:
+ case X86ISD::EXPAND:
return true;
}
}
@@ -3087,7 +3106,7 @@ static bool useVPTERNLOG(const X86Subtarget &Subtarget, MVT VT) {
}
bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
- const CallInst &I,
+ const CallBase &I,
MachineFunction &MF,
unsigned Intrinsic) const {
Info.flags = MachineMemOperand::MONone;
@@ -3454,6 +3473,12 @@ bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
isTypeLegal(LoadVT) && isTypeLegal(BitcastVT))
return true;
+ // If we have a large vector type (even if illegal), don't bitcast to large
+ // (illegal) scalar types. Better to load fewer vectors and extract.
+ if (LoadVT.isVector() && !BitcastVT.isVector() && LoadVT.isInteger() &&
+ BitcastVT.isInteger() && (LoadVT.getSizeInBits() % 128) == 0)
+ return false;
+
return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT, DAG, MMO);
}
@@ -5358,12 +5383,12 @@ bool isConstantSplat(SDValue Op, APInt &SplatVal, bool AllowPartialUndefs) {
int getRoundingModeX86(unsigned RM) {
switch (static_cast<::llvm::RoundingMode>(RM)) {
// clang-format off
- case ::llvm::RoundingMode::NearestTiesToEven: return X86::rmToNearest; break;
- case ::llvm::RoundingMode::TowardNegative: return X86::rmDownward; break;
- case ::llvm::RoundingMode::TowardPositive: return X86::rmUpward; break;
- case ::llvm::RoundingMode::TowardZero: return X86::rmTowardZero; break;
- default:
- return X86::rmInvalid; // Invalid rounding mode
+ case ::llvm::RoundingMode::NearestTiesToEven: return X86::rmToNearest;
+ case ::llvm::RoundingMode::TowardNegative: return X86::rmDownward;
+ case ::llvm::RoundingMode::TowardPositive: return X86::rmUpward;
+ case ::llvm::RoundingMode::TowardZero: return X86::rmTowardZero;
+ default: return X86::rmInvalid;
+ // clang-format on
}
}
@@ -5816,6 +5841,48 @@ static bool getTargetShuffleMask(SDValue N, bool AllowSentinelZero,
}
return false;
}
+ case X86ISD::COMPRESS: {
+ SDValue CmpVec = N.getOperand(0);
+ SDValue PassThru = N.getOperand(1);
+ SDValue CmpMask = N.getOperand(2);
+ APInt UndefElts;
+ SmallVector<APInt> EltBits;
+ if (!getTargetConstantBitsFromNode(CmpMask, 1, UndefElts, EltBits))
+ return false;
+ assert(UndefElts.getBitWidth() == NumElems && EltBits.size() == NumElems &&
+ "Illegal compression mask");
+ for (unsigned I = 0; I != NumElems; ++I) {
+ if (!EltBits[I].isZero())
+ Mask.push_back(I);
+ }
+ while (Mask.size() != NumElems) {
+ Mask.push_back(NumElems + Mask.size());
+ }
+ Ops.push_back(CmpVec);
+ Ops.push_back(PassThru);
+ return true;
+ }
+ case X86ISD::EXPAND: {
+ SDValue ExpVec = N.getOperand(0);
+ SDValue PassThru = N.getOperand(1);
+ SDValue ExpMask = N.getOperand(2);
+ APInt UndefElts;
+ SmallVector<APInt> EltBits;
+ if (!getTargetConstantBitsFromNode(ExpMask, 1, UndefElts, EltBits))
+ return false;
+ assert(UndefElts.getBitWidth() == NumElems && EltBits.size() == NumElems &&
+ "Illegal expansion mask");
+ unsigned ExpIndex = 0;
+ for (unsigned I = 0; I != NumElems; ++I) {
+ if (EltBits[I].isZero())
+ Mask.push_back(I + NumElems);
+ else
+ Mask.push_back(ExpIndex++);
+ }
+ Ops.push_back(ExpVec);
+ Ops.push_back(PassThru);
+ return true;
+ }
default:
llvm_unreachable("unknown target shuffle node");
}
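// Illustrative standalone sketch (not from the patch) of the shuffle-mask
// derivation above, assuming an 8-element vector with a plain bitmask:
// COMPRESS packs the selected lanes to the front and fills the tail from the
// pass-through operand (indices >= NumElems); EXPAND lets each set mask bit
// consume the next source lane and takes cleared lanes from the pass-through.
#include <cstdint>
#include <cstdio>
#include <vector>

static std::vector<int> compressMask(uint8_t MaskBits, unsigned NumElems) {
  std::vector<int> M;
  for (unsigned I = 0; I != NumElems; ++I)
    if (MaskBits & (1u << I))
      M.push_back(I);
  while (M.size() != NumElems)
    M.push_back(NumElems + M.size()); // tail comes from the pass-through
  return M;
}

static std::vector<int> expandMask(uint8_t MaskBits, unsigned NumElems) {
  std::vector<int> M;
  unsigned SrcIdx = 0;
  for (unsigned I = 0; I != NumElems; ++I)
    M.push_back((MaskBits & (1u << I)) ? int(SrcIdx++) : int(I + NumElems));
  return M;
}

int main() {
  for (int I : compressMask(0b10110010, 8)) printf("%d ", I); // 1 4 5 7 12 13 14 15
  printf("\n");
  for (int I : expandMask(0b10110010, 8)) printf("%d ", I);   // 8 0 10 11 1 2 14 3
  printf("\n");
  return 0;
}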
@@ -7270,7 +7337,10 @@ static bool findEltLoadSrc(SDValue Elt, LoadSDNode *&Ld, int64_t &ByteOffset) {
static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
const SDLoc &DL, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
- bool IsAfterLegalize) {
+ bool IsAfterLegalize,
+ unsigned Depth = 0) {
+ if (Depth >= SelectionDAG::MaxRecursionDepth)
+ return SDValue(); // Limit search depth.
if ((VT.getScalarSizeInBits() % 8) != 0)
return SDValue();
@@ -7444,7 +7514,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), HalfNumElems);
SDValue HalfLD =
EltsFromConsecutiveLoads(HalfVT, Elts.drop_back(HalfNumElems), DL,
- DAG, Subtarget, IsAfterLegalize);
+ DAG, Subtarget, IsAfterLegalize, Depth + 1);
if (HalfLD)
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
HalfLD, DAG.getVectorIdxConstant(0, DL));
@@ -7521,7 +7591,8 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
VT.getSizeInBits() / ScalarSize);
if (TLI.isTypeLegal(BroadcastVT)) {
if (SDValue RepeatLoad = EltsFromConsecutiveLoads(
- RepeatVT, RepeatedLoads, DL, DAG, Subtarget, IsAfterLegalize)) {
+ RepeatVT, RepeatedLoads, DL, DAG, Subtarget, IsAfterLegalize,
+ Depth + 1)) {
SDValue Broadcast = RepeatLoad;
if (RepeatSize > ScalarSize) {
while (Broadcast.getValueSizeInBits() < VT.getSizeInBits())
@@ -7542,6 +7613,20 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
}
}
+ // REVERSE - attempt to match the loads in reverse and then shuffle back.
+ // TODO: Do this for any permute or mismatching element counts.
+ if (Depth == 0 && ZeroMask.isZero() && UndefMask.isZero() &&
+ TLI.isTypeLegal(VT) && VT.isVector() &&
+ NumElems == VT.getVectorNumElements()) {
+ SmallVector<SDValue, 16> ReverseElts(Elts.rbegin(), Elts.rend());
+ if (SDValue RevLd = EltsFromConsecutiveLoads(
+ VT, ReverseElts, DL, DAG, Subtarget, IsAfterLegalize, Depth + 1)) {
+ SmallVector<int, 16> ReverseMask(NumElems);
+ std::iota(ReverseMask.rbegin(), ReverseMask.rend(), 0);
+ return DAG.getVectorShuffle(VT, DL, RevLd, DAG.getUNDEF(VT), ReverseMask);
+ }
+ }
+
return SDValue();
}
@@ -7948,7 +8033,7 @@ static SDValue buildFromShuffleMostly(SDValue Op, const SDLoc &DL,
for (unsigned i = 0; i != NumElems; ++i) {
unsigned Opc = Op.getOperand(i).getOpcode();
- if (Opc == ISD::UNDEF)
+ if (Opc == ISD::POISON || Opc == ISD::UNDEF)
continue;
if (Opc != ISD::EXTRACT_VECTOR_ELT) {
@@ -7991,7 +8076,7 @@ static SDValue buildFromShuffleMostly(SDValue Op, const SDLoc &DL,
if (!VecIn1.getNode())
return SDValue();
- VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+ VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getPOISON(VT);
SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, Mask);
for (unsigned Idx : InsertIndices)
@@ -8115,6 +8200,8 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, const SDLoc &dl,
case X86ISD::FHSUB:
case X86ISD::HADD:
case X86ISD::HSUB:
+ case X86ISD::HADDS:
+ case X86ISD::HSUBS:
return true;
}
return false;
@@ -8426,9 +8513,7 @@ static bool isFMAddSubOrFMSubAdd(const X86Subtarget &Subtarget,
// DAGCombiner::visitFADDForFMACombine. It would be good to have one
// function that would answer if it is Ok to fuse MUL + ADD to FMADD
// or MUL + ADDSUB to FMADDSUB.
- const TargetOptions &Options = DAG.getTarget().Options;
bool AllowFusion =
- Options.AllowFPOpFusion == FPOpFusion::Fast ||
(AllowSubAddOrAddSubContract && Opnd0->getFlags().hasAllowContract());
if (!AllowFusion)
return false;
@@ -8856,6 +8941,56 @@ static SDValue lowerBuildVectorAsBlend(BuildVectorSDNode *BVOp, SDLoc const &DL,
return SDValue();
}
+/// Widen a BUILD_VECTOR if the scalar operands are freely mergeable.
+static SDValue widenBuildVector(BuildVectorSDNode *BVOp, SDLoc const &DL,
+ X86Subtarget const &Subtarget,
+ SelectionDAG &DAG) {
+ using namespace SDPatternMatch;
+ MVT VT = BVOp->getSimpleValueType(0);
+ MVT SVT = VT.getScalarType();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltBits = SVT.getSizeInBits();
+
+ if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32)
+ return SDValue();
+
+ unsigned WideBits = 2 * EltBits;
+ MVT WideSVT = MVT::getIntegerVT(WideBits);
+ MVT WideVT = MVT::getVectorVT(WideSVT, NumElts / 2);
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(WideSVT))
+ return SDValue();
+
+ SmallVector<SDValue, 8> WideOps;
+ for (unsigned I = 0; I != NumElts; I += 2) {
+ SDValue Op0 = BVOp->getOperand(I + 0);
+ SDValue Op1 = BVOp->getOperand(I + 1);
+
+ if (Op0.isUndef() && Op1.isUndef()) {
+ WideOps.push_back(DAG.getUNDEF(WideSVT));
+ continue;
+ }
+
+ // TODO: Constant repacking?
+
+ // Merge scalars that have been split from the same source.
+ SDValue X, Y;
+ if (sd_match(Op0, m_Trunc(m_Value(X))) &&
+ sd_match(Op1, m_Trunc(m_Srl(m_Value(Y), m_SpecificInt(EltBits)))) &&
+ peekThroughTruncates(X) == peekThroughTruncates(Y) &&
+ X.getValueType().bitsGE(WideSVT)) {
+ if (X.getValueType().bitsGT(WideSVT))
+ X = DAG.getNode(ISD::TRUNCATE, DL, WideSVT, X);
+ WideOps.push_back(X);
+ continue;
+ }
+
+ return SDValue();
+ }
+
+ assert(WideOps.size() == (NumElts / 2) && "Failed to widen build vector");
+ return DAG.getBitcast(VT, DAG.getBuildVector(WideVT, DL, WideOps));
+}
+
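// Illustrative standalone sketch (not from the patch): the widening relies on
// the fact that an element pair produced as trunc(X) and trunc(srl(X, EltBits))
// reassembles to the wider source element under x86's little-endian lane
// order, so the BUILD_VECTOR can be formed from X directly and bitcast back.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t Src = 0xCAFEBABE;
  uint16_t Lo = static_cast<uint16_t>(Src);       // trunc(X)           -> lane I
  uint16_t Hi = static_cast<uint16_t>(Src >> 16); // trunc(srl(X, 16))  -> lane I+1
  uint32_t Widened = uint32_t(Lo) | (uint32_t(Hi) << 16);
  assert(Widened == Src); // the i32 lane covering both halves is X again
  return 0;
}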
/// Create a vector constant without a load. SSE/AVX provide the bare minimum
/// functionality to do this, so it's all zeros, all ones, or some derivation
/// that is cheap to calculate.
@@ -9326,6 +9461,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return BitOp;
if (SDValue Blend = lowerBuildVectorAsBlend(BV, dl, Subtarget, DAG))
return Blend;
+ if (SDValue WideBV = widenBuildVector(BV, dl, Subtarget, DAG))
+ return WideBV;
unsigned NumZero = ZeroMask.popcount();
unsigned NumNonZero = NonZeroMask.popcount();
@@ -18370,16 +18507,20 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, const X86Subtarget &Subtarget,
SmallVector<int> Mask(OrigMask);
// Canonicalize the shuffle with any horizontal ops inputs.
+ // Don't attempt this if the shuffle can still be widened as we may lose
+ // whole lane shuffle patterns.
// NOTE: This may update Ops and Mask.
- if (SDValue HOp = canonicalizeShuffleMaskWithHorizOp(
- Ops, Mask, VT.getSizeInBits(), DL, DAG, Subtarget))
- return DAG.getBitcast(VT, HOp);
+ if (!canWidenShuffleElements(Mask)) {
+ if (SDValue HOp = canonicalizeShuffleMaskWithHorizOp(
+ Ops, Mask, VT.getSizeInBits(), DL, DAG, Subtarget))
+ return DAG.getBitcast(VT, HOp);
- V1 = DAG.getBitcast(VT, Ops[0]);
- V2 = DAG.getBitcast(VT, Ops[1]);
- assert(NumElements == (int)Mask.size() &&
- "canonicalizeShuffleMaskWithHorizOp "
- "shouldn't alter the shuffle mask size");
+ V1 = DAG.getBitcast(VT, Ops[0]);
+ V2 = DAG.getBitcast(VT, Ops[1]);
+ assert(NumElements == (int)Mask.size() &&
+ "canonicalizeShuffleMaskWithHorizOp "
+ "shouldn't alter the shuffle mask size");
+ }
// Canonicalize zeros/ones/fp splat constants to ensure no undefs.
// These will be materialized uniformly anyway, so make splat matching easier.
@@ -19142,6 +19283,72 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
return SDValue();
}
+static SDValue LowerFLDEXP(SDValue Op, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ SDValue X = Op.getOperand(0);
+ MVT XTy = X.getSimpleValueType();
+ SDValue Exp = Op.getOperand(1);
+
+ switch (XTy.SimpleTy) {
+ default:
+ return SDValue();
+ case MVT::f16:
+ if (!Subtarget.hasFP16())
+ X = DAG.getFPExtendOrRound(X, DL, MVT::f32);
+ [[fallthrough]];
+ case MVT::f32:
+ case MVT::f64: {
+ MVT VT = MVT::getVectorVT(X.getSimpleValueType(),
+ 128 / X.getSimpleValueType().getSizeInBits());
+ Exp = DAG.getNode(ISD::SINT_TO_FP, DL, X.getValueType(), Exp);
+ SDValue VX = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, X);
+ SDValue VExp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Exp);
+ SDValue Scalefs = DAG.getNode(X86ISD::SCALEFS, DL, VT, VX, VExp);
+ SDValue Final = DAG.getExtractVectorElt(DL, X.getValueType(), Scalefs, 0);
+ return DAG.getFPExtendOrRound(Final, DL, XTy);
+ }
+ case MVT::v4f32:
+ case MVT::v2f64:
+ case MVT::v8f32:
+ case MVT::v4f64:
+ case MVT::v16f32:
+ case MVT::v8f64:
+ if (XTy.getSizeInBits() == 512 || Subtarget.hasVLX()) {
+ Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
+ return DAG.getNode(X86ISD::SCALEF, DL, XTy, X, Exp);
+ }
+ break;
+ case MVT::v8f16:
+ case MVT::v16f16:
+ if (Subtarget.hasFP16()) {
+ if (Subtarget.hasVLX()) {
+ Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
+ return DAG.getNode(X86ISD::SCALEF, DL, XTy, X, Exp);
+ }
+ break;
+ }
+ X = DAG.getFPExtendOrRound(X, DL, XTy.changeVectorElementType(MVT::f32));
+ Exp = DAG.getSExtOrTrunc(Exp, DL,
+ X.getSimpleValueType().changeTypeToInteger());
+ break;
+ case MVT::v32f16:
+ if (Subtarget.hasFP16()) {
+ Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
+ return DAG.getNode(X86ISD::SCALEF, DL, XTy, X, Exp);
+ }
+ return splitVectorOp(Op, DAG, DL);
+ }
+ SDValue WideX = widenSubVector(X, true, Subtarget, DAG, DL, 512);
+ SDValue WideExp = widenSubVector(Exp, true, Subtarget, DAG, DL, 512);
+ Exp = DAG.getNode(ISD::SINT_TO_FP, DL, WideExp.getSimpleValueType(), Exp);
+ SDValue Scalef =
+ DAG.getNode(X86ISD::SCALEF, DL, WideX.getValueType(), WideX, WideExp);
+ SDValue Final =
+ DAG.getExtractSubvector(DL, X.getSimpleValueType(), Scalef, 0);
+ return DAG.getFPExtendOrRound(Final, DL, XTy);
+}
+
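// Illustrative standalone sketch (not from the patch): the lowering above maps
// ldexp(x, n) onto AVX-512 SCALEF, modelled here as x * 2^floor(exp) with the
// exponent supplied as a floating-point operand, which is why Exp goes through
// SINT_TO_FP before the SCALEF/SCALEFS node is built.
#include <cassert>
#include <cmath>

static double scalefLike(double X, double FpExp) {
  // Build 2^floor(FpExp) exactly and scale by it.
  return X * std::ldexp(1.0, static_cast<int>(std::floor(FpExp)));
}

int main() {
  for (int N : {-10, -1, 0, 3, 20})
    assert(scalefLike(1.5, static_cast<double>(N)) == std::ldexp(1.5, N));
  return 0;
}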
static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
@@ -29568,9 +29775,9 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
}
if (!(IsLoLaneAllZeroOrUndef || IsHiLaneAllZeroOrUndef)) {
SDValue Mask = DAG.getBitcast(VT, DAG.getConstant(0x00FF, dl, ExVT));
- SDValue BLo = DAG.getNode(ISD::AND, dl, VT, Mask, B);
SDValue BHi = DAG.getNode(X86ISD::ANDNP, dl, VT, Mask, B);
- SDValue RLo = DAG.getNode(X86ISD::VPMADDUBSW, dl, ExVT, A, BLo);
+ SDValue RLo = DAG.getNode(ISD::MUL, dl, ExVT, DAG.getBitcast(ExVT, A),
+ DAG.getBitcast(ExVT, B));
SDValue RHi = DAG.getNode(X86ISD::VPMADDUBSW, dl, ExVT, A, BHi);
RLo = DAG.getNode(ISD::AND, dl, VT, DAG.getBitcast(VT, RLo), Mask);
RHi = DAG.getNode(X86ISD::VSHLI, dl, ExVT, RHi,
@@ -29586,26 +29793,8 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
SDValue Undef = DAG.getUNDEF(VT);
SDValue ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A, Undef));
SDValue AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A, Undef));
-
- SDValue BLo, BHi;
- if (ISD::isBuildVectorOfConstantSDNodes(B.getNode())) {
- // If the RHS is a constant, manually unpackl/unpackh.
- SmallVector<SDValue, 16> LoOps, HiOps;
- for (unsigned i = 0; i != NumElts; i += 16) {
- for (unsigned j = 0; j != 8; ++j) {
- LoOps.push_back(DAG.getAnyExtOrTrunc(B.getOperand(i + j), dl,
- MVT::i16));
- HiOps.push_back(DAG.getAnyExtOrTrunc(B.getOperand(i + j + 8), dl,
- MVT::i16));
- }
- }
-
- BLo = DAG.getBuildVector(ExVT, dl, LoOps);
- BHi = DAG.getBuildVector(ExVT, dl, HiOps);
- } else {
- BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, B, Undef));
- BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, B, Undef));
- }
+ SDValue BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, B, Undef));
+ SDValue BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, B, Undef));
// Multiply, mask the lower 8bits of the lo/hi results and pack.
SDValue RLo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo);
@@ -30908,6 +31097,63 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(X86ISD::PACKUS, dl, VT, LoR, HiR);
}
+ if (VT == MVT::v64i8 && Subtarget.canExtendTo512BW()) {
+ // On AVX512BW, we can use variable 16-bit shifts to implement variable
+ // 8-bit shifts. For this, we split the input into two vectors, RLo and RHi.
+ // The i-th lane of RLo contains the (2*i)-th lane of R, and the i-th lane
+ // of RHi contains the (2*i+1)-th lane of R. After shifting, these vectors
+ // can efficiently be merged together using a masked move.
+ MVT ExtVT = MVT::v32i16;
+
+ SDValue RLo, RHi;
+ // Isolate lower and upper lanes of Amt by masking odd lanes in AmtLo and
+ // right shifting AmtHi.
+ SDValue AmtLo = DAG.getNode(ISD::AND, dl, ExtVT, DAG.getBitcast(ExtVT, Amt),
+ DAG.getConstant(0x00ff, dl, ExtVT));
+ SDValue AmtHi = getTargetVShiftByConstNode(
+ X86ISD::VSRLI, dl, ExtVT, DAG.getBitcast(ExtVT, Amt), 8, DAG);
+ switch (Opc) {
+ case ISD::SHL:
+ // Because we shift left, no bits from the high half can influence the low
+ // half, so we don't need to mask RLo. We do however need to mask RHi, to
+ // prevent high bits of an even lane overflowing into low bits of an odd
+ // lane.
+ RLo = DAG.getBitcast(ExtVT, R);
+ RHi = DAG.getNode(ISD::AND, dl, ExtVT, RLo,
+ DAG.getConstant(0xff00, dl, ExtVT));
+ break;
+ case ISD::SRL:
+ // Same idea as above, but this time we need to make sure no low bits of
+ // an odd lane can overflow into high bits of an even lane.
+ RHi = DAG.getBitcast(ExtVT, R);
+ RLo = DAG.getNode(ISD::AND, dl, ExtVT, RHi,
+ DAG.getConstant(0x00ff, dl, ExtVT));
+ break;
+ case ISD::SRA:
+ // For arithmetic right shifts, we want to sign extend each even lane of R
+ // such that the upper half of the corresponding lane of RLo is 0 or -1
+ // depending on the sign bit of the original lane. We do this using 2
+ // immediate shifts.
+ RHi = DAG.getBitcast(ExtVT, R);
+ RLo = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ExtVT, RHi, 8, DAG);
+ RLo = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExtVT, RLo, 8, DAG);
+ break;
+ default:
+ llvm_unreachable("Unexpected Shift Op");
+ }
+
+ SDValue ShiftedLo =
+ DAG.getBitcast(VT, DAG.getNode(Opc, dl, ExtVT, RLo, AmtLo));
+ SDValue ShiftedHi =
+ DAG.getBitcast(VT, DAG.getNode(Opc, dl, ExtVT, RHi, AmtHi));
+
+ // To merge the shifted vectors back together, we select even lanes
+ // from ShiftedLo and odd lanes from ShiftedHi.
+ SDValue SelectMask = DAG.getBitcast(
+ MVT::v64i1, DAG.getConstant(0x5555555555555555, dl, MVT::i64));
+ return DAG.getSelect(dl, VT, SelectMask, ShiftedLo, ShiftedHi);
+ }
+
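// Illustrative standalone sketch (not from the patch), checked exhaustively on
// a single 16-bit lane pair: with the masking described in the comments above,
// one 16-bit shift reproduces two independent 8-bit shifts, which is why the
// v64i8 case can be built from v32i16 shifts plus an even/odd lane select.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned Lo = 0; Lo < 256; ++Lo)
    for (unsigned Hi = 0; Hi < 256; ++Hi)
      for (unsigned SLo = 0; SLo < 8; ++SLo)
        for (unsigned SHi = 0; SHi < 8; ++SHi) {
          unsigned R = Lo | (Hi << 8); // even lane = Lo, odd lane = Hi
          // SHL: RLo needs no mask; RHi masks off the even byte first.
          assert(uint8_t(R << SLo) == uint8_t(Lo << SLo));
          assert(uint8_t(((R & 0xFF00u) << SHi) >> 8) == uint8_t(Hi << SHi));
          // SRL: mirror image, RLo masks off the odd byte first.
          assert(uint8_t((R & 0x00FFu) >> SLo) == uint8_t(Lo >> SLo));
          assert(uint8_t((R >> SHi) >> 8) == uint8_t(Hi >> SHi));
        }
  return 0;
}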
if (VT == MVT::v16i8 ||
(VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP()) ||
(VT == MVT::v64i8 && Subtarget.hasBWI())) {
@@ -31127,19 +31373,15 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
bool IsCstSplat = X86::isConstantSplat(Amt, APIntShiftAmt);
unsigned NumElts = VT.getVectorNumElements();
- if (Subtarget.hasVBMI2() && EltSizeInBits > 8) {
-
- if (IsCstSplat) {
- if (IsFSHR)
- std::swap(Op0, Op1);
- uint64_t ShiftAmt = APIntShiftAmt.urem(EltSizeInBits);
- SDValue Imm = DAG.getTargetConstant(ShiftAmt, DL, MVT::i8);
- return getAVX512Node(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT,
- {Op0, Op1, Imm}, DAG, Subtarget);
- }
+ // For non-VLX VBMI2 targets, widen 128/256-bit to 512-bit so
+ // the rest of the lowering/isel can select the VBMI2 forms.
+ // Only Custom types (v8i16, v4i32, v2i64, v16i16, v8i32, v4i64) can
+ // reach LowerFunnelShift with VBMI2 but no VLX, so no type check needed.
+ if (Subtarget.hasVBMI2() && !Subtarget.hasVLX() && EltSizeInBits > 8) {
return getAVX512Node(IsFSHR ? ISD::FSHR : ISD::FSHL, DL, VT,
{Op0, Op1, Amt}, DAG, Subtarget);
}
+
assert((VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8 ||
VT == MVT::v8i16 || VT == MVT::v16i16 || VT == MVT::v32i16 ||
VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) &&
@@ -33004,60 +33246,6 @@ static SDValue LowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
}
-static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- SDValue Arg = Op.getOperand(0);
- EVT ArgVT = Arg.getValueType();
- bool isF64 = ArgVT == MVT::f64;
-
- RTLIB::Libcall LC = isF64 ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
- const char *LibcallName = TLI.getLibcallName(LC);
- if (!LibcallName)
- return SDValue();
-
- assert(Subtarget.isTargetDarwin() && Subtarget.is64Bit());
-
- // For MacOSX, we want to call an alternative entry point: __sincos_stret,
- // which returns the values as { float, float } (in XMM0) or
- // { double, double } (which is returned in XMM0, XMM1).
- SDLoc dl(Op);
- Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
-
- TargetLowering::ArgListTy Args;
- Args.emplace_back(Arg, ArgTy);
-
- // Only optimize x86_64 for now. i386 is a bit messy. For f32,
- // the small struct {f32, f32} is returned in (eax, edx). For f64,
- // the results are returned via SRet in memory.
- SDValue Callee =
- DAG.getExternalSymbol(LibcallName, TLI.getPointerTy(DAG.getDataLayout()));
-
- Type *RetTy = isF64 ? (Type *)StructType::get(ArgTy, ArgTy)
- : (Type *)FixedVectorType::get(ArgTy, 4);
-
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl)
- .setChain(DAG.getEntryNode())
- .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args));
-
- std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
-
- if (isF64)
- // Returned in xmm0 and xmm1.
- return CallResult.first;
-
- // Returned in bits 0:31 and 32:64 xmm0.
- SDValue SinVal =
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT, CallResult.first,
- DAG.getVectorIdxConstant(0, dl));
- SDValue CosVal =
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT, CallResult.first,
- DAG.getVectorIdxConstant(1, dl));
- SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
- return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal);
-}
-
/// Widen a vector input to a vector of NVT. The
/// input vector must have the same element type as NVT.
static SDValue ExtendToType(SDValue InOp, MVT NVT, SelectionDAG &DAG,
@@ -33662,7 +33850,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ABDS:
case ISD::ABDU: return LowerABD(Op, Subtarget, DAG);
case ISD::AVGCEILU: return LowerAVG(Op, Subtarget, DAG);
- case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG);
case ISD::MLOAD: return LowerMLOAD(Op, Subtarget, DAG);
case ISD::MSTORE: return LowerMSTORE(Op, Subtarget, DAG);
case ISD::MGATHER: return LowerMGATHER(Op, Subtarget, DAG);
@@ -33672,7 +33859,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADDRSPACECAST: return LowerADDRSPACECAST(Op, DAG);
case X86ISD::CVTPS2PH: return LowerCVTPS2PH(Op, DAG);
case ISD::PREFETCH: return LowerPREFETCH(Op, Subtarget, DAG);
- // clang-format on
+ case ISD::FLDEXP: return LowerFLDEXP(Op, Subtarget, DAG);
+ // clang-format on
}
}
@@ -33756,6 +33944,59 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
return;
}
+ case ISD::CTLZ:
+ case ISD::CTTZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ_ZERO_UNDEF: {
+ // Fold i256/i512 CTLZ/CTTZ patterns to make use of AVX512
+ // vXi64 CTLZ/CTTZ and VECTOR_COMPRESS.
+ // Compute the CTLZ/CTTZ of each element, add the element's bit offset,
+ // compress the result to remove all zero elements (passthru is set to
+ // scalar bitwidth if all elements are zero) and extract the lowest
+ // compressed element.
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ assert(Subtarget.hasCDI() && "AVX512CD required");
+ assert((VT == MVT::i256 || VT == MVT::i512) && "Unexpected VT!");
+ if (VT == MVT::i256 && !X86::mayFoldLoad(N0, Subtarget))
+ return;
+
+ unsigned SizeInBits = VT.getSizeInBits();
+ MVT VecVT = MVT::getVectorVT(MVT::i64, SizeInBits / 64);
+ MVT BoolVT = VecVT.changeVectorElementType(MVT::i1);
+ SDValue Vec = DAG.getBitcast(VecVT, N0);
+
+ SmallVector<int, 8> RevMask;
+ SmallVector<SDValue, 8> Offsets;
+ for (unsigned I = 0, E = VecVT.getVectorNumElements(); I != E; ++I) {
+ RevMask.push_back((int)((E - 1) - I));
+ Offsets.push_back(DAG.getConstant(I * 64, dl, MVT::i64));
+ }
+
+ // CTLZ - reverse the elements as we want the top non-zero element at the
+ // bottom for compression.
+ unsigned VecOpc = ISD::CTTZ;
+ if (Opc == ISD::CTLZ || Opc == ISD::CTLZ_ZERO_UNDEF) {
+ VecOpc = ISD::CTLZ;
+ Vec = DAG.getVectorShuffle(VecVT, dl, Vec, Vec, RevMask);
+ }
+
+ SDValue PassThrough = DAG.getUNDEF(VecVT);
+ if (Opc == ISD::CTLZ || Opc == ISD::CTTZ)
+ PassThrough = DAG.getConstant(SizeInBits, dl, VecVT);
+
+ SDValue IsNonZero = DAG.getSetCC(dl, BoolVT, Vec,
+ DAG.getConstant(0, dl, VecVT), ISD::SETNE);
+ SDValue Cnt = DAG.getNode(VecOpc, dl, VecVT, Vec);
+ Cnt = DAG.getNode(ISD::ADD, dl, VecVT, Cnt,
+ DAG.getBuildVector(VecVT, dl, Offsets));
+ Cnt = DAG.getNode(ISD::VECTOR_COMPRESS, dl, VecVT, Cnt, IsNonZero,
+ PassThrough);
+ Cnt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Cnt,
+ DAG.getVectorIdxConstant(0, dl));
+ Results.push_back(DAG.getZExtOrTrunc(Cnt, dl, VT));
+ return;
+ }
case ISD::MUL: {
EVT VT = N->getValueType(0);
assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
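// Illustrative standalone sketch (not from the patch, C++20) of the i256/i512
// CTLZ/CTTZ scheme above, shown for CTTZ on four i64 limbs: take cttz of each
// limb, add the limb's bit offset, keep the first non-zero limb (the
// VECTOR_COMPRESS step) and fall back to the full bit width when the whole
// value is zero. CTLZ works the same way after reversing the limb order.
#include <array>
#include <bit>
#include <cassert>
#include <cstdint>

static unsigned cttz256(const std::array<uint64_t, 4> &Limbs) {
  for (unsigned I = 0; I != 4; ++I)
    if (Limbs[I] != 0)
      return std::countr_zero(Limbs[I]) + I * 64;
  return 256; // pass-through value when every limb is zero
}

int main() {
  assert(cttz256({0, 0, 1ull << 5, 0}) == 128 + 5);
  assert(cttz256({0, 0, 0, 0}) == 256);
  return 0;
}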
@@ -34931,6 +35172,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(BLENDV)
NODE_NAME_CASE(HADD)
NODE_NAME_CASE(HSUB)
+ NODE_NAME_CASE(HADDS)
+ NODE_NAME_CASE(HSUBS)
NODE_NAME_CASE(FHADD)
NODE_NAME_CASE(FHSUB)
NODE_NAME_CASE(CONFLICT)
@@ -38168,22 +38411,22 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
default:
llvm_unreachable("Unexpected instruction!");
case X86::PTCVTROWD2PSrri:
- Opc = X86::TCVTROWD2PSrri;
+ Opc = X86::TCVTROWD2PSrti;
break;
case X86::PTCVTROWPS2BF16Hrri:
- Opc = X86::TCVTROWPS2BF16Hrri;
+ Opc = X86::TCVTROWPS2BF16Hrti;
break;
case X86::PTCVTROWPS2PHHrri:
- Opc = X86::TCVTROWPS2PHHrri;
+ Opc = X86::TCVTROWPS2PHHrti;
break;
case X86::PTCVTROWPS2BF16Lrri:
- Opc = X86::TCVTROWPS2BF16Lrri;
+ Opc = X86::TCVTROWPS2BF16Lrti;
break;
case X86::PTCVTROWPS2PHLrri:
- Opc = X86::TCVTROWPS2PHLrri;
+ Opc = X86::TCVTROWPS2PHLrti;
break;
case X86::PTILEMOVROWrri:
- Opc = X86::TILEMOVROWrri;
+ Opc = X86::TILEMOVROWrti;
break;
}
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc));
@@ -38206,22 +38449,22 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
default:
llvm_unreachable("Unexpected instruction!");
case X86::PTCVTROWD2PSrre:
- Opc = X86::TCVTROWD2PSrre;
+ Opc = X86::TCVTROWD2PSrte;
break;
case X86::PTCVTROWPS2BF16Hrre:
- Opc = X86::TCVTROWPS2BF16Hrre;
+ Opc = X86::TCVTROWPS2BF16Hrte;
break;
case X86::PTCVTROWPS2BF16Lrre:
- Opc = X86::TCVTROWPS2BF16Lrre;
+ Opc = X86::TCVTROWPS2BF16Lrte;
break;
case X86::PTCVTROWPS2PHHrre:
- Opc = X86::TCVTROWPS2PHHrre;
+ Opc = X86::TCVTROWPS2PHHrte;
break;
case X86::PTCVTROWPS2PHLrre:
- Opc = X86::TCVTROWPS2PHLrre;
+ Opc = X86::TCVTROWPS2PHLrte;
break;
case X86::PTILEMOVROWrre:
- Opc = X86::TILEMOVROWrre;
+ Opc = X86::TILEMOVROWrte;
break;
}
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc));
@@ -40707,8 +40950,9 @@ static SDValue canonicalizeShuffleMaskWithHorizOp(
}))
return SDValue();
- bool isHoriz = (Opcode0 == X86ISD::FHADD || Opcode0 == X86ISD::HADD ||
- Opcode0 == X86ISD::FHSUB || Opcode0 == X86ISD::HSUB);
+ bool isHoriz = (Opcode0 == X86ISD::FHADD || Opcode0 == X86ISD::FHSUB ||
+ Opcode0 == X86ISD::HADD || Opcode0 == X86ISD::HSUB ||
+ Opcode0 == X86ISD::HADDS || Opcode0 == X86ISD::HSUBS);
bool isPack = (Opcode0 == X86ISD::PACKSS || Opcode0 == X86ISD::PACKUS);
if (!isHoriz && !isPack)
return SDValue();
@@ -45014,11 +45258,16 @@ bool X86TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
case X86ISD::INSERTPS:
case X86ISD::BLENDI:
case X86ISD::PSHUFB:
+ case X86ISD::VZEXT_MOVL:
case X86ISD::PSHUFD:
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
+ case X86ISD::SHUFP:
case X86ISD::UNPCKL:
case X86ISD::UNPCKH:
case X86ISD::VPERMILPV:
case X86ISD::VPERMILPI:
+ case X86ISD::VPERMI:
case X86ISD::VPERMV:
case X86ISD::VPERMV3: {
SmallVector<int, 8> Mask;
@@ -45044,6 +45293,16 @@ bool X86TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
}
break;
}
+ case X86ISD::VBROADCAST: {
+ SDValue Src = Op.getOperand(0);
+ MVT SrcVT = Src.getSimpleValueType();
+ if (SrcVT.isVector()) {
+ APInt DemandedSrc = APInt::getOneBitSet(SrcVT.getVectorNumElements(), 0);
+ return DAG.isGuaranteedNotToBeUndefOrPoison(Src, DemandedSrc, PoisonOnly,
+ Depth + 1);
+ }
+ return DAG.isGuaranteedNotToBeUndefOrPoison(Src, PoisonOnly, Depth + 1);
+ }
}
return TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
Op, DemandedElts, DAG, PoisonOnly, Depth);
@@ -45088,13 +45347,19 @@ bool X86TargetLowering::canCreateUndefOrPoisonForTargetNode(
// SSE target shuffles.
case X86ISD::INSERTPS:
case X86ISD::PSHUFB:
+ case X86ISD::VZEXT_MOVL:
case X86ISD::PSHUFD:
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
+ case X86ISD::SHUFP:
case X86ISD::UNPCKL:
case X86ISD::UNPCKH:
case X86ISD::VPERMILPV:
case X86ISD::VPERMILPI:
+ case X86ISD::VPERMI:
case X86ISD::VPERMV:
case X86ISD::VPERMV3:
+ case X86ISD::VBROADCAST:
return false;
// SSE comparisons handle all icmp/fcmp cases.
// TODO: Add CMPM/MM with test coverage.
@@ -53307,18 +53572,48 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
if (Mst->isCompressingStore())
return SDValue();
- EVT VT = Mst->getValue().getValueType();
+ if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG, Subtarget))
+ return ScalarStore;
+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDLoc DL(N);
- if (Mst->isTruncatingStore())
- return SDValue();
+ SDValue Mask = Mst->getMask();
+ SDValue Value = Mst->getValue();
+ EVT MemVT = Mst->getMemoryVT();
+ EVT VT = Value.getValueType();
- if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG, Subtarget))
- return ScalarStore;
+ // See if the truncating store can be a saturating truncated store.
+ if (Mst->isTruncatingStore()) {
+ if (VT.isVector() && MemVT.isVector() && VT.getScalarType().isInteger() &&
+ MemVT.getScalarType().isInteger() &&
+ VT.getVectorNumElements() == MemVT.getVectorNumElements() &&
+ Subtarget.hasBWI() && Subtarget.hasVLX()) {
+
+ SDValue SatSrc;
+ unsigned Opc;
+ if (SDValue SVal = detectSSatPattern(Value, MemVT)) {
+ SatSrc = SVal;
+ Opc = X86ISD::VMTRUNCSTORES;
+ } else if (SDValue UVal = detectUSatPattern(Value, MemVT, DAG, DL)) {
+ SatSrc = UVal;
+ Opc = X86ISD::VMTRUNCSTOREUS;
+ } else {
+ return SDValue();
+ }
+
+ SDVTList VTs = DAG.getVTList(MVT::Other);
+ SDValue Ops[] = {Mst->getChain(), SatSrc, Mst->getBasePtr(), Mask};
+ MachineMemOperand *MMO = Mst->getMemOperand();
+ return DAG.getMemIntrinsicNode(Opc, DL, VTs, Ops, MemVT, MMO);
+ }
+
+ // Otherwise don't combine if this store already truncates.
+ return SDValue();
+ }
// If the mask value has been legalized to a non-boolean vector, try to
// simplify ops leading up to it. We only demand the MSB of each lane.
- SDValue Mask = Mst->getMask();
if (Mask.getScalarValueSizeInBits() != 1) {
APInt DemandedBits(APInt::getSignMask(VT.getScalarSizeInBits()));
if (TLI.SimplifyDemandedBits(Mask, DemandedBits, DCI)) {
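// Illustrative standalone sketch (not from the patch) of the value pattern the
// saturating truncated store above matches: clamp to the narrow type's range
// in the wide type, then truncate (smin/smax for the signed form, umin for the
// unsigned form).
#include <algorithm>
#include <cassert>
#include <cstdint>

static int8_t truncSSat(int16_t X) {
  return static_cast<int8_t>(std::clamp<int16_t>(X, INT8_MIN, INT8_MAX));
}

static uint8_t truncUSat(uint16_t X) {
  return static_cast<uint8_t>(std::min<uint16_t>(X, UINT8_MAX));
}

int main() {
  assert(truncSSat(300) == 127 && truncSSat(-300) == -128);
  assert(truncUSat(300) == 255 && truncUSat(5) == 5);
  return 0;
}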
@@ -53334,54 +53629,57 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
Mst->getAddressingMode());
}
- SDValue Value = Mst->getValue();
if (Value.getOpcode() == ISD::TRUNCATE && Value.getNode()->hasOneUse() &&
- TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
- Mst->getMemoryVT())) {
- return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Value.getOperand(0),
- Mst->getBasePtr(), Mst->getOffset(), Mask,
- Mst->getMemoryVT(), Mst->getMemOperand(),
- Mst->getAddressingMode(), true);
+ TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), MemVT)) {
+ return DAG.getMaskedStore(Mst->getChain(), DL, Value.getOperand(0),
+ Mst->getBasePtr(), Mst->getOffset(), Mask, MemVT,
+ Mst->getMemOperand(), Mst->getAddressingMode(),
+ true);
}
return SDValue();
}
// Look for a RMW operation that only touches one bit of a larger than legal
-// type and fold it to a BTC/BTR/BTS pattern acting on a single i32 sub value.
+// type and fold it to a BTC/BTR/BTS or bit insertion pattern acting on a single
+// i32 sub value.
static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
using namespace SDPatternMatch;
-
- // Only handle normal stores and its chain was a matching normal load.
- auto *Ld = dyn_cast<LoadSDNode>(St->getChain());
- if (!ISD::isNormalStore(St) || !St->isSimple() || !Ld ||
- !ISD::isNormalLoad(Ld) || !Ld->isSimple() ||
- Ld->getBasePtr() != St->getBasePtr() ||
- Ld->getOffset() != St->getOffset())
- return SDValue();
-
- SDValue LoadVal(Ld, 0);
SDValue StoredVal = St->getValue();
EVT VT = StoredVal.getValueType();
- // Only narrow larger than legal scalar integers.
- if (!VT.isScalarInteger() ||
+ // Only narrow normal stores of larger than legal scalar integers.
+ if (!ISD::isNormalStore(St) || !St->isSimple() || !VT.isScalarInteger() ||
VT.getSizeInBits() <= (Subtarget.is64Bit() ? 64 : 32))
return SDValue();
// BTR: X & ~(1 << ShAmt)
// BTS: X | (1 << ShAmt)
// BTC: X ^ (1 << ShAmt)
- SDValue ShAmt;
- if (!StoredVal.hasOneUse() ||
- !(sd_match(StoredVal, m_And(m_Specific(LoadVal),
+ //
+ // BitInsert: (X & ~(1 << ShAmt)) | (InsertBit << ShAmt)
+ SDValue SrcVal, InsertBit, ShAmt;
+ if (!(sd_match(StoredVal, m_And(m_Value(SrcVal),
m_Not(m_Shl(m_One(), m_Value(ShAmt))))) ||
sd_match(StoredVal,
- m_Or(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt)))) ||
+ m_Or(m_Value(SrcVal), m_Shl(m_One(), m_Value(ShAmt)))) ||
sd_match(StoredVal,
- m_Xor(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt))))))
+ m_Xor(m_Value(SrcVal), m_Shl(m_One(), m_Value(ShAmt)))) ||
+ sd_match(
+ StoredVal,
+ m_Or(m_And(m_Value(SrcVal), m_Not(m_Shl(m_One(), m_Value(ShAmt)))),
+ m_Shl(m_Value(InsertBit), m_Deferred(ShAmt))))))
+ return SDValue();
+
+ // SrcVal must be a matching normal load further up the chain.
+ auto *Ld = dyn_cast<LoadSDNode>(peekThroughBitcasts(SrcVal));
+ if (!Ld || !ISD::isNormalLoad(Ld) || !Ld->isSimple() ||
+ Ld->getBasePtr() != St->getBasePtr() ||
+ Ld->getOffset() != St->getOffset() ||
+ !St->getChain().reachesChainWithoutSideEffects(SDValue(Ld, 1)))
return SDValue();
// Ensure the shift amount is in bounds.
@@ -53389,6 +53687,13 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
if (KnownAmt.getMaxValue().uge(VT.getSizeInBits()))
return SDValue();
+ // If we're inserting a bit then it must be the LSB.
+ if (InsertBit) {
+ KnownBits KnownInsert = DAG.computeKnownBits(InsertBit);
+ if (KnownInsert.countMinLeadingZeros() < (VT.getSizeInBits() - 1))
+ return SDValue();
+ }
+
// Split the shift into an alignment shift that moves the active i32 block to
// the bottom bits for truncation and a modulo shift that can act on the i32.
EVT AmtVT = ShAmt.getValueType();
@@ -53396,6 +53701,7 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
DAG.getSignedConstant(-32LL, DL, AmtVT));
SDValue ModuloAmt =
DAG.getNode(ISD::AND, DL, AmtVT, ShAmt, DAG.getConstant(31, DL, AmtVT));
+ ModuloAmt = DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8);
// Compute the byte offset for the i32 block that is changed by the RMW.
// combineTruncate will adjust the load for us in a similar way.
@@ -53407,18 +53713,41 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
SDNodeFlags::NoUnsignedWrap);
// Reconstruct the BTC/BTR/BTS pattern for the i32 block and store.
- SDValue X = DAG.getNode(ISD::SRL, DL, VT, LoadVal, AlignAmt);
+ SDValue X = DAG.getNode(ISD::SRL, DL, VT, SrcVal, AlignAmt);
X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
- SDValue Mask =
- DAG.getNode(ISD::SHL, DL, MVT::i32, DAG.getConstant(1, DL, MVT::i32),
- DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8));
- if (StoredVal.getOpcode() == ISD::AND)
- Mask = DAG.getNOT(DL, Mask, MVT::i32);
+ SDValue Mask = DAG.getNode(ISD::SHL, DL, MVT::i32,
+ DAG.getConstant(1, DL, MVT::i32), ModuloAmt);
- SDValue Res = DAG.getNode(StoredVal.getOpcode(), DL, MVT::i32, X, Mask);
- return DAG.getStore(St->getChain(), DL, Res, NewPtr, St->getPointerInfo(),
- Align(), St->getMemOperand()->getFlags());
+ SDValue Res;
+ if (InsertBit) {
+ SDValue BitMask =
+ DAG.getNode(ISD::SHL, DL, MVT::i32,
+ DAG.getZExtOrTrunc(InsertBit, DL, MVT::i32), ModuloAmt);
+ Res =
+ DAG.getNode(ISD::AND, DL, MVT::i32, X, DAG.getNOT(DL, Mask, MVT::i32));
+ Res = DAG.getNode(ISD::OR, DL, MVT::i32, Res, BitMask);
+ } else {
+ if (StoredVal.getOpcode() == ISD::AND)
+ Mask = DAG.getNOT(DL, Mask, MVT::i32);
+ Res = DAG.getNode(StoredVal.getOpcode(), DL, MVT::i32, X, Mask);
+ }
+
+ SDValue NewStore =
+ DAG.getStore(St->getChain(), DL, Res, NewPtr,
+ MachinePointerInfo(St->getPointerInfo().getAddrSpace()),
+ Align(), St->getMemOperand()->getFlags());
+
+ // If there are other uses of StoredVal, replace with a new load of the
+ // whole (updated) value.
+ if (!StoredVal.hasOneUse()) {
+ SDValue NewLoad =
+ DAG.getLoad(VT, DL, NewStore, Ld->getBasePtr(), Ld->getMemOperand());
+ for (SDNode *User : StoredVal->users())
+ DCI.AddToWorklist(User);
+ DAG.ReplaceAllUsesWith(StoredVal, NewLoad);
+ }
+ return NewStore;
}
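// Illustrative standalone sketch (not from the patch): for a single-bit RMW on
// a wider-than-legal integer, it is enough to load, update and store just the
// aligned 32-bit block containing the bit (offset ShAmt / 32), assuming the
// little-endian layout used on x86. Shown here for the BTS (OR) form.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint8_t Mem[16] = {}; // an i128 value in memory, all zero
  unsigned ShAmt = 77;

  // Narrowed RMW: operate only on the i32 block that holds bit ShAmt.
  uint32_t Block;
  std::memcpy(&Block, Mem + (ShAmt / 32) * 4, sizeof(Block));
  Block |= 1u << (ShAmt % 32); // BTS on the narrow block
  std::memcpy(Mem + (ShAmt / 32) * 4, &Block, sizeof(Block));

  // The full-width X | (1 << 77) would have set exactly the same memory bit.
  assert(Mem[ShAmt / 8] == uint8_t(1u << (ShAmt % 8)));
  return 0;
}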
static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
@@ -53647,7 +53976,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
}
}
- if (SDValue R = narrowBitOpRMW(St, dl, DAG, Subtarget))
+ if (SDValue R = narrowBitOpRMW(St, dl, DAG, DCI, Subtarget))
return R;
// Convert store(cmov(load(p), x, CC), p) to cstore(x, p, CC)
@@ -53984,7 +54313,9 @@ static SDValue combineToHorizontalAddSub(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
unsigned Opcode = N->getOpcode();
- bool IsAdd = (Opcode == ISD::FADD) || (Opcode == ISD::ADD);
+ bool IsAdd =
+ (Opcode == ISD::FADD) || (Opcode == ISD::ADD) || (Opcode == ISD::SADDSAT);
+ bool IsSat = (Opcode == ISD::SADDSAT) || (Opcode == ISD::SSUBSAT);
SmallVector<int, 8> PostShuffleMask;
auto MergableHorizOp = [N](unsigned HorizOpcode) {
@@ -54014,11 +54345,17 @@ static SDValue combineToHorizontalAddSub(SDNode *N, SelectionDAG &DAG,
break;
case ISD::ADD:
case ISD::SUB:
- if (Subtarget.hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32 ||
- VT == MVT::v16i16 || VT == MVT::v8i32)) {
+ case ISD::SADDSAT:
+ case ISD::SSUBSAT:
+ if (!Subtarget.hasSSSE3())
+ break;
+ if (VT == MVT::v8i16 || VT == MVT::v16i16 ||
+ (!IsSat && (VT == MVT::v4i32 || VT == MVT::v8i32))) {
+
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
- auto HorizOpcode = IsAdd ? X86ISD::HADD : X86ISD::HSUB;
+ auto HorizOpcode = IsSat ? (IsAdd ? X86ISD::HADDS : X86ISD::HSUBS)
+ : (IsAdd ? X86ISD::HADD : X86ISD::HSUB);
if (isHorizontalBinOp(HorizOpcode, LHS, RHS, DAG, Subtarget, IsAdd,
PostShuffleMask, MergableHorizOp(HorizOpcode))) {
auto HOpBuilder = [HorizOpcode](SelectionDAG &DAG, const SDLoc &DL,
@@ -54095,11 +54432,6 @@ static SDValue combineFMulcFCMulc(SDNode *N, SelectionDAG &DAG,
// FADD(A, FMA(B, C, 0)) and FADD(A, FMUL(B, C)) to FMA(B, C, A)
static SDValue combineFaddCFmul(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
- auto AllowContract = [&DAG](const SDNodeFlags &Flags) {
- return DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
- Flags.hasAllowContract();
- };
-
auto HasNoSignedZero = [&DAG](const SDNodeFlags &Flags) {
return DAG.getTarget().Options.NoSignedZerosFPMath ||
Flags.hasNoSignedZeros();
@@ -54112,7 +54444,7 @@ static SDValue combineFaddCFmul(SDNode *N, SelectionDAG &DAG,
};
if (N->getOpcode() != ISD::FADD || !Subtarget.hasFP16() ||
- !AllowContract(N->getFlags()))
+ !N->getFlags().hasAllowContract())
return SDValue();
EVT VT = N->getValueType(0);
@@ -54123,14 +54455,13 @@ static SDValue combineFaddCFmul(SDNode *N, SelectionDAG &DAG,
SDValue RHS = N->getOperand(1);
bool IsConj;
SDValue FAddOp1, MulOp0, MulOp1;
- auto GetCFmulFrom = [&MulOp0, &MulOp1, &IsConj, &AllowContract,
- &IsVectorAllNegativeZero,
+ auto GetCFmulFrom = [&MulOp0, &MulOp1, &IsConj, &IsVectorAllNegativeZero,
&HasNoSignedZero](SDValue N) -> bool {
if (!N.hasOneUse() || N.getOpcode() != ISD::BITCAST)
return false;
SDValue Op0 = N.getOperand(0);
unsigned Opcode = Op0.getOpcode();
- if (Op0.hasOneUse() && AllowContract(Op0->getFlags())) {
+ if (Op0.hasOneUse() && Op0->getFlags().hasAllowContract()) {
if ((Opcode == X86ISD::VFMULC || Opcode == X86ISD::VFCMULC)) {
MulOp0 = Op0.getOperand(0);
MulOp1 = Op0.getOperand(1);
@@ -54592,11 +54923,14 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
KnownBits KnownAmt = DAG.computeKnownBits(ShAmt);
// Check the shift amount is byte aligned.
// Check the truncation doesn't use any shifted in (zero) top bits.
- // Check the shift amount doesn't depend on the original load.
+ // Check the shift amount doesn't depend on the original load chain.
if (KnownAmt.countMinTrailingZeros() >= 3 &&
KnownAmt.getMaxValue().ule(SrcVT.getSizeInBits() -
VT.getSizeInBits()) &&
- !Ld->isPredecessorOf(ShAmt.getNode())) {
+ none_of(Ld->uses(), [&ShAmt](SDUse &Use) {
+ return Use.getResNo() == 1 &&
+ Use.getUser()->isPredecessorOf(ShAmt.getNode());
+ })) {
EVT PtrVT = Ld->getBasePtr().getValueType();
SDValue PtrBitOfs = DAG.getZExtOrTrunc(ShAmt, DL, PtrVT);
SDValue PtrByteOfs =
@@ -54605,7 +54939,8 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
SDValue NewPtr = DAG.getMemBasePlusOffset(
Ld->getBasePtr(), PtrByteOfs, DL, SDNodeFlags::NoUnsignedWrap);
SDValue NewLoad =
- DAG.getLoad(VT, DL, Ld->getChain(), NewPtr, Ld->getPointerInfo(),
+ DAG.getLoad(VT, DL, Ld->getChain(), NewPtr,
+ MachinePointerInfo(Ld->getPointerInfo().getAddrSpace()),
Align(), Ld->getMemOperand()->getFlags());
DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
return NewLoad;
@@ -57377,6 +57712,40 @@ static SDValue combineFP_TO_xINT_SAT(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Combiner: turn uniform-constant splat funnel shifts into VSHLD/VSHRD
+static SDValue combineFunnelShift(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ SDLoc DL(N);
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ SDValue Amt = N->getOperand(2);
+ EVT VT = Op0.getValueType();
+
+ if (!VT.isVector())
+ return SDValue();
+
+ // Only combine if the operation is legal for this type.
+ // This ensures we don't try to convert types that need to be
+ // widened/promoted.
+ if (!DAG.getTargetLoweringInfo().isOperationLegal(N->getOpcode(), VT))
+ return SDValue();
+
+ unsigned EltSize = VT.getScalarSizeInBits();
+ APInt ShiftVal;
+ if (!X86::isConstantSplat(Amt, ShiftVal))
+ return SDValue();
+
+ uint64_t ModAmt = ShiftVal.urem(EltSize);
+ SDValue Imm = DAG.getTargetConstant(ModAmt, DL, MVT::i8);
+ bool IsFSHR = N->getOpcode() == ISD::FSHR;
+
+ if (IsFSHR)
+ std::swap(Op0, Op1);
+ unsigned Opcode = IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD;
+ return DAG.getNode(Opcode, DL, VT, {Op0, Op1, Imm});
+}
+
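// Illustrative standalone sketch (not from the patch) of the per-element
// operation the combine above selects for constant-splat amounts: VSHLD
// matches fshl and VSHRD matches fshr, with the amount reduced modulo the
// element width (16-bit elements used here).
#include <cassert>
#include <cstdint>

static uint16_t fshl16(uint16_t A, uint16_t B, unsigned S) {
  S %= 16;
  return S ? uint16_t((A << S) | (B >> (16 - S))) : A;
}

static uint16_t fshr16(uint16_t A, uint16_t B, unsigned S) {
  S %= 16;
  return S ? uint16_t((B >> S) | (A << (16 - S))) : B;
}

int main() {
  uint16_t X = 0xBEEF;
  // A funnel shift of a value with itself is a rotate.
  assert(fshl16(X, X, 3) == uint16_t((X << 3) | (X >> 13)));
  assert(fshr16(X, X, 19) == uint16_t((X >> 3) | (X << 13))); // 19 % 16 == 3
  return 0;
}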
static bool needCarryOrOverflowFlag(SDValue Flags) {
assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!");
@@ -59063,7 +59432,8 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
case X86ISD::ANDNP:
// TODO: AVX512 targets should only use CombineSubOperand like AVX1/2.
if (!IsSplat && (VT.is256BitVector() ||
- (VT.is512BitVector() && Subtarget.useAVX512Regs()))) {
+ (VT.is512BitVector() && Subtarget.useAVX512Regs()) ||
+ (EltSizeInBits == 1 && TLI.isTypeLegal(VT)))) {
// Don't concatenate root AVX1 NOT patterns.
// TODO: Allow NOT folding if Concat0 succeeds.
if (Opcode == ISD::XOR && Depth == 0 && !Subtarget.hasInt256() &&
@@ -59073,7 +59443,8 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
break;
SDValue Concat0 = CombineSubOperand(VT, Ops, 0);
SDValue Concat1 = CombineSubOperand(VT, Ops, 1);
- if (Concat0 || Concat1 || Subtarget.useAVX512Regs())
+ if (Concat0 || Concat1 ||
+ (EltSizeInBits != 1 && Subtarget.useAVX512Regs()))
return DAG.getNode(Opcode, DL, VT,
Concat0 ? Concat0 : ConcatSubOperand(VT, Ops, 0),
Concat1 ? Concat1 : ConcatSubOperand(VT, Ops, 1));
@@ -59133,6 +59504,31 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
}
}
break;
+ case ISD::SETCC:
+ if (!IsSplat && EltSizeInBits == 1 &&
+ llvm::all_of(Ops, [Op0](SDValue Op) {
+ return Op0.getOperand(0).getValueType() ==
+ Op.getOperand(0).getValueType() &&
+ Op0.getOperand(2) == Op.getOperand(2);
+ })) {
+ EVT SrcVT = Op0.getOperand(0).getValueType();
+ EVT NewSrcVT = EVT::getVectorVT(Ctx, SrcVT.getScalarType(),
+ NumOps * SrcVT.getVectorNumElements());
+ unsigned SrcSizeInBits = SrcVT.getScalarSizeInBits();
+ if (TLI.isTypeLegal(VT) && TLI.isTypeLegal(NewSrcVT) &&
+ (NewSrcVT.is256BitVector() ||
+ (NewSrcVT.is512BitVector() && Subtarget.useAVX512Regs() &&
+ (SrcSizeInBits >= 32 || Subtarget.useBWIRegs())))) {
+ SDValue LHS = CombineSubOperand(NewSrcVT.getSimpleVT(), Ops, 0);
+ SDValue RHS = CombineSubOperand(NewSrcVT.getSimpleVT(), Ops, 1);
+ if (LHS || RHS)
+ return DAG.getNode(Opcode, DL, VT,
+ LHS ? LHS : ConcatSubOperand(NewSrcVT, Ops, 0),
+ RHS ? RHS : ConcatSubOperand(NewSrcVT, Ops, 1),
+ Op0.getOperand(2));
+ }
+ }
+ break;
case ISD::CTPOP:
case ISD::CTTZ:
case ISD::CTLZ:
@@ -59196,6 +59592,36 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
ConcatSubOperand(VT, Ops, 1));
}
break;
+ case ISD::FSQRT:
+ case ISD::FCEIL:
+ case ISD::FTRUNC:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT:
+ case ISD::FROUND:
+ case ISD::FROUNDEVEN:
+ case ISD::FFLOOR:
+ if (!IsSplat && (VT.is256BitVector() ||
+ (VT.is512BitVector() && Subtarget.useAVX512Regs()))) {
+ return DAG.getNode(Opcode, DL, VT, ConcatSubOperand(VT, Ops, 0));
+ }
+ break;
+ case X86ISD::FRCP:
+ case X86ISD::FRSQRT:
+ if (!IsSplat && VT.is256BitVector()) {
+ return DAG.getNode(Opcode, DL, VT, ConcatSubOperand(VT, Ops, 0));
+ }
+ break;
+ case X86ISD::VRNDSCALE:
+ if (!IsSplat &&
+ (VT.is256BitVector() ||
+ (VT.is512BitVector() && Subtarget.useAVX512Regs())) &&
+ llvm::all_of(Ops, [Op0](SDValue Op) {
+ return Op0.getOperand(1) == Op.getOperand(1);
+ })) {
+ return DAG.getNode(Opcode, DL, VT, ConcatSubOperand(VT, Ops, 0),
+ Op0.getOperand(1));
+ }
+ break;
case X86ISD::HADD:
case X86ISD::HSUB:
case X86ISD::FHADD:
@@ -59327,8 +59753,8 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
if (TLI->allowsMemoryAccess(Ctx, DAG.getDataLayout(), VT,
*FirstLd->getMemOperand(), &Fast) &&
Fast) {
- if (SDValue Ld =
- EltsFromConsecutiveLoads(VT, Ops, DL, DAG, Subtarget, false))
+ if (SDValue Ld = EltsFromConsecutiveLoads(VT, Ops, DL, DAG, Subtarget,
+ false, Depth + 1))
return Ld;
}
}
@@ -59467,6 +59893,17 @@ static SDValue combineCONCAT_VECTORS(SDNode *N, SelectionDAG &DAG,
}
}
+ // Attempt to merge comparison/logic ops if the type is legal.
+ if (TLI.isTypeLegal(VT) &&
+ (all_of(Ops, [](SDValue Op) { return Op.getOpcode() == ISD::SETCC; }) ||
+ all_of(Ops, [](SDValue Op) {
+ return ISD::isBitwiseLogicOp(Op.getOpcode());
+ }))) {
+ if (SDValue R = combineConcatVectorOps(SDLoc(N), VT.getSimpleVT(), Ops,
+ DAG, Subtarget))
+ return R;
+ }
+
// Don't do anything else for i1 vectors.
return SDValue();
}
@@ -60807,6 +61244,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SUB: return combineSub(N, DAG, DCI, Subtarget);
case X86ISD::ADD:
case X86ISD::SUB: return combineX86AddSub(N, DAG, DCI, Subtarget);
+ case ISD::SADDSAT:
+ case ISD::SSUBSAT: return combineToHorizontalAddSub(N, DAG, Subtarget);
case X86ISD::CLOAD:
case X86ISD::CSTORE: return combineX86CloadCstore(N, DAG);
case X86ISD::SBB: return combineSBB(N, DAG);
@@ -60930,6 +61369,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::VPERM2X128:
case X86ISD::SHUF128:
case X86ISD::VZEXT_MOVL:
+ case X86ISD::COMPRESS:
+ case X86ISD::EXPAND:
case ISD::VECTOR_SHUFFLE: return combineShuffle(N, DAG, DCI,Subtarget);
case X86ISD::FMADD_RND:
case X86ISD::FMSUB:
@@ -60977,6 +61418,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::INTRINSIC_VOID: return combineINTRINSIC_VOID(N, DAG, DCI);
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT: return combineFP_TO_xINT_SAT(N, DAG, Subtarget);
+ case ISD::FSHL:
+ case ISD::FSHR: return combineFunnelShift(N, DAG, DCI, Subtarget);
// clang-format on
}
@@ -61531,8 +61974,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() == 0xff || C->getZExtValue() == 0xffff ||
(Subtarget.is64Bit() && C->getZExtValue() == 0xffffffff)) {
- Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
- Op.getValueType());
+ Result = DAG.getSignedTargetConstant(C->getSExtValue(), SDLoc(Op),
+ Op.getValueType());
break;
}
}
@@ -61570,7 +62013,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
C->getSExtValue())) {
// Widen to 64 bits here to get it sign extended.
- Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), MVT::i64);
+ Result =
+ DAG.getSignedTargetConstant(C->getSExtValue(), SDLoc(Op), MVT::i64);
break;
}
// FIXME gcc accepts some relocatable values here too, but only in certain
@@ -61619,9 +62063,11 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
BooleanContent BCont = getBooleanContents(MVT::i64);
ISD::NodeType ExtOpc = IsBool ? getExtendForContent(BCont)
: ISD::SIGN_EXTEND;
- int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? CST->getZExtValue()
- : CST->getSExtValue();
- Result = DAG.getTargetConstant(ExtVal, SDLoc(Op), MVT::i64);
+ SDLoc DL(Op);
+ Result =
+ ExtOpc == ISD::ZERO_EXTEND
+ ? DAG.getTargetConstant(CST->getZExtValue(), DL, MVT::i64)
+ : DAG.getSignedTargetConstant(CST->getSExtValue(), DL, MVT::i64);
break;
}