Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 156
1 file changed, 12 insertions(+), 144 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 007074c..133406b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -22861,6 +22861,13 @@ static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y,
if (!OpVT.isScalarInteger() || OpSize < 128)
return SDValue();
+ // Don't do this if we're not supposed to use the FPU.
+ bool NoImplicitFloatOps =
+ DAG.getMachineFunction().getFunction().hasFnAttribute(
+ Attribute::NoImplicitFloat);
+ if (Subtarget.useSoftFloat() || NoImplicitFloatOps)
+ return SDValue();
+
// Ignore a comparison with zero because that gets special treatment in
// EmitTest(). But make an exception for the special case of a pair of
// logically-combined vector-sized operands compared to zero. This pattern may
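
The bail-out hoisted in this hunk matters because every path in this combine ends in XMM/YMM/ZMM code. A minimal illustration of the kind of source it protects (my example, not from the patch): in a function carrying the noimplicitfloat attribute, or under soft float, a wide equality compare has to stay in general-purpose registers, so the combine now gives up before selecting any vector strategy.

#include <cstring>

// If the enclosing function is marked noimplicitfloat, this 16-byte
// equality must not become PXOR/PTEST; the early return keeps the
// memcmp expansion on the GPR path.
bool eq16(const void *A, const void *B) {
  return std::memcmp(A, B, 16) == 0;
}
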
@@ -22883,13 +22890,9 @@ static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y,
// Use XOR (plus OR) and PTEST after SSE4.1 for 128/256-bit operands.
// Use PCMPNEQ (plus OR) and KORTEST for 512-bit operands.
// Otherwise use PCMPEQ (plus AND) and mask testing.
- bool NoImplicitFloatOps =
- DAG.getMachineFunction().getFunction().hasFnAttribute(
- Attribute::NoImplicitFloat);
- if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
- ((OpSize == 128 && Subtarget.hasSSE2()) ||
- (OpSize == 256 && Subtarget.hasAVX()) ||
- (OpSize == 512 && Subtarget.useAVX512Regs()))) {
+ if ((OpSize == 128 && Subtarget.hasSSE2()) ||
+ (OpSize == 256 && Subtarget.hasAVX()) ||
+ (OpSize == 512 && Subtarget.useAVX512Regs())) {
bool HasPT = Subtarget.hasSSE41();
// PTEST and MOVMSK are slow on Knights Landing and Knights Mill and widened
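
As a reference for the strategy comment above, here is a hedged intrinsics model of the 128-bit SSE4.1 path (illustrative only; the real combine builds SelectionDAG nodes, not intrinsics): XOR the operands and PTEST the result against itself, which sets ZF exactly when the two inputs are equal.

#include <immintrin.h>

// XOR-plus-PTEST equality for one 128-bit block, as on SSE4.1.
bool eq128(const void *A, const void *B) {
  __m128i X = _mm_loadu_si128(static_cast<const __m128i *>(A));
  __m128i Y = _mm_loadu_si128(static_cast<const __m128i *>(B));
  __m128i Xor = _mm_xor_si128(X, Y);
  return _mm_testz_si128(Xor, Xor) != 0; // PTEST: ZF iff Xor is all zero
}
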
@@ -53344,105 +53347,6 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-// Look for a RMW operation that only touches one bit of a larger than legal
-// type and fold it to a BTC/BTR/BTS or bit insertion pattern acting on a single
-// i32 sub value.
-static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
- SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
- using namespace SDPatternMatch;
-
- // Only handle normal stores and its chain was a matching normal load.
- auto *Ld = dyn_cast<LoadSDNode>(St->getChain());
- if (!ISD::isNormalStore(St) || !St->isSimple() || !Ld ||
- !ISD::isNormalLoad(Ld) || !Ld->isSimple() ||
- Ld->getBasePtr() != St->getBasePtr() ||
- Ld->getOffset() != St->getOffset())
- return SDValue();
-
- SDValue LoadVal(Ld, 0);
- SDValue StoredVal = St->getValue();
- EVT VT = StoredVal.getValueType();
-
- // Only narrow larger than legal scalar integers.
- if (!VT.isScalarInteger() ||
- VT.getSizeInBits() <= (Subtarget.is64Bit() ? 64 : 32))
- return SDValue();
-
- // BTR: X & ~(1 << ShAmt)
- // BTS: X | (1 << ShAmt)
- // BTC: X ^ (1 << ShAmt)
- //
- // BitInsert: (X & ~(1 << ShAmt)) | (InsertBit << ShAmt)
- SDValue InsertBit, ShAmt;
- if (!StoredVal.hasOneUse() ||
- !(sd_match(StoredVal, m_And(m_Specific(LoadVal),
- m_Not(m_Shl(m_One(), m_Value(ShAmt))))) ||
- sd_match(StoredVal,
- m_Or(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt)))) ||
- sd_match(StoredVal,
- m_Xor(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt)))) ||
- sd_match(StoredVal,
- m_Or(m_And(m_Specific(LoadVal),
- m_Not(m_Shl(m_One(), m_Value(ShAmt)))),
- m_Shl(m_Value(InsertBit), m_Deferred(ShAmt))))))
- return SDValue();
-
- // Ensure the shift amount is in bounds.
- KnownBits KnownAmt = DAG.computeKnownBits(ShAmt);
- if (KnownAmt.getMaxValue().uge(VT.getSizeInBits()))
- return SDValue();
-
- // If we're inserting a bit then it must be the LSB.
- if (InsertBit) {
- KnownBits KnownInsert = DAG.computeKnownBits(InsertBit);
- if (KnownInsert.countMinLeadingZeros() < (VT.getSizeInBits() - 1))
- return SDValue();
- }
-
- // Split the shift into an alignment shift that moves the active i32 block to
- // the bottom bits for truncation and a modulo shift that can act on the i32.
- EVT AmtVT = ShAmt.getValueType();
- SDValue AlignAmt = DAG.getNode(ISD::AND, DL, AmtVT, ShAmt,
- DAG.getSignedConstant(-32LL, DL, AmtVT));
- SDValue ModuloAmt =
- DAG.getNode(ISD::AND, DL, AmtVT, ShAmt, DAG.getConstant(31, DL, AmtVT));
- ModuloAmt = DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8);
-
- // Compute the byte offset for the i32 block that is changed by the RMW.
- // combineTruncate will adjust the load for us in a similar way.
- EVT PtrVT = St->getBasePtr().getValueType();
- SDValue PtrBitOfs = DAG.getZExtOrTrunc(AlignAmt, DL, PtrVT);
- SDValue PtrByteOfs = DAG.getNode(ISD::SRL, DL, PtrVT, PtrBitOfs,
- DAG.getShiftAmountConstant(3, PtrVT, DL));
- SDValue NewPtr = DAG.getMemBasePlusOffset(St->getBasePtr(), PtrByteOfs, DL,
- SDNodeFlags::NoUnsignedWrap);
-
- // Reconstruct the BTC/BTR/BTS pattern for the i32 block and store.
- SDValue X = DAG.getNode(ISD::SRL, DL, VT, LoadVal, AlignAmt);
- X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
-
- SDValue Mask = DAG.getNode(ISD::SHL, DL, MVT::i32,
- DAG.getConstant(1, DL, MVT::i32), ModuloAmt);
-
- SDValue Res;
- if (InsertBit) {
- SDValue BitMask =
- DAG.getNode(ISD::SHL, DL, MVT::i32,
- DAG.getZExtOrTrunc(InsertBit, DL, MVT::i32), ModuloAmt);
- Res =
- DAG.getNode(ISD::AND, DL, MVT::i32, X, DAG.getNOT(DL, Mask, MVT::i32));
- Res = DAG.getNode(ISD::OR, DL, MVT::i32, Res, BitMask);
- } else {
- if (StoredVal.getOpcode() == ISD::AND)
- Mask = DAG.getNOT(DL, Mask, MVT::i32);
- Res = DAG.getNode(StoredVal.getOpcode(), DL, MVT::i32, X, Mask);
- }
-
- return DAG.getStore(St->getChain(), DL, Res, NewPtr, St->getPointerInfo(),
- Align(), St->getMemOperand()->getFlags());
-}
-
static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
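
To make the deleted fold concrete, a hedged C++ model of the i128 BTS case it handled (function names and the memcpy framing are mine; assumes little-endian x86): rather than a full 128-bit load/modify/store, only the 32-bit block containing the target bit is rewritten, using the same AlignAmt/ModuloAmt split as the removed code.

#include <cstdint>
#include <cstring>

// Full-width reference: set bit Idx of a 128-bit value in memory.
void btsWide(unsigned char *P, unsigned Idx) {
  P[Idx / 8] |= static_cast<unsigned char>(1u << (Idx % 8));
}

// Narrowed form mirroring the deleted fold: a single i32 RMW on the
// block holding the bit (valid because x86 is little-endian).
void btsNarrow(unsigned char *P, unsigned Idx) {
  unsigned AlignAmt = Idx & ~31u;  // bit offset of the containing i32
  unsigned ModuloAmt = Idx & 31u;  // bit position within that i32
  uint32_t Blk;
  std::memcpy(&Blk, P + AlignAmt / 8, sizeof(Blk));
  Blk |= uint32_t(1) << ModuloAmt; // the BTS pattern on the narrow value
  std::memcpy(P + AlignAmt / 8, &Blk, sizeof(Blk));
}
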
@@ -53669,9 +53573,6 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
}
}
- if (SDValue R = narrowBitOpRMW(St, dl, DAG, Subtarget))
- return R;
-
// Convert store(cmov(load(p), x, CC), p) to cstore(x, p, CC)
// store(cmov(x, load(p), CC), p) to cstore(x, p, InvertCC)
if ((VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
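
The cstore conversion noted in the context above has a simple scalar reading; a hedged sketch (identifiers mine):

// Before: *p = CC ? x : *p;   (load, cmov, unconditional store)
// After:  if (CC) *p = x;     (conditional store only)
void cstoreModel(int *P, int X, bool CC) {
  if (CC)
    *P = X;
}
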
@@ -54604,9 +54505,8 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
// truncation, see if we can convert the shift into a pointer offset instead.
// Limit this to normal (non-ext) scalar integer loads.
if (SrcVT.isScalarInteger() && Src.getOpcode() == ISD::SRL &&
- Src.hasOneUse() && ISD::isNormalLoad(Src.getOperand(0).getNode()) &&
- (Src.getOperand(0).hasOneUse() ||
- !DAG.getTargetLoweringInfo().isOperationLegal(ISD::LOAD, SrcVT))) {
+ Src.hasOneUse() && Src.getOperand(0).hasOneUse() &&
+ ISD::isNormalLoad(Src.getOperand(0).getNode())) {
auto *Ld = cast<LoadSDNode>(Src.getOperand(0));
if (Ld->isSimple() && VT.isByteSized() &&
isPowerOf2_64(VT.getSizeInBits())) {
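
The tightened guard above belongs to the fold that rewrites truncate(srl(load p, C)) as a narrow load at an adjusted offset. A hedged little-endian model (names mine, not from the patch):

#include <cstdint>
#include <cstring>

// What the DAG sees: i64 load, logical shift right by 32, truncate.
uint32_t highHalf(const uint64_t *P) {
  return static_cast<uint32_t>(*P >> 32);
}

// What the fold emits: an i32 load 4 bytes in, turning the constant
// shift amount into a pointer offset (little-endian layout).
uint32_t highHalfNarrowed(const unsigned char *P) {
  uint32_t V;
  std::memcpy(&V, P + 4, sizeof(V));
  return V;
}
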
@@ -56406,7 +56306,6 @@ static SDValue combineAVX512SetCCToKMOV(EVT VT, SDValue Op0, ISD::CondCode CC,
static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
- using namespace SDPatternMatch;
const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
const SDValue LHS = N->getOperand(0);
const SDValue RHS = N->getOperand(1);
@@ -56465,37 +56364,6 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
if (SDValue AndN = MatchAndCmpEq(RHS, LHS))
return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC);
- // If we're performing a bit test on a larger than legal type, attempt
- // to (aligned) shift down the value to the bottom 32-bits and then
- // perform the bittest on the i32 value.
- // ICMP_ZERO(AND(X,SHL(1,IDX)))
- // --> ICMP_ZERO(AND(TRUNC(SRL(X,AND(IDX,-32))),SHL(1,AND(IDX,31))))
- if (isNullConstant(RHS) &&
- OpVT.getScalarSizeInBits() > (Subtarget.is64Bit() ? 64 : 32)) {
- SDValue X, ShAmt;
- if (sd_match(LHS, m_OneUse(m_And(m_Value(X),
- m_Shl(m_One(), m_Value(ShAmt)))))) {
- // Only attempt this if the shift amount is known to be in bounds.
- KnownBits KnownAmt = DAG.computeKnownBits(ShAmt);
- if (KnownAmt.getMaxValue().ult(OpVT.getScalarSizeInBits())) {
- EVT AmtVT = ShAmt.getValueType();
- SDValue AlignAmt =
- DAG.getNode(ISD::AND, DL, AmtVT, ShAmt,
- DAG.getSignedConstant(-32LL, DL, AmtVT));
- SDValue ModuloAmt = DAG.getNode(ISD::AND, DL, AmtVT, ShAmt,
- DAG.getConstant(31, DL, AmtVT));
- SDValue Mask = DAG.getNode(
- ISD::SHL, DL, MVT::i32, DAG.getConstant(1, DL, MVT::i32),
- DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8));
- X = DAG.getNode(ISD::SRL, DL, OpVT, X, AlignAmt);
- X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
- X = DAG.getNode(ISD::AND, DL, MVT::i32, X, Mask);
- return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, MVT::i32),
- CC);
- }
- }
- }
-
// cmpeq(trunc(x),C) --> cmpeq(x,C)
// cmpne(trunc(x),C) --> cmpne(x,C)
// iff x upper bits are zero.
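
For the record, a hedged model of the i128 bit test the deleted block narrowed (identifiers mine; unsigned __int128 is a GCC/Clang extension). The two functions agree whenever Idx is provably below 128, which is exactly the computeKnownBits guard the removed code enforced:

#include <cstdint>

// ICMP_ZERO(AND(X, SHL(1, Idx))) on an illegal i128 type.
bool bitClearWide(unsigned __int128 X, unsigned Idx) {
  return (X & ((unsigned __int128)1 << Idx)) == 0;
}

// Narrowed: ICMP_ZERO(AND(TRUNC(SRL(X, Idx & -32)), SHL(1, Idx & 31))).
bool bitClearNarrow(unsigned __int128 X, unsigned Idx) {
  uint32_t Blk = static_cast<uint32_t>(X >> (Idx & ~31u));
  return (Blk & (1u << (Idx & 31u))) == 0;
}
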