aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp63
1 files changed, 20 insertions, 43 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 007074c..2970cf4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -22861,6 +22861,13 @@ static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y,
if (!OpVT.isScalarInteger() || OpSize < 128)
return SDValue();
+ // Don't do this if we're not supposed to use the FPU.
+ bool NoImplicitFloatOps =
+ DAG.getMachineFunction().getFunction().hasFnAttribute(
+ Attribute::NoImplicitFloat);
+ if (Subtarget.useSoftFloat() || NoImplicitFloatOps)
+ return SDValue();
+
// Ignore a comparison with zero because that gets special treatment in
// EmitTest(). But make an exception for the special case of a pair of
// logically-combined vector-sized operands compared to zero. This pattern may
@@ -22883,13 +22890,9 @@ static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y,
// Use XOR (plus OR) and PTEST after SSE4.1 for 128/256-bit operands.
// Use PCMPNEQ (plus OR) and KORTEST for 512-bit operands.
// Otherwise use PCMPEQ (plus AND) and mask testing.
- bool NoImplicitFloatOps =
- DAG.getMachineFunction().getFunction().hasFnAttribute(
- Attribute::NoImplicitFloat);
- if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
- ((OpSize == 128 && Subtarget.hasSSE2()) ||
- (OpSize == 256 && Subtarget.hasAVX()) ||
- (OpSize == 512 && Subtarget.useAVX512Regs()))) {
+ if ((OpSize == 128 && Subtarget.hasSSE2()) ||
+ (OpSize == 256 && Subtarget.hasAVX()) ||
+ (OpSize == 512 && Subtarget.useAVX512Regs())) {
bool HasPT = Subtarget.hasSSE41();
// PTEST and MOVMSK are slow on Knights Landing and Knights Mill and widened
@@ -53345,8 +53348,7 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
}
// Look for a RMW operation that only touches one bit of a larger than legal
-// type and fold it to a BTC/BTR/BTS or bit insertion pattern acting on a single
-// i32 sub value.
+// type and fold it to a BTC/BTR/BTS pattern acting on a single i32 sub value.
static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
@@ -53372,20 +53374,14 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
// BTR: X & ~(1 << ShAmt)
// BTS: X | (1 << ShAmt)
// BTC: X ^ (1 << ShAmt)
- //
- // BitInsert: (X & ~(1 << ShAmt)) | (InsertBit << ShAmt)
- SDValue InsertBit, ShAmt;
+ SDValue ShAmt;
if (!StoredVal.hasOneUse() ||
!(sd_match(StoredVal, m_And(m_Specific(LoadVal),
m_Not(m_Shl(m_One(), m_Value(ShAmt))))) ||
sd_match(StoredVal,
m_Or(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt)))) ||
sd_match(StoredVal,
- m_Xor(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt)))) ||
- sd_match(StoredVal,
- m_Or(m_And(m_Specific(LoadVal),
- m_Not(m_Shl(m_One(), m_Value(ShAmt)))),
- m_Shl(m_Value(InsertBit), m_Deferred(ShAmt))))))
+ m_Xor(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt))))))
return SDValue();
// Ensure the shift amount is in bounds.
@@ -53393,13 +53389,6 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
if (KnownAmt.getMaxValue().uge(VT.getSizeInBits()))
return SDValue();
- // If we're inserting a bit then it must be the LSB.
- if (InsertBit) {
- KnownBits KnownInsert = DAG.computeKnownBits(InsertBit);
- if (KnownInsert.countMinLeadingZeros() < (VT.getSizeInBits() - 1))
- return SDValue();
- }
-
// Split the shift into an alignment shift that moves the active i32 block to
// the bottom bits for truncation and a modulo shift that can act on the i32.
EVT AmtVT = ShAmt.getValueType();
@@ -53407,7 +53396,6 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
DAG.getSignedConstant(-32LL, DL, AmtVT));
SDValue ModuloAmt =
DAG.getNode(ISD::AND, DL, AmtVT, ShAmt, DAG.getConstant(31, DL, AmtVT));
- ModuloAmt = DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8);
// Compute the byte offset for the i32 block that is changed by the RMW.
// combineTruncate will adjust the load for us in a similar way.
@@ -53422,23 +53410,13 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
SDValue X = DAG.getNode(ISD::SRL, DL, VT, LoadVal, AlignAmt);
X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
- SDValue Mask = DAG.getNode(ISD::SHL, DL, MVT::i32,
- DAG.getConstant(1, DL, MVT::i32), ModuloAmt);
-
- SDValue Res;
- if (InsertBit) {
- SDValue BitMask =
- DAG.getNode(ISD::SHL, DL, MVT::i32,
- DAG.getZExtOrTrunc(InsertBit, DL, MVT::i32), ModuloAmt);
- Res =
- DAG.getNode(ISD::AND, DL, MVT::i32, X, DAG.getNOT(DL, Mask, MVT::i32));
- Res = DAG.getNode(ISD::OR, DL, MVT::i32, Res, BitMask);
- } else {
- if (StoredVal.getOpcode() == ISD::AND)
- Mask = DAG.getNOT(DL, Mask, MVT::i32);
- Res = DAG.getNode(StoredVal.getOpcode(), DL, MVT::i32, X, Mask);
- }
+ SDValue Mask =
+ DAG.getNode(ISD::SHL, DL, MVT::i32, DAG.getConstant(1, DL, MVT::i32),
+ DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8));
+ if (StoredVal.getOpcode() == ISD::AND)
+ Mask = DAG.getNOT(DL, Mask, MVT::i32);
+ SDValue Res = DAG.getNode(StoredVal.getOpcode(), DL, MVT::i32, X, Mask);
return DAG.getStore(St->getChain(), DL, Res, NewPtr, St->getPointerInfo(),
Align(), St->getMemOperand()->getFlags());
}
@@ -54629,8 +54607,7 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
SDValue NewLoad =
DAG.getLoad(VT, DL, Ld->getChain(), NewPtr, Ld->getPointerInfo(),
Align(), Ld->getMemOperand()->getFlags());
- DAG.ReplaceAllUsesOfValueWith(Src.getOperand(0).getValue(1),
- NewLoad.getValue(1));
+ DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
return NewLoad;
}
}