diff options
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 63 |
1 files changed, 20 insertions, 43 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 007074c..2970cf4 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -22861,6 +22861,13 @@ static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, if (!OpVT.isScalarInteger() || OpSize < 128) return SDValue(); + // Don't do this if we're not supposed to use the FPU. + bool NoImplicitFloatOps = + DAG.getMachineFunction().getFunction().hasFnAttribute( + Attribute::NoImplicitFloat); + if (Subtarget.useSoftFloat() || NoImplicitFloatOps) + return SDValue(); + // Ignore a comparison with zero because that gets special treatment in // EmitTest(). But make an exception for the special case of a pair of // logically-combined vector-sized operands compared to zero. This pattern may @@ -22883,13 +22890,9 @@ static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, // Use XOR (plus OR) and PTEST after SSE4.1 for 128/256-bit operands. // Use PCMPNEQ (plus OR) and KORTEST for 512-bit operands. // Otherwise use PCMPEQ (plus AND) and mask testing. - bool NoImplicitFloatOps = - DAG.getMachineFunction().getFunction().hasFnAttribute( - Attribute::NoImplicitFloat); - if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps && - ((OpSize == 128 && Subtarget.hasSSE2()) || - (OpSize == 256 && Subtarget.hasAVX()) || - (OpSize == 512 && Subtarget.useAVX512Regs()))) { + if ((OpSize == 128 && Subtarget.hasSSE2()) || + (OpSize == 256 && Subtarget.hasAVX()) || + (OpSize == 512 && Subtarget.useAVX512Regs())) { bool HasPT = Subtarget.hasSSE41(); // PTEST and MOVMSK are slow on Knights Landing and Knights Mill and widened @@ -53345,8 +53348,7 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG, } // Look for a RMW operation that only touches one bit of a larger than legal -// type and fold it to a BTC/BTR/BTS or bit insertion pattern acting on a single -// i32 sub value. +// type and fold it to a BTC/BTR/BTS pattern acting on a single i32 sub value. static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget) { @@ -53372,20 +53374,14 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL, // BTR: X & ~(1 << ShAmt) // BTS: X | (1 << ShAmt) // BTC: X ^ (1 << ShAmt) - // - // BitInsert: (X & ~(1 << ShAmt)) | (InsertBit << ShAmt) - SDValue InsertBit, ShAmt; + SDValue ShAmt; if (!StoredVal.hasOneUse() || !(sd_match(StoredVal, m_And(m_Specific(LoadVal), m_Not(m_Shl(m_One(), m_Value(ShAmt))))) || sd_match(StoredVal, m_Or(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt)))) || sd_match(StoredVal, - m_Xor(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt)))) || - sd_match(StoredVal, - m_Or(m_And(m_Specific(LoadVal), - m_Not(m_Shl(m_One(), m_Value(ShAmt)))), - m_Shl(m_Value(InsertBit), m_Deferred(ShAmt)))))) + m_Xor(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt)))))) return SDValue(); // Ensure the shift amount is in bounds. @@ -53393,13 +53389,6 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL, if (KnownAmt.getMaxValue().uge(VT.getSizeInBits())) return SDValue(); - // If we're inserting a bit then it must be the LSB. - if (InsertBit) { - KnownBits KnownInsert = DAG.computeKnownBits(InsertBit); - if (KnownInsert.countMinLeadingZeros() < (VT.getSizeInBits() - 1)) - return SDValue(); - } - // Split the shift into an alignment shift that moves the active i32 block to // the bottom bits for truncation and a modulo shift that can act on the i32. EVT AmtVT = ShAmt.getValueType(); @@ -53407,7 +53396,6 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL, DAG.getSignedConstant(-32LL, DL, AmtVT)); SDValue ModuloAmt = DAG.getNode(ISD::AND, DL, AmtVT, ShAmt, DAG.getConstant(31, DL, AmtVT)); - ModuloAmt = DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8); // Compute the byte offset for the i32 block that is changed by the RMW. // combineTruncate will adjust the load for us in a similar way. @@ -53422,23 +53410,13 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL, SDValue X = DAG.getNode(ISD::SRL, DL, VT, LoadVal, AlignAmt); X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X); - SDValue Mask = DAG.getNode(ISD::SHL, DL, MVT::i32, - DAG.getConstant(1, DL, MVT::i32), ModuloAmt); - - SDValue Res; - if (InsertBit) { - SDValue BitMask = - DAG.getNode(ISD::SHL, DL, MVT::i32, - DAG.getZExtOrTrunc(InsertBit, DL, MVT::i32), ModuloAmt); - Res = - DAG.getNode(ISD::AND, DL, MVT::i32, X, DAG.getNOT(DL, Mask, MVT::i32)); - Res = DAG.getNode(ISD::OR, DL, MVT::i32, Res, BitMask); - } else { - if (StoredVal.getOpcode() == ISD::AND) - Mask = DAG.getNOT(DL, Mask, MVT::i32); - Res = DAG.getNode(StoredVal.getOpcode(), DL, MVT::i32, X, Mask); - } + SDValue Mask = + DAG.getNode(ISD::SHL, DL, MVT::i32, DAG.getConstant(1, DL, MVT::i32), + DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8)); + if (StoredVal.getOpcode() == ISD::AND) + Mask = DAG.getNOT(DL, Mask, MVT::i32); + SDValue Res = DAG.getNode(StoredVal.getOpcode(), DL, MVT::i32, X, Mask); return DAG.getStore(St->getChain(), DL, Res, NewPtr, St->getPointerInfo(), Align(), St->getMemOperand()->getFlags()); } @@ -54629,8 +54607,7 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG, SDValue NewLoad = DAG.getLoad(VT, DL, Ld->getChain(), NewPtr, Ld->getPointerInfo(), Align(), Ld->getMemOperand()->getFlags()); - DAG.ReplaceAllUsesOfValueWith(Src.getOperand(0).getValue(1), - NewLoad.getValue(1)); + DAG.makeEquivalentMemoryOrdering(Ld, NewLoad); return NewLoad; } } |
