diff options
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 156 |
1 files changed, 12 insertions, 144 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 007074c..133406b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -22861,6 +22861,13 @@ static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, if (!OpVT.isScalarInteger() || OpSize < 128) return SDValue(); + // Don't do this if we're not supposed to use the FPU. + bool NoImplicitFloatOps = + DAG.getMachineFunction().getFunction().hasFnAttribute( + Attribute::NoImplicitFloat); + if (Subtarget.useSoftFloat() || NoImplicitFloatOps) + return SDValue(); + // Ignore a comparison with zero because that gets special treatment in // EmitTest(). But make an exception for the special case of a pair of // logically-combined vector-sized operands compared to zero. This pattern may @@ -22883,13 +22890,9 @@ static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, // Use XOR (plus OR) and PTEST after SSE4.1 for 128/256-bit operands. // Use PCMPNEQ (plus OR) and KORTEST for 512-bit operands. // Otherwise use PCMPEQ (plus AND) and mask testing. - bool NoImplicitFloatOps = - DAG.getMachineFunction().getFunction().hasFnAttribute( - Attribute::NoImplicitFloat); - if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps && - ((OpSize == 128 && Subtarget.hasSSE2()) || - (OpSize == 256 && Subtarget.hasAVX()) || - (OpSize == 512 && Subtarget.useAVX512Regs()))) { + if ((OpSize == 128 && Subtarget.hasSSE2()) || + (OpSize == 256 && Subtarget.hasAVX()) || + (OpSize == 512 && Subtarget.useAVX512Regs())) { bool HasPT = Subtarget.hasSSE41(); // PTEST and MOVMSK are slow on Knights Landing and Knights Mill and widened @@ -53344,105 +53347,6 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG, return SDValue(); } -// Look for a RMW operation that only touches one bit of a larger than legal -// type and fold it to a BTC/BTR/BTS or bit insertion pattern acting on a single -// i32 sub value. -static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL, - SelectionDAG &DAG, - const X86Subtarget &Subtarget) { - using namespace SDPatternMatch; - - // Only handle normal stores and its chain was a matching normal load. - auto *Ld = dyn_cast<LoadSDNode>(St->getChain()); - if (!ISD::isNormalStore(St) || !St->isSimple() || !Ld || - !ISD::isNormalLoad(Ld) || !Ld->isSimple() || - Ld->getBasePtr() != St->getBasePtr() || - Ld->getOffset() != St->getOffset()) - return SDValue(); - - SDValue LoadVal(Ld, 0); - SDValue StoredVal = St->getValue(); - EVT VT = StoredVal.getValueType(); - - // Only narrow larger than legal scalar integers. - if (!VT.isScalarInteger() || - VT.getSizeInBits() <= (Subtarget.is64Bit() ? 64 : 32)) - return SDValue(); - - // BTR: X & ~(1 << ShAmt) - // BTS: X | (1 << ShAmt) - // BTC: X ^ (1 << ShAmt) - // - // BitInsert: (X & ~(1 << ShAmt)) | (InsertBit << ShAmt) - SDValue InsertBit, ShAmt; - if (!StoredVal.hasOneUse() || - !(sd_match(StoredVal, m_And(m_Specific(LoadVal), - m_Not(m_Shl(m_One(), m_Value(ShAmt))))) || - sd_match(StoredVal, - m_Or(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt)))) || - sd_match(StoredVal, - m_Xor(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt)))) || - sd_match(StoredVal, - m_Or(m_And(m_Specific(LoadVal), - m_Not(m_Shl(m_One(), m_Value(ShAmt)))), - m_Shl(m_Value(InsertBit), m_Deferred(ShAmt)))))) - return SDValue(); - - // Ensure the shift amount is in bounds. - KnownBits KnownAmt = DAG.computeKnownBits(ShAmt); - if (KnownAmt.getMaxValue().uge(VT.getSizeInBits())) - return SDValue(); - - // If we're inserting a bit then it must be the LSB. - if (InsertBit) { - KnownBits KnownInsert = DAG.computeKnownBits(InsertBit); - if (KnownInsert.countMinLeadingZeros() < (VT.getSizeInBits() - 1)) - return SDValue(); - } - - // Split the shift into an alignment shift that moves the active i32 block to - // the bottom bits for truncation and a modulo shift that can act on the i32. - EVT AmtVT = ShAmt.getValueType(); - SDValue AlignAmt = DAG.getNode(ISD::AND, DL, AmtVT, ShAmt, - DAG.getSignedConstant(-32LL, DL, AmtVT)); - SDValue ModuloAmt = - DAG.getNode(ISD::AND, DL, AmtVT, ShAmt, DAG.getConstant(31, DL, AmtVT)); - ModuloAmt = DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8); - - // Compute the byte offset for the i32 block that is changed by the RMW. - // combineTruncate will adjust the load for us in a similar way. - EVT PtrVT = St->getBasePtr().getValueType(); - SDValue PtrBitOfs = DAG.getZExtOrTrunc(AlignAmt, DL, PtrVT); - SDValue PtrByteOfs = DAG.getNode(ISD::SRL, DL, PtrVT, PtrBitOfs, - DAG.getShiftAmountConstant(3, PtrVT, DL)); - SDValue NewPtr = DAG.getMemBasePlusOffset(St->getBasePtr(), PtrByteOfs, DL, - SDNodeFlags::NoUnsignedWrap); - - // Reconstruct the BTC/BTR/BTS pattern for the i32 block and store. - SDValue X = DAG.getNode(ISD::SRL, DL, VT, LoadVal, AlignAmt); - X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X); - - SDValue Mask = DAG.getNode(ISD::SHL, DL, MVT::i32, - DAG.getConstant(1, DL, MVT::i32), ModuloAmt); - - SDValue Res; - if (InsertBit) { - SDValue BitMask = - DAG.getNode(ISD::SHL, DL, MVT::i32, - DAG.getZExtOrTrunc(InsertBit, DL, MVT::i32), ModuloAmt); - Res = - DAG.getNode(ISD::AND, DL, MVT::i32, X, DAG.getNOT(DL, Mask, MVT::i32)); - Res = DAG.getNode(ISD::OR, DL, MVT::i32, Res, BitMask); - } else { - if (StoredVal.getOpcode() == ISD::AND) - Mask = DAG.getNOT(DL, Mask, MVT::i32); - Res = DAG.getNode(StoredVal.getOpcode(), DL, MVT::i32, X, Mask); - } - - return DAG.getStore(St->getChain(), DL, Res, NewPtr, St->getPointerInfo(), - Align(), St->getMemOperand()->getFlags()); -} - static SDValue combineStore(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { @@ -53669,9 +53573,6 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, } } - if (SDValue R = narrowBitOpRMW(St, dl, DAG, Subtarget)) - return R; - // Convert store(cmov(load(p), x, CC), p) to cstore(x, p, CC) // store(cmov(x, load(p), CC), p) to cstore(x, p, InvertCC) if ((VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) && @@ -54604,9 +54505,8 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG, // truncation, see if we can convert the shift into a pointer offset instead. // Limit this to normal (non-ext) scalar integer loads. if (SrcVT.isScalarInteger() && Src.getOpcode() == ISD::SRL && - Src.hasOneUse() && ISD::isNormalLoad(Src.getOperand(0).getNode()) && - (Src.getOperand(0).hasOneUse() || - !DAG.getTargetLoweringInfo().isOperationLegal(ISD::LOAD, SrcVT))) { + Src.hasOneUse() && Src.getOperand(0).hasOneUse() && + ISD::isNormalLoad(Src.getOperand(0).getNode())) { auto *Ld = cast<LoadSDNode>(Src.getOperand(0)); if (Ld->isSimple() && VT.isByteSized() && isPowerOf2_64(VT.getSizeInBits())) { @@ -56406,7 +56306,6 @@ static SDValue combineAVX512SetCCToKMOV(EVT VT, SDValue Op0, ISD::CondCode CC, static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { - using namespace SDPatternMatch; const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); const SDValue LHS = N->getOperand(0); const SDValue RHS = N->getOperand(1); @@ -56465,37 +56364,6 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG, if (SDValue AndN = MatchAndCmpEq(RHS, LHS)) return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC); - // If we're performing a bit test on a larger than legal type, attempt - // to (aligned) shift down the value to the bottom 32-bits and then - // perform the bittest on the i32 value. - // ICMP_ZERO(AND(X,SHL(1,IDX))) - // --> ICMP_ZERO(AND(TRUNC(SRL(X,AND(IDX,-32))),SHL(1,AND(IDX,31)))) - if (isNullConstant(RHS) && - OpVT.getScalarSizeInBits() > (Subtarget.is64Bit() ? 64 : 32)) { - SDValue X, ShAmt; - if (sd_match(LHS, m_OneUse(m_And(m_Value(X), - m_Shl(m_One(), m_Value(ShAmt)))))) { - // Only attempt this if the shift amount is known to be in bounds. - KnownBits KnownAmt = DAG.computeKnownBits(ShAmt); - if (KnownAmt.getMaxValue().ult(OpVT.getScalarSizeInBits())) { - EVT AmtVT = ShAmt.getValueType(); - SDValue AlignAmt = - DAG.getNode(ISD::AND, DL, AmtVT, ShAmt, - DAG.getSignedConstant(-32LL, DL, AmtVT)); - SDValue ModuloAmt = DAG.getNode(ISD::AND, DL, AmtVT, ShAmt, - DAG.getConstant(31, DL, AmtVT)); - SDValue Mask = DAG.getNode( - ISD::SHL, DL, MVT::i32, DAG.getConstant(1, DL, MVT::i32), - DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8)); - X = DAG.getNode(ISD::SRL, DL, OpVT, X, AlignAmt); - X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X); - X = DAG.getNode(ISD::AND, DL, MVT::i32, X, Mask); - return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, MVT::i32), - CC); - } - } - } - // cmpeq(trunc(x),C) --> cmpeq(x,C) // cmpne(trunc(x),C) --> cmpne(x,C) // iff x upper bits are zero. |
