Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--   llvm/lib/Target/X86/X86ISelLowering.cpp   85
1 file changed, 70 insertions, 15 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index cd04ff5..931a10b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -13783,10 +13783,12 @@ static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
     // so prevents folding a load into this instruction or making a copy.
     const int UnpackLoMask[] = {0, 0, 1, 1};
     const int UnpackHiMask[] = {2, 2, 3, 3};
-    if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2))
-      Mask = UnpackLoMask;
-    else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2))
-      Mask = UnpackHiMask;
+    if (!isSingleElementRepeatedMask(Mask)) {
+      if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2))
+        Mask = UnpackLoMask;
+      else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2))
+        Mask = UnpackHiMask;
+    }
 
     return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
                        getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
@@ -44615,8 +44617,11 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
     APInt DemandedMask = OriginalDemandedBits << ShAmt;
 
-    // If we just want the sign bit then we don't need to shift it.
-    if (OriginalDemandedBits.isSignMask())
+    // If we only want bits that already match the signbit then we don't need
+    // to shift.
+    unsigned NumHiDemandedBits = BitWidth - OriginalDemandedBits.countr_zero();
+    if (TLO.DAG.ComputeNumSignBits(Op0, OriginalDemandedElts, Depth + 1) >=
+        NumHiDemandedBits)
       return TLO.CombineTo(Op, Op0);
 
     // fold (VSRAI (VSHLI X, C1), C1) --> X iff NumSignBits(X) > C1
@@ -45169,6 +45174,18 @@ bool X86TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
   case X86ISD::Wrapper:
   case X86ISD::WrapperRIP:
     return true;
+  case X86ISD::PACKSS:
+  case X86ISD::PACKUS: {
+    APInt DemandedLHS, DemandedRHS;
+    getPackDemandedElts(Op.getSimpleValueType(), DemandedElts, DemandedLHS,
+                        DemandedRHS);
+    return (!DemandedLHS ||
+            DAG.isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), DemandedLHS,
+                                                 PoisonOnly, Depth + 1)) &&
+           (!DemandedRHS ||
+            DAG.isGuaranteedNotToBeUndefOrPoison(Op.getOperand(1), DemandedRHS,
+                                                 PoisonOnly, Depth + 1));
+  }
   case X86ISD::INSERTPS:
   case X86ISD::BLENDI:
   case X86ISD::PSHUFB:
@@ -45239,6 +45256,10 @@ bool X86TargetLowering::canCreateUndefOrPoisonForTargetNode(
   case X86ISD::BLENDI:
   case X86ISD::BLENDV:
     return false;
+  // SSE packs.
+  case X86ISD::PACKSS:
+  case X86ISD::PACKUS:
+    return false;
   // SSE target shuffles.
   case X86ISD::INSERTPS:
   case X86ISD::PSHUFB:
@@ -45438,7 +45459,8 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
                                   const SDLoc &DL,
                                   const X86Subtarget &Subtarget) {
   EVT SrcVT = Src.getValueType();
-  if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
+  if (Subtarget.useSoftFloat() || !SrcVT.isSimple() ||
+      SrcVT.getScalarType() != MVT::i1)
     return SDValue();
 
   // Recognize the IR pattern for the movmsk intrinsic under SSE1 before type
@@ -52369,16 +52391,41 @@ static SDValue combineAddOrSubToADCOrSBB(bool IsSub, const SDLoc &DL, EVT VT,
   // Do not flip "e > c", where "c" is a constant, because Cmp instruction
   // cannot take an immediate as its first operand.
   //
-  if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() &&
-      EFLAGS.getValueType().isInteger() &&
-      !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
-    SDValue NewSub =
-        DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
-                    EFLAGS.getOperand(1), EFLAGS.getOperand(0));
-    SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo());
+  // If EFLAGS is from a CMP that compares the same operands as the earlier
+  // SUB producing X (i.e. CMP X, Y), we can directly use the carry flag with
+  // SBB/ADC without creating a flipped SUB.
+  if (EFLAGS.getOpcode() == X86ISD::CMP &&
+      EFLAGS.getValueType().isInteger() && X == EFLAGS.getOperand(0)) {
     return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL,
                        DAG.getVTList(VT, MVT::i32), X,
-                       DAG.getConstant(0, DL, VT), NewEFLAGS);
+                       DAG.getConstant(0, DL, VT), EFLAGS);
+  }
+
+  if (EFLAGS.getOpcode() == X86ISD::SUB &&
+      EFLAGS.getValueType().isInteger() &&
+      !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
+    // Only create NewSub if we know one of the folds will succeed to avoid
+    // introducing a temporary node that may persist and affect one-use checks
+    // below.
+    if (EFLAGS.getNode()->hasOneUse()) {
+      SDValue NewSub = DAG.getNode(
+          X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
+          EFLAGS.getOperand(1), EFLAGS.getOperand(0));
+      SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo());
+      return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL,
+                         DAG.getVTList(VT, MVT::i32), X,
+                         DAG.getConstant(0, DL, VT), NewEFLAGS);
+    }
+
+    if (IsSub && X == EFLAGS.getValue(0)) {
+      SDValue NewSub = DAG.getNode(
+          X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
+          EFLAGS.getOperand(1), EFLAGS.getOperand(0));
+      SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo());
+      return DAG.getNode(X86ISD::SBB, DL, DAG.getVTList(VT, MVT::i32),
+                         EFLAGS.getOperand(0), EFLAGS.getOperand(1),
+                         NewEFLAGS);
+    }
   }
 }
@@ -58090,6 +58137,14 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
   if (SDValue V = combineToHorizontalAddSub(N, DAG, Subtarget))
     return V;
 
+  // Prefer VSHLI to reduce uses, X86FixupInstTunings may revert this depending
+  // on the scheduler model. Limit multiple users to AVX+ targets to prevent
+  // introducing extra register moves.
+  if (Op0 == Op1 && supportedVectorShiftWithImm(VT, Subtarget, ISD::SHL))
+    if (Subtarget.hasAVX() || N->isOnlyUserOf(Op0.getNode()))
+      return getTargetVShiftByConstNode(X86ISD::VSHLI, DL, VT.getSimpleVT(),
+                                        Op0, 1, DAG);
+
   // Canonicalize hidden LEA pattern:
   // Fold (add (sub (shl x, c), y), z) -> (sub (add (shl x, c), z), y)
   // iff c < 4
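The VSRAI change in SimplifyDemandedBitsForTargetNode and the (add x, x) change in combineAdd both rest on simple scalar identities. The sketch below is illustrative only, assuming C++20 and plain fixed-width integers in place of APInt/SelectionDAG; numSignBits() here is a hypothetical stand-in for ComputeNumSignBits(), not an LLVM API.

// Scalar sanity check of the two identities used by the combines above.
#include <bit>
#include <cassert>
#include <cstdint>

// Number of leading bits equal to the sign bit (including the sign bit itself).
static unsigned numSignBits(int32_t V) {
  unsigned N = 1;
  while (N < 32 && ((V >> (31 - N)) & 1) == ((V >> 31) & 1))
    ++N;
  return N;
}

int main() {
  // VSRAI fold: if only the top NumHiDemandedBits are demanded and the input
  // already has at least that many sign bits, every demanded bit is a copy of
  // the sign bit both before and after the arithmetic shift right, so the
  // shift can be dropped.
  int32_t Op0 = (int32_t)0xFFFFF123;    // 20 sign bits
  unsigned ShAmt = 4;
  uint32_t DemandedBits = 0xFFF00000u;  // only the top 12 bits are demanded
  unsigned NumHiDemandedBits = 32 - std::countr_zero(DemandedBits); // = 12
  assert(numSignBits(Op0) >= NumHiDemandedBits);
  assert(((uint32_t)(Op0 >> ShAmt) & DemandedBits) ==
         ((uint32_t)Op0 & DemandedBits));

  // combineAdd fold: X + X equals X << 1 in every lane, which is why
  // (add x, x) can be rewritten as a shift-left-by-one (VSHLI).
  uint32_t X = 0x9ABCDEF0u;
  assert(X + X == (X << 1));
  return 0;
}

Built with, for example, clang++ -std=c++20, both assertions hold, mirroring the reasoning in the new comments: demanded high bits of a value with enough sign bits are unchanged by an arithmetic shift right, and adding a value to itself is exactly a shift left by one.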