Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 32 +++++++++-----------------------
1 file changed, 9 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index efeddd7..cd04ff5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4456,8 +4456,8 @@ SDValue SplitOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget,
                          bool AllowAVX512 = true) {
   assert(Subtarget.hasSSE2() && "Target assumed to support at least SSE2");
   unsigned NumSubs = 1;
-  if ((CheckBWI && Subtarget.useBWIRegs()) ||
-      (!CheckBWI && AllowAVX512 && Subtarget.useAVX512Regs())) {
+  if (AllowAVX512 && ((CheckBWI && Subtarget.useBWIRegs()) ||
+                      (!CheckBWI && Subtarget.useAVX512Regs()))) {
     if (VT.getSizeInBits() > 512) {
       NumSubs = VT.getSizeInBits() / 512;
       assert((VT.getSizeInBits() % 512) == 0 && "Illegal vector size");
@@ -30313,22 +30313,8 @@ static SDValue LowerShiftByScalarImmediate(SDValue Op, SelectionDAG &DAG,
 
   uint64_t ShiftAmt = APIntShiftAmt.getZExtValue();
 
-  if (supportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode())) {
-    // Hardware support for vector shifts is sparse which makes us scalarize the
-    // vector operations in many cases. Also, on sandybridge ADD is faster than
-    // shl: (shl V, 1) -> (add (freeze V), (freeze V))
-    if (Op.getOpcode() == ISD::SHL && ShiftAmt == 1) {
-      // R may be undef at run-time, but (shl R, 1) must be an even number (LSB
-      // must be 0). (add undef, undef) however can be any value. To make this
-      // safe, we must freeze R to ensure that register allocation uses the same
-      // register for an undefined value. This ensures that the result will
-      // still be even and preserves the original semantics.
-      R = DAG.getFreeze(R);
-      return DAG.getNode(ISD::ADD, dl, VT, R, R);
-    }
-
+  if (supportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
     return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
-  }
 
   // i64 SRA needs to be performed as partial shifts.
   if (((!Subtarget.hasXOP() && VT == MVT::v2i64) ||
@@ -31229,16 +31215,16 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
   unsigned NumElts = VT.getVectorNumElements();
 
   if (Subtarget.hasVBMI2() && EltSizeInBits > 8) {
-    if (IsFSHR)
-      std::swap(Op0, Op1);
     if (IsCstSplat) {
+      if (IsFSHR)
+        std::swap(Op0, Op1);
       uint64_t ShiftAmt = APIntShiftAmt.urem(EltSizeInBits);
       SDValue Imm = DAG.getTargetConstant(ShiftAmt, DL, MVT::i8);
       return getAVX512Node(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT,
                            {Op0, Op1, Imm}, DAG, Subtarget);
     }
-    return getAVX512Node(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT,
+    return getAVX512Node(IsFSHR ? ISD::FSHR : ISD::FSHL, DL, VT,
                          {Op0, Op1, Amt}, DAG, Subtarget);
   }
 
   assert((VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8 ||
@@ -35153,8 +35139,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(VALIGN)
   NODE_NAME_CASE(VSHLD)
   NODE_NAME_CASE(VSHRD)
-  NODE_NAME_CASE(VSHLDV)
-  NODE_NAME_CASE(VSHRDV)
   NODE_NAME_CASE(PSHUFD)
   NODE_NAME_CASE(PSHUFHW)
   NODE_NAME_CASE(PSHUFLW)
@@ -45185,6 +45169,7 @@ bool X86TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
   case X86ISD::Wrapper:
   case X86ISD::WrapperRIP:
     return true;
+  case X86ISD::INSERTPS:
   case X86ISD::BLENDI:
   case X86ISD::PSHUFB:
   case X86ISD::PSHUFD:
@@ -45255,6 +45240,7 @@ bool X86TargetLowering::canCreateUndefOrPoisonForTargetNode(
   case X86ISD::BLENDV:
     return false;
   // SSE target shuffles.
+  case X86ISD::INSERTPS:
   case X86ISD::PSHUFB:
   case X86ISD::PSHUFD:
   case X86ISD::UNPCKL:
@@ -46211,7 +46197,7 @@ static SDValue createVPDPBUSD(SelectionDAG &DAG, SDValue LHS, SDValue RHS,
 
   SDValue Zero = DAG.getConstant(0, DL, DpVT);
   return SplitOpsAndApply(DAG, Subtarget, DL, DpVT, {Zero, DpOp0, DpOp1},
-                          DpBuilder, false);
+                          DpBuilder, /*CheckBWI=*/false, Subtarget.hasVNNI());
 }
 
 // Create a PSADBW given two sources representable as zexts of vXi8.
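Note on the LowerShiftByScalarImmediate hunk: the deleted block carried the freeze rationale for rewriting a vector shift-left-by-one as an add. A minimal standalone sketch of that removed fold follows; the helper name foldShlByOneToAdd is hypothetical and not part of this patch, only the two SelectionDAG calls come from the deleted code:

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Sketch of the removed fold: (shl R, 1) -> (add (freeze R), (freeze R)).
// (shl R, 1) is always even, but (add R, R) with an undef R need not be,
// because each separate use of undef may resolve to a different value.
// Freezing R once and reusing the frozen node pins both addends to the same
// arbitrary-but-fixed value, so the result stays even and the original
// semantics are preserved.
static SDValue foldShlByOneToAdd(SelectionDAG &DAG, const SDLoc &dl, EVT VT,
                                 SDValue R) {
  SDValue FrozenR = DAG.getFreeze(R); // one frozen copy, used as both operands
  return DAG.getNode(ISD::ADD, dl, VT, FrozenR, FrozenR);
}

After this patch, shift-by-immediate lowering returns getTargetVShiftByConstNode directly, so the ADD-over-SHL tuning described in the deleted comment no longer happens at this point in lowering.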