diff options
Diffstat (limited to 'llvm/lib/Target/X86')
-rw-r--r-- | llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86.td | 7 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 74 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLoweringCall.cpp | 20 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 22 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86MCInstLower.cpp | 31 |
6 files changed, 69 insertions, 87 deletions
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp index b81641f..28fa2cd 100644 --- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp +++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp @@ -414,8 +414,6 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, getActionDefinitionsBuilder(G_SEXT_INREG).lower(); - getActionDefinitionsBuilder(G_IS_FPCLASS).lower(); - // fp constants getActionDefinitionsBuilder(G_FCONSTANT) .legalFor({s32, s64}) diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 6db780f..8e08d16 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -1338,6 +1338,10 @@ def ProcessorFeatures { list<SubtargetFeature> PTLFeatures = !listremove(ARLSFeatures, [FeatureWIDEKL]); + // Novalake + list<SubtargetFeature> NVLFeatures = + !listconcat(PTLFeatures, [FeaturePREFETCHI]); + // Clearwaterforest list<SubtargetFeature> CWFAdditionalFeatures = [FeaturePREFETCHI, FeatureAVXVNNIINT16, @@ -1883,6 +1887,9 @@ foreach P = ["pantherlake", "wildcatlake"] in { def : ProcModel<P, AlderlakePModel, ProcessorFeatures.PTLFeatures, ProcessorFeatures.ADLTuning>; } +def : ProcModel<"novalake", AlderlakePModel, ProcessorFeatures.NVLFeatures, + ProcessorFeatures.ADLTuning>; + def : ProcModel<"clearwaterforest", AlderlakePModel, ProcessorFeatures.CWFFeatures, ProcessorFeatures.ADLTuning>; def : ProcModel<"emeraldrapids", SapphireRapidsModel, diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a0b64ff..b05d7c7 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -29755,65 +29755,30 @@ static SDValue LowervXi8MulWithUNPCK(SDValue A, SDValue B, const SDLoc &dl, const X86Subtarget &Subtarget, SelectionDAG &DAG, SDValue *Low = nullptr) { - unsigned NumElts = VT.getVectorNumElements(); - // For vXi8 we will unpack the low and high half of each 128 bit lane to widen // to a vXi16 type. Do the multiplies, shift the results and pack the half // lane results back together. // We'll take different approaches for signed and unsigned. - // For unsigned we'll use punpcklbw/punpckhbw to put zero extend the bytes - // and use pmullw to calculate the full 16-bit product. + // For unsigned we'll use punpcklbw/punpckhbw to zero extend the bytes to + // words and use pmullw to calculate the full 16-bit product. // For signed we'll use punpcklbw/punpckbw to extend the bytes to words and // shift them left into the upper byte of each word. This allows us to use // pmulhw to calculate the full 16-bit product. This trick means we don't // need to sign extend the bytes to use pmullw. - - MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts / 2); + MVT ExVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2); SDValue Zero = DAG.getConstant(0, dl, VT); - SDValue ALo, AHi; + SDValue ALo, AHi, BLo, BHi; if (IsSigned) { ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, Zero, A)); - AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, Zero, A)); - } else { - ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A, Zero)); - AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A, Zero)); - } - - SDValue BLo, BHi; - if (ISD::isBuildVectorOfConstantSDNodes(B.getNode())) { - // If the RHS is a constant, manually unpackl/unpackh and extend. - SmallVector<SDValue, 16> LoOps, HiOps; - for (unsigned i = 0; i != NumElts; i += 16) { - for (unsigned j = 0; j != 8; ++j) { - SDValue LoOp = B.getOperand(i + j); - SDValue HiOp = B.getOperand(i + j + 8); - - if (IsSigned) { - LoOp = DAG.getAnyExtOrTrunc(LoOp, dl, MVT::i16); - HiOp = DAG.getAnyExtOrTrunc(HiOp, dl, MVT::i16); - LoOp = DAG.getNode(ISD::SHL, dl, MVT::i16, LoOp, - DAG.getConstant(8, dl, MVT::i16)); - HiOp = DAG.getNode(ISD::SHL, dl, MVT::i16, HiOp, - DAG.getConstant(8, dl, MVT::i16)); - } else { - LoOp = DAG.getZExtOrTrunc(LoOp, dl, MVT::i16); - HiOp = DAG.getZExtOrTrunc(HiOp, dl, MVT::i16); - } - - LoOps.push_back(LoOp); - HiOps.push_back(HiOp); - } - } - - BLo = DAG.getBuildVector(ExVT, dl, LoOps); - BHi = DAG.getBuildVector(ExVT, dl, HiOps); - } else if (IsSigned) { BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, Zero, B)); + AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, Zero, A)); BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, Zero, B)); } else { + ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A, Zero)); BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, B, Zero)); + AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A, Zero)); BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, B, Zero)); } @@ -29826,7 +29791,7 @@ static SDValue LowervXi8MulWithUNPCK(SDValue A, SDValue B, const SDLoc &dl, if (Low) *Low = getPack(DAG, Subtarget, dl, VT, RLo, RHi); - return getPack(DAG, Subtarget, dl, VT, RLo, RHi, /*PackHiHalf*/ true); + return getPack(DAG, Subtarget, dl, VT, RLo, RHi, /*PackHiHalf=*/true); } static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget, @@ -44848,10 +44813,16 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode( } case X86ISD::PCMPGT: // icmp sgt(0, R) == ashr(R, BitWidth-1). - // iff we only need the sign bit then we can use R directly. - if (OriginalDemandedBits.isSignMask() && - ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode())) - return TLO.CombineTo(Op, Op.getOperand(1)); + if (ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode())) { + // iff we only need the signbit then we can use R directly. + if (OriginalDemandedBits.isSignMask()) + return TLO.CombineTo(Op, Op.getOperand(1)); + // otherwise we just need R's signbit for the comparison. + APInt SignMask = APInt::getSignMask(BitWidth); + if (SimplifyDemandedBits(Op.getOperand(1), SignMask, OriginalDemandedElts, + Known, TLO, Depth + 1)) + return true; + } break; case X86ISD::MOVMSK: { SDValue Src = Op.getOperand(0); @@ -47761,6 +47732,15 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, DL, DAG, Subtarget)) return V; + // If the sign bit is known then BLENDV can be folded away. + if (N->getOpcode() == X86ISD::BLENDV) { + KnownBits KnownCond = DAG.computeKnownBits(Cond); + if (KnownCond.isNegative()) + return LHS; + if (KnownCond.isNonNegative()) + return RHS; + } + if (N->getOpcode() == ISD::VSELECT || N->getOpcode() == X86ISD::BLENDV) { SmallVector<int, 64> CondMask; if (createShuffleMaskFromVSELECT(CondMask, Cond, diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp index 6dd43b2..37d7772 100644 --- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp +++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp @@ -606,16 +606,24 @@ Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const { void X86TargetLowering::insertSSPDeclarations(Module &M) const { // MSVC CRT provides functionalities for stack protection. - if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() || - Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) { + RTLIB::LibcallImpl SecurityCheckCookieLibcall = + getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE); + + RTLIB::LibcallImpl SecurityCookieVar = + getLibcallImpl(RTLIB::STACK_CHECK_GUARD); + if (SecurityCheckCookieLibcall != RTLIB::Unsupported && + SecurityCookieVar != RTLIB::Unsupported) { + // MSVC CRT provides functionalities for stack protection. // MSVC CRT has a global variable holding security cookie. - M.getOrInsertGlobal("__security_cookie", + M.getOrInsertGlobal(getLibcallImplName(SecurityCookieVar), PointerType::getUnqual(M.getContext())); // MSVC CRT has a function to validate security cookie. - FunctionCallee SecurityCheckCookie = M.getOrInsertFunction( - "__security_check_cookie", Type::getVoidTy(M.getContext()), - PointerType::getUnqual(M.getContext())); + FunctionCallee SecurityCheckCookie = + M.getOrInsertFunction(getLibcallImplName(SecurityCheckCookieLibcall), + Type::getVoidTy(M.getContext()), + PointerType::getUnqual(M.getContext())); + if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) { F->setCallingConv(CallingConv::X86_FastCall); F->addParamAttr(0, Attribute::AttrKind::InReg); diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 1d2cd39..5c23f91 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -10809,39 +10809,27 @@ void X86InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB, if (!ST.hasSSE1()) return; - // PXOR is safe to use because it doesn't affect flags. - BuildMI(MBB, Iter, DL, get(X86::PXORrr), Reg) - .addReg(Reg, RegState::Undef) - .addReg(Reg, RegState::Undef); + BuildMI(MBB, Iter, DL, get(X86::V_SET0), Reg); } else if (X86::VR256RegClass.contains(Reg)) { // YMM# if (!ST.hasAVX()) return; - // VPXOR is safe to use because it doesn't affect flags. - BuildMI(MBB, Iter, DL, get(X86::VPXORrr), Reg) - .addReg(Reg, RegState::Undef) - .addReg(Reg, RegState::Undef); + BuildMI(MBB, Iter, DL, get(X86::AVX_SET0), Reg); } else if (X86::VR512RegClass.contains(Reg)) { // ZMM# if (!ST.hasAVX512()) return; - // VPXORY is safe to use because it doesn't affect flags. - BuildMI(MBB, Iter, DL, get(X86::VPXORYrr), Reg) - .addReg(Reg, RegState::Undef) - .addReg(Reg, RegState::Undef); + BuildMI(MBB, Iter, DL, get(X86::AVX512_512_SET0), Reg); } else if (X86::VK1RegClass.contains(Reg) || X86::VK2RegClass.contains(Reg) || X86::VK4RegClass.contains(Reg) || X86::VK8RegClass.contains(Reg) || X86::VK16RegClass.contains(Reg)) { if (!ST.hasVLX()) return; - // KXOR is safe to use because it doesn't affect flags. - unsigned Op = ST.hasBWI() ? X86::KXORQkk : X86::KXORWkk; - BuildMI(MBB, Iter, DL, get(Op), Reg) - .addReg(Reg, RegState::Undef) - .addReg(Reg, RegState::Undef); + unsigned Op = ST.hasBWI() ? X86::KSET0Q : X86::KSET0W; + BuildMI(MBB, Iter, DL, get(Op), Reg); } } diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index 1fca466f..713d504 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -1928,6 +1928,17 @@ static void addConstantComments(const MachineInstr *MI, #define INSTR_CASE(Prefix, Instr, Suffix, Postfix) \ case X86::Prefix##Instr##Suffix##rm##Postfix: +#define CASE_AVX512_ARITH_RM(Instr) \ + INSTR_CASE(V, Instr, Z128, ) \ + INSTR_CASE(V, Instr, Z128, k) \ + INSTR_CASE(V, Instr, Z128, kz) \ + INSTR_CASE(V, Instr, Z256, ) \ + INSTR_CASE(V, Instr, Z256, k) \ + INSTR_CASE(V, Instr, Z256, kz) \ + INSTR_CASE(V, Instr, Z, ) \ + INSTR_CASE(V, Instr, Z, k) \ + INSTR_CASE(V, Instr, Z, kz) + #define CASE_ARITH_RM(Instr) \ INSTR_CASE(, Instr, , ) /* SSE */ \ INSTR_CASE(V, Instr, , ) /* AVX-128 */ \ @@ -1943,22 +1954,12 @@ static void addConstantComments(const MachineInstr *MI, INSTR_CASE(V, Instr, Z, kz) // TODO: Add additional instructions when useful. - CASE_ARITH_RM(PMADDUBSW) { - unsigned SrcIdx = getSrcIdx(MI, 1); - if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) { - std::string Comment; - raw_string_ostream CS(Comment); - unsigned VectorWidth = - X86::getVectorRegisterWidth(MI->getDesc().operands()[0]); - CS << "["; - printConstant(C, VectorWidth, CS); - CS << "]"; - OutStreamer.AddComment(CS.str()); - } - break; - } - + CASE_ARITH_RM(PMADDUBSW) CASE_ARITH_RM(PMADDWD) + CASE_ARITH_RM(PMULDQ) + CASE_ARITH_RM(PMULUDQ) + CASE_ARITH_RM(PMULLD) + CASE_AVX512_ARITH_RM(PMULLQ) CASE_ARITH_RM(PMULLW) CASE_ARITH_RM(PMULHW) CASE_ARITH_RM(PMULHUW) |