diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/include/llvm/TargetParser/X86TargetParser.def | 1 | ||||
| -rw-r--r-- | llvm/lib/IR/Verifier.cpp | 22 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 7 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86.td | 7 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.td | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86RegisterInfo.cpp | 9 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86Subtarget.cpp | 11 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86Subtarget.h | 3 | ||||
| -rw-r--r-- | llvm/lib/TargetParser/X86TargetParser.cpp | 29 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512bwvl-arith.ll | 1 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512vl-arith.ll | 1 |
11 files changed, 75 insertions, 17 deletions
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def index 817db0f..85ff699 100644 --- a/llvm/include/llvm/TargetParser/X86TargetParser.def +++ b/llvm/include/llvm/TargetParser/X86TargetParser.def @@ -240,6 +240,7 @@ X86_FEATURE (SHA512, "sha512") X86_FEATURE (SM3, "sm3") X86_FEATURE (SM4, "sm4") X86_FEATURE (AVXVNNIINT16, "avxvnniint16") +X86_FEATURE (EVEX512, "evex512") // These features aren't really CPU features, but the frontend can set them. X86_FEATURE (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk") X86_FEATURE (RETPOLINE_INDIRECT_BRANCHES, "retpoline-indirect-branches") diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 1a9ce08..c0f30a6 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -2030,6 +2030,17 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs, "' does not apply to function return values", V); + unsigned MaxParameterWidth = 0; + auto GetMaxParameterWidth = [&MaxParameterWidth](Type *Ty) { + if (Ty->isVectorTy()) { + if (auto *VT = dyn_cast<FixedVectorType>(Ty)) { + unsigned Size = VT->getPrimitiveSizeInBits().getFixedValue(); + if (Size > MaxParameterWidth) + MaxParameterWidth = Size; + } + } + }; + GetMaxParameterWidth(FT->getReturnType()); verifyParameterAttrs(RetAttrs, FT->getReturnType(), V); // Verify parameter attributes. @@ -2048,6 +2059,7 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs, } verifyParameterAttrs(ArgAttrs, Ty, V); + GetMaxParameterWidth(Ty); if (ArgAttrs.hasAttribute(Attribute::Nest)) { Check(!SawNest, "More than one parameter has attribute nest!", V); @@ -2203,6 +2215,16 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs, CheckFailed("invalid value for 'frame-pointer' attribute: " + FP, V); } + // Check EVEX512 feature. + if (MaxParameterWidth >= 512 && Attrs.hasFnAttr("target-features")) { + Triple T(M.getTargetTriple()); + if (T.isX86()) { + StringRef TF = Attrs.getFnAttr("target-features").getValueAsString(); + Check(!TF.contains("+avx512f") || !TF.contains("-evex512"), + "512-bit vector arguments require 'evex512' for AVX512", V); + } + } + checkUnsignedBaseTenFuncAttr(Attrs, "patchable-function-prefix", V); checkUnsignedBaseTenFuncAttr(Attrs, "patchable-function-entry", V); checkUnsignedBaseTenFuncAttr(Attrs, "warn-stack-size", V); diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 6af3ebb..be167d6 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -285,6 +285,7 @@ private: SmallVectorImpl<char> &CB) const; PrefixKind emitVEXOpcodePrefix(int MemOperand, const MCInst &MI, + const MCSubtargetInfo &STI, SmallVectorImpl<char> &CB) const; void emitSegmentOverridePrefix(unsigned SegOperand, const MCInst &MI, @@ -841,7 +842,7 @@ PrefixKind X86MCCodeEmitter::emitPrefixImpl(unsigned &CurOp, const MCInst &MI, // REX prefix is optional, but if used must be immediately before the opcode // Encoding type for this instruction. return (TSFlags & X86II::EncodingMask) - ? emitVEXOpcodePrefix(MemoryOperand, MI, CB) + ? emitVEXOpcodePrefix(MemoryOperand, MI, STI, CB) : emitOpcodePrefix(MemoryOperand, MI, STI, CB); } @@ -860,6 +861,7 @@ PrefixKind X86MCCodeEmitter::emitPrefixImpl(unsigned &CurOp, const MCInst &MI, /// \returns the used prefix. PrefixKind X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI, + const MCSubtargetInfo &STI, SmallVectorImpl<char> &CB) const { const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); uint64_t TSFlags = Desc.TSFlags; @@ -919,6 +921,9 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI, Prefix.setL(TSFlags & X86II::VEX_L); Prefix.setL2(TSFlags & X86II::EVEX_L2); + if ((TSFlags & X86II::EVEX_L2) && STI.hasFeature(X86::FeatureAVX512) && + !STI.hasFeature(X86::FeatureEVEX512)) + report_fatal_error("ZMM registers are not supported without EVEX512"); switch (TSFlags & X86II::OpPrefixMask) { case X86II::PD: Prefix.setPP(0x1); // 66 diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index de034fa..64f91ae 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -119,6 +119,8 @@ def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true", def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true", "Support 16-bit floating point conversion instructions", [FeatureAVX]>; +def FeatureEVEX512 : SubtargetFeature<"evex512", "HasEVEX512", "true", + "Support ZMM and 64-bit mask instructions">; def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512", "Enable AVX-512 instructions", [FeatureAVX2, FeatureFMA, FeatureF16C]>; @@ -817,6 +819,7 @@ def ProcessorFeatures { ]; list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [ + FeatureEVEX512, FeatureBWI, FeatureCDI, FeatureDQI, @@ -940,6 +943,7 @@ def ProcessorFeatures { FeatureXSAVES, FeatureCLFLUSHOPT, FeatureAVX512, + FeatureEVEX512, FeatureCDI, FeatureDQI, FeatureBWI, @@ -982,6 +986,7 @@ def ProcessorFeatures { // Cannonlake list<SubtargetFeature> CNLAdditionalFeatures = [FeatureAVX512, + FeatureEVEX512, FeatureCDI, FeatureDQI, FeatureBWI, @@ -1262,6 +1267,7 @@ def ProcessorFeatures { FeatureF16C, FeatureFSGSBase, FeatureAVX512, + FeatureEVEX512, FeatureERI, FeatureCDI, FeaturePFI, @@ -1471,6 +1477,7 @@ def ProcessorFeatures { !listconcat(ZN2Features, ZN3AdditionalFeatures); list<SubtargetFeature> ZN4Tuning = ZN3Tuning; list<SubtargetFeature> ZN4AdditionalFeatures = [FeatureAVX512, + FeatureEVEX512, FeatureCDI, FeatureDQI, FeatureBWI, diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index 08e6e4e..a20fa6a 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -903,6 +903,7 @@ def NoAVX : Predicate<"!Subtarget->hasAVX()">; def HasAVX : Predicate<"Subtarget->hasAVX()">; def HasAVX2 : Predicate<"Subtarget->hasAVX2()">; def HasAVX1Only : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX2()">; +def HasEVEX512 : Predicate<"Subtarget->hasEVEX512()">; def HasAVX512 : Predicate<"Subtarget->hasAVX512()">; def UseAVX : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX512()">; def UseAVX2 : Predicate<"Subtarget->hasAVX2() && !Subtarget->hasAVX512()">; diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp index bd29e93..3504ca2 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -1030,7 +1030,14 @@ bool X86RegisterInfo::getRegAllocationHints(Register VirtReg, bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints( VirtReg, Order, Hints, MF, VRM, Matrix); - if (RC.getID() != X86::TILERegClassID) + unsigned ID = RC.getID(); + const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>(); + if ((ID == X86::VK64RegClassID || ID == X86::VK64WMRegClassID) && + Subtarget.hasAVX512() && !Subtarget.hasEVEX512()) + report_fatal_error( + "64-bit mask registers are not supported without EVEX512"); + + if (ID != X86::TILERegClassID) return BaseImplRetVal; ShapeT VirtShape = getTileShape(VirtReg, const_cast<VirtRegMap *>(VRM), MRI); diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp index 319b3c7..afb2545 100644 --- a/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/llvm/lib/Target/X86/X86Subtarget.cpp @@ -268,6 +268,17 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef TuneCPU, if (!FS.empty()) FullFS = (Twine(FullFS) + "," + FS).str(); + // Attach EVEX512 feature when we have AVX512 features and EVEX512 is not set. + size_t posNoEVEX512 = FS.rfind("-evex512"); + size_t posNoAVX512F = FS.rfind("-avx512f"); + size_t posEVEX512 = FS.rfind("+evex512"); + size_t posAVX512F = FS.rfind("+avx512"); // Any AVX512XXX will enable AVX512F. + + if (posAVX512F != StringRef::npos && + (posNoAVX512F == StringRef::npos || posNoAVX512F < posAVX512F)) + if (posEVEX512 == StringRef::npos && posNoEVEX512 == StringRef::npos) + FullFS += ",+evex512"; + // Parse features string and set the CPU. ParseSubtargetFeatures(CPU, TuneCPU, FullFS); diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index d69b8fd..a458b5f 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -264,7 +264,8 @@ public: // If there are no 512-bit vectors and we prefer not to use 512-bit registers, // disable them in the legalizer. bool useAVX512Regs() const { - return hasAVX512() && (canExtendTo512DQ() || RequiredVectorWidth > 256); + return hasAVX512() && hasEVEX512() && + (canExtendTo512DQ() || RequiredVectorWidth > 256); } bool useLight256BitInstructions() const { diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp index 274049c..b9908dd 100644 --- a/llvm/lib/TargetParser/X86TargetParser.cpp +++ b/llvm/lib/TargetParser/X86TargetParser.cpp @@ -72,7 +72,7 @@ constexpr FeatureBitset FeaturesX86_64_V2 = FeaturesX86_64 | FeatureSAHF | constexpr FeatureBitset FeaturesX86_64_V3 = FeaturesX86_64_V2 | FeatureAVX2 | FeatureBMI | FeatureBMI2 | FeatureF16C | FeatureFMA | FeatureLZCNT | FeatureMOVBE | FeatureXSAVE; -constexpr FeatureBitset FeaturesX86_64_V4 = FeaturesX86_64_V3 | +constexpr FeatureBitset FeaturesX86_64_V4 = FeaturesX86_64_V3 | FeatureEVEX512 | FeatureAVX512BW | FeatureAVX512CD | FeatureAVX512DQ | FeatureAVX512VL; @@ -96,8 +96,8 @@ constexpr FeatureBitset FeaturesBroadwell = // Intel Knights Landing and Knights Mill // Knights Landing has feature parity with Broadwell. constexpr FeatureBitset FeaturesKNL = - FeaturesBroadwell | FeatureAES | FeatureAVX512F | FeatureAVX512CD | - FeatureAVX512ER | FeatureAVX512PF | FeaturePREFETCHWT1; + FeaturesBroadwell | FeatureAES | FeatureAVX512F | FeatureEVEX512 | + FeatureAVX512CD | FeatureAVX512ER | FeatureAVX512PF | FeaturePREFETCHWT1; constexpr FeatureBitset FeaturesKNM = FeaturesKNL | FeatureAVX512VPOPCNTDQ; // Intel Skylake processors. @@ -107,9 +107,9 @@ constexpr FeatureBitset FeaturesSkylakeClient = // SkylakeServer inherits all SkylakeClient features except SGX. // FIXME: That doesn't match gcc. constexpr FeatureBitset FeaturesSkylakeServer = - (FeaturesSkylakeClient & ~FeatureSGX) | FeatureAVX512F | FeatureAVX512CD | - FeatureAVX512DQ | FeatureAVX512BW | FeatureAVX512VL | FeatureCLWB | - FeaturePKU; + (FeaturesSkylakeClient & ~FeatureSGX) | FeatureAVX512F | FeatureEVEX512 | + FeatureAVX512CD | FeatureAVX512DQ | FeatureAVX512BW | FeatureAVX512VL | + FeatureCLWB | FeaturePKU; constexpr FeatureBitset FeaturesCascadeLake = FeaturesSkylakeServer | FeatureAVX512VNNI; constexpr FeatureBitset FeaturesCooperLake = @@ -117,9 +117,9 @@ constexpr FeatureBitset FeaturesCooperLake = // Intel 10nm processors. constexpr FeatureBitset FeaturesCannonlake = - FeaturesSkylakeClient | FeatureAVX512F | FeatureAVX512CD | FeatureAVX512DQ | - FeatureAVX512BW | FeatureAVX512VL | FeatureAVX512IFMA | FeatureAVX512VBMI | - FeaturePKU | FeatureSHA; + FeaturesSkylakeClient | FeatureAVX512F | FeatureEVEX512 | FeatureAVX512CD | + FeatureAVX512DQ | FeatureAVX512BW | FeatureAVX512VL | FeatureAVX512IFMA | + FeatureAVX512VBMI | FeaturePKU | FeatureSHA; constexpr FeatureBitset FeaturesICLClient = FeaturesCannonlake | FeatureAVX512BITALG | FeatureAVX512VBMI2 | FeatureAVX512VNNI | FeatureAVX512VPOPCNTDQ | FeatureGFNI | FeatureRDPID | @@ -230,11 +230,11 @@ static constexpr FeatureBitset FeaturesZNVER3 = FeaturesZNVER2 | FeatureINVPCID | FeaturePKU | FeatureVAES | FeatureVPCLMULQDQ; static constexpr FeatureBitset FeaturesZNVER4 = - FeaturesZNVER3 | FeatureAVX512F | FeatureAVX512CD | FeatureAVX512DQ | - FeatureAVX512BW | FeatureAVX512VL | FeatureAVX512IFMA | FeatureAVX512VBMI | - FeatureAVX512VBMI2 | FeatureAVX512VNNI | FeatureAVX512BITALG | - FeatureAVX512VPOPCNTDQ | FeatureAVX512BF16 | FeatureGFNI | - FeatureSHSTK; + FeaturesZNVER3 | FeatureAVX512F | FeatureEVEX512 | FeatureAVX512CD | + FeatureAVX512DQ | FeatureAVX512BW | FeatureAVX512VL | FeatureAVX512IFMA | + FeatureAVX512VBMI | FeatureAVX512VBMI2 | FeatureAVX512VNNI | + FeatureAVX512BITALG | FeatureAVX512VPOPCNTDQ | FeatureAVX512BF16 | + FeatureGFNI | FeatureSHSTK; // D151696 tranplanted Mangling and OnlyForCPUDispatchSpecific from // X86TargetParser.def to here. They are assigned by following ways: @@ -542,6 +542,7 @@ constexpr FeatureBitset ImpliedFeaturesSSE4_1 = FeatureSSSE3; constexpr FeatureBitset ImpliedFeaturesSSE4_2 = FeatureSSE4_1; constexpr FeatureBitset ImpliedFeaturesAVX = FeatureSSE4_2; constexpr FeatureBitset ImpliedFeaturesAVX2 = FeatureAVX; +constexpr FeatureBitset ImpliedFeaturesEVEX512 = {}; constexpr FeatureBitset ImpliedFeaturesAVX512F = FeatureAVX2 | FeatureF16C | FeatureFMA; diff --git a/llvm/test/CodeGen/X86/avx512bwvl-arith.ll b/llvm/test/CodeGen/X86/avx512bwvl-arith.ll index 988097b..4988fc3 100644 --- a/llvm/test/CodeGen/X86/avx512bwvl-arith.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-arith.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,-evex512 | FileCheck %s ; 256-bit diff --git a/llvm/test/CodeGen/X86/avx512vl-arith.ll b/llvm/test/CodeGen/X86/avx512vl-arith.ll index d5953f2..1006c56 100644 --- a/llvm/test/CodeGen/X86/avx512vl-arith.ll +++ b/llvm/test/CodeGen/X86/avx512vl-arith.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl,-evex512 --show-mc-encoding| FileCheck %s ; 256-bit |
