aboutsummaryrefslogtreecommitdiff
path: root/llvm
diff options
context:
space:
mode:
authorPhoebe Wang <phoebe.wang@intel.com>2023-09-07 20:33:31 +0800
committerPhoebe Wang <phoebe.wang@intel.com>2023-09-07 21:38:35 +0800
commit7dd48cc24de2d54d40527432cbee8a9d97a8a4f7 (patch)
treed948e491dc2b58222d4a6f760161828d0f20d6b8 /llvm
parent41518597705c558409725d4ba2d3c1af9110ca0d (diff)
downloadllvm-7dd48cc24de2d54d40527432cbee8a9d97a8a4f7.zip
llvm-7dd48cc24de2d54d40527432cbee8a9d97a8a4f7.tar.gz
llvm-7dd48cc24de2d54d40527432cbee8a9d97a8a4f7.tar.bz2
[X86][RFC] Add new option `-m[no-]evex512` to disable ZMM and 64-bit mask instructions for AVX512 features
This is an alternative to D157485 and a prerequisite for supporting AVX10. AVX10 Architecture Specification: https://cdrdv2.intel.com/v1/dl/getContent/784267 AVX10 Technical Paper: https://cdrdv2.intel.com/v1/dl/getContent/784343 RFC: https://discourse.llvm.org/t/rfc-design-for-avx10-feature-support/72661 Based on feedback from the LLVM and GCC communities, we have agreed to start by supporting `-m[no-]evex512` on the existing AVX512 features. The option `-mno-evex512` can be used with `-mavx512xxx` to build binaries that can run on both legacy AVX512 targets and AVX10-256. There is still debate about what the expected behavior should be when this option and `-mavx512xxx` are used together with `-mavx10.1-256`. We decided to defer support for `-mavx10.1` until we reach consensus. Alternatively, we may start by supporting AVX10.2 and not provide any AVX10.1 options. Reviewed By: RKSimon, skan Differential Revision: https://reviews.llvm.org/D159250
Diffstat (limited to 'llvm')
-rw-r--r--llvm/include/llvm/TargetParser/X86TargetParser.def1
-rw-r--r--llvm/lib/IR/Verifier.cpp22
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp7
-rw-r--r--llvm/lib/Target/X86/X86.td7
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.td1
-rw-r--r--llvm/lib/Target/X86/X86RegisterInfo.cpp9
-rw-r--r--llvm/lib/Target/X86/X86Subtarget.cpp11
-rw-r--r--llvm/lib/Target/X86/X86Subtarget.h3
-rw-r--r--llvm/lib/TargetParser/X86TargetParser.cpp29
-rw-r--r--llvm/test/CodeGen/X86/avx512bwvl-arith.ll1
-rw-r--r--llvm/test/CodeGen/X86/avx512vl-arith.ll1
11 files changed, 75 insertions, 17 deletions
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def
index 817db0f..85ff699 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.def
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.def
@@ -240,6 +240,7 @@ X86_FEATURE (SHA512, "sha512")
X86_FEATURE (SM3, "sm3")
X86_FEATURE (SM4, "sm4")
X86_FEATURE (AVXVNNIINT16, "avxvnniint16")
+X86_FEATURE (EVEX512, "evex512")
// These features aren't really CPU features, but the frontend can set them.
X86_FEATURE (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk")
X86_FEATURE (RETPOLINE_INDIRECT_BRANCHES, "retpoline-indirect-branches")
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 1a9ce08..c0f30a6 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -2030,6 +2030,17 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
"' does not apply to function return values",
V);
+ unsigned MaxParameterWidth = 0;
+ auto GetMaxParameterWidth = [&MaxParameterWidth](Type *Ty) {
+ if (Ty->isVectorTy()) {
+ if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
+ unsigned Size = VT->getPrimitiveSizeInBits().getFixedValue();
+ if (Size > MaxParameterWidth)
+ MaxParameterWidth = Size;
+ }
+ }
+ };
+ GetMaxParameterWidth(FT->getReturnType());
verifyParameterAttrs(RetAttrs, FT->getReturnType(), V);
// Verify parameter attributes.
@@ -2048,6 +2059,7 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
}
verifyParameterAttrs(ArgAttrs, Ty, V);
+ GetMaxParameterWidth(Ty);
if (ArgAttrs.hasAttribute(Attribute::Nest)) {
Check(!SawNest, "More than one parameter has attribute nest!", V);
@@ -2203,6 +2215,16 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
CheckFailed("invalid value for 'frame-pointer' attribute: " + FP, V);
}
+ // Check EVEX512 feature.
+ if (MaxParameterWidth >= 512 && Attrs.hasFnAttr("target-features")) {
+ Triple T(M.getTargetTriple());
+ if (T.isX86()) {
+ StringRef TF = Attrs.getFnAttr("target-features").getValueAsString();
+ Check(!TF.contains("+avx512f") || !TF.contains("-evex512"),
+ "512-bit vector arguments require 'evex512' for AVX512", V);
+ }
+ }
+
checkUnsignedBaseTenFuncAttr(Attrs, "patchable-function-prefix", V);
checkUnsignedBaseTenFuncAttr(Attrs, "patchable-function-entry", V);
checkUnsignedBaseTenFuncAttr(Attrs, "warn-stack-size", V);
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 6af3ebb..be167d6 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -285,6 +285,7 @@ private:
SmallVectorImpl<char> &CB) const;
PrefixKind emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
+ const MCSubtargetInfo &STI,
SmallVectorImpl<char> &CB) const;
void emitSegmentOverridePrefix(unsigned SegOperand, const MCInst &MI,
@@ -841,7 +842,7 @@ PrefixKind X86MCCodeEmitter::emitPrefixImpl(unsigned &CurOp, const MCInst &MI,
// REX prefix is optional, but if used must be immediately before the opcode
// Encoding type for this instruction.
return (TSFlags & X86II::EncodingMask)
- ? emitVEXOpcodePrefix(MemoryOperand, MI, CB)
+ ? emitVEXOpcodePrefix(MemoryOperand, MI, STI, CB)
: emitOpcodePrefix(MemoryOperand, MI, STI, CB);
}
@@ -860,6 +861,7 @@ PrefixKind X86MCCodeEmitter::emitPrefixImpl(unsigned &CurOp, const MCInst &MI,
/// \returns the used prefix.
PrefixKind
X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
+ const MCSubtargetInfo &STI,
SmallVectorImpl<char> &CB) const {
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
uint64_t TSFlags = Desc.TSFlags;
@@ -919,6 +921,9 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
Prefix.setL(TSFlags & X86II::VEX_L);
Prefix.setL2(TSFlags & X86II::EVEX_L2);
+ if ((TSFlags & X86II::EVEX_L2) && STI.hasFeature(X86::FeatureAVX512) &&
+ !STI.hasFeature(X86::FeatureEVEX512))
+ report_fatal_error("ZMM registers are not supported without EVEX512");
switch (TSFlags & X86II::OpPrefixMask) {
case X86II::PD:
Prefix.setPP(0x1); // 66
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index de034fa..64f91ae 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -119,6 +119,8 @@ def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true",
def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true",
"Support 16-bit floating point conversion instructions",
[FeatureAVX]>;
+def FeatureEVEX512 : SubtargetFeature<"evex512", "HasEVEX512", "true",
+ "Support ZMM and 64-bit mask instructions">;
def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512",
"Enable AVX-512 instructions",
[FeatureAVX2, FeatureFMA, FeatureF16C]>;
@@ -817,6 +819,7 @@ def ProcessorFeatures {
];
list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [
+ FeatureEVEX512,
FeatureBWI,
FeatureCDI,
FeatureDQI,
@@ -940,6 +943,7 @@ def ProcessorFeatures {
FeatureXSAVES,
FeatureCLFLUSHOPT,
FeatureAVX512,
+ FeatureEVEX512,
FeatureCDI,
FeatureDQI,
FeatureBWI,
@@ -982,6 +986,7 @@ def ProcessorFeatures {
// Cannonlake
list<SubtargetFeature> CNLAdditionalFeatures = [FeatureAVX512,
+ FeatureEVEX512,
FeatureCDI,
FeatureDQI,
FeatureBWI,
@@ -1262,6 +1267,7 @@ def ProcessorFeatures {
FeatureF16C,
FeatureFSGSBase,
FeatureAVX512,
+ FeatureEVEX512,
FeatureERI,
FeatureCDI,
FeaturePFI,
@@ -1471,6 +1477,7 @@ def ProcessorFeatures {
!listconcat(ZN2Features, ZN3AdditionalFeatures);
list<SubtargetFeature> ZN4Tuning = ZN3Tuning;
list<SubtargetFeature> ZN4AdditionalFeatures = [FeatureAVX512,
+ FeatureEVEX512,
FeatureCDI,
FeatureDQI,
FeatureBWI,
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 08e6e4e..a20fa6a 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -903,6 +903,7 @@ def NoAVX : Predicate<"!Subtarget->hasAVX()">;
def HasAVX : Predicate<"Subtarget->hasAVX()">;
def HasAVX2 : Predicate<"Subtarget->hasAVX2()">;
def HasAVX1Only : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX2()">;
+def HasEVEX512 : Predicate<"Subtarget->hasEVEX512()">;
def HasAVX512 : Predicate<"Subtarget->hasAVX512()">;
def UseAVX : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX512()">;
def UseAVX2 : Predicate<"Subtarget->hasAVX2() && !Subtarget->hasAVX512()">;
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index bd29e93..3504ca2 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -1030,7 +1030,14 @@ bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
VirtReg, Order, Hints, MF, VRM, Matrix);
- if (RC.getID() != X86::TILERegClassID)
+ unsigned ID = RC.getID();
+ const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
+ if ((ID == X86::VK64RegClassID || ID == X86::VK64WMRegClassID) &&
+ Subtarget.hasAVX512() && !Subtarget.hasEVEX512())
+ report_fatal_error(
+ "64-bit mask registers are not supported without EVEX512");
+
+ if (ID != X86::TILERegClassID)
return BaseImplRetVal;
ShapeT VirtShape = getTileShape(VirtReg, const_cast<VirtRegMap *>(VRM), MRI);
diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp
index 319b3c7..afb2545 100644
--- a/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -268,6 +268,17 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef TuneCPU,
if (!FS.empty())
FullFS = (Twine(FullFS) + "," + FS).str();
+ // Attach EVEX512 feature when we have AVX512 features and EVEX512 is not set.
+ size_t posNoEVEX512 = FS.rfind("-evex512");
+ size_t posNoAVX512F = FS.rfind("-avx512f");
+ size_t posEVEX512 = FS.rfind("+evex512");
+ size_t posAVX512F = FS.rfind("+avx512"); // Any AVX512XXX will enable AVX512F.
+
+ if (posAVX512F != StringRef::npos &&
+ (posNoAVX512F == StringRef::npos || posNoAVX512F < posAVX512F))
+ if (posEVEX512 == StringRef::npos && posNoEVEX512 == StringRef::npos)
+ FullFS += ",+evex512";
+
// Parse features string and set the CPU.
ParseSubtargetFeatures(CPU, TuneCPU, FullFS);
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index d69b8fd..a458b5f 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -264,7 +264,8 @@ public:
// If there are no 512-bit vectors and we prefer not to use 512-bit registers,
// disable them in the legalizer.
bool useAVX512Regs() const {
- return hasAVX512() && (canExtendTo512DQ() || RequiredVectorWidth > 256);
+ return hasAVX512() && hasEVEX512() &&
+ (canExtendTo512DQ() || RequiredVectorWidth > 256);
}
bool useLight256BitInstructions() const {
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index 274049c..b9908dd 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -72,7 +72,7 @@ constexpr FeatureBitset FeaturesX86_64_V2 = FeaturesX86_64 | FeatureSAHF |
constexpr FeatureBitset FeaturesX86_64_V3 =
FeaturesX86_64_V2 | FeatureAVX2 | FeatureBMI | FeatureBMI2 | FeatureF16C |
FeatureFMA | FeatureLZCNT | FeatureMOVBE | FeatureXSAVE;
-constexpr FeatureBitset FeaturesX86_64_V4 = FeaturesX86_64_V3 |
+constexpr FeatureBitset FeaturesX86_64_V4 = FeaturesX86_64_V3 | FeatureEVEX512 |
FeatureAVX512BW | FeatureAVX512CD |
FeatureAVX512DQ | FeatureAVX512VL;
@@ -96,8 +96,8 @@ constexpr FeatureBitset FeaturesBroadwell =
// Intel Knights Landing and Knights Mill
// Knights Landing has feature parity with Broadwell.
constexpr FeatureBitset FeaturesKNL =
- FeaturesBroadwell | FeatureAES | FeatureAVX512F | FeatureAVX512CD |
- FeatureAVX512ER | FeatureAVX512PF | FeaturePREFETCHWT1;
+ FeaturesBroadwell | FeatureAES | FeatureAVX512F | FeatureEVEX512 |
+ FeatureAVX512CD | FeatureAVX512ER | FeatureAVX512PF | FeaturePREFETCHWT1;
constexpr FeatureBitset FeaturesKNM = FeaturesKNL | FeatureAVX512VPOPCNTDQ;
// Intel Skylake processors.
@@ -107,9 +107,9 @@ constexpr FeatureBitset FeaturesSkylakeClient =
// SkylakeServer inherits all SkylakeClient features except SGX.
// FIXME: That doesn't match gcc.
constexpr FeatureBitset FeaturesSkylakeServer =
- (FeaturesSkylakeClient & ~FeatureSGX) | FeatureAVX512F | FeatureAVX512CD |
- FeatureAVX512DQ | FeatureAVX512BW | FeatureAVX512VL | FeatureCLWB |
- FeaturePKU;
+ (FeaturesSkylakeClient & ~FeatureSGX) | FeatureAVX512F | FeatureEVEX512 |
+ FeatureAVX512CD | FeatureAVX512DQ | FeatureAVX512BW | FeatureAVX512VL |
+ FeatureCLWB | FeaturePKU;
constexpr FeatureBitset FeaturesCascadeLake =
FeaturesSkylakeServer | FeatureAVX512VNNI;
constexpr FeatureBitset FeaturesCooperLake =
@@ -117,9 +117,9 @@ constexpr FeatureBitset FeaturesCooperLake =
// Intel 10nm processors.
constexpr FeatureBitset FeaturesCannonlake =
- FeaturesSkylakeClient | FeatureAVX512F | FeatureAVX512CD | FeatureAVX512DQ |
- FeatureAVX512BW | FeatureAVX512VL | FeatureAVX512IFMA | FeatureAVX512VBMI |
- FeaturePKU | FeatureSHA;
+ FeaturesSkylakeClient | FeatureAVX512F | FeatureEVEX512 | FeatureAVX512CD |
+ FeatureAVX512DQ | FeatureAVX512BW | FeatureAVX512VL | FeatureAVX512IFMA |
+ FeatureAVX512VBMI | FeaturePKU | FeatureSHA;
constexpr FeatureBitset FeaturesICLClient =
FeaturesCannonlake | FeatureAVX512BITALG | FeatureAVX512VBMI2 |
FeatureAVX512VNNI | FeatureAVX512VPOPCNTDQ | FeatureGFNI | FeatureRDPID |
@@ -230,11 +230,11 @@ static constexpr FeatureBitset FeaturesZNVER3 = FeaturesZNVER2 |
FeatureINVPCID | FeaturePKU |
FeatureVAES | FeatureVPCLMULQDQ;
static constexpr FeatureBitset FeaturesZNVER4 =
- FeaturesZNVER3 | FeatureAVX512F | FeatureAVX512CD | FeatureAVX512DQ |
- FeatureAVX512BW | FeatureAVX512VL | FeatureAVX512IFMA | FeatureAVX512VBMI |
- FeatureAVX512VBMI2 | FeatureAVX512VNNI | FeatureAVX512BITALG |
- FeatureAVX512VPOPCNTDQ | FeatureAVX512BF16 | FeatureGFNI |
- FeatureSHSTK;
+ FeaturesZNVER3 | FeatureAVX512F | FeatureEVEX512 | FeatureAVX512CD |
+ FeatureAVX512DQ | FeatureAVX512BW | FeatureAVX512VL | FeatureAVX512IFMA |
+ FeatureAVX512VBMI | FeatureAVX512VBMI2 | FeatureAVX512VNNI |
+ FeatureAVX512BITALG | FeatureAVX512VPOPCNTDQ | FeatureAVX512BF16 |
+ FeatureGFNI | FeatureSHSTK;
// D151696 tranplanted Mangling and OnlyForCPUDispatchSpecific from
// X86TargetParser.def to here. They are assigned by following ways:
@@ -542,6 +542,7 @@ constexpr FeatureBitset ImpliedFeaturesSSE4_1 = FeatureSSSE3;
constexpr FeatureBitset ImpliedFeaturesSSE4_2 = FeatureSSE4_1;
constexpr FeatureBitset ImpliedFeaturesAVX = FeatureSSE4_2;
constexpr FeatureBitset ImpliedFeaturesAVX2 = FeatureAVX;
+constexpr FeatureBitset ImpliedFeaturesEVEX512 = {};
constexpr FeatureBitset ImpliedFeaturesAVX512F =
FeatureAVX2 | FeatureF16C | FeatureFMA;
diff --git a/llvm/test/CodeGen/X86/avx512bwvl-arith.ll b/llvm/test/CodeGen/X86/avx512bwvl-arith.ll
index 988097b..4988fc3 100644
--- a/llvm/test/CodeGen/X86/avx512bwvl-arith.ll
+++ b/llvm/test/CodeGen/X86/avx512bwvl-arith.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,-evex512 | FileCheck %s
; 256-bit
diff --git a/llvm/test/CodeGen/X86/avx512vl-arith.ll b/llvm/test/CodeGen/X86/avx512vl-arith.ll
index d5953f2..1006c56 100644
--- a/llvm/test/CodeGen/X86/avx512vl-arith.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-arith.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl,-evex512 --show-mc-encoding| FileCheck %s
; 256-bit