diff options
Diffstat (limited to 'llvm/lib/TargetParser')
-rw-r--r-- | llvm/lib/TargetParser/ARMTargetParser.cpp | 10 | ||||
-rw-r--r-- | llvm/lib/TargetParser/CMakeLists.txt | 3 | ||||
-rw-r--r-- | llvm/lib/TargetParser/Host.cpp | 205 | ||||
-rw-r--r-- | llvm/lib/TargetParser/RISCVISAInfo.cpp | 28 | ||||
-rw-r--r-- | llvm/lib/TargetParser/RISCVTargetParser.cpp | 9 | ||||
-rw-r--r-- | llvm/lib/TargetParser/TargetParser.cpp | 660 | ||||
-rw-r--r-- | llvm/lib/TargetParser/Triple.cpp | 89 | ||||
-rw-r--r-- | llvm/lib/TargetParser/X86TargetParser.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/TargetParser/XtensaTargetParser.cpp | 93 |
9 files changed, 666 insertions, 435 deletions
diff --git a/llvm/lib/TargetParser/ARMTargetParser.cpp b/llvm/lib/TargetParser/ARMTargetParser.cpp index dcb30b7..08944e6 100644 --- a/llvm/lib/TargetParser/ARMTargetParser.cpp +++ b/llvm/lib/TargetParser/ARMTargetParser.cpp @@ -535,9 +535,8 @@ void ARM::fillValidCPUArchList(SmallVectorImpl<StringRef> &Values) { } } -StringRef ARM::computeDefaultTargetABI(const Triple &TT, StringRef CPU) { - StringRef ArchName = - CPU.empty() ? TT.getArchName() : getArchName(parseCPUArch(CPU)); +StringRef ARM::computeDefaultTargetABI(const Triple &TT) { + StringRef ArchName = TT.getArchName(); if (TT.isOSBinFormatMachO()) { if (TT.getEnvironment() == Triple::EABI || @@ -575,10 +574,9 @@ StringRef ARM::computeDefaultTargetABI(const Triple &TT, StringRef CPU) { } } -ARM::ARMABI ARM::computeTargetABI(const Triple &TT, StringRef CPU, - StringRef ABIName) { +ARM::ARMABI ARM::computeTargetABI(const Triple &TT, StringRef ABIName) { if (ABIName.empty()) - ABIName = ARM::computeDefaultTargetABI(TT, CPU); + ABIName = ARM::computeDefaultTargetABI(TT); if (ABIName == "aapcs16") return ARM_ABI_AAPCS16; diff --git a/llvm/lib/TargetParser/CMakeLists.txt b/llvm/lib/TargetParser/CMakeLists.txt index 8f8b3a5..5eecfbf 100644 --- a/llvm/lib/TargetParser/CMakeLists.txt +++ b/llvm/lib/TargetParser/CMakeLists.txt @@ -9,7 +9,7 @@ if (HAS_WERROR_GLOBAL_CTORS AND NOT LLVM_HAS_NOGLOBAL_CTOR_MUTEX) endif() # Solaris code uses kstat, so specify dependency explicitly for shared builds. 
-if (${CMAKE_SYSTEM_NAME} MATCHES "SunOS") +if ("${CMAKE_SYSTEM_NAME}" MATCHES "SunOS") set(system_libs kstat) endif() @@ -27,6 +27,7 @@ add_llvm_component_library(LLVMTargetParser TargetParser.cpp Triple.cpp X86TargetParser.cpp + XtensaTargetParser.cpp ADDITIONAL_HEADER_DIRS Unix diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 7e09d30..2482753 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -11,7 +11,10 @@ //===----------------------------------------------------------------------===// #include "llvm/TargetParser/Host.h" +#include "llvm/ADT/Bitfields.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" @@ -167,35 +170,10 @@ StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) { .Default(generic); } -StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { - // The cpuid register on arm is not accessible from user space. On Linux, - // it is exposed through the /proc/cpuinfo file. - - // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line - // in all cases. - SmallVector<StringRef, 32> Lines; - ProcCpuinfoContent.split(Lines, '\n'); - - // Look for the CPU implementer and hardware lines, and store the CPU part - // numbers found. - StringRef Implementer; - StringRef Hardware; - SmallVector<StringRef, 32> Parts; - for (StringRef Line : Lines) { - if (Line.consume_front("CPU implementer")) - Implementer = Line.ltrim("\t :"); - else if (Line.consume_front("Hardware")) - Hardware = Line.ltrim("\t :"); - else if (Line.consume_front("CPU part")) - Parts.emplace_back(Line.ltrim("\t :")); - } - - // Last `Part' seen, in case we don't analyse all `Parts' parsed. - StringRef Part = Parts.empty() ? StringRef() : Parts.back(); - - // Remove duplicate `Parts'. 
- llvm::sort(Parts); - Parts.erase(llvm::unique(Parts), Parts.end()); +StringRef +getHostCPUNameForARMFromComponents(StringRef Implementer, StringRef Hardware, + StringRef Part, ArrayRef<StringRef> Parts, + function_ref<unsigned()> GetVariant) { auto MatchBigLittle = [](auto const &Parts, StringRef Big, StringRef Little) { if (Parts.size() == 2) @@ -343,21 +321,17 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { if (Implementer == "0x53") { // Samsung Electronics Co., Ltd. // The Exynos chips have a convoluted ID scheme that doesn't seem to follow // any predictive pattern across variants and parts. - unsigned Variant = 0, Part = 0; // Look for the CPU variant line, whose value is a 1 digit hexadecimal // number, corresponding to the Variant bits in the CP15/C0 register. - for (auto I : Lines) - if (I.consume_front("CPU variant")) - I.ltrim("\t :").getAsInteger(0, Variant); + unsigned Variant = GetVariant(); - // Look for the CPU part line, whose value is a 3 digit hexadecimal - // number, corresponding to the PartNum bits in the CP15/C0 register. - for (auto I : Lines) - if (I.consume_front("CPU part")) - I.ltrim("\t :").getAsInteger(0, Part); + // Convert the CPU part line, whose value is a 3 digit hexadecimal number, + // corresponding to the PartNum bits in the CP15/C0 register. + unsigned PartAsInt; + Part.getAsInteger(0, PartAsInt); - unsigned Exynos = (Variant << 12) | Part; + unsigned Exynos = (Variant << 12) | PartAsInt; switch (Exynos) { default: // Default by falling through to Exynos M3. @@ -416,6 +390,78 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { return "generic"; } +StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { + // The cpuid register on arm is not accessible from user space. On Linux, + // it is exposed through the /proc/cpuinfo file. + + // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line + // in all cases. 
+ SmallVector<StringRef, 32> Lines; + ProcCpuinfoContent.split(Lines, '\n'); + + // Look for the CPU implementer and hardware lines, and store the CPU part + // numbers found. + StringRef Implementer; + StringRef Hardware; + SmallVector<StringRef, 32> Parts; + for (StringRef Line : Lines) { + if (Line.consume_front("CPU implementer")) + Implementer = Line.ltrim("\t :"); + else if (Line.consume_front("Hardware")) + Hardware = Line.ltrim("\t :"); + else if (Line.consume_front("CPU part")) + Parts.emplace_back(Line.ltrim("\t :")); + } + + // Last `Part' seen, in case we don't analyse all `Parts' parsed. + StringRef Part = Parts.empty() ? StringRef() : Parts.back(); + + // Remove duplicate `Parts'. + llvm::sort(Parts); + Parts.erase(llvm::unique(Parts), Parts.end()); + + auto GetVariant = [&]() { + unsigned Variant = 0; + for (auto I : Lines) + if (I.consume_front("CPU variant")) + I.ltrim("\t :").getAsInteger(0, Variant); + return Variant; + }; + + return getHostCPUNameForARMFromComponents(Implementer, Hardware, Part, Parts, + GetVariant); +} + +StringRef sys::detail::getHostCPUNameForARM(uint64_t PrimaryCpuInfo, + ArrayRef<uint64_t> UniqueCpuInfos) { + // On Windows, the registry provides cached copies of the MIDR_EL1 register. 
+ using PartNum = Bitfield::Element<uint16_t, 4, 12>; + using Implementer = Bitfield::Element<uint16_t, 24, 8>; + using Variant = Bitfield::Element<uint16_t, 20, 4>; + + SmallVector<std::string> PartsHolder; + PartsHolder.reserve(UniqueCpuInfos.size()); + for (auto Info : UniqueCpuInfos) + PartsHolder.push_back("0x" + utohexstr(Bitfield::get<PartNum>(Info), + /*LowerCase*/ true, + /*Width*/ 3)); + + SmallVector<StringRef> Parts; + Parts.reserve(PartsHolder.size()); + for (const auto &Part : PartsHolder) + Parts.push_back(Part); + + return getHostCPUNameForARMFromComponents( + "0x" + utohexstr(Bitfield::get<Implementer>(PrimaryCpuInfo), + /*LowerCase*/ true, + /*Width*/ 2), + /*Hardware*/ "", + "0x" + utohexstr(Bitfield::get<PartNum>(PrimaryCpuInfo), + /*LowerCase*/ true, + /*Width*/ 3), + Parts, [=]() { return Bitfield::get<Variant>(PrimaryCpuInfo); }); +} + namespace { StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) { switch (Id) { @@ -713,20 +759,20 @@ static StringRef getIntelProcessorTypeAndSubtype(unsigned Family, StringRef CPU; switch (Family) { - case 3: + case 0x3: CPU = "i386"; break; - case 4: + case 0x4: CPU = "i486"; break; - case 5: + case 0x5: if (testFeature(X86::FEATURE_MMX)) { CPU = "pentium-mmx"; break; } CPU = "pentium"; break; - case 6: + case 0x6: switch (Model) { case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile // processor, Intel Core 2 Quad processor, Intel Core 2 Quad @@ -1074,7 +1120,7 @@ static StringRef getIntelProcessorTypeAndSubtype(unsigned Family, break; } break; - case 15: { + case 0xf: { if (testFeature(X86::FEATURE_64BIT)) { CPU = "nocona"; break; @@ -1086,7 +1132,7 @@ static StringRef getIntelProcessorTypeAndSubtype(unsigned Family, CPU = "pentium4"; break; } - case 19: + case 0x13: switch (Model) { // Diamond Rapids: case 0x01: @@ -1450,6 +1496,75 @@ StringRef sys::getHostCPUName() { return "generic"; } +#elif defined(_M_ARM64) || defined(_M_ARM64EC) + +StringRef sys::getHostCPUName() 
{ + constexpr char CentralProcessorKeyName[] = + "HARDWARE\\DESCRIPTION\\System\\CentralProcessor"; + // Sub key names are simple numbers ("0", "1", etc.) so 10 chars should be + // enough for the slash and name. + constexpr size_t SubKeyNameMaxSize = ARRAYSIZE(CentralProcessorKeyName) + 10; + + SmallVector<uint64_t> Values; + uint64_t PrimaryCpuInfo; + char PrimaryPartKeyName[SubKeyNameMaxSize]; + DWORD PrimaryPartKeyNameSize = 0; + HKEY CentralProcessorKey; + if (RegOpenKeyExA(HKEY_LOCAL_MACHINE, CentralProcessorKeyName, 0, KEY_READ, + &CentralProcessorKey) == ERROR_SUCCESS) { + for (unsigned Index = 0; Index < UINT32_MAX; ++Index) { + char SubKeyName[SubKeyNameMaxSize]; + DWORD SubKeySize = SubKeyNameMaxSize; + HKEY SubKey; + if ((RegEnumKeyExA(CentralProcessorKey, Index, SubKeyName, &SubKeySize, + nullptr, nullptr, nullptr, + nullptr) == ERROR_SUCCESS) && + (RegOpenKeyExA(CentralProcessorKey, SubKeyName, 0, KEY_READ, + &SubKey) == ERROR_SUCCESS)) { + // The "CP 4000" registry key contains a cached copy of the MIDR_EL1 + // register. + uint64_t RegValue; + DWORD ActualType; + DWORD RegValueSize = sizeof(RegValue); + if ((RegQueryValueExA(SubKey, "CP 4000", nullptr, &ActualType, + (PBYTE)&RegValue, + &RegValueSize) == ERROR_SUCCESS) && + (ActualType == REG_QWORD) && RegValueSize == sizeof(RegValue)) { + // Assume that the part with the "highest" reg key name is the primary + // part (to match the way that Linux's cpuinfo is written). Win32 + // makes no guarantees about the order of sub keys, so we have to + // compare the names. + if (PrimaryPartKeyNameSize < SubKeySize || + (PrimaryPartKeyNameSize == SubKeySize && + ::memcmp(SubKeyName, PrimaryPartKeyName, SubKeySize) > 0)) { + PrimaryCpuInfo = RegValue; + ::memcpy(PrimaryPartKeyName, SubKeyName, SubKeySize + 1); + PrimaryPartKeyNameSize = SubKeySize; + } + if (!llvm::is_contained(Values, RegValue)) { + Values.push_back(RegValue); + } + } + RegCloseKey(SubKey); + } else { + // No more sub keys. 
+ break; + } + } + RegCloseKey(CentralProcessorKey); + } + + if (Values.empty()) { + return "generic"; + } + + // Win32 makes no guarantees about the order of sub keys, so sort to ensure + // reproducibility. + llvm::sort(Values); + + return detail::getHostCPUNameForARM(PrimaryCpuInfo, Values); +} + #elif defined(__APPLE__) && defined(__powerpc__) StringRef sys::getHostCPUName() { host_basic_info_data_t hostInfo; diff --git a/llvm/lib/TargetParser/RISCVISAInfo.cpp b/llvm/lib/TargetParser/RISCVISAInfo.cpp index d6afb8a..7617058 100644 --- a/llvm/lib/TargetParser/RISCVISAInfo.cpp +++ b/llvm/lib/TargetParser/RISCVISAInfo.cpp @@ -833,19 +833,6 @@ static bool operator<(StringRef LHS, const ImpliedExtsEntry &RHS) { #include "llvm/TargetParser/RISCVTargetParserDef.inc" void RISCVISAInfo::updateImplication() { - bool HasE = Exts.count("e") != 0; - bool HasI = Exts.count("i") != 0; - - // If not in e extension and i extension does not exist, i extension is - // implied - if (!HasE && !HasI) { - auto Version = findDefaultVersion("i"); - Exts["i"] = *Version; - } - - if (HasE && HasI) - Exts.erase("i"); - assert(llvm::is_sorted(ImpliedExts) && "Table not sorted by Name"); // This loop may execute over 1 iteration since implication can be layered @@ -887,6 +874,21 @@ void RISCVISAInfo::updateImplication() { auto Version = findDefaultVersion("zcf"); Exts["zcf"] = *Version; } + + // Handle I/E after implications have been resolved, in case either + // of them was implied by another extension. 
+ bool HasE = Exts.count("e") != 0; + bool HasI = Exts.count("i") != 0; + + // If not in e extension and i extension does not exist, i extension is + // implied + if (!HasE && !HasI) { + auto Version = findDefaultVersion("i"); + Exts["i"] = *Version; + } + + if (HasE && HasI) + Exts.erase("i"); } static constexpr StringLiteral CombineIntoExts[] = { diff --git a/llvm/lib/TargetParser/RISCVTargetParser.cpp b/llvm/lib/TargetParser/RISCVTargetParser.cpp index 9957ec0..b53a1b9 100644 --- a/llvm/lib/TargetParser/RISCVTargetParser.cpp +++ b/llvm/lib/TargetParser/RISCVTargetParser.cpp @@ -153,12 +153,13 @@ namespace RISCVVType { // // Bits | Name | Description // -----+------------+------------------------------------------------ +// 8 | altfmt | Alternative format for bf16 // 7 | vma | Vector mask agnostic // 6 | vta | Vector tail agnostic // 5:3 | vsew[2:0] | Standard element width (SEW) setting // 2:0 | vlmul[2:0] | Vector register group multiplier (LMUL) setting unsigned encodeVTYPE(VLMUL VLMul, unsigned SEW, bool TailAgnostic, - bool MaskAgnostic) { + bool MaskAgnostic, bool AltFmt) { assert(isValidSEW(SEW) && "Invalid SEW"); unsigned VLMulBits = static_cast<unsigned>(VLMul); unsigned VSEWBits = encodeSEW(SEW); @@ -167,6 +168,8 @@ unsigned encodeVTYPE(VLMUL VLMul, unsigned SEW, bool TailAgnostic, VTypeI |= 0x40; if (MaskAgnostic) VTypeI |= 0x80; + if (AltFmt) + VTypeI |= 0x100; return VTypeI; } @@ -200,6 +203,10 @@ void printVType(unsigned VType, raw_ostream &OS) { unsigned Sew = getSEW(VType); OS << "e" << Sew; + bool AltFmt = RISCVVType::isAltFmt(VType); + if (AltFmt) + OS << "alt"; + unsigned LMul; bool Fractional; std::tie(LMul, Fractional) = decodeVLMUL(getVLMUL(VType)); diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index 126be71..2194ef4 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -364,8 +364,326 @@ StringRef AMDGPU::getCanonicalArchName(const Triple &T, 
StringRef Arch) { return T.isAMDGCN() ? getArchNameAMDGCN(ProcKind) : getArchNameR600(ProcKind); } -void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, - StringMap<bool> &Features) { +static std::pair<FeatureError, StringRef> +insertWaveSizeFeature(StringRef GPU, const Triple &T, + const StringMap<bool> &DefaultFeatures, + StringMap<bool> &Features) { + const bool IsNullGPU = GPU.empty(); + const bool TargetHasWave32 = DefaultFeatures.count("wavefrontsize32"); + const bool TargetHasWave64 = DefaultFeatures.count("wavefrontsize64"); + const bool HaveWave32 = Features.count("wavefrontsize32"); + const bool HaveWave64 = Features.count("wavefrontsize64"); + if (HaveWave32 && HaveWave64) + return {AMDGPU::INVALID_FEATURE_COMBINATION, + "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive"}; + + if (HaveWave32 && !IsNullGPU && TargetHasWave64) + return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "wavefrontsize32"}; + + if (HaveWave64 && !IsNullGPU && TargetHasWave32) + return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "wavefrontsize64"}; + + // Don't assume any wavesize with an unknown subtarget. + // Default to wave32 if target supports both. + if (!IsNullGPU && !HaveWave32 && !HaveWave64 && !TargetHasWave32 && + !TargetHasWave64) + Features.insert(std::make_pair("wavefrontsize32", true)); + + for (const auto &Entry : DefaultFeatures) { + if (!Features.count(Entry.getKey())) + Features[Entry.getKey()] = Entry.getValue(); + } + + return {NO_ERROR, StringRef()}; +} + +/// Fills Features map with default values for given target GPU. +/// \p Features contains overriding target features and this function returns +/// default target features with entries overridden by \p Features. 
+static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, + StringMap<bool> &Features) { + AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU); + switch (Kind) { + case GK_GFX1250: + Features["ci-insts"] = true; + Features["dot7-insts"] = true; + Features["dot8-insts"] = true; + Features["dl-insts"] = true; + Features["16-bit-insts"] = true; + Features["dpp"] = true; + Features["gfx8-insts"] = true; + Features["gfx9-insts"] = true; + Features["gfx10-insts"] = true; + Features["gfx10-3-insts"] = true; + Features["gfx11-insts"] = true; + Features["gfx12-insts"] = true; + Features["gfx1250-insts"] = true; + Features["bitop3-insts"] = true; + Features["prng-inst"] = true; + Features["tanh-insts"] = true; + Features["tensor-cvt-lut-insts"] = true; + Features["transpose-load-f4f6-insts"] = true; + Features["bf16-trans-insts"] = true; + Features["bf16-cvt-insts"] = true; + Features["fp8-conversion-insts"] = true; + Features["fp8e5m3-insts"] = true; + Features["permlane16-swap"] = true; + Features["ashr-pk-insts"] = true; + Features["atomic-buffer-pk-add-bf16-inst"] = true; + Features["vmem-pref-insts"] = true; + Features["atomic-fadd-rtn-insts"] = true; + Features["atomic-buffer-global-pk-add-f16-insts"] = true; + Features["atomic-flat-pk-add-16-insts"] = true; + Features["atomic-global-pk-add-bf16-inst"] = true; + Features["atomic-ds-pk-add-16-insts"] = true; + Features["setprio-inc-wg-inst"] = true; + Features["atomic-fmin-fmax-global-f32"] = true; + Features["atomic-fmin-fmax-global-f64"] = true; + Features["wavefrontsize32"] = true; + break; + case GK_GFX1201: + case GK_GFX1200: + case GK_GFX12_GENERIC: + Features["ci-insts"] = true; + Features["dot7-insts"] = true; + Features["dot8-insts"] = true; + Features["dot9-insts"] = true; + Features["dot10-insts"] = true; + Features["dot11-insts"] = true; + Features["dot12-insts"] = true; + Features["dl-insts"] = true; + Features["atomic-ds-pk-add-16-insts"] = true; + Features["atomic-flat-pk-add-16-insts"] = true; + 
Features["atomic-buffer-global-pk-add-f16-insts"] = true; + Features["atomic-buffer-pk-add-bf16-inst"] = true; + Features["atomic-global-pk-add-bf16-inst"] = true; + Features["16-bit-insts"] = true; + Features["dpp"] = true; + Features["gfx8-insts"] = true; + Features["gfx9-insts"] = true; + Features["gfx10-insts"] = true; + Features["gfx10-3-insts"] = true; + Features["gfx11-insts"] = true; + Features["gfx12-insts"] = true; + Features["atomic-fadd-rtn-insts"] = true; + Features["image-insts"] = true; + Features["fp8-conversion-insts"] = true; + Features["atomic-fmin-fmax-global-f32"] = true; + break; + case GK_GFX1153: + case GK_GFX1152: + case GK_GFX1151: + case GK_GFX1150: + case GK_GFX1103: + case GK_GFX1102: + case GK_GFX1101: + case GK_GFX1100: + case GK_GFX11_GENERIC: + Features["ci-insts"] = true; + Features["dot5-insts"] = true; + Features["dot7-insts"] = true; + Features["dot8-insts"] = true; + Features["dot9-insts"] = true; + Features["dot10-insts"] = true; + Features["dot12-insts"] = true; + Features["dl-insts"] = true; + Features["16-bit-insts"] = true; + Features["dpp"] = true; + Features["gfx8-insts"] = true; + Features["gfx9-insts"] = true; + Features["gfx10-insts"] = true; + Features["gfx10-3-insts"] = true; + Features["gfx11-insts"] = true; + Features["atomic-fadd-rtn-insts"] = true; + Features["image-insts"] = true; + Features["gws"] = true; + Features["atomic-fmin-fmax-global-f32"] = true; + break; + case GK_GFX1036: + case GK_GFX1035: + case GK_GFX1034: + case GK_GFX1033: + case GK_GFX1032: + case GK_GFX1031: + case GK_GFX1030: + case GK_GFX10_3_GENERIC: + Features["ci-insts"] = true; + Features["dot1-insts"] = true; + Features["dot2-insts"] = true; + Features["dot5-insts"] = true; + Features["dot6-insts"] = true; + Features["dot7-insts"] = true; + Features["dot10-insts"] = true; + Features["dl-insts"] = true; + Features["16-bit-insts"] = true; + Features["dpp"] = true; + Features["gfx8-insts"] = true; + Features["gfx9-insts"] = true; + 
Features["gfx10-insts"] = true; + Features["gfx10-3-insts"] = true; + Features["image-insts"] = true; + Features["s-memrealtime"] = true; + Features["s-memtime-inst"] = true; + Features["gws"] = true; + Features["vmem-to-lds-load-insts"] = true; + Features["atomic-fmin-fmax-global-f32"] = true; + Features["atomic-fmin-fmax-global-f64"] = true; + break; + case GK_GFX1012: + case GK_GFX1011: + Features["dot1-insts"] = true; + Features["dot2-insts"] = true; + Features["dot5-insts"] = true; + Features["dot6-insts"] = true; + Features["dot7-insts"] = true; + Features["dot10-insts"] = true; + [[fallthrough]]; + case GK_GFX1013: + case GK_GFX1010: + case GK_GFX10_1_GENERIC: + Features["dl-insts"] = true; + Features["ci-insts"] = true; + Features["16-bit-insts"] = true; + Features["dpp"] = true; + Features["gfx8-insts"] = true; + Features["gfx9-insts"] = true; + Features["gfx10-insts"] = true; + Features["image-insts"] = true; + Features["s-memrealtime"] = true; + Features["s-memtime-inst"] = true; + Features["gws"] = true; + Features["vmem-to-lds-load-insts"] = true; + Features["atomic-fmin-fmax-global-f32"] = true; + Features["atomic-fmin-fmax-global-f64"] = true; + break; + case GK_GFX950: + Features["bitop3-insts"] = true; + Features["fp6bf6-cvt-scale-insts"] = true; + Features["fp4-cvt-scale-insts"] = true; + Features["bf8-cvt-scale-insts"] = true; + Features["fp8-cvt-scale-insts"] = true; + Features["f16bf16-to-fp6bf6-cvt-scale-insts"] = true; + Features["f32-to-f16bf16-cvt-sr-insts"] = true; + Features["prng-inst"] = true; + Features["permlane16-swap"] = true; + Features["permlane32-swap"] = true; + Features["ashr-pk-insts"] = true; + Features["dot12-insts"] = true; + Features["dot13-insts"] = true; + Features["atomic-buffer-pk-add-bf16-inst"] = true; + Features["gfx950-insts"] = true; + [[fallthrough]]; + case GK_GFX942: + Features["fp8-insts"] = true; + Features["fp8-conversion-insts"] = true; + if (Kind != GK_GFX950) + Features["xf32-insts"] = true; + 
[[fallthrough]]; + case GK_GFX9_4_GENERIC: + Features["gfx940-insts"] = true; + Features["atomic-ds-pk-add-16-insts"] = true; + Features["atomic-flat-pk-add-16-insts"] = true; + Features["atomic-global-pk-add-bf16-inst"] = true; + Features["gfx90a-insts"] = true; + Features["atomic-buffer-global-pk-add-f16-insts"] = true; + Features["atomic-fadd-rtn-insts"] = true; + Features["dot3-insts"] = true; + Features["dot4-insts"] = true; + Features["dot5-insts"] = true; + Features["dot6-insts"] = true; + Features["mai-insts"] = true; + Features["dl-insts"] = true; + Features["dot1-insts"] = true; + Features["dot2-insts"] = true; + Features["dot7-insts"] = true; + Features["dot10-insts"] = true; + Features["gfx9-insts"] = true; + Features["gfx8-insts"] = true; + Features["16-bit-insts"] = true; + Features["dpp"] = true; + Features["s-memrealtime"] = true; + Features["ci-insts"] = true; + Features["s-memtime-inst"] = true; + Features["gws"] = true; + Features["vmem-to-lds-load-insts"] = true; + Features["atomic-fmin-fmax-global-f64"] = true; + Features["wavefrontsize64"] = true; + break; + case GK_GFX90A: + Features["gfx90a-insts"] = true; + Features["atomic-buffer-global-pk-add-f16-insts"] = true; + Features["atomic-fadd-rtn-insts"] = true; + Features["atomic-fmin-fmax-global-f64"] = true; + [[fallthrough]]; + case GK_GFX908: + Features["dot3-insts"] = true; + Features["dot4-insts"] = true; + Features["dot5-insts"] = true; + Features["dot6-insts"] = true; + Features["mai-insts"] = true; + [[fallthrough]]; + case GK_GFX906: + Features["dl-insts"] = true; + Features["dot1-insts"] = true; + Features["dot2-insts"] = true; + Features["dot7-insts"] = true; + Features["dot10-insts"] = true; + [[fallthrough]]; + case GK_GFX90C: + case GK_GFX909: + case GK_GFX904: + case GK_GFX902: + case GK_GFX900: + case GK_GFX9_GENERIC: + Features["gfx9-insts"] = true; + Features["vmem-to-lds-load-insts"] = true; + [[fallthrough]]; + case GK_GFX810: + case GK_GFX805: + case GK_GFX803: + case 
GK_GFX802: + case GK_GFX801: + Features["gfx8-insts"] = true; + Features["16-bit-insts"] = true; + Features["dpp"] = true; + Features["s-memrealtime"] = true; + Features["ci-insts"] = true; + Features["image-insts"] = true; + Features["s-memtime-inst"] = true; + Features["gws"] = true; + Features["wavefrontsize64"] = true; + break; + case GK_GFX705: + case GK_GFX704: + case GK_GFX703: + case GK_GFX702: + case GK_GFX701: + case GK_GFX700: + Features["ci-insts"] = true; + [[fallthrough]]; + case GK_GFX602: + case GK_GFX601: + case GK_GFX600: + Features["image-insts"] = true; + Features["s-memtime-inst"] = true; + Features["gws"] = true; + Features["atomic-fmin-fmax-global-f32"] = true; + Features["atomic-fmin-fmax-global-f64"] = true; + Features["wavefrontsize64"] = true; + break; + case GK_NONE: + break; + default: + llvm_unreachable("Unhandled GPU!"); + } +} + +/// Fills Features map with default values for given target GPU. +/// \p Features contains overriding target features and this function returns +/// default target features with entries overridden by \p Features. +std::pair<FeatureError, StringRef> +AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, + StringMap<bool> &Features) { // XXX - What does the member GPU mean if device name string passed here? if (T.isSPIRV() && T.getOS() == Triple::OSType::AMDHSA) { // AMDGCN SPIRV must support the union of all AMDGCN features. 
This list @@ -379,6 +697,8 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["atomic-fadd-rtn-insts"] = true; Features["atomic-flat-pk-add-16-insts"] = true; Features["atomic-global-pk-add-bf16-inst"] = true; + Features["bf16-trans-insts"] = true; + Features["bf16-cvt-insts"] = true; Features["bf8-cvt-scale-insts"] = true; Features["bitop3-insts"] = true; Features["ci-insts"] = true; @@ -401,9 +721,10 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["f32-to-f16bf16-cvt-sr-insts"] = true; Features["fp4-cvt-scale-insts"] = true; Features["fp6bf6-cvt-scale-insts"] = true; - Features["fp8-insts"] = true; + Features["fp8e5m3-insts"] = true; Features["fp8-conversion-insts"] = true; Features["fp8-cvt-scale-insts"] = true; + Features["fp8-insts"] = true; Features["gfx8-insts"] = true; Features["gfx9-insts"] = true; Features["gfx90a-insts"] = true; @@ -413,271 +734,27 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["gfx10-3-insts"] = true; Features["gfx11-insts"] = true; Features["gfx12-insts"] = true; + Features["gfx1250-insts"] = true; Features["gws"] = true; Features["image-insts"] = true; - Features["s-memrealtime"] = true; - Features["s-memtime-inst"] = true; Features["mai-insts"] = true; Features["permlane16-swap"] = true; Features["permlane32-swap"] = true; Features["prng-inst"] = true; + Features["setprio-inc-wg-inst"] = true; + Features["s-memrealtime"] = true; + Features["s-memtime-inst"] = true; + Features["tanh-insts"] = true; + Features["tensor-cvt-lut-insts"] = true; + Features["transpose-load-f4f6-insts"] = true; + Features["vmem-pref-insts"] = true; + Features["vmem-to-lds-load-insts"] = true; Features["wavefrontsize32"] = true; Features["wavefrontsize64"] = true; - Features["vmem-to-lds-load-insts"] = true; } else if (T.isAMDGCN()) { - AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU); - switch (Kind) { - case GK_GFX1250: - Features["ci-insts"] = true; - Features["dot7-insts"] 
= true; - Features["dot8-insts"] = true; - Features["dl-insts"] = true; - Features["16-bit-insts"] = true; - Features["dpp"] = true; - Features["gfx8-insts"] = true; - Features["gfx9-insts"] = true; - Features["gfx10-insts"] = true; - Features["gfx10-3-insts"] = true; - Features["gfx11-insts"] = true; - Features["gfx12-insts"] = true; - Features["gfx1250-insts"] = true; - Features["bitop3-insts"] = true; - Features["prng-inst"] = true; - Features["tanh-insts"] = true; - Features["transpose-load-f4f6-insts"] = true; - Features["bf16-trans-insts"] = true; - Features["bf16-cvt-insts"] = true; - Features["fp8-conversion-insts"] = true; - Features["fp8e5m3-insts"] = true; - Features["permlane16-swap"] = true; - Features["ashr-pk-insts"] = true; - Features["atomic-buffer-pk-add-bf16-inst"] = true; - Features["vmem-pref-insts"] = true; - Features["atomic-fadd-rtn-insts"] = true; - Features["atomic-buffer-global-pk-add-f16-insts"] = true; - Features["atomic-flat-pk-add-16-insts"] = true; - Features["atomic-global-pk-add-bf16-inst"] = true; - Features["atomic-ds-pk-add-16-insts"] = true; - Features["setprio-inc-wg-inst"] = true; - break; - case GK_GFX1201: - case GK_GFX1200: - case GK_GFX12_GENERIC: - Features["ci-insts"] = true; - Features["dot7-insts"] = true; - Features["dot8-insts"] = true; - Features["dot9-insts"] = true; - Features["dot10-insts"] = true; - Features["dot11-insts"] = true; - Features["dot12-insts"] = true; - Features["dl-insts"] = true; - Features["atomic-ds-pk-add-16-insts"] = true; - Features["atomic-flat-pk-add-16-insts"] = true; - Features["atomic-buffer-global-pk-add-f16-insts"] = true; - Features["atomic-buffer-pk-add-bf16-inst"] = true; - Features["atomic-global-pk-add-bf16-inst"] = true; - Features["16-bit-insts"] = true; - Features["dpp"] = true; - Features["gfx8-insts"] = true; - Features["gfx9-insts"] = true; - Features["gfx10-insts"] = true; - Features["gfx10-3-insts"] = true; - Features["gfx11-insts"] = true; - Features["gfx12-insts"] = 
true; - Features["atomic-fadd-rtn-insts"] = true; - Features["image-insts"] = true; - Features["fp8-conversion-insts"] = true; - break; - case GK_GFX1153: - case GK_GFX1152: - case GK_GFX1151: - case GK_GFX1150: - case GK_GFX1103: - case GK_GFX1102: - case GK_GFX1101: - case GK_GFX1100: - case GK_GFX11_GENERIC: - Features["ci-insts"] = true; - Features["dot5-insts"] = true; - Features["dot7-insts"] = true; - Features["dot8-insts"] = true; - Features["dot9-insts"] = true; - Features["dot10-insts"] = true; - Features["dot12-insts"] = true; - Features["dl-insts"] = true; - Features["16-bit-insts"] = true; - Features["dpp"] = true; - Features["gfx8-insts"] = true; - Features["gfx9-insts"] = true; - Features["gfx10-insts"] = true; - Features["gfx10-3-insts"] = true; - Features["gfx11-insts"] = true; - Features["atomic-fadd-rtn-insts"] = true; - Features["image-insts"] = true; - Features["gws"] = true; - break; - case GK_GFX1036: - case GK_GFX1035: - case GK_GFX1034: - case GK_GFX1033: - case GK_GFX1032: - case GK_GFX1031: - case GK_GFX1030: - case GK_GFX10_3_GENERIC: - Features["ci-insts"] = true; - Features["dot1-insts"] = true; - Features["dot2-insts"] = true; - Features["dot5-insts"] = true; - Features["dot6-insts"] = true; - Features["dot7-insts"] = true; - Features["dot10-insts"] = true; - Features["dl-insts"] = true; - Features["16-bit-insts"] = true; - Features["dpp"] = true; - Features["gfx8-insts"] = true; - Features["gfx9-insts"] = true; - Features["gfx10-insts"] = true; - Features["gfx10-3-insts"] = true; - Features["image-insts"] = true; - Features["s-memrealtime"] = true; - Features["s-memtime-inst"] = true; - Features["gws"] = true; - Features["vmem-to-lds-load-insts"] = true; - break; - case GK_GFX1012: - case GK_GFX1011: - Features["dot1-insts"] = true; - Features["dot2-insts"] = true; - Features["dot5-insts"] = true; - Features["dot6-insts"] = true; - Features["dot7-insts"] = true; - Features["dot10-insts"] = true; - [[fallthrough]]; - case GK_GFX1013: 
- case GK_GFX1010: - case GK_GFX10_1_GENERIC: - Features["dl-insts"] = true; - Features["ci-insts"] = true; - Features["16-bit-insts"] = true; - Features["dpp"] = true; - Features["gfx8-insts"] = true; - Features["gfx9-insts"] = true; - Features["gfx10-insts"] = true; - Features["image-insts"] = true; - Features["s-memrealtime"] = true; - Features["s-memtime-inst"] = true; - Features["gws"] = true; - Features["vmem-to-lds-load-insts"] = true; - break; - case GK_GFX950: - Features["bitop3-insts"] = true; - Features["fp6bf6-cvt-scale-insts"] = true; - Features["fp4-cvt-scale-insts"] = true; - Features["bf8-cvt-scale-insts"] = true; - Features["fp8-cvt-scale-insts"] = true; - Features["f16bf16-to-fp6bf6-cvt-scale-insts"] = true; - Features["f32-to-f16bf16-cvt-sr-insts"] = true; - Features["prng-inst"] = true; - Features["permlane16-swap"] = true; - Features["permlane32-swap"] = true; - Features["ashr-pk-insts"] = true; - Features["dot12-insts"] = true; - Features["dot13-insts"] = true; - Features["atomic-buffer-pk-add-bf16-inst"] = true; - Features["gfx950-insts"] = true; - [[fallthrough]]; - case GK_GFX942: - Features["fp8-insts"] = true; - Features["fp8-conversion-insts"] = true; - if (Kind != GK_GFX950) - Features["xf32-insts"] = true; - [[fallthrough]]; - case GK_GFX9_4_GENERIC: - Features["gfx940-insts"] = true; - Features["atomic-ds-pk-add-16-insts"] = true; - Features["atomic-flat-pk-add-16-insts"] = true; - Features["atomic-global-pk-add-bf16-inst"] = true; - Features["gfx90a-insts"] = true; - Features["atomic-buffer-global-pk-add-f16-insts"] = true; - Features["atomic-fadd-rtn-insts"] = true; - Features["dot3-insts"] = true; - Features["dot4-insts"] = true; - Features["dot5-insts"] = true; - Features["dot6-insts"] = true; - Features["mai-insts"] = true; - Features["dl-insts"] = true; - Features["dot1-insts"] = true; - Features["dot2-insts"] = true; - Features["dot7-insts"] = true; - Features["dot10-insts"] = true; - Features["gfx9-insts"] = true; - 
Features["gfx8-insts"] = true; - Features["16-bit-insts"] = true; - Features["dpp"] = true; - Features["s-memrealtime"] = true; - Features["ci-insts"] = true; - Features["s-memtime-inst"] = true; - Features["gws"] = true; - Features["vmem-to-lds-load-insts"] = true; - break; - case GK_GFX90A: - Features["gfx90a-insts"] = true; - Features["atomic-buffer-global-pk-add-f16-insts"] = true; - Features["atomic-fadd-rtn-insts"] = true; - [[fallthrough]]; - case GK_GFX908: - Features["dot3-insts"] = true; - Features["dot4-insts"] = true; - Features["dot5-insts"] = true; - Features["dot6-insts"] = true; - Features["mai-insts"] = true; - [[fallthrough]]; - case GK_GFX906: - Features["dl-insts"] = true; - Features["dot1-insts"] = true; - Features["dot2-insts"] = true; - Features["dot7-insts"] = true; - Features["dot10-insts"] = true; - [[fallthrough]]; - case GK_GFX90C: - case GK_GFX909: - case GK_GFX904: - case GK_GFX902: - case GK_GFX900: - case GK_GFX9_GENERIC: - Features["gfx9-insts"] = true; - Features["vmem-to-lds-load-insts"] = true; - [[fallthrough]]; - case GK_GFX810: - case GK_GFX805: - case GK_GFX803: - case GK_GFX802: - case GK_GFX801: - Features["gfx8-insts"] = true; - Features["16-bit-insts"] = true; - Features["dpp"] = true; - Features["s-memrealtime"] = true; - [[fallthrough]]; - case GK_GFX705: - case GK_GFX704: - case GK_GFX703: - case GK_GFX702: - case GK_GFX701: - case GK_GFX700: - Features["ci-insts"] = true; - [[fallthrough]]; - case GK_GFX602: - case GK_GFX601: - case GK_GFX600: - Features["image-insts"] = true; - Features["s-memtime-inst"] = true; - Features["gws"] = true; - break; - case GK_NONE: - break; - default: - llvm_unreachable("Unhandled GPU!"); - } + StringMap<bool> DefaultFeatures; + fillAMDGCNFeatureMap(GPU, T, DefaultFeatures); + return insertWaveSizeFeature(GPU, T, DefaultFeatures, Features); } else { if (GPU.empty()) GPU = "r600"; @@ -706,70 +783,5 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, 
llvm_unreachable("Unhandled GPU!"); } } -} - -static bool isWave32Capable(StringRef GPU, const Triple &T) { - bool IsWave32Capable = false; - // XXX - What does the member GPU mean if device name string passed here? - if (T.isAMDGCN()) { - switch (parseArchAMDGCN(GPU)) { - case GK_GFX1250: - case GK_GFX1201: - case GK_GFX1200: - case GK_GFX1153: - case GK_GFX1152: - case GK_GFX1151: - case GK_GFX1150: - case GK_GFX1103: - case GK_GFX1102: - case GK_GFX1101: - case GK_GFX1100: - case GK_GFX1036: - case GK_GFX1035: - case GK_GFX1034: - case GK_GFX1033: - case GK_GFX1032: - case GK_GFX1031: - case GK_GFX1030: - case GK_GFX1012: - case GK_GFX1011: - case GK_GFX1013: - case GK_GFX1010: - case GK_GFX12_GENERIC: - case GK_GFX11_GENERIC: - case GK_GFX10_3_GENERIC: - case GK_GFX10_1_GENERIC: - IsWave32Capable = true; - break; - default: - break; - } - } - return IsWave32Capable; -} - -std::pair<FeatureError, StringRef> -AMDGPU::insertWaveSizeFeature(StringRef GPU, const Triple &T, - StringMap<bool> &Features) { - bool IsWave32Capable = isWave32Capable(GPU, T); - const bool IsNullGPU = GPU.empty(); - const bool HaveWave32 = Features.count("wavefrontsize32"); - const bool HaveWave64 = Features.count("wavefrontsize64"); - if (HaveWave32 && HaveWave64) { - return {AMDGPU::INVALID_FEATURE_COMBINATION, - "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive"}; - } - if (HaveWave32 && !IsNullGPU && !IsWave32Capable) { - return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "wavefrontsize32"}; - } - // Don't assume any wavesize with an unknown subtarget. - if (!IsNullGPU) { - // Default to wave32 if available, or wave64 if not - if (!HaveWave32 && !HaveWave64) { - StringRef DefaultWaveSizeFeature = - IsWave32Capable ? 
"wavefrontsize32" : "wavefrontsize64"; - Features.insert(std::make_pair(DefaultWaveSizeFeature, true)); - } - } return {NO_ERROR, StringRef()}; } diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp index 6acb0bc..ec15f23 100644 --- a/llvm/lib/TargetParser/Triple.cpp +++ b/llvm/lib/TargetParser/Triple.cpp @@ -329,6 +329,8 @@ StringRef Triple::getOSTypeName(OSType Kind) { case LiteOS: return "liteos"; case XROS: return "xros"; case Vulkan: return "vulkan"; + case CheriotRTOS: + return "cheriotrtos"; } llvm_unreachable("Invalid OSType"); @@ -687,49 +689,50 @@ static Triple::VendorType parseVendor(StringRef VendorName) { static Triple::OSType parseOS(StringRef OSName) { return StringSwitch<Triple::OSType>(OSName) - .StartsWith("darwin", Triple::Darwin) - .StartsWith("dragonfly", Triple::DragonFly) - .StartsWith("freebsd", Triple::FreeBSD) - .StartsWith("fuchsia", Triple::Fuchsia) - .StartsWith("ios", Triple::IOS) - .StartsWith("kfreebsd", Triple::KFreeBSD) - .StartsWith("linux", Triple::Linux) - .StartsWith("lv2", Triple::Lv2) - .StartsWith("macos", Triple::MacOSX) - .StartsWith("managarm", Triple::Managarm) - .StartsWith("netbsd", Triple::NetBSD) - .StartsWith("openbsd", Triple::OpenBSD) - .StartsWith("solaris", Triple::Solaris) - .StartsWith("uefi", Triple::UEFI) - .StartsWith("win32", Triple::Win32) - .StartsWith("windows", Triple::Win32) - .StartsWith("zos", Triple::ZOS) - .StartsWith("haiku", Triple::Haiku) - .StartsWith("rtems", Triple::RTEMS) - .StartsWith("aix", Triple::AIX) - .StartsWith("cuda", Triple::CUDA) - .StartsWith("nvcl", Triple::NVCL) - .StartsWith("amdhsa", Triple::AMDHSA) - .StartsWith("ps4", Triple::PS4) - .StartsWith("ps5", Triple::PS5) - .StartsWith("elfiamcu", Triple::ELFIAMCU) - .StartsWith("tvos", Triple::TvOS) - .StartsWith("watchos", Triple::WatchOS) - .StartsWith("bridgeos", Triple::BridgeOS) - .StartsWith("driverkit", Triple::DriverKit) - .StartsWith("xros", Triple::XROS) - .StartsWith("visionos", 
Triple::XROS) - .StartsWith("mesa3d", Triple::Mesa3D) - .StartsWith("amdpal", Triple::AMDPAL) - .StartsWith("hermit", Triple::HermitCore) - .StartsWith("hurd", Triple::Hurd) - .StartsWith("wasi", Triple::WASI) - .StartsWith("emscripten", Triple::Emscripten) - .StartsWith("shadermodel", Triple::ShaderModel) - .StartsWith("liteos", Triple::LiteOS) - .StartsWith("serenity", Triple::Serenity) - .StartsWith("vulkan", Triple::Vulkan) - .Default(Triple::UnknownOS); + .StartsWith("darwin", Triple::Darwin) + .StartsWith("dragonfly", Triple::DragonFly) + .StartsWith("freebsd", Triple::FreeBSD) + .StartsWith("fuchsia", Triple::Fuchsia) + .StartsWith("ios", Triple::IOS) + .StartsWith("kfreebsd", Triple::KFreeBSD) + .StartsWith("linux", Triple::Linux) + .StartsWith("lv2", Triple::Lv2) + .StartsWith("macos", Triple::MacOSX) + .StartsWith("managarm", Triple::Managarm) + .StartsWith("netbsd", Triple::NetBSD) + .StartsWith("openbsd", Triple::OpenBSD) + .StartsWith("solaris", Triple::Solaris) + .StartsWith("uefi", Triple::UEFI) + .StartsWith("win32", Triple::Win32) + .StartsWith("windows", Triple::Win32) + .StartsWith("zos", Triple::ZOS) + .StartsWith("haiku", Triple::Haiku) + .StartsWith("rtems", Triple::RTEMS) + .StartsWith("aix", Triple::AIX) + .StartsWith("cuda", Triple::CUDA) + .StartsWith("nvcl", Triple::NVCL) + .StartsWith("amdhsa", Triple::AMDHSA) + .StartsWith("ps4", Triple::PS4) + .StartsWith("ps5", Triple::PS5) + .StartsWith("elfiamcu", Triple::ELFIAMCU) + .StartsWith("tvos", Triple::TvOS) + .StartsWith("watchos", Triple::WatchOS) + .StartsWith("bridgeos", Triple::BridgeOS) + .StartsWith("driverkit", Triple::DriverKit) + .StartsWith("xros", Triple::XROS) + .StartsWith("visionos", Triple::XROS) + .StartsWith("mesa3d", Triple::Mesa3D) + .StartsWith("amdpal", Triple::AMDPAL) + .StartsWith("hermit", Triple::HermitCore) + .StartsWith("hurd", Triple::Hurd) + .StartsWith("wasi", Triple::WASI) + .StartsWith("emscripten", Triple::Emscripten) + .StartsWith("shadermodel", 
Triple::ShaderModel) + .StartsWith("liteos", Triple::LiteOS) + .StartsWith("serenity", Triple::Serenity) + .StartsWith("vulkan", Triple::Vulkan) + .StartsWith("cheriotrtos", Triple::CheriotRTOS) + .Default(Triple::UnknownOS); } static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) { diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp index 9cd35e3..b720965 100644 --- a/llvm/lib/TargetParser/X86TargetParser.cpp +++ b/llvm/lib/TargetParser/X86TargetParser.cpp @@ -143,8 +143,8 @@ constexpr FeatureBitset FeaturesDiamondRapids = FeatureCMPCCXADD | FeatureAVXIFMA | FeatureAVXNECONVERT | FeatureAVXVNNIINT8 | FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 | FeatureSM4 | FeatureEGPR | FeatureZU | FeatureCCMP | FeaturePush2Pop2 | - FeaturePPX | FeatureNDD | FeatureNF | FeatureCF | FeatureMOVRS | - FeatureAMX_MOVRS | FeatureAMX_AVX512 | FeatureAMX_FP8 | FeatureAMX_TF32 | + FeaturePPX | FeatureNDD | FeatureNF | FeatureMOVRS | FeatureAMX_MOVRS | + FeatureAMX_AVX512 | FeatureAMX_FP8 | FeatureAMX_TF32 | FeatureAMX_TRANSPOSE | FeatureUSERMSR; // Intel Atom processors. diff --git a/llvm/lib/TargetParser/XtensaTargetParser.cpp b/llvm/lib/TargetParser/XtensaTargetParser.cpp new file mode 100644 index 0000000..25725f2 --- /dev/null +++ b/llvm/lib/TargetParser/XtensaTargetParser.cpp @@ -0,0 +1,93 @@ +//==-- XtensaTargetParser - Parser for Xtensa features ------------*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a target parser to recognise Xtensa hardware features +// +//===----------------------------------------------------------------------===// + +#include "llvm/TargetParser/XtensaTargetParser.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringSwitch.h" + +namespace llvm { + +namespace Xtensa { +struct CPUInfo { + StringLiteral Name; + CPUKind Kind; + uint64_t Features; +}; + +struct FeatureName { + uint64_t ID; + const char *NameCStr; + size_t NameLength; + + StringRef getName() const { return StringRef(NameCStr, NameLength); } +}; + +const FeatureName XtensaFeatureNames[] = { +#define XTENSA_FEATURE(ID, NAME) {ID, "+" NAME, sizeof(NAME)}, +#include "llvm/TargetParser/XtensaTargetParser.def" +}; + +constexpr CPUInfo XtensaCPUInfo[] = { +#define XTENSA_CPU(ENUM, NAME, FEATURES) {NAME, CK_##ENUM, FEATURES}, +#include "llvm/TargetParser/XtensaTargetParser.def" +}; + +StringRef getBaseName(StringRef CPU) { + return llvm::StringSwitch<StringRef>(CPU) +#define XTENSA_CPU_ALIAS(NAME, ANAME) .Case(ANAME, NAME) +#include "llvm/TargetParser/XtensaTargetParser.def" + .Default(CPU); +} + +StringRef getAliasName(StringRef CPU) { + return llvm::StringSwitch<StringRef>(CPU) +#define XTENSA_CPU_ALIAS(NAME, ANAME) .Case(NAME, ANAME) +#include "llvm/TargetParser/XtensaTargetParser.def" + .Default(CPU); +} + +CPUKind parseCPUKind(StringRef CPU) { + CPU = getBaseName(CPU); + return llvm::StringSwitch<CPUKind>(CPU) +#define XTENSA_CPU(ENUM, NAME, FEATURES) .Case(NAME, CK_##ENUM) +#include "llvm/TargetParser/XtensaTargetParser.def" + .Default(CK_INVALID); +} + +// Get all features for the CPU +void getCPUFeatures(StringRef CPU, std::vector<StringRef> &Features) { + CPU = getBaseName(CPU); + auto I = llvm::find_if(XtensaCPUInfo, + [&](const CPUInfo &CI) { return CI.Name == CPU; }); + assert(I != 
std::end(XtensaCPUInfo) && "CPU not found!"); + uint64_t Bits = I->Features; + + for (const auto &F : XtensaFeatureNames) { + if ((Bits & F.ID) == F.ID) + Features.push_back(F.getName()); + } +} + +// Find all valid CPUs +void fillValidCPUList(std::vector<StringRef> &Values) { + for (const auto &C : XtensaCPUInfo) { + if (C.Kind != CK_INVALID) { + Values.emplace_back(C.Name); + StringRef Name = getAliasName(C.Name); + if (Name != C.Name) + Values.emplace_back(Name); + } + } +} + +} // namespace Xtensa +} // namespace llvm |