[NFC][AArch64] Organise extensions by archtecture version (#95898)

This updates the way the AArch64 architecture extensions are organised in AArch64Features.td to improve readability and maintainability of the file. Extensions are now grouped by the corresponding architecture version in which they were introduced.
author: Lucas Duarte Prates <lucas.prates@arm.com> 2024-06-19 10:28:47 +0100
committer: GitHub <noreply@github.com> 2024-06-19 10:28:47 +0100
commit: 116384a028f0e3801b9443e227e0840897202168 (patch)
tree: f54623a6218b5661a8fe304af5c1729676367c33
parent: cfb4dce90ce66acbd094e443f422c4e5c413a9e8 (diff)
download: llvm-116384a028f0e3801b9443e227e0840897202168.zip
llvm-116384a028f0e3801b9443e227e0840897202168.tar.gz
llvm-116384a028f0e3801b9443e227e0840897202168.tar.bz2
1 files changed, 419 insertions, 355 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td
index ffb899a..911a282 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -101,6 +101,11 @@ def : FMVOnlyExtension<"FEAT_SVE_PMULL128", "sve2-pmull128", "+sve2,+sve,+sve2-a
 // Arm Architecture Features, it should list all the relevant features. Not all
 // FEAT_ features have a corresponding SubtargetFeature.
 
+
+//===----------------------------------------------------------------------===//
+//  Armv8.0 Architecture Extensions
+//===----------------------------------------------------------------------===//
+
 let ArchExtKindSpelling = "AEK_FP", MArchName = "fp" in
 def FeatureFPARMv8 : Extension<"fp-armv8", "FPARMv8",
   "Enable ARMv8 (FEAT_FP)", [],
@@ -111,21 +116,11 @@ def FeatureNEON : Extension<"neon", "NEON",
   "Enable Advanced SIMD instructions (FEAT_AdvSIMD)", [FeatureFPARMv8],
   "FEAT_SIMD", "+fp-armv8,+neon", 100>;
 
-def FeatureSM4 : Extension<
-    "sm4", "SM4",
-    "Enable SM3 and SM4 support (FEAT_SM4, FEAT_SM3)", [FeatureNEON],
-    "FEAT_SM4", "+sm4,+fp-armv8,+neon", 106>;
-
 def FeatureSHA2 : Extension<
     "sha2", "SHA2",
     "Enable SHA1 and SHA256 support (FEAT_SHA1, FEAT_SHA256)", [FeatureNEON],
     "FEAT_SHA2", "+sha2,+fp-armv8,+neon", 130>;
 
-def FeatureSHA3 : Extension<
-    "sha3", "SHA3",
-    "Enable SHA512 and SHA3 support (FEAT_SHA3, FEAT_SHA512)", [FeatureNEON, FeatureSHA2],
-    "FEAT_SHA3", "+sha3,+sha2,+fp-armv8,+neon", 140>;
-
 def FeatureAES : Extension<
     "aes", "AES",
     "Enable AES support (FEAT_AES, FEAT_PMULL)", [FeatureNEON],
@@ -147,26 +142,24 @@ def FeatureCRC : Extension<"crc", "CRC",
   "Enable ARMv8 CRC-32 checksum instructions (FEAT_CRC32)", [],
   "FEAT_CRC", "+crc", 110>;
 
-def FeatureRAS : Extension<"ras", "RAS",
-  "Enable ARMv8 Reliability, Availability and Serviceability Extensions (FEAT_RAS, FEAT_RASv1p1)">;
+// This SubtargetFeature is special. It controls only whether codegen will turn
+// `llvm.readcyclecounter()` into an access to a PMUv3 System Register. The
+// `FEAT_PMUv3*` system registers are always available for assembly/disassembly.
+let MArchName = "pmuv3" in
+def FeaturePerfMon : Extension<"perfmon", "PerfMon",
+  "Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension (FEAT_PMUv3)">;
 
-def FeatureRASv2 : Extension<"rasv2", "RASv2",
-  "Enable ARMv8.9-A Reliability, Availability and Serviceability Extensions (FEAT_RASv2)",
-  [FeatureRAS]>;
+def FeatureSpecRestrict : SubtargetFeature<"specrestrict", "HasSpecRestrict",
+  "true", "Enable architectural speculation restriction (FEAT_CSV2_2)">;
+
+//===----------------------------------------------------------------------===//
+//  Armv8.1 Architecture Extensions
+//===----------------------------------------------------------------------===//
 
 def FeatureLSE : Extension<"lse", "LSE",
   "Enable ARMv8.1 Large System Extension (LSE) atomic instructions (FEAT_LSE)", [],
   "FEAT_LSE", "+lse", 80>;
 
-def FeatureLSE2 : SubtargetFeature<"lse2", "HasLSE2", "true",
-  "Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules (FEAT_LSE2)">;
-
-def FeatureOutlineAtomics : SubtargetFeature<"outline-atomics", "OutlineAtomics", "true",
-  "Enable out of line atomics to support LSE instructions">;
-
-def FeatureFMV : SubtargetFeature<"fmv", "HasFMV", "true",
-  "Enable Function Multi Versioning support.">;
-
 let MArchAlias = "rdma" in
 def FeatureRDM : Extension<"rdm", "RDM",
   "Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions (FEAT_RDM)",
@@ -187,22 +180,28 @@ def FeatureCONTEXTIDREL2 : SubtargetFeature<"CONTEXTIDREL2", "HasCONTEXTIDREL2",
 def FeatureVH : SubtargetFeature<"vh", "HasVH", "true",
     "Enables ARM v8.1 Virtual Host extension (FEAT_VHE)", [FeatureCONTEXTIDREL2] >;
 
-// This SubtargetFeature is special. It controls only whether codegen will turn
-// `llvm.readcyclecounter()` into an access to a PMUv3 System Register. The
-// `FEAT_PMUv3*` system registers are always available for assembly/disassembly.
-let MArchName = "pmuv3" in
-def FeaturePerfMon : Extension<"perfmon", "PerfMon",
-  "Enable Code Generation for ARMv8 PMUv3 Performance Monitors extension (FEAT_PMUv3)">;
+//===----------------------------------------------------------------------===//
+//  Armv8.2 Architecture Extensions
+//===----------------------------------------------------------------------===//
+
+def FeatureSM4 : Extension<
+    "sm4", "SM4",
+    "Enable SM3 and SM4 support (FEAT_SM4, FEAT_SM3)", [FeatureNEON],
+    "FEAT_SM4", "+sm4,+fp-armv8,+neon", 106>;
+
+def FeatureSHA3 : Extension<
+    "sha3", "SHA3",
+    "Enable SHA512 and SHA3 support (FEAT_SHA3, FEAT_SHA512)", [FeatureNEON, FeatureSHA2],
+    "FEAT_SHA3", "+sha3,+sha2,+fp-armv8,+neon", 140>;
+
+def FeatureRAS : Extension<"ras", "RAS",
+  "Enable ARMv8 Reliability, Availability and Serviceability Extensions (FEAT_RAS, FEAT_RASv1p1)">;
 
 let ArchExtKindSpelling = "AEK_FP16", MArchName = "fp16" in
 def FeatureFullFP16 : Extension<"fullfp16", "FullFP16",
   "Full FP16 (FEAT_FP16)", [FeatureFPARMv8],
   "FEAT_FP16", "+fullfp16,+fp-armv8,+neon", 170>;
 
-def FeatureFP16FML : Extension<"fp16fml", "FP16FML",
-  "Enable FP16 FML instructions (FEAT_FHM)", [FeatureFullFP16],
-  "FEAT_FP16FML", "+fp16fml,+fullfp16,+fp-armv8,+neon", 175>;
-
 let ArchExtKindSpelling = "AEK_PROFILE", MArchName = "profile" in
 def FeatureSPE : Extension<"spe", "SPE",
   "Enable Statistical Profiling extension (FEAT_SPE)">;
@@ -212,7 +211,6 @@ def FeaturePAN_RWV : SubtargetFeature<
     "Enable v8.2 PAN s1e1R and s1e1W Variants (FEAT_PAN2)",
     [FeaturePAN]>;
 
-// UAO PState
 def FeaturePsUAO : SubtargetFeature< "uaops", "HasPsUAO", "true",
     "Enable v8.2 UAO PState (FEAT_UAO)">;
 
@@ -223,205 +221,29 @@ def FeatureSVE : Extension<"sve", "SVE",
   "Enable Scalable Vector Extension (SVE) instructions (FEAT_SVE)", [FeatureFullFP16],
   "FEAT_SVE", "+sve,+fullfp16,+fp-armv8,+neon", 310>;
 
-// This flag is currently still labeled as Experimental, but when fully
-// implemented this should tell the compiler to use the zeroing pseudos to
-// benefit from the reverse instructions (e.g. SUB vs SUBR) if the inactive
-// lanes are known to be zero. The pseudos will then be expanded using the
-// MOVPRFX instruction to zero the inactive lanes. This feature should only be
-// enabled if MOVPRFX instructions are known to merge with the destructive
-// operations they prefix.
-//
-// This feature could similarly be extended to support cheap merging of _any_
-// value into the inactive lanes using the MOVPRFX instruction that uses
-// merging-predication.
-def FeatureExperimentalZeroingPseudos
-    : SubtargetFeature<"use-experimental-zeroing-pseudos",
-                       "UseExperimentalZeroingPseudos", "true",
-                       "Hint to the compiler that the MOVPRFX instruction is "
-                       "merged with destructive operations",
-                       []>;
-
-def FeatureUseScalarIncVL : SubtargetFeature<"use-scalar-inc-vl",
-  "UseScalarIncVL", "true", "Prefer inc/dec over add+cnt">;
-
-def FeatureBF16 : Extension<"bf16", "BF16",
-    "Enable BFloat16 Extension (FEAT_BF16)", [],
-    "FEAT_BF16", "+bf16", 280>;
-
-def FeatureNoSVEFPLD1R : SubtargetFeature<"no-sve-fp-ld1r",
-  "NoSVEFPLD1R", "true", "Avoid using LD1RX instructions for FP">;
-
-def FeatureSVE2 : Extension<"sve2", "SVE2",
-  "Enable Scalable Vector Extension 2 (SVE2) instructions (FEAT_SVE2)",
-  [FeatureSVE, FeatureUseScalarIncVL],
-  "FEAT_SVE2", "+sve2,+sve,+fullfp16,+fp-armv8,+neon", 370>;
-
-def FeatureSVE2AES : Extension<"sve2-aes", "SVE2AES",
-  "Enable AES SVE2 instructions (FEAT_SVE_AES, FEAT_SVE_PMULL128)",
-  [FeatureSVE2, FeatureAES],
-  "FEAT_SVE_AES", "+sve2,+sve,+sve2-aes,+fullfp16,+fp-armv8,+neon", 380>;
-
-def FeatureSVE2SM4 : Extension<"sve2-sm4", "SVE2SM4",
-  "Enable SM4 SVE2 instructions (FEAT_SVE_SM4)", [FeatureSVE2, FeatureSM4],
-  "FEAT_SVE_SM4", "+sve2,+sve,+sve2-sm4,+fullfp16,+fp-armv8,+neon", 420>;
-
-def FeatureSVE2SHA3 : Extension<"sve2-sha3", "SVE2SHA3",
-  "Enable SHA3 SVE2 instructions (FEAT_SVE_SHA3)", [FeatureSVE2, FeatureSHA3],
-  "FEAT_SVE_SHA3", "+sve2,+sve,+sve2-sha3,+fullfp16,+fp-armv8,+neon", 410>;
-
-def FeatureSVE2BitPerm : Extension<"sve2-bitperm", "SVE2BitPerm",
-  "Enable bit permutation SVE2 instructions (FEAT_SVE_BitPerm)", [FeatureSVE2],
-  "FEAT_SVE_BITPERM", "+sve2,+sve,+sve2-bitperm,+fullfp16,+fp-armv8,+neon", 400>;
-
-let FMVDependencies = "+sve2p1,+sve2,+sve,+fullfp16,+fp-armv8,+neon" in
-def FeatureSVE2p1: Extension<"sve2p1", "SVE2p1",
-  "Enable Scalable Vector Extension 2.1 instructions", [FeatureSVE2]>;
-
-def FeatureB16B16 : Extension<"b16b16", "B16B16",
-  "Enable SVE2.1 or SME2.1 non-widening BFloat16 to BFloat16 instructions (FEAT_B16B16)", [FeatureBF16]>;
-
-def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
-                                        "Has zero-cycle register moves">;
-
-def FeatureZCZeroingGP : SubtargetFeature<"zcz-gp", "HasZeroCycleZeroingGP", "true",
-                                        "Has zero-cycle zeroing instructions for generic registers">;
-
-// It is generally beneficial to rewrite "fmov s0, wzr" to "movi d0, #0".
-// as movi is more efficient across all cores. Newer cores can eliminate
-// fmovs early and there is no difference with movi, but this not true for
-// all implementations.
-def FeatureNoZCZeroingFP : SubtargetFeature<"no-zcz-fp", "HasZeroCycleZeroingFP", "false",
-                                        "Has no zero-cycle zeroing instructions for FP registers">;
-
-def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
-                                        "Has zero-cycle zeroing instructions",
-                                        [FeatureZCZeroingGP]>;
-
-/// ... but the floating-point version doesn't quite work in rare cases on older
-/// CPUs.
-def FeatureZCZeroingFPWorkaround : SubtargetFeature<"zcz-fp-workaround",
-    "HasZeroCycleZeroingFPWorkaround", "true",
-    "The zero-cycle floating-point zeroing instruction has a bug">;
-
-def FeatureStrictAlign : SubtargetFeature<"strict-align",
-                                          "RequiresStrictAlign", "true",
-                                          "Disallow all unaligned memory "
-                                          "access">;
-
-foreach i = {1-7,9-15,18,20-28} in
-    def FeatureReserveX#i : SubtargetFeature<"reserve-x"#i, "ReserveXRegister["#i#"]", "true",
-                                             "Reserve X"#i#", making it unavailable "
-                                             "as a GPR">;
-
-foreach i = {8-15,18} in
-    def FeatureCallSavedX#i : SubtargetFeature<"call-saved-x"#i,
-         "CustomCallSavedXRegs["#i#"]", "true", "Make X"#i#" callee saved.">;
-
-def FeatureBalanceFPOps : SubtargetFeature<"balance-fp-ops", "BalanceFPOps",
-    "true",
-    "balance mix of odd and even D-registers for fp multiply(-accumulate) ops">;
-
-def FeaturePredictableSelectIsExpensive : SubtargetFeature<
-    "predictable-select-expensive", "PredictableSelectIsExpensive", "true",
-    "Prefer likely predicted branches over selects">;
-
-def FeatureEnableSelectOptimize : SubtargetFeature<
-    "enable-select-opt", "EnableSelectOptimize", "true",
-    "Enable the select optimize pass for select loop heuristics">;
-
-def FeatureExynosCheapAsMoveHandling : SubtargetFeature<"exynos-cheap-as-move",
-    "HasExynosCheapAsMoveHandling", "true",
-    "Use Exynos specific handling of cheap instructions">;
-
-def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
-    "UsePostRAScheduler", "true", "Schedule again after register allocation">;
-
-def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store",
-    "IsMisaligned128StoreSlow", "true", "Misaligned 128 bit stores are slow">;
-
-def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128",
-    "IsPaired128Slow", "true", "Paired 128 bit loads and stores are slow">;
-
-def FeatureAscendStoreAddress : SubtargetFeature<"ascend-store-address",
-    "IsStoreAddressAscend", "true",
-    "Schedule vector stores by ascending address">;
-
-def FeatureSlowSTRQro : SubtargetFeature<"slow-strqro-store", "IsSTRQroSlow",
-    "true", "STR of Q register with register offset is slow">;
-
-def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature<
-    "alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern",
-    "true", "Use alternative pattern for sextload convert to f32">;
-
-def FeatureArithmeticBccFusion : SubtargetFeature<
-    "arith-bcc-fusion", "HasArithmeticBccFusion", "true",
-    "CPU fuses arithmetic+bcc operations">;
-
-def FeatureArithmeticCbzFusion : SubtargetFeature<
-    "arith-cbz-fusion", "HasArithmeticCbzFusion", "true",
-    "CPU fuses arithmetic + cbz/cbnz operations">;
-
-def FeatureCmpBccFusion : SubtargetFeature<
-    "cmp-bcc-fusion", "HasCmpBccFusion", "true",
-    "CPU fuses cmp+bcc operations">;
-
-def FeatureFuseAddress : SubtargetFeature<
-    "fuse-address", "HasFuseAddress", "true",
-    "CPU fuses address generation and memory operations">;
-
-def FeatureFuseAES : SubtargetFeature<
-    "fuse-aes", "HasFuseAES", "true",
-    "CPU fuses AES crypto operations">;
-
-def FeatureFuseArithmeticLogic : SubtargetFeature<
-    "fuse-arith-logic", "HasFuseArithmeticLogic", "true",
-    "CPU fuses arithmetic and logic operations">;
-
-def FeatureFuseCCSelect : SubtargetFeature<
-    "fuse-csel", "HasFuseCCSelect", "true",
-    "CPU fuses conditional select operations">;
-
-def FeatureFuseCryptoEOR : SubtargetFeature<
-    "fuse-crypto-eor", "HasFuseCryptoEOR", "true",
-    "CPU fuses AES/PMULL and EOR operations">;
-
-def FeatureFuseAdrpAdd : SubtargetFeature<
-    "fuse-adrp-add", "HasFuseAdrpAdd", "true",
-    "CPU fuses adrp+add operations">;
-
-def FeatureFuseLiterals : SubtargetFeature<
-    "fuse-literals", "HasFuseLiterals", "true",
-    "CPU fuses literal generation operations">;
-
-def FeatureFuseAddSub2RegAndConstOne : SubtargetFeature<
-   "fuse-addsub-2reg-const1", "HasFuseAddSub2RegAndConstOne", "true",
-   "CPU fuses (a + b + 1) and (a - b - 1)">;
+let ArchExtKindSpelling = "AEK_I8MM" in
+def FeatureMatMulInt8 : Extension<"i8mm", "MatMulInt8",
+    "Enable Matrix Multiply Int8 Extension (FEAT_I8MM)", [],
+    "FEAT_I8MM", "+i8mm", 270>;
 
-def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
-    "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
-    "Disable latency scheduling heuristic">;
+let ArchExtKindSpelling = "AEK_F32MM" in
+def FeatureMatMulFP32 : Extension<"f32mm", "MatMulFP32",
+    "Enable Matrix Multiply FP32 Extension (FEAT_F32MM)", [FeatureSVE],
+    "FEAT_SVE_F32MM", "+sve,+f32mm,+fullfp16,+fp-armv8,+neon", 350>;
 
-def FeatureStorePairSuppress : SubtargetFeature<
-    "store-pair-suppress", "EnableStorePairSuppress", "true",
-    "Enable Store Pair Suppression heuristics">;
+let ArchExtKindSpelling = "AEK_F64MM" in
+def FeatureMatMulFP64 : Extension<"f64mm", "MatMulFP64",
+    "Enable Matrix Multiply FP64 Extension (FEAT_F64MM)", [FeatureSVE],
+    "FEAT_SVE_F64MM", "+sve,+f64mm,+fullfp16,+fp-armv8,+neon", 360>;
 
-def FeatureForce32BitJumpTables
-   : SubtargetFeature<"force-32bit-jump-tables", "Force32BitJumpTables", "true",
-                      "Force jump table entries to be 32-bits wide except at MinSize">;
+//===----------------------------------------------------------------------===//
+//  Armv8.3 Architecture Extensions
+//===----------------------------------------------------------------------===//
 
 def FeatureRCPC : Extension<"rcpc", "RCPC",
     "Enable support for RCPC extension (FEAT_LRCPC)", [],
     "FEAT_RCPC", "+rcpc", 230>;
 
-def FeatureUseRSqrt : SubtargetFeature<
-    "use-reciprocal-square-root", "UseRSqrt", "true",
-    "Use the reciprocal square root approximation">;
-
-def FeatureDotProd : Extension<
-    "dotprod", "DotProd",
-    "Enable dot product support (FEAT_DotProd)", [FeatureNEON],
-    "FEAT_DOTPROD", "+dotprod,+fp-armv8,+neon", 104>;
-
 def FeaturePAuth : Extension<
     "pauth", "PAuth",
     "Enable v8.3-A Pointer Authentication extension (FEAT_PAuth)">;
@@ -448,6 +270,22 @@ def FeatureNV : SubtargetFeature<
     "nv", "HasNV", "true",
     "Enable v8.4-A Nested Virtualization Enchancement (FEAT_NV, FEAT_NV2)">;
 
+//===----------------------------------------------------------------------===//
+//  Armv8.4 Architecture Extensions
+//===----------------------------------------------------------------------===//
+
+def FeatureLSE2 : SubtargetFeature<"lse2", "HasLSE2", "true",
+  "Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules (FEAT_LSE2)">;
+
+def FeatureFP16FML : Extension<"fp16fml", "FP16FML",
+  "Enable FP16 FML instructions (FEAT_FHM)", [FeatureFullFP16],
+  "FEAT_FP16FML", "+fp16fml,+fullfp16,+fp-armv8,+neon", 175>;
+
+def FeatureDotProd : Extension<
+    "dotprod", "DotProd",
+    "Enable dot product support (FEAT_DotProd)", [FeatureNEON],
+    "FEAT_DOTPROD", "+dotprod,+fp-armv8,+neon", 104>;
+
 def FeatureMPAM : SubtargetFeature<
     "mpam", "HasMPAM", "true",
     "Enable v8.4-A Memory system Partitioning and Monitoring extension (FEAT_MPAM)">;
@@ -465,11 +303,6 @@ def FeatureAM : SubtargetFeature<
     "am", "HasAM", "true",
     "Enable v8.4-A Activity Monitors extension (FEAT_AMUv1)">;
 
-def FeatureAMVS : SubtargetFeature<
-    "amvs", "HasAMVS", "true",
-    "Enable v8.6-A Activity Monitors Virtualization support (FEAT_AMUv1p1)",
-    [FeatureAM]>;
-
 def FeatureSEL2 : SubtargetFeature<
     "sel2", "HasSEL2", "true",
     "Enable v8.4-A Secure Exception Level 2 extension (FEAT_SEL2)">;
@@ -483,40 +316,13 @@ def FeatureFlagM : Extension<
     "Enable v8.4-A Flag Manipulation Instructions (FEAT_FlagM)", [],
     "FEAT_FLAGM", "+flagm", 20>;
 
-// 8.4 RCPC enchancements: LDAPR & STLR instructions with Immediate Offset
 def FeatureRCPC_IMMO : SubtargetFeature<"rcpc-immo", "HasRCPC_IMMO", "true",
     "Enable v8.4-A RCPC instructions with Immediate Offsets (FEAT_LRCPC2)",
     [FeatureRCPC]>;
 
-def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates",
-                                        "NegativeImmediates", "false",
-                                        "Convert immediates and instructions "
-                                        "to their negated or complemented "
-                                        "equivalent when the immediate does "
-                                        "not fit in the encoding.">;
-
-// Address operands with shift amount 2 or 3 are fast on all Arm chips except
-// some old Apple cores (A7-A10?) which handle all shifts slowly. Cortex-A57
-// and derived designs through Cortex-X1 take an extra micro-op for shifts
-// of 1 or 4. Other Arm chips handle all shifted operands at the same speed
-// as unshifted operands.
-//
-// We don't try to model the behavior of the old Apple cores because new code
-// targeting A7 is very unlikely to actually run on an A7. The Cortex cores
-// are modeled by FeatureAddrLSLSlow14.
-def FeatureAddrLSLSlow14 : SubtargetFeature<
-    "addr-lsl-slow-14", "HasAddrLSLSlow14", "true",
-    "Address operands with shift amount of 1 or 4 are slow">;
-
-def FeatureALULSLFast : SubtargetFeature<
-    "alu-lsl-fast", "HasALULSLFast", "true",
-    "Add/Sub operations with lsl shift <= 4 are cheap">;
-
-def FeatureAggressiveFMA :
-  SubtargetFeature<"aggressive-fma",
-                   "HasAggressiveFMA",
-                   "true",
-                   "Enable Aggressive FMA for floating-point.">;
+//===----------------------------------------------------------------------===//
+//  Armv8.5 Architecture Extensions
+//===----------------------------------------------------------------------===//
 
 def FeatureAltFPCmp : SubtargetFeature<"altnzcv", "HasAlternativeNZCV", "true",
   "Enable alternative NZCV format for floating point comparisons (FEAT_FlagM2)">;
@@ -525,9 +331,6 @@ def FeatureFRInt3264 : SubtargetFeature<"fptoint", "HasFRInt3264", "true",
   "Enable FRInt[32|64][Z|X] instructions that round a floating-point number to "
   "an integer (in FP format) forcing it to fit into a 32- or 64-bit int (FEAT_FRINTTS)" >;
 
-def FeatureSpecRestrict : SubtargetFeature<"specrestrict", "HasSpecRestrict",
-  "true", "Enable architectural speculation restriction (FEAT_CSV2_2)">;
-
 def FeatureSB : Extension<"sb", "SB",
   "Enable v8.5 Speculation Barrier (FEAT_SB)", [],
   "FEAT_SB", "+sb", 470>;
@@ -560,35 +363,29 @@ def FeatureMTE : Extension<"mte", "MTE",
     "Enable Memory Tagging Extension (FEAT_MTE, FEAT_MTE2)", [],
     "FEAT_MEMTAG", "", 440>;
 
-def FeatureTRBE : SubtargetFeature<"trbe", "TRBE", "true",
-    "Enable Trace Buffer Extension (FEAT_TRBE)">;
-
-def FeatureETE : SubtargetFeature<"ete", "ETE", "true",
-    "Enable Embedded Trace Extension (FEAT_ETE)",
-    [FeatureTRBE]>;
+//===----------------------------------------------------------------------===//
+//  Armv8.6 Architecture Extensions
+//===----------------------------------------------------------------------===//
 
-def FeatureTME : Extension<"tme", "TME",
-    "Enable Transactional Memory Extension (FEAT_TME)" >;
+def FeatureBF16 : Extension<"bf16", "BF16",
+    "Enable BFloat16 Extension (FEAT_BF16)", [],
+    "FEAT_BF16", "+bf16", 280>;
 
-def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals",
-    "AllowTaggedGlobals",
-    "true", "Use an instruction sequence for taking the address of a global "
-    "that allows a memory tag in the upper address bits">;
+def FeatureAMVS : SubtargetFeature<
+    "amvs", "HasAMVS", "true",
+    "Enable v8.6-A Activity Monitors Virtualization support (FEAT_AMUv1p1)",
+    [FeatureAM]>;
 
-let ArchExtKindSpelling = "AEK_I8MM" in
-def FeatureMatMulInt8 : Extension<"i8mm", "MatMulInt8",
-    "Enable Matrix Multiply Int8 Extension (FEAT_I8MM)", [],
-    "FEAT_I8MM", "+i8mm", 270>;
+def FeatureFineGrainedTraps : SubtargetFeature<"fgt", "HasFineGrainedTraps",
+    "true", "Enable fine grained virtualization traps extension (FEAT_FGT)">;
 
-let ArchExtKindSpelling = "AEK_F32MM" in
-def FeatureMatMulFP32 : Extension<"f32mm", "MatMulFP32",
-    "Enable Matrix Multiply FP32 Extension (FEAT_F32MM)", [FeatureSVE],
-    "FEAT_SVE_F32MM", "+sve,+f32mm,+fullfp16,+fp-armv8,+neon", 350>;
+def FeatureEnhancedCounterVirtualization :
+      SubtargetFeature<"ecv", "HasEnhancedCounterVirtualization",
+      "true", "Enable enhanced counter virtualization extension (FEAT_ECV)">;
 
-let ArchExtKindSpelling = "AEK_F64MM" in
-def FeatureMatMulFP64 : Extension<"f64mm", "MatMulFP64",
-    "Enable Matrix Multiply FP64 Extension (FEAT_F64MM)", [FeatureSVE],
-    "FEAT_SVE_F64MM", "+sve,+f64mm,+fullfp16,+fp-armv8,+neon", 360>;
+//===----------------------------------------------------------------------===//
+//  Armv8.7 Architecture Extensions
+//===----------------------------------------------------------------------===//
 
 def FeatureXS : SubtargetFeature<"xs", "HasXS",
     "true", "Enable Armv8.7-A limited-TLB-maintenance instruction (FEAT_XS)">;
@@ -604,6 +401,13 @@ def FeatureLS64 : Extension<"ls64", "LS64",
     "Enable Armv8.7-A LD64B/ST64B Accelerator Extension (FEAT_LS64, FEAT_LS64_V, FEAT_LS64_ACCDATA)", [],
     "FEAT_LS64", "", 520>;
 
+def FeatureSPE_EEF : SubtargetFeature<"spe-eef", "HasSPE_EEF",
+    "true", "Enable extra register in the Statistical Profiling Extension (FEAT_SPEv1p2)">;
+
+//===----------------------------------------------------------------------===//
+//  Armv8.8 Architecture Extensions
+//===----------------------------------------------------------------------===//
+
 def FeatureHBC : Extension<"hbc", "HBC",
     "Enable Armv8.8-A Hinted Conditional Branches Extension (FEAT_HBC)">;
 
@@ -614,18 +418,85 @@ def FeatureMOPS : Extension<"mops", "MOPS",
 def FeatureNMI : SubtargetFeature<"nmi", "HasNMI",
     "true", "Enable Armv8.8-A Non-maskable Interrupts (FEAT_NMI, FEAT_GICv3_NMI)">;
 
-def FeatureBRBE : Extension<"brbe", "BRBE",
-    "Enable Branch Record Buffer Extension (FEAT_BRBE)">;
+//===----------------------------------------------------------------------===//
+//  Armv8.9 Architecture Extensions
+//===----------------------------------------------------------------------===//
 
-def FeatureSPE_EEF : SubtargetFeature<"spe-eef", "HasSPE_EEF",
-    "true", "Enable extra register in the Statistical Profiling Extension (FEAT_SPEv1p2)">;
+def FeatureRASv2 : Extension<"rasv2", "RASv2",
+  "Enable ARMv8.9-A Reliability, Availability and Serviceability Extensions (FEAT_RASv2)",
+  [FeatureRAS]>;
 
-def FeatureFineGrainedTraps : SubtargetFeature<"fgt", "HasFineGrainedTraps",
-    "true", "Enable fine grained virtualization traps extension (FEAT_FGT)">;
+def FeatureCSSC : Extension<"cssc", "CSSC",
+  "Enable Common Short Sequence Compression (CSSC) instructions (FEAT_CSSC)">;
 
-def FeatureEnhancedCounterVirtualization :
-      SubtargetFeature<"ecv", "HasEnhancedCounterVirtualization",
-      "true", "Enable enhanced counter virtualization extension (FEAT_ECV)">;
+def FeatureCLRBHB : SubtargetFeature<"clrbhb", "HasCLRBHB",
+    "true", "Enable Clear BHB instruction (FEAT_CLRBHB)">;
+
+def FeaturePRFM_SLC : SubtargetFeature<"prfm-slc-target", "HasPRFM_SLC",
+    "true", "Enable SLC target for PRFM instruction">;
+
+let MArchName = "predres2" in
+def FeatureSPECRES2 : Extension<"specres2", "SPECRES2",
+    "Enable Speculation Restriction Instruction (FEAT_SPECRES2)",
+    [FeaturePredRes]>;
+
+def FeatureRCPC3 : Extension<"rcpc3", "RCPC3",
+    "Enable Armv8.9-A RCPC instructions for A64 and Advanced SIMD and floating-point instruction set (FEAT_LRCPC3)",
+    [FeatureRCPC_IMMO],
+    "FEAT_RCPC3", "+rcpc,+rcpc3", 241>;
+
+def FeatureTHE : Extension<"the", "THE",
+    "Enable Armv8.9-A Translation Hardening Extension (FEAT_THE)">;
+
+//===----------------------------------------------------------------------===//
+//  Armv9.0 Architecture Extensions
+//===----------------------------------------------------------------------===//
+
+def FeatureUseScalarIncVL : SubtargetFeature<"use-scalar-inc-vl",
+  "UseScalarIncVL", "true", "Prefer inc/dec over add+cnt">;
+
+def FeatureSVE2 : Extension<"sve2", "SVE2",
+  "Enable Scalable Vector Extension 2 (SVE2) instructions (FEAT_SVE2)",
+  [FeatureSVE, FeatureUseScalarIncVL],
+  "FEAT_SVE2", "+sve2,+sve,+fullfp16,+fp-armv8,+neon", 370>;
+
+def FeatureSVE2AES : Extension<"sve2-aes", "SVE2AES",
+  "Enable AES SVE2 instructions (FEAT_SVE_AES, FEAT_SVE_PMULL128)",
+  [FeatureSVE2, FeatureAES],
+  "FEAT_SVE_AES", "+sve2,+sve,+sve2-aes,+fullfp16,+fp-armv8,+neon", 380>;
+
+def FeatureSVE2SM4 : Extension<"sve2-sm4", "SVE2SM4",
+  "Enable SM4 SVE2 instructions (FEAT_SVE_SM4)", [FeatureSVE2, FeatureSM4],
+  "FEAT_SVE_SM4", "+sve2,+sve,+sve2-sm4,+fullfp16,+fp-armv8,+neon", 420>;
+
+def FeatureSVE2SHA3 : Extension<"sve2-sha3", "SVE2SHA3",
+  "Enable SHA3 SVE2 instructions (FEAT_SVE_SHA3)", [FeatureSVE2, FeatureSHA3],
+  "FEAT_SVE_SHA3", "+sve2,+sve,+sve2-sha3,+fullfp16,+fp-armv8,+neon", 410>;
+
+def FeatureSVE2BitPerm : Extension<"sve2-bitperm", "SVE2BitPerm",
+  "Enable bit permutation SVE2 instructions (FEAT_SVE_BitPerm)", [FeatureSVE2],
+  "FEAT_SVE_BITPERM", "+sve2,+sve,+sve2-bitperm,+fullfp16,+fp-armv8,+neon", 400>;
+
+def FeatureTRBE : SubtargetFeature<"trbe", "TRBE", "true",
+    "Enable Trace Buffer Extension (FEAT_TRBE)">;
+
+def FeatureETE : SubtargetFeature<"ete", "ETE", "true",
+    "Enable Embedded Trace Extension (FEAT_ETE)",
+    [FeatureTRBE]>;
+
+def FeatureTME : Extension<"tme", "TME",
+    "Enable Transactional Memory Extension (FEAT_TME)" >;
+
+//===----------------------------------------------------------------------===//
+//  Armv9.1 Architecture Extensions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//  Armv9.2 Architecture Extensions
+//===----------------------------------------------------------------------===//
+
+def FeatureBRBE : Extension<"brbe", "BRBE",
+    "Enable Branch Record Buffer Extension (FEAT_BRBE)">;
 
 def FeatureRME : SubtargetFeature<"rme", "HasRME",
     "true", "Enable Realm Management Extension (FEAT_RME)">;
@@ -645,10 +516,28 @@ def FeatureSMEI16I64 : Extension<"sme-i16i64", "SMEI16I64",
 def FeatureSMEFA64 : Extension<"sme-fa64", "SMEFA64",
   "Enable the full A64 instruction set in streaming SVE mode (FEAT_SME_FA64)", [FeatureSME, FeatureSVE2]>;
 
+//===----------------------------------------------------------------------===//
+//  Armv9.3 Architecture Extensions
+//===----------------------------------------------------------------------===//
+
 def FeatureSME2 : Extension<"sme2", "SME2",
   "Enable Scalable Matrix Extension 2 (SME2) instructions", [FeatureSME],
   "FEAT_SME2", "+sme2,+sme,+bf16", 580>;
 
+def FeatureMEC : SubtargetFeature<"mec", "HasMEC",
+    "true", "Enable Memory Encryption Contexts Extension", [FeatureRME]>;
+
+//===----------------------------------------------------------------------===//
+//  Armv9.4 Architecture Extensions
+//===----------------------------------------------------------------------===//
+
+let FMVDependencies = "+sve2p1,+sve2,+sve,+fullfp16,+fp-armv8,+neon" in
+def FeatureSVE2p1: Extension<"sve2p1", "SVE2p1",
+  "Enable Scalable Vector Extension 2.1 instructions", [FeatureSVE2]>;
+
+def FeatureB16B16 : Extension<"b16b16", "B16B16",
+  "Enable SVE2.1 or SME2.1 non-widening BFloat16 to BFloat16 instructions (FEAT_B16B16)", [FeatureBF16]>;
+
 let FMVDependencies = "+sme2,+sme-f16f16" in
 def FeatureSMEF16F16 : Extension<"sme-f16f16", "SMEF16F16",
   "Enable SME non-widening Float16 instructions (FEAT_SME_F16F16)", [FeatureSME2]>;
@@ -657,6 +546,31 @@ let FMVDependencies = "+sme2p1,+sme2,+sme,+bf16" in
 def FeatureSME2p1 : Extension<"sme2p1", "SME2p1",
   "Enable Scalable Matrix Extension 2.1 (FEAT_SME2p1) instructions", [FeatureSME2]>;
 
+def FeatureCHK : SubtargetFeature<"chk", "HasCHK",
+    "true", "Enable Armv8.0-A Check Feature Status Extension (FEAT_CHK)">;
+
+def FeatureGCS : Extension<"gcs", "GCS",
+    "Enable Armv9.4-A Guarded Call Stack Extension", [FeatureCHK]>;
+
+def FeatureITE : Extension<"ite", "ITE",
+    "Enable Armv9.4-A Instrumentation Extension FEAT_ITE", [FeatureETE,
+    FeatureTRBE]>;
+
+def FeatureLSE128 : Extension<"lse128", "LSE128",
+    "Enable Armv9.4-A 128-bit Atomic Instructions (FEAT_LSE128)",
+    [FeatureLSE]>;
+
+// FEAT_D128, FEAT_LVA3, FEAT_SYSREG128, and FEAT_SYSINSTR128 are mutually implicit.
+// Therefore group them all under a single feature flag, d128:
+def FeatureD128 : Extension<"d128", "D128",
+    "Enable Armv9.4-A 128-bit Page Table Descriptors, System Registers "
+    "and Instructions (FEAT_D128, FEAT_LVA3, FEAT_SYSREG128, FEAT_SYSINSTR128)",
+    [FeatureLSE128]>;
+
+//===----------------------------------------------------------------------===//
+//  Armv9.5 Architecture Extensions
+//===----------------------------------------------------------------------===//
+
 def FeatureFAMINMAX: Extension<"faminmax", "FAMINMAX",
    "Enable FAMIN and FAMAX instructions (FEAT_FAMINMAX)">;
 
@@ -698,68 +612,230 @@ let FMVDependencies = "+fp8,+sme2" in
 def FeatureSMEF8F16 : Extension<"sme-f8f16", "SMEF8F16",
   "Enable Scalable Matrix Extension (SME) F8F16 instructions(FEAT_SME_F8F16)", [FeatureSMEF8F32]>;
 
-def FeatureAppleA7SysReg  : SubtargetFeature<"apple-a7-sysreg", "HasAppleA7SysReg", "true",
-  "Apple A7 (the CPU formerly known as Cyclone)">;
+def FeatureCPA : Extension<"cpa", "CPA",
+    "Enable Armv9.5-A Checked Pointer Arithmetic (FEAT_CPA)">;
 
-def FeatureEL2VMSA : SubtargetFeature<"el2vmsa", "HasEL2VMSA", "true",
-  "Enable Exception Level 2 Virtual Memory System Architecture">;
+def FeaturePAuthLR : Extension<"pauth-lr", "PAuthLR",
+    "Enable Armv9.5-A PAC enhancements (FEAT_PAuth_LR)">;
 
-def FeatureEL3 : SubtargetFeature<"el3", "HasEL3", "true",
-  "Enable Exception Level 3">;
+def FeatureTLBIW : Extension<"tlbiw", "TLBIW",
+  "Enable ARMv9.5-A TLBI VMALL for Dirty State (FEAT_TLBIW)">;
 
-def FeatureCSSC : Extension<"cssc", "CSSC",
-  "Enable Common Short Sequence Compression (CSSC) instructions (FEAT_CSSC)">;
+//===----------------------------------------------------------------------===//
+//  Other Features
+//===----------------------------------------------------------------------===//
 
-def FeatureFixCortexA53_835769 : SubtargetFeature<"fix-cortex-a53-835769",
-  "FixCortexA53_835769", "true", "Mitigate Cortex-A53 Erratum 835769">;
+def FeatureOutlineAtomics : SubtargetFeature<"outline-atomics", "OutlineAtomics", "true",
+  "Enable out of line atomics to support LSE instructions">;
 
-def FeatureNoBTIAtReturnTwice : SubtargetFeature<"no-bti-at-return-twice",
-                                                 "NoBTIAtReturnTwice", "true",
-                                                 "Don't place a BTI instruction "
-                                                 "after a return-twice">;
+def FeatureFMV : SubtargetFeature<"fmv", "HasFMV", "true",
+  "Enable Function Multi Versioning support.">;
 
-def FeatureCHK : SubtargetFeature<"chk", "HasCHK",
-    "true", "Enable Armv8.0-A Check Feature Status Extension (FEAT_CHK)">;
+// This flag is currently still labeled as Experimental, but when fully
+// implemented this should tell the compiler to use the zeroing pseudos to
+// benefit from the reverse instructions (e.g. SUB vs SUBR) if the inactive
+// lanes are known to be zero. The pseudos will then be expanded using the
+// MOVPRFX instruction to zero the inactive lanes. This feature should only be
+// enabled if MOVPRFX instructions are known to merge with the destructive
+// operations they prefix.
+//
+// This feature could similarly be extended to support cheap merging of _any_
+// value into the inactive lanes using the MOVPRFX instruction that uses
+// merging-predication.
+def FeatureExperimentalZeroingPseudos
+    : SubtargetFeature<"use-experimental-zeroing-pseudos",
+                       "UseExperimentalZeroingPseudos", "true",
+                       "Hint to the compiler that the MOVPRFX instruction is "
+                       "merged with destructive operations",
+                       []>;
 
-def FeatureGCS : Extension<"gcs", "GCS",
-    "Enable Armv9.4-A Guarded Call Stack Extension", [FeatureCHK]>;
+def FeatureNoSVEFPLD1R : SubtargetFeature<"no-sve-fp-ld1r",
+  "NoSVEFPLD1R", "true", "Avoid using LD1RX instructions for FP">;
 
-def FeatureCLRBHB : SubtargetFeature<"clrbhb", "HasCLRBHB",
-    "true", "Enable Clear BHB instruction (FEAT_CLRBHB)">;
+def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
+                                        "Has zero-cycle register moves">;
 
-def FeaturePRFM_SLC : SubtargetFeature<"prfm-slc-target", "HasPRFM_SLC",
-    "true", "Enable SLC target for PRFM instruction">;
+def FeatureZCZeroingGP : SubtargetFeature<"zcz-gp", "HasZeroCycleZeroingGP", "true",
+                                        "Has zero-cycle zeroing instructions for generic registers">;
 
-let MArchName = "predres2" in
-def FeatureSPECRES2 : Extension<"specres2", "SPECRES2",
-    "Enable Speculation Restriction Instruction (FEAT_SPECRES2)",
-    [FeaturePredRes]>;
+// It is generally beneficial to rewrite "fmov s0, wzr" to "movi d0, #0".
+// as movi is more efficient across all cores. Newer cores can eliminate
+// fmovs early and there is no difference with movi, but this not true for
+// all implementations.
+def FeatureNoZCZeroingFP : SubtargetFeature<"no-zcz-fp", "HasZeroCycleZeroingFP", "false",
+                                        "Has no zero-cycle zeroing instructions for FP registers">;
 
-def FeatureMEC : SubtargetFeature<"mec", "HasMEC",
-    "true", "Enable Memory Encryption Contexts Extension", [FeatureRME]>;
+def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
+                                        "Has zero-cycle zeroing instructions",
+                                        [FeatureZCZeroingGP]>;
 
-def FeatureITE : Extension<"ite", "ITE",
-    "Enable Armv9.4-A Instrumentation Extension FEAT_ITE", [FeatureETE,
-    FeatureTRBE]>;
+/// ... but the floating-point version doesn't quite work in rare cases on older
+/// CPUs.
+def FeatureZCZeroingFPWorkaround : SubtargetFeature<"zcz-fp-workaround",
+    "HasZeroCycleZeroingFPWorkaround", "true",
+    "The zero-cycle floating-point zeroing instruction has a bug">;
 
-def FeatureRCPC3 : Extension<"rcpc3", "RCPC3",
-    "Enable Armv8.9-A RCPC instructions for A64 and Advanced SIMD and floating-point instruction set (FEAT_LRCPC3)",
-    [FeatureRCPC_IMMO],
-    "FEAT_RCPC3", "+rcpc,+rcpc3", 241>;
+def FeatureStrictAlign : SubtargetFeature<"strict-align",
+                                          "RequiresStrictAlign", "true",
+                                          "Disallow all unaligned memory "
+                                          "access">;
 
-def FeatureTHE : Extension<"the", "THE",
-    "Enable Armv8.9-A Translation Hardening Extension (FEAT_THE)">;
+foreach i = {1-7,9-15,18,20-28} in
+    def FeatureReserveX#i : SubtargetFeature<"reserve-x"#i, "ReserveXRegister["#i#"]", "true",
+                                             "Reserve X"#i#", making it unavailable "
+                                             "as a GPR">;
 
-def FeatureLSE128 : Extension<"lse128", "LSE128",
-    "Enable Armv9.4-A 128-bit Atomic Instructions (FEAT_LSE128)",
-    [FeatureLSE]>;
+foreach i = {8-15,18} in
+    def FeatureCallSavedX#i : SubtargetFeature<"call-saved-x"#i,
+         "CustomCallSavedXRegs["#i#"]", "true", "Make X"#i#" callee saved.">;
 
-// FEAT_D128, FEAT_LVA3, FEAT_SYSREG128, and FEAT_SYSINSTR128 are mutually implicit.
-// Therefore group them all under a single feature flag, d128:
-def FeatureD128 : Extension<"d128", "D128",
-    "Enable Armv9.4-A 128-bit Page Table Descriptors, System Registers "
-    "and Instructions (FEAT_D128, FEAT_LVA3, FEAT_SYSREG128, FEAT_SYSINSTR128)",
-    [FeatureLSE128]>;
+def FeatureBalanceFPOps : SubtargetFeature<"balance-fp-ops", "BalanceFPOps",
+    "true",
+    "balance mix of odd and even D-registers for fp multiply(-accumulate) ops">;
+
+def FeaturePredictableSelectIsExpensive : SubtargetFeature<
+    "predictable-select-expensive", "PredictableSelectIsExpensive", "true",
+    "Prefer likely predicted branches over selects">;
+
+def FeatureEnableSelectOptimize : SubtargetFeature<
+    "enable-select-opt", "EnableSelectOptimize", "true",
+    "Enable the select optimize pass for select loop heuristics">;
+
+def FeatureExynosCheapAsMoveHandling : SubtargetFeature<"exynos-cheap-as-move",
+    "HasExynosCheapAsMoveHandling", "true",
+    "Use Exynos specific handling of cheap instructions">;
+
+def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
+    "UsePostRAScheduler", "true", "Schedule again after register allocation">;
+
+def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store",
+    "IsMisaligned128StoreSlow", "true", "Misaligned 128 bit stores are slow">;
+
+def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128",
+    "IsPaired128Slow", "true", "Paired 128 bit loads and stores are slow">;
+
+def FeatureAscendStoreAddress : SubtargetFeature<"ascend-store-address",
+    "IsStoreAddressAscend", "true",
+    "Schedule vector stores by ascending address">;
+
+def FeatureSlowSTRQro : SubtargetFeature<"slow-strqro-store", "IsSTRQroSlow",
+    "true", "STR of Q register with register offset is slow">;
+
+def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature<
+    "alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern",
+    "true", "Use alternative pattern for sextload convert to f32">;
+
+def FeatureArithmeticBccFusion : SubtargetFeature<
+    "arith-bcc-fusion", "HasArithmeticBccFusion", "true",
+    "CPU fuses arithmetic+bcc operations">;
+
+def FeatureArithmeticCbzFusion : SubtargetFeature<
+    "arith-cbz-fusion", "HasArithmeticCbzFusion", "true",
+    "CPU fuses arithmetic + cbz/cbnz operations">;
+
+def FeatureCmpBccFusion : SubtargetFeature<
+    "cmp-bcc-fusion", "HasCmpBccFusion", "true",
+    "CPU fuses cmp+bcc operations">;
+
+def FeatureFuseAddress : SubtargetFeature<
+    "fuse-address", "HasFuseAddress", "true",
+    "CPU fuses address generation and memory operations">;
+
+def FeatureFuseAES : SubtargetFeature<
+    "fuse-aes", "HasFuseAES", "true",
+    "CPU fuses AES crypto operations">;
+
+def FeatureFuseArithmeticLogic : SubtargetFeature<
+    "fuse-arith-logic", "HasFuseArithmeticLogic", "true",
+    "CPU fuses arithmetic and logic operations">;
+
+def FeatureFuseCCSelect : SubtargetFeature<
+    "fuse-csel", "HasFuseCCSelect", "true",
+    "CPU fuses conditional select operations">;
+
+def FeatureFuseCryptoEOR : SubtargetFeature<
+    "fuse-crypto-eor", "HasFuseCryptoEOR", "true",
+    "CPU fuses AES/PMULL and EOR operations">;
+
+def FeatureFuseAdrpAdd : SubtargetFeature<
+    "fuse-adrp-add", "HasFuseAdrpAdd", "true",
+    "CPU fuses adrp+add operations">;
+
+def FeatureFuseLiterals : SubtargetFeature<
+    "fuse-literals", "HasFuseLiterals", "true",
+    "CPU fuses literal generation operations">;
+
+def FeatureFuseAddSub2RegAndConstOne : SubtargetFeature<
+   "fuse-addsub-2reg-const1", "HasFuseAddSub2RegAndConstOne", "true",
+   "CPU fuses (a + b + 1) and (a - b - 1)">;
+
+def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
+    "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
+    "Disable latency scheduling heuristic">;
+
+def FeatureStorePairSuppress : SubtargetFeature<
+    "store-pair-suppress", "EnableStorePairSuppress", "true",
+    "Enable Store Pair Suppression heuristics">;
+
+def FeatureForce32BitJumpTables
+   : SubtargetFeature<"force-32bit-jump-tables", "Force32BitJumpTables", "true",
+                      "Force jump table entries to be 32-bits wide except at MinSize">;
+
+def FeatureUseRSqrt : SubtargetFeature<
+    "use-reciprocal-square-root", "UseRSqrt", "true",
+    "Use the reciprocal square root approximation">;
+
+def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates",
+                                        "NegativeImmediates", "false",
+                                        "Convert immediates and instructions "
+                                        "to their negated or complemented "
+                                        "equivalent when the immediate does "
+                                        "not fit in the encoding.">;
+
+// Address operands with shift amount 2 or 3 are fast on all Arm chips except
+// some old Apple cores (A7-A10?) which handle all shifts slowly. Cortex-A57
+// and derived designs through Cortex-X1 take an extra micro-op for shifts
+// of 1 or 4. Other Arm chips handle all shifted operands at the same speed
+// as unshifted operands.
+//
+// We don't try to model the behavior of the old Apple cores because new code
+// targeting A7 is very unlikely to actually run on an A7. The Cortex cores
+// are modeled by FeatureAddrLSLSlow14.
+def FeatureAddrLSLSlow14 : SubtargetFeature<
+    "addr-lsl-slow-14", "HasAddrLSLSlow14", "true",
+    "Address operands with shift amount of 1 or 4 are slow">;
+
+def FeatureALULSLFast : SubtargetFeature<
+    "alu-lsl-fast", "HasALULSLFast", "true",
+    "Add/Sub operations with lsl shift <= 4 are cheap">;
+
+def FeatureAggressiveFMA :
+  SubtargetFeature<"aggressive-fma",
+                   "HasAggressiveFMA",
+                   "true",
+                   "Enable Aggressive FMA for floating-point.">;
+
+def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals",
+    "AllowTaggedGlobals",
+    "true", "Use an instruction sequence for taking the address of a global "
+    "that allows a memory tag in the upper address bits">;
+
+def FeatureAppleA7SysReg  : SubtargetFeature<"apple-a7-sysreg", "HasAppleA7SysReg", "true",
+  "Apple A7 (the CPU formerly known as Cyclone)">;
+
+def FeatureEL2VMSA : SubtargetFeature<"el2vmsa", "HasEL2VMSA", "true",
+  "Enable Exception Level 2 Virtual Memory System Architecture">;
+
+def FeatureEL3 : SubtargetFeature<"el3", "HasEL3", "true",
+  "Enable Exception Level 3">;
+
+def FeatureFixCortexA53_835769 : SubtargetFeature<"fix-cortex-a53-835769",
+  "FixCortexA53_835769", "true", "Mitigate Cortex-A53 Erratum 835769">;
+
+def FeatureNoBTIAtReturnTwice : SubtargetFeature<"no-bti-at-return-twice",
+                                                 "NoBTIAtReturnTwice", "true",
+                                                 "Don't place a BTI instruction "
+                                                 "after a return-twice">;
 
 def FeatureDisableLdp : SubtargetFeature<"disable-ldp", "HasDisableLdp",
     "true", "Do not emit ldp">;
@@ -773,18 +849,6 @@ def FeatureLdpAlignedOnly : SubtargetFeature<"ldp-aligned-only", "HasLdpAlignedO
 def FeatureStpAlignedOnly : SubtargetFeature<"stp-aligned-only", "HasStpAlignedOnly",
     "true", "In order to emit stp, first check if the store will be aligned to 2 * element_size">;
 
-// AArch64 2023 Architecture Extensions (v9.5-A)
-
-def FeatureCPA : Extension<"cpa", "CPA",
-    "Enable Armv9.5-A Checked Pointer Arithmetic (FEAT_CPA)">;
-
-def FeaturePAuthLR : Extension<"pauth-lr", "PAuthLR",
-    "Enable Armv9.5-A PAC enhancements (FEAT_PAuth_LR)">;
-
-def FeatureTLBIW : Extension<"tlbiw", "TLBIW",
-  "Enable ARMv9.5-A TLBI VMALL for Dirty State (FEAT_TLBIW)">;
-
-
 //===----------------------------------------------------------------------===//
 // Architectures.
 //
author	Lucas Duarte Prates <lucas.prates@arm.com>	2024-06-19 10:28:47 +0100
committer	GitHub <noreply@github.com>	2024-06-19 10:28:47 +0100
commit	116384a028f0e3801b9443e227e0840897202168 (patch)
tree	f54623a6218b5661a8fe304af5c1729676367c33
parent	cfb4dce90ce66acbd094e443f422c4e5c413a9e8 (diff)
download	llvm-116384a028f0e3801b9443e227e0840897202168.zip llvm-116384a028f0e3801b9443e227e0840897202168.tar.gz llvm-116384a028f0e3801b9443e227e0840897202168.tar.bz2