aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--bolt/include/bolt/Core/BinaryFunction.h8
-rw-r--r--bolt/lib/Core/BinaryFunction.cpp13
-rw-r--r--bolt/lib/Rewrite/RewriteInstance.cpp4
-rw-r--r--clang/docs/ClangOffloadBundler.rst38
-rw-r--r--clang/lib/Interpreter/Interpreter.cpp7
-rw-r--r--clang/test/ClangScanDeps/resource_directory.c3
-rw-r--r--clang/test/Driver/baremetal-multilib-custom-error.yaml1
-rw-r--r--clang/test/Frontend/absolute-paths-symlinks.c1
-rw-r--r--clang/test/Tooling/clang-check-pwd.cpp2
-rw-r--r--compiler-rt/lib/builtins/cpu_model/x86.c246
-rw-r--r--libc/src/setjmp/x86_64/sigsetjmp.cpp3
-rw-r--r--libcxx/cmake/caches/Armv7Arm.cmake2
-rw-r--r--libcxx/cmake/caches/Armv7Thumb-no-exceptions.cmake2
-rw-r--r--libcxx/cmake/caches/Armv8Arm.cmake2
-rw-r--r--libcxx/cmake/caches/Armv8Thumb-no-exceptions.cmake2
-rw-r--r--libcxx/cmake/caches/Generic-hardening-mode-debug.cmake2
-rw-r--r--libcxx/cmake/caches/Generic-hardening-mode-extensive.cmake2
-rw-r--r--libcxx/cmake/caches/Generic-hardening-mode-fast-with-abi-breaks.cmake2
-rw-r--r--libcxx/cmake/caches/Generic-hardening-mode-fast.cmake2
-rw-r--r--libcxx/cmake/caches/Generic-merged.cmake3
-rw-r--r--libcxx/cmake/caches/Generic-msan.cmake2
-rw-r--r--libcxx/cmake/caches/Generic-optimized-speed.cmake2
-rw-r--r--libcxx/cmake/caches/Generic-static.cmake2
-rw-r--r--libcxx/cmake/caches/Generic-tsan.cmake2
-rw-r--r--libcxx/cmake/caches/Generic-ubsan.cmake2
-rw-r--r--libcxx/include/__flat_map/flat_map.h1
-rw-r--r--libcxx/include/__flat_map/flat_multimap.h1
-rw-r--r--libcxx/include/__flat_set/flat_multiset.h1
-rw-r--r--libcxx/test/libcxx/containers/views/mdspan/layout_stride/assert.ctor.extents_span.non_unique.pass.cpp3
-rw-r--r--libcxx/test/libcxx/utilities/utility/__murmur2_or_cityhash.abi-v2.pass.cpp1
-rw-r--r--libcxx/test/std/iterators/iterator.container/ssize.LWG3207.compile.pass.cpp1
-rw-r--r--llvm/docs/Security.rst2
-rw-r--r--llvm/docs/YamlIO.rst8
-rw-r--r--llvm/include/llvm/ADT/ImmutableSet.h5
-rw-r--r--llvm/include/llvm/Analysis/DDG.h8
-rw-r--r--llvm/include/llvm/Analysis/InteractiveModelRunner.h2
-rw-r--r--llvm/include/llvm/Analysis/MLInlineAdvisor.h4
-rw-r--r--llvm/include/llvm/Analysis/ReleaseModeModelRunner.h2
-rw-r--r--llvm/include/llvm/Analysis/StackSafetyAnalysis.h2
-rw-r--r--llvm/include/llvm/DWARFCFIChecker/DWARFCFIFunctionFrameAnalyzer.h2
-rw-r--r--llvm/include/llvm/DWARFLinker/Classic/DWARFStreamer.h2
-rw-r--r--llvm/include/llvm/DWARFLinker/Parallel/DWARFLinker.h2
-rw-r--r--llvm/include/llvm/DebugInfo/PDB/PDBTypes.h5
-rw-r--r--llvm/include/llvm/Debuginfod/BuildIDFetcher.h2
-rw-r--r--llvm/include/llvm/IR/DroppedVariableStatsIR.h9
-rw-r--r--llvm/include/llvm/IR/OptBisect.h2
-rw-r--r--llvm/include/llvm/MC/MCDXContainerWriter.h2
-rw-r--r--llvm/include/llvm/MC/MCELFObjectWriter.h2
-rw-r--r--llvm/include/llvm/MC/MCGOFFObjectWriter.h2
-rw-r--r--llvm/include/llvm/MC/MCMachObjectWriter.h2
-rw-r--r--llvm/include/llvm/MC/MCObjectStreamer.h2
-rw-r--r--llvm/include/llvm/MC/MCWasmObjectWriter.h2
-rw-r--r--llvm/include/llvm/MC/MCWinCOFFObjectWriter.h2
-rw-r--r--llvm/include/llvm/MCA/HardwareUnits/LSUnit.h12
-rw-r--r--llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h2
-rw-r--r--llvm/include/llvm/MCA/HardwareUnits/Scheduler.h2
-rw-r--r--llvm/include/llvm/MCA/View.h2
-rw-r--r--llvm/include/llvm/ObjCopy/ConfigManager.h2
-rw-r--r--llvm/include/llvm/Object/GOFFObjectFile.h4
-rw-r--r--llvm/include/llvm/ProfileData/MemProfReader.h2
-rw-r--r--llvm/include/llvm/ProfileData/PGOCtxProfWriter.h2
-rw-r--r--llvm/include/llvm/SandboxIR/BasicBlock.h2
-rw-r--r--llvm/include/llvm/SandboxIR/PassManager.h2
-rw-r--r--llvm/include/llvm/Target/TargetLoweringObjectFile.h2
-rw-r--r--llvm/include/llvm/Transforms/IPO/Attributor.h4
-rw-r--r--llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.h2
-rw-r--r--llvm/include/llvm/XRay/FDRRecords.h2
-rw-r--r--llvm/include/llvm/XRay/FDRTraceWriter.h2
-rw-r--r--llvm/lib/Analysis/InlineCost.cpp2
-rw-r--r--llvm/lib/Analysis/InstructionSimplify.cpp92
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp3
-rw-r--r--llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp5
-rw-r--r--llvm/lib/CodeGen/TailDuplicator.cpp6
-rw-r--r--llvm/lib/IR/AsmWriter.cpp4
-rw-r--r--llvm/lib/LTO/LTO.cpp6
-rw-r--r--llvm/lib/MC/GOFFObjectWriter.cpp2
-rw-r--r--llvm/lib/MC/MCAsmStreamer.cpp2
-rw-r--r--llvm/lib/MC/MCParser/AsmLexer.cpp9
-rw-r--r--llvm/lib/MC/MCParser/AsmParser.cpp2
-rw-r--r--llvm/lib/MC/XCOFFObjectWriter.cpp8
-rw-r--r--llvm/lib/ObjCopy/ELF/ELFObject.h12
-rw-r--r--llvm/lib/ObjectYAML/GOFFEmitter.cpp2
-rw-r--r--llvm/lib/Support/APFloat.cpp5
-rw-r--r--llvm/lib/Support/LSP/Protocol.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp6
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp5
-rw-r--r--llvm/lib/Target/M68k/M68kTargetMachine.cpp10
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetMachine.cpp5
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp17
-rw-r--r--llvm/lib/Transforms/IPO/AttributorAttributes.cpp16
-rw-r--r--llvm/lib/Transforms/IPO/OpenMPOpt.cpp2
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineInternal.h2
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp17
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp28
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.h10
-rw-r--r--llvm/test/MC/AsmParser/comments-x86-darwin-eol-dropped.s10
-rw-r--r--llvm/test/Transforms/InstCombine/select_with_identical_phi.ll417
-rw-r--r--llvm/test/Transforms/LoopVectorize/AArch64/multiple-result-intrinsics.ll30
-rw-r--r--llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll174
-rw-r--r--llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll123
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/buildvector-reused-with-bv-subvector.ll2
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll4
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-extern-use.ll45
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/parent-node-non-schedulable.ll2
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/same-last-instruction-different-parents.ll6
-rw-r--r--llvm/tools/llvm-exegesis/lib/X86/Target.cpp2
-rw-r--r--llvm/tools/llvm-jitlink/llvm-jitlink-statistics.cpp2
-rw-r--r--llvm/tools/llvm-mca/Views/InstructionView.h2
-rw-r--r--llvm/tools/llvm-readobj/ELFDumper.cpp2
-rw-r--r--llvm/unittests/ADT/APFloatTest.cpp83
-rw-r--r--llvm/unittests/ADT/TrieRawHashMapTest.cpp2
-rw-r--r--llvm/unittests/CAS/CASTestConfig.h4
-rw-r--r--llvm/unittests/MC/SystemZ/SystemZMCDisassemblerTest.cpp2
-rw-r--r--llvm/unittests/MC/X86/X86MCDisassemblerTest.cpp2
-rw-r--r--llvm/unittests/SandboxIR/PassTest.cpp6
-rw-r--r--llvm/unittests/Support/ScopedPrinterTest.cpp2
-rw-r--r--llvm/unittests/Transforms/Utils/ValueMapperTest.cpp3
-rw-r--r--llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.h4
-rw-r--r--llvm/utils/TableGen/Common/GlobalISel/Patterns.h2
-rw-r--r--llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp2
120 files changed, 1130 insertions, 563 deletions
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index 118d579..b215a15 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -620,13 +620,11 @@ private:
}
/// Return a label at a given \p Address in the function. If the label does
- /// not exist - create it. Assert if the \p Address does not belong to
- /// the function. If \p CreatePastEnd is true, then return the function
- /// end label when the \p Address points immediately past the last byte
- /// of the function.
+ /// not exist - create it.
+ ///
/// NOTE: the function always returns a local (temp) symbol, even if there's
/// a global symbol that corresponds to an entry at this address.
- MCSymbol *getOrCreateLocalLabel(uint64_t Address, bool CreatePastEnd = false);
+ MCSymbol *getOrCreateLocalLabel(uint64_t Address);
/// Register an data entry at a given \p Offset into the function.
void markDataAtOffset(uint64_t Offset) {
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 776f4ae..84023ef 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1035,13 +1035,9 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size,
return BranchType;
}
-MCSymbol *BinaryFunction::getOrCreateLocalLabel(uint64_t Address,
- bool CreatePastEnd) {
+MCSymbol *BinaryFunction::getOrCreateLocalLabel(uint64_t Address) {
const uint64_t Offset = Address - getAddress();
- if ((Offset == getSize()) && CreatePastEnd)
- return getFunctionEndLabel();
-
auto LI = Labels.find(Offset);
if (LI != Labels.end())
return LI->second;
@@ -1052,6 +1048,9 @@ MCSymbol *BinaryFunction::getOrCreateLocalLabel(uint64_t Address,
return IslandSym;
}
+ if (Offset == getSize())
+ return getFunctionEndLabel();
+
MCSymbol *Label = BC.Ctx->createNamedTempSymbol();
Labels[Offset] = Label;
@@ -1994,7 +1993,7 @@ void BinaryFunction::postProcessJumpTables() {
if (IsBuiltinUnreachable) {
BinaryFunction *TargetBF = BC.getBinaryFunctionAtAddress(EntryAddress);
MCSymbol *Label = TargetBF ? TargetBF->getSymbol()
- : getOrCreateLocalLabel(EntryAddress, true);
+ : getOrCreateLocalLabel(EntryAddress);
JT.Entries.push_back(Label);
continue;
}
@@ -2005,7 +2004,7 @@ void BinaryFunction::postProcessJumpTables() {
BC.getBinaryFunctionContainingAddress(EntryAddress);
MCSymbol *Label;
if (HasOneParent && TargetBF == this) {
- Label = getOrCreateLocalLabel(EntryAddress, true);
+ Label = getOrCreateLocalLabel(EntryAddress);
} else {
const uint64_t Offset = EntryAddress - TargetBF->getAddress();
Label = Offset ? TargetBF->addEntryPointAtOffset(Offset)
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 6fa66ab..77e5688 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -2949,9 +2949,7 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
ReferencedSymbol =
ReferencedBF->addEntryPointAtOffset(RefFunctionOffset);
} else {
- ReferencedSymbol =
- ReferencedBF->getOrCreateLocalLabel(Address,
- /*CreatePastEnd =*/true);
+ ReferencedSymbol = ReferencedBF->getOrCreateLocalLabel(Address);
// If ContainingBF != nullptr, it equals ReferencedBF (see
// if-condition above) so we're handling a relocation from a function
diff --git a/clang/docs/ClangOffloadBundler.rst b/clang/docs/ClangOffloadBundler.rst
index 5570dbb..f63037a7 100644
--- a/clang/docs/ClangOffloadBundler.rst
+++ b/clang/docs/ClangOffloadBundler.rst
@@ -28,7 +28,7 @@ A bundled code object may also be used to bundle just the offloaded code
objects, and embedded as data into the host code object. The host compilation
includes an ``init`` function that will use the runtime corresponding to the
offload kind (see :ref:`clang-offload-kind-table`) to load the offload code
-objects appropriate to the devices present when the host program is executed.
+objects appropriate for the devices present when the host program is executed.
:program:`clang-offload-bundler` is located in
`clang/tools/clang-offload-bundler`.
@@ -147,7 +147,7 @@ bundle file is:
<end> ::== OFFLOAD_BUNDLER_MAGIC_STR__END__
**comment**
- The symbol used for starting single-line comment in the file type of
+ The symbol used for starting a single-line comment in the file type of
constituting bundles. E.g. it is ";" for ll ``File Type`` and "#" for "s"
``File Type``.
@@ -155,13 +155,13 @@ bundle file is:
The :ref:`clang-bundle-entry-id` for the enclosing bundle.
**eol**
- The end of line character.
+ The end-of-line character.
**bundle**
The code object stored in one of the supported text file formats.
**OFFLOAD_BUNDLER_MAGIC_STR__**
- Magic string that marks the existence of offloading data i.e.
+ The magic string that marks the existence of offloading data i.e.
"__CLANG_OFFLOAD_BUNDLE__".
.. _clang-bundled-code-object-layout:
@@ -231,7 +231,7 @@ Where:
============= ==============================================================
host Host code object. ``clang-offload-bundler`` always includes
this entry as the first bundled code object entry. For an
- embedded bundled code object this entry is not used by the
+ embedded bundled code object, this entry is not used by the
runtime and so is generally an empty code object.
hip Offload code object for the HIP language. Used for all
@@ -272,7 +272,7 @@ without differentiation based on offload kind.
``<arch><sub>-<vendor>-<sys>-<env>``
However, in order to standardize outputs for tools that consume bitcode bundles
- and to parse target ID containing dashes, the bundler only accepts target
+ and to parse a target ID containing dashes, the bundler only accepts target
triples in the 4-field format:
``<arch><sub>-<vendor>-<sys>-<env>``
@@ -292,7 +292,7 @@ Bundled Code Object Composition
* If there is an entry with a target feature specified as *Any*, then all
entries must specify that target feature as *Any* for the same processor.
-There may be additional target specific restrictions.
+There may be additional target-specific restrictions.
.. _compatibility-bundle-entry-id:
@@ -300,9 +300,9 @@ Compatibility Rules for Bundle Entry ID
---------------------------------------
A code object, specified using its Bundle Entry ID, can be loaded and
- executed on a target processor, if:
+ executed on a target processor if:
- * Their offload kinds are the same or comptible.
+ * Their offload kinds are the same or compatible.
* Their target triples are compatible.
* Their Target IDs are compatible as defined in :ref:`compatibility-target-id`.
@@ -331,7 +331,7 @@ Target ID syntax is defined by the following BNF syntax:
Where:
**processor**
- Is a the target specific processor or any alternative processor name.
+ Is the target-specific processor or any alternative processor name.
**target-feature**
Is a target feature name that is supported by the processor. Each target
@@ -350,7 +350,7 @@ Where:
can only be loaded on a processor configured with the target feature on.
*Off*
- specified by ``-``, indicating the target feature is disabled. A code
+ Specified by ``-``, indicating the target feature is disabled. A code
object compiled with a target ID specifying a target feature off
can only be loaded on a processor configured with the target feature off.
@@ -360,9 +360,9 @@ Compatibility Rules for Target ID
---------------------------------
A code object compiled for a Target ID is considered compatible for a
- target, if:
+ target if:
- * Their processor is same.
+ * Their processor is the same.
* Their feature set is compatible as defined above.
There are two forms of target ID:
@@ -380,10 +380,10 @@ There are two forms of target ID:
alphabetic order. Command line tools convert non-canonical form to canonical
form.
-Target Specific information
+Target-Specific information
===========================
-Target specific information is available for the following:
+Target-specific information is available for the following:
*AMD GPU*
AMD GPU supports target ID and target features. See `User Guide for AMDGPU Backend
@@ -397,7 +397,7 @@ Most other targets do not support target IDs.
Archive Unbundling
==================
-Unbundling of a heterogeneous device archive (HDA) is done to create device specific
+Unbundling of a heterogeneous device archive (HDA) is done to create device-specific
archives. HDA is in a format compatible with GNU ``ar`` utility and contains a
collection of bundled device binaries where each bundle file will contain
device binaries for a host and one or more targets. The output device-specific
@@ -469,7 +469,7 @@ compatible with that particular offload target. Compatibility between a
device binary in HDA and a target is based on the compatibility between their
bundle entry IDs as defined in :ref:`compatibility-bundle-entry-id`.
-Following cases may arise during compatibility testing:
+The following cases may arise during compatibility testing:
* A binary is compatible with one or more targets: Insert the binary into the
device-specific archive of each compatible target.
@@ -517,7 +517,7 @@ Compression and Decompression
``clang-offload-bundler`` provides features to compress and decompress the full
bundle, leveraging inherent redundancies within the bundle entries. Use the
-`-compress` command-line option to enable this compression capability.
+``-compress`` command-line option to enable this compression capability.
The compressed offload bundle begins with a header followed by the compressed binary data:
@@ -542,4 +542,4 @@ The compressed offload bundle begins with a header followed by the compressed bi
- **Compressed Data**:
The actual compressed binary data follows the header. Its size can be inferred from the total size of the file minus the header size.
- > **Note**: Version 3 is now the default format. For backward compatibility with older HIP runtimes that support version 2 only, set the environment variable `COMPRESSED_BUNDLE_FORMAT_VERSION=2`.
+ > **Note**: Version 3 is now the default format. For backward compatibility with older HIP runtimes that support version 2 only, set the environment variable ``COMPRESSED_BUNDLE_FORMAT_VERSION=2``.
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index b05cb5a..cde354c 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -581,12 +581,7 @@ Interpreter::Parse(llvm::StringRef Code) {
if (!TuOrErr)
return TuOrErr.takeError();
- PTUs.emplace_back(PartialTranslationUnit());
- PartialTranslationUnit &LastPTU = PTUs.back();
- LastPTU.TUPart = *TuOrErr;
-
- if (std::unique_ptr<llvm::Module> M = Act->GenModule())
- LastPTU.TheModule = std::move(M);
+ PartialTranslationUnit &LastPTU = IncrParser->RegisterPTU(*TuOrErr);
return LastPTU;
}
diff --git a/clang/test/ClangScanDeps/resource_directory.c b/clang/test/ClangScanDeps/resource_directory.c
index 6183e8a..5c4b24f 100644
--- a/clang/test/ClangScanDeps/resource_directory.c
+++ b/clang/test/ClangScanDeps/resource_directory.c
@@ -1,4 +1,5 @@
-// REQUIRES: shell
+// Path separator differences
+// UNSUPPORTED: system-windows
// RUN: rm -rf %t && mkdir %t
// RUN: cp %S/Inputs/resource_directory/* %t
diff --git a/clang/test/Driver/baremetal-multilib-custom-error.yaml b/clang/test/Driver/baremetal-multilib-custom-error.yaml
index 0be92e2..bc06ed4 100644
--- a/clang/test/Driver/baremetal-multilib-custom-error.yaml
+++ b/clang/test/Driver/baremetal-multilib-custom-error.yaml
@@ -1,4 +1,3 @@
-# REQUIRES: shell
# UNSUPPORTED: system-windows
# RUN: %clang --multi-lib-config=%s -no-canonical-prefixes -print-multi-directory 2>&1 \
diff --git a/clang/test/Frontend/absolute-paths-symlinks.c b/clang/test/Frontend/absolute-paths-symlinks.c
index 8170910..80bca34 100644
--- a/clang/test/Frontend/absolute-paths-symlinks.c
+++ b/clang/test/Frontend/absolute-paths-symlinks.c
@@ -12,6 +12,5 @@
// CHECK-SAME: error: unknown type name
This do not compile
-// REQUIRES: shell
// Don't make symlinks on Windows.
// UNSUPPORTED: system-windows
diff --git a/clang/test/Tooling/clang-check-pwd.cpp b/clang/test/Tooling/clang-check-pwd.cpp
index 309cee5..e4360c0 100644
--- a/clang/test/Tooling/clang-check-pwd.cpp
+++ b/clang/test/Tooling/clang-check-pwd.cpp
@@ -12,5 +12,3 @@
// CHECK: a type specifier is required
// CHECK: .foobar/test.cpp
invalid;
-
-// REQUIRES: shell
diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c
index d9ff116..c21b2ba 100644
--- a/compiler-rt/lib/builtins/cpu_model/x86.c
+++ b/compiler-rt/lib/builtins/cpu_model/x86.c
@@ -36,14 +36,14 @@ enum VendorSignatures {
SIG_AMD = 0x68747541, // Auth
};
-enum ProcessorVendors : unsigned int {
+enum ProcessorVendors {
VENDOR_INTEL = 1,
VENDOR_AMD,
VENDOR_OTHER,
VENDOR_MAX
};
-enum ProcessorTypes : unsigned int {
+enum ProcessorTypes {
INTEL_BONNELL = 1,
INTEL_CORE2,
INTEL_COREI7,
@@ -235,6 +235,19 @@ enum ProcessorFeatures {
CPU_FEATURE_MAX
};
+#ifndef _WIN32
+__attribute__((visibility("hidden")))
+#endif
+struct __processor_model {
+ unsigned int __cpu_vendor;
+ unsigned int __cpu_type;
+ unsigned int __cpu_subtype;
+ unsigned int __cpu_features[1];
+} __cpu_model = {0, 0, 0, {0}};
+
+static_assert(sizeof(__cpu_model) == 16,
+ "Wrong size of __cpu_model will result in ABI break");
+
// This code is copied from lib/Support/Host.cpp.
// Changes to either file should be mirrored in the other.
@@ -319,13 +332,17 @@ static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
#define testFeature(F) (Features[F / 32] & (1 << (F % 32))) != 0
-static const char *getIntelProcessorTypeAndSubtype(
- unsigned Family, unsigned Model, const unsigned *Features,
- enum ProcessorTypes *Type, enum ProcessorSubtypes *Subtype) {
+static const char *
+getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
+ const unsigned *Features,
+ struct __processor_model *CpuModel) {
// We select CPU strings to match the code in Host.cpp, but we don't use them
// in compiler-rt.
const char *CPU = 0;
+ enum ProcessorTypes Type = CPU_TYPE_MAX;
+ enum ProcessorSubtypes Subtype = CPU_SUBTYPE_MAX;
+
switch (Family) {
case 0x6:
switch (Model) {
@@ -337,7 +354,7 @@ static const char *getIntelProcessorTypeAndSubtype(
case 0x16: // Intel Celeron processor model 16h. All processors are
// manufactured using the 65 nm process
CPU = "core2";
- *Type = INTEL_CORE2;
+ Type = INTEL_CORE2;
break;
case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
// 17h. All processors are manufactured using the 45 nm process.
@@ -346,7 +363,7 @@ static const char *getIntelProcessorTypeAndSubtype(
case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
// the 45 nm process.
CPU = "penryn";
- *Type = INTEL_CORE2;
+ Type = INTEL_CORE2;
break;
case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
// processors are manufactured using the 45 nm process.
@@ -355,29 +372,29 @@ static const char *getIntelProcessorTypeAndSubtype(
case 0x1f:
case 0x2e: // Nehalem EX
CPU = "nehalem";
- *Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_NEHALEM;
+ Type = INTEL_COREI7;
+ Subtype = INTEL_COREI7_NEHALEM;
break;
case 0x25: // Intel Core i7, laptop version.
case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
// processors are manufactured using the 32 nm process.
case 0x2f: // Westmere EX
CPU = "westmere";
- *Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_WESTMERE;
+ Type = INTEL_COREI7;
+ Subtype = INTEL_COREI7_WESTMERE;
break;
case 0x2a: // Intel Core i7 processor. All processors are manufactured
// using the 32 nm process.
case 0x2d:
CPU = "sandybridge";
- *Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_SANDYBRIDGE;
+ Type = INTEL_COREI7;
+ Subtype = INTEL_COREI7_SANDYBRIDGE;
break;
case 0x3a:
case 0x3e: // Ivy Bridge EP
CPU = "ivybridge";
- *Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_IVYBRIDGE;
+ Type = INTEL_COREI7;
+ Subtype = INTEL_COREI7_IVYBRIDGE;
break;
// Haswell:
@@ -386,8 +403,8 @@ static const char *getIntelProcessorTypeAndSubtype(
case 0x45:
case 0x46:
CPU = "haswell";
- *Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_HASWELL;
+ Type = INTEL_COREI7;
+ Subtype = INTEL_COREI7_HASWELL;
break;
// Broadwell:
@@ -396,8 +413,8 @@ static const char *getIntelProcessorTypeAndSubtype(
case 0x4f:
case 0x56:
CPU = "broadwell";
- *Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_BROADWELL;
+ Type = INTEL_COREI7;
+ Subtype = INTEL_COREI7_BROADWELL;
break;
// Skylake:
@@ -408,61 +425,61 @@ static const char *getIntelProcessorTypeAndSubtype(
case 0xa5: // Comet Lake-H/S
case 0xa6: // Comet Lake-U
CPU = "skylake";
- *Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_SKYLAKE;
+ Type = INTEL_COREI7;
+ Subtype = INTEL_COREI7_SKYLAKE;
break;
// Rocketlake:
case 0xa7:
CPU = "rocketlake";
- *Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_ROCKETLAKE;
+ Type = INTEL_COREI7;
+ Subtype = INTEL_COREI7_ROCKETLAKE;
break;
// Skylake Xeon:
case 0x55:
- *Type = INTEL_COREI7;
+ Type = INTEL_COREI7;
if (testFeature(FEATURE_AVX512BF16)) {
CPU = "cooperlake";
- *Subtype = INTEL_COREI7_COOPERLAKE;
+ Subtype = INTEL_COREI7_COOPERLAKE;
} else if (testFeature(FEATURE_AVX512VNNI)) {
CPU = "cascadelake";
- *Subtype = INTEL_COREI7_CASCADELAKE;
+ Subtype = INTEL_COREI7_CASCADELAKE;
} else {
CPU = "skylake-avx512";
- *Subtype = INTEL_COREI7_SKYLAKE_AVX512;
+ Subtype = INTEL_COREI7_SKYLAKE_AVX512;
}
break;
// Cannonlake:
case 0x66:
CPU = "cannonlake";
- *Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_CANNONLAKE;
+ Type = INTEL_COREI7;
+ Subtype = INTEL_COREI7_CANNONLAKE;
break;
// Icelake:
case 0x7d:
case 0x7e:
CPU = "icelake-client";
- *Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_ICELAKE_CLIENT;
+ Type = INTEL_COREI7;
+ Subtype = INTEL_COREI7_ICELAKE_CLIENT;
break;
// Tigerlake:
case 0x8c:
case 0x8d:
CPU = "tigerlake";
- *Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_TIGERLAKE;
+ Type = INTEL_COREI7;
+ Subtype = INTEL_COREI7_TIGERLAKE;
break;
// Alderlake:
case 0x97:
case 0x9a:
CPU = "alderlake";
- *Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_ALDERLAKE;
+ Type = INTEL_COREI7;
+ Subtype = INTEL_COREI7_ALDERLAKE;
break;
// Raptorlake:
@@ -470,23 +487,23 @@ static const char *getIntelProcessorTypeAndSubtype(
case 0xba:
case 0xbf:
CPU = "raptorlake";
- *Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_ALDERLAKE;
+ Type = INTEL_COREI7;
+ Subtype = INTEL_COREI7_ALDERLAKE;
break;
// Meteorlake:
case 0xaa:
case 0xac:
CPU = "meteorlake";
- *Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_ALDERLAKE;
+ Type = INTEL_COREI7;
+ Subtype = INTEL_COREI7_ALDERLAKE;
break;
// Gracemont:
case 0xbe:
CPU = "gracemont";
- *Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_ALDERLAKE;
+ Type = INTEL_COREI7;
+ Subtype = INTEL_COREI7_ALDERLAKE;
break;
// Arrowlake:
@@ -494,72 +511,72 @@ static const char *getIntelProcessorTypeAndSubtype(
// Arrowlake U:
case 0xb5:
CPU = "arrowlake";
- *Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_ARROWLAKE;
+ Type = INTEL_COREI7;
+ Subtype = INTEL_COREI7_ARROWLAKE;
break;
// Arrowlake S:
case 0xc6:
CPU = "arrowlake-s";
- *Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_ARROWLAKE_S;
+ Type = INTEL_COREI7;
+ Subtype = INTEL_COREI7_ARROWLAKE_S;
break;
// Lunarlake:
case 0xbd:
CPU = "lunarlake";
- *Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_ARROWLAKE_S;
+ Type = INTEL_COREI7;
+ Subtype = INTEL_COREI7_ARROWLAKE_S;
break;
// Pantherlake:
case 0xcc:
CPU = "pantherlake";
- *Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_PANTHERLAKE;
+ Type = INTEL_COREI7;
+ Subtype = INTEL_COREI7_PANTHERLAKE;
break;
// Wildcatlake:
case 0xd5:
CPU = "wildcatlake";
- *Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_PANTHERLAKE;
+ Type = INTEL_COREI7;
+ Subtype = INTEL_COREI7_PANTHERLAKE;
break;
// Icelake Xeon:
case 0x6a:
case 0x6c:
CPU = "icelake-server";
- *Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_ICELAKE_SERVER;
+ Type = INTEL_COREI7;
+ Subtype = INTEL_COREI7_ICELAKE_SERVER;
break;
// Emerald Rapids:
case 0xcf:
CPU = "emeraldrapids";
- *Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_SAPPHIRERAPIDS;
+ Type = INTEL_COREI7;
+ Subtype = INTEL_COREI7_SAPPHIRERAPIDS;
break;
// Sapphire Rapids:
case 0x8f:
CPU = "sapphirerapids";
- *Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_SAPPHIRERAPIDS;
+ Type = INTEL_COREI7;
+ Subtype = INTEL_COREI7_SAPPHIRERAPIDS;
break;
// Granite Rapids:
case 0xad:
CPU = "graniterapids";
- *Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_GRANITERAPIDS;
+ Type = INTEL_COREI7;
+ Subtype = INTEL_COREI7_GRANITERAPIDS;
break;
// Granite Rapids D:
case 0xae:
CPU = "graniterapids-d";
- *Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_GRANITERAPIDS_D;
+ Type = INTEL_COREI7;
+ Subtype = INTEL_COREI7_GRANITERAPIDS_D;
break;
case 0x1c: // Most 45 nm Intel Atom processors
@@ -568,7 +585,7 @@ static const char *getIntelProcessorTypeAndSubtype(
case 0x35: // 32 nm Atom Midview
case 0x36: // 32 nm Atom Midview
CPU = "bonnell";
- *Type = INTEL_BONNELL;
+ Type = INTEL_BONNELL;
break;
// Atom Silvermont codes from the Intel software optimization guide.
@@ -579,52 +596,52 @@ static const char *getIntelProcessorTypeAndSubtype(
case 0x5d:
case 0x4c: // really airmont
CPU = "silvermont";
- *Type = INTEL_SILVERMONT;
+ Type = INTEL_SILVERMONT;
break;
// Goldmont:
case 0x5c: // Apollo Lake
case 0x5f: // Denverton
CPU = "goldmont";
- *Type = INTEL_GOLDMONT;
+ Type = INTEL_GOLDMONT;
break; // "goldmont"
case 0x7a:
CPU = "goldmont-plus";
- *Type = INTEL_GOLDMONT_PLUS;
+ Type = INTEL_GOLDMONT_PLUS;
break;
case 0x86:
case 0x8a: // Lakefield
case 0x96: // Elkhart Lake
case 0x9c: // Jasper Lake
CPU = "tremont";
- *Type = INTEL_TREMONT;
+ Type = INTEL_TREMONT;
break;
// Sierraforest:
case 0xaf:
CPU = "sierraforest";
- *Type = INTEL_SIERRAFOREST;
+ Type = INTEL_SIERRAFOREST;
break;
// Grandridge:
case 0xb6:
CPU = "grandridge";
- *Type = INTEL_GRANDRIDGE;
+ Type = INTEL_GRANDRIDGE;
break;
// Clearwaterforest:
case 0xdd:
CPU = "clearwaterforest";
- *Type = INTEL_CLEARWATERFOREST;
+ Type = INTEL_CLEARWATERFOREST;
break;
case 0x57:
CPU = "knl";
- *Type = INTEL_KNL;
+ Type = INTEL_KNL;
break;
case 0x85:
CPU = "knm";
- *Type = INTEL_KNM;
+ Type = INTEL_KNM;
break;
default: // Unknown family 6 CPU.
@@ -636,8 +653,8 @@ static const char *getIntelProcessorTypeAndSubtype(
// Diamond Rapids:
case 0x01:
CPU = "diamondrapids";
- *Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_DIAMONDRAPIDS;
+ Type = INTEL_COREI7;
+ Subtype = INTEL_COREI7_DIAMONDRAPIDS;
break;
default: // Unknown family 19 CPU.
@@ -649,8 +666,8 @@ static const char *getIntelProcessorTypeAndSubtype(
case 0x1:
case 0x3:
CPU = "novalake";
- *Type = INTEL_COREI7;
- *Subtype = INTEL_COREI7_NOVALAKE;
+ Type = INTEL_COREI7;
+ Subtype = INTEL_COREI7_NOVALAKE;
break;
default: // Unknown family 0x12 CPU.
break;
@@ -661,14 +678,23 @@ static const char *getIntelProcessorTypeAndSubtype(
break; // Unknown.
}
+ if (Type != CPU_TYPE_MAX)
+ CpuModel->__cpu_type = Type;
+ if (Subtype != CPU_SUBTYPE_MAX)
+ CpuModel->__cpu_subtype = Subtype;
+
return CPU;
}
-static const char *getAMDProcessorTypeAndSubtype(
- unsigned Family, unsigned Model, const unsigned *Features,
- enum ProcessorTypes *Type, enum ProcessorSubtypes *Subtype) {
+static const char *
+getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
+ const unsigned *Features,
+ struct __processor_model *CpuModel) {
const char *CPU = 0;
+ enum ProcessorTypes Type = CPU_TYPE_MAX;
+ enum ProcessorSubtypes Subtype = CPU_SUBTYPE_MAX;
+
switch (Family) {
case 4:
CPU = "i486";
@@ -709,53 +735,53 @@ static const char *getAMDProcessorTypeAndSubtype(
case 16:
case 18:
CPU = "amdfam10";
- *Type = AMDFAM10H; // "amdfam10"
+ Type = AMDFAM10H; // "amdfam10"
switch (Model) {
case 2:
- *Subtype = AMDFAM10H_BARCELONA;
+ Subtype = AMDFAM10H_BARCELONA;
break;
case 4:
- *Subtype = AMDFAM10H_SHANGHAI;
+ Subtype = AMDFAM10H_SHANGHAI;
break;
case 8:
- *Subtype = AMDFAM10H_ISTANBUL;
+ Subtype = AMDFAM10H_ISTANBUL;
break;
}
break;
case 20:
CPU = "btver1";
- *Type = AMD_BTVER1;
+ Type = AMD_BTVER1;
break;
case 21:
CPU = "bdver1";
- *Type = AMDFAM15H;
+ Type = AMDFAM15H;
if (Model >= 0x60 && Model <= 0x7f) {
CPU = "bdver4";
- *Subtype = AMDFAM15H_BDVER4;
+ Subtype = AMDFAM15H_BDVER4;
break; // 60h-7Fh: Excavator
}
if (Model >= 0x30 && Model <= 0x3f) {
CPU = "bdver3";
- *Subtype = AMDFAM15H_BDVER3;
+ Subtype = AMDFAM15H_BDVER3;
break; // 30h-3Fh: Steamroller
}
if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
CPU = "bdver2";
- *Subtype = AMDFAM15H_BDVER2;
+ Subtype = AMDFAM15H_BDVER2;
break; // 02h, 10h-1Fh: Piledriver
}
if (Model <= 0x0f) {
- *Subtype = AMDFAM15H_BDVER1;
+ Subtype = AMDFAM15H_BDVER1;
break; // 00h-0Fh: Bulldozer
}
break;
case 22:
CPU = "btver2";
- *Type = AMD_BTVER2;
+ Type = AMD_BTVER2;
break;
case 23:
CPU = "znver1";
- *Type = AMDFAM17H;
+ Type = AMDFAM17H;
if ((Model >= 0x30 && Model <= 0x3f) || (Model == 0x47) ||
(Model >= 0x60 && Model <= 0x67) || (Model >= 0x68 && Model <= 0x6f) ||
(Model >= 0x70 && Model <= 0x7f) || (Model >= 0x84 && Model <= 0x87) ||
@@ -771,20 +797,20 @@ static const char *getAMDProcessorTypeAndSubtype(
// Family 17h Models 98h-9Fh (Mero) Zen 2
// Family 17h Models A0h-AFh (Mendocino) Zen 2
CPU = "znver2";
- *Subtype = AMDFAM17H_ZNVER2;
+ Subtype = AMDFAM17H_ZNVER2;
break;
}
if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x20 && Model <= 0x2f)) {
// Family 17h Models 10h-1Fh (Raven1) Zen
// Family 17h Models 10h-1Fh (Picasso) Zen+
// Family 17h Models 20h-2Fh (Raven2 x86) Zen
- *Subtype = AMDFAM17H_ZNVER1;
+ Subtype = AMDFAM17H_ZNVER1;
break;
}
break;
case 25:
CPU = "znver3";
- *Type = AMDFAM19H;
+ Type = AMDFAM19H;
if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x2f) ||
(Model >= 0x30 && Model <= 0x3f) || (Model >= 0x40 && Model <= 0x4f) ||
(Model >= 0x50 && Model <= 0x5f)) {
@@ -793,7 +819,7 @@ static const char *getAMDProcessorTypeAndSubtype(
// Family 19h Models 30h-3Fh (Badami) Zen 3
// Family 19h Models 40h-4Fh (Rembrandt) Zen 3+
// Family 19h Models 50h-5Fh (Cezanne) Zen 3
- *Subtype = AMDFAM19H_ZNVER3;
+ Subtype = AMDFAM19H_ZNVER3;
break;
}
if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x60 && Model <= 0x6f) ||
@@ -805,13 +831,13 @@ static const char *getAMDProcessorTypeAndSubtype(
// Family 19h Models 78h-7Fh (Phoenix 2, Hawkpoint2) Zen 4
// Family 19h Models A0h-AFh (Stones-Dense) Zen 4
CPU = "znver4";
- *Subtype = AMDFAM19H_ZNVER4;
+ Subtype = AMDFAM19H_ZNVER4;
break; // "znver4"
}
break; // family 19h
case 26:
CPU = "znver5";
- *Type = AMDFAM1AH;
+ Type = AMDFAM1AH;
if (Model <= 0x77) {
// Models 00h-0Fh (Breithorn).
// Models 10h-1Fh (Breithorn-Dense).
@@ -823,7 +849,7 @@ static const char *getAMDProcessorTypeAndSubtype(
// Models 60h-6Fh (Krackan1).
// Models 70h-77h (Sarlak).
CPU = "znver5";
- *Subtype = AMDFAM1AH_ZNVER5;
+ Subtype = AMDFAM1AH_ZNVER5;
break; // "znver5"
}
break;
@@ -831,6 +857,11 @@ static const char *getAMDProcessorTypeAndSubtype(
break; // Unknown AMD CPU.
}
+ if (Type != CPU_TYPE_MAX)
+ CpuModel->__cpu_type = Type;
+ if (Subtype != CPU_SUBTYPE_MAX)
+ CpuModel->__cpu_subtype = Subtype;
+
return CPU;
}
@@ -1155,19 +1186,6 @@ int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;
#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
-struct __processor_model {
- unsigned int __cpu_vendor;
- enum ProcessorTypes __cpu_type;
- enum ProcessorSubtypes __cpu_subtype;
- unsigned int __cpu_features[1];
-} __cpu_model = {0, 0, 0, {0}};
-
-static_assert(sizeof(__cpu_model) == 16,
- "Wrong size of __cpu_model will result in ABI break");
-
-#ifndef _WIN32
-__attribute__((visibility("hidden")))
-#endif
unsigned __cpu_features2[(CPU_FEATURE_MAX - 1) / 32];
// A constructor function that is sets __cpu_model and __cpu_features2 with
@@ -1207,15 +1225,11 @@ int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) {
if (Vendor == SIG_INTEL) {
// Get CPU type.
- getIntelProcessorTypeAndSubtype(Family, Model, &Features[0],
- &(__cpu_model.__cpu_type),
- &(__cpu_model.__cpu_subtype));
+ getIntelProcessorTypeAndSubtype(Family, Model, &Features[0], &__cpu_model);
__cpu_model.__cpu_vendor = VENDOR_INTEL;
} else if (Vendor == SIG_AMD) {
// Get CPU type.
- getAMDProcessorTypeAndSubtype(Family, Model, &Features[0],
- &(__cpu_model.__cpu_type),
- &(__cpu_model.__cpu_subtype));
+ getAMDProcessorTypeAndSubtype(Family, Model, &Features[0], &__cpu_model);
__cpu_model.__cpu_vendor = VENDOR_AMD;
} else
__cpu_model.__cpu_vendor = VENDOR_OTHER;
diff --git a/libc/src/setjmp/x86_64/sigsetjmp.cpp b/libc/src/setjmp/x86_64/sigsetjmp.cpp
index 4c97a01..2bad053 100644
--- a/libc/src/setjmp/x86_64/sigsetjmp.cpp
+++ b/libc/src/setjmp/x86_64/sigsetjmp.cpp
@@ -41,7 +41,7 @@ LLVM_LIBC_FUNCTION(int, sigsetjmp, (sigjmp_buf buf)) {
[epilogue] "X"(sigsetjmp_epilogue)
: "eax", "ebx", "ecx");
}
-#endif
+#else
[[gnu::naked]]
LLVM_LIBC_FUNCTION(int, sigsetjmp, (sigjmp_buf, int)) {
asm(R"(
@@ -64,5 +64,6 @@ LLVM_LIBC_FUNCTION(int, sigsetjmp, (sigjmp_buf, int)) {
[epilogue] "X"(sigsetjmp_epilogue)
: "rax", "rbx");
}
+#endif
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libcxx/cmake/caches/Armv7Arm.cmake b/libcxx/cmake/caches/Armv7Arm.cmake
index e60d43f..be8f9ff 100644
--- a/libcxx/cmake/caches/Armv7Arm.cmake
+++ b/libcxx/cmake/caches/Armv7Arm.cmake
@@ -1,3 +1,5 @@
set(CMAKE_CXX_COMPILER_TARGET "armv7l-linux-gnueabihf" CACHE STRING "")
set(CMAKE_CXX_FLAGS "-marm" CACHE STRING "")
set(CMAKE_C_FLAGS "-marm" CACHE STRING "")
+set(LIBCXX_TEST_PARAMS "enable_modules=clang" CACHE STRING "")
+set(LIBCXXABI_TEST_PARAMS "${LIBCXX_TEST_PARAMS}" CACHE STRING "")
diff --git a/libcxx/cmake/caches/Armv7Thumb-no-exceptions.cmake b/libcxx/cmake/caches/Armv7Thumb-no-exceptions.cmake
index 70b619f..5ddfa15 100644
--- a/libcxx/cmake/caches/Armv7Thumb-no-exceptions.cmake
+++ b/libcxx/cmake/caches/Armv7Thumb-no-exceptions.cmake
@@ -3,3 +3,5 @@ set(CMAKE_CXX_FLAGS "-mthumb" CACHE STRING "")
set(CMAKE_C_FLAGS "-mthumb" CACHE STRING "")
set(LIBCXX_ENABLE_EXCEPTIONS OFF CACHE BOOL "")
set(LIBCXXABI_ENABLE_EXCEPTIONS OFF CACHE BOOL "")
+set(LIBCXX_TEST_PARAMS "enable_modules=clang" CACHE STRING "")
+set(LIBCXXABI_TEST_PARAMS "${LIBCXX_TEST_PARAMS}" CACHE STRING "")
diff --git a/libcxx/cmake/caches/Armv8Arm.cmake b/libcxx/cmake/caches/Armv8Arm.cmake
index a289471..e0379b5 100644
--- a/libcxx/cmake/caches/Armv8Arm.cmake
+++ b/libcxx/cmake/caches/Armv8Arm.cmake
@@ -1,3 +1,5 @@
set(CMAKE_CXX_COMPILER_TARGET "armv8l-linux-gnueabihf" CACHE STRING "")
set(CMAKE_CXX_FLAGS "-marm" CACHE STRING "")
set(CMAKE_C_FLAGS "-marm" CACHE STRING "")
+set(LIBCXX_TEST_PARAMS "enable_modules=clang" CACHE STRING "")
+set(LIBCXXABI_TEST_PARAMS "${LIBCXX_TEST_PARAMS}" CACHE STRING "")
diff --git a/libcxx/cmake/caches/Armv8Thumb-no-exceptions.cmake b/libcxx/cmake/caches/Armv8Thumb-no-exceptions.cmake
index b4c7dfc..b0a5348 100644
--- a/libcxx/cmake/caches/Armv8Thumb-no-exceptions.cmake
+++ b/libcxx/cmake/caches/Armv8Thumb-no-exceptions.cmake
@@ -3,3 +3,5 @@ set(CMAKE_CXX_FLAGS "-mthumb" CACHE STRING "")
set(CMAKE_C_FLAGS "-mthumb" CACHE STRING "")
set(LIBCXX_ENABLE_EXCEPTIONS OFF CACHE BOOL "")
set(LIBCXXABI_ENABLE_EXCEPTIONS OFF CACHE BOOL "")
+set(LIBCXX_TEST_PARAMS "enable_modules=clang" CACHE STRING "")
+set(LIBCXXABI_TEST_PARAMS "${LIBCXX_TEST_PARAMS}" CACHE STRING "")
diff --git a/libcxx/cmake/caches/Generic-hardening-mode-debug.cmake b/libcxx/cmake/caches/Generic-hardening-mode-debug.cmake
index 1d401ba..483e8a7 100644
--- a/libcxx/cmake/caches/Generic-hardening-mode-debug.cmake
+++ b/libcxx/cmake/caches/Generic-hardening-mode-debug.cmake
@@ -1 +1,3 @@
set(LIBCXX_HARDENING_MODE "debug" CACHE STRING "")
+set(LIBCXX_TEST_PARAMS "enable_modules=clang" CACHE STRING "")
+set(LIBCXXABI_TEST_PARAMS "${LIBCXX_TEST_PARAMS}" CACHE STRING "")
diff --git a/libcxx/cmake/caches/Generic-hardening-mode-extensive.cmake b/libcxx/cmake/caches/Generic-hardening-mode-extensive.cmake
index 72263df..d4d506e 100644
--- a/libcxx/cmake/caches/Generic-hardening-mode-extensive.cmake
+++ b/libcxx/cmake/caches/Generic-hardening-mode-extensive.cmake
@@ -1 +1,3 @@
set(LIBCXX_HARDENING_MODE "extensive" CACHE STRING "")
+set(LIBCXX_TEST_PARAMS "enable_modules=clang" CACHE STRING "")
+set(LIBCXXABI_TEST_PARAMS "${LIBCXX_TEST_PARAMS}" CACHE STRING "")
diff --git a/libcxx/cmake/caches/Generic-hardening-mode-fast-with-abi-breaks.cmake b/libcxx/cmake/caches/Generic-hardening-mode-fast-with-abi-breaks.cmake
index d4ce32c..2d61773 100644
--- a/libcxx/cmake/caches/Generic-hardening-mode-fast-with-abi-breaks.cmake
+++ b/libcxx/cmake/caches/Generic-hardening-mode-fast-with-abi-breaks.cmake
@@ -8,3 +8,5 @@ set(_defines
_LIBCPP_ABI_BOUNDED_ITERATORS_IN_OPTIONAL
)
set(LIBCXX_ABI_DEFINES "${_defines}" CACHE STRING "")
+set(LIBCXX_TEST_PARAMS "enable_modules=clang" CACHE STRING "")
+set(LIBCXXABI_TEST_PARAMS "${LIBCXX_TEST_PARAMS}" CACHE STRING "")
diff --git a/libcxx/cmake/caches/Generic-hardening-mode-fast.cmake b/libcxx/cmake/caches/Generic-hardening-mode-fast.cmake
index cad5a1c..acc9cbd 100644
--- a/libcxx/cmake/caches/Generic-hardening-mode-fast.cmake
+++ b/libcxx/cmake/caches/Generic-hardening-mode-fast.cmake
@@ -1 +1,3 @@
set(LIBCXX_HARDENING_MODE "fast" CACHE STRING "")
+set(LIBCXX_TEST_PARAMS "enable_modules=clang" CACHE STRING "")
+set(LIBCXXABI_TEST_PARAMS "${LIBCXX_TEST_PARAMS}" CACHE STRING "")
diff --git a/libcxx/cmake/caches/Generic-merged.cmake b/libcxx/cmake/caches/Generic-merged.cmake
index 7ebb802..461d103 100644
--- a/libcxx/cmake/caches/Generic-merged.cmake
+++ b/libcxx/cmake/caches/Generic-merged.cmake
@@ -9,3 +9,6 @@ set(LIBCXXABI_ENABLE_STATIC_UNWINDER ON CACHE BOOL "")
set(LIBCXXABI_STATICALLY_LINK_UNWINDER_IN_STATIC_LIBRARY ON CACHE BOOL "")
set(LIBUNWIND_ENABLE_SHARED OFF CACHE BOOL "")
+
+set(LIBCXX_TEST_PARAMS "enable_modules=clang" CACHE STRING "")
+set(LIBCXXABI_TEST_PARAMS "${LIBCXX_TEST_PARAMS}" CACHE STRING "")
diff --git a/libcxx/cmake/caches/Generic-msan.cmake b/libcxx/cmake/caches/Generic-msan.cmake
index a8fbd28..a7e3128 100644
--- a/libcxx/cmake/caches/Generic-msan.cmake
+++ b/libcxx/cmake/caches/Generic-msan.cmake
@@ -1,4 +1,4 @@
set(LLVM_USE_SANITIZER "MemoryWithOrigins" CACHE STRING "")
-set(LIBCXX_TEST_PARAMS "long_tests=False" CACHE STRING "") # MSAN is really slow and tests can sometimes timeout otherwise
+set(LIBCXX_TEST_PARAMS "long_tests=False;enable_modules=clang" CACHE STRING "") # MSAN is really slow and tests can sometimes timeout otherwise
set(LIBCXXABI_TEST_PARAMS "${LIBCXX_TEST_PARAMS}" CACHE STRING "")
set(LIBCXXABI_USE_LLVM_UNWINDER OFF CACHE BOOL "") # MSAN is compiled against the system unwinder, which leads to false positives
diff --git a/libcxx/cmake/caches/Generic-optimized-speed.cmake b/libcxx/cmake/caches/Generic-optimized-speed.cmake
index 577a5de..b3a845c 100644
--- a/libcxx/cmake/caches/Generic-optimized-speed.cmake
+++ b/libcxx/cmake/caches/Generic-optimized-speed.cmake
@@ -2,3 +2,5 @@ set(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "")
set(LIBCXX_TEST_PARAMS "optimization=speed" CACHE STRING "")
set(LIBCXXABI_TEST_PARAMS "${LIBCXX_TEST_PARAMS}" CACHE STRING "")
set(LIBUNWIND_TEST_PARAMS "${LIBCXX_TEST_PARAMS}" CACHE STRING "")
+set(LIBCXX_TEST_PARAMS "enable_modules=clang" CACHE STRING "")
+set(LIBCXXABI_TEST_PARAMS "${LIBCXX_TEST_PARAMS}" CACHE STRING "")
diff --git a/libcxx/cmake/caches/Generic-static.cmake b/libcxx/cmake/caches/Generic-static.cmake
index ed2bf85..0866a21 100644
--- a/libcxx/cmake/caches/Generic-static.cmake
+++ b/libcxx/cmake/caches/Generic-static.cmake
@@ -1,3 +1,5 @@
set(LIBCXX_ENABLE_SHARED OFF CACHE BOOL "")
set(LIBCXXABI_ENABLE_SHARED OFF CACHE BOOL "")
set(LIBUNWIND_ENABLE_SHARED OFF CACHE BOOL "")
+set(LIBCXX_TEST_PARAMS "enable_modules=clang" CACHE STRING "")
+set(LIBCXXABI_TEST_PARAMS "${LIBCXX_TEST_PARAMS}" CACHE STRING "")
diff --git a/libcxx/cmake/caches/Generic-tsan.cmake b/libcxx/cmake/caches/Generic-tsan.cmake
index c42c1bb..662fb1f 100644
--- a/libcxx/cmake/caches/Generic-tsan.cmake
+++ b/libcxx/cmake/caches/Generic-tsan.cmake
@@ -1,2 +1,4 @@
set(LLVM_USE_SANITIZER "Thread" CACHE STRING "")
set(LIBCXXABI_USE_LLVM_UNWINDER OFF CACHE BOOL "") # TSAN is compiled against the system unwinder, which leads to false positives
+set(LIBCXX_TEST_PARAMS "enable_modules=clang" CACHE STRING "")
+set(LIBCXXABI_TEST_PARAMS "${LIBCXX_TEST_PARAMS}" CACHE STRING "")
diff --git a/libcxx/cmake/caches/Generic-ubsan.cmake b/libcxx/cmake/caches/Generic-ubsan.cmake
index 7ad891e..f1a36c3 100644
--- a/libcxx/cmake/caches/Generic-ubsan.cmake
+++ b/libcxx/cmake/caches/Generic-ubsan.cmake
@@ -1,2 +1,4 @@
set(LLVM_USE_SANITIZER "Undefined" CACHE STRING "")
set(LIBCXX_ABI_UNSTABLE ON CACHE BOOL "")
+set(LIBCXX_TEST_PARAMS "enable_modules=clang" CACHE STRING "")
+set(LIBCXXABI_TEST_PARAMS "${LIBCXX_TEST_PARAMS}" CACHE STRING "")
diff --git a/libcxx/include/__flat_map/flat_map.h b/libcxx/include/__flat_map/flat_map.h
index 7bb235b..159e652 100644
--- a/libcxx/include/__flat_map/flat_map.h
+++ b/libcxx/include/__flat_map/flat_map.h
@@ -47,6 +47,7 @@
#include <__ranges/container_compatible_range.h>
#include <__ranges/drop_view.h>
#include <__ranges/from_range.h>
+#include <__ranges/range_adaptor.h>
#include <__ranges/size.h>
#include <__ranges/subrange.h>
#include <__ranges/zip_view.h>
diff --git a/libcxx/include/__flat_map/flat_multimap.h b/libcxx/include/__flat_map/flat_multimap.h
index 96d9454..6a60872 100644
--- a/libcxx/include/__flat_map/flat_multimap.h
+++ b/libcxx/include/__flat_map/flat_multimap.h
@@ -45,6 +45,7 @@
#include <__ranges/container_compatible_range.h>
#include <__ranges/drop_view.h>
#include <__ranges/from_range.h>
+#include <__ranges/range_adaptor.h>
#include <__ranges/size.h>
#include <__ranges/subrange.h>
#include <__ranges/zip_view.h>
diff --git a/libcxx/include/__flat_set/flat_multiset.h b/libcxx/include/__flat_set/flat_multiset.h
index 5cfa38f..7be0b2d 100644
--- a/libcxx/include/__flat_set/flat_multiset.h
+++ b/libcxx/include/__flat_set/flat_multiset.h
@@ -40,6 +40,7 @@
#include <__ranges/container_compatible_range.h>
#include <__ranges/drop_view.h>
#include <__ranges/from_range.h>
+#include <__ranges/range_adaptor.h>
#include <__ranges/size.h>
#include <__ranges/subrange.h>
#include <__type_traits/container_traits.h>
diff --git a/libcxx/test/libcxx/containers/views/mdspan/layout_stride/assert.ctor.extents_span.non_unique.pass.cpp b/libcxx/test/libcxx/containers/views/mdspan/layout_stride/assert.ctor.extents_span.non_unique.pass.cpp
index fd0701e..ed2e475 100644
--- a/libcxx/test/libcxx/containers/views/mdspan/layout_stride/assert.ctor.extents_span.non_unique.pass.cpp
+++ b/libcxx/test/libcxx/containers/views/mdspan/layout_stride/assert.ctor.extents_span.non_unique.pass.cpp
@@ -30,8 +30,9 @@
//
// Effects: Direct-non-list-initializes extents_ with e, and for all d in the range [0, rank_), direct-non-list-initializes strides_[d] with as_const(s[d]).
-#include <mdspan>
#include <cassert>
+#include <mdspan>
+#include <span>
#include "check_assertion.h"
diff --git a/libcxx/test/libcxx/utilities/utility/__murmur2_or_cityhash.abi-v2.pass.cpp b/libcxx/test/libcxx/utilities/utility/__murmur2_or_cityhash.abi-v2.pass.cpp
index 25c0b9f..6101f0a 100644
--- a/libcxx/test/libcxx/utilities/utility/__murmur2_or_cityhash.abi-v2.pass.cpp
+++ b/libcxx/test/libcxx/utilities/utility/__murmur2_or_cityhash.abi-v2.pass.cpp
@@ -15,6 +15,7 @@
// REQUIRES: libcpp-abi-version=2
#include <cassert>
+#include <cstdint>
#include <string>
#include <utility>
diff --git a/libcxx/test/std/iterators/iterator.container/ssize.LWG3207.compile.pass.cpp b/libcxx/test/std/iterators/iterator.container/ssize.LWG3207.compile.pass.cpp
index e97020f..1ca68b1 100644
--- a/libcxx/test/std/iterators/iterator.container/ssize.LWG3207.compile.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.container/ssize.LWG3207.compile.pass.cpp
@@ -22,6 +22,7 @@
#include <iterator>
#include <climits>
#include <cstddef>
+#include <cstdint>
// Test the test:
static_assert(sizeof(std::ptrdiff_t) == 4, "Run only on these platforms");
diff --git a/llvm/docs/Security.rst b/llvm/docs/Security.rst
index 5cb8d04..d95b552 100644
--- a/llvm/docs/Security.rst
+++ b/llvm/docs/Security.rst
@@ -51,7 +51,7 @@ username for an individual isn't available, the brackets will be empty.
* Nikhil Gupta (Nvidia) []
* Oliver Hunt (Apple) [@ojhunt]
* Peter Smith (ARM) [@smithp35]
-* Pietro Albini (Ferrous Systems; Rust) [@pietroalbini]
+* Pietro Albini (Oxide Computer Company; Rust) [@pietroalbini]
* Serge Guelton (Mozilla) [@serge-sans-paille]
* Sergey Zverev (Intel) [@offsake]
* Shayne Hiet-Block (Microsoft) [@GreatKeeper]
diff --git a/llvm/docs/YamlIO.rst b/llvm/docs/YamlIO.rst
index c5079d8..4f523cb 100644
--- a/llvm/docs/YamlIO.rst
+++ b/llvm/docs/YamlIO.rst
@@ -807,7 +807,7 @@ Flow Mapping
A YAML "flow mapping" is a mapping that uses the inline notation
(e.g { x: 1, y: 0 } ) when written to YAML. To specify that a type should be
written in YAML using flow mapping, your MappingTraits specialization should
-add "static const bool flow = true;". For instance:
+add ``static constexpr bool flow = true;``. For instance:
.. code-block:: c++
@@ -824,7 +824,7 @@ add "static const bool flow = true;". For instance:
...
}
- static const bool flow = true;
+ static constexpr bool flow = true;
}
Flow mappings are subject to line wrapping according to the ``Output`` object
@@ -859,7 +859,7 @@ Flow Sequence
A YAML "flow sequence" is a sequence that when written to YAML it uses the
inline notation (e.g [ foo, bar ] ). To specify that a sequence type should
be written in YAML as a flow sequence, your SequenceTraits specialization should
-add "static const bool flow = true;". For instance:
+add ``static constexpr bool flow = true;``. For instance:
.. code-block:: c++
@@ -869,7 +869,7 @@ add "static const bool flow = true;". For instance:
static MyListEl &element(IO &io, MyList &list, size_t index) { ... }
// The existence of this member causes YAML I/O to use a flow sequence
- static const bool flow = true;
+ static constexpr bool flow = true;
};
With the above, if you used MyList as the data type in your native data
diff --git a/llvm/include/llvm/ADT/ImmutableSet.h b/llvm/include/llvm/ADT/ImmutableSet.h
index 8b2425e..1b40dac 100644
--- a/llvm/include/llvm/ADT/ImmutableSet.h
+++ b/llvm/include/llvm/ADT/ImmutableSet.h
@@ -635,9 +635,7 @@ public:
// if find a collision compare those trees by their contents.
unsigned digest = TNew->computeDigest();
TreeTy *&entry = Cache[maskCacheIndex(digest)];
- do {
- if (!entry)
- break;
+ if (entry) {
for (TreeTy *T = entry ; T != nullptr; T = T->next) {
// Compare the Contents('T') with Contents('TNew')
typename TreeTy::iterator TI = T->begin(), TE = T->end();
@@ -653,7 +651,6 @@ public:
entry->prev = TNew;
TNew->next = entry;
}
- while (false);
entry = TNew;
TNew->IsCanonicalized = true;
diff --git a/llvm/include/llvm/Analysis/DDG.h b/llvm/include/llvm/Analysis/DDG.h
index eb977fb..1c53291 100644
--- a/llvm/include/llvm/Analysis/DDG.h
+++ b/llvm/include/llvm/Analysis/DDG.h
@@ -96,7 +96,7 @@ public:
RootDDGNode() : DDGNode(NodeKind::Root) {}
RootDDGNode(const RootDDGNode &N) = delete;
RootDDGNode(RootDDGNode &&N) : DDGNode(std::move(N)) {}
- ~RootDDGNode() = default;
+ ~RootDDGNode() override = default;
/// Define classof to be able to use isa<>, cast<>, dyn_cast<>, etc.
static bool classof(const DDGNode *N) {
@@ -114,7 +114,7 @@ public:
SimpleDDGNode(Instruction &I);
SimpleDDGNode(const SimpleDDGNode &N);
SimpleDDGNode(SimpleDDGNode &&N);
- ~SimpleDDGNode();
+ ~SimpleDDGNode() override;
SimpleDDGNode &operator=(const SimpleDDGNode &N) = default;
@@ -176,7 +176,7 @@ public:
PiBlockDDGNode(const PiNodeList &List);
PiBlockDDGNode(const PiBlockDDGNode &N);
PiBlockDDGNode(PiBlockDDGNode &&N);
- ~PiBlockDDGNode();
+ ~PiBlockDDGNode() override;
PiBlockDDGNode &operator=(const PiBlockDDGNode &N) = default;
@@ -318,7 +318,7 @@ public:
: DDGBase(std::move(G)), DDGInfo(std::move(G)) {}
DataDependenceGraph(Function &F, DependenceInfo &DI);
DataDependenceGraph(Loop &L, LoopInfo &LI, DependenceInfo &DI);
- ~DataDependenceGraph();
+ ~DataDependenceGraph() override;
/// If node \p N belongs to a pi-block return a pointer to the pi-block,
/// otherwise return null.
diff --git a/llvm/include/llvm/Analysis/InteractiveModelRunner.h b/llvm/include/llvm/Analysis/InteractiveModelRunner.h
index 66473ae..cfa0506 100644
--- a/llvm/include/llvm/Analysis/InteractiveModelRunner.h
+++ b/llvm/include/llvm/Analysis/InteractiveModelRunner.h
@@ -51,7 +51,7 @@ public:
Log->flush();
}
- virtual ~InteractiveModelRunner();
+ ~InteractiveModelRunner() override;
private:
void *evaluateUntyped() override;
diff --git a/llvm/include/llvm/Analysis/MLInlineAdvisor.h b/llvm/include/llvm/Analysis/MLInlineAdvisor.h
index cc4c482..d71fa55 100644
--- a/llvm/include/llvm/Analysis/MLInlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/MLInlineAdvisor.h
@@ -33,7 +33,7 @@ public:
GetModelRunner,
std::function<bool(CallBase &)> GetDefaultAdvice);
- virtual ~MLInlineAdvisor() = default;
+ ~MLInlineAdvisor() override = default;
void onPassEntry(LazyCallGraph::SCC *SCC) override;
void onPassExit(LazyCallGraph::SCC *SCC) override;
@@ -105,7 +105,7 @@ class MLInlineAdvice : public InlineAdvice {
public:
MLInlineAdvice(MLInlineAdvisor *Advisor, CallBase &CB,
OptimizationRemarkEmitter &ORE, bool Recommendation);
- virtual ~MLInlineAdvice() = default;
+ ~MLInlineAdvice() override = default;
void recordInliningImpl() override;
void recordInliningWithCalleeDeletedImpl() override;
diff --git a/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h b/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h
index 641036d..ff423a9 100644
--- a/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h
+++ b/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h
@@ -106,7 +106,7 @@ public:
assert(ResultIndex >= 0 && "Cannot find DecisionName in inlining model");
}
- virtual ~ReleaseModeModelRunner() = default;
+ ~ReleaseModeModelRunner() override = default;
static bool classof(const MLModelRunner *R) {
return R->getKind() == MLModelRunner::Kind::Release;
diff --git a/llvm/include/llvm/Analysis/StackSafetyAnalysis.h b/llvm/include/llvm/Analysis/StackSafetyAnalysis.h
index 2966f0c..b7b816f 100644
--- a/llvm/include/llvm/Analysis/StackSafetyAnalysis.h
+++ b/llvm/include/llvm/Analysis/StackSafetyAnalysis.h
@@ -156,7 +156,7 @@ public:
static char ID;
StackSafetyGlobalInfoWrapperPass();
- ~StackSafetyGlobalInfoWrapperPass();
+ ~StackSafetyGlobalInfoWrapperPass() override;
const StackSafetyGlobalInfo &getResult() const { return SSGI; }
diff --git a/llvm/include/llvm/DWARFCFIChecker/DWARFCFIFunctionFrameAnalyzer.h b/llvm/include/llvm/DWARFCFIChecker/DWARFCFIFunctionFrameAnalyzer.h
index 03e93de..3895da2 100644
--- a/llvm/include/llvm/DWARFCFIChecker/DWARFCFIFunctionFrameAnalyzer.h
+++ b/llvm/include/llvm/DWARFCFIChecker/DWARFCFIFunctionFrameAnalyzer.h
@@ -32,7 +32,7 @@ class LLVM_ABI CFIFunctionFrameAnalyzer : public CFIFunctionFrameReceiver {
public:
CFIFunctionFrameAnalyzer(MCContext &Context, const MCInstrInfo &MCII)
: CFIFunctionFrameReceiver(Context), MCII(MCII) {}
- ~CFIFunctionFrameAnalyzer();
+ ~CFIFunctionFrameAnalyzer() override;
void startFunctionFrame(bool IsEH,
ArrayRef<MCCFIInstruction> Prologue) override;
diff --git a/llvm/include/llvm/DWARFLinker/Classic/DWARFStreamer.h b/llvm/include/llvm/DWARFLinker/Classic/DWARFStreamer.h
index 03fc729..4ffef90 100644
--- a/llvm/include/llvm/DWARFLinker/Classic/DWARFStreamer.h
+++ b/llvm/include/llvm/DWARFLinker/Classic/DWARFStreamer.h
@@ -48,7 +48,7 @@ public:
raw_pwrite_stream &OutFile,
DWARFLinkerBase::MessageHandlerTy Warning)
: OutFile(OutFile), OutFileType(OutFileType), WarningHandler(Warning) {}
- virtual ~DwarfStreamer() = default;
+ ~DwarfStreamer() override = default;
static Expected<std::unique_ptr<DwarfStreamer>> createStreamer(
const Triple &TheTriple, DWARFLinkerBase::OutputFileType FileType,
diff --git a/llvm/include/llvm/DWARFLinker/Parallel/DWARFLinker.h b/llvm/include/llvm/DWARFLinker/Parallel/DWARFLinker.h
index 8046513..db60695 100644
--- a/llvm/include/llvm/DWARFLinker/Parallel/DWARFLinker.h
+++ b/llvm/include/llvm/DWARFLinker/Parallel/DWARFLinker.h
@@ -120,7 +120,7 @@ using SectionHandlerTy =
class DWARFLinker : public DWARFLinkerBase {
public:
- virtual ~DWARFLinker() = default;
+ ~DWARFLinker() override = default;
/// Creates dwarf linker instance.
LLVM_ABI static std::unique_ptr<DWARFLinker>
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h b/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h
index a64a2e8..7af2ff1 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h
@@ -600,10 +600,7 @@ struct Variant {
namespace std {
template <> struct hash<llvm::pdb::PDB_SymType> {
- using argument_type = llvm::pdb::PDB_SymType;
- using result_type = std::size_t;
-
- result_type operator()(const argument_type &Arg) const {
+ std::size_t operator()(const llvm::pdb::PDB_SymType &Arg) const {
return std::hash<int>()(static_cast<int>(Arg));
}
};
diff --git a/llvm/include/llvm/Debuginfod/BuildIDFetcher.h b/llvm/include/llvm/Debuginfod/BuildIDFetcher.h
index f0ecea1..8f9c2aa 100644
--- a/llvm/include/llvm/Debuginfod/BuildIDFetcher.h
+++ b/llvm/include/llvm/Debuginfod/BuildIDFetcher.h
@@ -24,7 +24,7 @@ class DebuginfodFetcher : public object::BuildIDFetcher {
public:
DebuginfodFetcher(std::vector<std::string> DebugFileDirectories)
: BuildIDFetcher(std::move(DebugFileDirectories)) {}
- virtual ~DebuginfodFetcher() = default;
+ ~DebuginfodFetcher() override = default;
/// Fetches the given Build ID using debuginfod and returns a local path to
/// the resulting file.
diff --git a/llvm/include/llvm/IR/DroppedVariableStatsIR.h b/llvm/include/llvm/IR/DroppedVariableStatsIR.h
index 3e18256..9fc2319 100644
--- a/llvm/include/llvm/IR/DroppedVariableStatsIR.h
+++ b/llvm/include/llvm/IR/DroppedVariableStatsIR.h
@@ -71,13 +71,12 @@ private:
StringRef PassLevel);
/// Override base class method to run on an llvm::Function specifically.
- virtual void
- visitEveryInstruction(unsigned &DroppedCount,
- DenseMap<VarID, DILocation *> &InlinedAtsMap,
- VarID Var) override;
+ void visitEveryInstruction(unsigned &DroppedCount,
+ DenseMap<VarID, DILocation *> &InlinedAtsMap,
+ VarID Var) override;
/// Override base class method to run on #dbg_values specifically.
- virtual void visitEveryDebugRecord(
+ void visitEveryDebugRecord(
DenseSet<VarID> &VarIDSet,
DenseMap<StringRef, DenseMap<VarID, DILocation *>> &InlinedAtsMap,
StringRef FuncName, bool Before) override;
diff --git a/llvm/include/llvm/IR/OptBisect.h b/llvm/include/llvm/IR/OptBisect.h
index d813ae9..a8cd56f 100644
--- a/llvm/include/llvm/IR/OptBisect.h
+++ b/llvm/include/llvm/IR/OptBisect.h
@@ -51,7 +51,7 @@ public:
/// through LLVMContext.
OptBisect() = default;
- virtual ~OptBisect() = default;
+ ~OptBisect() override = default;
/// Checks the bisect limit to determine if the specified pass should run.
///
diff --git a/llvm/include/llvm/MC/MCDXContainerWriter.h b/llvm/include/llvm/MC/MCDXContainerWriter.h
index 44d2af3..9b6ba896 100644
--- a/llvm/include/llvm/MC/MCDXContainerWriter.h
+++ b/llvm/include/llvm/MC/MCDXContainerWriter.h
@@ -23,7 +23,7 @@ protected:
MCDXContainerTargetWriter() {}
public:
- virtual ~MCDXContainerTargetWriter();
+ ~MCDXContainerTargetWriter() override;
Triple::ObjectFormatType getFormat() const override {
return Triple::DXContainer;
diff --git a/llvm/include/llvm/MC/MCELFObjectWriter.h b/llvm/include/llvm/MC/MCELFObjectWriter.h
index a8b00aa..35614cb 100644
--- a/llvm/include/llvm/MC/MCELFObjectWriter.h
+++ b/llvm/include/llvm/MC/MCELFObjectWriter.h
@@ -62,7 +62,7 @@ protected:
bool HasRelocationAddend_, uint8_t ABIVersion_ = 0);
public:
- virtual ~MCELFObjectTargetWriter() = default;
+ ~MCELFObjectTargetWriter() override = default;
Triple::ObjectFormatType getFormat() const override { return Triple::ELF; }
static bool classof(const MCObjectTargetWriter *W) {
diff --git a/llvm/include/llvm/MC/MCGOFFObjectWriter.h b/llvm/include/llvm/MC/MCGOFFObjectWriter.h
index 78a4915..ec07637 100644
--- a/llvm/include/llvm/MC/MCGOFFObjectWriter.h
+++ b/llvm/include/llvm/MC/MCGOFFObjectWriter.h
@@ -21,7 +21,7 @@ protected:
MCGOFFObjectTargetWriter() = default;
public:
- virtual ~MCGOFFObjectTargetWriter() = default;
+ ~MCGOFFObjectTargetWriter() override = default;
Triple::ObjectFormatType getFormat() const override { return Triple::GOFF; }
diff --git a/llvm/include/llvm/MC/MCMachObjectWriter.h b/llvm/include/llvm/MC/MCMachObjectWriter.h
index 41416a2..2163312b 100644
--- a/llvm/include/llvm/MC/MCMachObjectWriter.h
+++ b/llvm/include/llvm/MC/MCMachObjectWriter.h
@@ -48,7 +48,7 @@ protected:
}
public:
- virtual ~MCMachObjectTargetWriter();
+ ~MCMachObjectTargetWriter() override;
Triple::ObjectFormatType getFormat() const override { return Triple::MachO; }
static bool classof(const MCObjectTargetWriter *W) {
diff --git a/llvm/include/llvm/MC/MCObjectStreamer.h b/llvm/include/llvm/MC/MCObjectStreamer.h
index 1899cb6..d9aecd8 100644
--- a/llvm/include/llvm/MC/MCObjectStreamer.h
+++ b/llvm/include/llvm/MC/MCObjectStreamer.h
@@ -68,7 +68,7 @@ protected:
MCObjectStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
std::unique_ptr<MCObjectWriter> OW,
std::unique_ptr<MCCodeEmitter> Emitter);
- ~MCObjectStreamer();
+ ~MCObjectStreamer() override;
public:
/// state management
diff --git a/llvm/include/llvm/MC/MCWasmObjectWriter.h b/llvm/include/llvm/MC/MCWasmObjectWriter.h
index 3d5b094..9dd588d 100644
--- a/llvm/include/llvm/MC/MCWasmObjectWriter.h
+++ b/llvm/include/llvm/MC/MCWasmObjectWriter.h
@@ -27,7 +27,7 @@ protected:
explicit MCWasmObjectTargetWriter(bool Is64Bit_, bool IsEmscripten);
public:
- virtual ~MCWasmObjectTargetWriter();
+ ~MCWasmObjectTargetWriter() override;
Triple::ObjectFormatType getFormat() const override { return Triple::Wasm; }
static bool classof(const MCObjectTargetWriter *W) {
diff --git a/llvm/include/llvm/MC/MCWinCOFFObjectWriter.h b/llvm/include/llvm/MC/MCWinCOFFObjectWriter.h
index b11eacc..4a9cf9c 100644
--- a/llvm/include/llvm/MC/MCWinCOFFObjectWriter.h
+++ b/llvm/include/llvm/MC/MCWinCOFFObjectWriter.h
@@ -29,7 +29,7 @@ protected:
MCWinCOFFObjectTargetWriter(unsigned Machine_);
public:
- virtual ~MCWinCOFFObjectTargetWriter() = default;
+ ~MCWinCOFFObjectTargetWriter() override = default;
Triple::ObjectFormatType getFormat() const override { return Triple::COFF; }
static bool classof(const MCObjectTargetWriter *W) {
diff --git a/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h b/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h
index 3700901..296a19c 100644
--- a/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h
+++ b/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h
@@ -57,7 +57,7 @@ public:
LSUnitBase(const MCSchedModel &SM, unsigned LoadQueueSize,
unsigned StoreQueueSize, bool AssumeNoAlias);
- virtual ~LSUnitBase();
+ ~LSUnitBase() override;
/// Returns the total number of entries in the load queue.
unsigned getLoadQueueSize() const { return LQSize; }
@@ -465,19 +465,19 @@ public:
/// 6. A store has to wait until an older store barrier is fully executed.
unsigned dispatch(const InstRef &IR) override;
- virtual void onInstructionIssued(const InstRef &IR) override {
+ void onInstructionIssued(const InstRef &IR) override {
unsigned GroupID = IR.getInstruction()->getLSUTokenID();
Groups[GroupID]->onInstructionIssued(IR);
}
- virtual void onInstructionRetired(const InstRef &IR) override;
+ void onInstructionRetired(const InstRef &IR) override;
- virtual void onInstructionExecuted(const InstRef &IR) override;
+ void onInstructionExecuted(const InstRef &IR) override;
- virtual void cycleEvent() override;
+ void cycleEvent() override;
#ifndef NDEBUG
- virtual void dump() const override;
+ void dump() const override;
#endif
private:
diff --git a/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h b/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h
index d88ee7c..958911d 100644
--- a/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h
+++ b/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h
@@ -121,7 +121,7 @@ public:
DefaultResourceStrategy(uint64_t UnitMask)
: ResourceUnitMask(UnitMask), NextInSequenceMask(UnitMask),
RemovedFromNextInSequence(0) {}
- virtual ~DefaultResourceStrategy() = default;
+ ~DefaultResourceStrategy() override = default;
uint64_t select(uint64_t ReadyMask) override;
void used(uint64_t Mask) override;
diff --git a/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h b/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h
index 0372600..34ff155 100644
--- a/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h
+++ b/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h
@@ -47,7 +47,7 @@ class LLVM_ABI DefaultSchedulerStrategy : public SchedulerStrategy {
public:
DefaultSchedulerStrategy() = default;
- virtual ~DefaultSchedulerStrategy();
+ ~DefaultSchedulerStrategy() override;
bool compare(const InstRef &Lhs, const InstRef &Rhs) const override {
int LhsRank = computeRank(Lhs);
diff --git a/llvm/include/llvm/MCA/View.h b/llvm/include/llvm/MCA/View.h
index 4d6f930..5b2e546 100644
--- a/llvm/include/llvm/MCA/View.h
+++ b/llvm/include/llvm/MCA/View.h
@@ -26,7 +26,7 @@ namespace mca {
class LLVM_ABI View : public HWEventListener {
public:
- virtual ~View() = default;
+ ~View() override = default;
virtual void printView(llvm::raw_ostream &OS) const = 0;
virtual StringRef getNameAsString() const = 0;
diff --git a/llvm/include/llvm/ObjCopy/ConfigManager.h b/llvm/include/llvm/ObjCopy/ConfigManager.h
index 27fbd96..1568799 100644
--- a/llvm/include/llvm/ObjCopy/ConfigManager.h
+++ b/llvm/include/llvm/ObjCopy/ConfigManager.h
@@ -23,7 +23,7 @@ namespace llvm {
namespace objcopy {
struct LLVM_ABI ConfigManager : public MultiFormatConfig {
- virtual ~ConfigManager() {}
+ ~ConfigManager() override {}
const CommonConfig &getCommonConfig() const override { return Common; }
diff --git a/llvm/include/llvm/Object/GOFFObjectFile.h b/llvm/include/llvm/Object/GOFFObjectFile.h
index b6b22ee..80da64e 100644
--- a/llvm/include/llvm/Object/GOFFObjectFile.h
+++ b/llvm/include/llvm/Object/GOFFObjectFile.h
@@ -91,10 +91,10 @@ private:
// SectionRef.
void moveSectionNext(DataRefImpl &Sec) const override;
- virtual Expected<StringRef> getSectionName(DataRefImpl Sec) const override;
+ Expected<StringRef> getSectionName(DataRefImpl Sec) const override;
uint64_t getSectionAddress(DataRefImpl Sec) const override;
uint64_t getSectionSize(DataRefImpl Sec) const override;
- virtual Expected<ArrayRef<uint8_t>>
+ Expected<ArrayRef<uint8_t>>
getSectionContents(DataRefImpl Sec) const override;
uint64_t getSectionIndex(DataRefImpl Sec) const override { return Sec.d.a; }
uint64_t getSectionAlignment(DataRefImpl Sec) const override;
diff --git a/llvm/include/llvm/ProfileData/MemProfReader.h b/llvm/include/llvm/ProfileData/MemProfReader.h
index 25578ec..8fdae7a 100644
--- a/llvm/include/llvm/ProfileData/MemProfReader.h
+++ b/llvm/include/llvm/ProfileData/MemProfReader.h
@@ -110,7 +110,7 @@ class LLVM_ABI RawMemProfReader final : public MemProfReader {
public:
RawMemProfReader(const RawMemProfReader &) = delete;
RawMemProfReader &operator=(const RawMemProfReader &) = delete;
- virtual ~RawMemProfReader() override;
+ ~RawMemProfReader() override;
// Prints the contents of the profile in YAML format.
void printYAML(raw_ostream &OS);
diff --git a/llvm/include/llvm/ProfileData/PGOCtxProfWriter.h b/llvm/include/llvm/ProfileData/PGOCtxProfWriter.h
index 7031728..6733e1c 100644
--- a/llvm/include/llvm/ProfileData/PGOCtxProfWriter.h
+++ b/llvm/include/llvm/ProfileData/PGOCtxProfWriter.h
@@ -92,7 +92,7 @@ public:
PGOCtxProfileWriter(raw_ostream &Out,
std::optional<unsigned> VersionOverride = std::nullopt,
bool IncludeEmpty = false);
- ~PGOCtxProfileWriter() { Writer.ExitBlock(); }
+ ~PGOCtxProfileWriter() override { Writer.ExitBlock(); }
void startContextSection() override;
void writeContextual(const ctx_profile::ContextNode &RootNode,
diff --git a/llvm/include/llvm/SandboxIR/BasicBlock.h b/llvm/include/llvm/SandboxIR/BasicBlock.h
index 25bbb6c..a8dd508 100644
--- a/llvm/include/llvm/SandboxIR/BasicBlock.h
+++ b/llvm/include/llvm/SandboxIR/BasicBlock.h
@@ -78,7 +78,7 @@ class BasicBlock : public Value {
}
public:
- ~BasicBlock() = default;
+ ~BasicBlock() override = default;
/// For isa/dyn_cast.
static bool classof(const Value *From) {
return From->getSubclassID() == Value::ClassID::Block;
diff --git a/llvm/include/llvm/SandboxIR/PassManager.h b/llvm/include/llvm/SandboxIR/PassManager.h
index 6fccaf0..93ca710 100644
--- a/llvm/include/llvm/SandboxIR/PassManager.h
+++ b/llvm/include/llvm/SandboxIR/PassManager.h
@@ -49,7 +49,7 @@ protected:
}
PassManager(const PassManager &) = delete;
PassManager(PassManager &&) = default;
- virtual ~PassManager() = default;
+ ~PassManager() override = default;
PassManager &operator=(const PassManager &) = delete;
public:
diff --git a/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/llvm/include/llvm/Target/TargetLoweringObjectFile.h
index 4d6cbc5..06508bf 100644
--- a/llvm/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/llvm/include/llvm/Target/TargetLoweringObjectFile.h
@@ -74,7 +74,7 @@ public:
TargetLoweringObjectFile(const TargetLoweringObjectFile &) = delete;
TargetLoweringObjectFile &
operator=(const TargetLoweringObjectFile &) = delete;
- virtual ~TargetLoweringObjectFile();
+ ~TargetLoweringObjectFile() override;
Mangler &getMangler() const { return *Mang; }
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index e57032a..a013f27 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -3325,7 +3325,7 @@ struct LLVM_ABI AbstractAttribute : public IRPosition, public AADepGraphNode {
AbstractAttribute(const IRPosition &IRP) : IRPosition(IRP) {}
/// Virtual destructor.
- virtual ~AbstractAttribute() = default;
+ ~AbstractAttribute() override = default;
/// Compile time access to the IR attribute kind.
static constexpr Attribute::AttrKind IRAttributeKind = Attribute::None;
@@ -5588,7 +5588,7 @@ struct AACallEdges : public StateWrapper<BooleanState, AbstractAttribute>,
// Synthetic root node for the Attributor's internal call graph.
struct AttributorCallGraph : public AACallGraphNode {
AttributorCallGraph(Attributor &A) : AACallGraphNode(A) {}
- virtual ~AttributorCallGraph() = default;
+ ~AttributorCallGraph() override = default;
AACallEdgeIterator optimisticEdgesBegin() const override {
return AACallEdgeIterator(A, A.Functions.begin());
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.h
index 7d02d9a..588a6eb 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.h
@@ -305,7 +305,7 @@ public:
return make_range(MemSuccs.begin(), MemSuccs.end());
}
#ifndef NDEBUG
- virtual void print(raw_ostream &OS, bool PrintDeps = true) const override;
+ void print(raw_ostream &OS, bool PrintDeps = true) const override;
#endif // NDEBUG
};
diff --git a/llvm/include/llvm/XRay/FDRRecords.h b/llvm/include/llvm/XRay/FDRRecords.h
index 91689cae..8a12e33 100644
--- a/llvm/include/llvm/XRay/FDRRecords.h
+++ b/llvm/include/llvm/XRay/FDRRecords.h
@@ -101,7 +101,7 @@ public:
MetadataType metadataType() const { return MT; }
- virtual ~MetadataRecord() = default;
+ ~MetadataRecord() override = default;
};
// What follows are specific Metadata record types which encapsulate the
diff --git a/llvm/include/llvm/XRay/FDRTraceWriter.h b/llvm/include/llvm/XRay/FDRTraceWriter.h
index 957039d..dc68c7f 100644
--- a/llvm/include/llvm/XRay/FDRTraceWriter.h
+++ b/llvm/include/llvm/XRay/FDRTraceWriter.h
@@ -30,7 +30,7 @@ class LLVM_ABI FDRTraceWriter : public RecordVisitor {
public:
// Construct an FDRTraceWriter associated with an output stream.
explicit FDRTraceWriter(raw_ostream &O, const XRayFileHeader &H);
- ~FDRTraceWriter();
+ ~FDRTraceWriter() override;
Error visit(BufferExtents &) override;
Error visit(WallclockRecord &) override;
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index c4fee39..5169b43 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -1242,7 +1242,7 @@ public:
return std::nullopt;
}
- virtual ~InlineCostCallAnalyzer() = default;
+ ~InlineCostCallAnalyzer() override = default;
int getThreshold() const { return Threshold; }
int getCost() const { return Cost; }
int getStaticBonusApplied() const { return StaticBonusApplied; }
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 8da51d0..2a0a6a2 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -4866,6 +4866,89 @@ static Value *simplifySelectWithFCmp(Value *Cond, Value *T, Value *F,
return nullptr;
}
+/// Look for the following pattern and simplify %to_fold to %identicalPhi.
+/// Here %phi, %to_fold and %phi.next perform the same functionality as
+/// %identicalPhi and hence the select instruction %to_fold can be folded
+/// into %identicalPhi.
+///
+/// BB1:
+/// %identicalPhi = phi [ X, %BB0 ], [ %identicalPhi.next, %BB1 ]
+/// %phi = phi [ X, %BB0 ], [ %phi.next, %BB1 ]
+/// ...
+/// %identicalPhi.next = select %cmp, %val, %identicalPhi
+/// (or select %cmp, %identicalPhi, %val)
+/// %to_fold = select %cmp2, %identicalPhi, %phi
+/// %phi.next = select %cmp, %val, %to_fold
+/// (or select %cmp, %to_fold, %val)
+///
+/// Prove that %phi and %identicalPhi are the same by induction:
+///
+/// Base case: Both %phi and %identicalPhi are equal on entry to the loop.
+/// Inductive case:
+/// Suppose %phi and %identicalPhi are equal at iteration i.
+/// We look at their values at iteration i+1 which are %phi.next and
+/// %identicalPhi.next. They would have become different only when %cmp is
+/// false and the corresponding values %to_fold and %identicalPhi differ
+/// (similar reason for the other "or" case in the bracket).
+///
+/// The only condition when %to_fold and %identicalPhi could differ is when %cmp2
+/// is false and %to_fold is %phi, which contradicts our inductive hypothesis
+/// that %phi and %identicalPhi are equal. Thus %phi and %identicalPhi are
+/// always equal at iteration i+1.
+bool isSelectWithIdenticalPHI(PHINode &PN, PHINode &IdenticalPN) {
+ if (PN.getParent() != IdenticalPN.getParent())
+ return false;
+ if (PN.getNumIncomingValues() != 2)
+ return false;
+
+ // Check that only the backedge incoming value is different.
+ unsigned DiffVals = 0;
+ BasicBlock *DiffValBB = nullptr;
+ for (unsigned i = 0; i < 2; i++) {
+ BasicBlock *PredBB = PN.getIncomingBlock(i);
+ if (PN.getIncomingValue(i) !=
+ IdenticalPN.getIncomingValueForBlock(PredBB)) {
+ DiffVals++;
+ DiffValBB = PredBB;
+ }
+ }
+ if (DiffVals != 1)
+ return false;
+ // Now check that the backedge incoming values are two select
+ // instructions with the same condition. Either their true
+ // values are the same, or their false values are the same.
+ auto *SI = dyn_cast<SelectInst>(PN.getIncomingValueForBlock(DiffValBB));
+ auto *IdenticalSI =
+ dyn_cast<SelectInst>(IdenticalPN.getIncomingValueForBlock(DiffValBB));
+ if (!SI || !IdenticalSI)
+ return false;
+ if (SI->getCondition() != IdenticalSI->getCondition())
+ return false;
+
+ SelectInst *SIOtherVal = nullptr;
+ Value *IdenticalSIOtherVal = nullptr;
+ if (SI->getTrueValue() == IdenticalSI->getTrueValue()) {
+ SIOtherVal = dyn_cast<SelectInst>(SI->getFalseValue());
+ IdenticalSIOtherVal = IdenticalSI->getFalseValue();
+ } else if (SI->getFalseValue() == IdenticalSI->getFalseValue()) {
+ SIOtherVal = dyn_cast<SelectInst>(SI->getTrueValue());
+ IdenticalSIOtherVal = IdenticalSI->getTrueValue();
+ } else {
+ return false;
+ }
+
+ // Now check that the other values in select, i.e., %to_fold and
+ // %identicalPhi, are essentially the same value.
+ if (!SIOtherVal || IdenticalSIOtherVal != &IdenticalPN)
+ return false;
+ if (!(SIOtherVal->getTrueValue() == &IdenticalPN &&
+ SIOtherVal->getFalseValue() == &PN) &&
+ !(SIOtherVal->getTrueValue() == &PN &&
+ SIOtherVal->getFalseValue() == &IdenticalPN))
+ return false;
+ return true;
+}
+
/// Given operands for a SelectInst, see if we can fold the result.
/// If not, this returns null.
static Value *simplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
@@ -5041,7 +5124,14 @@ static Value *simplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
std::optional<bool> Imp = isImpliedByDomCondition(Cond, Q.CxtI, Q.DL);
if (Imp)
return *Imp ? TrueVal : FalseVal;
-
+ // Look for same PHIs in the true and false values.
+ if (auto *TruePHI = dyn_cast<PHINode>(TrueVal))
+ if (auto *FalsePHI = dyn_cast<PHINode>(FalseVal)) {
+ if (isSelectWithIdenticalPHI(*TruePHI, *FalsePHI))
+ return FalseVal;
+ if (isSelectWithIdenticalPHI(*FalsePHI, *TruePHI))
+ return TrueVal;
+ }
return nullptr;
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
index 12a784e..11ca48d 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
@@ -18,8 +18,7 @@ using namespace llvm;
unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) {
resetUsedFlag(true);
- auto IterBool =
- Pool.insert(std::make_pair(Sym, AddressPoolEntry(Pool.size(), TLS)));
+ auto IterBool = Pool.try_emplace(Sym, Pool.size(), TLS);
return IterBool.first->second.Number;
}
diff --git a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
index 93ae548..7bef3a8 100644
--- a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
+++ b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
@@ -86,10 +86,7 @@ template <> struct llvm::DenseMapInfo<VariableID> {
using VarLocInsertPt = PointerUnion<const Instruction *, const DbgRecord *>;
template <> struct std::hash<VarLocInsertPt> {
- using argument_type = VarLocInsertPt;
- using result_type = std::size_t;
-
- result_type operator()(const argument_type &Arg) const {
+ std::size_t operator()(const VarLocInsertPt &Arg) const {
return std::hash<void *>()(Arg.getOpaqueValue());
}
};
diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp
index 8e48d19..109444b 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -363,7 +363,7 @@ void TailDuplicator::processPHI(
Register SrcReg = MI->getOperand(SrcOpIdx).getReg();
unsigned SrcSubReg = MI->getOperand(SrcOpIdx).getSubReg();
const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
- LocalVRMap.insert(std::make_pair(DefReg, RegSubRegPair(SrcReg, SrcSubReg)));
+ LocalVRMap.try_emplace(DefReg, SrcReg, SrcSubReg);
// Insert a copy from source to the end of the block. The def register is the
// available value liveout of the block.
@@ -411,7 +411,7 @@ void TailDuplicator::duplicateInstruction(
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
Register NewReg = MRI->createVirtualRegister(RC);
MO.setReg(NewReg);
- LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0)));
+ LocalVRMap.try_emplace(Reg, NewReg, 0);
if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg))
addSSAUpdateEntry(Reg, NewReg, PredBB);
continue;
@@ -463,7 +463,7 @@ void TailDuplicator::duplicateInstruction(
NewReg)
.addReg(VI->second.Reg, 0, VI->second.SubReg);
LocalVRMap.erase(VI);
- LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0)));
+ LocalVRMap.try_emplace(Reg, NewReg, 0);
MO.setReg(NewReg);
// The composed VI.Reg:VI.SubReg is replaced with NewReg, which
// is equivalent to the whole register Reg. Hence, Reg:subreg
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 1096e57..3c222f5 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -843,7 +843,7 @@ public:
SlotTracker(const SlotTracker &) = delete;
SlotTracker &operator=(const SlotTracker &) = delete;
- ~SlotTracker() = default;
+ ~SlotTracker() override = default;
void setProcessHook(
std::function<void(AbstractSlotTrackerStorage *, const Module *, bool)>);
@@ -5323,7 +5323,7 @@ struct MDTreeAsmWriterContext : public AsmWriterContext {
--Level;
}
- ~MDTreeAsmWriterContext() {
+ ~MDTreeAsmWriterContext() override {
for (const auto &Entry : Buffer) {
MainOS << "\n";
unsigned NumIndent = Entry.first * 2U;
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 4bc2a18..b618222 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -1711,7 +1711,7 @@ public:
/*ShouldEmitImportsFiles=*/false),
IRFiles(std::move(IRFiles)), CombinedCGDataHash(CombinedCGDataHash) {}
- virtual Error runThinLTOBackendThread(
+ Error runThinLTOBackendThread(
AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM,
ModuleSummaryIndex &CombinedIndex,
const FunctionImporter::ImportMapTy &ImportList,
@@ -2271,8 +2271,8 @@ public:
RemoteCompilerPrependArgs(RemoteCompilerPrependArgs),
RemoteCompilerArgs(RemoteCompilerArgs), SaveTemps(SaveTemps) {}
- virtual void setup(unsigned ThinLTONumTasks, unsigned ThinLTOTaskOffset,
- llvm::Triple Triple) override {
+ void setup(unsigned ThinLTONumTasks, unsigned ThinLTOTaskOffset,
+ llvm::Triple Triple) override {
UID = itostr(sys::Process::getProcessId());
Jobs.resize((size_t)ThinLTONumTasks);
this->ThinLTOTaskOffset = ThinLTOTaskOffset;
diff --git a/llvm/lib/MC/GOFFObjectWriter.cpp b/llvm/lib/MC/GOFFObjectWriter.cpp
index d68f4af..71bd397 100644
--- a/llvm/lib/MC/GOFFObjectWriter.cpp
+++ b/llvm/lib/MC/GOFFObjectWriter.cpp
@@ -440,7 +440,7 @@ public:
SetBuffer(Buffer, sizeof(Buffer));
}
- ~TextStream() { flush(); }
+ ~TextStream() override { flush(); }
};
} // namespace
diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp
index be8c022..885fa55 100644
--- a/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/llvm/lib/MC/MCAsmStreamer.cpp
@@ -295,7 +295,7 @@ public:
unsigned Flags, unsigned Isa,
unsigned Discriminator, StringRef FileName,
StringRef Location = {}) override;
- virtual void emitDwarfLocLabelDirective(SMLoc Loc, StringRef Name) override;
+ void emitDwarfLocLabelDirective(SMLoc Loc, StringRef Name) override;
MCSymbol *getDwarfLineTableSymbol(unsigned CUID) override;
diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp
index 968ccf7..a6188f0 100644
--- a/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -835,7 +835,14 @@ AsmToken AsmLexer::LexToken() {
}
if (isAtStartOfComment(TokStart)) {
- CurPtr += MAI.getCommentString().size() - 1;
+ StringRef CommentString = MAI.getCommentString();
+ // For multi-char comment strings, advance CurPtr only if we matched the
+ // full string. This stops us from accidentally eating the newline if the
+ // current line ends in a single comment char.
+ if (CommentString.size() > 1 &&
+ StringRef(TokStart, CommentString.size()) == CommentString) {
+ CurPtr += CommentString.size() - 1;
+ }
return LexLineComment();
}
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index acea3ab..dd1bc2b 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -726,7 +726,7 @@ public:
Lexer.setLexHLASMStrings(true);
}
- ~HLASMAsmParser() { Lexer.setSkipSpace(true); }
+ ~HLASMAsmParser() override { Lexer.setSkipSpace(true); }
bool parseStatement(ParseStatementInfo &Info,
MCAsmParserSemaCallback *SI) override;
diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp
index fce6b2a..d466009 100644
--- a/llvm/lib/MC/XCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/XCOFFObjectWriter.cpp
@@ -184,7 +184,7 @@ struct CsectSectionEntry : public SectionEntry {
Group->clear();
}
- virtual ~CsectSectionEntry() = default;
+ ~CsectSectionEntry() override = default;
};
struct DwarfSectionEntry : public SectionEntry {
@@ -220,7 +220,7 @@ struct DwarfSectionEntry : public SectionEntry {
DwarfSectionEntry(DwarfSectionEntry &&s) = default;
- virtual ~DwarfSectionEntry() = default;
+ ~DwarfSectionEntry() override = default;
};
struct ExceptionTableEntry {
@@ -249,7 +249,7 @@ struct ExceptionSectionEntry : public SectionEntry {
memcpy(Name, N.data(), N.size());
}
- virtual ~ExceptionSectionEntry() = default;
+ ~ExceptionSectionEntry() override = default;
};
struct CInfoSymInfo {
@@ -276,7 +276,7 @@ struct CInfoSymSectionEntry : public SectionEntry {
std::unique_ptr<CInfoSymInfo> Entry;
CInfoSymSectionEntry(StringRef N, int32_t Flags) : SectionEntry(N, Flags) {}
- virtual ~CInfoSymSectionEntry() = default;
+ ~CInfoSymSectionEntry() override = default;
void addEntry(std::unique_ptr<CInfoSymInfo> NewEntry) {
Entry = std::move(NewEntry);
Entry->Offset = sizeof(uint32_t);
diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.h b/llvm/lib/ObjCopy/ELF/ELFObject.h
index 7ec0e9b..4f6473f 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObject.h
+++ b/llvm/lib/ObjCopy/ELF/ELFObject.h
@@ -109,7 +109,7 @@ protected:
WritableMemoryBuffer &Out;
public:
- virtual ~SectionWriter() = default;
+ ~SectionWriter() override = default;
Error visit(const Section &Sec) override;
Error visit(const OwnedDataSection &Sec) override;
@@ -134,7 +134,7 @@ private:
using Elf_Sym = typename ELFT::Sym;
public:
- virtual ~ELFSectionWriter() {}
+ ~ELFSectionWriter() override {}
Error visit(const SymbolTableSection &Sec) override;
Error visit(const RelocationSection &Sec) override;
Error visit(const GnuDebugLinkSection &Sec) override;
@@ -180,7 +180,7 @@ public:
class BinarySectionWriter : public SectionWriter {
public:
- virtual ~BinarySectionWriter() {}
+ ~BinarySectionWriter() override {}
Error visit(const SymbolTableSection &Sec) override;
Error visit(const RelocationSection &Sec) override;
@@ -346,7 +346,7 @@ private:
size_t totalSize() const;
public:
- virtual ~ELFWriter() {}
+ ~ELFWriter() override {}
bool WriteSectionHeaders;
// For --only-keep-debug, select an alternative section/segment layout
@@ -367,7 +367,7 @@ private:
uint64_t TotalSize = 0;
public:
- ~BinaryWriter() {}
+ ~BinaryWriter() override {}
Error finalize() override;
Error write() override;
BinaryWriter(Object &Obj, raw_ostream &Out, const CommonConfig &Config)
@@ -784,7 +784,7 @@ private:
SymbolTableSection *Symbols = nullptr;
public:
- virtual ~SectionIndexSection() {}
+ ~SectionIndexSection() override {}
void addIndex(uint32_t Index) {
assert(Size > 0);
Indexes.push_back(Index);
diff --git a/llvm/lib/ObjectYAML/GOFFEmitter.cpp b/llvm/lib/ObjectYAML/GOFFEmitter.cpp
index c26893c..82800b1 100644
--- a/llvm/lib/ObjectYAML/GOFFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/GOFFEmitter.cpp
@@ -71,7 +71,7 @@ public:
SetBufferSize(GOFF::PayloadLength);
}
- ~GOFFOstream() { finalize(); }
+ ~GOFFOstream() override { finalize(); }
void makeNewRecord(GOFF::RecordType Type, size_t Size) {
fillRecord();
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index 4787604..e21cf8e 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -5354,7 +5354,7 @@ APInt DoubleAPFloat::bitcastToAPInt() const {
Floats[0].bitcastToAPInt().getRawData()[0],
Floats[1].bitcastToAPInt().getRawData()[0],
};
- return APInt(128, 2, Data);
+ return APInt(128, Data);
}
Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
@@ -5643,8 +5643,7 @@ APFloat::opStatus DoubleAPFloat::convertFromUnsignedParts(
// Create a minimally-sized APInt to represent the source value.
const unsigned SrcBitWidth = SrcMSB + 1;
- APSInt SrcInt{APInt{/*numBits=*/SrcBitWidth,
- /*numWords=*/SrcCount, Src},
+ APSInt SrcInt{APInt{/*numBits=*/SrcBitWidth, ArrayRef(Src, SrcCount)},
/*isUnsigned=*/true};
// Stage 1: Initial Approximation.
diff --git a/llvm/lib/Support/LSP/Protocol.cpp b/llvm/lib/Support/LSP/Protocol.cpp
index f221263..f8eeb32 100644
--- a/llvm/lib/Support/LSP/Protocol.cpp
+++ b/llvm/lib/Support/LSP/Protocol.cpp
@@ -96,10 +96,6 @@ static void percentEncode(StringRef Content, std::string &Out) {
static std::string percentDecode(StringRef Content) {
std::string Result;
for (auto I = Content.begin(), E = Content.end(); I != E; ++I) {
- if (*I != '%') {
- Result += *I;
- continue;
- }
if (*I == '%' && I + 2 < Content.end() && llvm::isHexDigit(*(I + 1)) &&
llvm::isHexDigit(*(I + 2))) {
Result.push_back(llvm::hexFromNibbles(*(I + 1), *(I + 2)));
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 02c5390..6214f4d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -740,7 +740,7 @@ static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
return "r600";
}
-static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {
+static Reloc::Model getEffectiveRelocModel() {
// The AMDGPU toolchain only supports generating shared objects, so we
// must always use PIC.
return Reloc::PIC_;
@@ -754,8 +754,8 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
CodeGenOptLevel OptLevel)
: CodeGenTargetMachineImpl(
T, TT.computeDataLayout(), TT, getGPUOrDefault(TT, CPU), FS, Options,
- getEffectiveRelocModel(RM),
- getEffectiveCodeModel(CM, CodeModel::Small), OptLevel),
+ getEffectiveRelocModel(), getEffectiveCodeModel(CM, CodeModel::Small),
+ OptLevel),
TLOF(createTLOF(getTargetTriple())) {
initAsmInfo();
if (TT.isAMDGCN()) {
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
index c5e26c1..9de4c9d 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
@@ -62,8 +62,7 @@ static cl::opt<bool>
cl::desc("Enable the merge base offset pass"),
cl::init(true), cl::Hidden);
-static Reloc::Model getEffectiveRelocModel(const Triple &TT,
- std::optional<Reloc::Model> RM) {
+static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {
return RM.value_or(Reloc::Static);
}
@@ -92,7 +91,7 @@ LoongArchTargetMachine::LoongArchTargetMachine(
const TargetOptions &Options, std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM, CodeGenOptLevel OL, bool JIT)
: CodeGenTargetMachineImpl(T, TT.computeDataLayout(), TT, CPU, FS, Options,
- getEffectiveRelocModel(TT, RM),
+ getEffectiveRelocModel(RM),
getEffectiveLoongArchCodeModel(TT, CM), OL),
TLOF(std::make_unique<TargetLoweringObjectFileELF>()) {
initAsmInfo();
diff --git a/llvm/lib/Target/M68k/M68kTargetMachine.cpp b/llvm/lib/Target/M68k/M68kTargetMachine.cpp
index 847c27ba..f525d43 100644
--- a/llvm/lib/Target/M68k/M68kTargetMachine.cpp
+++ b/llvm/lib/Target/M68k/M68kTargetMachine.cpp
@@ -46,13 +46,9 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeM68kTarget() {
namespace {
-Reloc::Model getEffectiveRelocModel(const Triple &TT,
- std::optional<Reloc::Model> RM) {
+Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {
// If not defined we default to static
- if (!RM.has_value())
- return Reloc::Static;
-
- return *RM;
+ return RM.value_or(Reloc::Static);
}
CodeModel::Model getEffectiveCodeModel(std::optional<CodeModel::Model> CM,
@@ -73,7 +69,7 @@ M68kTargetMachine::M68kTargetMachine(const Target &T, const Triple &TT,
std::optional<CodeModel::Model> CM,
CodeGenOptLevel OL, bool JIT)
: CodeGenTargetMachineImpl(T, TT.computeDataLayout(), TT, CPU, FS, Options,
- getEffectiveRelocModel(TT, RM),
+ getEffectiveRelocModel(RM),
::getEffectiveCodeModel(CM, JIT), OL),
TLOF(std::make_unique<M68kELFTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this) {
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index f81b1e12..ae54ff1 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -141,8 +141,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
initializeRISCVAsmPrinterPass(*PR);
}
-static Reloc::Model getEffectiveRelocModel(const Triple &TT,
- std::optional<Reloc::Model> RM) {
+static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {
return RM.value_or(Reloc::Static);
}
@@ -154,7 +153,7 @@ RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT,
CodeGenOptLevel OL, bool JIT)
: CodeGenTargetMachineImpl(
T, TT.computeDataLayout(Options.MCOptions.getABIName()), TT, CPU, FS,
- Options, getEffectiveRelocModel(TT, RM),
+ Options, getEffectiveRelocModel(RM),
getEffectiveCodeModel(CM, CodeModel::Small), OL),
TLOF(std::make_unique<RISCVELFTargetObjectFile>()) {
initAsmInfo();
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index a9c638c..621640c 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -127,16 +127,11 @@ LLVMInitializeWebAssemblyTarget() {
// WebAssembly Lowering public interface.
//===----------------------------------------------------------------------===//
-static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM,
- const Triple &TT) {
- if (!RM) {
- // Default to static relocation model. This should always be more optimial
- // than PIC since the static linker can determine all global addresses and
- // assume direct function calls.
- return Reloc::Static;
- }
-
- return *RM;
+static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {
+ // Default to static relocation model. This should always be more optimal
+ // than PIC since the static linker can determine all global addresses and
+ // assume direct function calls.
+ return RM.value_or(Reloc::Static);
}
using WebAssembly::WasmEnableEH;
@@ -197,7 +192,7 @@ WebAssemblyTargetMachine::WebAssemblyTargetMachine(
const TargetOptions &Options, std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM, CodeGenOptLevel OL, bool JIT)
: CodeGenTargetMachineImpl(T, TT.computeDataLayout(), TT, CPU, FS, Options,
- getEffectiveRelocModel(RM, TT),
+ getEffectiveRelocModel(RM),
getEffectiveCodeModel(CM, CodeModel::Large), OL),
TLOF(new WebAssemblyTargetObjectFile()),
UsesMultivalueABI(Options.MCOptions.getABIName() == "experimental-mv") {
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 6d16599..5048561 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -1044,15 +1044,13 @@ struct AAPointerInfoImpl
return AAPointerInfo::manifest(A);
}
- virtual const_bin_iterator begin() const override { return State::begin(); }
- virtual const_bin_iterator end() const override { return State::end(); }
- virtual int64_t numOffsetBins() const override {
- return State::numOffsetBins();
- }
- virtual bool reachesReturn() const override {
+ const_bin_iterator begin() const override { return State::begin(); }
+ const_bin_iterator end() const override { return State::end(); }
+ int64_t numOffsetBins() const override { return State::numOffsetBins(); }
+ bool reachesReturn() const override {
return !ReturnedOffsets.isUnassigned();
}
- virtual void addReturnedOffsetsTo(OffsetInfo &OI) const override {
+ void addReturnedOffsetsTo(OffsetInfo &OI) const override {
if (ReturnedOffsets.isUnknown()) {
OI.setUnknown();
return;
@@ -6653,7 +6651,7 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
AAHeapToStackFunction(const IRPosition &IRP, Attributor &A)
: AAHeapToStack(IRP, A) {}
- ~AAHeapToStackFunction() {
+ ~AAHeapToStackFunction() override {
// Ensure we call the destructor so we release any memory allocated in the
// sets.
for (auto &It : AllocationInfos)
@@ -8374,7 +8372,7 @@ struct AAMemoryLocationImpl : public AAMemoryLocation {
AccessKind2Accesses.fill(nullptr);
}
- ~AAMemoryLocationImpl() {
+ ~AAMemoryLocationImpl() override {
// The AccessSets are allocated via a BumpPtrAllocator, we call
// the destructor manually.
for (AccessSet *AS : AccessKind2Accesses)
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 5e2247f..d7eb745 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -2693,7 +2693,7 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
AAExecutionDomainFunction(const IRPosition &IRP, Attributor &A)
: AAExecutionDomain(IRP, A) {}
- ~AAExecutionDomainFunction() { delete RPOT; }
+ ~AAExecutionDomainFunction() override { delete RPOT; }
void initialize(Attributor &A) override {
Function *F = getAnchorScope();
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index ede73f8..9c75d9a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -72,7 +72,7 @@ public:
: InstCombiner(Worklist, Builder, F, AA, AC, TLI, TTI, DT, ORE, BFI, BPI,
PSI, DL, RPOT) {}
- virtual ~InstCombinerImpl() = default;
+ ~InstCombinerImpl() override = default;
/// Perform early cleanup and prepare the InstCombine worklist.
bool prepareWorklist(Function &F);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3356516..facb0fa 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4378,8 +4378,21 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
const SCEV *TC =
vputils::getSCEVExprForVPValue(getPlanFor(MainLoopVF).getTripCount(), SE);
assert(!isa<SCEVCouldNotCompute>(TC) && "Trip count SCEV must be computable");
- RemainingIterations =
- SE.getURemExpr(TC, SE.getElementCount(TCType, MainLoopVF * IC));
+ const SCEV *KnownMinTC;
+ bool ScalableTC = match(TC, m_scev_c_Mul(m_SCEV(KnownMinTC), m_SCEVVScale()));
+ // Use versions of TC and VF in which both are either scalable or fixed.
+ if (ScalableTC == MainLoopVF.isScalable())
+ RemainingIterations =
+ SE.getURemExpr(TC, SE.getElementCount(TCType, MainLoopVF * IC));
+ else if (ScalableTC) {
+ const SCEV *EstimatedTC = SE.getMulExpr(
+ KnownMinTC,
+ SE.getConstant(TCType, CM.getVScaleForTuning().value_or(1)));
+ RemainingIterations = SE.getURemExpr(
+ EstimatedTC, SE.getElementCount(TCType, MainLoopVF * IC));
+ } else
+ RemainingIterations =
+ SE.getURemExpr(TC, SE.getElementCount(TCType, EstimatedRuntimeVF * IC));
// No iterations left to process in the epilogue.
if (RemainingIterations->isZero())
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cdb9e7e..4fcaf6d 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -17641,12 +17641,28 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
[](Value *V) {
return !isa<GetElementPtrInst>(V) && isa<Instruction>(V);
})) ||
- all_of(E->Scalars, [&](Value *V) {
- return isa<PoisonValue>(V) ||
- (E->Idx == 0 && isa<InsertElementInst>(V)) ||
- E->isCopyableElement(V) ||
- (!isVectorLikeInstWithConstOps(V) && isUsedOutsideBlock(V));
- }))
+ (all_of(E->Scalars,
+ [&](Value *V) {
+ return isa<PoisonValue>(V) ||
+ (E->Idx == 0 && isa<InsertElementInst>(V)) ||
+ E->isCopyableElement(V) ||
+ (!isVectorLikeInstWithConstOps(V) &&
+ isUsedOutsideBlock(V));
+ }) &&
+ (!E->doesNotNeedToSchedule() ||
+ any_of(E->Scalars,
+ [&](Value *V) {
+ if (!isa<Instruction>(V) ||
+ (E->hasCopyableElements() && E->isCopyableElement(V)))
+ return false;
+ return !areAllOperandsNonInsts(V);
+ }) ||
+ none_of(E->Scalars, [&](Value *V) {
+ if (!isa<Instruction>(V) ||
+ (E->hasCopyableElements() && E->isCopyableElement(V)))
+ return false;
+ return MustGather.contains(V);
+ }))))
Res = FindLastInst();
else
Res = FindFirstInst();
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 2591df8..5b9f005 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -398,7 +398,7 @@ public:
DebugLoc DL = DebugLoc::getUnknown())
: VPDef(SC), VPUser(Operands), DL(DL) {}
- virtual ~VPRecipeBase() = default;
+ ~VPRecipeBase() override = default;
/// Clone the current recipe.
virtual VPRecipeBase *clone() = 0;
@@ -576,7 +576,7 @@ public:
return R && classof(R);
}
- virtual VPSingleDefRecipe *clone() override = 0;
+ VPSingleDefRecipe *clone() override = 0;
/// Returns the underlying instruction.
Instruction *getUnderlyingInstr() {
@@ -907,7 +907,7 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags {
return R && classof(R);
}
- virtual VPRecipeWithIRFlags *clone() override = 0;
+ VPRecipeWithIRFlags *clone() override = 0;
static inline bool classof(const VPSingleDefRecipe *U) {
auto *R = dyn_cast<VPRecipeBase>(U);
@@ -2068,7 +2068,7 @@ public:
return classof(static_cast<const VPRecipeBase *>(R));
}
- virtual void execute(VPTransformState &State) override = 0;
+ void execute(VPTransformState &State) override = 0;
/// Returns the step value of the induction.
VPValue *getStepValue() { return getOperand(1); }
@@ -2557,7 +2557,7 @@ public:
VPCostContext &Ctx) const override;
/// Returns true if the recipe only uses the first lane of operand \p Op.
- virtual bool onlyFirstLaneUsed(const VPValue *Op) const override = 0;
+ bool onlyFirstLaneUsed(const VPValue *Op) const override = 0;
/// Returns the number of stored operands of this interleave group. Returns 0
/// for load interleave groups.
diff --git a/llvm/test/MC/AsmParser/comments-x86-darwin-eol-dropped.s b/llvm/test/MC/AsmParser/comments-x86-darwin-eol-dropped.s
new file mode 100644
index 0000000..662e598
--- /dev/null
+++ b/llvm/test/MC/AsmParser/comments-x86-darwin-eol-dropped.s
@@ -0,0 +1,10 @@
+// RUN: llvm-mc -triple i386-apple-darwin %s 2>&1 | FileCheck %s
+.p2align 3
+// CHECK: .p2align 3
+test:
+// CHECK-LABEL: test:
+// CHECK: pushl %ebp
+// CHECK: movl %esp, %ebp
+# Check that the following line's comment # doesn't drop the movl after
+ pushl %ebp #
+ movl %esp, %ebp
diff --git a/llvm/test/Transforms/InstCombine/select_with_identical_phi.ll b/llvm/test/Transforms/InstCombine/select_with_identical_phi.ll
new file mode 100644
index 0000000..db4965b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/select_with_identical_phi.ll
@@ -0,0 +1,417 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -passes=instcombine | FileCheck %s
+@A = extern_weak global float, align 4
+
+; %same.as.v1 is a select with two phis %v1 and %phi.to.remove as the true
+; and false values, while %v1 and %phi.to.remove are actually the same.
+; Fold the selection instruction %same.as.v1 to %v1.
+define void @select_with_identical_phi(ptr %m, ptr %n, i32 %count) {
+; CHECK-LABEL: @select_with_identical_phi(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[Q:%.*]] = phi ptr [ [[M:%.*]], [[ENTRY]] ], [ [[Q_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[C:%.*]] = phi ptr [ [[N:%.*]], [[ENTRY]] ], [ [[C_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[Q_LOAD:%.*]] = load float, ptr [[Q]], align 4
+; CHECK-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C]], align 4
+; CHECK-NEXT: [[SUB:%.*]] = fsub float [[Q_LOAD]], [[C_LOAD]]
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[SUB]], [[V0]]
+; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], float [[SUB]], float [[V0]]
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[SUB]], [[V1]]
+; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2]], float [[SUB]], float [[V1]]
+; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[Q_NEXT]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 4
+; CHECK-NEXT: [[C_NEXT]] = getelementptr inbounds nuw i8, ptr [[C]], i64 4
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], [[COUNT:%.*]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK: exit:
+; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ]
+ %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ]
+ %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ]
+ %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ]
+ %q = phi ptr [ %m, %entry ], [ %q.next, %for.body ]
+ %c = phi ptr [ %n, %entry ], [ %c.next, %for.body ]
+ %q.load = load float, ptr %q
+ %c.load = load float, ptr %c
+ %sub = fsub float %q.load, %c.load
+ %cmp1 = fcmp olt float %sub, %v0
+ %v0.1 = select i1 %cmp1, float %sub, float %v0
+ %same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove
+ %cmp2 = fcmp ogt float %sub, %same.as.v1
+ %v1.1 = select i1 %cmp2, float %sub, float %v1
+ %phi.to.remove.next = select i1 %cmp2, float %sub, float %same.as.v1
+ %inc.i = add nuw nsw i32 %i, 1
+ %q.next = getelementptr inbounds i8, ptr %q, i64 4
+ %c.next = getelementptr inbounds i8, ptr %c, i64 4
+ %exitcond = icmp eq i32 %inc.i, %count
+ br i1 %exitcond, label %exit, label %for.body
+
+exit:
+ %vl.1.lcssa = phi float [ %v1.1, %for.body ]
+ store float %vl.1.lcssa, ptr @A
+ ret void
+}
+
+; Reduced from select_with_identical_phi().
+; Check that %same.as.v1 can be folded.
+define void @select_with_identical_phi_2(i1 %cmp1, i1 %cmp2, float %x) {
+; CHECK-LABEL: @select_with_identical_phi_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY:%.*]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2:%.*]], float [[X:%.*]], float [[V1]]
+; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], 100
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK: exit:
+; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ]
+ %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ]
+ %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ]
+ %same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove
+ %v1.1 = select i1 %cmp2, float %x, float %v1
+ %phi.to.remove.next = select i1 %cmp2, float %x, float %same.as.v1
+ %inc.i = add nuw nsw i32 %i, 1
+ %exitcond = icmp eq i32 %inc.i, 100
+ br i1 %exitcond, label %exit, label %for.body
+
+exit:
+ %vl.1.lcssa = phi float [ %v1.1, %for.body ]
+ store float %vl.1.lcssa, ptr @A
+ ret void
+}
+
+; The difference from select_with_identical_phi_2() is that the true and false values in
+; %phi.to.remove.next and %v1.1 are swapped.
+; Check that %same.as.v1 can be folded.
+define void @select_with_identical_phi_3(i1 %cmp1, i1 %cmp2, float %x) {
+; CHECK-LABEL: @select_with_identical_phi_3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY:%.*]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2:%.*]], float [[V1]], float [[X:%.*]]
+; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], 100
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK: exit:
+; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ]
+ %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ]
+ %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ]
+ %same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove
+ %v1.1 = select i1 %cmp2, float %v1, float %x
+ %phi.to.remove.next = select i1 %cmp2, float %same.as.v1, float %x
+ %inc.i = add nuw nsw i32 %i, 1
+ %exitcond = icmp eq i32 %inc.i, 100
+ br i1 %exitcond, label %exit, label %for.body
+
+exit:
+ %vl.1.lcssa = phi float [ %v1.1, %for.body ]
+ store float %vl.1.lcssa, ptr @A
+ ret void
+}
+
+; The difference from select_with_identical_phi_2() is that the true and false values in
+; same.as.v1 are swapped.
+; Check that %same.as.v1 can be folded.
+define void @select_with_identical_phi_4(i1 %cmp1, i1 %cmp2, float %x) {
+; CHECK-LABEL: @select_with_identical_phi_4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY:%.*]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2:%.*]], float [[X:%.*]], float [[V1]]
+; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], 100
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK: exit:
+; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ]
+ %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ]
+ %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ]
+ %same.as.v1 = select i1 %cmp1, float %phi.to.remove, float %v1
+ %v1.1 = select i1 %cmp2, float %x, float %v1
+ %phi.to.remove.next = select i1 %cmp2, float %x, float %same.as.v1
+ %inc.i = add nuw nsw i32 %i, 1
+ %exitcond = icmp eq i32 %inc.i, 100
+ br i1 %exitcond, label %exit, label %for.body
+
+exit:
+ %vl.1.lcssa = phi float [ %v1.1, %for.body ]
+ store float %vl.1.lcssa, ptr @A
+ ret void
+}
+
+; The difference from select_with_identical_phi() is that the true and false values in
+; %same.as.v1, %phi.to.remove.next and %v1.1 are swapped.
+; Check that %same.as.v1 can be folded.
+define void @select_with_identical_phi_5(i1 %cmp1, i1 %cmp2, float %x) {
+; CHECK-LABEL: @select_with_identical_phi_5(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY:%.*]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2:%.*]], float [[V1]], float [[X:%.*]]
+; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], 100
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK: exit:
+; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ]
+ %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ]
+ %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ]
+ %same.as.v1 = select i1 %cmp1, float %phi.to.remove, float %v1
+ %v1.1 = select i1 %cmp2, float %v1, float %x
+ %phi.to.remove.next = select i1 %cmp2, float %same.as.v1, float %x
+ %inc.i = add nuw nsw i32 %i, 1
+ %exitcond = icmp eq i32 %inc.i, 100
+ br i1 %exitcond, label %exit, label %for.body
+
+exit:
+ %vl.1.lcssa = phi float [ %v1.1, %for.body ]
+ store float %vl.1.lcssa, ptr @A
+ ret void
+}
+
+; The difference from select_with_identical_phi_2() is that not all phis are sorted with
+; the same order of incoming BBs.
+; Check that %same.as.v1 can be folded.
+define void @select_with_identical_phi_6(i1 %cmp1, i1 %cmp2, float %x) {
+; CHECK-LABEL: @select_with_identical_phi_6(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY:%.*]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2:%.*]], float [[X:%.*]], float [[V1]]
+; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], 100
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK: exit:
+; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ]
+ %phi.to.remove = phi float [ %phi.to.remove.next, %for.body ], [ 0xC415AF1D80000000, %entry ]
+ %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ]
+ %same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove
+ %v1.1 = select i1 %cmp2, float %x, float %v1
+ %phi.to.remove.next = select i1 %cmp2, float %x, float %same.as.v1
+ %inc.i = add nuw nsw i32 %i, 1
+ %exitcond = icmp eq i32 %inc.i, 100
+ br i1 %exitcond, label %exit, label %for.body
+
+exit:
+ %vl.1.lcssa = phi float [ %v1.1, %for.body ]
+ store float %vl.1.lcssa, ptr @A
+ ret void
+}
+
+; %v1 and %phi.to.remove do not have the same start value.
+; Cannot fold %same.as.v1.
+define void @select_with_identical_phi_negative_1(i1 %cmp1, i1 %cmp2, float %x) {
+; CHECK-LABEL: @select_with_identical_phi_negative_1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[V1:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[PHI_TO_REMOVE:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[PHI_TO_REMOVE_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SAME_AS_V1:%.*]] = select i1 [[CMP1:%.*]], float [[V1]], float [[PHI_TO_REMOVE]]
+; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2:%.*]], float [[X:%.*]], float [[V1]]
+; CHECK-NEXT: [[PHI_TO_REMOVE_NEXT]] = select i1 [[CMP2]], float [[X]], float [[SAME_AS_V1]]
+; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], 100
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK: exit:
+; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %v1 = phi float [ 0x4415AF1D80000000, %entry ], [ %v1.1, %for.body ]
+ %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ]
+ %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ]
+ %same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove
+ %v1.1 = select i1 %cmp2, float %x, float %v1
+ %phi.to.remove.next = select i1 %cmp2, float %x, float %same.as.v1
+ %inc.i = add nuw nsw i32 %i, 1
+ %exitcond = icmp eq i32 %inc.i, 100
+ br i1 %exitcond, label %exit, label %for.body
+
+exit:
+ %vl.1.lcssa = phi float [ %v1.1, %for.body ]
+ store float %vl.1.lcssa, ptr @A
+ ret void
+}
+
+; %v1 and %phi.to.remove do not act as the same phi since %v1.1 and %phi.to.remove.next do not evolve the same.
+; Cannot fold %same.as.v1.
+define void @select_with_identical_phi_negative_2(i1 %cmp1, i1 %cmp2, float %x) {
+; CHECK-LABEL: @select_with_identical_phi_negative_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY:%.*]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[PHI_TO_REMOVE:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[PHI_TO_REMOVE_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SAME_AS_V1:%.*]] = select i1 [[CMP1:%.*]], float [[V1]], float [[PHI_TO_REMOVE]]
+; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2:%.*]], float [[V1]], float [[X:%.*]]
+; CHECK-NEXT: [[PHI_TO_REMOVE_NEXT]] = select i1 [[CMP2]], float [[X]], float [[SAME_AS_V1]]
+; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], 100
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK: exit:
+; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ]
+ %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ]
+ %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ]
+ %same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove
+ %v1.1 = select i1 %cmp2, float %v1, float %x
+ %phi.to.remove.next = select i1 %cmp2, float %x, float %same.as.v1
+ %inc.i = add nuw nsw i32 %i, 1
+ %exitcond = icmp eq i32 %inc.i, 100
+ br i1 %exitcond, label %exit, label %for.body
+
+exit:
+ %vl.1.lcssa = phi float [ %v1.1, %for.body ]
+ store float %vl.1.lcssa, ptr @A
+ ret void
+}
+
+; %v1 and %phi.to.remove do not act as the same phi since %v1.1 and %phi.to.remove.next do not
+; have the same condition.
+; Cannot fold %same.as.v1.
+define void @select_with_identical_phi_negative_3(i1 %cmp1, i1 %cmp2, i1 %cmp3, float %x) {
+; CHECK-LABEL: @select_with_identical_phi_negative_3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY:%.*]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[PHI_TO_REMOVE:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[PHI_TO_REMOVE_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SAME_AS_V1:%.*]] = select i1 [[CMP1:%.*]], float [[V1]], float [[PHI_TO_REMOVE]]
+; CHECK-NEXT: [[V1_1]] = select i1 [[CMP3:%.*]], float [[V1]], float [[X:%.*]]
+; CHECK-NEXT: [[PHI_TO_REMOVE_NEXT]] = select i1 [[CMP2:%.*]], float [[X]], float [[SAME_AS_V1]]
+; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], 100
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK: exit:
+; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ]
+ %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ]
+ %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ]
+ %same.as.v1 = select i1 %cmp1, float %v1, float %phi.to.remove
+ %v1.1 = select i1 %cmp3, float %v1, float %x
+ %phi.to.remove.next = select i1 %cmp2, float %x, float %same.as.v1
+ %inc.i = add nuw nsw i32 %i, 1
+ %exitcond = icmp eq i32 %inc.i, 100
+ br i1 %exitcond, label %exit, label %for.body
+
+exit:
+ %vl.1.lcssa = phi float [ %v1.1, %for.body ]
+ store float %vl.1.lcssa, ptr @A
+ ret void
+}
+
+; The true and false values of %same.as.v1 are not really the same phi.
+; Cannot fold %same.as.v1.
+define void @select_with_identical_phi_negative_4(i1 %cmp1, i1 %cmp2, float %x) {
+; CHECK-LABEL: @select_with_identical_phi_negative_4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[V0:%.*]] = phi float [ 0x4415AF1D80000000, [[ENTRY:%.*]] ], [ [[V0_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[V1:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[V1_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[PHI_TO_REMOVE:%.*]] = phi float [ 0xC415AF1D80000000, [[ENTRY]] ], [ [[PHI_TO_REMOVE_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SAME_AS_V1:%.*]] = select i1 [[CMP1:%.*]], float [[V0]], float [[PHI_TO_REMOVE]]
+; CHECK-NEXT: [[V1_1]] = select i1 [[CMP2:%.*]], float [[X:%.*]], float [[V1]]
+; CHECK-NEXT: [[PHI_TO_REMOVE_NEXT]] = select i1 [[CMP2]], float [[X]], float [[SAME_AS_V1]]
+; CHECK-NEXT: [[V0_1]] = fadd float [[V0]], 1.000000e+00
+; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC_I]], 100
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK: exit:
+; CHECK-NEXT: store float [[V1_1]], ptr @A, align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %v0 = phi float [ 0x4415AF1D80000000, %entry ], [ %v0.1, %for.body ]
+ %v1 = phi float [ 0xC415AF1D80000000, %entry ], [ %v1.1, %for.body ]
+ %phi.to.remove = phi float [ 0xC415AF1D80000000, %entry ], [ %phi.to.remove.next, %for.body ]
+ %i = phi i32 [ 0, %entry ], [ %inc.i, %for.body ]
+ %same.as.v1 = select i1 %cmp1, float %v0, float %phi.to.remove
+ %v1.1 = select i1 %cmp2, float %x, float %v1
+ %phi.to.remove.next = select i1 %cmp2, float %x, float %same.as.v1
+ %v0.1 = fadd float %v0, 1.0
+ %inc.i = add nuw nsw i32 %i, 1
+ %exitcond = icmp eq i32 %inc.i, 100
+ br i1 %exitcond, label %exit, label %for.body
+
+exit:
+ %vl.1.lcssa = phi float [ %v1.1, %for.body ]
+ store float %vl.1.lcssa, ptr @A
+ ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/multiple-result-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/AArch64/multiple-result-intrinsics.ll
index 5a2eee0..2f5a58c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/multiple-result-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/multiple-result-intrinsics.ll
@@ -37,7 +37,6 @@ define void @sincos_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noali
;
; CHECK-ARMPL-LABEL: define void @sincos_f32(
; CHECK-ARMPL-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-ARMPL: [[ENTRY:.*:]]
; CHECK-ARMPL: [[VECTOR_PH:.*:]]
; CHECK-ARMPL: [[VECTOR_BODY:.*:]]
; CHECK-ARMPL: [[VECTOR_BODY1:.*:]]
@@ -51,15 +50,6 @@ define void @sincos_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noali
; CHECK-ARMPL: store <vscale x 4 x float> [[TMP15]], ptr [[TMP22:%.*]], align 4
; CHECK-ARMPL: store <vscale x 4 x float> [[TMP16]], ptr [[TMP24:%.*]], align 4
; CHECK-ARMPL: store <vscale x 4 x float> [[TMP17]], ptr [[TMP27:%.*]], align 4
-; CHECK-ARMPL: [[MIDDLE_BLOCK:.*:]]
-; CHECK-ARMPL: [[SCALAR_PH:.*:]]
-; CHECK-ARMPL: [[FOR_BODY:.*:]]
-; CHECK-ARMPL: [[VEC_EPILOG_VECTOR_BODY:.*:]]
-; CHECK-ARMPL: [[TMP29:%.*]] = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> [[WIDE_LOAD3:%.*]])
-; CHECK-ARMPL: [[TMP25:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP29]], 0
-; CHECK-ARMPL: [[TMP26:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP29]], 1
-; CHECK-ARMPL: store <4 x float> [[TMP25]], ptr [[TMP30:%.*]], align 4
-; CHECK-ARMPL: store <4 x float> [[TMP26]], ptr [[TMP28:%.*]], align 4
; CHECK-ARMPL: [[VEC_EPILOG_MIDDLE_BLOCK:.*:]]
; CHECK-ARMPL: [[VEC_EPILOG_SCALAR_PH:.*:]]
; CHECK-ARMPL: [[FOR_BODY1:.*:]]
@@ -269,7 +259,6 @@ define void @modf_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias
;
; CHECK-ARMPL-LABEL: define void @modf_f32(
; CHECK-ARMPL-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] {
-; CHECK-ARMPL: [[ENTRY:.*:]]
; CHECK-ARMPL: [[VECTOR_PH:.*:]]
; CHECK-ARMPL: [[VECTOR_BODY:.*:]]
; CHECK-ARMPL: [[VECTOR_BODY1:.*:]]
@@ -283,15 +272,6 @@ define void @modf_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias
; CHECK-ARMPL: store <vscale x 4 x float> [[TMP15]], ptr [[TMP22:%.*]], align 4
; CHECK-ARMPL: store <vscale x 4 x float> [[TMP16]], ptr [[TMP24:%.*]], align 4
; CHECK-ARMPL: store <vscale x 4 x float> [[TMP17]], ptr [[TMP27:%.*]], align 4
-; CHECK-ARMPL: [[MIDDLE_BLOCK:.*:]]
-; CHECK-ARMPL: [[SCALAR_PH:.*:]]
-; CHECK-ARMPL: [[FOR_BODY:.*:]]
-; CHECK-ARMPL: [[VEC_EPILOG_VECTOR_BODY:.*:]]
-; CHECK-ARMPL: [[TMP29:%.*]] = call { <4 x float>, <4 x float> } @llvm.modf.v4f32(<4 x float> [[WIDE_LOAD3:%.*]])
-; CHECK-ARMPL: [[TMP25:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP29]], 0
-; CHECK-ARMPL: [[TMP26:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP29]], 1
-; CHECK-ARMPL: store <4 x float> [[TMP25]], ptr [[TMP30:%.*]], align 4
-; CHECK-ARMPL: store <4 x float> [[TMP26]], ptr [[TMP28:%.*]], align 4
; CHECK-ARMPL: [[VEC_EPILOG_MIDDLE_BLOCK:.*:]]
; CHECK-ARMPL: [[VEC_EPILOG_SCALAR_PH:.*:]]
; CHECK-ARMPL: [[FOR_BODY1:.*:]]
@@ -429,7 +409,6 @@ define void @sincospi_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noa
;
; CHECK-ARMPL-LABEL: define void @sincospi_f32(
; CHECK-ARMPL-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0]] {
-; CHECK-ARMPL: [[ENTRY:.*:]]
; CHECK-ARMPL: [[VECTOR_PH:.*:]]
; CHECK-ARMPL: [[VECTOR_BODY:.*:]]
; CHECK-ARMPL: [[VECTOR_BODY1:.*:]]
@@ -443,15 +422,6 @@ define void @sincospi_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noa
; CHECK-ARMPL: store <vscale x 4 x float> [[TMP15]], ptr [[TMP22:%.*]], align 4
; CHECK-ARMPL: store <vscale x 4 x float> [[TMP16]], ptr [[TMP24:%.*]], align 4
; CHECK-ARMPL: store <vscale x 4 x float> [[TMP17]], ptr [[TMP27:%.*]], align 4
-; CHECK-ARMPL: [[MIDDLE_BLOCK:.*:]]
-; CHECK-ARMPL: [[SCALAR_PH:.*:]]
-; CHECK-ARMPL: [[FOR_BODY:.*:]]
-; CHECK-ARMPL: [[VEC_EPILOG_VECTOR_BODY:.*:]]
-; CHECK-ARMPL: [[TMP29:%.*]] = call { <4 x float>, <4 x float> } @llvm.sincospi.v4f32(<4 x float> [[WIDE_LOAD3:%.*]])
-; CHECK-ARMPL: [[TMP25:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP29]], 0
-; CHECK-ARMPL: [[TMP26:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP29]], 1
-; CHECK-ARMPL: store <4 x float> [[TMP25]], ptr [[TMP30:%.*]], align 4
-; CHECK-ARMPL: store <4 x float> [[TMP26]], ptr [[TMP28:%.*]], align 4
; CHECK-ARMPL: [[VEC_EPILOG_MIDDLE_BLOCK:.*:]]
; CHECK-ARMPL: [[VEC_EPILOG_SCALAR_PH:.*:]]
; CHECK-ARMPL: [[FOR_BODY1:.*:]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll
index c1f0a35..0d8a102 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll
@@ -9,49 +9,57 @@ define void @cost_store_i8(ptr %dst) #0 {
; DEFAULT-LABEL: define void @cost_store_i8(
; DEFAULT-SAME: ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
; DEFAULT-NEXT: iter.check:
-; DEFAULT-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
+; DEFAULT-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
+; DEFAULT-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP10]], 3
+; DEFAULT-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 101, [[TMP13]]
+; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; DEFAULT: vector.main.loop.iter.check:
-; DEFAULT-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 5
-; DEFAULT-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 101, [[TMP3]]
-; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; DEFAULT-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; DEFAULT-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5
+; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 101, [[TMP1]]
+; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; DEFAULT: vector.ph:
-; DEFAULT-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 32
-; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 101, [[TMP5]]
+; DEFAULT-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; DEFAULT-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
+; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 101, [[TMP3]]
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 101, [[N_MOD_VF]]
; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]
; DEFAULT: vector.body:
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; DEFAULT-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
-; DEFAULT-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
-; DEFAULT-NEXT: [[TMP23:%.*]] = shl nuw i64 [[TMP22]], 4
-; DEFAULT-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP9]], i64 [[TMP23]]
-; DEFAULT-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP9]], align 1
-; DEFAULT-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP24]], align 1
-; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; DEFAULT-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; DEFAULT-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
+; DEFAULT-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; DEFAULT-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
+; DEFAULT-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP4]], i64 [[TMP6]]
+; DEFAULT-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP4]], align 1
+; DEFAULT-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP7]], align 1
+; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
+; DEFAULT-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; DEFAULT-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; DEFAULT: middle.block:
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 101, [[N_VEC]]
; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; DEFAULT: vec.epilog.iter.check:
-; DEFAULT-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8
+; DEFAULT-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], [[TMP13]]
; DEFAULT-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
; DEFAULT: vec.epilog.ph:
; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; DEFAULT-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
+; DEFAULT-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 8
+; DEFAULT-NEXT: [[N_MOD_VF2:%.*]] = urem i64 101, [[TMP12]]
+; DEFAULT-NEXT: [[N_VEC3:%.*]] = sub i64 101, [[N_MOD_VF2]]
; DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; DEFAULT: vec.epilog.vector.body:
-; DEFAULT-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; DEFAULT-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX5]]
-; DEFAULT-NEXT: store <8 x i8> zeroinitializer, ptr [[TMP19]], align 1
-; DEFAULT-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX5]], 8
-; DEFAULT-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT6]], 96
-; DEFAULT-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; DEFAULT-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; DEFAULT-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX1]]
+; DEFAULT-NEXT: store <vscale x 8 x i8> zeroinitializer, ptr [[TMP9]], align 1
+; DEFAULT-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], [[TMP12]]
+; DEFAULT-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC3]]
+; DEFAULT-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; DEFAULT: vec.epilog.middle.block:
-; DEFAULT-NEXT: br i1 false, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
+; DEFAULT-NEXT: [[CMP_N6:%.*]] = icmp eq i64 101, [[N_VEC3]]
+; DEFAULT-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; DEFAULT: vec.epilog.scalar.ph:
-; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 96, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
+; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
; DEFAULT-NEXT: br label [[LOOP:%.*]]
; DEFAULT: loop:
; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
@@ -70,25 +78,25 @@ define void @cost_store_i8(ptr %dst) #0 {
; PRED: vector.ph:
; PRED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; PRED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
-; PRED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; PRED-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 4
-; PRED-NEXT: [[TMP9:%.*]] = sub i64 101, [[TMP8]]
-; PRED-NEXT: [[TMP10:%.*]] = icmp ugt i64 101, [[TMP8]]
-; PRED-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 0
+; PRED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; PRED-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4
+; PRED-NEXT: [[TMP4:%.*]] = sub i64 101, [[TMP3]]
+; PRED-NEXT: [[TMP5:%.*]] = icmp ugt i64 101, [[TMP3]]
+; PRED-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP4]], i64 0
; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 101)
; PRED-NEXT: br label [[VECTOR_BODY:%.*]]
; PRED: vector.body:
; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
-; PRED-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
-; PRED-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> zeroinitializer, ptr align 1 [[TMP13]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK]])
+; PRED-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
+; PRED-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> zeroinitializer, ptr align 1 [[TMP7]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK]])
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
-; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP11]])
-; PRED-NEXT: [[TMP14:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
-; PRED-NEXT: [[TMP12:%.*]] = xor i1 [[TMP14]], true
-; PRED-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP6]])
+; PRED-NEXT: [[TMP8:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
+; PRED-NEXT: [[TMP9:%.*]] = xor i1 [[TMP8]], true
+; PRED-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; PRED: middle.block:
-; PRED-NEXT: br label [[LOOP:%.*]]
+; PRED-NEXT: br label [[EXIT:%.*]]
; PRED: exit:
; PRED-NEXT: ret void
;
@@ -113,33 +121,33 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 {
; DEFAULT-NEXT: iter.check:
; DEFAULT-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; DEFAULT: vector.memcheck:
-; DEFAULT-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST]], i64 1000
-; DEFAULT-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 8
-; DEFAULT-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP]]
-; DEFAULT-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP1]]
+; DEFAULT-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 1000
+; DEFAULT-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SRC]], i64 8
+; DEFAULT-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]]
+; DEFAULT-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]]
; DEFAULT-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; DEFAULT-NEXT: br i1 [[FOUND_CONFLICT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; DEFAULT: vector.main.loop.iter.check:
; DEFAULT-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; DEFAULT: vector.ph:
-; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <16 x i16> poison, i16 [[X]], i64 0
-; DEFAULT-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT3]], <16 x i16> poison, <16 x i32> zeroinitializer
-; DEFAULT-NEXT: [[TMP7:%.*]] = trunc <16 x i16> [[BROADCAST_SPLAT4]] to <16 x i8>
+; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[X]], i64 0
+; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
+; DEFAULT-NEXT: [[TMP0:%.*]] = trunc <16 x i16> [[BROADCAST_SPLAT]] to <16 x i8>
; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]
; DEFAULT: vector.body:
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; DEFAULT-NEXT: [[TMP4:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META6:![0-9]+]]
-; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i64> poison, i64 [[TMP4]], i64 0
-; DEFAULT-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT1]], <16 x i64> poison, <16 x i32> zeroinitializer
-; DEFAULT-NEXT: [[TMP5:%.*]] = trunc <16 x i64> [[BROADCAST_SPLAT2]] to <16 x i8>
-; DEFAULT-NEXT: [[TMP9:%.*]] = and <16 x i8> [[TMP5]], [[TMP7]]
-; DEFAULT-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
-; DEFAULT-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP10]], i32 16
-; DEFAULT-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP10]], align 1, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
-; DEFAULT-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP13]], align 1, !alias.scope [[META9]], !noalias [[META6]]
+; DEFAULT-NEXT: [[TMP1:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META6:![0-9]+]]
+; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i64> poison, i64 [[TMP1]], i64 0
+; DEFAULT-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT2]], <16 x i64> poison, <16 x i32> zeroinitializer
+; DEFAULT-NEXT: [[TMP2:%.*]] = trunc <16 x i64> [[BROADCAST_SPLAT3]] to <16 x i8>
+; DEFAULT-NEXT: [[TMP3:%.*]] = and <16 x i8> [[TMP2]], [[TMP0]]
+; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
+; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 16
+; DEFAULT-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 1, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
+; DEFAULT-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP5]], align 1, !alias.scope [[META9]], !noalias [[META6]]
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
-; DEFAULT-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 992
-; DEFAULT-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; DEFAULT-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 992
+; DEFAULT-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; DEFAULT: middle.block:
; DEFAULT-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; DEFAULT: vec.epilog.iter.check:
@@ -148,20 +156,20 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 {
; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 992, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <8 x i16> poison, i16 [[X]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT4]], <8 x i16> poison, <8 x i32> zeroinitializer
-; DEFAULT-NEXT: [[TMP15:%.*]] = trunc <8 x i16> [[BROADCAST_SPLAT5]] to <8 x i8>
+; DEFAULT-NEXT: [[TMP7:%.*]] = trunc <8 x i16> [[BROADCAST_SPLAT5]] to <8 x i8>
; DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; DEFAULT: vec.epilog.vector.body:
-; DEFAULT-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; DEFAULT-NEXT: [[TMP16:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META6]]
-; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <8 x i64> poison, i64 [[TMP16]], i64 0
+; DEFAULT-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; DEFAULT-NEXT: [[TMP8:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META6]]
+; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <8 x i64> poison, i64 [[TMP8]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT7]], <8 x i64> poison, <8 x i32> zeroinitializer
-; DEFAULT-NEXT: [[TMP18:%.*]] = trunc <8 x i64> [[BROADCAST_SPLAT8]] to <8 x i8>
-; DEFAULT-NEXT: [[TMP14:%.*]] = and <8 x i8> [[TMP18]], [[TMP15]]
-; DEFAULT-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX5]]
-; DEFAULT-NEXT: store <8 x i8> [[TMP14]], ptr [[TMP26]], align 1, !alias.scope [[META9]], !noalias [[META6]]
-; DEFAULT-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[INDEX5]], 8
-; DEFAULT-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT8]], 1000
-; DEFAULT-NEXT: br i1 [[TMP17]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; DEFAULT-NEXT: [[TMP9:%.*]] = trunc <8 x i64> [[BROADCAST_SPLAT8]] to <8 x i8>
+; DEFAULT-NEXT: [[TMP10:%.*]] = and <8 x i8> [[TMP9]], [[TMP7]]
+; DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX6]]
+; DEFAULT-NEXT: store <8 x i8> [[TMP10]], ptr [[TMP11]], align 1, !alias.scope [[META9]], !noalias [[META6]]
+; DEFAULT-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 8
+; DEFAULT-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT9]], 1000
+; DEFAULT-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; DEFAULT: vec.epilog.middle.block:
; DEFAULT-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; DEFAULT: vec.epilog.scalar.ph:
@@ -186,35 +194,35 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 {
; PRED-NEXT: entry:
; PRED-NEXT: br label [[VECTOR_MEMCHECK:%.*]]
; PRED: vector.memcheck:
-; PRED-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST]], i64 1000
-; PRED-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 8
-; PRED-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP]]
-; PRED-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP1]]
+; PRED-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 1000
+; PRED-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SRC]], i64 8
+; PRED-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]]
+; PRED-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]]
; PRED-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; PRED-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; PRED: vector.ph:
-; PRED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; PRED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP10]], 2
+; PRED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; PRED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i16> poison, i16 [[X]], i64 0
; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1000)
-; PRED-NEXT: [[TMP11:%.*]] = trunc <vscale x 2 x i16> [[BROADCAST_SPLAT]] to <vscale x 2 x i8>
+; PRED-NEXT: [[TMP2:%.*]] = trunc <vscale x 2 x i16> [[BROADCAST_SPLAT]] to <vscale x 2 x i8>
; PRED-NEXT: br label [[VECTOR_BODY:%.*]]
; PRED: vector.body:
; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
-; PRED-NEXT: [[TMP7:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META3:![0-9]+]]
-; PRED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP7]], i64 0
+; PRED-NEXT: [[TMP3:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META3:![0-9]+]]
+; PRED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP3]], i64 0
; PRED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT2]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; PRED-NEXT: [[TMP8:%.*]] = trunc <vscale x 2 x i64> [[BROADCAST_SPLAT3]] to <vscale x 2 x i8>
-; PRED-NEXT: [[TMP9:%.*]] = and <vscale x 2 x i8> [[TMP8]], [[TMP11]]
-; PRED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
-; PRED-NEXT: call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP9]], ptr align 1 [[TMP5]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]), !alias.scope [[META6:![0-9]+]], !noalias [[META3]]
+; PRED-NEXT: [[TMP4:%.*]] = trunc <vscale x 2 x i64> [[BROADCAST_SPLAT3]] to <vscale x 2 x i8>
+; PRED-NEXT: [[TMP5:%.*]] = and <vscale x 2 x i8> [[TMP4]], [[TMP2]]
+; PRED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
+; PRED-NEXT: call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP5]], ptr align 1 [[TMP6]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]), !alias.scope [[META6:![0-9]+]], !noalias [[META3]]
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1000)
-; PRED-NEXT: [[TMP12:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
-; PRED-NEXT: [[TMP13:%.*]] = xor i1 [[TMP12]], true
-; PRED-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; PRED-NEXT: [[TMP7:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
+; PRED-NEXT: [[TMP8:%.*]] = xor i1 [[TMP7]], true
+; PRED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; PRED: middle.block:
; PRED-NEXT: br label [[EXIT:%.*]]
; PRED: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
index bbc0e33..9c8a187 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
@@ -10,8 +10,8 @@
target triple = "aarch64-linux-gnu"
; DEBUG: LV: Checking a loop in 'main_vf_vscale_x_16'
-; DEBUG: Create Skeleton for epilogue vectorized loop (first pass)
-; DEBUG: Main Loop VF:vscale x 16, Main Loop UF:2, Epilogue Loop VF:8, Epilogue Loop UF:1
+; DEBUG-NOT: Create Skeleton for epilogue vectorized loop (first pass)
+; DEBUG: Executing best plan with VF=vscale x 16, UF=2
; DEBUG-FORCED: LV: Checking a loop in 'main_vf_vscale_x_16'
; DEBUG-FORCED: LEV: Epilogue vectorization factor is forced.
@@ -20,9 +20,7 @@ target triple = "aarch64-linux-gnu"
define void @main_vf_vscale_x_16(ptr %A) #0 {
; CHECK-LABEL: @main_vf_vscale_x_16(
-; CHECK-NEXT: iter.check:
-; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
-; CHECK: vector.main.loop.iter.check:
+; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
@@ -43,27 +41,12 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
; CHECK-NEXT: store <vscale x 16 x i8> splat (i8 1), ptr [[TMP7]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[VEC_EPILOG_ITER_CHECK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
-; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
-; CHECK: vec.epilog.iter.check:
-; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8
-; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
-; CHECK: vec.epilog.ph:
-; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
-; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
-; CHECK: vec.epilog.vector.body:
-; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX1]]
-; CHECK-NEXT: store <8 x i8> splat (i8 1), ptr [[TMP9]], align 1
-; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8
-; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024
-; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
-; CHECK: vec.epilog.middle.block:
-; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
-; CHECK: vec.epilog.scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
;
@@ -150,7 +133,7 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) {
; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP5]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH:%.*]]
@@ -261,13 +244,13 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP7]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8
-; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]]
+; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF7:![0-9]+]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 8
@@ -279,7 +262,7 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
; CHECK-NEXT: store <8 x i64> splat (i64 1), ptr [[TMP9]], align 1
; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX4]], 8
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT5]], [[N_VEC3]]
-; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC3]]
; CHECK-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
@@ -382,14 +365,14 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 {
; CHECK-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP6]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 10000, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]]
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8
-; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]]
+; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF7]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[START]], i64 10000
@@ -400,7 +383,7 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 {
; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr [[NEXT_GEP2]], align 1
; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 8
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 10000
-; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
@@ -513,14 +496,14 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
; CHECK-NEXT: store <vscale x 4 x float> [[TMP12]], ptr [[TMP8]], align 4
; CHECK-NEXT: store <vscale x 4 x float> [[TMP13]], ptr [[TMP11]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
-; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2
-; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF15:![0-9]+]]
+; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF14:![0-9]+]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[N]], 2
@@ -528,15 +511,15 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX6]]
-; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x float>, ptr [[TMP18]], align 4
-; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX6]]
-; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x float>, ptr [[TMP19]], align 4
-; CHECK-NEXT: [[TMP20:%.*]] = fmul <2 x float> [[WIDE_LOAD7]], [[WIDE_LOAD8]]
-; CHECK-NEXT: store <2 x float> [[TMP20]], ptr [[TMP19]], align 4
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX6]]
+; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x float>, ptr [[TMP15]], align 4
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX6]]
+; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x float>, ptr [[TMP16]], align 4
+; CHECK-NEXT: [[TMP17:%.*]] = fmul <2 x float> [[WIDE_LOAD7]], [[WIDE_LOAD8]]
+; CHECK-NEXT: store <2 x float> [[TMP17]], ptr [[TMP16]], align 4
; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2
-; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC5]]
-; CHECK-NEXT: br i1 [[TMP21]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC5]]
+; CHECK-NEXT: br i1 [[TMP18]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[N]], [[N_VEC5]]
; CHECK-NEXT: br i1 [[CMP_N10]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
@@ -575,8 +558,8 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP10]], ptr [[TMP6]], align 4
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP11]], ptr [[TMP9]], align 4
; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
-; CHECK-VF8-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-VF8-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK-VF8-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF8-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK-VF8: middle.block:
; CHECK-VF8-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF8-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH:%.*]]
@@ -611,34 +594,34 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
; CHECK-NEXT: entry:
; CHECK-NEXT: [[V:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[N:%.*]] = mul nuw nsw i64 [[V]], 1024
-; CHECK-NEXT: br label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
+; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP4]], i64 [[TMP6]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP4]], align 4
-; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP7]], align 4
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP10]]
-; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4
-; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP11]], align 4
-; CHECK-NEXT: [[TMP12:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
-; CHECK-NEXT: [[TMP13:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
-; CHECK-NEXT: store <vscale x 4 x float> [[TMP12]], ptr [[TMP8]], align 4
-; CHECK-NEXT: store <vscale x 4 x float> [[TMP13]], ptr [[TMP11]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
-; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i64 [[TMP4]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP5]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP8]]
+; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP9]], align 4
+; CHECK-NEXT: [[TMP10:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
+; CHECK-NEXT: [[TMP11:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP10]], ptr [[TMP6]], align 4
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP11]], ptr [[TMP9]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH:%.*]]
@@ -676,8 +659,8 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP10]], ptr [[TMP6]], align 4
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP11]], ptr [[TMP9]], align 4
; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
-; CHECK-VF8-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-VF8-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
+; CHECK-VF8-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF8-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK-VF8: middle.block:
; CHECK-VF8-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF8-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH:%.*]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-reused-with-bv-subvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-reused-with-bv-subvector.ll
index ff0887c..fbf6323 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-reused-with-bv-subvector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-reused-with-bv-subvector.ll
@@ -6,9 +6,9 @@ define void @test(ptr %0, i64 %1, i64 %2) {
; CHECK-SAME: ptr [[TMP0:%.*]], i64 [[TMP1:%.*]], i64 [[TMP2:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x ptr> [[TMP4]], <4 x ptr> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint <4 x ptr> [[TMP5]] to <4 x i64>
; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint <4 x ptr> [[TMP5]] to <4 x i64>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 2, i32 2, i32 1, i32 3, i32 1>
+; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint <4 x ptr> [[TMP5]] to <4 x i64>
; CHECK-NEXT: br [[DOTPREHEADER_LR_PH:label %.*]]
; CHECK: [[_PREHEADER_LR_PH:.*:]]
; CHECK-NEXT: br [[DOTPREHEADER_US_US_PREHEADER:label %.*]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll b/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll
index bfb623a..6d713e8 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll
@@ -9,11 +9,11 @@ define void @test(ptr %nExp, float %0, i1 %cmp, float %1) {
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP0]], i32 3
; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
; CHECK: [[IF_THEN]]:
-; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[NEXP]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> <i32 3, i32 3>
+; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x float> [[TMP5]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[NEXP]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP4]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[TMP6]], zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x float> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float poison>, float [[TMP1]], i32 3
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-extern-use.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-extern-use.ll
new file mode 100644
index 0000000..ec554b4
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-extern-use.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-100 < %s | FileCheck %s
+
+define void @test(i32 %arg) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: i32 [[ARG:%.*]]) {
+; CHECK-NEXT: [[BB:.*:]]
+; CHECK-NEXT: br label %[[BB1:.*]]
+; CHECK: [[BB1]]:
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[ARG]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> zeroinitializer, [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> zeroinitializer, [[TMP2]]
+; CHECK-NEXT: br i1 false, label %[[BB8:.*]], label %[[BB4:.*]]
+; CHECK: [[BB4]]:
+; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> zeroinitializer, [[TMP3]]
+; CHECK-NEXT: br label %[[BB8]]
+; CHECK: [[BB8]]:
+; CHECK-NEXT: [[TMP5:%.*]] = phi <4 x i32> [ [[TMP4]], %[[BB4]] ], [ [[TMP1]], %[[BB1]] ]
+; CHECK-NEXT: ret void
+;
+bb:
+ br label %bb1
+
+bb1:
+ %sub = sub i32 0, %arg
+ %add = add i32 0, 0
+ %add2 = add i32 0, 0
+ %add3 = add i32 0, 0
+ br i1 false, label %bb8, label %bb4
+
+bb4:
+ %add5 = add i32 %add3, 0
+ %add6 = add i32 0, 0
+ %add7 = add i32 0, 0
+ br label %bb8
+
+bb8:
+ %phi = phi i32 [ %sub, %bb4 ], [ %sub, %bb1 ]
+ %phi9 = phi i32 [ %add5, %bb4 ], [ %add, %bb1 ]
+ %phi10 = phi i32 [ %add6, %bb4 ], [ %add2, %bb1 ]
+ %phi11 = phi i32 [ %add7, %bb4 ], [ %add3, %bb1 ]
+ ret void
+}
+
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/parent-node-non-schedulable.ll b/llvm/test/Transforms/SLPVectorizer/X86/parent-node-non-schedulable.ll
index 7c8cb02..2623366 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/parent-node-non-schedulable.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/parent-node-non-schedulable.ll
@@ -5,11 +5,11 @@ define void @test(ptr %0, i64 %1, i64 %2, i1 %3, i64 %4, i64 %5) {
; CHECK-LABEL: define void @test(
; CHECK-SAME: ptr [[TMP0:%.*]], i64 [[TMP1:%.*]], i64 [[TMP2:%.*]], i1 [[TMP3:%.*]], i64 [[TMP4:%.*]], i64 [[TMP5:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 240
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 128
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i64> <i64 1, i64 1, i64 1, i64 poison>, i64 [[TMP2]], i32 3
; CHECK-NEXT: [[TMP12:%.*]] = add <4 x i64> [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 128
; CHECK-NEXT: [[TMP13:%.*]] = load <2 x i64>, ptr [[TMP7]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr null, align 4
; CHECK-NEXT: [[TMP15:%.*]] = load <2 x i64>, ptr [[TMP8]], align 4
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/same-last-instruction-different-parents.ll b/llvm/test/Transforms/SLPVectorizer/X86/same-last-instruction-different-parents.ll
index ef75a8d..88e2cca 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/same-last-instruction-different-parents.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/same-last-instruction-different-parents.ll
@@ -10,13 +10,13 @@ define i32 @test(i32 %0, i1 %1) {
; CHECK-NEXT: [[TMP6:%.*]] = sitofp <2 x i32> [[TMP4]] to <2 x double>
; CHECK-NEXT: br i1 [[TMP1]], label %[[BB7:.*]], label %[[BB9:.*]]
; CHECK: [[BB7]]:
-; CHECK-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP6]], <2 x double> zeroinitializer, <2 x double> zeroinitializer)
+; CHECK-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP5]], <2 x double> zeroinitializer, <2 x double> zeroinitializer)
; CHECK-NEXT: br label %[[BB16:.*]]
; CHECK: [[BB9]]:
; CHECK-NEXT: br i1 false, label %[[BB14:.*]], label %[[BB10:.*]]
; CHECK: [[BB10]]:
-; CHECK-NEXT: [[TMP11:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> zeroinitializer, <2 x double> [[TMP5]])
-; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> <double 0.000000e+00, double poison>, <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT: [[TMP11:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> zeroinitializer, <2 x double> [[TMP6]])
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> <double 0.000000e+00, double poison>, <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: [[TMP13:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> zeroinitializer)
; CHECK-NEXT: br label %[[BB14]]
; CHECK: [[BB14]]:
diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
index 5dae6c0..b4437f7 100644
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -666,7 +666,7 @@ public:
#endif
}
- ~X86SavedState() {
+ ~X86SavedState() override {
// Restoring the X87 state does not flush pending exceptions, make sure
// these exceptions are flushed now.
#if defined(_MSC_VER) && defined(_M_X64) && !defined(_M_ARM64EC)
diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink-statistics.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink-statistics.cpp
index 1179795..71eb422 100644
--- a/llvm/tools/llvm-jitlink/llvm-jitlink-statistics.cpp
+++ b/llvm/tools/llvm-jitlink/llvm-jitlink-statistics.cpp
@@ -52,7 +52,7 @@ public:
S.ObjLayer.addPlugin(std::move(Instance));
}
- ~StatsPlugin() { publish(dbgs()); }
+ ~StatsPlugin() override { publish(dbgs()); }
void publish(raw_ostream &OS);
diff --git a/llvm/tools/llvm-mca/Views/InstructionView.h b/llvm/tools/llvm-mca/Views/InstructionView.h
index ae57246..fbaa527 100644
--- a/llvm/tools/llvm-mca/Views/InstructionView.h
+++ b/llvm/tools/llvm-mca/Views/InstructionView.h
@@ -38,7 +38,7 @@ public:
llvm::MCInstPrinter &Printer, llvm::ArrayRef<llvm::MCInst> S)
: STI(STI), MCIP(Printer), Source(S), InstrStream(InstructionString) {}
- virtual ~InstructionView();
+ ~InstructionView() override;
StringRef getNameAsString() const override { return "Instructions"; }
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 9c9b2dd..423a11f 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -795,7 +795,7 @@ public:
void printFileSummary(StringRef FileStr, ObjectFile &Obj,
ArrayRef<std::string> InputFilenames,
const Archive *A) override;
- virtual void printZeroSymbolOtherField(const Elf_Sym &Symbol) const override;
+ void printZeroSymbolOtherField(const Elf_Sym &Symbol) const override;
void printDefaultRelRelaReloc(const Relocation<ELFT> &R,
StringRef SymbolName,
diff --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp
index 30f0a8e5..fbe96bb 100644
--- a/llvm/unittests/ADT/APFloatTest.cpp
+++ b/llvm/unittests/ADT/APFloatTest.cpp
@@ -5062,8 +5062,8 @@ TEST(APFloatTest, PPCDoubleDoubleAddSpecial) {
std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected, RM) = Tp;
{
- APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1));
- APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2));
+ APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, Op1));
+ APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, Op2));
A1.add(A2, RM);
EXPECT_EQ(Expected, A1.getCategory())
@@ -5072,8 +5072,8 @@ TEST(APFloatTest, PPCDoubleDoubleAddSpecial) {
.str();
}
{
- APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1));
- APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2));
+ APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, Op1));
+ APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, Op2));
A2.add(A1, RM);
EXPECT_EQ(Expected, A2.getCategory())
@@ -5126,8 +5126,8 @@ TEST(APFloatTest, PPCDoubleDoubleAdd) {
std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected[0], Expected[1], RM) = Tp;
{
- APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1));
- APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2));
+ APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, Op1));
+ APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, Op2));
A1.add(A2, RM);
EXPECT_EQ(Expected[0], A1.bitcastToAPInt().getRawData()[0])
@@ -5140,8 +5140,8 @@ TEST(APFloatTest, PPCDoubleDoubleAdd) {
.str();
}
{
- APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1));
- APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2));
+ APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, Op1));
+ APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, Op2));
A2.add(A1, RM);
EXPECT_EQ(Expected[0], A2.bitcastToAPInt().getRawData()[0])
@@ -5175,8 +5175,8 @@ TEST(APFloatTest, PPCDoubleDoubleSubtract) {
APFloat::roundingMode RM;
std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected[0], Expected[1], RM) = Tp;
- APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1));
- APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2));
+ APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, Op1));
+ APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, Op2));
A1.subtract(A2, RM);
EXPECT_EQ(Expected[0], A1.bitcastToAPInt().getRawData()[0])
@@ -5230,8 +5230,8 @@ TEST(APFloatTest, PPCDoubleDoubleMultiplySpecial) {
std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected, RM) = Tp;
{
- APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1));
- APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2));
+ APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, Op1));
+ APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, Op2));
A1.multiply(A2, RM);
EXPECT_EQ(Expected, A1.getCategory())
@@ -5240,8 +5240,8 @@ TEST(APFloatTest, PPCDoubleDoubleMultiplySpecial) {
.str();
}
{
- APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1));
- APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2));
+ APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, Op1));
+ APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, Op2));
A2.multiply(A1, RM);
EXPECT_EQ(Expected, A2.getCategory())
@@ -5303,8 +5303,8 @@ TEST(APFloatTest, PPCDoubleDoubleMultiply) {
std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected[0], Expected[1], RM) = Tp;
{
- APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1));
- APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2));
+ APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, Op1));
+ APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, Op2));
A1.multiply(A2, RM);
EXPECT_EQ(Expected[0], A1.bitcastToAPInt().getRawData()[0])
@@ -5317,8 +5317,8 @@ TEST(APFloatTest, PPCDoubleDoubleMultiply) {
.str();
}
{
- APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1));
- APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2));
+ APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, Op1));
+ APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, Op2));
A2.multiply(A1, RM);
EXPECT_EQ(Expected[0], A2.bitcastToAPInt().getRawData()[0])
@@ -5350,8 +5350,8 @@ TEST(APFloatTest, PPCDoubleDoubleDivide) {
APFloat::roundingMode RM;
std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected[0], Expected[1], RM) = Tp;
- APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1));
- APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2));
+ APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, Op1));
+ APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, Op2));
A1.divide(A2, RM);
EXPECT_EQ(Expected[0], A1.bitcastToAPInt().getRawData()[0])
@@ -5383,8 +5383,8 @@ TEST(APFloatTest, PPCDoubleDoubleRemainder) {
uint64_t Op1[2], Op2[2], Expected[2];
std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected[0], Expected[1]) = Tp;
- APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1));
- APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2));
+ APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, Op1));
+ APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, Op2));
A1.remainder(A2);
EXPECT_EQ(Expected[0], A1.bitcastToAPInt().getRawData()[0])
@@ -5418,8 +5418,8 @@ TEST(APFloatTest, PPCDoubleDoubleMod) {
uint64_t Op1[2], Op2[2], Expected[2];
std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected[0], Expected[1]) = Tp;
- APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1));
- APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2));
+ APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, Op1));
+ APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, Op2));
A1.mod(A2);
EXPECT_EQ(Expected[0], A1.bitcastToAPInt().getRawData()[0])
@@ -6282,8 +6282,8 @@ TEST(APFloatTest, PPCDoubleDoubleCompare) {
APFloat::cmpResult Expected;
std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected) = Tp;
- APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1));
- APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2));
+ APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, Op1));
+ APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, Op2));
EXPECT_EQ(Expected, A1.compare(A2))
<< formatv("compare(({0:x} + {1:x}), ({2:x} + {3:x}))", Op1[0], Op1[1],
Op2[0], Op2[1])
@@ -6410,8 +6410,8 @@ TEST(APFloatTest, PPCDoubleDoubleBitwiseIsEqual) {
bool Expected;
std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected) = Tp;
- APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1));
- APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2));
+ APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, Op1));
+ APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, Op2));
EXPECT_EQ(Expected, A1.bitwiseIsEqual(A2))
<< formatv("({0:x} + {1:x}) = ({2:x} + {3:x})", Op1[0], Op1[1], Op2[0],
Op2[1])
@@ -6423,16 +6423,15 @@ TEST(APFloatTest, PPCDoubleDoubleHashValue) {
uint64_t Data1[] = {0x3ff0000000000001ull, 0x0000000000000001ull};
uint64_t Data2[] = {0x3ff0000000000001ull, 0};
// The hash values are *hopefully* different.
- EXPECT_NE(
- hash_value(APFloat(APFloat::PPCDoubleDouble(), APInt(128, 2, Data1))),
- hash_value(APFloat(APFloat::PPCDoubleDouble(), APInt(128, 2, Data2))));
+ EXPECT_NE(hash_value(APFloat(APFloat::PPCDoubleDouble(), APInt(128, Data1))),
+ hash_value(APFloat(APFloat::PPCDoubleDouble(), APInt(128, Data2))));
}
TEST(APFloatTest, PPCDoubleDoubleChangeSign) {
uint64_t Data[] = {
0x400f000000000000ull, 0xbcb0000000000000ull,
};
- APFloat Float(APFloat::PPCDoubleDouble(), APInt(128, 2, Data));
+ APFloat Float(APFloat::PPCDoubleDouble(), APInt(128, Data));
{
APFloat Actual =
APFloat::copySign(Float, APFloat(APFloat::IEEEdouble(), "1"));
@@ -6452,14 +6451,14 @@ TEST(APFloatTest, PPCDoubleDoubleFactories) {
uint64_t Data[] = {
0, 0,
};
- EXPECT_EQ(APInt(128, 2, Data),
+ EXPECT_EQ(APInt(128, Data),
APFloat::getZero(APFloat::PPCDoubleDouble()).bitcastToAPInt());
}
{
uint64_t Data[] = {
0x7fefffffffffffffull, 0x7c8ffffffffffffeull,
};
- EXPECT_EQ(APInt(128, 2, Data),
+ EXPECT_EQ(APInt(128, Data),
APFloat::getLargest(APFloat::PPCDoubleDouble()).bitcastToAPInt());
}
{
@@ -6467,12 +6466,12 @@ TEST(APFloatTest, PPCDoubleDoubleFactories) {
0x0000000000000001ull, 0,
};
EXPECT_EQ(
- APInt(128, 2, Data),
+ APInt(128, Data),
APFloat::getSmallest(APFloat::PPCDoubleDouble()).bitcastToAPInt());
}
{
uint64_t Data[] = {0x0360000000000000ull, 0};
- EXPECT_EQ(APInt(128, 2, Data),
+ EXPECT_EQ(APInt(128, Data),
APFloat::getSmallestNormalized(APFloat::PPCDoubleDouble())
.bitcastToAPInt());
}
@@ -6481,7 +6480,7 @@ TEST(APFloatTest, PPCDoubleDoubleFactories) {
0x8000000000000000ull, 0x0000000000000000ull,
};
EXPECT_EQ(
- APInt(128, 2, Data),
+ APInt(128, Data),
APFloat::getZero(APFloat::PPCDoubleDouble(), true).bitcastToAPInt());
}
{
@@ -6489,14 +6488,14 @@ TEST(APFloatTest, PPCDoubleDoubleFactories) {
0xffefffffffffffffull, 0xfc8ffffffffffffeull,
};
EXPECT_EQ(
- APInt(128, 2, Data),
+ APInt(128, Data),
APFloat::getLargest(APFloat::PPCDoubleDouble(), true).bitcastToAPInt());
}
{
uint64_t Data[] = {
0x8000000000000001ull, 0x0000000000000000ull,
};
- EXPECT_EQ(APInt(128, 2, Data),
+ EXPECT_EQ(APInt(128, Data),
APFloat::getSmallest(APFloat::PPCDoubleDouble(), true)
.bitcastToAPInt());
}
@@ -6504,7 +6503,7 @@ TEST(APFloatTest, PPCDoubleDoubleFactories) {
uint64_t Data[] = {
0x8360000000000000ull, 0x0000000000000000ull,
};
- EXPECT_EQ(APInt(128, 2, Data),
+ EXPECT_EQ(APInt(128, Data),
APFloat::getSmallestNormalized(APFloat::PPCDoubleDouble(), true)
.bitcastToAPInt());
}
@@ -6523,7 +6522,7 @@ TEST(APFloatTest, PPCDoubleDoubleIsDenormal) {
0x4010000000000000ull, 0x4008000000000000ull,
};
EXPECT_TRUE(
- APFloat(APFloat::PPCDoubleDouble(), APInt(128, 2, Data)).isDenormal());
+ APFloat(APFloat::PPCDoubleDouble(), APInt(128, Data)).isDenormal());
}
}
@@ -6533,7 +6532,7 @@ TEST(APFloatTest, PPCDoubleDoubleScalbn) {
0x4008000000000000ull, 0x3cb8000000000000ull,
};
APFloat Result =
- scalbn(APFloat(APFloat::PPCDoubleDouble(), APInt(128, 2, Input)), 1,
+ scalbn(APFloat(APFloat::PPCDoubleDouble(), APInt(128, Input)), 1,
APFloat::rmNearestTiesToEven);
// 6.0 + 6.0 << 53
EXPECT_EQ(0x4018000000000000ull, Result.bitcastToAPInt().getRawData()[0]);
diff --git a/llvm/unittests/ADT/TrieRawHashMapTest.cpp b/llvm/unittests/ADT/TrieRawHashMapTest.cpp
index c9081f5..7a95f1d 100644
--- a/llvm/unittests/ADT/TrieRawHashMapTest.cpp
+++ b/llvm/unittests/ADT/TrieRawHashMapTest.cpp
@@ -64,7 +64,7 @@ public:
}
void destroyTrie() { Trie.reset(); }
- ~SimpleTrieHashMapTest() { destroyTrie(); }
+ ~SimpleTrieHashMapTest() override { destroyTrie(); }
// Use the number itself as hash to test the pathological case.
static HashType hash(uint64_t Num) {
diff --git a/llvm/unittests/CAS/CASTestConfig.h b/llvm/unittests/CAS/CASTestConfig.h
index c08968b..8d3c553 100644
--- a/llvm/unittests/CAS/CASTestConfig.h
+++ b/llvm/unittests/CAS/CASTestConfig.h
@@ -42,8 +42,8 @@ protected:
auto TD = GetParam()(++(*NextCASIndex));
return std::move(TD.Cache);
}
- void SetUp() { NextCASIndex = 0; }
- void TearDown() { NextCASIndex = std::nullopt; }
+ void SetUp() override { NextCASIndex = 0; }
+ void TearDown() override { NextCASIndex = std::nullopt; }
};
#endif
diff --git a/llvm/unittests/MC/SystemZ/SystemZMCDisassemblerTest.cpp b/llvm/unittests/MC/SystemZ/SystemZMCDisassemblerTest.cpp
index 01ff1f3..87fad37 100644
--- a/llvm/unittests/MC/SystemZ/SystemZMCDisassemblerTest.cpp
+++ b/llvm/unittests/MC/SystemZ/SystemZMCDisassemblerTest.cpp
@@ -61,7 +61,7 @@ Context &getContext() {
class SystemZMCSymbolizerTest : public MCSymbolizer {
public:
SystemZMCSymbolizerTest(MCContext &MC) : MCSymbolizer(MC, nullptr) {}
- ~SystemZMCSymbolizerTest() {}
+ ~SystemZMCSymbolizerTest() override {}
bool tryAddingSymbolicOperand([[maybe_unused]] MCInst &Inst,
[[maybe_unused]] raw_ostream &CStream,
diff --git a/llvm/unittests/MC/X86/X86MCDisassemblerTest.cpp b/llvm/unittests/MC/X86/X86MCDisassemblerTest.cpp
index 8d4e46c0..286528f 100644
--- a/llvm/unittests/MC/X86/X86MCDisassemblerTest.cpp
+++ b/llvm/unittests/MC/X86/X86MCDisassemblerTest.cpp
@@ -62,7 +62,7 @@ Context &getContext() {
class X86MCSymbolizerTest : public MCSymbolizer {
public:
X86MCSymbolizerTest(MCContext &MC) : MCSymbolizer(MC, nullptr) {}
- ~X86MCSymbolizerTest() {}
+ ~X86MCSymbolizerTest() override {}
struct OpInfo {
int64_t Value = 0;
diff --git a/llvm/unittests/SandboxIR/PassTest.cpp b/llvm/unittests/SandboxIR/PassTest.cpp
index 0c813b2..1a32702 100644
--- a/llvm/unittests/SandboxIR/PassTest.cpp
+++ b/llvm/unittests/SandboxIR/PassTest.cpp
@@ -84,7 +84,9 @@ define void @foo() {
class TestNamePass final : public FunctionPass {
public:
TestNamePass(llvm::StringRef Name) : FunctionPass(Name) {}
- bool runOnFunction(Function &F, const Analyses &A) { return false; }
+ bool runOnFunction(Function &F, const Analyses &A) override {
+ return false;
+ }
};
EXPECT_DEATH(TestNamePass("white space"), ".*whitespace.*");
EXPECT_DEATH(TestNamePass("-dash"), ".*start with.*");
@@ -146,7 +148,7 @@ define i8 @foo(i8 %v0, i8 %v1) {
class TestNamePass final : public RegionPass {
public:
TestNamePass(llvm::StringRef Name) : RegionPass(Name) {}
- bool runOnRegion(Region &F, const Analyses &A) { return false; }
+ bool runOnRegion(Region &F, const Analyses &A) override { return false; }
};
EXPECT_DEATH(TestNamePass("white space"), ".*whitespace.*");
EXPECT_DEATH(TestNamePass("-dash"), ".*start with.*");
diff --git a/llvm/unittests/Support/ScopedPrinterTest.cpp b/llvm/unittests/Support/ScopedPrinterTest.cpp
index 1e2b138..280b3ee 100644
--- a/llvm/unittests/Support/ScopedPrinterTest.cpp
+++ b/llvm/unittests/Support/ScopedPrinterTest.cpp
@@ -109,7 +109,7 @@ protected:
verifyJSONScopedPrinter(JSONExpectedOut, Func);
}
- void TearDown() {
+ void TearDown() override {
// JSONScopedPrinter fails an assert if nothing's been printed.
if (!HasPrintedToJSON)
JSONWriter.printString("");
diff --git a/llvm/unittests/Transforms/Utils/ValueMapperTest.cpp b/llvm/unittests/Transforms/Utils/ValueMapperTest.cpp
index 86ad41f..e39cd70 100644
--- a/llvm/unittests/Transforms/Utils/ValueMapperTest.cpp
+++ b/llvm/unittests/Transforms/Utils/ValueMapperTest.cpp
@@ -401,7 +401,8 @@ TEST(ValueMapperTest, mapValueLocalAsMetadataToConstant) {
class TestTypeRemapper : public ValueMapTypeRemapper {
public:
TestTypeRemapper(Type *Ty) : DstTy(Ty) { }
- Type *remapType(Type *srcTy) { return DstTy; }
+ Type *remapType(Type *srcTy) override { return DstTy; }
+
private:
Type *DstTy;
};
diff --git a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.h b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.h
index 6477589..0f1241e 100644
--- a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.h
+++ b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.h
@@ -899,7 +899,7 @@ public:
OperandPredicateMatcher(PredicateKind Kind, unsigned InsnVarID,
unsigned OpIdx)
: PredicateMatcher(Kind, InsnVarID, OpIdx) {}
- virtual ~OperandPredicateMatcher();
+ ~OperandPredicateMatcher() override;
/// Compare the priority of this object and B.
///
@@ -1375,7 +1375,7 @@ class InstructionPredicateMatcher : public PredicateMatcher {
public:
InstructionPredicateMatcher(PredicateKind Kind, unsigned InsnVarID)
: PredicateMatcher(Kind, InsnVarID) {}
- virtual ~InstructionPredicateMatcher() {}
+ ~InstructionPredicateMatcher() override {}
/// Compare the priority of this object and B.
///
diff --git a/llvm/utils/TableGen/Common/GlobalISel/Patterns.h b/llvm/utils/TableGen/Common/GlobalISel/Patterns.h
index b0d47c3..2bf2a22 100644
--- a/llvm/utils/TableGen/Common/GlobalISel/Patterns.h
+++ b/llvm/utils/TableGen/Common/GlobalISel/Patterns.h
@@ -318,7 +318,7 @@ private:
/// instruction.
class InstructionPattern : public Pattern {
public:
- virtual ~InstructionPattern() = default;
+ ~InstructionPattern() override = default;
static bool classof(const Pattern *P) {
return P->getKind() == K_CodeGenInstruction || P->getKind() == K_PatFrag ||
diff --git a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp
index 28723bf..043bc628 100644
--- a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp
@@ -2441,7 +2441,7 @@ public:
explicit GICombinerEmitter(const RecordKeeper &RK,
const CodeGenTarget &Target, StringRef Name,
const Record *Combiner);
- ~GICombinerEmitter() {}
+ ~GICombinerEmitter() override {}
void run(raw_ostream &OS);
};