rebase on top of diffbase branchusers/mingmingl-llvm/spr/aarch64jumptable

author: mingmingl <mingmingl@google.com> 2025-02-05 22:58:44 -0800
committer: mingmingl <mingmingl@google.com> 2025-02-05 22:58:44 -0800
commit: 9f20ebb51dd6879968cf53c39131c68e0421a324 (patch)
tree: 6d7a7e7b1d20dddf01628cf98cfa8e054408f36e
parent: fb798fd923517ef559228cbb75f7abfe5526de9d (diff)
parent: eaa5fd57cd0f1c819a3b0c68f9402124842fd212 (diff)
download: llvm-users/mingmingl-llvm/spr/aarch64jumptable.zip
llvm-users/mingmingl-llvm/spr/aarch64jumptable.tar.gz
llvm-users/mingmingl-llvm/spr/aarch64jumptable.tar.bz2
720 files changed, 28630 insertions, 10069 deletions
diff --git a/.github/workflows/build-ci-container.yml b/.github/workflows/build-ci-container.yml
index c419986..8272c8f 100644
--- a/.github/workflows/build-ci-container.yml
+++ b/.github/workflows/build-ci-container.yml
@@ -96,7 +96,7 @@ jobs:
         run: |
           function push_container {
             image_name=$1
-            latest_name=$(echo $image_name | sed 's/:[.0-9]\+$/:latest/g')
+            latest_name=$(echo $image_name | sed 's/:[a-f0-9]\+$/:latest/g')
             podman tag $image_name $latest_name
             echo "Pushing $image_name ..."
             podman push $image_name
diff --git a/.github/workflows/release-binaries.yml b/.github/workflows/release-binaries.yml
index c49939e..204ee64 100644
--- a/.github/workflows/release-binaries.yml
+++ b/.github/workflows/release-binaries.yml
@@ -58,7 +58,6 @@ jobs:
       target-cmake-flags: ${{ steps.vars.outputs.target-cmake-flags }}
       ccache: ${{ steps.vars.outputs.ccache }}
       build-flang: ${{ steps.vars.outputs.build-flang }}
-      enable-pgo: ${{ steps.vars.outputs.enable-pgo }}
       release-binary-basename: ${{ steps.vars.outputs.release-binary-basename }}
       release-binary-filename: ${{ steps.vars.outputs.release-binary-filename }}
       build-runs-on: ${{ steps.vars.outputs.build-runs-on }}
@@ -130,9 +129,6 @@ jobs:
           echo ccache=sccache >> $GITHUB_OUTPUT
         fi
 
-        # Detect necessary CMake flags
-        echo "enable-pgo=false" >> $GITHUB_OUTPUT
-        target_cmake_flags="-DLLVM_RELEASE_ENABLE_PGO=OFF"
         # The macOS builds try to cross compile some libraries so we need to
         # add extra CMake args to disable them.
         # See https://github.com/llvm/llvm-project/issues/99767
@@ -238,13 +234,14 @@ jobs:
             ${{ needs.prepare.outputs.target-cmake-flags }} \
             -C clang/cmake/caches/Release.cmake \
             -DBOOTSTRAP_LLVM_PARALLEL_LINK_JOBS=1 \
-            -DBOOTSTRAP_CPACK_PACKAGE_FILE_NAME="${{ needs.prepare.outputs.release-binary-basename }}"
+            -DBOOTSTRAP_BOOTSTRAP_CPACK_PACKAGE_FILE_NAME="${{ needs.prepare.outputs.release-binary-basename }}"
 
     - name: Build
       shell: bash
       run: |
         ninja -v -C ${{ steps.setup-stage.outputs.build-prefix }}/build stage2-package
-        mv ${{ steps.setup-stage.outputs.build-prefix  }}/build/tools/clang/stage2-bins/${{ needs.prepare.outputs.release-binary-filename }} .
+        release_dir=`find ${{ steps.setup-stage.outputs.build-prefix }}/build -iname 'stage2-bins'`
+        mv $release_dir/${{ needs.prepare.outputs.release-binary-filename }} .
     
     - uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8 #v4.3.0
       with:
@@ -259,7 +256,7 @@ jobs:
       shell: bash
       run: |
         find ${{ steps.setup-stage.outputs.build-prefix }}/build -iname ${{ needs.prepare.outputs.release-binary-filename }} -delete
-        rm -Rf ${{ steps.setup-stage.outputs.build-prefix }}/build/tools/clang/stage2-bins/_CPack_Packages
+        find ${{ steps.setup-stage.outputs.build-prefix }}/build -iname _CPack_Packages -prune -exec rm -r {} +
     
     - name: Save Stage
       uses: ./workflows-main/.github/workflows/release-binaries-save-stage
diff --git a/bolt/test/AArch64/exceptions-plt.cpp b/bolt/test/AArch64/exceptions-plt.cpp
index 576f0fc9..33c2840 100644
--- a/bolt/test/AArch64/exceptions-plt.cpp
+++ b/bolt/test/AArch64/exceptions-plt.cpp
@@ -2,7 +2,9 @@
 
 // REQUIRES: system-linux
 
-// RUN: %clangxx %cxxflags -O1 -Wl,-q,-znow %s -o %t.exe
+// RUN: %clang %cflags -fpic -shared -xc /dev/null -o %t.so
+// Link against a DSO to ensure PLT entries.
+// RUN: %clangxx %cxxflags -O1 -Wl,-q,-znow %s %t.so -o %t.exe
 // RUN: llvm-bolt %t.exe -o %t.bolt.exe --plt=all --print-only=.*main.* \
 // RUN:   --print-finalized 2>&1 | FileCheck %s
 
diff --git a/bolt/test/AArch64/plt-call.test b/bolt/test/AArch64/plt-call.test
index da307d4..1fa62c4 100644
--- a/bolt/test/AArch64/plt-call.test
+++ b/bolt/test/AArch64/plt-call.test
@@ -1,6 +1,8 @@
 // Verify that PLTCall optimization works.
 
-RUN: %clang %cflags %p/../Inputs/plt-tailcall.c \
+RUN: %clang %cflags -fpic -shared -xc /dev/null -o %t.so
+// Link against a DSO to ensure PLT entries.
+RUN: %clang %cflags %p/../Inputs/plt-tailcall.c %t.so \
 RUN:    -o %t -Wl,-q
 RUN: llvm-bolt %t -o %t.bolt --plt=all --print-plt  --print-only=foo | FileCheck %s
 
diff --git a/bolt/test/X86/callcont-fallthru.s b/bolt/test/X86/callcont-fallthru.s
index 31a7910..d76f869 100644
--- a/bolt/test/X86/callcont-fallthru.s
+++ b/bolt/test/X86/callcont-fallthru.s
@@ -1,7 +1,9 @@
 ## Ensures that a call continuation fallthrough count is set when using
 ## pre-aggregated perf data.
 
-# RUN: %clangxx %cxxflags %s -o %t -Wl,-q -nostdlib
+# RUN: %clang %cflags -fpic -shared -xc /dev/null -o %t.so
+## Link against a DSO to ensure PLT entries.
+# RUN: %clangxx %cxxflags %s %t.so -o %t -Wl,-q -nostdlib
 # RUN: link_fdata %s %t %t.pa1 PREAGG
 # RUN: link_fdata %s %t %t.pa2 PREAGG2
 # RUN: link_fdata %s %t %t.pa3 PREAGG3
diff --git a/bolt/test/X86/cfi-instrs-reordered.s b/bolt/test/X86/cfi-instrs-reordered.s
index c325aaf..5173fa6 100644
--- a/bolt/test/X86/cfi-instrs-reordered.s
+++ b/bolt/test/X86/cfi-instrs-reordered.s
@@ -3,7 +3,9 @@
 
 # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
 # RUN: llvm-strip --strip-unneeded %t.o
-# RUN: %clangxx %cflags %t.o -o %t.exe
+# RUN: %clang %cflags -fpic -shared -xc /dev/null -o %t.so
+## Link against a DSO to ensure PLT entries.
+# RUN: %clangxx %cflags %t.o %t.so -o %t.exe
 # RUN: llvm-bolt %t.exe -o %t --reorder-blocks=cache --print-after-lowering \
 # RUN:   --print-only=_Z10SolveCubicddddPiPd 2>&1 | FileCheck %s
 #
diff --git a/bolt/test/X86/plt-call.test b/bolt/test/X86/plt-call.test
index e6ae86c..aeee302 100644
--- a/bolt/test/X86/plt-call.test
+++ b/bolt/test/X86/plt-call.test
@@ -1,6 +1,8 @@
 // Verify that PLTCall optimization works.
 
-RUN: %clang %cflags %p/../Inputs/plt-tailcall.c \
+RUN: %clang %cflags -fpic -shared -xc /dev/null -o %t.so
+// Link against a DSO to ensure PLT entries.
+RUN: %clang %cflags %p/../Inputs/plt-tailcall.c %t.so \
 RUN:    -o %t -Wl,-q
 RUN: llvm-bolt %t -o %t.bolt --plt=all --print-plt  --print-only=foo | FileCheck %s
 
diff --git a/bolt/test/runtime/exceptions-plt.cpp b/bolt/test/runtime/exceptions-plt.cpp
index 8a75a3c..3d8e7a5 100644
--- a/bolt/test/runtime/exceptions-plt.cpp
+++ b/bolt/test/runtime/exceptions-plt.cpp
@@ -2,7 +2,9 @@
 
 // REQUIRES: system-linux
 
-// RUN: %clangxx %cxxflags -O1 -Wl,-q,-znow %s -o %t.exe
+// RUN: %clang %cflags -fpic -shared -xc /dev/null -o %t.so
+// Link against a DSO to ensure PLT entries.
+// RUN: %clangxx %cxxflags -O1 -Wl,-q,-znow %s %t.so -o %t.exe
 // RUN: llvm-bolt %t.exe -o %t.bolt.exe --plt=all
 // RUN: %t.bolt.exe
 
diff --git a/bolt/test/runtime/plt-lld.test b/bolt/test/runtime/plt-lld.test
index b505a19..3432e18 100644
--- a/bolt/test/runtime/plt-lld.test
+++ b/bolt/test/runtime/plt-lld.test
@@ -1,14 +1,15 @@
 // This test checks that the pointers to PLT are properly updated.
-// The test is using lld linker.
+// The test uses lld and links against a DSO to ensure PLT entries.
+RUN: %clang %cflags -fpic -shared -xc /dev/null -o %t.so
 
 // Non-PIE:
-RUN: %clang %cflags -no-pie %p/../Inputs/plt.c -fuse-ld=lld \
+RUN: %clang %cflags -no-pie %p/../Inputs/plt.c %t.so -fuse-ld=lld \
 RUN:    -o %t.lld.exe -Wl,-q
 RUN: llvm-bolt %t.lld.exe -o %t.lld.bolt.exe --use-old-text=0 --lite=0
 RUN: %t.lld.bolt.exe | FileCheck %s
 
 // PIE:
-RUN: %clang %cflags -fPIC -pie %p/../Inputs/plt.c -fuse-ld=lld \
+RUN: %clang %cflags -fPIC -pie %p/../Inputs/plt.c %t.so -fuse-ld=lld \
 RUN:    -o %t.lld.pie.exe -Wl,-q
 RUN: llvm-bolt %t.lld.pie.exe -o %t.lld.bolt.pie.exe --use-old-text=0 --lite=0
 RUN: %t.lld.bolt.pie.exe | FileCheck %s
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 18ecf1e..21c1ff2 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -112,6 +112,8 @@ Removed Compiler Flags
 Attribute Changes in Clang
 --------------------------
 
+- The ``no_sanitize`` attribute now accepts both ``gnu`` and ``clang`` names.
+
 Improvements to Clang's diagnostics
 -----------------------------------
 
@@ -143,12 +145,16 @@ Bug Fixes to Attribute Support
 Bug Fixes to C++ Support
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
+- Clang is now better at keeping track of friend function template instance contexts. (#GH55509)
+
 Bug Fixes to AST Handling
 ^^^^^^^^^^^^^^^^^^^^^^^^^
 
 Miscellaneous Bug Fixes
 ^^^^^^^^^^^^^^^^^^^^^^^
 
+- HTML tags in comments that span multiple lines are now parsed correctly by Clang's comment parser. (#GH120843)
+
 Miscellaneous Clang Crashes Fixed
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/clang/docs/analyzer/developer-docs.rst b/clang/docs/analyzer/developer-docs.rst
index 1b8133f..60c0e71 100644
--- a/clang/docs/analyzer/developer-docs.rst
+++ b/clang/docs/analyzer/developer-docs.rst
@@ -11,3 +11,4 @@ Contents:
    developer-docs/InitializerLists
    developer-docs/nullability
    developer-docs/RegionStore
+   developer-docs/PerformanceInvestigation
diff --git a/clang/docs/analyzer/developer-docs/PerformanceInvestigation.rst b/clang/docs/analyzer/developer-docs/PerformanceInvestigation.rst
new file mode 100644
index 0000000..3ee6e11
--- /dev/null
+++ b/clang/docs/analyzer/developer-docs/PerformanceInvestigation.rst
@@ -0,0 +1,47 @@
+=========================
+Performance Investigation
+=========================
+
+Multiple factors contribute to the time it takes to analyze a file with Clang Static Analyzer.
+A translation unit contains multiple entry points, each of which take multiple steps to analyze.
+
+You can add the ``-ftime-trace=file.json`` option to break down the analysis time into individual entry points and steps within each entry point.
+You can explore the generated JSON file in a Chromium browser using the ``chrome://tracing`` URL,
+or using `speedscope <https://speedscope.app>`_.
+Once you narrow down to specific analysis steps you are interested in, you can more effectively employ heavier profilers,
+such as `Perf <https://perfwiki.github.io/main/>`_ and `Callgrind <https://valgrind.org/docs/manual/cl-manual.html>`_.
+
+Each analysis step has a time scope in the trace, corresponds to processing of an exploded node, and is designated with a ``ProgramPoint``.
+If the ``ProgramPoint`` is associated with a location, you can see it on the scope metadata label.
+
+Here is an example of a time trace produced with
+
+.. code-block:: bash
+   :caption: Clang Static Analyzer invocation to generate a time trace of string.c analysis.
+
+   clang -cc1 -nostdsysteminc -analyze -analyzer-constraints=range \
+         -setup-static-analyzer -analyzer-checker=core,unix,alpha.unix.cstring,debug.ExprInspection \
+         -verify ./clang/test/Analysis/string.c \
+         -ftime-trace=trace.json -ftime-trace-granularity=1
+
+.. image:: ../images/speedscope.png
+
+On the speedscope screenshot above, under the first time ruler is the bird's-eye view of the entire trace that spans a little over 60 milliseconds.
+Under the second ruler (focused on the 18.09-18.13ms time point) you can see a narrowed-down portion.
+The second box ("HandleCode memset...") that spans entire screen (and actually extends beyond it) corresponds to the analysis of ``memset16_region_cast()`` entry point that is defined in the "string.c" test file on line 1627.
+Below it, you can find multiple sub-scopes each corresponding to processing of a single exploded node.
+
+- First: a ``PostStmt`` for some statement on line 1634. This scope has a selected subscope "CheckerManager::runCheckersForCallEvent (Pre)" that takes 5 microseconds.
+- Four other nodes, too small to be discernible at this zoom level
+- Last on this screenshot: another ``PostStmt`` for a statement on line 1635.
+
+In addition to the ``-ftime-trace`` option, you can use ``-ftime-trace-granularity`` to fine-tune the time trace.
+
+- ``-ftime-trace-granularity=NN`` dumps only time scopes that are longer than NN microseconds.
+- ``-ftime-trace-verbose`` enables some additional dumps in the frontend related to template instantiations.
+  At the moment, it has no effect on the traces from the static analyzer.
+
+Note: Both Chrome-tracing and speedscope tools might struggle with time traces above 100 MB in size.
+Luckily, in most cases the default max-steps boundary of 225 000 produces the traces of approximately that size
+for a single entry point.
+You can use ``-analyze-function=get_global_options`` together with ``-ftime-trace`` to narrow down analysis to a specific entry point.
diff --git a/clang/docs/analyzer/images/speedscope.png b/clang/docs/analyzer/images/speedscope.png
new file mode 100644
index 0000000..767725d6
--- /dev/null
+++ b/clang/docs/analyzer/images/speedscope.png
diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h
index 499d27a..f305cbb 100644
--- a/clang/include/clang/AST/Decl.h
+++ b/clang/include/clang/AST/Decl.h
@@ -2298,6 +2298,13 @@ public:
     FunctionDeclBits.IsLateTemplateParsed = ILT;
   }
 
+  bool isInstantiatedFromMemberTemplate() const {
+    return FunctionDeclBits.IsInstantiatedFromMemberTemplate;
+  }
+  void setInstantiatedFromMemberTemplate(bool Val = true) {
+    FunctionDeclBits.IsInstantiatedFromMemberTemplate = Val;
+  }
+
   /// Whether this function is "trivial" in some specialized C++ senses.
   /// Can only be true for default constructors, copy constructors,
   /// copy assignment operators, and destructors.  Not meaningful until
diff --git a/clang/include/clang/AST/DeclBase.h b/clang/include/clang/AST/DeclBase.h
index 3bb82c1..648dae2 100644
--- a/clang/include/clang/AST/DeclBase.h
+++ b/clang/include/clang/AST/DeclBase.h
@@ -1780,6 +1780,8 @@ protected:
     uint64_t HasImplicitReturnZero : 1;
     LLVM_PREFERRED_TYPE(bool)
     uint64_t IsLateTemplateParsed : 1;
+    LLVM_PREFERRED_TYPE(bool)
+    uint64_t IsInstantiatedFromMemberTemplate : 1;
 
     /// Kind of contexpr specifier as defined by ConstexprSpecKind.
     LLVM_PREFERRED_TYPE(ConstexprSpecKind)
@@ -1830,7 +1832,7 @@ protected:
   };
 
   /// Number of inherited and non-inherited bits in FunctionDeclBitfields.
-  enum { NumFunctionDeclBits = NumDeclContextBits + 31 };
+  enum { NumFunctionDeclBits = NumDeclContextBits + 32 };
 
   /// Stores the bits used by CXXConstructorDecl. If modified
   /// NumCXXConstructorDeclBits and the accessor
@@ -1841,12 +1843,12 @@ protected:
     LLVM_PREFERRED_TYPE(FunctionDeclBitfields)
     uint64_t : NumFunctionDeclBits;
 
-    /// 20 bits to fit in the remaining available space.
+    /// 19 bits to fit in the remaining available space.
     /// Note that this makes CXXConstructorDeclBitfields take
     /// exactly 64 bits and thus the width of NumCtorInitializers
     /// will need to be shrunk if some bit is added to NumDeclContextBitfields,
     /// NumFunctionDeclBitfields or CXXConstructorDeclBitfields.
-    uint64_t NumCtorInitializers : 17;
+    uint64_t NumCtorInitializers : 16;
     LLVM_PREFERRED_TYPE(bool)
     uint64_t IsInheritingConstructor : 1;
 
@@ -1860,7 +1862,7 @@ protected:
   };
 
   /// Number of inherited and non-inherited bits in CXXConstructorDeclBitfields.
-  enum { NumCXXConstructorDeclBits = NumFunctionDeclBits + 20 };
+  enum { NumCXXConstructorDeclBits = NumFunctionDeclBits + 19 };
 
   /// Stores the bits used by ObjCMethodDecl.
   /// If modified NumObjCMethodDeclBits and the accessor
diff --git a/clang/include/clang/AST/DeclTemplate.h b/clang/include/clang/AST/DeclTemplate.h
index 9ecff2c..a30ae79 100644
--- a/clang/include/clang/AST/DeclTemplate.h
+++ b/clang/include/clang/AST/DeclTemplate.h
@@ -1011,6 +1011,26 @@ public:
     return getTemplatedDecl()->isThisDeclarationADefinition();
   }
 
+  bool isCompatibleWithDefinition() const {
+    return getTemplatedDecl()->isInstantiatedFromMemberTemplate() ||
+           isThisDeclarationADefinition();
+  }
+
+  // This bit closely tracks 'RedeclarableTemplateDecl::InstantiatedFromMember',
+  // except this is per declaration, while the redeclarable field is
+  // per chain. This indicates a template redeclaration which
+  // is compatible with the definition, in the non-trivial case
+  // where this is not already a definition.
+  // This is only really needed for instantiating the definition of friend
+  // function templates, which can have redeclarations in different template
+  // contexts.
+  // The bit is actually stored in the FunctionDecl for space efficiency
+  // reasons.
+  void setInstantiatedFromMemberTemplate(FunctionTemplateDecl *D) {
+    getTemplatedDecl()->setInstantiatedFromMemberTemplate();
+    RedeclarableTemplateDecl::setInstantiatedFromMemberTemplate(D);
+  }
+
   /// Return the specialization with the provided arguments if it exists,
   /// otherwise return the insertion point.
   FunctionDecl *findSpecialization(ArrayRef<TemplateArgument> Args,
@@ -1841,15 +1861,23 @@ class ClassTemplateSpecializationDecl : public CXXRecordDecl,
   LLVM_PREFERRED_TYPE(TemplateSpecializationKind)
   unsigned SpecializationKind : 3;
 
+  /// Indicate that we have matched a parameter pack with a non pack
+  /// argument, when the opposite match is also allowed.
+  /// This needs to be cached as deduction is performed during declaration,
+  /// and we need the information to be preserved so that it is consistent
+  /// during instantiation.
+  bool StrictPackMatch : 1;
+
 protected:
   ClassTemplateSpecializationDecl(ASTContext &Context, Kind DK, TagKind TK,
                                   DeclContext *DC, SourceLocation StartLoc,
                                   SourceLocation IdLoc,
                                   ClassTemplateDecl *SpecializedTemplate,
                                   ArrayRef<TemplateArgument> Args,
+                                  bool StrictPackMatch,
                                   ClassTemplateSpecializationDecl *PrevDecl);
 
-  explicit ClassTemplateSpecializationDecl(ASTContext &C, Kind DK);
+  ClassTemplateSpecializationDecl(ASTContext &C, Kind DK);
 
 public:
   friend class ASTDeclReader;
@@ -1859,7 +1887,7 @@ public:
   Create(ASTContext &Context, TagKind TK, DeclContext *DC,
          SourceLocation StartLoc, SourceLocation IdLoc,
          ClassTemplateDecl *SpecializedTemplate,
-         ArrayRef<TemplateArgument> Args,
+         ArrayRef<TemplateArgument> Args, bool StrictPackMatch,
          ClassTemplateSpecializationDecl *PrevDecl);
   static ClassTemplateSpecializationDecl *CreateDeserialized(ASTContext &C,
                                                              GlobalDeclID ID);
@@ -1930,6 +1958,8 @@ public:
     SpecializationKind = TSK;
   }
 
+  bool hasStrictPackMatch() const { return StrictPackMatch; }
+
   /// Get the point of instantiation (if any), or null if none.
   SourceLocation getPointOfInstantiation() const {
     return PointOfInstantiation;
diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h
index b9088eff..abdf933 100644
--- a/clang/include/clang/AST/OpenMPClause.h
+++ b/clang/include/clang/AST/OpenMPClause.h
@@ -9451,7 +9451,8 @@ struct TargetOMPContext final : public llvm::omp::OMPContext {
   TargetOMPContext(ASTContext &ASTCtx,
                    std::function<void(StringRef)> &&DiagUnknownTrait,
                    const FunctionDecl *CurrentFunctionDecl,
-                   ArrayRef<llvm::omp::TraitProperty> ConstructTraits);
+                   ArrayRef<llvm::omp::TraitProperty> ConstructTraits,
+                   int DeviceNum);
 
   virtual ~TargetOMPContext() = default;
 
diff --git a/clang/include/clang/Analysis/ProgramPoint.h b/clang/include/clang/Analysis/ProgramPoint.h
index b933957..1df1f1c 100644
--- a/clang/include/clang/Analysis/ProgramPoint.h
+++ b/clang/include/clang/Analysis/ProgramPoint.h
@@ -85,6 +85,9 @@ public:
               LoopExitKind,
               EpsilonKind};
 
+  static StringRef getProgramPointKindName(Kind K);
+  std::optional<SourceLocation> getSourceLocation() const;
+
 private:
   const void *Data1;
   llvm::PointerIntPair<const void *, 2, unsigned> Data2;
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 2a3a29b..4384a98 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -380,6 +380,13 @@ class Clang<string name, bit allowInC = 1, int version = 1>
   bit AllowInC = allowInC;
 }
 
+// This spelling combines the spellings of GCC and Clang for cases where the
+// spellings are equivalent for compile compatibility.
+class ClangGCC<string name, bit allowInC = 1, int version = 1>
+    : Spelling<name, "ClangGCC", version> {
+  bit AllowInC = allowInC;
+}
+
 // HLSL Annotation spellings
 class HLSLAnnotation<string name> : Spelling<name, "HLSLAnnotation">;
 
@@ -3677,7 +3684,7 @@ def X86ForceAlignArgPointer : InheritableAttr, TargetSpecificAttr<TargetAnyX86>
 }
 
 def NoSanitize : InheritableAttr {
-  let Spellings = [Clang<"no_sanitize">];
+  let Spellings = [ClangGCC<"no_sanitize">];
   let Args = [VariadicStringArgument<"Sanitizers">];
   let Subjects = SubjectList<[Function, ObjCMethod, GlobalVar], ErrorDiag>;
   let Documentation = [NoSanitizeDocs];
diff --git a/clang/include/clang/Basic/OpenMPKinds.h b/clang/include/clang/Basic/OpenMPKinds.h
index 3e5da2a..e80bce34 100644
--- a/clang/include/clang/Basic/OpenMPKinds.h
+++ b/clang/include/clang/Basic/OpenMPKinds.h
@@ -399,6 +399,14 @@ bool isOpenMPInformationalDirective(OpenMPDirectiveKind DKind);
 /// \return true - if the above condition is met for this directive
 /// otherwise - false.
 bool isOpenMPCapturingDirective(OpenMPDirectiveKind DKind);
+
+/// Checks if the specified directive is an order concurrent nestable
+/// directive that can be nested within region corresponding to construct
+/// on which order clause was specified with concurrent as ordering argument.
+/// \param DKind Specified directive.
+/// \return true - if the above condition is met for this directive
+/// otherwise - false.
+bool isOpenMPOrderConcurrentNestableDirective(OpenMPDirectiveKind DKind);
 }
 
 template <>
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 77c2f88..b9e46a5 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -1471,6 +1471,7 @@ public:
   /// specification
   virtual bool validateBranchProtection(StringRef Spec, StringRef Arch,
                                         BranchProtectionInfo &BPI,
+                                        const LangOptions &LO,
                                         StringRef &Err) const {
     Err = "";
     return false;
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 0ab923f..0387cc41 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -932,7 +932,9 @@ def W_Joined : Joined<["-"], "W">, Group<W_Group>,
 def Xanalyzer : Separate<["-"], "Xanalyzer">,
   HelpText<"Pass <arg> to the static analyzer">, MetaVarName<"<arg>">,
   Group<StaticAnalyzer_Group>;
-def Xarch__ : JoinedAndSeparate<["-"], "Xarch_">, Flags<[NoXarchOption]>;
+def Xarch__ : JoinedAndSeparate<["-"], "Xarch_">, Flags<[NoXarchOption]>,
+  HelpText<"Pass <arg> to the compiliation if the target matches <arch>">,
+  MetaVarName<"<arch> <arg>">;
 def Xarch_host : Separate<["-"], "Xarch_host">, Flags<[NoXarchOption]>,
   HelpText<"Pass <arg> to the CUDA/HIP host compilation">, MetaVarName<"<arg>">;
 def Xarch_device : Separate<["-"], "Xarch_device">, Flags<[NoXarchOption]>,
@@ -1115,8 +1117,8 @@ def fno_convergent_functions : Flag<["-"], "fno-convergent-functions">,
 
 // Common offloading options
 let Group = offload_Group in {
-def offload_arch_EQ : Joined<["--"], "offload-arch=">, Flags<[NoXarchOption]>,
-  Visibility<[ClangOption, FlangOption]>,
+def offload_arch_EQ : Joined<["--"], "offload-arch=">,
+  Visibility<[ClangOption, FlangOption]>, Flags<[NoXarchOption]>,
   HelpText<"Specify an offloading device architecture for CUDA, HIP, or OpenMP. (e.g. sm_35). "
            "If 'native' is used the compiler will detect locally installed architectures. "
            "For HIP offloading, the device architecture can be followed by target ID features "
@@ -6876,6 +6878,7 @@ defm backslash : OptInFC1FFlag<"backslash", "Specify that backslash in string in
 defm xor_operator : OptInFC1FFlag<"xor-operator", "Enable .XOR. as a synonym of .NEQV.">;
 defm logical_abbreviations : OptInFC1FFlag<"logical-abbreviations", "Enable logical abbreviations">;
 defm implicit_none : OptInFC1FFlag<"implicit-none", "No implicit typing allowed unless overridden by IMPLICIT statements">;
+defm implicit_none_ext : OptInFC1FFlag<"implicit-none-ext", "No implicit externals allowed">;
 defm underscoring : OptInFC1FFlag<"underscoring", "Appends one trailing underscore to external names">;
 defm ppc_native_vec_elem_order: BoolOptionWithoutMarshalling<"f", "ppc-native-vector-element-order",
   PosFlag<SetTrue, [], [ClangOption], "Specifies PowerPC native vector element order (default)">,
diff --git a/clang/include/clang/Sema/Overload.h b/clang/include/clang/Sema/Overload.h
index c7f2422..c03ec00 100644
--- a/clang/include/clang/Sema/Overload.h
+++ b/clang/include/clang/Sema/Overload.h
@@ -933,7 +933,7 @@ class Sema;
     /// Have we matched any packs on the parameter side, versus any non-packs on
     /// the argument side, in a context where the opposite matching is also
     /// allowed?
-    bool HasMatchedPackOnParmToNonPackOnArg : 1;
+    bool StrictPackMatch : 1;
 
     /// True if the candidate was found using ADL.
     LLVM_PREFERRED_TYPE(CallExpr::ADLCallKind)
@@ -1010,8 +1010,7 @@ class Sema;
     friend class OverloadCandidateSet;
     OverloadCandidate()
         : IsSurrogate(false), IgnoreObjectArgument(false),
-          TookAddressOfOverload(false),
-          HasMatchedPackOnParmToNonPackOnArg(false),
+          TookAddressOfOverload(false), StrictPackMatch(false),
           IsADLCandidate(llvm::to_underlying(CallExpr::NotADL)),
           RewriteKind(CRK_None) {}
   };
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 59e2926..1870d127 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -10180,18 +10180,15 @@ public:
   /// \param PartialOverloading true if we are performing "partial" overloading
   /// based on an incomplete set of function arguments. This feature is used by
   /// code completion.
-  void AddOverloadCandidate(FunctionDecl *Function, DeclAccessPair FoundDecl,
-                            ArrayRef<Expr *> Args,
-                            OverloadCandidateSet &CandidateSet,
-                            bool SuppressUserConversions = false,
-                            bool PartialOverloading = false,
-                            bool AllowExplicit = true,
-                            bool AllowExplicitConversion = false,
-                            ADLCallKind IsADLCandidate = ADLCallKind::NotADL,
-                            ConversionSequenceList EarlyConversions = {},
-                            OverloadCandidateParamOrder PO = {},
-                            bool AggregateCandidateDeduction = false,
-                            bool HasMatchedPackOnParmToNonPackOnArg = false);
+  void AddOverloadCandidate(
+      FunctionDecl *Function, DeclAccessPair FoundDecl, ArrayRef<Expr *> Args,
+      OverloadCandidateSet &CandidateSet, bool SuppressUserConversions = false,
+      bool PartialOverloading = false, bool AllowExplicit = true,
+      bool AllowExplicitConversion = false,
+      ADLCallKind IsADLCandidate = ADLCallKind::NotADL,
+      ConversionSequenceList EarlyConversions = {},
+      OverloadCandidateParamOrder PO = {},
+      bool AggregateCandidateDeduction = false, bool StrictPackMatch = false);
 
   /// Add all of the function declarations in the given function set to
   /// the overload candidate set.
@@ -10227,7 +10224,7 @@ public:
                           bool PartialOverloading = false,
                           ConversionSequenceList EarlyConversions = {},
                           OverloadCandidateParamOrder PO = {},
-                          bool HasMatchedPackOnParmToNonPackOnArg = false);
+                          bool StrictPackMatch = false);
 
   /// Add a C++ member function template as a candidate to the candidate
   /// set, using template argument deduction to produce an appropriate member
@@ -10274,7 +10271,7 @@ public:
       CXXRecordDecl *ActingContext, Expr *From, QualType ToType,
       OverloadCandidateSet &CandidateSet, bool AllowObjCConversionOnExplicit,
       bool AllowExplicit, bool AllowResultConversion = true,
-      bool HasMatchedPackOnParmToNonPackOnArg = false);
+      bool StrictPackMatch = false);
 
   /// Adds a conversion function template specialization
   /// candidate to the overload set, using template argument deduction
@@ -11694,7 +11691,7 @@ public:
 
     /// Is set to true when, in the context of TTP matching, a pack parameter
     /// matches non-pack arguments.
-    bool MatchedPackOnParmToNonPackOnArg = false;
+    bool StrictPackMatch = false;
   };
 
   /// Check that the given template argument corresponds to the given
@@ -11803,7 +11800,7 @@ public:
                                      TemplateParameterList *Params,
                                      TemplateArgumentLoc &Arg,
                                      bool PartialOrdering,
-                                     bool *MatchedPackOnParmToNonPackOnArg);
+                                     bool *StrictPackMatch);
 
   void NoteTemplateLocation(const NamedDecl &Decl,
                             std::optional<SourceRange> ParamRange = {});
@@ -12497,7 +12494,7 @@ public:
   bool isTemplateTemplateParameterAtLeastAsSpecializedAs(
       TemplateParameterList *PParam, TemplateDecl *PArg, TemplateDecl *AArg,
       const DefaultArguments &DefaultArgs, SourceLocation ArgLoc,
-      bool PartialOrdering, bool *MatchedPackOnParmToNonPackOnArg);
+      bool PartialOrdering, bool *StrictPackMatch);
 
   /// Mark which template parameters are used in a given expression.
   ///
@@ -13499,8 +13496,8 @@ public:
   bool InstantiateClassTemplateSpecialization(
       SourceLocation PointOfInstantiation,
       ClassTemplateSpecializationDecl *ClassTemplateSpec,
-      TemplateSpecializationKind TSK, bool Complain = true,
-      bool PrimaryHasMatchedPackOnParmToNonPackOnArg = false);
+      TemplateSpecializationKind TSK, bool Complain,
+      bool PrimaryStrictPackMatch);
 
   /// Instantiates the definitions of all of the member
   /// of the given class, which is an instantiation of a class template
diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h
index a056a96..fa244da 100644
--- a/clang/include/clang/Sema/SemaOpenMP.h
+++ b/clang/include/clang/Sema/SemaOpenMP.h
@@ -849,6 +849,9 @@ public:
       ArrayRef<OMPInteropInfo> AppendArgs, SourceLocation AdjustArgsLoc,
       SourceLocation AppendArgsLoc, SourceRange SR);
 
+  /// Called on device_num selector in context selectors.
+  void ActOnOpenMPDeviceNum(Expr *DeviceNumExpr);
+
   OMPClause *ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr,
                                          SourceLocation StartLoc,
                                          SourceLocation LParenLoc,
@@ -1410,6 +1413,13 @@ public:
 
   void handleOMPAssumeAttr(Decl *D, const ParsedAttr &AL);
 
+  /// Setter and getter functions for device_num.
+  void setOpenMPDeviceNum(int Num);
+
+  int getOpenMPDeviceNum() const;
+
+  void setOpenMPDeviceNumID(StringRef ID);
+
 private:
   void *VarDataSharingAttributesStack;
 
@@ -1480,6 +1490,12 @@ private:
 
   /// All `omp assumes` we encountered so far.
   SmallVector<OMPAssumeAttr *, 4> OMPAssumeGlobal;
+
+  /// Device number specified by the context selector.
+  int DeviceNum = -1;
+
+  /// Device number identifier specified by the context selector.
+  StringRef DeviceNumID;
 };
 
 } // namespace clang
diff --git a/clang/include/clang/Sema/TemplateDeduction.h b/clang/include/clang/Sema/TemplateDeduction.h
index 9c12eef..020e19b 100644
--- a/clang/include/clang/Sema/TemplateDeduction.h
+++ b/clang/include/clang/Sema/TemplateDeduction.h
@@ -54,7 +54,7 @@ class TemplateDeductionInfo {
   /// Have we matched any packs on the parameter side, versus any non-packs on
   /// the argument side, in a context where the opposite matching is also
   /// allowed?
-  bool MatchedPackOnParmToNonPackOnArg = false;
+  bool StrictPackMatch = false;
 
   /// The template parameter depth for which we're performing deduction.
   unsigned DeducedDepth;
@@ -92,13 +92,9 @@ public:
     return DeducedDepth;
   }
 
-  bool hasMatchedPackOnParmToNonPackOnArg() const {
-    return MatchedPackOnParmToNonPackOnArg;
-  }
+  bool hasStrictPackMatch() const { return StrictPackMatch; }
 
-  void setMatchedPackOnParmToNonPackOnArg() {
-    MatchedPackOnParmToNonPackOnArg = true;
-  }
+  void setStrictPackMatch() { StrictPackMatch = true; }
 
   /// Get the number of explicitly-specified arguments.
   unsigned getNumExplicitArgs() const {
diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
index 4a343f2..f002f86 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
@@ -55,15 +55,18 @@ enum class ScanningOptimizations {
   HeaderSearch = 1,
 
   /// Remove warnings from system modules.
-  SystemWarnings = 2,
+  SystemWarnings = (1 << 1),
 
   /// Remove unused -ivfsoverlay arguments.
-  VFS = 4,
+  VFS = (1 << 2),
 
   /// Canonicalize -D and -U options.
-  Macros = 8,
+  Macros = (1 << 3),
 
-  DSS_LAST_BITMASK_ENUM(Macros),
+  /// Ignore the compiler's working directory if it is safe.
+  IgnoreCWD = (1 << 4),
+
+  DSS_LAST_BITMASK_ENUM(IgnoreCWD),
   Default = All
 };
 
diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h
index ddb078d..bcc9ea1 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h
@@ -128,14 +128,17 @@ public:
   /// \param LookupModuleOutput This function is called to fill in
   ///                           "-fmodule-file=", "-o" and other output
   ///                           arguments for dependencies.
+  /// \param TUBuffer Optional memory buffer for translation unit input. If
+  ///                 TUBuffer is nullopt, the input should be included in the
+  ///                 Commandline already.
   ///
   /// \returns a \c StringError with the diagnostic output if clang errors
   /// occurred, \c TranslationUnitDeps otherwise.
-  llvm::Expected<TranslationUnitDeps>
-  getTranslationUnitDependencies(const std::vector<std::string> &CommandLine,
-                                 StringRef CWD,
-                                 const llvm::DenseSet<ModuleID> &AlreadySeen,
-                                 LookupModuleOutputCallback LookupModuleOutput);
+  llvm::Expected<TranslationUnitDeps> getTranslationUnitDependencies(
+      const std::vector<std::string> &CommandLine, StringRef CWD,
+      const llvm::DenseSet<ModuleID> &AlreadySeen,
+      LookupModuleOutputCallback LookupModuleOutput,
+      std::optional<llvm::MemoryBufferRef> TUBuffer = std::nullopt);
 
   /// Given a compilation context specified via the Clang driver command-line,
   /// gather modular dependencies of module with the given name, and return the
diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
index da6e040..ee7582b 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
@@ -17,6 +17,7 @@
 #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBufferRef.h"
 #include <optional>
 #include <string>
 
@@ -83,9 +84,21 @@ public:
                            llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS);
 
   /// Run the dependency scanning tool for a given clang driver command-line,
-  /// and report the discovered dependencies to the provided consumer. If \p
-  /// ModuleName isn't empty, this function reports the dependencies of module
-  /// \p ModuleName.
+  /// and report the discovered dependencies to the provided consumer. If
+  /// TUBuffer is not nullopt, it is used as TU input for the dependency
+  /// scanning. Otherwise, the input should be included as part of the
+  /// command-line.
+  ///
+  /// \returns false if clang errors occurred (with diagnostics reported to
+  /// \c DiagConsumer), true otherwise.
+  bool computeDependencies(
+      StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
+      DependencyConsumer &DepConsumer, DependencyActionController &Controller,
+      DiagnosticConsumer &DiagConsumer,
+      std::optional<llvm::MemoryBufferRef> TUBuffer = std::nullopt);
+
+  /// Run the dependency scanning tool for a given clang driver command-line
+  /// for a specific module.
   ///
   /// \returns false if clang errors occurred (with diagnostics reported to
   /// \c DiagConsumer), true otherwise.
@@ -94,13 +107,28 @@ public:
                            DependencyConsumer &DepConsumer,
                            DependencyActionController &Controller,
                            DiagnosticConsumer &DiagConsumer,
-                           std::optional<StringRef> ModuleName = std::nullopt);
+                           StringRef ModuleName);
+
+  /// Run the dependency scanning tool for a given clang driver command-line
+  /// for a specific translation unit via file system or memory buffer.
+  ///
   /// \returns A \c StringError with the diagnostic output if clang errors
   /// occurred, success otherwise.
   llvm::Error computeDependencies(
       StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
       DependencyConsumer &Consumer, DependencyActionController &Controller,
-      std::optional<StringRef> ModuleName = std::nullopt);
+      std::optional<llvm::MemoryBufferRef> TUBuffer = std::nullopt);
+
+  /// Run the dependency scanning tool for a given clang driver command-line
+  /// for a specific module.
+  ///
+  /// \returns A \c StringError with the diagnostic output if clang errors
+  /// occurred, success otherwise.
+  llvm::Error computeDependencies(StringRef WorkingDirectory,
+                                  const std::vector<std::string> &CommandLine,
+                                  DependencyConsumer &Consumer,
+                                  DependencyActionController &Controller,
+                                  StringRef ModuleName);
 
   bool shouldEagerLoadModules() const { return EagerLoadModules; }
 
@@ -121,6 +149,15 @@ private:
   ScanningOptimizations OptimizeArgs;
   /// Whether to set up command-lines to load PCM files eagerly.
   bool EagerLoadModules;
+
+  /// Private helper functions.
+  bool scanDependencies(StringRef WorkingDirectory,
+                        const std::vector<std::string> &CommandLine,
+                        DependencyConsumer &Consumer,
+                        DependencyActionController &Controller,
+                        DiagnosticConsumer &DC,
+                        llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
+                        std::optional<StringRef> ModuleName);
 };
 
 } // end namespace dependencies
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index f3aedbc..de61786 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -1055,7 +1055,8 @@ void ASTContext::PrintStats() const {
 void ASTContext::mergeDefinitionIntoModule(NamedDecl *ND, Module *M,
                                            bool NotifyListeners) {
   if (NotifyListeners)
-    if (auto *Listener = getASTMutationListener())
+    if (auto *Listener = getASTMutationListener();
+        Listener && !ND->isUnconditionallyVisible())
       Listener->RedefinedHiddenDefinition(ND, M);
 
   MergedDefModules[cast<NamedDecl>(ND->getCanonicalDecl())].push_back(M);
diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp
index c9f2f90..c27ebbf 100644
--- a/clang/lib/AST/ASTImporter.cpp
+++ b/clang/lib/AST/ASTImporter.cpp
@@ -6320,10 +6320,10 @@ ExpectedDecl ASTNodeImporter::VisitClassTemplateSpecializationDecl(
 
     updateLookupTableForTemplateParameters(*ToTPList);
   } else { // Not a partial specialization.
-    if (GetImportedOrCreateDecl(
-            D2, D, Importer.getToContext(), D->getTagKind(), DC,
-            *BeginLocOrErr, *IdLocOrErr, ClassTemplate, TemplateArgs,
-            PrevDecl))
+    if (GetImportedOrCreateDecl(D2, D, Importer.getToContext(), D->getTagKind(),
+                                DC, *BeginLocOrErr, *IdLocOrErr, ClassTemplate,
+                                TemplateArgs, D->hasStrictPackMatch(),
+                                PrevDecl))
       return D2;
 
     // Update InsertPos, because preceding import calls may have invalidated
diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp
index c140837..1e1e96a 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -4715,6 +4715,14 @@ bool Compiler<Emitter>::VisitCallExpr(const CallExpr *E) {
     } else if (!this->visit(MC->getImplicitObjectArgument())) {
       return false;
     }
+  } else if (const auto *PD =
+                 dyn_cast<CXXPseudoDestructorExpr>(E->getCallee())) {
+    const Expr *Base = PD->getBase();
+    if (!Base->isGLValue())
+      return this->discard(Base);
+    if (!this->visit(Base))
+      return false;
+    return this->emitKill(E);
   } else if (!FuncDecl) {
     const Expr *Callee = E->getCallee();
     CalleeOffset = this->allocateLocalPrimitive(Callee, PT_FnPtr, true, false);
@@ -5596,6 +5604,22 @@ bool Compiler<Emitter>::compileConstructor(const CXXConstructorDecl *Ctor) {
 
       if (!emitFieldInitializer(NestedField, NestedFieldOffset, InitExpr))
         return false;
+
+      // Mark all chain links as initialized.
+      unsigned InitFieldOffset = 0;
+      for (const NamedDecl *ND : IFD->chain().drop_back()) {
+        const auto *FD = cast<FieldDecl>(ND);
+        const Record *FieldRecord = this->P.getOrCreateRecord(FD->getParent());
+        assert(FieldRecord);
+        NestedField = FieldRecord->getField(FD);
+        InitFieldOffset += NestedField->Offset;
+        assert(NestedField);
+        if (!this->emitGetPtrThisField(InitFieldOffset, InitExpr))
+          return false;
+        if (!this->emitFinishInitPop(InitExpr))
+          return false;
+      }
+
     } else {
       assert(Init->isDelegatingInitializer());
       if (!this->emitThis(InitExpr))
diff --git a/clang/lib/AST/ByteCode/Descriptor.h b/clang/lib/AST/ByteCode/Descriptor.h
index a73e28d..96c82a1 100644
--- a/clang/lib/AST/ByteCode/Descriptor.h
+++ b/clang/lib/AST/ByteCode/Descriptor.h
@@ -61,6 +61,11 @@ struct alignas(void *) GlobalInlineDescriptor {
 };
 static_assert(sizeof(GlobalInlineDescriptor) == sizeof(void *), "");
 
+enum class Lifetime : uint8_t {
+  Started,
+  Ended,
+};
+
 /// Inline descriptor embedded in structures and arrays.
 ///
 /// Such descriptors precede all composite array elements and structure fields.
@@ -100,12 +105,14 @@ struct InlineDescriptor {
   LLVM_PREFERRED_TYPE(bool)
   unsigned IsArrayElement : 1;
 
+  Lifetime LifeState;
+
   const Descriptor *Desc;
 
   InlineDescriptor(const Descriptor *D)
       : Offset(sizeof(InlineDescriptor)), IsConst(false), IsInitialized(false),
         IsBase(false), IsActive(false), IsFieldMutable(false),
-        IsArrayElement(false), Desc(D) {}
+        IsArrayElement(false), LifeState(Lifetime::Started), Desc(D) {}
 
   void dump() const { dump(llvm::errs()); }
   void dump(llvm::raw_ostream &OS) const;
diff --git a/clang/lib/AST/ByteCode/Disasm.cpp b/clang/lib/AST/ByteCode/Disasm.cpp
index 3c55c88..92a169a 100644
--- a/clang/lib/AST/ByteCode/Disasm.cpp
+++ b/clang/lib/AST/ByteCode/Disasm.cpp
@@ -240,7 +240,7 @@ LLVM_DUMP_METHOD void Descriptor::dump(llvm::raw_ostream &OS) const {
   else if (isRecord())
     OS << " record";
   else if (isPrimitive())
-    OS << " primitive";
+    OS << " primitive " << primTypeToString(getPrimType());
 
   if (isZeroSizeArray())
     OS << " zero-size-array";
diff --git a/clang/lib/AST/ByteCode/Function.h b/clang/lib/AST/ByteCode/Function.h
index 2d3421e..e17183e 100644
--- a/clang/lib/AST/ByteCode/Function.h
+++ b/clang/lib/AST/ByteCode/Function.h
@@ -51,6 +51,11 @@ public:
     return llvm::make_range(Descriptors.begin(), Descriptors.end());
   }
 
+  llvm::iterator_range<LocalVectorTy::const_reverse_iterator>
+  locals_reverse() const {
+    return llvm::reverse(Descriptors);
+  }
+
 private:
   /// Object descriptors in this block.
   LocalVectorTy Descriptors;
diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp
index 1123ced..bf48139 100644
--- a/clang/lib/AST/ByteCode/Interp.cpp
+++ b/clang/lib/AST/ByteCode/Interp.cpp
@@ -561,6 +561,18 @@ bool CheckInitialized(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
   return false;
 }
 
+static bool CheckLifetime(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
+                          AccessKinds AK) {
+  if (Ptr.getLifetime() == Lifetime::Started)
+    return true;
+
+  if (!S.checkingPotentialConstantExpression()) {
+    S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_access_uninit)
+        << AK << /*uninitialized=*/false << S.Current->getRange(OpPC);
+  }
+  return false;
+}
+
 bool CheckGlobalInitialized(InterpState &S, CodePtr OpPC, const Pointer &Ptr) {
   if (Ptr.isInitialized())
     return true;
@@ -605,6 +617,8 @@ bool CheckLoad(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
     return false;
   if (!CheckActive(S, OpPC, Ptr, AK))
     return false;
+  if (!CheckLifetime(S, OpPC, Ptr, AK))
+    return false;
   if (!CheckInitialized(S, OpPC, Ptr, AK))
     return false;
   if (!CheckTemporary(S, OpPC, Ptr, AK))
@@ -634,6 +648,8 @@ bool CheckFinalLoad(InterpState &S, CodePtr OpPC, const Pointer &Ptr) {
     return false;
   if (!CheckActive(S, OpPC, Ptr, AK_Read))
     return false;
+  if (!CheckLifetime(S, OpPC, Ptr, AK_Read))
+    return false;
   if (!CheckInitialized(S, OpPC, Ptr, AK_Read))
     return false;
   if (!CheckTemporary(S, OpPC, Ptr, AK_Read))
@@ -650,6 +666,8 @@ bool CheckStore(InterpState &S, CodePtr OpPC, const Pointer &Ptr) {
     return false;
   if (!CheckDummy(S, OpPC, Ptr, AK_Assign))
     return false;
+  if (!CheckLifetime(S, OpPC, Ptr, AK_Assign))
+    return false;
   if (!CheckExtern(S, OpPC, Ptr))
     return false;
   if (!CheckRange(S, OpPC, Ptr, AK_Assign))
diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h
index 9f029ad..66fd31f 100644
--- a/clang/lib/AST/ByteCode/Interp.h
+++ b/clang/lib/AST/ByteCode/Interp.h
@@ -1254,6 +1254,12 @@ bool GetLocal(InterpState &S, CodePtr OpPC, uint32_t I) {
   return true;
 }
 
+static inline bool Kill(InterpState &S, CodePtr OpPC) {
+  const auto &Ptr = S.Stk.pop<Pointer>();
+  Ptr.endLifetime();
+  return true;
+}
+
 /// 1) Pops the value from the stack.
 /// 2) Writes the value to the local variable with the
 ///    given offset.
diff --git a/clang/lib/AST/ByteCode/InterpFrame.cpp b/clang/lib/AST/ByteCode/InterpFrame.cpp
index 89fc7a4..c383b2b 100644
--- a/clang/lib/AST/ByteCode/InterpFrame.cpp
+++ b/clang/lib/AST/ByteCode/InterpFrame.cpp
@@ -99,7 +99,7 @@ void InterpFrame::initScope(unsigned Idx) {
 }
 
 void InterpFrame::destroy(unsigned Idx) {
-  for (auto &Local : Func->getScope(Idx).locals()) {
+  for (auto &Local : Func->getScope(Idx).locals_reverse()) {
     S.deallocate(localBlock(Local.Offset));
   }
 }
diff --git a/clang/lib/AST/ByteCode/Opcodes.td b/clang/lib/AST/ByteCode/Opcodes.td
index 4b0c902..088a3e4 100644
--- a/clang/lib/AST/ByteCode/Opcodes.td
+++ b/clang/lib/AST/ByteCode/Opcodes.td
@@ -394,6 +394,11 @@ def GetLocal : AccessOpcode { let HasCustomEval = 1; }
 // [] -> [Pointer]
 def SetLocal : AccessOpcode { let HasCustomEval = 1; }
 
+def Kill : Opcode {
+  let Types = [];
+  let Args = [];
+}
+
 def CheckDecl : Opcode {
   let Args = [ArgVarDecl];
 }
diff --git a/clang/lib/AST/ByteCode/Pointer.h b/clang/lib/AST/ByteCode/Pointer.h
index 971b0d5..3970d58 100644
--- a/clang/lib/AST/ByteCode/Pointer.h
+++ b/clang/lib/AST/ByteCode/Pointer.h
@@ -687,6 +687,22 @@ public:
   /// Deactivates an entire strurcutre.
   void deactivate() const;
 
+  Lifetime getLifetime() const {
+    if (!isBlockPointer())
+      return Lifetime::Started;
+    if (asBlockPointer().Base < sizeof(InlineDescriptor))
+      return Lifetime::Started;
+    return getInlineDesc()->LifeState;
+  }
+
+  void endLifetime() const {
+    if (!isBlockPointer())
+      return;
+    if (asBlockPointer().Base < sizeof(InlineDescriptor))
+      return;
+    getInlineDesc()->LifeState = Lifetime::Ended;
+  }
+
   /// Compare two pointers.
   ComparisonCategoryResult compare(const Pointer &Other) const {
     if (!hasSameBase(*this, Other))
diff --git a/clang/lib/AST/CommentLexer.cpp b/clang/lib/AST/CommentLexer.cpp
index ec9a5b4..804be89 100644
--- a/clang/lib/AST/CommentLexer.cpp
+++ b/clang/lib/AST/CommentLexer.cpp
@@ -196,6 +196,15 @@ const char *skipWhitespace(const char *BufferPtr, const char *BufferEnd) {
   return BufferEnd;
 }
 
+const char *skipHorizontalWhitespace(const char *BufferPtr,
+                                     const char *BufferEnd) {
+  for (; BufferPtr != BufferEnd; ++BufferPtr) {
+    if (!isHorizontalWhitespace(*BufferPtr))
+      return BufferPtr;
+  }
+  return BufferEnd;
+}
+
 bool isWhitespace(const char *BufferPtr, const char *BufferEnd) {
   return skipWhitespace(BufferPtr, BufferEnd) == BufferEnd;
 }
@@ -637,17 +646,41 @@ void Lexer::setupAndLexHTMLStartTag(Token &T) {
   formTokenWithChars(T, TagNameEnd, tok::html_start_tag);
   T.setHTMLTagStartName(Name);
 
-  BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
+  BufferPtr = skipHorizontalWhitespace(BufferPtr, CommentEnd);
+  if (BufferPtr == CommentEnd) { // in BCPL comments
+    State = LS_HTMLStartTag;
+    return;
+  }
 
   const char C = *BufferPtr;
   if (BufferPtr != CommentEnd &&
-      (C == '>' || C == '/' || isHTMLIdentifierStartingCharacter(C)))
+      (C == '>' || C == '/' || isVerticalWhitespace(C) ||
+       isHTMLIdentifierStartingCharacter(C)))
     State = LS_HTMLStartTag;
 }
 
 void Lexer::lexHTMLStartTag(Token &T) {
   assert(State == LS_HTMLStartTag);
 
+  // Skip leading whitespace and comment decorations
+  while (isVerticalWhitespace(*BufferPtr)) {
+    BufferPtr = skipNewline(BufferPtr, CommentEnd);
+
+    if (CommentState == LCS_InsideCComment)
+      skipLineStartingDecorations();
+
+    BufferPtr = skipHorizontalWhitespace(BufferPtr, CommentEnd);
+    if (BufferPtr == CommentEnd) {
+      // HTML starting tags must be defined in a single comment block.
+      // It's likely a user-error where they forgot to terminate the comment.
+      State = LS_Normal;
+      // Since at least one newline was skipped and one token needs to be lexed,
+      // return a newline.
+      formTokenWithChars(T, BufferPtr, tok::newline);
+      return;
+    }
+  }
+
   const char *TokenPtr = BufferPtr;
   char C = *TokenPtr;
   if (isHTMLIdentifierCharacter(C)) {
@@ -693,14 +726,13 @@ void Lexer::lexHTMLStartTag(Token &T) {
 
   // Now look ahead and return to normal state if we don't see any HTML tokens
   // ahead.
-  BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
+  BufferPtr = skipHorizontalWhitespace(BufferPtr, CommentEnd);
   if (BufferPtr == CommentEnd) {
-    State = LS_Normal;
     return;
   }
 
   C = *BufferPtr;
-  if (!isHTMLIdentifierStartingCharacter(C) &&
+  if (!isHTMLIdentifierStartingCharacter(C) && !isVerticalWhitespace(C) &&
       C != '=' && C != '\"' && C != '\'' && C != '>' && C != '/') {
     State = LS_Normal;
     return;
@@ -774,8 +806,17 @@ again:
         BufferPtr++;
 
       CommentState = LCS_InsideBCPLComment;
-      if (State != LS_VerbatimBlockBody && State != LS_VerbatimBlockFirstLine)
+      switch (State) {
+      case LS_VerbatimBlockFirstLine:
+      case LS_VerbatimBlockBody:
+        break;
+      case LS_HTMLStartTag:
+        BufferPtr = skipHorizontalWhitespace(BufferPtr, BufferEnd);
+        break;
+      default:
         State = LS_Normal;
+        break;
+      }
       CommentEnd = findBCPLCommentEnd(BufferPtr, BufferEnd);
       goto again;
     }
@@ -807,6 +848,14 @@ again:
     while(EndWhitespace != BufferEnd && *EndWhitespace != '/')
       EndWhitespace++;
 
+    // When lexing the start of an HTML tag (i.e. going through the attributes)
+    // there won't be any newlines generated.
+    if (State == LS_HTMLStartTag && EndWhitespace != BufferEnd) {
+      CommentState = LCS_BeforeComment;
+      BufferPtr = EndWhitespace;
+      goto again;
+    }
+
     // Turn any whitespace between comments (and there is only whitespace
     // between them -- guaranteed by comment extraction) into a newline.  We
     // have two newlines between C comments in total (first one was synthesized
@@ -829,6 +878,14 @@ again:
         BufferPtr += 2;
         assert(BufferPtr <= BufferEnd);
 
+        // When lexing the start of an HTML tag (i.e. going through the
+        // attributes) there won't be any newlines generated - whitespace still
+        // needs to be skipped.
+        if (State == LS_HTMLStartTag && BufferPtr != BufferEnd) {
+          CommentState = LCS_BetweenComments;
+          goto again;
+        }
+
         // Synthenize newline just after the C comment, regardless if there is
         // actually a newline.
         formTokenWithChars(T, BufferPtr, tok::newline);
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index 7e8a172..610207c 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -3065,6 +3065,7 @@ FunctionDecl::FunctionDecl(Kind DK, ASTContext &C, DeclContext *DC,
   FunctionDeclBits.IsIneligibleOrNotSelected = false;
   FunctionDeclBits.HasImplicitReturnZero = false;
   FunctionDeclBits.IsLateTemplateParsed = false;
+  FunctionDeclBits.IsInstantiatedFromMemberTemplate = false;
   FunctionDeclBits.ConstexprKind = static_cast<uint64_t>(ConstexprKind);
   FunctionDeclBits.BodyContainsImmediateEscalatingExpression = false;
   FunctionDeclBits.InstantiationIsPending = false;
diff --git a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp
index 2e1ed9e..7fb89bf5 100644
--- a/clang/lib/AST/DeclTemplate.cpp
+++ b/clang/lib/AST/DeclTemplate.cpp
@@ -957,18 +957,17 @@ FunctionTemplateSpecializationInfo *FunctionTemplateSpecializationInfo::Create(
 // ClassTemplateSpecializationDecl Implementation
 //===----------------------------------------------------------------------===//
 
-ClassTemplateSpecializationDecl::
-ClassTemplateSpecializationDecl(ASTContext &Context, Kind DK, TagKind TK,
-                                DeclContext *DC, SourceLocation StartLoc,
-                                SourceLocation IdLoc,
-                                ClassTemplateDecl *SpecializedTemplate,
-                                ArrayRef<TemplateArgument> Args,
-                                ClassTemplateSpecializationDecl *PrevDecl)
+ClassTemplateSpecializationDecl::ClassTemplateSpecializationDecl(
+    ASTContext &Context, Kind DK, TagKind TK, DeclContext *DC,
+    SourceLocation StartLoc, SourceLocation IdLoc,
+    ClassTemplateDecl *SpecializedTemplate, ArrayRef<TemplateArgument> Args,
+    bool StrictPackMatch, ClassTemplateSpecializationDecl *PrevDecl)
     : CXXRecordDecl(DK, TK, Context, DC, StartLoc, IdLoc,
                     SpecializedTemplate->getIdentifier(), PrevDecl),
-    SpecializedTemplate(SpecializedTemplate),
-    TemplateArgs(TemplateArgumentList::CreateCopy(Context, Args)),
-    SpecializationKind(TSK_Undeclared) {
+      SpecializedTemplate(SpecializedTemplate),
+      TemplateArgs(TemplateArgumentList::CreateCopy(Context, Args)),
+      SpecializationKind(TSK_Undeclared), StrictPackMatch(StrictPackMatch) {
+  assert(DK == Kind::ClassTemplateSpecialization || StrictPackMatch == false);
 }
 
 ClassTemplateSpecializationDecl::ClassTemplateSpecializationDecl(ASTContext &C,
@@ -977,18 +976,14 @@ ClassTemplateSpecializationDecl::ClassTemplateSpecializationDecl(ASTContext &C,
                     SourceLocation(), nullptr, nullptr),
       SpecializationKind(TSK_Undeclared) {}
 
-ClassTemplateSpecializationDecl *
-ClassTemplateSpecializationDecl::Create(ASTContext &Context, TagKind TK,
-                                        DeclContext *DC,
-                                        SourceLocation StartLoc,
-                                        SourceLocation IdLoc,
-                                        ClassTemplateDecl *SpecializedTemplate,
-                                        ArrayRef<TemplateArgument> Args,
-                                   ClassTemplateSpecializationDecl *PrevDecl) {
-  auto *Result =
-      new (Context, DC) ClassTemplateSpecializationDecl(
-          Context, ClassTemplateSpecialization, TK, DC, StartLoc, IdLoc,
-          SpecializedTemplate, Args, PrevDecl);
+ClassTemplateSpecializationDecl *ClassTemplateSpecializationDecl::Create(
+    ASTContext &Context, TagKind TK, DeclContext *DC, SourceLocation StartLoc,
+    SourceLocation IdLoc, ClassTemplateDecl *SpecializedTemplate,
+    ArrayRef<TemplateArgument> Args, bool StrictPackMatch,
+    ClassTemplateSpecializationDecl *PrevDecl) {
+  auto *Result = new (Context, DC) ClassTemplateSpecializationDecl(
+      Context, ClassTemplateSpecialization, TK, DC, StartLoc, IdLoc,
+      SpecializedTemplate, Args, StrictPackMatch, PrevDecl);
   Result->setMayHaveOutOfDateDef(false);
 
   // If the template decl is incomplete, copy the external lexical storage from
@@ -1175,7 +1170,9 @@ ClassTemplatePartialSpecializationDecl::ClassTemplatePartialSpecializationDecl(
     ClassTemplatePartialSpecializationDecl *PrevDecl)
     : ClassTemplateSpecializationDecl(
           Context, ClassTemplatePartialSpecialization, TK, DC, StartLoc, IdLoc,
-          SpecializedTemplate, Args, PrevDecl),
+          // Tracking StrictPackMatch for Partial
+          // Specializations is not needed.
+          SpecializedTemplate, Args, /*StrictPackMatch=*/false, PrevDecl),
       TemplateParams(Params), InstantiatedFromMember(nullptr, false) {
   if (AdoptTemplateParameterList(Params, this))
     setInvalidDecl();
diff --git a/clang/lib/AST/JSONNodeDumper.cpp b/clang/lib/AST/JSONNodeDumper.cpp
index 36ef1fc..169e3ee 100644
--- a/clang/lib/AST/JSONNodeDumper.cpp
+++ b/clang/lib/AST/JSONNodeDumper.cpp
@@ -1003,6 +1003,11 @@ void JSONNodeDumper::VisitRecordDecl(const RecordDecl *RD) {
 void JSONNodeDumper::VisitCXXRecordDecl(const CXXRecordDecl *RD) {
   VisitRecordDecl(RD);
 
+  if (const auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(RD)) {
+    if (CTSD->hasStrictPackMatch())
+      JOS.attribute("strict-pack-match", true);
+  }
+
   // All other information requires a complete definition.
   if (!RD->isCompleteDefinition())
     return;
diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp
index 532933d..bbf7a4d 100644
--- a/clang/lib/AST/OpenMPClause.cpp
+++ b/clang/lib/AST/OpenMPClause.cpp
@@ -2927,9 +2927,13 @@ llvm::raw_ostream &clang::operator<<(llvm::raw_ostream &OS,
 TargetOMPContext::TargetOMPContext(
     ASTContext &ASTCtx, std::function<void(StringRef)> &&DiagUnknownTrait,
     const FunctionDecl *CurrentFunctionDecl,
-    ArrayRef<llvm::omp::TraitProperty> ConstructTraits)
+    ArrayRef<llvm::omp::TraitProperty> ConstructTraits, int DeviceNum)
     : OMPContext(ASTCtx.getLangOpts().OpenMPIsTargetDevice,
-                 ASTCtx.getTargetInfo().getTriple()),
+                 ASTCtx.getTargetInfo().getTriple(),
+                 ASTCtx.getLangOpts().OMPTargetTriples.empty()
+                     ? llvm::Triple()
+                     : ASTCtx.getLangOpts().OMPTargetTriples[0],
+                 DeviceNum),
       FeatureValidityCheck([&](StringRef FeatureName) {
         return ASTCtx.getTargetInfo().isValidFeatureName(FeatureName);
       }),
diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp
index 10d7e4c..6da1f77 100644
--- a/clang/lib/AST/TextNodeDumper.cpp
+++ b/clang/lib/AST/TextNodeDumper.cpp
@@ -2525,8 +2525,11 @@ void TextNodeDumper::VisitCXXRecordDecl(const CXXRecordDecl *D) {
     OS << " instantiated_from";
     dumpPointer(Instance);
   }
-  if (const auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(D))
+  if (const auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(D)) {
     dumpTemplateSpecializationKind(CTSD->getSpecializationKind());
+    if (CTSD->hasStrictPackMatch())
+      OS << " strict-pack-match";
+  }
 
   dumpNestedNameSpecifier(D->getQualifier());
 
diff --git a/clang/lib/Analysis/ProgramPoint.cpp b/clang/lib/Analysis/ProgramPoint.cpp
index 7945c5c..e508681 100644
--- a/clang/lib/Analysis/ProgramPoint.cpp
+++ b/clang/lib/Analysis/ProgramPoint.cpp
@@ -49,6 +49,121 @@ LLVM_DUMP_METHOD void ProgramPoint::dump() const {
   return printJson(llvm::errs());
 }
 
+StringRef ProgramPoint::getProgramPointKindName(Kind K) {
+  switch (K) {
+  case BlockEdgeKind:
+    return "BlockEdge";
+  case BlockEntranceKind:
+    return "BlockEntrance";
+  case BlockExitKind:
+    return "BlockExit";
+  case PreStmtKind:
+    return "PreStmt";
+  case PreStmtPurgeDeadSymbolsKind:
+    return "PreStmtPurgeDeadSymbols";
+  case PostStmtPurgeDeadSymbolsKind:
+    return "PostStmtPurgeDeadSymbols";
+  case PostStmtKind:
+    return "PostStmt";
+  case PreLoadKind:
+    return "PreLoad";
+  case PostLoadKind:
+    return "PostLoad";
+  case PreStoreKind:
+    return "PreStore";
+  case PostStoreKind:
+    return "PostStore";
+  case PostConditionKind:
+    return "PostCondition";
+  case PostLValueKind:
+    return "PostLValue";
+  case PostAllocatorCallKind:
+    return "PostAllocatorCall";
+  case PostInitializerKind:
+    return "PostInitializer";
+  case CallEnterKind:
+    return "CallEnter";
+  case CallExitBeginKind:
+    return "CallExitBegin";
+  case CallExitEndKind:
+    return "CallExitEnd";
+  case FunctionExitKind:
+    return "FunctionExit";
+  case PreImplicitCallKind:
+    return "PreImplicitCall";
+  case PostImplicitCallKind:
+    return "PostImplicitCall";
+  case LoopExitKind:
+    return "LoopExit";
+  case EpsilonKind:
+    return "Epsilon";
+  }
+  llvm_unreachable("Unknown ProgramPoint kind");
+}
+
+std::optional<SourceLocation> ProgramPoint::getSourceLocation() const {
+  switch (getKind()) {
+  case BlockEdgeKind:
+    // If needed, the source and or destination beginning can be used to get
+    // source location.
+    return std::nullopt;
+  case BlockEntranceKind:
+    // If needed, first statement of the block can be used.
+    return std::nullopt;
+  case BlockExitKind:
+    if (const auto *B = castAs<BlockExit>().getBlock()) {
+      if (const auto *T = B->getTerminatorStmt()) {
+        return T->getBeginLoc();
+      }
+    }
+    return std::nullopt;
+  case PreStmtKind:
+  case PreStmtPurgeDeadSymbolsKind:
+  case PostStmtPurgeDeadSymbolsKind:
+  case PostStmtKind:
+  case PreLoadKind:
+  case PostLoadKind:
+  case PreStoreKind:
+  case PostStoreKind:
+  case PostConditionKind:
+  case PostLValueKind:
+  case PostAllocatorCallKind:
+    if (const Stmt *S = castAs<StmtPoint>().getStmt())
+      return S->getBeginLoc();
+    return std::nullopt;
+  case PostInitializerKind:
+    if (const auto *Init = castAs<PostInitializer>().getInitializer())
+      return Init->getSourceLocation();
+    return std::nullopt;
+  case CallEnterKind:
+    if (const Stmt *S = castAs<CallEnter>().getCallExpr())
+      return S->getBeginLoc();
+    return std::nullopt;
+  case CallExitBeginKind:
+    if (const Stmt *S = castAs<CallExitBegin>().getReturnStmt())
+      return S->getBeginLoc();
+    return std::nullopt;
+  case CallExitEndKind:
+    return std::nullopt;
+  case FunctionExitKind:
+    if (const auto *B = castAs<FunctionExitPoint>().getBlock();
+        B && B->getTerminatorStmt())
+      return B->getTerminatorStmt()->getBeginLoc();
+    return std::nullopt;
+  case PreImplicitCallKind:
+    return castAs<ImplicitCallPoint>().getLocation();
+  case PostImplicitCallKind:
+    return castAs<ImplicitCallPoint>().getLocation();
+  case LoopExitKind:
+    if (const Stmt *S = castAs<LoopExit>().getLoopStmt())
+      return S->getBeginLoc();
+    return std::nullopt;
+  case EpsilonKind:
+    return std::nullopt;
+  }
+  llvm_unreachable("Unknown ProgramPoint kind");
+}
+
 void ProgramPoint::printJson(llvm::raw_ostream &Out, const char *NL) const {
   const ASTContext &Context =
       getLocationContext()->getAnalysisDeclContext()->getASTContext();
diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp
index 62a13f0..956d92a7 100644
--- a/clang/lib/Basic/OpenMPKinds.cpp
+++ b/clang/lib/Basic/OpenMPKinds.cpp
@@ -765,6 +765,12 @@ bool clang::isOpenMPCapturingDirective(OpenMPDirectiveKind DKind) {
   return false;
 }
 
+bool clang::isOpenMPOrderConcurrentNestableDirective(
+    OpenMPDirectiveKind DKind) {
+  return DKind == OMPD_atomic || DKind == OMPD_loop || DKind == OMPD_simd ||
+         DKind == OMPD_parallel || isOpenMPLoopTransformationDirective(DKind);
+}
+
 void clang::getOpenMPCaptureRegions(
     SmallVectorImpl<OpenMPDirectiveKind> &CaptureRegions,
     OpenMPDirectiveKind DKind) {
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 049b8d6..fad8d77 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -323,11 +323,19 @@ bool AArch64TargetInfo::validateGlobalRegisterVariable(
 
 bool AArch64TargetInfo::validateBranchProtection(StringRef Spec, StringRef,
                                                  BranchProtectionInfo &BPI,
+                                                 const LangOptions &LO,
                                                  StringRef &Err) const {
   llvm::ARM::ParsedBranchProtection PBP;
   if (!llvm::ARM::parseBranchProtection(Spec, PBP, Err, HasPAuthLR))
     return false;
 
+  // GCS is currently untested with ptrauth-returns, but enabling this could be
+  // allowed in future after testing with a suitable system.
+  if (LO.PointerAuthReturns &&
+      (PBP.Scope != "none" || PBP.BranchProtectionPAuthLR ||
+       PBP.GuardedControlStack))
+    return false;
+
   BPI.SignReturnAddr =
       llvm::StringSwitch<LangOptions::SignReturnAddressScopeKind>(PBP.Scope)
           .Case("non-leaf", LangOptions::SignReturnAddressScopeKind::NonLeaf)
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index f2510ad..9b6451b 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -132,6 +132,7 @@ public:
 
   bool validateBranchProtection(StringRef Spec, StringRef Arch,
                                 BranchProtectionInfo &BPI,
+                                const LangOptions &LO,
                                 StringRef &Err) const override;
 
   bool isValidCPUName(StringRef Name) const override;
diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp
index 637ee1c..ca2c1ff 100644
--- a/clang/lib/Basic/Targets/ARM.cpp
+++ b/clang/lib/Basic/Targets/ARM.cpp
@@ -405,6 +405,7 @@ bool ARMTargetInfo::isBranchProtectionSupportedArch(StringRef Arch) const {
 
 bool ARMTargetInfo::validateBranchProtection(StringRef Spec, StringRef Arch,
                                              BranchProtectionInfo &BPI,
+                                             const LangOptions &LO,
                                              StringRef &Err) const {
   llvm::ARM::ParsedBranchProtection PBP;
   if (!llvm::ARM::parseBranchProtection(Spec, PBP, Err))
diff --git a/clang/lib/Basic/Targets/ARM.h b/clang/lib/Basic/Targets/ARM.h
index 22033a6..1719217 100644
--- a/clang/lib/Basic/Targets/ARM.h
+++ b/clang/lib/Basic/Targets/ARM.h
@@ -155,6 +155,7 @@ public:
   bool isBranchProtectionSupportedArch(StringRef Arch) const override;
   bool validateBranchProtection(StringRef Spec, StringRef Arch,
                                 BranchProtectionInfo &BPI,
+                                const LangOptions &LO,
                                 StringRef &Err) const override;
 
   // FIXME: This should be based on Arch attributes, not CPU names.
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 53f5135..361e4c4 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -1057,20 +1057,20 @@ namespace {
 /// StructFieldAccess is a simple visitor class to grab the first MemberExpr
 /// from an Expr. It records any ArraySubscriptExpr we meet along the way.
 class StructFieldAccess
-    : public ConstStmtVisitor<StructFieldAccess, const MemberExpr *> {
+    : public ConstStmtVisitor<StructFieldAccess, const Expr *> {
   bool AddrOfSeen = false;
 
 public:
   const ArraySubscriptExpr *ASE = nullptr;
 
-  const MemberExpr *VisitMemberExpr(const MemberExpr *E) {
+  const Expr *VisitMemberExpr(const MemberExpr *E) {
     if (AddrOfSeen && E->getType()->isArrayType())
       // Avoid forms like '&ptr->array'.
       return nullptr;
     return E;
   }
 
-  const MemberExpr *VisitArraySubscriptExpr(const ArraySubscriptExpr *E) {
+  const Expr *VisitArraySubscriptExpr(const ArraySubscriptExpr *E) {
     if (ASE)
       // We don't support multiple subscripts.
       return nullptr;
@@ -1079,17 +1079,19 @@ public:
     ASE = E;
     return Visit(E->getBase());
   }
-  const MemberExpr *VisitCastExpr(const CastExpr *E) {
+  const Expr *VisitCastExpr(const CastExpr *E) {
+    if (E->getCastKind() == CK_LValueToRValue)
+      return E;
     return Visit(E->getSubExpr());
   }
-  const MemberExpr *VisitParenExpr(const ParenExpr *E) {
+  const Expr *VisitParenExpr(const ParenExpr *E) {
     return Visit(E->getSubExpr());
   }
-  const MemberExpr *VisitUnaryAddrOf(const clang::UnaryOperator *E) {
+  const Expr *VisitUnaryAddrOf(const clang::UnaryOperator *E) {
     AddrOfSeen = true;
     return Visit(E->getSubExpr());
   }
-  const MemberExpr *VisitUnaryDeref(const clang::UnaryOperator *E) {
+  const Expr *VisitUnaryDeref(const clang::UnaryOperator *E) {
     AddrOfSeen = false;
     return Visit(E->getSubExpr());
   }
@@ -1190,7 +1192,7 @@ CodeGenFunction::emitCountedByMemberSize(const Expr *E, llvm::Value *EmittedE,
   // GCC does for consistency's sake.
 
   StructFieldAccess Visitor;
-  const MemberExpr *ME = Visitor.Visit(E);
+  const MemberExpr *ME = dyn_cast_if_present<MemberExpr>(Visitor.Visit(E));
   if (!ME)
     return nullptr;
 
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index e2ae104..c96301c 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -3305,18 +3305,9 @@ CodeGenFunction::addConvergenceControlToken(llvm::CallBase *Input) {
 
 llvm::ConvergenceControlInst *
 CodeGenFunction::emitConvergenceLoopToken(llvm::BasicBlock *BB) {
-  CGBuilderTy::InsertPoint IP = Builder.saveIP();
-  if (BB->empty())
-    Builder.SetInsertPoint(BB);
-  else
-    Builder.SetInsertPoint(BB->getFirstInsertionPt());
-
-  llvm::CallBase *CB = Builder.CreateIntrinsic(
-      llvm::Intrinsic::experimental_convergence_loop, {}, {});
-  Builder.restoreIP(IP);
-
-  CB = addConvergenceControlToken(CB);
-  return cast<llvm::ConvergenceControlInst>(CB);
+  llvm::ConvergenceControlInst *ParentToken = ConvergenceTokenStack.back();
+  assert(ParentToken);
+  return llvm::ConvergenceControlInst::CreateLoop(*BB, ParentToken);
 }
 
 llvm::ConvergenceControlInst *
@@ -3329,13 +3320,5 @@ CodeGenFunction::getOrEmitConvergenceEntryToken(llvm::Function *F) {
   // Adding a convergence token requires the function to be marked as
   // convergent.
   F->setConvergent();
-
-  CGBuilderTy::InsertPoint IP = Builder.saveIP();
-  Builder.SetInsertPoint(&BB->front());
-  llvm::CallBase *I = Builder.CreateIntrinsic(
-      llvm::Intrinsic::experimental_convergence_entry, {}, {});
-  assert(isa<llvm::IntrinsicInst>(I));
-  Builder.restoreIP(IP);
-
-  return cast<llvm::ConvergenceControlInst>(I);
+  return llvm::ConvergenceControlInst::CreateEntry(*BB);
 }
diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index e2e4348..4922b08 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -147,8 +147,8 @@ public:
           CGM.getTarget().parseTargetAttr(TA->getFeaturesStr());
       if (!Attr.BranchProtection.empty()) {
         StringRef Error;
-        (void)CGM.getTarget().validateBranchProtection(Attr.BranchProtection,
-                                                       Attr.CPU, BPI, Error);
+        (void)CGM.getTarget().validateBranchProtection(
+            Attr.BranchProtection, Attr.CPU, BPI, CGM.getLangOpts(), Error);
         assert(Error.empty());
       }
     }
diff --git a/clang/lib/CodeGen/Targets/ARM.cpp b/clang/lib/CodeGen/Targets/ARM.cpp
index 47e31ce..77641ce 100644
--- a/clang/lib/CodeGen/Targets/ARM.cpp
+++ b/clang/lib/CodeGen/Targets/ARM.cpp
@@ -149,8 +149,8 @@ public:
         StringRef DiagMsg;
         StringRef Arch =
             Attr.CPU.empty() ? CGM.getTarget().getTargetOpts().CPU : Attr.CPU;
-        if (!CGM.getTarget().validateBranchProtection(Attr.BranchProtection,
-                                                      Arch, BPI, DiagMsg)) {
+        if (!CGM.getTarget().validateBranchProtection(
+                Attr.BranchProtection, Arch, BPI, CGM.getLangOpts(), DiagMsg)) {
           CGM.getDiags().Report(
               D->getLocation(),
               diag::warn_target_unsupported_branch_protection_attribute)
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 912777a..5a4737f 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -3409,7 +3409,9 @@ class OffloadingActionBuilder final {
       // Collect all offload arch parameters, removing duplicates.
       std::set<StringRef> GpuArchs;
       bool Error = false;
-      for (Arg *A : Args) {
+      const ToolChain &TC = *ToolChains.front();
+      for (Arg *A : C.getArgsForToolChain(&TC, /*BoundArch=*/"",
+                                          AssociatedOffloadKind)) {
         if (!(A->getOption().matches(options::OPT_offload_arch_EQ) ||
               A->getOption().matches(options::OPT_no_offload_arch_EQ)))
           continue;
@@ -3420,7 +3422,6 @@ class OffloadingActionBuilder final {
               ArchStr == "all") {
             GpuArchs.clear();
           } else if (ArchStr == "native") {
-            const ToolChain &TC = *ToolChains.front();
             auto GPUsOrErr = ToolChains.front()->getSystemGPUArchs(Args);
             if (!GPUsOrErr) {
               TC.getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index ebc9820..c25d1b6 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -1648,7 +1648,8 @@ void ToolChain::TranslateXarchArgs(
       A->getOption().matches(options::OPT_Xarch_host))
     ValuePos = 0;
 
-  unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(ValuePos));
+  const InputArgList &BaseArgs = Args.getBaseArgs();
+  unsigned Index = BaseArgs.MakeIndex(A->getValue(ValuePos));
   unsigned Prev = Index;
   std::unique_ptr<llvm::opt::Arg> XarchArg(Opts.ParseOneArg(Args, Index));
 
@@ -1672,8 +1673,31 @@ void ToolChain::TranslateXarchArgs(
     Diags.Report(DiagID) << A->getAsString(Args);
     return;
   }
+
   XarchArg->setBaseArg(A);
   A = XarchArg.release();
+
+  // Linker input arguments require custom handling. The problem is that we
+  // have already constructed the phase actions, so we can not treat them as
+  // "input arguments".
+  if (A->getOption().hasFlag(options::LinkerInput)) {
+    // Convert the argument into individual Zlinker_input_args. Need to do this
+    // manually to avoid memory leaks with the allocated arguments.
+    for (const char *Value : A->getValues()) {
+      auto Opt = Opts.getOption(options::OPT_Zlinker_input);
+      unsigned Index = BaseArgs.MakeIndex(Opt.getName(), Value);
+      auto NewArg =
+          new Arg(Opt, BaseArgs.MakeArgString(Opt.getPrefix() + Opt.getName()),
+                  Index, BaseArgs.getArgString(Index + 1), A);
+
+      DAL->append(NewArg);
+      if (!AllocatedArgs)
+        DAL->AddSynthesizedArg(NewArg);
+      else
+        AllocatedArgs->push_back(NewArg);
+    }
+  }
+
   if (!AllocatedArgs)
     DAL->AddSynthesizedArg(A);
   else
@@ -1697,19 +1721,17 @@ llvm::opt::DerivedArgList *ToolChain::TranslateXarchArgs(
     } else if (A->getOption().matches(options::OPT_Xarch_host)) {
       NeedTrans = !IsDevice;
       Skip = IsDevice;
-    } else if (A->getOption().matches(options::OPT_Xarch__) && IsDevice) {
-      // Do not translate -Xarch_ options for non CUDA/HIP toolchain since
-      // they may need special translation.
-      // Skip this argument unless the architecture matches BoundArch
-      if (BoundArch.empty() || A->getValue(0) != BoundArch)
-        Skip = true;
-      else
-        NeedTrans = true;
+    } else if (A->getOption().matches(options::OPT_Xarch__)) {
+      NeedTrans = A->getValue() == getArchName() ||
+                  (!BoundArch.empty() && A->getValue() == BoundArch);
+      Skip = !NeedTrans;
     }
     if (NeedTrans || Skip)
       Modified = true;
-    if (NeedTrans)
+    if (NeedTrans) {
+      A->claim();
       TranslateXarchArgs(Args, A, DAL, AllocatedArgs);
+    }
     if (!Skip)
       DAL->append(A);
   }
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.h b/clang/lib/Driver/ToolChains/AMDGPU.h
index aad6bc7..bc941a4 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.h
+++ b/clang/lib/Driver/ToolChains/AMDGPU.h
@@ -146,9 +146,24 @@ public:
   getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
                           const std::string &GPUArch,
                           bool isOpenMP = false) const;
+
   SanitizerMask getSupportedSanitizers() const override {
     return SanitizerKind::Address;
   }
+
+  void diagnoseUnsupportedSanitizers(const llvm::opt::ArgList &Args) const {
+    if (!Args.hasFlag(options::OPT_fgpu_sanitize, options::OPT_fno_gpu_sanitize,
+                      true))
+      return;
+    auto &Diags = getDriver().getDiags();
+    for (auto *A : Args.filtered(options::OPT_fsanitize_EQ)) {
+      SanitizerMask K =
+          parseSanitizerValue(A->getValue(), /*Allow Groups*/ false);
+      if (K != SanitizerKind::Address)
+        Diags.Report(clang::diag::warn_drv_unsupported_option_for_target)
+            << A->getAsString(Args) << getTriple().str();
+    }
+  }
 };
 
 } // end namespace toolchains
diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
index 24d244b..00bf9c7 100644
--- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -37,6 +37,8 @@ AMDGPUOpenMPToolChain::AMDGPUOpenMPToolChain(const Driver &D,
   // Lookup binaries into the driver directory, this is used to
   // discover the 'amdgpu-arch' executable.
   getProgramPaths().push_back(getDriver().Dir);
+  // Diagnose unsupported sanitizer options only once.
+  diagnoseUnsupportedSanitizers(Args);
 }
 
 void AMDGPUOpenMPToolChain::addClangTargetOptions(
@@ -71,10 +73,10 @@ llvm::opt::DerivedArgList *AMDGPUOpenMPToolChain::TranslateArgs(
 
   const OptTable &Opts = getDriver().getOpts();
 
-  for (Arg *A : Args) {
-    if (!llvm::is_contained(*DAL, A))
+  for (Arg *A : Args)
+    if (!shouldSkipSanitizeOption(*this, Args, BoundArch, A) &&
+        !llvm::is_contained(*DAL, A))
       DAL->append(A);
-  }
 
   if (!BoundArch.empty()) {
     DAL->eraseArg(options::OPT_march_EQ);
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 9b5132c5..27de346 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -1618,32 +1618,34 @@ static void CollectARMPACBTIOptions(const ToolChain &TC, const ArgList &Args,
     GuardedControlStack = PBP.GuardedControlStack;
   }
 
-  CmdArgs.push_back(
-      Args.MakeArgString(Twine("-msign-return-address=") + Scope));
-  if (Scope != "none") {
+  bool HasPtrauthReturns = llvm::any_of(CmdArgs, [](const char *Arg) {
+    return StringRef(Arg) == "-fptrauth-returns";
+  });
+  // GCS is currently untested with ptrauth-returns, but enabling this could be
+  // allowed in future after testing with a suitable system.
+  if (HasPtrauthReturns &&
+      (Scope != "none" || BranchProtectionPAuthLR || GuardedControlStack)) {
     if (Triple.getEnvironment() == llvm::Triple::PAuthTest)
       D.Diag(diag::err_drv_unsupported_opt_for_target)
           << A->getAsString(Args) << Triple.getTriple();
+    else
+      D.Diag(diag::err_drv_incompatible_options)
+          << A->getAsString(Args) << "-fptrauth-returns";
+  }
+
+  CmdArgs.push_back(
+      Args.MakeArgString(Twine("-msign-return-address=") + Scope));
+  if (Scope != "none")
     CmdArgs.push_back(
         Args.MakeArgString(Twine("-msign-return-address-key=") + Key));
-  }
-  if (BranchProtectionPAuthLR) {
-    if (Triple.getEnvironment() == llvm::Triple::PAuthTest)
-      D.Diag(diag::err_drv_unsupported_opt_for_target)
-          << A->getAsString(Args) << Triple.getTriple();
+  if (BranchProtectionPAuthLR)
     CmdArgs.push_back(
         Args.MakeArgString(Twine("-mbranch-protection-pauth-lr")));
-  }
   if (IndirectBranches)
     CmdArgs.push_back("-mbranch-target-enforce");
-  // GCS is currently untested with PAuthABI, but enabling this could be allowed
-  // in future after testing with a suitable system.
-  if (GuardedControlStack) {
-    if (Triple.getEnvironment() == llvm::Triple::PAuthTest)
-      D.Diag(diag::err_drv_unsupported_opt_for_target)
-          << A->getAsString(Args) << Triple.getTriple();
+
+  if (GuardedControlStack)
     CmdArgs.push_back("-mguarded-control-stack");
-  }
 }
 
 void Clang::AddARMTargetArgs(const llvm::Triple &Triple, const ArgList &Args,
@@ -1822,12 +1824,6 @@ void Clang::AddAArch64TargetArgs(const ArgList &Args,
       CmdArgs.push_back("-aarch64-enable-global-merge=true");
   }
 
-  // Enable/disable return address signing and indirect branch targets.
-  CollectARMPACBTIOptions(getToolChain(), Args, CmdArgs, true /*isAArch64*/);
-
-  if (Triple.getEnvironment() == llvm::Triple::PAuthTest)
-    handlePAuthABI(Args, CmdArgs);
-
   // Handle -msve_vector_bits=<bits>
   if (Arg *A = Args.getLastArg(options::OPT_msve_vector_bits_EQ)) {
     StringRef Val = A->getValue();
@@ -1896,6 +1892,12 @@ void Clang::AddAArch64TargetArgs(const ArgList &Args,
                     options::OPT_fno_ptrauth_init_fini_address_discrimination);
   Args.addOptInFlag(CmdArgs, options::OPT_faarch64_jump_table_hardening,
                     options::OPT_fno_aarch64_jump_table_hardening);
+
+  if (Triple.getEnvironment() == llvm::Triple::PAuthTest)
+    handlePAuthABI(Args, CmdArgs);
+
+  // Enable/disable return address signing and indirect branch targets.
+  CollectARMPACBTIOptions(getToolChain(), Args, CmdArgs, true /*isAArch64*/);
 }
 
 void Clang::AddLoongArchTargetArgs(const ArgList &Args,
diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp
index 9a276c5..b26c5bf 100644
--- a/clang/lib/Driver/ToolChains/Darwin.cpp
+++ b/clang/lib/Driver/ToolChains/Darwin.cpp
@@ -2777,30 +2777,6 @@ DerivedArgList *MachO::TranslateArgs(const DerivedArgList &Args,
   // and try to push it down into tool specific logic.
 
   for (Arg *A : Args) {
-    if (A->getOption().matches(options::OPT_Xarch__)) {
-      // Skip this argument unless the architecture matches either the toolchain
-      // triple arch, or the arch being bound.
-      StringRef XarchArch = A->getValue(0);
-      if (!(XarchArch == getArchName() ||
-            (!BoundArch.empty() && XarchArch == BoundArch)))
-        continue;
-
-      Arg *OriginalArg = A;
-      TranslateXarchArgs(Args, A, DAL);
-
-      // Linker input arguments require custom handling. The problem is that we
-      // have already constructed the phase actions, so we can not treat them as
-      // "input arguments".
-      if (A->getOption().hasFlag(options::LinkerInput)) {
-        // Convert the argument into individual Zlinker_input_args.
-        for (const char *Value : A->getValues()) {
-          DAL->AddSeparateArg(
-              OriginalArg, Opts.getOption(options::OPT_Zlinker_input), Value);
-        }
-        continue;
-      }
-    }
-
     // Sob. These is strictly gcc compatible for the time being. Apple
     // gcc translates options twice, which means that self-expanding
     // options add duplicates.
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index 1ae865f..e4019c4 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -42,6 +42,7 @@ void Flang::addFortranDialectOptions(const ArgList &Args,
                             options::OPT_fopenacc,
                             options::OPT_finput_charset_EQ,
                             options::OPT_fimplicit_none,
+                            options::OPT_fimplicit_none_ext,
                             options::OPT_fno_implicit_none,
                             options::OPT_fbackslash,
                             options::OPT_fno_backslash,
diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp
index 158a252..0e50edd 100644
--- a/clang/lib/Driver/ToolChains/HIPAMD.cpp
+++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp
@@ -216,17 +216,8 @@ HIPAMDToolChain::HIPAMDToolChain(const Driver &D, const llvm::Triple &Triple,
   // Lookup binaries into the driver directory, this is used to
   // discover the clang-offload-bundler executable.
   getProgramPaths().push_back(getDriver().Dir);
-
   // Diagnose unsupported sanitizer options only once.
-  if (!Args.hasFlag(options::OPT_fgpu_sanitize, options::OPT_fno_gpu_sanitize,
-                    true))
-    return;
-  for (auto *A : Args.filtered(options::OPT_fsanitize_EQ)) {
-    SanitizerMask K = parseSanitizerValue(A->getValue(), /*AllowGroups=*/false);
-    if (K != SanitizerKind::Address)
-      D.getDiags().Report(clang::diag::warn_drv_unsupported_option_for_target)
-          << A->getAsString(Args) << getTriple().str();
-  }
+  diagnoseUnsupportedSanitizers(Args);
 }
 
 void HIPAMDToolChain::addClangTargetOptions(
diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index 89b8393..77a3695 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -888,7 +888,18 @@ void Parser::parseOMPTraitPropertyKind(OMPTraitProperty &TIProperty,
   TIProperty.Kind = TraitProperty::invalid;
 
   SourceLocation NameLoc = Tok.getLocation();
-  StringRef Name = getNameFromIdOrString(*this, Tok, CONTEXT_TRAIT_LVL);
+  StringRef Name;
+  if (Selector == llvm::omp::TraitSelector::target_device_device_num) {
+    Name = "number";
+    TIProperty.Kind = getOpenMPContextTraitPropertyKind(Set, Selector, Name);
+    ExprResult DeviceNumExprResult = ParseExpression();
+    if (DeviceNumExprResult.isUsable()) {
+      Expr *DeviceNumExpr = DeviceNumExprResult.get();
+      Actions.OpenMP().ActOnOpenMPDeviceNum(DeviceNumExpr);
+    }
+    return;
+  }
+  Name = getNameFromIdOrString(*this, Tok, CONTEXT_TRAIT_LVL);
   if (Name.empty()) {
     Diag(Tok.getLocation(), diag::note_omp_declare_variant_ctx_options)
         << CONTEXT_TRAIT_LVL << listOpenMPContextTraitProperties(Set, Selector);
@@ -918,7 +929,8 @@ void Parser::parseOMPTraitPropertyKind(OMPTraitProperty &TIProperty,
         << "(<property-name>)";
     return;
   }
-  TraitSelector SelectorForName = getOpenMPContextTraitSelectorKind(Name);
+  TraitSelector SelectorForName =
+      getOpenMPContextTraitSelectorKind(Name, SetForName);
   if (SelectorForName != TraitSelector::invalid) {
     Diag(NameLoc, diag::note_omp_declare_variant_ctx_is_a)
         << Name << CONTEXT_SELECTOR_LVL << CONTEXT_TRAIT_LVL;
@@ -935,7 +947,7 @@ void Parser::parseOMPTraitPropertyKind(OMPTraitProperty &TIProperty,
   }
   for (const auto &PotentialSet :
        {TraitSet::construct, TraitSet::user, TraitSet::implementation,
-        TraitSet::device}) {
+        TraitSet::device, TraitSet::target_device}) {
     TraitProperty PropertyForName =
         getOpenMPContextTraitPropertyKind(PotentialSet, Selector, Name);
     if (PropertyForName == TraitProperty::invalid)
@@ -1062,7 +1074,7 @@ void Parser::parseOMPTraitSelectorKind(OMPTraitSelector &TISelector,
     return;
   }
 
-  TISelector.Kind = getOpenMPContextTraitSelectorKind(Name);
+  TISelector.Kind = getOpenMPContextTraitSelectorKind(Name, Set);
   if (TISelector.Kind != TraitSelector::invalid) {
     if (checkForDuplicates(*this, Name, NameLoc, Seen, CONTEXT_SELECTOR_LVL))
       TISelector.Kind = TraitSelector::invalid;
@@ -1084,7 +1096,7 @@ void Parser::parseOMPTraitSelectorKind(OMPTraitSelector &TISelector,
   }
   for (const auto &PotentialSet :
        {TraitSet::construct, TraitSet::user, TraitSet::implementation,
-        TraitSet::device}) {
+        TraitSet::device, TraitSet::target_device}) {
     TraitProperty PropertyForName = getOpenMPContextTraitPropertyKind(
         PotentialSet, TraitSelector::invalid, Name);
     if (PropertyForName == TraitProperty::invalid)
@@ -1259,7 +1271,8 @@ void Parser::parseOMPTraitSetKind(OMPTraitSet &TISet,
   // It follows diagnosis and helping notes.
   Diag(NameLoc, diag::warn_omp_declare_variant_ctx_not_a_set) << Name;
 
-  TraitSelector SelectorForName = getOpenMPContextTraitSelectorKind(Name);
+  TraitSelector SelectorForName =
+      getOpenMPContextTraitSelectorKind(Name, TISet.Kind);
   if (SelectorForName != TraitSelector::invalid) {
     Diag(NameLoc, diag::note_omp_declare_variant_ctx_is_a)
         << Name << CONTEXT_SELECTOR_LVL << CONTEXT_SELECTOR_SET_LVL;
@@ -1276,7 +1289,7 @@ void Parser::parseOMPTraitSetKind(OMPTraitSet &TISet,
   }
   for (const auto &PotentialSet :
        {TraitSet::construct, TraitSet::user, TraitSet::implementation,
-        TraitSet::device}) {
+        TraitSet::device, TraitSet::target_device}) {
     TraitProperty PropertyForName = getOpenMPContextTraitPropertyKind(
         PotentialSet, TraitSelector::invalid, Name);
     if (PropertyForName == TraitProperty::invalid)
@@ -2253,7 +2266,8 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl(
     TargetOMPContext OMPCtx(
         ASTCtx, std::move(DiagUnknownTrait),
         /* CurrentFunctionDecl */ nullptr,
-        /* ConstructTraits */ ArrayRef<llvm::omp::TraitProperty>());
+        /* ConstructTraits */ ArrayRef<llvm::omp::TraitProperty>(),
+        Actions.OpenMP().getOpenMPDeviceNum());
 
     if (isVariantApplicableInContext(VMI, OMPCtx, /* DeviceSetOnly */ true)) {
       Actions.OpenMP().ActOnOpenMPBeginDeclareVariant(Loc, TI);
@@ -2805,7 +2819,8 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective(
         };
     TargetOMPContext OMPCtx(ASTContext, std::move(DiagUnknownTrait),
                             /* CurrentFunctionDecl */ nullptr,
-                            ArrayRef<llvm::omp::TraitProperty>());
+                            ArrayRef<llvm::omp::TraitProperty>(),
+                            Actions.OpenMP().getOpenMPDeviceNum());
 
     // A single match is returned for OpenMP 5.0
     int BestIdx = getBestVariantMatchForContext(VMIs, OMPCtx);
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 9d7d225..f351663 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -3073,7 +3073,8 @@ bool Sema::checkTargetAttr(SourceLocation LiteralLoc, StringRef AttrStr) {
   if (ParsedAttrs.BranchProtection.empty())
     return false;
   if (!Context.getTargetInfo().validateBranchProtection(
-          ParsedAttrs.BranchProtection, ParsedAttrs.CPU, BPI, DiagMsg)) {
+          ParsedAttrs.BranchProtection, ParsedAttrs.CPU, BPI,
+          Context.getLangOpts(), DiagMsg)) {
     if (DiagMsg.empty())
       return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
              << Unsupported << None << "branch-protection" << Target;
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index b83b2b1..7e36601 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -4788,13 +4788,26 @@ static bool checkNestingOfRegions(Sema &SemaRef, const DSAStackTy *Stack,
       getLeafOrCompositeConstructs(ParentRegion, LeafOrComposite);
   OpenMPDirectiveKind EnclosingConstruct = ParentLOC.back();
 
-  if (SemaRef.LangOpts.OpenMP >= 51 && Stack->isParentOrderConcurrent() &&
-      CurrentRegion != OMPD_simd && CurrentRegion != OMPD_loop &&
-      CurrentRegion != OMPD_parallel &&
-      !isOpenMPCombinedParallelADirective(CurrentRegion)) {
-    SemaRef.Diag(StartLoc, diag::err_omp_prohibited_region_order)
-        << getOpenMPDirectiveName(CurrentRegion);
-    return true;
+  if (Stack->isParentOrderConcurrent()) {
+    bool InvalidOrderNesting = false;
+    if ((SemaRef.LangOpts.OpenMP == 51 || SemaRef.LangOpts.OpenMP == 52) &&
+        CurrentRegion != OMPD_simd && CurrentRegion != OMPD_loop &&
+        CurrentRegion != OMPD_parallel &&
+        !isOpenMPCombinedParallelADirective(CurrentRegion)) {
+      InvalidOrderNesting = true;
+    } else if (SemaRef.LangOpts.OpenMP >= 60 &&
+               !isOpenMPOrderConcurrentNestableDirective(CurrentRegion)) {
+      // OpenMP 6.0 [12.3 order Clause, Restrictions]
+      // Only regions that correspond to order-concurrent-nestable constructs
+      // or order-concurrent-nestable routines may be strictly nested regions
+      // of regions that correspond to constructs on which the order clause is
+      // specified with concurrent as the ordering argument.
+      InvalidOrderNesting = true;
+    }
+    if (InvalidOrderNesting) {
+      SemaRef.Diag(StartLoc, diag::err_omp_prohibited_region_order)
+          << getOpenMPDirectiveName(CurrentRegion);
+    }
   }
   if (isOpenMPSimdDirective(ParentRegion) &&
       ((SemaRef.LangOpts.OpenMP <= 45 && CurrentRegion != OMPD_ordered) ||
@@ -7114,7 +7127,8 @@ ExprResult SemaOpenMP::ActOnOpenMPCall(ExprResult Call, Scope *Scope,
   if (!CalleeFnDecl)
     return Call;
 
-  if (getLangOpts().OpenMP >= 51 && CalleeFnDecl->getIdentifier() &&
+  if (getLangOpts().OpenMP >= 51 && getLangOpts().OpenMP < 60 &&
+      CalleeFnDecl->getIdentifier() &&
       CalleeFnDecl->getName().starts_with_insensitive("omp_")) {
     // checking for any calls inside an Order region
     if (Scope && Scope->isOpenMPOrderClauseScope())
@@ -7134,7 +7148,7 @@ ExprResult SemaOpenMP::ActOnOpenMPCall(ExprResult Call, Scope *Scope,
   };
   TargetOMPContext OMPCtx(Context, std::move(DiagUnknownTrait),
                           SemaRef.getCurFunctionDecl(),
-                          DSAStack->getConstructTraits());
+                          DSAStack->getConstructTraits(), getOpenMPDeviceNum());
 
   QualType CalleeFnType = CalleeFnDecl->getType();
 
@@ -15631,6 +15645,38 @@ ExprResult SemaOpenMP::VerifyPositiveIntegerConstantInClause(
   return ICE;
 }
 
+void SemaOpenMP::setOpenMPDeviceNum(int Num) { DeviceNum = Num; }
+
+void SemaOpenMP::setOpenMPDeviceNumID(StringRef ID) { DeviceNumID = ID; }
+
+int SemaOpenMP::getOpenMPDeviceNum() const { return DeviceNum; }
+
+void SemaOpenMP::ActOnOpenMPDeviceNum(Expr *DeviceNumExpr) {
+  llvm::APSInt Result;
+  Expr::EvalResult EvalResult;
+  // Evaluate the expression to an integer value
+  if (!DeviceNumExpr->isValueDependent() &&
+      DeviceNumExpr->EvaluateAsInt(EvalResult, SemaRef.Context)) {
+    // The device expression must evaluate to a non-negative integer value.
+    Result = EvalResult.Val.getInt();
+    if (Result.isNonNegative()) {
+      setOpenMPDeviceNum(Result.getZExtValue());
+    } else {
+      Diag(DeviceNumExpr->getExprLoc(),
+           diag::err_omp_negative_expression_in_clause)
+          << "device_num" << 0 << DeviceNumExpr->getSourceRange();
+    }
+  } else if (auto *DeclRef = dyn_cast<DeclRefExpr>(DeviceNumExpr)) {
+    // Check if the expression is an identifier
+    IdentifierInfo *IdInfo = DeclRef->getDecl()->getIdentifier();
+    if (IdInfo) {
+      setOpenMPDeviceNumID(IdInfo->getName());
+    }
+  } else {
+    Diag(DeviceNumExpr->getExprLoc(), diag::err_expected_expression);
+  }
+}
+
 OMPClause *SemaOpenMP::ActOnOpenMPSafelenClause(Expr *Len,
                                                 SourceLocation StartLoc,
                                                 SourceLocation LParenLoc,
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 16ecea6..8d5b5ac 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -6927,7 +6927,7 @@ void Sema::AddOverloadCandidate(
     bool PartialOverloading, bool AllowExplicit, bool AllowExplicitConversions,
     ADLCallKind IsADLCandidate, ConversionSequenceList EarlyConversions,
     OverloadCandidateParamOrder PO, bool AggregateCandidateDeduction,
-    bool HasMatchedPackOnParmToNonPackOnArg) {
+    bool StrictPackMatch) {
   const FunctionProtoType *Proto
     = dyn_cast<FunctionProtoType>(Function->getType()->getAs<FunctionType>());
   assert(Proto && "Functions without a prototype cannot be overloaded");
@@ -6947,7 +6947,7 @@ void Sema::AddOverloadCandidate(
                          Expr::Classification::makeSimpleLValue(), Args,
                          CandidateSet, SuppressUserConversions,
                          PartialOverloading, EarlyConversions, PO,
-                         HasMatchedPackOnParmToNonPackOnArg);
+                         StrictPackMatch);
       return;
     }
     // We treat a constructor like a non-member function, since its object
@@ -6990,8 +6990,7 @@ void Sema::AddOverloadCandidate(
       CandidateSet.getRewriteInfo().getRewriteKind(Function, PO);
   Candidate.IsADLCandidate = llvm::to_underlying(IsADLCandidate);
   Candidate.ExplicitCallArguments = Args.size();
-  Candidate.HasMatchedPackOnParmToNonPackOnArg =
-      HasMatchedPackOnParmToNonPackOnArg;
+  Candidate.StrictPackMatch = StrictPackMatch;
 
   // Explicit functions are not actually candidates at all if we're not
   // allowing them in this context, but keep them around so we can point
@@ -7563,7 +7562,7 @@ void Sema::AddMethodCandidate(
     Expr::Classification ObjectClassification, ArrayRef<Expr *> Args,
     OverloadCandidateSet &CandidateSet, bool SuppressUserConversions,
     bool PartialOverloading, ConversionSequenceList EarlyConversions,
-    OverloadCandidateParamOrder PO, bool HasMatchedPackOnParmToNonPackOnArg) {
+    OverloadCandidateParamOrder PO, bool StrictPackMatch) {
   const FunctionProtoType *Proto
     = dyn_cast<FunctionProtoType>(Method->getType()->getAs<FunctionType>());
   assert(Proto && "Methods without a prototype cannot be overloaded");
@@ -7594,8 +7593,7 @@ void Sema::AddMethodCandidate(
   Candidate.TookAddressOfOverload =
       CandidateSet.getKind() == OverloadCandidateSet::CSK_AddressOfOverloadSet;
   Candidate.ExplicitCallArguments = Args.size();
-  Candidate.HasMatchedPackOnParmToNonPackOnArg =
-      HasMatchedPackOnParmToNonPackOnArg;
+  Candidate.StrictPackMatch = StrictPackMatch;
 
   bool IgnoreExplicitObject =
       (Method->isExplicitObjectMemberFunction() &&
@@ -7805,8 +7803,7 @@ void Sema::AddMethodTemplateCandidate(
   AddMethodCandidate(cast<CXXMethodDecl>(Specialization), FoundDecl,
                      ActingContext, ObjectType, ObjectClassification, Args,
                      CandidateSet, SuppressUserConversions, PartialOverloading,
-                     Conversions, PO,
-                     Info.hasMatchedPackOnParmToNonPackOnArg());
+                     Conversions, PO, Info.hasStrictPackMatch());
 }
 
 /// Determine whether a given function template has a simple explicit specifier
@@ -7894,7 +7891,7 @@ void Sema::AddTemplateOverloadCandidate(
       PartialOverloading, AllowExplicit,
       /*AllowExplicitConversions=*/false, IsADLCandidate, Conversions, PO,
       Info.AggregateDeductionCandidateHasMismatchedArity,
-      Info.hasMatchedPackOnParmToNonPackOnArg());
+      Info.hasStrictPackMatch());
 }
 
 bool Sema::CheckNonDependentConversions(
@@ -8016,8 +8013,7 @@ void Sema::AddConversionCandidate(
     CXXConversionDecl *Conversion, DeclAccessPair FoundDecl,
     CXXRecordDecl *ActingContext, Expr *From, QualType ToType,
     OverloadCandidateSet &CandidateSet, bool AllowObjCConversionOnExplicit,
-    bool AllowExplicit, bool AllowResultConversion,
-    bool HasMatchedPackOnParmToNonPackOnArg) {
+    bool AllowExplicit, bool AllowResultConversion, bool StrictPackMatch) {
   assert(!Conversion->getDescribedFunctionTemplate() &&
          "Conversion function templates use AddTemplateConversionCandidate");
   QualType ConvType = Conversion->getConversionType().getNonReferenceType();
@@ -8062,8 +8058,7 @@ void Sema::AddConversionCandidate(
   Candidate.FinalConversion.setAllToTypes(ToType);
   Candidate.Viable = true;
   Candidate.ExplicitCallArguments = 1;
-  Candidate.HasMatchedPackOnParmToNonPackOnArg =
-      HasMatchedPackOnParmToNonPackOnArg;
+  Candidate.StrictPackMatch = StrictPackMatch;
 
   // Explicit functions are not actually candidates at all if we're not
   // allowing them in this context, but keep them around so we can point
@@ -8266,7 +8261,7 @@ void Sema::AddTemplateConversionCandidate(
   AddConversionCandidate(Specialization, FoundDecl, ActingDC, From, ToType,
                          CandidateSet, AllowObjCConversionOnExplicit,
                          AllowExplicit, AllowResultConversion,
-                         Info.hasMatchedPackOnParmToNonPackOnArg());
+                         Info.hasStrictPackMatch());
 }
 
 void Sema::AddSurrogateCandidate(CXXConversionDecl *Conversion,
@@ -10618,9 +10613,8 @@ bool clang::isBetterOverloadCandidate(
           isa<CXXConstructorDecl>(Cand2.Function))
     return isa<CXXConstructorDecl>(Cand1.Function);
 
-  if (Cand1.HasMatchedPackOnParmToNonPackOnArg !=
-      Cand2.HasMatchedPackOnParmToNonPackOnArg)
-    return Cand2.HasMatchedPackOnParmToNonPackOnArg;
+  if (Cand1.StrictPackMatch != Cand2.StrictPackMatch)
+    return Cand2.StrictPackMatch;
 
   //    -- F1 is a non-template function and F2 is a function template
   //       specialization, or, if not that,
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index 35ece88..9e68972 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -3651,7 +3651,7 @@ QualType Sema::CheckTemplateIdType(TemplateName Name,
           ClassTemplate->getDeclContext(),
           ClassTemplate->getTemplatedDecl()->getBeginLoc(),
           ClassTemplate->getLocation(), ClassTemplate, CTAI.CanonicalConverted,
-          nullptr);
+          CTAI.StrictPackMatch, nullptr);
       ClassTemplate->AddSpecialization(Decl, InsertPos);
       if (ClassTemplate->isOutOfLine())
         Decl->setLexicalDeclContext(ClassTemplate->getLexicalDeclContext());
@@ -5436,7 +5436,7 @@ bool Sema::CheckTemplateArgument(NamedDecl *Param, TemplateArgumentLoc &ArgLoc,
   case TemplateArgument::TemplateExpansion:
     if (CheckTemplateTemplateArgument(TempParm, Params, ArgLoc,
                                       CTAI.PartialOrdering,
-                                      &CTAI.MatchedPackOnParmToNonPackOnArg))
+                                      &CTAI.StrictPackMatch))
       return true;
 
     CTAI.SugaredConverted.push_back(Arg);
@@ -5762,7 +5762,7 @@ bool Sema::CheckTemplateArgumentList(
 
     SaveAndRestore _1(CTAI.PartialOrdering, false);
     SaveAndRestore _2(CTAI.MatchingTTP, false);
-    SaveAndRestore _3(CTAI.MatchedPackOnParmToNonPackOnArg, {});
+    SaveAndRestore _3(CTAI.StrictPackMatch, {});
     // Check the default template argument.
     if (CheckTemplateArgument(*Param, Arg, Template, TemplateLoc, RAngleLoc, 0,
                               CTAI, CTAK_Specified))
@@ -7361,10 +7361,11 @@ static void DiagnoseTemplateParameterListArityMismatch(
     Sema &S, TemplateParameterList *New, TemplateParameterList *Old,
     Sema::TemplateParameterListEqualKind Kind, SourceLocation TemplateArgLoc);
 
-bool Sema::CheckTemplateTemplateArgument(
-    TemplateTemplateParmDecl *Param, TemplateParameterList *Params,
-    TemplateArgumentLoc &Arg, bool PartialOrdering,
-    bool *MatchedPackOnParmToNonPackOnArg) {
+bool Sema::CheckTemplateTemplateArgument(TemplateTemplateParmDecl *Param,
+                                         TemplateParameterList *Params,
+                                         TemplateArgumentLoc &Arg,
+                                         bool PartialOrdering,
+                                         bool *StrictPackMatch) {
   TemplateName Name = Arg.getArgument().getAsTemplateOrTemplatePattern();
   auto [Template, DefaultArgs] = Name.getTemplateDeclAndDefaultArgs();
   if (!Template) {
@@ -7404,7 +7405,7 @@ bool Sema::CheckTemplateTemplateArgument(
   //   is at least as specialized as the template-argument A.
   if (!isTemplateTemplateParameterAtLeastAsSpecializedAs(
           Params, Param, Template, DefaultArgs, Arg.getLocation(),
-          PartialOrdering, MatchedPackOnParmToNonPackOnArg))
+          PartialOrdering, StrictPackMatch))
     return true;
   // P2113
   // C++20[temp.func.order]p2
@@ -8526,7 +8527,7 @@ DeclResult Sema::ActOnClassTemplateSpecialization(
     // this explicit specialization or friend declaration.
     Specialization = ClassTemplateSpecializationDecl::Create(
         Context, Kind, DC, KWLoc, TemplateNameLoc, ClassTemplate,
-        CTAI.CanonicalConverted, PrevDecl);
+        CTAI.CanonicalConverted, CTAI.StrictPackMatch, PrevDecl);
     Specialization->setTemplateArgsAsWritten(TemplateArgs);
     SetNestedNameSpecifier(*this, Specialization, SS);
     if (TemplateParameterLists.size() > 0) {
@@ -9869,7 +9870,7 @@ DeclResult Sema::ActOnExplicitInstantiation(
     // this explicit specialization.
     Specialization = ClassTemplateSpecializationDecl::Create(
         Context, Kind, ClassTemplate->getDeclContext(), KWLoc, TemplateNameLoc,
-        ClassTemplate, CTAI.CanonicalConverted, PrevDecl);
+        ClassTemplate, CTAI.CanonicalConverted, CTAI.StrictPackMatch, PrevDecl);
     SetNestedNameSpecifier(*this, Specialization, SS);
 
     // A MSInheritanceAttr attached to the previous declaration must be
@@ -9924,9 +9925,9 @@ DeclResult Sema::ActOnExplicitInstantiation(
     = cast_or_null<ClassTemplateSpecializationDecl>(
                                               Specialization->getDefinition());
   if (!Def)
-    InstantiateClassTemplateSpecialization(
-        TemplateNameLoc, Specialization, TSK,
-        /*Complain=*/true, CTAI.MatchedPackOnParmToNonPackOnArg);
+    InstantiateClassTemplateSpecialization(TemplateNameLoc, Specialization, TSK,
+                                           /*Complain=*/true,
+                                           CTAI.StrictPackMatch);
   else if (TSK == TSK_ExplicitInstantiationDefinition) {
     MarkVTableUsed(TemplateNameLoc, Specialization, true);
     Specialization->setPointOfInstantiation(Def->getPointOfInstantiation());
diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp
index 137942f..eb70dd7 100644
--- a/clang/lib/Sema/SemaTemplateDeduction.cpp
+++ b/clang/lib/Sema/SemaTemplateDeduction.cpp
@@ -2776,7 +2776,7 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams,
         if (!FoldPackParameter)
           return TemplateDeductionResult::MiscellaneousDeductionFailure;
         if (FoldPackArgument)
-          Info.setMatchedPackOnParmToNonPackOnArg();
+          Info.setStrictPackMatch();
       }
       // Deduce template arguments from the pattern.
       if (auto Result = DeduceTemplateArguments(
@@ -2972,7 +2972,7 @@ ConvertDeducedTemplateArgument(Sema &S, NamedDecl *Param,
         Arg, QualType(), Info.getLocation(), Param);
 
     SaveAndRestore _1(CTAI.MatchingTTP, false);
-    SaveAndRestore _2(CTAI.MatchedPackOnParmToNonPackOnArg, false);
+    SaveAndRestore _2(CTAI.StrictPackMatch, false);
     // Check the template argument, converting it as necessary.
     auto Res = S.CheckTemplateArgument(
         Param, ArgLoc, Template, Template->getLocation(),
@@ -2981,8 +2981,8 @@ ConvertDeducedTemplateArgument(Sema &S, NamedDecl *Param,
             ? (Arg.wasDeducedFromArrayBound() ? Sema::CTAK_DeducedFromArrayBound
                                               : Sema::CTAK_Deduced)
             : Sema::CTAK_Specified);
-    if (CTAI.MatchedPackOnParmToNonPackOnArg)
-      Info.setMatchedPackOnParmToNonPackOnArg();
+    if (CTAI.StrictPackMatch)
+      Info.setStrictPackMatch();
     return Res;
   };
 
@@ -3177,7 +3177,7 @@ static TemplateDeductionResult ConvertDeducedTemplateArguments(
 
     SaveAndRestore _1(CTAI.PartialOrdering, false);
     SaveAndRestore _2(CTAI.MatchingTTP, false);
-    SaveAndRestore _3(CTAI.MatchedPackOnParmToNonPackOnArg, false);
+    SaveAndRestore _3(CTAI.StrictPackMatch, false);
     // Check whether we can actually use the default argument.
     if (S.CheckTemplateArgument(
             Param, DefArg, TD, TD->getLocation(), TD->getSourceRange().getEnd(),
@@ -3341,8 +3341,6 @@ FinishTemplateArgumentDeduction(
     return ConstraintsNotSatisfied
                ? TemplateDeductionResult::ConstraintsNotSatisfied
                : TemplateDeductionResult::SubstitutionFailure;
-  if (InstCTAI.MatchedPackOnParmToNonPackOnArg)
-    Info.setMatchedPackOnParmToNonPackOnArg();
 
   TemplateParameterList *TemplateParams = Template->getTemplateParameters();
   for (unsigned I = 0, E = TemplateParams->size(); I != E; ++I) {
@@ -4074,22 +4072,7 @@ TemplateDeductionResult Sema::FinishTemplateArgumentDeduction(
   if (FunctionTemplate->getFriendObjectKind())
     Owner = FunctionTemplate->getLexicalDeclContext();
   FunctionDecl *FD = FunctionTemplate->getTemplatedDecl();
-  // additional check for inline friend,
-  // ```
-  //   template <class F1> int foo(F1 X);
-  //   template <int A1> struct A {
-  //     template <class F1> friend int foo(F1 X) { return A1; }
-  //   };
-  //   template struct A<1>;
-  //   int a = foo(1.0);
-  // ```
-  const FunctionDecl *FDFriend;
-  if (FD->getFriendObjectKind() == Decl::FriendObjectKind::FOK_None &&
-      FD->isDefined(FDFriend, /*CheckForPendingFriendDefinition*/ true) &&
-      FDFriend->getFriendObjectKind() != Decl::FriendObjectKind::FOK_None) {
-    FD = const_cast<FunctionDecl *>(FDFriend);
-    Owner = FD->getLexicalDeclContext();
-  }
+
   MultiLevelTemplateArgumentList SubstArgs(
       FunctionTemplate, CanonicalDeducedArgumentList->asArray(),
       /*Final=*/false);
@@ -6514,7 +6497,7 @@ bool Sema::isMoreSpecializedThanPrimary(
 bool Sema::isTemplateTemplateParameterAtLeastAsSpecializedAs(
     TemplateParameterList *P, TemplateDecl *PArg, TemplateDecl *AArg,
     const DefaultArguments &DefaultArgs, SourceLocation ArgLoc,
-    bool PartialOrdering, bool *MatchedPackOnParmToNonPackOnArg) {
+    bool PartialOrdering, bool *StrictPackMatch) {
   // C++1z [temp.arg.template]p4: (DR 150)
   //   A template template-parameter P is at least as specialized as a
   //   template template-argument A if, given the following rewrite to two
@@ -6572,8 +6555,8 @@ bool Sema::isTemplateTemplateParameterAtLeastAsSpecializedAs(
                                   /*ConstraintsNotSatisfied=*/nullptr))
       return false;
     PArgs = std::move(CTAI.SugaredConverted);
-    if (MatchedPackOnParmToNonPackOnArg)
-      *MatchedPackOnParmToNonPackOnArg |= CTAI.MatchedPackOnParmToNonPackOnArg;
+    if (StrictPackMatch)
+      *StrictPackMatch |= CTAI.StrictPackMatch;
   }
 
   // Determine whether P1 is at least as specialized as P2.
@@ -6598,9 +6581,8 @@ bool Sema::isTemplateTemplateParameterAtLeastAsSpecializedAs(
       PartialOrdering ? PackFold::ArgumentToParameter : PackFold::Both,
       /*HasDeducedAnyParam=*/nullptr)) {
   case clang::TemplateDeductionResult::Success:
-    if (MatchedPackOnParmToNonPackOnArg &&
-        Info.hasMatchedPackOnParmToNonPackOnArg())
-      *MatchedPackOnParmToNonPackOnArg = true;
+    if (StrictPackMatch && Info.hasStrictPackMatch())
+      *StrictPackMatch = true;
     break;
 
   case TemplateDeductionResult::MiscellaneousDeductionFailure:
diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp
index 44bca0d..d9a47ca 100644
--- a/clang/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp
@@ -190,7 +190,7 @@ HandleVarTemplateSpec(const VarTemplateSpecializationDecl *VarTemplSpec,
   llvm::PointerUnion<VarTemplateDecl *, VarTemplatePartialSpecializationDecl *>
       Specialized = VarTemplSpec->getSpecializedTemplateOrPartial();
   if (VarTemplatePartialSpecializationDecl *Partial =
-          Specialized.dyn_cast<VarTemplatePartialSpecializationDecl *>()) {
+          dyn_cast<VarTemplatePartialSpecializationDecl *>(Specialized)) {
     if (!SkipForSpecialization)
       Result.addOuterTemplateArguments(
           Partial, VarTemplSpec->getTemplateInstantiationArgs().asArray(),
@@ -479,9 +479,6 @@ MultiLevelTemplateArgumentList Sema::getTemplateInstantiationArgs(
   using namespace TemplateInstArgsHelpers;
   const Decl *CurDecl = ND;
 
-  if (!CurDecl)
-    CurDecl = Decl::castFromDeclContext(DC);
-
   if (Innermost) {
     Result.addOuterTemplateArguments(const_cast<NamedDecl *>(ND), *Innermost,
                                      Final);
@@ -495,8 +492,10 @@ MultiLevelTemplateArgumentList Sema::getTemplateInstantiationArgs(
     // has a depth of 0.
     if (const auto *TTP = dyn_cast<TemplateTemplateParmDecl>(CurDecl))
       HandleDefaultTempArgIntoTempTempParam(TTP, Result);
-    CurDecl = Response::UseNextDecl(CurDecl).NextDecl;
-  }
+    CurDecl = DC ? Decl::castFromDeclContext(DC)
+                 : Response::UseNextDecl(CurDecl).NextDecl;
+  } else if (!CurDecl)
+    CurDecl = Decl::castFromDeclContext(DC);
 
   while (!CurDecl->isFileContextDecl()) {
     Response R;
@@ -4059,8 +4058,7 @@ bool Sema::usesPartialOrExplicitSpecialization(
 static ActionResult<CXXRecordDecl *> getPatternForClassTemplateSpecialization(
     Sema &S, SourceLocation PointOfInstantiation,
     ClassTemplateSpecializationDecl *ClassTemplateSpec,
-    TemplateSpecializationKind TSK,
-    bool PrimaryHasMatchedPackOnParmToNonPackOnArg) {
+    TemplateSpecializationKind TSK, bool PrimaryStrictPackMatch) {
   Sema::InstantiatingTemplate Inst(S, PointOfInstantiation, ClassTemplateSpec);
   if (Inst.isInvalid())
     return {/*Invalid=*/true};
@@ -4113,12 +4111,11 @@ static ActionResult<CXXRecordDecl *> getPatternForClassTemplateSpecialization(
             MakeDeductionFailureInfo(S.Context, Result, Info));
         (void)Result;
       } else {
-        auto &List =
-            Info.hasMatchedPackOnParmToNonPackOnArg() ? ExtraMatched : Matched;
+        auto &List = Info.hasStrictPackMatch() ? ExtraMatched : Matched;
         List.push_back(MatchResult{Partial, Info.takeCanonical()});
       }
     }
-    if (Matched.empty() && PrimaryHasMatchedPackOnParmToNonPackOnArg)
+    if (Matched.empty() && PrimaryStrictPackMatch)
       Matched = std::move(ExtraMatched);
 
     // If we're dealing with a member template where the template parameters
@@ -4223,7 +4220,7 @@ bool Sema::InstantiateClassTemplateSpecialization(
     SourceLocation PointOfInstantiation,
     ClassTemplateSpecializationDecl *ClassTemplateSpec,
     TemplateSpecializationKind TSK, bool Complain,
-    bool PrimaryHasMatchedPackOnParmToNonPackOnArg) {
+    bool PrimaryStrictPackMatch) {
   // Perform the actual instantiation on the canonical declaration.
   ClassTemplateSpec = cast<ClassTemplateSpecializationDecl>(
       ClassTemplateSpec->getCanonicalDecl());
@@ -4231,9 +4228,9 @@ bool Sema::InstantiateClassTemplateSpecialization(
     return true;
 
   ActionResult<CXXRecordDecl *> Pattern =
-      getPatternForClassTemplateSpecialization(
-          *this, PointOfInstantiation, ClassTemplateSpec, TSK,
-          PrimaryHasMatchedPackOnParmToNonPackOnArg);
+      getPatternForClassTemplateSpecialization(*this, PointOfInstantiation,
+                                               ClassTemplateSpec, TSK,
+                                               PrimaryStrictPackMatch);
   if (!Pattern.isUsable())
     return Pattern.isInvalid();
 
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 4855e8a..1f42f95 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -12,6 +12,7 @@
 #include "TreeTransform.h"
 #include "clang/AST/ASTConsumer.h"
 #include "clang/AST/ASTContext.h"
+#include "clang/AST/ASTLambda.h"
 #include "clang/AST/ASTMutationListener.h"
 #include "clang/AST/DeclTemplate.h"
 #include "clang/AST/DependentDiagnostic.h"
@@ -4038,7 +4039,7 @@ TemplateDeclInstantiator::VisitClassTemplateSpecializationDecl(
       ClassTemplateSpecializationDecl::Create(
           SemaRef.Context, D->getTagKind(), Owner, D->getBeginLoc(),
           D->getLocation(), InstClassTemplate, CTAI.CanonicalConverted,
-          PrevDecl);
+          CTAI.StrictPackMatch, PrevDecl);
   InstD->setTemplateArgsAsWritten(InstTemplateArgs);
 
   // Add this partial specialization to the set of class template partial
@@ -5276,9 +5277,31 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation,
     RebuildTypeSourceInfoForDefaultSpecialMembers();
     SetDeclDefaulted(Function, PatternDecl->getLocation());
   } else {
+    NamedDecl *ND = Function;
+    DeclContext *DC = ND->getLexicalDeclContext();
+    std::optional<ArrayRef<TemplateArgument>> Innermost;
+    if (auto *Primary = Function->getPrimaryTemplate();
+        Primary &&
+        !isGenericLambdaCallOperatorOrStaticInvokerSpecialization(Function) &&
+        Function->getTemplateSpecializationKind() !=
+            TSK_ExplicitSpecialization) {
+      auto It = llvm::find_if(Primary->redecls(),
+                              [](const RedeclarableTemplateDecl *RTD) {
+                                return cast<FunctionTemplateDecl>(RTD)
+                                    ->isCompatibleWithDefinition();
+                              });
+      assert(It != Primary->redecls().end() &&
+             "Should't get here without a definition");
+      if (FunctionDecl *Def = cast<FunctionTemplateDecl>(*It)
+                                  ->getTemplatedDecl()
+                                  ->getDefinition())
+        DC = Def->getLexicalDeclContext();
+      else
+        DC = (*It)->getLexicalDeclContext();
+      Innermost.emplace(Function->getTemplateSpecializationArgs()->asArray());
+    }
     MultiLevelTemplateArgumentList TemplateArgs = getTemplateInstantiationArgs(
-        Function, Function->getLexicalDeclContext(), /*Final=*/false,
-        /*Innermost=*/std::nullopt, false, PatternDecl);
+        Function, DC, /*Final=*/false, Innermost, false, PatternDecl);
 
     // Substitute into the qualifier; we can get a substitution failure here
     // through evil use of alias templates.
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 1fa5239..fd1b73d 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -9399,7 +9399,7 @@ bool Sema::RequireCompleteTypeImpl(SourceLocation Loc, QualType T,
         runWithSufficientStackSpace(Loc, [&] {
           Diagnosed = InstantiateClassTemplateSpecialization(
               Loc, ClassTemplateSpec, TSK_ImplicitInstantiation,
-              /*Complain=*/Diagnoser);
+              /*Complain=*/Diagnoser, ClassTemplateSpec->hasStrictPackMatch());
         });
         Instantiated = true;
       }
diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp
index 8210eb2..df834cd 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -1064,6 +1064,7 @@ void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) {
   FD->setHasImplicitReturnZero(FunctionDeclBits.getNextBit());
   FD->setIsMultiVersion(FunctionDeclBits.getNextBit());
   FD->setLateTemplateParsed(FunctionDeclBits.getNextBit());
+  FD->setInstantiatedFromMemberTemplate(FunctionDeclBits.getNextBit());
   FD->setFriendConstraintRefersToEnclosingTemplate(
       FunctionDeclBits.getNextBit());
   FD->setUsesSEHTry(FunctionDeclBits.getNextBit());
@@ -2532,6 +2533,7 @@ RedeclarableResult ASTDeclReader::VisitClassTemplateSpecializationDeclImpl(
   D->TemplateArgs = TemplateArgumentList::CreateCopy(C, TemplArgs);
   D->PointOfInstantiation = readSourceLocation();
   D->SpecializationKind = (TemplateSpecializationKind)Record.readInt();
+  D->StrictPackMatch = Record.readBool();
 
   bool writtenAsCanonicalDecl = Record.readInt();
   if (writtenAsCanonicalDecl) {
@@ -3746,6 +3748,11 @@ void ASTDeclReader::checkMultipleDefinitionInNamedModules(ASTReader &Reader,
       Func && Func->getTemplateSpecializationInfo())
     return;
 
+  // The module ownership of in-class friend declaration is not straightforward.
+  // Avoid diagnosing such cases.
+  if (D->getFriendObjectKind() || Previous->getFriendObjectKind())
+    return;
+
   Module *M = Previous->getOwningModule();
   if (!M)
     return;
diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp
index fa2294d..b25dada 100644
--- a/clang/lib/Serialization/ASTWriterDecl.cpp
+++ b/clang/lib/Serialization/ASTWriterDecl.cpp
@@ -679,7 +679,7 @@ void ASTDeclWriter::VisitDeclaratorDecl(DeclaratorDecl *D) {
 }
 
 void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) {
-  static_assert(DeclContext::NumFunctionDeclBits == 44,
+  static_assert(DeclContext::NumFunctionDeclBits == 45,
                 "You need to update the serializer after you change the "
                 "FunctionDeclBits");
 
@@ -785,6 +785,7 @@ void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) {
   FunctionDeclBits.addBit(D->hasImplicitReturnZero());
   FunctionDeclBits.addBit(D->isMultiVersion());
   FunctionDeclBits.addBit(D->isLateTemplateParsed());
+  FunctionDeclBits.addBit(D->isInstantiatedFromMemberTemplate());
   FunctionDeclBits.addBit(D->FriendConstraintRefersToEnclosingTemplate());
   FunctionDeclBits.addBit(D->usesSEHTry());
   Record.push_back(FunctionDeclBits);
@@ -1843,6 +1844,7 @@ void ASTDeclWriter::VisitClassTemplateSpecializationDecl(
   Record.AddTemplateArgumentList(&D->getTemplateArgs());
   Record.AddSourceLocation(D->getPointOfInstantiation());
   Record.push_back(D->getSpecializationKind());
+  Record.push_back(D->hasStrictPackMatch());
   Record.push_back(D->isCanonicalDecl());
 
   if (D->isCanonicalDecl()) {
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLambdaCapturesChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLambdaCapturesChecker.cpp
index a57499d..53ef423 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLambdaCapturesChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLambdaCapturesChecker.cpp
@@ -109,8 +109,10 @@ public:
       bool VisitCallExpr(CallExpr *CE) override {
         checkCalleeLambda(CE);
         if (auto *Callee = CE->getDirectCallee()) {
-          bool TreatAllArgsAsNoEscape = shouldTreatAllArgAsNoEscape(Callee);
           unsigned ArgIndex = 0;
+          if (auto *CXXCallee = dyn_cast<CXXMethodDecl>(Callee))
+            ArgIndex = CXXCallee->isInstance();
+          bool TreatAllArgsAsNoEscape = shouldTreatAllArgAsNoEscape(Callee);
           for (auto *Param : Callee->parameters()) {
             if (ArgIndex >= CE->getNumArgs())
               return true;
diff --git a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
index 2904eab..4100812 100644
--- a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
+++ b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
@@ -62,6 +62,7 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
@@ -287,6 +288,34 @@ private:
   const PathSensitiveBugReport *getBugReport() const { return R; }
 };
 
+std::string timeTraceName(const BugReportEquivClass &EQ) {
+  if (!llvm::timeTraceProfilerEnabled())
+    return "";
+  const auto &BugReports = EQ.getReports();
+  if (BugReports.empty())
+    return "Empty Equivalence Class";
+  const BugReport *R = BugReports.front().get();
+  const auto &BT = R->getBugType();
+  return ("Flushing EQC " + BT.getDescription()).str();
+}
+
+llvm::TimeTraceMetadata timeTraceMetadata(const BugReportEquivClass &EQ) {
+  // Must be called only when constructing non-bogus TimeTraceScope
+  assert(llvm::timeTraceProfilerEnabled());
+
+  const auto &BugReports = EQ.getReports();
+  if (BugReports.empty())
+    return {};
+  const BugReport *R = BugReports.front().get();
+  const auto &BT = R->getBugType();
+  auto Loc = R->getLocation().asLocation();
+  std::string File = "";
+  if (const auto *Entry = Loc.getFileEntry())
+    File = Entry->tryGetRealPathName().str();
+  return {BT.getCheckerName().str(), std::move(File),
+          static_cast<int>(Loc.getLineNumber())};
+}
+
 } // namespace
 
 //===----------------------------------------------------------------------===//
@@ -2892,6 +2921,7 @@ std::optional<PathDiagnosticBuilder> PathDiagnosticBuilder::findValidReport(
 
     if (R->isValid()) {
       if (Reporter.getAnalyzerOptions().ShouldCrosscheckWithZ3) {
+        llvm::TimeTraceScope TCS{"Crosscheck with Z3"};
         // If crosscheck is enabled, remove all visitors, add the refutation
         // visitor and check again
         R->clearVisitors();
@@ -3119,7 +3149,9 @@ BugReport *PathSensitiveBugReporter::findReportInEquivalenceClass(
   return exampleReport;
 }
 
-void BugReporter::FlushReport(BugReportEquivClass& EQ) {
+void BugReporter::FlushReport(BugReportEquivClass &EQ) {
+  llvm::TimeTraceScope TCS{timeTraceName(EQ),
+                           [&EQ]() { return timeTraceMetadata(EQ); }};
   SmallVector<BugReport*, 10> bugReports;
   BugReport *report = findReportInEquivalenceClass(EQ, bugReports);
   if (!report)
diff --git a/clang/lib/StaticAnalyzer/Core/BugSuppression.cpp b/clang/lib/StaticAnalyzer/Core/BugSuppression.cpp
index 05c99c4..5b5f9df 100644
--- a/clang/lib/StaticAnalyzer/Core/BugSuppression.cpp
+++ b/clang/lib/StaticAnalyzer/Core/BugSuppression.cpp
@@ -9,6 +9,8 @@
 #include "clang/StaticAnalyzer/Core/BugReporter/BugSuppression.h"
 #include "clang/AST/DynamicRecursiveASTVisitor.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/TimeProfiler.h"
 
 using namespace clang;
 using namespace ento;
@@ -119,6 +121,29 @@ private:
   Ranges &Result;
 };
 
+std::string timeScopeName(const Decl *DeclWithIssue) {
+  if (!llvm::timeTraceProfilerEnabled())
+    return "";
+  return llvm::formatv(
+             "BugSuppression::isSuppressed init suppressions cache for {0}",
+             DeclWithIssue->getDeclKindName())
+      .str();
+}
+
+llvm::TimeTraceMetadata getDeclTimeTraceMetadata(const Decl *DeclWithIssue) {
+  assert(DeclWithIssue);
+  assert(llvm::timeTraceProfilerEnabled());
+  std::string Name = "<noname>";
+  if (const auto *ND = dyn_cast<NamedDecl>(DeclWithIssue)) {
+    Name = ND->getNameAsString();
+  }
+  const auto &SM = DeclWithIssue->getASTContext().getSourceManager();
+  auto Line = SM.getPresumedLineNumber(DeclWithIssue->getBeginLoc());
+  auto Fname = SM.getFilename(DeclWithIssue->getBeginLoc());
+  return llvm::TimeTraceMetadata{std::move(Name), Fname.str(),
+                                 static_cast<int>(Line)};
+}
+
 } // end anonymous namespace
 
 // TODO: Introduce stable IDs for checkers and check for those here
@@ -177,6 +202,9 @@ bool BugSuppression::isSuppressed(const PathDiagnosticLocation &Location,
       std::make_pair(DeclWithIssue, CachedRanges{}));
   Ranges &SuppressionRanges = InsertionResult.first->second;
   if (InsertionResult.second) {
+    llvm::TimeTraceScope TimeScope(
+        timeScopeName(DeclWithIssue),
+        [DeclWithIssue]() { return getDeclTimeTraceMetadata(DeclWithIssue); });
     // We haven't checked this declaration for suppressions yet!
     CacheInitializer::initialize(DeclWithIssue, SuppressionRanges);
   }
diff --git a/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp b/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp
index 9f7a0fc..53929d3 100644
--- a/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp
+++ b/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp
@@ -27,6 +27,7 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/TimeProfiler.h"
 #include <cassert>
 #include <optional>
 #include <vector>
@@ -134,6 +135,14 @@ static void expandGraphWithCheckers(CHECK_CTX checkCtx,
 
 namespace {
 
+std::string checkerScopeName(StringRef Name, const CheckerBase *Checker) {
+  if (!llvm::timeTraceProfilerEnabled())
+    return "";
+  StringRef CheckerName =
+      Checker ? Checker->getCheckerName().getName() : "<unknown>";
+  return (Name + ":" + CheckerName).str();
+}
+
   struct CheckStmtContext {
     using CheckersTy = SmallVectorImpl<CheckerManager::CheckStmtFunc>;
 
@@ -153,6 +162,7 @@ namespace {
 
     void runChecker(CheckerManager::CheckStmtFunc checkFn,
                     NodeBuilder &Bldr, ExplodedNode *Pred) {
+      llvm::TimeTraceScope TimeScope(checkerScopeName("Stmt", checkFn.Checker));
       // FIXME: Remove respondsToCallback from CheckerContext;
       ProgramPoint::Kind K =  IsPreVisit ? ProgramPoint::PreStmtKind :
                                            ProgramPoint::PostStmtKind;
@@ -174,6 +184,9 @@ void CheckerManager::runCheckersForStmt(bool isPreVisit,
                                         bool WasInlined) {
   CheckStmtContext C(isPreVisit, getCachedStmtCheckersFor(S, isPreVisit),
                      S, Eng, WasInlined);
+  llvm::TimeTraceScope TimeScope(
+      isPreVisit ? "CheckerManager::runCheckersForStmt (Pre)"
+                 : "CheckerManager::runCheckersForStmt (Post)");
   expandGraphWithCheckers(C, Dst, Src);
 }
 
@@ -200,6 +213,8 @@ namespace {
 
     void runChecker(CheckerManager::CheckObjCMessageFunc checkFn,
                     NodeBuilder &Bldr, ExplodedNode *Pred) {
+      llvm::TimeTraceScope TimeScope(
+          checkerScopeName("ObjCMsg", checkFn.Checker));
       bool IsPreVisit;
 
       switch (Kind) {
@@ -230,6 +245,7 @@ void CheckerManager::runCheckersForObjCMessage(ObjCMessageVisitKind visitKind,
                                                bool WasInlined) {
   const auto &checkers = getObjCMessageCheckers(visitKind);
   CheckObjCMessageContext C(visitKind, checkers, msg, Eng, WasInlined);
+  llvm::TimeTraceScope TimeScope("CheckerManager::runCheckersForObjCMessage");
   expandGraphWithCheckers(C, Dst, Src);
 }
 
@@ -270,6 +286,7 @@ namespace {
 
     void runChecker(CheckerManager::CheckCallFunc checkFn,
                     NodeBuilder &Bldr, ExplodedNode *Pred) {
+      llvm::TimeTraceScope TimeScope(checkerScopeName("Call", checkFn.Checker));
       const ProgramPoint &L = Call.getProgramPoint(IsPreVisit,checkFn.Checker);
       CheckerContext C(Bldr, Eng, Pred, L, WasInlined);
 
@@ -290,6 +307,9 @@ void CheckerManager::runCheckersForCallEvent(bool isPreVisit,
                      isPreVisit ? PreCallCheckers
                                 : PostCallCheckers,
                      Call, Eng, WasInlined);
+  llvm::TimeTraceScope TimeScope(
+      isPreVisit ? "CheckerManager::runCheckersForCallEvent (Pre)"
+                 : "CheckerManager::runCheckersForCallEvent (Post)");
   expandGraphWithCheckers(C, Dst, Src);
 }
 
@@ -317,6 +337,7 @@ namespace {
 
     void runChecker(CheckerManager::CheckLocationFunc checkFn,
                     NodeBuilder &Bldr, ExplodedNode *Pred) {
+      llvm::TimeTraceScope TimeScope(checkerScopeName("Loc", checkFn.Checker));
       ProgramPoint::Kind K =  IsLoad ? ProgramPoint::PreLoadKind :
                                        ProgramPoint::PreStoreKind;
       const ProgramPoint &L =
@@ -340,6 +361,9 @@ void CheckerManager::runCheckersForLocation(ExplodedNodeSet &Dst,
                                             ExprEngine &Eng) {
   CheckLocationContext C(LocationCheckers, location, isLoad, NodeEx,
                          BoundEx, Eng);
+  llvm::TimeTraceScope TimeScope(
+      isLoad ? "CheckerManager::runCheckersForLocation (Load)"
+             : "CheckerManager::runCheckersForLocation (Store)");
   expandGraphWithCheckers(C, Dst, Src);
 }
 
@@ -365,6 +389,7 @@ namespace {
 
     void runChecker(CheckerManager::CheckBindFunc checkFn,
                     NodeBuilder &Bldr, ExplodedNode *Pred) {
+      llvm::TimeTraceScope TimeScope(checkerScopeName("Bind", checkFn.Checker));
       const ProgramPoint &L = PP.withTag(checkFn.Checker);
       CheckerContext C(Bldr, Eng, Pred, L);
 
@@ -372,6 +397,14 @@ namespace {
     }
   };
 
+  llvm::TimeTraceMetadata getTimeTraceBindMetadata(SVal Val) {
+    assert(llvm::timeTraceProfilerEnabled());
+    std::string Name;
+    llvm::raw_string_ostream OS(Name);
+    Val.dumpToStream(OS);
+    return llvm::TimeTraceMetadata{OS.str(), ""};
+  }
+
 } // namespace
 
 /// Run checkers for binding of a value to a location.
@@ -381,6 +414,9 @@ void CheckerManager::runCheckersForBind(ExplodedNodeSet &Dst,
                                         const Stmt *S, ExprEngine &Eng,
                                         const ProgramPoint &PP) {
   CheckBindContext C(BindCheckers, location, val, S, Eng, PP);
+  llvm::TimeTraceScope TimeScope{
+      "CheckerManager::runCheckersForBind",
+      [&val]() { return getTimeTraceBindMetadata(val); }};
   expandGraphWithCheckers(C, Dst, Src);
 }
 
@@ -409,6 +445,7 @@ struct CheckBeginFunctionContext {
 
   void runChecker(CheckerManager::CheckBeginFunctionFunc checkFn,
                   NodeBuilder &Bldr, ExplodedNode *Pred) {
+    llvm::TimeTraceScope TimeScope(checkerScopeName("Begin", checkFn.Checker));
     const ProgramPoint &L = PP.withTag(checkFn.Checker);
     CheckerContext C(Bldr, Eng, Pred, L);
 
@@ -425,6 +462,7 @@ void CheckerManager::runCheckersForBeginFunction(ExplodedNodeSet &Dst,
   ExplodedNodeSet Src;
   Src.insert(Pred);
   CheckBeginFunctionContext C(BeginFunctionCheckers, Eng, L);
+  llvm::TimeTraceScope TimeScope("CheckerManager::runCheckersForBeginFunction");
   expandGraphWithCheckers(C, Dst, Src);
 }
 
@@ -444,6 +482,7 @@ void CheckerManager::runCheckersForEndFunction(NodeBuilderContext &BC,
     const ProgramPoint &L =
         FunctionExitPoint(RS, Pred->getLocationContext(), checkFn.Checker);
     CheckerContext C(Bldr, Eng, Pred, L);
+    llvm::TimeTraceScope TimeScope(checkerScopeName("End", checkFn.Checker));
     checkFn(RS, C);
   }
 }
@@ -466,6 +505,8 @@ namespace {
 
     void runChecker(CheckerManager::CheckBranchConditionFunc checkFn,
                     NodeBuilder &Bldr, ExplodedNode *Pred) {
+      llvm::TimeTraceScope TimeScope(
+          checkerScopeName("BranchCond", checkFn.Checker));
       ProgramPoint L = PostCondition(Condition, Pred->getLocationContext(),
                                      checkFn.Checker);
       CheckerContext C(Bldr, Eng, Pred, L);
@@ -483,6 +524,8 @@ void CheckerManager::runCheckersForBranchCondition(const Stmt *Condition,
   ExplodedNodeSet Src;
   Src.insert(Pred);
   CheckBranchConditionContext C(BranchConditionCheckers, Condition, Eng);
+  llvm::TimeTraceScope TimeScope(
+      "CheckerManager::runCheckersForBranchCondition");
   expandGraphWithCheckers(C, Dst, Src);
 }
 
@@ -506,6 +549,8 @@ namespace {
 
     void runChecker(CheckerManager::CheckNewAllocatorFunc checkFn,
                     NodeBuilder &Bldr, ExplodedNode *Pred) {
+      llvm::TimeTraceScope TimeScope(
+          checkerScopeName("Allocator", checkFn.Checker));
       ProgramPoint L =
           PostAllocatorCall(Call.getOriginExpr(), Pred->getLocationContext());
       CheckerContext C(Bldr, Eng, Pred, L, WasInlined);
@@ -524,6 +569,7 @@ void CheckerManager::runCheckersForNewAllocator(const CXXAllocatorCall &Call,
   ExplodedNodeSet Src;
   Src.insert(Pred);
   CheckNewAllocatorContext C(NewAllocatorCheckers, Call, WasInlined, Eng);
+  llvm::TimeTraceScope TimeScope("CheckerManager::runCheckersForNewAllocator");
   expandGraphWithCheckers(C, Dst, Src);
 }
 
@@ -555,6 +601,8 @@ namespace {
 
     void runChecker(CheckerManager::CheckDeadSymbolsFunc checkFn,
                     NodeBuilder &Bldr, ExplodedNode *Pred) {
+      llvm::TimeTraceScope TimeScope(
+          checkerScopeName("DeadSymbols", checkFn.Checker));
       const ProgramPoint &L = ProgramPoint::getProgramPoint(S, ProgarmPointKind,
                                 Pred->getLocationContext(), checkFn.Checker);
       CheckerContext C(Bldr, Eng, Pred, L);
@@ -576,6 +624,7 @@ void CheckerManager::runCheckersForDeadSymbols(ExplodedNodeSet &Dst,
                                                ExprEngine &Eng,
                                                ProgramPoint::Kind K) {
   CheckDeadSymbolsContext C(DeadSymbolsCheckers, SymReaper, S, Eng, K);
+  llvm::TimeTraceScope TimeScope("CheckerManager::runCheckersForDeadSymbols");
   expandGraphWithCheckers(C, Dst, Src);
 }
 
diff --git a/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp b/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp
index 775a22e1..bf1fd7c 100644
--- a/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp
@@ -30,6 +30,8 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/TimeProfiler.h"
 #include <algorithm>
 #include <cassert>
 #include <memory>
@@ -179,8 +181,41 @@ bool CoreEngine::ExecuteWorkList(const LocationContext *L, unsigned MaxSteps,
   return WList->hasWork();
 }
 
-void CoreEngine::dispatchWorkItem(ExplodedNode* Pred, ProgramPoint Loc,
-                                  const WorkListUnit& WU) {
+static std::string timeTraceScopeName(const ProgramPoint &Loc) {
+  if (llvm::timeTraceProfilerEnabled()) {
+    return llvm::formatv("Loc {0}",
+                         ProgramPoint::getProgramPointKindName(Loc.getKind()))
+        .str();
+  }
+  return "";
+}
+
+static llvm::TimeTraceMetadata timeTraceMetadata(const ExplodedNode *Pred,
+                                                 const ProgramPoint &Loc) {
+  // If time-trace profiler is not enabled, this function is never called.
+  assert(llvm::timeTraceProfilerEnabled());
+  std::string Detail = "";
+  if (const auto SP = Loc.getAs<StmtPoint>()) {
+    if (const Stmt *S = SP->getStmt())
+      Detail = S->getStmtClassName();
+  }
+  auto SLoc = Loc.getSourceLocation();
+  if (!SLoc)
+    return llvm::TimeTraceMetadata{Detail, ""};
+  const auto &SM = Pred->getLocationContext()
+                       ->getAnalysisDeclContext()
+                       ->getASTContext()
+                       .getSourceManager();
+  auto Line = SM.getPresumedLineNumber(*SLoc);
+  auto Fname = SM.getFilename(*SLoc);
+  return llvm::TimeTraceMetadata{Detail, Fname.str(), static_cast<int>(Line)};
+}
+
+void CoreEngine::dispatchWorkItem(ExplodedNode *Pred, ProgramPoint Loc,
+                                  const WorkListUnit &WU) {
+  llvm::TimeTraceScope tcs{timeTraceScopeName(Loc), [Loc, Pred]() {
+                             return timeTraceMetadata(Pred, Loc);
+                           }};
   // Dispatch on the location type.
   switch (Loc.getKind()) {
     case ProgramPoint::BlockEdgeKind:
diff --git a/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp b/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
index 91c9b08..189d7d6 100644
--- a/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
+++ b/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
@@ -39,6 +39,7 @@
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Program.h"
+#include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
 #include <memory>
@@ -358,9 +359,40 @@ private:
 
   /// Print \p S to stderr if \c Opts.AnalyzerDisplayProgress is set.
   void reportAnalyzerProgress(StringRef S);
-}; // namespace
-} // end anonymous namespace
+};
 
+std::string timeTraceScopeDeclName(StringRef FunName, const Decl *D) {
+  if (llvm::timeTraceProfilerEnabled()) {
+    if (const NamedDecl *ND = dyn_cast<NamedDecl>(D))
+      return (FunName + " " + ND->getQualifiedNameAsString()).str();
+    return (FunName + " <anonymous> ").str();
+  }
+  return "";
+}
+
+llvm::TimeTraceMetadata timeTraceScopeDeclMetadata(const Decl *D) {
+  // If time-trace profiler is not enabled, this function is never called.
+  assert(llvm::timeTraceProfilerEnabled());
+  if (const auto &Loc = D->getBeginLoc(); Loc.isValid()) {
+    const auto &SM = D->getASTContext().getSourceManager();
+    std::string DeclName = AnalysisDeclContext::getFunctionName(D);
+    return llvm::TimeTraceMetadata{
+        std::move(DeclName), SM.getFilename(Loc).str(),
+        static_cast<int>(SM.getExpansionLineNumber(Loc))};
+  }
+  return llvm::TimeTraceMetadata{"", ""};
+}
+
+void flushReports(llvm::Timer *BugReporterTimer, BugReporter &BR) {
+  llvm::TimeTraceScope TCS{"Flushing reports"};
+  // Display warnings.
+  if (BugReporterTimer)
+    BugReporterTimer->startTimer();
+  BR.FlushReports();
+  if (BugReporterTimer)
+    BugReporterTimer->stopTimer();
+}
+} // namespace
 
 //===----------------------------------------------------------------------===//
 // AnalysisConsumer implementation.
@@ -658,6 +690,8 @@ AnalysisConsumer::getModeForDecl(Decl *D, AnalysisMode Mode) {
 void AnalysisConsumer::HandleCode(Decl *D, AnalysisMode Mode,
                                   ExprEngine::InliningModes IMode,
                                   SetOfConstDecls *VisitedCallees) {
+  llvm::TimeTraceScope TCS(timeTraceScopeDeclName("HandleCode", D),
+                           [D]() { return timeTraceScopeDeclMetadata(D); });
   if (!D->hasBody())
     return;
   Mode = getModeForDecl(D, Mode);
@@ -742,12 +776,7 @@ void AnalysisConsumer::RunPathSensitiveChecks(Decl *D,
   if (Mgr->options.visualizeExplodedGraphWithGraphViz)
     Eng.ViewGraph(Mgr->options.TrimGraph);
 
-  // Display warnings.
-  if (BugReporterTimer)
-    BugReporterTimer->startTimer();
-  Eng.getBugReporter().FlushReports();
-  if (BugReporterTimer)
-    BugReporterTimer->stopTimer();
+  flushReports(BugReporterTimer.get(), Eng.getBugReporter());
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp
index 4219f67..2b4c2bb7 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp
@@ -142,11 +142,13 @@ llvm::Expected<TranslationUnitDeps>
 DependencyScanningTool::getTranslationUnitDependencies(
     const std::vector<std::string> &CommandLine, StringRef CWD,
     const llvm::DenseSet<ModuleID> &AlreadySeen,
-    LookupModuleOutputCallback LookupModuleOutput) {
+    LookupModuleOutputCallback LookupModuleOutput,
+    std::optional<llvm::MemoryBufferRef> TUBuffer) {
   FullDependencyConsumer Consumer(AlreadySeen);
   CallbackActionController Controller(LookupModuleOutput);
-  llvm::Error Result =
-      Worker.computeDependencies(CWD, CommandLine, Consumer, Controller);
+  llvm::Error Result = Worker.computeDependencies(CWD, CommandLine, Consumer,
+                                                  Controller, TUBuffer);
+
   if (Result)
     return std::move(Result);
   return Consumer.takeTranslationUnitDeps();
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
index 5a648df..d15b74a 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -24,9 +24,11 @@
 #include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
 #include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
 #include "clang/Tooling/Tooling.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
 #include "llvm/TargetParser/Host.h"
 #include <optional>
 
@@ -521,20 +523,43 @@ DependencyScanningWorker::DependencyScanningWorker(
   }
 }
 
-llvm::Error DependencyScanningWorker::computeDependencies(
-    StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
-    DependencyConsumer &Consumer, DependencyActionController &Controller,
-    std::optional<StringRef> ModuleName) {
+static std::unique_ptr<DiagnosticOptions>
+createDiagOptions(const std::vector<std::string> &CommandLine) {
   std::vector<const char *> CLI;
   for (const std::string &Arg : CommandLine)
     CLI.push_back(Arg.c_str());
   auto DiagOpts = CreateAndPopulateDiagOpts(CLI);
   sanitizeDiagOpts(*DiagOpts);
+  return DiagOpts;
+}
+
+llvm::Error DependencyScanningWorker::computeDependencies(
+    StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
+    DependencyConsumer &Consumer, DependencyActionController &Controller,
+    std::optional<llvm::MemoryBufferRef> TUBuffer) {
+  // Capture the emitted diagnostics and report them to the client
+  // in the case of a failure.
+  std::string DiagnosticOutput;
+  llvm::raw_string_ostream DiagnosticsOS(DiagnosticOutput);
+  auto DiagOpts = createDiagOptions(CommandLine);
+  TextDiagnosticPrinter DiagPrinter(DiagnosticsOS, DiagOpts.release());
 
+  if (computeDependencies(WorkingDirectory, CommandLine, Consumer, Controller,
+                          DiagPrinter, TUBuffer))
+    return llvm::Error::success();
+  return llvm::make_error<llvm::StringError>(DiagnosticsOS.str(),
+                                             llvm::inconvertibleErrorCode());
+}
+
+llvm::Error DependencyScanningWorker::computeDependencies(
+    StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
+    DependencyConsumer &Consumer, DependencyActionController &Controller,
+    StringRef ModuleName) {
   // Capture the emitted diagnostics and report them to the client
   // in the case of a failure.
   std::string DiagnosticOutput;
   llvm::raw_string_ostream DiagnosticsOS(DiagnosticOutput);
+  auto DiagOpts = createDiagOptions(CommandLine);
   TextDiagnosticPrinter DiagPrinter(DiagnosticsOS, DiagOpts.release());
 
   if (computeDependencies(WorkingDirectory, CommandLine, Consumer, Controller,
@@ -604,54 +629,22 @@ static bool createAndRunToolInvocation(
   return true;
 }
 
-bool DependencyScanningWorker::computeDependencies(
+bool DependencyScanningWorker::scanDependencies(
     StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
     DependencyConsumer &Consumer, DependencyActionController &Controller,
-    DiagnosticConsumer &DC, std::optional<StringRef> ModuleName) {
-  // Reset what might have been modified in the previous worker invocation.
-  BaseFS->setCurrentWorkingDirectory(WorkingDirectory);
-
-  std::optional<std::vector<std::string>> ModifiedCommandLine;
-  llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> ModifiedFS;
-
-  // If we're scanning based on a module name alone, we don't expect the client
-  // to provide us with an input file. However, the driver really wants to have
-  // one. Let's just make it up to make the driver happy.
-  if (ModuleName) {
-    auto OverlayFS =
-        llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS);
-    auto InMemoryFS =
-        llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
-    InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory);
-    OverlayFS->pushOverlay(InMemoryFS);
-    ModifiedFS = OverlayFS;
-
-    SmallString<128> FakeInputPath;
-    // TODO: We should retry the creation if the path already exists.
-    llvm::sys::fs::createUniquePath(*ModuleName + "-%%%%%%%%.input",
-                                    FakeInputPath,
-                                    /*MakeAbsolute=*/false);
-    InMemoryFS->addFile(FakeInputPath, 0, llvm::MemoryBuffer::getMemBuffer(""));
-
-    ModifiedCommandLine = CommandLine;
-    ModifiedCommandLine->emplace_back(FakeInputPath);
-  }
-
-  const std::vector<std::string> &FinalCommandLine =
-      ModifiedCommandLine ? *ModifiedCommandLine : CommandLine;
-  auto &FinalFS = ModifiedFS ? ModifiedFS : BaseFS;
-
+    DiagnosticConsumer &DC, llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
+    std::optional<StringRef> ModuleName) {
   auto FileMgr =
-      llvm::makeIntrusiveRefCnt<FileManager>(FileSystemOptions{}, FinalFS);
+      llvm::makeIntrusiveRefCnt<FileManager>(FileSystemOptions{}, FS);
 
-  std::vector<const char *> FinalCCommandLine(FinalCommandLine.size(), nullptr);
-  llvm::transform(FinalCommandLine, FinalCCommandLine.begin(),
+  std::vector<const char *> CCommandLine(CommandLine.size(), nullptr);
+  llvm::transform(CommandLine, CCommandLine.begin(),
                   [](const std::string &Str) { return Str.c_str(); });
-
-  auto DiagOpts = CreateAndPopulateDiagOpts(FinalCCommandLine);
+  auto DiagOpts = CreateAndPopulateDiagOpts(CCommandLine);
   sanitizeDiagOpts(*DiagOpts);
   IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
-      CompilerInstance::createDiagnostics(*FinalFS, DiagOpts.release(), &DC,
+      CompilerInstance::createDiagnostics(FileMgr->getVirtualFileSystem(),
+                                          DiagOpts.release(), &DC,
                                           /*ShouldOwnClient=*/false);
 
   // Although `Diagnostics` are used only for command-line parsing, the
@@ -667,12 +660,12 @@ bool DependencyScanningWorker::computeDependencies(
                                   DisableFree, ModuleName);
 
   bool Success = false;
-  if (FinalCommandLine[1] == "-cc1") {
-    Success = createAndRunToolInvocation(FinalCommandLine, Action, *FileMgr,
+  if (CommandLine[1] == "-cc1") {
+    Success = createAndRunToolInvocation(CommandLine, Action, *FileMgr,
                                          PCHContainerOps, *Diags, Consumer);
   } else {
     Success = forEachDriverJob(
-        FinalCommandLine, *Diags, *FileMgr, [&](const driver::Command &Cmd) {
+        CommandLine, *Diags, *FileMgr, [&](const driver::Command &Cmd) {
           if (StringRef(Cmd.getCreator().getName()) != "clang") {
             // Non-clang command. Just pass through to the dependency
             // consumer.
@@ -699,8 +692,77 @@ bool DependencyScanningWorker::computeDependencies(
 
   if (Success && !Action.hasScanned())
     Diags->Report(diag::err_fe_expected_compiler_job)
-        << llvm::join(FinalCommandLine, " ");
+        << llvm::join(CommandLine, " ");
   return Success && Action.hasScanned();
 }
 
+bool DependencyScanningWorker::computeDependencies(
+    StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
+    DependencyConsumer &Consumer, DependencyActionController &Controller,
+    DiagnosticConsumer &DC, std::optional<llvm::MemoryBufferRef> TUBuffer) {
+  // Reset what might have been modified in the previous worker invocation.
+  BaseFS->setCurrentWorkingDirectory(WorkingDirectory);
+
+  std::optional<std::vector<std::string>> ModifiedCommandLine;
+  llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> ModifiedFS;
+
+  // If we're scanning based on a module name alone, we don't expect the client
+  // to provide us with an input file. However, the driver really wants to have
+  // one. Let's just make it up to make the driver happy.
+  if (TUBuffer) {
+    auto OverlayFS =
+        llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS);
+    auto InMemoryFS =
+        llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
+    InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory);
+    auto InputPath = TUBuffer->getBufferIdentifier();
+    InMemoryFS->addFile(
+        InputPath, 0,
+        llvm::MemoryBuffer::getMemBufferCopy(TUBuffer->getBuffer()));
+    llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay =
+        InMemoryFS;
+
+    OverlayFS->pushOverlay(InMemoryOverlay);
+    ModifiedFS = OverlayFS;
+    ModifiedCommandLine = CommandLine;
+    ModifiedCommandLine->emplace_back(InputPath);
+  }
+
+  const std::vector<std::string> &FinalCommandLine =
+      ModifiedCommandLine ? *ModifiedCommandLine : CommandLine;
+  auto &FinalFS = ModifiedFS ? ModifiedFS : BaseFS;
+
+  return scanDependencies(WorkingDirectory, FinalCommandLine, Consumer,
+                          Controller, DC, FinalFS, /*ModuleName=*/std::nullopt);
+}
+
+bool DependencyScanningWorker::computeDependencies(
+    StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
+    DependencyConsumer &Consumer, DependencyActionController &Controller,
+    DiagnosticConsumer &DC, StringRef ModuleName) {
+  // Reset what might have been modified in the previous worker invocation.
+  BaseFS->setCurrentWorkingDirectory(WorkingDirectory);
+
+  // If we're scanning based on a module name alone, we don't expect the client
+  // to provide us with an input file. However, the driver really wants to have
+  // one. Let's just make it up to make the driver happy.
+  auto OverlayFS =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS);
+  auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
+  InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory);
+  SmallString<128> FakeInputPath;
+  // TODO: We should retry the creation if the path already exists.
+  llvm::sys::fs::createUniquePath(ModuleName + "-%%%%%%%%.input", FakeInputPath,
+                                  /*MakeAbsolute=*/false);
+  InMemoryFS->addFile(FakeInputPath, 0, llvm::MemoryBuffer::getMemBuffer(""));
+  llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay = InMemoryFS;
+
+  OverlayFS->pushOverlay(InMemoryOverlay);
+  auto ModifiedCommandLine = CommandLine;
+  ModifiedCommandLine.emplace_back(FakeInputPath);
+
+  return scanDependencies(WorkingDirectory, ModifiedCommandLine, Consumer,
+                          Controller, DC, OverlayFS, ModuleName);
+}
+
 DependencyActionController::~DependencyActionController() {}
diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
index 732de7b..1c5f4c4 100644
--- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
+++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
@@ -394,9 +394,91 @@ void ModuleDepCollector::applyDiscoveredDependencies(CompilerInvocation &CI) {
   }
 }
 
+static bool isSafeToIgnoreCWD(const CowCompilerInvocation &CI) {
+  // Check if the command line input uses relative paths.
+  // It is not safe to ignore the current working directory if any of the
+  // command line inputs use relative paths.
+#define IF_RELATIVE_RETURN_FALSE(PATH)                                         \
+  do {                                                                         \
+    if (!PATH.empty() && !llvm::sys::path::is_absolute(PATH))                  \
+      return false;                                                            \
+  } while (0)
+
+#define IF_ANY_RELATIVE_RETURN_FALSE(PATHS)                                    \
+  do {                                                                         \
+    if (llvm::any_of(PATHS, [](const auto &P) {                                \
+          return !P.empty() && !llvm::sys::path::is_absolute(P);               \
+        }))                                                                    \
+      return false;                                                            \
+  } while (0)
+
+  // Header search paths.
+  const auto &HeaderSearchOpts = CI.getHeaderSearchOpts();
+  IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.Sysroot);
+  for (auto &Entry : HeaderSearchOpts.UserEntries)
+    if (Entry.IgnoreSysRoot)
+      IF_RELATIVE_RETURN_FALSE(Entry.Path);
+  IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.ResourceDir);
+  IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.ModuleCachePath);
+  IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.ModuleUserBuildPath);
+  for (auto I = HeaderSearchOpts.PrebuiltModuleFiles.begin(),
+            E = HeaderSearchOpts.PrebuiltModuleFiles.end();
+       I != E;) {
+    auto Current = I++;
+    IF_RELATIVE_RETURN_FALSE(Current->second);
+  }
+  IF_ANY_RELATIVE_RETURN_FALSE(HeaderSearchOpts.PrebuiltModulePaths);
+  IF_ANY_RELATIVE_RETURN_FALSE(HeaderSearchOpts.VFSOverlayFiles);
+
+  // Preprocessor options.
+  const auto &PPOpts = CI.getPreprocessorOpts();
+  IF_ANY_RELATIVE_RETURN_FALSE(PPOpts.MacroIncludes);
+  IF_ANY_RELATIVE_RETURN_FALSE(PPOpts.Includes);
+  IF_RELATIVE_RETURN_FALSE(PPOpts.ImplicitPCHInclude);
+
+  // Frontend options.
+  const auto &FrontendOpts = CI.getFrontendOpts();
+  for (const FrontendInputFile &Input : FrontendOpts.Inputs) {
+    if (Input.isBuffer())
+      continue; // FIXME: Can this happen when parsing command-line?
+
+    IF_RELATIVE_RETURN_FALSE(Input.getFile());
+  }
+  IF_RELATIVE_RETURN_FALSE(FrontendOpts.CodeCompletionAt.FileName);
+  IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ModuleMapFiles);
+  IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ModuleFiles);
+  IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ModulesEmbedFiles);
+  IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ASTMergeFiles);
+  IF_RELATIVE_RETURN_FALSE(FrontendOpts.OverrideRecordLayoutsFile);
+  IF_RELATIVE_RETURN_FALSE(FrontendOpts.StatsFile);
+
+  // Filesystem options.
+  const auto &FileSystemOpts = CI.getFileSystemOpts();
+  IF_RELATIVE_RETURN_FALSE(FileSystemOpts.WorkingDir);
+
+  // Codegen options.
+  const auto &CodeGenOpts = CI.getCodeGenOpts();
+  IF_RELATIVE_RETURN_FALSE(CodeGenOpts.DebugCompilationDir);
+  IF_RELATIVE_RETURN_FALSE(CodeGenOpts.CoverageCompilationDir);
+
+  // Sanitizer options.
+  IF_ANY_RELATIVE_RETURN_FALSE(CI.getLangOpts().NoSanitizeFiles);
+
+  // Coverage mappings.
+  IF_RELATIVE_RETURN_FALSE(CodeGenOpts.ProfileInstrumentUsePath);
+  IF_RELATIVE_RETURN_FALSE(CodeGenOpts.SampleProfileFile);
+  IF_RELATIVE_RETURN_FALSE(CodeGenOpts.ProfileRemappingFile);
+
+  // Dependency output options.
+  for (auto &ExtraDep : CI.getDependencyOutputOpts().ExtraDeps)
+    IF_RELATIVE_RETURN_FALSE(ExtraDep.first);
+
+  return true;
+}
+
 static std::string getModuleContextHash(const ModuleDeps &MD,
                                         const CowCompilerInvocation &CI,
-                                        bool EagerLoadModules,
+                                        bool EagerLoadModules, bool IgnoreCWD,
                                         llvm::vfs::FileSystem &VFS) {
   llvm::HashBuilder<llvm::TruncatedBLAKE3<16>, llvm::endianness::native>
       HashBuilder;
@@ -407,8 +489,11 @@ static std::string getModuleContextHash(const ModuleDeps &MD,
   HashBuilder.add(getClangFullRepositoryVersion());
   HashBuilder.add(serialization::VERSION_MAJOR, serialization::VERSION_MINOR);
   llvm::ErrorOr<std::string> CWD = VFS.getCurrentWorkingDirectory();
-  if (CWD)
+  auto &FSOpts = const_cast<FileSystemOptions &>(CI.getFileSystemOpts());
+  if (CWD && !IgnoreCWD)
     HashBuilder.add(*CWD);
+  else
+    FSOpts.WorkingDir.clear();
 
   // Hash the BuildInvocation without any input files.
   SmallString<0> ArgVec;
@@ -440,8 +525,11 @@ static std::string getModuleContextHash(const ModuleDeps &MD,
 
 void ModuleDepCollector::associateWithContextHash(
     const CowCompilerInvocation &CI, ModuleDeps &Deps) {
-  Deps.ID.ContextHash = getModuleContextHash(
-      Deps, CI, EagerLoadModules, ScanInstance.getVirtualFileSystem());
+  bool IgnoreCWD = any(OptimizeArgs & ScanningOptimizations::IgnoreCWD) &&
+                   isSafeToIgnoreCWD(CI);
+  Deps.ID.ContextHash =
+      getModuleContextHash(Deps, CI, EagerLoadModules, IgnoreCWD,
+                           ScanInstance.getVirtualFileSystem());
   bool Inserted = ModuleDepsByID.insert({Deps.ID, &Deps}).second;
   (void)Inserted;
   assert(Inserted && "duplicate module mapping");
diff --git a/clang/test/AST/ByteCode/cxx20.cpp b/clang/test/AST/ByteCode/cxx20.cpp
index 268226a..6f65fa5 100644
--- a/clang/test/AST/ByteCode/cxx20.cpp
+++ b/clang/test/AST/ByteCode/cxx20.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fcxx-exceptions -fexperimental-new-constant-interpreter -std=c++20 -verify=both,expected -fcxx-exceptions %s
+// RUN: %clang_cc1 -fcxx-exceptions -fexperimental-new-constant-interpreter -std=c++20 -verify=both,expected -fcxx-exceptions %s -DNEW_INTERP
 // RUN: %clang_cc1 -fcxx-exceptions -std=c++20 -verify=both,ref -fcxx-exceptions %s
 
 void test_alignas_operand() {
@@ -908,3 +908,57 @@ namespace TemporaryInNTTP {
                // expected-note {{created here}}
   B<2> j2; /// Ok.
 }
+
+namespace LocalDestroy {
+  /// This is reduced from a libc++ test case.
+  /// The local f.TI.copied points to the local variable Copied, and we used to
+  /// destroy Copied before f, causing problems later on when a DeadBlock had a
+  /// pointer pointing to it that was already destroyed.
+  struct TrackInitialization {
+    bool *copied_;
+  };
+  struct TrackingPred : TrackInitialization {
+    constexpr TrackingPred(bool *copied) : TrackInitialization(copied) {}
+  };
+  struct F {
+    const TrackingPred &TI;
+  };
+  constexpr int f() {
+    bool Copied = false;
+    TrackingPred TI(&Copied);
+    F f{TI};
+    return 1;
+  }
+  static_assert(f() == 1);
+}
+
+namespace PseudoDtor {
+  constexpr int f1() {
+   using T = int;
+   int a = 0;
+   a.~T();
+   return a; // both-note {{read of object outside its lifetime}}
+  }
+  static_assert(f1() == 0); // both-error {{not an integral constant expression}} \
+                            // both-note {{in call to}}
+
+  constexpr int f2() {
+   using T = int;
+   int a = 0;
+   a.~T();
+   a = 0; // both-note {{assignment to object outside its lifetime}}
+   return a;
+  }
+  static_assert(f2() == 0); // both-error {{not an integral constant expression}} \
+                            // both-note {{in call to}}
+
+#ifdef NEW_INTERP
+  /// FIXME: Currently crashes with the current interpreter, see https://github.com/llvm/llvm-project/issues/53741
+  constexpr int f3() {
+   using T = int;
+   0 .~T();
+   return 0;
+  }
+  static_assert(f3() == 0);
+#endif
+}
diff --git a/clang/test/AST/ByteCode/unions.cpp b/clang/test/AST/ByteCode/unions.cpp
index b1fbb0c..c6b5e34 100644
--- a/clang/test/AST/ByteCode/unions.cpp
+++ b/clang/test/AST/ByteCode/unions.cpp
@@ -463,4 +463,26 @@ namespace MoveAssign {
   }
   static_assert(f()== 12);
 }
+
+namespace IFD {
+  template <class T>
+  struct Optional {
+    struct {
+      union {
+        char null_state;
+        T val;
+      };
+    };
+    constexpr Optional() : null_state(){}
+  };
+
+  constexpr bool test()
+  {
+    Optional<int> opt{};
+    Optional<int> opt2{};
+    opt = opt2;
+    return true;
+  }
+  static_assert(test());
+}
 #endif
diff --git a/clang/test/AST/ast-dump-comment.cpp b/clang/test/AST/ast-dump-comment.cpp
index 9798295..40c3edb 100644
--- a/clang/test/AST/ast-dump-comment.cpp
+++ b/clang/test/AST/ast-dump-comment.cpp
@@ -91,6 +91,19 @@ int Test_HTMLTagComment;
 // CHECK-NEXT:       TextComment{{.*}} Text=" "
 // CHECK-NEXT:       HTMLStartTagComment{{.*}} Name="br" SelfClosing
 
+/// <a
+///     href="foo"
+/// >Aaa</a>b
+int Test_HTMLTagMultilineBCPL;
+// CHECK:      VarDecl{{.*}}Test_HTMLTagMultilineBCPL
+// CHECK-NEXT:   FullComment
+// CHECK-NEXT:     ParagraphComment
+// CHECK-NEXT:       TextComment{{.*}} Text=" "
+// CHECK-NEXT:       HTMLStartTagComment{{.*}} Name="a" Attrs:  "href="foo"
+// CHECK-NEXT:       TextComment{{.*}} Text="Aaa"
+// CHECK-NEXT:       HTMLEndTagComment{{.*}} Name="a"
+// CHECK-NEXT:       TextComment{{.*}} Text="b"
+
 /// \verbatim
 /// Aaa
 /// \endverbatim
diff --git a/clang/test/AST/ast-dump-templates.cpp b/clang/test/AST/ast-dump-templates.cpp
index 9fcafbc..86af8c5 100644
--- a/clang/test/AST/ast-dump-templates.cpp
+++ b/clang/test/AST/ast-dump-templates.cpp
@@ -1,7 +1,15 @@
-// RUN: %clang_cc1 -std=c++1z -ast-print %s > %t
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -std=c++17 -ast-dump=json %s | FileCheck --check-prefix=JSON %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -std=c++17 -ast-print %s > %t
 // RUN: FileCheck < %t %s -check-prefix=CHECK1
 // RUN: FileCheck < %t %s -check-prefix=CHECK2
-// RUN: %clang_cc1 -std=c++1z -ast-dump %s | FileCheck --check-prefix=DUMP %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -std=c++17 -ast-dump %s | FileCheck --check-prefix=DUMP %s
+
+// Test with serialization:
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -std=c++17 -emit-pch -o %t %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -x c++ -std=c++17 -include-pch %t \
+// RUN: -ast-dump-all /dev/null \
+// RUN: | sed -e "s/ <undeserialized declarations>//" -e "s/ imported//" \
+// RUN: | FileCheck --strict-whitespace --check-prefix=DUMP %s
 
 template <int X, typename Y, int Z = 5>
 struct foo {
@@ -118,3 +126,6038 @@ void func() {
 // DUMP-NEXT:     `-TemplateTypeParm {{.*}} 'Key'
 }
 }
+
+namespace test7 {
+  template <template<class> class TT> struct A {};
+  template <class...> class B {};
+  template struct A<B>;
+// DUMP-LABEL: NamespaceDecl {{.*}} test7{{$}}
+// DUMP:       ClassTemplateSpecializationDecl {{.*}} struct A definition explicit_instantiation_definition strict-pack-match{{$}}
+} // namespce test7
+
+// NOTE: CHECK lines have been autogenerated by gen_ast_dump_json_test.py
+
+
+// JSON-NOT: {{^}}Dumping
+// JSON:  "kind": "TranslationUnitDecl",
+// JSON-NEXT:  "loc": {},
+// JSON-NEXT:  "range": {
+// JSON-NEXT:   "begin": {},
+// JSON-NEXT:   "end": {}
+// JSON-NEXT:  },
+// JSON-NEXT:  "inner": [
+// JSON-NEXT:   {
+// JSON-NEXT:    "id": "0x{{.*}}",
+// JSON-NEXT:    "kind": "TypedefDecl",
+// JSON-NEXT:    "loc": {},
+// JSON-NEXT:    "range": {
+// JSON-NEXT:     "begin": {},
+// JSON-NEXT:     "end": {}
+// JSON-NEXT:    },
+// JSON-NEXT:    "isImplicit": true,
+// JSON-NEXT:    "name": "__int128_t",
+// JSON-NEXT:    "type": {
+// JSON-NEXT:     "qualType": "__int128"
+// JSON-NEXT:    },
+// JSON-NEXT:    "inner": [
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "BuiltinType",
+// JSON-NEXT:      "type": {
+// JSON-NEXT:       "qualType": "__int128"
+// JSON-NEXT:      }
+// JSON-NEXT:     }
+// JSON-NEXT:    ]
+// JSON-NEXT:   },
+// JSON-NEXT:   {
+// JSON-NEXT:    "id": "0x{{.*}}",
+// JSON-NEXT:    "kind": "TypedefDecl",
+// JSON-NEXT:    "loc": {},
+// JSON-NEXT:    "range": {
+// JSON-NEXT:     "begin": {},
+// JSON-NEXT:     "end": {}
+// JSON-NEXT:    },
+// JSON-NEXT:    "isImplicit": true,
+// JSON-NEXT:    "name": "__uint128_t",
+// JSON-NEXT:    "type": {
+// JSON-NEXT:     "qualType": "unsigned __int128"
+// JSON-NEXT:    },
+// JSON-NEXT:    "inner": [
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "BuiltinType",
+// JSON-NEXT:      "type": {
+// JSON-NEXT:       "qualType": "unsigned __int128"
+// JSON-NEXT:      }
+// JSON-NEXT:     }
+// JSON-NEXT:    ]
+// JSON-NEXT:   },
+// JSON-NEXT:   {
+// JSON-NEXT:    "id": "0x{{.*}}",
+// JSON-NEXT:    "kind": "TypedefDecl",
+// JSON-NEXT:    "loc": {},
+// JSON-NEXT:    "range": {
+// JSON-NEXT:     "begin": {},
+// JSON-NEXT:     "end": {}
+// JSON-NEXT:    },
+// JSON-NEXT:    "isImplicit": true,
+// JSON-NEXT:    "name": "__NSConstantString",
+// JSON-NEXT:    "type": {
+// JSON-NEXT:     "qualType": "__NSConstantString_tag"
+// JSON-NEXT:    },
+// JSON-NEXT:    "inner": [
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "RecordType",
+// JSON-NEXT:      "type": {
+// JSON-NEXT:       "qualType": "__NSConstantString_tag"
+// JSON-NEXT:      },
+// JSON-NEXT:      "decl": {
+// JSON-NEXT:       "id": "0x{{.*}}",
+// JSON-NEXT:       "kind": "CXXRecordDecl",
+// JSON-NEXT:       "name": "__NSConstantString_tag"
+// JSON-NEXT:      }
+// JSON-NEXT:     }
+// JSON-NEXT:    ]
+// JSON-NEXT:   },
+// JSON-NEXT:   {
+// JSON-NEXT:    "id": "0x{{.*}}",
+// JSON-NEXT:    "kind": "TypedefDecl",
+// JSON-NEXT:    "loc": {},
+// JSON-NEXT:    "range": {
+// JSON-NEXT:     "begin": {},
+// JSON-NEXT:     "end": {}
+// JSON-NEXT:    },
+// JSON-NEXT:    "isImplicit": true,
+// JSON-NEXT:    "name": "__builtin_ms_va_list",
+// JSON-NEXT:    "type": {
+// JSON-NEXT:     "qualType": "char *"
+// JSON-NEXT:    },
+// JSON-NEXT:    "inner": [
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "PointerType",
+// JSON-NEXT:      "type": {
+// JSON-NEXT:       "qualType": "char *"
+// JSON-NEXT:      },
+// JSON-NEXT:      "inner": [
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "BuiltinType",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "char"
+// JSON-NEXT:        }
+// JSON-NEXT:       }
+// JSON-NEXT:      ]
+// JSON-NEXT:     }
+// JSON-NEXT:    ]
+// JSON-NEXT:   },
+// JSON-NEXT:   {
+// JSON-NEXT:    "id": "0x{{.*}}",
+// JSON-NEXT:    "kind": "TypedefDecl",
+// JSON-NEXT:    "loc": {},
+// JSON-NEXT:    "range": {
+// JSON-NEXT:     "begin": {},
+// JSON-NEXT:     "end": {}
+// JSON-NEXT:    },
+// JSON-NEXT:    "isImplicit": true,
+// JSON-NEXT:    "name": "__builtin_va_list",
+// JSON-NEXT:    "type": {
+// JSON-NEXT:     "qualType": "__va_list_tag[1]"
+// JSON-NEXT:    },
+// JSON-NEXT:    "inner": [
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "ConstantArrayType",
+// JSON-NEXT:      "type": {
+// JSON-NEXT:       "qualType": "__va_list_tag[1]"
+// JSON-NEXT:      },
+// JSON-NEXT:      "size": 1,
+// JSON-NEXT:      "inner": [
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "RecordType",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "__va_list_tag"
+// JSON-NEXT:        },
+// JSON-NEXT:        "decl": {
+// JSON-NEXT:         "id": "0x{{.*}}",
+// JSON-NEXT:         "kind": "CXXRecordDecl",
+// JSON-NEXT:         "name": "__va_list_tag"
+// JSON-NEXT:        }
+// JSON-NEXT:       }
+// JSON-NEXT:      ]
+// JSON-NEXT:     }
+// JSON-NEXT:    ]
+// JSON-NEXT:   },
+// JSON-NEXT:   {
+// JSON-NEXT:    "id": "0x{{.*}}",
+// JSON-NEXT:    "kind": "ClassTemplateDecl",
+// JSON-NEXT:    "loc": {
+// JSON-NEXT:     "offset": 812,
+// JSON-NEXT:     "file": "{{.*}}",
+// JSON-NEXT:     "line": 15,
+// JSON-NEXT:     "col": 8,
+// JSON-NEXT:     "tokLen": 3
+// JSON-NEXT:    },
+// JSON-NEXT:    "range": {
+// JSON-NEXT:     "begin": {
+// JSON-NEXT:      "offset": 765,
+// JSON-NEXT:      "line": 14,
+// JSON-NEXT:      "col": 1,
+// JSON-NEXT:      "tokLen": 8
+// JSON-NEXT:     },
+// JSON-NEXT:     "end": {
+// JSON-NEXT:      "offset": 879,
+// JSON-NEXT:      "line": 19,
+// JSON-NEXT:      "col": 1,
+// JSON-NEXT:      "tokLen": 1
+// JSON-NEXT:     }
+// JSON-NEXT:    },
+// JSON-NEXT:    "name": "foo",
+// JSON-NEXT:    "inner": [
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "NonTypeTemplateParmDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 779,
+// JSON-NEXT:       "line": 14,
+// JSON-NEXT:       "col": 15,
+// JSON-NEXT:       "tokLen": 1
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 775,
+// JSON-NEXT:        "col": 11,
+// JSON-NEXT:        "tokLen": 3
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 779,
+// JSON-NEXT:        "col": 15,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "isReferenced": true,
+// JSON-NEXT:      "name": "X",
+// JSON-NEXT:      "type": {
+// JSON-NEXT:       "qualType": "int"
+// JSON-NEXT:      },
+// JSON-NEXT:      "depth": 0,
+// JSON-NEXT:      "index": 0
+// JSON-NEXT:     },
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "TemplateTypeParmDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 791,
+// JSON-NEXT:       "col": 27,
+// JSON-NEXT:       "tokLen": 1
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 782,
+// JSON-NEXT:        "col": 18,
+// JSON-NEXT:        "tokLen": 8
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 791,
+// JSON-NEXT:        "col": 27,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "isReferenced": true,
+// JSON-NEXT:      "name": "Y",
+// JSON-NEXT:      "tagUsed": "typename",
+// JSON-NEXT:      "depth": 0,
+// JSON-NEXT:      "index": 1
+// JSON-NEXT:     },
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "NonTypeTemplateParmDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 798,
+// JSON-NEXT:       "col": 34,
+// JSON-NEXT:       "tokLen": 1
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 794,
+// JSON-NEXT:        "col": 30,
+// JSON-NEXT:        "tokLen": 3
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 802,
+// JSON-NEXT:        "col": 38,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "isReferenced": true,
+// JSON-NEXT:      "name": "Z",
+// JSON-NEXT:      "type": {
+// JSON-NEXT:       "qualType": "int"
+// JSON-NEXT:      },
+// JSON-NEXT:      "depth": 0,
+// JSON-NEXT:      "index": 2,
+// JSON-NEXT:      "defaultArg": {
+// JSON-NEXT:       "kind": "TemplateArgument",
+// JSON-NEXT:       "isExpr": true
+// JSON-NEXT:      },
+// JSON-NEXT:      "inner": [
+// JSON-NEXT:       {
+// JSON-NEXT:        "kind": "TemplateArgument",
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 802,
+// JSON-NEXT:          "col": 38,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 802,
+// JSON-NEXT:          "col": 38,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "isExpr": true,
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "IntegerLiteral",
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 802,
+// JSON-NEXT:            "col": 38,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 802,
+// JSON-NEXT:            "col": 38,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "type": {
+// JSON-NEXT:           "qualType": "int"
+// JSON-NEXT:          },
+// JSON-NEXT:          "valueCategory": "prvalue",
+// JSON-NEXT:          "value": "5"
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       }
+// JSON-NEXT:      ]
+// JSON-NEXT:     },
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "CXXRecordDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 812,
+// JSON-NEXT:       "line": 15,
+// JSON-NEXT:       "col": 8,
+// JSON-NEXT:       "tokLen": 3
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 805,
+// JSON-NEXT:        "col": 1,
+// JSON-NEXT:        "tokLen": 6
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 879,
+// JSON-NEXT:        "line": 19,
+// JSON-NEXT:        "col": 1,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "name": "foo",
+// JSON-NEXT:      "tagUsed": "struct",
+// JSON-NEXT:      "completeDefinition": true,
+// JSON-NEXT:      "definitionData": {
+// JSON-NEXT:       "canConstDefaultInit": true,
+// JSON-NEXT:       "copyAssign": {
+// JSON-NEXT:        "hasConstParam": true,
+// JSON-NEXT:        "implicitHasConstParam": true,
+// JSON-NEXT:        "needsImplicit": true,
+// JSON-NEXT:        "simple": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       },
+// JSON-NEXT:       "copyCtor": {
+// JSON-NEXT:        "hasConstParam": true,
+// JSON-NEXT:        "implicitHasConstParam": true,
+// JSON-NEXT:        "needsImplicit": true,
+// JSON-NEXT:        "simple": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       },
+// JSON-NEXT:       "defaultCtor": {
+// JSON-NEXT:        "exists": true,
+// JSON-NEXT:        "nonTrivial": true,
+// JSON-NEXT:        "userProvided": true
+// JSON-NEXT:       },
+// JSON-NEXT:       "dtor": {
+// JSON-NEXT:        "irrelevant": true,
+// JSON-NEXT:        "needsImplicit": true,
+// JSON-NEXT:        "simple": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       },
+// JSON-NEXT:       "hasUserDeclaredConstructor": true,
+// JSON-NEXT:       "isStandardLayout": true,
+// JSON-NEXT:       "isTriviallyCopyable": true,
+// JSON-NEXT:       "moveAssign": {
+// JSON-NEXT:        "exists": true,
+// JSON-NEXT:        "needsImplicit": true,
+// JSON-NEXT:        "simple": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       },
+// JSON-NEXT:       "moveCtor": {
+// JSON-NEXT:        "exists": true,
+// JSON-NEXT:        "needsImplicit": true,
+// JSON-NEXT:        "simple": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "inner": [
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "CXXRecordDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 812,
+// JSON-NEXT:         "line": 15,
+// JSON-NEXT:         "col": 8,
+// JSON-NEXT:         "tokLen": 3
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 805,
+// JSON-NEXT:          "col": 1,
+// JSON-NEXT:          "tokLen": 6
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 812,
+// JSON-NEXT:          "col": 8,
+// JSON-NEXT:          "tokLen": 3
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "isImplicit": true,
+// JSON-NEXT:        "isReferenced": true,
+// JSON-NEXT:        "name": "foo",
+// JSON-NEXT:        "tagUsed": "struct"
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "FieldDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 824,
+// JSON-NEXT:         "line": 16,
+// JSON-NEXT:         "col": 7,
+// JSON-NEXT:         "tokLen": 8
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 820,
+// JSON-NEXT:          "col": 3,
+// JSON-NEXT:          "tokLen": 3
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 824,
+// JSON-NEXT:          "col": 7,
+// JSON-NEXT:          "tokLen": 8
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "name": "constant",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "int"
+// JSON-NEXT:        }
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "CXXConstructorDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 836,
+// JSON-NEXT:         "line": 17,
+// JSON-NEXT:         "col": 3,
+// JSON-NEXT:         "tokLen": 3
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 836,
+// JSON-NEXT:          "col": 3,
+// JSON-NEXT:          "tokLen": 3
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 843,
+// JSON-NEXT:          "col": 10,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "name": "foo<X, Y, Z>",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "void ()"
+// JSON-NEXT:        },
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "CompoundStmt",
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 842,
+// JSON-NEXT:            "col": 9,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 843,
+// JSON-NEXT:            "col": 10,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          }
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "CXXMethodDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 849,
+// JSON-NEXT:         "line": 18,
+// JSON-NEXT:         "col": 5,
+// JSON-NEXT:         "tokLen": 6
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 847,
+// JSON-NEXT:          "col": 3,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 877,
+// JSON-NEXT:          "col": 33,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "name": "getSum",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "Y ()"
+// JSON-NEXT:        },
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "CompoundStmt",
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 858,
+// JSON-NEXT:            "col": 14,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 877,
+// JSON-NEXT:            "col": 33,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "inner": [
+// JSON-NEXT:           {
+// JSON-NEXT:            "id": "0x{{.*}}",
+// JSON-NEXT:            "kind": "ReturnStmt",
+// JSON-NEXT:            "range": {
+// JSON-NEXT:             "begin": {
+// JSON-NEXT:              "offset": 860,
+// JSON-NEXT:              "col": 16,
+// JSON-NEXT:              "tokLen": 6
+// JSON-NEXT:             },
+// JSON-NEXT:             "end": {
+// JSON-NEXT:              "offset": 874,
+// JSON-NEXT:              "col": 30,
+// JSON-NEXT:              "tokLen": 1
+// JSON-NEXT:             }
+// JSON-NEXT:            },
+// JSON-NEXT:            "inner": [
+// JSON-NEXT:             {
+// JSON-NEXT:              "id": "0x{{.*}}",
+// JSON-NEXT:              "kind": "CXXUnresolvedConstructExpr",
+// JSON-NEXT:              "range": {
+// JSON-NEXT:               "begin": {
+// JSON-NEXT:                "offset": 867,
+// JSON-NEXT:                "col": 23,
+// JSON-NEXT:                "tokLen": 1
+// JSON-NEXT:               },
+// JSON-NEXT:               "end": {
+// JSON-NEXT:                "offset": 874,
+// JSON-NEXT:                "col": 30,
+// JSON-NEXT:                "tokLen": 1
+// JSON-NEXT:               }
+// JSON-NEXT:              },
+// JSON-NEXT:              "type": {
+// JSON-NEXT:               "qualType": "Y"
+// JSON-NEXT:              },
+// JSON-NEXT:              "valueCategory": "prvalue",
+// JSON-NEXT:              "inner": [
+// JSON-NEXT:               {
+// JSON-NEXT:                "id": "0x{{.*}}",
+// JSON-NEXT:                "kind": "BinaryOperator",
+// JSON-NEXT:                "range": {
+// JSON-NEXT:                 "begin": {
+// JSON-NEXT:                  "offset": 869,
+// JSON-NEXT:                  "col": 25,
+// JSON-NEXT:                  "tokLen": 1
+// JSON-NEXT:                 },
+// JSON-NEXT:                 "end": {
+// JSON-NEXT:                  "offset": 873,
+// JSON-NEXT:                  "col": 29,
+// JSON-NEXT:                  "tokLen": 1
+// JSON-NEXT:                 }
+// JSON-NEXT:                },
+// JSON-NEXT:                "type": {
+// JSON-NEXT:                 "qualType": "int"
+// JSON-NEXT:                },
+// JSON-NEXT:                "valueCategory": "prvalue",
+// JSON-NEXT:                "opcode": "+",
+// JSON-NEXT:                "inner": [
+// JSON-NEXT:                 {
+// JSON-NEXT:                  "id": "0x{{.*}}",
+// JSON-NEXT:                  "kind": "DeclRefExpr",
+// JSON-NEXT:                  "range": {
+// JSON-NEXT:                   "begin": {
+// JSON-NEXT:                    "offset": 869,
+// JSON-NEXT:                    "col": 25,
+// JSON-NEXT:                    "tokLen": 1
+// JSON-NEXT:                   },
+// JSON-NEXT:                   "end": {
+// JSON-NEXT:                    "offset": 869,
+// JSON-NEXT:                    "col": 25,
+// JSON-NEXT:                    "tokLen": 1
+// JSON-NEXT:                   }
+// JSON-NEXT:                  },
+// JSON-NEXT:                  "type": {
+// JSON-NEXT:                   "qualType": "int"
+// JSON-NEXT:                  },
+// JSON-NEXT:                  "valueCategory": "prvalue",
+// JSON-NEXT:                  "referencedDecl": {
+// JSON-NEXT:                   "id": "0x{{.*}}",
+// JSON-NEXT:                   "kind": "NonTypeTemplateParmDecl",
+// JSON-NEXT:                   "name": "X",
+// JSON-NEXT:                   "type": {
+// JSON-NEXT:                    "qualType": "int"
+// JSON-NEXT:                   }
+// JSON-NEXT:                  }
+// JSON-NEXT:                 },
+// JSON-NEXT:                 {
+// JSON-NEXT:                  "id": "0x{{.*}}",
+// JSON-NEXT:                  "kind": "DeclRefExpr",
+// JSON-NEXT:                  "range": {
+// JSON-NEXT:                   "begin": {
+// JSON-NEXT:                    "offset": 873,
+// JSON-NEXT:                    "col": 29,
+// JSON-NEXT:                    "tokLen": 1
+// JSON-NEXT:                   },
+// JSON-NEXT:                   "end": {
+// JSON-NEXT:                    "offset": 873,
+// JSON-NEXT:                    "col": 29,
+// JSON-NEXT:                    "tokLen": 1
+// JSON-NEXT:                   }
+// JSON-NEXT:                  },
+// JSON-NEXT:                  "type": {
+// JSON-NEXT:                   "qualType": "int"
+// JSON-NEXT:                  },
+// JSON-NEXT:                  "valueCategory": "prvalue",
+// JSON-NEXT:                  "referencedDecl": {
+// JSON-NEXT:                   "id": "0x{{.*}}",
+// JSON-NEXT:                   "kind": "NonTypeTemplateParmDecl",
+// JSON-NEXT:                   "name": "Z",
+// JSON-NEXT:                   "type": {
+// JSON-NEXT:                    "qualType": "int"
+// JSON-NEXT:                   }
+// JSON-NEXT:                  }
+// JSON-NEXT:                 }
+// JSON-NEXT:                ]
+// JSON-NEXT:               }
+// JSON-NEXT:              ]
+// JSON-NEXT:             }
+// JSON-NEXT:            ]
+// JSON-NEXT:           }
+// JSON-NEXT:          ]
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       }
+// JSON-NEXT:      ]
+// JSON-NEXT:     },
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "ClassTemplateSpecializationDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 812,
+// JSON-NEXT:       "line": 15,
+// JSON-NEXT:       "col": 8,
+// JSON-NEXT:       "tokLen": 3
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 765,
+// JSON-NEXT:        "line": 14,
+// JSON-NEXT:        "col": 1,
+// JSON-NEXT:        "tokLen": 8
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 879,
+// JSON-NEXT:        "line": 19,
+// JSON-NEXT:        "col": 1,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "name": "foo",
+// JSON-NEXT:      "tagUsed": "struct",
+// JSON-NEXT:      "completeDefinition": true,
+// JSON-NEXT:      "definitionData": {
+// JSON-NEXT:       "canConstDefaultInit": true,
+// JSON-NEXT:       "canPassInRegisters": true,
+// JSON-NEXT:       "copyAssign": {
+// JSON-NEXT:        "hasConstParam": true,
+// JSON-NEXT:        "implicitHasConstParam": true,
+// JSON-NEXT:        "needsImplicit": true,
+// JSON-NEXT:        "simple": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       },
+// JSON-NEXT:       "copyCtor": {
+// JSON-NEXT:        "hasConstParam": true,
+// JSON-NEXT:        "implicitHasConstParam": true,
+// JSON-NEXT:        "simple": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       },
+// JSON-NEXT:       "defaultCtor": {
+// JSON-NEXT:        "exists": true,
+// JSON-NEXT:        "nonTrivial": true,
+// JSON-NEXT:        "userProvided": true
+// JSON-NEXT:       },
+// JSON-NEXT:       "dtor": {
+// JSON-NEXT:        "irrelevant": true,
+// JSON-NEXT:        "simple": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       },
+// JSON-NEXT:       "hasUserDeclaredConstructor": true,
+// JSON-NEXT:       "isStandardLayout": true,
+// JSON-NEXT:       "isTriviallyCopyable": true,
+// JSON-NEXT:       "moveAssign": {
+// JSON-NEXT:        "exists": true,
+// JSON-NEXT:        "needsImplicit": true,
+// JSON-NEXT:        "simple": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       },
+// JSON-NEXT:       "moveCtor": {
+// JSON-NEXT:        "exists": true,
+// JSON-NEXT:        "simple": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "inner": [
+// JSON-NEXT:       {
+// JSON-NEXT:        "kind": "TemplateArgument",
+// JSON-NEXT:        "value": 5
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "kind": "TemplateArgument",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "int"
+// JSON-NEXT:        },
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "BuiltinType",
+// JSON-NEXT:          "type": {
+// JSON-NEXT:           "qualType": "int"
+// JSON-NEXT:          }
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "kind": "TemplateArgument",
+// JSON-NEXT:        "value": 5
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "CXXRecordDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 812,
+// JSON-NEXT:         "line": 15,
+// JSON-NEXT:         "col": 8,
+// JSON-NEXT:         "tokLen": 3
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 805,
+// JSON-NEXT:          "col": 1,
+// JSON-NEXT:          "tokLen": 6
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 812,
+// JSON-NEXT:          "col": 8,
+// JSON-NEXT:          "tokLen": 3
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "isImplicit": true,
+// JSON-NEXT:        "name": "foo",
+// JSON-NEXT:        "tagUsed": "struct"
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "FieldDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 824,
+// JSON-NEXT:         "line": 16,
+// JSON-NEXT:         "col": 7,
+// JSON-NEXT:         "tokLen": 8
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 820,
+// JSON-NEXT:          "col": 3,
+// JSON-NEXT:          "tokLen": 3
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 824,
+// JSON-NEXT:          "col": 7,
+// JSON-NEXT:          "tokLen": 8
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "name": "constant",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "int"
+// JSON-NEXT:        }
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "CXXConstructorDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 836,
+// JSON-NEXT:         "line": 17,
+// JSON-NEXT:         "col": 3,
+// JSON-NEXT:         "tokLen": 3
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 836,
+// JSON-NEXT:          "col": 3,
+// JSON-NEXT:          "tokLen": 3
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 843,
+// JSON-NEXT:          "col": 10,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "isUsed": true,
+// JSON-NEXT:        "name": "foo",
+// JSON-NEXT:        "mangledName": "_ZN3fooILi5EiLi5EEC1Ev",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "void ()"
+// JSON-NEXT:        },
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "CompoundStmt",
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 842,
+// JSON-NEXT:            "col": 9,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 843,
+// JSON-NEXT:            "col": 10,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          }
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "CXXMethodDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 849,
+// JSON-NEXT:         "line": 18,
+// JSON-NEXT:         "col": 5,
+// JSON-NEXT:         "tokLen": 6
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 847,
+// JSON-NEXT:          "col": 3,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 877,
+// JSON-NEXT:          "col": 33,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "isUsed": true,
+// JSON-NEXT:        "name": "getSum",
+// JSON-NEXT:        "mangledName": "_ZN3fooILi5EiLi5EE6getSumEv",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "int ()"
+// JSON-NEXT:        },
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "CompoundStmt",
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 858,
+// JSON-NEXT:            "col": 14,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 877,
+// JSON-NEXT:            "col": 33,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "inner": [
+// JSON-NEXT:           {
+// JSON-NEXT:            "id": "0x{{.*}}",
+// JSON-NEXT:            "kind": "ReturnStmt",
+// JSON-NEXT:            "range": {
+// JSON-NEXT:             "begin": {
+// JSON-NEXT:              "offset": 860,
+// JSON-NEXT:              "col": 16,
+// JSON-NEXT:              "tokLen": 6
+// JSON-NEXT:             },
+// JSON-NEXT:             "end": {
+// JSON-NEXT:              "offset": 874,
+// JSON-NEXT:              "col": 30,
+// JSON-NEXT:              "tokLen": 1
+// JSON-NEXT:             }
+// JSON-NEXT:            },
+// JSON-NEXT:            "inner": [
+// JSON-NEXT:             {
+// JSON-NEXT:              "id": "0x{{.*}}",
+// JSON-NEXT:              "kind": "CXXFunctionalCastExpr",
+// JSON-NEXT:              "range": {
+// JSON-NEXT:               "begin": {
+// JSON-NEXT:                "offset": 867,
+// JSON-NEXT:                "col": 23,
+// JSON-NEXT:                "tokLen": 1
+// JSON-NEXT:               },
+// JSON-NEXT:               "end": {
+// JSON-NEXT:                "offset": 874,
+// JSON-NEXT:                "col": 30,
+// JSON-NEXT:                "tokLen": 1
+// JSON-NEXT:               }
+// JSON-NEXT:              },
+// JSON-NEXT:              "type": {
+// JSON-NEXT:               "qualType": "int"
+// JSON-NEXT:              },
+// JSON-NEXT:              "valueCategory": "prvalue",
+// JSON-NEXT:              "castKind": "NoOp",
+// JSON-NEXT:              "inner": [
+// JSON-NEXT:               {
+// JSON-NEXT:                "id": "0x{{.*}}",
+// JSON-NEXT:                "kind": "BinaryOperator",
+// JSON-NEXT:                "range": {
+// JSON-NEXT:                 "begin": {
+// JSON-NEXT:                  "offset": 869,
+// JSON-NEXT:                  "col": 25,
+// JSON-NEXT:                  "tokLen": 1
+// JSON-NEXT:                 },
+// JSON-NEXT:                 "end": {
+// JSON-NEXT:                  "offset": 873,
+// JSON-NEXT:                  "col": 29,
+// JSON-NEXT:                  "tokLen": 1
+// JSON-NEXT:                 }
+// JSON-NEXT:                },
+// JSON-NEXT:                "type": {
+// JSON-NEXT:                 "qualType": "int"
+// JSON-NEXT:                },
+// JSON-NEXT:                "valueCategory": "prvalue",
+// JSON-NEXT:                "opcode": "+",
+// JSON-NEXT:                "inner": [
+// JSON-NEXT:                 {
+// JSON-NEXT:                  "id": "0x{{.*}}",
+// JSON-NEXT:                  "kind": "SubstNonTypeTemplateParmExpr",
+// JSON-NEXT:                  "range": {
+// JSON-NEXT:                   "begin": {
+// JSON-NEXT:                    "offset": 869,
+// JSON-NEXT:                    "col": 25,
+// JSON-NEXT:                    "tokLen": 1
+// JSON-NEXT:                   },
+// JSON-NEXT:                   "end": {
+// JSON-NEXT:                    "offset": 869,
+// JSON-NEXT:                    "col": 25,
+// JSON-NEXT:                    "tokLen": 1
+// JSON-NEXT:                   }
+// JSON-NEXT:                  },
+// JSON-NEXT:                  "type": {
+// JSON-NEXT:                   "qualType": "int"
+// JSON-NEXT:                  },
+// JSON-NEXT:                  "valueCategory": "prvalue",
+// JSON-NEXT:                  "inner": [
+// JSON-NEXT:                   {
+// JSON-NEXT:                    "id": "0x{{.*}}",
+// JSON-NEXT:                    "kind": "NonTypeTemplateParmDecl",
+// JSON-NEXT:                    "loc": {
+// JSON-NEXT:                     "offset": 779,
+// JSON-NEXT:                     "line": 14,
+// JSON-NEXT:                     "col": 15,
+// JSON-NEXT:                     "tokLen": 1
+// JSON-NEXT:                    },
+// JSON-NEXT:                    "range": {
+// JSON-NEXT:                     "begin": {
+// JSON-NEXT:                      "offset": 775,
+// JSON-NEXT:                      "col": 11,
+// JSON-NEXT:                      "tokLen": 3
+// JSON-NEXT:                     },
+// JSON-NEXT:                     "end": {
+// JSON-NEXT:                      "offset": 779,
+// JSON-NEXT:                      "col": 15,
+// JSON-NEXT:                      "tokLen": 1
+// JSON-NEXT:                     }
+// JSON-NEXT:                    },
+// JSON-NEXT:                    "isReferenced": true,
+// JSON-NEXT:                    "name": "X",
+// JSON-NEXT:                    "type": {
+// JSON-NEXT:                     "qualType": "int"
+// JSON-NEXT:                    },
+// JSON-NEXT:                    "depth": 0,
+// JSON-NEXT:                    "index": 0
+// JSON-NEXT:                   },
+// JSON-NEXT:                   {
+// JSON-NEXT:                    "id": "0x{{.*}}",
+// JSON-NEXT:                    "kind": "IntegerLiteral",
+// JSON-NEXT:                    "range": {
+// JSON-NEXT:                     "begin": {
+// JSON-NEXT:                      "offset": 869,
+// JSON-NEXT:                      "line": 18,
+// JSON-NEXT:                      "col": 25,
+// JSON-NEXT:                      "tokLen": 1
+// JSON-NEXT:                     },
+// JSON-NEXT:                     "end": {
+// JSON-NEXT:                      "offset": 869,
+// JSON-NEXT:                      "col": 25,
+// JSON-NEXT:                      "tokLen": 1
+// JSON-NEXT:                     }
+// JSON-NEXT:                    },
+// JSON-NEXT:                    "type": {
+// JSON-NEXT:                     "qualType": "int"
+// JSON-NEXT:                    },
+// JSON-NEXT:                    "valueCategory": "prvalue",
+// JSON-NEXT:                    "value": "5"
+// JSON-NEXT:                   }
+// JSON-NEXT:                  ]
+// JSON-NEXT:                 },
+// JSON-NEXT:                 {
+// JSON-NEXT:                  "id": "0x{{.*}}",
+// JSON-NEXT:                  "kind": "SubstNonTypeTemplateParmExpr",
+// JSON-NEXT:                  "range": {
+// JSON-NEXT:                   "begin": {
+// JSON-NEXT:                    "offset": 873,
+// JSON-NEXT:                    "col": 29,
+// JSON-NEXT:                    "tokLen": 1
+// JSON-NEXT:                   },
+// JSON-NEXT:                   "end": {
+// JSON-NEXT:                    "offset": 873,
+// JSON-NEXT:                    "col": 29,
+// JSON-NEXT:                    "tokLen": 1
+// JSON-NEXT:                   }
+// JSON-NEXT:                  },
+// JSON-NEXT:                  "type": {
+// JSON-NEXT:                   "qualType": "int"
+// JSON-NEXT:                  },
+// JSON-NEXT:                  "valueCategory": "prvalue",
+// JSON-NEXT:                  "inner": [
+// JSON-NEXT:                   {
+// JSON-NEXT:                    "id": "0x{{.*}}",
+// JSON-NEXT:                    "kind": "NonTypeTemplateParmDecl",
+// JSON-NEXT:                    "loc": {
+// JSON-NEXT:                     "offset": 798,
+// JSON-NEXT:                     "line": 14,
+// JSON-NEXT:                     "col": 34,
+// JSON-NEXT:                     "tokLen": 1
+// JSON-NEXT:                    },
+// JSON-NEXT:                    "range": {
+// JSON-NEXT:                     "begin": {
+// JSON-NEXT:                      "offset": 794,
+// JSON-NEXT:                      "col": 30,
+// JSON-NEXT:                      "tokLen": 3
+// JSON-NEXT:                     },
+// JSON-NEXT:                     "end": {
+// JSON-NEXT:                      "offset": 802,
+// JSON-NEXT:                      "col": 38,
+// JSON-NEXT:                      "tokLen": 1
+// JSON-NEXT:                     }
+// JSON-NEXT:                    },
+// JSON-NEXT:                    "isReferenced": true,
+// JSON-NEXT:                    "name": "Z",
+// JSON-NEXT:                    "type": {
+// JSON-NEXT:                     "qualType": "int"
+// JSON-NEXT:                    },
+// JSON-NEXT:                    "depth": 0,
+// JSON-NEXT:                    "index": 2,
+// JSON-NEXT:                    "defaultArg": {
+// JSON-NEXT:                     "kind": "TemplateArgument",
+// JSON-NEXT:                     "isExpr": true
+// JSON-NEXT:                    },
+// JSON-NEXT:                    "inner": [
+// JSON-NEXT:                     {
+// JSON-NEXT:                      "kind": "TemplateArgument",
+// JSON-NEXT:                      "range": {
+// JSON-NEXT:                       "begin": {
+// JSON-NEXT:                        "offset": 802,
+// JSON-NEXT:                        "col": 38,
+// JSON-NEXT:                        "tokLen": 1
+// JSON-NEXT:                       },
+// JSON-NEXT:                       "end": {
+// JSON-NEXT:                        "offset": 802,
+// JSON-NEXT:                        "col": 38,
+// JSON-NEXT:                        "tokLen": 1
+// JSON-NEXT:                       }
+// JSON-NEXT:                      },
+// JSON-NEXT:                      "isExpr": true,
+// JSON-NEXT:                      "inner": [
+// JSON-NEXT:                       {
+// JSON-NEXT:                        "id": "0x{{.*}}",
+// JSON-NEXT:                        "kind": "IntegerLiteral",
+// JSON-NEXT:                        "range": {
+// JSON-NEXT:                         "begin": {
+// JSON-NEXT:                          "offset": 802,
+// JSON-NEXT:                          "col": 38,
+// JSON-NEXT:                          "tokLen": 1
+// JSON-NEXT:                         },
+// JSON-NEXT:                         "end": {
+// JSON-NEXT:                          "offset": 802,
+// JSON-NEXT:                          "col": 38,
+// JSON-NEXT:                          "tokLen": 1
+// JSON-NEXT:                         }
+// JSON-NEXT:                        },
+// JSON-NEXT:                        "type": {
+// JSON-NEXT:                         "qualType": "int"
+// JSON-NEXT:                        },
+// JSON-NEXT:                        "valueCategory": "prvalue",
+// JSON-NEXT:                        "value": "5"
+// JSON-NEXT:                       }
+// JSON-NEXT:                      ]
+// JSON-NEXT:                     }
+// JSON-NEXT:                    ]
+// JSON-NEXT:                   },
+// JSON-NEXT:                   {
+// JSON-NEXT:                    "id": "0x{{.*}}",
+// JSON-NEXT:                    "kind": "IntegerLiteral",
+// JSON-NEXT:                    "range": {
+// JSON-NEXT:                     "begin": {
+// JSON-NEXT:                      "offset": 873,
+// JSON-NEXT:                      "line": 18,
+// JSON-NEXT:                      "col": 29,
+// JSON-NEXT:                      "tokLen": 1
+// JSON-NEXT:                     },
+// JSON-NEXT:                     "end": {
+// JSON-NEXT:                      "offset": 873,
+// JSON-NEXT:                      "col": 29,
+// JSON-NEXT:                      "tokLen": 1
+// JSON-NEXT:                     }
+// JSON-NEXT:                    },
+// JSON-NEXT:                    "type": {
+// JSON-NEXT:                     "qualType": "int"
+// JSON-NEXT:                    },
+// JSON-NEXT:                    "valueCategory": "prvalue",
+// JSON-NEXT:                    "value": "5"
+// JSON-NEXT:                   }
+// JSON-NEXT:                  ]
+// JSON-NEXT:                 }
+// JSON-NEXT:                ]
+// JSON-NEXT:               }
+// JSON-NEXT:              ]
+// JSON-NEXT:             }
+// JSON-NEXT:            ]
+// JSON-NEXT:           }
+// JSON-NEXT:          ]
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "CXXConstructorDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 812,
+// JSON-NEXT:         "line": 15,
+// JSON-NEXT:         "col": 8,
+// JSON-NEXT:         "tokLen": 3
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 812,
+// JSON-NEXT:          "col": 8,
+// JSON-NEXT:          "tokLen": 3
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 812,
+// JSON-NEXT:          "col": 8,
+// JSON-NEXT:          "tokLen": 3
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "isImplicit": true,
+// JSON-NEXT:        "name": "foo",
+// JSON-NEXT:        "mangledName": "_ZN3fooILi5EiLi5EEC1ERKS0_",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "void (const foo<5, int> &)"
+// JSON-NEXT:        },
+// JSON-NEXT:        "inline": true,
+// JSON-NEXT:        "constexpr": true,
+// JSON-NEXT:        "explicitlyDefaulted": "default",
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "ParmVarDecl",
+// JSON-NEXT:          "loc": {
+// JSON-NEXT:           "offset": 812,
+// JSON-NEXT:           "col": 8,
+// JSON-NEXT:           "tokLen": 3
+// JSON-NEXT:          },
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 812,
+// JSON-NEXT:            "col": 8,
+// JSON-NEXT:            "tokLen": 3
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 812,
+// JSON-NEXT:            "col": 8,
+// JSON-NEXT:            "tokLen": 3
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "type": {
+// JSON-NEXT:           "qualType": "const foo<5, int> &"
+// JSON-NEXT:          }
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "CXXConstructorDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 812,
+// JSON-NEXT:         "col": 8,
+// JSON-NEXT:         "tokLen": 3
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 812,
+// JSON-NEXT:          "col": 8,
+// JSON-NEXT:          "tokLen": 3
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 812,
+// JSON-NEXT:          "col": 8,
+// JSON-NEXT:          "tokLen": 3
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "isImplicit": true,
+// JSON-NEXT:        "name": "foo",
+// JSON-NEXT:        "mangledName": "_ZN3fooILi5EiLi5EEC1EOS0_",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "void (foo<5, int> &&)"
+// JSON-NEXT:        },
+// JSON-NEXT:        "inline": true,
+// JSON-NEXT:        "constexpr": true,
+// JSON-NEXT:        "explicitlyDefaulted": "default",
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "ParmVarDecl",
+// JSON-NEXT:          "loc": {
+// JSON-NEXT:           "offset": 812,
+// JSON-NEXT:           "col": 8,
+// JSON-NEXT:           "tokLen": 3
+// JSON-NEXT:          },
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 812,
+// JSON-NEXT:            "col": 8,
+// JSON-NEXT:            "tokLen": 3
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 812,
+// JSON-NEXT:            "col": 8,
+// JSON-NEXT:            "tokLen": 3
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "type": {
+// JSON-NEXT:           "qualType": "foo<5, int> &&"
+// JSON-NEXT:          }
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "CXXDestructorDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 812,
+// JSON-NEXT:         "col": 8,
+// JSON-NEXT:         "tokLen": 3
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 812,
+// JSON-NEXT:          "col": 8,
+// JSON-NEXT:          "tokLen": 3
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 812,
+// JSON-NEXT:          "col": 8,
+// JSON-NEXT:          "tokLen": 3
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "isImplicit": true,
+// JSON-NEXT:        "isReferenced": true,
+// JSON-NEXT:        "name": "~foo",
+// JSON-NEXT:        "mangledName": "_ZN3fooILi5EiLi5EED1Ev",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "void () noexcept"
+// JSON-NEXT:        },
+// JSON-NEXT:        "inline": true,
+// JSON-NEXT:        "explicitlyDefaulted": "default"
+// JSON-NEXT:       }
+// JSON-NEXT:      ]
+// JSON-NEXT:     },
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "ClassTemplateSpecializationDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 812,
+// JSON-NEXT:       "col": 8,
+// JSON-NEXT:       "tokLen": 3
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 765,
+// JSON-NEXT:        "line": 14,
+// JSON-NEXT:        "col": 1,
+// JSON-NEXT:        "tokLen": 8
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 879,
+// JSON-NEXT:        "line": 19,
+// JSON-NEXT:        "col": 1,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "name": "foo",
+// JSON-NEXT:      "tagUsed": "struct",
+// JSON-NEXT:      "completeDefinition": true,
+// JSON-NEXT:      "definitionData": {
+// JSON-NEXT:       "canConstDefaultInit": true,
+// JSON-NEXT:       "canPassInRegisters": true,
+// JSON-NEXT:       "copyAssign": {
+// JSON-NEXT:        "hasConstParam": true,
+// JSON-NEXT:        "implicitHasConstParam": true,
+// JSON-NEXT:        "needsImplicit": true,
+// JSON-NEXT:        "simple": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       },
+// JSON-NEXT:       "copyCtor": {
+// JSON-NEXT:        "hasConstParam": true,
+// JSON-NEXT:        "implicitHasConstParam": true,
+// JSON-NEXT:        "simple": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       },
+// JSON-NEXT:       "defaultCtor": {
+// JSON-NEXT:        "exists": true,
+// JSON-NEXT:        "nonTrivial": true,
+// JSON-NEXT:        "userProvided": true
+// JSON-NEXT:       },
+// JSON-NEXT:       "dtor": {
+// JSON-NEXT:        "irrelevant": true,
+// JSON-NEXT:        "simple": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       },
+// JSON-NEXT:       "hasUserDeclaredConstructor": true,
+// JSON-NEXT:       "isStandardLayout": true,
+// JSON-NEXT:       "isTriviallyCopyable": true,
+// JSON-NEXT:       "moveAssign": {
+// JSON-NEXT:        "exists": true,
+// JSON-NEXT:        "needsImplicit": true,
+// JSON-NEXT:        "simple": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       },
+// JSON-NEXT:       "moveCtor": {
+// JSON-NEXT:        "exists": true,
+// JSON-NEXT:        "simple": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "inner": [
+// JSON-NEXT:       {
+// JSON-NEXT:        "kind": "TemplateArgument",
+// JSON-NEXT:        "value": 2
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "kind": "TemplateArgument",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "double"
+// JSON-NEXT:        },
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "BuiltinType",
+// JSON-NEXT:          "type": {
+// JSON-NEXT:           "qualType": "double"
+// JSON-NEXT:          }
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "kind": "TemplateArgument",
+// JSON-NEXT:        "value": 3
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "CXXRecordDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 812,
+// JSON-NEXT:         "line": 15,
+// JSON-NEXT:         "col": 8,
+// JSON-NEXT:         "tokLen": 3
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 805,
+// JSON-NEXT:          "col": 1,
+// JSON-NEXT:          "tokLen": 6
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 812,
+// JSON-NEXT:          "col": 8,
+// JSON-NEXT:          "tokLen": 3
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "isImplicit": true,
+// JSON-NEXT:        "name": "foo",
+// JSON-NEXT:        "tagUsed": "struct"
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "FieldDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 824,
+// JSON-NEXT:         "line": 16,
+// JSON-NEXT:         "col": 7,
+// JSON-NEXT:         "tokLen": 8
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 820,
+// JSON-NEXT:          "col": 3,
+// JSON-NEXT:          "tokLen": 3
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 824,
+// JSON-NEXT:          "col": 7,
+// JSON-NEXT:          "tokLen": 8
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "name": "constant",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "int"
+// JSON-NEXT:        }
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "CXXConstructorDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 836,
+// JSON-NEXT:         "line": 17,
+// JSON-NEXT:         "col": 3,
+// JSON-NEXT:         "tokLen": 3
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 836,
+// JSON-NEXT:          "col": 3,
+// JSON-NEXT:          "tokLen": 3
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 843,
+// JSON-NEXT:          "col": 10,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "isUsed": true,
+// JSON-NEXT:        "name": "foo",
+// JSON-NEXT:        "mangledName": "_ZN3fooILi2EdLi3EEC1Ev",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "void ()"
+// JSON-NEXT:        },
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "CompoundStmt",
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 842,
+// JSON-NEXT:            "col": 9,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 843,
+// JSON-NEXT:            "col": 10,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          }
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "CXXMethodDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 849,
+// JSON-NEXT:         "line": 18,
+// JSON-NEXT:         "col": 5,
+// JSON-NEXT:         "tokLen": 6
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 847,
+// JSON-NEXT:          "col": 3,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 877,
+// JSON-NEXT:          "col": 33,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "isUsed": true,
+// JSON-NEXT:        "name": "getSum",
+// JSON-NEXT:        "mangledName": "_ZN3fooILi2EdLi3EE6getSumEv",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "double ()"
+// JSON-NEXT:        },
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "CompoundStmt",
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 858,
+// JSON-NEXT:            "col": 14,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 877,
+// JSON-NEXT:            "col": 33,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "inner": [
+// JSON-NEXT:           {
+// JSON-NEXT:            "id": "0x{{.*}}",
+// JSON-NEXT:            "kind": "ReturnStmt",
+// JSON-NEXT:            "range": {
+// JSON-NEXT:             "begin": {
+// JSON-NEXT:              "offset": 860,
+// JSON-NEXT:              "col": 16,
+// JSON-NEXT:              "tokLen": 6
+// JSON-NEXT:             },
+// JSON-NEXT:             "end": {
+// JSON-NEXT:              "offset": 874,
+// JSON-NEXT:              "col": 30,
+// JSON-NEXT:              "tokLen": 1
+// JSON-NEXT:             }
+// JSON-NEXT:            },
+// JSON-NEXT:            "inner": [
+// JSON-NEXT:             {
+// JSON-NEXT:              "id": "0x{{.*}}",
+// JSON-NEXT:              "kind": "CXXFunctionalCastExpr",
+// JSON-NEXT:              "range": {
+// JSON-NEXT:               "begin": {
+// JSON-NEXT:                "offset": 867,
+// JSON-NEXT:                "col": 23,
+// JSON-NEXT:                "tokLen": 1
+// JSON-NEXT:               },
+// JSON-NEXT:               "end": {
+// JSON-NEXT:                "offset": 874,
+// JSON-NEXT:                "col": 30,
+// JSON-NEXT:                "tokLen": 1
+// JSON-NEXT:               }
+// JSON-NEXT:              },
+// JSON-NEXT:              "type": {
+// JSON-NEXT:               "qualType": "double"
+// JSON-NEXT:              },
+// JSON-NEXT:              "valueCategory": "prvalue",
+// JSON-NEXT:              "castKind": "NoOp",
+// JSON-NEXT:              "inner": [
+// JSON-NEXT:               {
+// JSON-NEXT:                "id": "0x{{.*}}",
+// JSON-NEXT:                "kind": "ImplicitCastExpr",
+// JSON-NEXT:                "range": {
+// JSON-NEXT:                 "begin": {
+// JSON-NEXT:                  "offset": 869,
+// JSON-NEXT:                  "col": 25,
+// JSON-NEXT:                  "tokLen": 1
+// JSON-NEXT:                 },
+// JSON-NEXT:                 "end": {
+// JSON-NEXT:                  "offset": 873,
+// JSON-NEXT:                  "col": 29,
+// JSON-NEXT:                  "tokLen": 1
+// JSON-NEXT:                 }
+// JSON-NEXT:                },
+// JSON-NEXT:                "type": {
+// JSON-NEXT:                 "qualType": "double"
+// JSON-NEXT:                },
+// JSON-NEXT:                "valueCategory": "prvalue",
+// JSON-NEXT:                "castKind": "IntegralToFloating",
+// JSON-NEXT:                "isPartOfExplicitCast": true,
+// JSON-NEXT:                "inner": [
+// JSON-NEXT:                 {
+// JSON-NEXT:                  "id": "0x{{.*}}",
+// JSON-NEXT:                  "kind": "BinaryOperator",
+// JSON-NEXT:                  "range": {
+// JSON-NEXT:                   "begin": {
+// JSON-NEXT:                    "offset": 869,
+// JSON-NEXT:                    "col": 25,
+// JSON-NEXT:                    "tokLen": 1
+// JSON-NEXT:                   },
+// JSON-NEXT:                   "end": {
+// JSON-NEXT:                    "offset": 873,
+// JSON-NEXT:                    "col": 29,
+// JSON-NEXT:                    "tokLen": 1
+// JSON-NEXT:                   }
+// JSON-NEXT:                  },
+// JSON-NEXT:                  "type": {
+// JSON-NEXT:                   "qualType": "int"
+// JSON-NEXT:                  },
+// JSON-NEXT:                  "valueCategory": "prvalue",
+// JSON-NEXT:                  "opcode": "+",
+// JSON-NEXT:                  "inner": [
+// JSON-NEXT:                   {
+// JSON-NEXT:                    "id": "0x{{.*}}",
+// JSON-NEXT:                    "kind": "SubstNonTypeTemplateParmExpr",
+// JSON-NEXT:                    "range": {
+// JSON-NEXT:                     "begin": {
+// JSON-NEXT:                      "offset": 869,
+// JSON-NEXT:                      "col": 25,
+// JSON-NEXT:                      "tokLen": 1
+// JSON-NEXT:                     },
+// JSON-NEXT:                     "end": {
+// JSON-NEXT:                      "offset": 869,
+// JSON-NEXT:                      "col": 25,
+// JSON-NEXT:                      "tokLen": 1
+// JSON-NEXT:                     }
+// JSON-NEXT:                    },
+// JSON-NEXT:                    "type": {
+// JSON-NEXT:                     "qualType": "int"
+// JSON-NEXT:                    },
+// JSON-NEXT:                    "valueCategory": "prvalue",
+// JSON-NEXT:                    "inner": [
+// JSON-NEXT:                     {
+// JSON-NEXT:                      "id": "0x{{.*}}",
+// JSON-NEXT:                      "kind": "NonTypeTemplateParmDecl",
+// JSON-NEXT:                      "loc": {
+// JSON-NEXT:                       "offset": 779,
+// JSON-NEXT:                       "line": 14,
+// JSON-NEXT:                       "col": 15,
+// JSON-NEXT:                       "tokLen": 1
+// JSON-NEXT:                      },
+// JSON-NEXT:                      "range": {
+// JSON-NEXT:                       "begin": {
+// JSON-NEXT:                        "offset": 775,
+// JSON-NEXT:                        "col": 11,
+// JSON-NEXT:                        "tokLen": 3
+// JSON-NEXT:                       },
+// JSON-NEXT:                       "end": {
+// JSON-NEXT:                        "offset": 779,
+// JSON-NEXT:                        "col": 15,
+// JSON-NEXT:                        "tokLen": 1
+// JSON-NEXT:                       }
+// JSON-NEXT:                      },
+// JSON-NEXT:                      "isReferenced": true,
+// JSON-NEXT:                      "name": "X",
+// JSON-NEXT:                      "type": {
+// JSON-NEXT:                       "qualType": "int"
+// JSON-NEXT:                      },
+// JSON-NEXT:                      "depth": 0,
+// JSON-NEXT:                      "index": 0
+// JSON-NEXT:                     },
+// JSON-NEXT:                     {
+// JSON-NEXT:                      "id": "0x{{.*}}",
+// JSON-NEXT:                      "kind": "IntegerLiteral",
+// JSON-NEXT:                      "range": {
+// JSON-NEXT:                       "begin": {
+// JSON-NEXT:                        "offset": 869,
+// JSON-NEXT:                        "line": 18,
+// JSON-NEXT:                        "col": 25,
+// JSON-NEXT:                        "tokLen": 1
+// JSON-NEXT:                       },
+// JSON-NEXT:                       "end": {
+// JSON-NEXT:                        "offset": 869,
+// JSON-NEXT:                        "col": 25,
+// JSON-NEXT:                        "tokLen": 1
+// JSON-NEXT:                       }
+// JSON-NEXT:                      },
+// JSON-NEXT:                      "type": {
+// JSON-NEXT:                       "qualType": "int"
+// JSON-NEXT:                      },
+// JSON-NEXT:                      "valueCategory": "prvalue",
+// JSON-NEXT:                      "value": "2"
+// JSON-NEXT:                     }
+// JSON-NEXT:                    ]
+// JSON-NEXT:                   },
+// JSON-NEXT:                   {
+// JSON-NEXT:                    "id": "0x{{.*}}",
+// JSON-NEXT:                    "kind": "SubstNonTypeTemplateParmExpr",
+// JSON-NEXT:                    "range": {
+// JSON-NEXT:                     "begin": {
+// JSON-NEXT:                      "offset": 873,
+// JSON-NEXT:                      "col": 29,
+// JSON-NEXT:                      "tokLen": 1
+// JSON-NEXT:                     },
+// JSON-NEXT:                     "end": {
+// JSON-NEXT:                      "offset": 873,
+// JSON-NEXT:                      "col": 29,
+// JSON-NEXT:                      "tokLen": 1
+// JSON-NEXT:                     }
+// JSON-NEXT:                    },
+// JSON-NEXT:                    "type": {
+// JSON-NEXT:                     "qualType": "int"
+// JSON-NEXT:                    },
+// JSON-NEXT:                    "valueCategory": "prvalue",
+// JSON-NEXT:                    "inner": [
+// JSON-NEXT:                     {
+// JSON-NEXT:                      "id": "0x{{.*}}",
+// JSON-NEXT:                      "kind": "NonTypeTemplateParmDecl",
+// JSON-NEXT:                      "loc": {
+// JSON-NEXT:                       "offset": 798,
+// JSON-NEXT:                       "line": 14,
+// JSON-NEXT:                       "col": 34,
+// JSON-NEXT:                       "tokLen": 1
+// JSON-NEXT:                      },
+// JSON-NEXT:                      "range": {
+// JSON-NEXT:                       "begin": {
+// JSON-NEXT:                        "offset": 794,
+// JSON-NEXT:                        "col": 30,
+// JSON-NEXT:                        "tokLen": 3
+// JSON-NEXT:                       },
+// JSON-NEXT:                       "end": {
+// JSON-NEXT:                        "offset": 802,
+// JSON-NEXT:                        "col": 38,
+// JSON-NEXT:                        "tokLen": 1
+// JSON-NEXT:                       }
+// JSON-NEXT:                      },
+// JSON-NEXT:                      "isReferenced": true,
+// JSON-NEXT:                      "name": "Z",
+// JSON-NEXT:                      "type": {
+// JSON-NEXT:                       "qualType": "int"
+// JSON-NEXT:                      },
+// JSON-NEXT:                      "depth": 0,
+// JSON-NEXT:                      "index": 2,
+// JSON-NEXT:                      "defaultArg": {
+// JSON-NEXT:                       "kind": "TemplateArgument",
+// JSON-NEXT:                       "isExpr": true
+// JSON-NEXT:                      },
+// JSON-NEXT:                      "inner": [
+// JSON-NEXT:                       {
+// JSON-NEXT:                        "kind": "TemplateArgument",
+// JSON-NEXT:                        "range": {
+// JSON-NEXT:                         "begin": {
+// JSON-NEXT:                          "offset": 802,
+// JSON-NEXT:                          "col": 38,
+// JSON-NEXT:                          "tokLen": 1
+// JSON-NEXT:                         },
+// JSON-NEXT:                         "end": {
+// JSON-NEXT:                          "offset": 802,
+// JSON-NEXT:                          "col": 38,
+// JSON-NEXT:                          "tokLen": 1
+// JSON-NEXT:                         }
+// JSON-NEXT:                        },
+// JSON-NEXT:                        "isExpr": true,
+// JSON-NEXT:                        "inner": [
+// JSON-NEXT:                         {
+// JSON-NEXT:                          "id": "0x{{.*}}",
+// JSON-NEXT:                          "kind": "IntegerLiteral",
+// JSON-NEXT:                          "range": {
+// JSON-NEXT:                           "begin": {
+// JSON-NEXT:                            "offset": 802,
+// JSON-NEXT:                            "col": 38,
+// JSON-NEXT:                            "tokLen": 1
+// JSON-NEXT:                           },
+// JSON-NEXT:                           "end": {
+// JSON-NEXT:                            "offset": 802,
+// JSON-NEXT:                            "col": 38,
+// JSON-NEXT:                            "tokLen": 1
+// JSON-NEXT:                           }
+// JSON-NEXT:                          },
+// JSON-NEXT:                          "type": {
+// JSON-NEXT:                           "qualType": "int"
+// JSON-NEXT:                          },
+// JSON-NEXT:                          "valueCategory": "prvalue",
+// JSON-NEXT:                          "value": "5"
+// JSON-NEXT:                         }
+// JSON-NEXT:                        ]
+// JSON-NEXT:                       }
+// JSON-NEXT:                      ]
+// JSON-NEXT:                     },
+// JSON-NEXT:                     {
+// JSON-NEXT:                      "id": "0x{{.*}}",
+// JSON-NEXT:                      "kind": "IntegerLiteral",
+// JSON-NEXT:                      "range": {
+// JSON-NEXT:                       "begin": {
+// JSON-NEXT:                        "offset": 873,
+// JSON-NEXT:                        "line": 18,
+// JSON-NEXT:                        "col": 29,
+// JSON-NEXT:                        "tokLen": 1
+// JSON-NEXT:                       },
+// JSON-NEXT:                       "end": {
+// JSON-NEXT:                        "offset": 873,
+// JSON-NEXT:                        "col": 29,
+// JSON-NEXT:                        "tokLen": 1
+// JSON-NEXT:                       }
+// JSON-NEXT:                      },
+// JSON-NEXT:                      "type": {
+// JSON-NEXT:                       "qualType": "int"
+// JSON-NEXT:                      },
+// JSON-NEXT:                      "valueCategory": "prvalue",
+// JSON-NEXT:                      "value": "3"
+// JSON-NEXT:                     }
+// JSON-NEXT:                    ]
+// JSON-NEXT:                   }
+// JSON-NEXT:                  ]
+// JSON-NEXT:                 }
+// JSON-NEXT:                ]
+// JSON-NEXT:               }
+// JSON-NEXT:              ]
+// JSON-NEXT:             }
+// JSON-NEXT:            ]
+// JSON-NEXT:           }
+// JSON-NEXT:          ]
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "CXXConstructorDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 812,
+// JSON-NEXT:         "line": 15,
+// JSON-NEXT:         "col": 8,
+// JSON-NEXT:         "tokLen": 3
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 812,
+// JSON-NEXT:          "col": 8,
+// JSON-NEXT:          "tokLen": 3
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 812,
+// JSON-NEXT:          "col": 8,
+// JSON-NEXT:          "tokLen": 3
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "isImplicit": true,
+// JSON-NEXT:        "name": "foo",
+// JSON-NEXT:        "mangledName": "_ZN3fooILi2EdLi3EEC1ERKS0_",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "void (const foo<2, double, 3> &)"
+// JSON-NEXT:        },
+// JSON-NEXT:        "inline": true,
+// JSON-NEXT:        "constexpr": true,
+// JSON-NEXT:        "explicitlyDefaulted": "default",
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "ParmVarDecl",
+// JSON-NEXT:          "loc": {
+// JSON-NEXT:           "offset": 812,
+// JSON-NEXT:           "col": 8,
+// JSON-NEXT:           "tokLen": 3
+// JSON-NEXT:          },
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 812,
+// JSON-NEXT:            "col": 8,
+// JSON-NEXT:            "tokLen": 3
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 812,
+// JSON-NEXT:            "col": 8,
+// JSON-NEXT:            "tokLen": 3
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "type": {
+// JSON-NEXT:           "qualType": "const foo<2, double, 3> &"
+// JSON-NEXT:          }
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "CXXConstructorDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 812,
+// JSON-NEXT:         "col": 8,
+// JSON-NEXT:         "tokLen": 3
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 812,
+// JSON-NEXT:          "col": 8,
+// JSON-NEXT:          "tokLen": 3
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 812,
+// JSON-NEXT:          "col": 8,
+// JSON-NEXT:          "tokLen": 3
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "isImplicit": true,
+// JSON-NEXT:        "name": "foo",
+// JSON-NEXT:        "mangledName": "_ZN3fooILi2EdLi3EEC1EOS0_",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "void (foo<2, double, 3> &&)"
+// JSON-NEXT:        },
+// JSON-NEXT:        "inline": true,
+// JSON-NEXT:        "constexpr": true,
+// JSON-NEXT:        "explicitlyDefaulted": "default",
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "ParmVarDecl",
+// JSON-NEXT:          "loc": {
+// JSON-NEXT:           "offset": 812,
+// JSON-NEXT:           "col": 8,
+// JSON-NEXT:           "tokLen": 3
+// JSON-NEXT:          },
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 812,
+// JSON-NEXT:            "col": 8,
+// JSON-NEXT:            "tokLen": 3
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 812,
+// JSON-NEXT:            "col": 8,
+// JSON-NEXT:            "tokLen": 3
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "type": {
+// JSON-NEXT:           "qualType": "foo<2, double, 3> &&"
+// JSON-NEXT:          }
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "CXXDestructorDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 812,
+// JSON-NEXT:         "col": 8,
+// JSON-NEXT:         "tokLen": 3
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 812,
+// JSON-NEXT:          "col": 8,
+// JSON-NEXT:          "tokLen": 3
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 812,
+// JSON-NEXT:          "col": 8,
+// JSON-NEXT:          "tokLen": 3
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "isImplicit": true,
+// JSON-NEXT:        "isReferenced": true,
+// JSON-NEXT:        "name": "~foo",
+// JSON-NEXT:        "mangledName": "_ZN3fooILi2EdLi3EED1Ev",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "void () noexcept"
+// JSON-NEXT:        },
+// JSON-NEXT:        "inline": true,
+// JSON-NEXT:        "explicitlyDefaulted": "default"
+// JSON-NEXT:       }
+// JSON-NEXT:      ]
+// JSON-NEXT:     }
+// JSON-NEXT:    ]
+// JSON-NEXT:   },
+// JSON-NEXT:   {
+// JSON-NEXT:    "id": "0x{{.*}}",
+// JSON-NEXT:    "kind": "FunctionTemplateDecl",
+// JSON-NEXT:    "loc": {
+// JSON-NEXT:     "offset": 914,
+// JSON-NEXT:     "line": 22,
+// JSON-NEXT:     "col": 3,
+// JSON-NEXT:     "tokLen": 3
+// JSON-NEXT:    },
+// JSON-NEXT:    "range": {
+// JSON-NEXT:     "begin": {
+// JSON-NEXT:      "offset": 883,
+// JSON-NEXT:      "line": 21,
+// JSON-NEXT:      "col": 1,
+// JSON-NEXT:      "tokLen": 8
+// JSON-NEXT:     },
+// JSON-NEXT:     "end": {
+// JSON-NEXT:      "offset": 937,
+// JSON-NEXT:      "line": 24,
+// JSON-NEXT:      "col": 1,
+// JSON-NEXT:      "tokLen": 1
+// JSON-NEXT:     }
+// JSON-NEXT:    },
+// JSON-NEXT:    "name": "bar",
+// JSON-NEXT:    "inner": [
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "NonTypeTemplateParmDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 897,
+// JSON-NEXT:       "line": 21,
+// JSON-NEXT:       "col": 15,
+// JSON-NEXT:       "tokLen": 1
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 893,
+// JSON-NEXT:        "col": 11,
+// JSON-NEXT:        "tokLen": 3
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 897,
+// JSON-NEXT:        "col": 15,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "isReferenced": true,
+// JSON-NEXT:      "name": "A",
+// JSON-NEXT:      "type": {
+// JSON-NEXT:       "qualType": "int"
+// JSON-NEXT:      },
+// JSON-NEXT:      "depth": 0,
+// JSON-NEXT:      "index": 0
+// JSON-NEXT:     },
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "TemplateTypeParmDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 909,
+// JSON-NEXT:       "col": 27,
+// JSON-NEXT:       "tokLen": 1
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 900,
+// JSON-NEXT:        "col": 18,
+// JSON-NEXT:        "tokLen": 8
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 909,
+// JSON-NEXT:        "col": 27,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "isReferenced": true,
+// JSON-NEXT:      "name": "B",
+// JSON-NEXT:      "tagUsed": "typename",
+// JSON-NEXT:      "depth": 0,
+// JSON-NEXT:      "index": 1
+// JSON-NEXT:     },
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "FunctionDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 914,
+// JSON-NEXT:       "line": 22,
+// JSON-NEXT:       "col": 3,
+// JSON-NEXT:       "tokLen": 3
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 912,
+// JSON-NEXT:        "col": 1,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 937,
+// JSON-NEXT:        "line": 24,
+// JSON-NEXT:        "col": 1,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "name": "bar",
+// JSON-NEXT:      "type": {
+// JSON-NEXT:       "qualType": "B ()"
+// JSON-NEXT:      },
+// JSON-NEXT:      "inner": [
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "CompoundStmt",
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 920,
+// JSON-NEXT:          "line": 22,
+// JSON-NEXT:          "col": 9,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 937,
+// JSON-NEXT:          "line": 24,
+// JSON-NEXT:          "col": 1,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "ReturnStmt",
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 924,
+// JSON-NEXT:            "line": 23,
+// JSON-NEXT:            "col": 3,
+// JSON-NEXT:            "tokLen": 6
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 934,
+// JSON-NEXT:            "col": 13,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "inner": [
+// JSON-NEXT:           {
+// JSON-NEXT:            "id": "0x{{.*}}",
+// JSON-NEXT:            "kind": "CXXUnresolvedConstructExpr",
+// JSON-NEXT:            "range": {
+// JSON-NEXT:             "begin": {
+// JSON-NEXT:              "offset": 931,
+// JSON-NEXT:              "col": 10,
+// JSON-NEXT:              "tokLen": 1
+// JSON-NEXT:             },
+// JSON-NEXT:             "end": {
+// JSON-NEXT:              "offset": 934,
+// JSON-NEXT:              "col": 13,
+// JSON-NEXT:              "tokLen": 1
+// JSON-NEXT:             }
+// JSON-NEXT:            },
+// JSON-NEXT:            "type": {
+// JSON-NEXT:             "qualType": "B"
+// JSON-NEXT:            },
+// JSON-NEXT:            "valueCategory": "prvalue",
+// JSON-NEXT:            "inner": [
+// JSON-NEXT:             {
+// JSON-NEXT:              "id": "0x{{.*}}",
+// JSON-NEXT:              "kind": "DeclRefExpr",
+// JSON-NEXT:              "range": {
+// JSON-NEXT:               "begin": {
+// JSON-NEXT:                "offset": 933,
+// JSON-NEXT:                "col": 12,
+// JSON-NEXT:                "tokLen": 1
+// JSON-NEXT:               },
+// JSON-NEXT:               "end": {
+// JSON-NEXT:                "offset": 933,
+// JSON-NEXT:                "col": 12,
+// JSON-NEXT:                "tokLen": 1
+// JSON-NEXT:               }
+// JSON-NEXT:              },
+// JSON-NEXT:              "type": {
+// JSON-NEXT:               "qualType": "int"
+// JSON-NEXT:              },
+// JSON-NEXT:              "valueCategory": "prvalue",
+// JSON-NEXT:              "referencedDecl": {
+// JSON-NEXT:               "id": "0x{{.*}}",
+// JSON-NEXT:               "kind": "NonTypeTemplateParmDecl",
+// JSON-NEXT:               "name": "A",
+// JSON-NEXT:               "type": {
+// JSON-NEXT:                "qualType": "int"
+// JSON-NEXT:               }
+// JSON-NEXT:              }
+// JSON-NEXT:             }
+// JSON-NEXT:            ]
+// JSON-NEXT:           }
+// JSON-NEXT:          ]
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       }
+// JSON-NEXT:      ]
+// JSON-NEXT:     },
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "FunctionDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 914,
+// JSON-NEXT:       "line": 22,
+// JSON-NEXT:       "col": 3,
+// JSON-NEXT:       "tokLen": 3
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 912,
+// JSON-NEXT:        "col": 1,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 937,
+// JSON-NEXT:        "line": 24,
+// JSON-NEXT:        "col": 1,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "isUsed": true,
+// JSON-NEXT:      "name": "bar",
+// JSON-NEXT:      "mangledName": "_Z3barILi5EiET0_v",
+// JSON-NEXT:      "type": {
+// JSON-NEXT:       "qualType": "int ()"
+// JSON-NEXT:      },
+// JSON-NEXT:      "inner": [
+// JSON-NEXT:       {
+// JSON-NEXT:        "kind": "TemplateArgument",
+// JSON-NEXT:        "value": 5
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "kind": "TemplateArgument",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "int"
+// JSON-NEXT:        },
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "BuiltinType",
+// JSON-NEXT:          "type": {
+// JSON-NEXT:           "qualType": "int"
+// JSON-NEXT:          }
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "CompoundStmt",
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 920,
+// JSON-NEXT:          "line": 22,
+// JSON-NEXT:          "col": 9,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 937,
+// JSON-NEXT:          "line": 24,
+// JSON-NEXT:          "col": 1,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "ReturnStmt",
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 924,
+// JSON-NEXT:            "line": 23,
+// JSON-NEXT:            "col": 3,
+// JSON-NEXT:            "tokLen": 6
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 934,
+// JSON-NEXT:            "col": 13,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "inner": [
+// JSON-NEXT:           {
+// JSON-NEXT:            "id": "0x{{.*}}",
+// JSON-NEXT:            "kind": "CXXFunctionalCastExpr",
+// JSON-NEXT:            "range": {
+// JSON-NEXT:             "begin": {
+// JSON-NEXT:              "offset": 931,
+// JSON-NEXT:              "col": 10,
+// JSON-NEXT:              "tokLen": 1
+// JSON-NEXT:             },
+// JSON-NEXT:             "end": {
+// JSON-NEXT:              "offset": 934,
+// JSON-NEXT:              "col": 13,
+// JSON-NEXT:              "tokLen": 1
+// JSON-NEXT:             }
+// JSON-NEXT:            },
+// JSON-NEXT:            "type": {
+// JSON-NEXT:             "qualType": "int"
+// JSON-NEXT:            },
+// JSON-NEXT:            "valueCategory": "prvalue",
+// JSON-NEXT:            "castKind": "NoOp",
+// JSON-NEXT:            "inner": [
+// JSON-NEXT:             {
+// JSON-NEXT:              "id": "0x{{.*}}",
+// JSON-NEXT:              "kind": "SubstNonTypeTemplateParmExpr",
+// JSON-NEXT:              "range": {
+// JSON-NEXT:               "begin": {
+// JSON-NEXT:                "offset": 933,
+// JSON-NEXT:                "col": 12,
+// JSON-NEXT:                "tokLen": 1
+// JSON-NEXT:               },
+// JSON-NEXT:               "end": {
+// JSON-NEXT:                "offset": 933,
+// JSON-NEXT:                "col": 12,
+// JSON-NEXT:                "tokLen": 1
+// JSON-NEXT:               }
+// JSON-NEXT:              },
+// JSON-NEXT:              "type": {
+// JSON-NEXT:               "qualType": "int"
+// JSON-NEXT:              },
+// JSON-NEXT:              "valueCategory": "prvalue",
+// JSON-NEXT:              "inner": [
+// JSON-NEXT:               {
+// JSON-NEXT:                "id": "0x{{.*}}",
+// JSON-NEXT:                "kind": "NonTypeTemplateParmDecl",
+// JSON-NEXT:                "loc": {
+// JSON-NEXT:                 "offset": 897,
+// JSON-NEXT:                 "line": 21,
+// JSON-NEXT:                 "col": 15,
+// JSON-NEXT:                 "tokLen": 1
+// JSON-NEXT:                },
+// JSON-NEXT:                "range": {
+// JSON-NEXT:                 "begin": {
+// JSON-NEXT:                  "offset": 893,
+// JSON-NEXT:                  "col": 11,
+// JSON-NEXT:                  "tokLen": 3
+// JSON-NEXT:                 },
+// JSON-NEXT:                 "end": {
+// JSON-NEXT:                  "offset": 897,
+// JSON-NEXT:                  "col": 15,
+// JSON-NEXT:                  "tokLen": 1
+// JSON-NEXT:                 }
+// JSON-NEXT:                },
+// JSON-NEXT:                "isReferenced": true,
+// JSON-NEXT:                "name": "A",
+// JSON-NEXT:                "type": {
+// JSON-NEXT:                 "qualType": "int"
+// JSON-NEXT:                },
+// JSON-NEXT:                "depth": 0,
+// JSON-NEXT:                "index": 0
+// JSON-NEXT:               },
+// JSON-NEXT:               {
+// JSON-NEXT:                "id": "0x{{.*}}",
+// JSON-NEXT:                "kind": "IntegerLiteral",
+// JSON-NEXT:                "range": {
+// JSON-NEXT:                 "begin": {
+// JSON-NEXT:                  "offset": 933,
+// JSON-NEXT:                  "line": 23,
+// JSON-NEXT:                  "col": 12,
+// JSON-NEXT:                  "tokLen": 1
+// JSON-NEXT:                 },
+// JSON-NEXT:                 "end": {
+// JSON-NEXT:                  "offset": 933,
+// JSON-NEXT:                  "col": 12,
+// JSON-NEXT:                  "tokLen": 1
+// JSON-NEXT:                 }
+// JSON-NEXT:                },
+// JSON-NEXT:                "type": {
+// JSON-NEXT:                 "qualType": "int"
+// JSON-NEXT:                },
+// JSON-NEXT:                "valueCategory": "prvalue",
+// JSON-NEXT:                "value": "5"
+// JSON-NEXT:               }
+// JSON-NEXT:              ]
+// JSON-NEXT:             }
+// JSON-NEXT:            ]
+// JSON-NEXT:           }
+// JSON-NEXT:          ]
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       }
+// JSON-NEXT:      ]
+// JSON-NEXT:     }
+// JSON-NEXT:    ]
+// JSON-NEXT:   },
+// JSON-NEXT:   {
+// JSON-NEXT:    "id": "0x{{.*}}",
+// JSON-NEXT:    "kind": "FunctionDecl",
+// JSON-NEXT:    "loc": {
+// JSON-NEXT:     "offset": 945,
+// JSON-NEXT:     "line": 26,
+// JSON-NEXT:     "col": 6,
+// JSON-NEXT:     "tokLen": 3
+// JSON-NEXT:    },
+// JSON-NEXT:    "range": {
+// JSON-NEXT:     "begin": {
+// JSON-NEXT:      "offset": 940,
+// JSON-NEXT:      "col": 1,
+// JSON-NEXT:      "tokLen": 4
+// JSON-NEXT:     },
+// JSON-NEXT:     "end": {
+// JSON-NEXT:      "offset": 1055,
+// JSON-NEXT:      "line": 30,
+// JSON-NEXT:      "col": 1,
+// JSON-NEXT:      "tokLen": 1
+// JSON-NEXT:     }
+// JSON-NEXT:    },
+// JSON-NEXT:    "name": "baz",
+// JSON-NEXT:    "mangledName": "_Z3bazv",
+// JSON-NEXT:    "type": {
+// JSON-NEXT:     "qualType": "void ()"
+// JSON-NEXT:    },
+// JSON-NEXT:    "inner": [
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "CompoundStmt",
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 951,
+// JSON-NEXT:        "line": 26,
+// JSON-NEXT:        "col": 12,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 1055,
+// JSON-NEXT:        "line": 30,
+// JSON-NEXT:        "col": 1,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "inner": [
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "DeclStmt",
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 955,
+// JSON-NEXT:          "line": 27,
+// JSON-NEXT:          "col": 3,
+// JSON-NEXT:          "tokLen": 3
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 976,
+// JSON-NEXT:          "col": 24,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "VarDecl",
+// JSON-NEXT:          "loc": {
+// JSON-NEXT:           "offset": 959,
+// JSON-NEXT:           "col": 7,
+// JSON-NEXT:           "tokLen": 1
+// JSON-NEXT:          },
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 955,
+// JSON-NEXT:            "col": 3,
+// JSON-NEXT:            "tokLen": 3
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 975,
+// JSON-NEXT:            "col": 23,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "name": "x",
+// JSON-NEXT:          "type": {
+// JSON-NEXT:           "qualType": "int"
+// JSON-NEXT:          },
+// JSON-NEXT:          "init": "c",
+// JSON-NEXT:          "inner": [
+// JSON-NEXT:           {
+// JSON-NEXT:            "id": "0x{{.*}}",
+// JSON-NEXT:            "kind": "CallExpr",
+// JSON-NEXT:            "range": {
+// JSON-NEXT:             "begin": {
+// JSON-NEXT:              "offset": 963,
+// JSON-NEXT:              "col": 11,
+// JSON-NEXT:              "tokLen": 3
+// JSON-NEXT:             },
+// JSON-NEXT:             "end": {
+// JSON-NEXT:              "offset": 975,
+// JSON-NEXT:              "col": 23,
+// JSON-NEXT:              "tokLen": 1
+// JSON-NEXT:             }
+// JSON-NEXT:            },
+// JSON-NEXT:            "type": {
+// JSON-NEXT:             "qualType": "int"
+// JSON-NEXT:            },
+// JSON-NEXT:            "valueCategory": "prvalue",
+// JSON-NEXT:            "inner": [
+// JSON-NEXT:             {
+// JSON-NEXT:              "id": "0x{{.*}}",
+// JSON-NEXT:              "kind": "ImplicitCastExpr",
+// JSON-NEXT:              "range": {
+// JSON-NEXT:               "begin": {
+// JSON-NEXT:                "offset": 963,
+// JSON-NEXT:                "col": 11,
+// JSON-NEXT:                "tokLen": 3
+// JSON-NEXT:               },
+// JSON-NEXT:               "end": {
+// JSON-NEXT:                "offset": 973,
+// JSON-NEXT:                "col": 21,
+// JSON-NEXT:                "tokLen": 1
+// JSON-NEXT:               }
+// JSON-NEXT:              },
+// JSON-NEXT:              "type": {
+// JSON-NEXT:               "qualType": "int (*)()"
+// JSON-NEXT:              },
+// JSON-NEXT:              "valueCategory": "prvalue",
+// JSON-NEXT:              "castKind": "FunctionToPointerDecay",
+// JSON-NEXT:              "inner": [
+// JSON-NEXT:               {
+// JSON-NEXT:                "id": "0x{{.*}}",
+// JSON-NEXT:                "kind": "DeclRefExpr",
+// JSON-NEXT:                "range": {
+// JSON-NEXT:                 "begin": {
+// JSON-NEXT:                  "offset": 963,
+// JSON-NEXT:                  "col": 11,
+// JSON-NEXT:                  "tokLen": 3
+// JSON-NEXT:                 },
+// JSON-NEXT:                 "end": {
+// JSON-NEXT:                  "offset": 973,
+// JSON-NEXT:                  "col": 21,
+// JSON-NEXT:                  "tokLen": 1
+// JSON-NEXT:                 }
+// JSON-NEXT:                },
+// JSON-NEXT:                "type": {
+// JSON-NEXT:                 "qualType": "int ()"
+// JSON-NEXT:                },
+// JSON-NEXT:                "valueCategory": "lvalue",
+// JSON-NEXT:                "referencedDecl": {
+// JSON-NEXT:                 "id": "0x{{.*}}",
+// JSON-NEXT:                 "kind": "FunctionDecl",
+// JSON-NEXT:                 "name": "bar",
+// JSON-NEXT:                 "type": {
+// JSON-NEXT:                  "qualType": "int ()"
+// JSON-NEXT:                 }
+// JSON-NEXT:                },
+// JSON-NEXT:                "foundReferencedDecl": {
+// JSON-NEXT:                 "id": "0x{{.*}}",
+// JSON-NEXT:                 "kind": "FunctionTemplateDecl",
+// JSON-NEXT:                 "name": "bar"
+// JSON-NEXT:                }
+// JSON-NEXT:               }
+// JSON-NEXT:              ]
+// JSON-NEXT:             }
+// JSON-NEXT:            ]
+// JSON-NEXT:           }
+// JSON-NEXT:          ]
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "DeclStmt",
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 980,
+// JSON-NEXT:          "line": 28,
+// JSON-NEXT:          "col": 3,
+// JSON-NEXT:          "tokLen": 3
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 1010,
+// JSON-NEXT:          "col": 33,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "VarDecl",
+// JSON-NEXT:          "loc": {
+// JSON-NEXT:           "offset": 984,
+// JSON-NEXT:           "col": 7,
+// JSON-NEXT:           "tokLen": 1
+// JSON-NEXT:          },
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 980,
+// JSON-NEXT:            "col": 3,
+// JSON-NEXT:            "tokLen": 3
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 1009,
+// JSON-NEXT:            "col": 32,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "name": "y",
+// JSON-NEXT:          "type": {
+// JSON-NEXT:           "qualType": "int"
+// JSON-NEXT:          },
+// JSON-NEXT:          "init": "c",
+// JSON-NEXT:          "inner": [
+// JSON-NEXT:           {
+// JSON-NEXT:            "id": "0x{{.*}}",
+// JSON-NEXT:            "kind": "ExprWithCleanups",
+// JSON-NEXT:            "range": {
+// JSON-NEXT:             "begin": {
+// JSON-NEXT:              "offset": 988,
+// JSON-NEXT:              "col": 11,
+// JSON-NEXT:              "tokLen": 3
+// JSON-NEXT:             },
+// JSON-NEXT:             "end": {
+// JSON-NEXT:              "offset": 1009,
+// JSON-NEXT:              "col": 32,
+// JSON-NEXT:              "tokLen": 1
+// JSON-NEXT:             }
+// JSON-NEXT:            },
+// JSON-NEXT:            "type": {
+// JSON-NEXT:             "qualType": "int"
+// JSON-NEXT:            },
+// JSON-NEXT:            "valueCategory": "prvalue",
+// JSON-NEXT:            "inner": [
+// JSON-NEXT:             {
+// JSON-NEXT:              "id": "0x{{.*}}",
+// JSON-NEXT:              "kind": "CXXMemberCallExpr",
+// JSON-NEXT:              "range": {
+// JSON-NEXT:               "begin": {
+// JSON-NEXT:                "offset": 988,
+// JSON-NEXT:                "col": 11,
+// JSON-NEXT:                "tokLen": 3
+// JSON-NEXT:               },
+// JSON-NEXT:               "end": {
+// JSON-NEXT:                "offset": 1009,
+// JSON-NEXT:                "col": 32,
+// JSON-NEXT:                "tokLen": 1
+// JSON-NEXT:               }
+// JSON-NEXT:              },
+// JSON-NEXT:              "type": {
+// JSON-NEXT:               "qualType": "int"
+// JSON-NEXT:              },
+// JSON-NEXT:              "valueCategory": "prvalue",
+// JSON-NEXT:              "inner": [
+// JSON-NEXT:               {
+// JSON-NEXT:                "id": "0x{{.*}}",
+// JSON-NEXT:                "kind": "MemberExpr",
+// JSON-NEXT:                "range": {
+// JSON-NEXT:                 "begin": {
+// JSON-NEXT:                  "offset": 988,
+// JSON-NEXT:                  "col": 11,
+// JSON-NEXT:                  "tokLen": 3
+// JSON-NEXT:                 },
+// JSON-NEXT:                 "end": {
+// JSON-NEXT:                  "offset": 1002,
+// JSON-NEXT:                  "col": 25,
+// JSON-NEXT:                  "tokLen": 6
+// JSON-NEXT:                 }
+// JSON-NEXT:                },
+// JSON-NEXT:                "type": {
+// JSON-NEXT:                 "qualType": "<bound member function type>"
+// JSON-NEXT:                },
+// JSON-NEXT:                "valueCategory": "prvalue",
+// JSON-NEXT:                "name": "getSum",
+// JSON-NEXT:                "isArrow": false,
+// JSON-NEXT:                "referencedMemberDecl": "0x{{.*}}",
+// JSON-NEXT:                "inner": [
+// JSON-NEXT:                 {
+// JSON-NEXT:                  "id": "0x{{.*}}",
+// JSON-NEXT:                  "kind": "MaterializeTemporaryExpr",
+// JSON-NEXT:                  "range": {
+// JSON-NEXT:                   "begin": {
+// JSON-NEXT:                    "offset": 988,
+// JSON-NEXT:                    "col": 11,
+// JSON-NEXT:                    "tokLen": 3
+// JSON-NEXT:                   },
+// JSON-NEXT:                   "end": {
+// JSON-NEXT:                    "offset": 1000,
+// JSON-NEXT:                    "col": 23,
+// JSON-NEXT:                    "tokLen": 1
+// JSON-NEXT:                   }
+// JSON-NEXT:                  },
+// JSON-NEXT:                  "type": {
+// JSON-NEXT:                   "qualType": "foo<5, int>"
+// JSON-NEXT:                  },
+// JSON-NEXT:                  "valueCategory": "xvalue",
+// JSON-NEXT:                  "storageDuration": "full expression",
+// JSON-NEXT:                  "inner": [
+// JSON-NEXT:                   {
+// JSON-NEXT:                    "id": "0x{{.*}}",
+// JSON-NEXT:                    "kind": "CXXTemporaryObjectExpr",
+// JSON-NEXT:                    "range": {
+// JSON-NEXT:                     "begin": {
+// JSON-NEXT:                      "offset": 988,
+// JSON-NEXT:                      "col": 11,
+// JSON-NEXT:                      "tokLen": 3
+// JSON-NEXT:                     },
+// JSON-NEXT:                     "end": {
+// JSON-NEXT:                      "offset": 1000,
+// JSON-NEXT:                      "col": 23,
+// JSON-NEXT:                      "tokLen": 1
+// JSON-NEXT:                     }
+// JSON-NEXT:                    },
+// JSON-NEXT:                    "type": {
+// JSON-NEXT:                     "qualType": "foo<5, int>"
+// JSON-NEXT:                    },
+// JSON-NEXT:                    "valueCategory": "prvalue",
+// JSON-NEXT:                    "ctorType": {
+// JSON-NEXT:                     "qualType": "void ()"
+// JSON-NEXT:                    },
+// JSON-NEXT:                    "hadMultipleCandidates": true,
+// JSON-NEXT:                    "constructionKind": "complete"
+// JSON-NEXT:                   }
+// JSON-NEXT:                  ]
+// JSON-NEXT:                 }
+// JSON-NEXT:                ]
+// JSON-NEXT:               }
+// JSON-NEXT:              ]
+// JSON-NEXT:             }
+// JSON-NEXT:            ]
+// JSON-NEXT:           }
+// JSON-NEXT:          ]
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "DeclStmt",
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 1014,
+// JSON-NEXT:          "line": 29,
+// JSON-NEXT:          "col": 3,
+// JSON-NEXT:          "tokLen": 6
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 1053,
+// JSON-NEXT:          "col": 42,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "VarDecl",
+// JSON-NEXT:          "loc": {
+// JSON-NEXT:           "offset": 1021,
+// JSON-NEXT:           "col": 10,
+// JSON-NEXT:           "tokLen": 1
+// JSON-NEXT:          },
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 1014,
+// JSON-NEXT:            "col": 3,
+// JSON-NEXT:            "tokLen": 6
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 1052,
+// JSON-NEXT:            "col": 41,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "name": "z",
+// JSON-NEXT:          "type": {
+// JSON-NEXT:           "qualType": "double"
+// JSON-NEXT:          },
+// JSON-NEXT:          "init": "c",
+// JSON-NEXT:          "inner": [
+// JSON-NEXT:           {
+// JSON-NEXT:            "id": "0x{{.*}}",
+// JSON-NEXT:            "kind": "ExprWithCleanups",
+// JSON-NEXT:            "range": {
+// JSON-NEXT:             "begin": {
+// JSON-NEXT:              "offset": 1025,
+// JSON-NEXT:              "col": 14,
+// JSON-NEXT:              "tokLen": 3
+// JSON-NEXT:             },
+// JSON-NEXT:             "end": {
+// JSON-NEXT:              "offset": 1052,
+// JSON-NEXT:              "col": 41,
+// JSON-NEXT:              "tokLen": 1
+// JSON-NEXT:             }
+// JSON-NEXT:            },
+// JSON-NEXT:            "type": {
+// JSON-NEXT:             "qualType": "double"
+// JSON-NEXT:            },
+// JSON-NEXT:            "valueCategory": "prvalue",
+// JSON-NEXT:            "inner": [
+// JSON-NEXT:             {
+// JSON-NEXT:              "id": "0x{{.*}}",
+// JSON-NEXT:              "kind": "CXXMemberCallExpr",
+// JSON-NEXT:              "range": {
+// JSON-NEXT:               "begin": {
+// JSON-NEXT:                "offset": 1025,
+// JSON-NEXT:                "col": 14,
+// JSON-NEXT:                "tokLen": 3
+// JSON-NEXT:               },
+// JSON-NEXT:               "end": {
+// JSON-NEXT:                "offset": 1052,
+// JSON-NEXT:                "col": 41,
+// JSON-NEXT:                "tokLen": 1
+// JSON-NEXT:               }
+// JSON-NEXT:              },
+// JSON-NEXT:              "type": {
+// JSON-NEXT:               "qualType": "double"
+// JSON-NEXT:              },
+// JSON-NEXT:              "valueCategory": "prvalue",
+// JSON-NEXT:              "inner": [
+// JSON-NEXT:               {
+// JSON-NEXT:                "id": "0x{{.*}}",
+// JSON-NEXT:                "kind": "MemberExpr",
+// JSON-NEXT:                "range": {
+// JSON-NEXT:                 "begin": {
+// JSON-NEXT:                  "offset": 1025,
+// JSON-NEXT:                  "col": 14,
+// JSON-NEXT:                  "tokLen": 3
+// JSON-NEXT:                 },
+// JSON-NEXT:                 "end": {
+// JSON-NEXT:                  "offset": 1045,
+// JSON-NEXT:                  "col": 34,
+// JSON-NEXT:                  "tokLen": 6
+// JSON-NEXT:                 }
+// JSON-NEXT:                },
+// JSON-NEXT:                "type": {
+// JSON-NEXT:                 "qualType": "<bound member function type>"
+// JSON-NEXT:                },
+// JSON-NEXT:                "valueCategory": "prvalue",
+// JSON-NEXT:                "name": "getSum",
+// JSON-NEXT:                "isArrow": false,
+// JSON-NEXT:                "referencedMemberDecl": "0x{{.*}}",
+// JSON-NEXT:                "inner": [
+// JSON-NEXT:                 {
+// JSON-NEXT:                  "id": "0x{{.*}}",
+// JSON-NEXT:                  "kind": "MaterializeTemporaryExpr",
+// JSON-NEXT:                  "range": {
+// JSON-NEXT:                   "begin": {
+// JSON-NEXT:                    "offset": 1025,
+// JSON-NEXT:                    "col": 14,
+// JSON-NEXT:                    "tokLen": 3
+// JSON-NEXT:                   },
+// JSON-NEXT:                   "end": {
+// JSON-NEXT:                    "offset": 1043,
+// JSON-NEXT:                    "col": 32,
+// JSON-NEXT:                    "tokLen": 1
+// JSON-NEXT:                   }
+// JSON-NEXT:                  },
+// JSON-NEXT:                  "type": {
+// JSON-NEXT:                   "qualType": "foo<2, double, 3>"
+// JSON-NEXT:                  },
+// JSON-NEXT:                  "valueCategory": "xvalue",
+// JSON-NEXT:                  "storageDuration": "full expression",
+// JSON-NEXT:                  "inner": [
+// JSON-NEXT:                   {
+// JSON-NEXT:                    "id": "0x{{.*}}",
+// JSON-NEXT:                    "kind": "CXXTemporaryObjectExpr",
+// JSON-NEXT:                    "range": {
+// JSON-NEXT:                     "begin": {
+// JSON-NEXT:                      "offset": 1025,
+// JSON-NEXT:                      "col": 14,
+// JSON-NEXT:                      "tokLen": 3
+// JSON-NEXT:                     },
+// JSON-NEXT:                     "end": {
+// JSON-NEXT:                      "offset": 1043,
+// JSON-NEXT:                      "col": 32,
+// JSON-NEXT:                      "tokLen": 1
+// JSON-NEXT:                     }
+// JSON-NEXT:                    },
+// JSON-NEXT:                    "type": {
+// JSON-NEXT:                     "qualType": "foo<2, double, 3>"
+// JSON-NEXT:                    },
+// JSON-NEXT:                    "valueCategory": "prvalue",
+// JSON-NEXT:                    "ctorType": {
+// JSON-NEXT:                     "qualType": "void ()"
+// JSON-NEXT:                    },
+// JSON-NEXT:                    "hadMultipleCandidates": true,
+// JSON-NEXT:                    "constructionKind": "complete"
+// JSON-NEXT:                   }
+// JSON-NEXT:                  ]
+// JSON-NEXT:                 }
+// JSON-NEXT:                ]
+// JSON-NEXT:               }
+// JSON-NEXT:              ]
+// JSON-NEXT:             }
+// JSON-NEXT:            ]
+// JSON-NEXT:           }
+// JSON-NEXT:          ]
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       }
+// JSON-NEXT:      ]
+// JSON-NEXT:     }
+// JSON-NEXT:    ]
+// JSON-NEXT:   },
+// JSON-NEXT:   {
+// JSON-NEXT:    "id": "0x{{.*}}",
+// JSON-NEXT:    "kind": "ClassTemplateDecl",
+// JSON-NEXT:    "loc": {
+// JSON-NEXT:     "offset": 1856,
+// JSON-NEXT:     "line": 52,
+// JSON-NEXT:     "col": 33,
+// JSON-NEXT:     "tokLen": 1
+// JSON-NEXT:    },
+// JSON-NEXT:    "range": {
+// JSON-NEXT:     "begin": {
+// JSON-NEXT:      "offset": 1824,
+// JSON-NEXT:      "col": 1,
+// JSON-NEXT:      "tokLen": 8
+// JSON-NEXT:     },
+// JSON-NEXT:     "end": {
+// JSON-NEXT:      "offset": 1896,
+// JSON-NEXT:      "line": 54,
+// JSON-NEXT:      "col": 1,
+// JSON-NEXT:      "tokLen": 1
+// JSON-NEXT:     }
+// JSON-NEXT:    },
+// JSON-NEXT:    "name": "A",
+// JSON-NEXT:    "inner": [
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "TemplateTypeParmDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 1846,
+// JSON-NEXT:       "line": 52,
+// JSON-NEXT:       "col": 23,
+// JSON-NEXT:       "tokLen": 1
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 1834,
+// JSON-NEXT:        "col": 11,
+// JSON-NEXT:        "tokLen": 8
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 1846,
+// JSON-NEXT:        "col": 23,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "isReferenced": true,
+// JSON-NEXT:      "name": "T",
+// JSON-NEXT:      "tagUsed": "typename",
+// JSON-NEXT:      "depth": 0,
+// JSON-NEXT:      "index": 0,
+// JSON-NEXT:      "isParameterPack": true
+// JSON-NEXT:     },
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "CXXRecordDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 1856,
+// JSON-NEXT:       "col": 33,
+// JSON-NEXT:       "tokLen": 1
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 1849,
+// JSON-NEXT:        "col": 26,
+// JSON-NEXT:        "tokLen": 6
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 1896,
+// JSON-NEXT:        "line": 54,
+// JSON-NEXT:        "col": 1,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "name": "A",
+// JSON-NEXT:      "tagUsed": "struct",
+// JSON-NEXT:      "completeDefinition": true,
+// JSON-NEXT:      "definitionData": {
+// JSON-NEXT:       "canConstDefaultInit": true,
+// JSON-NEXT:       "copyAssign": {
+// JSON-NEXT:        "hasConstParam": true,
+// JSON-NEXT:        "implicitHasConstParam": true,
+// JSON-NEXT:        "needsImplicit": true,
+// JSON-NEXT:        "simple": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       },
+// JSON-NEXT:       "copyCtor": {
+// JSON-NEXT:        "hasConstParam": true,
+// JSON-NEXT:        "implicitHasConstParam": true,
+// JSON-NEXT:        "needsImplicit": true,
+// JSON-NEXT:        "simple": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       },
+// JSON-NEXT:       "defaultCtor": {
+// JSON-NEXT:        "defaultedIsConstexpr": true,
+// JSON-NEXT:        "exists": true,
+// JSON-NEXT:        "isConstexpr": true,
+// JSON-NEXT:        "needsImplicit": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       },
+// JSON-NEXT:       "dtor": {
+// JSON-NEXT:        "irrelevant": true,
+// JSON-NEXT:        "needsImplicit": true,
+// JSON-NEXT:        "simple": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       },
+// JSON-NEXT:       "hasConstexprNonCopyMoveConstructor": true,
+// JSON-NEXT:       "isAggregate": true,
+// JSON-NEXT:       "isEmpty": true,
+// JSON-NEXT:       "isLiteral": true,
+// JSON-NEXT:       "isPOD": true,
+// JSON-NEXT:       "isStandardLayout": true,
+// JSON-NEXT:       "isTrivial": true,
+// JSON-NEXT:       "isTriviallyCopyable": true,
+// JSON-NEXT:       "moveAssign": {
+// JSON-NEXT:        "exists": true,
+// JSON-NEXT:        "needsImplicit": true,
+// JSON-NEXT:        "simple": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       },
+// JSON-NEXT:       "moveCtor": {
+// JSON-NEXT:        "exists": true,
+// JSON-NEXT:        "needsImplicit": true,
+// JSON-NEXT:        "simple": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "inner": [
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "CXXRecordDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 1856,
+// JSON-NEXT:         "line": 52,
+// JSON-NEXT:         "col": 33,
+// JSON-NEXT:         "tokLen": 1
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 1849,
+// JSON-NEXT:          "col": 26,
+// JSON-NEXT:          "tokLen": 6
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 1856,
+// JSON-NEXT:          "col": 33,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "isImplicit": true,
+// JSON-NEXT:        "name": "A",
+// JSON-NEXT:        "tagUsed": "struct"
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "ClassTemplateDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 1890,
+// JSON-NEXT:         "line": 53,
+// JSON-NEXT:         "col": 31,
+// JSON-NEXT:         "tokLen": 1
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 1862,
+// JSON-NEXT:          "col": 3,
+// JSON-NEXT:          "tokLen": 8
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 1893,
+// JSON-NEXT:          "col": 34,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "name": "B",
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "NonTypeTemplateParmDecl",
+// JSON-NEXT:          "loc": {
+// JSON-NEXT:           "offset": 1877,
+// JSON-NEXT:           "col": 18,
+// JSON-NEXT:           "tokLen": 1
+// JSON-NEXT:          },
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 1872,
+// JSON-NEXT:            "col": 13,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 1880,
+// JSON-NEXT:            "col": 21,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "name": "x",
+// JSON-NEXT:          "type": {
+// JSON-NEXT:           "qualType": "T[3]..."
+// JSON-NEXT:          },
+// JSON-NEXT:          "depth": 1,
+// JSON-NEXT:          "index": 0,
+// JSON-NEXT:          "isParameterPack": true
+// JSON-NEXT:         },
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "CXXRecordDecl",
+// JSON-NEXT:          "loc": {
+// JSON-NEXT:           "offset": 1890,
+// JSON-NEXT:           "col": 31,
+// JSON-NEXT:           "tokLen": 1
+// JSON-NEXT:          },
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 1883,
+// JSON-NEXT:            "col": 24,
+// JSON-NEXT:            "tokLen": 6
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 1893,
+// JSON-NEXT:            "col": 34,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "name": "B",
+// JSON-NEXT:          "tagUsed": "struct",
+// JSON-NEXT:          "completeDefinition": true,
+// JSON-NEXT:          "definitionData": {
+// JSON-NEXT:           "canConstDefaultInit": true,
+// JSON-NEXT:           "copyAssign": {
+// JSON-NEXT:            "hasConstParam": true,
+// JSON-NEXT:            "implicitHasConstParam": true,
+// JSON-NEXT:            "needsImplicit": true,
+// JSON-NEXT:            "simple": true,
+// JSON-NEXT:            "trivial": true
+// JSON-NEXT:           },
+// JSON-NEXT:           "copyCtor": {
+// JSON-NEXT:            "hasConstParam": true,
+// JSON-NEXT:            "implicitHasConstParam": true,
+// JSON-NEXT:            "needsImplicit": true,
+// JSON-NEXT:            "simple": true,
+// JSON-NEXT:            "trivial": true
+// JSON-NEXT:           },
+// JSON-NEXT:           "defaultCtor": {
+// JSON-NEXT:            "defaultedIsConstexpr": true,
+// JSON-NEXT:            "exists": true,
+// JSON-NEXT:            "isConstexpr": true,
+// JSON-NEXT:            "needsImplicit": true,
+// JSON-NEXT:            "trivial": true
+// JSON-NEXT:           },
+// JSON-NEXT:           "dtor": {
+// JSON-NEXT:            "irrelevant": true,
+// JSON-NEXT:            "needsImplicit": true,
+// JSON-NEXT:            "simple": true,
+// JSON-NEXT:            "trivial": true
+// JSON-NEXT:           },
+// JSON-NEXT:           "hasConstexprNonCopyMoveConstructor": true,
+// JSON-NEXT:           "isAggregate": true,
+// JSON-NEXT:           "isEmpty": true,
+// JSON-NEXT:           "isLiteral": true,
+// JSON-NEXT:           "isPOD": true,
+// JSON-NEXT:           "isStandardLayout": true,
+// JSON-NEXT:           "isTrivial": true,
+// JSON-NEXT:           "isTriviallyCopyable": true,
+// JSON-NEXT:           "moveAssign": {
+// JSON-NEXT:            "exists": true,
+// JSON-NEXT:            "needsImplicit": true,
+// JSON-NEXT:            "simple": true,
+// JSON-NEXT:            "trivial": true
+// JSON-NEXT:           },
+// JSON-NEXT:           "moveCtor": {
+// JSON-NEXT:            "exists": true,
+// JSON-NEXT:            "needsImplicit": true,
+// JSON-NEXT:            "simple": true,
+// JSON-NEXT:            "trivial": true
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "inner": [
+// JSON-NEXT:           {
+// JSON-NEXT:            "id": "0x{{.*}}",
+// JSON-NEXT:            "kind": "CXXRecordDecl",
+// JSON-NEXT:            "loc": {
+// JSON-NEXT:             "offset": 1890,
+// JSON-NEXT:             "col": 31,
+// JSON-NEXT:             "tokLen": 1
+// JSON-NEXT:            },
+// JSON-NEXT:            "range": {
+// JSON-NEXT:             "begin": {
+// JSON-NEXT:              "offset": 1883,
+// JSON-NEXT:              "col": 24,
+// JSON-NEXT:              "tokLen": 6
+// JSON-NEXT:             },
+// JSON-NEXT:             "end": {
+// JSON-NEXT:              "offset": 1890,
+// JSON-NEXT:              "col": 31,
+// JSON-NEXT:              "tokLen": 1
+// JSON-NEXT:             }
+// JSON-NEXT:            },
+// JSON-NEXT:            "isImplicit": true,
+// JSON-NEXT:            "name": "B",
+// JSON-NEXT:            "tagUsed": "struct"
+// JSON-NEXT:           }
+// JSON-NEXT:          ]
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       }
+// JSON-NEXT:      ]
+// JSON-NEXT:     }
+// JSON-NEXT:    ]
+// JSON-NEXT:   },
+// JSON-NEXT:   {
+// JSON-NEXT:    "id": "0x{{.*}}",
+// JSON-NEXT:    "kind": "FunctionTemplateDecl",
+// JSON-NEXT:    "loc": {
+// JSON-NEXT:     "offset": 2016,
+// JSON-NEXT:     "line": 58,
+// JSON-NEXT:     "col": 31,
+// JSON-NEXT:     "tokLen": 1
+// JSON-NEXT:    },
+// JSON-NEXT:    "range": {
+// JSON-NEXT:     "begin": {
+// JSON-NEXT:      "offset": 1986,
+// JSON-NEXT:      "col": 1,
+// JSON-NEXT:      "tokLen": 8
+// JSON-NEXT:     },
+// JSON-NEXT:     "end": {
+// JSON-NEXT:      "offset": 2038,
+// JSON-NEXT:      "line": 60,
+// JSON-NEXT:      "col": 1,
+// JSON-NEXT:      "tokLen": 1
+// JSON-NEXT:     }
+// JSON-NEXT:    },
+// JSON-NEXT:    "name": "f",
+// JSON-NEXT:    "inner": [
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "TemplateTypeParmDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 2008,
+// JSON-NEXT:       "line": 58,
+// JSON-NEXT:       "col": 23,
+// JSON-NEXT:       "tokLen": 1
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 1996,
+// JSON-NEXT:        "col": 11,
+// JSON-NEXT:        "tokLen": 8
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 2008,
+// JSON-NEXT:        "col": 23,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "isReferenced": true,
+// JSON-NEXT:      "name": "T",
+// JSON-NEXT:      "tagUsed": "typename",
+// JSON-NEXT:      "depth": 0,
+// JSON-NEXT:      "index": 0,
+// JSON-NEXT:      "isParameterPack": true
+// JSON-NEXT:     },
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "FunctionDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 2016,
+// JSON-NEXT:       "col": 31,
+// JSON-NEXT:       "tokLen": 1
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 2011,
+// JSON-NEXT:        "col": 26,
+// JSON-NEXT:        "tokLen": 4
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 2038,
+// JSON-NEXT:        "line": 60,
+// JSON-NEXT:        "col": 1,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "name": "f",
+// JSON-NEXT:      "type": {
+// JSON-NEXT:       "qualType": "void ()"
+// JSON-NEXT:      },
+// JSON-NEXT:      "inner": [
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "CompoundStmt",
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 2020,
+// JSON-NEXT:          "line": 58,
+// JSON-NEXT:          "col": 35,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 2038,
+// JSON-NEXT:          "line": 60,
+// JSON-NEXT:          "col": 1,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "DeclStmt",
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 2024,
+// JSON-NEXT:            "line": 59,
+// JSON-NEXT:            "col": 3,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 2036,
+// JSON-NEXT:            "col": 15,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "inner": [
+// JSON-NEXT:           {
+// JSON-NEXT:            "id": "0x{{.*}}",
+// JSON-NEXT:            "kind": "VarDecl",
+// JSON-NEXT:            "loc": {
+// JSON-NEXT:             "offset": 2035,
+// JSON-NEXT:             "col": 14,
+// JSON-NEXT:             "tokLen": 1
+// JSON-NEXT:            },
+// JSON-NEXT:            "range": {
+// JSON-NEXT:             "begin": {
+// JSON-NEXT:              "offset": 2024,
+// JSON-NEXT:              "col": 3,
+// JSON-NEXT:              "tokLen": 1
+// JSON-NEXT:             },
+// JSON-NEXT:             "end": {
+// JSON-NEXT:              "offset": 2035,
+// JSON-NEXT:              "col": 14,
+// JSON-NEXT:              "tokLen": 1
+// JSON-NEXT:             }
+// JSON-NEXT:            },
+// JSON-NEXT:            "name": "a",
+// JSON-NEXT:            "type": {
+// JSON-NEXT:             "qualType": "A<T[3]...>"
+// JSON-NEXT:            }
+// JSON-NEXT:           }
+// JSON-NEXT:          ]
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       }
+// JSON-NEXT:      ]
+// JSON-NEXT:     }
+// JSON-NEXT:    ]
+// JSON-NEXT:   },
+// JSON-NEXT:   {
+// JSON-NEXT:    "id": "0x{{.*}}",
+// JSON-NEXT:    "kind": "NamespaceDecl",
+// JSON-NEXT:    "loc": {
+// JSON-NEXT:     "offset": 2051,
+// JSON-NEXT:     "line": 62,
+// JSON-NEXT:     "col": 11,
+// JSON-NEXT:     "tokLen": 5
+// JSON-NEXT:    },
+// JSON-NEXT:    "range": {
+// JSON-NEXT:     "begin": {
+// JSON-NEXT:      "offset": 2041,
+// JSON-NEXT:      "col": 1,
+// JSON-NEXT:      "tokLen": 9
+// JSON-NEXT:     },
+// JSON-NEXT:     "end": {
+// JSON-NEXT:      "offset": 2240,
+// JSON-NEXT:      "line": 71,
+// JSON-NEXT:      "col": 1,
+// JSON-NEXT:      "tokLen": 1
+// JSON-NEXT:     }
+// JSON-NEXT:    },
+// JSON-NEXT:    "name": "test2",
+// JSON-NEXT:    "inner": [
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "FunctionDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 2064,
+// JSON-NEXT:       "line": 63,
+// JSON-NEXT:       "col": 6,
+// JSON-NEXT:       "tokLen": 4
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 2059,
+// JSON-NEXT:        "col": 1,
+// JSON-NEXT:        "tokLen": 4
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 2072,
+// JSON-NEXT:        "col": 14,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "name": "func",
+// JSON-NEXT:      "mangledName": "_ZN5test24funcEi",
+// JSON-NEXT:      "type": {
+// JSON-NEXT:       "qualType": "void (int)"
+// JSON-NEXT:      },
+// JSON-NEXT:      "inner": [
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "ParmVarDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 2072,
+// JSON-NEXT:         "col": 14,
+// JSON-NEXT:         "tokLen": 1
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 2069,
+// JSON-NEXT:          "col": 11,
+// JSON-NEXT:          "tokLen": 3
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 2069,
+// JSON-NEXT:          "col": 11,
+// JSON-NEXT:          "tokLen": 3
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "int"
+// JSON-NEXT:        }
+// JSON-NEXT:       }
+// JSON-NEXT:      ]
+// JSON-NEXT:     },
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "FunctionDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 2080,
+// JSON-NEXT:       "line": 64,
+// JSON-NEXT:       "col": 6,
+// JSON-NEXT:       "tokLen": 4
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 2075,
+// JSON-NEXT:        "col": 1,
+// JSON-NEXT:        "tokLen": 4
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 2090,
+// JSON-NEXT:        "col": 16,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "name": "func",
+// JSON-NEXT:      "mangledName": "_ZN5test24funcEf",
+// JSON-NEXT:      "type": {
+// JSON-NEXT:       "qualType": "void (float)"
+// JSON-NEXT:      },
+// JSON-NEXT:      "inner": [
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "ParmVarDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 2090,
+// JSON-NEXT:         "col": 16,
+// JSON-NEXT:         "tokLen": 1
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 2085,
+// JSON-NEXT:          "col": 11,
+// JSON-NEXT:          "tokLen": 5
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 2085,
+// JSON-NEXT:          "col": 11,
+// JSON-NEXT:          "tokLen": 5
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "float"
+// JSON-NEXT:        }
+// JSON-NEXT:       }
+// JSON-NEXT:      ]
+// JSON-NEXT:     },
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "FunctionTemplateDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 2119,
+// JSON-NEXT:       "line": 66,
+// JSON-NEXT:       "col": 6,
+// JSON-NEXT:       "tokLen": 4
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 2093,
+// JSON-NEXT:        "line": 65,
+// JSON-NEXT:        "col": 1,
+// JSON-NEXT:        "tokLen": 8
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 2141,
+// JSON-NEXT:        "line": 68,
+// JSON-NEXT:        "col": 1,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "name": "tmpl",
+// JSON-NEXT:      "inner": [
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "TemplateTypeParmDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 2111,
+// JSON-NEXT:         "line": 65,
+// JSON-NEXT:         "col": 19,
+// JSON-NEXT:         "tokLen": 1
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 2102,
+// JSON-NEXT:          "col": 10,
+// JSON-NEXT:          "tokLen": 8
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 2111,
+// JSON-NEXT:          "col": 19,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "isReferenced": true,
+// JSON-NEXT:        "name": "T",
+// JSON-NEXT:        "tagUsed": "typename",
+// JSON-NEXT:        "depth": 0,
+// JSON-NEXT:        "index": 0
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "FunctionDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 2119,
+// JSON-NEXT:         "line": 66,
+// JSON-NEXT:         "col": 6,
+// JSON-NEXT:         "tokLen": 4
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 2114,
+// JSON-NEXT:          "col": 1,
+// JSON-NEXT:          "tokLen": 4
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 2141,
+// JSON-NEXT:          "line": 68,
+// JSON-NEXT:          "col": 1,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "name": "tmpl",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "void ()"
+// JSON-NEXT:        },
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "CompoundStmt",
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 2126,
+// JSON-NEXT:            "line": 66,
+// JSON-NEXT:            "col": 13,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 2141,
+// JSON-NEXT:            "line": 68,
+// JSON-NEXT:            "col": 1,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "inner": [
+// JSON-NEXT:           {
+// JSON-NEXT:            "id": "0x{{.*}}",
+// JSON-NEXT:            "kind": "CallExpr",
+// JSON-NEXT:            "range": {
+// JSON-NEXT:             "begin": {
+// JSON-NEXT:              "offset": 2130,
+// JSON-NEXT:              "line": 67,
+// JSON-NEXT:              "col": 3,
+// JSON-NEXT:              "tokLen": 4
+// JSON-NEXT:             },
+// JSON-NEXT:             "end": {
+// JSON-NEXT:              "offset": 2138,
+// JSON-NEXT:              "col": 11,
+// JSON-NEXT:              "tokLen": 1
+// JSON-NEXT:             }
+// JSON-NEXT:            },
+// JSON-NEXT:            "type": {
+// JSON-NEXT:             "qualType": "<dependent type>"
+// JSON-NEXT:            },
+// JSON-NEXT:            "valueCategory": "prvalue",
+// JSON-NEXT:            "inner": [
+// JSON-NEXT:             {
+// JSON-NEXT:              "id": "0x{{.*}}",
+// JSON-NEXT:              "kind": "UnresolvedLookupExpr",
+// JSON-NEXT:              "range": {
+// JSON-NEXT:               "begin": {
+// JSON-NEXT:                "offset": 2130,
+// JSON-NEXT:                "col": 3,
+// JSON-NEXT:                "tokLen": 4
+// JSON-NEXT:               },
+// JSON-NEXT:               "end": {
+// JSON-NEXT:                "offset": 2130,
+// JSON-NEXT:                "col": 3,
+// JSON-NEXT:                "tokLen": 4
+// JSON-NEXT:               }
+// JSON-NEXT:              },
+// JSON-NEXT:              "type": {
+// JSON-NEXT:               "qualType": "<overloaded function type>"
+// JSON-NEXT:              },
+// JSON-NEXT:              "valueCategory": "lvalue",
+// JSON-NEXT:              "usesADL": true,
+// JSON-NEXT:              "name": "func",
+// JSON-NEXT:              "lookups": [
+// JSON-NEXT:               {
+// JSON-NEXT:                "id": "0x{{.*}}",
+// JSON-NEXT:                "kind": "FunctionDecl",
+// JSON-NEXT:                "name": "func",
+// JSON-NEXT:                "type": {
+// JSON-NEXT:                 "qualType": "void (float)"
+// JSON-NEXT:                }
+// JSON-NEXT:               },
+// JSON-NEXT:               {
+// JSON-NEXT:                "id": "0x{{.*}}",
+// JSON-NEXT:                "kind": "FunctionDecl",
+// JSON-NEXT:                "name": "func",
+// JSON-NEXT:                "type": {
+// JSON-NEXT:                 "qualType": "void (int)"
+// JSON-NEXT:                }
+// JSON-NEXT:               }
+// JSON-NEXT:              ]
+// JSON-NEXT:             },
+// JSON-NEXT:             {
+// JSON-NEXT:              "id": "0x{{.*}}",
+// JSON-NEXT:              "kind": "CXXUnresolvedConstructExpr",
+// JSON-NEXT:              "range": {
+// JSON-NEXT:               "begin": {
+// JSON-NEXT:                "offset": 2135,
+// JSON-NEXT:                "col": 8,
+// JSON-NEXT:                "tokLen": 1
+// JSON-NEXT:               },
+// JSON-NEXT:               "end": {
+// JSON-NEXT:                "offset": 2137,
+// JSON-NEXT:                "col": 10,
+// JSON-NEXT:                "tokLen": 1
+// JSON-NEXT:               }
+// JSON-NEXT:              },
+// JSON-NEXT:              "type": {
+// JSON-NEXT:               "qualType": "T"
+// JSON-NEXT:              },
+// JSON-NEXT:              "valueCategory": "prvalue"
+// JSON-NEXT:             }
+// JSON-NEXT:            ]
+// JSON-NEXT:           }
+// JSON-NEXT:          ]
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       }
+// JSON-NEXT:      ]
+// JSON-NEXT:     }
+// JSON-NEXT:    ]
+// JSON-NEXT:   },
+// JSON-NEXT:   {
+// JSON-NEXT:    "id": "0x{{.*}}",
+// JSON-NEXT:    "kind": "NamespaceDecl",
+// JSON-NEXT:    "loc": {
+// JSON-NEXT:     "offset": 2253,
+// JSON-NEXT:     "line": 73,
+// JSON-NEXT:     "col": 11,
+// JSON-NEXT:     "tokLen": 5
+// JSON-NEXT:    },
+// JSON-NEXT:    "range": {
+// JSON-NEXT:     "begin": {
+// JSON-NEXT:      "offset": 2243,
+// JSON-NEXT:      "col": 1,
+// JSON-NEXT:      "tokLen": 9
+// JSON-NEXT:     },
+// JSON-NEXT:     "end": {
+// JSON-NEXT:      "offset": 2387,
+// JSON-NEXT:      "line": 77,
+// JSON-NEXT:      "col": 1,
+// JSON-NEXT:      "tokLen": 1
+// JSON-NEXT:     }
+// JSON-NEXT:    },
+// JSON-NEXT:    "name": "test3",
+// JSON-NEXT:    "inner": [
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "ClassTemplateDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 2291,
+// JSON-NEXT:       "line": 74,
+// JSON-NEXT:       "col": 31,
+// JSON-NEXT:       "tokLen": 1
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 2263,
+// JSON-NEXT:        "col": 3,
+// JSON-NEXT:        "tokLen": 8
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 2294,
+// JSON-NEXT:        "col": 34,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "name": "A",
+// JSON-NEXT:      "inner": [
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "TemplateTypeParmDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 2281,
+// JSON-NEXT:         "col": 21,
+// JSON-NEXT:         "tokLen": 1
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 2272,
+// JSON-NEXT:          "col": 12,
+// JSON-NEXT:          "tokLen": 8
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 2281,
+// JSON-NEXT:          "col": 21,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "name": "T",
+// JSON-NEXT:        "tagUsed": "typename",
+// JSON-NEXT:        "depth": 0,
+// JSON-NEXT:        "index": 0
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "CXXRecordDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 2291,
+// JSON-NEXT:         "col": 31,
+// JSON-NEXT:         "tokLen": 1
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 2284,
+// JSON-NEXT:          "col": 24,
+// JSON-NEXT:          "tokLen": 6
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 2294,
+// JSON-NEXT:          "col": 34,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "name": "A",
+// JSON-NEXT:        "tagUsed": "struct",
+// JSON-NEXT:        "completeDefinition": true,
+// JSON-NEXT:        "definitionData": {
+// JSON-NEXT:         "canConstDefaultInit": true,
+// JSON-NEXT:         "copyAssign": {
+// JSON-NEXT:          "hasConstParam": true,
+// JSON-NEXT:          "implicitHasConstParam": true,
+// JSON-NEXT:          "needsImplicit": true,
+// JSON-NEXT:          "simple": true,
+// JSON-NEXT:          "trivial": true
+// JSON-NEXT:         },
+// JSON-NEXT:         "copyCtor": {
+// JSON-NEXT:          "hasConstParam": true,
+// JSON-NEXT:          "implicitHasConstParam": true,
+// JSON-NEXT:          "needsImplicit": true,
+// JSON-NEXT:          "simple": true,
+// JSON-NEXT:          "trivial": true
+// JSON-NEXT:         },
+// JSON-NEXT:         "defaultCtor": {
+// JSON-NEXT:          "defaultedIsConstexpr": true,
+// JSON-NEXT:          "exists": true,
+// JSON-NEXT:          "isConstexpr": true,
+// JSON-NEXT:          "needsImplicit": true,
+// JSON-NEXT:          "trivial": true
+// JSON-NEXT:         },
+// JSON-NEXT:         "dtor": {
+// JSON-NEXT:          "irrelevant": true,
+// JSON-NEXT:          "needsImplicit": true,
+// JSON-NEXT:          "simple": true,
+// JSON-NEXT:          "trivial": true
+// JSON-NEXT:         },
+// JSON-NEXT:         "hasConstexprNonCopyMoveConstructor": true,
+// JSON-NEXT:         "isAggregate": true,
+// JSON-NEXT:         "isEmpty": true,
+// JSON-NEXT:         "isLiteral": true,
+// JSON-NEXT:         "isPOD": true,
+// JSON-NEXT:         "isStandardLayout": true,
+// JSON-NEXT:         "isTrivial": true,
+// JSON-NEXT:         "isTriviallyCopyable": true,
+// JSON-NEXT:         "moveAssign": {
+// JSON-NEXT:          "exists": true,
+// JSON-NEXT:          "needsImplicit": true,
+// JSON-NEXT:          "simple": true,
+// JSON-NEXT:          "trivial": true
+// JSON-NEXT:         },
+// JSON-NEXT:         "moveCtor": {
+// JSON-NEXT:          "exists": true,
+// JSON-NEXT:          "needsImplicit": true,
+// JSON-NEXT:          "simple": true,
+// JSON-NEXT:          "trivial": true
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "CXXRecordDecl",
+// JSON-NEXT:          "loc": {
+// JSON-NEXT:           "offset": 2291,
+// JSON-NEXT:           "col": 31,
+// JSON-NEXT:           "tokLen": 1
+// JSON-NEXT:          },
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 2284,
+// JSON-NEXT:            "col": 24,
+// JSON-NEXT:            "tokLen": 6
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 2291,
+// JSON-NEXT:            "col": 31,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "isImplicit": true,
+// JSON-NEXT:          "name": "A",
+// JSON-NEXT:          "tagUsed": "struct"
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "ClassTemplateSpecializationDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 2291,
+// JSON-NEXT:         "col": 31,
+// JSON-NEXT:         "tokLen": 1
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 2263,
+// JSON-NEXT:          "col": 3,
+// JSON-NEXT:          "tokLen": 8
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 2294,
+// JSON-NEXT:          "col": 34,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "name": "A",
+// JSON-NEXT:        "tagUsed": "struct",
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "kind": "TemplateArgument",
+// JSON-NEXT:          "type": {
+// JSON-NEXT:           "qualType": "int"
+// JSON-NEXT:          },
+// JSON-NEXT:          "inner": [
+// JSON-NEXT:           {
+// JSON-NEXT:            "id": "0x{{.*}}",
+// JSON-NEXT:            "kind": "BuiltinType",
+// JSON-NEXT:            "type": {
+// JSON-NEXT:             "qualType": "int"
+// JSON-NEXT:            }
+// JSON-NEXT:           }
+// JSON-NEXT:          ]
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       }
+// JSON-NEXT:      ]
+// JSON-NEXT:     },
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "FunctionTemplateDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 2291,
+// JSON-NEXT:       "col": 31,
+// JSON-NEXT:       "tokLen": 1
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 2263,
+// JSON-NEXT:        "col": 3,
+// JSON-NEXT:        "tokLen": 8
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 2291,
+// JSON-NEXT:        "col": 31,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "isImplicit": true,
+// JSON-NEXT:      "name": "<deduction guide for A>",
+// JSON-NEXT:      "inner": [
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "TemplateTypeParmDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 2281,
+// JSON-NEXT:         "col": 21,
+// JSON-NEXT:         "tokLen": 1
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 2272,
+// JSON-NEXT:          "col": 12,
+// JSON-NEXT:          "tokLen": 8
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 2281,
+// JSON-NEXT:          "col": 21,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "name": "T",
+// JSON-NEXT:        "tagUsed": "typename",
+// JSON-NEXT:        "depth": 0,
+// JSON-NEXT:        "index": 0
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "CXXDeductionGuideDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 2291,
+// JSON-NEXT:         "col": 31,
+// JSON-NEXT:         "tokLen": 1
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 2291,
+// JSON-NEXT:          "col": 31,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 2291,
+// JSON-NEXT:          "col": 31,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "isImplicit": true,
+// JSON-NEXT:        "name": "<deduction guide for A>",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "auto () -> A<T>"
+// JSON-NEXT:        }
+// JSON-NEXT:       }
+// JSON-NEXT:      ]
+// JSON-NEXT:     },
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "FunctionTemplateDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 2291,
+// JSON-NEXT:       "col": 31,
+// JSON-NEXT:       "tokLen": 1
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 2263,
+// JSON-NEXT:        "col": 3,
+// JSON-NEXT:        "tokLen": 8
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 2291,
+// JSON-NEXT:        "col": 31,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "isImplicit": true,
+// JSON-NEXT:      "name": "<deduction guide for A>",
+// JSON-NEXT:      "inner": [
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "TemplateTypeParmDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 2281,
+// JSON-NEXT:         "col": 21,
+// JSON-NEXT:         "tokLen": 1
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 2272,
+// JSON-NEXT:          "col": 12,
+// JSON-NEXT:          "tokLen": 8
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 2281,
+// JSON-NEXT:          "col": 21,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "name": "T",
+// JSON-NEXT:        "tagUsed": "typename",
+// JSON-NEXT:        "depth": 0,
+// JSON-NEXT:        "index": 0
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "CXXDeductionGuideDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 2291,
+// JSON-NEXT:         "col": 31,
+// JSON-NEXT:         "tokLen": 1
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 2291,
+// JSON-NEXT:          "col": 31,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 2291,
+// JSON-NEXT:          "col": 31,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "isImplicit": true,
+// JSON-NEXT:        "name": "<deduction guide for A>",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "auto (A<T>) -> A<T>"
+// JSON-NEXT:        },
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "ParmVarDecl",
+// JSON-NEXT:          "loc": {
+// JSON-NEXT:           "offset": 2291,
+// JSON-NEXT:           "col": 31,
+// JSON-NEXT:           "tokLen": 1
+// JSON-NEXT:          },
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 2291,
+// JSON-NEXT:            "col": 31,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 2291,
+// JSON-NEXT:            "col": 31,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "type": {
+// JSON-NEXT:           "qualType": "A<T>"
+// JSON-NEXT:          }
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       }
+// JSON-NEXT:      ]
+// JSON-NEXT:     },
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "FunctionTemplateDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 2320,
+// JSON-NEXT:       "line": 75,
+// JSON-NEXT:       "col": 24,
+// JSON-NEXT:       "tokLen": 1
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 2299,
+// JSON-NEXT:        "col": 3,
+// JSON-NEXT:        "tokLen": 8
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 2333,
+// JSON-NEXT:        "col": 37,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "name": "<deduction guide for A>",
+// JSON-NEXT:      "inner": [
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "TemplateTypeParmDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 2317,
+// JSON-NEXT:         "col": 21,
+// JSON-NEXT:         "tokLen": 1
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 2308,
+// JSON-NEXT:          "col": 12,
+// JSON-NEXT:          "tokLen": 8
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 2317,
+// JSON-NEXT:          "col": 21,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "isReferenced": true,
+// JSON-NEXT:        "name": "T",
+// JSON-NEXT:        "tagUsed": "typename",
+// JSON-NEXT:        "depth": 0,
+// JSON-NEXT:        "index": 0
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "CXXDeductionGuideDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 2320,
+// JSON-NEXT:         "col": 24,
+// JSON-NEXT:         "tokLen": 1
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 2320,
+// JSON-NEXT:          "col": 24,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 2333,
+// JSON-NEXT:          "col": 37,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "name": "<deduction guide for A>",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "auto (T) -> A<int>"
+// JSON-NEXT:        },
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "ParmVarDecl",
+// JSON-NEXT:          "loc": {
+// JSON-NEXT:           "offset": 2323,
+// JSON-NEXT:           "col": 27,
+// JSON-NEXT:           "tokLen": 1
+// JSON-NEXT:          },
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 2322,
+// JSON-NEXT:            "col": 26,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 2322,
+// JSON-NEXT:            "col": 26,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "type": {
+// JSON-NEXT:           "qualType": "T"
+// JSON-NEXT:          }
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       }
+// JSON-NEXT:      ]
+// JSON-NEXT:     }
+// JSON-NEXT:    ]
+// JSON-NEXT:   },
+// JSON-NEXT:   {
+// JSON-NEXT:    "id": "0x{{.*}}",
+// JSON-NEXT:    "kind": "NamespaceDecl",
+// JSON-NEXT:    "loc": {
+// JSON-NEXT:     "offset": 2400,
+// JSON-NEXT:     "line": 79,
+// JSON-NEXT:     "col": 11,
+// JSON-NEXT:     "tokLen": 5
+// JSON-NEXT:    },
+// JSON-NEXT:    "range": {
+// JSON-NEXT:     "begin": {
+// JSON-NEXT:      "offset": 2390,
+// JSON-NEXT:      "col": 1,
+// JSON-NEXT:      "tokLen": 9
+// JSON-NEXT:     },
+// JSON-NEXT:     "end": {
+// JSON-NEXT:      "offset": 3297,
+// JSON-NEXT:      "line": 103,
+// JSON-NEXT:      "col": 1,
+// JSON-NEXT:      "tokLen": 1
+// JSON-NEXT:     }
+// JSON-NEXT:    },
+// JSON-NEXT:    "name": "test4",
+// JSON-NEXT:    "inner": [
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "ClassTemplateDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 2445,
+// JSON-NEXT:       "line": 81,
+// JSON-NEXT:       "col": 8,
+// JSON-NEXT:       "tokLen": 3
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 2408,
+// JSON-NEXT:        "line": 80,
+// JSON-NEXT:        "col": 1,
+// JSON-NEXT:        "tokLen": 8
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 2471,
+// JSON-NEXT:        "line": 83,
+// JSON-NEXT:        "col": 1,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "name": "foo",
+// JSON-NEXT:      "inner": [
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "NonTypeTemplateParmDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 2427,
+// JSON-NEXT:         "line": 80,
+// JSON-NEXT:         "col": 20,
+// JSON-NEXT:         "tokLen": 1
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 2418,
+// JSON-NEXT:          "col": 11,
+// JSON-NEXT:          "tokLen": 8
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 2427,
+// JSON-NEXT:          "col": 20,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "name": "X",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "unsigned int"
+// JSON-NEXT:        },
+// JSON-NEXT:        "depth": 0,
+// JSON-NEXT:        "index": 0
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "NonTypeTemplateParmDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 2435,
+// JSON-NEXT:         "col": 28,
+// JSON-NEXT:         "tokLen": 1
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 2430,
+// JSON-NEXT:          "col": 23,
+// JSON-NEXT:          "tokLen": 4
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 2435,
+// JSON-NEXT:          "col": 28,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "name": "A",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "auto"
+// JSON-NEXT:        },
+// JSON-NEXT:        "depth": 0,
+// JSON-NEXT:        "index": 1
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "CXXRecordDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 2445,
+// JSON-NEXT:         "line": 81,
+// JSON-NEXT:         "col": 8,
+// JSON-NEXT:         "tokLen": 3
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 2438,
+// JSON-NEXT:          "col": 1,
+// JSON-NEXT:          "tokLen": 6
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 2471,
+// JSON-NEXT:          "line": 83,
+// JSON-NEXT:          "col": 1,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "name": "foo",
+// JSON-NEXT:        "tagUsed": "struct",
+// JSON-NEXT:        "completeDefinition": true,
+// JSON-NEXT:        "definitionData": {
+// JSON-NEXT:         "canConstDefaultInit": true,
+// JSON-NEXT:         "copyAssign": {
+// JSON-NEXT:          "hasConstParam": true,
+// JSON-NEXT:          "implicitHasConstParam": true,
+// JSON-NEXT:          "needsImplicit": true,
+// JSON-NEXT:          "simple": true,
+// JSON-NEXT:          "trivial": true
+// JSON-NEXT:         },
+// JSON-NEXT:         "copyCtor": {
+// JSON-NEXT:          "hasConstParam": true,
+// JSON-NEXT:          "implicitHasConstParam": true,
+// JSON-NEXT:          "needsImplicit": true,
+// JSON-NEXT:          "simple": true,
+// JSON-NEXT:          "trivial": true
+// JSON-NEXT:         },
+// JSON-NEXT:         "defaultCtor": {
+// JSON-NEXT:          "defaultedIsConstexpr": true,
+// JSON-NEXT:          "exists": true,
+// JSON-NEXT:          "isConstexpr": true,
+// JSON-NEXT:          "needsImplicit": true,
+// JSON-NEXT:          "trivial": true
+// JSON-NEXT:         },
+// JSON-NEXT:         "dtor": {
+// JSON-NEXT:          "irrelevant": true,
+// JSON-NEXT:          "needsImplicit": true,
+// JSON-NEXT:          "simple": true,
+// JSON-NEXT:          "trivial": true
+// JSON-NEXT:         },
+// JSON-NEXT:         "hasConstexprNonCopyMoveConstructor": true,
+// JSON-NEXT:         "isAggregate": true,
+// JSON-NEXT:         "isEmpty": true,
+// JSON-NEXT:         "isLiteral": true,
+// JSON-NEXT:         "isPOD": true,
+// JSON-NEXT:         "isStandardLayout": true,
+// JSON-NEXT:         "isTrivial": true,
+// JSON-NEXT:         "isTriviallyCopyable": true,
+// JSON-NEXT:         "moveAssign": {
+// JSON-NEXT:          "exists": true,
+// JSON-NEXT:          "needsImplicit": true,
+// JSON-NEXT:          "simple": true,
+// JSON-NEXT:          "trivial": true
+// JSON-NEXT:         },
+// JSON-NEXT:         "moveCtor": {
+// JSON-NEXT:          "exists": true,
+// JSON-NEXT:          "needsImplicit": true,
+// JSON-NEXT:          "simple": true,
+// JSON-NEXT:          "trivial": true
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "CXXRecordDecl",
+// JSON-NEXT:          "loc": {
+// JSON-NEXT:           "offset": 2445,
+// JSON-NEXT:           "line": 81,
+// JSON-NEXT:           "col": 8,
+// JSON-NEXT:           "tokLen": 3
+// JSON-NEXT:          },
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 2438,
+// JSON-NEXT:            "col": 1,
+// JSON-NEXT:            "tokLen": 6
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 2445,
+// JSON-NEXT:            "col": 8,
+// JSON-NEXT:            "tokLen": 3
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "isImplicit": true,
+// JSON-NEXT:          "name": "foo",
+// JSON-NEXT:          "tagUsed": "struct"
+// JSON-NEXT:         },
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "CXXMethodDecl",
+// JSON-NEXT:          "loc": {
+// JSON-NEXT:           "offset": 2465,
+// JSON-NEXT:           "line": 82,
+// JSON-NEXT:           "col": 15,
+// JSON-NEXT:           "tokLen": 2
+// JSON-NEXT:          },
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 2453,
+// JSON-NEXT:            "col": 3,
+// JSON-NEXT:            "tokLen": 6
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 2468,
+// JSON-NEXT:            "col": 18,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "name": "fn",
+// JSON-NEXT:          "type": {
+// JSON-NEXT:           "qualType": "void ()"
+// JSON-NEXT:          },
+// JSON-NEXT:          "storageClass": "static"
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "ClassTemplateSpecializationDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 2445,
+// JSON-NEXT:         "line": 81,
+// JSON-NEXT:         "col": 8,
+// JSON-NEXT:         "tokLen": 3
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 2408,
+// JSON-NEXT:          "line": 80,
+// JSON-NEXT:          "col": 1,
+// JSON-NEXT:          "tokLen": 8
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 2471,
+// JSON-NEXT:          "line": 83,
+// JSON-NEXT:          "col": 1,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "name": "foo",
+// JSON-NEXT:        "tagUsed": "struct",
+// JSON-NEXT:        "completeDefinition": true,
+// JSON-NEXT:        "definitionData": {
+// JSON-NEXT:         "canConstDefaultInit": true,
+// JSON-NEXT:         "canPassInRegisters": true,
+// JSON-NEXT:         "copyAssign": {
+// JSON-NEXT:          "hasConstParam": true,
+// JSON-NEXT:          "implicitHasConstParam": true,
+// JSON-NEXT:          "needsImplicit": true,
+// JSON-NEXT:          "simple": true,
+// JSON-NEXT:          "trivial": true
+// JSON-NEXT:         },
+// JSON-NEXT:         "copyCtor": {
+// JSON-NEXT:          "hasConstParam": true,
+// JSON-NEXT:          "implicitHasConstParam": true,
+// JSON-NEXT:          "needsImplicit": true,
+// JSON-NEXT:          "simple": true,
+// JSON-NEXT:          "trivial": true
+// JSON-NEXT:         },
+// JSON-NEXT:         "defaultCtor": {
+// JSON-NEXT:          "defaultedIsConstexpr": true,
+// JSON-NEXT:          "exists": true,
+// JSON-NEXT:          "isConstexpr": true,
+// JSON-NEXT:          "needsImplicit": true,
+// JSON-NEXT:          "trivial": true
+// JSON-NEXT:         },
+// JSON-NEXT:         "dtor": {
+// JSON-NEXT:          "irrelevant": true,
+// JSON-NEXT:          "needsImplicit": true,
+// JSON-NEXT:          "simple": true,
+// JSON-NEXT:          "trivial": true
+// JSON-NEXT:         },
+// JSON-NEXT:         "hasConstexprNonCopyMoveConstructor": true,
+// JSON-NEXT:         "isAggregate": true,
+// JSON-NEXT:         "isEmpty": true,
+// JSON-NEXT:         "isLiteral": true,
+// JSON-NEXT:         "isPOD": true,
+// JSON-NEXT:         "isStandardLayout": true,
+// JSON-NEXT:         "isTrivial": true,
+// JSON-NEXT:         "isTriviallyCopyable": true,
+// JSON-NEXT:         "moveAssign": {
+// JSON-NEXT:          "exists": true,
+// JSON-NEXT:          "needsImplicit": true,
+// JSON-NEXT:          "simple": true,
+// JSON-NEXT:          "trivial": true
+// JSON-NEXT:         },
+// JSON-NEXT:         "moveCtor": {
+// JSON-NEXT:          "exists": true,
+// JSON-NEXT:          "needsImplicit": true,
+// JSON-NEXT:          "simple": true,
+// JSON-NEXT:          "trivial": true
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "kind": "TemplateArgument",
+// JSON-NEXT:          "value": 0
+// JSON-NEXT:         },
+// JSON-NEXT:         {
+// JSON-NEXT:          "kind": "TemplateArgument",
+// JSON-NEXT:          "value": 0
+// JSON-NEXT:         },
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "CXXRecordDecl",
+// JSON-NEXT:          "loc": {
+// JSON-NEXT:           "offset": 2445,
+// JSON-NEXT:           "line": 81,
+// JSON-NEXT:           "col": 8,
+// JSON-NEXT:           "tokLen": 3
+// JSON-NEXT:          },
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 2438,
+// JSON-NEXT:            "col": 1,
+// JSON-NEXT:            "tokLen": 6
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 2445,
+// JSON-NEXT:            "col": 8,
+// JSON-NEXT:            "tokLen": 3
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "isImplicit": true,
+// JSON-NEXT:          "name": "foo",
+// JSON-NEXT:          "tagUsed": "struct"
+// JSON-NEXT:         },
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "CXXMethodDecl",
+// JSON-NEXT:          "loc": {
+// JSON-NEXT:           "offset": 2465,
+// JSON-NEXT:           "line": 82,
+// JSON-NEXT:           "col": 15,
+// JSON-NEXT:           "tokLen": 2
+// JSON-NEXT:          },
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 2453,
+// JSON-NEXT:            "col": 3,
+// JSON-NEXT:            "tokLen": 6
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 2468,
+// JSON-NEXT:            "col": 18,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "isUsed": true,
+// JSON-NEXT:          "name": "fn",
+// JSON-NEXT:          "mangledName": "_ZN5test43fooILj0ELl0EE2fnEv",
+// JSON-NEXT:          "type": {
+// JSON-NEXT:           "qualType": "void ()"
+// JSON-NEXT:          },
+// JSON-NEXT:          "storageClass": "static"
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "ClassTemplateSpecializationDecl",
+// JSON-NEXT:        "name": "foo"
+// JSON-NEXT:       }
+// JSON-NEXT:      ]
+// JSON-NEXT:     },
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "FunctionDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 2846,
+// JSON-NEXT:       "line": 92,
+// JSON-NEXT:       "col": 6,
+// JSON-NEXT:       "tokLen": 4
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 2841,
+// JSON-NEXT:        "col": 1,
+// JSON-NEXT:        "tokLen": 4
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 2879,
+// JSON-NEXT:        "line": 94,
+// JSON-NEXT:        "col": 1,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "name": "test",
+// JSON-NEXT:      "mangledName": "_ZN5test44testEv",
+// JSON-NEXT:      "type": {
+// JSON-NEXT:       "qualType": "void ()"
+// JSON-NEXT:      },
+// JSON-NEXT:      "inner": [
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "CompoundStmt",
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 2853,
+// JSON-NEXT:          "line": 92,
+// JSON-NEXT:          "col": 13,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 2879,
+// JSON-NEXT:          "line": 94,
+// JSON-NEXT:          "col": 1,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "CallExpr",
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 2857,
+// JSON-NEXT:            "line": 93,
+// JSON-NEXT:            "col": 3,
+// JSON-NEXT:            "tokLen": 3
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 2876,
+// JSON-NEXT:            "col": 22,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "type": {
+// JSON-NEXT:           "qualType": "void"
+// JSON-NEXT:          },
+// JSON-NEXT:          "valueCategory": "prvalue",
+// JSON-NEXT:          "inner": [
+// JSON-NEXT:           {
+// JSON-NEXT:            "id": "0x{{.*}}",
+// JSON-NEXT:            "kind": "ImplicitCastExpr",
+// JSON-NEXT:            "range": {
+// JSON-NEXT:             "begin": {
+// JSON-NEXT:              "offset": 2857,
+// JSON-NEXT:              "col": 3,
+// JSON-NEXT:              "tokLen": 3
+// JSON-NEXT:             },
+// JSON-NEXT:             "end": {
+// JSON-NEXT:              "offset": 2873,
+// JSON-NEXT:              "col": 19,
+// JSON-NEXT:              "tokLen": 2
+// JSON-NEXT:             }
+// JSON-NEXT:            },
+// JSON-NEXT:            "type": {
+// JSON-NEXT:             "qualType": "void (*)()"
+// JSON-NEXT:            },
+// JSON-NEXT:            "valueCategory": "prvalue",
+// JSON-NEXT:            "castKind": "FunctionToPointerDecay",
+// JSON-NEXT:            "inner": [
+// JSON-NEXT:             {
+// JSON-NEXT:              "id": "0x{{.*}}",
+// JSON-NEXT:              "kind": "DeclRefExpr",
+// JSON-NEXT:              "range": {
+// JSON-NEXT:               "begin": {
+// JSON-NEXT:                "offset": 2857,
+// JSON-NEXT:                "col": 3,
+// JSON-NEXT:                "tokLen": 3
+// JSON-NEXT:               },
+// JSON-NEXT:               "end": {
+// JSON-NEXT:                "offset": 2873,
+// JSON-NEXT:                "col": 19,
+// JSON-NEXT:                "tokLen": 2
+// JSON-NEXT:               }
+// JSON-NEXT:              },
+// JSON-NEXT:              "type": {
+// JSON-NEXT:               "qualType": "void ()"
+// JSON-NEXT:              },
+// JSON-NEXT:              "valueCategory": "lvalue",
+// JSON-NEXT:              "referencedDecl": {
+// JSON-NEXT:               "id": "0x{{.*}}",
+// JSON-NEXT:               "kind": "CXXMethodDecl",
+// JSON-NEXT:               "name": "fn",
+// JSON-NEXT:               "type": {
+// JSON-NEXT:                "qualType": "void ()"
+// JSON-NEXT:               }
+// JSON-NEXT:              }
+// JSON-NEXT:             }
+// JSON-NEXT:            ]
+// JSON-NEXT:           }
+// JSON-NEXT:          ]
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       }
+// JSON-NEXT:      ]
+// JSON-NEXT:     },
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "ClassTemplateSpecializationDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 3281,
+// JSON-NEXT:       "line": 102,
+// JSON-NEXT:       "col": 17,
+// JSON-NEXT:       "tokLen": 3
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 3265,
+// JSON-NEXT:        "col": 1,
+// JSON-NEXT:        "tokLen": 8
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 3294,
+// JSON-NEXT:        "col": 30,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "name": "foo",
+// JSON-NEXT:      "tagUsed": "struct",
+// JSON-NEXT:      "completeDefinition": true,
+// JSON-NEXT:      "definitionData": {
+// JSON-NEXT:       "canConstDefaultInit": true,
+// JSON-NEXT:       "canPassInRegisters": true,
+// JSON-NEXT:       "copyAssign": {
+// JSON-NEXT:        "hasConstParam": true,
+// JSON-NEXT:        "implicitHasConstParam": true,
+// JSON-NEXT:        "needsImplicit": true,
+// JSON-NEXT:        "simple": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       },
+// JSON-NEXT:       "copyCtor": {
+// JSON-NEXT:        "hasConstParam": true,
+// JSON-NEXT:        "implicitHasConstParam": true,
+// JSON-NEXT:        "needsImplicit": true,
+// JSON-NEXT:        "simple": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       },
+// JSON-NEXT:       "defaultCtor": {
+// JSON-NEXT:        "defaultedIsConstexpr": true,
+// JSON-NEXT:        "exists": true,
+// JSON-NEXT:        "isConstexpr": true,
+// JSON-NEXT:        "needsImplicit": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       },
+// JSON-NEXT:       "dtor": {
+// JSON-NEXT:        "irrelevant": true,
+// JSON-NEXT:        "needsImplicit": true,
+// JSON-NEXT:        "simple": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       },
+// JSON-NEXT:       "hasConstexprNonCopyMoveConstructor": true,
+// JSON-NEXT:       "isAggregate": true,
+// JSON-NEXT:       "isEmpty": true,
+// JSON-NEXT:       "isLiteral": true,
+// JSON-NEXT:       "isPOD": true,
+// JSON-NEXT:       "isStandardLayout": true,
+// JSON-NEXT:       "isTrivial": true,
+// JSON-NEXT:       "isTriviallyCopyable": true,
+// JSON-NEXT:       "moveAssign": {
+// JSON-NEXT:        "exists": true,
+// JSON-NEXT:        "needsImplicit": true,
+// JSON-NEXT:        "simple": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       },
+// JSON-NEXT:       "moveCtor": {
+// JSON-NEXT:        "exists": true,
+// JSON-NEXT:        "needsImplicit": true,
+// JSON-NEXT:        "simple": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "inner": [
+// JSON-NEXT:       {
+// JSON-NEXT:        "kind": "TemplateArgument",
+// JSON-NEXT:        "value": 1
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "kind": "TemplateArgument",
+// JSON-NEXT:        "value": 0
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "CXXRecordDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 2445,
+// JSON-NEXT:         "line": 81,
+// JSON-NEXT:         "col": 8,
+// JSON-NEXT:         "tokLen": 3
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 2438,
+// JSON-NEXT:          "col": 1,
+// JSON-NEXT:          "tokLen": 6
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 2445,
+// JSON-NEXT:          "col": 8,
+// JSON-NEXT:          "tokLen": 3
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "isImplicit": true,
+// JSON-NEXT:        "name": "foo",
+// JSON-NEXT:        "tagUsed": "struct"
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "CXXMethodDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 2465,
+// JSON-NEXT:         "line": 82,
+// JSON-NEXT:         "col": 15,
+// JSON-NEXT:         "tokLen": 2
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 2453,
+// JSON-NEXT:          "col": 3,
+// JSON-NEXT:          "tokLen": 6
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 2468,
+// JSON-NEXT:          "col": 18,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "name": "fn",
+// JSON-NEXT:        "mangledName": "_ZN5test43fooILj1ELl0EE2fnEv",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "void ()"
+// JSON-NEXT:        },
+// JSON-NEXT:        "storageClass": "static"
+// JSON-NEXT:       }
+// JSON-NEXT:      ]
+// JSON-NEXT:     }
+// JSON-NEXT:    ]
+// JSON-NEXT:   },
+// JSON-NEXT:   {
+// JSON-NEXT:    "id": "0x{{.*}}",
+// JSON-NEXT:    "kind": "NamespaceDecl",
+// JSON-NEXT:    "loc": {
+// JSON-NEXT:     "offset": 3310,
+// JSON-NEXT:     "line": 105,
+// JSON-NEXT:     "col": 11,
+// JSON-NEXT:     "tokLen": 5
+// JSON-NEXT:    },
+// JSON-NEXT:    "range": {
+// JSON-NEXT:     "begin": {
+// JSON-NEXT:      "offset": 3300,
+// JSON-NEXT:      "col": 1,
+// JSON-NEXT:      "tokLen": 9
+// JSON-NEXT:     },
+// JSON-NEXT:     "end": {
+// JSON-NEXT:      "offset": 3632,
+// JSON-NEXT:      "line": 114,
+// JSON-NEXT:      "col": 1,
+// JSON-NEXT:      "tokLen": 1
+// JSON-NEXT:     }
+// JSON-NEXT:    },
+// JSON-NEXT:    "name": "test5",
+// JSON-NEXT:    "inner": [
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "FunctionTemplateDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 3338,
+// JSON-NEXT:       "line": 106,
+// JSON-NEXT:       "col": 21,
+// JSON-NEXT:       "tokLen": 1
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 3318,
+// JSON-NEXT:        "col": 1,
+// JSON-NEXT:        "tokLen": 8
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 3343,
+// JSON-NEXT:        "col": 26,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "name": "f",
+// JSON-NEXT:      "inner": [
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "NonTypeTemplateParmDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 3331,
+// JSON-NEXT:         "col": 14,
+// JSON-NEXT:         "tokLen": 1
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 3327,
+// JSON-NEXT:          "col": 10,
+// JSON-NEXT:          "tokLen": 4
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 3327,
+// JSON-NEXT:          "col": 10,
+// JSON-NEXT:          "tokLen": 4
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "long"
+// JSON-NEXT:        },
+// JSON-NEXT:        "depth": 0,
+// JSON-NEXT:        "index": 0
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "FunctionDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 3338,
+// JSON-NEXT:         "col": 21,
+// JSON-NEXT:         "tokLen": 1
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 3333,
+// JSON-NEXT:          "col": 16,
+// JSON-NEXT:          "tokLen": 4
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 3343,
+// JSON-NEXT:          "col": 26,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "name": "f",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "void ()"
+// JSON-NEXT:        },
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "CompoundStmt",
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 3342,
+// JSON-NEXT:            "col": 25,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 3343,
+// JSON-NEXT:            "col": 26,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          }
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "FunctionDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 3338,
+// JSON-NEXT:         "col": 21,
+// JSON-NEXT:         "tokLen": 1
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 3333,
+// JSON-NEXT:          "col": 16,
+// JSON-NEXT:          "tokLen": 4
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 3343,
+// JSON-NEXT:          "col": 26,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "isUsed": true,
+// JSON-NEXT:        "name": "f",
+// JSON-NEXT:        "mangledName": "_ZN5test51fILl0EEEvv",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "void ()"
+// JSON-NEXT:        },
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "kind": "TemplateArgument",
+// JSON-NEXT:          "value": 0
+// JSON-NEXT:         },
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "CompoundStmt",
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 3342,
+// JSON-NEXT:            "col": 25,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 3343,
+// JSON-NEXT:            "col": 26,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          }
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       }
+// JSON-NEXT:      ]
+// JSON-NEXT:     },
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "VarDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 3352,
+// JSON-NEXT:       "line": 107,
+// JSON-NEXT:       "col": 8,
+// JSON-NEXT:       "tokLen": 1
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 3345,
+// JSON-NEXT:        "col": 1,
+// JSON-NEXT:        "tokLen": 4
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 3362,
+// JSON-NEXT:        "col": 18,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "name": "p",
+// JSON-NEXT:      "mangledName": "_ZN5test51pE",
+// JSON-NEXT:      "type": {
+// JSON-NEXT:       "qualType": "void (*)()"
+// JSON-NEXT:      },
+// JSON-NEXT:      "init": "c",
+// JSON-NEXT:      "inner": [
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "ImplicitCastExpr",
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 3359,
+// JSON-NEXT:          "col": 15,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 3362,
+// JSON-NEXT:          "col": 18,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "void (*)()"
+// JSON-NEXT:        },
+// JSON-NEXT:        "valueCategory": "prvalue",
+// JSON-NEXT:        "castKind": "FunctionToPointerDecay",
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "DeclRefExpr",
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 3359,
+// JSON-NEXT:            "col": 15,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 3362,
+// JSON-NEXT:            "col": 18,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "type": {
+// JSON-NEXT:           "qualType": "void ()"
+// JSON-NEXT:          },
+// JSON-NEXT:          "valueCategory": "lvalue",
+// JSON-NEXT:          "referencedDecl": {
+// JSON-NEXT:           "id": "0x{{.*}}",
+// JSON-NEXT:           "kind": "FunctionDecl",
+// JSON-NEXT:           "name": "f",
+// JSON-NEXT:           "type": {
+// JSON-NEXT:            "qualType": "void ()"
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "foundReferencedDecl": {
+// JSON-NEXT:           "id": "0x{{.*}}",
+// JSON-NEXT:           "kind": "FunctionTemplateDecl",
+// JSON-NEXT:           "name": "f"
+// JSON-NEXT:          }
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       }
+// JSON-NEXT:      ]
+// JSON-NEXT:     },
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "FunctionTemplateDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 3393,
+// JSON-NEXT:       "line": 108,
+// JSON-NEXT:       "col": 29,
+// JSON-NEXT:       "tokLen": 1
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 3365,
+// JSON-NEXT:        "col": 1,
+// JSON-NEXT:        "tokLen": 8
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 3398,
+// JSON-NEXT:        "col": 34,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "name": "f",
+// JSON-NEXT:      "inner": [
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "NonTypeTemplateParmDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 3383,
+// JSON-NEXT:         "col": 19,
+// JSON-NEXT:         "tokLen": 1
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 3374,
+// JSON-NEXT:          "col": 10,
+// JSON-NEXT:          "tokLen": 8
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 3385,
+// JSON-NEXT:          "col": 21,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "unsigned int"
+// JSON-NEXT:        },
+// JSON-NEXT:        "depth": 0,
+// JSON-NEXT:        "index": 0,
+// JSON-NEXT:        "defaultArg": {
+// JSON-NEXT:         "kind": "TemplateArgument",
+// JSON-NEXT:         "isExpr": true
+// JSON-NEXT:        },
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "kind": "TemplateArgument",
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 3385,
+// JSON-NEXT:            "col": 21,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 3385,
+// JSON-NEXT:            "col": 21,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "isExpr": true,
+// JSON-NEXT:          "inner": [
+// JSON-NEXT:           {
+// JSON-NEXT:            "id": "0x{{.*}}",
+// JSON-NEXT:            "kind": "IntegerLiteral",
+// JSON-NEXT:            "range": {
+// JSON-NEXT:             "begin": {
+// JSON-NEXT:              "offset": 3385,
+// JSON-NEXT:              "col": 21,
+// JSON-NEXT:              "tokLen": 1
+// JSON-NEXT:             },
+// JSON-NEXT:             "end": {
+// JSON-NEXT:              "offset": 3385,
+// JSON-NEXT:              "col": 21,
+// JSON-NEXT:              "tokLen": 1
+// JSON-NEXT:             }
+// JSON-NEXT:            },
+// JSON-NEXT:            "type": {
+// JSON-NEXT:             "qualType": "int"
+// JSON-NEXT:            },
+// JSON-NEXT:            "valueCategory": "prvalue",
+// JSON-NEXT:            "value": "0"
+// JSON-NEXT:           }
+// JSON-NEXT:          ]
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "FunctionDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 3393,
+// JSON-NEXT:         "col": 29,
+// JSON-NEXT:         "tokLen": 1
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 3388,
+// JSON-NEXT:          "col": 24,
+// JSON-NEXT:          "tokLen": 4
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 3398,
+// JSON-NEXT:          "col": 34,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "name": "f",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "void ()"
+// JSON-NEXT:        },
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "CompoundStmt",
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 3397,
+// JSON-NEXT:            "col": 33,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 3398,
+// JSON-NEXT:            "col": 34,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          }
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "FunctionDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 3393,
+// JSON-NEXT:         "col": 29,
+// JSON-NEXT:         "tokLen": 1
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 3388,
+// JSON-NEXT:          "col": 24,
+// JSON-NEXT:          "tokLen": 4
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 3398,
+// JSON-NEXT:          "col": 34,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "isUsed": true,
+// JSON-NEXT:        "name": "f",
+// JSON-NEXT:        "mangledName": "_ZN5test51fILj0EEEvv",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "void ()"
+// JSON-NEXT:        },
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "kind": "TemplateArgument",
+// JSON-NEXT:          "value": 0
+// JSON-NEXT:         },
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "CompoundStmt",
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 3397,
+// JSON-NEXT:            "col": 33,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 3398,
+// JSON-NEXT:            "col": 34,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          }
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       }
+// JSON-NEXT:      ]
+// JSON-NEXT:     },
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "VarDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 3407,
+// JSON-NEXT:       "line": 109,
+// JSON-NEXT:       "col": 8,
+// JSON-NEXT:       "tokLen": 1
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 3400,
+// JSON-NEXT:        "col": 1,
+// JSON-NEXT:        "tokLen": 4
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 3416,
+// JSON-NEXT:        "col": 17,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "name": "q",
+// JSON-NEXT:      "mangledName": "_ZN5test51qE",
+// JSON-NEXT:      "type": {
+// JSON-NEXT:       "qualType": "void (*)()"
+// JSON-NEXT:      },
+// JSON-NEXT:      "init": "c",
+// JSON-NEXT:      "inner": [
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "ImplicitCastExpr",
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 3414,
+// JSON-NEXT:          "col": 15,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 3416,
+// JSON-NEXT:          "col": 17,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "void (*)()"
+// JSON-NEXT:        },
+// JSON-NEXT:        "valueCategory": "prvalue",
+// JSON-NEXT:        "castKind": "FunctionToPointerDecay",
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "DeclRefExpr",
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 3414,
+// JSON-NEXT:            "col": 15,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 3416,
+// JSON-NEXT:            "col": 17,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "type": {
+// JSON-NEXT:           "qualType": "void ()"
+// JSON-NEXT:          },
+// JSON-NEXT:          "valueCategory": "lvalue",
+// JSON-NEXT:          "referencedDecl": {
+// JSON-NEXT:           "id": "0x{{.*}}",
+// JSON-NEXT:           "kind": "FunctionDecl",
+// JSON-NEXT:           "name": "f",
+// JSON-NEXT:           "type": {
+// JSON-NEXT:            "qualType": "void ()"
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "foundReferencedDecl": {
+// JSON-NEXT:           "id": "0x{{.*}}",
+// JSON-NEXT:           "kind": "FunctionTemplateDecl",
+// JSON-NEXT:           "name": "f"
+// JSON-NEXT:          }
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       }
+// JSON-NEXT:      ]
+// JSON-NEXT:     }
+// JSON-NEXT:    ]
+// JSON-NEXT:   },
+// JSON-NEXT:   {
+// JSON-NEXT:    "id": "0x{{.*}}",
+// JSON-NEXT:    "kind": "NamespaceDecl",
+// JSON-NEXT:    "loc": {
+// JSON-NEXT:     "offset": 3645,
+// JSON-NEXT:     "line": 116,
+// JSON-NEXT:     "col": 11,
+// JSON-NEXT:     "tokLen": 5
+// JSON-NEXT:    },
+// JSON-NEXT:    "range": {
+// JSON-NEXT:     "begin": {
+// JSON-NEXT:      "offset": 3635,
+// JSON-NEXT:      "col": 1,
+// JSON-NEXT:      "tokLen": 9
+// JSON-NEXT:     },
+// JSON-NEXT:     "end": {
+// JSON-NEXT:      "offset": 4000,
+// JSON-NEXT:      "line": 128,
+// JSON-NEXT:      "col": 1,
+// JSON-NEXT:      "tokLen": 1
+// JSON-NEXT:     }
+// JSON-NEXT:    },
+// JSON-NEXT:    "name": "test6",
+// JSON-NEXT:    "inner": [
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "VarTemplateDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 3687,
+// JSON-NEXT:       "line": 118,
+// JSON-NEXT:       "col": 16,
+// JSON-NEXT:       "tokLen": 1
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 3653,
+// JSON-NEXT:        "line": 117,
+// JSON-NEXT:        "col": 1,
+// JSON-NEXT:        "tokLen": 8
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 3691,
+// JSON-NEXT:        "line": 118,
+// JSON-NEXT:        "col": 20,
+// JSON-NEXT:        "tokLen": 4
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "name": "C",
+// JSON-NEXT:      "inner": [
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "TemplateTypeParmDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 3669,
+// JSON-NEXT:         "line": 117,
+// JSON-NEXT:         "col": 17,
+// JSON-NEXT:         "tokLen": 1
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 3663,
+// JSON-NEXT:          "col": 11,
+// JSON-NEXT:          "tokLen": 5
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 3669,
+// JSON-NEXT:          "col": 17,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "name": "D",
+// JSON-NEXT:        "tagUsed": "class",
+// JSON-NEXT:        "depth": 0,
+// JSON-NEXT:        "index": 0
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "VarDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 3687,
+// JSON-NEXT:         "line": 118,
+// JSON-NEXT:         "col": 16,
+// JSON-NEXT:         "tokLen": 1
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 3672,
+// JSON-NEXT:          "col": 1,
+// JSON-NEXT:          "tokLen": 9
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 3691,
+// JSON-NEXT:          "col": 20,
+// JSON-NEXT:          "tokLen": 4
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "name": "C",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "const bool"
+// JSON-NEXT:        },
+// JSON-NEXT:        "constexpr": true,
+// JSON-NEXT:        "init": "c",
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "CXXBoolLiteralExpr",
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 3691,
+// JSON-NEXT:            "col": 20,
+// JSON-NEXT:            "tokLen": 4
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 3691,
+// JSON-NEXT:            "col": 20,
+// JSON-NEXT:            "tokLen": 4
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "type": {
+// JSON-NEXT:           "qualType": "bool"
+// JSON-NEXT:          },
+// JSON-NEXT:          "valueCategory": "prvalue",
+// JSON-NEXT:          "value": true
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       }
+// JSON-NEXT:      ]
+// JSON-NEXT:     },
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "FunctionTemplateDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 3724,
+// JSON-NEXT:       "line": 121,
+// JSON-NEXT:       "col": 6,
+// JSON-NEXT:       "tokLen": 4
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 3698,
+// JSON-NEXT:        "line": 120,
+// JSON-NEXT:        "col": 1,
+// JSON-NEXT:        "tokLen": 8
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 3998,
+// JSON-NEXT:        "line": 127,
+// JSON-NEXT:        "col": 1,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "name": "func",
+// JSON-NEXT:      "inner": [
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "TemplateTypeParmDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 3714,
+// JSON-NEXT:         "line": 120,
+// JSON-NEXT:         "col": 17,
+// JSON-NEXT:         "tokLen": 3
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 3708,
+// JSON-NEXT:          "col": 11,
+// JSON-NEXT:          "tokLen": 5
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 3714,
+// JSON-NEXT:          "col": 17,
+// JSON-NEXT:          "tokLen": 3
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "isReferenced": true,
+// JSON-NEXT:        "name": "Key",
+// JSON-NEXT:        "tagUsed": "class",
+// JSON-NEXT:        "depth": 0,
+// JSON-NEXT:        "index": 0
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "FunctionDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 3724,
+// JSON-NEXT:         "line": 121,
+// JSON-NEXT:         "col": 6,
+// JSON-NEXT:         "tokLen": 4
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 3719,
+// JSON-NEXT:          "col": 1,
+// JSON-NEXT:          "tokLen": 4
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 3998,
+// JSON-NEXT:          "line": 127,
+// JSON-NEXT:          "col": 1,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "name": "func",
+// JSON-NEXT:        "type": {
+// JSON-NEXT:         "qualType": "void ()"
+// JSON-NEXT:        },
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "CompoundStmt",
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 3731,
+// JSON-NEXT:            "line": 121,
+// JSON-NEXT:            "col": 13,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 3998,
+// JSON-NEXT:            "line": 127,
+// JSON-NEXT:            "col": 1,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "inner": [
+// JSON-NEXT:           {
+// JSON-NEXT:            "id": "0x{{.*}}",
+// JSON-NEXT:            "kind": "UnresolvedLookupExpr",
+// JSON-NEXT:            "range": {
+// JSON-NEXT:             "begin": {
+// JSON-NEXT:              "offset": 3735,
+// JSON-NEXT:              "line": 122,
+// JSON-NEXT:              "col": 3,
+// JSON-NEXT:              "tokLen": 1
+// JSON-NEXT:             },
+// JSON-NEXT:             "end": {
+// JSON-NEXT:              "offset": 3740,
+// JSON-NEXT:              "col": 8,
+// JSON-NEXT:              "tokLen": 1
+// JSON-NEXT:             }
+// JSON-NEXT:            },
+// JSON-NEXT:            "type": {
+// JSON-NEXT:             "qualType": "<dependent type>"
+// JSON-NEXT:            },
+// JSON-NEXT:            "valueCategory": "lvalue",
+// JSON-NEXT:            "usesADL": false,
+// JSON-NEXT:            "name": "C",
+// JSON-NEXT:            "lookups": [
+// JSON-NEXT:             {
+// JSON-NEXT:              "id": "0x{{.*}}",
+// JSON-NEXT:              "kind": "VarTemplateDecl",
+// JSON-NEXT:              "name": "C"
+// JSON-NEXT:             }
+// JSON-NEXT:            ],
+// JSON-NEXT:            "inner": [
+// JSON-NEXT:             {
+// JSON-NEXT:              "kind": "TemplateArgument",
+// JSON-NEXT:              "type": {
+// JSON-NEXT:               "qualType": "Key"
+// JSON-NEXT:              },
+// JSON-NEXT:              "inner": [
+// JSON-NEXT:               {
+// JSON-NEXT:                "id": "0x{{.*}}",
+// JSON-NEXT:                "kind": "TemplateTypeParmType",
+// JSON-NEXT:                "type": {
+// JSON-NEXT:                 "qualType": "Key"
+// JSON-NEXT:                },
+// JSON-NEXT:                "isDependent": true,
+// JSON-NEXT:                "isInstantiationDependent": true,
+// JSON-NEXT:                "depth": 0,
+// JSON-NEXT:                "index": 0,
+// JSON-NEXT:                "decl": {
+// JSON-NEXT:                 "id": "0x{{.*}}",
+// JSON-NEXT:                 "kind": "TemplateTypeParmDecl",
+// JSON-NEXT:                 "name": "Key"
+// JSON-NEXT:                }
+// JSON-NEXT:               }
+// JSON-NEXT:              ]
+// JSON-NEXT:             }
+// JSON-NEXT:            ]
+// JSON-NEXT:           }
+// JSON-NEXT:          ]
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       }
+// JSON-NEXT:      ]
+// JSON-NEXT:     }
+// JSON-NEXT:    ]
+// JSON-NEXT:   },
+// JSON-NEXT:   {
+// JSON-NEXT:    "id": "0x{{.*}}",
+// JSON-NEXT:    "kind": "NamespaceDecl",
+// JSON-NEXT:    "loc": {
+// JSON-NEXT:     "offset": 4013,
+// JSON-NEXT:     "line": 130,
+// JSON-NEXT:     "col": 11,
+// JSON-NEXT:     "tokLen": 5
+// JSON-NEXT:    },
+// JSON-NEXT:    "range": {
+// JSON-NEXT:     "begin": {
+// JSON-NEXT:      "offset": 4003,
+// JSON-NEXT:      "col": 1,
+// JSON-NEXT:      "tokLen": 9
+// JSON-NEXT:     },
+// JSON-NEXT:     "end": {
+// JSON-NEXT:      "offset": 4308,
+// JSON-NEXT:      "line": 136,
+// JSON-NEXT:      "col": 1,
+// JSON-NEXT:      "tokLen": 1
+// JSON-NEXT:     }
+// JSON-NEXT:    },
+// JSON-NEXT:    "name": "test7",
+// JSON-NEXT:    "inner": [
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "ClassTemplateDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 4066,
+// JSON-NEXT:       "line": 131,
+// JSON-NEXT:       "col": 46,
+// JSON-NEXT:       "tokLen": 1
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 4023,
+// JSON-NEXT:        "col": 3,
+// JSON-NEXT:        "tokLen": 8
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 4069,
+// JSON-NEXT:        "col": 49,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "name": "A",
+// JSON-NEXT:      "inner": [
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "TemplateTemplateParmDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 4055,
+// JSON-NEXT:         "col": 35,
+// JSON-NEXT:         "tokLen": 2
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 4033,
+// JSON-NEXT:          "col": 13,
+// JSON-NEXT:          "tokLen": 8
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 4055,
+// JSON-NEXT:          "col": 35,
+// JSON-NEXT:          "tokLen": 2
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "name": "TT",
+// JSON-NEXT:        "depth": 0,
+// JSON-NEXT:        "index": 0,
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "TemplateTypeParmDecl",
+// JSON-NEXT:          "loc": {
+// JSON-NEXT:           "offset": 4047,
+// JSON-NEXT:           "col": 27,
+// JSON-NEXT:           "tokLen": 1
+// JSON-NEXT:          },
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 4042,
+// JSON-NEXT:            "col": 22,
+// JSON-NEXT:            "tokLen": 5
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 4042,
+// JSON-NEXT:            "col": 22,
+// JSON-NEXT:            "tokLen": 5
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "tagUsed": "class",
+// JSON-NEXT:          "depth": 1,
+// JSON-NEXT:          "index": 0
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "CXXRecordDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 4066,
+// JSON-NEXT:         "col": 46,
+// JSON-NEXT:         "tokLen": 1
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 4059,
+// JSON-NEXT:          "col": 39,
+// JSON-NEXT:          "tokLen": 6
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 4069,
+// JSON-NEXT:          "col": 49,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "name": "A",
+// JSON-NEXT:        "tagUsed": "struct",
+// JSON-NEXT:        "completeDefinition": true,
+// JSON-NEXT:        "definitionData": {
+// JSON-NEXT:         "canConstDefaultInit": true,
+// JSON-NEXT:         "copyAssign": {
+// JSON-NEXT:          "hasConstParam": true,
+// JSON-NEXT:          "implicitHasConstParam": true,
+// JSON-NEXT:          "needsImplicit": true,
+// JSON-NEXT:          "simple": true,
+// JSON-NEXT:          "trivial": true
+// JSON-NEXT:         },
+// JSON-NEXT:         "copyCtor": {
+// JSON-NEXT:          "hasConstParam": true,
+// JSON-NEXT:          "implicitHasConstParam": true,
+// JSON-NEXT:          "needsImplicit": true,
+// JSON-NEXT:          "simple": true,
+// JSON-NEXT:          "trivial": true
+// JSON-NEXT:         },
+// JSON-NEXT:         "defaultCtor": {
+// JSON-NEXT:          "defaultedIsConstexpr": true,
+// JSON-NEXT:          "exists": true,
+// JSON-NEXT:          "isConstexpr": true,
+// JSON-NEXT:          "needsImplicit": true,
+// JSON-NEXT:          "trivial": true
+// JSON-NEXT:         },
+// JSON-NEXT:         "dtor": {
+// JSON-NEXT:          "irrelevant": true,
+// JSON-NEXT:          "needsImplicit": true,
+// JSON-NEXT:          "simple": true,
+// JSON-NEXT:          "trivial": true
+// JSON-NEXT:         },
+// JSON-NEXT:         "hasConstexprNonCopyMoveConstructor": true,
+// JSON-NEXT:         "isAggregate": true,
+// JSON-NEXT:         "isEmpty": true,
+// JSON-NEXT:         "isLiteral": true,
+// JSON-NEXT:         "isPOD": true,
+// JSON-NEXT:         "isStandardLayout": true,
+// JSON-NEXT:         "isTrivial": true,
+// JSON-NEXT:         "isTriviallyCopyable": true,
+// JSON-NEXT:         "moveAssign": {
+// JSON-NEXT:          "exists": true,
+// JSON-NEXT:          "needsImplicit": true,
+// JSON-NEXT:          "simple": true,
+// JSON-NEXT:          "trivial": true
+// JSON-NEXT:         },
+// JSON-NEXT:         "moveCtor": {
+// JSON-NEXT:          "exists": true,
+// JSON-NEXT:          "needsImplicit": true,
+// JSON-NEXT:          "simple": true,
+// JSON-NEXT:          "trivial": true
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "CXXRecordDecl",
+// JSON-NEXT:          "loc": {
+// JSON-NEXT:           "offset": 4066,
+// JSON-NEXT:           "col": 46,
+// JSON-NEXT:           "tokLen": 1
+// JSON-NEXT:          },
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 4059,
+// JSON-NEXT:            "col": 39,
+// JSON-NEXT:            "tokLen": 6
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 4066,
+// JSON-NEXT:            "col": 46,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "isImplicit": true,
+// JSON-NEXT:          "name": "A",
+// JSON-NEXT:          "tagUsed": "struct"
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "ClassTemplateSpecializationDecl",
+// JSON-NEXT:        "name": "A"
+// JSON-NEXT:       }
+// JSON-NEXT:      ]
+// JSON-NEXT:     },
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "ClassTemplateDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 4100,
+// JSON-NEXT:       "line": 132,
+// JSON-NEXT:       "col": 29,
+// JSON-NEXT:       "tokLen": 1
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 4074,
+// JSON-NEXT:        "col": 3,
+// JSON-NEXT:        "tokLen": 8
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 4103,
+// JSON-NEXT:        "col": 32,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "name": "B",
+// JSON-NEXT:      "inner": [
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "TemplateTypeParmDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 4092,
+// JSON-NEXT:         "col": 21,
+// JSON-NEXT:         "tokLen": 1
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 4084,
+// JSON-NEXT:          "col": 13,
+// JSON-NEXT:          "tokLen": 5
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 4084,
+// JSON-NEXT:          "col": 13,
+// JSON-NEXT:          "tokLen": 5
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "tagUsed": "class",
+// JSON-NEXT:        "depth": 0,
+// JSON-NEXT:        "index": 0,
+// JSON-NEXT:        "isParameterPack": true
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "CXXRecordDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 4100,
+// JSON-NEXT:         "col": 29,
+// JSON-NEXT:         "tokLen": 1
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 4094,
+// JSON-NEXT:          "col": 23,
+// JSON-NEXT:          "tokLen": 5
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 4103,
+// JSON-NEXT:          "col": 32,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "name": "B",
+// JSON-NEXT:        "tagUsed": "class",
+// JSON-NEXT:        "completeDefinition": true,
+// JSON-NEXT:        "definitionData": {
+// JSON-NEXT:         "canConstDefaultInit": true,
+// JSON-NEXT:         "copyAssign": {
+// JSON-NEXT:          "hasConstParam": true,
+// JSON-NEXT:          "implicitHasConstParam": true,
+// JSON-NEXT:          "needsImplicit": true,
+// JSON-NEXT:          "simple": true,
+// JSON-NEXT:          "trivial": true
+// JSON-NEXT:         },
+// JSON-NEXT:         "copyCtor": {
+// JSON-NEXT:          "hasConstParam": true,
+// JSON-NEXT:          "implicitHasConstParam": true,
+// JSON-NEXT:          "needsImplicit": true,
+// JSON-NEXT:          "simple": true,
+// JSON-NEXT:          "trivial": true
+// JSON-NEXT:         },
+// JSON-NEXT:         "defaultCtor": {
+// JSON-NEXT:          "defaultedIsConstexpr": true,
+// JSON-NEXT:          "exists": true,
+// JSON-NEXT:          "isConstexpr": true,
+// JSON-NEXT:          "needsImplicit": true,
+// JSON-NEXT:          "trivial": true
+// JSON-NEXT:         },
+// JSON-NEXT:         "dtor": {
+// JSON-NEXT:          "irrelevant": true,
+// JSON-NEXT:          "needsImplicit": true,
+// JSON-NEXT:          "simple": true,
+// JSON-NEXT:          "trivial": true
+// JSON-NEXT:         },
+// JSON-NEXT:         "hasConstexprNonCopyMoveConstructor": true,
+// JSON-NEXT:         "isAggregate": true,
+// JSON-NEXT:         "isEmpty": true,
+// JSON-NEXT:         "isLiteral": true,
+// JSON-NEXT:         "isPOD": true,
+// JSON-NEXT:         "isStandardLayout": true,
+// JSON-NEXT:         "isTrivial": true,
+// JSON-NEXT:         "isTriviallyCopyable": true,
+// JSON-NEXT:         "moveAssign": {
+// JSON-NEXT:          "exists": true,
+// JSON-NEXT:          "needsImplicit": true,
+// JSON-NEXT:          "simple": true,
+// JSON-NEXT:          "trivial": true
+// JSON-NEXT:         },
+// JSON-NEXT:         "moveCtor": {
+// JSON-NEXT:          "exists": true,
+// JSON-NEXT:          "needsImplicit": true,
+// JSON-NEXT:          "simple": true,
+// JSON-NEXT:          "trivial": true
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "inner": [
+// JSON-NEXT:         {
+// JSON-NEXT:          "id": "0x{{.*}}",
+// JSON-NEXT:          "kind": "CXXRecordDecl",
+// JSON-NEXT:          "loc": {
+// JSON-NEXT:           "offset": 4100,
+// JSON-NEXT:           "col": 29,
+// JSON-NEXT:           "tokLen": 1
+// JSON-NEXT:          },
+// JSON-NEXT:          "range": {
+// JSON-NEXT:           "begin": {
+// JSON-NEXT:            "offset": 4094,
+// JSON-NEXT:            "col": 23,
+// JSON-NEXT:            "tokLen": 5
+// JSON-NEXT:           },
+// JSON-NEXT:           "end": {
+// JSON-NEXT:            "offset": 4100,
+// JSON-NEXT:            "col": 29,
+// JSON-NEXT:            "tokLen": 1
+// JSON-NEXT:           }
+// JSON-NEXT:          },
+// JSON-NEXT:          "isImplicit": true,
+// JSON-NEXT:          "name": "B",
+// JSON-NEXT:          "tagUsed": "class"
+// JSON-NEXT:         }
+// JSON-NEXT:        ]
+// JSON-NEXT:       }
+// JSON-NEXT:      ]
+// JSON-NEXT:     },
+// JSON-NEXT:     {
+// JSON-NEXT:      "id": "0x{{.*}}",
+// JSON-NEXT:      "kind": "ClassTemplateSpecializationDecl",
+// JSON-NEXT:      "loc": {
+// JSON-NEXT:       "offset": 4124,
+// JSON-NEXT:       "line": 133,
+// JSON-NEXT:       "col": 19,
+// JSON-NEXT:       "tokLen": 1
+// JSON-NEXT:      },
+// JSON-NEXT:      "range": {
+// JSON-NEXT:       "begin": {
+// JSON-NEXT:        "offset": 4108,
+// JSON-NEXT:        "col": 3,
+// JSON-NEXT:        "tokLen": 8
+// JSON-NEXT:       },
+// JSON-NEXT:       "end": {
+// JSON-NEXT:        "offset": 4127,
+// JSON-NEXT:        "col": 22,
+// JSON-NEXT:        "tokLen": 1
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "name": "A",
+// JSON-NEXT:      "tagUsed": "struct",
+// JSON-NEXT:      "completeDefinition": true,
+// JSON-NEXT:      "strict-pack-match": true,
+// JSON-NEXT:      "definitionData": {
+// JSON-NEXT:       "canConstDefaultInit": true,
+// JSON-NEXT:       "canPassInRegisters": true,
+// JSON-NEXT:       "copyAssign": {
+// JSON-NEXT:        "hasConstParam": true,
+// JSON-NEXT:        "implicitHasConstParam": true,
+// JSON-NEXT:        "needsImplicit": true,
+// JSON-NEXT:        "simple": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       },
+// JSON-NEXT:       "copyCtor": {
+// JSON-NEXT:        "hasConstParam": true,
+// JSON-NEXT:        "implicitHasConstParam": true,
+// JSON-NEXT:        "needsImplicit": true,
+// JSON-NEXT:        "simple": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       },
+// JSON-NEXT:       "defaultCtor": {
+// JSON-NEXT:        "defaultedIsConstexpr": true,
+// JSON-NEXT:        "exists": true,
+// JSON-NEXT:        "isConstexpr": true,
+// JSON-NEXT:        "needsImplicit": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       },
+// JSON-NEXT:       "dtor": {
+// JSON-NEXT:        "irrelevant": true,
+// JSON-NEXT:        "needsImplicit": true,
+// JSON-NEXT:        "simple": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       },
+// JSON-NEXT:       "hasConstexprNonCopyMoveConstructor": true,
+// JSON-NEXT:       "isAggregate": true,
+// JSON-NEXT:       "isEmpty": true,
+// JSON-NEXT:       "isLiteral": true,
+// JSON-NEXT:       "isPOD": true,
+// JSON-NEXT:       "isStandardLayout": true,
+// JSON-NEXT:       "isTrivial": true,
+// JSON-NEXT:       "isTriviallyCopyable": true,
+// JSON-NEXT:       "moveAssign": {
+// JSON-NEXT:        "exists": true,
+// JSON-NEXT:        "needsImplicit": true,
+// JSON-NEXT:        "simple": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       },
+// JSON-NEXT:       "moveCtor": {
+// JSON-NEXT:        "exists": true,
+// JSON-NEXT:        "needsImplicit": true,
+// JSON-NEXT:        "simple": true,
+// JSON-NEXT:        "trivial": true
+// JSON-NEXT:       }
+// JSON-NEXT:      },
+// JSON-NEXT:      "inner": [
+// JSON-NEXT:       {
+// JSON-NEXT:        "kind": "TemplateArgument"
+// JSON-NEXT:       },
+// JSON-NEXT:       {
+// JSON-NEXT:        "id": "0x{{.*}}",
+// JSON-NEXT:        "kind": "CXXRecordDecl",
+// JSON-NEXT:        "loc": {
+// JSON-NEXT:         "offset": 4066,
+// JSON-NEXT:         "line": 131,
+// JSON-NEXT:         "col": 46,
+// JSON-NEXT:         "tokLen": 1
+// JSON-NEXT:        },
+// JSON-NEXT:        "range": {
+// JSON-NEXT:         "begin": {
+// JSON-NEXT:          "offset": 4059,
+// JSON-NEXT:          "col": 39,
+// JSON-NEXT:          "tokLen": 6
+// JSON-NEXT:         },
+// JSON-NEXT:         "end": {
+// JSON-NEXT:          "offset": 4066,
+// JSON-NEXT:          "col": 46,
+// JSON-NEXT:          "tokLen": 1
+// JSON-NEXT:         }
+// JSON-NEXT:        },
+// JSON-NEXT:        "isImplicit": true,
+// JSON-NEXT:        "name": "A",
+// JSON-NEXT:        "tagUsed": "struct"
+// JSON-NEXT:       }
+// JSON-NEXT:      ]
+// JSON-NEXT:     }
+// JSON-NEXT:    ]
+// JSON-NEXT:   }
+// JSON-NEXT:  ]
+// JSON-NEXT: }
diff --git a/clang/test/AST/gen_ast_dump_json_test.py b/clang/test/AST/gen_ast_dump_json_test.py
index 301d60e..39b8eaa 100644
--- a/clang/test/AST/gen_ast_dump_json_test.py
+++ b/clang/test/AST/gen_ast_dump_json_test.py
@@ -83,6 +83,12 @@ def main():
         action="store",
         default="",
     )
+    parser.add_argument(
+        "--prefix",
+        help="The FileCheck prefix",
+        action="store",
+        default="CHECK",
+    )
     update_or_generate_group = parser.add_mutually_exclusive_group()
     update_or_generate_group.add_argument(
         "--update", help="Update the file in-place", action="store_true"
@@ -113,11 +119,18 @@ def main():
             cmdline_opts=args.opts,
             do_update=args.update,
             force_update=args.update_manual,
+            prefix=args.prefix,
         )
 
 
 def process_file(
-    source_file, clang_binary, cmdline_filters, cmdline_opts, do_update, force_update
+    source_file,
+    clang_binary,
+    cmdline_filters,
+    cmdline_opts,
+    do_update,
+    force_update,
+    prefix,
 ):
     note_firstline = (
         "// NOTE: CHECK lines have been autogenerated by " "gen_ast_dump_json_test.py"
@@ -227,14 +240,14 @@ def process_file(
         for out_ast in out_asts:
             append_str = json.dumps(out_ast, indent=1, ensure_ascii=False)
             out_str = "\n\n"
-            out_str += "// CHECK-NOT: {{^}}Dumping\n"
+            out_str += f"// {prefix}-NOT: {{{{^}}}}Dumping\n"
             index = 0
             for append_line in append_str.splitlines()[2:]:
                 if index == 0:
-                    out_str += "// CHECK: %s\n" % (append_line.rstrip())
+                    out_str += f"// {prefix}: %s\n" % (append_line.rstrip())
                     index += 1
                 else:
-                    out_str += "// CHECK-NEXT: %s\n" % (append_line.rstrip())
+                    out_str += f"// {prefix}-NEXT: %s\n" % (append_line.rstrip())
 
             f.write(out_str)
         f.flush()
diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp b/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp
index 2173245..2a1a164 100644
--- a/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp
+++ b/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp
@@ -252,13 +252,22 @@ struct RefCountableWithLambdaCapturingThis {
     call(lambda);
   }
 
-  void method_captures_this_with_guardian_refPtr() {
+  void method_captures_this_with_guardian_refptr() {
     auto lambda = [this, protectedThis = RefPtr { &*this }]() {
       nonTrivial();
     };
     call(lambda);
   }
 
+  void forEach(const WTF::Function<void(RefCountable&)>&);
+  void method_captures_this_with_lambda_with_no_escape() {
+    auto run = [&]([[clang::noescape]] const WTF::Function<void(RefCountable&)>& func) {
+      forEach(func);
+    };
+    run([&](RefCountable&) {
+      nonTrivial();
+    });
+  }
 };
 
 struct NonRefCountableWithLambdaCapturingThis {
diff --git a/clang/test/Analysis/ftime-trace.cpp b/clang/test/Analysis/ftime-trace.cpp
new file mode 100644
index 0000000..2c369a9
--- /dev/null
+++ b/clang/test/Analysis/ftime-trace.cpp
@@ -0,0 +1,56 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=core %s -ftime-trace=%t.raw.json -ftime-trace-granularity=0 -verify
+// RUN: %python -c 'import json, sys; print(json.dumps(json.load(sys.stdin), indent=4))' < %t.raw.json > %t.formatted.json
+// RUN: FileCheck --input-file=%t.formatted.json --check-prefix=CHECK %s
+
+// The trace file is rather large, but it should contain at least the duration of the analysis of 'f':
+//
+// CHECK:          "name": "HandleCode f",
+// CHECK-NEXT:     "args": {
+// CHECK-NEXT:         "detail": "f()",
+// CHECK-NEXT:         "file": "{{.+}}ftime-trace.cpp",
+// CHECK-NEXT:         "line": {{[0-9]+}}
+// CHECK-NEXT:     }
+
+// If any reports are found, "flushing" their equivalence class (EQC) is a separate action:
+//
+// CHECK:          "name": "Flushing EQC Division by zero",
+// CHECK-NEXT:     "args": {
+// CHECK-NEXT:         "detail": "core.DivideZero",
+// CHECK-NEXT:         "file": "{{.+}}ftime-trace.cpp",
+// CHECK-NEXT:         "line": {{[0-9]+}}
+// CHECK-NEXT:     }
+
+// The trace also contains durations of each step, but they are so short that they are not reliably present
+// in each run. However, they are also aggregated into Total *, for example:
+//
+// CHECK:          "name": "Total Loc PostStmt",
+// CHECK-NEXT:     "args": {
+// CHECK-NEXT:         "count": {{[0-9]+}},
+// CHECK-NEXT:         "avg ms": {{[0-9]+}}
+// CHECK-NEXT:     }
+
+// Additionally, the trace lists checker hook points (again, relying on totals here):
+//
+// CHECK:          "name": "Total CheckerManager::runCheckersForStmt (Pre)",
+// CHECK-NEXT:     "args": {
+// CHECK-NEXT:         "count": {{[0-9]+}},
+// CHECK-NEXT:         "avg ms": {{[0-9]+}}
+// CHECK-NEXT:     }
+
+// Finally, each checker call back is also present:
+//
+// CHECK:          "name": "Total Stmt:core.DivideZero",
+// CHECK-NEXT:     "args": {
+// CHECK-NEXT:         "count": {{[0-9]+}},
+// CHECK-NEXT:         "avg ms": {{[0-9]+}}
+// CHECK-NEXT:     }
+
+bool coin();
+int f() {
+    int x = 0;
+    int y = 0;
+    while (coin()) {
+        x = 1;
+    }
+    return x / y; // expected-warning{{Division by zero}}
+}
diff --git a/clang/test/ClangScanDeps/modules-context-hash-cwd.c b/clang/test/ClangScanDeps/modules-context-hash-cwd.c
new file mode 100644
index 0000000..459d2c9
--- /dev/null
+++ b/clang/test/ClangScanDeps/modules-context-hash-cwd.c
@@ -0,0 +1,188 @@
+// Test current directory pruning when computing the context hash.
+
+// REQUIRES: shell
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb0.json.in > %t/cdb0.json
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb1.json.in > %t/cdb1.json
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb3.json.in > %t/cdb3.json
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb4.json.in > %t/cdb4.json
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb5.json.in > %t/cdb5.json
+// RUN: clang-scan-deps -compilation-database %t/cdb0.json -format experimental-full > %t/result0.json
+// RUN: clang-scan-deps -compilation-database %t/cdb1.json -format experimental-full > %t/result1.json
+// It is not a typo to use cdb1.json for result2. We intend to use the same
+// compilation database, but different clang-scan-deps optimize-args options.
+// RUN: clang-scan-deps -compilation-database %t/cdb1.json -format experimental-full -optimize-args=header-search,system-warnings,vfs,canonicalize-macros > %t/result2.json
+// RUN: clang-scan-deps -compilation-database %t/cdb3.json -format experimental-full > %t/result3.json
+// RUN: clang-scan-deps -compilation-database %t/cdb4.json -format experimental-full > %t/result4.json
+// RUN: clang-scan-deps -compilation-database %t/cdb5.json -format experimental-full > %t/result5.json
+// RUN: cat %t/result0.json %t/result1.json | FileCheck %s
+// RUN: cat %t/result0.json %t/result2.json | FileCheck %s -check-prefix=SKIPOPT
+// RUN: cat %t/result3.json %t/result4.json | FileCheck %s -check-prefix=RELPATH
+// RUN: cat %t/result0.json %t/result5.json | FileCheck %s
+
+//--- cdb0.json.in
+[{
+  "directory": "DIR",
+  "command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -IDIR/include/ -o DIR/tu.o",
+  "file": "DIR/tu.c"
+}]
+
+//--- cdb1.json.in
+[{
+  "directory": "DIR/a",
+  "command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -IDIR/include/ -o DIR/tu.o",
+  "file": "DIR/tu.c"
+}]
+
+// cdb2 is skipped because we reuse cdb1.
+
+//--- cdb3.json.in
+[{
+  "directory": "DIR",
+  "command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -fprebuilt-module-path=.././module -IDIR/include/ -o DIR/tu.o ",
+  "file": "DIR/tu.c"
+}]
+
+//--- cdb4.json.in
+[{
+  "directory": "DIR/a/",
+  "command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -fprebuilt-module-path=.././module -IDIR/include/ -o DIR/tu.o ",
+  "file": "DIR/tu.c"
+}]
+
+//--- cdb5.json.in
+[{
+  "directory": "DIR",
+  "command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -IDIR/include/ -Xclang -working-directory=DIR/a/ -o DIR/tu.o",
+  "file": "DIR/tu.c"
+}]
+
+//--- include/module.modulemap
+module mod {
+  header "mod.h"
+}
+
+//--- include/mod.h
+
+//--- tu.c
+#include "mod.h"
+
+// Check that result0 and result1/result5 compute the same hash with
+// optimization on. The only difference between result0 and result1/result5 is
+// the compiler's working directory.
+// CHECK:     {
+// CHECK-NEXT:  "modules": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:     "clang-module-deps": [],
+// CHECK:          "context-hash": "[[HASH:.*]]",
+// CHECK:        }
+// CHECK:       "translation-units": [
+// CHECK:        {
+// CHECK:          "commands": [
+// CHECK:          {
+// CHECK-NEXT:        "clang-context-hash": "{{.*}}",
+// CHECK-NEXT:        "clang-module-deps": [
+// CHECK-NEXT:          {
+// CHECK-NEXT:            "context-hash": "[[HASH]]",
+// CHECK-NEXT:            "module-name": "mod"
+// CHECK:               }
+// CHECK:             ],
+// CHECK:     {
+// CHECK-NEXT:   "modules": [
+// CHECK-NEXT:    {
+// CHECK-NEXT:      "clang-module-deps": [],
+// CHECK:           "context-hash": "[[HASH]]",
+// CHECK:         }
+// CHECK:        "translation-units": [
+// CHECK:         {
+// CHECK:           "commands": [
+// CHECK:           {
+// CHECK-NEXT:         "clang-context-hash": "{{.*}}",
+// CHECK-NEXT:         "clang-module-deps": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:             "context-hash": "[[HASH]]",
+// CHECK-NEXT:             "module-name": "mod"
+// CHECK:               }
+// CHECK:              ],
+
+// Check that result0 and result2 compute different hashes because
+// the working directory optmization is turned off for result2.
+// SKIPOPT:      {
+// SKIPOPT-NEXT:   "modules": [
+// SKIPOPT-NEXT:    {
+// SKIPOPT-NEXT:      "clang-module-deps": [],
+// SKIPOPT:           "context-hash": "[[HASH0:.*]]",
+// SKIPOPT:         }
+// SKIPOPT:        "translation-units": [
+// SKIPOPT:         {
+// SKIPOPT:            "commands": [
+// SKIPOPT:             {
+// SKIPOPT-NEXT:          "clang-context-hash": "{{.*}}",
+// SKIPOPT-NEXT:          "clang-module-deps": [
+// SKIPOPT-NEXT:            {
+// SKIPOPT-NEXT:              "context-hash": "[[HASH0]]",
+// SKIPOPT-NEXT:              "module-name": "mod"
+// SKIPOPT:            }
+// SKIPOPT:          ],
+// SKIPOPT:      {
+// SKIPOPT-NEXT:   "modules": [
+// SKIPOPT-NEXT:     {
+// SKIPOPT-NEXT:       "clang-module-deps": [],
+// SKIPOPT-NOT:        "context-hash": "[[HASH0]]",
+// SKIPOPT:            "context-hash": "[[HASH2:.*]]",
+// SKIPOPT:          }
+// SKIPOPT:       "translation-units": [
+// SKIPOPT:         {
+// SKIPOPT:           "commands": [
+// SKIPOPT:             {
+// SKIPOPT-NEXT:          "clang-context-hash": "{{.*}}",
+// SKIPOPT-NEXT:          "clang-module-deps": [
+// SKIPOPT-NEXT:            {
+// SKIPOPT-NOT:              "context-hash": "[[HASH0]]",
+// SKIPOPT-NEXT:             "context-hash": "[[HASH2]]"
+// SKIPOPT-NEXT:              "module-name": "mod"
+// SKIPOPT:            }
+// SKIPOPT:          ],
+
+// Check that result3 and result4 contain different hashes because
+// both have a same relative path as a command line input, and
+// they are produced using different compiler working directories.
+// RELPATH:      {
+// RELPATH-NEXT:   "modules": [
+// RELPATH-NEXT:    {
+// RELPATH-NEXT:      "clang-module-deps": [],
+// RELPATH:           "context-hash": "[[HASH3:.*]]",
+// RELPATH:         }
+// RELPATH:        "translation-units": [
+// RELPATH:         {
+// RELPATH:            "commands": [
+// RELPATH:             {
+// RELPATH-NEXT:          "clang-context-hash": "{{.*}}",
+// RELPATH-NEXT:          "clang-module-deps": [
+// RELPATH-NEXT:            {
+// RELPATH-NEXT:              "context-hash": "[[HASH3]]",
+// RELPATH-NEXT:              "module-name": "mod"
+// RELPATH:            }
+// RELPATH:          ],
+// RELPATH:      {
+// RELPATH-NEXT:   "modules": [
+// RELPATH-NEXT:     {
+// RELPATH-NEXT:       "clang-module-deps": [],
+// RELPATH-NOT:        "context-hash": "[[HASH3]]",
+// RELPATH:            "context-hash": "[[HASH4:.*]]",
+// RELPATH:          }
+// RELPATH:       "translation-units": [
+// RELPATH:         {
+// RELPATH:           "commands": [
+// RELPATH:             {
+// RELPATH-NEXT:          "clang-context-hash": "{{.*}}",
+// RELPATH-NEXT:          "clang-module-deps": [
+// RELPATH-NEXT:            {
+// RELPATH-NOT:              "context-hash": "[[HASH3]]",
+// RELPATH-NEXT:             "context-hash": "[[HASH4]]"
+// RELPATH-NEXT:              "module-name": "mod"
+// RELPATH:            }
+// RELPATH:          ],
+
diff --git a/clang/test/ClangScanDeps/tu-buffer.c b/clang/test/ClangScanDeps/tu-buffer.c
new file mode 100644
index 0000000..b450b13
--- /dev/null
+++ b/clang/test/ClangScanDeps/tu-buffer.c
@@ -0,0 +1,111 @@
+// UNSUPPORTED: target=powerpc64-ibm-aix{{.*}}
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+
+//--- module.modulemap
+module root { header "root.h" }
+module direct { header "direct.h" }
+module transitive { header "transitive.h" }
+module addition { header "addition.h" }
+//--- root.h
+#include "direct.h"
+#include "root/textual.h"
+//--- direct.h
+#include "transitive.h"
+//--- transitive.h
+// empty
+
+//--- addition.h
+// empty
+
+//--- tu.c
+#include "root.h"
+
+//--- root/textual.h
+// This is here to verify that the "root" directory doesn't clash with name of
+// the "root" module.
+
+//--- cdb.json.template
+[{
+  "file": "",
+  "directory": "DIR",
+  "command": "clang -fmodules -fmodules-cache-path=DIR/cache -I DIR -x c -c"
+}]
+
+// RUN: sed "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json
+// RUN: clang-scan-deps -compilation-database %t/cdb.json -format experimental-full -tu-buffer-path %t/tu.c > %t/result.json
+// RUN: cat %t/result.json | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t %s --check-prefix=CHECK
+
+// CHECK:      {
+// CHECK-NEXT:   "modules": [
+// CHECK-NEXT:     {
+// CHECK-NEXT:       "clang-module-deps": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:           "context-hash": "{{.*}}",
+// CHECK-NEXT:           "module-name": "transitive"
+// CHECK-NEXT:         }
+// CHECK-NEXT:       ],
+// CHECK-NEXT:       "clang-modulemap-file": "[[PREFIX]]/module.modulemap",
+// CHECK-NEXT:       "command-line": [
+// CHECK:            ],
+// CHECK-NEXT:       "context-hash": "{{.*}}",
+// CHECK-NEXT:       "file-deps": [
+// CHECK-NEXT:         "[[PREFIX]]/module.modulemap"
+// CHECK-NEXT:         "[[PREFIX]]/direct.h"
+// CHECK-NEXT:       ],
+// CHECK:            "name": "direct"
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:       "clang-module-deps": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:           "context-hash": "{{.*}}",
+// CHECK-NEXT:           "module-name": "direct"
+// CHECK-NEXT:         }
+// CHECK-NEXT:       ],
+// CHECK-NEXT:       "clang-modulemap-file": "[[PREFIX]]/module.modulemap",
+// CHECK-NEXT:       "command-line": [
+// CHECK:            ],
+// CHECK-NEXT:       "context-hash": "{{.*}}",
+// CHECK-NEXT:       "file-deps": [
+// CHECK-NEXT:         "[[PREFIX]]/module.modulemap"
+// CHECK-NEXT:         "[[PREFIX]]/root.h"
+// CHECK-NEXT:         "[[PREFIX]]/root/textual.h"
+// CHECK-NEXT:       ],
+// CHECK:            "name": "root"
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:       "clang-module-deps": [],
+// CHECK-NEXT:       "clang-modulemap-file": "[[PREFIX]]/module.modulemap",
+// CHECK-NEXT:       "command-line": [
+// CHECK:            ],
+// CHECK-NEXT:       "context-hash": "{{.*}}",
+// CHECK-NEXT:       "file-deps": [
+// CHECK-NEXT:         "[[PREFIX]]/module.modulemap"
+// CHECK-NEXT:         "[[PREFIX]]/transitive.h"
+// CHECK-NEXT:       ],
+// CHECK:            "name": "transitive"
+// CHECK-NEXT:     }
+// CHECK-NEXT:   ],
+// CHECK-NEXT:   "translation-units": [
+// CHECK-NEXT:     {
+// CHECK-NEXT:       "commands": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:           "clang-context-hash": "{{.*}}",
+// CHECK-NEXT:           "clang-module-deps": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:               "context-hash": "{{.*}}",
+// CHECK-NEXT:               "module-name": "root"
+// CHECK-NEXT:             }
+// CHECK-NEXT:           ],
+// CHECK-NEXT:           "command-line": [
+// CHECK:                ],
+// CHECK:                "file-deps": [
+// CHECK-NEXT:             [[PREFIX]]/tu.c
+// CHECK-NEXT:           ],
+// CHECK-NEXT:           "input-file": "[[PREFIX]]/tu.c"
+// CHECK-NEXT:         }
+// CHECK-NEXT:       ]
+// CHECK-NEXT:     }
+// CHECK-NEXT:   ]
+// CHECK-NEXT: }
diff --git a/clang/test/ClangScanDeps/working-dir.m b/clang/test/ClangScanDeps/working-dir.m
index a04f8c2..c6b7b19 100644
--- a/clang/test/ClangScanDeps/working-dir.m
+++ b/clang/test/ClangScanDeps/working-dir.m
@@ -2,7 +2,7 @@
 // RUN: split-file %s %t
 // RUN: sed -e "s|DIR|%/t|g" %t/build/compile-commands.json.in > %t/build/compile-commands.json
 // RUN: clang-scan-deps -compilation-database %t/build/compile-commands.json \
-// RUN:   -j 1 -format experimental-full --optimize-args=all > %t/deps.db
+// RUN:   -j 1 -format experimental-full --optimize-args=header-search,system-warnings,vfs,canonicalize-macros > %t/deps.db
 // RUN: cat %t/deps.db | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t
 
 // Check that there are two separate modules hashes. One for each working dir.
diff --git a/clang/test/CodeGen/X86/avx-cxx-record.cpp b/clang/test/CodeGen/X86/avx-cxx-record.cpp
index d8863ca..bcd9c36 100644
--- a/clang/test/CodeGen/X86/avx-cxx-record.cpp
+++ b/clang/test/CodeGen/X86/avx-cxx-record.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang %s -S --target=x86_64-unknown-linux-gnu -emit-llvm -O2 -march=x86-64-v3 -o - | FileCheck %s
+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -O2 -target-cpu x86-64-v3 -o - | FileCheck %s
 
 using UInt64x2 = unsigned long long __attribute__((__vector_size__(16), may_alias));
 
@@ -11,7 +11,7 @@ struct XMM2 : XMM1<0>, XMM1<1> {
 };
 
 // CHECK: define{{.*}} @_Z3foov({{.*}} [[ARG:%.*]]){{.*}}
-// CHECK-NEXT: entry:
+// CHECK: entry:
 // CHECK-NEXT: store {{.*}}, ptr [[ARG]]{{.*}}
 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr {{.*}}, ptr [[ARG]]{{.*}}
 // CHECK-NEXT: store {{.*}}, ptr [[TMP1]]{{.*}}
diff --git a/clang/test/CodeGen/attr-counted-by.c b/clang/test/CodeGen/attr-counted-by.c
index feb6f15..e85f3db 100644
--- a/clang/test/CodeGen/attr-counted-by.c
+++ b/clang/test/CodeGen/attr-counted-by.c
@@ -68,7 +68,7 @@ struct anon_struct {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3:![0-9]+]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB1:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR7:[0-9]+]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB1:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8:[0-9]+]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont3:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
@@ -116,7 +116,7 @@ void test1(struct annotated *p, int index, int val) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[INDEX]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP1]], label [[CONT6:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB3:[0-9]+]], i64 [[INDEX]]) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB3:[0-9]+]], i64 [[INDEX]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont6:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
@@ -203,7 +203,7 @@ size_t test2_bdos(struct annotated *p) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[INDEX]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB4:[0-9]+]], i64 [[INDEX]]) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB4:[0-9]+]], i64 [[INDEX]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont3:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
@@ -275,7 +275,7 @@ size_t test3_bdos(struct annotated *p) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = icmp ugt i32 [[DOTCOUNTED_BY_LOAD]], 2
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP1]], label [[CONT1:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB5:[0-9]+]], i64 3) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB5:[0-9]+]], i64 3) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont1:
 // SANITIZE-WITH-ATTR-NEXT:    [[FLEXIBLE_ARRAY_MEMBER_SIZE:%.*]] = shl i32 [[DOTCOUNTED_BY_LOAD]], 2
@@ -283,7 +283,7 @@ size_t test3_bdos(struct annotated *p) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP2]], label [[CONT12:%.*]], label [[HANDLER_OUT_OF_BOUNDS8:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds8:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB6:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB6:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont12:
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP3:%.*]] = icmp sgt i32 [[DOTCOUNTED_BY_LOAD]], 2
@@ -295,7 +295,7 @@ size_t test3_bdos(struct annotated *p) {
 // SANITIZE-WITH-ATTR-NEXT:    [[DOTNOT81:%.*]] = icmp eq i32 [[DOTCOUNTED_BY_LOAD]], 3
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[DOTNOT81]], label [[HANDLER_OUT_OF_BOUNDS18:%.*]], label [[CONT19:%.*]], !prof [[PROF8:![0-9]+]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds18:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB7:[0-9]+]], i64 4) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB7:[0-9]+]], i64 4) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont19:
 // SANITIZE-WITH-ATTR-NEXT:    [[ADD:%.*]] = add nsw i32 [[INDEX]], 1
@@ -303,7 +303,7 @@ size_t test3_bdos(struct annotated *p) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[IDXPROM31]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP5]], label [[CONT38:%.*]], label [[HANDLER_OUT_OF_BOUNDS34:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds34:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB8:[0-9]+]], i64 [[IDXPROM31]]) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB8:[0-9]+]], i64 [[IDXPROM31]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont38:
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP6:%.*]] = icmp sgt i32 [[DOTCOUNTED_BY_LOAD]], 3
@@ -318,7 +318,7 @@ size_t test3_bdos(struct annotated *p) {
 // SANITIZE-WITH-ATTR-NEXT:    [[DOTNOT:%.*]] = icmp ugt i64 [[IDXPROM42]], [[TMP8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[DOTNOT]], label [[HANDLER_OUT_OF_BOUNDS45:%.*]], label [[CONT46:%.*]], !prof [[PROF8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds45:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB9:[0-9]+]], i64 [[IDXPROM42]]) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB9:[0-9]+]], i64 [[IDXPROM42]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont46:
 // SANITIZE-WITH-ATTR-NEXT:    [[ADD59:%.*]] = add nsw i32 [[INDEX]], 2
@@ -326,7 +326,7 @@ size_t test3_bdos(struct annotated *p) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP9:%.*]] = icmp ult i64 [[IDXPROM60]], [[TMP8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP9]], label [[CONT67:%.*]], label [[HANDLER_OUT_OF_BOUNDS63:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds63:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB10:[0-9]+]], i64 [[IDXPROM60]]) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB10:[0-9]+]], i64 [[IDXPROM60]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont67:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX65:%.*]] = getelementptr inbounds nuw [0 x i32], ptr [[ARRAY]], i64 0, i64 [[IDXPROM60]]
@@ -431,7 +431,7 @@ void test4(struct annotated *p, int index, int fam_idx) {
 // SANITIZE-WITH-ATTR-NEXT:    [[DOTNOT:%.*]] = icmp ugt i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[DOTNOT]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], label [[CONT1:%.*]], !prof [[PROF8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB11:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB11:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont1:
 // SANITIZE-WITH-ATTR-NEXT:    [[COUNT:%.*]] = sext i32 [[DOTCOUNTED_BY_LOAD]] to i64
@@ -478,7 +478,7 @@ size_t test4_bdos(struct annotated *p, int index) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = icmp ugt i64 [[DOTCOUNTED_BY_LOAD]], [[IDXPROM]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB12:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB12:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont3:
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
@@ -550,7 +550,7 @@ size_t test5_bdos(struct anon_struct *p) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = icmp ugt i64 [[COUNTED_BY_LOAD]], [[IDXPROM]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT6:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB13:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB13:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont6:
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
@@ -639,7 +639,7 @@ size_t test6_bdos(struct anon_struct *p) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP1]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP2]], label [[CONT7:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB15:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB15:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont7:
 // SANITIZE-WITH-ATTR-NEXT:    [[INTS:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 9
@@ -712,7 +712,7 @@ size_t test7_bdos(struct union_of_fams *p) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP1]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP2]], label [[CONT14:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB16:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB16:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont14:
 // SANITIZE-WITH-ATTR-NEXT:    [[INTS:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 9
@@ -793,7 +793,7 @@ size_t test8_bdos(struct union_of_fams *p) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP1]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP2]], label [[CONT7:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB18:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB18:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont7:
 // SANITIZE-WITH-ATTR-NEXT:    [[BYTES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
@@ -866,7 +866,7 @@ size_t test9_bdos(struct union_of_fams *p) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP1]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP2]], label [[CONT14:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB19:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB19:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont14:
 // SANITIZE-WITH-ATTR-NEXT:    [[BYTES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
@@ -953,7 +953,7 @@ size_t test10_bdos(struct union_of_fams *p) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP1]], label [[CONT6:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB20:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB20:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont6:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
@@ -1059,13 +1059,13 @@ int test12_a, test12_b;
 // SANITIZE-WITH-ATTR-SAME: i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
 // SANITIZE-WITH-ATTR-NEXT:    [[BAZ:%.*]] = alloca [[STRUCT_HANG:%.*]], align 4
-// SANITIZE-WITH-ATTR-NEXT:    call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[BAZ]]) #[[ATTR8:[0-9]+]]
+// SANITIZE-WITH-ATTR-NEXT:    call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[BAZ]]) #[[ATTR9:[0-9]+]]
 // SANITIZE-WITH-ATTR-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 4 dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct [[TBAA_STRUCT10:![0-9]+]]
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[INDEX]], 6
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = zext i32 [[INDEX]] to i64
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB22:[0-9]+]], i64 [[TMP1]]) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB22:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [6 x i32], ptr [[BAZ]], i64 0, i64 [[TMP1]]
@@ -1075,17 +1075,17 @@ int test12_a, test12_b;
 // SANITIZE-WITH-ATTR-NEXT:    [[DOTNOT:%.*]] = icmp eq i32 [[DOTCOUNTED_BY_LOAD]], 0
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[DOTNOT]], label [[HANDLER_OUT_OF_BOUNDS4:%.*]], label [[HANDLER_TYPE_MISMATCH6:%.*]], !prof [[PROF8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds4:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB24:[0-9]+]], i64 0) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB24:[0-9]+]], i64 0) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.type_mismatch6:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_type_mismatch_v1_abort(ptr nonnull @[[GLOB25:[0-9]+]], i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr @test12_foo, i64 4) to i64)) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_type_mismatch_v1_abort(ptr nonnull @[[GLOB25:[0-9]+]], i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr @test12_foo, i64 4) to i64)) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 //
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i32 @test12(
 // NO-SANITIZE-WITH-ATTR-SAME: i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[BAZ:%.*]] = alloca [[STRUCT_HANG:%.*]], align 4
-// NO-SANITIZE-WITH-ATTR-NEXT:    call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[BAZ]]) #[[ATTR11:[0-9]+]]
+// NO-SANITIZE-WITH-ATTR-NEXT:    call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[BAZ]]) #[[ATTR12:[0-9]+]]
 // NO-SANITIZE-WITH-ATTR-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 4 dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct [[TBAA_STRUCT7:![0-9]+]]
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [6 x i32], ptr [[BAZ]], i64 0, i64 [[IDXPROM]]
@@ -1101,13 +1101,13 @@ int test12_a, test12_b;
 // SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
 // SANITIZE-WITHOUT-ATTR-NEXT:  entry:
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[BAZ:%.*]] = alloca [[STRUCT_HANG:%.*]], align 4
-// SANITIZE-WITHOUT-ATTR-NEXT:    call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[BAZ]]) #[[ATTR6:[0-9]+]]
+// SANITIZE-WITHOUT-ATTR-NEXT:    call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[BAZ]]) #[[ATTR7:[0-9]+]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 4 dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct [[TBAA_STRUCT7:![0-9]+]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[INDEX]], 6
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[TMP1:%.*]] = zext i32 [[INDEX]] to i64
 // SANITIZE-WITHOUT-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8:![0-9]+]], !nosanitize [[META9:![0-9]+]]
 // SANITIZE-WITHOUT-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB2:[0-9]+]], i64 [[TMP1]]) #[[ATTR7:[0-9]+]], !nosanitize [[META9]]
+// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB2:[0-9]+]], i64 [[TMP1]]) #[[ATTR8:[0-9]+]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    unreachable, !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR:       cont:
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [6 x i32], ptr [[BAZ]], i64 0, i64 [[TMP1]]
@@ -1117,17 +1117,17 @@ int test12_a, test12_b;
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[DOTNOT:%.*]] = icmp eq i32 [[DOTCOUNTED_BY_LOAD]], 0
 // SANITIZE-WITHOUT-ATTR-NEXT:    br i1 [[DOTNOT]], label [[HANDLER_OUT_OF_BOUNDS4:%.*]], label [[HANDLER_TYPE_MISMATCH6:%.*]], !prof [[PROF10:![0-9]+]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR:       handler.out_of_bounds4:
-// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB4:[0-9]+]], i64 0) #[[ATTR7]], !nosanitize [[META9]]
+// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB4:[0-9]+]], i64 0) #[[ATTR8]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    unreachable, !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR:       handler.type_mismatch6:
-// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_type_mismatch_v1_abort(ptr nonnull @[[GLOB5:[0-9]+]], i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr @test12_foo, i64 4) to i64)) #[[ATTR7]], !nosanitize [[META9]]
+// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_type_mismatch_v1_abort(ptr nonnull @[[GLOB5:[0-9]+]], i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr @test12_foo, i64 4) to i64)) #[[ATTR8]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    unreachable, !nosanitize [[META9]]
 //
 // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i32 @test12(
 // NO-SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
 // NO-SANITIZE-WITHOUT-ATTR-NEXT:  entry:
 // NO-SANITIZE-WITHOUT-ATTR-NEXT:    [[BAZ:%.*]] = alloca [[STRUCT_HANG:%.*]], align 4
-// NO-SANITIZE-WITHOUT-ATTR-NEXT:    call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[BAZ]]) #[[ATTR9:[0-9]+]]
+// NO-SANITIZE-WITHOUT-ATTR-NEXT:    call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[BAZ]]) #[[ATTR10:[0-9]+]]
 // NO-SANITIZE-WITHOUT-ATTR-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 4 dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct [[TBAA_STRUCT7:![0-9]+]]
 // NO-SANITIZE-WITHOUT-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
 // NO-SANITIZE-WITHOUT-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [6 x i32], ptr [[BAZ]], i64 0, i64 [[IDXPROM]]
@@ -1168,7 +1168,7 @@ struct test13_bar {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[INDEX]], [[TMP1]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP2]], label [[CONT5:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB28:[0-9]+]], i64 [[INDEX]]) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB28:[0-9]+]], i64 [[INDEX]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont5:
 // SANITIZE-WITH-ATTR-NEXT:    [[REVMAP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16
@@ -1195,7 +1195,7 @@ struct test13_bar {
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[INDEX]], [[TMP1]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    br i1 [[TMP2]], label [[CONT5:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB8:[0-9]+]], i64 [[INDEX]]) #[[ATTR7]], !nosanitize [[META9]]
+// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB8:[0-9]+]], i64 [[INDEX]]) #[[ATTR8]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    unreachable, !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR:       cont5:
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[REVMAP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16
@@ -1229,7 +1229,7 @@ struct test14_foo {
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
 // SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB29:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB29:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont3:
 // SANITIZE-WITH-ATTR-NEXT:    ret i32 undef
@@ -1254,7 +1254,7 @@ struct test14_foo {
 // SANITIZE-WITHOUT-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR:       handler.out_of_bounds:
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64
-// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB9:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR7]], !nosanitize [[META9]]
+// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB9:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    unreachable, !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR:       cont3:
 // SANITIZE-WITHOUT-ATTR-NEXT:    ret i32 undef
@@ -1283,7 +1283,7 @@ int test14(int idx) {
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT1:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
 // SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB31:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB31:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont1:
 // SANITIZE-WITH-ATTR-NEXT:    ret i32 undef
@@ -1303,7 +1303,7 @@ int test14(int idx) {
 // SANITIZE-WITHOUT-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT1:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR:       handler.out_of_bounds:
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64
-// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB11:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR7]], !nosanitize [[META9]]
+// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB11:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    unreachable, !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR:       cont1:
 // SANITIZE-WITHOUT-ATTR-NEXT:    ret i32 undef
@@ -1333,7 +1333,7 @@ int test15(int idx) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = icmp ugt i32 [[DOTCOUNTED_BY_LOAD]], 1
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT1:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB32:[0-9]+]], i64 2) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB32:[0-9]+]], i64 2) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont1:
 // SANITIZE-WITH-ATTR-NEXT:    ret i64 -1
@@ -1471,7 +1471,7 @@ struct tests_foo {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = icmp ugt i32 [[DOTCOUNTED_BY_LOAD]], 10
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT4:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB33:[0-9]+]], i64 10) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB33:[0-9]+]], i64 10) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont4:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[VAR]], i64 84
@@ -1512,7 +1512,7 @@ int test24(int c, struct tests_foo *var) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = icmp ugt i32 [[DOTCOUNTED_BY_LOAD]], 10
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP1]], label [[CONT5:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB34:[0-9]+]], i64 10) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB34:[0-9]+]], i64 10) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont5:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 44
@@ -1564,7 +1564,7 @@ struct test26_foo {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP1]], label [[CONT5:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB35:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB35:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont5:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARR:%.*]] = getelementptr inbounds nuw i8, ptr [[FOO]], i64 8
@@ -1635,7 +1635,7 @@ struct test27_foo {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB37:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB37:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont3:
 // SANITIZE-WITH-ATTR-NEXT:    [[ENTRIES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 24
@@ -1701,7 +1701,7 @@ struct test28_foo {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP4:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP4]], label [[CONT17:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB39:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB39:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont17:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARR:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 12
@@ -1763,7 +1763,7 @@ struct annotated_struct_array {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = zext i32 [[IDX1]] to i64
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB41:[0-9]+]], i64 [[TMP1]]) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB41:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont3:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [10 x ptr], ptr [[ANN]], i64 0, i64 [[TMP1]]
@@ -1775,7 +1775,7 @@ struct annotated_struct_array {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP4:%.*]] = icmp ult i64 [[IDXPROM27]], [[TMP3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP4]], label [[CONT32:%.*]], label [[HANDLER_OUT_OF_BOUNDS28:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds28:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB42:[0-9]+]], i64 [[IDXPROM27]]) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB42:[0-9]+]], i64 [[IDXPROM27]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont32:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 12
@@ -1808,7 +1808,7 @@ struct annotated_struct_array {
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[TMP1:%.*]] = zext i32 [[IDX1]] to i64
 // SANITIZE-WITHOUT-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT21:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB13:[0-9]+]], i64 [[TMP1]]) #[[ATTR7]], !nosanitize [[META9]]
+// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB13:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    unreachable, !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR:       cont21:
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [10 x ptr], ptr [[ANN]], i64 0, i64 [[TMP1]]
@@ -1850,7 +1850,7 @@ struct test30_struct {
 // SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = zext i32 [[IDX]] to i64, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB44:[0-9]+]], i64 [[TMP0]]) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB44:[0-9]+]], i64 [[TMP0]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 //
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test30(
@@ -1866,7 +1866,7 @@ struct test30_struct {
 // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // SANITIZE-WITHOUT-ATTR-NEXT:  entry:
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[TMP0:%.*]] = zext i32 [[IDX]] to i64, !nosanitize [[META9]]
-// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB15:[0-9]+]], i64 [[TMP0]]) #[[ATTR7]], !nosanitize [[META9]]
+// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB15:[0-9]+]], i64 [[TMP0]]) #[[ATTR8]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    unreachable, !nosanitize [[META9]]
 //
 // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test30(
@@ -1927,7 +1927,7 @@ struct annotated_with_array {
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT1:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = zext i32 [[IDX2]] to i64, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB46:[0-9]+]], i64 [[TMP1]]) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB46:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont1:
 // SANITIZE-WITH-ATTR-NEXT:    [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 336
@@ -1937,7 +1937,7 @@ struct annotated_with_array {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[IDXPROM4]], [[TMP2]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP3]], label [[CONT9:%.*]], label [[HANDLER_OUT_OF_BOUNDS5:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds5:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB48:[0-9]+]], i64 [[IDXPROM4]]) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB48:[0-9]+]], i64 [[IDXPROM4]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont9:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 344
@@ -1979,7 +1979,7 @@ struct annotated_with_array {
 // SANITIZE-WITHOUT-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT7:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR:       handler.out_of_bounds:
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[TMP1:%.*]] = zext i32 [[IDX2]] to i64, !nosanitize [[META9]]
-// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB17:[0-9]+]], i64 [[TMP1]]) #[[ATTR7]], !nosanitize [[META9]]
+// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB17:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    unreachable, !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR:       cont7:
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 344
@@ -2008,7 +2008,7 @@ void test32(struct annotated_with_array *ptr, int idx1, int idx2) {
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT1:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = zext i32 [[INDEX]] to i64, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB49:[0-9]+]], i64 [[TMP1]]) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB49:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont1:
 // SANITIZE-WITH-ATTR-NEXT:    [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 336
@@ -2045,7 +2045,7 @@ void test32(struct annotated_with_array *ptr, int idx1, int idx2) {
 // SANITIZE-WITHOUT-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT1:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR:       handler.out_of_bounds:
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[TMP1:%.*]] = zext i32 [[INDEX]] to i64
-// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB18:[0-9]+]], i64 [[TMP1]]) #[[ATTR7]], !nosanitize [[META9]]
+// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB18:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    unreachable, !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR:       cont1:
 // SANITIZE-WITHOUT-ATTR-NEXT:    ret i64 -1
@@ -2097,14 +2097,14 @@ struct multi_subscripts {
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT1:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = zext i32 [[IDX1]] to i64, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB51:[0-9]+]], i64 [[TMP1]]) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB51:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont1:
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[IDX2]], 43
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP2]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS2:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds2:
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP3:%.*]] = zext i32 [[IDX2]] to i64
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB52:[0-9]+]], i64 [[TMP3]]) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB52:[0-9]+]], i64 [[TMP3]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont3:
 // SANITIZE-WITH-ATTR-NEXT:    ret i64 -1
@@ -2121,14 +2121,14 @@ struct multi_subscripts {
 // SANITIZE-WITHOUT-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT1:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR:       handler.out_of_bounds:
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[TMP1:%.*]] = zext i32 [[IDX1]] to i64, !nosanitize [[META9]]
-// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB20:[0-9]+]], i64 [[TMP1]]) #[[ATTR7]], !nosanitize [[META9]]
+// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB20:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    unreachable, !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR:       cont1:
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[IDX2]], 43
 // SANITIZE-WITHOUT-ATTR-NEXT:    br i1 [[TMP2]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS2:%.*]], !prof [[PROF8]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR:       handler.out_of_bounds2:
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[TMP3:%.*]] = zext i32 [[IDX2]] to i64
-// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB21:[0-9]+]], i64 [[TMP3]]) #[[ATTR7]], !nosanitize [[META9]]
+// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB21:[0-9]+]], i64 [[TMP3]]) #[[ATTR8]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    unreachable, !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR:       cont3:
 // SANITIZE-WITHOUT-ATTR-NEXT:    ret i64 -1
@@ -2151,7 +2151,7 @@ size_t test34(struct multi_subscripts *ptr, int idx1, int idx2) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[INDEX]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB53:[0-9]+]], i64 [[INDEX]]) #[[ATTR7]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB53:[0-9]+]], i64 [[INDEX]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont3:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
@@ -2210,3 +2210,40 @@ void test35(struct annotated *p, size_t index) {
 size_t test35_bdos(struct annotated *p) {
   return __bdos(&p->array[-42]);
 }
+
+// See https://github.com/llvm/llvm-project/pull/122198#issuecomment-2627868702
+
+typedef struct {
+  char __padding[0];
+} spinlock_t;
+
+struct {
+  int priv_len;
+  spinlock_t addr_list_lock;
+  char *dev_addr;
+  char priv[] __attribute__((__counted_by__(priv_len)));
+} x;
+
+// SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test36(
+// SANITIZE-WITH-ATTR-SAME: ) local_unnamed_addr #[[ATTR6:[0-9]+]] {
+// SANITIZE-WITH-ATTR-NEXT:  entry:
+// SANITIZE-WITH-ATTR-NEXT:    ret i64 -1
+//
+// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test36(
+// NO-SANITIZE-WITH-ATTR-SAME: ) local_unnamed_addr #[[ATTR10:[0-9]+]] {
+// NO-SANITIZE-WITH-ATTR-NEXT:  entry:
+// NO-SANITIZE-WITH-ATTR-NEXT:    ret i64 -1
+//
+// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test36(
+// SANITIZE-WITHOUT-ATTR-SAME: ) local_unnamed_addr #[[ATTR6:[0-9]+]] {
+// SANITIZE-WITHOUT-ATTR-NEXT:  entry:
+// SANITIZE-WITHOUT-ATTR-NEXT:    ret i64 -1
+//
+// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test36(
+// NO-SANITIZE-WITHOUT-ATTR-SAME: ) local_unnamed_addr #[[ATTR9:[0-9]+]] {
+// NO-SANITIZE-WITHOUT-ATTR-NEXT:  entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT:    ret i64 -1
+//
+size_t test36() {
+  return __builtin_dynamic_object_size(&x.dev_addr[4], 1);
+}
diff --git a/clang/test/Driver/Xarch.c b/clang/test/Driver/Xarch.c
index f7693fb..f35e292 100644
--- a/clang/test/Driver/Xarch.c
+++ b/clang/test/Driver/Xarch.c
@@ -1,8 +1,13 @@
 // RUN: %clang -target i386-apple-darwin11 -m32 -Xarch_i386 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3ONCE %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -Xarch_x86_64 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3ONCE %s
+// RUN: %clang -target x86_64-unknown-windows-msvc -Xarch_x86_64 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3ONCE %s
+// RUN: %clang -target aarch64-unknown-linux-gnu -Xarch_aarch64 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3ONCE %s
+// RUN: %clang -target powerpc64le-unknown-linux-gnu -Xarch_powerpc64le -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3ONCE %s
 // O3ONCE: "-O3"
 // O3ONCE-NOT: "-O3"
 
 // RUN: %clang -target i386-apple-darwin11 -m64 -Xarch_i386 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3NONE %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -m64 -Xarch_i386 -O3 %s -S -### 2>&1 | FileCheck -check-prefix=O3NONE %s
 // O3NONE-NOT: "-O3"
 // O3NONE: argument unused during compilation: '-Xarch_i386 -O3'
 
@@ -10,3 +15,6 @@
 // INVALID: error: invalid Xarch argument: '-Xarch_i386 -o'
 // INVALID: error: invalid Xarch argument: '-Xarch_i386 -S'
 // INVALID: error: invalid Xarch argument: '-Xarch_i386 -o'
+
+// RUN: %clang -target x86_64-unknown-linux-gnu -Xarch_x86_64 -Wl,foo %s -### 2>&1 | FileCheck -check-prefix=LINKER %s
+// LINKER: "foo"
diff --git a/clang/test/Driver/aarch64-ptrauth.c b/clang/test/Driver/aarch64-ptrauth.c
index d036189..1d2993f 100644
--- a/clang/test/Driver/aarch64-ptrauth.c
+++ b/clang/test/Driver/aarch64-ptrauth.c
@@ -64,22 +64,31 @@
 
 //// The only branch protection option compatible with PAuthABI is BTI.
 // RUN: not %clang -### -c --target=aarch64-linux -mabi=pauthtest -mbranch-protection=pac-ret %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=ERR4
+// RUN:   FileCheck %s --check-prefix=ERR4_1
 // RUN: not %clang -### -c --target=aarch64-linux-pauthtest       -mbranch-protection=pac-ret %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=ERR4
-// ERR4: error: unsupported option '-mbranch-protection=pac-ret' for target 'aarch64-unknown-linux-pauthtest'
+// RUN:   FileCheck %s --check-prefix=ERR4_1
+// RUN: not %clang -### -c --target=aarch64 -fptrauth-returns     -mbranch-protection=pac-ret %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=ERR4_2
+// ERR4_1: error: unsupported option '-mbranch-protection=pac-ret' for target 'aarch64-unknown-linux-pauthtest'
+// ERR4_2: error: the combination of '-mbranch-protection=pac-ret' and '-fptrauth-returns' is incompatible
 
 // RUN: not %clang -### -c --target=aarch64-linux -mabi=pauthtest -mbranch-protection=gcs %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=ERR5
+// RUN:   FileCheck %s --check-prefix=ERR5_1
 // RUN: not %clang -### -c --target=aarch64-linux-pauthtest       -mbranch-protection=gcs %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=ERR5
-// ERR5: error: unsupported option '-mbranch-protection=gcs' for target 'aarch64-unknown-linux-pauthtest'
+// RUN:   FileCheck %s --check-prefix=ERR5_1
+// RUN: not %clang -### -c --target=aarch64 -fptrauth-returns     -mbranch-protection=gcs %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=ERR5_2
+// ERR5_1: error: unsupported option '-mbranch-protection=gcs' for target 'aarch64-unknown-linux-pauthtest'
+// ERR5_2: error: the combination of '-mbranch-protection=gcs' and '-fptrauth-returns' is incompatible
 
 // RUN: not %clang -### -c --target=aarch64-linux -mabi=pauthtest -mbranch-protection=standard %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=ERR6
+// RUN:   FileCheck %s --check-prefix=ERR6_1
 // RUN: not %clang -### -c --target=aarch64-linux-pauthtest       -mbranch-protection=standard %s 2>&1 | \
-// RUN:   FileCheck %s --check-prefix=ERR6
-// ERR6: error: unsupported option '-mbranch-protection=standard' for target 'aarch64-unknown-linux-pauthtest'
+// RUN:   FileCheck %s --check-prefix=ERR6_1
+// RUN: not %clang -### -c --target=aarch64 -fptrauth-returns     -mbranch-protection=standard %s 2>&1 | \
+// RUN:   FileCheck %s --check-prefix=ERR6_2
+// ERR6_1: error: unsupported option '-mbranch-protection=standard' for target 'aarch64-unknown-linux-pauthtest'
+// ERR6_2: error: the combination of '-mbranch-protection=standard' and '-fptrauth-returns' is incompatible
 
 // RUN: not %clang -### -c --target=aarch64-linux -mabi=pauthtest -msign-return-address=all %s 2>&1 | \
 // RUN:   FileCheck %s --check-prefix=ERR7
diff --git a/clang/test/Driver/amdgpu-openmp-sanitize-options.c b/clang/test/Driver/amdgpu-openmp-sanitize-options.c
new file mode 100644
index 0000000..49aae8b
--- /dev/null
+++ b/clang/test/Driver/amdgpu-openmp-sanitize-options.c
@@ -0,0 +1,65 @@
+// REQUIRES: x86-registered-target, amdgpu-registered-target
+
+// Fail on invalid ROCm Path.
+// RUN:   not %clang -### --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=gfx908:xnack+ -fsanitize=address -fgpu-sanitize -nogpuinc --rocm-path=%S/Inputs/rocm-invalid  %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=FAIL %s
+
+// Enable multiple sanitizer's apart from ASan with invalid rocm-path.
+// RUN:   not %clang -### --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=gfx908:xnack+ -fsanitize=address -fsanitize=leak -fgpu-sanitize --rocm-path=%S/Inputs/rocm-invalid -nogpuinc  %s 2>&1 \
+// RUN:   | FileCheck --check-prefixes=NOTSUPPORTED,FAIL %s
+
+// Memory, Leak, UndefinedBehaviour and Thread Sanitizer are not supported on AMDGPU.
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=gfx908:xnack+ -fsanitize=address -fsanitize=leak -fgpu-sanitize --rocm-path=%S/Inputs/rocm -nogpuinc  %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=NOTSUPPORTED %s
+
+// GPU ASan Enabled Test Cases
+// ASan enabled for amdgpu-arch [gfx908]
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=gfx908 -fsanitize=address -fgpu-sanitize --rocm-path=%S/Inputs/rocm %s 2>&1 \
+// RUN:   | FileCheck -check-prefixes=NOXNACK,GPUSAN %s
+
+// GPU ASan enabled for amdgpu-arch [gfx908:xnack-]
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=gfx908:xnack- -fsanitize=address -fgpu-sanitize --rocm-path=%S/Inputs/rocm %s 2>&1 \
+// RUN:   | FileCheck -check-prefixes=XNACKNEG,GPUSAN %s
+
+// GPU ASan enabled for amdgpu-arch [gfx908:xnack+]
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=gfx908:xnack+ -fsanitize=address -fgpu-sanitize --rocm-path=%S/Inputs/rocm %s 2>&1 \
+// RUN:   | FileCheck -check-prefixes=GPUSAN %s
+
+// ASan enabled for multiple amdgpu-arch [gfx908:xnack+,gfx900:xnack+]
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=gfx908:xnack+ --offload-arch=gfx900:xnack+ -fsanitize=address -fgpu-sanitize --rocm-path=%S/Inputs/rocm %s 2>&1 \
+// RUN:   | FileCheck -check-prefixes=GPUSAN %s
+
+// GPU ASan Disabled Test Cases
+// ASan disabled for amdgpu-arch [gfx908]
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=gfx908 -fsanitize=address -fno-gpu-sanitize --rocm-path=%S/Inputs/rocm %s 2>&1 \
+// RUN:   | FileCheck -check-prefixes=NOGPUSAN %s
+
+// GPU ASan disabled for amdgpu-arch [gfx908:xnack-]
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=gfx908:xnack- -fsanitize=address -fno-gpu-sanitize --rocm-path=%S/Inputs/rocm %s 2>&1 \
+// RUN:   | FileCheck -check-prefixes=NOGPUSAN %s
+
+// GPU ASan disabled for amdgpu-arch [gfx908:xnack+]
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=gfx908:xnack+ -fsanitize=address -fno-gpu-sanitize --rocm-path=%S/Inputs/rocm %s 2>&1 \
+// RUN:   | FileCheck -check-prefixes=NOGPUSAN %s
+
+// ASan disabled for amdgpu-arch [gfx908:xnack+,gfx900:xnack+]
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp --offload-arch=gfx908:xnack+ --offload-arch=gfx900:xnack+ -fsanitize=address -fno-gpu-sanitize --rocm-path=%S/Inputs/rocm %s 2>&1 \
+// RUN:   | FileCheck -check-prefixes=NOGPUSAN %s
+
+// FAIL-DAG: error: cannot find ROCm device library for ABI version 5; provide its path via '--rocm-path' or '--rocm-device-lib-path', or pass '-nogpulib' to build without ROCm device library
+// NOTSUPPORTED-DAG: warning: ignoring '-fsanitize=leak' option as it is not currently supported for target 'amdgcn-amd-amdhsa'
+
+// NOXNACK: warning: ignoring '-fsanitize=address' option for offload arch 'gfx908' as it is not currently supported there. Use it with an offload arch containing 'xnack+' instead
+// XNACKNEG: warning: ignoring '-fsanitize=address' option for offload arch 'gfx908:xnack-' as it is not currently supported there. Use it with an offload arch containing 'xnack+' instead
+
+// GPUSAN: {{"[^"]*clang[^"]*" "-cc1" "-triple" "x86_64-unknown-linux-gnu".* "-fopenmp".* "-fsanitize=address".* "-fopenmp-targets=amdgcn-amd-amdhsa".* "-x" "c".*}}
+// GPUSAN: {{"[^"]*clang[^"]*" "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu".* "-emit-llvm-bc".* "-target-cpu" "(gfx908|gfx900)".* "-fopenmp".* "-fsanitize=address".* "-x" "c".*}}
+// GPUSAN: {{"[^"]*clang-offload-packager[^"]*" "-o".* "--image=file=.*.bc,triple=amdgcn-amd-amdhsa,arch=gfx908(:xnack\-|:xnack\+)?,kind=openmp(,feature=(\-xnack|\+xnack))?"}}
+// GPUSAN: {{"[^"]*clang[^"]*" "-cc1" "-triple" "x86_64-unknown-linux-gnu".* "-fopenmp".* "-fsanitize=address".* "-fopenmp-targets=amdgcn-amd-amdhsa".* "-x" "ir".*}}
+// GPUSAN: {{"[^"]*clang-linker-wrapper[^"]*" "--host-triple=x86_64-unknown-linux-gnu" "--linker-path=[^"]*".* "--whole-archive" "[^"]*(libclang_rt.asan_static.a|libclang_rt.asan_static-x86_64.a)".* "--whole-archive" "[^"]*(libclang_rt.asan.a|libclang_rt.asan-x86_64.a)".*}}
+
+// NOGPUSAN: {{"[^"]*clang[^"]*" "-cc1" "-triple" "x86_64-unknown-linux-gnu".* "-fopenmp".* "-fsanitize=address".* "-fopenmp-targets=amdgcn-amd-amdhsa".* "-x" "c".*}}
+// NOGPUSAN: {{"[^"]*clang[^"]*" "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu".* "-emit-llvm-bc".* "-target-cpu" "(gfx908|gfx900)".* "-fopenmp".* "-x" "c".*}}
+// NOGPUSAN: {{"[^"]*clang-offload-packager[^"]*" "-o".* "--image=file=.*.bc,triple=amdgcn-amd-amdhsa,arch=gfx908(:xnack\-|:xnack\+)?,kind=openmp(,feature=(\-xnack|\+xnack))?"}}
+// NOGPUSAN: {{"[^"]*clang[^"]*" "-cc1" "-triple" "x86_64-unknown-linux-gnu".* "-fopenmp".* "-fsanitize=address".* "-fopenmp-targets=amdgcn-amd-amdhsa".* "-x" "ir".*}}
+// NOGPUSAN: {{"[^"]*clang-linker-wrapper[^"]*" "--host-triple=x86_64-unknown-linux-gnu" "--linker-path=[^"]*".* "--whole-archive" "[^"]*(libclang_rt.asan_static.a|libclang_rt.asan_static-x86_64.a)".* "--whole-archive" "[^"]*(libclang_rt.asan.a|libclang_rt.asan-x86_64.a)".*}}
diff --git a/clang/test/Driver/offload-Xarch.c b/clang/test/Driver/offload-Xarch.c
new file mode 100644
index 0000000..a85a501
--- /dev/null
+++ b/clang/test/Driver/offload-Xarch.c
@@ -0,0 +1,34 @@
+// RUN: %clang --target=x86_64-unknown-linux-gnu -x cuda %s -Xarch_nvptx64 -O3 -S -nogpulib -nogpuinc -### 2>&1 | FileCheck -check-prefix=O3ONCE %s
+// RUN: %clang -x cuda %s -Xarch_device -O3 -S -nogpulib -nogpuinc -### 2>&1 | FileCheck -check-prefix=O3ONCE %s
+// RUN: %clang -x hip %s -Xarch_amdgcn -O3 -S -nogpulib -nogpuinc -### 2>&1 | FileCheck -check-prefix=O3ONCE %s
+// RUN: %clang -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib -nogpuinc \
+// RUN:   -Xarch_amdgcn -march=gfx90a -Xarch_amdgcn -O3 -S -### %s 2>&1 \
+// RUN: | FileCheck -check-prefix=O3ONCE %s
+// RUN: %clang -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -nogpulib -nogpuinc \
+// RUN:   -Xarch_nvptx64 -march=sm_52 -Xarch_nvptx64 -O3 -S -### %s 2>&1 \
+// RUN: | FileCheck -check-prefix=O3ONCE %s
+// O3ONCE: "-O3"
+// O3ONCE-NOT: "-O3"
+
+// RUN: %clang -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -nogpulib \
+// RUN:   --target=x86_64-unknown-linux-gnu -Xopenmp-target=nvptx64-nvidia-cuda --offload-arch=sm_52,sm_60 -nogpuinc \
+// RUN:   -Xopenmp-target=amdgcn-amd-amdhsa --offload-arch=gfx90a,gfx1030 -ccc-print-bindings -### %s 2>&1 \
+// RUN: | FileCheck -check-prefix=OPENMP %s
+//
+// OPENMP: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HOST_BC:.+]]"
+// OPENMP: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[GFX1030_BC:.+]]"
+// OPENMP: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[GFX90A_BC:.+]]"
+// OPENMP: # "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[SM52_PTX:.+]]"
+// OPENMP: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[SM52_PTX]]"], output: "[[SM52_CUBIN:.+]]"
+// OPENMP: # "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[SM60_PTX:.+]]"
+// OPENMP: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[SM60_PTX]]"], output: "[[SM60_CUBIN:.+]]"
+// OPENMP: # "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[GFX1030_BC]]", "[[GFX90A_BC]]", "[[SM52_CUBIN]]", "[[SM60_CUBIN]]"], output: "[[BINARY:.+]]"
+// OPENMP: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[BINARY]]"], output: "[[HOST_OBJ:.+]]"
+// OPENMP: # "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
+
+// RUN: %clang -x cuda %s --offload-arch=sm_52,sm_60 -Xarch_sm_52 -O3 -Xarch_sm_60 -O0 \
+// RUN:   --target=x86_64-unknown-linux-gnu -Xarch_host -O3 -S -nogpulib -nogpuinc -### 2>&1 \
+// RUN: | FileCheck -check-prefix=CUDA %s
+// CUDA: "-cc1" "-triple" "nvptx64-nvidia-cuda" {{.*}}"-target-cpu" "sm_52" {{.*}}"-O3"
+// CUDA: "-cc1" "-triple" "nvptx64-nvidia-cuda" {{.*}}"-target-cpu" "sm_60" {{.*}}"-O0"
+// CUDA: "-cc1" "-triple" "x86_64-unknown-linux-gnu" {{.*}}"-O3"
diff --git a/clang/test/Frontend/aarch64-ignore-branch-protection-attribute.c b/clang/test/Frontend/aarch64-ignore-branch-protection-attribute.c
new file mode 100644
index 0000000..32cc98d
--- /dev/null
+++ b/clang/test/Frontend/aarch64-ignore-branch-protection-attribute.c
@@ -0,0 +1,31 @@
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang -target aarch64-linux-pauthtest   %s -S -emit-llvm -o - 2>&1 | FileCheck --implicit-check-not=warning: %s
+// RUN: %clang -target aarch64 -fptrauth-returns %s -S -emit-llvm -o - 2>&1 | FileCheck --implicit-check-not=warning: %s
+
+/// Unsupported with pauthtest, warning emitted
+__attribute__((target("branch-protection=pac-ret"))) void f1() {}
+// CHECK:      warning: unsupported 'branch-protection' in the 'target' attribute string; 'target' attribute ignored [-Wignored-attributes]
+// CHECK-NEXT: __attribute__((target("branch-protection=pac-ret"))) void f1() {}
+__attribute__((target("branch-protection=gcs"))) void f2() {}
+// CHECK:      warning: unsupported 'branch-protection' in the 'target' attribute string; 'target' attribute ignored [-Wignored-attributes]
+// CHECK-NEXT: __attribute__((target("branch-protection=gcs"))) void f2() {}
+__attribute__((target("branch-protection=standard"))) void f3() {}
+// CHECK:      warning: unsupported 'branch-protection' in the 'target' attribute string; 'target' attribute ignored [-Wignored-attributes]
+// CHECK-NEXT: __attribute__((target("branch-protection=standard"))) void f3() {}
+
+/// Supported with pauthtest, no warning emitted
+__attribute__((target("branch-protection=bti"))) void f4() {}
+
+/// Supported with pauthtest, no warning emitted
+__attribute__((target("branch-protection=none"))) void f5() {}
+
+/// Check there are no branch protection function attributes which are unsupported with pauthtest
+
+// CHECK-NOT:  attributes {{.*}} "sign-return-address"
+// CHECK-NOT:  attributes {{.*}} "sign-return-address-key"
+// CHECK-NOT:  attributes {{.*}} "branch-protection-pauth-lr"
+// CHECK-NOT:  attributes {{.*}} "guarded-control-stack"
+
+/// Check function attributes which are supported with pauthtest
+// CHECK:      attributes {{.*}} "branch-target-enforcement"
diff --git a/clang/test/Interpreter/simple-exception.cpp b/clang/test/Interpreter/simple-exception.cpp
index 6749acd..651e8d9 100644
--- a/clang/test/Interpreter/simple-exception.cpp
+++ b/clang/test/Interpreter/simple-exception.cpp
@@ -1,7 +1,7 @@
 // clang-format off
 // UNSUPPORTED: system-aix
-// XFAIL for arm and arm64, or running on Windows.
-// XFAIL: target=arm{{.*}}, system-windows
+// XFAIL for arm, or running on Windows.
+// XFAIL: target=arm-{{.*}}, target=armv{{.*}}, system-windows
 // RUN: cat %s | clang-repl | FileCheck %s
 
 // Incompatible with msan. It passes with -O3 but fail -Oz. Interpreter
diff --git a/clang/test/Modules/pr125521.cppm b/clang/test/Modules/pr125521.cppm
new file mode 100644
index 0000000..d064cdf
--- /dev/null
+++ b/clang/test/Modules/pr125521.cppm
@@ -0,0 +1,57 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+//
+// RUN: %clang_cc1 -std=c++20 %t/mod2.cppm -emit-module-interface -o %t/mod2.pcm
+// RUN: %clang_cc1 -std=c++20 %t/mod1.cppm -emit-module-interface -o %t/mod1.pcm \
+// RUN:     -fmodule-file=Mod2=%t/mod2.pcm
+// RUN: %clang_cc1 -std=c++20 %t/test.cc -fmodule-file=Mod2=%t/mod2.pcm -fmodule-file=Mod=%t/mod1.pcm \
+// RUN:     -fsyntax-only -verify
+
+// RUN: %clang_cc1 -std=c++20 %t/mod2.cppm -emit-module-interface -o %t/mod2.pcm
+// RUN: %clang_cc1 -std=c++20 %t/mod1.cppm -emit-module-interface -o %t/mod1.pcm \
+// RUN:     -fmodule-file=Mod2=%t/mod2.pcm
+// RUN: %clang_cc1 -std=c++20 %t/mod1.pcm  -fmodule-file=Mod2=%t/mod2.pcm -emit-llvm -o - \
+// RUN:     | FileCheck %t/mod1.cppm
+
+//--- hello.h
+template <typename V> int get() noexcept {return 0;};
+
+template <typename T>
+class List
+{
+    template <typename V> friend int get() noexcept;
+};
+
+//--- mod2.cppm
+module;
+#include "hello.h"
+export module Mod2;
+export const char *modFn2() {
+    List<int> a;
+    return "hello";
+}
+
+//--- mod1.cppm
+module;
+#include "hello.h"
+export module Mod;
+import Mod2;
+export extern "C" const char *modFn() {
+    List<int> a;
+    List<double> b;
+    return modFn2();
+}
+
+// Fine enough to check it won't crash.
+// CHECK: define {{.*}}@modFn
+
+//--- test.cc
+// expected-no-diagnostics
+import Mod;
+import Mod2;
+
+void test() {
+    modFn();
+    modFn2();
+}
diff --git a/clang/test/OpenMP/begin_declare_variant_messages.c b/clang/test/OpenMP/begin_declare_variant_messages.c
index ffa5600..f87714a 100644
--- a/clang/test/OpenMP/begin_declare_variant_messages.c
+++ b/clang/test/OpenMP/begin_declare_variant_messages.c
@@ -32,27 +32,27 @@ const int var;
 #pragma omp end declare variant
 #pragma omp begin declare variant match // expected-error {{expected '(' after 'match'}}
 #pragma omp end declare variant
-#pragma omp begin declare variant match( // expected-error {{expected ')'}} expected-warning {{expected identifier or string literal describing a context set; set skipped}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}} expected-note {{to match this '('}}
+#pragma omp begin declare variant match( // expected-error {{expected ')'}} expected-warning {{expected identifier or string literal describing a context set; set skipped}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}} expected-note {{to match this '('}}
 #pragma omp end declare variant
-#pragma omp begin declare variant match() // expected-warning {{expected identifier or string literal describing a context set; set skipped}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp begin declare variant match() // expected-warning {{expected identifier or string literal describing a context set; set skipped}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
 #pragma omp end declare variant
-#pragma omp begin declare variant match(xxx) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp begin declare variant match(xxx) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
 #pragma omp end declare variant
-#pragma omp begin declare variant match(xxx=) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp begin declare variant match(xxx=) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
 #pragma omp end declare variant
-#pragma omp begin declare variant match(xxx=yyy) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp begin declare variant match(xxx=yyy) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
 #pragma omp end declare variant
-#pragma omp begin declare variant match(xxx=yyy}) // expected-error {{expected ')'}} expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-warning {{extra tokens at the end of '#pragma omp begin declare variant' are ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}} expected-note {{to match this '('}}
+#pragma omp begin declare variant match(xxx=yyy}) // expected-error {{expected ')'}} expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-warning {{extra tokens at the end of '#pragma omp begin declare variant' are ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}} expected-note {{to match this '('}}
 #pragma omp end declare variant
-#pragma omp begin declare variant match(xxx={) // expected-error {{expected ')'}} expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}} expected-note {{to match this '('}}
+#pragma omp begin declare variant match(xxx={) // expected-error {{expected ')'}} expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}} expected-note {{to match this '('}}
 #pragma omp end declare variant
-#pragma omp begin declare variant match(xxx={}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp begin declare variant match(xxx={}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
 #pragma omp end declare variant
-#pragma omp begin declare variant match(xxx={vvv, vvv}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp begin declare variant match(xxx={vvv, vvv}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
 #pragma omp end declare variant
-#pragma omp begin declare variant match(xxx={vvv} xxx) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp begin declare variant match(xxx={vvv} xxx) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
 #pragma omp end declare variant
-#pragma omp begin declare variant match(xxx={vvv}) xxx // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-warning {{extra tokens at the end of '#pragma omp begin declare variant' are ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp begin declare variant match(xxx={vvv}) xxx // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-warning {{extra tokens at the end of '#pragma omp begin declare variant' are ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
 #pragma omp end declare variant
 #pragma omp begin declare variant match(implementation={xxx}) // expected-warning {{'xxx' is not a valid context selector for the context set 'implementation'; selector ignored}} expected-note {{context selector options are: 'vendor' 'extension' 'unified_address' 'unified_shared_memory' 'reverse_offload' 'dynamic_allocators' 'atomic_default_mem_order'}} expected-note {{the ignored selector spans until here}}
 #pragma omp end declare variant
diff --git a/clang/test/OpenMP/declare_variant_ast_print.c b/clang/test/OpenMP/declare_variant_ast_print.c
index 0df1026..9bd0b6d 100644
--- a/clang/test/OpenMP/declare_variant_ast_print.c
+++ b/clang/test/OpenMP/declare_variant_ast_print.c
@@ -20,6 +20,8 @@ int foo(void);
 #pragma omp declare variant(foo) match(implementation={vendor(score(5): ibm, xxx, ibm)}, device={kind(cpu, nohost)})
 #pragma omp declare variant(foo) match(device={kind(host)})
 #pragma omp declare variant(foo) match(device={kind(nohost), xxx})
+#pragma omp declare variant(foo) match(target_device={kind(host)})
+#pragma omp declare variant(foo) match(target_device={kind(nohost), xxx})
 #pragma omp declare variant(foo) match(implementation={extension(match_all)})
 #pragma omp declare variant(foo) match(implementation={extension(match_any)})
 #pragma omp declare variant(foo) match(implementation={extension(match_none)})
@@ -29,6 +31,8 @@ int bar(void);
 // CHECK-NEXT: #pragma omp declare variant(foo) match(implementation={extension(match_none)})
 // CHECK-NEXT: #pragma omp declare variant(foo) match(implementation={extension(match_any)})
 // CHECK-NEXT: #pragma omp declare variant(foo) match(implementation={extension(match_all)})
+// CHECK-NEXT: #pragma omp declare variant(foo) match(target_device={kind(nohost)})
+// CHECK-NEXT: #pragma omp declare variant(foo) match(target_device={kind(host)})
 // CHECK-NEXT: #pragma omp declare variant(foo) match(device={kind(nohost)})
 // CHECK-NEXT: #pragma omp declare variant(foo) match(device={kind(host)})
 // CHECK-NEXT: #pragma omp declare variant(foo) match(implementation={vendor(score(5): ibm)}, device={kind(cpu, nohost)})
diff --git a/clang/test/OpenMP/declare_variant_bind_to_decl.cpp b/clang/test/OpenMP/declare_variant_bind_to_decl.cpp
index dca18ab..3419141 100644
--- a/clang/test/OpenMP/declare_variant_bind_to_decl.cpp
+++ b/clang/test/OpenMP/declare_variant_bind_to_decl.cpp
@@ -29,6 +29,6 @@ int main() {
 // CHECK-LABEL: define {{[^@]+}}@main
 // CHECK-SAME: () #[[ATTR1:[0-9]+]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    call void @"_Z74foo$ompvariant$S2$s7$Pppc64le$Pppc64$S3$s9$Pmatch_any$Pbind_to_declarationv"()
+// CHECK-NEXT:    call void @{{"_Z[0-9]+foo\$ompvariant\$.*"}}()
 // CHECK-NEXT:    ret i32 0
 //
diff --git a/clang/test/OpenMP/declare_variant_messages.c b/clang/test/OpenMP/declare_variant_messages.c
index 0ae276e..14637d8 100644
--- a/clang/test/OpenMP/declare_variant_messages.c
+++ b/clang/test/OpenMP/declare_variant_messages.c
@@ -16,17 +16,17 @@ int foo(void);
 #pragma omp declare variant(foo) // omp50-error {{expected 'match' clause on 'omp declare variant' directive}} omp51-error {{expected 'match', 'adjust_args', or 'append_args' clause on 'omp declare variant' directive}}
 #pragma omp declare variant(foo) xxx // omp50-error {{expected 'match' clause on 'omp declare variant' directive}} omp51-error {{expected 'match', 'adjust_args', or 'append_args' clause on 'omp declare variant' directive}}
 #pragma omp declare variant(foo) match // expected-error {{expected '(' after 'match'}}
-#pragma omp declare variant(foo) match( // expected-error {{expected ')'}} expected-warning {{expected identifier or string literal describing a context set; set skipped}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}} expected-note {{to match this '('}}
-#pragma omp declare variant(foo) match() // expected-warning {{expected identifier or string literal describing a context set; set skipped}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
-#pragma omp declare variant(foo) match(xxx) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
-#pragma omp declare variant(foo) match(xxx=) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
-#pragma omp declare variant(foo) match(xxx=yyy) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
-#pragma omp declare variant(foo) match(xxx=yyy}) // expected-error {{expected ')'}} expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}} expected-note {{to match this '('}} omp50-error {{expected 'match' clause on 'omp declare variant' directive}} omp51-error {{expected 'match', 'adjust_args', or 'append_args' clause on 'omp declare variant' directive}}
-#pragma omp declare variant(foo) match(xxx={) // expected-error {{expected ')'}} expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}} expected-note {{to match this '('}}
-#pragma omp declare variant(foo) match(xxx={}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
-#pragma omp declare variant(foo) match(xxx={vvv, vvv}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
-#pragma omp declare variant(foo) match(xxx={vvv} xxx) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
-#pragma omp declare variant(foo) match(xxx={vvv}) xxx // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}} omp50-error {{expected 'match' clause on 'omp declare variant' directive}} omp51-error {{expected 'match', 'adjust_args', or 'append_args' clause on 'omp declare variant' directive}}
+#pragma omp declare variant(foo) match( // expected-error {{expected ')'}} expected-warning {{expected identifier or string literal describing a context set; set skipped}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}} expected-note {{to match this '('}}
+#pragma omp declare variant(foo) match() // expected-warning {{expected identifier or string literal describing a context set; set skipped}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp declare variant(foo) match(xxx) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp declare variant(foo) match(xxx=) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp declare variant(foo) match(xxx=yyy) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp declare variant(foo) match(xxx=yyy}) // expected-error {{expected ')'}} expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}} expected-note {{to match this '('}} omp50-error {{expected 'match' clause on 'omp declare variant' directive}} omp51-error {{expected 'match', 'adjust_args', or 'append_args' clause on 'omp declare variant' directive}}
+#pragma omp declare variant(foo) match(xxx={) // expected-error {{expected ')'}} expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}} expected-note {{to match this '('}}
+#pragma omp declare variant(foo) match(xxx={}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp declare variant(foo) match(xxx={vvv, vvv}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp declare variant(foo) match(xxx={vvv} xxx) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp declare variant(foo) match(xxx={vvv}) xxx // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}} omp50-error {{expected 'match' clause on 'omp declare variant' directive}} omp51-error {{expected 'match', 'adjust_args', or 'append_args' clause on 'omp declare variant' directive}}
 #pragma omp declare variant(foo) match(implementation={xxx}) // expected-warning {{'xxx' is not a valid context selector for the context set 'implementation'; selector ignored}} expected-note {{context selector options are: 'vendor' 'extension' 'unified_address' 'unified_shared_memory' 'reverse_offload' 'dynamic_allocators' 'atomic_default_mem_order'}} expected-note {{the ignored selector spans until here}}
 #pragma omp declare variant(foo) match(implementation={vendor}) // expected-warning {{the context selector 'vendor' in context set 'implementation' requires a context property defined in parentheses; selector ignored}} expected-note {{the ignored selector spans until here}}
 #pragma omp declare variant(foo) match(implementation={vendor(}) // expected-error {{expected ')'}} expected-warning {{expected identifier or string literal describing a context property; property skipped}} expected-note {{context property options are: 'amd' 'arm' 'bsc' 'cray' 'fujitsu' 'gnu' 'ibm' 'intel' 'llvm' 'nec' 'nvidia' 'pgi' 'ti' 'unknown'}} expected-note {{to match this '('}}
@@ -48,8 +48,18 @@ int foo(void);
 #pragma omp declare variant(foo) match(device={kind(score(5): host), kind(llvm)}) // expected-warning {{the context selector 'kind' in the context set 'device' cannot have a score ('5'); score ignored}} expected-warning {{the context selector 'kind' was used already in the same 'omp declare variant' directive; selector ignored}} expected-note {{the previous context selector 'kind' used here}} expected-note {{the ignored selector spans until here}}
 #pragma omp declare variant(foo) match(device={kind(score(5): nohost), vendor(llvm)}) // expected-warning {{the context selector 'kind' in the context set 'device' cannot have a score ('5'); score ignored}} expected-warning {{the context selector 'vendor' is not valid for the context set 'device'; selector ignored}} expected-note {{the context selector 'vendor' can be nested in the context set 'implementation'; try 'match(implementation={vendor(property)})'}} expected-note {{the ignored selector spans until here}}
 #pragma omp declare variant(foo) match(implementation={extension("aaa")}) // expected-warning {{'aaa' is not a valid context property for the context selector 'extension' and the context set 'implementation'; property ignored}} expected-note {{context property options are: 'match_all' 'match_any' 'match_none'}} expected-note {{the ignored property spans until here}}
+#pragma omp declare variant(foo) match(target_device={}) // expected-warning {{expected identifier or string literal describing a context selector; selector skipped}} expected-note {{context selector options are: 'kind' 'device_num' 'arch' 'isa'}} expected-note {{the ignored selector spans until here}}
+#pragma omp declare variant(foo) match(target_device={xxx}) // expected-warning {{'xxx' is not a valid context selector for the context set 'target_device'; selector ignored}} expected-note {{context selector options are: 'kind' 'device_num' 'arch' 'isa'}} expected-note {{the ignored selector spans until here}}
+#pragma omp declare variant(foo) match(target_device={kind}) // expected-warning {{the context selector 'kind' in context set 'target_device' requires a context property defined in parentheses; selector ignored}} expected-note {{the ignored selector spans until here}}
+#pragma omp declare variant(foo) match(target_device={kind(}) // expected-error {{expected ')'}} expected-warning {{expected identifier or string literal describing a context property; property skipped}} expected-note {{context property options are: 'host' 'nohost' 'cpu' 'gpu' 'fpga' 'any'}} expected-note {{to match this '('}}
+#pragma omp declare variant(foo) match(target_device={kind()}) // expected-warning {{expected identifier or string literal describing a context property; property skipped}} expected-note {{context property options are: 'host' 'nohost' 'cpu' 'gpu' 'fpga' 'any'}}
+#pragma omp declare variant(foo) match(target_device={device_num}) // expected-warning {{the context selector 'device_num' in context set 'target_device' requires a context property defined in parentheses; selector ignored}} expected-note {{the ignored selector spans until here}} 
+#pragma omp declare variant(foo) match(target_device={device_num()}) // expected-error {{expected expression}}
+#pragma omp declare variant(foo) match(target_device={device_num(-1)}) // expected-error {{argument to 'device_num' clause must be a non-negative integer value}}
+#pragma omp declare variant(foo) match(target_device={device_num(abc)}) // expected-error {{expected expression}} expected-error {{use of undeclared identifier 'abc'}}
 int bar(void);
 
+
 #pragma omp declare variant(foo) match(implementation = {vendor(score(foo) :llvm)}) // expected-warning {{score expressions in the OpenMP context selector need to be constant; foo is not and will be ignored}}
 #pragma omp declare variant(foo) match(implementation = {vendor(score(foo()) :llvm)}) // expected-warning {{score expressions in the OpenMP context selector need to be constant; foo() is not and will be ignored}}
 #pragma omp declare variant(foo) match(implementation = {vendor(score(<expr>) :llvm)}) // expected-error {{expected expression}} expected-error {{use of undeclared identifier 'expr'}} expected-error {{expected expression}}
@@ -64,10 +74,10 @@ int score_and_cond_non_const(void);
 #pragma omp declare variant(foo) match(construct={for simd}) // expected-error {{expected ')'}} expected-warning {{expected '}' after the context selectors for the context set "construct"; '}' assumed}} expected-note {{to match this '('}} omp50-error {{expected 'match' clause on 'omp declare variant' directive}} omp51-error {{expected 'match', 'adjust_args', or 'append_args' clause on 'omp declare variant' directive}}
 int construct(void);
 
-#pragma omp declare variant(foo) match(xxx={}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp declare variant(foo) match(xxx={}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
 int a; // expected-error {{'#pragma omp declare variant' can only be applied to functions}}
 
-#pragma omp declare variant(foo) match(xxx={}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp declare variant(foo) match(xxx={}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
 #pragma omp threadprivate(a) // expected-error {{'#pragma omp declare variant' can only be applied to functions}}
 int var;
 #pragma omp threadprivate(var)
@@ -91,21 +101,21 @@ int main(void);
 int main(void);
 
 
-#pragma omp declare variant(foo) match(xxx={}) // expected-error {{single declaration is expected after 'declare variant' directive}} expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp declare variant(foo) match(xxx={}) // expected-error {{single declaration is expected after 'declare variant' directive}} expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
 int b, c;
 
 int no_proto();
-#pragma omp declare variant(no_proto) match(xxx={}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp declare variant(no_proto) match(xxx={}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
 int no_proto_too();
 
 int proto1(int);
 
-#pragma omp declare variant(proto1) match(xxx={}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp declare variant(proto1) match(xxx={}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
 int diff_proto(); // expected-note {{previous declaration is here}}
 
 int diff_proto(double); // expected-error {{conflicting types for 'diff_proto'}}
 
-#pragma omp declare variant(no_proto) match(xxx={}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp declare variant(no_proto) match(xxx={}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
 int diff_proto1(double);
 
 int after_use_variant(void);
@@ -124,24 +134,24 @@ void self_test(int n, int d_no) {
   self(n);
 }
 
-#pragma omp declare variant(after_use_variant) match(xxx={}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-warning {{'#pragma omp declare variant' cannot be applied for function after first usage; the original function might be used}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp declare variant(after_use_variant) match(xxx={}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-warning {{'#pragma omp declare variant' cannot be applied for function after first usage; the original function might be used}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
 int after_use(void);
-#pragma omp declare variant(after_use_variant) match(xxx={}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp declare variant(after_use_variant) match(xxx={}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
 int defined(void) { return 0; }
 int defined1(void) { return 0; }
 
-#pragma omp declare variant(after_use_variant) match(xxx={}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-warning {{'#pragma omp declare variant' cannot be applied to the function that was defined already; the original function might be used}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp declare variant(after_use_variant) match(xxx={}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-warning {{'#pragma omp declare variant' cannot be applied to the function that was defined already; the original function might be used}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
 int defined1(void);
 
 
 int diff_cc_variant(void);
 
-#pragma omp declare variant(diff_cc_variant) match(xxx={}) // expected-error {{variant in '#pragma omp declare variant' with type 'int (void)' is incompatible with type 'int (void) __attribute__((vectorcall))'}} expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp declare variant(diff_cc_variant) match(xxx={}) // expected-error {{variant in '#pragma omp declare variant' with type 'int (void)' is incompatible with type 'int (void) __attribute__((vectorcall))'}} expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
 __vectorcall int diff_cc(void);
 
 int diff_ret_variant(void);
 
-#pragma omp declare variant(diff_ret_variant) match(xxx={}) // expected-error {{variant in '#pragma omp declare variant' with type 'int (void)' is incompatible with type 'void (void)'}} expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp declare variant(diff_ret_variant) match(xxx={}) // expected-error {{variant in '#pragma omp declare variant' with type 'int (void)' is incompatible with type 'void (void)'}} expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
 void diff_ret(void);
 
 void incompat_attr_variant(void);
@@ -173,7 +183,7 @@ void not_marked(void);
 #pragma omp declare variant(not_marked) match(implementation={vendor(unknown)}, device={kind(cpu)}) // expected-note {{marked as 'declare variant' here}}
 void marked_variant(void);
 
-#pragma omp declare variant(marked_variant) match(xxx={}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-warning {{variant function in '#pragma omp declare variant' is itself marked as '#pragma omp declare variant'}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp declare variant(marked_variant) match(xxx={}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-warning {{variant function in '#pragma omp declare variant' is itself marked as '#pragma omp declare variant'}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
 void marked(void);
 
 #pragma omp declare variant(foo) match(device = {isa("foo")})
@@ -214,3 +224,4 @@ int conflicting_nested_score(void);
 #pragma omp declare variant(foo) match(user = {condition(1)}) // expected-error {{nested user conditions in OpenMP context selector not supported (yet)}}
 int conflicting_nested_condition(void);
 #pragma omp end declare variant
+
diff --git a/clang/test/OpenMP/declare_variant_messages.cpp b/clang/test/OpenMP/declare_variant_messages.cpp
index b8a806e..2d6e5bf 100644
--- a/clang/test/OpenMP/declare_variant_messages.cpp
+++ b/clang/test/OpenMP/declare_variant_messages.cpp
@@ -21,8 +21,8 @@ T foofoo();
 #pragma omp declare variant(foofoo <int>) // omp50-error {{expected 'match' clause on 'omp declare variant' directive}} omp51-error {{expected 'match', 'adjust_args', or 'append_args' clause on 'omp declare variant' directive}}
 #pragma omp declare variant(foofoo <int>) xxx // omp50-error {{expected 'match' clause on 'omp declare variant' directive}} omp51-error {{expected 'match', 'adjust_args', or 'append_args' clause on 'omp declare variant' directive}}
 #pragma omp declare variant(foofoo <int>) match // expected-error {{expected '(' after 'match'}}
-#pragma omp declare variant(foofoo <int>) match( // expected-error {{expected ')'}} expected-warning {{expected identifier or string literal describing a context set; set skipped}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}} expected-note {{to match this '('}}
-#pragma omp declare variant(foofoo <int>) match() // expected-warning {{expected identifier or string literal describing a context set; set skipped}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp declare variant(foofoo <int>) match( // expected-error {{expected ')'}} expected-warning {{expected identifier or string literal describing a context set; set skipped}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}} expected-note {{to match this '('}}
+#pragma omp declare variant(foofoo <int>) match() // expected-warning {{expected identifier or string literal describing a context set; set skipped}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
 #pragma omp declare variant(foofoo <int>) match(implementation) // expected-warning {{expected '=' after the context set name "implementation"; '=' assumed}} expected-warning {{expected '{' after the '=' that follows the context set name "implementation"; '{' assumed}} expected-warning {{expected identifier or string literal describing a context selector; selector skipped}} expected-warning {{expected '}' after the context selectors for the context set "implementation"; '}' assumed}} expected-note {{context selector options are: 'vendor' 'extension' 'unified_address' 'unified_shared_memory' 'reverse_offload' 'dynamic_allocators' 'atomic_default_mem_order'}} expected-note {{the ignored selector spans until here}}
 #pragma omp declare variant(foofoo <int>) match(implementation =) // expected-warning {{expected '{' after the '=' that follows the context set name "implementation"; '{' assumed}} expected-warning {{expected identifier or string literal describing a context selector; selector skipped}} expected-warning {{expected '}' after the context selectors for the context set "implementation"; '}' assumed}} expected-note {{context selector options are: 'vendor' 'extension' 'unified_address' 'unified_shared_memory' 'reverse_offload' 'dynamic_allocators' 'atomic_default_mem_order'}} expected-note {{the ignored selector spans until here}}
 #pragma omp declare variant(foofoo <int>) match(implementation = yyy) // expected-warning {{expected '{' after the '=' that follows the context set name "implementation"; '{' assumed}} expected-warning {{'yyy' is not a valid context selector for the context set 'implementation'; selector ignored}} expected-warning {{expected '}' after the context selectors for the context set "implementation"; '}' assumed}} expected-note {{context selector options are: 'vendor' 'extension' 'unified_address' 'unified_shared_memory' 'reverse_offload' 'dynamic_allocators' 'atomic_default_mem_order'}} expected-note {{the ignored selector spans until here}}
@@ -63,8 +63,8 @@ int bar();
 #pragma omp declare variant(foofoo <T>) // omp50-error {{expected 'match' clause on 'omp declare variant' directive}} omp51-error {{expected 'match', 'adjust_args', or 'append_args' clause on 'omp declare variant' directive}}
 #pragma omp declare variant(foofoo <T>) xxx // omp50-error {{expected 'match' clause on 'omp declare variant' directive}} omp51-error {{expected 'match', 'adjust_args', or 'append_args' clause on 'omp declare variant' directive}}
 #pragma omp declare variant(foofoo <T>) match // expected-error {{expected '(' after 'match'}}
-#pragma omp declare variant(foofoo <T>) match( // expected-error {{expected ')'}} expected-warning {{expected identifier or string literal describing a context set; set skipped}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}} expected-note {{to match this '('}}
-#pragma omp declare variant(foofoo <T>) match() // expected-warning {{expected identifier or string literal describing a context set; set skipped}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp declare variant(foofoo <T>) match( // expected-error {{expected ')'}} expected-warning {{expected identifier or string literal describing a context set; set skipped}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}} expected-note {{to match this '('}}
+#pragma omp declare variant(foofoo <T>) match() // expected-warning {{expected identifier or string literal describing a context set; set skipped}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
 #pragma omp declare variant(foofoo <T>) match(implementation) // expected-warning {{expected '=' after the context set name "implementation"; '=' assumed}} expected-warning {{expected '{' after the '=' that follows the context set name "implementation"; '{' assumed}} expected-warning {{expected identifier or string literal describing a context selector; selector skipped}} expected-warning {{expected '}' after the context selectors for the context set "implementation"; '}' assumed}} expected-note {{context selector options are: 'vendor' 'extension' 'unified_address' 'unified_shared_memory' 'reverse_offload' 'dynamic_allocators' 'atomic_default_mem_order'}} expected-note {{the ignored selector spans until here}}
 #pragma omp declare variant(foofoo <T>) match(implementation =) // expected-warning {{expected '{' after the '=' that follows the context set name "implementation"; '{' assumed}} expected-warning {{expected identifier or string literal describing a context selector; selector skipped}} expected-warning {{expected '}' after the context selectors for the context set "implementation"; '}' assumed}} expected-note {{context selector options are: 'vendor' 'extension' 'unified_address' 'unified_shared_memory' 'reverse_offload' 'dynamic_allocators' 'atomic_default_mem_order'}} expected-note {{the ignored selector spans until here}}
 #pragma omp declare variant(foofoo <T>) match(implementation = {) // expected-warning {{expected identifier or string literal describing a context selector; selector skipped}} expected-warning {{expected '}' after the context selectors for the context set "implementation"; '}' assumed}} expected-note {{context selector options are: 'vendor' 'extension' 'unified_address' 'unified_shared_memory' 'reverse_offload' 'dynamic_allocators' 'atomic_default_mem_order'}} expected-note {{the ignored selector spans until here}}
@@ -200,7 +200,7 @@ int after_use(void);
 
 int fn();
 int fn(int);
-#pragma omp declare variant(fn) match(xxx = {}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp declare variant(fn) match(xxx = {}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
 int overload(void);
 
 int fn1();
@@ -211,7 +211,7 @@ int overload1(float);
 
 int fn_constexpr_variant();
 
-#pragma omp declare variant(fn_constexpr_variant) match(xxx = {}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp declare variant(fn_constexpr_variant) match(xxx = {}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
 constexpr int fn_constexpr(); // expected-error {{'#pragma omp declare variant' does not support constexpr functions}}
 
 constexpr int fn_constexpr_variant1();
@@ -221,7 +221,7 @@ int fn_constexpr1();
 
 int fn_sc_variant();
 
-#pragma omp declare variant(fn_sc_variant) match(xxx = {}) // expected-error {{function with '#pragma omp declare variant' has a different storage class}} expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp declare variant(fn_sc_variant) match(xxx = {}) // expected-error {{function with '#pragma omp declare variant' has a different storage class}} expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
 static int fn_sc();
 
 static int fn_sc_variant1();
@@ -231,7 +231,7 @@ int fn_sc1();
 
 int fn_inline_variant();
 
-#pragma omp declare variant(fn_inline_variant) match(xxx = {}) // expected-error {{function with '#pragma omp declare variant' has a different inline specification}} expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp declare variant(fn_inline_variant) match(xxx = {}) // expected-error {{function with '#pragma omp declare variant' has a different inline specification}} expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
 inline int fn_inline();
 
 inline int fn_inline_variant1();
@@ -240,7 +240,7 @@ inline int fn_inline_variant1();
 int fn_inline1();
 
 auto fn_deduced_variant() { return 0; }
-#pragma omp declare variant(fn_deduced_variant) match(xxx = {}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp declare variant(fn_deduced_variant) match(xxx = {}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
 int fn_deduced();
 
 int fn_deduced_variant1();
@@ -254,7 +254,7 @@ auto fn_deduced3();
 
 auto fn_deduced_variant2() { return 0; }
 
-#pragma omp declare variant(fn_deduced_variant2) match(xxx = {}) // expected-error {{variant in '#pragma omp declare variant' with type 'int ()' is incompatible with type 'float ()'}} expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp declare variant(fn_deduced_variant2) match(xxx = {}) // expected-error {{variant in '#pragma omp declare variant' with type 'int ()' is incompatible with type 'float ()'}} expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
 float fn_deduced2();
 
 
@@ -266,7 +266,7 @@ int fn_except() noexcept(false); // expected-note {{previous declaration is here
 
 int fn_except_variant1() noexcept(false); // expected-error {{exception specification in declaration does not match previous declaration}}
 
-#pragma omp declare variant(fn_except_variant1) match(xxx = {}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp declare variant(fn_except_variant1) match(xxx = {}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
 int fn_except1() noexcept(true); // expected-note {{previous declaration is here}}
 
 struct SpecialFuncs {
@@ -275,7 +275,7 @@ struct SpecialFuncs {
 #pragma omp declare variant(SpecialFuncs::vd) match(implementation = {}) // expected-warning {{expected identifier or string literal describing a context selector; selector skipped}} expected-note {{context selector options are: 'vendor' 'extension' 'unified_address' 'unified_shared_memory' 'reverse_offload' 'dynamic_allocators' 'atomic_default_mem_order'}} expected-note {{the ignored selector spans until here}}
   SpecialFuncs(); // expected-error {{'#pragma omp declare variant' does not support constructors}}
 
-#pragma omp declare variant(SpecialFuncs::vd) match(xxx = {}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp declare variant(SpecialFuncs::vd) match(xxx = {}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
   ~SpecialFuncs(); // expected-error {{'#pragma omp declare variant' does not support destructors}}
 
   void baz();
@@ -288,14 +288,14 @@ struct SpecialFuncs {
 #pragma omp declare variant(SpecialFuncs::dar) match(construct={dispatch}) // expected-error {{'#pragma omp declare variant' does not support virtual functions}}
 
 #pragma omp declare variant(SpecialFuncs::baz) match(implementation = {}) // expected-warning {{expected identifier or string literal describing a context selector; selector skipped}} expected-note {{context selector options are: 'vendor' 'extension' 'unified_address' 'unified_shared_memory' 'reverse_offload' 'dynamic_allocators' 'atomic_default_mem_order'}} expected-note {{the ignored selector spans until here}}
-#pragma omp declare variant(SpecialFuncs::bar) match(xxx = {}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp declare variant(SpecialFuncs::bar) match(xxx = {}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
 
 #pragma omp declare variant(fn_sc_variant1) match(implementation = {}) // expected-error {{variant in '#pragma omp declare variant' with type 'int (*)()' is incompatible with type 'void (SpecialFuncs::*)()'}} expected-warning {{expected identifier or string literal describing a context selector; selector skipped}} expected-note {{context selector options are: 'vendor' 'extension' 'unified_address' 'unified_shared_memory' 'reverse_offload' 'dynamic_allocators' 'atomic_default_mem_order'}} expected-note {{the ignored selector spans until here}}
   void foo1();
   SpecialFuncs& foo(const SpecialFuncs&);
   SpecialFuncs& bar(SpecialFuncs&&);
 
-#pragma omp declare variant(SpecialFuncs::foo) match(xxx = {}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp declare variant(SpecialFuncs::foo) match(xxx = {}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
   SpecialFuncs& operator=(const SpecialFuncs&) = default; // expected-error {{'#pragma omp declare variant' does not support defaulted functions}}
 
 #pragma omp declare variant(SpecialFuncs::bar) match(implementation = {}) // expected-warning {{expected identifier or string literal describing a context selector; selector skipped}} expected-note {{context selector options are: 'vendor' 'extension' 'unified_address' 'unified_shared_memory' 'reverse_offload' 'dynamic_allocators' 'atomic_default_mem_order'}} expected-note {{the ignored selector spans until here}}
diff --git a/clang/test/OpenMP/for_order_messages.cpp b/clang/test/OpenMP/for_order_messages.cpp
index d38f48d..530c051 100644
--- a/clang/test/OpenMP/for_order_messages.cpp
+++ b/clang/test/OpenMP/for_order_messages.cpp
@@ -1,8 +1,12 @@
 // RUN: %clang_cc1 -fsyntax-only -fopenmp -fopenmp-version=50 -triple x86_64-unknown-unknown -verify=expected,omp50 %s -Wuninitialized
-// RUN: %clang_cc1 -fsyntax-only -fopenmp -triple x86_64-unknown-unknown -verify=expected,omp51 %s -Wuninitialized
+// RUN: %clang_cc1 -fsyntax-only -fopenmp -fopenmp-version=51 -triple x86_64-unknown-unknown -verify=expected,omp51 %s -Wuninitialized
+// RUN: %clang_cc1 -fsyntax-only -fopenmp -fopenmp-version=52 -triple x86_64-unknown-unknown -verify=expected,omp51 %s -Wuninitialized
+// RUN: %clang_cc1 -fsyntax-only -fopenmp -fopenmp-version=60 -triple x86_64-unknown-unknown -verify=expected,omp60 %s -Wuninitialized
 
 // RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -fopenmp-version=50 -triple x86_64-unknown-unknown -verify=expected,omp50 %s -Wuninitialized
-// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -triple x86_64-unknown-unknown -verify=expected,omp51 %s -Wuninitialized
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -fopenmp-version=51 -triple x86_64-unknown-unknown -verify=expected,omp51 %s -Wuninitialized
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -fopenmp-version=52 -triple x86_64-unknown-unknown -verify=expected,omp51 %s -Wuninitialized
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -fopenmp-version=60 -triple x86_64-unknown-unknown -verify=expected,omp60 %s -Wuninitialized
 
 extern int omp_get_num_threads  (void);
 
@@ -35,13 +39,61 @@ int main(int argc, char **argv) {
 
 #pragma omp parallel for order(reproducible: concurrent) // omp50-error {{expected 'concurrent' in OpenMP clause 'order'}}
   for (int i = 0; i < 10; ++i) {
-#pragma omp target //omp51-error {{construct 'target' not allowed in a region associated with a directive with 'order' clause}}
+#pragma omp target //omp51-error {{construct 'target' not allowed in a region associated with a directive with 'order' clause}} omp60-error {{construct 'target' not allowed in a region associated with a directive with 'order' clause}}
       A++;
   }
 
 #pragma omp parallel for order(unconstrained: concurrent) // omp50-error {{expected 'concurrent' in OpenMP clause 'order'}}
   for (int i = 0; i < 10; ++i) {
-#pragma omp target //omp51-error {{construct 'target' not allowed in a region associated with a directive with 'order' clause}}
+#pragma omp target //omp51-error {{construct 'target' not allowed in a region associated with a directive with 'order' clause}} omp60-error {{construct 'target' not allowed in a region associated with a directive with 'order' clause}}
       A++;
   }
+
+#pragma omp loop bind(parallel) order(concurrent)
+  for (int i = 0; i < 10; ++i) {
+#pragma omp parallel for //omp60-error {{construct 'parallel for' not allowed in a region associated with a directive with 'order' clause}}
+    for (int j = 0; j < 10; ++j) {
+      A += j;
+    }
+  }
+
+#pragma omp distribute order(concurrent)
+  for (int i = 0; i < 10; ++i) {
+#pragma omp parallel for simd //omp60-error {{construct 'parallel for simd' not allowed in a region associated with a directive with 'order' clause}}
+    for (int j = 0; j < 10; ++j) {
+      A += j;
+    }
+  }
+
+#pragma omp for order(concurrent)
+  for (int i = 0; i < 10; ++i) {
+#pragma omp parallel master //omp60-error {{construct 'parallel master' not allowed in a region associated with a directive with 'order' clause}}
+    for (int j = 0; j < 10; ++j) {
+      A += j;
+    }
+  }
+
+#pragma omp for order(concurrent)
+  for (int i = 0; i < 10; ++i) {
+#pragma omp parallel master taskloop //omp60-error {{construct 'parallel master taskloop' not allowed in a region associated with a directive with 'order' clause}}
+    for (int j = 0; j < 10; ++j) {
+      A += j;
+    }
+  }
+
+#pragma omp for order(concurrent)
+  for (int i = 0; i < 10; ++i) {
+#pragma omp parallel master taskloop simd //omp60-error {{construct 'parallel master taskloop simd' not allowed in a region associated with a directive with 'order' clause}}
+    for (int j = 0; j < 10; ++j) {
+      A += j;
+    }
+  }
+
+#pragma omp for order(concurrent)
+  for (int i = 0; i < 10; ++i) {
+    #pragma omp parallel sections //omp60-error {{construct 'parallel sections' not allowed in a region associated with a directive with 'order' clause}}
+    {
+      A++;
+    }
+  }
 }
diff --git a/clang/test/OpenMP/metadirective_messages.cpp b/clang/test/OpenMP/metadirective_messages.cpp
index b342a09..7fce9fa 100644
--- a/clang/test/OpenMP/metadirective_messages.cpp
+++ b/clang/test/OpenMP/metadirective_messages.cpp
@@ -5,7 +5,7 @@
 void foo() {
 #pragma omp metadirective // expected-error {{expected expression}}
   ;
-#pragma omp metadirective when() // expected-error {{expected valid context selector in when clause}} expected-error {{expected expression}} expected-warning {{expected identifier or string literal describing a context set; set skipped}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
+#pragma omp metadirective when() // expected-error {{expected valid context selector in when clause}} expected-error {{expected expression}} expected-warning {{expected identifier or string literal describing a context set; set skipped}} expected-note {{context set options are: 'construct' 'device' 'target_device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}}
   ;
 #pragma omp metadirective when(device{}) // expected-warning {{expected '=' after the context set name "device"; '=' assumed}} expected-warning {{expected identifier or string literal describing a context selector; selector skipped}} expected-note {{context selector options are: 'kind' 'arch' 'isa'}} expected-note {{the ignored selector spans until here}} expected-error {{expected valid context selector in when clause}} expected-error {{expected expression}}
   ;
diff --git a/clang/test/OpenMP/nvptx_declare_variant_name_mangling.cpp b/clang/test/OpenMP/nvptx_declare_variant_name_mangling.cpp
index 0a5690e..e128370 100644
--- a/clang/test/OpenMP/nvptx_declare_variant_name_mangling.cpp
+++ b/clang/test/OpenMP/nvptx_declare_variant_name_mangling.cpp
@@ -6,10 +6,10 @@
 
 // CHECK-DAG: @_Z3barv
 // CHECK-DAG: @_Z3bazv
-// CHECK-DAG: define{{.*}} @"_Z53bar$ompvariant$S2$s7$Pnvptx$Pnvptx64$S3$s9$Pmatch_anyv"
-// CHECK-DAG: define{{.*}} @"_Z53baz$ompvariant$S2$s7$Pnvptx$Pnvptx64$S3$s9$Pmatch_anyv"
-// CHECK-DAG: call noundef i32 @"_Z53bar$ompvariant$S2$s7$Pnvptx$Pnvptx64$S3$s9$Pmatch_anyv"()
-// CHECK-DAG: call noundef i32 @"_Z53baz$ompvariant$S2$s7$Pnvptx$Pnvptx64$S3$s9$Pmatch_anyv"()
+// CHECK-DAG: define{{.*}} @{{"_Z[0-9]+bar\$ompvariant\$.*"}}
+// CHECK-DAG: define{{.*}} @{{"_Z[0-9]+baz\$ompvariant\$.*"}}
+// CHECK-DAG: call noundef i32 @{{"_Z[0-9]+bar\$ompvariant\$.*"}}()
+// CHECK-DAG: call noundef i32 @{{"_Z[0-9]+baz\$ompvariant\$.*"}}()
 
 #ifndef HEADER
 #define HEADER
diff --git a/clang/test/SemaCXX/attr-no-sanitize.cpp b/clang/test/SemaCXX/attr-no-sanitize.cpp
index 8951f61..cd60e71 100644
--- a/clang/test/SemaCXX/attr-no-sanitize.cpp
+++ b/clang/test/SemaCXX/attr-no-sanitize.cpp
@@ -21,8 +21,8 @@ int f3() __attribute__((no_sanitize("address")));
 
 // DUMP-LABEL: FunctionDecl {{.*}} f4
 // DUMP: NoSanitizeAttr {{.*}} hwaddress
-// PRINT: {{\[\[}}clang::no_sanitize("hwaddress")]] int f4()
-[[clang::no_sanitize("hwaddress")]] int f4();
+// PRINT: {{\[\[}}gnu::no_sanitize("hwaddress")]] int f4()
+[[gnu::no_sanitize("hwaddress")]] int f4();
 
 // DUMP-LABEL: FunctionDecl {{.*}} f5
 // DUMP: NoSanitizeAttr {{.*}} address thread hwaddress
diff --git a/clang/test/SemaTemplate/GH55509.cpp b/clang/test/SemaTemplate/GH55509.cpp
new file mode 100644
index 0000000..773a843
--- /dev/null
+++ b/clang/test/SemaTemplate/GH55509.cpp
@@ -0,0 +1,112 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -std=c++26 %s
+
+namespace t1 {
+  template<int N> struct A {
+    template<class C> friend auto cica(const A<N-1>&, C) {
+      return N;
+    }
+  };
+
+  template<> struct A<0> {
+    template<class C> friend auto cica(const A<0>&, C);
+    // expected-note@-1 {{declared here}}
+  };
+
+  void test() {
+    cica(A<0>{}, 0);
+    // expected-error@-1 {{function 'cica<int>' with deduced return type cannot be used before it is defined}}
+
+    (void)A<1>{};
+    cica(A<0>{}, 0);
+  }
+} // namespace t1
+namespace t2 {
+  template<int N> struct A {
+    template<class C> friend auto cica(const A<N-1>&, C) {
+      return N;
+    }
+  };
+
+  template<> struct A<0> {
+    template<class C> friend auto cica(const A<0>&, C);
+  };
+
+  template <int N, class = decltype(cica(A<N>{}, nullptr))>
+  void MakeCica();
+  // expected-note@-1 {{candidate function}}
+
+  template <int N> void MakeCica(A<N+1> = {});
+  // expected-note@-1 {{candidate function}}
+
+  void test() {
+    MakeCica<0>();
+
+    MakeCica<0>();
+    // expected-error@-1 {{call to 'MakeCica' is ambiguous}}
+  }
+} // namespace t2
+namespace t3 {
+  template<int N> struct A {
+    template<class C> friend auto cica(const A<N-1>&, C) {
+      return N-1;
+    }
+  };
+
+  template<> struct A<0> {
+    template<class C> friend auto cica(const A<0>&, C);
+  };
+
+  template <int N, class AT, class = decltype(cica(AT{}, nullptr))>
+  static constexpr bool MakeCica(int);
+
+  template <int N, class AT>
+  static constexpr bool MakeCica(short, A<N+1> = {});
+
+  template <int N, class AT = A<N>, class Val = decltype(MakeCica<N, AT>(0))>
+  static constexpr bool has_cica = Val{};
+
+  constexpr bool cica2 = has_cica<0> || has_cica<0>;
+} // namespace t3
+namespace t4 {
+  template<int N> struct A {
+    template<class C> friend auto cica(const A<N-1>&, C);
+  };
+
+  template<> struct A<0> {
+    template<class C> friend auto cica(const A<0>&, C) {
+      C a;
+    }
+  };
+
+  template struct A<1>;
+
+  void test() {
+    cica(A<0>{}, 0);
+  }
+} // namespace t4
+namespace regression1 {
+  template <class> class A;
+
+  template <class T> [[gnu::abi_tag("TAG")]] void foo(A<T>);
+
+  template <class> struct A {
+    friend void foo <>(A);
+  };
+
+  template struct A<int>;
+
+  template <class T> [[gnu::abi_tag("TAG")]] void foo(A<T>) {}
+
+  template void foo<int>(A<int>);
+} // namespace regression1
+namespace regression2 {
+  template <class> struct A {
+    template <class T> static void f() {
+      A<int>::f<T>();
+    }
+  };
+  template <> template <class T> void A<int>::f() {
+    static_assert(__is_same(T, long));
+  }
+  template void A<void>::f<long>();
+} // namespace regression2
diff --git a/clang/test/SemaTemplate/cwg2398.cpp b/clang/test/SemaTemplate/cwg2398.cpp
index 1728f90..51d98d4 100644
--- a/clang/test/SemaTemplate/cwg2398.cpp
+++ b/clang/test/SemaTemplate/cwg2398.cpp
@@ -585,6 +585,23 @@ namespace regression2 {
   template <typename, int> struct Matrix;
   template struct D<Matrix<double, 3>>;
 } // namespace regression2
+namespace regression3 {
+  struct None {};
+  template<class T> struct Node { using type = T; };
+
+  template <template<class> class TT, class T>
+  struct A {
+    static_assert(!__is_same(T, None));
+    using type2 = typename A<TT, typename T::type>::type2;
+  };
+
+  template <template<class> class TT> struct A<TT, None> {
+    using type2 = void;
+  };
+
+  template <class...> class B {};
+  template struct A<B, Node<None>>;
+} // namespace regression3
 
 namespace nttp_auto {
   namespace t1 {
diff --git a/clang/tools/clang-format/clang-format.el b/clang/tools/clang-format/clang-format.el
index fb943b7..54ab2e1 100644
--- a/clang/tools/clang-format/clang-format.el
+++ b/clang/tools/clang-format/clang-format.el
@@ -146,24 +146,133 @@ is a zero-based file offset, assuming ‘utf-8-unix’ coding."
     (lambda (byte &optional _quality _coding-system)
       (byte-to-position (1+ byte)))))
 
-;;;###autoload
-(defun clang-format-region (start end &optional style assume-file-name)
-  "Use clang-format to format the code between START and END according to STYLE.
-If called interactively uses the region or the current statement if there is no
-no active region. If no STYLE is given uses `clang-format-style'. Use
-ASSUME-FILE-NAME to locate a style config file, if no ASSUME-FILE-NAME is given
-uses the function `buffer-file-name'."
-  (interactive
-   (if (use-region-p)
-       (list (region-beginning) (region-end))
-     (list (point) (point))))
+(defmacro clang-format--with-delete-files-guard (bind-files-to-delete &rest body)
+  "Execute BODY which may add temp files to BIND-FILES-TO-DELETE."
+  (declare (indent 1))
+  `(let ((,bind-files-to-delete nil))
+     (unwind-protect
+         (progn
+           ,@body)
+       (while ,bind-files-to-delete
+         (with-demoted-errors "failed to remove file: %S"
+           (delete-file (pop ,bind-files-to-delete)))))))
+
+
+(defun clang-format--vc-diff-get-diff-lines (file-orig file-new)
+  "Return all line regions that contain diffs between FILE-ORIG and
+FILE-NEW.  If there is no diff ‘nil’ is returned. Otherwise the return
+is a ‘list’ of line ranges to format. The list of line ranges can be
+passed to ‘clang-format--region-impl’"
+  ;; Use temporary buffer for output of diff.
+  (with-temp-buffer
+    ;; We could use diff.el:diff-no-select here. The reason we don't
+    ;; is diff-no-select requires extra copies on the buffers which
+    ;; induces noticeable slowdowns, especially on larger files.
+    (let ((status (call-process
+                   diff-command
+                   nil
+                   (current-buffer)
+                   nil
+                   ;; Binary diff has different behaviors that we
+                   ;; aren't interested in.
+                   "-a"
+                   ;; Get minimal diff (copy diff config for git-clang-format).
+                   "-U0"
+                   file-orig
+                   file-new))
+          (stderr (concat (if (zerop (buffer-size)) "" ": ")
+                          (buffer-substring-no-properties
+                           (point-min) (line-end-position))))
+          (diff-lines '()))
+      (cond
+       ((stringp status)
+        (error "clang-format: (diff killed by signal %s%s)" status stderr))
+       ;; Return of 0 indicates no diff.
+       ((= status 0) nil)
+       ;; Return of 1 indicates found diffs and no error.
+       ((= status 1)
+        ;; Find and collect all diff lines.
+        ;; We are matching something like:
+        ;; "@@ -80 +80 @@" or "@@ -80,2 +80,2 @@"
+        (goto-char (point-min))
+        (while (re-search-forward
+                "^@@[[:blank:]]-[[:digit:],]+[[:blank:]]\\+\\([[:digit:]]+\\)\\(,\\([[:digit:]]+\\)\\)?[[:blank:]]@@$"
+                nil
+                t
+                1)
+          (let ((match1 (string-to-number (match-string 1)))
+                (match3 (let ((match3_or_nil (match-string 3)))
+                          (if match3_or_nil
+                              (string-to-number match3_or_nil)
+                            nil))))
+            (push (cons match1 (if match3 (+ match1 match3) match1)) diff-lines)))
+        (nreverse diff-lines))
+       ;; Any return != 0 && != 1 indicates some level of error.
+       (t
+        (error "clang-format: (diff returned unsuccessfully %s%s)" status stderr))))))
+
+(defun clang-format--vc-diff-get-vc-head-file (tmpfile-vc-head)
+  "Stores the contents of ‘buffer-file-name’ at vc revision HEAD into
+‘tmpfile-vc-head’. If the current buffer is either not a file or not
+in a vc repo, this results in an error. Currently git is the only
+supported vc."
+  ;; We need the current buffer to be a file.
+  (unless (buffer-file-name)
+    (error "clang-format: Buffer is not visiting a file"))
+
+  (let ((base-dir (vc-root-dir))
+        (backend (vc-backend (buffer-file-name))))
+    ;; We need to be able to find version control (git) root.
+    (unless base-dir
+      (error "clang-format: File not known to git"))
+    (cond
+     ((string-equal backend "Git")
+      ;; Get the filename relative to git root.
+      (let ((vc-file-name (substring
+                           (expand-file-name (buffer-file-name))
+                           (string-width (expand-file-name base-dir))
+                           nil)))
+        (let ((status (call-process
+                       vc-git-program
+                       nil
+                       `(:file ,tmpfile-vc-head)
+                       nil
+                       "show" (concat "HEAD:" vc-file-name)))
+              (stderr (with-temp-buffer
+                        (unless (zerop (cadr (insert-file-contents tmpfile-vc-head)))
+                          (insert ": "))
+                        (buffer-substring-no-properties
+                         (point-min) (line-end-position)))))
+          (when (stringp status)
+            (error "clang-format: (git show HEAD:%s killed by signal %s%s)"
+                   vc-file-name status stderr))
+          (unless (zerop status)
+            (error "clang-format: (git show HEAD:%s returned unsuccessfully %s%s)"
+                   vc-file-name status stderr)))))
+     (t
+      (error
+       "Version control %s isn't supported, currently supported backends: git"
+       backend)))))
+
 
+(defun clang-format--region-impl (start end &optional style assume-file-name lines)
+  "Common implementation for ‘clang-format-buffer’,
+‘clang-format-region’, and ‘clang-format-vc-diff’. START and END
+refer to the region to be formatter. STYLE and ASSUME-FILE-NAME are
+used for configuring the clang-format. And LINES is used to pass
+specific locations for reformatting (i.e diff locations)."
   (unless style
     (setq style clang-format-style))
 
   (unless assume-file-name
     (setq assume-file-name (buffer-file-name (buffer-base-buffer))))
 
+  ;; Convert list of line ranges to list command for ‘clang-format’ executable.
+  (when lines
+    (setq lines (mapcar (lambda (range)
+                          (format "--lines=%d:%d" (car range) (cdr range)))
+                        lines)))
+
   (let ((file-start (clang-format--bufferpos-to-filepos start 'approximate
                                                         'utf-8-unix))
         (file-end (clang-format--bufferpos-to-filepos end 'approximate
@@ -190,8 +299,12 @@ uses the function `buffer-file-name'."
                                       (list "--assume-filename" assume-file-name))
                                ,@(and style (list "--style" style))
                                "--fallback-style" ,clang-format-fallback-style
-                               "--offset" ,(number-to-string file-start)
-                               "--length" ,(number-to-string (- file-end file-start))
+                               ,@(and lines lines)
+                               ,@(and (not lines)
+                                      (list
+                                       "--offset" (number-to-string file-start)
+                                       "--length" (number-to-string
+                                                   (- file-end file-start))))
                                "--cursor" ,(number-to-string cursor))))
               (stderr (with-temp-buffer
                         (unless (zerop (cadr (insert-file-contents temp-file)))
@@ -216,9 +329,62 @@ uses the function `buffer-file-name'."
             (if incomplete-format
                 (message "(clang-format: incomplete (syntax errors)%s)" stderr)
               (message "(clang-format: success%s)" stderr))))
-      (delete-file temp-file)
+      (with-demoted-errors
+          "clang-format: Failed to delete temporary file: %S"
+        (delete-file temp-file))
       (when (buffer-name temp-buffer) (kill-buffer temp-buffer)))))
 
+
+;;;###autoload
+(defun clang-format-vc-diff (&optional style assume-file-name)
+  "The same as ‘clang-format-buffer’ but only operates on the vc
+diffs from HEAD in the buffer. If no STYLE is given uses
+‘clang-format-style’. Use ASSUME-FILE-NAME to locate a style config
+file. If no ASSUME-FILE-NAME is given uses the function
+‘buffer-file-name’."
+  (interactive)
+  (clang-format--with-delete-files-guard tmp-files
+    (let ((tmpfile-vc-head nil)
+          (tmpfile-curbuf nil))
+      (setq tmpfile-vc-head
+            (make-temp-file "clang-format-vc-tmp-head-content"))
+      (push tmpfile-vc-head tmp-files)
+      (clang-format--vc-diff-get-vc-head-file tmpfile-vc-head)
+      ;; Move the current buffer to a temporary file to take a
+      ;; diff. Even if current-buffer is backed by a file, we
+      ;; want to diff the buffer contents which might not be
+      ;; saved.
+      (setq tmpfile-curbuf (make-temp-file "clang-format-vc-tmp"))
+      (push tmpfile-curbuf tmp-files)
+      (write-region nil nil tmpfile-curbuf nil 'nomessage)
+      ;; Get a list of lines with a diff.
+      (let ((diff-lines
+             (clang-format--vc-diff-get-diff-lines
+              tmpfile-vc-head tmpfile-curbuf)))
+        ;; If we have any diffs, format them.
+        (when diff-lines
+          (clang-format--region-impl
+           (point-min)
+           (point-max)
+           style
+           assume-file-name
+           diff-lines))))))
+
+
+;;;###autoload
+(defun clang-format-region (start end &optional style assume-file-name)
+  "Use clang-format to format the code between START and END according
+to STYLE.  If called interactively uses the region or the current
+statement if there is no no active region. If no STYLE is given uses
+`clang-format-style'. Use ASSUME-FILE-NAME to locate a style config
+file, if no ASSUME-FILE-NAME is given uses the function
+`buffer-file-name'."
+  (interactive
+   (if (use-region-p)
+       (list (region-beginning) (region-end))
+     (list (point) (point))))
+  (clang-format--region-impl start end style assume-file-name))
+
 ;;;###autoload
 (defun clang-format-buffer (&optional style assume-file-name)
   "Use clang-format to format the current buffer according to STYLE.
@@ -226,7 +392,11 @@ If no STYLE is given uses `clang-format-style'. Use ASSUME-FILE-NAME
 to locate a style config file. If no ASSUME-FILE-NAME is given uses
 the function `buffer-file-name'."
   (interactive)
-  (clang-format-region (point-min) (point-max) style assume-file-name))
+  (clang-format--region-impl
+   (point-min)
+   (point-max)
+   style
+   assume-file-name))
 
 ;;;###autoload
 (defalias 'clang-format 'clang-format-region)
diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index 709dc51..4b39ae9 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -23,6 +23,7 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/JSON.h"
 #include "llvm/Support/LLVMDriver.h"
+#include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Program.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/TargetSelect.h"
@@ -31,6 +32,7 @@
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/TargetParser/Host.h"
+#include <memory>
 #include <mutex>
 #include <optional>
 #include <thread>
@@ -85,8 +87,9 @@ static std::string ModuleFilesDir;
 static bool EagerLoadModules;
 static unsigned NumThreads = 0;
 static std::string CompilationDB;
-static std::string ModuleName;
+static std::optional<std::string> ModuleName;
 static std::vector<std::string> ModuleDepTargets;
+static std::string TranslationUnitFile;
 static bool DeprecatedDriverCommand;
 static ResourceDirRecipeKind ResourceDirRecipe;
 static bool Verbose;
@@ -164,6 +167,8 @@ static void ParseArgs(int argc, char **argv) {
             .Case("system-warnings", ScanningOptimizations::SystemWarnings)
             .Case("vfs", ScanningOptimizations::VFS)
             .Case("canonicalize-macros", ScanningOptimizations::Macros)
+            .Case("ignore-current-working-dir",
+                  ScanningOptimizations::IgnoreCWD)
             .Case("all", ScanningOptimizations::All)
             .Default(std::nullopt);
     if (!Optimization) {
@@ -204,6 +209,9 @@ static void ParseArgs(int argc, char **argv) {
   for (const llvm::opt::Arg *A : Args.filtered(OPT_dependency_target_EQ))
     ModuleDepTargets.emplace_back(A->getValue());
 
+  if (const llvm::opt::Arg *A = Args.getLastArg(OPT_tu_buffer_path_EQ))
+    TranslationUnitFile = A->getValue();
+
   DeprecatedDriverCommand = Args.hasArg(OPT_deprecated_driver_command);
 
   if (const llvm::opt::Arg *A = Args.getLastArg(OPT_resource_dir_recipe_EQ)) {
@@ -940,7 +948,7 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
   };
 
   if (Format == ScanningOutputFormat::Full)
-    FD.emplace(ModuleName.empty() ? Inputs.size() : 0);
+    FD.emplace(!ModuleName ? Inputs.size() : 0);
 
   std::atomic<size_t> NumStatusCalls = 0;
   std::atomic<size_t> NumOpenFileForReadCalls = 0;
@@ -959,10 +967,6 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
       std::string Filename = std::move(Input->Filename);
       std::string CWD = std::move(Input->Directory);
 
-      std::optional<StringRef> MaybeModuleName;
-      if (!ModuleName.empty())
-        MaybeModuleName = ModuleName;
-
       std::string OutputDir(ModuleFilesDir);
       if (OutputDir.empty())
         OutputDir = getModuleCachePath(Input->CommandLine);
@@ -1018,16 +1022,31 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
                                              MakeformatOS, Errs))
             HadErrors = true;
         }
-      } else if (MaybeModuleName) {
+      } else if (ModuleName) {
         auto MaybeModuleDepsGraph = WorkerTool.getModuleDependencies(
-            *MaybeModuleName, Input->CommandLine, CWD, AlreadySeenModules,
+            *ModuleName, Input->CommandLine, CWD, AlreadySeenModules,
             LookupOutput);
-        if (handleModuleResult(*MaybeModuleName, MaybeModuleDepsGraph, *FD,
+        if (handleModuleResult(*ModuleName, MaybeModuleDepsGraph, *FD,
                                LocalIndex, DependencyOS, Errs))
           HadErrors = true;
       } else {
+        std::unique_ptr<llvm::MemoryBuffer> TU;
+        std::optional<llvm::MemoryBufferRef> TUBuffer;
+        if (!TranslationUnitFile.empty()) {
+          auto MaybeTU = llvm::MemoryBuffer::getFile(TranslationUnitFile);
+          if (!MaybeTU) {
+            llvm::errs() << "cannot open input translation unit: "
+                         << MaybeTU.getError().message() << "\n";
+            HadErrors = true;
+            continue;
+          }
+          TU = std::move(*MaybeTU);
+          TUBuffer = TU->getMemBufferRef();
+          Filename = TU->getBufferIdentifier();
+        }
         auto MaybeTUDeps = WorkerTool.getTranslationUnitDependencies(
-            Input->CommandLine, CWD, AlreadySeenModules, LookupOutput);
+            Input->CommandLine, CWD, AlreadySeenModules, LookupOutput,
+            TUBuffer);
         if (handleTranslationUnitResult(Filename, MaybeTUDeps, *FD, LocalIndex,
                                         DependencyOS, Errs))
           HadErrors = true;
diff --git a/clang/tools/clang-scan-deps/Opts.td b/clang/tools/clang-scan-deps/Opts.td
index 4837ce6..9cccbb3 100644
--- a/clang/tools/clang-scan-deps/Opts.td
+++ b/clang/tools/clang-scan-deps/Opts.td
@@ -29,6 +29,8 @@ defm compilation_database : Eq<"compilation-database", "Compilation database">;
 defm module_name : Eq<"module-name", "the module of which the dependencies are to be computed">;
 defm dependency_target : Eq<"dependency-target", "The names of dependency targets for the dependency file">;
 
+defm tu_buffer_path: Eq<"tu-buffer-path", "The path to the translation unit for depscan. Not compatible with -module-name">;
+
 def deprecated_driver_command : F<"deprecated-driver-command", "use a single driver command to build the tu (deprecated)">;
 
 defm resource_dir_recipe : Eq<"resource-dir-recipe", "How to produce missing '-resource-dir' argument">;
diff --git a/clang/unittests/AST/CommentLexer.cpp b/clang/unittests/AST/CommentLexer.cpp
index 1e7bad8..22866f0 100644
--- a/clang/unittests/AST/CommentLexer.cpp
+++ b/clang/unittests/AST/CommentLexer.cpp
@@ -1453,6 +1453,160 @@ TEST_F(CommentLexerTest, HTML19) {
   ASSERT_EQ(tok::newline,      Toks[2].getKind());
 }
 
+TEST_F(CommentLexerTest, HTML20) {
+  const char *Source = "// <a\n"
+                       "// \n"
+                       "// href=\"foo\"\n"
+                       "// \n"
+                       "// bar>text</a>";
+
+  std::vector<Token> Toks;
+
+  lexString(Source, Toks);
+
+  ASSERT_EQ(11U, Toks.size());
+
+  ASSERT_EQ(tok::text, Toks[0].getKind());
+  ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+  ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
+  ASSERT_EQ(StringRef("a"), Toks[1].getHTMLTagStartName());
+
+  ASSERT_EQ(tok::html_ident, Toks[2].getKind());
+  ASSERT_EQ(StringRef("href"), Toks[2].getHTMLIdent());
+
+  ASSERT_EQ(tok::html_equals, Toks[3].getKind());
+
+  ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind());
+  ASSERT_EQ(StringRef("foo"), Toks[4].getHTMLQuotedString());
+
+  ASSERT_EQ(tok::html_ident, Toks[5].getKind());
+  ASSERT_EQ(StringRef("bar"), Toks[5].getHTMLIdent());
+
+  ASSERT_EQ(tok::html_greater, Toks[6].getKind());
+
+  ASSERT_EQ(tok::text, Toks[7].getKind());
+  ASSERT_EQ(StringRef("text"), Toks[7].getText());
+
+  ASSERT_EQ(tok::html_end_tag, Toks[8].getKind());
+  ASSERT_EQ(StringRef("a"), Toks[8].getHTMLTagEndName());
+
+  ASSERT_EQ(tok::html_greater, Toks[9].getKind());
+
+  ASSERT_EQ(tok::newline, Toks[10].getKind());
+}
+
+TEST_F(CommentLexerTest, HTML21) {
+  const char *Source = "/**\n"
+                       " * <a\n"
+                       " * \n"
+                       " * href=\"foo\"\n"
+                       " * \n"
+                       " * bar>text</a>\n"
+                       " */";
+
+  std::vector<Token> Toks;
+
+  lexString(Source, Toks);
+
+  ASSERT_EQ(15U, Toks.size());
+
+  ASSERT_EQ(tok::newline, Toks[0].getKind());
+
+  ASSERT_EQ(tok::text, Toks[1].getKind());
+  ASSERT_EQ(StringRef(" "), Toks[1].getText());
+
+  ASSERT_EQ(tok::html_start_tag, Toks[2].getKind());
+  ASSERT_EQ(StringRef("a"), Toks[2].getHTMLTagStartName());
+
+  ASSERT_EQ(tok::html_ident, Toks[3].getKind());
+  ASSERT_EQ(StringRef("href"), Toks[3].getHTMLIdent());
+
+  ASSERT_EQ(tok::html_equals, Toks[4].getKind());
+
+  ASSERT_EQ(tok::html_quoted_string, Toks[5].getKind());
+  ASSERT_EQ(StringRef("foo"), Toks[5].getHTMLQuotedString());
+
+  ASSERT_EQ(tok::html_ident, Toks[6].getKind());
+  ASSERT_EQ(StringRef("bar"), Toks[6].getHTMLIdent());
+
+  ASSERT_EQ(tok::html_greater, Toks[7].getKind());
+
+  ASSERT_EQ(tok::text, Toks[8].getKind());
+  ASSERT_EQ(StringRef("text"), Toks[8].getText());
+
+  ASSERT_EQ(tok::html_end_tag, Toks[9].getKind());
+  ASSERT_EQ(StringRef("a"), Toks[9].getHTMLTagEndName());
+
+  ASSERT_EQ(tok::html_greater, Toks[10].getKind());
+
+  ASSERT_EQ(tok::newline, Toks[11].getKind());
+
+  ASSERT_EQ(tok::text, Toks[12].getKind());
+  ASSERT_EQ(StringRef(" "), Toks[12].getText());
+
+  ASSERT_EQ(tok::newline, Toks[13].getKind());
+
+  ASSERT_EQ(tok::newline, Toks[14].getKind());
+}
+
+TEST_F(CommentLexerTest, HTML22) {
+  const char *Source = "/**\n"
+                       " * <a\n"
+                       " */";
+
+  std::vector<Token> Toks;
+
+  lexString(Source, Toks);
+
+  ASSERT_EQ(6U, Toks.size());
+
+  ASSERT_EQ(tok::newline, Toks[0].getKind());
+
+  ASSERT_EQ(tok::text, Toks[1].getKind());
+  ASSERT_EQ(StringRef(" "), Toks[1].getText());
+
+  ASSERT_EQ(tok::html_start_tag, Toks[2].getKind());
+  ASSERT_EQ(StringRef("a"), Toks[2].getHTMLTagStartName());
+
+  ASSERT_EQ(tok::newline, Toks[3].getKind());
+
+  ASSERT_EQ(tok::newline, Toks[4].getKind());
+
+  ASSERT_EQ(tok::newline, Toks[5].getKind());
+}
+
+TEST_F(CommentLexerTest, HTML23) {
+  // NOTE: "//<" is considered a comment start
+  const char *Source = "// <\n"
+                       "// a\n"
+                       "// >";
+
+  std::vector<Token> Toks;
+
+  lexString(Source, Toks);
+
+  ASSERT_EQ(7U, Toks.size());
+
+  ASSERT_EQ(tok::text, Toks[0].getKind());
+  ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+  ASSERT_EQ(tok::text, Toks[1].getKind());
+  ASSERT_EQ(StringRef("<"), Toks[1].getText());
+
+  ASSERT_EQ(tok::newline, Toks[2].getKind());
+
+  ASSERT_EQ(tok::text, Toks[3].getKind());
+  ASSERT_EQ(StringRef(" a"), Toks[3].getText());
+
+  ASSERT_EQ(tok::newline, Toks[4].getKind());
+
+  ASSERT_EQ(tok::text, Toks[5].getKind());
+  ASSERT_EQ(StringRef(" >"), Toks[5].getText());
+
+  ASSERT_EQ(tok::newline, Toks[6].getKind());
+}
+
 TEST_F(CommentLexerTest, NotAKnownHTMLTag1) {
   const char *Source = "// <tag>";
 
diff --git a/clang/unittests/AST/CommentParser.cpp b/clang/unittests/AST/CommentParser.cpp
index e0df182..aa08b67 100644
--- a/clang/unittests/AST/CommentParser.cpp
+++ b/clang/unittests/AST/CommentParser.cpp
@@ -1065,9 +1065,10 @@ TEST_F(CommentParserTest, InlineCommand5) {
 
 TEST_F(CommentParserTest, HTML1) {
   const char *Sources[] = {
-    "// <a",
-    "// <a>",
-    "// <a >"
+      "// <a",
+      "// <a>",
+      "// <a >",
+      "// <a\n// >",
   };
 
   for (size_t i = 0, e = std::size(Sources); i != e; i++) {
@@ -1088,8 +1089,9 @@ TEST_F(CommentParserTest, HTML1) {
 
 TEST_F(CommentParserTest, HTML2) {
   const char *Sources[] = {
-    "// <br/>",
-    "// <br />"
+      "// <br/>",
+      "// <br />",
+      "// <br \n// />",
   };
 
   for (size_t i = 0, e = std::size(Sources); i != e; i++) {
@@ -1110,10 +1112,8 @@ TEST_F(CommentParserTest, HTML2) {
 
 TEST_F(CommentParserTest, HTML3) {
   const char *Sources[] = {
-    "// <a href",
-    "// <a href ",
-    "// <a href>",
-    "// <a href >",
+      "// <a href",   "// <a href ",       "// <a href>",
+      "// <a href >", "// <a \n// href >",
   };
 
   for (size_t i = 0, e = std::size(Sources); i != e; i++) {
@@ -1134,8 +1134,9 @@ TEST_F(CommentParserTest, HTML3) {
 
 TEST_F(CommentParserTest, HTML4) {
   const char *Sources[] = {
-    "// <a href=\"bbb\"",
-    "// <a href=\"bbb\">",
+      "// <a href=\"bbb\"",
+      "// <a href=\"bbb\">",
+      "// <a \n// href=\"bbb\">",
   };
 
   for (size_t i = 0, e = std::size(Sources); i != e; i++) {
diff --git a/clang/utils/TableGen/ClangAttrEmitter.cpp b/clang/utils/TableGen/ClangAttrEmitter.cpp
index de12c70..af7478b 100644
--- a/clang/utils/TableGen/ClangAttrEmitter.cpp
+++ b/clang/utils/TableGen/ClangAttrEmitter.cpp
@@ -108,6 +108,14 @@ GetFlattenedSpellings(const Record &Attr) {
       Ret.emplace_back("CXX11", Name, "clang", false, *Spelling);
       if (Spelling->getValueAsBit("AllowInC"))
         Ret.emplace_back("C23", Name, "clang", false, *Spelling);
+    } else if (Variety == "ClangGCC") {
+      Ret.emplace_back("GNU", Name, "", false, *Spelling);
+      Ret.emplace_back("CXX11", Name, "clang", false, *Spelling);
+      Ret.emplace_back("CXX11", Name, "gnu", false, *Spelling);
+      if (Spelling->getValueAsBit("AllowInC")) {
+        Ret.emplace_back("C23", Name, "clang", false, *Spelling);
+        Ret.emplace_back("C23", Name, "gnu", false, *Spelling);
+      }
     } else {
       Ret.push_back(FlattenedSpelling(*Spelling));
     }
diff --git a/compiler-rt/lib/memprof/memprof_allocator.cpp b/compiler-rt/lib/memprof/memprof_allocator.cpp
index c3448c2..60f5c85 100644
--- a/compiler-rt/lib/memprof/memprof_allocator.cpp
+++ b/compiler-rt/lib/memprof/memprof_allocator.cpp
@@ -291,10 +291,9 @@ struct Allocator {
 
   atomic_uint8_t destructing;
   atomic_uint8_t constructed;
-  bool print_text;
 
   // ------------------- Initialization ------------------------
-  explicit Allocator(LinkerInitialized) : print_text(flags()->print_text) {
+  explicit Allocator(LinkerInitialized) {
     atomic_store_relaxed(&destructing, 0);
     atomic_store_relaxed(&constructed, 1);
   }
@@ -350,13 +349,13 @@ struct Allocator {
   }
 
   void FinishAndWrite() {
-    if (print_text && common_flags()->print_module_map)
+    if (flags()->print_text && common_flags()->print_module_map)
       DumpProcessMap();
 
     allocator.ForceLock();
 
     InsertLiveBlocks();
-    if (print_text) {
+    if (flags()->print_text) {
       if (!flags()->print_terse)
         Printf("Recorded MIBs (incl. live on exit):\n");
       MIBMap.ForEach(PrintCallback,
diff --git a/compiler-rt/lib/orc/macho_platform.cpp b/compiler-rt/lib/orc/macho_platform.cpp
index 8ca6858..4b603fd 100644
--- a/compiler-rt/lib/orc/macho_platform.cpp
+++ b/compiler-rt/lib/orc/macho_platform.cpp
@@ -557,6 +557,12 @@ Error MachOPlatformRuntimeState::registerObjectPlatformSections(
     return make_error<StringError>(ErrStream.str());
   }
 
+  ORC_RT_DEBUG({
+    printdbg("  UnwindInfo: %s, UseCallbackStyleUnwindInfo: %s\n",
+             UnwindInfo ? "true" : "false",
+             UseCallbackStyleUnwindInfo ? "true" : "false");
+  });
+
   if (UnwindInfo && UseCallbackStyleUnwindInfo) {
     ORC_RT_DEBUG({
       printdbg("  Registering new-style unwind info for:\n"
diff --git a/compiler-rt/test/orc/TestCases/Darwin/Generic/exceptions.cpp b/compiler-rt/test/orc/TestCases/Darwin/Generic/exceptions.cpp
new file mode 100644
index 0000000..7e9c40c
--- /dev/null
+++ b/compiler-rt/test/orc/TestCases/Darwin/Generic/exceptions.cpp
@@ -0,0 +1,13 @@
+// RUN: %clangxx -c -o %t %s
+// RUN: %llvm_jitlink -slab-allocate=20Mb %t
+//
+// REQUIRES: system-darwin && host-arch-compatible
+
+int main(int argc, char *argv[]) {
+  try {
+    throw 42;
+  } catch (int E) {
+    return 42 - E;
+  }
+  return 1;
+}
diff --git a/flang/include/flang/Common/Fortran-features.h b/flang/include/flang/Common/Fortran-features.h
index 96c4de7..e2a420a 100644
--- a/flang/include/flang/Common/Fortran-features.h
+++ b/flang/include/flang/Common/Fortran-features.h
@@ -34,13 +34,13 @@ ENUM_CLASS(LanguageFeature, BackslashEscapes, OldDebugLines,
     EquivalenceSameNonSequence, AdditionalIntrinsics, AnonymousParents,
     OldLabelDoEndStatements, LogicalIntegerAssignment, EmptySourceFile,
     ProgramReturn, ImplicitNoneTypeNever, ImplicitNoneTypeAlways,
-    ForwardRefImplicitNone, OpenAccessAppend, BOZAsDefaultInteger,
-    DistinguishableSpecifics, DefaultSave, PointerInSeqType, NonCharacterFormat,
-    SaveMainProgram, SaveBigMainProgramVariables,
-    DistinctArrayConstructorLengths, PPCVector, RelaxedIntentInChecking,
-    ForwardRefImplicitNoneData, NullActualForAllocatable,
-    ActualIntegerConvertedToSmallerKind, HollerithOrCharacterAsBOZ,
-    BindingAsProcedure, StatementFunctionExtensions,
+    ImplicitNoneExternal, ForwardRefImplicitNone, OpenAccessAppend,
+    BOZAsDefaultInteger, DistinguishableSpecifics, DefaultSave,
+    PointerInSeqType, NonCharacterFormat, SaveMainProgram,
+    SaveBigMainProgramVariables, DistinctArrayConstructorLengths, PPCVector,
+    RelaxedIntentInChecking, ForwardRefImplicitNoneData,
+    NullActualForAllocatable, ActualIntegerConvertedToSmallerKind,
+    HollerithOrCharacterAsBOZ, BindingAsProcedure, StatementFunctionExtensions,
     UseGenericIntrinsicWhenSpecificDoesntMatch, DataStmtExtensions,
     RedundantContiguous, RedundantAttribute, InitBlankCommon,
     EmptyBindCDerivedType, MiscSourceExtensions, AllocateToOtherLength,
diff --git a/flang/include/flang/Evaluate/check-expression.h b/flang/include/flang/Evaluate/check-expression.h
index 49b6446..7eee199 100644
--- a/flang/include/flang/Evaluate/check-expression.h
+++ b/flang/include/flang/Evaluate/check-expression.h
@@ -99,29 +99,44 @@ extern template void CheckSpecificationExpr(
     FoldingContext &, bool forElementalFunctionResult);
 
 // Contiguity & "simple contiguity" (9.5.4)
+// Named constant sections are expressions, and as such their evaluation is
+// considered to be contiguous. This avoids funny situations where
+// IS_CONTIGUOUS(cst(1:10:2)) would fold to true because `cst(1:10:2)` is
+// folded into an array constructor literal, but IS_CONTIGUOUS(cst(i:i+9:2))
+// folds to false because the named constant reference cannot be folded.
+// Note that these IS_CONTIGUOUS usages are not portable (can probably be
+// considered to fall into F2023 8.5.7 (4)), and existing compilers are not
+// consistent here.
+// However, the compiler may very well decide to create a descriptor over
+// `cst(i:i+9:2)` when it can to avoid copies, and as such it needs internally
+// to be able to tell the actual contiguity of that array section over the
+// read-only data.
 template <typename A>
-std::optional<bool> IsContiguous(const A &, FoldingContext &);
+std::optional<bool> IsContiguous(const A &, FoldingContext &,
+    bool namedConstantSectionsAreContiguous = true);
+extern template std::optional<bool> IsContiguous(const Expr<SomeType> &,
+    FoldingContext &, bool namedConstantSectionsAreContiguous);
+extern template std::optional<bool> IsContiguous(const ArrayRef &,
+    FoldingContext &, bool namedConstantSectionsAreContiguous);
+extern template std::optional<bool> IsContiguous(const Substring &,
+    FoldingContext &, bool namedConstantSectionsAreContiguous);
+extern template std::optional<bool> IsContiguous(const Component &,
+    FoldingContext &, bool namedConstantSectionsAreContiguous);
+extern template std::optional<bool> IsContiguous(const ComplexPart &,
+    FoldingContext &, bool namedConstantSectionsAreContiguous);
+extern template std::optional<bool> IsContiguous(const CoarrayRef &,
+    FoldingContext &, bool namedConstantSectionsAreContiguous);
 extern template std::optional<bool> IsContiguous(
-    const Expr<SomeType> &, FoldingContext &);
-extern template std::optional<bool> IsContiguous(
-    const ArrayRef &, FoldingContext &);
-extern template std::optional<bool> IsContiguous(
-    const Substring &, FoldingContext &);
-extern template std::optional<bool> IsContiguous(
-    const Component &, FoldingContext &);
-extern template std::optional<bool> IsContiguous(
-    const ComplexPart &, FoldingContext &);
-extern template std::optional<bool> IsContiguous(
-    const CoarrayRef &, FoldingContext &);
-extern template std::optional<bool> IsContiguous(
-    const Symbol &, FoldingContext &);
-static inline std::optional<bool> IsContiguous(
-    const SymbolRef &s, FoldingContext &c) {
-  return IsContiguous(s.get(), c);
+    const Symbol &, FoldingContext &, bool namedConstantSectionsAreContiguous);
+static inline std::optional<bool> IsContiguous(const SymbolRef &s,
+    FoldingContext &c, bool namedConstantSectionsAreContiguous = true) {
+  return IsContiguous(s.get(), c, namedConstantSectionsAreContiguous);
 }
 template <typename A>
-bool IsSimplyContiguous(const A &x, FoldingContext &context) {
-  return IsContiguous(x, context).value_or(false);
+bool IsSimplyContiguous(const A &x, FoldingContext &context,
+    bool namedConstantSectionsAreContiguous = true) {
+  return IsContiguous(x, context, namedConstantSectionsAreContiguous)
+      .value_or(false);
 }
 
 template <typename A> bool IsErrorExpr(const A &);
diff --git a/flang/include/flang/Evaluate/tools.h b/flang/include/flang/Evaluate/tools.h
index 669efb4..352f6b3 100644
--- a/flang/include/flang/Evaluate/tools.h
+++ b/flang/include/flang/Evaluate/tools.h
@@ -321,28 +321,38 @@ template <typename A> const Symbol *ExtractBareLenParameter(const A &expr) {
 // of a substring or complex part.
 template <typename A>
 common::IfNoLvalue<std::optional<DataRef>, A> ExtractDataRef(
-    const A &, bool intoSubstring, bool intoComplexPart) {
-  return std::nullopt; // default base case
+    const A &x, bool intoSubstring, bool intoComplexPart) {
+  if constexpr (common::HasMember<decltype(x), decltype(DataRef::u)>) {
+    return DataRef{x};
+  } else {
+    return std::nullopt; // default base case
+  }
+}
+
+std::optional<DataRef> ExtractSubstringBase(const Substring &);
+
+inline std::optional<DataRef> ExtractDataRef(const Substring &x,
+    bool intoSubstring = false, bool intoComplexPart = false) {
+  if (intoSubstring) {
+    return ExtractSubstringBase(x);
+  } else {
+    return std::nullopt;
+  }
+}
+inline std::optional<DataRef> ExtractDataRef(const ComplexPart &x,
+    bool intoSubstring = false, bool intoComplexPart = false) {
+  if (intoComplexPart) {
+    return x.complex();
+  } else {
+    return std::nullopt;
+  }
 }
 template <typename T>
 std::optional<DataRef> ExtractDataRef(const Designator<T> &d,
     bool intoSubstring = false, bool intoComplexPart = false) {
   return common::visit(
       [=](const auto &x) -> std::optional<DataRef> {
-        if constexpr (common::HasMember<decltype(x), decltype(DataRef::u)>) {
-          return DataRef{x};
-        }
-        if constexpr (std::is_same_v<std::decay_t<decltype(x)>, Substring>) {
-          if (intoSubstring) {
-            return ExtractSubstringBase(x);
-          }
-        }
-        if constexpr (std::is_same_v<std::decay_t<decltype(x)>, ComplexPart>) {
-          if (intoComplexPart) {
-            return x.complex();
-          }
-        }
-        return std::nullopt; // w/o "else" to dodge bogus g++ 8.1 warning
+        return ExtractDataRef(x, intoSubstring, intoComplexPart);
       },
       d.u);
 }
@@ -376,8 +386,6 @@ std::optional<DataRef> ExtractDataRef(
 std::optional<DataRef> ExtractDataRef(const ActualArgument &,
     bool intoSubstring = false, bool intoComplexPart = false);
 
-std::optional<DataRef> ExtractSubstringBase(const Substring &);
-
 // Predicate: is an expression is an array element reference?
 template <typename T>
 bool IsArrayElement(const Expr<T> &expr, bool intoSubstring = true,
diff --git a/flang/lib/Common/Fortran-features.cpp b/flang/lib/Common/Fortran-features.cpp
index bbf16a4..e2601e1 100644
--- a/flang/lib/Common/Fortran-features.cpp
+++ b/flang/lib/Common/Fortran-features.cpp
@@ -22,6 +22,7 @@ LanguageFeatureControl::LanguageFeatureControl() {
   disable_.set(LanguageFeature::CudaUnified);
   disable_.set(LanguageFeature::ImplicitNoneTypeNever);
   disable_.set(LanguageFeature::ImplicitNoneTypeAlways);
+  disable_.set(LanguageFeature::ImplicitNoneExternal);
   disable_.set(LanguageFeature::DefaultSave);
   disable_.set(LanguageFeature::SaveMainProgram);
   // These features, if enabled, conflict with valid standard usage,
diff --git a/flang/lib/Evaluate/check-expression.cpp b/flang/lib/Evaluate/check-expression.cpp
index 726a0ab..6ace5bb 100644
--- a/flang/lib/Evaluate/check-expression.cpp
+++ b/flang/lib/Evaluate/check-expression.cpp
@@ -833,7 +833,10 @@ class IsContiguousHelper
 public:
   using Result = std::optional<bool>; // tri-state
   using Base = AnyTraverse<IsContiguousHelper, Result>;
-  explicit IsContiguousHelper(FoldingContext &c) : Base{*this}, context_{c} {}
+  explicit IsContiguousHelper(
+      FoldingContext &c, bool namedConstantSectionsAreContiguous)
+      : Base{*this}, context_{c}, namedConstantSectionsAreContiguous_{
+                                      namedConstantSectionsAreContiguous} {}
   using Base::operator();
 
   template <typename T> Result operator()(const Constant<T> &) const {
@@ -856,6 +859,11 @@ public:
       // RANK(*) associating entity is contiguous.
       if (details->IsAssumedSize()) {
         return true;
+      } else if (!IsVariable(details->expr()) &&
+          (namedConstantSectionsAreContiguous_ ||
+              !ExtractDataRef(details->expr(), true, true))) {
+        // Selector is associated to an expression value.
+        return true;
       } else {
         return Base::operator()(ultimate); // use expr
       }
@@ -1113,22 +1121,34 @@ private:
   }
 
   FoldingContext &context_;
+  bool namedConstantSectionsAreContiguous_{false};
 };
 
 template <typename A>
-std::optional<bool> IsContiguous(const A &x, FoldingContext &context) {
-  return IsContiguousHelper{context}(x);
+std::optional<bool> IsContiguous(const A &x, FoldingContext &context,
+    bool namedConstantSectionsAreContiguous) {
+  if (!IsVariable(x) &&
+      (namedConstantSectionsAreContiguous || !ExtractDataRef(x, true, true))) {
+    return true;
+  } else {
+    return IsContiguousHelper{context, namedConstantSectionsAreContiguous}(x);
+  }
 }
 
+template std::optional<bool> IsContiguous(const Expr<SomeType> &,
+    FoldingContext &, bool namedConstantSectionsAreContiguous);
+template std::optional<bool> IsContiguous(const ArrayRef &, FoldingContext &,
+    bool namedConstantSectionsAreContiguous);
+template std::optional<bool> IsContiguous(const Substring &, FoldingContext &,
+    bool namedConstantSectionsAreContiguous);
+template std::optional<bool> IsContiguous(const Component &, FoldingContext &,
+    bool namedConstantSectionsAreContiguous);
+template std::optional<bool> IsContiguous(const ComplexPart &, FoldingContext &,
+    bool namedConstantSectionsAreContiguous);
+template std::optional<bool> IsContiguous(const CoarrayRef &, FoldingContext &,
+    bool namedConstantSectionsAreContiguous);
 template std::optional<bool> IsContiguous(
-    const Expr<SomeType> &, FoldingContext &);
-template std::optional<bool> IsContiguous(const ArrayRef &, FoldingContext &);
-template std::optional<bool> IsContiguous(const Substring &, FoldingContext &);
-template std::optional<bool> IsContiguous(const Component &, FoldingContext &);
-template std::optional<bool> IsContiguous(
-    const ComplexPart &, FoldingContext &);
-template std::optional<bool> IsContiguous(const CoarrayRef &, FoldingContext &);
-template std::optional<bool> IsContiguous(const Symbol &, FoldingContext &);
+    const Symbol &, FoldingContext &, bool namedConstantSectionsAreContiguous);
 
 // IsErrorExpr()
 struct IsErrorExprHelper : public AnyTraverse<IsErrorExprHelper, bool> {
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 68b5950..a2c1d3e 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -740,6 +740,12 @@ static bool parseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args,
       args.hasFlag(clang::driver::options::OPT_fimplicit_none,
                    clang::driver::options::OPT_fno_implicit_none, false));
 
+  // -f{no-}implicit-none-ext
+  opts.features.Enable(
+      Fortran::common::LanguageFeature::ImplicitNoneExternal,
+      args.hasFlag(clang::driver::options::OPT_fimplicit_none_ext,
+                   clang::driver::options::OPT_fno_implicit_none_ext, false));
+
   // -f{no-}backslash
   opts.features.Enable(Fortran::common::LanguageFeature::BackslashEscapes,
                        args.hasFlag(clang::driver::options::OPT_fbackslash,
diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index 7ca2baf..6a0f4d1 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -32,6 +32,7 @@
 #include "flang/Optimizer/Dialect/FIROpsSupport.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "mlir/IR/IRMapping.h"
+#include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include <optional>
@@ -1135,6 +1136,27 @@ isSimplyContiguous(const Fortran::evaluate::ActualArgument &arg,
          Fortran::evaluate::IsSimplyContiguous(*sym, foldingContext);
 }
 
+static bool isParameterObjectOrSubObject(hlfir::Entity entity) {
+  mlir::Value base = entity;
+  bool foundParameter = false;
+  while (mlir::Operation *op = base ? base.getDefiningOp() : nullptr) {
+    base =
+        llvm::TypeSwitch<mlir::Operation *, mlir::Value>(op)
+            .Case<hlfir::DeclareOp>([&](auto declare) -> mlir::Value {
+              foundParameter |= hlfir::Entity{declare}.isParameter();
+              return foundParameter ? mlir::Value{} : declare.getMemref();
+            })
+            .Case<hlfir::DesignateOp, hlfir::ParentComponentOp, fir::EmboxOp>(
+                [&](auto op) -> mlir::Value { return op.getMemref(); })
+            .Case<fir::ReboxOp>(
+                [&](auto rebox) -> mlir::Value { return rebox.getBox(); })
+            .Case<fir::ConvertOp>(
+                [&](auto convert) -> mlir::Value { return convert.getValue(); })
+            .Default([](mlir::Operation *) -> mlir::Value { return nullptr; });
+  }
+  return foundParameter;
+}
+
 /// When dummy is not ALLOCATABLE, POINTER and is not passed in register,
 /// prepare the actual argument according to the interface. Do as needed:
 /// - address element if this is an array argument in an elemental call.
@@ -1298,8 +1320,9 @@ static PreparedDummyArgument preparePresentUserCallActualArgument(
         // 'parameter' attribute. Even though the constant expressions
         // are not definable and explicit assignments to them are not
         // possible, we have to create a temporary copies when we pass
-        // them down the call stack.
-        entity.isParameter()) {
+        // them down the call stack because of potential compiler
+        // generated writes in copy-out.
+        isParameterObjectOrSubObject(entity)) {
       // Make a copy in a temporary.
       auto copy = builder.create<hlfir::AsExprOp>(loc, entity);
       mlir::Type storageType = entity.getType();
diff --git a/flang/lib/Lower/ConvertExprToHLFIR.cpp b/flang/lib/Lower/ConvertExprToHLFIR.cpp
index 3e54cef..91daa6f 100644
--- a/flang/lib/Lower/ConvertExprToHLFIR.cpp
+++ b/flang/lib/Lower/ConvertExprToHLFIR.cpp
@@ -21,6 +21,7 @@
 #include "flang/Lower/ConvertProcedureDesignator.h"
 #include "flang/Lower/ConvertType.h"
 #include "flang/Lower/ConvertVariable.h"
+#include "flang/Lower/DumpEvaluateExpr.h"
 #include "flang/Lower/StatementContext.h"
 #include "flang/Lower/SymbolMap.h"
 #include "flang/Optimizer/Builder/Complex.h"
@@ -220,7 +221,8 @@ private:
     // Non simply contiguous ref require a fir.box to carry the byte stride.
     if (mlir::isa<fir::SequenceType>(resultValueType) &&
         !Fortran::evaluate::IsSimplyContiguous(
-            designatorNode, getConverter().getFoldingContext()))
+            designatorNode, getConverter().getFoldingContext(),
+            /*namedConstantSectionsAreAlwaysContiguous=*/false))
       return fir::BoxType::get(resultValueType);
     // Other designators can be handled as raw addresses.
     return fir::ReferenceType::get(resultValueType);
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index 299d9d4..febc6ad 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -344,6 +344,20 @@ bool ClauseProcessor::processDistSchedule(
   return false;
 }
 
+bool ClauseProcessor::processExclusive(
+    mlir::Location currentLocation,
+    mlir::omp::ExclusiveClauseOps &result) const {
+  if (auto *clause = findUniqueClause<omp::clause::Exclusive>()) {
+    for (const Object &object : clause->v) {
+      const semantics::Symbol *symbol = object.sym();
+      mlir::Value symVal = converter.getSymbolAddress(*symbol);
+      result.exclusiveVars.push_back(symVal);
+    }
+    return true;
+  }
+  return false;
+}
+
 bool ClauseProcessor::processFilter(lower::StatementContext &stmtCtx,
                                     mlir::omp::FilterClauseOps &result) const {
   if (auto *clause = findUniqueClause<omp::clause::Filter>()) {
@@ -380,6 +394,20 @@ bool ClauseProcessor::processHint(mlir::omp::HintClauseOps &result) const {
   return false;
 }
 
+bool ClauseProcessor::processInclusive(
+    mlir::Location currentLocation,
+    mlir::omp::InclusiveClauseOps &result) const {
+  if (auto *clause = findUniqueClause<omp::clause::Inclusive>()) {
+    for (const Object &object : clause->v) {
+      const semantics::Symbol *symbol = object.sym();
+      mlir::Value symVal = converter.getSymbolAddress(*symbol);
+      result.inclusiveVars.push_back(symVal);
+    }
+    return true;
+  }
+  return false;
+}
+
 bool ClauseProcessor::processMergeable(
     mlir::omp::MergeableClauseOps &result) const {
   return markClauseOccurrence<omp::clause::Mergeable>(result.mergeable);
@@ -1135,10 +1163,9 @@ bool ClauseProcessor::processReduction(
         llvm::SmallVector<mlir::Attribute> reductionDeclSymbols;
         llvm::SmallVector<const semantics::Symbol *> reductionSyms;
         ReductionProcessor rp;
-        rp.addDeclareReduction(currentLocation, converter, clause,
-                               reductionVars, reduceVarByRef,
-                               reductionDeclSymbols, reductionSyms);
-
+        rp.processReductionArguments(
+            currentLocation, converter, clause, reductionVars, reduceVarByRef,
+            reductionDeclSymbols, reductionSyms, result.reductionMod);
         // Copy local lists into the output.
         llvm::copy(reductionVars, std::back_inserter(result.reductionVars));
         llvm::copy(reduceVarByRef, std::back_inserter(result.reductionByref));
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h
index 7b047d4a..e05f66c 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.h
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h
@@ -64,6 +64,8 @@ public:
   bool processDeviceType(mlir::omp::DeviceTypeClauseOps &result) const;
   bool processDistSchedule(lower::StatementContext &stmtCtx,
                            mlir::omp::DistScheduleClauseOps &result) const;
+  bool processExclusive(mlir::Location currentLocation,
+                        mlir::omp::ExclusiveClauseOps &result) const;
   bool processFilter(lower::StatementContext &stmtCtx,
                      mlir::omp::FilterClauseOps &result) const;
   bool processFinal(lower::StatementContext &stmtCtx,
@@ -72,6 +74,8 @@ public:
       mlir::omp::HasDeviceAddrClauseOps &result,
       llvm::SmallVectorImpl<const semantics::Symbol *> &isDeviceSyms) const;
   bool processHint(mlir::omp::HintClauseOps &result) const;
+  bool processInclusive(mlir::Location currentLocation,
+                        mlir::omp::InclusiveClauseOps &result) const;
   bool processMergeable(mlir::omp::MergeableClauseOps &result) const;
   bool processNowait(mlir::omp::NowaitClauseOps &result) const;
   bool processNumTeams(lower::StatementContext &stmtCtx,
diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp
index db6486a..5664d8a 100644
--- a/flang/lib/Lower/OpenMP/Clauses.cpp
+++ b/flang/lib/Lower/OpenMP/Clauses.cpp
@@ -736,8 +736,8 @@ Enter make(const parser::OmpClause::Enter &inp,
 
 Exclusive make(const parser::OmpClause::Exclusive &inp,
                semantics::SemanticsContext &semaCtx) {
-  // inp -> empty
-  llvm_unreachable("Empty: exclusive");
+  // inp.v -> parser::OmpObjectList
+  return Exclusive{makeObjects(/*List=*/inp.v, semaCtx)};
 }
 
 Fail make(const parser::OmpClause::Fail &inp,
@@ -846,8 +846,8 @@ If make(const parser::OmpClause::If &inp,
 
 Inclusive make(const parser::OmpClause::Inclusive &inp,
                semantics::SemanticsContext &semaCtx) {
-  // inp -> empty
-  llvm_unreachable("Empty: inclusive");
+  // inp.v -> parser::OmpObjectList
+  return Inclusive{makeObjects(/*List=*/inp.v, semaCtx)};
 }
 
 Indirect make(const parser::OmpClause::Indirect &inp,
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 5e1f3b0..0ae04ed9 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1584,6 +1584,15 @@ static void genParallelClauses(
   cp.processReduction(loc, clauseOps, reductionSyms);
 }
 
+static void genScanClauses(lower::AbstractConverter &converter,
+                           semantics::SemanticsContext &semaCtx,
+                           const List<Clause> &clauses, mlir::Location loc,
+                           mlir::omp::ScanOperands &clauseOps) {
+  ClauseProcessor cp(converter, semaCtx, clauses);
+  cp.processInclusive(loc, clauseOps);
+  cp.processExclusive(loc, clauseOps);
+}
+
 static void genSectionsClauses(
     lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
     const List<Clause> &clauses, mlir::Location loc,
@@ -1981,6 +1990,16 @@ genParallelOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
   return parallelOp;
 }
 
+static mlir::omp::ScanOp
+genScanOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
+          semantics::SemanticsContext &semaCtx, mlir::Location loc,
+          const ConstructQueue &queue, ConstructQueue::const_iterator item) {
+  mlir::omp::ScanOperands clauseOps;
+  genScanClauses(converter, semaCtx, item->clauses, loc, clauseOps);
+  return converter.getFirOpBuilder().create<mlir::omp::ScanOp>(
+      converter.getCurrentLocation(), clauseOps);
+}
+
 /// This breaks the normal prototype of the gen*Op functions: adding the
 /// sectionBlocks argument so that the enclosed section constructs can be
 /// lowered here with correct reduction symbol remapping.
@@ -2990,7 +3009,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
     genStandaloneParallel(converter, symTable, semaCtx, eval, loc, queue, item);
     break;
   case llvm::omp::Directive::OMPD_scan:
-    TODO(loc, "Unhandled directive " + llvm::omp::getOpenMPDirectiveName(dir));
+    genScanOp(converter, symTable, semaCtx, loc, queue, item);
     break;
   case llvm::omp::Directive::OMPD_section:
     llvm_unreachable("genOMPDispatch: OMPD_section");
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index 4a811f1..f83079e 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -31,6 +31,9 @@ static llvm::cl::opt<bool> forceByrefReduction(
     llvm::cl::desc("Pass all reduction arguments by reference"),
     llvm::cl::Hidden);
 
+using ReductionModifier =
+    Fortran::lower::omp::clause::Reduction::ReductionModifier;
+
 namespace Fortran {
 namespace lower {
 namespace omp {
@@ -518,18 +521,36 @@ static bool doReductionByRef(mlir::Value reductionVar) {
   return false;
 }
 
-void ReductionProcessor::addDeclareReduction(
+mlir::omp::ReductionModifier translateReductionModifier(ReductionModifier mod) {
+  switch (mod) {
+  case ReductionModifier::Default:
+    return mlir::omp::ReductionModifier::defaultmod;
+  case ReductionModifier::Inscan:
+    return mlir::omp::ReductionModifier::inscan;
+  case ReductionModifier::Task:
+    return mlir::omp::ReductionModifier::task;
+  }
+  return mlir::omp::ReductionModifier::defaultmod;
+}
+
+void ReductionProcessor::processReductionArguments(
     mlir::Location currentLocation, lower::AbstractConverter &converter,
     const omp::clause::Reduction &reduction,
     llvm::SmallVectorImpl<mlir::Value> &reductionVars,
     llvm::SmallVectorImpl<bool> &reduceVarByRef,
     llvm::SmallVectorImpl<mlir::Attribute> &reductionDeclSymbols,
-    llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSymbols) {
+    llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSymbols,
+    mlir::omp::ReductionModifierAttr &reductionMod) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
 
-  if (std::get<std::optional<omp::clause::Reduction::ReductionModifier>>(
-          reduction.t))
-    TODO(currentLocation, "Reduction modifiers are not supported");
+  auto mod = std::get<std::optional<ReductionModifier>>(reduction.t);
+  if (mod.has_value()) {
+    if (mod.value() == ReductionModifier::Task)
+      TODO(currentLocation, "Reduction modifier `task` is not supported");
+    else
+      reductionMod = mlir::omp::ReductionModifierAttr::get(
+          firOpBuilder.getContext(), translateReductionModifier(mod.value()));
+  }
 
   mlir::omp::DeclareReductionOp decl;
   const auto &redOperatorList{
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.h b/flang/lib/Lower/OpenMP/ReductionProcessor.h
index d7d9b067..11baa83 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.h
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.h
@@ -19,6 +19,7 @@
 #include "flang/Parser/parse-tree.h"
 #include "flang/Semantics/symbol.h"
 #include "flang/Semantics/type.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "mlir/IR/Location.h"
 #include "mlir/IR/Types.h"
 
@@ -120,13 +121,14 @@ public:
 
   /// Creates a reduction declaration and associates it with an OpenMP block
   /// directive.
-  static void addDeclareReduction(
+  static void processReductionArguments(
       mlir::Location currentLocation, lower::AbstractConverter &converter,
       const omp::clause::Reduction &reduction,
       llvm::SmallVectorImpl<mlir::Value> &reductionVars,
       llvm::SmallVectorImpl<bool> &reduceVarByRef,
       llvm::SmallVectorImpl<mlir::Attribute> &reductionDeclSymbols,
-      llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSymbols);
+      llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSymbols,
+      mlir::omp::ReductionModifierAttr &reductionMod);
 };
 
 template <typename FloatOp, typename IntegerOp>
diff --git a/flang/lib/Optimizer/Analysis/CMakeLists.txt b/flang/lib/Optimizer/Analysis/CMakeLists.txt
index 6fe9c70..c4dae89 100644
--- a/flang/lib/Optimizer/Analysis/CMakeLists.txt
+++ b/flang/lib/Optimizer/Analysis/CMakeLists.txt
@@ -12,6 +12,7 @@ add_flang_library(FIRAnalysis
   LINK_LIBS
   FIRBuilder
   FIRDialect
+  FIRSupport
   HLFIRDialect
 
   MLIR_LIBS
@@ -19,5 +20,4 @@ add_flang_library(FIRAnalysis
   MLIRLLVMDialect
   MLIRMathTransforms
   MLIROpenMPDialect
-  FIRSupport
 )
diff --git a/flang/lib/Semantics/assignment.cpp b/flang/lib/Semantics/assignment.cpp
index 2b56257..627983d 100644
--- a/flang/lib/Semantics/assignment.cpp
+++ b/flang/lib/Semantics/assignment.cpp
@@ -42,6 +42,7 @@ public:
   void Analyze(const parser::AssignmentStmt &);
   void Analyze(const parser::PointerAssignmentStmt &);
   void Analyze(const parser::ConcurrentControl &);
+  int deviceConstructDepth_{0};
 
 private:
   bool CheckForPureContext(const SomeExpr &rhs, parser::CharBlock rhsSource);
@@ -94,7 +95,7 @@ void AssignmentContext::Analyze(const parser::AssignmentStmt &stmt) {
             common::LanguageFeature::CUDA)) {
       const auto &scope{context_.FindScope(lhsLoc)};
       const Scope &progUnit{GetProgramUnitContaining(scope)};
-      if (!IsCUDADeviceContext(&progUnit)) {
+      if (!IsCUDADeviceContext(&progUnit) && deviceConstructDepth_ == 0) {
         if (Fortran::evaluate::HasCUDADeviceAttrs(lhs) &&
             Fortran::evaluate::HasCUDAImplicitTransfer(rhs)) {
           context_.Say(lhsLoc, "Unsupported CUDA data transfer"_err_en_US);
@@ -228,6 +229,46 @@ void AssignmentChecker::Enter(const parser::MaskedElsewhereStmt &x) {
 void AssignmentChecker::Leave(const parser::MaskedElsewhereStmt &) {
   context_.value().PopWhereContext();
 }
+void AssignmentChecker::Enter(const parser::CUFKernelDoConstruct &x) {
+  ++context_.value().deviceConstructDepth_;
+}
+void AssignmentChecker::Leave(const parser::CUFKernelDoConstruct &) {
+  --context_.value().deviceConstructDepth_;
+}
+static bool IsOpenACCComputeConstruct(const parser::OpenACCBlockConstruct &x) {
+  const auto &beginBlockDirective =
+      std::get<Fortran::parser::AccBeginBlockDirective>(x.t);
+  const auto &blockDirective =
+      std::get<Fortran::parser::AccBlockDirective>(beginBlockDirective.t);
+  if (blockDirective.v == llvm::acc::ACCD_parallel ||
+      blockDirective.v == llvm::acc::ACCD_serial ||
+      blockDirective.v == llvm::acc::ACCD_kernels) {
+    return true;
+  }
+  return false;
+}
+void AssignmentChecker::Enter(const parser::OpenACCBlockConstruct &x) {
+  if (IsOpenACCComputeConstruct(x)) {
+    ++context_.value().deviceConstructDepth_;
+  }
+}
+void AssignmentChecker::Leave(const parser::OpenACCBlockConstruct &x) {
+  if (IsOpenACCComputeConstruct(x)) {
+    --context_.value().deviceConstructDepth_;
+  }
+}
+void AssignmentChecker::Enter(const parser::OpenACCCombinedConstruct &) {
+  ++context_.value().deviceConstructDepth_;
+}
+void AssignmentChecker::Leave(const parser::OpenACCCombinedConstruct &) {
+  --context_.value().deviceConstructDepth_;
+}
+void AssignmentChecker::Enter(const parser::OpenACCLoopConstruct &) {
+  ++context_.value().deviceConstructDepth_;
+}
+void AssignmentChecker::Leave(const parser::OpenACCLoopConstruct &) {
+  --context_.value().deviceConstructDepth_;
+}
 
 } // namespace Fortran::semantics
 template class Fortran::common::Indirection<
diff --git a/flang/lib/Semantics/assignment.h b/flang/lib/Semantics/assignment.h
index 95d7b3c..a67bee4 100644
--- a/flang/lib/Semantics/assignment.h
+++ b/flang/lib/Semantics/assignment.h
@@ -45,6 +45,14 @@ public:
   void Leave(const parser::EndWhereStmt &);
   void Enter(const parser::MaskedElsewhereStmt &);
   void Leave(const parser::MaskedElsewhereStmt &);
+  void Enter(const parser::CUFKernelDoConstruct &);
+  void Leave(const parser::CUFKernelDoConstruct &);
+  void Enter(const parser::OpenACCBlockConstruct &);
+  void Leave(const parser::OpenACCBlockConstruct &);
+  void Enter(const parser::OpenACCCombinedConstruct &);
+  void Leave(const parser::OpenACCCombinedConstruct &);
+  void Enter(const parser::OpenACCLoopConstruct &);
+  void Leave(const parser::OpenACCLoopConstruct &);
 
 private:
   common::Indirection<AssignmentContext> context_;
diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index 8731412..fbf43d4 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -87,7 +87,8 @@ private:
   bool inheritFromParent_{false}; // look in parent if not specified here
   bool isImplicitNoneType_{
       context_.IsEnabled(common::LanguageFeature::ImplicitNoneTypeAlways)};
-  bool isImplicitNoneExternal_{false};
+  bool isImplicitNoneExternal_{
+      context_.IsEnabled(common::LanguageFeature::ImplicitNoneExternal)};
   // map_ contains the mapping between letters and types that were defined
   // by the IMPLICIT statements of the related scope. It does not contain
   // the default Fortran mappings nor the mapping defined in parents.
diff --git a/flang/test/Evaluate/folding09.f90 b/flang/test/Evaluate/folding09.f90
index 534ff1a..864f38b 100644
--- a/flang/test/Evaluate/folding09.f90
+++ b/flang/test/Evaluate/folding09.f90
@@ -9,7 +9,7 @@ module m
   logical, parameter :: test_param1 = is_contiguous(cst(:,1))
   logical, parameter :: test_param2 = is_contiguous(cst(1,:))
   logical, parameter :: test_param3 = is_contiguous(cst(:,n))
-  logical, parameter :: test_param4 = .not. is_contiguous(cst(n,:))
+  logical, parameter :: test_param4 = is_contiguous(cst(n,:))
   logical, parameter :: test_param5 = is_contiguous(empty_cst(n,-1:n:2))
  contains
   function f()
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index 7ef391c..5805dd5 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -87,3 +87,17 @@ end
 ! CHECK: func.func private @llvm.nvvm.barrier0.and(i32) -> i32
 ! CHECK: func.func private @llvm.nvvm.barrier0.popc(i32) -> i32
 ! CHECK: func.func private @llvm.nvvm.barrier0.or(i32) -> i32
+
+subroutine host1()
+  integer, device :: a(32)
+  integer :: i, j
+
+block; use cudadevice
+  !$cuf kernel do(1) <<<*,32>>>
+  do i = 1, 32
+    a(i) = a(i) * 2.0
+    call syncthreads()
+    a(i) = a(i) + a(j) - 34.0
+  end do
+end block
+end 
diff --git a/flang/test/Lower/CUDA/cuda-intrinsic.cuf b/flang/test/Lower/CUDA/cuda-intrinsic.cuf
index 9723afc..77682ad 100644
--- a/flang/test/Lower/CUDA/cuda-intrinsic.cuf
+++ b/flang/test/Lower/CUDA/cuda-intrinsic.cuf
@@ -1,17 +1,194 @@
 ! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
 
 module mod1
-  type int
-    real :: inf, sup 
-  end type int
+  type retf
+    real :: inf, sup, near, zero 
+  end type retf
+  type retd
+    real(8) :: inf, sup, near, zero 
+  end type retd
 contains
   attributes(global) subroutine fadd(c, a, b)
-    type (int) :: c, a, b
-    c%inf = __fadd_rd(a%inf, b%inf)
-    c%sup = __fadd_ru(a%sup, b%sup)
+    real(4) :: a, b
+    type (retf) :: c
+    c%near = __fadd_rn(a, b)
+    c%zero = __fadd_rz(a, b)
+    c%inf =  __fadd_rd(a, b)
+    c%sup =  __fadd_ru(a, b)
   end subroutine
+  attributes(global) subroutine dadd(c, a, b)
+    real(8) :: a, b
+    type (retd) :: c
+    c%near = __dadd_rn(a, b)
+    c%zero = __dadd_rz(a, b)
+    c%inf =  __dadd_rd(a, b)
+    c%sup =  __dadd_ru(a, b)
+  end subroutine
+  attributes(global) subroutine fmul(c, a, b)
+    real(4) :: a, b
+    type (retf) :: c
+    c%near = __fmul_rn(a, b)
+    c%zero = __fmul_rz(a, b)
+    c%inf  = __fmul_rd(a, b)
+    c%sup  = __fmul_ru(a, b)
+  end subroutine
+  attributes(global) subroutine dmul(c, a, b)
+    real(8) :: a, b
+    type (retf) :: c
+    c%near = __dmul_rn(a, b)
+    c%zero = __dmul_rz(a, b)
+    c%inf  = __dmul_rd(a, b)
+    c%sup  = __dmul_ru(a, b)
+  end subroutine
+  attributes(global) subroutine fmaf(c, a, b)
+    real(4) :: a, b
+    type (retf) :: c
+    c%near = __fmaf_rn(a, b, b)
+    c%zero = __fmaf_rz(a, b, b)
+    c%inf  = __fmaf_rd(a, b, b)
+    c%sup  = __fmaf_ru(a, b, b)
+  end subroutine
+  attributes(global) subroutine fma(c, a, b)
+    real(8) :: a, b
+    type (retd) :: c
+    c%near = __fma_rn(a, b, b)
+    c%zero = __fma_rz(a, b, b)
+    c%inf  = __fma_rd(a, b, b)
+    c%sup  = __fma_ru(a, b, b)
+  end subroutine
+  attributes(global) subroutine frcp(c,a)
+    real(4) :: a
+    type (retf) :: c
+    c%near = __frcp_rn(a)
+    c%zero = __frcp_rz(a)
+    c%inf  = __frcp_rd(a)
+    c%sup  = __frcp_ru(a)
+  end subroutine
+  attributes(global) subroutine fsqrt(c,a)
+    real(4) :: a
+    type (retf) :: c
+    c%near = __fsqrt_rn(a)
+    c%zero = __fsqrt_rz(a)
+    c%inf  = __fsqrt_rd(a)
+    c%sup  = __fsqrt_ru(a)
+  end subroutine
+  attributes(global) subroutine fdiv(c, a, b)
+    real(4) :: a, b
+    type (retf) :: c
+    c%near = __fdiv_rn(a, b)
+    c%zero = __fdiv_rz(a, b)
+    c%inf  = __fdiv_rd(a, b)
+    c%sup  = __fdiv_ru(a, b)
+  end subroutine
+  attributes(global) subroutine testsincosf(c, a, b)
+    real(4) :: a, b, c
+    call sincos(a, b, c)
+  end subroutine
+  attributes(global) subroutine testsincosd(c, a, b)
+    real(8) :: a, b, c
+    call sincos(a, b, c)
+  end subroutine
+  attributes(global) subroutine testsincospif(c, a, b)
+    real(4) :: a, b, c
+    call sincospi(a, b, c)
+  end subroutine
+  attributes(global) subroutine testsincospid(c, a, b)
+    real(8) :: a, b, c
+    call sincospi(a, b, c)
+  end subroutine
+  attributes(global) subroutine testmulhi(c, a, b)
+    integer(4) :: a, b, c
+    c = __mulhi(a, b)
+  end subroutine
+  attributes(global) subroutine testumulhi(c, a, b)
+    integer(4) :: a, b, c
+    c = __umulhi(a, b)
+  end subroutine
+  attributes(global) subroutine testmul64hi(c, a, b)
+    integer(8) :: a, b, c
+    c = __mul64hi(a, b)
+  end subroutine
+  attributes(global) subroutine testumul64hi(c, a, b)
+    integer(8) :: a, b, c
+    c = __umul64hi(a, b)
+  end subroutine
+
 end
 
 ! CHECK-LABEL: func.func @_QMmod1Pfadd
+! CHECK: fir.call @__nv_fadd_rn
+! CHECK: fir.call @__nv_fadd_rz
 ! CHECK: fir.call @__nv_fadd_rd
 ! CHECK: fir.call @__nv_fadd_ru
+
+! CHECK-LABEL: func.func @_QMmod1Pdadd
+! CHECK: fir.call @__nv_dadd_rn
+! CHECK: fir.call @__nv_dadd_rz
+! CHECK: fir.call @__nv_dadd_rd
+! CHECK: fir.call @__nv_dadd_ru
+
+! CHECK-LABEL: func.func @_QMmod1Pfmul
+! CHECK: fir.call @__nv_fmul_rn
+! CHECK: fir.call @__nv_fmul_rz
+! CHECK: fir.call @__nv_fmul_rd
+! CHECK: fir.call @__nv_fmul_ru
+
+! CHECK-LABEL: func.func @_QMmod1Pdmul
+! CHECK: fir.call @__nv_dmul_rn
+! CHECK: fir.call @__nv_dmul_rz
+! CHECK: fir.call @__nv_dmul_rd
+! CHECK: fir.call @__nv_dmul_ru
+
+! CHECK-LABEL: func.func @_QMmod1Pfmaf
+! CHECK: fir.call @__nv_fmaf_rn
+! CHECK: fir.call @__nv_fmaf_rz
+! CHECK: fir.call @__nv_fmaf_rd
+! CHECK: fir.call @__nv_fmaf_ru
+
+! CHECK-LABEL: func.func @_QMmod1Pfma
+! CHECK: fir.call @__nv_fma_rn
+! CHECK: fir.call @__nv_fma_rz
+! CHECK: fir.call @__nv_fma_rd
+! CHECK: fir.call @__nv_fma_ru
+
+! CHECK-LABEL: func.func @_QMmod1Pfrcp
+! CHECK: fir.call @__nv_frcp_rn
+! CHECK: fir.call @__nv_frcp_rz
+! CHECK: fir.call @__nv_frcp_rd
+! CHECK: fir.call @__nv_frcp_ru
+
+! CHECK-LABEL: func.func @_QMmod1Pfsqrt
+! CHECK: fir.call @__nv_fsqrt_rn
+! CHECK: fir.call @__nv_fsqrt_rz
+! CHECK: fir.call @__nv_fsqrt_rd
+! CHECK: fir.call @__nv_fsqrt_ru
+
+! CHECK-LABEL: func.func @_QMmod1Pfdiv
+! CHECK: fir.call @__nv_fdiv_rn
+! CHECK: fir.call @__nv_fdiv_rz
+! CHECK: fir.call @__nv_fdiv_rd
+! CHECK: fir.call @__nv_fdiv_ru
+
+! CHECK-LABEL: func.func @_QMmod1Ptestsincosf
+! CHECK: fir.call @__nv_sincosf
+
+! CHECK-LABEL: func.func @_QMmod1Ptestsincosd
+! CHECK: fir.call @__nv_sincos
+
+! CHECK-LABEL: func.func @_QMmod1Ptestsincospif
+! CHECK: fir.call @__nv_sincospif
+
+! CHECK-LABEL: func.func @_QMmod1Ptestsincospid
+! CHECK: fir.call @__nv_sincospi
+
+! CHECK-LABEL: func.func @_QMmod1Ptestmulhi
+! CHECK: fir.call @__nv_mulhi
+
+! CHECK-LABEL: func.func @_QMmod1Ptestumulhi
+! CHECK: fir.call @__nv_umulhi
+
+! CHECK-LABEL: func.func @_QMmod1Ptestmul64hi
+! CHECK: fir.call @__nv_mul64hi
+
+! CHECK-LABEL: func.func @_QMmod1Ptestumul64hi
+! CHECK: fir.call @__nv_umul64hi
diff --git a/flang/test/Lower/HLFIR/call-issue-124043.f90 b/flang/test/Lower/HLFIR/call-issue-124043.f90
new file mode 100644
index 0000000..441063c
--- /dev/null
+++ b/flang/test/Lower/HLFIR/call-issue-124043.f90
@@ -0,0 +1,15 @@
+! Reproducer for https://github.com/llvm/llvm-project/issues/124043 lowering
+! crash.
+! RUN: bbc -emit-hlfir -o - %s | FileCheck %s
+
+subroutine repro(a)
+  integer a(10)
+  associate (b => a(::2)+1)
+    call bar(b)
+  end associate
+end
+! CHECK-LABEL:   func.func @_QPrepro(
+! CHECK:           %[[VAL_11:.*]] = hlfir.elemental
+! CHECK:           %[[VAL_16:.*]]:3 = hlfir.associate %[[VAL_11]]
+! CHECK:           %[[VAL_18:.*]]:2 = hlfir.declare %[[VAL_16]]#1
+! CHECK:           fir.call @_QPbar(%[[VAL_18]]#1)
diff --git a/flang/test/Lower/HLFIR/calls-constant-expr-arg.f90 b/flang/test/Lower/HLFIR/calls-constant-expr-arg.f90
index 61e7ef9..3e8e10e 100644
--- a/flang/test/Lower/HLFIR/calls-constant-expr-arg.f90
+++ b/flang/test/Lower/HLFIR/calls-constant-expr-arg.f90
@@ -62,3 +62,31 @@ end subroutine test
 ! CHECK:           hlfir.end_associate %[[VAL_7]]#1, %[[VAL_7]]#2 : !fir.ref<i32>, i1
 ! CHECK:           return
 ! CHECK:         }
+
+subroutine test_associate(i)
+  interface
+   subroutine foo(x)
+     real :: x(:)
+   end subroutine
+  end interface
+  real, parameter :: p(*) = [1.,2.,3.,4.]
+  integer(8) :: i
+  associate(a => p(1:i))
+    associate(b => a(1:1:2))
+      call foo(b)
+    end associate
+  end associate
+end subroutine
+! CHECK-LABEL:   func.func @_QPtest_associate(
+! CHECK:           %[[VAL_3:.*]] = fir.address_of(@_QFtest_associateECp) : !fir.ref<!fir.array<4xf32>>
+! CHECK:           %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_3]](%{{.*}}) {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QFtest_associateECp"} : (!fir.ref<!fir.array<4xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<4xf32>>, !fir.ref<!fir.array<4xf32>>)
+! CHECK:           %[[VAL_18:.*]] = hlfir.designate %[[VAL_6]]#0 {{.*}}
+! CHECK:           %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {{.*}}
+! CHECK:           %[[VAL_25:.*]] = hlfir.designate %[[VAL_19]]#0 {{.*}}
+! CHECK:           %[[VAL_26:.*]]:2 = hlfir.declare %[[VAL_25]] {uniq_name = "_QFtest_associateEb"} : (!fir.box<!fir.array<1xf32>>) -> (!fir.box<!fir.array<1xf32>>, !fir.box<!fir.array<1xf32>>)
+! CHECK:           %[[VAL_27:.*]] = hlfir.as_expr %[[VAL_26]]#0 : (!fir.box<!fir.array<1xf32>>) -> !hlfir.expr<1xf32>
+! CHECK:           %[[VAL_30:.*]]:3 = hlfir.associate %[[VAL_27]]({{.*}}) {adapt.valuebyref} : (!hlfir.expr<1xf32>, !fir.shape<1>) -> (!fir.ref<!fir.array<1xf32>>, !fir.ref<!fir.array<1xf32>>, i1)
+! CHECK:           %[[VAL_31:.*]] = fir.embox %[[VAL_30]]
+! CHECK:           %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (!fir.box<!fir.array<1xf32>>) -> !fir.box<!fir.array<?xf32>>
+! CHECK:           fir.call @_QPfoo(%[[VAL_32]]) {{.*}} : (!fir.box<!fir.array<?xf32>>) -> ()
+! CHECK:           hlfir.end_associate %[[VAL_30]]#1, %[[VAL_30]]#2 : !fir.ref<!fir.array<1xf32>>, i1
diff --git a/flang/test/Lower/OpenMP/Todo/reduction-inscan.f90 b/flang/test/Lower/OpenMP/Todo/reduction-inscan.f90
deleted file mode 100644
index 152d91a..0000000
--- a/flang/test/Lower/OpenMP/Todo/reduction-inscan.f90
+++ /dev/null
@@ -1,15 +0,0 @@
-! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-
-! CHECK: not yet implemented: Reduction modifiers are not supported
-subroutine reduction_inscan()
-  integer :: i,j
-  i = 0
-
-  !$omp do reduction(inscan, +:i)
-  do j=1,10
-     !$omp scan inclusive(i)
-     i = i + 1
-  end do
-  !$omp end do
-end subroutine reduction_inscan
diff --git a/flang/test/Lower/OpenMP/Todo/reduction-modifiers.f90 b/flang/test/Lower/OpenMP/Todo/reduction-modifiers.f90
deleted file mode 100644
index 82625ed..0000000
--- a/flang/test/Lower/OpenMP/Todo/reduction-modifiers.f90
+++ /dev/null
@@ -1,14 +0,0 @@
-! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-
-! CHECK: not yet implemented: Reduction modifiers are not supported
-
-subroutine foo()
-  integer :: i, j
-  j = 0
-  !$omp do reduction (inscan, *: j)
-  do i = 1, 10
-    !$omp scan inclusive(j)
-    j = j + 1
-  end do
-end subroutine
diff --git a/flang/test/Lower/OpenMP/Todo/reduction-task.f90 b/flang/test/Lower/OpenMP/Todo/reduction-task.f90
index 6707f65..b8bfc37 100644
--- a/flang/test/Lower/OpenMP/Todo/reduction-task.f90
+++ b/flang/test/Lower/OpenMP/Todo/reduction-task.f90
@@ -1,7 +1,7 @@
 ! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
 ! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
 
-! CHECK: not yet implemented: Reduction modifiers are not supported
+! CHECK: not yet implemented: Reduction modifier `task` is not supported
 subroutine reduction_task()
   integer :: i
   i = 0
diff --git a/flang/test/Lower/OpenMP/scan.f90 b/flang/test/Lower/OpenMP/scan.f90
new file mode 100644
index 0000000..97b672e
--- /dev/null
+++ b/flang/test/Lower/OpenMP/scan.f90
@@ -0,0 +1,36 @@
+! RUN: bbc -emit-hlfir -fopenmp %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
+
+! CHECK: omp.wsloop reduction(mod: inscan, @add_reduction_i32 %{{.*}} -> %[[RED_ARG_1:.*]] : {{.*}}) {
+! CHECK: %[[RED_DECL_1:.*]]:2 = hlfir.declare %[[RED_ARG_1]]
+! CHECK: omp.scan inclusive(%[[RED_DECL_1]]#1 : {{.*}})
+
+subroutine inclusive_scan(a, b, n)
+ implicit none
+ integer a(:), b(:)
+ integer x, k, n
+
+ !$omp parallel do reduction(inscan, +: x)
+ do k = 1, n
+   x = x + a(k)
+   !$omp scan inclusive(x)
+   b(k) = x
+ end do
+end subroutine inclusive_scan
+
+
+! CHECK: omp.wsloop reduction(mod: inscan, @add_reduction_i32 %{{.*}} -> %[[RED_ARG_2:.*]] : {{.*}}) {
+! CHECK: %[[RED_DECL_2:.*]]:2 = hlfir.declare %[[RED_ARG_2]]
+! CHECK: omp.scan exclusive(%[[RED_DECL_2]]#1 : {{.*}})
+subroutine exclusive_scan(a, b, n)
+ implicit none
+ integer a(:), b(:)
+ integer x, k, n
+
+ !$omp parallel do reduction(inscan, +: x)
+ do k = 1, n
+   x = x + a(k)
+   !$omp scan exclusive(x)
+   b(k) = x
+ end do
+end subroutine exclusive_scan
diff --git a/flang/test/Semantics/cuf09.cuf b/flang/test/Semantics/cuf09.cuf
index fda8938..7d32e0d 100644
--- a/flang/test/Semantics/cuf09.cuf
+++ b/flang/test/Semantics/cuf09.cuf
@@ -192,3 +192,16 @@ program main
      enddo
   enddo
 end
+
+subroutine host1()
+  integer, device :: a(32)
+  integer :: i, j
+
+  !$cuf kernel do(1) <<<*,32>>>
+  do i = 1, 32
+    a(i) = a(i) * 2.0
+    !ERROR: 'syncthreads' may not be called in device code
+    call syncthreads() ! missing explicit use cudadevice
+    a(i) = a(i) + a(j) - 34.0
+  end do
+end 
diff --git a/flang/test/Semantics/cuf18.cuf b/flang/test/Semantics/cuf18.cuf
index ce9a2a3..e51e5c9 100644
--- a/flang/test/Semantics/cuf18.cuf
+++ b/flang/test/Semantics/cuf18.cuf
@@ -1,10 +1,67 @@
-! RUN: %python %S/test_errors.py %s %flang_fc1
+! RUN: %python %S/test_errors.py %s %flang_fc1 -fopenacc
 
 subroutine sub1()
   real, allocatable, device :: a(:)
+  integer :: i
 
 !ERROR: Unsupported CUDA data transfer
   a = a + 10 ! Illegal expression according to 3.4.2
+
+  !$cuf kernel do
+  do i = 1, 10
+    a(i) = a(i) + 10 ! ok in cuf kernel do
+  end do
+
+  !$acc parallel loop
+  do i = 1, 10
+    a(i) = a(i) + 10 ! ok in openacc combined construct
+  end do
+
+  !$acc serial loop
+  do i = 1, 10
+    a(i) = a(i) + 10 ! ok in openacc combined construct
+  end do
+
+  !$acc kernels loop
+  do i = 1, 10
+    a(i) = a(i) + 10 ! ok in openacc combined construct
+  end do
+
+  !$acc parallel
+  !$acc loop
+  do i = 1, 10
+    a(i) = a(i) + 10 ! ok in nested openacc construct
+  end do
+  !$acc end parallel
+
+  !$acc kernels
+  !$acc loop
+  do i = 1, 10
+    a(i) = a(i) + 10 ! ok in nested openacc construct
+  end do
+  !$acc end kernels
+
+  !$acc serial
+  !$acc loop
+  do i = 1, 10
+    a(i) = a(i) + 10 ! ok in nested openacc construct
+  end do
+  !$acc end serial
+
+  !$acc loop
+  do i = 1, 10
+    a(i) = a(i) + 10 ! ok acc loop
+  end do
+
+  !$acc data
+
+  do i = 1, 10
+!ERROR: Unsupported CUDA data transfer
+    a(i) = a(i) + 10
+  end do
+
+  !$acc end data
+
 end subroutine
 
 
diff --git a/flang/test/Semantics/implicit17.f90 b/flang/test/Semantics/implicit17.f90
new file mode 100644
index 0000000..e11123d
--- /dev/null
+++ b/flang/test/Semantics/implicit17.f90
@@ -0,0 +1,14 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1 -fimplicit-none-ext
+external x
+integer :: f, i, arr(1) = [0]
+call x
+!ERROR: 'y' is an external procedure without the EXTERNAL attribute in a scope with IMPLICIT NONE(EXTERNAL)
+call y
+!ERROR: 'f' is an external procedure without the EXTERNAL attribute in a scope with IMPLICIT NONE(EXTERNAL)
+i = f()
+block
+  !ERROR: 'z' is an external procedure without the EXTERNAL attribute in a scope with IMPLICIT NONE(EXTERNAL)
+  call z
+end block
+print *, arr(1) ! no error
+end
diff --git a/flang/tools/bbc/CMakeLists.txt b/flang/tools/bbc/CMakeLists.txt
index 85aeb85..97462be 100644
--- a/flang/tools/bbc/CMakeLists.txt
+++ b/flang/tools/bbc/CMakeLists.txt
@@ -29,6 +29,11 @@ target_link_libraries(bbc PRIVATE
   flangFrontend
   flangPasses
   FlangOpenMPTransforms
+  FortranCommon
+  FortranParser
+  FortranEvaluate
+  FortranSemantics
+  FortranLower
 )
 
 mlir_target_link_libraries(bbc PRIVATE
@@ -36,9 +41,4 @@ mlir_target_link_libraries(bbc PRIVATE
   ${extension_libs}
   MLIRAffineToStandard
   MLIRSCFToControlFlow
-  FortranCommon
-  FortranParser
-  FortranEvaluate
-  FortranSemantics
-  FortranLower
 )
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 09eb51a..49e3c04 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -341,6 +341,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.unistd.readlink
     libc.src.unistd.readlinkat
     libc.src.unistd.rmdir
+    libc.src.unistd.setsid
     libc.src.unistd.symlink
     libc.src.unistd.symlinkat
     libc.src.unistd.sysconf
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index 14a05a2..4d1d871 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -337,6 +337,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.unistd.readlink
     libc.src.unistd.readlinkat
     libc.src.unistd.rmdir
+    libc.src.unistd.setsid
     libc.src.unistd.symlink
     libc.src.unistd.symlinkat
     libc.src.unistd.sysconf
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 366e4d3..c8e0ada 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -340,6 +340,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.unistd.readlink
     libc.src.unistd.readlinkat
     libc.src.unistd.rmdir
+    libc.src.unistd.setsid
     libc.src.unistd.symlink
     libc.src.unistd.symlinkat
     libc.src.unistd.sysconf
diff --git a/libc/include/llvm-libc-macros/linux/sys-wait-macros.h b/libc/include/llvm-libc-macros/linux/sys-wait-macros.h
index c101638..d01cfa7 100644
--- a/libc/include/llvm-libc-macros/linux/sys-wait-macros.h
+++ b/libc/include/llvm-libc-macros/linux/sys-wait-macros.h
@@ -9,36 +9,19 @@
 #ifndef LLVM_LIBC_MACROS_LINUX_SYS_WAIT_MACROS_H
 #define LLVM_LIBC_MACROS_LINUX_SYS_WAIT_MACROS_H
 
-// Wait flags
-#define WNOHANG 1    // Do not block
-#define WUNTRACED 2  // Report is a child has stopped even if untraced
-#define WEXITED 4    // Report dead child
-#define WCONTINUED 8 // Report if a stopped child has been resumed by SIGCONT
-#define WSTOPPED WUNTRACED
-
-// Wait status info macros
-#define __WEXITSTATUS(status) (((status)&0xff00) >> 8)
-#define __WTERMSIG(status) ((status)&0x7f)
-#define __WIFEXITED(status) (__WTERMSIG(status) == 0)
-
-// Macros for constructing status values.
-#define __W_EXITCODE(ret, sig) ((ret) << 8 | (sig))
-#define __W_STOPCODE(sig) ((sig) << 8 | 0x7f)
-#define __W_CONTINUED 0xffff
-#define __WCOREFLAG 0x80
-
-#define WEXITSTATUS(status) __WEXITSTATUS(status)
-#define WTERMSIG(status) __WTERMSIG(status)
-#define WIFEXITED(status) __WIFEXITED(status)
-
-#define WCOREFLAG __WCOREFLAG
-#define W_EXITCODE(ret, sig) __W_EXITCODE(ret, sig)
-#define W_STOPCODE(sig) __W_STOPCODE(sig)
-
-// First argument to waitid:
-#define P_ALL 0
-#define P_PID 1
-#define P_PGID 2
-#define P_PIDFD 3
+#include <linux/wait.h>
+
+#define WCOREDUMP(status) ((status) & WCOREFLAG)
+#define WEXITSTATUS(status) (((status) & 0xff00) >> 8)
+#define WIFCONTINUED(status) ((status) == 0xffff)
+#define WIFEXITED(status) (WTERMSIG(status) == 0)
+#define WIFSIGNALED(status) ((WTERMSIG(status) + 1) >= 2)
+#define WIFSTOPPED(status) (WTERMSIG(status) == 0x7f)
+#define WSTOPSIG(status) WEXITSTATUS(status)
+#define WTERMSIG(status) ((status) & 0x7f)
+
+#define WCOREFLAG 0x80
+#define W_EXITCODE(ret, sig) ((ret) << 8 | (sig))
+#define W_STOPCODE(sig) ((sig) << 8 | 0x7f)
 
 #endif // LLVM_LIBC_MACROS_LINUX_SYS_WAIT_MACROS_H
diff --git a/libc/include/unistd.yaml b/libc/include/unistd.yaml
index c1901be..d04d46b 100644
--- a/libc/include/unistd.yaml
+++ b/libc/include/unistd.yaml
@@ -275,6 +275,12 @@ functions:
       - type: const void *__restrict
       - type: void *
       - type: ssize_t
+  - name: setsid
+    standards:
+      - POSIX
+    return_type: pid_t
+    arguments:
+      - type: void
   - name: symlink
     standards:
       - POSIX
diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt
index 0e0556f..17f03a6 100644
--- a/libc/src/__support/CMakeLists.txt
+++ b/libc/src/__support/CMakeLists.txt
@@ -87,7 +87,7 @@ add_header_library(
     endian_internal.h
     macros/properties/architectures.h
     macros/attributes.h
-    macros/properties/cpu_features.h
+    macros/config.h
 )
 
 add_header_library(
diff --git a/libc/src/__support/FPUtil/CMakeLists.txt b/libc/src/__support/FPUtil/CMakeLists.txt
index 522b4af..bfe0170f 100644
--- a/libc/src/__support/FPUtil/CMakeLists.txt
+++ b/libc/src/__support/FPUtil/CMakeLists.txt
@@ -110,6 +110,7 @@ add_header_library(
   DEPENDS
     libc.src.__support.common
     libc.src.__support.FPUtil.generic.sqrt
+    libc.src.__support.macros.properties.cpu_features
 )
 
 add_header_library(
@@ -120,6 +121,7 @@ add_header_library(
     libc.src.__support.common
     libc.src.__support.CPP.type_traits
     libc.src.__support.FPUtil.generic.fma
+    libc.src.__support.macros.properties.cpu_features
   FLAGS
     FMA_OPT
 )
@@ -130,6 +132,7 @@ add_header_library(
     multiply_add.h
   DEPENDS
     libc.src.__support.common
+    libc.src.__support.macros.properties.cpu_features
   FLAGS
     FMA_OPT
 )
@@ -150,6 +153,7 @@ add_header_library(
   DEPENDS
     libc.src.__support.common
     libc.src.__support.macros.optimization
+    libc.src.__support.macros.properties.cpu_features
   FLAGS
     ROUND_OPT
 )
@@ -161,6 +165,7 @@ add_header_library(
   DEPENDS
     libc.src.__support.common
     libc.src.__support.number_pair
+    libc.src.__support.macros.properties.cpu_features
     .multiply_add
 )
 
diff --git a/libc/src/dirent/readdir.h b/libc/src/dirent/readdir.h
index 2dcf536..bc46a1b 100644
--- a/libc/src/dirent/readdir.h
+++ b/libc/src/dirent/readdir.h
@@ -9,7 +9,7 @@
 #ifndef LLVM_LIBC_SRC_DIRENT_READDIR_H
 #define LLVM_LIBC_SRC_DIRENT_READDIR_H
 
-#include "src/__support/macros/config.h"
+#include "src/__support/macros/config.h" // LIBC_NAMESPACE_DECL
 #include <dirent.h>
 
 namespace LIBC_NAMESPACE_DECL {
diff --git a/libc/src/string/allocating_string_utils.h b/libc/src/string/allocating_string_utils.h
index b3a8663..1dece51 100644
--- a/libc/src/string/allocating_string_utils.h
+++ b/libc/src/string/allocating_string_utils.h
@@ -11,7 +11,7 @@
 
 #include "src/__support/CPP/new.h"
 #include "src/__support/CPP/optional.h"
-#include "src/__support/macros/config.h"
+#include "src/__support/macros/config.h" // LIBC_NAMESPACE_DECL
 #include "src/string/memory_utils/inline_memcpy.h"
 #include "src/string/string_utils.h"
 
diff --git a/libc/src/string/memory_utils/aarch64/inline_bcmp.h b/libc/src/string/memory_utils/aarch64/inline_bcmp.h
index 9319641..e41ac20 100644
--- a/libc/src/string/memory_utils/aarch64/inline_bcmp.h
+++ b/libc/src/string/memory_utils/aarch64/inline_bcmp.h
@@ -9,7 +9,7 @@
 #define LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_BCMP_H
 
 #include "src/__support/macros/attributes.h"   // LIBC_INLINE
-#include "src/__support/macros/config.h"
+#include "src/__support/macros/config.h"       // LIBC_NAMESPACE_DECL
 #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
 #include "src/string/memory_utils/op_aarch64.h"
 #include "src/string/memory_utils/op_generic.h"
diff --git a/libc/src/string/memory_utils/aarch64/inline_memset.h b/libc/src/string/memory_utils/aarch64/inline_memset.h
index a2c0553..efcbfd0 100644
--- a/libc/src/string/memory_utils/aarch64/inline_memset.h
+++ b/libc/src/string/memory_utils/aarch64/inline_memset.h
@@ -9,7 +9,7 @@
 #define LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_MEMSET_H
 
 #include "src/__support/macros/attributes.h" // LIBC_INLINE
-#include "src/__support/macros/config.h"
+#include "src/__support/macros/config.h"     // LIBC_NAMESPACE_DECL
 #include "src/string/memory_utils/op_aarch64.h"
 #include "src/string/memory_utils/op_generic.h"
 #include "src/string/memory_utils/utils.h" // Ptr, CPtr
diff --git a/libc/src/string/memory_utils/generic/builtin.h b/libc/src/string/memory_utils/generic/builtin.h
index 5670a4e..2b9ecc0 100644
--- a/libc/src/string/memory_utils/generic/builtin.h
+++ b/libc/src/string/memory_utils/generic/builtin.h
@@ -10,7 +10,7 @@
 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_BUILTIN_H
 
 #include "src/__support/macros/attributes.h" // LIBC_INLINE
-#include "src/__support/macros/config.h"
+#include "src/__support/macros/config.h"     // LIBC_NAMESPACE_DECL
 #include "src/string/memory_utils/utils.h" // Ptr, CPtr
 
 #include <stddef.h> // size_t
diff --git a/libc/src/string/memory_utils/inline_bcmp.h b/libc/src/string/memory_utils/inline_bcmp.h
index 14cf16e..52c738d 100644
--- a/libc/src/string/memory_utils/inline_bcmp.h
+++ b/libc/src/string/memory_utils/inline_bcmp.h
@@ -10,7 +10,8 @@
 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_INLINE_BCMP_H
 
 #include "src/__support/common.h"
-#include "src/__support/macros/config.h"
+#include "src/__support/macros/attributes.h" // LIBC_INLINE
+#include "src/__support/macros/config.h"     // LIBC_NAMESPACE_DECL
 #include "src/__support/macros/properties/architectures.h" // LIBC_TARGET_ARCH_IS_
 
 #include <stddef.h> // size_t
diff --git a/libc/src/string/memory_utils/inline_bzero.h b/libc/src/string/memory_utils/inline_bzero.h
index 4a92e8b..a131b68 100644
--- a/libc/src/string/memory_utils/inline_bzero.h
+++ b/libc/src/string/memory_utils/inline_bzero.h
@@ -10,7 +10,8 @@
 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_INLINE_BZERO_H
 
 #include "src/__support/common.h"
-#include "src/__support/macros/config.h"
+#include "src/__support/macros/attributes.h" // LIBC_INLINE
+#include "src/__support/macros/config.h"     // LIBC_NAMESPACE_DECL
 #include "src/string/memory_utils/inline_memset.h"
 
 #include <stddef.h> // size_t
diff --git a/libc/src/string/memory_utils/inline_memmem.h b/libc/src/string/memory_utils/inline_memmem.h
index 15e3d63..1e9649c 100644
--- a/libc/src/string/memory_utils/inline_memmem.h
+++ b/libc/src/string/memory_utils/inline_memmem.h
@@ -9,8 +9,8 @@
 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_INLINE_MEMMEM_H
 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_INLINE_MEMMEM_H
 
-#include "src/__support/macros/attributes.h"
-#include "src/__support/macros/config.h"
+#include "src/__support/macros/attributes.h" // LIBC_INLINE
+#include "src/__support/macros/config.h"     // LIBC_NAMESPACE_DECL
 
 #include <stddef.h>
 
diff --git a/libc/src/string/memory_utils/inline_memmove.h b/libc/src/string/memory_utils/inline_memmove.h
index 85d0159..84671c5 100644
--- a/libc/src/string/memory_utils/inline_memmove.h
+++ b/libc/src/string/memory_utils/inline_memmove.h
@@ -9,7 +9,8 @@
 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_INLINE_MEMMOVE_H
 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_INLINE_MEMMOVE_H
 
-#include "src/__support/macros/config.h"
+#include "src/__support/macros/attributes.h" // LIBC_INLINE
+#include "src/__support/macros/config.h"     // LIBC_NAMESPACE_DECL
 #include <stddef.h> // size_t, ptrdiff_t
 
 #if defined(LIBC_TARGET_ARCH_IS_X86)
diff --git a/libc/src/string/memory_utils/inline_strcmp.h b/libc/src/string/memory_utils/inline_strcmp.h
index 281d5b1..6758e79 100644
--- a/libc/src/string/memory_utils/inline_strcmp.h
+++ b/libc/src/string/memory_utils/inline_strcmp.h
@@ -9,7 +9,8 @@
 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_INLINE_STRCMP_H
 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_INLINE_STRCMP_H
 
-#include "src/__support/macros/config.h"
+#include "src/__support/macros/attributes.h" // LIBC_INLINE
+#include "src/__support/macros/config.h"     // LIBC_NAMESPACE_DECL
 #include <stddef.h>
 
 namespace LIBC_NAMESPACE_DECL {
diff --git a/libc/src/string/memory_utils/inline_strstr.h b/libc/src/string/memory_utils/inline_strstr.h
index 9c99e92..5495cc4 100644
--- a/libc/src/string/memory_utils/inline_strstr.h
+++ b/libc/src/string/memory_utils/inline_strstr.h
@@ -9,7 +9,8 @@
 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_INLINE_STRSTR_H
 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_INLINE_STRSTR_H
 
-#include "src/__support/macros/config.h"
+#include "src/__support/macros/attributes.h" // LIBC_INLINE
+#include "src/__support/macros/config.h"     // LIBC_NAMESPACE_DECL
 #include "src/string/memory_utils/inline_memmem.h"
 #include "src/string/string_utils.h"
 #include <stddef.h>
diff --git a/libc/src/string/memory_utils/op_aarch64.h b/libc/src/string/memory_utils/op_aarch64.h
index 1090ea2..868c644 100644
--- a/libc/src/string/memory_utils/op_aarch64.h
+++ b/libc/src/string/memory_utils/op_aarch64.h
@@ -13,7 +13,8 @@
 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_AARCH64_H
 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_AARCH64_H
 
-#include "src/__support/macros/config.h"
+#include "src/__support/macros/attributes.h" // LIBC_INLINE
+#include "src/__support/macros/config.h"     // LIBC_NAMESPACE_DECL
 #include "src/__support/macros/properties/architectures.h"
 
 #if defined(LIBC_TARGET_ARCH_IS_AARCH64)
diff --git a/libc/src/string/memory_utils/op_builtin.h b/libc/src/string/memory_utils/op_builtin.h
index d7c1b1f..27b621d 100644
--- a/libc/src/string/memory_utils/op_builtin.h
+++ b/libc/src/string/memory_utils/op_builtin.h
@@ -16,7 +16,8 @@
 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_BUILTIN_H
 
 #include "src/__support/CPP/type_traits.h"
-#include "src/__support/macros/config.h"
+#include "src/__support/macros/attributes.h" // LIBC_INLINE
+#include "src/__support/macros/config.h"     // LIBC_NAMESPACE_DECL
 #include "src/string/memory_utils/utils.h"
 
 namespace LIBC_NAMESPACE_DECL {
diff --git a/libc/src/string/memory_utils/op_generic.h b/libc/src/string/memory_utils/op_generic.h
index da20a84..9349cfdd 100644
--- a/libc/src/string/memory_utils/op_generic.h
+++ b/libc/src/string/memory_utils/op_generic.h
@@ -27,7 +27,8 @@
 #include "src/__support/CPP/type_traits.h"
 #include "src/__support/common.h"
 #include "src/__support/endian_internal.h"
-#include "src/__support/macros/config.h"
+#include "src/__support/macros/attributes.h" // LIBC_INLINE
+#include "src/__support/macros/config.h"     // LIBC_NAMESPACE_DECL
 #include "src/__support/macros/optimization.h"
 #include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_INT64
 #include "src/string/memory_utils/op_builtin.h"
diff --git a/libc/src/string/memory_utils/op_riscv.h b/libc/src/string/memory_utils/op_riscv.h
index 2d211de..4292d7a 100644
--- a/libc/src/string/memory_utils/op_riscv.h
+++ b/libc/src/string/memory_utils/op_riscv.h
@@ -12,7 +12,8 @@
 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_RISCV_H
 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_RISCV_H
 
-#include "src/__support/macros/config.h"
+#include "src/__support/macros/attributes.h" // LIBC_INLINE
+#include "src/__support/macros/config.h"     // LIBC_NAMESPACE_DECL
 #include "src/__support/macros/properties/architectures.h"
 
 #if defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
diff --git a/libc/src/string/memory_utils/op_x86.h b/libc/src/string/memory_utils/op_x86.h
index 309610e..8bd8412 100644
--- a/libc/src/string/memory_utils/op_x86.h
+++ b/libc/src/string/memory_utils/op_x86.h
@@ -12,7 +12,8 @@
 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_X86_H
 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_X86_H
 
-#include "src/__support/macros/config.h"
+#include "src/__support/macros/attributes.h" // LIBC_INLINE
+#include "src/__support/macros/config.h"     // LIBC_NAMESPACE_DECL
 #include "src/__support/macros/properties/architectures.h"
 
 #if defined(LIBC_TARGET_ARCH_IS_X86)
diff --git a/libc/src/string/memory_utils/riscv/inline_bcmp.h b/libc/src/string/memory_utils/riscv/inline_bcmp.h
index 4bdde27c..666ad63 100644
--- a/libc/src/string/memory_utils/riscv/inline_bcmp.h
+++ b/libc/src/string/memory_utils/riscv/inline_bcmp.h
@@ -9,7 +9,7 @@
 #define LIBC_SRC_STRING_MEMORY_UTILS_RISCV_INLINE_BCMP_H
 
 #include "src/__support/macros/attributes.h"               // LIBC_INLINE
-#include "src/__support/macros/config.h"
+#include "src/__support/macros/config.h" // LIBC_NAMESPACE_DECL
 #include "src/__support/macros/properties/architectures.h" // LIBC_TARGET_ARCH_IS_RISCV64
 #include "src/string/memory_utils/generic/aligned_access.h"
 #include "src/string/memory_utils/utils.h" // Ptr, CPtr
diff --git a/libc/src/string/memory_utils/riscv/inline_memcmp.h b/libc/src/string/memory_utils/riscv/inline_memcmp.h
index ca83495..c49e8d0 100644
--- a/libc/src/string/memory_utils/riscv/inline_memcmp.h
+++ b/libc/src/string/memory_utils/riscv/inline_memcmp.h
@@ -9,7 +9,7 @@
 #define LIBC_SRC_STRING_MEMORY_UTILS_RISCV_INLINE_MEMCMP_H
 
 #include "src/__support/macros/attributes.h"               // LIBC_INLINE
-#include "src/__support/macros/config.h"
+#include "src/__support/macros/config.h" // LIBC_NAMESPACE_DECL
 #include "src/__support/macros/properties/architectures.h" // LIBC_TARGET_ARCH_IS_RISCV64
 #include "src/string/memory_utils/generic/aligned_access.h"
 #include "src/string/memory_utils/utils.h" // Ptr, CPtr
diff --git a/libc/src/string/memory_utils/riscv/inline_memcpy.h b/libc/src/string/memory_utils/riscv/inline_memcpy.h
index 8eb87e0..e907ae4 100644
--- a/libc/src/string/memory_utils/riscv/inline_memcpy.h
+++ b/libc/src/string/memory_utils/riscv/inline_memcpy.h
@@ -9,7 +9,7 @@
 #define LIBC_SRC_STRING_MEMORY_UTILS_RISCV_INLINE_MEMCPY_H
 
 #include "src/__support/macros/attributes.h"               // LIBC_INLINE
-#include "src/__support/macros/config.h"
+#include "src/__support/macros/config.h" // LIBC_NAMESPACE_DECL
 #include "src/__support/macros/properties/architectures.h" // LIBC_TARGET_ARCH_IS_RISCV64
 #include "src/string/memory_utils/generic/aligned_access.h"
 #include "src/string/memory_utils/utils.h" // Ptr, CPtr
diff --git a/libc/src/string/memory_utils/riscv/inline_memmove.h b/libc/src/string/memory_utils/riscv/inline_memmove.h
index 28de4c2..01a9fa5 100644
--- a/libc/src/string/memory_utils/riscv/inline_memmove.h
+++ b/libc/src/string/memory_utils/riscv/inline_memmove.h
@@ -9,7 +9,7 @@
 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_RISCV_INLINE_MEMMOVE_H
 
 #include "src/__support/macros/attributes.h"               // LIBC_INLINE
-#include "src/__support/macros/config.h"
+#include "src/__support/macros/config.h" // LIBC_NAMESPACE_DECL
 #include "src/__support/macros/properties/architectures.h" // LIBC_TARGET_ARCH_IS_RISCV64
 #include "src/string/memory_utils/generic/byte_per_byte.h"
 #include "src/string/memory_utils/utils.h" // Ptr, CPtr
diff --git a/libc/src/string/memory_utils/riscv/inline_memset.h b/libc/src/string/memory_utils/riscv/inline_memset.h
index d6ab523..09a7a76 100644
--- a/libc/src/string/memory_utils/riscv/inline_memset.h
+++ b/libc/src/string/memory_utils/riscv/inline_memset.h
@@ -9,7 +9,7 @@
 #define LIBC_SRC_STRING_MEMORY_UTILS_RISCV_INLINE_MEMSET_H
 
 #include "src/__support/macros/attributes.h"               // LIBC_INLINE
-#include "src/__support/macros/config.h"
+#include "src/__support/macros/config.h" // LIBC_NAMESPACE_DECL
 #include "src/__support/macros/properties/architectures.h" // LIBC_TARGET_ARCH_IS_RISCV64
 #include "src/string/memory_utils/generic/aligned_access.h"
 #include "src/string/memory_utils/utils.h" // Ptr, CPtr
diff --git a/libc/src/string/memory_utils/utils.h b/libc/src/string/memory_utils/utils.h
index cae65bd..5c9bc72 100644
--- a/libc/src/string/memory_utils/utils.h
+++ b/libc/src/string/memory_utils/utils.h
@@ -14,7 +14,7 @@
 #include "src/__support/CPP/type_traits.h"
 #include "src/__support/endian_internal.h"
 #include "src/__support/macros/attributes.h" // LIBC_INLINE
-#include "src/__support/macros/config.h"
+#include "src/__support/macros/config.h"     // LIBC_NAMESPACE_DECL
 #include "src/__support/macros/properties/architectures.h"
 
 #include <stddef.h> // size_t
diff --git a/libc/src/string/memory_utils/x86_64/inline_bcmp.h b/libc/src/string/memory_utils/x86_64/inline_bcmp.h
index cc54c41..8be391b 100644
--- a/libc/src/string/memory_utils/x86_64/inline_bcmp.h
+++ b/libc/src/string/memory_utils/x86_64/inline_bcmp.h
@@ -9,7 +9,7 @@
 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_BCMP_H
 
 #include "src/__support/macros/attributes.h" // LIBC_INLINE
-#include "src/__support/macros/config.h"
+#include "src/__support/macros/config.h"     // LIBC_NAMESPACE_DECL
 #include "src/string/memory_utils/op_generic.h"
 #include "src/string/memory_utils/op_x86.h"
 #include "src/string/memory_utils/utils.h" // Ptr, CPtr
diff --git a/libc/src/string/memory_utils/x86_64/inline_memset.h b/libc/src/string/memory_utils/x86_64/inline_memset.h
index 9f8e584..35719a8 100644
--- a/libc/src/string/memory_utils/x86_64/inline_memset.h
+++ b/libc/src/string/memory_utils/x86_64/inline_memset.h
@@ -8,8 +8,8 @@
 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMSET_H
 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMSET_H
 
-#include "src/__support/macros/attributes.h" // LIBC_INLINE
-#include "src/__support/macros/config.h"
+#include "src/__support/macros/attributes.h" // LIBC_INLINE, LIBC_INLINE_VAR
+#include "src/__support/macros/config.h"     // LIBC_NAMESPACE_DECL
 #include "src/string/memory_utils/op_generic.h"
 #include "src/string/memory_utils/op_x86.h"
 #include "src/string/memory_utils/utils.h" // Ptr, CPtr
diff --git a/libc/src/unistd/CMakeLists.txt b/libc/src/unistd/CMakeLists.txt
index 6bdea0c..fb563ec 100644
--- a/libc/src/unistd/CMakeLists.txt
+++ b/libc/src/unistd/CMakeLists.txt
@@ -232,6 +232,13 @@ add_entrypoint_object(
 )
 
 add_entrypoint_object(
+  setsid
+  ALIAS
+  DEPENDS
+    .${LIBC_TARGET_OS}.setsid
+)
+
+add_entrypoint_object(
   symlink
   ALIAS
   DEPENDS
diff --git a/libc/src/unistd/linux/CMakeLists.txt b/libc/src/unistd/linux/CMakeLists.txt
index 2bb17f5..afdc595 100644
--- a/libc/src/unistd/linux/CMakeLists.txt
+++ b/libc/src/unistd/linux/CMakeLists.txt
@@ -460,6 +460,18 @@ add_entrypoint_object(
 )
 
 add_entrypoint_object(
+  setsid
+  SRCS
+    setsid.cpp
+  HDRS
+    ../setsid.h
+  DEPENDS
+    libc.hdr.types.pid_t
+    libc.include.sys_syscall
+    libc.src.__support.OSUtil.osutil
+)
+
+add_entrypoint_object(
   symlink
   SRCS
     symlink.cpp
diff --git a/libc/src/unistd/linux/setsid.cpp b/libc/src/unistd/linux/setsid.cpp
new file mode 100644
index 0000000..df4629b
--- /dev/null
+++ b/libc/src/unistd/linux/setsid.cpp
@@ -0,0 +1,24 @@
+//===-- Linux implementation of setsid-------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/unistd/setsid.h"
+
+#include "hdr/types/pid_t.h"
+#include "src/__support/OSUtil/syscall.h" // For internal syscall function.
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+
+#include <sys/syscall.h> // For syscall numbers.
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(pid_t, setsid, ()) {
+  return LIBC_NAMESPACE::syscall_impl<pid_t>(SYS_setsid);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/unistd/setsid.h b/libc/src/unistd/setsid.h
new file mode 100644
index 0000000..44dd7dd
--- /dev/null
+++ b/libc/src/unistd/setsid.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for setsid ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_UNISTD_GETPID_H
+#define LLVM_LIBC_SRC_UNISTD_GETPID_H
+
+#include "hdr/types/pid_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+pid_t setsid();
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_UNISTD_GETPID_H
diff --git a/libc/test/src/unistd/CMakeLists.txt b/libc/test/src/unistd/CMakeLists.txt
index b01cce9..665cb36 100644
--- a/libc/test/src/unistd/CMakeLists.txt
+++ b/libc/test/src/unistd/CMakeLists.txt
@@ -288,6 +288,16 @@ add_libc_unittest(
 )
 
 add_libc_unittest(
+  setsid_test
+  SUITE
+    libc_unistd_unittests
+  SRCS
+    setsid_test.cpp
+  DEPENDS
+    libc.src.unistd.setsid
+)
+
+add_libc_unittest(
   symlink_test
   SUITE
     libc_unistd_unittests
diff --git a/libc/test/src/unistd/setsid_test.cpp b/libc/test/src/unistd/setsid_test.cpp
new file mode 100644
index 0000000..466faa1
--- /dev/null
+++ b/libc/test/src/unistd/setsid_test.cpp
@@ -0,0 +1,15 @@
+//===-- Unittests for setsid ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/unistd/setsid.h"
+#include "test/UnitTest/Test.h"
+
+TEST(LlvmLibcGetPidTest, SmokeTest) {
+  // setsid always succeeds. So, we just call it as a smoke test.
+  LIBC_NAMESPACE::setsid();
+}
diff --git a/libclc/clc/include/clc/integer/clc_rotate.h b/libclc/clc/include/clc/integer/clc_rotate.h
new file mode 100644
index 0000000..21c945a
--- /dev/null
+++ b/libclc/clc/include/clc/integer/clc_rotate.h
@@ -0,0 +1,12 @@
+#ifndef __CLC_INTEGER_CLC_ROTATE_H__
+#define __CLC_INTEGER_CLC_ROTATE_H__
+
+#define __CLC_FUNCTION __clc_rotate
+#define __CLC_BODY <clc/shared/binary_decl.inc>
+
+#include <clc/integer/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_INTEGER_CLC_ROTATE_H__
diff --git a/libclc/clc/lib/clspv/SOURCES b/libclc/clc/lib/clspv/SOURCES
index c96a6fc..2fe07f6 100644
--- a/libclc/clc/lib/clspv/SOURCES
+++ b/libclc/clc/lib/clspv/SOURCES
@@ -7,6 +7,7 @@
 ../generic/integer/clc_mul_hi.cl
 ../generic/integer/clc_popcount.cl
 ../generic/integer/clc_rhadd.cl
+../generic/integer/clc_rotate.cl
 ../generic/integer/clc_sub_sat.cl
 ../generic/integer/clc_upsample.cl
 ../generic/math/clc_ceil.cl
diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES
index 5fd882e..2d31f9a 100644
--- a/libclc/clc/lib/generic/SOURCES
+++ b/libclc/clc/lib/generic/SOURCES
@@ -13,6 +13,7 @@ integer/clc_mul24.cl
 integer/clc_mul_hi.cl
 integer/clc_popcount.cl
 integer/clc_rhadd.cl
+integer/clc_rotate.cl
 integer/clc_sub_sat.cl
 integer/clc_upsample.cl
 math/clc_ceil.cl
diff --git a/libclc/clc/lib/generic/integer/clc_rotate.cl b/libclc/clc/lib/generic/integer/clc_rotate.cl
new file mode 100644
index 0000000..7546862
--- /dev/null
+++ b/libclc/clc/lib/generic/integer/clc_rotate.cl
@@ -0,0 +1,5 @@
+#include <clc/internal/clc.h>
+#include <clc/utils.h>
+
+#define __CLC_BODY <clc_rotate.inc>
+#include <clc/integer/gentype.inc>
diff --git a/libclc/clc/lib/generic/integer/clc_rotate.inc b/libclc/clc/lib/generic/integer/clc_rotate.inc
new file mode 100644
index 0000000..f144553
--- /dev/null
+++ b/libclc/clc/lib/generic/integer/clc_rotate.inc
@@ -0,0 +1,22 @@
+#define __CLC_AS_GENTYPE(x) __CLC_XCONCAT(__clc_as_, __CLC_GENTYPE)(x)
+#define __CLC_AS_U_GENTYPE(x) __CLC_XCONCAT(__clc_as_, __CLC_U_GENTYPE)(x)
+
+// The rotate(A, B) builtin left-shifts corresponding to the usual OpenCL shift
+// modulo rules. These rules state that A is left-shifted by the log2(N) least
+// significant bits in B when viewed as an unsigned integer value. Thus we don't
+// have to worry about signed shift amounts, and can perform the computation in
+// unsigned types.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_rotate(__CLC_GENTYPE x,
+                                                  __CLC_GENTYPE n) {
+  __CLC_U_GENTYPE x_as_u = __CLC_AS_U_GENTYPE(x);
+  __CLC_U_GENTYPE mask = (__CLC_U_GENTYPE)(__CLC_GENSIZE - 1);
+
+  __CLC_U_GENTYPE lshift_amt = __CLC_AS_U_GENTYPE(n) & mask;
+
+  __CLC_U_GENTYPE rshift_amt =
+      (((__CLC_U_GENTYPE)__CLC_GENSIZE - lshift_amt) & mask);
+
+  __CLC_U_GENTYPE result = (x_as_u << lshift_amt) | (x_as_u >> rshift_amt);
+
+  return __CLC_AS_GENTYPE(result);
+}
diff --git a/libclc/clc/lib/spirv/SOURCES b/libclc/clc/lib/spirv/SOURCES
index c3cc406..96040a3 100644
--- a/libclc/clc/lib/spirv/SOURCES
+++ b/libclc/clc/lib/spirv/SOURCES
@@ -11,6 +11,7 @@
 ../generic/integer/clc_mul_hi.cl
 ../generic/integer/clc_popcount.cl
 ../generic/integer/clc_rhadd.cl
+../generic/integer/clc_rotate.cl
 ../generic/integer/clc_sub_sat.cl
 ../generic/integer/clc_upsample.cl
 ../generic/math/clc_ceil.cl
diff --git a/libclc/generic/lib/integer/rotate.cl b/libclc/generic/lib/integer/rotate.cl
index 27ce515..1e72af3 100644
--- a/libclc/generic/lib/integer/rotate.cl
+++ b/libclc/generic/lib/integer/rotate.cl
@@ -1,4 +1,7 @@
 #include <clc/clc.h>
+#include <clc/integer/clc_rotate.h>
+
+#define FUNCTION rotate
+#define __CLC_BODY <clc/shared/binary_def.inc>
 
-#define __CLC_BODY <rotate.inc>
 #include <clc/integer/gentype.inc>
diff --git a/libclc/generic/lib/integer/rotate.inc b/libclc/generic/lib/integer/rotate.inc
deleted file mode 100644
index 33bb0a8..0000000
--- a/libclc/generic/lib/integer/rotate.inc
+++ /dev/null
@@ -1,42 +0,0 @@
-/**
- * Not necessarily optimal... but it produces correct results (at least for int)
- * If we're lucky, LLVM will recognize the pattern and produce rotate
- * instructions:
- * http://llvm.1065342.n5.nabble.com/rotate-td47679.html
- * 
- * Eventually, someone should feel free to implement an llvm-specific version
- */
-
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE rotate(__CLC_GENTYPE x, __CLC_GENTYPE n){
-    //Try to avoid extra work if someone's spinning the value through multiple
-    //full rotations
-    n = n % (__CLC_GENTYPE)__CLC_GENSIZE;
-
-#ifdef __CLC_SCALAR
-    if (n > 0){
-        return (x << n) | (((__CLC_U_GENTYPE)x) >> (__CLC_GENSIZE - n));
-    } else if (n == 0){
-        return x;
-    } else {
-        return ( (((__CLC_U_GENTYPE)x) >> -n) | (x << (__CLC_GENSIZE + n)) );
-    }
-#else
-    //XXX: There's a lot of __builtin_astype calls to cast everything to
-    //     unsigned ... This should be improved so that if __CLC_GENTYPE==__CLC_U_GENTYPE, no
-    //     casts are required.
-    
-    __CLC_U_GENTYPE x_1 = __builtin_astype(x, __CLC_U_GENTYPE);
-
-    //XXX: Is (__CLC_U_GENTYPE >> S__CLC_GENTYPE) | (__CLC_U_GENTYPE << S__CLC_GENTYPE) legal?
-    //     If so, then combine the amt and shifts into a single set of statements
-    
-    __CLC_U_GENTYPE amt;
-    amt = (n < (__CLC_GENTYPE)0 ? __builtin_astype((__CLC_GENTYPE)0-n, __CLC_U_GENTYPE) : (__CLC_U_GENTYPE)0);
-    x_1 = (x_1 >> amt) | (x_1 << ((__CLC_U_GENTYPE)__CLC_GENSIZE - amt));
-
-    amt = (n < (__CLC_GENTYPE)0 ? (__CLC_U_GENTYPE)0 : __builtin_astype(n, __CLC_U_GENTYPE));
-    x_1 = (x_1 << amt) | (x_1 >> ((__CLC_U_GENTYPE)__CLC_GENSIZE - amt));
-
-    return __builtin_astype(x_1, __CLC_GENTYPE);
-#endif
-}
diff --git a/libcxx/docs/ReleaseNotes/21.rst b/libcxx/docs/ReleaseNotes/21.rst
index 82f1de6..2439360 100644
--- a/libcxx/docs/ReleaseNotes/21.rst
+++ b/libcxx/docs/ReleaseNotes/21.rst
@@ -46,6 +46,7 @@ Improvements and New Features
 - The ``std::ranges::{copy, copy_n, copy_backward}`` algorithms have been optimized for ``std::vector<bool>::iterator``\s,
   resulting in a performance improvement of up to 2000x.
 
+- Updated formatting library to Unicode 16.0.0.
 
 Deprecations and Removals
 -------------------------
diff --git a/libcxx/include/__algorithm/fill_n.h b/libcxx/include/__algorithm/fill_n.h
index a7e01c4..0da78e1 100644
--- a/libcxx/include/__algorithm/fill_n.h
+++ b/libcxx/include/__algorithm/fill_n.h
@@ -41,11 +41,7 @@ __fill_n_bool(__bit_iterator<_Cp, false> __first, typename __size_difference_typ
   if (__first.__ctz_ != 0) {
     __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_);
     __storage_type __dn    = std::min(__clz_f, __n);
-    __storage_type __m     = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
-    if (_FillVal)
-      *__first.__seg_ |= __m;
-    else
-      *__first.__seg_ &= ~__m;
+    std::__fill_masked_range(std::__to_address(__first.__seg_), __clz_f - __dn, __first.__ctz_, _FillVal);
     __n -= __dn;
     ++__first.__seg_;
   }
@@ -56,11 +52,7 @@ __fill_n_bool(__bit_iterator<_Cp, false> __first, typename __size_difference_typ
   // do last partial word
   if (__n > 0) {
     __first.__seg_ += __nw;
-    __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
-    if (_FillVal)
-      *__first.__seg_ |= __m;
-    else
-      *__first.__seg_ &= ~__m;
+    std::__fill_masked_range(std::__to_address(__first.__seg_), __bits_per_word - __n, 0u, _FillVal);
   }
 }
 
diff --git a/libcxx/include/__bit_reference b/libcxx/include/__bit_reference
index bb8d472..aad4703 100644
--- a/libcxx/include/__bit_reference
+++ b/libcxx/include/__bit_reference
@@ -14,6 +14,7 @@
 #include <__algorithm/copy_backward.h>
 #include <__algorithm/copy_n.h>
 #include <__algorithm/min.h>
+#include <__assert>
 #include <__bit/countr.h>
 #include <__compare/ordering.h>
 #include <__config>
@@ -24,10 +25,13 @@
 #include <__memory/construct_at.h>
 #include <__memory/pointer_traits.h>
 #include <__type_traits/conditional.h>
+#include <__type_traits/enable_if.h>
 #include <__type_traits/is_constant_evaluated.h>
+#include <__type_traits/is_unsigned.h>
 #include <__type_traits/void_t.h>
 #include <__utility/pair.h>
 #include <__utility/swap.h>
+#include <climits>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -58,6 +62,26 @@ struct __size_difference_type_traits<_Cp, __void_t<typename _Cp::difference_type
   using size_type       = typename _Cp::size_type;
 };
 
+// This function is designed to operate correctly even for smaller integral types like `uint8_t`, `uint16_t`,
+// or `unsigned short`. Casting back to _StorageType is crucial to prevent undefined behavior that can arise
+// from integral promotions.
+// See https://github.com/llvm/llvm-project/pull/122410.
+template <class _StoragePointer>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
+__fill_masked_range(_StoragePointer __word, unsigned __clz, unsigned __ctz, bool __fill_val) {
+  static_assert(is_unsigned<typename pointer_traits<_StoragePointer>::element_type>::value,
+                "__fill_masked_range must be called with unsigned type");
+  using _StorageType = typename pointer_traits<_StoragePointer>::element_type;
+  _LIBCPP_ASSERT_VALID_INPUT_RANGE(
+      __ctz + __clz < sizeof(_StorageType) * CHAR_BIT, "__fill_masked_range called with invalid range");
+  _StorageType __m = static_cast<_StorageType>(static_cast<_StorageType>(~static_cast<_StorageType>(0)) >> __clz) &
+                     static_cast<_StorageType>(static_cast<_StorageType>(~static_cast<_StorageType>(0)) << __ctz);
+  if (__fill_val)
+    *__word |= __m;
+  else
+    *__word &= static_cast<_StorageType>(~__m);
+}
+
 template <class _Cp, bool = __has_storage_type<_Cp>::value>
 class __bit_reference {
   using __storage_type _LIBCPP_NODEBUG    = typename _Cp::__storage_type;
diff --git a/libcxx/include/__format/escaped_output_table.h b/libcxx/include/__format/escaped_output_table.h
index 7a0b352..1401b46 100644
--- a/libcxx/include/__format/escaped_output_table.h
+++ b/libcxx/include/__format/escaped_output_table.h
@@ -109,7 +109,7 @@ namespace __escaped_output_table {
 /// - bits [14, 31] The lower bound code point of the range. The upper bound of
 ///   the range is lower bound + size. Note the code expects code units the fit
 ///   into 18 bits, instead of the 21 bits needed for the full Unicode range.
-_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = {
+_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[735] = {
     0x00000020 /* 00000000 - 00000020 [   33] */,
     0x001fc021 /* 0000007f - 000000a0 [   34] */,
     0x002b4000 /* 000000ad - 000000ad [    1] */,
@@ -136,7 +136,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = {
     0x02170001 /* 0000085c - 0000085d [    2] */,
     0x0217c000 /* 0000085f - 0000085f [    1] */,
     0x021ac004 /* 0000086b - 0000086f [    5] */,
-    0x0223c008 /* 0000088f - 00000897 [    9] */,
+    0x0223c007 /* 0000088f - 00000896 [    8] */,
     0x02388000 /* 000008e2 - 000008e2 [    1] */,
     0x02610000 /* 00000984 - 00000984 [    1] */,
     0x02634001 /* 0000098d - 0000098e [    2] */,
@@ -331,12 +331,11 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = {
     0x06a68005 /* 00001a9a - 00001a9f [    6] */,
     0x06ab8001 /* 00001aae - 00001aaf [    2] */,
     0x06b3c030 /* 00001acf - 00001aff [   49] */,
-    0x06d34002 /* 00001b4d - 00001b4f [    3] */,
-    0x06dfc000 /* 00001b7f - 00001b7f [    1] */,
+    0x06d34000 /* 00001b4d - 00001b4d [    1] */,
     0x06fd0007 /* 00001bf4 - 00001bfb [    8] */,
     0x070e0002 /* 00001c38 - 00001c3a [    3] */,
     0x07128002 /* 00001c4a - 00001c4c [    3] */,
-    0x07224006 /* 00001c89 - 00001c8f [    7] */,
+    0x0722c004 /* 00001c8b - 00001c8f [    5] */,
     0x072ec001 /* 00001cbb - 00001cbc [    2] */,
     0x07320007 /* 00001cc8 - 00001ccf [    8] */,
     0x073ec004 /* 00001cfb - 00001cff [    5] */,
@@ -364,7 +363,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = {
     0x0830400e /* 000020c1 - 000020cf [   15] */,
     0x083c400e /* 000020f1 - 000020ff [   15] */,
     0x08630003 /* 0000218c - 0000218f [    4] */,
-    0x0909c018 /* 00002427 - 0000243f [   25] */,
+    0x090a8015 /* 0000242a - 0000243f [   22] */,
     0x0912c014 /* 0000244b - 0000245f [   21] */,
     0x0add0001 /* 00002b74 - 00002b75 [    2] */,
     0x0ae58000 /* 00002b96 - 00002b96 [    1] */,
@@ -393,16 +392,16 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = {
     0x0c400004 /* 00003100 - 00003104 [    5] */,
     0x0c4c0000 /* 00003130 - 00003130 [    1] */,
     0x0c63c000 /* 0000318f - 0000318f [    1] */,
-    0x0c79000a /* 000031e4 - 000031ee [   11] */,
+    0x0c798008 /* 000031e6 - 000031ee [    9] */,
     0x0c87c000 /* 0000321f - 0000321f [    1] */,
     0x29234002 /* 0000a48d - 0000a48f [    3] */,
     0x2931c008 /* 0000a4c7 - 0000a4cf [    9] */,
     0x298b0013 /* 0000a62c - 0000a63f [   20] */,
     0x29be0007 /* 0000a6f8 - 0000a6ff [    8] */,
-    0x29f2c004 /* 0000a7cb - 0000a7cf [    5] */,
+    0x29f38001 /* 0000a7ce - 0000a7cf [    2] */,
     0x29f48000 /* 0000a7d2 - 0000a7d2 [    1] */,
     0x29f50000 /* 0000a7d4 - 0000a7d4 [    1] */,
-    0x29f68017 /* 0000a7da - 0000a7f1 [   24] */,
+    0x29f74014 /* 0000a7dd - 0000a7f1 [   21] */,
     0x2a0b4002 /* 0000a82d - 0000a82f [    3] */,
     0x2a0e8005 /* 0000a83a - 0000a83f [    6] */,
     0x2a1e0007 /* 0000a878 - 0000a87f [    8] */,
@@ -491,7 +490,8 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = {
     0x41688000 /* 000105a2 - 000105a2 [    1] */,
     0x416c8000 /* 000105b2 - 000105b2 [    1] */,
     0x416e8000 /* 000105ba - 000105ba [    1] */,
-    0x416f4042 /* 000105bd - 000105ff [   67] */,
+    0x416f4002 /* 000105bd - 000105bf [    3] */,
+    0x417d000b /* 000105f4 - 000105ff [   12] */,
     0x41cdc008 /* 00010737 - 0001073f [    9] */,
     0x41d58009 /* 00010756 - 0001075f [   10] */,
     0x41da0017 /* 00010768 - 0001077f [   24] */,
@@ -534,11 +534,15 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = {
     0x432cc00c /* 00010cb3 - 00010cbf [   13] */,
     0x433cc006 /* 00010cf3 - 00010cf9 [    7] */,
     0x434a0007 /* 00010d28 - 00010d2f [    8] */,
-    0x434e8125 /* 00010d3a - 00010e5f [  294] */,
+    0x434e8005 /* 00010d3a - 00010d3f [    6] */,
+    0x43598002 /* 00010d66 - 00010d68 [    3] */,
+    0x43618007 /* 00010d86 - 00010d8d [    8] */,
+    0x436400cf /* 00010d90 - 00010e5f [  208] */,
     0x439fc000 /* 00010e7f - 00010e7f [    1] */,
     0x43aa8000 /* 00010eaa - 00010eaa [    1] */,
     0x43ab8001 /* 00010eae - 00010eaf [    2] */,
-    0x43ac804a /* 00010eb2 - 00010efc [   75] */,
+    0x43ac800f /* 00010eb2 - 00010ec1 [   16] */,
+    0x43b14036 /* 00010ec5 - 00010efb [   55] */,
     0x43ca0007 /* 00010f28 - 00010f2f [    8] */,
     0x43d68015 /* 00010f5a - 00010f6f [   22] */,
     0x43e28025 /* 00010f8a - 00010faf [   38] */,
@@ -578,7 +582,18 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = {
     0x44d60004 /* 00011358 - 0001135c [    5] */,
     0x44d90001 /* 00011364 - 00011365 [    2] */,
     0x44db4002 /* 0001136d - 0001136f [    3] */,
-    0x44dd408a /* 00011375 - 000113ff [  139] */,
+    0x44dd400a /* 00011375 - 0001137f [   11] */,
+    0x44e28000 /* 0001138a - 0001138a [    1] */,
+    0x44e30001 /* 0001138c - 0001138d [    2] */,
+    0x44e3c000 /* 0001138f - 0001138f [    1] */,
+    0x44ed8000 /* 000113b6 - 000113b6 [    1] */,
+    0x44f04000 /* 000113c1 - 000113c1 [    1] */,
+    0x44f0c001 /* 000113c3 - 000113c4 [    2] */,
+    0x44f18000 /* 000113c6 - 000113c6 [    1] */,
+    0x44f2c000 /* 000113cb - 000113cb [    1] */,
+    0x44f58000 /* 000113d6 - 000113d6 [    1] */,
+    0x44f64007 /* 000113d9 - 000113e0 [    8] */,
+    0x44f8c01c /* 000113e3 - 000113ff [   29] */,
     0x45170000 /* 0001145c - 0001145c [    1] */,
     0x4518801d /* 00011462 - 0001147f [   30] */,
     0x45320007 /* 000114c8 - 000114cf [    8] */,
@@ -589,7 +604,8 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = {
     0x45968005 /* 0001165a - 0001165f [    6] */,
     0x459b4012 /* 0001166d - 0001167f [   19] */,
     0x45ae8005 /* 000116ba - 000116bf [    6] */,
-    0x45b28035 /* 000116ca - 000116ff [   54] */,
+    0x45b28005 /* 000116ca - 000116cf [    6] */,
+    0x45b9001b /* 000116e4 - 000116ff [   28] */,
     0x45c6c001 /* 0001171b - 0001171c [    2] */,
     0x45cb0003 /* 0001172c - 0001172f [    4] */,
     0x45d1c0b8 /* 00011747 - 000117ff [  185] */,
@@ -609,7 +625,9 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = {
     0x46920007 /* 00011a48 - 00011a4f [    8] */,
     0x46a8c00c /* 00011aa3 - 00011aaf [   13] */,
     0x46be4006 /* 00011af9 - 00011aff [    7] */,
-    0x46c280f5 /* 00011b0a - 00011bff [  246] */,
+    0x46c280b5 /* 00011b0a - 00011bbf [  182] */,
+    0x46f8800d /* 00011be2 - 00011bef [   14] */,
+    0x46fe8005 /* 00011bfa - 00011bff [    6] */,
     0x47024000 /* 00011c09 - 00011c09 [    1] */,
     0x470dc000 /* 00011c37 - 00011c37 [    1] */,
     0x47118009 /* 00011c46 - 00011c4f [   10] */,
@@ -633,7 +651,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = {
     0x47be4006 /* 00011ef9 - 00011eff [    7] */,
     0x47c44000 /* 00011f11 - 00011f11 [    1] */,
     0x47cec002 /* 00011f3b - 00011f3d [    3] */,
-    0x47d68055 /* 00011f5a - 00011faf [   86] */,
+    0x47d6c054 /* 00011f5b - 00011faf [   85] */,
     0x47ec400e /* 00011fb1 - 00011fbf [   15] */,
     0x47fc800c /* 00011ff2 - 00011ffe [   13] */,
     0x48e68065 /* 0001239a - 000123ff [  102] */,
@@ -642,8 +660,10 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = {
     0x49510a4b /* 00012544 - 00012f8f [ 2636] */,
     0x4bfcc00c /* 00012ff3 - 00012fff [   13] */,
     0x4d0c000f /* 00013430 - 0001343f [   16] */,
-    0x4d158fa9 /* 00013456 - 000143ff [ 4010] */,
-    0x5191e1b8 /* 00014647 - 000167ff [ 8633] */,
+    0x4d158009 /* 00013456 - 0001345f [   10] */,
+    0x50fec004 /* 000143fb - 000143ff [    5] */,
+    0x5191dab8 /* 00014647 - 000160ff [ 6841] */,
+    0x584e86c5 /* 0001613a - 000167ff [ 1734] */,
     0x5a8e4006 /* 00016a39 - 00016a3f [    7] */,
     0x5a97c000 /* 00016a5f - 00016a5f [    1] */,
     0x5a9a8003 /* 00016a6a - 00016a6d [    4] */,
@@ -655,7 +675,8 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = {
     0x5ad68000 /* 00016b5a - 00016b5a [    1] */,
     0x5ad88000 /* 00016b62 - 00016b62 [    1] */,
     0x5ade0004 /* 00016b78 - 00016b7c [    5] */,
-    0x5ae402af /* 00016b90 - 00016e3f [  688] */,
+    0x5ae401af /* 00016b90 - 00016d3f [  432] */,
+    0x5b5e80c5 /* 00016d7a - 00016e3f [  198] */,
     0x5ba6c064 /* 00016e9b - 00016eff [  101] */,
     0x5bd2c003 /* 00016f4b - 00016f4e [    4] */,
     0x5be20006 /* 00016f88 - 00016f8e [    7] */,
@@ -663,7 +684,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = {
     0x5bf9400a /* 00016fe5 - 00016fef [   11] */,
     0x5bfc800d /* 00016ff2 - 00016fff [   14] */,
     0x61fe0007 /* 000187f8 - 000187ff [    8] */,
-    0x63358029 /* 00018cd6 - 00018cff [   42] */,
+    0x63358028 /* 00018cd6 - 00018cfe [   41] */,
     0x634262e6 /* 00018d09 - 0001afef [ 8935] */,
     0x6bfd0000 /* 0001aff4 - 0001aff4 [    1] */,
     0x6bff0000 /* 0001affc - 0001affc [    1] */,
@@ -678,7 +699,9 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = {
     0x6f1f4002 /* 0001bc7d - 0001bc7f [    3] */,
     0x6f224006 /* 0001bc89 - 0001bc8f [    7] */,
     0x6f268001 /* 0001bc9a - 0001bc9b [    2] */,
-    0x6f28125f /* 0001bca0 - 0001ceff [ 4704] */,
+    0x6f280f5f /* 0001bca0 - 0001cbff [ 3936] */,
+    0x733e8005 /* 0001ccfa - 0001ccff [    6] */,
+    0x73ad004b /* 0001ceb4 - 0001ceff [   76] */,
     0x73cb8001 /* 0001cf2e - 0001cf2f [    2] */,
     0x73d1c008 /* 0001cf47 - 0001cf4f [    9] */,
     0x73f1003b /* 0001cfc4 - 0001cfff [   60] */,
@@ -730,7 +753,9 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = {
     0x78abc010 /* 0001e2af - 0001e2bf [   17] */,
     0x78be8004 /* 0001e2fa - 0001e2fe [    5] */,
     0x78c001cf /* 0001e300 - 0001e4cf [  464] */,
-    0x793e82e5 /* 0001e4fa - 0001e7df [  742] */,
+    0x793e80d5 /* 0001e4fa - 0001e5cf [  214] */,
+    0x797ec003 /* 0001e5fb - 0001e5fe [    4] */,
+    0x798001df /* 0001e600 - 0001e7df [  480] */,
     0x79f9c000 /* 0001e7e7 - 0001e7e7 [    1] */,
     0x79fb0000 /* 0001e7ec - 0001e7ec [    1] */,
     0x79fbc000 /* 0001e7ef - 0001e7ef [    1] */,
@@ -800,18 +825,17 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = {
     0x7e168005 /* 0001f85a - 0001f85f [    6] */,
     0x7e220007 /* 0001f888 - 0001f88f [    8] */,
     0x7e2b8001 /* 0001f8ae - 0001f8af [    2] */,
-    0x7e2c804d /* 0001f8b2 - 0001f8ff [   78] */,
+    0x7e2f0003 /* 0001f8bc - 0001f8bf [    4] */,
+    0x7e30803d /* 0001f8c2 - 0001f8ff [   62] */,
     0x7e95000b /* 0001fa54 - 0001fa5f [   12] */,
     0x7e9b8001 /* 0001fa6e - 0001fa6f [    2] */,
     0x7e9f4002 /* 0001fa7d - 0001fa7f [    3] */,
-    0x7ea24006 /* 0001fa89 - 0001fa8f [    7] */,
-    0x7eaf8000 /* 0001fabe - 0001fabe [    1] */,
-    0x7eb18007 /* 0001fac6 - 0001facd [    8] */,
-    0x7eb70003 /* 0001fadc - 0001fadf [    4] */,
-    0x7eba4006 /* 0001fae9 - 0001faef [    7] */,
+    0x7ea28004 /* 0001fa8a - 0001fa8e [    5] */,
+    0x7eb1c006 /* 0001fac7 - 0001facd [    7] */,
+    0x7eb74001 /* 0001fadd - 0001fade [    2] */,
+    0x7eba8005 /* 0001faea - 0001faef [    6] */,
     0x7ebe4006 /* 0001faf9 - 0001faff [    7] */,
     0x7ee4c000 /* 0001fb93 - 0001fb93 [    1] */,
-    0x7ef2c024 /* 0001fbcb - 0001fbef [   37] */,
     0x7efe8405 /* 0001fbfa - 0001ffff [ 1030] */,
     0xa9b8001f /* 0002a6e0 - 0002a6ff [   32] */,
     0xadce8005 /* 0002b73a - 0002b73f [    6] */,
diff --git a/libcxx/include/__format/extended_grapheme_cluster_table.h b/libcxx/include/__format/extended_grapheme_cluster_table.h
index 7653a9e..f76e018 100644
--- a/libcxx/include/__format/extended_grapheme_cluster_table.h
+++ b/libcxx/include/__format/extended_grapheme_cluster_table.h
@@ -125,7 +125,7 @@ enum class __property : uint8_t {
 /// following benchmark.
 /// libcxx/benchmarks/std_format_spec_string_unicode.bench.cpp
 // clang-format off
-_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = {
+_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1501] = {
     0x00000091,
     0x00005005,
     0x00005811,
@@ -164,7 +164,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = {
     0x00414842,
     0x0042c822,
     0x00448018,
-    0x0044c072,
+    0x0044b882,
     0x00465172,
     0x00471008,
     0x004719f2,
@@ -246,14 +246,12 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = {
     0x0064101a,
     0x0065e002,
     0x0065f00a,
-    0x0065f802,
-    0x0066001a,
+    0x0065f812,
+    0x0066080a,
     0x00661002,
     0x0066181a,
-    0x00663002,
-    0x0066381a,
-    0x0066501a,
-    0x00666012,
+    0x00663022,
+    0x00665032,
     0x0066a812,
     0x00671012,
     0x0067980a,
@@ -318,10 +316,8 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = {
     0x008b047c,
     0x008d457b,
     0x009ae822,
-    0x00b89022,
-    0x00b8a80a,
-    0x00b99012,
-    0x00b9a00a,
+    0x00b89032,
+    0x00b99022,
     0x00ba9012,
     0x00bb9012,
     0x00bda012,
@@ -361,29 +357,23 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = {
     0x00d581e2,
     0x00d80032,
     0x00d8200a,
-    0x00d9a062,
-    0x00d9d80a,
-    0x00d9e002,
-    0x00d9e84a,
-    0x00da1002,
-    0x00da181a,
+    0x00d9a092,
+    0x00d9f03a,
+    0x00da1022,
     0x00db5882,
     0x00dc0012,
     0x00dc100a,
     0x00dd080a,
     0x00dd1032,
     0x00dd301a,
-    0x00dd4012,
-    0x00dd500a,
-    0x00dd5822,
+    0x00dd4052,
     0x00df3002,
     0x00df380a,
     0x00df4012,
     0x00df502a,
     0x00df6802,
     0x00df700a,
-    0x00df7822,
-    0x00df901a,
+    0x00df7842,
     0x00e1207a,
     0x00e16072,
     0x00e1a01a,
@@ -475,7 +465,8 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = {
     0x0547f802,
     0x05493072,
     0x054a38a2,
-    0x054a901a,
+    0x054a900a,
+    0x054a9802,
     0x054b01c4,
     0x054c0022,
     0x054c180a,
@@ -484,7 +475,8 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = {
     0x054db032,
     0x054dd01a,
     0x054de012,
-    0x054df02a,
+    0x054df01a,
+    0x054e0002,
     0x054f2802,
     0x05514852,
     0x0551781a,
@@ -1328,8 +1320,9 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = {
     0x0851f802,
     0x08572812,
     0x08692032,
+    0x086b4842,
     0x08755812,
-    0x0877e822,
+    0x0877e032,
     0x087a30a2,
     0x087c1032,
     0x0880000a,
@@ -1357,7 +1350,8 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = {
     0x088c100a,
     0x088d982a,
     0x088db082,
-    0x088df81a,
+    0x088df80a,
+    0x088e0002,
     0x088e1018,
     0x088e4832,
     0x088e700a,
@@ -1365,9 +1359,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = {
     0x0891602a,
     0x08917822,
     0x0891901a,
-    0x0891a002,
-    0x0891a80a,
-    0x0891b012,
+    0x0891a032,
     0x0891f002,
     0x08920802,
     0x0896f802,
@@ -1381,11 +1373,24 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = {
     0x089a0002,
     0x089a083a,
     0x089a381a,
-    0x089a582a,
+    0x089a581a,
+    0x089a6802,
     0x089ab802,
     0x089b101a,
     0x089b3062,
     0x089b8042,
+    0x089dc002,
+    0x089dc81a,
+    0x089dd852,
+    0x089e1002,
+    0x089e2802,
+    0x089e3822,
+    0x089e500a,
+    0x089e601a,
+    0x089e7022,
+    0x089e8808,
+    0x089e9002,
+    0x089f0812,
     0x08a1a82a,
     0x08a1c072,
     0x08a2001a,
@@ -1422,10 +1427,10 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = {
     0x08b5600a,
     0x08b56802,
     0x08b5701a,
-    0x08b58052,
-    0x08b5b00a,
-    0x08b5b802,
-    0x08b8e822,
+    0x08b58072,
+    0x08b8e802,
+    0x08b8f00a,
+    0x08b8f802,
     0x08b91032,
     0x08b9300a,
     0x08b93842,
@@ -1436,9 +1441,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = {
     0x08c98002,
     0x08c9884a,
     0x08c9b81a,
-    0x08c9d812,
-    0x08c9e80a,
-    0x08c9f002,
+    0x08c9d832,
     0x08c9f808,
     0x08ca000a,
     0x08ca0808,
@@ -1495,28 +1498,29 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = {
     0x08f9a01a,
     0x08f9b042,
     0x08f9f01a,
-    0x08fa0002,
-    0x08fa080a,
-    0x08fa1002,
+    0x08fa0022,
+    0x08fad002,
     0x09a180f1,
     0x09a20002,
     0x09a238e2,
+    0x0b08f0b2,
+    0x0b09502a,
+    0x0b096822,
     0x0b578042,
     0x0b598062,
+    0x0b6b180c,
+    0x0b6b383c,
     0x0b7a7802,
     0x0b7a8b6a,
     0x0b7c7832,
     0x0b7f2002,
-    0x0b7f801a,
+    0x0b7f8012,
     0x0de4e812,
     0x0de50031,
     0x0e7802d2,
     0x0e798162,
-    0x0e8b2802,
-    0x0e8b300a,
-    0x0e8b3822,
-    0x0e8b680a,
-    0x0e8b7042,
+    0x0e8b2842,
+    0x0e8b6852,
     0x0e8b9871,
     0x0e8bd872,
     0x0e8c2862,
@@ -1538,6 +1542,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = {
     0x0f157002,
     0x0f176032,
     0x0f276032,
+    0x0f2f7012,
     0x0f468062,
     0x0f4a2062,
     0x0f8007f3,
diff --git a/libcxx/include/__format/indic_conjunct_break_table.h b/libcxx/include/__format/indic_conjunct_break_table.h
index df6cfe6..f48ea62 100644
--- a/libcxx/include/__format/indic_conjunct_break_table.h
+++ b/libcxx/include/__format/indic_conjunct_break_table.h
@@ -107,10 +107,9 @@ enum class __property : uint8_t {
 /// following benchmark.
 /// libcxx/benchmarks/std_format_spec_string_unicode.bench.cpp
 // clang-format off
-_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[201] = {
-    0x00180139,
-    0x001a807d,
-    0x00241811,
+_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[403] = {
+    0x001801bd,
+    0x00241819,
     0x002c88b1,
     0x002df801,
     0x002e0805,
@@ -125,6 +124,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[201] = {
     0x0037500d,
     0x00388801,
     0x00398069,
+    0x003d3029,
     0x003f5821,
     0x003fe801,
     0x0040b00d,
@@ -132,87 +132,174 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[201] = {
     0x00412809,
     0x00414811,
     0x0042c809,
-    0x0044c01d,
+    0x0044b821,
     0x0046505d,
-    0x00471871,
+    0x0047187d,
     0x0048a890,
+    0x0049d001,
     0x0049e001,
+    0x004a081d,
     0x004a6802,
-    0x004a880d,
+    0x004a8819,
     0x004ac01c,
+    0x004b1005,
     0x004bc01c,
+    0x004c0801,
     0x004ca84c,
     0x004d5018,
     0x004d9000,
     0x004db00c,
     0x004de001,
+    0x004df001,
+    0x004e080d,
     0x004e6802,
+    0x004eb801,
     0x004ee004,
     0x004ef800,
+    0x004f1005,
     0x004f8004,
     0x004ff001,
+    0x00500805,
     0x0051e001,
+    0x00520805,
+    0x00523805,
+    0x00525809,
+    0x00528801,
+    0x00538005,
+    0x0053a801,
+    0x00540805,
     0x0054a84c,
     0x00555018,
     0x00559004,
     0x0055a810,
     0x0055e001,
+    0x00560811,
+    0x00563805,
     0x00566802,
+    0x00571005,
     0x0057c800,
+    0x0057d015,
+    0x00580801,
     0x0058a84c,
     0x00595018,
     0x00599004,
     0x0059a810,
     0x0059e001,
+    0x0059f005,
+    0x005a080d,
     0x005a6802,
+    0x005aa809,
     0x005ae004,
     0x005af800,
+    0x005b1005,
     0x005b8800,
+    0x005c1001,
+    0x005df001,
+    0x005e0001,
+    0x005e6801,
+    0x005eb801,
+    0x00600001,
+    0x00602001,
     0x0060a84c,
     0x0061503c,
     0x0061e001,
+    0x0061f009,
+    0x00623009,
+    0x00625009,
     0x00626802,
     0x0062a805,
     0x0062c008,
+    0x00631005,
+    0x00640801,
     0x0065e001,
+    0x0065f805,
+    0x00661001,
+    0x00663009,
+    0x0066500d,
+    0x0066a805,
+    0x00671005,
+    0x00680005,
     0x0068a894,
     0x0069d805,
+    0x0069f001,
+    0x006a080d,
     0x006a6802,
-    0x0071c009,
-    0x0072400d,
-    0x0075c009,
-    0x0076400d,
+    0x006ab801,
+    0x006b1005,
+    0x006c0801,
+    0x006e5001,
+    0x006e7801,
+    0x006e9009,
+    0x006eb001,
+    0x006ef801,
+    0x00718801,
+    0x0071a019,
+    0x0072381d,
+    0x00758801,
+    0x0075a021,
+    0x00764019,
     0x0078c005,
     0x0079a801,
     0x0079b801,
     0x0079c801,
-    0x007b8805,
-    0x007ba001,
-    0x007bd00d,
-    0x007c0001,
-    0x007c1009,
+    0x007b8835,
+    0x007c0011,
     0x007c3005,
+    0x007c6829,
+    0x007cc88d,
     0x007e3001,
-    0x0081b801,
+    0x0081680d,
+    0x00819015,
     0x0081c805,
+    0x0081e805,
+    0x0082c005,
+    0x0082f009,
+    0x0083880d,
+    0x00841001,
+    0x00842805,
     0x00846801,
+    0x0084e801,
     0x009ae809,
-    0x00b8a001,
-    0x00be9001,
+    0x00b8900d,
+    0x00b99009,
+    0x00ba9005,
+    0x00bb9005,
+    0x00bda005,
+    0x00bdb819,
+    0x00be3001,
+    0x00be4829,
     0x00bee801,
+    0x00c05809,
+    0x00c07801,
+    0x00c42805,
     0x00c54801,
+    0x00c90009,
+    0x00c93805,
+    0x00c99001,
     0x00c9c809,
     0x00d0b805,
+    0x00d0d801,
+    0x00d2b001,
+    0x00d2c019,
     0x00d30001,
-    0x00d3a81d,
+    0x00d31001,
+    0x00d3281d,
+    0x00d39825,
     0x00d3f801,
-    0x00d58035,
-    0x00d5f83d,
-    0x00d9a001,
+    0x00d58079,
+    0x00d8000d,
+    0x00d9a025,
+    0x00da1009,
     0x00db5821,
-    0x00dd5801,
+    0x00dc0005,
+    0x00dd100d,
+    0x00dd4015,
     0x00df3001,
-    0x00e1b801,
+    0x00df4005,
+    0x00df6801,
+    0x00df7811,
+    0x00e1601d,
+    0x00e1b005,
     0x00e68009,
     0x00e6a031,
     0x00e71019,
@@ -221,82 +308,193 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[201] = {
     0x00e7c005,
     0x00ee00fd,
     0x01006801,
-    0x01068031,
-    0x01070801,
-    0x0107282d,
+    0x01068081,
     0x01677809,
     0x016bf801,
     0x016f007d,
     0x01815015,
     0x0184c805,
-    0x05337801,
+    0x0533780d,
     0x0533a025,
     0x0534f005,
     0x05378005,
+    0x05401001,
+    0x05403001,
+    0x05405801,
+    0x05412805,
     0x05416001,
+    0x05462005,
     0x05470045,
-    0x05495809,
+    0x0547f801,
+    0x0549301d,
+    0x054a3829,
+    0x054a9801,
+    0x054c0009,
     0x054d9801,
+    0x054db00d,
+    0x054de005,
+    0x054e0001,
+    0x054f2801,
+    0x05514815,
+    0x05518805,
+    0x0551a805,
+    0x05521801,
+    0x05526001,
+    0x0553e001,
     0x05558001,
     0x05559009,
     0x0555b805,
     0x0555f005,
     0x05560801,
+    0x05576005,
     0x0557b001,
+    0x055f2801,
+    0x055f4001,
     0x055f6801,
     0x07d8f001,
+    0x07f0003d,
     0x07f1003d,
+    0x07fcf005,
     0x080fe801,
     0x08170001,
     0x081bb011,
-    0x08506801,
-    0x08507801,
+    0x08500809,
+    0x08502805,
+    0x0850600d,
     0x0851c009,
     0x0851f801,
     0x08572805,
     0x0869200d,
+    0x086b4811,
     0x08755805,
-    0x0877e809,
+    0x0877e00d,
     0x087a3029,
     0x087c100d,
+    0x08800801,
+    0x0881c039,
     0x08838001,
-    0x0883f801,
-    0x0885d001,
+    0x08839805,
+    0x0883f809,
+    0x0885980d,
+    0x0885c805,
+    0x08861001,
     0x08880009,
-    0x08899805,
+    0x08893811,
+    0x0889681d,
     0x088b9801,
-    0x088e5001,
-    0x0891b001,
-    0x08974805,
+    0x088c0005,
+    0x088db021,
+    0x088e0001,
+    0x088e480d,
+    0x088e7801,
+    0x08917809,
+    0x0891a00d,
+    0x0891f001,
+    0x08920801,
+    0x0896f801,
+    0x0897181d,
+    0x08980005,
     0x0899d805,
+    0x0899f001,
+    0x089a0001,
+    0x089a6801,
+    0x089ab801,
     0x089b3019,
     0x089b8011,
+    0x089dc001,
+    0x089dd815,
+    0x089e1001,
+    0x089e2801,
+    0x089e3809,
+    0x089e7009,
+    0x089e9001,
+    0x089f0805,
+    0x08a1c01d,
+    0x08a21009,
     0x08a23001,
     0x08a2f001,
-    0x08a61801,
-    0x08ae0001,
-    0x08b5b801,
-    0x08b95801,
-    0x08c1d001,
-    0x08c9f001,
+    0x08a58001,
+    0x08a59815,
+    0x08a5d001,
+    0x08a5e801,
+    0x08a5f805,
+    0x08a61005,
+    0x08ad7801,
+    0x08ad900d,
+    0x08ade005,
+    0x08adf805,
+    0x08aee005,
+    0x08b1981d,
+    0x08b1e801,
+    0x08b1f805,
+    0x08b55801,
+    0x08b56801,
+    0x08b5801d,
+    0x08b8e801,
+    0x08b8f801,
+    0x08b9100d,
+    0x08b93811,
+    0x08c17821,
+    0x08c1c805,
+    0x08c98001,
+    0x08c9d80d,
     0x08ca1801,
-    0x08d1a001,
+    0x08cea00d,
+    0x08ced005,
+    0x08cf0001,
+    0x08d00825,
+    0x08d19815,
+    0x08d1d80d,
     0x08d23801,
-    0x08d4c801,
-    0x08ea1001,
-    0x08ea2005,
+    0x08d28815,
+    0x08d2c809,
+    0x08d45031,
+    0x08d4c005,
+    0x08e18019,
+    0x08e1c015,
+    0x08e1f801,
+    0x08e49055,
+    0x08e55019,
+    0x08e59005,
+    0x08e5a805,
+    0x08e98815,
+    0x08e9d001,
+    0x08e9e005,
+    0x08e9f819,
+    0x08ea3801,
+    0x08ec8005,
+    0x08eca801,
     0x08ecb801,
-    0x08fa1001,
+    0x08f79805,
+    0x08f80005,
+    0x08f9b011,
+    0x08fa0009,
+    0x08fad001,
+    0x09a20001,
+    0x09a23839,
+    0x0b08f02d,
+    0x0b096809,
     0x0b578011,
     0x0b598019,
-    0x0de4f001,
-    0x0e8b2801,
-    0x0e8b3809,
-    0x0e8b7011,
+    0x0b7a7801,
+    0x0b7c780d,
+    0x0b7f2001,
+    0x0b7f8005,
+    0x0de4e805,
+    0x0e7800b5,
+    0x0e798059,
+    0x0e8b2811,
+    0x0e8b6815,
     0x0e8bd81d,
     0x0e8c2819,
     0x0e8d500d,
     0x0e921009,
+    0x0ed000d9,
+    0x0ed1d8c5,
+    0x0ed3a801,
+    0x0ed42001,
+    0x0ed4d811,
+    0x0ed50839,
     0x0f000019,
     0x0f004041,
     0x0f00d819,
@@ -307,8 +505,12 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[201] = {
     0x0f157001,
     0x0f17600d,
     0x0f27600d,
+    0x0f2f7005,
     0x0f468019,
-    0x0f4a2019};
+    0x0f4a2019,
+    0x0f9fd811,
+    0x7001017d,
+    0x700803bd};
 // clang-format on
 
 /// Returns the indic conjuct break property of a code point.
diff --git a/libcxx/include/__format/width_estimation_table.h b/libcxx/include/__format/width_estimation_table.h
index 5b4b395..0ea0b4f 100644
--- a/libcxx/include/__format/width_estimation_table.h
+++ b/libcxx/include/__format/width_estimation_table.h
@@ -119,7 +119,7 @@ namespace __width_estimation_table {
 /// - bits [0, 13] The size of the range, allowing 16384 elements.
 /// - bits [14, 31] The lower bound code point of the range. The upper bound of
 ///   the range is lower bound + size.
-_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[107] = {
+_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[110] = {
     0x0440005f /* 00001100 - 0000115f [   96] */, //
     0x08c68001 /* 0000231a - 0000231b [    2] */, //
     0x08ca4001 /* 00002329 - 0000232a [    2] */, //
@@ -128,8 +128,10 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[107] = {
     0x08fcc000 /* 000023f3 - 000023f3 [    1] */, //
     0x097f4001 /* 000025fd - 000025fe [    2] */, //
     0x09850001 /* 00002614 - 00002615 [    2] */, //
+    0x098c0007 /* 00002630 - 00002637 [    8] */, //
     0x0992000b /* 00002648 - 00002653 [   12] */, //
     0x099fc000 /* 0000267f - 0000267f [    1] */, //
+    0x09a28005 /* 0000268a - 0000268f [    6] */, //
     0x09a4c000 /* 00002693 - 00002693 [    1] */, //
     0x09a84000 /* 000026a1 - 000026a1 [    1] */, //
     0x09aa8001 /* 000026aa - 000026ab [    2] */, //
@@ -163,7 +165,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[107] = {
     0x0c264066 /* 00003099 - 000030ff [  103] */, //
     0x0c41402a /* 00003105 - 0000312f [   43] */, //
     0x0c4c405d /* 00003131 - 0000318e [   94] */, //
-    0x0c640053 /* 00003190 - 000031e3 [   84] */, //
+    0x0c640055 /* 00003190 - 000031e5 [   86] */, //
     0x0c7bc02f /* 000031ef - 0000321e [   48] */, //
     0x0c880027 /* 00003220 - 00003247 [   40] */, //
     0x0c943fff /* 00003250 - 0000724f [16384] */, //
@@ -182,7 +184,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[107] = {
     0x5bfc0001 /* 00016ff0 - 00016ff1 [    2] */, //
     0x5c0017f7 /* 00017000 - 000187f7 [ 6136] */, //
     0x620004d5 /* 00018800 - 00018cd5 [ 1238] */, //
-    0x63400008 /* 00018d00 - 00018d08 [    9] */, //
+    0x633fc009 /* 00018cff - 00018d08 [   10] */, //
     0x6bfc0003 /* 0001aff0 - 0001aff3 [    4] */, //
     0x6bfd4006 /* 0001aff5 - 0001affb [    7] */, //
     0x6bff4001 /* 0001affd - 0001affe [    2] */, //
@@ -192,6 +194,8 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[107] = {
     0x6c554000 /* 0001b155 - 0001b155 [    1] */, //
     0x6c590003 /* 0001b164 - 0001b167 [    4] */, //
     0x6c5c018b /* 0001b170 - 0001b2fb [  396] */, //
+    0x74c00056 /* 0001d300 - 0001d356 [   87] */, //
+    0x74d80016 /* 0001d360 - 0001d376 [   23] */, //
     0x7c010000 /* 0001f004 - 0001f004 [    1] */, //
     0x7c33c000 /* 0001f0cf - 0001f0cf [    1] */, //
     0x7c638000 /* 0001f18e - 0001f18e [    1] */, //
@@ -213,11 +217,10 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[107] = {
     0x7dfc0000 /* 0001f7f0 - 0001f7f0 [    1] */, //
     0x7e4000ff /* 0001f900 - 0001f9ff [  256] */, //
     0x7e9c000c /* 0001fa70 - 0001fa7c [   13] */, //
-    0x7ea00008 /* 0001fa80 - 0001fa88 [    9] */, //
-    0x7ea4002d /* 0001fa90 - 0001fabd [   46] */, //
-    0x7eafc006 /* 0001fabf - 0001fac5 [    7] */, //
-    0x7eb3800d /* 0001face - 0001fadb [   14] */, //
-    0x7eb80008 /* 0001fae0 - 0001fae8 [    9] */, //
+    0x7ea00009 /* 0001fa80 - 0001fa89 [   10] */, //
+    0x7ea3c037 /* 0001fa8f - 0001fac6 [   56] */, //
+    0x7eb3800e /* 0001face - 0001fadc [   15] */, //
+    0x7eb7c00a /* 0001fadf - 0001fae9 [   11] */, //
     0x7ebc0008 /* 0001faf0 - 0001faf8 [    9] */, //
     0x80003fff /* 00020000 - 00023fff [16384] */, //
     0x90003fff /* 00024000 - 00027fff [16384] */, //
diff --git a/libcxx/include/__fwd/bit_reference.h b/libcxx/include/__fwd/bit_reference.h
index 30462b6..d65f043 100644
--- a/libcxx/include/__fwd/bit_reference.h
+++ b/libcxx/include/__fwd/bit_reference.h
@@ -10,6 +10,9 @@
 #define _LIBCPP___FWD_BIT_REFERENCE_H
 
 #include <__config>
+#include <__memory/pointer_traits.h>
+#include <__type_traits/enable_if.h>
+#include <__type_traits/is_unsigned.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -23,6 +26,10 @@ class __bit_iterator;
 template <class, class = void>
 struct __size_difference_type_traits;
 
+template <class _StoragePointer>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
+__fill_masked_range(_StoragePointer __word, unsigned __ctz, unsigned __clz, bool __fill_val);
+
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // _LIBCPP___FWD_BIT_REFERENCE_H
diff --git a/libcxx/include/__locale_dir/support/windows.h b/libcxx/include/__locale_dir/support/windows.h
index ff89d3e..f0f76c5 100644
--- a/libcxx/include/__locale_dir/support/windows.h
+++ b/libcxx/include/__locale_dir/support/windows.h
@@ -215,7 +215,7 @@ inline _LIBCPP_HIDE_FROM_ABI size_t __strxfrm(char* __dest, const char* __src, s
   return ::_strxfrm_l(__dest, __src, __n, __loc);
 }
 
-#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+#if _LIBCPP_HAS_WIDE_CHARACTERS
 inline _LIBCPP_HIDE_FROM_ABI int __iswctype(wint_t __c, wctype_t __type, __locale_t __loc) {
   return ::_iswctype_l(__c, __type, __loc);
 }
@@ -240,7 +240,7 @@ inline _LIBCPP_HIDE_FROM_ABI int __wcscoll(const wchar_t* __ws1, const wchar_t*
 inline _LIBCPP_HIDE_FROM_ABI size_t __wcsxfrm(wchar_t* __dest, const wchar_t* __src, size_t __n, __locale_t __loc) {
   return ::_wcsxfrm_l(__dest, __src, __n, __loc);
 }
-#endif // !_LIBCPP_HAS_NO_WIDE_CHARACTERS
+#endif // _LIBCPP_HAS_WIDE_CHARACTERS
 
 #if defined(__MINGW32__) && __MSVCRT_VERSION__ < 0x0800
 _LIBCPP_EXPORTED_FROM_ABI size_t __strftime(char*, size_t, const char*, const struct tm*, __locale_t);
diff --git a/libcxx/include/__vector/vector.h b/libcxx/include/__vector/vector.h
index 66cb622..bad676a 100644
--- a/libcxx/include/__vector/vector.h
+++ b/libcxx/include/__vector/vector.h
@@ -783,14 +783,18 @@ private:
 
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __move_assign_alloc(vector&, false_type) _NOEXCEPT {}
 
-  static _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI pointer __add_alignment_assumption(pointer __p) _NOEXCEPT {
-#ifndef _LIBCPP_CXX03_LANG
-    if constexpr (is_pointer<pointer>::value) {
-      if (!__libcpp_is_constant_evaluated()) {
-        return static_cast<pointer>(__builtin_assume_aligned(__p, alignof(decltype(*__p))));
-      }
+  template <class _Ptr = pointer, __enable_if_t<is_pointer<_Ptr>::value, int> = 0>
+  static _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_CFI pointer
+  __add_alignment_assumption(_Ptr __p) _NOEXCEPT {
+    if (!__libcpp_is_constant_evaluated()) {
+      return static_cast<pointer>(__builtin_assume_aligned(__p, _LIBCPP_ALIGNOF(decltype(*__p))));
     }
-#endif
+    return __p;
+  }
+
+  template <class _Ptr = pointer, __enable_if_t<!is_pointer<_Ptr>::value, int> = 0>
+  static _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_CFI pointer
+  __add_alignment_assumption(_Ptr __p) _NOEXCEPT {
     return __p;
   }
 };
diff --git a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/escaped_output.pass.cpp b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/escaped_output.pass.cpp
index 430495e..d24db36 100644
--- a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/escaped_output.pass.cpp
+++ b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/escaped_output.pass.cpp
@@ -48,7 +48,7 @@ inline constexpr int Cc = 65;
 inline constexpr int Cf = 170;
 inline constexpr int Cs = 2'048;
 inline constexpr int Co = 137'468;
-inline constexpr int Cn = 824'718;
+inline constexpr int Cn = 819'533;
 inline constexpr int C  = Cc + Cf + Cs + Co + Cn;
 
 // This is the final part of the Unicode properties table:
diff --git a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.h b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.h
index eb7500a..9664622 100644
--- a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.h
+++ b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.h
@@ -82,7 +82,7 @@ struct data {
 };
 
 /// The data for UTF-8.
-std::array<data<char>, 1187> data_utf8 = {{
+std::array<data<char>, 1093> data_utf8 = {{
      {"\U00000020\U00000020", {32, 32}, {1, 2}},
      {"\U00000020\U00000308\U00000020", {32, 32}, {3, 4}},
      {"\U00000020\U0000000d", {32, 13}, {1, 2}},
@@ -91,8 +91,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00000020\U00000308\U0000000a", {32, 10}, {3, 4}},
      {"\U00000020\U00000001", {32, 1}, {1, 2}},
      {"\U00000020\U00000308\U00000001", {32, 1}, {3, 4}},
-     {"\U00000020\U0000034f", {32}, {3}},
-     {"\U00000020\U00000308\U0000034f", {32}, {5}},
+     {"\U00000020\U0000200c", {32}, {4}},
+     {"\U00000020\U00000308\U0000200c", {32}, {6}},
      {"\U00000020\U0001f1e6", {32, 127462}, {1, 5}},
      {"\U00000020\U00000308\U0001f1e6", {32, 127462}, {3, 7}},
      {"\U00000020\U00000600", {32, 1536}, {1, 3}},
@@ -109,8 +109,6 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00000020\U00000308\U0000ac00", {32, 44032}, {3, 6}},
      {"\U00000020\U0000ac01", {32, 44033}, {1, 4}},
      {"\U00000020\U00000308\U0000ac01", {32, 44033}, {3, 6}},
-     {"\U00000020\U00000900", {32}, {4}},
-     {"\U00000020\U00000308\U00000900", {32}, {6}},
      {"\U00000020\U00000903", {32}, {4}},
      {"\U00000020\U00000308\U00000903", {32}, {6}},
      {"\U00000020\U00000904", {32, 2308}, {1, 4}},
@@ -123,8 +121,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00000020\U00000308\U0000231a", {32, 8986}, {3, 6}},
      {"\U00000020\U00000300", {32}, {3}},
      {"\U00000020\U00000308\U00000300", {32}, {5}},
-     {"\U00000020\U0000093c", {32}, {4}},
-     {"\U00000020\U00000308\U0000093c", {32}, {6}},
+     {"\U00000020\U00000900", {32}, {4}},
+     {"\U00000020\U00000308\U00000900", {32}, {6}},
      {"\U00000020\U0000094d", {32}, {4}},
      {"\U00000020\U00000308\U0000094d", {32}, {6}},
      {"\U00000020\U0000200d", {32}, {4}},
@@ -139,8 +137,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U0000000d\U00000308\U0000000a", {13, 776, 10}, {1, 3, 4}},
      {"\U0000000d\U00000001", {13, 1}, {1, 2}},
      {"\U0000000d\U00000308\U00000001", {13, 776, 1}, {1, 3, 4}},
-     {"\U0000000d\U0000034f", {13, 847}, {1, 3}},
-     {"\U0000000d\U00000308\U0000034f", {13, 776}, {1, 5}},
+     {"\U0000000d\U0000200c", {13, 8204}, {1, 4}},
+     {"\U0000000d\U00000308\U0000200c", {13, 776}, {1, 6}},
      {"\U0000000d\U0001f1e6", {13, 127462}, {1, 5}},
      {"\U0000000d\U00000308\U0001f1e6", {13, 776, 127462}, {1, 3, 7}},
      {"\U0000000d\U00000600", {13, 1536}, {1, 3}},
@@ -157,8 +155,6 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U0000000d\U00000308\U0000ac00", {13, 776, 44032}, {1, 3, 6}},
      {"\U0000000d\U0000ac01", {13, 44033}, {1, 4}},
      {"\U0000000d\U00000308\U0000ac01", {13, 776, 44033}, {1, 3, 6}},
-     {"\U0000000d\U00000900", {13, 2304}, {1, 4}},
-     {"\U0000000d\U00000308\U00000900", {13, 776}, {1, 6}},
      {"\U0000000d\U00000903", {13, 2307}, {1, 4}},
      {"\U0000000d\U00000308\U00000903", {13, 776}, {1, 6}},
      {"\U0000000d\U00000904", {13, 2308}, {1, 4}},
@@ -171,8 +167,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U0000000d\U00000308\U0000231a", {13, 776, 8986}, {1, 3, 6}},
      {"\U0000000d\U00000300", {13, 768}, {1, 3}},
      {"\U0000000d\U00000308\U00000300", {13, 776}, {1, 5}},
-     {"\U0000000d\U0000093c", {13, 2364}, {1, 4}},
-     {"\U0000000d\U00000308\U0000093c", {13, 776}, {1, 6}},
+     {"\U0000000d\U00000900", {13, 2304}, {1, 4}},
+     {"\U0000000d\U00000308\U00000900", {13, 776}, {1, 6}},
      {"\U0000000d\U0000094d", {13, 2381}, {1, 4}},
      {"\U0000000d\U00000308\U0000094d", {13, 776}, {1, 6}},
      {"\U0000000d\U0000200d", {13, 8205}, {1, 4}},
@@ -187,8 +183,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U0000000a\U00000308\U0000000a", {10, 776, 10}, {1, 3, 4}},
      {"\U0000000a\U00000001", {10, 1}, {1, 2}},
      {"\U0000000a\U00000308\U00000001", {10, 776, 1}, {1, 3, 4}},
-     {"\U0000000a\U0000034f", {10, 847}, {1, 3}},
-     {"\U0000000a\U00000308\U0000034f", {10, 776}, {1, 5}},
+     {"\U0000000a\U0000200c", {10, 8204}, {1, 4}},
+     {"\U0000000a\U00000308\U0000200c", {10, 776}, {1, 6}},
      {"\U0000000a\U0001f1e6", {10, 127462}, {1, 5}},
      {"\U0000000a\U00000308\U0001f1e6", {10, 776, 127462}, {1, 3, 7}},
      {"\U0000000a\U00000600", {10, 1536}, {1, 3}},
@@ -205,8 +201,6 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U0000000a\U00000308\U0000ac00", {10, 776, 44032}, {1, 3, 6}},
      {"\U0000000a\U0000ac01", {10, 44033}, {1, 4}},
      {"\U0000000a\U00000308\U0000ac01", {10, 776, 44033}, {1, 3, 6}},
-     {"\U0000000a\U00000900", {10, 2304}, {1, 4}},
-     {"\U0000000a\U00000308\U00000900", {10, 776}, {1, 6}},
      {"\U0000000a\U00000903", {10, 2307}, {1, 4}},
      {"\U0000000a\U00000308\U00000903", {10, 776}, {1, 6}},
      {"\U0000000a\U00000904", {10, 2308}, {1, 4}},
@@ -219,8 +213,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U0000000a\U00000308\U0000231a", {10, 776, 8986}, {1, 3, 6}},
      {"\U0000000a\U00000300", {10, 768}, {1, 3}},
      {"\U0000000a\U00000308\U00000300", {10, 776}, {1, 5}},
-     {"\U0000000a\U0000093c", {10, 2364}, {1, 4}},
-     {"\U0000000a\U00000308\U0000093c", {10, 776}, {1, 6}},
+     {"\U0000000a\U00000900", {10, 2304}, {1, 4}},
+     {"\U0000000a\U00000308\U00000900", {10, 776}, {1, 6}},
      {"\U0000000a\U0000094d", {10, 2381}, {1, 4}},
      {"\U0000000a\U00000308\U0000094d", {10, 776}, {1, 6}},
      {"\U0000000a\U0000200d", {10, 8205}, {1, 4}},
@@ -235,8 +229,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00000001\U00000308\U0000000a", {1, 776, 10}, {1, 3, 4}},
      {"\U00000001\U00000001", {1, 1}, {1, 2}},
      {"\U00000001\U00000308\U00000001", {1, 776, 1}, {1, 3, 4}},
-     {"\U00000001\U0000034f", {1, 847}, {1, 3}},
-     {"\U00000001\U00000308\U0000034f", {1, 776}, {1, 5}},
+     {"\U00000001\U0000200c", {1, 8204}, {1, 4}},
+     {"\U00000001\U00000308\U0000200c", {1, 776}, {1, 6}},
      {"\U00000001\U0001f1e6", {1, 127462}, {1, 5}},
      {"\U00000001\U00000308\U0001f1e6", {1, 776, 127462}, {1, 3, 7}},
      {"\U00000001\U00000600", {1, 1536}, {1, 3}},
@@ -253,8 +247,6 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00000001\U00000308\U0000ac00", {1, 776, 44032}, {1, 3, 6}},
      {"\U00000001\U0000ac01", {1, 44033}, {1, 4}},
      {"\U00000001\U00000308\U0000ac01", {1, 776, 44033}, {1, 3, 6}},
-     {"\U00000001\U00000900", {1, 2304}, {1, 4}},
-     {"\U00000001\U00000308\U00000900", {1, 776}, {1, 6}},
      {"\U00000001\U00000903", {1, 2307}, {1, 4}},
      {"\U00000001\U00000308\U00000903", {1, 776}, {1, 6}},
      {"\U00000001\U00000904", {1, 2308}, {1, 4}},
@@ -267,62 +259,60 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00000001\U00000308\U0000231a", {1, 776, 8986}, {1, 3, 6}},
      {"\U00000001\U00000300", {1, 768}, {1, 3}},
      {"\U00000001\U00000308\U00000300", {1, 776}, {1, 5}},
-     {"\U00000001\U0000093c", {1, 2364}, {1, 4}},
-     {"\U00000001\U00000308\U0000093c", {1, 776}, {1, 6}},
+     {"\U00000001\U00000900", {1, 2304}, {1, 4}},
+     {"\U00000001\U00000308\U00000900", {1, 776}, {1, 6}},
      {"\U00000001\U0000094d", {1, 2381}, {1, 4}},
      {"\U00000001\U00000308\U0000094d", {1, 776}, {1, 6}},
      {"\U00000001\U0000200d", {1, 8205}, {1, 4}},
      {"\U00000001\U00000308\U0000200d", {1, 776}, {1, 6}},
      {"\U00000001\U00000378", {1, 888}, {1, 3}},
      {"\U00000001\U00000308\U00000378", {1, 776, 888}, {1, 3, 5}},
-     {"\U0000034f\U00000020", {847, 32}, {2, 3}},
-     {"\U0000034f\U00000308\U00000020", {847, 32}, {4, 5}},
-     {"\U0000034f\U0000000d", {847, 13}, {2, 3}},
-     {"\U0000034f\U00000308\U0000000d", {847, 13}, {4, 5}},
-     {"\U0000034f\U0000000a", {847, 10}, {2, 3}},
-     {"\U0000034f\U00000308\U0000000a", {847, 10}, {4, 5}},
-     {"\U0000034f\U00000001", {847, 1}, {2, 3}},
-     {"\U0000034f\U00000308\U00000001", {847, 1}, {4, 5}},
-     {"\U0000034f\U0000034f", {847}, {4}},
-     {"\U0000034f\U00000308\U0000034f", {847}, {6}},
-     {"\U0000034f\U0001f1e6", {847, 127462}, {2, 6}},
-     {"\U0000034f\U00000308\U0001f1e6", {847, 127462}, {4, 8}},
-     {"\U0000034f\U00000600", {847, 1536}, {2, 4}},
-     {"\U0000034f\U00000308\U00000600", {847, 1536}, {4, 6}},
-     {"\U0000034f\U00000a03", {847}, {5}},
-     {"\U0000034f\U00000308\U00000a03", {847}, {7}},
-     {"\U0000034f\U00001100", {847, 4352}, {2, 5}},
-     {"\U0000034f\U00000308\U00001100", {847, 4352}, {4, 7}},
-     {"\U0000034f\U00001160", {847, 4448}, {2, 5}},
-     {"\U0000034f\U00000308\U00001160", {847, 4448}, {4, 7}},
-     {"\U0000034f\U000011a8", {847, 4520}, {2, 5}},
-     {"\U0000034f\U00000308\U000011a8", {847, 4520}, {4, 7}},
-     {"\U0000034f\U0000ac00", {847, 44032}, {2, 5}},
-     {"\U0000034f\U00000308\U0000ac00", {847, 44032}, {4, 7}},
-     {"\U0000034f\U0000ac01", {847, 44033}, {2, 5}},
-     {"\U0000034f\U00000308\U0000ac01", {847, 44033}, {4, 7}},
-     {"\U0000034f\U00000900", {847}, {5}},
-     {"\U0000034f\U00000308\U00000900", {847}, {7}},
-     {"\U0000034f\U00000903", {847}, {5}},
-     {"\U0000034f\U00000308\U00000903", {847}, {7}},
-     {"\U0000034f\U00000904", {847, 2308}, {2, 5}},
-     {"\U0000034f\U00000308\U00000904", {847, 2308}, {4, 7}},
-     {"\U0000034f\U00000d4e", {847, 3406}, {2, 5}},
-     {"\U0000034f\U00000308\U00000d4e", {847, 3406}, {4, 7}},
-     {"\U0000034f\U00000915", {847, 2325}, {2, 5}},
-     {"\U0000034f\U00000308\U00000915", {847, 2325}, {4, 7}},
-     {"\U0000034f\U0000231a", {847, 8986}, {2, 5}},
-     {"\U0000034f\U00000308\U0000231a", {847, 8986}, {4, 7}},
-     {"\U0000034f\U00000300", {847}, {4}},
-     {"\U0000034f\U00000308\U00000300", {847}, {6}},
-     {"\U0000034f\U0000093c", {847}, {5}},
-     {"\U0000034f\U00000308\U0000093c", {847}, {7}},
-     {"\U0000034f\U0000094d", {847}, {5}},
-     {"\U0000034f\U00000308\U0000094d", {847}, {7}},
-     {"\U0000034f\U0000200d", {847}, {5}},
-     {"\U0000034f\U00000308\U0000200d", {847}, {7}},
-     {"\U0000034f\U00000378", {847, 888}, {2, 4}},
-     {"\U0000034f\U00000308\U00000378", {847, 888}, {4, 6}},
+     {"\U0000200c\U00000020", {8204, 32}, {3, 4}},
+     {"\U0000200c\U00000308\U00000020", {8204, 32}, {5, 6}},
+     {"\U0000200c\U0000000d", {8204, 13}, {3, 4}},
+     {"\U0000200c\U00000308\U0000000d", {8204, 13}, {5, 6}},
+     {"\U0000200c\U0000000a", {8204, 10}, {3, 4}},
+     {"\U0000200c\U00000308\U0000000a", {8204, 10}, {5, 6}},
+     {"\U0000200c\U00000001", {8204, 1}, {3, 4}},
+     {"\U0000200c\U00000308\U00000001", {8204, 1}, {5, 6}},
+     {"\U0000200c\U0000200c", {8204}, {6}},
+     {"\U0000200c\U00000308\U0000200c", {8204}, {8}},
+     {"\U0000200c\U0001f1e6", {8204, 127462}, {3, 7}},
+     {"\U0000200c\U00000308\U0001f1e6", {8204, 127462}, {5, 9}},
+     {"\U0000200c\U00000600", {8204, 1536}, {3, 5}},
+     {"\U0000200c\U00000308\U00000600", {8204, 1536}, {5, 7}},
+     {"\U0000200c\U00000a03", {8204}, {6}},
+     {"\U0000200c\U00000308\U00000a03", {8204}, {8}},
+     {"\U0000200c\U00001100", {8204, 4352}, {3, 6}},
+     {"\U0000200c\U00000308\U00001100", {8204, 4352}, {5, 8}},
+     {"\U0000200c\U00001160", {8204, 4448}, {3, 6}},
+     {"\U0000200c\U00000308\U00001160", {8204, 4448}, {5, 8}},
+     {"\U0000200c\U000011a8", {8204, 4520}, {3, 6}},
+     {"\U0000200c\U00000308\U000011a8", {8204, 4520}, {5, 8}},
+     {"\U0000200c\U0000ac00", {8204, 44032}, {3, 6}},
+     {"\U0000200c\U00000308\U0000ac00", {8204, 44032}, {5, 8}},
+     {"\U0000200c\U0000ac01", {8204, 44033}, {3, 6}},
+     {"\U0000200c\U00000308\U0000ac01", {8204, 44033}, {5, 8}},
+     {"\U0000200c\U00000903", {8204}, {6}},
+     {"\U0000200c\U00000308\U00000903", {8204}, {8}},
+     {"\U0000200c\U00000904", {8204, 2308}, {3, 6}},
+     {"\U0000200c\U00000308\U00000904", {8204, 2308}, {5, 8}},
+     {"\U0000200c\U00000d4e", {8204, 3406}, {3, 6}},
+     {"\U0000200c\U00000308\U00000d4e", {8204, 3406}, {5, 8}},
+     {"\U0000200c\U00000915", {8204, 2325}, {3, 6}},
+     {"\U0000200c\U00000308\U00000915", {8204, 2325}, {5, 8}},
+     {"\U0000200c\U0000231a", {8204, 8986}, {3, 6}},
+     {"\U0000200c\U00000308\U0000231a", {8204, 8986}, {5, 8}},
+     {"\U0000200c\U00000300", {8204}, {5}},
+     {"\U0000200c\U00000308\U00000300", {8204}, {7}},
+     {"\U0000200c\U00000900", {8204}, {6}},
+     {"\U0000200c\U00000308\U00000900", {8204}, {8}},
+     {"\U0000200c\U0000094d", {8204}, {6}},
+     {"\U0000200c\U00000308\U0000094d", {8204}, {8}},
+     {"\U0000200c\U0000200d", {8204}, {6}},
+     {"\U0000200c\U00000308\U0000200d", {8204}, {8}},
+     {"\U0000200c\U00000378", {8204, 888}, {3, 5}},
+     {"\U0000200c\U00000308\U00000378", {8204, 888}, {5, 7}},
      {"\U0001f1e6\U00000020", {127462, 32}, {4, 5}},
      {"\U0001f1e6\U00000308\U00000020", {127462, 32}, {6, 7}},
      {"\U0001f1e6\U0000000d", {127462, 13}, {4, 5}},
@@ -331,8 +321,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U0001f1e6\U00000308\U0000000a", {127462, 10}, {6, 7}},
      {"\U0001f1e6\U00000001", {127462, 1}, {4, 5}},
      {"\U0001f1e6\U00000308\U00000001", {127462, 1}, {6, 7}},
-     {"\U0001f1e6\U0000034f", {127462}, {6}},
-     {"\U0001f1e6\U00000308\U0000034f", {127462}, {8}},
+     {"\U0001f1e6\U0000200c", {127462}, {7}},
+     {"\U0001f1e6\U00000308\U0000200c", {127462}, {9}},
      {"\U0001f1e6\U0001f1e6", {127462}, {8}},
      {"\U0001f1e6\U00000308\U0001f1e6", {127462, 127462}, {6, 10}},
      {"\U0001f1e6\U00000600", {127462, 1536}, {4, 6}},
@@ -349,8 +339,6 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U0001f1e6\U00000308\U0000ac00", {127462, 44032}, {6, 9}},
      {"\U0001f1e6\U0000ac01", {127462, 44033}, {4, 7}},
      {"\U0001f1e6\U00000308\U0000ac01", {127462, 44033}, {6, 9}},
-     {"\U0001f1e6\U00000900", {127462}, {7}},
-     {"\U0001f1e6\U00000308\U00000900", {127462}, {9}},
      {"\U0001f1e6\U00000903", {127462}, {7}},
      {"\U0001f1e6\U00000308\U00000903", {127462}, {9}},
      {"\U0001f1e6\U00000904", {127462, 2308}, {4, 7}},
@@ -363,8 +351,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U0001f1e6\U00000308\U0000231a", {127462, 8986}, {6, 9}},
      {"\U0001f1e6\U00000300", {127462}, {6}},
      {"\U0001f1e6\U00000308\U00000300", {127462}, {8}},
-     {"\U0001f1e6\U0000093c", {127462}, {7}},
-     {"\U0001f1e6\U00000308\U0000093c", {127462}, {9}},
+     {"\U0001f1e6\U00000900", {127462}, {7}},
+     {"\U0001f1e6\U00000308\U00000900", {127462}, {9}},
      {"\U0001f1e6\U0000094d", {127462}, {7}},
      {"\U0001f1e6\U00000308\U0000094d", {127462}, {9}},
      {"\U0001f1e6\U0000200d", {127462}, {7}},
@@ -379,8 +367,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00000600\U00000308\U0000000a", {1536, 10}, {4, 5}},
      {"\U00000600\U00000001", {1536, 1}, {2, 3}},
      {"\U00000600\U00000308\U00000001", {1536, 1}, {4, 5}},
-     {"\U00000600\U0000034f", {1536}, {4}},
-     {"\U00000600\U00000308\U0000034f", {1536}, {6}},
+     {"\U00000600\U0000200c", {1536}, {5}},
+     {"\U00000600\U00000308\U0000200c", {1536}, {7}},
      {"\U00000600\U0001f1e6", {1536}, {6}},
      {"\U00000600\U00000308\U0001f1e6", {1536, 127462}, {4, 8}},
      {"\U00000600\U00000600", {1536}, {4}},
@@ -397,8 +385,6 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00000600\U00000308\U0000ac00", {1536, 44032}, {4, 7}},
      {"\U00000600\U0000ac01", {1536}, {5}},
      {"\U00000600\U00000308\U0000ac01", {1536, 44033}, {4, 7}},
-     {"\U00000600\U00000900", {1536}, {5}},
-     {"\U00000600\U00000308\U00000900", {1536}, {7}},
      {"\U00000600\U00000903", {1536}, {5}},
      {"\U00000600\U00000308\U00000903", {1536}, {7}},
      {"\U00000600\U00000904", {1536}, {5}},
@@ -411,8 +397,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00000600\U00000308\U0000231a", {1536, 8986}, {4, 7}},
      {"\U00000600\U00000300", {1536}, {4}},
      {"\U00000600\U00000308\U00000300", {1536}, {6}},
-     {"\U00000600\U0000093c", {1536}, {5}},
-     {"\U00000600\U00000308\U0000093c", {1536}, {7}},
+     {"\U00000600\U00000900", {1536}, {5}},
+     {"\U00000600\U00000308\U00000900", {1536}, {7}},
      {"\U00000600\U0000094d", {1536}, {5}},
      {"\U00000600\U00000308\U0000094d", {1536}, {7}},
      {"\U00000600\U0000200d", {1536}, {5}},
@@ -427,8 +413,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00000a03\U00000308\U0000000a", {2563, 10}, {5, 6}},
      {"\U00000a03\U00000001", {2563, 1}, {3, 4}},
      {"\U00000a03\U00000308\U00000001", {2563, 1}, {5, 6}},
-     {"\U00000a03\U0000034f", {2563}, {5}},
-     {"\U00000a03\U00000308\U0000034f", {2563}, {7}},
+     {"\U00000a03\U0000200c", {2563}, {6}},
+     {"\U00000a03\U00000308\U0000200c", {2563}, {8}},
      {"\U00000a03\U0001f1e6", {2563, 127462}, {3, 7}},
      {"\U00000a03\U00000308\U0001f1e6", {2563, 127462}, {5, 9}},
      {"\U00000a03\U00000600", {2563, 1536}, {3, 5}},
@@ -445,8 +431,6 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00000a03\U00000308\U0000ac00", {2563, 44032}, {5, 8}},
      {"\U00000a03\U0000ac01", {2563, 44033}, {3, 6}},
      {"\U00000a03\U00000308\U0000ac01", {2563, 44033}, {5, 8}},
-     {"\U00000a03\U00000900", {2563}, {6}},
-     {"\U00000a03\U00000308\U00000900", {2563}, {8}},
      {"\U00000a03\U00000903", {2563}, {6}},
      {"\U00000a03\U00000308\U00000903", {2563}, {8}},
      {"\U00000a03\U00000904", {2563, 2308}, {3, 6}},
@@ -459,8 +443,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00000a03\U00000308\U0000231a", {2563, 8986}, {5, 8}},
      {"\U00000a03\U00000300", {2563}, {5}},
      {"\U00000a03\U00000308\U00000300", {2563}, {7}},
-     {"\U00000a03\U0000093c", {2563}, {6}},
-     {"\U00000a03\U00000308\U0000093c", {2563}, {8}},
+     {"\U00000a03\U00000900", {2563}, {6}},
+     {"\U00000a03\U00000308\U00000900", {2563}, {8}},
      {"\U00000a03\U0000094d", {2563}, {6}},
      {"\U00000a03\U00000308\U0000094d", {2563}, {8}},
      {"\U00000a03\U0000200d", {2563}, {6}},
@@ -475,8 +459,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00001100\U00000308\U0000000a", {4352, 10}, {5, 6}},
      {"\U00001100\U00000001", {4352, 1}, {3, 4}},
      {"\U00001100\U00000308\U00000001", {4352, 1}, {5, 6}},
-     {"\U00001100\U0000034f", {4352}, {5}},
-     {"\U00001100\U00000308\U0000034f", {4352}, {7}},
+     {"\U00001100\U0000200c", {4352}, {6}},
+     {"\U00001100\U00000308\U0000200c", {4352}, {8}},
      {"\U00001100\U0001f1e6", {4352, 127462}, {3, 7}},
      {"\U00001100\U00000308\U0001f1e6", {4352, 127462}, {5, 9}},
      {"\U00001100\U00000600", {4352, 1536}, {3, 5}},
@@ -493,8 +477,6 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00001100\U00000308\U0000ac00", {4352, 44032}, {5, 8}},
      {"\U00001100\U0000ac01", {4352}, {6}},
      {"\U00001100\U00000308\U0000ac01", {4352, 44033}, {5, 8}},
-     {"\U00001100\U00000900", {4352}, {6}},
-     {"\U00001100\U00000308\U00000900", {4352}, {8}},
      {"\U00001100\U00000903", {4352}, {6}},
      {"\U00001100\U00000308\U00000903", {4352}, {8}},
      {"\U00001100\U00000904", {4352, 2308}, {3, 6}},
@@ -507,8 +489,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00001100\U00000308\U0000231a", {4352, 8986}, {5, 8}},
      {"\U00001100\U00000300", {4352}, {5}},
      {"\U00001100\U00000308\U00000300", {4352}, {7}},
-     {"\U00001100\U0000093c", {4352}, {6}},
-     {"\U00001100\U00000308\U0000093c", {4352}, {8}},
+     {"\U00001100\U00000900", {4352}, {6}},
+     {"\U00001100\U00000308\U00000900", {4352}, {8}},
      {"\U00001100\U0000094d", {4352}, {6}},
      {"\U00001100\U00000308\U0000094d", {4352}, {8}},
      {"\U00001100\U0000200d", {4352}, {6}},
@@ -523,8 +505,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00001160\U00000308\U0000000a", {4448, 10}, {5, 6}},
      {"\U00001160\U00000001", {4448, 1}, {3, 4}},
      {"\U00001160\U00000308\U00000001", {4448, 1}, {5, 6}},
-     {"\U00001160\U0000034f", {4448}, {5}},
-     {"\U00001160\U00000308\U0000034f", {4448}, {7}},
+     {"\U00001160\U0000200c", {4448}, {6}},
+     {"\U00001160\U00000308\U0000200c", {4448}, {8}},
      {"\U00001160\U0001f1e6", {4448, 127462}, {3, 7}},
      {"\U00001160\U00000308\U0001f1e6", {4448, 127462}, {5, 9}},
      {"\U00001160\U00000600", {4448, 1536}, {3, 5}},
@@ -541,8 +523,6 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00001160\U00000308\U0000ac00", {4448, 44032}, {5, 8}},
      {"\U00001160\U0000ac01", {4448, 44033}, {3, 6}},
      {"\U00001160\U00000308\U0000ac01", {4448, 44033}, {5, 8}},
-     {"\U00001160\U00000900", {4448}, {6}},
-     {"\U00001160\U00000308\U00000900", {4448}, {8}},
      {"\U00001160\U00000903", {4448}, {6}},
      {"\U00001160\U00000308\U00000903", {4448}, {8}},
      {"\U00001160\U00000904", {4448, 2308}, {3, 6}},
@@ -555,8 +535,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00001160\U00000308\U0000231a", {4448, 8986}, {5, 8}},
      {"\U00001160\U00000300", {4448}, {5}},
      {"\U00001160\U00000308\U00000300", {4448}, {7}},
-     {"\U00001160\U0000093c", {4448}, {6}},
-     {"\U00001160\U00000308\U0000093c", {4448}, {8}},
+     {"\U00001160\U00000900", {4448}, {6}},
+     {"\U00001160\U00000308\U00000900", {4448}, {8}},
      {"\U00001160\U0000094d", {4448}, {6}},
      {"\U00001160\U00000308\U0000094d", {4448}, {8}},
      {"\U00001160\U0000200d", {4448}, {6}},
@@ -571,8 +551,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U000011a8\U00000308\U0000000a", {4520, 10}, {5, 6}},
      {"\U000011a8\U00000001", {4520, 1}, {3, 4}},
      {"\U000011a8\U00000308\U00000001", {4520, 1}, {5, 6}},
-     {"\U000011a8\U0000034f", {4520}, {5}},
-     {"\U000011a8\U00000308\U0000034f", {4520}, {7}},
+     {"\U000011a8\U0000200c", {4520}, {6}},
+     {"\U000011a8\U00000308\U0000200c", {4520}, {8}},
      {"\U000011a8\U0001f1e6", {4520, 127462}, {3, 7}},
      {"\U000011a8\U00000308\U0001f1e6", {4520, 127462}, {5, 9}},
      {"\U000011a8\U00000600", {4520, 1536}, {3, 5}},
@@ -589,8 +569,6 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U000011a8\U00000308\U0000ac00", {4520, 44032}, {5, 8}},
      {"\U000011a8\U0000ac01", {4520, 44033}, {3, 6}},
      {"\U000011a8\U00000308\U0000ac01", {4520, 44033}, {5, 8}},
-     {"\U000011a8\U00000900", {4520}, {6}},
-     {"\U000011a8\U00000308\U00000900", {4520}, {8}},
      {"\U000011a8\U00000903", {4520}, {6}},
      {"\U000011a8\U00000308\U00000903", {4520}, {8}},
      {"\U000011a8\U00000904", {4520, 2308}, {3, 6}},
@@ -603,8 +581,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U000011a8\U00000308\U0000231a", {4520, 8986}, {5, 8}},
      {"\U000011a8\U00000300", {4520}, {5}},
      {"\U000011a8\U00000308\U00000300", {4520}, {7}},
-     {"\U000011a8\U0000093c", {4520}, {6}},
-     {"\U000011a8\U00000308\U0000093c", {4520}, {8}},
+     {"\U000011a8\U00000900", {4520}, {6}},
+     {"\U000011a8\U00000308\U00000900", {4520}, {8}},
      {"\U000011a8\U0000094d", {4520}, {6}},
      {"\U000011a8\U00000308\U0000094d", {4520}, {8}},
      {"\U000011a8\U0000200d", {4520}, {6}},
@@ -619,8 +597,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U0000ac00\U00000308\U0000000a", {44032, 10}, {5, 6}},
      {"\U0000ac00\U00000001", {44032, 1}, {3, 4}},
      {"\U0000ac00\U00000308\U00000001", {44032, 1}, {5, 6}},
-     {"\U0000ac00\U0000034f", {44032}, {5}},
-     {"\U0000ac00\U00000308\U0000034f", {44032}, {7}},
+     {"\U0000ac00\U0000200c", {44032}, {6}},
+     {"\U0000ac00\U00000308\U0000200c", {44032}, {8}},
      {"\U0000ac00\U0001f1e6", {44032, 127462}, {3, 7}},
      {"\U0000ac00\U00000308\U0001f1e6", {44032, 127462}, {5, 9}},
      {"\U0000ac00\U00000600", {44032, 1536}, {3, 5}},
@@ -637,8 +615,6 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U0000ac00\U00000308\U0000ac00", {44032, 44032}, {5, 8}},
      {"\U0000ac00\U0000ac01", {44032, 44033}, {3, 6}},
      {"\U0000ac00\U00000308\U0000ac01", {44032, 44033}, {5, 8}},
-     {"\U0000ac00\U00000900", {44032}, {6}},
-     {"\U0000ac00\U00000308\U00000900", {44032}, {8}},
      {"\U0000ac00\U00000903", {44032}, {6}},
      {"\U0000ac00\U00000308\U00000903", {44032}, {8}},
      {"\U0000ac00\U00000904", {44032, 2308}, {3, 6}},
@@ -651,8 +627,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U0000ac00\U00000308\U0000231a", {44032, 8986}, {5, 8}},
      {"\U0000ac00\U00000300", {44032}, {5}},
      {"\U0000ac00\U00000308\U00000300", {44032}, {7}},
-     {"\U0000ac00\U0000093c", {44032}, {6}},
-     {"\U0000ac00\U00000308\U0000093c", {44032}, {8}},
+     {"\U0000ac00\U00000900", {44032}, {6}},
+     {"\U0000ac00\U00000308\U00000900", {44032}, {8}},
      {"\U0000ac00\U0000094d", {44032}, {6}},
      {"\U0000ac00\U00000308\U0000094d", {44032}, {8}},
      {"\U0000ac00\U0000200d", {44032}, {6}},
@@ -667,8 +643,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U0000ac01\U00000308\U0000000a", {44033, 10}, {5, 6}},
      {"\U0000ac01\U00000001", {44033, 1}, {3, 4}},
      {"\U0000ac01\U00000308\U00000001", {44033, 1}, {5, 6}},
-     {"\U0000ac01\U0000034f", {44033}, {5}},
-     {"\U0000ac01\U00000308\U0000034f", {44033}, {7}},
+     {"\U0000ac01\U0000200c", {44033}, {6}},
+     {"\U0000ac01\U00000308\U0000200c", {44033}, {8}},
      {"\U0000ac01\U0001f1e6", {44033, 127462}, {3, 7}},
      {"\U0000ac01\U00000308\U0001f1e6", {44033, 127462}, {5, 9}},
      {"\U0000ac01\U00000600", {44033, 1536}, {3, 5}},
@@ -685,8 +661,6 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U0000ac01\U00000308\U0000ac00", {44033, 44032}, {5, 8}},
      {"\U0000ac01\U0000ac01", {44033, 44033}, {3, 6}},
      {"\U0000ac01\U00000308\U0000ac01", {44033, 44033}, {5, 8}},
-     {"\U0000ac01\U00000900", {44033}, {6}},
-     {"\U0000ac01\U00000308\U00000900", {44033}, {8}},
      {"\U0000ac01\U00000903", {44033}, {6}},
      {"\U0000ac01\U00000308\U00000903", {44033}, {8}},
      {"\U0000ac01\U00000904", {44033, 2308}, {3, 6}},
@@ -699,62 +673,14 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U0000ac01\U00000308\U0000231a", {44033, 8986}, {5, 8}},
      {"\U0000ac01\U00000300", {44033}, {5}},
      {"\U0000ac01\U00000308\U00000300", {44033}, {7}},
-     {"\U0000ac01\U0000093c", {44033}, {6}},
-     {"\U0000ac01\U00000308\U0000093c", {44033}, {8}},
+     {"\U0000ac01\U00000900", {44033}, {6}},
+     {"\U0000ac01\U00000308\U00000900", {44033}, {8}},
      {"\U0000ac01\U0000094d", {44033}, {6}},
      {"\U0000ac01\U00000308\U0000094d", {44033}, {8}},
      {"\U0000ac01\U0000200d", {44033}, {6}},
      {"\U0000ac01\U00000308\U0000200d", {44033}, {8}},
      {"\U0000ac01\U00000378", {44033, 888}, {3, 5}},
      {"\U0000ac01\U00000308\U00000378", {44033, 888}, {5, 7}},
-     {"\U00000900\U00000020", {2304, 32}, {3, 4}},
-     {"\U00000900\U00000308\U00000020", {2304, 32}, {5, 6}},
-     {"\U00000900\U0000000d", {2304, 13}, {3, 4}},
-     {"\U00000900\U00000308\U0000000d", {2304, 13}, {5, 6}},
-     {"\U00000900\U0000000a", {2304, 10}, {3, 4}},
-     {"\U00000900\U00000308\U0000000a", {2304, 10}, {5, 6}},
-     {"\U00000900\U00000001", {2304, 1}, {3, 4}},
-     {"\U00000900\U00000308\U00000001", {2304, 1}, {5, 6}},
-     {"\U00000900\U0000034f", {2304}, {5}},
-     {"\U00000900\U00000308\U0000034f", {2304}, {7}},
-     {"\U00000900\U0001f1e6", {2304, 127462}, {3, 7}},
-     {"\U00000900\U00000308\U0001f1e6", {2304, 127462}, {5, 9}},
-     {"\U00000900\U00000600", {2304, 1536}, {3, 5}},
-     {"\U00000900\U00000308\U00000600", {2304, 1536}, {5, 7}},
-     {"\U00000900\U00000a03", {2304}, {6}},
-     {"\U00000900\U00000308\U00000a03", {2304}, {8}},
-     {"\U00000900\U00001100", {2304, 4352}, {3, 6}},
-     {"\U00000900\U00000308\U00001100", {2304, 4352}, {5, 8}},
-     {"\U00000900\U00001160", {2304, 4448}, {3, 6}},
-     {"\U00000900\U00000308\U00001160", {2304, 4448}, {5, 8}},
-     {"\U00000900\U000011a8", {2304, 4520}, {3, 6}},
-     {"\U00000900\U00000308\U000011a8", {2304, 4520}, {5, 8}},
-     {"\U00000900\U0000ac00", {2304, 44032}, {3, 6}},
-     {"\U00000900\U00000308\U0000ac00", {2304, 44032}, {5, 8}},
-     {"\U00000900\U0000ac01", {2304, 44033}, {3, 6}},
-     {"\U00000900\U00000308\U0000ac01", {2304, 44033}, {5, 8}},
-     {"\U00000900\U00000900", {2304}, {6}},
-     {"\U00000900\U00000308\U00000900", {2304}, {8}},
-     {"\U00000900\U00000903", {2304}, {6}},
-     {"\U00000900\U00000308\U00000903", {2304}, {8}},
-     {"\U00000900\U00000904", {2304, 2308}, {3, 6}},
-     {"\U00000900\U00000308\U00000904", {2304, 2308}, {5, 8}},
-     {"\U00000900\U00000d4e", {2304, 3406}, {3, 6}},
-     {"\U00000900\U00000308\U00000d4e", {2304, 3406}, {5, 8}},
-     {"\U00000900\U00000915", {2304, 2325}, {3, 6}},
-     {"\U00000900\U00000308\U00000915", {2304, 2325}, {5, 8}},
-     {"\U00000900\U0000231a", {2304, 8986}, {3, 6}},
-     {"\U00000900\U00000308\U0000231a", {2304, 8986}, {5, 8}},
-     {"\U00000900\U00000300", {2304}, {5}},
-     {"\U00000900\U00000308\U00000300", {2304}, {7}},
-     {"\U00000900\U0000093c", {2304}, {6}},
-     {"\U00000900\U00000308\U0000093c", {2304}, {8}},
-     {"\U00000900\U0000094d", {2304}, {6}},
-     {"\U00000900\U00000308\U0000094d", {2304}, {8}},
-     {"\U00000900\U0000200d", {2304}, {6}},
-     {"\U00000900\U00000308\U0000200d", {2304}, {8}},
-     {"\U00000900\U00000378", {2304, 888}, {3, 5}},
-     {"\U00000900\U00000308\U00000378", {2304, 888}, {5, 7}},
      {"\U00000903\U00000020", {2307, 32}, {3, 4}},
      {"\U00000903\U00000308\U00000020", {2307, 32}, {5, 6}},
      {"\U00000903\U0000000d", {2307, 13}, {3, 4}},
@@ -763,8 +689,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00000903\U00000308\U0000000a", {2307, 10}, {5, 6}},
      {"\U00000903\U00000001", {2307, 1}, {3, 4}},
      {"\U00000903\U00000308\U00000001", {2307, 1}, {5, 6}},
-     {"\U00000903\U0000034f", {2307}, {5}},
-     {"\U00000903\U00000308\U0000034f", {2307}, {7}},
+     {"\U00000903\U0000200c", {2307}, {6}},
+     {"\U00000903\U00000308\U0000200c", {2307}, {8}},
      {"\U00000903\U0001f1e6", {2307, 127462}, {3, 7}},
      {"\U00000903\U00000308\U0001f1e6", {2307, 127462}, {5, 9}},
      {"\U00000903\U00000600", {2307, 1536}, {3, 5}},
@@ -781,8 +707,6 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00000903\U00000308\U0000ac00", {2307, 44032}, {5, 8}},
      {"\U00000903\U0000ac01", {2307, 44033}, {3, 6}},
      {"\U00000903\U00000308\U0000ac01", {2307, 44033}, {5, 8}},
-     {"\U00000903\U00000900", {2307}, {6}},
-     {"\U00000903\U00000308\U00000900", {2307}, {8}},
      {"\U00000903\U00000903", {2307}, {6}},
      {"\U00000903\U00000308\U00000903", {2307}, {8}},
      {"\U00000903\U00000904", {2307, 2308}, {3, 6}},
@@ -795,8 +719,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00000903\U00000308\U0000231a", {2307, 8986}, {5, 8}},
      {"\U00000903\U00000300", {2307}, {5}},
      {"\U00000903\U00000308\U00000300", {2307}, {7}},
-     {"\U00000903\U0000093c", {2307}, {6}},
-     {"\U00000903\U00000308\U0000093c", {2307}, {8}},
+     {"\U00000903\U00000900", {2307}, {6}},
+     {"\U00000903\U00000308\U00000900", {2307}, {8}},
      {"\U00000903\U0000094d", {2307}, {6}},
      {"\U00000903\U00000308\U0000094d", {2307}, {8}},
      {"\U00000903\U0000200d", {2307}, {6}},
@@ -811,8 +735,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00000904\U00000308\U0000000a", {2308, 10}, {5, 6}},
      {"\U00000904\U00000001", {2308, 1}, {3, 4}},
      {"\U00000904\U00000308\U00000001", {2308, 1}, {5, 6}},
-     {"\U00000904\U0000034f", {2308}, {5}},
-     {"\U00000904\U00000308\U0000034f", {2308}, {7}},
+     {"\U00000904\U0000200c", {2308}, {6}},
+     {"\U00000904\U00000308\U0000200c", {2308}, {8}},
      {"\U00000904\U0001f1e6", {2308, 127462}, {3, 7}},
      {"\U00000904\U00000308\U0001f1e6", {2308, 127462}, {5, 9}},
      {"\U00000904\U00000600", {2308, 1536}, {3, 5}},
@@ -829,8 +753,6 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00000904\U00000308\U0000ac00", {2308, 44032}, {5, 8}},
      {"\U00000904\U0000ac01", {2308, 44033}, {3, 6}},
      {"\U00000904\U00000308\U0000ac01", {2308, 44033}, {5, 8}},
-     {"\U00000904\U00000900", {2308}, {6}},
-     {"\U00000904\U00000308\U00000900", {2308}, {8}},
      {"\U00000904\U00000903", {2308}, {6}},
      {"\U00000904\U00000308\U00000903", {2308}, {8}},
      {"\U00000904\U00000904", {2308, 2308}, {3, 6}},
@@ -843,8 +765,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00000904\U00000308\U0000231a", {2308, 8986}, {5, 8}},
      {"\U00000904\U00000300", {2308}, {5}},
      {"\U00000904\U00000308\U00000300", {2308}, {7}},
-     {"\U00000904\U0000093c", {2308}, {6}},
-     {"\U00000904\U00000308\U0000093c", {2308}, {8}},
+     {"\U00000904\U00000900", {2308}, {6}},
+     {"\U00000904\U00000308\U00000900", {2308}, {8}},
      {"\U00000904\U0000094d", {2308}, {6}},
      {"\U00000904\U00000308\U0000094d", {2308}, {8}},
      {"\U00000904\U0000200d", {2308}, {6}},
@@ -859,8 +781,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00000d4e\U00000308\U0000000a", {3406, 10}, {5, 6}},
      {"\U00000d4e\U00000001", {3406, 1}, {3, 4}},
      {"\U00000d4e\U00000308\U00000001", {3406, 1}, {5, 6}},
-     {"\U00000d4e\U0000034f", {3406}, {5}},
-     {"\U00000d4e\U00000308\U0000034f", {3406}, {7}},
+     {"\U00000d4e\U0000200c", {3406}, {6}},
+     {"\U00000d4e\U00000308\U0000200c", {3406}, {8}},
      {"\U00000d4e\U0001f1e6", {3406}, {7}},
      {"\U00000d4e\U00000308\U0001f1e6", {3406, 127462}, {5, 9}},
      {"\U00000d4e\U00000600", {3406}, {5}},
@@ -877,8 +799,6 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00000d4e\U00000308\U0000ac00", {3406, 44032}, {5, 8}},
      {"\U00000d4e\U0000ac01", {3406}, {6}},
      {"\U00000d4e\U00000308\U0000ac01", {3406, 44033}, {5, 8}},
-     {"\U00000d4e\U00000900", {3406}, {6}},
-     {"\U00000d4e\U00000308\U00000900", {3406}, {8}},
      {"\U00000d4e\U00000903", {3406}, {6}},
      {"\U00000d4e\U00000308\U00000903", {3406}, {8}},
      {"\U00000d4e\U00000904", {3406}, {6}},
@@ -891,8 +811,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00000d4e\U00000308\U0000231a", {3406, 8986}, {5, 8}},
      {"\U00000d4e\U00000300", {3406}, {5}},
      {"\U00000d4e\U00000308\U00000300", {3406}, {7}},
-     {"\U00000d4e\U0000093c", {3406}, {6}},
-     {"\U00000d4e\U00000308\U0000093c", {3406}, {8}},
+     {"\U00000d4e\U00000900", {3406}, {6}},
+     {"\U00000d4e\U00000308\U00000900", {3406}, {8}},
      {"\U00000d4e\U0000094d", {3406}, {6}},
      {"\U00000d4e\U00000308\U0000094d", {3406}, {8}},
      {"\U00000d4e\U0000200d", {3406}, {6}},
@@ -907,8 +827,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00000915\U00000308\U0000000a", {2325, 10}, {5, 6}},
      {"\U00000915\U00000001", {2325, 1}, {3, 4}},
      {"\U00000915\U00000308\U00000001", {2325, 1}, {5, 6}},
-     {"\U00000915\U0000034f", {2325}, {5}},
-     {"\U00000915\U00000308\U0000034f", {2325}, {7}},
+     {"\U00000915\U0000200c", {2325}, {6}},
+     {"\U00000915\U00000308\U0000200c", {2325}, {8}},
      {"\U00000915\U0001f1e6", {2325, 127462}, {3, 7}},
      {"\U00000915\U00000308\U0001f1e6", {2325, 127462}, {5, 9}},
      {"\U00000915\U00000600", {2325, 1536}, {3, 5}},
@@ -925,8 +845,6 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00000915\U00000308\U0000ac00", {2325, 44032}, {5, 8}},
      {"\U00000915\U0000ac01", {2325, 44033}, {3, 6}},
      {"\U00000915\U00000308\U0000ac01", {2325, 44033}, {5, 8}},
-     {"\U00000915\U00000900", {2325}, {6}},
-     {"\U00000915\U00000308\U00000900", {2325}, {8}},
      {"\U00000915\U00000903", {2325}, {6}},
      {"\U00000915\U00000308\U00000903", {2325}, {8}},
      {"\U00000915\U00000904", {2325, 2308}, {3, 6}},
@@ -939,8 +857,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00000915\U00000308\U0000231a", {2325, 8986}, {5, 8}},
      {"\U00000915\U00000300", {2325}, {5}},
      {"\U00000915\U00000308\U00000300", {2325}, {7}},
-     {"\U00000915\U0000093c", {2325}, {6}},
-     {"\U00000915\U00000308\U0000093c", {2325}, {8}},
+     {"\U00000915\U00000900", {2325}, {6}},
+     {"\U00000915\U00000308\U00000900", {2325}, {8}},
      {"\U00000915\U0000094d", {2325}, {6}},
      {"\U00000915\U00000308\U0000094d", {2325}, {8}},
      {"\U00000915\U0000200d", {2325}, {6}},
@@ -955,8 +873,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U0000231a\U00000308\U0000000a", {8986, 10}, {5, 6}},
      {"\U0000231a\U00000001", {8986, 1}, {3, 4}},
      {"\U0000231a\U00000308\U00000001", {8986, 1}, {5, 6}},
-     {"\U0000231a\U0000034f", {8986}, {5}},
-     {"\U0000231a\U00000308\U0000034f", {8986}, {7}},
+     {"\U0000231a\U0000200c", {8986}, {6}},
+     {"\U0000231a\U00000308\U0000200c", {8986}, {8}},
      {"\U0000231a\U0001f1e6", {8986, 127462}, {3, 7}},
      {"\U0000231a\U00000308\U0001f1e6", {8986, 127462}, {5, 9}},
      {"\U0000231a\U00000600", {8986, 1536}, {3, 5}},
@@ -973,8 +891,6 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U0000231a\U00000308\U0000ac00", {8986, 44032}, {5, 8}},
      {"\U0000231a\U0000ac01", {8986, 44033}, {3, 6}},
      {"\U0000231a\U00000308\U0000ac01", {8986, 44033}, {5, 8}},
-     {"\U0000231a\U00000900", {8986}, {6}},
-     {"\U0000231a\U00000308\U00000900", {8986}, {8}},
      {"\U0000231a\U00000903", {8986}, {6}},
      {"\U0000231a\U00000308\U00000903", {8986}, {8}},
      {"\U0000231a\U00000904", {8986, 2308}, {3, 6}},
@@ -987,8 +903,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U0000231a\U00000308\U0000231a", {8986, 8986}, {5, 8}},
      {"\U0000231a\U00000300", {8986}, {5}},
      {"\U0000231a\U00000308\U00000300", {8986}, {7}},
-     {"\U0000231a\U0000093c", {8986}, {6}},
-     {"\U0000231a\U00000308\U0000093c", {8986}, {8}},
+     {"\U0000231a\U00000900", {8986}, {6}},
+     {"\U0000231a\U00000308\U00000900", {8986}, {8}},
      {"\U0000231a\U0000094d", {8986}, {6}},
      {"\U0000231a\U00000308\U0000094d", {8986}, {8}},
      {"\U0000231a\U0000200d", {8986}, {6}},
@@ -1003,8 +919,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00000300\U00000308\U0000000a", {768, 10}, {4, 5}},
      {"\U00000300\U00000001", {768, 1}, {2, 3}},
      {"\U00000300\U00000308\U00000001", {768, 1}, {4, 5}},
-     {"\U00000300\U0000034f", {768}, {4}},
-     {"\U00000300\U00000308\U0000034f", {768}, {6}},
+     {"\U00000300\U0000200c", {768}, {5}},
+     {"\U00000300\U00000308\U0000200c", {768}, {7}},
      {"\U00000300\U0001f1e6", {768, 127462}, {2, 6}},
      {"\U00000300\U00000308\U0001f1e6", {768, 127462}, {4, 8}},
      {"\U00000300\U00000600", {768, 1536}, {2, 4}},
@@ -1021,8 +937,6 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00000300\U00000308\U0000ac00", {768, 44032}, {4, 7}},
      {"\U00000300\U0000ac01", {768, 44033}, {2, 5}},
      {"\U00000300\U00000308\U0000ac01", {768, 44033}, {4, 7}},
-     {"\U00000300\U00000900", {768}, {5}},
-     {"\U00000300\U00000308\U00000900", {768}, {7}},
      {"\U00000300\U00000903", {768}, {5}},
      {"\U00000300\U00000308\U00000903", {768}, {7}},
      {"\U00000300\U00000904", {768, 2308}, {2, 5}},
@@ -1035,62 +949,60 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00000300\U00000308\U0000231a", {768, 8986}, {4, 7}},
      {"\U00000300\U00000300", {768}, {4}},
      {"\U00000300\U00000308\U00000300", {768}, {6}},
-     {"\U00000300\U0000093c", {768}, {5}},
-     {"\U00000300\U00000308\U0000093c", {768}, {7}},
+     {"\U00000300\U00000900", {768}, {5}},
+     {"\U00000300\U00000308\U00000900", {768}, {7}},
      {"\U00000300\U0000094d", {768}, {5}},
      {"\U00000300\U00000308\U0000094d", {768}, {7}},
      {"\U00000300\U0000200d", {768}, {5}},
      {"\U00000300\U00000308\U0000200d", {768}, {7}},
      {"\U00000300\U00000378", {768, 888}, {2, 4}},
      {"\U00000300\U00000308\U00000378", {768, 888}, {4, 6}},
-     {"\U0000093c\U00000020", {2364, 32}, {3, 4}},
-     {"\U0000093c\U00000308\U00000020", {2364, 32}, {5, 6}},
-     {"\U0000093c\U0000000d", {2364, 13}, {3, 4}},
-     {"\U0000093c\U00000308\U0000000d", {2364, 13}, {5, 6}},
-     {"\U0000093c\U0000000a", {2364, 10}, {3, 4}},
-     {"\U0000093c\U00000308\U0000000a", {2364, 10}, {5, 6}},
-     {"\U0000093c\U00000001", {2364, 1}, {3, 4}},
-     {"\U0000093c\U00000308\U00000001", {2364, 1}, {5, 6}},
-     {"\U0000093c\U0000034f", {2364}, {5}},
-     {"\U0000093c\U00000308\U0000034f", {2364}, {7}},
-     {"\U0000093c\U0001f1e6", {2364, 127462}, {3, 7}},
-     {"\U0000093c\U00000308\U0001f1e6", {2364, 127462}, {5, 9}},
-     {"\U0000093c\U00000600", {2364, 1536}, {3, 5}},
-     {"\U0000093c\U00000308\U00000600", {2364, 1536}, {5, 7}},
-     {"\U0000093c\U00000a03", {2364}, {6}},
-     {"\U0000093c\U00000308\U00000a03", {2364}, {8}},
-     {"\U0000093c\U00001100", {2364, 4352}, {3, 6}},
-     {"\U0000093c\U00000308\U00001100", {2364, 4352}, {5, 8}},
-     {"\U0000093c\U00001160", {2364, 4448}, {3, 6}},
-     {"\U0000093c\U00000308\U00001160", {2364, 4448}, {5, 8}},
-     {"\U0000093c\U000011a8", {2364, 4520}, {3, 6}},
-     {"\U0000093c\U00000308\U000011a8", {2364, 4520}, {5, 8}},
-     {"\U0000093c\U0000ac00", {2364, 44032}, {3, 6}},
-     {"\U0000093c\U00000308\U0000ac00", {2364, 44032}, {5, 8}},
-     {"\U0000093c\U0000ac01", {2364, 44033}, {3, 6}},
-     {"\U0000093c\U00000308\U0000ac01", {2364, 44033}, {5, 8}},
-     {"\U0000093c\U00000900", {2364}, {6}},
-     {"\U0000093c\U00000308\U00000900", {2364}, {8}},
-     {"\U0000093c\U00000903", {2364}, {6}},
-     {"\U0000093c\U00000308\U00000903", {2364}, {8}},
-     {"\U0000093c\U00000904", {2364, 2308}, {3, 6}},
-     {"\U0000093c\U00000308\U00000904", {2364, 2308}, {5, 8}},
-     {"\U0000093c\U00000d4e", {2364, 3406}, {3, 6}},
-     {"\U0000093c\U00000308\U00000d4e", {2364, 3406}, {5, 8}},
-     {"\U0000093c\U00000915", {2364, 2325}, {3, 6}},
-     {"\U0000093c\U00000308\U00000915", {2364, 2325}, {5, 8}},
-     {"\U0000093c\U0000231a", {2364, 8986}, {3, 6}},
-     {"\U0000093c\U00000308\U0000231a", {2364, 8986}, {5, 8}},
-     {"\U0000093c\U00000300", {2364}, {5}},
-     {"\U0000093c\U00000308\U00000300", {2364}, {7}},
-     {"\U0000093c\U0000093c", {2364}, {6}},
-     {"\U0000093c\U00000308\U0000093c", {2364}, {8}},
-     {"\U0000093c\U0000094d", {2364}, {6}},
-     {"\U0000093c\U00000308\U0000094d", {2364}, {8}},
-     {"\U0000093c\U0000200d", {2364}, {6}},
-     {"\U0000093c\U00000308\U0000200d", {2364}, {8}},
-     {"\U0000093c\U00000378", {2364, 888}, {3, 5}},
-     {"\U0000093c\U00000308\U00000378", {2364, 888}, {5, 7}},
+     {"\U00000900\U00000020", {2304, 32}, {3, 4}},
+     {"\U00000900\U00000308\U00000020", {2304, 32}, {5, 6}},
+     {"\U00000900\U0000000d", {2304, 13}, {3, 4}},
+     {"\U00000900\U00000308\U0000000d", {2304, 13}, {5, 6}},
+     {"\U00000900\U0000000a", {2304, 10}, {3, 4}},
+     {"\U00000900\U00000308\U0000000a", {2304, 10}, {5, 6}},
+     {"\U00000900\U00000001", {2304, 1}, {3, 4}},
+     {"\U00000900\U00000308\U00000001", {2304, 1}, {5, 6}},
+     {"\U00000900\U0000200c", {2304}, {6}},
+     {"\U00000900\U00000308\U0000200c", {2304}, {8}},
+     {"\U00000900\U0001f1e6", {2304, 127462}, {3, 7}},
+     {"\U00000900\U00000308\U0001f1e6", {2304, 127462}, {5, 9}},
+     {"\U00000900\U00000600", {2304, 1536}, {3, 5}},
+     {"\U00000900\U00000308\U00000600", {2304, 1536}, {5, 7}},
+     {"\U00000900\U00000a03", {2304}, {6}},
+     {"\U00000900\U00000308\U00000a03", {2304}, {8}},
+     {"\U00000900\U00001100", {2304, 4352}, {3, 6}},
+     {"\U00000900\U00000308\U00001100", {2304, 4352}, {5, 8}},
+     {"\U00000900\U00001160", {2304, 4448}, {3, 6}},
+     {"\U00000900\U00000308\U00001160", {2304, 4448}, {5, 8}},
+     {"\U00000900\U000011a8", {2304, 4520}, {3, 6}},
+     {"\U00000900\U00000308\U000011a8", {2304, 4520}, {5, 8}},
+     {"\U00000900\U0000ac00", {2304, 44032}, {3, 6}},
+     {"\U00000900\U00000308\U0000ac00", {2304, 44032}, {5, 8}},
+     {"\U00000900\U0000ac01", {2304, 44033}, {3, 6}},
+     {"\U00000900\U00000308\U0000ac01", {2304, 44033}, {5, 8}},
+     {"\U00000900\U00000903", {2304}, {6}},
+     {"\U00000900\U00000308\U00000903", {2304}, {8}},
+     {"\U00000900\U00000904", {2304, 2308}, {3, 6}},
+     {"\U00000900\U00000308\U00000904", {2304, 2308}, {5, 8}},
+     {"\U00000900\U00000d4e", {2304, 3406}, {3, 6}},
+     {"\U00000900\U00000308\U00000d4e", {2304, 3406}, {5, 8}},
+     {"\U00000900\U00000915", {2304, 2325}, {3, 6}},
+     {"\U00000900\U00000308\U00000915", {2304, 2325}, {5, 8}},
+     {"\U00000900\U0000231a", {2304, 8986}, {3, 6}},
+     {"\U00000900\U00000308\U0000231a", {2304, 8986}, {5, 8}},
+     {"\U00000900\U00000300", {2304}, {5}},
+     {"\U00000900\U00000308\U00000300", {2304}, {7}},
+     {"\U00000900\U00000900", {2304}, {6}},
+     {"\U00000900\U00000308\U00000900", {2304}, {8}},
+     {"\U00000900\U0000094d", {2304}, {6}},
+     {"\U00000900\U00000308\U0000094d", {2304}, {8}},
+     {"\U00000900\U0000200d", {2304}, {6}},
+     {"\U00000900\U00000308\U0000200d", {2304}, {8}},
+     {"\U00000900\U00000378", {2304, 888}, {3, 5}},
+     {"\U00000900\U00000308\U00000378", {2304, 888}, {5, 7}},
      {"\U0000094d\U00000020", {2381, 32}, {3, 4}},
      {"\U0000094d\U00000308\U00000020", {2381, 32}, {5, 6}},
      {"\U0000094d\U0000000d", {2381, 13}, {3, 4}},
@@ -1099,8 +1011,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U0000094d\U00000308\U0000000a", {2381, 10}, {5, 6}},
      {"\U0000094d\U00000001", {2381, 1}, {3, 4}},
      {"\U0000094d\U00000308\U00000001", {2381, 1}, {5, 6}},
-     {"\U0000094d\U0000034f", {2381}, {5}},
-     {"\U0000094d\U00000308\U0000034f", {2381}, {7}},
+     {"\U0000094d\U0000200c", {2381}, {6}},
+     {"\U0000094d\U00000308\U0000200c", {2381}, {8}},
      {"\U0000094d\U0001f1e6", {2381, 127462}, {3, 7}},
      {"\U0000094d\U00000308\U0001f1e6", {2381, 127462}, {5, 9}},
      {"\U0000094d\U00000600", {2381, 1536}, {3, 5}},
@@ -1117,8 +1029,6 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U0000094d\U00000308\U0000ac00", {2381, 44032}, {5, 8}},
      {"\U0000094d\U0000ac01", {2381, 44033}, {3, 6}},
      {"\U0000094d\U00000308\U0000ac01", {2381, 44033}, {5, 8}},
-     {"\U0000094d\U00000900", {2381}, {6}},
-     {"\U0000094d\U00000308\U00000900", {2381}, {8}},
      {"\U0000094d\U00000903", {2381}, {6}},
      {"\U0000094d\U00000308\U00000903", {2381}, {8}},
      {"\U0000094d\U00000904", {2381, 2308}, {3, 6}},
@@ -1131,8 +1041,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U0000094d\U00000308\U0000231a", {2381, 8986}, {5, 8}},
      {"\U0000094d\U00000300", {2381}, {5}},
      {"\U0000094d\U00000308\U00000300", {2381}, {7}},
-     {"\U0000094d\U0000093c", {2381}, {6}},
-     {"\U0000094d\U00000308\U0000093c", {2381}, {8}},
+     {"\U0000094d\U00000900", {2381}, {6}},
+     {"\U0000094d\U00000308\U00000900", {2381}, {8}},
      {"\U0000094d\U0000094d", {2381}, {6}},
      {"\U0000094d\U00000308\U0000094d", {2381}, {8}},
      {"\U0000094d\U0000200d", {2381}, {6}},
@@ -1147,8 +1057,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U0000200d\U00000308\U0000000a", {8205, 10}, {5, 6}},
      {"\U0000200d\U00000001", {8205, 1}, {3, 4}},
      {"\U0000200d\U00000308\U00000001", {8205, 1}, {5, 6}},
-     {"\U0000200d\U0000034f", {8205}, {5}},
-     {"\U0000200d\U00000308\U0000034f", {8205}, {7}},
+     {"\U0000200d\U0000200c", {8205}, {6}},
+     {"\U0000200d\U00000308\U0000200c", {8205}, {8}},
      {"\U0000200d\U0001f1e6", {8205, 127462}, {3, 7}},
      {"\U0000200d\U00000308\U0001f1e6", {8205, 127462}, {5, 9}},
      {"\U0000200d\U00000600", {8205, 1536}, {3, 5}},
@@ -1165,8 +1075,6 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U0000200d\U00000308\U0000ac00", {8205, 44032}, {5, 8}},
      {"\U0000200d\U0000ac01", {8205, 44033}, {3, 6}},
      {"\U0000200d\U00000308\U0000ac01", {8205, 44033}, {5, 8}},
-     {"\U0000200d\U00000900", {8205}, {6}},
-     {"\U0000200d\U00000308\U00000900", {8205}, {8}},
      {"\U0000200d\U00000903", {8205}, {6}},
      {"\U0000200d\U00000308\U00000903", {8205}, {8}},
      {"\U0000200d\U00000904", {8205, 2308}, {3, 6}},
@@ -1179,8 +1087,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U0000200d\U00000308\U0000231a", {8205, 8986}, {5, 8}},
      {"\U0000200d\U00000300", {8205}, {5}},
      {"\U0000200d\U00000308\U00000300", {8205}, {7}},
-     {"\U0000200d\U0000093c", {8205}, {6}},
-     {"\U0000200d\U00000308\U0000093c", {8205}, {8}},
+     {"\U0000200d\U00000900", {8205}, {6}},
+     {"\U0000200d\U00000308\U00000900", {8205}, {8}},
      {"\U0000200d\U0000094d", {8205}, {6}},
      {"\U0000200d\U00000308\U0000094d", {8205}, {8}},
      {"\U0000200d\U0000200d", {8205}, {6}},
@@ -1195,8 +1103,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00000378\U00000308\U0000000a", {888, 10}, {4, 5}},
      {"\U00000378\U00000001", {888, 1}, {2, 3}},
      {"\U00000378\U00000308\U00000001", {888, 1}, {4, 5}},
-     {"\U00000378\U0000034f", {888}, {4}},
-     {"\U00000378\U00000308\U0000034f", {888}, {6}},
+     {"\U00000378\U0000200c", {888}, {5}},
+     {"\U00000378\U00000308\U0000200c", {888}, {7}},
      {"\U00000378\U0001f1e6", {888, 127462}, {2, 6}},
      {"\U00000378\U00000308\U0001f1e6", {888, 127462}, {4, 8}},
      {"\U00000378\U00000600", {888, 1536}, {2, 4}},
@@ -1213,8 +1121,6 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00000378\U00000308\U0000ac00", {888, 44032}, {4, 7}},
      {"\U00000378\U0000ac01", {888, 44033}, {2, 5}},
      {"\U00000378\U00000308\U0000ac01", {888, 44033}, {4, 7}},
-     {"\U00000378\U00000900", {888}, {5}},
-     {"\U00000378\U00000308\U00000900", {888}, {7}},
      {"\U00000378\U00000903", {888}, {5}},
      {"\U00000378\U00000308\U00000903", {888}, {7}},
      {"\U00000378\U00000904", {888, 2308}, {2, 5}},
@@ -1227,8 +1133,8 @@ std::array<data<char>, 1187> data_utf8 = {{
      {"\U00000378\U00000308\U0000231a", {888, 8986}, {4, 7}},
      {"\U00000378\U00000300", {888}, {4}},
      {"\U00000378\U00000308\U00000300", {888}, {6}},
-     {"\U00000378\U0000093c", {888}, {5}},
-     {"\U00000378\U00000308\U0000093c", {888}, {7}},
+     {"\U00000378\U00000900", {888}, {5}},
+     {"\U00000378\U00000308\U00000900", {888}, {7}},
      {"\U00000378\U0000094d", {888}, {5}},
      {"\U00000378\U00000308\U0000094d", {888}, {7}},
      {"\U00000378\U0000200d", {888}, {5}},
@@ -1277,7 +1183,7 @@ std::array<data<char>, 1187> data_utf8 = {{
 /// since the size of the code units differ the breaks can contain different
 /// values.
 #ifndef TEST_HAS_NO_WIDE_CHARACTERS
-std::array<data<wchar_t>, 1187> data_utf16 = {{
+std::array<data<wchar_t>, 1093> data_utf16 = {{
      {L"\U00000020\U00000020", {32, 32}, {1, 2}},
      {L"\U00000020\U00000308\U00000020", {32, 32}, {2, 3}},
      {L"\U00000020\U0000000d", {32, 13}, {1, 2}},
@@ -1286,8 +1192,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00000020\U00000308\U0000000a", {32, 10}, {2, 3}},
      {L"\U00000020\U00000001", {32, 1}, {1, 2}},
      {L"\U00000020\U00000308\U00000001", {32, 1}, {2, 3}},
-     {L"\U00000020\U0000034f", {32}, {2}},
-     {L"\U00000020\U00000308\U0000034f", {32}, {3}},
+     {L"\U00000020\U0000200c", {32}, {2}},
+     {L"\U00000020\U00000308\U0000200c", {32}, {3}},
      {L"\U00000020\U0001f1e6", {32, 127462}, {1, 3}},
      {L"\U00000020\U00000308\U0001f1e6", {32, 127462}, {2, 4}},
      {L"\U00000020\U00000600", {32, 1536}, {1, 2}},
@@ -1304,8 +1210,6 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00000020\U00000308\U0000ac00", {32, 44032}, {2, 3}},
      {L"\U00000020\U0000ac01", {32, 44033}, {1, 2}},
      {L"\U00000020\U00000308\U0000ac01", {32, 44033}, {2, 3}},
-     {L"\U00000020\U00000900", {32}, {2}},
-     {L"\U00000020\U00000308\U00000900", {32}, {3}},
      {L"\U00000020\U00000903", {32}, {2}},
      {L"\U00000020\U00000308\U00000903", {32}, {3}},
      {L"\U00000020\U00000904", {32, 2308}, {1, 2}},
@@ -1318,8 +1222,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00000020\U00000308\U0000231a", {32, 8986}, {2, 3}},
      {L"\U00000020\U00000300", {32}, {2}},
      {L"\U00000020\U00000308\U00000300", {32}, {3}},
-     {L"\U00000020\U0000093c", {32}, {2}},
-     {L"\U00000020\U00000308\U0000093c", {32}, {3}},
+     {L"\U00000020\U00000900", {32}, {2}},
+     {L"\U00000020\U00000308\U00000900", {32}, {3}},
      {L"\U00000020\U0000094d", {32}, {2}},
      {L"\U00000020\U00000308\U0000094d", {32}, {3}},
      {L"\U00000020\U0000200d", {32}, {2}},
@@ -1334,8 +1238,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U0000000d\U00000308\U0000000a", {13, 776, 10}, {1, 2, 3}},
      {L"\U0000000d\U00000001", {13, 1}, {1, 2}},
      {L"\U0000000d\U00000308\U00000001", {13, 776, 1}, {1, 2, 3}},
-     {L"\U0000000d\U0000034f", {13, 847}, {1, 2}},
-     {L"\U0000000d\U00000308\U0000034f", {13, 776}, {1, 3}},
+     {L"\U0000000d\U0000200c", {13, 8204}, {1, 2}},
+     {L"\U0000000d\U00000308\U0000200c", {13, 776}, {1, 3}},
      {L"\U0000000d\U0001f1e6", {13, 127462}, {1, 3}},
      {L"\U0000000d\U00000308\U0001f1e6", {13, 776, 127462}, {1, 2, 4}},
      {L"\U0000000d\U00000600", {13, 1536}, {1, 2}},
@@ -1352,8 +1256,6 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U0000000d\U00000308\U0000ac00", {13, 776, 44032}, {1, 2, 3}},
      {L"\U0000000d\U0000ac01", {13, 44033}, {1, 2}},
      {L"\U0000000d\U00000308\U0000ac01", {13, 776, 44033}, {1, 2, 3}},
-     {L"\U0000000d\U00000900", {13, 2304}, {1, 2}},
-     {L"\U0000000d\U00000308\U00000900", {13, 776}, {1, 3}},
      {L"\U0000000d\U00000903", {13, 2307}, {1, 2}},
      {L"\U0000000d\U00000308\U00000903", {13, 776}, {1, 3}},
      {L"\U0000000d\U00000904", {13, 2308}, {1, 2}},
@@ -1366,8 +1268,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U0000000d\U00000308\U0000231a", {13, 776, 8986}, {1, 2, 3}},
      {L"\U0000000d\U00000300", {13, 768}, {1, 2}},
      {L"\U0000000d\U00000308\U00000300", {13, 776}, {1, 3}},
-     {L"\U0000000d\U0000093c", {13, 2364}, {1, 2}},
-     {L"\U0000000d\U00000308\U0000093c", {13, 776}, {1, 3}},
+     {L"\U0000000d\U00000900", {13, 2304}, {1, 2}},
+     {L"\U0000000d\U00000308\U00000900", {13, 776}, {1, 3}},
      {L"\U0000000d\U0000094d", {13, 2381}, {1, 2}},
      {L"\U0000000d\U00000308\U0000094d", {13, 776}, {1, 3}},
      {L"\U0000000d\U0000200d", {13, 8205}, {1, 2}},
@@ -1382,8 +1284,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U0000000a\U00000308\U0000000a", {10, 776, 10}, {1, 2, 3}},
      {L"\U0000000a\U00000001", {10, 1}, {1, 2}},
      {L"\U0000000a\U00000308\U00000001", {10, 776, 1}, {1, 2, 3}},
-     {L"\U0000000a\U0000034f", {10, 847}, {1, 2}},
-     {L"\U0000000a\U00000308\U0000034f", {10, 776}, {1, 3}},
+     {L"\U0000000a\U0000200c", {10, 8204}, {1, 2}},
+     {L"\U0000000a\U00000308\U0000200c", {10, 776}, {1, 3}},
      {L"\U0000000a\U0001f1e6", {10, 127462}, {1, 3}},
      {L"\U0000000a\U00000308\U0001f1e6", {10, 776, 127462}, {1, 2, 4}},
      {L"\U0000000a\U00000600", {10, 1536}, {1, 2}},
@@ -1400,8 +1302,6 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U0000000a\U00000308\U0000ac00", {10, 776, 44032}, {1, 2, 3}},
      {L"\U0000000a\U0000ac01", {10, 44033}, {1, 2}},
      {L"\U0000000a\U00000308\U0000ac01", {10, 776, 44033}, {1, 2, 3}},
-     {L"\U0000000a\U00000900", {10, 2304}, {1, 2}},
-     {L"\U0000000a\U00000308\U00000900", {10, 776}, {1, 3}},
      {L"\U0000000a\U00000903", {10, 2307}, {1, 2}},
      {L"\U0000000a\U00000308\U00000903", {10, 776}, {1, 3}},
      {L"\U0000000a\U00000904", {10, 2308}, {1, 2}},
@@ -1414,8 +1314,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U0000000a\U00000308\U0000231a", {10, 776, 8986}, {1, 2, 3}},
      {L"\U0000000a\U00000300", {10, 768}, {1, 2}},
      {L"\U0000000a\U00000308\U00000300", {10, 776}, {1, 3}},
-     {L"\U0000000a\U0000093c", {10, 2364}, {1, 2}},
-     {L"\U0000000a\U00000308\U0000093c", {10, 776}, {1, 3}},
+     {L"\U0000000a\U00000900", {10, 2304}, {1, 2}},
+     {L"\U0000000a\U00000308\U00000900", {10, 776}, {1, 3}},
      {L"\U0000000a\U0000094d", {10, 2381}, {1, 2}},
      {L"\U0000000a\U00000308\U0000094d", {10, 776}, {1, 3}},
      {L"\U0000000a\U0000200d", {10, 8205}, {1, 2}},
@@ -1430,8 +1330,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00000001\U00000308\U0000000a", {1, 776, 10}, {1, 2, 3}},
      {L"\U00000001\U00000001", {1, 1}, {1, 2}},
      {L"\U00000001\U00000308\U00000001", {1, 776, 1}, {1, 2, 3}},
-     {L"\U00000001\U0000034f", {1, 847}, {1, 2}},
-     {L"\U00000001\U00000308\U0000034f", {1, 776}, {1, 3}},
+     {L"\U00000001\U0000200c", {1, 8204}, {1, 2}},
+     {L"\U00000001\U00000308\U0000200c", {1, 776}, {1, 3}},
      {L"\U00000001\U0001f1e6", {1, 127462}, {1, 3}},
      {L"\U00000001\U00000308\U0001f1e6", {1, 776, 127462}, {1, 2, 4}},
      {L"\U00000001\U00000600", {1, 1536}, {1, 2}},
@@ -1448,8 +1348,6 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00000001\U00000308\U0000ac00", {1, 776, 44032}, {1, 2, 3}},
      {L"\U00000001\U0000ac01", {1, 44033}, {1, 2}},
      {L"\U00000001\U00000308\U0000ac01", {1, 776, 44033}, {1, 2, 3}},
-     {L"\U00000001\U00000900", {1, 2304}, {1, 2}},
-     {L"\U00000001\U00000308\U00000900", {1, 776}, {1, 3}},
      {L"\U00000001\U00000903", {1, 2307}, {1, 2}},
      {L"\U00000001\U00000308\U00000903", {1, 776}, {1, 3}},
      {L"\U00000001\U00000904", {1, 2308}, {1, 2}},
@@ -1462,62 +1360,60 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00000001\U00000308\U0000231a", {1, 776, 8986}, {1, 2, 3}},
      {L"\U00000001\U00000300", {1, 768}, {1, 2}},
      {L"\U00000001\U00000308\U00000300", {1, 776}, {1, 3}},
-     {L"\U00000001\U0000093c", {1, 2364}, {1, 2}},
-     {L"\U00000001\U00000308\U0000093c", {1, 776}, {1, 3}},
+     {L"\U00000001\U00000900", {1, 2304}, {1, 2}},
+     {L"\U00000001\U00000308\U00000900", {1, 776}, {1, 3}},
      {L"\U00000001\U0000094d", {1, 2381}, {1, 2}},
      {L"\U00000001\U00000308\U0000094d", {1, 776}, {1, 3}},
      {L"\U00000001\U0000200d", {1, 8205}, {1, 2}},
      {L"\U00000001\U00000308\U0000200d", {1, 776}, {1, 3}},
      {L"\U00000001\U00000378", {1, 888}, {1, 2}},
      {L"\U00000001\U00000308\U00000378", {1, 776, 888}, {1, 2, 3}},
-     {L"\U0000034f\U00000020", {847, 32}, {1, 2}},
-     {L"\U0000034f\U00000308\U00000020", {847, 32}, {2, 3}},
-     {L"\U0000034f\U0000000d", {847, 13}, {1, 2}},
-     {L"\U0000034f\U00000308\U0000000d", {847, 13}, {2, 3}},
-     {L"\U0000034f\U0000000a", {847, 10}, {1, 2}},
-     {L"\U0000034f\U00000308\U0000000a", {847, 10}, {2, 3}},
-     {L"\U0000034f\U00000001", {847, 1}, {1, 2}},
-     {L"\U0000034f\U00000308\U00000001", {847, 1}, {2, 3}},
-     {L"\U0000034f\U0000034f", {847}, {2}},
-     {L"\U0000034f\U00000308\U0000034f", {847}, {3}},
-     {L"\U0000034f\U0001f1e6", {847, 127462}, {1, 3}},
-     {L"\U0000034f\U00000308\U0001f1e6", {847, 127462}, {2, 4}},
-     {L"\U0000034f\U00000600", {847, 1536}, {1, 2}},
-     {L"\U0000034f\U00000308\U00000600", {847, 1536}, {2, 3}},
-     {L"\U0000034f\U00000a03", {847}, {2}},
-     {L"\U0000034f\U00000308\U00000a03", {847}, {3}},
-     {L"\U0000034f\U00001100", {847, 4352}, {1, 2}},
-     {L"\U0000034f\U00000308\U00001100", {847, 4352}, {2, 3}},
-     {L"\U0000034f\U00001160", {847, 4448}, {1, 2}},
-     {L"\U0000034f\U00000308\U00001160", {847, 4448}, {2, 3}},
-     {L"\U0000034f\U000011a8", {847, 4520}, {1, 2}},
-     {L"\U0000034f\U00000308\U000011a8", {847, 4520}, {2, 3}},
-     {L"\U0000034f\U0000ac00", {847, 44032}, {1, 2}},
-     {L"\U0000034f\U00000308\U0000ac00", {847, 44032}, {2, 3}},
-     {L"\U0000034f\U0000ac01", {847, 44033}, {1, 2}},
-     {L"\U0000034f\U00000308\U0000ac01", {847, 44033}, {2, 3}},
-     {L"\U0000034f\U00000900", {847}, {2}},
-     {L"\U0000034f\U00000308\U00000900", {847}, {3}},
-     {L"\U0000034f\U00000903", {847}, {2}},
-     {L"\U0000034f\U00000308\U00000903", {847}, {3}},
-     {L"\U0000034f\U00000904", {847, 2308}, {1, 2}},
-     {L"\U0000034f\U00000308\U00000904", {847, 2308}, {2, 3}},
-     {L"\U0000034f\U00000d4e", {847, 3406}, {1, 2}},
-     {L"\U0000034f\U00000308\U00000d4e", {847, 3406}, {2, 3}},
-     {L"\U0000034f\U00000915", {847, 2325}, {1, 2}},
-     {L"\U0000034f\U00000308\U00000915", {847, 2325}, {2, 3}},
-     {L"\U0000034f\U0000231a", {847, 8986}, {1, 2}},
-     {L"\U0000034f\U00000308\U0000231a", {847, 8986}, {2, 3}},
-     {L"\U0000034f\U00000300", {847}, {2}},
-     {L"\U0000034f\U00000308\U00000300", {847}, {3}},
-     {L"\U0000034f\U0000093c", {847}, {2}},
-     {L"\U0000034f\U00000308\U0000093c", {847}, {3}},
-     {L"\U0000034f\U0000094d", {847}, {2}},
-     {L"\U0000034f\U00000308\U0000094d", {847}, {3}},
-     {L"\U0000034f\U0000200d", {847}, {2}},
-     {L"\U0000034f\U00000308\U0000200d", {847}, {3}},
-     {L"\U0000034f\U00000378", {847, 888}, {1, 2}},
-     {L"\U0000034f\U00000308\U00000378", {847, 888}, {2, 3}},
+     {L"\U0000200c\U00000020", {8204, 32}, {1, 2}},
+     {L"\U0000200c\U00000308\U00000020", {8204, 32}, {2, 3}},
+     {L"\U0000200c\U0000000d", {8204, 13}, {1, 2}},
+     {L"\U0000200c\U00000308\U0000000d", {8204, 13}, {2, 3}},
+     {L"\U0000200c\U0000000a", {8204, 10}, {1, 2}},
+     {L"\U0000200c\U00000308\U0000000a", {8204, 10}, {2, 3}},
+     {L"\U0000200c\U00000001", {8204, 1}, {1, 2}},
+     {L"\U0000200c\U00000308\U00000001", {8204, 1}, {2, 3}},
+     {L"\U0000200c\U0000200c", {8204}, {2}},
+     {L"\U0000200c\U00000308\U0000200c", {8204}, {3}},
+     {L"\U0000200c\U0001f1e6", {8204, 127462}, {1, 3}},
+     {L"\U0000200c\U00000308\U0001f1e6", {8204, 127462}, {2, 4}},
+     {L"\U0000200c\U00000600", {8204, 1536}, {1, 2}},
+     {L"\U0000200c\U00000308\U00000600", {8204, 1536}, {2, 3}},
+     {L"\U0000200c\U00000a03", {8204}, {2}},
+     {L"\U0000200c\U00000308\U00000a03", {8204}, {3}},
+     {L"\U0000200c\U00001100", {8204, 4352}, {1, 2}},
+     {L"\U0000200c\U00000308\U00001100", {8204, 4352}, {2, 3}},
+     {L"\U0000200c\U00001160", {8204, 4448}, {1, 2}},
+     {L"\U0000200c\U00000308\U00001160", {8204, 4448}, {2, 3}},
+     {L"\U0000200c\U000011a8", {8204, 4520}, {1, 2}},
+     {L"\U0000200c\U00000308\U000011a8", {8204, 4520}, {2, 3}},
+     {L"\U0000200c\U0000ac00", {8204, 44032}, {1, 2}},
+     {L"\U0000200c\U00000308\U0000ac00", {8204, 44032}, {2, 3}},
+     {L"\U0000200c\U0000ac01", {8204, 44033}, {1, 2}},
+     {L"\U0000200c\U00000308\U0000ac01", {8204, 44033}, {2, 3}},
+     {L"\U0000200c\U00000903", {8204}, {2}},
+     {L"\U0000200c\U00000308\U00000903", {8204}, {3}},
+     {L"\U0000200c\U00000904", {8204, 2308}, {1, 2}},
+     {L"\U0000200c\U00000308\U00000904", {8204, 2308}, {2, 3}},
+     {L"\U0000200c\U00000d4e", {8204, 3406}, {1, 2}},
+     {L"\U0000200c\U00000308\U00000d4e", {8204, 3406}, {2, 3}},
+     {L"\U0000200c\U00000915", {8204, 2325}, {1, 2}},
+     {L"\U0000200c\U00000308\U00000915", {8204, 2325}, {2, 3}},
+     {L"\U0000200c\U0000231a", {8204, 8986}, {1, 2}},
+     {L"\U0000200c\U00000308\U0000231a", {8204, 8986}, {2, 3}},
+     {L"\U0000200c\U00000300", {8204}, {2}},
+     {L"\U0000200c\U00000308\U00000300", {8204}, {3}},
+     {L"\U0000200c\U00000900", {8204}, {2}},
+     {L"\U0000200c\U00000308\U00000900", {8204}, {3}},
+     {L"\U0000200c\U0000094d", {8204}, {2}},
+     {L"\U0000200c\U00000308\U0000094d", {8204}, {3}},
+     {L"\U0000200c\U0000200d", {8204}, {2}},
+     {L"\U0000200c\U00000308\U0000200d", {8204}, {3}},
+     {L"\U0000200c\U00000378", {8204, 888}, {1, 2}},
+     {L"\U0000200c\U00000308\U00000378", {8204, 888}, {2, 3}},
      {L"\U0001f1e6\U00000020", {127462, 32}, {2, 3}},
      {L"\U0001f1e6\U00000308\U00000020", {127462, 32}, {3, 4}},
      {L"\U0001f1e6\U0000000d", {127462, 13}, {2, 3}},
@@ -1526,8 +1422,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U0001f1e6\U00000308\U0000000a", {127462, 10}, {3, 4}},
      {L"\U0001f1e6\U00000001", {127462, 1}, {2, 3}},
      {L"\U0001f1e6\U00000308\U00000001", {127462, 1}, {3, 4}},
-     {L"\U0001f1e6\U0000034f", {127462}, {3}},
-     {L"\U0001f1e6\U00000308\U0000034f", {127462}, {4}},
+     {L"\U0001f1e6\U0000200c", {127462}, {3}},
+     {L"\U0001f1e6\U00000308\U0000200c", {127462}, {4}},
      {L"\U0001f1e6\U0001f1e6", {127462}, {4}},
      {L"\U0001f1e6\U00000308\U0001f1e6", {127462, 127462}, {3, 5}},
      {L"\U0001f1e6\U00000600", {127462, 1536}, {2, 3}},
@@ -1544,8 +1440,6 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U0001f1e6\U00000308\U0000ac00", {127462, 44032}, {3, 4}},
      {L"\U0001f1e6\U0000ac01", {127462, 44033}, {2, 3}},
      {L"\U0001f1e6\U00000308\U0000ac01", {127462, 44033}, {3, 4}},
-     {L"\U0001f1e6\U00000900", {127462}, {3}},
-     {L"\U0001f1e6\U00000308\U00000900", {127462}, {4}},
      {L"\U0001f1e6\U00000903", {127462}, {3}},
      {L"\U0001f1e6\U00000308\U00000903", {127462}, {4}},
      {L"\U0001f1e6\U00000904", {127462, 2308}, {2, 3}},
@@ -1558,8 +1452,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U0001f1e6\U00000308\U0000231a", {127462, 8986}, {3, 4}},
      {L"\U0001f1e6\U00000300", {127462}, {3}},
      {L"\U0001f1e6\U00000308\U00000300", {127462}, {4}},
-     {L"\U0001f1e6\U0000093c", {127462}, {3}},
-     {L"\U0001f1e6\U00000308\U0000093c", {127462}, {4}},
+     {L"\U0001f1e6\U00000900", {127462}, {3}},
+     {L"\U0001f1e6\U00000308\U00000900", {127462}, {4}},
      {L"\U0001f1e6\U0000094d", {127462}, {3}},
      {L"\U0001f1e6\U00000308\U0000094d", {127462}, {4}},
      {L"\U0001f1e6\U0000200d", {127462}, {3}},
@@ -1574,8 +1468,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00000600\U00000308\U0000000a", {1536, 10}, {2, 3}},
      {L"\U00000600\U00000001", {1536, 1}, {1, 2}},
      {L"\U00000600\U00000308\U00000001", {1536, 1}, {2, 3}},
-     {L"\U00000600\U0000034f", {1536}, {2}},
-     {L"\U00000600\U00000308\U0000034f", {1536}, {3}},
+     {L"\U00000600\U0000200c", {1536}, {2}},
+     {L"\U00000600\U00000308\U0000200c", {1536}, {3}},
      {L"\U00000600\U0001f1e6", {1536}, {3}},
      {L"\U00000600\U00000308\U0001f1e6", {1536, 127462}, {2, 4}},
      {L"\U00000600\U00000600", {1536}, {2}},
@@ -1592,8 +1486,6 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00000600\U00000308\U0000ac00", {1536, 44032}, {2, 3}},
      {L"\U00000600\U0000ac01", {1536}, {2}},
      {L"\U00000600\U00000308\U0000ac01", {1536, 44033}, {2, 3}},
-     {L"\U00000600\U00000900", {1536}, {2}},
-     {L"\U00000600\U00000308\U00000900", {1536}, {3}},
      {L"\U00000600\U00000903", {1536}, {2}},
      {L"\U00000600\U00000308\U00000903", {1536}, {3}},
      {L"\U00000600\U00000904", {1536}, {2}},
@@ -1606,8 +1498,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00000600\U00000308\U0000231a", {1536, 8986}, {2, 3}},
      {L"\U00000600\U00000300", {1536}, {2}},
      {L"\U00000600\U00000308\U00000300", {1536}, {3}},
-     {L"\U00000600\U0000093c", {1536}, {2}},
-     {L"\U00000600\U00000308\U0000093c", {1536}, {3}},
+     {L"\U00000600\U00000900", {1536}, {2}},
+     {L"\U00000600\U00000308\U00000900", {1536}, {3}},
      {L"\U00000600\U0000094d", {1536}, {2}},
      {L"\U00000600\U00000308\U0000094d", {1536}, {3}},
      {L"\U00000600\U0000200d", {1536}, {2}},
@@ -1622,8 +1514,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00000a03\U00000308\U0000000a", {2563, 10}, {2, 3}},
      {L"\U00000a03\U00000001", {2563, 1}, {1, 2}},
      {L"\U00000a03\U00000308\U00000001", {2563, 1}, {2, 3}},
-     {L"\U00000a03\U0000034f", {2563}, {2}},
-     {L"\U00000a03\U00000308\U0000034f", {2563}, {3}},
+     {L"\U00000a03\U0000200c", {2563}, {2}},
+     {L"\U00000a03\U00000308\U0000200c", {2563}, {3}},
      {L"\U00000a03\U0001f1e6", {2563, 127462}, {1, 3}},
      {L"\U00000a03\U00000308\U0001f1e6", {2563, 127462}, {2, 4}},
      {L"\U00000a03\U00000600", {2563, 1536}, {1, 2}},
@@ -1640,8 +1532,6 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00000a03\U00000308\U0000ac00", {2563, 44032}, {2, 3}},
      {L"\U00000a03\U0000ac01", {2563, 44033}, {1, 2}},
      {L"\U00000a03\U00000308\U0000ac01", {2563, 44033}, {2, 3}},
-     {L"\U00000a03\U00000900", {2563}, {2}},
-     {L"\U00000a03\U00000308\U00000900", {2563}, {3}},
      {L"\U00000a03\U00000903", {2563}, {2}},
      {L"\U00000a03\U00000308\U00000903", {2563}, {3}},
      {L"\U00000a03\U00000904", {2563, 2308}, {1, 2}},
@@ -1654,8 +1544,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00000a03\U00000308\U0000231a", {2563, 8986}, {2, 3}},
      {L"\U00000a03\U00000300", {2563}, {2}},
      {L"\U00000a03\U00000308\U00000300", {2563}, {3}},
-     {L"\U00000a03\U0000093c", {2563}, {2}},
-     {L"\U00000a03\U00000308\U0000093c", {2563}, {3}},
+     {L"\U00000a03\U00000900", {2563}, {2}},
+     {L"\U00000a03\U00000308\U00000900", {2563}, {3}},
      {L"\U00000a03\U0000094d", {2563}, {2}},
      {L"\U00000a03\U00000308\U0000094d", {2563}, {3}},
      {L"\U00000a03\U0000200d", {2563}, {2}},
@@ -1670,8 +1560,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00001100\U00000308\U0000000a", {4352, 10}, {2, 3}},
      {L"\U00001100\U00000001", {4352, 1}, {1, 2}},
      {L"\U00001100\U00000308\U00000001", {4352, 1}, {2, 3}},
-     {L"\U00001100\U0000034f", {4352}, {2}},
-     {L"\U00001100\U00000308\U0000034f", {4352}, {3}},
+     {L"\U00001100\U0000200c", {4352}, {2}},
+     {L"\U00001100\U00000308\U0000200c", {4352}, {3}},
      {L"\U00001100\U0001f1e6", {4352, 127462}, {1, 3}},
      {L"\U00001100\U00000308\U0001f1e6", {4352, 127462}, {2, 4}},
      {L"\U00001100\U00000600", {4352, 1536}, {1, 2}},
@@ -1688,8 +1578,6 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00001100\U00000308\U0000ac00", {4352, 44032}, {2, 3}},
      {L"\U00001100\U0000ac01", {4352}, {2}},
      {L"\U00001100\U00000308\U0000ac01", {4352, 44033}, {2, 3}},
-     {L"\U00001100\U00000900", {4352}, {2}},
-     {L"\U00001100\U00000308\U00000900", {4352}, {3}},
      {L"\U00001100\U00000903", {4352}, {2}},
      {L"\U00001100\U00000308\U00000903", {4352}, {3}},
      {L"\U00001100\U00000904", {4352, 2308}, {1, 2}},
@@ -1702,8 +1590,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00001100\U00000308\U0000231a", {4352, 8986}, {2, 3}},
      {L"\U00001100\U00000300", {4352}, {2}},
      {L"\U00001100\U00000308\U00000300", {4352}, {3}},
-     {L"\U00001100\U0000093c", {4352}, {2}},
-     {L"\U00001100\U00000308\U0000093c", {4352}, {3}},
+     {L"\U00001100\U00000900", {4352}, {2}},
+     {L"\U00001100\U00000308\U00000900", {4352}, {3}},
      {L"\U00001100\U0000094d", {4352}, {2}},
      {L"\U00001100\U00000308\U0000094d", {4352}, {3}},
      {L"\U00001100\U0000200d", {4352}, {2}},
@@ -1718,8 +1606,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00001160\U00000308\U0000000a", {4448, 10}, {2, 3}},
      {L"\U00001160\U00000001", {4448, 1}, {1, 2}},
      {L"\U00001160\U00000308\U00000001", {4448, 1}, {2, 3}},
-     {L"\U00001160\U0000034f", {4448}, {2}},
-     {L"\U00001160\U00000308\U0000034f", {4448}, {3}},
+     {L"\U00001160\U0000200c", {4448}, {2}},
+     {L"\U00001160\U00000308\U0000200c", {4448}, {3}},
      {L"\U00001160\U0001f1e6", {4448, 127462}, {1, 3}},
      {L"\U00001160\U00000308\U0001f1e6", {4448, 127462}, {2, 4}},
      {L"\U00001160\U00000600", {4448, 1536}, {1, 2}},
@@ -1736,8 +1624,6 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00001160\U00000308\U0000ac00", {4448, 44032}, {2, 3}},
      {L"\U00001160\U0000ac01", {4448, 44033}, {1, 2}},
      {L"\U00001160\U00000308\U0000ac01", {4448, 44033}, {2, 3}},
-     {L"\U00001160\U00000900", {4448}, {2}},
-     {L"\U00001160\U00000308\U00000900", {4448}, {3}},
      {L"\U00001160\U00000903", {4448}, {2}},
      {L"\U00001160\U00000308\U00000903", {4448}, {3}},
      {L"\U00001160\U00000904", {4448, 2308}, {1, 2}},
@@ -1750,8 +1636,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00001160\U00000308\U0000231a", {4448, 8986}, {2, 3}},
      {L"\U00001160\U00000300", {4448}, {2}},
      {L"\U00001160\U00000308\U00000300", {4448}, {3}},
-     {L"\U00001160\U0000093c", {4448}, {2}},
-     {L"\U00001160\U00000308\U0000093c", {4448}, {3}},
+     {L"\U00001160\U00000900", {4448}, {2}},
+     {L"\U00001160\U00000308\U00000900", {4448}, {3}},
      {L"\U00001160\U0000094d", {4448}, {2}},
      {L"\U00001160\U00000308\U0000094d", {4448}, {3}},
      {L"\U00001160\U0000200d", {4448}, {2}},
@@ -1766,8 +1652,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U000011a8\U00000308\U0000000a", {4520, 10}, {2, 3}},
      {L"\U000011a8\U00000001", {4520, 1}, {1, 2}},
      {L"\U000011a8\U00000308\U00000001", {4520, 1}, {2, 3}},
-     {L"\U000011a8\U0000034f", {4520}, {2}},
-     {L"\U000011a8\U00000308\U0000034f", {4520}, {3}},
+     {L"\U000011a8\U0000200c", {4520}, {2}},
+     {L"\U000011a8\U00000308\U0000200c", {4520}, {3}},
      {L"\U000011a8\U0001f1e6", {4520, 127462}, {1, 3}},
      {L"\U000011a8\U00000308\U0001f1e6", {4520, 127462}, {2, 4}},
      {L"\U000011a8\U00000600", {4520, 1536}, {1, 2}},
@@ -1784,8 +1670,6 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U000011a8\U00000308\U0000ac00", {4520, 44032}, {2, 3}},
      {L"\U000011a8\U0000ac01", {4520, 44033}, {1, 2}},
      {L"\U000011a8\U00000308\U0000ac01", {4520, 44033}, {2, 3}},
-     {L"\U000011a8\U00000900", {4520}, {2}},
-     {L"\U000011a8\U00000308\U00000900", {4520}, {3}},
      {L"\U000011a8\U00000903", {4520}, {2}},
      {L"\U000011a8\U00000308\U00000903", {4520}, {3}},
      {L"\U000011a8\U00000904", {4520, 2308}, {1, 2}},
@@ -1798,8 +1682,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U000011a8\U00000308\U0000231a", {4520, 8986}, {2, 3}},
      {L"\U000011a8\U00000300", {4520}, {2}},
      {L"\U000011a8\U00000308\U00000300", {4520}, {3}},
-     {L"\U000011a8\U0000093c", {4520}, {2}},
-     {L"\U000011a8\U00000308\U0000093c", {4520}, {3}},
+     {L"\U000011a8\U00000900", {4520}, {2}},
+     {L"\U000011a8\U00000308\U00000900", {4520}, {3}},
      {L"\U000011a8\U0000094d", {4520}, {2}},
      {L"\U000011a8\U00000308\U0000094d", {4520}, {3}},
      {L"\U000011a8\U0000200d", {4520}, {2}},
@@ -1814,8 +1698,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U0000ac00\U00000308\U0000000a", {44032, 10}, {2, 3}},
      {L"\U0000ac00\U00000001", {44032, 1}, {1, 2}},
      {L"\U0000ac00\U00000308\U00000001", {44032, 1}, {2, 3}},
-     {L"\U0000ac00\U0000034f", {44032}, {2}},
-     {L"\U0000ac00\U00000308\U0000034f", {44032}, {3}},
+     {L"\U0000ac00\U0000200c", {44032}, {2}},
+     {L"\U0000ac00\U00000308\U0000200c", {44032}, {3}},
      {L"\U0000ac00\U0001f1e6", {44032, 127462}, {1, 3}},
      {L"\U0000ac00\U00000308\U0001f1e6", {44032, 127462}, {2, 4}},
      {L"\U0000ac00\U00000600", {44032, 1536}, {1, 2}},
@@ -1832,8 +1716,6 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U0000ac00\U00000308\U0000ac00", {44032, 44032}, {2, 3}},
      {L"\U0000ac00\U0000ac01", {44032, 44033}, {1, 2}},
      {L"\U0000ac00\U00000308\U0000ac01", {44032, 44033}, {2, 3}},
-     {L"\U0000ac00\U00000900", {44032}, {2}},
-     {L"\U0000ac00\U00000308\U00000900", {44032}, {3}},
      {L"\U0000ac00\U00000903", {44032}, {2}},
      {L"\U0000ac00\U00000308\U00000903", {44032}, {3}},
      {L"\U0000ac00\U00000904", {44032, 2308}, {1, 2}},
@@ -1846,8 +1728,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U0000ac00\U00000308\U0000231a", {44032, 8986}, {2, 3}},
      {L"\U0000ac00\U00000300", {44032}, {2}},
      {L"\U0000ac00\U00000308\U00000300", {44032}, {3}},
-     {L"\U0000ac00\U0000093c", {44032}, {2}},
-     {L"\U0000ac00\U00000308\U0000093c", {44032}, {3}},
+     {L"\U0000ac00\U00000900", {44032}, {2}},
+     {L"\U0000ac00\U00000308\U00000900", {44032}, {3}},
      {L"\U0000ac00\U0000094d", {44032}, {2}},
      {L"\U0000ac00\U00000308\U0000094d", {44032}, {3}},
      {L"\U0000ac00\U0000200d", {44032}, {2}},
@@ -1862,8 +1744,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U0000ac01\U00000308\U0000000a", {44033, 10}, {2, 3}},
      {L"\U0000ac01\U00000001", {44033, 1}, {1, 2}},
      {L"\U0000ac01\U00000308\U00000001", {44033, 1}, {2, 3}},
-     {L"\U0000ac01\U0000034f", {44033}, {2}},
-     {L"\U0000ac01\U00000308\U0000034f", {44033}, {3}},
+     {L"\U0000ac01\U0000200c", {44033}, {2}},
+     {L"\U0000ac01\U00000308\U0000200c", {44033}, {3}},
      {L"\U0000ac01\U0001f1e6", {44033, 127462}, {1, 3}},
      {L"\U0000ac01\U00000308\U0001f1e6", {44033, 127462}, {2, 4}},
      {L"\U0000ac01\U00000600", {44033, 1536}, {1, 2}},
@@ -1880,8 +1762,6 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U0000ac01\U00000308\U0000ac00", {44033, 44032}, {2, 3}},
      {L"\U0000ac01\U0000ac01", {44033, 44033}, {1, 2}},
      {L"\U0000ac01\U00000308\U0000ac01", {44033, 44033}, {2, 3}},
-     {L"\U0000ac01\U00000900", {44033}, {2}},
-     {L"\U0000ac01\U00000308\U00000900", {44033}, {3}},
      {L"\U0000ac01\U00000903", {44033}, {2}},
      {L"\U0000ac01\U00000308\U00000903", {44033}, {3}},
      {L"\U0000ac01\U00000904", {44033, 2308}, {1, 2}},
@@ -1894,62 +1774,14 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U0000ac01\U00000308\U0000231a", {44033, 8986}, {2, 3}},
      {L"\U0000ac01\U00000300", {44033}, {2}},
      {L"\U0000ac01\U00000308\U00000300", {44033}, {3}},
-     {L"\U0000ac01\U0000093c", {44033}, {2}},
-     {L"\U0000ac01\U00000308\U0000093c", {44033}, {3}},
+     {L"\U0000ac01\U00000900", {44033}, {2}},
+     {L"\U0000ac01\U00000308\U00000900", {44033}, {3}},
      {L"\U0000ac01\U0000094d", {44033}, {2}},
      {L"\U0000ac01\U00000308\U0000094d", {44033}, {3}},
      {L"\U0000ac01\U0000200d", {44033}, {2}},
      {L"\U0000ac01\U00000308\U0000200d", {44033}, {3}},
      {L"\U0000ac01\U00000378", {44033, 888}, {1, 2}},
      {L"\U0000ac01\U00000308\U00000378", {44033, 888}, {2, 3}},
-     {L"\U00000900\U00000020", {2304, 32}, {1, 2}},
-     {L"\U00000900\U00000308\U00000020", {2304, 32}, {2, 3}},
-     {L"\U00000900\U0000000d", {2304, 13}, {1, 2}},
-     {L"\U00000900\U00000308\U0000000d", {2304, 13}, {2, 3}},
-     {L"\U00000900\U0000000a", {2304, 10}, {1, 2}},
-     {L"\U00000900\U00000308\U0000000a", {2304, 10}, {2, 3}},
-     {L"\U00000900\U00000001", {2304, 1}, {1, 2}},
-     {L"\U00000900\U00000308\U00000001", {2304, 1}, {2, 3}},
-     {L"\U00000900\U0000034f", {2304}, {2}},
-     {L"\U00000900\U00000308\U0000034f", {2304}, {3}},
-     {L"\U00000900\U0001f1e6", {2304, 127462}, {1, 3}},
-     {L"\U00000900\U00000308\U0001f1e6", {2304, 127462}, {2, 4}},
-     {L"\U00000900\U00000600", {2304, 1536}, {1, 2}},
-     {L"\U00000900\U00000308\U00000600", {2304, 1536}, {2, 3}},
-     {L"\U00000900\U00000a03", {2304}, {2}},
-     {L"\U00000900\U00000308\U00000a03", {2304}, {3}},
-     {L"\U00000900\U00001100", {2304, 4352}, {1, 2}},
-     {L"\U00000900\U00000308\U00001100", {2304, 4352}, {2, 3}},
-     {L"\U00000900\U00001160", {2304, 4448}, {1, 2}},
-     {L"\U00000900\U00000308\U00001160", {2304, 4448}, {2, 3}},
-     {L"\U00000900\U000011a8", {2304, 4520}, {1, 2}},
-     {L"\U00000900\U00000308\U000011a8", {2304, 4520}, {2, 3}},
-     {L"\U00000900\U0000ac00", {2304, 44032}, {1, 2}},
-     {L"\U00000900\U00000308\U0000ac00", {2304, 44032}, {2, 3}},
-     {L"\U00000900\U0000ac01", {2304, 44033}, {1, 2}},
-     {L"\U00000900\U00000308\U0000ac01", {2304, 44033}, {2, 3}},
-     {L"\U00000900\U00000900", {2304}, {2}},
-     {L"\U00000900\U00000308\U00000900", {2304}, {3}},
-     {L"\U00000900\U00000903", {2304}, {2}},
-     {L"\U00000900\U00000308\U00000903", {2304}, {3}},
-     {L"\U00000900\U00000904", {2304, 2308}, {1, 2}},
-     {L"\U00000900\U00000308\U00000904", {2304, 2308}, {2, 3}},
-     {L"\U00000900\U00000d4e", {2304, 3406}, {1, 2}},
-     {L"\U00000900\U00000308\U00000d4e", {2304, 3406}, {2, 3}},
-     {L"\U00000900\U00000915", {2304, 2325}, {1, 2}},
-     {L"\U00000900\U00000308\U00000915", {2304, 2325}, {2, 3}},
-     {L"\U00000900\U0000231a", {2304, 8986}, {1, 2}},
-     {L"\U00000900\U00000308\U0000231a", {2304, 8986}, {2, 3}},
-     {L"\U00000900\U00000300", {2304}, {2}},
-     {L"\U00000900\U00000308\U00000300", {2304}, {3}},
-     {L"\U00000900\U0000093c", {2304}, {2}},
-     {L"\U00000900\U00000308\U0000093c", {2304}, {3}},
-     {L"\U00000900\U0000094d", {2304}, {2}},
-     {L"\U00000900\U00000308\U0000094d", {2304}, {3}},
-     {L"\U00000900\U0000200d", {2304}, {2}},
-     {L"\U00000900\U00000308\U0000200d", {2304}, {3}},
-     {L"\U00000900\U00000378", {2304, 888}, {1, 2}},
-     {L"\U00000900\U00000308\U00000378", {2304, 888}, {2, 3}},
      {L"\U00000903\U00000020", {2307, 32}, {1, 2}},
      {L"\U00000903\U00000308\U00000020", {2307, 32}, {2, 3}},
      {L"\U00000903\U0000000d", {2307, 13}, {1, 2}},
@@ -1958,8 +1790,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00000903\U00000308\U0000000a", {2307, 10}, {2, 3}},
      {L"\U00000903\U00000001", {2307, 1}, {1, 2}},
      {L"\U00000903\U00000308\U00000001", {2307, 1}, {2, 3}},
-     {L"\U00000903\U0000034f", {2307}, {2}},
-     {L"\U00000903\U00000308\U0000034f", {2307}, {3}},
+     {L"\U00000903\U0000200c", {2307}, {2}},
+     {L"\U00000903\U00000308\U0000200c", {2307}, {3}},
      {L"\U00000903\U0001f1e6", {2307, 127462}, {1, 3}},
      {L"\U00000903\U00000308\U0001f1e6", {2307, 127462}, {2, 4}},
      {L"\U00000903\U00000600", {2307, 1536}, {1, 2}},
@@ -1976,8 +1808,6 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00000903\U00000308\U0000ac00", {2307, 44032}, {2, 3}},
      {L"\U00000903\U0000ac01", {2307, 44033}, {1, 2}},
      {L"\U00000903\U00000308\U0000ac01", {2307, 44033}, {2, 3}},
-     {L"\U00000903\U00000900", {2307}, {2}},
-     {L"\U00000903\U00000308\U00000900", {2307}, {3}},
      {L"\U00000903\U00000903", {2307}, {2}},
      {L"\U00000903\U00000308\U00000903", {2307}, {3}},
      {L"\U00000903\U00000904", {2307, 2308}, {1, 2}},
@@ -1990,8 +1820,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00000903\U00000308\U0000231a", {2307, 8986}, {2, 3}},
      {L"\U00000903\U00000300", {2307}, {2}},
      {L"\U00000903\U00000308\U00000300", {2307}, {3}},
-     {L"\U00000903\U0000093c", {2307}, {2}},
-     {L"\U00000903\U00000308\U0000093c", {2307}, {3}},
+     {L"\U00000903\U00000900", {2307}, {2}},
+     {L"\U00000903\U00000308\U00000900", {2307}, {3}},
      {L"\U00000903\U0000094d", {2307}, {2}},
      {L"\U00000903\U00000308\U0000094d", {2307}, {3}},
      {L"\U00000903\U0000200d", {2307}, {2}},
@@ -2006,8 +1836,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00000904\U00000308\U0000000a", {2308, 10}, {2, 3}},
      {L"\U00000904\U00000001", {2308, 1}, {1, 2}},
      {L"\U00000904\U00000308\U00000001", {2308, 1}, {2, 3}},
-     {L"\U00000904\U0000034f", {2308}, {2}},
-     {L"\U00000904\U00000308\U0000034f", {2308}, {3}},
+     {L"\U00000904\U0000200c", {2308}, {2}},
+     {L"\U00000904\U00000308\U0000200c", {2308}, {3}},
      {L"\U00000904\U0001f1e6", {2308, 127462}, {1, 3}},
      {L"\U00000904\U00000308\U0001f1e6", {2308, 127462}, {2, 4}},
      {L"\U00000904\U00000600", {2308, 1536}, {1, 2}},
@@ -2024,8 +1854,6 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00000904\U00000308\U0000ac00", {2308, 44032}, {2, 3}},
      {L"\U00000904\U0000ac01", {2308, 44033}, {1, 2}},
      {L"\U00000904\U00000308\U0000ac01", {2308, 44033}, {2, 3}},
-     {L"\U00000904\U00000900", {2308}, {2}},
-     {L"\U00000904\U00000308\U00000900", {2308}, {3}},
      {L"\U00000904\U00000903", {2308}, {2}},
      {L"\U00000904\U00000308\U00000903", {2308}, {3}},
      {L"\U00000904\U00000904", {2308, 2308}, {1, 2}},
@@ -2038,8 +1866,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00000904\U00000308\U0000231a", {2308, 8986}, {2, 3}},
      {L"\U00000904\U00000300", {2308}, {2}},
      {L"\U00000904\U00000308\U00000300", {2308}, {3}},
-     {L"\U00000904\U0000093c", {2308}, {2}},
-     {L"\U00000904\U00000308\U0000093c", {2308}, {3}},
+     {L"\U00000904\U00000900", {2308}, {2}},
+     {L"\U00000904\U00000308\U00000900", {2308}, {3}},
      {L"\U00000904\U0000094d", {2308}, {2}},
      {L"\U00000904\U00000308\U0000094d", {2308}, {3}},
      {L"\U00000904\U0000200d", {2308}, {2}},
@@ -2054,8 +1882,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00000d4e\U00000308\U0000000a", {3406, 10}, {2, 3}},
      {L"\U00000d4e\U00000001", {3406, 1}, {1, 2}},
      {L"\U00000d4e\U00000308\U00000001", {3406, 1}, {2, 3}},
-     {L"\U00000d4e\U0000034f", {3406}, {2}},
-     {L"\U00000d4e\U00000308\U0000034f", {3406}, {3}},
+     {L"\U00000d4e\U0000200c", {3406}, {2}},
+     {L"\U00000d4e\U00000308\U0000200c", {3406}, {3}},
      {L"\U00000d4e\U0001f1e6", {3406}, {3}},
      {L"\U00000d4e\U00000308\U0001f1e6", {3406, 127462}, {2, 4}},
      {L"\U00000d4e\U00000600", {3406}, {2}},
@@ -2072,8 +1900,6 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00000d4e\U00000308\U0000ac00", {3406, 44032}, {2, 3}},
      {L"\U00000d4e\U0000ac01", {3406}, {2}},
      {L"\U00000d4e\U00000308\U0000ac01", {3406, 44033}, {2, 3}},
-     {L"\U00000d4e\U00000900", {3406}, {2}},
-     {L"\U00000d4e\U00000308\U00000900", {3406}, {3}},
      {L"\U00000d4e\U00000903", {3406}, {2}},
      {L"\U00000d4e\U00000308\U00000903", {3406}, {3}},
      {L"\U00000d4e\U00000904", {3406}, {2}},
@@ -2086,8 +1912,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00000d4e\U00000308\U0000231a", {3406, 8986}, {2, 3}},
      {L"\U00000d4e\U00000300", {3406}, {2}},
      {L"\U00000d4e\U00000308\U00000300", {3406}, {3}},
-     {L"\U00000d4e\U0000093c", {3406}, {2}},
-     {L"\U00000d4e\U00000308\U0000093c", {3406}, {3}},
+     {L"\U00000d4e\U00000900", {3406}, {2}},
+     {L"\U00000d4e\U00000308\U00000900", {3406}, {3}},
      {L"\U00000d4e\U0000094d", {3406}, {2}},
      {L"\U00000d4e\U00000308\U0000094d", {3406}, {3}},
      {L"\U00000d4e\U0000200d", {3406}, {2}},
@@ -2102,8 +1928,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00000915\U00000308\U0000000a", {2325, 10}, {2, 3}},
      {L"\U00000915\U00000001", {2325, 1}, {1, 2}},
      {L"\U00000915\U00000308\U00000001", {2325, 1}, {2, 3}},
-     {L"\U00000915\U0000034f", {2325}, {2}},
-     {L"\U00000915\U00000308\U0000034f", {2325}, {3}},
+     {L"\U00000915\U0000200c", {2325}, {2}},
+     {L"\U00000915\U00000308\U0000200c", {2325}, {3}},
      {L"\U00000915\U0001f1e6", {2325, 127462}, {1, 3}},
      {L"\U00000915\U00000308\U0001f1e6", {2325, 127462}, {2, 4}},
      {L"\U00000915\U00000600", {2325, 1536}, {1, 2}},
@@ -2120,8 +1946,6 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00000915\U00000308\U0000ac00", {2325, 44032}, {2, 3}},
      {L"\U00000915\U0000ac01", {2325, 44033}, {1, 2}},
      {L"\U00000915\U00000308\U0000ac01", {2325, 44033}, {2, 3}},
-     {L"\U00000915\U00000900", {2325}, {2}},
-     {L"\U00000915\U00000308\U00000900", {2325}, {3}},
      {L"\U00000915\U00000903", {2325}, {2}},
      {L"\U00000915\U00000308\U00000903", {2325}, {3}},
      {L"\U00000915\U00000904", {2325, 2308}, {1, 2}},
@@ -2134,8 +1958,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00000915\U00000308\U0000231a", {2325, 8986}, {2, 3}},
      {L"\U00000915\U00000300", {2325}, {2}},
      {L"\U00000915\U00000308\U00000300", {2325}, {3}},
-     {L"\U00000915\U0000093c", {2325}, {2}},
-     {L"\U00000915\U00000308\U0000093c", {2325}, {3}},
+     {L"\U00000915\U00000900", {2325}, {2}},
+     {L"\U00000915\U00000308\U00000900", {2325}, {3}},
      {L"\U00000915\U0000094d", {2325}, {2}},
      {L"\U00000915\U00000308\U0000094d", {2325}, {3}},
      {L"\U00000915\U0000200d", {2325}, {2}},
@@ -2150,8 +1974,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U0000231a\U00000308\U0000000a", {8986, 10}, {2, 3}},
      {L"\U0000231a\U00000001", {8986, 1}, {1, 2}},
      {L"\U0000231a\U00000308\U00000001", {8986, 1}, {2, 3}},
-     {L"\U0000231a\U0000034f", {8986}, {2}},
-     {L"\U0000231a\U00000308\U0000034f", {8986}, {3}},
+     {L"\U0000231a\U0000200c", {8986}, {2}},
+     {L"\U0000231a\U00000308\U0000200c", {8986}, {3}},
      {L"\U0000231a\U0001f1e6", {8986, 127462}, {1, 3}},
      {L"\U0000231a\U00000308\U0001f1e6", {8986, 127462}, {2, 4}},
      {L"\U0000231a\U00000600", {8986, 1536}, {1, 2}},
@@ -2168,8 +1992,6 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U0000231a\U00000308\U0000ac00", {8986, 44032}, {2, 3}},
      {L"\U0000231a\U0000ac01", {8986, 44033}, {1, 2}},
      {L"\U0000231a\U00000308\U0000ac01", {8986, 44033}, {2, 3}},
-     {L"\U0000231a\U00000900", {8986}, {2}},
-     {L"\U0000231a\U00000308\U00000900", {8986}, {3}},
      {L"\U0000231a\U00000903", {8986}, {2}},
      {L"\U0000231a\U00000308\U00000903", {8986}, {3}},
      {L"\U0000231a\U00000904", {8986, 2308}, {1, 2}},
@@ -2182,8 +2004,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U0000231a\U00000308\U0000231a", {8986, 8986}, {2, 3}},
      {L"\U0000231a\U00000300", {8986}, {2}},
      {L"\U0000231a\U00000308\U00000300", {8986}, {3}},
-     {L"\U0000231a\U0000093c", {8986}, {2}},
-     {L"\U0000231a\U00000308\U0000093c", {8986}, {3}},
+     {L"\U0000231a\U00000900", {8986}, {2}},
+     {L"\U0000231a\U00000308\U00000900", {8986}, {3}},
      {L"\U0000231a\U0000094d", {8986}, {2}},
      {L"\U0000231a\U00000308\U0000094d", {8986}, {3}},
      {L"\U0000231a\U0000200d", {8986}, {2}},
@@ -2198,8 +2020,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00000300\U00000308\U0000000a", {768, 10}, {2, 3}},
      {L"\U00000300\U00000001", {768, 1}, {1, 2}},
      {L"\U00000300\U00000308\U00000001", {768, 1}, {2, 3}},
-     {L"\U00000300\U0000034f", {768}, {2}},
-     {L"\U00000300\U00000308\U0000034f", {768}, {3}},
+     {L"\U00000300\U0000200c", {768}, {2}},
+     {L"\U00000300\U00000308\U0000200c", {768}, {3}},
      {L"\U00000300\U0001f1e6", {768, 127462}, {1, 3}},
      {L"\U00000300\U00000308\U0001f1e6", {768, 127462}, {2, 4}},
      {L"\U00000300\U00000600", {768, 1536}, {1, 2}},
@@ -2216,8 +2038,6 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00000300\U00000308\U0000ac00", {768, 44032}, {2, 3}},
      {L"\U00000300\U0000ac01", {768, 44033}, {1, 2}},
      {L"\U00000300\U00000308\U0000ac01", {768, 44033}, {2, 3}},
-     {L"\U00000300\U00000900", {768}, {2}},
-     {L"\U00000300\U00000308\U00000900", {768}, {3}},
      {L"\U00000300\U00000903", {768}, {2}},
      {L"\U00000300\U00000308\U00000903", {768}, {3}},
      {L"\U00000300\U00000904", {768, 2308}, {1, 2}},
@@ -2230,62 +2050,60 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00000300\U00000308\U0000231a", {768, 8986}, {2, 3}},
      {L"\U00000300\U00000300", {768}, {2}},
      {L"\U00000300\U00000308\U00000300", {768}, {3}},
-     {L"\U00000300\U0000093c", {768}, {2}},
-     {L"\U00000300\U00000308\U0000093c", {768}, {3}},
+     {L"\U00000300\U00000900", {768}, {2}},
+     {L"\U00000300\U00000308\U00000900", {768}, {3}},
      {L"\U00000300\U0000094d", {768}, {2}},
      {L"\U00000300\U00000308\U0000094d", {768}, {3}},
      {L"\U00000300\U0000200d", {768}, {2}},
      {L"\U00000300\U00000308\U0000200d", {768}, {3}},
      {L"\U00000300\U00000378", {768, 888}, {1, 2}},
      {L"\U00000300\U00000308\U00000378", {768, 888}, {2, 3}},
-     {L"\U0000093c\U00000020", {2364, 32}, {1, 2}},
-     {L"\U0000093c\U00000308\U00000020", {2364, 32}, {2, 3}},
-     {L"\U0000093c\U0000000d", {2364, 13}, {1, 2}},
-     {L"\U0000093c\U00000308\U0000000d", {2364, 13}, {2, 3}},
-     {L"\U0000093c\U0000000a", {2364, 10}, {1, 2}},
-     {L"\U0000093c\U00000308\U0000000a", {2364, 10}, {2, 3}},
-     {L"\U0000093c\U00000001", {2364, 1}, {1, 2}},
-     {L"\U0000093c\U00000308\U00000001", {2364, 1}, {2, 3}},
-     {L"\U0000093c\U0000034f", {2364}, {2}},
-     {L"\U0000093c\U00000308\U0000034f", {2364}, {3}},
-     {L"\U0000093c\U0001f1e6", {2364, 127462}, {1, 3}},
-     {L"\U0000093c\U00000308\U0001f1e6", {2364, 127462}, {2, 4}},
-     {L"\U0000093c\U00000600", {2364, 1536}, {1, 2}},
-     {L"\U0000093c\U00000308\U00000600", {2364, 1536}, {2, 3}},
-     {L"\U0000093c\U00000a03", {2364}, {2}},
-     {L"\U0000093c\U00000308\U00000a03", {2364}, {3}},
-     {L"\U0000093c\U00001100", {2364, 4352}, {1, 2}},
-     {L"\U0000093c\U00000308\U00001100", {2364, 4352}, {2, 3}},
-     {L"\U0000093c\U00001160", {2364, 4448}, {1, 2}},
-     {L"\U0000093c\U00000308\U00001160", {2364, 4448}, {2, 3}},
-     {L"\U0000093c\U000011a8", {2364, 4520}, {1, 2}},
-     {L"\U0000093c\U00000308\U000011a8", {2364, 4520}, {2, 3}},
-     {L"\U0000093c\U0000ac00", {2364, 44032}, {1, 2}},
-     {L"\U0000093c\U00000308\U0000ac00", {2364, 44032}, {2, 3}},
-     {L"\U0000093c\U0000ac01", {2364, 44033}, {1, 2}},
-     {L"\U0000093c\U00000308\U0000ac01", {2364, 44033}, {2, 3}},
-     {L"\U0000093c\U00000900", {2364}, {2}},
-     {L"\U0000093c\U00000308\U00000900", {2364}, {3}},
-     {L"\U0000093c\U00000903", {2364}, {2}},
-     {L"\U0000093c\U00000308\U00000903", {2364}, {3}},
-     {L"\U0000093c\U00000904", {2364, 2308}, {1, 2}},
-     {L"\U0000093c\U00000308\U00000904", {2364, 2308}, {2, 3}},
-     {L"\U0000093c\U00000d4e", {2364, 3406}, {1, 2}},
-     {L"\U0000093c\U00000308\U00000d4e", {2364, 3406}, {2, 3}},
-     {L"\U0000093c\U00000915", {2364, 2325}, {1, 2}},
-     {L"\U0000093c\U00000308\U00000915", {2364, 2325}, {2, 3}},
-     {L"\U0000093c\U0000231a", {2364, 8986}, {1, 2}},
-     {L"\U0000093c\U00000308\U0000231a", {2364, 8986}, {2, 3}},
-     {L"\U0000093c\U00000300", {2364}, {2}},
-     {L"\U0000093c\U00000308\U00000300", {2364}, {3}},
-     {L"\U0000093c\U0000093c", {2364}, {2}},
-     {L"\U0000093c\U00000308\U0000093c", {2364}, {3}},
-     {L"\U0000093c\U0000094d", {2364}, {2}},
-     {L"\U0000093c\U00000308\U0000094d", {2364}, {3}},
-     {L"\U0000093c\U0000200d", {2364}, {2}},
-     {L"\U0000093c\U00000308\U0000200d", {2364}, {3}},
-     {L"\U0000093c\U00000378", {2364, 888}, {1, 2}},
-     {L"\U0000093c\U00000308\U00000378", {2364, 888}, {2, 3}},
+     {L"\U00000900\U00000020", {2304, 32}, {1, 2}},
+     {L"\U00000900\U00000308\U00000020", {2304, 32}, {2, 3}},
+     {L"\U00000900\U0000000d", {2304, 13}, {1, 2}},
+     {L"\U00000900\U00000308\U0000000d", {2304, 13}, {2, 3}},
+     {L"\U00000900\U0000000a", {2304, 10}, {1, 2}},
+     {L"\U00000900\U00000308\U0000000a", {2304, 10}, {2, 3}},
+     {L"\U00000900\U00000001", {2304, 1}, {1, 2}},
+     {L"\U00000900\U00000308\U00000001", {2304, 1}, {2, 3}},
+     {L"\U00000900\U0000200c", {2304}, {2}},
+     {L"\U00000900\U00000308\U0000200c", {2304}, {3}},
+     {L"\U00000900\U0001f1e6", {2304, 127462}, {1, 3}},
+     {L"\U00000900\U00000308\U0001f1e6", {2304, 127462}, {2, 4}},
+     {L"\U00000900\U00000600", {2304, 1536}, {1, 2}},
+     {L"\U00000900\U00000308\U00000600", {2304, 1536}, {2, 3}},
+     {L"\U00000900\U00000a03", {2304}, {2}},
+     {L"\U00000900\U00000308\U00000a03", {2304}, {3}},
+     {L"\U00000900\U00001100", {2304, 4352}, {1, 2}},
+     {L"\U00000900\U00000308\U00001100", {2304, 4352}, {2, 3}},
+     {L"\U00000900\U00001160", {2304, 4448}, {1, 2}},
+     {L"\U00000900\U00000308\U00001160", {2304, 4448}, {2, 3}},
+     {L"\U00000900\U000011a8", {2304, 4520}, {1, 2}},
+     {L"\U00000900\U00000308\U000011a8", {2304, 4520}, {2, 3}},
+     {L"\U00000900\U0000ac00", {2304, 44032}, {1, 2}},
+     {L"\U00000900\U00000308\U0000ac00", {2304, 44032}, {2, 3}},
+     {L"\U00000900\U0000ac01", {2304, 44033}, {1, 2}},
+     {L"\U00000900\U00000308\U0000ac01", {2304, 44033}, {2, 3}},
+     {L"\U00000900\U00000903", {2304}, {2}},
+     {L"\U00000900\U00000308\U00000903", {2304}, {3}},
+     {L"\U00000900\U00000904", {2304, 2308}, {1, 2}},
+     {L"\U00000900\U00000308\U00000904", {2304, 2308}, {2, 3}},
+     {L"\U00000900\U00000d4e", {2304, 3406}, {1, 2}},
+     {L"\U00000900\U00000308\U00000d4e", {2304, 3406}, {2, 3}},
+     {L"\U00000900\U00000915", {2304, 2325}, {1, 2}},
+     {L"\U00000900\U00000308\U00000915", {2304, 2325}, {2, 3}},
+     {L"\U00000900\U0000231a", {2304, 8986}, {1, 2}},
+     {L"\U00000900\U00000308\U0000231a", {2304, 8986}, {2, 3}},
+     {L"\U00000900\U00000300", {2304}, {2}},
+     {L"\U00000900\U00000308\U00000300", {2304}, {3}},
+     {L"\U00000900\U00000900", {2304}, {2}},
+     {L"\U00000900\U00000308\U00000900", {2304}, {3}},
+     {L"\U00000900\U0000094d", {2304}, {2}},
+     {L"\U00000900\U00000308\U0000094d", {2304}, {3}},
+     {L"\U00000900\U0000200d", {2304}, {2}},
+     {L"\U00000900\U00000308\U0000200d", {2304}, {3}},
+     {L"\U00000900\U00000378", {2304, 888}, {1, 2}},
+     {L"\U00000900\U00000308\U00000378", {2304, 888}, {2, 3}},
      {L"\U0000094d\U00000020", {2381, 32}, {1, 2}},
      {L"\U0000094d\U00000308\U00000020", {2381, 32}, {2, 3}},
      {L"\U0000094d\U0000000d", {2381, 13}, {1, 2}},
@@ -2294,8 +2112,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U0000094d\U00000308\U0000000a", {2381, 10}, {2, 3}},
      {L"\U0000094d\U00000001", {2381, 1}, {1, 2}},
      {L"\U0000094d\U00000308\U00000001", {2381, 1}, {2, 3}},
-     {L"\U0000094d\U0000034f", {2381}, {2}},
-     {L"\U0000094d\U00000308\U0000034f", {2381}, {3}},
+     {L"\U0000094d\U0000200c", {2381}, {2}},
+     {L"\U0000094d\U00000308\U0000200c", {2381}, {3}},
      {L"\U0000094d\U0001f1e6", {2381, 127462}, {1, 3}},
      {L"\U0000094d\U00000308\U0001f1e6", {2381, 127462}, {2, 4}},
      {L"\U0000094d\U00000600", {2381, 1536}, {1, 2}},
@@ -2312,8 +2130,6 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U0000094d\U00000308\U0000ac00", {2381, 44032}, {2, 3}},
      {L"\U0000094d\U0000ac01", {2381, 44033}, {1, 2}},
      {L"\U0000094d\U00000308\U0000ac01", {2381, 44033}, {2, 3}},
-     {L"\U0000094d\U00000900", {2381}, {2}},
-     {L"\U0000094d\U00000308\U00000900", {2381}, {3}},
      {L"\U0000094d\U00000903", {2381}, {2}},
      {L"\U0000094d\U00000308\U00000903", {2381}, {3}},
      {L"\U0000094d\U00000904", {2381, 2308}, {1, 2}},
@@ -2326,8 +2142,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U0000094d\U00000308\U0000231a", {2381, 8986}, {2, 3}},
      {L"\U0000094d\U00000300", {2381}, {2}},
      {L"\U0000094d\U00000308\U00000300", {2381}, {3}},
-     {L"\U0000094d\U0000093c", {2381}, {2}},
-     {L"\U0000094d\U00000308\U0000093c", {2381}, {3}},
+     {L"\U0000094d\U00000900", {2381}, {2}},
+     {L"\U0000094d\U00000308\U00000900", {2381}, {3}},
      {L"\U0000094d\U0000094d", {2381}, {2}},
      {L"\U0000094d\U00000308\U0000094d", {2381}, {3}},
      {L"\U0000094d\U0000200d", {2381}, {2}},
@@ -2342,8 +2158,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U0000200d\U00000308\U0000000a", {8205, 10}, {2, 3}},
      {L"\U0000200d\U00000001", {8205, 1}, {1, 2}},
      {L"\U0000200d\U00000308\U00000001", {8205, 1}, {2, 3}},
-     {L"\U0000200d\U0000034f", {8205}, {2}},
-     {L"\U0000200d\U00000308\U0000034f", {8205}, {3}},
+     {L"\U0000200d\U0000200c", {8205}, {2}},
+     {L"\U0000200d\U00000308\U0000200c", {8205}, {3}},
      {L"\U0000200d\U0001f1e6", {8205, 127462}, {1, 3}},
      {L"\U0000200d\U00000308\U0001f1e6", {8205, 127462}, {2, 4}},
      {L"\U0000200d\U00000600", {8205, 1536}, {1, 2}},
@@ -2360,8 +2176,6 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U0000200d\U00000308\U0000ac00", {8205, 44032}, {2, 3}},
      {L"\U0000200d\U0000ac01", {8205, 44033}, {1, 2}},
      {L"\U0000200d\U00000308\U0000ac01", {8205, 44033}, {2, 3}},
-     {L"\U0000200d\U00000900", {8205}, {2}},
-     {L"\U0000200d\U00000308\U00000900", {8205}, {3}},
      {L"\U0000200d\U00000903", {8205}, {2}},
      {L"\U0000200d\U00000308\U00000903", {8205}, {3}},
      {L"\U0000200d\U00000904", {8205, 2308}, {1, 2}},
@@ -2374,8 +2188,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U0000200d\U00000308\U0000231a", {8205, 8986}, {2, 3}},
      {L"\U0000200d\U00000300", {8205}, {2}},
      {L"\U0000200d\U00000308\U00000300", {8205}, {3}},
-     {L"\U0000200d\U0000093c", {8205}, {2}},
-     {L"\U0000200d\U00000308\U0000093c", {8205}, {3}},
+     {L"\U0000200d\U00000900", {8205}, {2}},
+     {L"\U0000200d\U00000308\U00000900", {8205}, {3}},
      {L"\U0000200d\U0000094d", {8205}, {2}},
      {L"\U0000200d\U00000308\U0000094d", {8205}, {3}},
      {L"\U0000200d\U0000200d", {8205}, {2}},
@@ -2390,8 +2204,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00000378\U00000308\U0000000a", {888, 10}, {2, 3}},
      {L"\U00000378\U00000001", {888, 1}, {1, 2}},
      {L"\U00000378\U00000308\U00000001", {888, 1}, {2, 3}},
-     {L"\U00000378\U0000034f", {888}, {2}},
-     {L"\U00000378\U00000308\U0000034f", {888}, {3}},
+     {L"\U00000378\U0000200c", {888}, {2}},
+     {L"\U00000378\U00000308\U0000200c", {888}, {3}},
      {L"\U00000378\U0001f1e6", {888, 127462}, {1, 3}},
      {L"\U00000378\U00000308\U0001f1e6", {888, 127462}, {2, 4}},
      {L"\U00000378\U00000600", {888, 1536}, {1, 2}},
@@ -2408,8 +2222,6 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00000378\U00000308\U0000ac00", {888, 44032}, {2, 3}},
      {L"\U00000378\U0000ac01", {888, 44033}, {1, 2}},
      {L"\U00000378\U00000308\U0000ac01", {888, 44033}, {2, 3}},
-     {L"\U00000378\U00000900", {888}, {2}},
-     {L"\U00000378\U00000308\U00000900", {888}, {3}},
      {L"\U00000378\U00000903", {888}, {2}},
      {L"\U00000378\U00000308\U00000903", {888}, {3}},
      {L"\U00000378\U00000904", {888, 2308}, {1, 2}},
@@ -2422,8 +2234,8 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
      {L"\U00000378\U00000308\U0000231a", {888, 8986}, {2, 3}},
      {L"\U00000378\U00000300", {888}, {2}},
      {L"\U00000378\U00000308\U00000300", {888}, {3}},
-     {L"\U00000378\U0000093c", {888}, {2}},
-     {L"\U00000378\U00000308\U0000093c", {888}, {3}},
+     {L"\U00000378\U00000900", {888}, {2}},
+     {L"\U00000378\U00000308\U00000900", {888}, {3}},
      {L"\U00000378\U0000094d", {888}, {2}},
      {L"\U00000378\U00000308\U0000094d", {888}, {3}},
      {L"\U00000378\U0000200d", {888}, {2}},
@@ -2471,7 +2283,7 @@ std::array<data<wchar_t>, 1187> data_utf16 = {{
 /// Note that most of the data for the UTF-16 and UTF-32 are identical. However
 /// since the size of the code units differ the breaks can contain different
 /// values.
-std::array<data<wchar_t>, 1187> data_utf32 = {{
+std::array<data<wchar_t>, 1093> data_utf32 = {{
      {L"\U00000020\U00000020", {32, 32}, {1, 2}},
      {L"\U00000020\U00000308\U00000020", {32, 32}, {2, 3}},
      {L"\U00000020\U0000000d", {32, 13}, {1, 2}},
@@ -2480,8 +2292,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00000020\U00000308\U0000000a", {32, 10}, {2, 3}},
      {L"\U00000020\U00000001", {32, 1}, {1, 2}},
      {L"\U00000020\U00000308\U00000001", {32, 1}, {2, 3}},
-     {L"\U00000020\U0000034f", {32}, {2}},
-     {L"\U00000020\U00000308\U0000034f", {32}, {3}},
+     {L"\U00000020\U0000200c", {32}, {2}},
+     {L"\U00000020\U00000308\U0000200c", {32}, {3}},
      {L"\U00000020\U0001f1e6", {32, 127462}, {1, 2}},
      {L"\U00000020\U00000308\U0001f1e6", {32, 127462}, {2, 3}},
      {L"\U00000020\U00000600", {32, 1536}, {1, 2}},
@@ -2498,8 +2310,6 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00000020\U00000308\U0000ac00", {32, 44032}, {2, 3}},
      {L"\U00000020\U0000ac01", {32, 44033}, {1, 2}},
      {L"\U00000020\U00000308\U0000ac01", {32, 44033}, {2, 3}},
-     {L"\U00000020\U00000900", {32}, {2}},
-     {L"\U00000020\U00000308\U00000900", {32}, {3}},
      {L"\U00000020\U00000903", {32}, {2}},
      {L"\U00000020\U00000308\U00000903", {32}, {3}},
      {L"\U00000020\U00000904", {32, 2308}, {1, 2}},
@@ -2512,8 +2322,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00000020\U00000308\U0000231a", {32, 8986}, {2, 3}},
      {L"\U00000020\U00000300", {32}, {2}},
      {L"\U00000020\U00000308\U00000300", {32}, {3}},
-     {L"\U00000020\U0000093c", {32}, {2}},
-     {L"\U00000020\U00000308\U0000093c", {32}, {3}},
+     {L"\U00000020\U00000900", {32}, {2}},
+     {L"\U00000020\U00000308\U00000900", {32}, {3}},
      {L"\U00000020\U0000094d", {32}, {2}},
      {L"\U00000020\U00000308\U0000094d", {32}, {3}},
      {L"\U00000020\U0000200d", {32}, {2}},
@@ -2528,8 +2338,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U0000000d\U00000308\U0000000a", {13, 776, 10}, {1, 2, 3}},
      {L"\U0000000d\U00000001", {13, 1}, {1, 2}},
      {L"\U0000000d\U00000308\U00000001", {13, 776, 1}, {1, 2, 3}},
-     {L"\U0000000d\U0000034f", {13, 847}, {1, 2}},
-     {L"\U0000000d\U00000308\U0000034f", {13, 776}, {1, 3}},
+     {L"\U0000000d\U0000200c", {13, 8204}, {1, 2}},
+     {L"\U0000000d\U00000308\U0000200c", {13, 776}, {1, 3}},
      {L"\U0000000d\U0001f1e6", {13, 127462}, {1, 2}},
      {L"\U0000000d\U00000308\U0001f1e6", {13, 776, 127462}, {1, 2, 3}},
      {L"\U0000000d\U00000600", {13, 1536}, {1, 2}},
@@ -2546,8 +2356,6 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U0000000d\U00000308\U0000ac00", {13, 776, 44032}, {1, 2, 3}},
      {L"\U0000000d\U0000ac01", {13, 44033}, {1, 2}},
      {L"\U0000000d\U00000308\U0000ac01", {13, 776, 44033}, {1, 2, 3}},
-     {L"\U0000000d\U00000900", {13, 2304}, {1, 2}},
-     {L"\U0000000d\U00000308\U00000900", {13, 776}, {1, 3}},
      {L"\U0000000d\U00000903", {13, 2307}, {1, 2}},
      {L"\U0000000d\U00000308\U00000903", {13, 776}, {1, 3}},
      {L"\U0000000d\U00000904", {13, 2308}, {1, 2}},
@@ -2560,8 +2368,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U0000000d\U00000308\U0000231a", {13, 776, 8986}, {1, 2, 3}},
      {L"\U0000000d\U00000300", {13, 768}, {1, 2}},
      {L"\U0000000d\U00000308\U00000300", {13, 776}, {1, 3}},
-     {L"\U0000000d\U0000093c", {13, 2364}, {1, 2}},
-     {L"\U0000000d\U00000308\U0000093c", {13, 776}, {1, 3}},
+     {L"\U0000000d\U00000900", {13, 2304}, {1, 2}},
+     {L"\U0000000d\U00000308\U00000900", {13, 776}, {1, 3}},
      {L"\U0000000d\U0000094d", {13, 2381}, {1, 2}},
      {L"\U0000000d\U00000308\U0000094d", {13, 776}, {1, 3}},
      {L"\U0000000d\U0000200d", {13, 8205}, {1, 2}},
@@ -2576,8 +2384,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U0000000a\U00000308\U0000000a", {10, 776, 10}, {1, 2, 3}},
      {L"\U0000000a\U00000001", {10, 1}, {1, 2}},
      {L"\U0000000a\U00000308\U00000001", {10, 776, 1}, {1, 2, 3}},
-     {L"\U0000000a\U0000034f", {10, 847}, {1, 2}},
-     {L"\U0000000a\U00000308\U0000034f", {10, 776}, {1, 3}},
+     {L"\U0000000a\U0000200c", {10, 8204}, {1, 2}},
+     {L"\U0000000a\U00000308\U0000200c", {10, 776}, {1, 3}},
      {L"\U0000000a\U0001f1e6", {10, 127462}, {1, 2}},
      {L"\U0000000a\U00000308\U0001f1e6", {10, 776, 127462}, {1, 2, 3}},
      {L"\U0000000a\U00000600", {10, 1536}, {1, 2}},
@@ -2594,8 +2402,6 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U0000000a\U00000308\U0000ac00", {10, 776, 44032}, {1, 2, 3}},
      {L"\U0000000a\U0000ac01", {10, 44033}, {1, 2}},
      {L"\U0000000a\U00000308\U0000ac01", {10, 776, 44033}, {1, 2, 3}},
-     {L"\U0000000a\U00000900", {10, 2304}, {1, 2}},
-     {L"\U0000000a\U00000308\U00000900", {10, 776}, {1, 3}},
      {L"\U0000000a\U00000903", {10, 2307}, {1, 2}},
      {L"\U0000000a\U00000308\U00000903", {10, 776}, {1, 3}},
      {L"\U0000000a\U00000904", {10, 2308}, {1, 2}},
@@ -2608,8 +2414,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U0000000a\U00000308\U0000231a", {10, 776, 8986}, {1, 2, 3}},
      {L"\U0000000a\U00000300", {10, 768}, {1, 2}},
      {L"\U0000000a\U00000308\U00000300", {10, 776}, {1, 3}},
-     {L"\U0000000a\U0000093c", {10, 2364}, {1, 2}},
-     {L"\U0000000a\U00000308\U0000093c", {10, 776}, {1, 3}},
+     {L"\U0000000a\U00000900", {10, 2304}, {1, 2}},
+     {L"\U0000000a\U00000308\U00000900", {10, 776}, {1, 3}},
      {L"\U0000000a\U0000094d", {10, 2381}, {1, 2}},
      {L"\U0000000a\U00000308\U0000094d", {10, 776}, {1, 3}},
      {L"\U0000000a\U0000200d", {10, 8205}, {1, 2}},
@@ -2624,8 +2430,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00000001\U00000308\U0000000a", {1, 776, 10}, {1, 2, 3}},
      {L"\U00000001\U00000001", {1, 1}, {1, 2}},
      {L"\U00000001\U00000308\U00000001", {1, 776, 1}, {1, 2, 3}},
-     {L"\U00000001\U0000034f", {1, 847}, {1, 2}},
-     {L"\U00000001\U00000308\U0000034f", {1, 776}, {1, 3}},
+     {L"\U00000001\U0000200c", {1, 8204}, {1, 2}},
+     {L"\U00000001\U00000308\U0000200c", {1, 776}, {1, 3}},
      {L"\U00000001\U0001f1e6", {1, 127462}, {1, 2}},
      {L"\U00000001\U00000308\U0001f1e6", {1, 776, 127462}, {1, 2, 3}},
      {L"\U00000001\U00000600", {1, 1536}, {1, 2}},
@@ -2642,8 +2448,6 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00000001\U00000308\U0000ac00", {1, 776, 44032}, {1, 2, 3}},
      {L"\U00000001\U0000ac01", {1, 44033}, {1, 2}},
      {L"\U00000001\U00000308\U0000ac01", {1, 776, 44033}, {1, 2, 3}},
-     {L"\U00000001\U00000900", {1, 2304}, {1, 2}},
-     {L"\U00000001\U00000308\U00000900", {1, 776}, {1, 3}},
      {L"\U00000001\U00000903", {1, 2307}, {1, 2}},
      {L"\U00000001\U00000308\U00000903", {1, 776}, {1, 3}},
      {L"\U00000001\U00000904", {1, 2308}, {1, 2}},
@@ -2656,62 +2460,60 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00000001\U00000308\U0000231a", {1, 776, 8986}, {1, 2, 3}},
      {L"\U00000001\U00000300", {1, 768}, {1, 2}},
      {L"\U00000001\U00000308\U00000300", {1, 776}, {1, 3}},
-     {L"\U00000001\U0000093c", {1, 2364}, {1, 2}},
-     {L"\U00000001\U00000308\U0000093c", {1, 776}, {1, 3}},
+     {L"\U00000001\U00000900", {1, 2304}, {1, 2}},
+     {L"\U00000001\U00000308\U00000900", {1, 776}, {1, 3}},
      {L"\U00000001\U0000094d", {1, 2381}, {1, 2}},
      {L"\U00000001\U00000308\U0000094d", {1, 776}, {1, 3}},
      {L"\U00000001\U0000200d", {1, 8205}, {1, 2}},
      {L"\U00000001\U00000308\U0000200d", {1, 776}, {1, 3}},
      {L"\U00000001\U00000378", {1, 888}, {1, 2}},
      {L"\U00000001\U00000308\U00000378", {1, 776, 888}, {1, 2, 3}},
-     {L"\U0000034f\U00000020", {847, 32}, {1, 2}},
-     {L"\U0000034f\U00000308\U00000020", {847, 32}, {2, 3}},
-     {L"\U0000034f\U0000000d", {847, 13}, {1, 2}},
-     {L"\U0000034f\U00000308\U0000000d", {847, 13}, {2, 3}},
-     {L"\U0000034f\U0000000a", {847, 10}, {1, 2}},
-     {L"\U0000034f\U00000308\U0000000a", {847, 10}, {2, 3}},
-     {L"\U0000034f\U00000001", {847, 1}, {1, 2}},
-     {L"\U0000034f\U00000308\U00000001", {847, 1}, {2, 3}},
-     {L"\U0000034f\U0000034f", {847}, {2}},
-     {L"\U0000034f\U00000308\U0000034f", {847}, {3}},
-     {L"\U0000034f\U0001f1e6", {847, 127462}, {1, 2}},
-     {L"\U0000034f\U00000308\U0001f1e6", {847, 127462}, {2, 3}},
-     {L"\U0000034f\U00000600", {847, 1536}, {1, 2}},
-     {L"\U0000034f\U00000308\U00000600", {847, 1536}, {2, 3}},
-     {L"\U0000034f\U00000a03", {847}, {2}},
-     {L"\U0000034f\U00000308\U00000a03", {847}, {3}},
-     {L"\U0000034f\U00001100", {847, 4352}, {1, 2}},
-     {L"\U0000034f\U00000308\U00001100", {847, 4352}, {2, 3}},
-     {L"\U0000034f\U00001160", {847, 4448}, {1, 2}},
-     {L"\U0000034f\U00000308\U00001160", {847, 4448}, {2, 3}},
-     {L"\U0000034f\U000011a8", {847, 4520}, {1, 2}},
-     {L"\U0000034f\U00000308\U000011a8", {847, 4520}, {2, 3}},
-     {L"\U0000034f\U0000ac00", {847, 44032}, {1, 2}},
-     {L"\U0000034f\U00000308\U0000ac00", {847, 44032}, {2, 3}},
-     {L"\U0000034f\U0000ac01", {847, 44033}, {1, 2}},
-     {L"\U0000034f\U00000308\U0000ac01", {847, 44033}, {2, 3}},
-     {L"\U0000034f\U00000900", {847}, {2}},
-     {L"\U0000034f\U00000308\U00000900", {847}, {3}},
-     {L"\U0000034f\U00000903", {847}, {2}},
-     {L"\U0000034f\U00000308\U00000903", {847}, {3}},
-     {L"\U0000034f\U00000904", {847, 2308}, {1, 2}},
-     {L"\U0000034f\U00000308\U00000904", {847, 2308}, {2, 3}},
-     {L"\U0000034f\U00000d4e", {847, 3406}, {1, 2}},
-     {L"\U0000034f\U00000308\U00000d4e", {847, 3406}, {2, 3}},
-     {L"\U0000034f\U00000915", {847, 2325}, {1, 2}},
-     {L"\U0000034f\U00000308\U00000915", {847, 2325}, {2, 3}},
-     {L"\U0000034f\U0000231a", {847, 8986}, {1, 2}},
-     {L"\U0000034f\U00000308\U0000231a", {847, 8986}, {2, 3}},
-     {L"\U0000034f\U00000300", {847}, {2}},
-     {L"\U0000034f\U00000308\U00000300", {847}, {3}},
-     {L"\U0000034f\U0000093c", {847}, {2}},
-     {L"\U0000034f\U00000308\U0000093c", {847}, {3}},
-     {L"\U0000034f\U0000094d", {847}, {2}},
-     {L"\U0000034f\U00000308\U0000094d", {847}, {3}},
-     {L"\U0000034f\U0000200d", {847}, {2}},
-     {L"\U0000034f\U00000308\U0000200d", {847}, {3}},
-     {L"\U0000034f\U00000378", {847, 888}, {1, 2}},
-     {L"\U0000034f\U00000308\U00000378", {847, 888}, {2, 3}},
+     {L"\U0000200c\U00000020", {8204, 32}, {1, 2}},
+     {L"\U0000200c\U00000308\U00000020", {8204, 32}, {2, 3}},
+     {L"\U0000200c\U0000000d", {8204, 13}, {1, 2}},
+     {L"\U0000200c\U00000308\U0000000d", {8204, 13}, {2, 3}},
+     {L"\U0000200c\U0000000a", {8204, 10}, {1, 2}},
+     {L"\U0000200c\U00000308\U0000000a", {8204, 10}, {2, 3}},
+     {L"\U0000200c\U00000001", {8204, 1}, {1, 2}},
+     {L"\U0000200c\U00000308\U00000001", {8204, 1}, {2, 3}},
+     {L"\U0000200c\U0000200c", {8204}, {2}},
+     {L"\U0000200c\U00000308\U0000200c", {8204}, {3}},
+     {L"\U0000200c\U0001f1e6", {8204, 127462}, {1, 2}},
+     {L"\U0000200c\U00000308\U0001f1e6", {8204, 127462}, {2, 3}},
+     {L"\U0000200c\U00000600", {8204, 1536}, {1, 2}},
+     {L"\U0000200c\U00000308\U00000600", {8204, 1536}, {2, 3}},
+     {L"\U0000200c\U00000a03", {8204}, {2}},
+     {L"\U0000200c\U00000308\U00000a03", {8204}, {3}},
+     {L"\U0000200c\U00001100", {8204, 4352}, {1, 2}},
+     {L"\U0000200c\U00000308\U00001100", {8204, 4352}, {2, 3}},
+     {L"\U0000200c\U00001160", {8204, 4448}, {1, 2}},
+     {L"\U0000200c\U00000308\U00001160", {8204, 4448}, {2, 3}},
+     {L"\U0000200c\U000011a8", {8204, 4520}, {1, 2}},
+     {L"\U0000200c\U00000308\U000011a8", {8204, 4520}, {2, 3}},
+     {L"\U0000200c\U0000ac00", {8204, 44032}, {1, 2}},
+     {L"\U0000200c\U00000308\U0000ac00", {8204, 44032}, {2, 3}},
+     {L"\U0000200c\U0000ac01", {8204, 44033}, {1, 2}},
+     {L"\U0000200c\U00000308\U0000ac01", {8204, 44033}, {2, 3}},
+     {L"\U0000200c\U00000903", {8204}, {2}},
+     {L"\U0000200c\U00000308\U00000903", {8204}, {3}},
+     {L"\U0000200c\U00000904", {8204, 2308}, {1, 2}},
+     {L"\U0000200c\U00000308\U00000904", {8204, 2308}, {2, 3}},
+     {L"\U0000200c\U00000d4e", {8204, 3406}, {1, 2}},
+     {L"\U0000200c\U00000308\U00000d4e", {8204, 3406}, {2, 3}},
+     {L"\U0000200c\U00000915", {8204, 2325}, {1, 2}},
+     {L"\U0000200c\U00000308\U00000915", {8204, 2325}, {2, 3}},
+     {L"\U0000200c\U0000231a", {8204, 8986}, {1, 2}},
+     {L"\U0000200c\U00000308\U0000231a", {8204, 8986}, {2, 3}},
+     {L"\U0000200c\U00000300", {8204}, {2}},
+     {L"\U0000200c\U00000308\U00000300", {8204}, {3}},
+     {L"\U0000200c\U00000900", {8204}, {2}},
+     {L"\U0000200c\U00000308\U00000900", {8204}, {3}},
+     {L"\U0000200c\U0000094d", {8204}, {2}},
+     {L"\U0000200c\U00000308\U0000094d", {8204}, {3}},
+     {L"\U0000200c\U0000200d", {8204}, {2}},
+     {L"\U0000200c\U00000308\U0000200d", {8204}, {3}},
+     {L"\U0000200c\U00000378", {8204, 888}, {1, 2}},
+     {L"\U0000200c\U00000308\U00000378", {8204, 888}, {2, 3}},
      {L"\U0001f1e6\U00000020", {127462, 32}, {1, 2}},
      {L"\U0001f1e6\U00000308\U00000020", {127462, 32}, {2, 3}},
      {L"\U0001f1e6\U0000000d", {127462, 13}, {1, 2}},
@@ -2720,8 +2522,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U0001f1e6\U00000308\U0000000a", {127462, 10}, {2, 3}},
      {L"\U0001f1e6\U00000001", {127462, 1}, {1, 2}},
      {L"\U0001f1e6\U00000308\U00000001", {127462, 1}, {2, 3}},
-     {L"\U0001f1e6\U0000034f", {127462}, {2}},
-     {L"\U0001f1e6\U00000308\U0000034f", {127462}, {3}},
+     {L"\U0001f1e6\U0000200c", {127462}, {2}},
+     {L"\U0001f1e6\U00000308\U0000200c", {127462}, {3}},
      {L"\U0001f1e6\U0001f1e6", {127462}, {2}},
      {L"\U0001f1e6\U00000308\U0001f1e6", {127462, 127462}, {2, 3}},
      {L"\U0001f1e6\U00000600", {127462, 1536}, {1, 2}},
@@ -2738,8 +2540,6 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U0001f1e6\U00000308\U0000ac00", {127462, 44032}, {2, 3}},
      {L"\U0001f1e6\U0000ac01", {127462, 44033}, {1, 2}},
      {L"\U0001f1e6\U00000308\U0000ac01", {127462, 44033}, {2, 3}},
-     {L"\U0001f1e6\U00000900", {127462}, {2}},
-     {L"\U0001f1e6\U00000308\U00000900", {127462}, {3}},
      {L"\U0001f1e6\U00000903", {127462}, {2}},
      {L"\U0001f1e6\U00000308\U00000903", {127462}, {3}},
      {L"\U0001f1e6\U00000904", {127462, 2308}, {1, 2}},
@@ -2752,8 +2552,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U0001f1e6\U00000308\U0000231a", {127462, 8986}, {2, 3}},
      {L"\U0001f1e6\U00000300", {127462}, {2}},
      {L"\U0001f1e6\U00000308\U00000300", {127462}, {3}},
-     {L"\U0001f1e6\U0000093c", {127462}, {2}},
-     {L"\U0001f1e6\U00000308\U0000093c", {127462}, {3}},
+     {L"\U0001f1e6\U00000900", {127462}, {2}},
+     {L"\U0001f1e6\U00000308\U00000900", {127462}, {3}},
      {L"\U0001f1e6\U0000094d", {127462}, {2}},
      {L"\U0001f1e6\U00000308\U0000094d", {127462}, {3}},
      {L"\U0001f1e6\U0000200d", {127462}, {2}},
@@ -2768,8 +2568,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00000600\U00000308\U0000000a", {1536, 10}, {2, 3}},
      {L"\U00000600\U00000001", {1536, 1}, {1, 2}},
      {L"\U00000600\U00000308\U00000001", {1536, 1}, {2, 3}},
-     {L"\U00000600\U0000034f", {1536}, {2}},
-     {L"\U00000600\U00000308\U0000034f", {1536}, {3}},
+     {L"\U00000600\U0000200c", {1536}, {2}},
+     {L"\U00000600\U00000308\U0000200c", {1536}, {3}},
      {L"\U00000600\U0001f1e6", {1536}, {2}},
      {L"\U00000600\U00000308\U0001f1e6", {1536, 127462}, {2, 3}},
      {L"\U00000600\U00000600", {1536}, {2}},
@@ -2786,8 +2586,6 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00000600\U00000308\U0000ac00", {1536, 44032}, {2, 3}},
      {L"\U00000600\U0000ac01", {1536}, {2}},
      {L"\U00000600\U00000308\U0000ac01", {1536, 44033}, {2, 3}},
-     {L"\U00000600\U00000900", {1536}, {2}},
-     {L"\U00000600\U00000308\U00000900", {1536}, {3}},
      {L"\U00000600\U00000903", {1536}, {2}},
      {L"\U00000600\U00000308\U00000903", {1536}, {3}},
      {L"\U00000600\U00000904", {1536}, {2}},
@@ -2800,8 +2598,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00000600\U00000308\U0000231a", {1536, 8986}, {2, 3}},
      {L"\U00000600\U00000300", {1536}, {2}},
      {L"\U00000600\U00000308\U00000300", {1536}, {3}},
-     {L"\U00000600\U0000093c", {1536}, {2}},
-     {L"\U00000600\U00000308\U0000093c", {1536}, {3}},
+     {L"\U00000600\U00000900", {1536}, {2}},
+     {L"\U00000600\U00000308\U00000900", {1536}, {3}},
      {L"\U00000600\U0000094d", {1536}, {2}},
      {L"\U00000600\U00000308\U0000094d", {1536}, {3}},
      {L"\U00000600\U0000200d", {1536}, {2}},
@@ -2816,8 +2614,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00000a03\U00000308\U0000000a", {2563, 10}, {2, 3}},
      {L"\U00000a03\U00000001", {2563, 1}, {1, 2}},
      {L"\U00000a03\U00000308\U00000001", {2563, 1}, {2, 3}},
-     {L"\U00000a03\U0000034f", {2563}, {2}},
-     {L"\U00000a03\U00000308\U0000034f", {2563}, {3}},
+     {L"\U00000a03\U0000200c", {2563}, {2}},
+     {L"\U00000a03\U00000308\U0000200c", {2563}, {3}},
      {L"\U00000a03\U0001f1e6", {2563, 127462}, {1, 2}},
      {L"\U00000a03\U00000308\U0001f1e6", {2563, 127462}, {2, 3}},
      {L"\U00000a03\U00000600", {2563, 1536}, {1, 2}},
@@ -2834,8 +2632,6 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00000a03\U00000308\U0000ac00", {2563, 44032}, {2, 3}},
      {L"\U00000a03\U0000ac01", {2563, 44033}, {1, 2}},
      {L"\U00000a03\U00000308\U0000ac01", {2563, 44033}, {2, 3}},
-     {L"\U00000a03\U00000900", {2563}, {2}},
-     {L"\U00000a03\U00000308\U00000900", {2563}, {3}},
      {L"\U00000a03\U00000903", {2563}, {2}},
      {L"\U00000a03\U00000308\U00000903", {2563}, {3}},
      {L"\U00000a03\U00000904", {2563, 2308}, {1, 2}},
@@ -2848,8 +2644,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00000a03\U00000308\U0000231a", {2563, 8986}, {2, 3}},
      {L"\U00000a03\U00000300", {2563}, {2}},
      {L"\U00000a03\U00000308\U00000300", {2563}, {3}},
-     {L"\U00000a03\U0000093c", {2563}, {2}},
-     {L"\U00000a03\U00000308\U0000093c", {2563}, {3}},
+     {L"\U00000a03\U00000900", {2563}, {2}},
+     {L"\U00000a03\U00000308\U00000900", {2563}, {3}},
      {L"\U00000a03\U0000094d", {2563}, {2}},
      {L"\U00000a03\U00000308\U0000094d", {2563}, {3}},
      {L"\U00000a03\U0000200d", {2563}, {2}},
@@ -2864,8 +2660,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00001100\U00000308\U0000000a", {4352, 10}, {2, 3}},
      {L"\U00001100\U00000001", {4352, 1}, {1, 2}},
      {L"\U00001100\U00000308\U00000001", {4352, 1}, {2, 3}},
-     {L"\U00001100\U0000034f", {4352}, {2}},
-     {L"\U00001100\U00000308\U0000034f", {4352}, {3}},
+     {L"\U00001100\U0000200c", {4352}, {2}},
+     {L"\U00001100\U00000308\U0000200c", {4352}, {3}},
      {L"\U00001100\U0001f1e6", {4352, 127462}, {1, 2}},
      {L"\U00001100\U00000308\U0001f1e6", {4352, 127462}, {2, 3}},
      {L"\U00001100\U00000600", {4352, 1536}, {1, 2}},
@@ -2882,8 +2678,6 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00001100\U00000308\U0000ac00", {4352, 44032}, {2, 3}},
      {L"\U00001100\U0000ac01", {4352}, {2}},
      {L"\U00001100\U00000308\U0000ac01", {4352, 44033}, {2, 3}},
-     {L"\U00001100\U00000900", {4352}, {2}},
-     {L"\U00001100\U00000308\U00000900", {4352}, {3}},
      {L"\U00001100\U00000903", {4352}, {2}},
      {L"\U00001100\U00000308\U00000903", {4352}, {3}},
      {L"\U00001100\U00000904", {4352, 2308}, {1, 2}},
@@ -2896,8 +2690,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00001100\U00000308\U0000231a", {4352, 8986}, {2, 3}},
      {L"\U00001100\U00000300", {4352}, {2}},
      {L"\U00001100\U00000308\U00000300", {4352}, {3}},
-     {L"\U00001100\U0000093c", {4352}, {2}},
-     {L"\U00001100\U00000308\U0000093c", {4352}, {3}},
+     {L"\U00001100\U00000900", {4352}, {2}},
+     {L"\U00001100\U00000308\U00000900", {4352}, {3}},
      {L"\U00001100\U0000094d", {4352}, {2}},
      {L"\U00001100\U00000308\U0000094d", {4352}, {3}},
      {L"\U00001100\U0000200d", {4352}, {2}},
@@ -2912,8 +2706,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00001160\U00000308\U0000000a", {4448, 10}, {2, 3}},
      {L"\U00001160\U00000001", {4448, 1}, {1, 2}},
      {L"\U00001160\U00000308\U00000001", {4448, 1}, {2, 3}},
-     {L"\U00001160\U0000034f", {4448}, {2}},
-     {L"\U00001160\U00000308\U0000034f", {4448}, {3}},
+     {L"\U00001160\U0000200c", {4448}, {2}},
+     {L"\U00001160\U00000308\U0000200c", {4448}, {3}},
      {L"\U00001160\U0001f1e6", {4448, 127462}, {1, 2}},
      {L"\U00001160\U00000308\U0001f1e6", {4448, 127462}, {2, 3}},
      {L"\U00001160\U00000600", {4448, 1536}, {1, 2}},
@@ -2930,8 +2724,6 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00001160\U00000308\U0000ac00", {4448, 44032}, {2, 3}},
      {L"\U00001160\U0000ac01", {4448, 44033}, {1, 2}},
      {L"\U00001160\U00000308\U0000ac01", {4448, 44033}, {2, 3}},
-     {L"\U00001160\U00000900", {4448}, {2}},
-     {L"\U00001160\U00000308\U00000900", {4448}, {3}},
      {L"\U00001160\U00000903", {4448}, {2}},
      {L"\U00001160\U00000308\U00000903", {4448}, {3}},
      {L"\U00001160\U00000904", {4448, 2308}, {1, 2}},
@@ -2944,8 +2736,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00001160\U00000308\U0000231a", {4448, 8986}, {2, 3}},
      {L"\U00001160\U00000300", {4448}, {2}},
      {L"\U00001160\U00000308\U00000300", {4448}, {3}},
-     {L"\U00001160\U0000093c", {4448}, {2}},
-     {L"\U00001160\U00000308\U0000093c", {4448}, {3}},
+     {L"\U00001160\U00000900", {4448}, {2}},
+     {L"\U00001160\U00000308\U00000900", {4448}, {3}},
      {L"\U00001160\U0000094d", {4448}, {2}},
      {L"\U00001160\U00000308\U0000094d", {4448}, {3}},
      {L"\U00001160\U0000200d", {4448}, {2}},
@@ -2960,8 +2752,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U000011a8\U00000308\U0000000a", {4520, 10}, {2, 3}},
      {L"\U000011a8\U00000001", {4520, 1}, {1, 2}},
      {L"\U000011a8\U00000308\U00000001", {4520, 1}, {2, 3}},
-     {L"\U000011a8\U0000034f", {4520}, {2}},
-     {L"\U000011a8\U00000308\U0000034f", {4520}, {3}},
+     {L"\U000011a8\U0000200c", {4520}, {2}},
+     {L"\U000011a8\U00000308\U0000200c", {4520}, {3}},
      {L"\U000011a8\U0001f1e6", {4520, 127462}, {1, 2}},
      {L"\U000011a8\U00000308\U0001f1e6", {4520, 127462}, {2, 3}},
      {L"\U000011a8\U00000600", {4520, 1536}, {1, 2}},
@@ -2978,8 +2770,6 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U000011a8\U00000308\U0000ac00", {4520, 44032}, {2, 3}},
      {L"\U000011a8\U0000ac01", {4520, 44033}, {1, 2}},
      {L"\U000011a8\U00000308\U0000ac01", {4520, 44033}, {2, 3}},
-     {L"\U000011a8\U00000900", {4520}, {2}},
-     {L"\U000011a8\U00000308\U00000900", {4520}, {3}},
      {L"\U000011a8\U00000903", {4520}, {2}},
      {L"\U000011a8\U00000308\U00000903", {4520}, {3}},
      {L"\U000011a8\U00000904", {4520, 2308}, {1, 2}},
@@ -2992,8 +2782,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U000011a8\U00000308\U0000231a", {4520, 8986}, {2, 3}},
      {L"\U000011a8\U00000300", {4520}, {2}},
      {L"\U000011a8\U00000308\U00000300", {4520}, {3}},
-     {L"\U000011a8\U0000093c", {4520}, {2}},
-     {L"\U000011a8\U00000308\U0000093c", {4520}, {3}},
+     {L"\U000011a8\U00000900", {4520}, {2}},
+     {L"\U000011a8\U00000308\U00000900", {4520}, {3}},
      {L"\U000011a8\U0000094d", {4520}, {2}},
      {L"\U000011a8\U00000308\U0000094d", {4520}, {3}},
      {L"\U000011a8\U0000200d", {4520}, {2}},
@@ -3008,8 +2798,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U0000ac00\U00000308\U0000000a", {44032, 10}, {2, 3}},
      {L"\U0000ac00\U00000001", {44032, 1}, {1, 2}},
      {L"\U0000ac00\U00000308\U00000001", {44032, 1}, {2, 3}},
-     {L"\U0000ac00\U0000034f", {44032}, {2}},
-     {L"\U0000ac00\U00000308\U0000034f", {44032}, {3}},
+     {L"\U0000ac00\U0000200c", {44032}, {2}},
+     {L"\U0000ac00\U00000308\U0000200c", {44032}, {3}},
      {L"\U0000ac00\U0001f1e6", {44032, 127462}, {1, 2}},
      {L"\U0000ac00\U00000308\U0001f1e6", {44032, 127462}, {2, 3}},
      {L"\U0000ac00\U00000600", {44032, 1536}, {1, 2}},
@@ -3026,8 +2816,6 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U0000ac00\U00000308\U0000ac00", {44032, 44032}, {2, 3}},
      {L"\U0000ac00\U0000ac01", {44032, 44033}, {1, 2}},
      {L"\U0000ac00\U00000308\U0000ac01", {44032, 44033}, {2, 3}},
-     {L"\U0000ac00\U00000900", {44032}, {2}},
-     {L"\U0000ac00\U00000308\U00000900", {44032}, {3}},
      {L"\U0000ac00\U00000903", {44032}, {2}},
      {L"\U0000ac00\U00000308\U00000903", {44032}, {3}},
      {L"\U0000ac00\U00000904", {44032, 2308}, {1, 2}},
@@ -3040,8 +2828,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U0000ac00\U00000308\U0000231a", {44032, 8986}, {2, 3}},
      {L"\U0000ac00\U00000300", {44032}, {2}},
      {L"\U0000ac00\U00000308\U00000300", {44032}, {3}},
-     {L"\U0000ac00\U0000093c", {44032}, {2}},
-     {L"\U0000ac00\U00000308\U0000093c", {44032}, {3}},
+     {L"\U0000ac00\U00000900", {44032}, {2}},
+     {L"\U0000ac00\U00000308\U00000900", {44032}, {3}},
      {L"\U0000ac00\U0000094d", {44032}, {2}},
      {L"\U0000ac00\U00000308\U0000094d", {44032}, {3}},
      {L"\U0000ac00\U0000200d", {44032}, {2}},
@@ -3056,8 +2844,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U0000ac01\U00000308\U0000000a", {44033, 10}, {2, 3}},
      {L"\U0000ac01\U00000001", {44033, 1}, {1, 2}},
      {L"\U0000ac01\U00000308\U00000001", {44033, 1}, {2, 3}},
-     {L"\U0000ac01\U0000034f", {44033}, {2}},
-     {L"\U0000ac01\U00000308\U0000034f", {44033}, {3}},
+     {L"\U0000ac01\U0000200c", {44033}, {2}},
+     {L"\U0000ac01\U00000308\U0000200c", {44033}, {3}},
      {L"\U0000ac01\U0001f1e6", {44033, 127462}, {1, 2}},
      {L"\U0000ac01\U00000308\U0001f1e6", {44033, 127462}, {2, 3}},
      {L"\U0000ac01\U00000600", {44033, 1536}, {1, 2}},
@@ -3074,8 +2862,6 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U0000ac01\U00000308\U0000ac00", {44033, 44032}, {2, 3}},
      {L"\U0000ac01\U0000ac01", {44033, 44033}, {1, 2}},
      {L"\U0000ac01\U00000308\U0000ac01", {44033, 44033}, {2, 3}},
-     {L"\U0000ac01\U00000900", {44033}, {2}},
-     {L"\U0000ac01\U00000308\U00000900", {44033}, {3}},
      {L"\U0000ac01\U00000903", {44033}, {2}},
      {L"\U0000ac01\U00000308\U00000903", {44033}, {3}},
      {L"\U0000ac01\U00000904", {44033, 2308}, {1, 2}},
@@ -3088,62 +2874,14 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U0000ac01\U00000308\U0000231a", {44033, 8986}, {2, 3}},
      {L"\U0000ac01\U00000300", {44033}, {2}},
      {L"\U0000ac01\U00000308\U00000300", {44033}, {3}},
-     {L"\U0000ac01\U0000093c", {44033}, {2}},
-     {L"\U0000ac01\U00000308\U0000093c", {44033}, {3}},
+     {L"\U0000ac01\U00000900", {44033}, {2}},
+     {L"\U0000ac01\U00000308\U00000900", {44033}, {3}},
      {L"\U0000ac01\U0000094d", {44033}, {2}},
      {L"\U0000ac01\U00000308\U0000094d", {44033}, {3}},
      {L"\U0000ac01\U0000200d", {44033}, {2}},
      {L"\U0000ac01\U00000308\U0000200d", {44033}, {3}},
      {L"\U0000ac01\U00000378", {44033, 888}, {1, 2}},
      {L"\U0000ac01\U00000308\U00000378", {44033, 888}, {2, 3}},
-     {L"\U00000900\U00000020", {2304, 32}, {1, 2}},
-     {L"\U00000900\U00000308\U00000020", {2304, 32}, {2, 3}},
-     {L"\U00000900\U0000000d", {2304, 13}, {1, 2}},
-     {L"\U00000900\U00000308\U0000000d", {2304, 13}, {2, 3}},
-     {L"\U00000900\U0000000a", {2304, 10}, {1, 2}},
-     {L"\U00000900\U00000308\U0000000a", {2304, 10}, {2, 3}},
-     {L"\U00000900\U00000001", {2304, 1}, {1, 2}},
-     {L"\U00000900\U00000308\U00000001", {2304, 1}, {2, 3}},
-     {L"\U00000900\U0000034f", {2304}, {2}},
-     {L"\U00000900\U00000308\U0000034f", {2304}, {3}},
-     {L"\U00000900\U0001f1e6", {2304, 127462}, {1, 2}},
-     {L"\U00000900\U00000308\U0001f1e6", {2304, 127462}, {2, 3}},
-     {L"\U00000900\U00000600", {2304, 1536}, {1, 2}},
-     {L"\U00000900\U00000308\U00000600", {2304, 1536}, {2, 3}},
-     {L"\U00000900\U00000a03", {2304}, {2}},
-     {L"\U00000900\U00000308\U00000a03", {2304}, {3}},
-     {L"\U00000900\U00001100", {2304, 4352}, {1, 2}},
-     {L"\U00000900\U00000308\U00001100", {2304, 4352}, {2, 3}},
-     {L"\U00000900\U00001160", {2304, 4448}, {1, 2}},
-     {L"\U00000900\U00000308\U00001160", {2304, 4448}, {2, 3}},
-     {L"\U00000900\U000011a8", {2304, 4520}, {1, 2}},
-     {L"\U00000900\U00000308\U000011a8", {2304, 4520}, {2, 3}},
-     {L"\U00000900\U0000ac00", {2304, 44032}, {1, 2}},
-     {L"\U00000900\U00000308\U0000ac00", {2304, 44032}, {2, 3}},
-     {L"\U00000900\U0000ac01", {2304, 44033}, {1, 2}},
-     {L"\U00000900\U00000308\U0000ac01", {2304, 44033}, {2, 3}},
-     {L"\U00000900\U00000900", {2304}, {2}},
-     {L"\U00000900\U00000308\U00000900", {2304}, {3}},
-     {L"\U00000900\U00000903", {2304}, {2}},
-     {L"\U00000900\U00000308\U00000903", {2304}, {3}},
-     {L"\U00000900\U00000904", {2304, 2308}, {1, 2}},
-     {L"\U00000900\U00000308\U00000904", {2304, 2308}, {2, 3}},
-     {L"\U00000900\U00000d4e", {2304, 3406}, {1, 2}},
-     {L"\U00000900\U00000308\U00000d4e", {2304, 3406}, {2, 3}},
-     {L"\U00000900\U00000915", {2304, 2325}, {1, 2}},
-     {L"\U00000900\U00000308\U00000915", {2304, 2325}, {2, 3}},
-     {L"\U00000900\U0000231a", {2304, 8986}, {1, 2}},
-     {L"\U00000900\U00000308\U0000231a", {2304, 8986}, {2, 3}},
-     {L"\U00000900\U00000300", {2304}, {2}},
-     {L"\U00000900\U00000308\U00000300", {2304}, {3}},
-     {L"\U00000900\U0000093c", {2304}, {2}},
-     {L"\U00000900\U00000308\U0000093c", {2304}, {3}},
-     {L"\U00000900\U0000094d", {2304}, {2}},
-     {L"\U00000900\U00000308\U0000094d", {2304}, {3}},
-     {L"\U00000900\U0000200d", {2304}, {2}},
-     {L"\U00000900\U00000308\U0000200d", {2304}, {3}},
-     {L"\U00000900\U00000378", {2304, 888}, {1, 2}},
-     {L"\U00000900\U00000308\U00000378", {2304, 888}, {2, 3}},
      {L"\U00000903\U00000020", {2307, 32}, {1, 2}},
      {L"\U00000903\U00000308\U00000020", {2307, 32}, {2, 3}},
      {L"\U00000903\U0000000d", {2307, 13}, {1, 2}},
@@ -3152,8 +2890,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00000903\U00000308\U0000000a", {2307, 10}, {2, 3}},
      {L"\U00000903\U00000001", {2307, 1}, {1, 2}},
      {L"\U00000903\U00000308\U00000001", {2307, 1}, {2, 3}},
-     {L"\U00000903\U0000034f", {2307}, {2}},
-     {L"\U00000903\U00000308\U0000034f", {2307}, {3}},
+     {L"\U00000903\U0000200c", {2307}, {2}},
+     {L"\U00000903\U00000308\U0000200c", {2307}, {3}},
      {L"\U00000903\U0001f1e6", {2307, 127462}, {1, 2}},
      {L"\U00000903\U00000308\U0001f1e6", {2307, 127462}, {2, 3}},
      {L"\U00000903\U00000600", {2307, 1536}, {1, 2}},
@@ -3170,8 +2908,6 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00000903\U00000308\U0000ac00", {2307, 44032}, {2, 3}},
      {L"\U00000903\U0000ac01", {2307, 44033}, {1, 2}},
      {L"\U00000903\U00000308\U0000ac01", {2307, 44033}, {2, 3}},
-     {L"\U00000903\U00000900", {2307}, {2}},
-     {L"\U00000903\U00000308\U00000900", {2307}, {3}},
      {L"\U00000903\U00000903", {2307}, {2}},
      {L"\U00000903\U00000308\U00000903", {2307}, {3}},
      {L"\U00000903\U00000904", {2307, 2308}, {1, 2}},
@@ -3184,8 +2920,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00000903\U00000308\U0000231a", {2307, 8986}, {2, 3}},
      {L"\U00000903\U00000300", {2307}, {2}},
      {L"\U00000903\U00000308\U00000300", {2307}, {3}},
-     {L"\U00000903\U0000093c", {2307}, {2}},
-     {L"\U00000903\U00000308\U0000093c", {2307}, {3}},
+     {L"\U00000903\U00000900", {2307}, {2}},
+     {L"\U00000903\U00000308\U00000900", {2307}, {3}},
      {L"\U00000903\U0000094d", {2307}, {2}},
      {L"\U00000903\U00000308\U0000094d", {2307}, {3}},
      {L"\U00000903\U0000200d", {2307}, {2}},
@@ -3200,8 +2936,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00000904\U00000308\U0000000a", {2308, 10}, {2, 3}},
      {L"\U00000904\U00000001", {2308, 1}, {1, 2}},
      {L"\U00000904\U00000308\U00000001", {2308, 1}, {2, 3}},
-     {L"\U00000904\U0000034f", {2308}, {2}},
-     {L"\U00000904\U00000308\U0000034f", {2308}, {3}},
+     {L"\U00000904\U0000200c", {2308}, {2}},
+     {L"\U00000904\U00000308\U0000200c", {2308}, {3}},
      {L"\U00000904\U0001f1e6", {2308, 127462}, {1, 2}},
      {L"\U00000904\U00000308\U0001f1e6", {2308, 127462}, {2, 3}},
      {L"\U00000904\U00000600", {2308, 1536}, {1, 2}},
@@ -3218,8 +2954,6 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00000904\U00000308\U0000ac00", {2308, 44032}, {2, 3}},
      {L"\U00000904\U0000ac01", {2308, 44033}, {1, 2}},
      {L"\U00000904\U00000308\U0000ac01", {2308, 44033}, {2, 3}},
-     {L"\U00000904\U00000900", {2308}, {2}},
-     {L"\U00000904\U00000308\U00000900", {2308}, {3}},
      {L"\U00000904\U00000903", {2308}, {2}},
      {L"\U00000904\U00000308\U00000903", {2308}, {3}},
      {L"\U00000904\U00000904", {2308, 2308}, {1, 2}},
@@ -3232,8 +2966,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00000904\U00000308\U0000231a", {2308, 8986}, {2, 3}},
      {L"\U00000904\U00000300", {2308}, {2}},
      {L"\U00000904\U00000308\U00000300", {2308}, {3}},
-     {L"\U00000904\U0000093c", {2308}, {2}},
-     {L"\U00000904\U00000308\U0000093c", {2308}, {3}},
+     {L"\U00000904\U00000900", {2308}, {2}},
+     {L"\U00000904\U00000308\U00000900", {2308}, {3}},
      {L"\U00000904\U0000094d", {2308}, {2}},
      {L"\U00000904\U00000308\U0000094d", {2308}, {3}},
      {L"\U00000904\U0000200d", {2308}, {2}},
@@ -3248,8 +2982,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00000d4e\U00000308\U0000000a", {3406, 10}, {2, 3}},
      {L"\U00000d4e\U00000001", {3406, 1}, {1, 2}},
      {L"\U00000d4e\U00000308\U00000001", {3406, 1}, {2, 3}},
-     {L"\U00000d4e\U0000034f", {3406}, {2}},
-     {L"\U00000d4e\U00000308\U0000034f", {3406}, {3}},
+     {L"\U00000d4e\U0000200c", {3406}, {2}},
+     {L"\U00000d4e\U00000308\U0000200c", {3406}, {3}},
      {L"\U00000d4e\U0001f1e6", {3406}, {2}},
      {L"\U00000d4e\U00000308\U0001f1e6", {3406, 127462}, {2, 3}},
      {L"\U00000d4e\U00000600", {3406}, {2}},
@@ -3266,8 +3000,6 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00000d4e\U00000308\U0000ac00", {3406, 44032}, {2, 3}},
      {L"\U00000d4e\U0000ac01", {3406}, {2}},
      {L"\U00000d4e\U00000308\U0000ac01", {3406, 44033}, {2, 3}},
-     {L"\U00000d4e\U00000900", {3406}, {2}},
-     {L"\U00000d4e\U00000308\U00000900", {3406}, {3}},
      {L"\U00000d4e\U00000903", {3406}, {2}},
      {L"\U00000d4e\U00000308\U00000903", {3406}, {3}},
      {L"\U00000d4e\U00000904", {3406}, {2}},
@@ -3280,8 +3012,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00000d4e\U00000308\U0000231a", {3406, 8986}, {2, 3}},
      {L"\U00000d4e\U00000300", {3406}, {2}},
      {L"\U00000d4e\U00000308\U00000300", {3406}, {3}},
-     {L"\U00000d4e\U0000093c", {3406}, {2}},
-     {L"\U00000d4e\U00000308\U0000093c", {3406}, {3}},
+     {L"\U00000d4e\U00000900", {3406}, {2}},
+     {L"\U00000d4e\U00000308\U00000900", {3406}, {3}},
      {L"\U00000d4e\U0000094d", {3406}, {2}},
      {L"\U00000d4e\U00000308\U0000094d", {3406}, {3}},
      {L"\U00000d4e\U0000200d", {3406}, {2}},
@@ -3296,8 +3028,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00000915\U00000308\U0000000a", {2325, 10}, {2, 3}},
      {L"\U00000915\U00000001", {2325, 1}, {1, 2}},
      {L"\U00000915\U00000308\U00000001", {2325, 1}, {2, 3}},
-     {L"\U00000915\U0000034f", {2325}, {2}},
-     {L"\U00000915\U00000308\U0000034f", {2325}, {3}},
+     {L"\U00000915\U0000200c", {2325}, {2}},
+     {L"\U00000915\U00000308\U0000200c", {2325}, {3}},
      {L"\U00000915\U0001f1e6", {2325, 127462}, {1, 2}},
      {L"\U00000915\U00000308\U0001f1e6", {2325, 127462}, {2, 3}},
      {L"\U00000915\U00000600", {2325, 1536}, {1, 2}},
@@ -3314,8 +3046,6 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00000915\U00000308\U0000ac00", {2325, 44032}, {2, 3}},
      {L"\U00000915\U0000ac01", {2325, 44033}, {1, 2}},
      {L"\U00000915\U00000308\U0000ac01", {2325, 44033}, {2, 3}},
-     {L"\U00000915\U00000900", {2325}, {2}},
-     {L"\U00000915\U00000308\U00000900", {2325}, {3}},
      {L"\U00000915\U00000903", {2325}, {2}},
      {L"\U00000915\U00000308\U00000903", {2325}, {3}},
      {L"\U00000915\U00000904", {2325, 2308}, {1, 2}},
@@ -3328,8 +3058,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00000915\U00000308\U0000231a", {2325, 8986}, {2, 3}},
      {L"\U00000915\U00000300", {2325}, {2}},
      {L"\U00000915\U00000308\U00000300", {2325}, {3}},
-     {L"\U00000915\U0000093c", {2325}, {2}},
-     {L"\U00000915\U00000308\U0000093c", {2325}, {3}},
+     {L"\U00000915\U00000900", {2325}, {2}},
+     {L"\U00000915\U00000308\U00000900", {2325}, {3}},
      {L"\U00000915\U0000094d", {2325}, {2}},
      {L"\U00000915\U00000308\U0000094d", {2325}, {3}},
      {L"\U00000915\U0000200d", {2325}, {2}},
@@ -3344,8 +3074,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U0000231a\U00000308\U0000000a", {8986, 10}, {2, 3}},
      {L"\U0000231a\U00000001", {8986, 1}, {1, 2}},
      {L"\U0000231a\U00000308\U00000001", {8986, 1}, {2, 3}},
-     {L"\U0000231a\U0000034f", {8986}, {2}},
-     {L"\U0000231a\U00000308\U0000034f", {8986}, {3}},
+     {L"\U0000231a\U0000200c", {8986}, {2}},
+     {L"\U0000231a\U00000308\U0000200c", {8986}, {3}},
      {L"\U0000231a\U0001f1e6", {8986, 127462}, {1, 2}},
      {L"\U0000231a\U00000308\U0001f1e6", {8986, 127462}, {2, 3}},
      {L"\U0000231a\U00000600", {8986, 1536}, {1, 2}},
@@ -3362,8 +3092,6 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U0000231a\U00000308\U0000ac00", {8986, 44032}, {2, 3}},
      {L"\U0000231a\U0000ac01", {8986, 44033}, {1, 2}},
      {L"\U0000231a\U00000308\U0000ac01", {8986, 44033}, {2, 3}},
-     {L"\U0000231a\U00000900", {8986}, {2}},
-     {L"\U0000231a\U00000308\U00000900", {8986}, {3}},
      {L"\U0000231a\U00000903", {8986}, {2}},
      {L"\U0000231a\U00000308\U00000903", {8986}, {3}},
      {L"\U0000231a\U00000904", {8986, 2308}, {1, 2}},
@@ -3376,8 +3104,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U0000231a\U00000308\U0000231a", {8986, 8986}, {2, 3}},
      {L"\U0000231a\U00000300", {8986}, {2}},
      {L"\U0000231a\U00000308\U00000300", {8986}, {3}},
-     {L"\U0000231a\U0000093c", {8986}, {2}},
-     {L"\U0000231a\U00000308\U0000093c", {8986}, {3}},
+     {L"\U0000231a\U00000900", {8986}, {2}},
+     {L"\U0000231a\U00000308\U00000900", {8986}, {3}},
      {L"\U0000231a\U0000094d", {8986}, {2}},
      {L"\U0000231a\U00000308\U0000094d", {8986}, {3}},
      {L"\U0000231a\U0000200d", {8986}, {2}},
@@ -3392,8 +3120,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00000300\U00000308\U0000000a", {768, 10}, {2, 3}},
      {L"\U00000300\U00000001", {768, 1}, {1, 2}},
      {L"\U00000300\U00000308\U00000001", {768, 1}, {2, 3}},
-     {L"\U00000300\U0000034f", {768}, {2}},
-     {L"\U00000300\U00000308\U0000034f", {768}, {3}},
+     {L"\U00000300\U0000200c", {768}, {2}},
+     {L"\U00000300\U00000308\U0000200c", {768}, {3}},
      {L"\U00000300\U0001f1e6", {768, 127462}, {1, 2}},
      {L"\U00000300\U00000308\U0001f1e6", {768, 127462}, {2, 3}},
      {L"\U00000300\U00000600", {768, 1536}, {1, 2}},
@@ -3410,8 +3138,6 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00000300\U00000308\U0000ac00", {768, 44032}, {2, 3}},
      {L"\U00000300\U0000ac01", {768, 44033}, {1, 2}},
      {L"\U00000300\U00000308\U0000ac01", {768, 44033}, {2, 3}},
-     {L"\U00000300\U00000900", {768}, {2}},
-     {L"\U00000300\U00000308\U00000900", {768}, {3}},
      {L"\U00000300\U00000903", {768}, {2}},
      {L"\U00000300\U00000308\U00000903", {768}, {3}},
      {L"\U00000300\U00000904", {768, 2308}, {1, 2}},
@@ -3424,62 +3150,60 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00000300\U00000308\U0000231a", {768, 8986}, {2, 3}},
      {L"\U00000300\U00000300", {768}, {2}},
      {L"\U00000300\U00000308\U00000300", {768}, {3}},
-     {L"\U00000300\U0000093c", {768}, {2}},
-     {L"\U00000300\U00000308\U0000093c", {768}, {3}},
+     {L"\U00000300\U00000900", {768}, {2}},
+     {L"\U00000300\U00000308\U00000900", {768}, {3}},
      {L"\U00000300\U0000094d", {768}, {2}},
      {L"\U00000300\U00000308\U0000094d", {768}, {3}},
      {L"\U00000300\U0000200d", {768}, {2}},
      {L"\U00000300\U00000308\U0000200d", {768}, {3}},
      {L"\U00000300\U00000378", {768, 888}, {1, 2}},
      {L"\U00000300\U00000308\U00000378", {768, 888}, {2, 3}},
-     {L"\U0000093c\U00000020", {2364, 32}, {1, 2}},
-     {L"\U0000093c\U00000308\U00000020", {2364, 32}, {2, 3}},
-     {L"\U0000093c\U0000000d", {2364, 13}, {1, 2}},
-     {L"\U0000093c\U00000308\U0000000d", {2364, 13}, {2, 3}},
-     {L"\U0000093c\U0000000a", {2364, 10}, {1, 2}},
-     {L"\U0000093c\U00000308\U0000000a", {2364, 10}, {2, 3}},
-     {L"\U0000093c\U00000001", {2364, 1}, {1, 2}},
-     {L"\U0000093c\U00000308\U00000001", {2364, 1}, {2, 3}},
-     {L"\U0000093c\U0000034f", {2364}, {2}},
-     {L"\U0000093c\U00000308\U0000034f", {2364}, {3}},
-     {L"\U0000093c\U0001f1e6", {2364, 127462}, {1, 2}},
-     {L"\U0000093c\U00000308\U0001f1e6", {2364, 127462}, {2, 3}},
-     {L"\U0000093c\U00000600", {2364, 1536}, {1, 2}},
-     {L"\U0000093c\U00000308\U00000600", {2364, 1536}, {2, 3}},
-     {L"\U0000093c\U00000a03", {2364}, {2}},
-     {L"\U0000093c\U00000308\U00000a03", {2364}, {3}},
-     {L"\U0000093c\U00001100", {2364, 4352}, {1, 2}},
-     {L"\U0000093c\U00000308\U00001100", {2364, 4352}, {2, 3}},
-     {L"\U0000093c\U00001160", {2364, 4448}, {1, 2}},
-     {L"\U0000093c\U00000308\U00001160", {2364, 4448}, {2, 3}},
-     {L"\U0000093c\U000011a8", {2364, 4520}, {1, 2}},
-     {L"\U0000093c\U00000308\U000011a8", {2364, 4520}, {2, 3}},
-     {L"\U0000093c\U0000ac00", {2364, 44032}, {1, 2}},
-     {L"\U0000093c\U00000308\U0000ac00", {2364, 44032}, {2, 3}},
-     {L"\U0000093c\U0000ac01", {2364, 44033}, {1, 2}},
-     {L"\U0000093c\U00000308\U0000ac01", {2364, 44033}, {2, 3}},
-     {L"\U0000093c\U00000900", {2364}, {2}},
-     {L"\U0000093c\U00000308\U00000900", {2364}, {3}},
-     {L"\U0000093c\U00000903", {2364}, {2}},
-     {L"\U0000093c\U00000308\U00000903", {2364}, {3}},
-     {L"\U0000093c\U00000904", {2364, 2308}, {1, 2}},
-     {L"\U0000093c\U00000308\U00000904", {2364, 2308}, {2, 3}},
-     {L"\U0000093c\U00000d4e", {2364, 3406}, {1, 2}},
-     {L"\U0000093c\U00000308\U00000d4e", {2364, 3406}, {2, 3}},
-     {L"\U0000093c\U00000915", {2364, 2325}, {1, 2}},
-     {L"\U0000093c\U00000308\U00000915", {2364, 2325}, {2, 3}},
-     {L"\U0000093c\U0000231a", {2364, 8986}, {1, 2}},
-     {L"\U0000093c\U00000308\U0000231a", {2364, 8986}, {2, 3}},
-     {L"\U0000093c\U00000300", {2364}, {2}},
-     {L"\U0000093c\U00000308\U00000300", {2364}, {3}},
-     {L"\U0000093c\U0000093c", {2364}, {2}},
-     {L"\U0000093c\U00000308\U0000093c", {2364}, {3}},
-     {L"\U0000093c\U0000094d", {2364}, {2}},
-     {L"\U0000093c\U00000308\U0000094d", {2364}, {3}},
-     {L"\U0000093c\U0000200d", {2364}, {2}},
-     {L"\U0000093c\U00000308\U0000200d", {2364}, {3}},
-     {L"\U0000093c\U00000378", {2364, 888}, {1, 2}},
-     {L"\U0000093c\U00000308\U00000378", {2364, 888}, {2, 3}},
+     {L"\U00000900\U00000020", {2304, 32}, {1, 2}},
+     {L"\U00000900\U00000308\U00000020", {2304, 32}, {2, 3}},
+     {L"\U00000900\U0000000d", {2304, 13}, {1, 2}},
+     {L"\U00000900\U00000308\U0000000d", {2304, 13}, {2, 3}},
+     {L"\U00000900\U0000000a", {2304, 10}, {1, 2}},
+     {L"\U00000900\U00000308\U0000000a", {2304, 10}, {2, 3}},
+     {L"\U00000900\U00000001", {2304, 1}, {1, 2}},
+     {L"\U00000900\U00000308\U00000001", {2304, 1}, {2, 3}},
+     {L"\U00000900\U0000200c", {2304}, {2}},
+     {L"\U00000900\U00000308\U0000200c", {2304}, {3}},
+     {L"\U00000900\U0001f1e6", {2304, 127462}, {1, 2}},
+     {L"\U00000900\U00000308\U0001f1e6", {2304, 127462}, {2, 3}},
+     {L"\U00000900\U00000600", {2304, 1536}, {1, 2}},
+     {L"\U00000900\U00000308\U00000600", {2304, 1536}, {2, 3}},
+     {L"\U00000900\U00000a03", {2304}, {2}},
+     {L"\U00000900\U00000308\U00000a03", {2304}, {3}},
+     {L"\U00000900\U00001100", {2304, 4352}, {1, 2}},
+     {L"\U00000900\U00000308\U00001100", {2304, 4352}, {2, 3}},
+     {L"\U00000900\U00001160", {2304, 4448}, {1, 2}},
+     {L"\U00000900\U00000308\U00001160", {2304, 4448}, {2, 3}},
+     {L"\U00000900\U000011a8", {2304, 4520}, {1, 2}},
+     {L"\U00000900\U00000308\U000011a8", {2304, 4520}, {2, 3}},
+     {L"\U00000900\U0000ac00", {2304, 44032}, {1, 2}},
+     {L"\U00000900\U00000308\U0000ac00", {2304, 44032}, {2, 3}},
+     {L"\U00000900\U0000ac01", {2304, 44033}, {1, 2}},
+     {L"\U00000900\U00000308\U0000ac01", {2304, 44033}, {2, 3}},
+     {L"\U00000900\U00000903", {2304}, {2}},
+     {L"\U00000900\U00000308\U00000903", {2304}, {3}},
+     {L"\U00000900\U00000904", {2304, 2308}, {1, 2}},
+     {L"\U00000900\U00000308\U00000904", {2304, 2308}, {2, 3}},
+     {L"\U00000900\U00000d4e", {2304, 3406}, {1, 2}},
+     {L"\U00000900\U00000308\U00000d4e", {2304, 3406}, {2, 3}},
+     {L"\U00000900\U00000915", {2304, 2325}, {1, 2}},
+     {L"\U00000900\U00000308\U00000915", {2304, 2325}, {2, 3}},
+     {L"\U00000900\U0000231a", {2304, 8986}, {1, 2}},
+     {L"\U00000900\U00000308\U0000231a", {2304, 8986}, {2, 3}},
+     {L"\U00000900\U00000300", {2304}, {2}},
+     {L"\U00000900\U00000308\U00000300", {2304}, {3}},
+     {L"\U00000900\U00000900", {2304}, {2}},
+     {L"\U00000900\U00000308\U00000900", {2304}, {3}},
+     {L"\U00000900\U0000094d", {2304}, {2}},
+     {L"\U00000900\U00000308\U0000094d", {2304}, {3}},
+     {L"\U00000900\U0000200d", {2304}, {2}},
+     {L"\U00000900\U00000308\U0000200d", {2304}, {3}},
+     {L"\U00000900\U00000378", {2304, 888}, {1, 2}},
+     {L"\U00000900\U00000308\U00000378", {2304, 888}, {2, 3}},
      {L"\U0000094d\U00000020", {2381, 32}, {1, 2}},
      {L"\U0000094d\U00000308\U00000020", {2381, 32}, {2, 3}},
      {L"\U0000094d\U0000000d", {2381, 13}, {1, 2}},
@@ -3488,8 +3212,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U0000094d\U00000308\U0000000a", {2381, 10}, {2, 3}},
      {L"\U0000094d\U00000001", {2381, 1}, {1, 2}},
      {L"\U0000094d\U00000308\U00000001", {2381, 1}, {2, 3}},
-     {L"\U0000094d\U0000034f", {2381}, {2}},
-     {L"\U0000094d\U00000308\U0000034f", {2381}, {3}},
+     {L"\U0000094d\U0000200c", {2381}, {2}},
+     {L"\U0000094d\U00000308\U0000200c", {2381}, {3}},
      {L"\U0000094d\U0001f1e6", {2381, 127462}, {1, 2}},
      {L"\U0000094d\U00000308\U0001f1e6", {2381, 127462}, {2, 3}},
      {L"\U0000094d\U00000600", {2381, 1536}, {1, 2}},
@@ -3506,8 +3230,6 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U0000094d\U00000308\U0000ac00", {2381, 44032}, {2, 3}},
      {L"\U0000094d\U0000ac01", {2381, 44033}, {1, 2}},
      {L"\U0000094d\U00000308\U0000ac01", {2381, 44033}, {2, 3}},
-     {L"\U0000094d\U00000900", {2381}, {2}},
-     {L"\U0000094d\U00000308\U00000900", {2381}, {3}},
      {L"\U0000094d\U00000903", {2381}, {2}},
      {L"\U0000094d\U00000308\U00000903", {2381}, {3}},
      {L"\U0000094d\U00000904", {2381, 2308}, {1, 2}},
@@ -3520,8 +3242,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U0000094d\U00000308\U0000231a", {2381, 8986}, {2, 3}},
      {L"\U0000094d\U00000300", {2381}, {2}},
      {L"\U0000094d\U00000308\U00000300", {2381}, {3}},
-     {L"\U0000094d\U0000093c", {2381}, {2}},
-     {L"\U0000094d\U00000308\U0000093c", {2381}, {3}},
+     {L"\U0000094d\U00000900", {2381}, {2}},
+     {L"\U0000094d\U00000308\U00000900", {2381}, {3}},
      {L"\U0000094d\U0000094d", {2381}, {2}},
      {L"\U0000094d\U00000308\U0000094d", {2381}, {3}},
      {L"\U0000094d\U0000200d", {2381}, {2}},
@@ -3536,8 +3258,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U0000200d\U00000308\U0000000a", {8205, 10}, {2, 3}},
      {L"\U0000200d\U00000001", {8205, 1}, {1, 2}},
      {L"\U0000200d\U00000308\U00000001", {8205, 1}, {2, 3}},
-     {L"\U0000200d\U0000034f", {8205}, {2}},
-     {L"\U0000200d\U00000308\U0000034f", {8205}, {3}},
+     {L"\U0000200d\U0000200c", {8205}, {2}},
+     {L"\U0000200d\U00000308\U0000200c", {8205}, {3}},
      {L"\U0000200d\U0001f1e6", {8205, 127462}, {1, 2}},
      {L"\U0000200d\U00000308\U0001f1e6", {8205, 127462}, {2, 3}},
      {L"\U0000200d\U00000600", {8205, 1536}, {1, 2}},
@@ -3554,8 +3276,6 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U0000200d\U00000308\U0000ac00", {8205, 44032}, {2, 3}},
      {L"\U0000200d\U0000ac01", {8205, 44033}, {1, 2}},
      {L"\U0000200d\U00000308\U0000ac01", {8205, 44033}, {2, 3}},
-     {L"\U0000200d\U00000900", {8205}, {2}},
-     {L"\U0000200d\U00000308\U00000900", {8205}, {3}},
      {L"\U0000200d\U00000903", {8205}, {2}},
      {L"\U0000200d\U00000308\U00000903", {8205}, {3}},
      {L"\U0000200d\U00000904", {8205, 2308}, {1, 2}},
@@ -3568,8 +3288,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U0000200d\U00000308\U0000231a", {8205, 8986}, {2, 3}},
      {L"\U0000200d\U00000300", {8205}, {2}},
      {L"\U0000200d\U00000308\U00000300", {8205}, {3}},
-     {L"\U0000200d\U0000093c", {8205}, {2}},
-     {L"\U0000200d\U00000308\U0000093c", {8205}, {3}},
+     {L"\U0000200d\U00000900", {8205}, {2}},
+     {L"\U0000200d\U00000308\U00000900", {8205}, {3}},
      {L"\U0000200d\U0000094d", {8205}, {2}},
      {L"\U0000200d\U00000308\U0000094d", {8205}, {3}},
      {L"\U0000200d\U0000200d", {8205}, {2}},
@@ -3584,8 +3304,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00000378\U00000308\U0000000a", {888, 10}, {2, 3}},
      {L"\U00000378\U00000001", {888, 1}, {1, 2}},
      {L"\U00000378\U00000308\U00000001", {888, 1}, {2, 3}},
-     {L"\U00000378\U0000034f", {888}, {2}},
-     {L"\U00000378\U00000308\U0000034f", {888}, {3}},
+     {L"\U00000378\U0000200c", {888}, {2}},
+     {L"\U00000378\U00000308\U0000200c", {888}, {3}},
      {L"\U00000378\U0001f1e6", {888, 127462}, {1, 2}},
      {L"\U00000378\U00000308\U0001f1e6", {888, 127462}, {2, 3}},
      {L"\U00000378\U00000600", {888, 1536}, {1, 2}},
@@ -3602,8 +3322,6 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00000378\U00000308\U0000ac00", {888, 44032}, {2, 3}},
      {L"\U00000378\U0000ac01", {888, 44033}, {1, 2}},
      {L"\U00000378\U00000308\U0000ac01", {888, 44033}, {2, 3}},
-     {L"\U00000378\U00000900", {888}, {2}},
-     {L"\U00000378\U00000308\U00000900", {888}, {3}},
      {L"\U00000378\U00000903", {888}, {2}},
      {L"\U00000378\U00000308\U00000903", {888}, {3}},
      {L"\U00000378\U00000904", {888, 2308}, {1, 2}},
@@ -3616,8 +3334,8 @@ std::array<data<wchar_t>, 1187> data_utf32 = {{
      {L"\U00000378\U00000308\U0000231a", {888, 8986}, {2, 3}},
      {L"\U00000378\U00000300", {888}, {2}},
      {L"\U00000378\U00000308\U00000300", {888}, {3}},
-     {L"\U00000378\U0000093c", {888}, {2}},
-     {L"\U00000378\U00000308\U0000093c", {888}, {3}},
+     {L"\U00000378\U00000900", {888}, {2}},
+     {L"\U00000378\U00000308\U00000900", {888}, {3}},
      {L"\U00000378\U0000094d", {888}, {2}},
      {L"\U00000378\U00000308\U0000094d", {888}, {3}},
      {L"\U00000378\U0000200d", {888}, {2}},
diff --git a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.pass.cpp b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.pass.cpp
index dd1f4b6..90f7cb2 100644
--- a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.pass.cpp
+++ b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.pass.cpp
@@ -40,15 +40,15 @@ constexpr int count_entries(cluster::__property property) {
       });
 }
 
-static_assert(count_entries(cluster::__property::__Prepend) == 27);
+static_assert(count_entries(cluster::__property::__Prepend) == 28);
 static_assert(count_entries(cluster::__property::__CR) == 1);
 static_assert(count_entries(cluster::__property::__LF) == 1);
 static_assert(count_entries(cluster::__property::__Control) == 3893);
-static_assert(count_entries(cluster::__property::__Extend) == 2130);
+static_assert(count_entries(cluster::__property::__Extend) == 2198);
 static_assert(count_entries(cluster::__property::__Regional_Indicator) == 26);
-static_assert(count_entries(cluster::__property::__SpacingMark) == 395);
+static_assert(count_entries(cluster::__property::__SpacingMark) == 378);
 static_assert(count_entries(cluster::__property::__L) == 125);
-static_assert(count_entries(cluster::__property::__V) == 95);
+static_assert(count_entries(cluster::__property::__V) == 100);
 static_assert(count_entries(cluster::__property::__T) == 137);
 static_assert(count_entries(cluster::__property::__LV) == 399);
 static_assert(count_entries(cluster::__property::__LVT) == 10773);
@@ -68,7 +68,7 @@ constexpr int count_entries(inCB::__property property) {
 
 static_assert(count_entries(inCB::__property::__Linker) == 6);
 static_assert(count_entries(inCB::__property::__Consonant) == 240);
-static_assert(count_entries(inCB::__property::__Extend) == 884);
+static_assert(count_entries(inCB::__property::__Extend) == 2192);
 
 } // namespace
 
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill.pass.cpp
index 619dc72..7656be7 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill.pass.cpp
@@ -19,6 +19,7 @@
 #include <cstddef>
 #include <vector>
 
+#include "sized_allocator.h"
 #include "test_macros.h"
 #include "test_iterators.h"
 
@@ -46,6 +47,39 @@ struct Test {
   }
 };
 
+// Make sure std::fill behaves properly with std::vector<bool> iterators with custom size types.
+// See https://github.com/llvm/llvm-project/pull/122410.
+TEST_CONSTEXPR_CXX20 void test_bititer_with_custom_sized_types() {
+  {
+    using Alloc = sized_allocator<bool, std::uint8_t, std::int8_t>;
+    std::vector<bool, Alloc> in(100, false, Alloc(1));
+    std::vector<bool, Alloc> expected(100, true, Alloc(1));
+    std::fill(in.begin(), in.end(), true);
+    assert(in == expected);
+  }
+  {
+    using Alloc = sized_allocator<bool, std::uint16_t, std::int16_t>;
+    std::vector<bool, Alloc> in(200, false, Alloc(1));
+    std::vector<bool, Alloc> expected(200, true, Alloc(1));
+    std::fill(in.begin(), in.end(), true);
+    assert(in == expected);
+  }
+  {
+    using Alloc = sized_allocator<bool, std::uint32_t, std::int32_t>;
+    std::vector<bool, Alloc> in(200, false, Alloc(1));
+    std::vector<bool, Alloc> expected(200, true, Alloc(1));
+    std::fill(in.begin(), in.end(), true);
+    assert(in == expected);
+  }
+  {
+    using Alloc = sized_allocator<bool, std::uint64_t, std::int64_t>;
+    std::vector<bool, Alloc> in(200, false, Alloc(1));
+    std::vector<bool, Alloc> expected(200, true, Alloc(1));
+    std::fill(in.begin(), in.end(), true);
+    assert(in == expected);
+  }
+}
+
 TEST_CONSTEXPR_CXX20 bool test() {
   types::for_each(types::forward_iterator_list<char*>(), Test<char>());
   types::for_each(types::forward_iterator_list<int*>(), Test<int>());
@@ -93,6 +127,9 @@ TEST_CONSTEXPR_CXX20 bool test() {
       assert(in == expected);
     }
   }
+
+  test_bititer_with_custom_sized_types();
+
   return true;
 }
 
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill_n.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill_n.pass.cpp
index 7d6770d..3b67101 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill_n.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill_n.pass.cpp
@@ -15,159 +15,177 @@
 
 #include <algorithm>
 #include <cassert>
+#include <vector>
 
+#include "sized_allocator.h"
 #include "test_macros.h"
 #include "test_iterators.h"
 #include "user_defined_integral.h"
 
 #if TEST_STD_VER > 17
 TEST_CONSTEXPR bool test_constexpr() {
-    const std::size_t N = 5;
-    int ib[] = {0, 0, 0, 0, 0, 0}; // one bigger than N
-
-    auto it = std::fill_n(std::begin(ib), N, 5);
-    return it == (std::begin(ib) + N)
-        && std::all_of(std::begin(ib), it, [](int a) {return a == 5; })
-        && *it == 0 // don't overwrite the last value in the output array
-        ;
-    }
+  const std::size_t N = 5;
+  int ib[]            = {0, 0, 0, 0, 0, 0}; // one bigger than N
+
+  auto it = std::fill_n(std::begin(ib), N, 5);
+  return it == (std::begin(ib) + N) && std::all_of(std::begin(ib), it, [](int a) { return a == 5; }) &&
+       *it == 0 // don't overwrite the last value in the output array
+      ;
+}
 #endif
 
 typedef UserDefinedIntegral<unsigned> UDI;
 
 template <class Iter>
-void
-test_char()
-{
-    char a[4] = {};
-    Iter it = std::fill_n(Iter(a), UDI(4), char(1));
-    assert(base(it) == a + 4);
-    assert(a[0] == 1);
-    assert(a[1] == 1);
-    assert(a[2] == 1);
-    assert(a[3] == 1);
+void test_char() {
+  char a[4] = {};
+  Iter it   = std::fill_n(Iter(a), UDI(4), char(1));
+  assert(base(it) == a + 4);
+  assert(a[0] == 1);
+  assert(a[1] == 1);
+  assert(a[2] == 1);
+  assert(a[3] == 1);
 }
 
 template <class Iter>
-void
-test_int()
-{
-    int a[4] = {};
-    Iter it = std::fill_n(Iter(a), UDI(4), 1);
-    assert(base(it) == a + 4);
-    assert(a[0] == 1);
-    assert(a[1] == 1);
-    assert(a[2] == 1);
-    assert(a[3] == 1);
+void test_int() {
+  int a[4] = {};
+  Iter it  = std::fill_n(Iter(a), UDI(4), 1);
+  assert(base(it) == a + 4);
+  assert(a[0] == 1);
+  assert(a[1] == 1);
+  assert(a[2] == 1);
+  assert(a[3] == 1);
 }
 
-void
-test_int_array()
-{
-    int a[4] = {};
-    assert(std::fill_n(a, UDI(4), static_cast<char>(1)) == a + 4);
-    assert(a[0] == 1);
-    assert(a[1] == 1);
-    assert(a[2] == 1);
-    assert(a[3] == 1);
+void test_int_array() {
+  int a[4] = {};
+  assert(std::fill_n(a, UDI(4), static_cast<char>(1)) == a + 4);
+  assert(a[0] == 1);
+  assert(a[1] == 1);
+  assert(a[2] == 1);
+  assert(a[3] == 1);
 }
 
 struct source {
-    source() : i(0) { }
+  source() : i(0) {}
 
-    operator int() const { return i++; }
-    mutable int i;
+  operator int() const { return i++; }
+  mutable int i;
 };
 
-void
-test_int_array_struct_source()
-{
-    int a[4] = {};
-    assert(std::fill_n(a, UDI(4), source()) == a + 4);
-    assert(a[0] == 0);
-    assert(a[1] == 1);
-    assert(a[2] == 2);
-    assert(a[3] == 3);
+void test_int_array_struct_source() {
+  int a[4] = {};
+  assert(std::fill_n(a, UDI(4), source()) == a + 4);
+  assert(a[0] == 0);
+  assert(a[1] == 1);
+  assert(a[2] == 2);
+  assert(a[3] == 3);
 }
 
 struct test1 {
-    test1() : c(0) { }
-    test1(char xc) : c(xc + 1) { }
-    char c;
+  test1() : c(0) {}
+  test1(char xc) : c(xc + 1) {}
+  char c;
 };
 
-void
-test_struct_array()
-{
-    test1 test1a[4] = {};
-    assert(std::fill_n(test1a, UDI(4), static_cast<char>(10)) == test1a + 4);
-    assert(test1a[0].c == 11);
-    assert(test1a[1].c == 11);
-    assert(test1a[2].c == 11);
-    assert(test1a[3].c == 11);
+void test_struct_array() {
+  test1 test1a[4] = {};
+  assert(std::fill_n(test1a, UDI(4), static_cast<char>(10)) == test1a + 4);
+  assert(test1a[0].c == 11);
+  assert(test1a[1].c == 11);
+  assert(test1a[2].c == 11);
+  assert(test1a[3].c == 11);
 }
 
-class A
-{
-    char a_;
+class A {
+  char a_;
+
 public:
-    A() {}
-    explicit A(char a) : a_(a) {}
-    operator unsigned char() const {return 'b';}
+  A() {}
+  explicit A(char a) : a_(a) {}
+  operator unsigned char() const { return 'b'; }
 
-    friend bool operator==(const A& x, const A& y)
-        {return x.a_ == y.a_;}
+  friend bool operator==(const A& x, const A& y) { return x.a_ == y.a_; }
 };
 
-void
-test5()
-{
-    A a[3];
-    assert(std::fill_n(&a[0], UDI(3), A('a')) == a+3);
-    assert(a[0] == A('a'));
-    assert(a[1] == A('a'));
-    assert(a[2] == A('a'));
+void test5() {
+  A a[3];
+  assert(std::fill_n(&a[0], UDI(3), A('a')) == a + 3);
+  assert(a[0] == A('a'));
+  assert(a[1] == A('a'));
+  assert(a[2] == A('a'));
 }
 
-struct Storage
-{
-  union
-  {
+struct Storage {
+  union {
     unsigned char a;
     unsigned char b;
   };
 };
 
-void test6()
-{
+void test6() {
   Storage foo[5];
   std::fill_n(&foo[0], UDI(5), Storage());
 }
 
+// Make sure std::fill_n behaves properly with std::vector<bool> iterators with custom size types.
+// See https://github.com/llvm/llvm-project/pull/122410.
+TEST_CONSTEXPR_CXX20 void test_bititer_with_custom_sized_types() {
+  {
+    using Alloc = sized_allocator<bool, std::uint8_t, std::int8_t>;
+    std::vector<bool, Alloc> in(100, false, Alloc(1));
+    std::vector<bool, Alloc> expected(100, true, Alloc(1));
+    std::fill_n(in.begin(), in.size(), true);
+    assert(in == expected);
+  }
+  {
+    using Alloc = sized_allocator<bool, std::uint16_t, std::int16_t>;
+    std::vector<bool, Alloc> in(200, false, Alloc(1));
+    std::vector<bool, Alloc> expected(200, true, Alloc(1));
+    std::fill_n(in.begin(), in.size(), true);
+    assert(in == expected);
+  }
+  {
+    using Alloc = sized_allocator<bool, std::uint32_t, std::int32_t>;
+    std::vector<bool, Alloc> in(200, false, Alloc(1));
+    std::vector<bool, Alloc> expected(200, true, Alloc(1));
+    std::fill_n(in.begin(), in.size(), true);
+    assert(in == expected);
+  }
+  {
+    using Alloc = sized_allocator<bool, std::uint64_t, std::int64_t>;
+    std::vector<bool, Alloc> in(200, false, Alloc(1));
+    std::vector<bool, Alloc> expected(200, true, Alloc(1));
+    std::fill_n(in.begin(), in.size(), true);
+    assert(in == expected);
+  }
+}
+
+int main(int, char**) {
+  test_char<cpp17_output_iterator<char*> >();
+  test_char<forward_iterator<char*> >();
+  test_char<bidirectional_iterator<char*> >();
+  test_char<random_access_iterator<char*> >();
+  test_char<char*>();
 
-int main(int, char**)
-{
-    test_char<cpp17_output_iterator<char*> >();
-    test_char<forward_iterator<char*> >();
-    test_char<bidirectional_iterator<char*> >();
-    test_char<random_access_iterator<char*> >();
-    test_char<char*>();
+  test_int<cpp17_output_iterator<int*> >();
+  test_int<forward_iterator<int*> >();
+  test_int<bidirectional_iterator<int*> >();
+  test_int<random_access_iterator<int*> >();
+  test_int<int*>();
 
-    test_int<cpp17_output_iterator<int*> >();
-    test_int<forward_iterator<int*> >();
-    test_int<bidirectional_iterator<int*> >();
-    test_int<random_access_iterator<int*> >();
-    test_int<int*>();
+  test_int_array();
+  test_int_array_struct_source();
+  test_struct_array();
 
-    test_int_array();
-    test_int_array_struct_source();
-    test_struct_array();
+  test5();
+  test6();
 
-    test5();
-    test6();
+  test_bititer_with_custom_sized_types();
 
 #if TEST_STD_VER > 17
-    static_assert(test_constexpr());
+  static_assert(test_constexpr());
 #endif
 
   return 0;
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill.pass.cpp
index 5dc375e..30dfdd5 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill.pass.cpp
@@ -20,9 +20,12 @@
 #include <cassert>
 #include <ranges>
 #include <string>
+#include <vector>
 
+#include "sized_allocator.h"
 #include "almost_satisfies_types.h"
 #include "test_iterators.h"
+#include "test_macros.h"
 
 template <class Iter, class Sent = sentinel_wrapper<Iter>>
 concept HasFillIt = requires(Iter iter, Sent sent) { std::ranges::fill(iter, sent, int{}); };
@@ -53,7 +56,7 @@ constexpr void test_iterators() {
     }
     {
       int a[3];
-      auto range = std::ranges::subrange(It(a), Sent(It(a + 3)));
+      auto range                = std::ranges::subrange(It(a), Sent(It(a + 3)));
       std::same_as<It> auto ret = std::ranges::fill(range, 1);
       assert(std::all_of(a, a + 3, [](int i) { return i == 1; }));
       assert(base(ret) == a + 3);
@@ -69,12 +72,51 @@ constexpr void test_iterators() {
     {
       std::array<int, 0> a;
       auto range = std::ranges::subrange(It(a.data()), Sent(It(a.data())));
-      auto ret = std::ranges::fill(range, 1);
+      auto ret   = std::ranges::fill(range, 1);
       assert(base(ret) == a.data());
     }
   }
 }
 
+// Make sure std::ranges::fill behaves properly with std::vector<bool> iterators with custom
+// size types. See https://github.com/llvm/llvm-project/pull/122410.
+//
+// The `ranges::{fill, fill_n}` algorithms require `vector<bool, Alloc>::iterator` to satisfy
+// the `std::indirectly_writable` concept when used with `vector<bool, Alloc>`, which is only
+// satisfied since C++23.
+#if TEST_STD_VER >= 23
+TEST_CONSTEXPR_CXX20 void test_bititer_with_custom_sized_types() {
+  {
+    using Alloc = sized_allocator<bool, std::uint8_t, std::int8_t>;
+    std::vector<bool, Alloc> in(100, false, Alloc(1));
+    std::vector<bool, Alloc> expected(100, true, Alloc(1));
+    std::ranges::fill(in, true);
+    assert(in == expected);
+  }
+  {
+    using Alloc = sized_allocator<bool, std::uint16_t, std::int16_t>;
+    std::vector<bool, Alloc> in(200, false, Alloc(1));
+    std::vector<bool, Alloc> expected(200, true, Alloc(1));
+    std::ranges::fill(in, true);
+    assert(in == expected);
+  }
+  {
+    using Alloc = sized_allocator<bool, std::uint32_t, std::int32_t>;
+    std::vector<bool, Alloc> in(200, false, Alloc(1));
+    std::vector<bool, Alloc> expected(200, true, Alloc(1));
+    std::ranges::fill(in, true);
+    assert(in == expected);
+  }
+  {
+    using Alloc = sized_allocator<bool, std::uint64_t, std::int64_t>;
+    std::vector<bool, Alloc> in(200, false, Alloc(1));
+    std::vector<bool, Alloc> expected(200, true, Alloc(1));
+    std::ranges::fill(in, true);
+    assert(in == expected);
+  }
+}
+#endif
+
 constexpr bool test() {
   test_iterators<cpp17_output_iterator<int*>, sentinel_wrapper<cpp17_output_iterator<int*>>>();
   test_iterators<cpp20_output_iterator<int*>, sentinel_wrapper<cpp20_output_iterator<int*>>>();
@@ -94,19 +136,19 @@ constexpr bool test() {
     };
     {
       S a[5];
-      std::ranges::fill(a, a + 5, S {true});
+      std::ranges::fill(a, a + 5, S{true});
       assert(std::all_of(a, a + 5, [](S& s) { return s.copied; }));
     }
     {
       S a[5];
-      std::ranges::fill(a, S {true});
+      std::ranges::fill(a, S{true});
       assert(std::all_of(a, a + 5, [](S& s) { return s.copied; }));
     }
   }
 
   { // check that std::ranges::dangling is returned
     [[maybe_unused]] std::same_as<std::ranges::dangling> decltype(auto) ret =
-        std::ranges::fill(std::array<int, 10> {}, 1);
+        std::ranges::fill(std::array<int, 10>{}, 1);
   }
 
   { // check that std::ranges::dangling isn't returned with a borrowing range
@@ -131,6 +173,10 @@ constexpr bool test() {
     }
   }
 
+#if TEST_STD_VER >= 23
+  test_bititer_with_custom_sized_types();
+#endif
+
   return true;
 }
 
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill_n.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill_n.pass.cpp
index 10ff385..ae70e71 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill_n.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/ranges.fill_n.pass.cpp
@@ -18,9 +18,12 @@
 #include <cassert>
 #include <ranges>
 #include <string>
+#include <vector>
 
+#include "sized_allocator.h"
 #include "almost_satisfies_types.h"
 #include "test_iterators.h"
+#include "test_macros.h"
 
 template <class Iter>
 concept HasFillN = requires(Iter iter) { std::ranges::fill_n(iter, int{}, int{}); };
@@ -48,6 +51,45 @@ constexpr void test_iterators() {
   }
 }
 
+// Make sure std::ranges::fill_n behaves properly with std::vector<bool> iterators with custom
+// size types. See https://github.com/llvm/llvm-project/pull/122410.
+//
+// The `ranges::{fill, fill_n}` algorithms require `vector<bool, Alloc>::iterator` to satisfy
+// the `std::indirectly_writable` concept when used with `vector<bool, Alloc>`, which is only
+// satisfied since C++23.
+#if TEST_STD_VER >= 23
+TEST_CONSTEXPR_CXX20 void test_bititer_with_custom_sized_types() {
+  {
+    using Alloc = sized_allocator<bool, std::uint8_t, std::int8_t>;
+    std::vector<bool, Alloc> in(100, false, Alloc(1));
+    std::vector<bool, Alloc> expected(100, true, Alloc(1));
+    std::ranges::fill_n(std::ranges::begin(in), in.size(), true);
+    assert(in == expected);
+  }
+  {
+    using Alloc = sized_allocator<bool, std::uint16_t, std::int16_t>;
+    std::vector<bool, Alloc> in(200, false, Alloc(1));
+    std::vector<bool, Alloc> expected(200, true, Alloc(1));
+    std::ranges::fill_n(std::ranges::begin(in), in.size(), true);
+    assert(in == expected);
+  }
+  {
+    using Alloc = sized_allocator<bool, std::uint32_t, std::int32_t>;
+    std::vector<bool, Alloc> in(200, false, Alloc(1));
+    std::vector<bool, Alloc> expected(200, true, Alloc(1));
+    std::ranges::fill_n(std::ranges::begin(in), in.size(), true);
+    assert(in == expected);
+  }
+  {
+    using Alloc = sized_allocator<bool, std::uint64_t, std::int64_t>;
+    std::vector<bool, Alloc> in(200, false, Alloc(1));
+    std::vector<bool, Alloc> expected(200, true, Alloc(1));
+    std::ranges::fill_n(std::ranges::begin(in), in.size(), true);
+    assert(in == expected);
+  }
+}
+#endif
+
 constexpr bool test() {
   test_iterators<cpp17_output_iterator<int*>, sentinel_wrapper<cpp17_output_iterator<int*>>>();
   test_iterators<cpp20_output_iterator<int*>, sentinel_wrapper<cpp20_output_iterator<int*>>>();
@@ -68,7 +110,7 @@ constexpr bool test() {
     };
 
     S a[5];
-    std::ranges::fill_n(a, 5, S {});
+    std::ranges::fill_n(a, 5, S{});
     assert(std::all_of(a, a + 5, [](S& s) { return s.copied; }));
   }
 
@@ -79,6 +121,10 @@ constexpr bool test() {
     assert(std::all_of(a.begin(), a.end(), [](auto& s) { return s == "long long string so no SSO"; }));
   }
 
+#if TEST_STD_VER >= 23
+  test_bititer_with_custom_sized_types();
+#endif
+
   return true;
 }
 
diff --git a/libcxx/test/support/sized_allocator.h b/libcxx/test/support/sized_allocator.h
new file mode 100644
index 0000000..8d52f5b
--- /dev/null
+++ b/libcxx/test/support/sized_allocator.h
@@ -0,0 +1,60 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TEST_SUPPORT_SIZED_ALLOCATOR_H
+#define TEST_SUPPORT_SIZED_ALLOCATOR_H
+
+#include <cstddef>
+#include <limits>
+#include <memory>
+#include <new>
+
+#include "test_macros.h"
+
+// Allocator with a provided size_type and difference_type, used to test corner cases
+// like arithmetic on Allocator::size_type in generic code.
+template <typename T, typename Size = std::size_t, typename Difference = std::ptrdiff_t>
+class sized_allocator {
+  template <typename U, typename Sz, typename Diff>
+  friend class sized_allocator;
+
+public:
+  using value_type                  = T;
+  using size_type                   = Size;
+  using difference_type             = Difference;
+  using propagate_on_container_swap = std::true_type;
+
+  TEST_CONSTEXPR_CXX20 explicit sized_allocator(int d = 0) : data_(d) {}
+
+  template <typename U, typename Sz, typename Diff>
+  TEST_CONSTEXPR_CXX20 sized_allocator(const sized_allocator<U, Sz, Diff>& a) TEST_NOEXCEPT : data_(a.data_) {}
+
+  TEST_CONSTEXPR_CXX20 T* allocate(size_type n) {
+    if (n > max_size())
+      TEST_THROW(std::bad_array_new_length());
+    return std::allocator<T>().allocate(n);
+  }
+
+  TEST_CONSTEXPR_CXX20 void deallocate(T* p, size_type n) TEST_NOEXCEPT { std::allocator<T>().deallocate(p, n); }
+
+  TEST_CONSTEXPR size_type max_size() const TEST_NOEXCEPT {
+    return std::numeric_limits<size_type>::max() / sizeof(value_type);
+  }
+
+private:
+  int data_;
+
+  TEST_CONSTEXPR friend bool operator==(const sized_allocator& a, const sized_allocator& b) {
+    return a.data_ == b.data_;
+  }
+  TEST_CONSTEXPR friend bool operator!=(const sized_allocator& a, const sized_allocator& b) {
+    return a.data_ != b.data_;
+  }
+};
+
+#endif // TEST_SUPPORT_SIZED_ALLOCATOR_H
diff --git a/libcxx/utils/data/unicode/DerivedCoreProperties.txt b/libcxx/utils/data/unicode/DerivedCoreProperties.txt
index 220c556..1075638 100644
--- a/libcxx/utils/data/unicode/DerivedCoreProperties.txt
+++ b/libcxx/utils/data/unicode/DerivedCoreProperties.txt
@@ -1,8 +1,8 @@
-# DerivedCoreProperties-15.1.0.txt
-# Date: 2023-08-07, 15:21:24 GMT
-# © 2023 Unicode®, Inc.
+# DerivedCoreProperties-16.0.0.txt
+# Date: 2024-05-31, 18:09:32 GMT
+# © 2024 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
-# For terms of use, see https://www.unicode.org/terms_of_use.html
+# For terms of use and license, see https://www.unicode.org/terms_of_use.html
 #
 # Unicode Character Database
 #   For documentation, see https://www.unicode.org/reports/tr44/
@@ -177,6 +177,7 @@ FF5C          ; Math # Sm       FULLWIDTH VERTICAL LINE
 FF5E          ; Math # Sm       FULLWIDTH TILDE
 FFE2          ; Math # Sm       FULLWIDTH NOT SIGN
 FFE9..FFEC    ; Math # Sm   [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW
+10D8E..10D8F  ; Math # Sm   [2] GARAY PLUS SIGN..GARAY MINUS SIGN
 1D400..1D454  ; Math # L&  [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G
 1D456..1D49C  ; Math # L&  [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A
 1D49E..1D49F  ; Math # L&   [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D
@@ -253,7 +254,7 @@ FFE9..FFEC    ; Math # Sm   [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A
 1EEAB..1EEBB  ; Math # Lo  [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
 1EEF0..1EEF1  ; Math # Sm   [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL
 
-# Total code points: 2310
+# Total code points: 2312
 
 # ================================================
 
@@ -280,6 +281,7 @@ FFE9..FFEC    ; Math # Sm   [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A
 02EC          ; Alphabetic # Lm       MODIFIER LETTER VOICING
 02EE          ; Alphabetic # Lm       MODIFIER LETTER DOUBLE APOSTROPHE
 0345          ; Alphabetic # Mn       COMBINING GREEK YPOGEGRAMMENI
+0363..036F    ; Alphabetic # Mn  [13] COMBINING LATIN SMALL LETTER A..COMBINING LATIN SMALL LETTER X
 0370..0373    ; Alphabetic # L&   [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI
 0374          ; Alphabetic # Lm       GREEK NUMERAL SIGN
 0376..0377    ; Alphabetic # L&   [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
@@ -343,6 +345,7 @@ FFE9..FFEC    ; Math # Sm   [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A
 0860..086A    ; Alphabetic # Lo  [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
 0870..0887    ; Alphabetic # Lo  [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT
 0889..088E    ; Alphabetic # Lo   [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL
+0897          ; Alphabetic # Mn       ARABIC PEPET
 08A0..08C8    ; Alphabetic # Lo  [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF
 08C9          ; Alphabetic # Lm       ARABIC SMALL FARSI YEH
 08D4..08DF    ; Alphabetic # Mn  [12] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH WORD WAQFA
@@ -710,7 +713,7 @@ FFE9..FFEC    ; Math # Sm   [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A
 1C4D..1C4F    ; Alphabetic # Lo   [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA
 1C5A..1C77    ; Alphabetic # Lo  [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
 1C78..1C7D    ; Alphabetic # Lm   [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
-1C80..1C88    ; Alphabetic # L&   [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C80..1C8A    ; Alphabetic # L&  [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE
 1C90..1CBA    ; Alphabetic # L&  [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
 1CBD..1CBF    ; Alphabetic # L&   [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
 1CE9..1CEC    ; Alphabetic # Lo   [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL
@@ -723,7 +726,7 @@ FFE9..FFEC    ; Math # Sm   [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A
 1D78          ; Alphabetic # Lm       MODIFIER LETTER CYRILLIC EN
 1D79..1D9A    ; Alphabetic # L&  [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK
 1D9B..1DBF    ; Alphabetic # Lm  [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA
-1DE7..1DF4    ; Alphabetic # Mn  [14] COMBINING LATIN SMALL LETTER ALPHA..COMBINING LATIN SMALL LETTER U WITH DIAERESIS
+1DD3..1DF4    ; Alphabetic # Mn  [34] COMBINING LATIN SMALL LETTER FLATTENED OPEN A ABOVE..COMBINING LATIN SMALL LETTER U WITH DIAERESIS
 1E00..1F15    ; Alphabetic # L& [278] LATIN CAPITAL LETTER A WITH RING BELOW..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA
 1F18..1F1D    ; Alphabetic # L&   [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
 1F20..1F45    ; Alphabetic # L&  [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA
@@ -830,10 +833,10 @@ A771..A787    ; Alphabetic # L&  [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER
 A788          ; Alphabetic # Lm       MODIFIER LETTER LOW CIRCUMFLEX ACCENT
 A78B..A78E    ; Alphabetic # L&   [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
 A78F          ; Alphabetic # Lo       LATIN LETTER SINOLOGICAL DOT
-A790..A7CA    ; Alphabetic # L&  [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
+A790..A7CD    ; Alphabetic # L&  [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE
 A7D0..A7D1    ; Alphabetic # L&   [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G
 A7D3          ; Alphabetic # L&       LATIN SMALL LETTER DOUBLE THORN
-A7D5..A7D9    ; Alphabetic # L&   [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S
+A7D5..A7DC    ; Alphabetic # L&   [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE
 A7F2..A7F4    ; Alphabetic # Lm   [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q
 A7F5..A7F6    ; Alphabetic # L&   [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H
 A7F7          ; Alphabetic # Lo       LATIN EPIGRAPHIC LETTER SIDEWAYS I
@@ -998,6 +1001,7 @@ FFDA..FFDC    ; Alphabetic # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG
 105A3..105B1  ; Alphabetic # L&  [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE
 105B3..105B9  ; Alphabetic # L&   [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
 105BB..105BC  ; Alphabetic # L&   [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
+105C0..105F3  ; Alphabetic # Lo  [52] TODHRI LETTER A..TODHRI LETTER OO
 10600..10736  ; Alphabetic # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664
 10740..10755  ; Alphabetic # Lo  [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE
 10760..10767  ; Alphabetic # Lo   [8] LINEAR A SIGN A800..LINEAR A SIGN A807
@@ -1038,9 +1042,18 @@ FFDA..FFDC    ; Alphabetic # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG
 10CC0..10CF2  ; Alphabetic # L&  [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US
 10D00..10D23  ; Alphabetic # Lo  [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA
 10D24..10D27  ; Alphabetic # Mn   [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
+10D4A..10D4D  ; Alphabetic # Lo   [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE
+10D4E         ; Alphabetic # Lm       GARAY VOWEL LENGTH MARK
+10D4F         ; Alphabetic # Lo       GARAY SUKUN
+10D50..10D65  ; Alphabetic # L&  [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA
+10D69         ; Alphabetic # Mn       GARAY VOWEL SIGN E
+10D6F         ; Alphabetic # Lm       GARAY REDUPLICATION MARK
+10D70..10D85  ; Alphabetic # L&  [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA
 10E80..10EA9  ; Alphabetic # Lo  [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
 10EAB..10EAC  ; Alphabetic # Mn   [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
 10EB0..10EB1  ; Alphabetic # Lo   [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
+10EC2..10EC4  ; Alphabetic # Lo   [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW
+10EFC         ; Alphabetic # Mn       ARABIC COMBINING ALEF OVERLAY
 10F00..10F1C  ; Alphabetic # Lo  [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
 10F27         ; Alphabetic # Lo       OLD SOGDIAN LIGATURE AYIN-DALETH
 10F30..10F45  ; Alphabetic # Lo  [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
@@ -1121,6 +1134,19 @@ FFDA..FFDC    ; Alphabetic # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG
 11357         ; Alphabetic # Mc       GRANTHA AU LENGTH MARK
 1135D..11361  ; Alphabetic # Lo   [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL
 11362..11363  ; Alphabetic # Mc   [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL
+11380..11389  ; Alphabetic # Lo  [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL
+1138B         ; Alphabetic # Lo       TULU-TIGALARI LETTER EE
+1138E         ; Alphabetic # Lo       TULU-TIGALARI LETTER AI
+11390..113B5  ; Alphabetic # Lo  [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA
+113B7         ; Alphabetic # Lo       TULU-TIGALARI SIGN AVAGRAHA
+113B8..113BA  ; Alphabetic # Mc   [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II
+113BB..113C0  ; Alphabetic # Mn   [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL
+113C2         ; Alphabetic # Mc       TULU-TIGALARI VOWEL SIGN EE
+113C5         ; Alphabetic # Mc       TULU-TIGALARI VOWEL SIGN AI
+113C7..113CA  ; Alphabetic # Mc   [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA
+113CC..113CD  ; Alphabetic # Mc   [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA
+113D1         ; Alphabetic # Lo       TULU-TIGALARI REPHA
+113D3         ; Alphabetic # Lo       TULU-TIGALARI SIGN PLUTA
 11400..11434  ; Alphabetic # Lo  [53] NEWA LETTER A..NEWA LETTER HA
 11435..11437  ; Alphabetic # Mc   [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II
 11438..1143F  ; Alphabetic # Mn   [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI
@@ -1163,7 +1189,9 @@ FFDA..FFDC    ; Alphabetic # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG
 116B0..116B5  ; Alphabetic # Mn   [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU
 116B8         ; Alphabetic # Lo       TAKRI LETTER ARCHAIC KHA
 11700..1171A  ; Alphabetic # Lo  [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA
-1171D..1171F  ; Alphabetic # Mn   [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA
+1171D         ; Alphabetic # Mn       AHOM CONSONANT SIGN MEDIAL LA
+1171E         ; Alphabetic # Mc       AHOM CONSONANT SIGN MEDIAL RA
+1171F         ; Alphabetic # Mn       AHOM CONSONANT SIGN MEDIAL LIGATING RA
 11720..11721  ; Alphabetic # Mc   [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA
 11722..11725  ; Alphabetic # Mn   [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU
 11726         ; Alphabetic # Mc       AHOM VOWEL SIGN E
@@ -1211,6 +1239,7 @@ FFDA..FFDC    ; Alphabetic # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG
 11A97         ; Alphabetic # Mc       SOYOMBO SIGN VISARGA
 11A9D         ; Alphabetic # Lo       SOYOMBO MARK PLUTA
 11AB0..11AF8  ; Alphabetic # Lo  [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL
+11BC0..11BE0  ; Alphabetic # Lo  [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO
 11C00..11C08  ; Alphabetic # Lo   [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
 11C0A..11C2E  ; Alphabetic # Lo  [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
 11C2F         ; Alphabetic # Mc       BHAIKSUKI VOWEL SIGN AA
@@ -1264,7 +1293,12 @@ FFDA..FFDC    ; Alphabetic # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG
 12F90..12FF0  ; Alphabetic # Lo  [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114
 13000..1342F  ; Alphabetic # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D
 13441..13446  ; Alphabetic # Lo   [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN
+13460..143FA  ; Alphabetic # Lo [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA
 14400..14646  ; Alphabetic # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530
+16100..1611D  ; Alphabetic # Lo  [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA
+1611E..16129  ; Alphabetic # Mn  [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK
+1612A..1612C  ; Alphabetic # Mc   [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA
+1612D..1612E  ; Alphabetic # Mn   [2] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA CONSONANT SIGN MEDIAL RA
 16800..16A38  ; Alphabetic # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ
 16A40..16A5E  ; Alphabetic # Lo  [31] MRO LETTER TA..MRO LETTER TEK
 16A70..16ABE  ; Alphabetic # Lo  [79] TANGSA LETTER OZ..TANGSA LETTER ZA
@@ -1273,6 +1307,9 @@ FFDA..FFDC    ; Alphabetic # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG
 16B40..16B43  ; Alphabetic # Lm   [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM
 16B63..16B77  ; Alphabetic # Lo  [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS
 16B7D..16B8F  ; Alphabetic # Lo  [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ
+16D40..16D42  ; Alphabetic # Lm   [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA
+16D43..16D6A  ; Alphabetic # Lo  [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU
+16D6B..16D6C  ; Alphabetic # Lm   [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT
 16E40..16E7F  ; Alphabetic # L&  [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y
 16F00..16F4A  ; Alphabetic # Lo  [75] MIAO LETTER PA..MIAO LETTER RTE
 16F4F         ; Alphabetic # Mn       MIAO SIGN CONSONANT MODIFIER BAR
@@ -1285,7 +1322,7 @@ FFDA..FFDC    ; Alphabetic # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG
 16FF0..16FF1  ; Alphabetic # Mc   [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
 17000..187F7  ; Alphabetic # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7
 18800..18CD5  ; Alphabetic # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5
-18D00..18D08  ; Alphabetic # Lo   [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08
+18CFF..18D08  ; Alphabetic # Lo  [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08
 1AFF0..1AFF3  ; Alphabetic # Lm   [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5
 1AFF5..1AFFB  ; Alphabetic # Lm   [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5
 1AFFD..1AFFE  ; Alphabetic # Lm   [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8
@@ -1348,6 +1385,8 @@ FFDA..FFDC    ; Alphabetic # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG
 1E2C0..1E2EB  ; Alphabetic # Lo  [44] WANCHO LETTER AA..WANCHO LETTER YIH
 1E4D0..1E4EA  ; Alphabetic # Lo  [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL
 1E4EB         ; Alphabetic # Lm       NAG MUNDARI SIGN OJOD
+1E5D0..1E5ED  ; Alphabetic # Lo  [30] OL ONAL LETTER O..OL ONAL LETTER EG
+1E5F0         ; Alphabetic # Lo       OL ONAL SIGN HODDOND
 1E7E0..1E7E6  ; Alphabetic # Lo   [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO
 1E7E8..1E7EB  ; Alphabetic # Lo   [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE
 1E7ED..1E7EE  ; Alphabetic # Lo   [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE
@@ -1402,7 +1441,7 @@ FFDA..FFDC    ; Alphabetic # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG
 30000..3134A  ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
 31350..323AF  ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
 
-# Total code points: 138387
+# Total code points: 142759
 
 # ================================================
 
@@ -1691,6 +1730,7 @@ FFDA..FFDC    ; Alphabetic # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG
 10FD..10FF    ; Lowercase # L&   [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN
 13F8..13FD    ; Lowercase # L&   [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV
 1C80..1C88    ; Lowercase # L&   [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C8A          ; Lowercase # L&       CYRILLIC SMALL LETTER TJE
 1D00..1D2B    ; Lowercase # L&  [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL
 1D2C..1D6A    ; Lowercase # Lm  [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI
 1D6B..1D77    ; Lowercase # L&  [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G
@@ -2032,11 +2072,13 @@ A7C1          ; Lowercase # L&       LATIN SMALL LETTER OLD POLISH O
 A7C3          ; Lowercase # L&       LATIN SMALL LETTER ANGLICANA W
 A7C8          ; Lowercase # L&       LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY
 A7CA          ; Lowercase # L&       LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
+A7CD          ; Lowercase # L&       LATIN SMALL LETTER S WITH DIAGONAL STROKE
 A7D1          ; Lowercase # L&       LATIN SMALL LETTER CLOSED INSULAR G
 A7D3          ; Lowercase # L&       LATIN SMALL LETTER DOUBLE THORN
 A7D5          ; Lowercase # L&       LATIN SMALL LETTER DOUBLE WYNN
 A7D7          ; Lowercase # L&       LATIN SMALL LETTER MIDDLE SCOTS S
 A7D9          ; Lowercase # L&       LATIN SMALL LETTER SIGMOID S
+A7DB          ; Lowercase # L&       LATIN SMALL LETTER LAMBDA
 A7F2..A7F4    ; Lowercase # Lm   [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q
 A7F6          ; Lowercase # L&       LATIN SMALL LETTER REVERSED HALF H
 A7F8..A7F9    ; Lowercase # Lm   [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
@@ -2060,6 +2102,7 @@ FF41..FF5A    ; Lowercase # L&  [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
 10787..107B0  ; Lowercase # Lm  [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK
 107B2..107BA  ; Lowercase # Lm   [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL
 10CC0..10CF2  ; Lowercase # L&  [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US
+10D70..10D85  ; Lowercase # L&  [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA
 118C0..118DF  ; Lowercase # L&  [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO
 16E60..16E7F  ; Lowercase # L&  [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y
 1D41A..1D433  ; Lowercase # L&  [26] MATHEMATICAL BOLD SMALL A..MATHEMATICAL BOLD SMALL Z
@@ -2096,7 +2139,7 @@ FF41..FF5A    ; Lowercase # L&  [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
 1E030..1E06D  ; Lowercase # Lm  [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE
 1E922..1E943  ; Lowercase # L&  [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA
 
-# Total code points: 2544
+# Total code points: 2569
 
 # ================================================
 
@@ -2379,6 +2422,7 @@ FF41..FF5A    ; Lowercase # L&  [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
 10C7          ; Uppercase # L&       GEORGIAN CAPITAL LETTER YN
 10CD          ; Uppercase # L&       GEORGIAN CAPITAL LETTER AEN
 13A0..13F5    ; Uppercase # L&  [86] CHEROKEE LETTER A..CHEROKEE LETTER MV
+1C89          ; Uppercase # L&       CYRILLIC CAPITAL LETTER TJE
 1C90..1CBA    ; Uppercase # L&  [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
 1CBD..1CBF    ; Uppercase # L&   [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
 1E00          ; Uppercase # L&       LATIN CAPITAL LETTER A WITH RING BELOW
@@ -2705,9 +2749,12 @@ A7C0          ; Uppercase # L&       LATIN CAPITAL LETTER OLD POLISH O
 A7C2          ; Uppercase # L&       LATIN CAPITAL LETTER ANGLICANA W
 A7C4..A7C7    ; Uppercase # L&   [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY
 A7C9          ; Uppercase # L&       LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY
+A7CB..A7CC    ; Uppercase # L&   [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE
 A7D0          ; Uppercase # L&       LATIN CAPITAL LETTER CLOSED INSULAR G
 A7D6          ; Uppercase # L&       LATIN CAPITAL LETTER MIDDLE SCOTS S
 A7D8          ; Uppercase # L&       LATIN CAPITAL LETTER SIGMOID S
+A7DA          ; Uppercase # L&       LATIN CAPITAL LETTER LAMBDA
+A7DC          ; Uppercase # L&       LATIN CAPITAL LETTER LAMBDA WITH STROKE
 A7F5          ; Uppercase # L&       LATIN CAPITAL LETTER REVERSED HALF H
 FF21..FF3A    ; Uppercase # L&  [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
 10400..10427  ; Uppercase # L&  [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW
@@ -2717,6 +2764,7 @@ FF21..FF3A    ; Uppercase # L&  [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH
 1058C..10592  ; Uppercase # L&   [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE
 10594..10595  ; Uppercase # L&   [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE
 10C80..10CB2  ; Uppercase # L&  [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US
+10D50..10D65  ; Uppercase # L&  [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA
 118A0..118BF  ; Uppercase # L&  [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO
 16E40..16E5F  ; Uppercase # L&  [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y
 1D400..1D419  ; Uppercase # L&  [26] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL BOLD CAPITAL Z
@@ -2755,7 +2803,7 @@ FF21..FF3A    ; Uppercase # L&  [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH
 1F150..1F169  ; Uppercase # So  [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
 1F170..1F189  ; Uppercase # So  [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z
 
-# Total code points: 1951
+# Total code points: 1978
 
 # ================================================
 
@@ -2800,7 +2848,7 @@ FF21..FF3A    ; Uppercase # L&  [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH
 10FD..10FF    ; Cased # L&   [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN
 13A0..13F5    ; Cased # L&  [86] CHEROKEE LETTER A..CHEROKEE LETTER MV
 13F8..13FD    ; Cased # L&   [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV
-1C80..1C88    ; Cased # L&   [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C80..1C8A    ; Cased # L&  [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE
 1C90..1CBA    ; Cased # L&  [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
 1CBD..1CBF    ; Cased # L&   [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
 1D00..1D2B    ; Cased # L&  [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL
@@ -2863,10 +2911,10 @@ A722..A76F    ; Cased # L&  [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN
 A770          ; Cased # Lm       MODIFIER LETTER US
 A771..A787    ; Cased # L&  [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T
 A78B..A78E    ; Cased # L&   [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
-A790..A7CA    ; Cased # L&  [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
+A790..A7CD    ; Cased # L&  [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE
 A7D0..A7D1    ; Cased # L&   [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G
 A7D3          ; Cased # L&       LATIN SMALL LETTER DOUBLE THORN
-A7D5..A7D9    ; Cased # L&   [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S
+A7D5..A7DC    ; Cased # L&   [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE
 A7F2..A7F4    ; Cased # Lm   [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q
 A7F5..A7F6    ; Cased # L&   [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H
 A7F8..A7F9    ; Cased # Lm   [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
@@ -2897,6 +2945,8 @@ FF41..FF5A    ; Cased # L&  [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN
 107B2..107BA  ; Cased # Lm   [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL
 10C80..10CB2  ; Cased # L&  [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US
 10CC0..10CF2  ; Cased # L&  [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US
+10D50..10D65  ; Cased # L&  [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA
+10D70..10D85  ; Cased # L&  [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA
 118A0..118DF  ; Cased # L&  [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO
 16E40..16E7F  ; Cased # L&  [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y
 1D400..1D454  ; Cased # L&  [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G
@@ -2938,7 +2988,7 @@ FF41..FF5A    ; Cased # L&  [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN
 1F150..1F169  ; Cased # So  [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
 1F170..1F189  ; Cased # So  [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z
 
-# Total code points: 4526
+# Total code points: 4578
 
 # ================================================
 
@@ -3015,7 +3065,7 @@ FF41..FF5A    ; Cased # L&  [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN
 0859..085B    ; Case_Ignorable # Mn   [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
 0888          ; Case_Ignorable # Sk       ARABIC RAISED ROUND DOT
 0890..0891    ; Case_Ignorable # Cf   [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE
-0898..089F    ; Case_Ignorable # Mn   [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
+0897..089F    ; Case_Ignorable # Mn   [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA
 08C9          ; Case_Ignorable # Lm       ARABIC SMALL FARSI YEH
 08CA..08E1    ; Case_Ignorable # Mn  [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
 08E2          ; Case_Ignorable # Cf       ARABIC DISPUTED END OF AYAH
@@ -3296,8 +3346,11 @@ FFF9..FFFB    ; Case_Ignorable # Cf   [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI
 10A3F         ; Case_Ignorable # Mn       KHAROSHTHI VIRAMA
 10AE5..10AE6  ; Case_Ignorable # Mn   [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
 10D24..10D27  ; Case_Ignorable # Mn   [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
+10D4E         ; Case_Ignorable # Lm       GARAY VOWEL LENGTH MARK
+10D69..10D6D  ; Case_Ignorable # Mn   [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK
+10D6F         ; Case_Ignorable # Lm       GARAY REDUPLICATION MARK
 10EAB..10EAC  ; Case_Ignorable # Mn   [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
-10EFD..10EFF  ; Case_Ignorable # Mn   [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
+10EFC..10EFF  ; Case_Ignorable # Mn   [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA
 10F46..10F50  ; Case_Ignorable # Mn  [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
 10F82..10F85  ; Case_Ignorable # Mn   [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
 11001         ; Case_Ignorable # Mn       BRAHMI SIGN ANUSVARA
@@ -3330,6 +3383,11 @@ FFF9..FFFB    ; Case_Ignorable # Cf   [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI
 11340         ; Case_Ignorable # Mn       GRANTHA VOWEL SIGN II
 11366..1136C  ; Case_Ignorable # Mn   [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX
 11370..11374  ; Case_Ignorable # Mn   [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA
+113BB..113C0  ; Case_Ignorable # Mn   [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL
+113CE         ; Case_Ignorable # Mn       TULU-TIGALARI SIGN VIRAMA
+113D0         ; Case_Ignorable # Mn       TULU-TIGALARI CONJOINER
+113D2         ; Case_Ignorable # Mn       TULU-TIGALARI GEMINATION MARK
+113E1..113E2  ; Case_Ignorable # Mn   [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA
 11438..1143F  ; Case_Ignorable # Mn   [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI
 11442..11444  ; Case_Ignorable # Mn   [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA
 11446         ; Case_Ignorable # Mn       NEWA SIGN NUKTA
@@ -3349,7 +3407,8 @@ FFF9..FFFB    ; Case_Ignorable # Cf   [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI
 116AD         ; Case_Ignorable # Mn       TAKRI VOWEL SIGN AA
 116B0..116B5  ; Case_Ignorable # Mn   [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU
 116B7         ; Case_Ignorable # Mn       TAKRI SIGN NUKTA
-1171D..1171F  ; Case_Ignorable # Mn   [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA
+1171D         ; Case_Ignorable # Mn       AHOM CONSONANT SIGN MEDIAL LA
+1171F         ; Case_Ignorable # Mn       AHOM CONSONANT SIGN MEDIAL LIGATING RA
 11722..11725  ; Case_Ignorable # Mn   [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU
 11727..1172B  ; Case_Ignorable # Mn   [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER
 1182F..11837  ; Case_Ignorable # Mn   [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA
@@ -3388,12 +3447,17 @@ FFF9..FFFB    ; Case_Ignorable # Cf   [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI
 11F36..11F3A  ; Case_Ignorable # Mn   [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R
 11F40         ; Case_Ignorable # Mn       KAWI VOWEL SIGN EU
 11F42         ; Case_Ignorable # Mn       KAWI CONJOINER
+11F5A         ; Case_Ignorable # Mn       KAWI SIGN NUKTA
 13430..1343F  ; Case_Ignorable # Cf  [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE
 13440         ; Case_Ignorable # Mn       EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY
 13447..13455  ; Case_Ignorable # Mn  [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED
+1611E..16129  ; Case_Ignorable # Mn  [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK
+1612D..1612F  ; Case_Ignorable # Mn   [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA
 16AF0..16AF4  ; Case_Ignorable # Mn   [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE
 16B30..16B36  ; Case_Ignorable # Mn   [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
 16B40..16B43  ; Case_Ignorable # Lm   [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM
+16D40..16D42  ; Case_Ignorable # Lm   [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA
+16D6B..16D6C  ; Case_Ignorable # Lm   [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT
 16F4F         ; Case_Ignorable # Mn       MIAO SIGN CONSONANT MODIFIER BAR
 16F8F..16F92  ; Case_Ignorable # Mn   [4] MIAO TONE RIGHT..MIAO TONE BELOW
 16F93..16F9F  ; Case_Ignorable # Lm  [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8
@@ -3432,6 +3496,7 @@ FFF9..FFFB    ; Case_Ignorable # Cf   [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI
 1E2EC..1E2EF  ; Case_Ignorable # Mn   [4] WANCHO TONE TUP..WANCHO TONE KOINI
 1E4EB         ; Case_Ignorable # Lm       NAG MUNDARI SIGN OJOD
 1E4EC..1E4EF  ; Case_Ignorable # Mn   [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH
+1E5EE..1E5EF  ; Case_Ignorable # Mn   [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR
 1E8D0..1E8D6  ; Case_Ignorable # Mn   [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
 1E944..1E94A  ; Case_Ignorable # Mn   [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
 1E94B         ; Case_Ignorable # Lm       ADLAM NASALIZATION MARK
@@ -3440,7 +3505,7 @@ E0001         ; Case_Ignorable # Cf       LANGUAGE TAG
 E0020..E007F  ; Case_Ignorable # Cf  [96] TAG SPACE..CANCEL TAG
 E0100..E01EF  ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
 
-# Total code points: 2707
+# Total code points: 2749
 
 # ================================================
 
@@ -3724,6 +3789,7 @@ E0100..E01EF  ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELEC
 10C7          ; Changes_When_Lowercased # L&       GEORGIAN CAPITAL LETTER YN
 10CD          ; Changes_When_Lowercased # L&       GEORGIAN CAPITAL LETTER AEN
 13A0..13F5    ; Changes_When_Lowercased # L&  [86] CHEROKEE LETTER A..CHEROKEE LETTER MV
+1C89          ; Changes_When_Lowercased # L&       CYRILLIC CAPITAL LETTER TJE
 1C90..1CBA    ; Changes_When_Lowercased # L&  [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
 1CBD..1CBF    ; Changes_When_Lowercased # L&   [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
 1E00          ; Changes_When_Lowercased # L&       LATIN CAPITAL LETTER A WITH RING BELOW
@@ -4043,9 +4109,12 @@ A7C0          ; Changes_When_Lowercased # L&       LATIN CAPITAL LETTER OLD POLI
 A7C2          ; Changes_When_Lowercased # L&       LATIN CAPITAL LETTER ANGLICANA W
 A7C4..A7C7    ; Changes_When_Lowercased # L&   [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY
 A7C9          ; Changes_When_Lowercased # L&       LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY
+A7CB..A7CC    ; Changes_When_Lowercased # L&   [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE
 A7D0          ; Changes_When_Lowercased # L&       LATIN CAPITAL LETTER CLOSED INSULAR G
 A7D6          ; Changes_When_Lowercased # L&       LATIN CAPITAL LETTER MIDDLE SCOTS S
 A7D8          ; Changes_When_Lowercased # L&       LATIN CAPITAL LETTER SIGMOID S
+A7DA          ; Changes_When_Lowercased # L&       LATIN CAPITAL LETTER LAMBDA
+A7DC          ; Changes_When_Lowercased # L&       LATIN CAPITAL LETTER LAMBDA WITH STROKE
 A7F5          ; Changes_When_Lowercased # L&       LATIN CAPITAL LETTER REVERSED HALF H
 FF21..FF3A    ; Changes_When_Lowercased # L&  [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
 10400..10427  ; Changes_When_Lowercased # L&  [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW
@@ -4055,11 +4124,12 @@ FF21..FF3A    ; Changes_When_Lowercased # L&  [26] FULLWIDTH LATIN CAPITAL LETTE
 1058C..10592  ; Changes_When_Lowercased # L&   [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE
 10594..10595  ; Changes_When_Lowercased # L&   [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE
 10C80..10CB2  ; Changes_When_Lowercased # L&  [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US
+10D50..10D65  ; Changes_When_Lowercased # L&  [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA
 118A0..118BF  ; Changes_When_Lowercased # L&  [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO
 16E40..16E5F  ; Changes_When_Lowercased # L&  [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y
 1E900..1E921  ; Changes_When_Lowercased # L&  [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA
 
-# Total code points: 1433
+# Total code points: 1460
 
 # ================================================
 
@@ -4140,7 +4210,7 @@ FF21..FF3A    ; Changes_When_Lowercased # L&  [26] FULLWIDTH LATIN CAPITAL LETTE
 018C          ; Changes_When_Uppercased # L&       LATIN SMALL LETTER D WITH TOPBAR
 0192          ; Changes_When_Uppercased # L&       LATIN SMALL LETTER F WITH HOOK
 0195          ; Changes_When_Uppercased # L&       LATIN SMALL LETTER HV
-0199..019A    ; Changes_When_Uppercased # L&   [2] LATIN SMALL LETTER K WITH HOOK..LATIN SMALL LETTER L WITH BAR
+0199..019B    ; Changes_When_Uppercased # L&   [3] LATIN SMALL LETTER K WITH HOOK..LATIN SMALL LETTER LAMBDA WITH STROKE
 019E          ; Changes_When_Uppercased # L&       LATIN SMALL LETTER N WITH LONG RIGHT LEG
 01A1          ; Changes_When_Uppercased # L&       LATIN SMALL LETTER O WITH HORN
 01A3          ; Changes_When_Uppercased # L&       LATIN SMALL LETTER OI
@@ -4216,8 +4286,7 @@ FF21..FF3A    ; Changes_When_Lowercased # L&  [26] FULLWIDTH LATIN CAPITAL LETTE
 0259          ; Changes_When_Uppercased # L&       LATIN SMALL LETTER SCHWA
 025B..025C    ; Changes_When_Uppercased # L&   [2] LATIN SMALL LETTER OPEN E..LATIN SMALL LETTER REVERSED OPEN E
 0260..0261    ; Changes_When_Uppercased # L&   [2] LATIN SMALL LETTER G WITH HOOK..LATIN SMALL LETTER SCRIPT G
-0263          ; Changes_When_Uppercased # L&       LATIN SMALL LETTER GAMMA
-0265..0266    ; Changes_When_Uppercased # L&   [2] LATIN SMALL LETTER TURNED H..LATIN SMALL LETTER H WITH HOOK
+0263..0266    ; Changes_When_Uppercased # L&   [4] LATIN SMALL LETTER GAMMA..LATIN SMALL LETTER H WITH HOOK
 0268..026C    ; Changes_When_Uppercased # L&   [5] LATIN SMALL LETTER I WITH STROKE..LATIN SMALL LETTER L WITH BELT
 026F          ; Changes_When_Uppercased # L&       LATIN SMALL LETTER TURNED M
 0271..0272    ; Changes_When_Uppercased # L&   [2] LATIN SMALL LETTER M WITH HOOK..LATIN SMALL LETTER N WITH LEFT HOOK
@@ -4357,6 +4426,7 @@ FF21..FF3A    ; Changes_When_Lowercased # L&  [26] FULLWIDTH LATIN CAPITAL LETTE
 10FD..10FF    ; Changes_When_Uppercased # L&   [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN
 13F8..13FD    ; Changes_When_Uppercased # L&   [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV
 1C80..1C88    ; Changes_When_Uppercased # L&   [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C8A          ; Changes_When_Uppercased # L&       CYRILLIC SMALL LETTER TJE
 1D79          ; Changes_When_Uppercased # L&       LATIN SMALL LETTER INSULAR G
 1D7D          ; Changes_When_Uppercased # L&       LATIN SMALL LETTER P WITH STROKE
 1D8E          ; Changes_When_Uppercased # L&       LATIN SMALL LETTER Z WITH PALATAL HOOK
@@ -4676,9 +4746,11 @@ A7C1          ; Changes_When_Uppercased # L&       LATIN SMALL LETTER OLD POLISH
 A7C3          ; Changes_When_Uppercased # L&       LATIN SMALL LETTER ANGLICANA W
 A7C8          ; Changes_When_Uppercased # L&       LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY
 A7CA          ; Changes_When_Uppercased # L&       LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
+A7CD          ; Changes_When_Uppercased # L&       LATIN SMALL LETTER S WITH DIAGONAL STROKE
 A7D1          ; Changes_When_Uppercased # L&       LATIN SMALL LETTER CLOSED INSULAR G
 A7D7          ; Changes_When_Uppercased # L&       LATIN SMALL LETTER MIDDLE SCOTS S
 A7D9          ; Changes_When_Uppercased # L&       LATIN SMALL LETTER SIGMOID S
+A7DB          ; Changes_When_Uppercased # L&       LATIN SMALL LETTER LAMBDA
 A7F6          ; Changes_When_Uppercased # L&       LATIN SMALL LETTER REVERSED HALF H
 AB53          ; Changes_When_Uppercased # L&       LATIN SMALL LETTER CHI
 AB70..ABBF    ; Changes_When_Uppercased # L&  [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA
@@ -4692,11 +4764,12 @@ FF41..FF5A    ; Changes_When_Uppercased # L&  [26] FULLWIDTH LATIN SMALL LETTER
 105B3..105B9  ; Changes_When_Uppercased # L&   [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
 105BB..105BC  ; Changes_When_Uppercased # L&   [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
 10CC0..10CF2  ; Changes_When_Uppercased # L&  [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US
+10D70..10D85  ; Changes_When_Uppercased # L&  [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA
 118C0..118DF  ; Changes_When_Uppercased # L&  [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO
 16E60..16E7F  ; Changes_When_Uppercased # L&  [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y
 1E922..1E943  ; Changes_When_Uppercased # L&  [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA
 
-# Total code points: 1525
+# Total code points: 1552
 
 # ================================================
 
@@ -4777,7 +4850,7 @@ FF41..FF5A    ; Changes_When_Uppercased # L&  [26] FULLWIDTH LATIN SMALL LETTER
 018C          ; Changes_When_Titlecased # L&       LATIN SMALL LETTER D WITH TOPBAR
 0192          ; Changes_When_Titlecased # L&       LATIN SMALL LETTER F WITH HOOK
 0195          ; Changes_When_Titlecased # L&       LATIN SMALL LETTER HV
-0199..019A    ; Changes_When_Titlecased # L&   [2] LATIN SMALL LETTER K WITH HOOK..LATIN SMALL LETTER L WITH BAR
+0199..019B    ; Changes_When_Titlecased # L&   [3] LATIN SMALL LETTER K WITH HOOK..LATIN SMALL LETTER LAMBDA WITH STROKE
 019E          ; Changes_When_Titlecased # L&       LATIN SMALL LETTER N WITH LONG RIGHT LEG
 01A1          ; Changes_When_Titlecased # L&       LATIN SMALL LETTER O WITH HORN
 01A3          ; Changes_When_Titlecased # L&       LATIN SMALL LETTER OI
@@ -4854,8 +4927,7 @@ FF41..FF5A    ; Changes_When_Uppercased # L&  [26] FULLWIDTH LATIN SMALL LETTER
 0259          ; Changes_When_Titlecased # L&       LATIN SMALL LETTER SCHWA
 025B..025C    ; Changes_When_Titlecased # L&   [2] LATIN SMALL LETTER OPEN E..LATIN SMALL LETTER REVERSED OPEN E
 0260..0261    ; Changes_When_Titlecased # L&   [2] LATIN SMALL LETTER G WITH HOOK..LATIN SMALL LETTER SCRIPT G
-0263          ; Changes_When_Titlecased # L&       LATIN SMALL LETTER GAMMA
-0265..0266    ; Changes_When_Titlecased # L&   [2] LATIN SMALL LETTER TURNED H..LATIN SMALL LETTER H WITH HOOK
+0263..0266    ; Changes_When_Titlecased # L&   [4] LATIN SMALL LETTER GAMMA..LATIN SMALL LETTER H WITH HOOK
 0268..026C    ; Changes_When_Titlecased # L&   [5] LATIN SMALL LETTER I WITH STROKE..LATIN SMALL LETTER L WITH BELT
 026F          ; Changes_When_Titlecased # L&       LATIN SMALL LETTER TURNED M
 0271..0272    ; Changes_When_Titlecased # L&   [2] LATIN SMALL LETTER M WITH HOOK..LATIN SMALL LETTER N WITH LEFT HOOK
@@ -4993,6 +5065,7 @@ FF41..FF5A    ; Changes_When_Uppercased # L&  [26] FULLWIDTH LATIN SMALL LETTER
 0561..0587    ; Changes_When_Titlecased # L&  [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN
 13F8..13FD    ; Changes_When_Titlecased # L&   [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV
 1C80..1C88    ; Changes_When_Titlecased # L&   [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C8A          ; Changes_When_Titlecased # L&       CYRILLIC SMALL LETTER TJE
 1D79          ; Changes_When_Titlecased # L&       LATIN SMALL LETTER INSULAR G
 1D7D          ; Changes_When_Titlecased # L&       LATIN SMALL LETTER P WITH STROKE
 1D8E          ; Changes_When_Titlecased # L&       LATIN SMALL LETTER Z WITH PALATAL HOOK
@@ -5312,9 +5385,11 @@ A7C1          ; Changes_When_Titlecased # L&       LATIN SMALL LETTER OLD POLISH
 A7C3          ; Changes_When_Titlecased # L&       LATIN SMALL LETTER ANGLICANA W
 A7C8          ; Changes_When_Titlecased # L&       LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY
 A7CA          ; Changes_When_Titlecased # L&       LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
+A7CD          ; Changes_When_Titlecased # L&       LATIN SMALL LETTER S WITH DIAGONAL STROKE
 A7D1          ; Changes_When_Titlecased # L&       LATIN SMALL LETTER CLOSED INSULAR G
 A7D7          ; Changes_When_Titlecased # L&       LATIN SMALL LETTER MIDDLE SCOTS S
 A7D9          ; Changes_When_Titlecased # L&       LATIN SMALL LETTER SIGMOID S
+A7DB          ; Changes_When_Titlecased # L&       LATIN SMALL LETTER LAMBDA
 A7F6          ; Changes_When_Titlecased # L&       LATIN SMALL LETTER REVERSED HALF H
 AB53          ; Changes_When_Titlecased # L&       LATIN SMALL LETTER CHI
 AB70..ABBF    ; Changes_When_Titlecased # L&  [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA
@@ -5328,11 +5403,12 @@ FF41..FF5A    ; Changes_When_Titlecased # L&  [26] FULLWIDTH LATIN SMALL LETTER
 105B3..105B9  ; Changes_When_Titlecased # L&   [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
 105BB..105BC  ; Changes_When_Titlecased # L&   [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
 10CC0..10CF2  ; Changes_When_Titlecased # L&  [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US
+10D70..10D85  ; Changes_When_Titlecased # L&  [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA
 118C0..118DF  ; Changes_When_Titlecased # L&  [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO
 16E60..16E7F  ; Changes_When_Titlecased # L&  [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y
 1E922..1E943  ; Changes_When_Titlecased # L&  [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA
 
-# Total code points: 1452
+# Total code points: 1479
 
 # ================================================
 
@@ -5623,7 +5699,7 @@ FF41..FF5A    ; Changes_When_Titlecased # L&  [26] FULLWIDTH LATIN SMALL LETTER
 10C7          ; Changes_When_Casefolded # L&       GEORGIAN CAPITAL LETTER YN
 10CD          ; Changes_When_Casefolded # L&       GEORGIAN CAPITAL LETTER AEN
 13F8..13FD    ; Changes_When_Casefolded # L&   [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV
-1C80..1C88    ; Changes_When_Casefolded # L&   [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C80..1C89    ; Changes_When_Casefolded # L&  [10] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC CAPITAL LETTER TJE
 1C90..1CBA    ; Changes_When_Casefolded # L&  [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
 1CBD..1CBF    ; Changes_When_Casefolded # L&   [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
 1E00          ; Changes_When_Casefolded # L&       LATIN CAPITAL LETTER A WITH RING BELOW
@@ -5945,9 +6021,12 @@ A7C0          ; Changes_When_Casefolded # L&       LATIN CAPITAL LETTER OLD POLI
 A7C2          ; Changes_When_Casefolded # L&       LATIN CAPITAL LETTER ANGLICANA W
 A7C4..A7C7    ; Changes_When_Casefolded # L&   [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY
 A7C9          ; Changes_When_Casefolded # L&       LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY
+A7CB..A7CC    ; Changes_When_Casefolded # L&   [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE
 A7D0          ; Changes_When_Casefolded # L&       LATIN CAPITAL LETTER CLOSED INSULAR G
 A7D6          ; Changes_When_Casefolded # L&       LATIN CAPITAL LETTER MIDDLE SCOTS S
 A7D8          ; Changes_When_Casefolded # L&       LATIN CAPITAL LETTER SIGMOID S
+A7DA          ; Changes_When_Casefolded # L&       LATIN CAPITAL LETTER LAMBDA
+A7DC          ; Changes_When_Casefolded # L&       LATIN CAPITAL LETTER LAMBDA WITH STROKE
 A7F5          ; Changes_When_Casefolded # L&       LATIN CAPITAL LETTER REVERSED HALF H
 AB70..ABBF    ; Changes_When_Casefolded # L&  [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA
 FB00..FB06    ; Changes_When_Casefolded # L&   [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
@@ -5960,11 +6039,12 @@ FF21..FF3A    ; Changes_When_Casefolded # L&  [26] FULLWIDTH LATIN CAPITAL LETTE
 1058C..10592  ; Changes_When_Casefolded # L&   [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE
 10594..10595  ; Changes_When_Casefolded # L&   [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE
 10C80..10CB2  ; Changes_When_Casefolded # L&  [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US
+10D50..10D65  ; Changes_When_Casefolded # L&  [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA
 118A0..118BF  ; Changes_When_Casefolded # L&  [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO
 16E40..16E5F  ; Changes_When_Casefolded # L&  [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y
 1E900..1E921  ; Changes_When_Casefolded # L&  [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA
 
-# Total code points: 1506
+# Total code points: 1533
 
 # ================================================
 
@@ -5980,8 +6060,7 @@ FF21..FF3A    ; Changes_When_Casefolded # L&  [26] FULLWIDTH LATIN CAPITAL LETTE
 00D8..00F6    ; Changes_When_Casemapped # L&  [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS
 00F8..0137    ; Changes_When_Casemapped # L&  [64] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER K WITH CEDILLA
 0139..018C    ; Changes_When_Casemapped # L&  [84] LATIN CAPITAL LETTER L WITH ACUTE..LATIN SMALL LETTER D WITH TOPBAR
-018E..019A    ; Changes_When_Casemapped # L&  [13] LATIN CAPITAL LETTER REVERSED E..LATIN SMALL LETTER L WITH BAR
-019C..01A9    ; Changes_When_Casemapped # L&  [14] LATIN CAPITAL LETTER TURNED M..LATIN CAPITAL LETTER ESH
+018E..01A9    ; Changes_When_Casemapped # L&  [28] LATIN CAPITAL LETTER REVERSED E..LATIN CAPITAL LETTER ESH
 01AC..01B9    ; Changes_When_Casemapped # L&  [14] LATIN CAPITAL LETTER T WITH HOOK..LATIN SMALL LETTER EZH REVERSED
 01BC..01BD    ; Changes_When_Casemapped # L&   [2] LATIN CAPITAL LETTER TONE FIVE..LATIN SMALL LETTER TONE FIVE
 01BF          ; Changes_When_Casemapped # L&       LATIN LETTER WYNN
@@ -5992,8 +6071,7 @@ FF21..FF3A    ; Changes_When_Casefolded # L&  [26] FULLWIDTH LATIN CAPITAL LETTE
 0259          ; Changes_When_Casemapped # L&       LATIN SMALL LETTER SCHWA
 025B..025C    ; Changes_When_Casemapped # L&   [2] LATIN SMALL LETTER OPEN E..LATIN SMALL LETTER REVERSED OPEN E
 0260..0261    ; Changes_When_Casemapped # L&   [2] LATIN SMALL LETTER G WITH HOOK..LATIN SMALL LETTER SCRIPT G
-0263          ; Changes_When_Casemapped # L&       LATIN SMALL LETTER GAMMA
-0265..0266    ; Changes_When_Casemapped # L&   [2] LATIN SMALL LETTER TURNED H..LATIN SMALL LETTER H WITH HOOK
+0263..0266    ; Changes_When_Casemapped # L&   [4] LATIN SMALL LETTER GAMMA..LATIN SMALL LETTER H WITH HOOK
 0268..026C    ; Changes_When_Casemapped # L&   [5] LATIN SMALL LETTER I WITH STROKE..LATIN SMALL LETTER L WITH BELT
 026F          ; Changes_When_Casemapped # L&       LATIN SMALL LETTER TURNED M
 0271..0272    ; Changes_When_Casemapped # L&   [2] LATIN SMALL LETTER M WITH HOOK..LATIN SMALL LETTER N WITH LEFT HOOK
@@ -6027,7 +6105,7 @@ FF21..FF3A    ; Changes_When_Casefolded # L&  [26] FULLWIDTH LATIN CAPITAL LETTE
 10FD..10FF    ; Changes_When_Casemapped # L&   [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN
 13A0..13F5    ; Changes_When_Casemapped # L&  [86] CHEROKEE LETTER A..CHEROKEE LETTER MV
 13F8..13FD    ; Changes_When_Casemapped # L&   [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV
-1C80..1C88    ; Changes_When_Casemapped # L&   [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C80..1C8A    ; Changes_When_Casemapped # L&  [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE
 1C90..1CBA    ; Changes_When_Casemapped # L&  [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
 1CBD..1CBF    ; Changes_When_Casemapped # L&   [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
 1D79          ; Changes_When_Casemapped # L&       LATIN SMALL LETTER INSULAR G
@@ -6078,9 +6156,9 @@ A779..A787    ; Changes_When_Casemapped # L&  [15] LATIN CAPITAL LETTER INSULAR
 A78B..A78D    ; Changes_When_Casemapped # L&   [3] LATIN CAPITAL LETTER SALTILLO..LATIN CAPITAL LETTER TURNED H
 A790..A794    ; Changes_When_Casemapped # L&   [5] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH PALATAL HOOK
 A796..A7AE    ; Changes_When_Casemapped # L&  [25] LATIN CAPITAL LETTER B WITH FLOURISH..LATIN CAPITAL LETTER SMALL CAPITAL I
-A7B0..A7CA    ; Changes_When_Casemapped # L&  [27] LATIN CAPITAL LETTER TURNED K..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
+A7B0..A7CD    ; Changes_When_Casemapped # L&  [30] LATIN CAPITAL LETTER TURNED K..LATIN SMALL LETTER S WITH DIAGONAL STROKE
 A7D0..A7D1    ; Changes_When_Casemapped # L&   [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G
-A7D6..A7D9    ; Changes_When_Casemapped # L&   [4] LATIN CAPITAL LETTER MIDDLE SCOTS S..LATIN SMALL LETTER SIGMOID S
+A7D6..A7DC    ; Changes_When_Casemapped # L&   [7] LATIN CAPITAL LETTER MIDDLE SCOTS S..LATIN CAPITAL LETTER LAMBDA WITH STROKE
 A7F5..A7F6    ; Changes_When_Casemapped # L&   [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H
 AB53          ; Changes_When_Casemapped # L&       LATIN SMALL LETTER CHI
 AB70..ABBF    ; Changes_When_Casemapped # L&  [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA
@@ -6101,11 +6179,13 @@ FF41..FF5A    ; Changes_When_Casemapped # L&  [26] FULLWIDTH LATIN SMALL LETTER
 105BB..105BC  ; Changes_When_Casemapped # L&   [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
 10C80..10CB2  ; Changes_When_Casemapped # L&  [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US
 10CC0..10CF2  ; Changes_When_Casemapped # L&  [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US
+10D50..10D65  ; Changes_When_Casemapped # L&  [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA
+10D70..10D85  ; Changes_When_Casemapped # L&  [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA
 118A0..118DF  ; Changes_When_Casemapped # L&  [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO
 16E40..16E7F  ; Changes_When_Casemapped # L&  [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y
 1E900..1E943  ; Changes_When_Casemapped # L&  [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA
 
-# Total code points: 2927
+# Total code points: 2981
 
 # ================================================
 
@@ -6364,7 +6444,7 @@ FF41..FF5A    ; Changes_When_Casemapped # L&  [26] FULLWIDTH LATIN SMALL LETTER
 1C4D..1C4F    ; ID_Start # Lo   [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA
 1C5A..1C77    ; ID_Start # Lo  [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
 1C78..1C7D    ; ID_Start # Lm   [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
-1C80..1C88    ; ID_Start # L&   [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C80..1C8A    ; ID_Start # L&  [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE
 1C90..1CBA    ; ID_Start # L&  [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
 1CBD..1CBF    ; ID_Start # L&   [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
 1CE9..1CEC    ; ID_Start # Lo   [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL
@@ -6481,10 +6561,10 @@ A771..A787    ; ID_Start # L&  [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER I
 A788          ; ID_Start # Lm       MODIFIER LETTER LOW CIRCUMFLEX ACCENT
 A78B..A78E    ; ID_Start # L&   [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
 A78F          ; ID_Start # Lo       LATIN LETTER SINOLOGICAL DOT
-A790..A7CA    ; ID_Start # L&  [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
+A790..A7CD    ; ID_Start # L&  [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE
 A7D0..A7D1    ; ID_Start # L&   [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G
 A7D3          ; ID_Start # L&       LATIN SMALL LETTER DOUBLE THORN
-A7D5..A7D9    ; ID_Start # L&   [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S
+A7D5..A7DC    ; ID_Start # L&   [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE
 A7F2..A7F4    ; ID_Start # Lm   [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q
 A7F5..A7F6    ; ID_Start # L&   [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H
 A7F7          ; ID_Start # Lo       LATIN EPIGRAPHIC LETTER SIDEWAYS I
@@ -6603,6 +6683,7 @@ FFDA..FFDC    ; ID_Start # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
 105A3..105B1  ; ID_Start # L&  [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE
 105B3..105B9  ; ID_Start # L&   [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
 105BB..105BC  ; ID_Start # L&   [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
+105C0..105F3  ; ID_Start # Lo  [52] TODHRI LETTER A..TODHRI LETTER OO
 10600..10736  ; ID_Start # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664
 10740..10755  ; ID_Start # Lo  [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE
 10760..10767  ; ID_Start # Lo   [8] LINEAR A SIGN A800..LINEAR A SIGN A807
@@ -6639,8 +6720,15 @@ FFDA..FFDC    ; ID_Start # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
 10C80..10CB2  ; ID_Start # L&  [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US
 10CC0..10CF2  ; ID_Start # L&  [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US
 10D00..10D23  ; ID_Start # Lo  [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA
+10D4A..10D4D  ; ID_Start # Lo   [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE
+10D4E         ; ID_Start # Lm       GARAY VOWEL LENGTH MARK
+10D4F         ; ID_Start # Lo       GARAY SUKUN
+10D50..10D65  ; ID_Start # L&  [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA
+10D6F         ; ID_Start # Lm       GARAY REDUPLICATION MARK
+10D70..10D85  ; ID_Start # L&  [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA
 10E80..10EA9  ; ID_Start # Lo  [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
 10EB0..10EB1  ; ID_Start # Lo   [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
+10EC2..10EC4  ; ID_Start # Lo   [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW
 10F00..10F1C  ; ID_Start # Lo  [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
 10F27         ; ID_Start # Lo       OLD SOGDIAN LIGATURE AYIN-DALETH
 10F30..10F45  ; ID_Start # Lo  [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
@@ -6679,6 +6767,13 @@ FFDA..FFDC    ; ID_Start # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
 1133D         ; ID_Start # Lo       GRANTHA SIGN AVAGRAHA
 11350         ; ID_Start # Lo       GRANTHA OM
 1135D..11361  ; ID_Start # Lo   [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL
+11380..11389  ; ID_Start # Lo  [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL
+1138B         ; ID_Start # Lo       TULU-TIGALARI LETTER EE
+1138E         ; ID_Start # Lo       TULU-TIGALARI LETTER AI
+11390..113B5  ; ID_Start # Lo  [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA
+113B7         ; ID_Start # Lo       TULU-TIGALARI SIGN AVAGRAHA
+113D1         ; ID_Start # Lo       TULU-TIGALARI REPHA
+113D3         ; ID_Start # Lo       TULU-TIGALARI SIGN PLUTA
 11400..11434  ; ID_Start # Lo  [53] NEWA LETTER A..NEWA LETTER HA
 11447..1144A  ; ID_Start # Lo   [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI
 1145F..11461  ; ID_Start # Lo   [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA
@@ -6713,6 +6808,7 @@ FFDA..FFDC    ; ID_Start # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
 11A5C..11A89  ; ID_Start # Lo  [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA
 11A9D         ; ID_Start # Lo       SOYOMBO MARK PLUTA
 11AB0..11AF8  ; ID_Start # Lo  [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL
+11BC0..11BE0  ; ID_Start # Lo  [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO
 11C00..11C08  ; ID_Start # Lo   [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
 11C0A..11C2E  ; ID_Start # Lo  [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
 11C40         ; ID_Start # Lo       BHAIKSUKI SIGN AVAGRAHA
@@ -6736,7 +6832,9 @@ FFDA..FFDC    ; ID_Start # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
 12F90..12FF0  ; ID_Start # Lo  [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114
 13000..1342F  ; ID_Start # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D
 13441..13446  ; ID_Start # Lo   [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN
+13460..143FA  ; ID_Start # Lo [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA
 14400..14646  ; ID_Start # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530
+16100..1611D  ; ID_Start # Lo  [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA
 16800..16A38  ; ID_Start # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ
 16A40..16A5E  ; ID_Start # Lo  [31] MRO LETTER TA..MRO LETTER TEK
 16A70..16ABE  ; ID_Start # Lo  [79] TANGSA LETTER OZ..TANGSA LETTER ZA
@@ -6745,6 +6843,9 @@ FFDA..FFDC    ; ID_Start # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
 16B40..16B43  ; ID_Start # Lm   [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM
 16B63..16B77  ; ID_Start # Lo  [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS
 16B7D..16B8F  ; ID_Start # Lo  [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ
+16D40..16D42  ; ID_Start # Lm   [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA
+16D43..16D6A  ; ID_Start # Lo  [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU
+16D6B..16D6C  ; ID_Start # Lm   [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT
 16E40..16E7F  ; ID_Start # L&  [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y
 16F00..16F4A  ; ID_Start # Lo  [75] MIAO LETTER PA..MIAO LETTER RTE
 16F50         ; ID_Start # Lo       MIAO LETTER NASALIZATION
@@ -6753,7 +6854,7 @@ FFDA..FFDC    ; ID_Start # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
 16FE3         ; ID_Start # Lm       OLD CHINESE ITERATION MARK
 17000..187F7  ; ID_Start # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7
 18800..18CD5  ; ID_Start # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5
-18D00..18D08  ; ID_Start # Lo   [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08
+18CFF..18D08  ; ID_Start # Lo  [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08
 1AFF0..1AFF3  ; ID_Start # Lm   [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5
 1AFF5..1AFFB  ; ID_Start # Lm   [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5
 1AFFD..1AFFE  ; ID_Start # Lm   [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8
@@ -6809,6 +6910,8 @@ FFDA..FFDC    ; ID_Start # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
 1E2C0..1E2EB  ; ID_Start # Lo  [44] WANCHO LETTER AA..WANCHO LETTER YIH
 1E4D0..1E4EA  ; ID_Start # Lo  [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL
 1E4EB         ; ID_Start # Lm       NAG MUNDARI SIGN OJOD
+1E5D0..1E5ED  ; ID_Start # Lo  [30] OL ONAL LETTER O..OL ONAL LETTER EG
+1E5F0         ; ID_Start # Lo       OL ONAL SIGN HODDOND
 1E7E0..1E7E6  ; ID_Start # Lo   [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO
 1E7E8..1E7EB  ; ID_Start # Lo   [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE
 1E7ED..1E7EE  ; ID_Start # Lo   [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE
@@ -6859,7 +6962,7 @@ FFDA..FFDC    ; ID_Start # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
 30000..3134A  ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
 31350..323AF  ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
 
-# Total code points: 136967
+# Total code points: 141269
 
 # ================================================
 
@@ -6966,7 +7069,7 @@ FFDA..FFDC    ; ID_Start # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
 0860..086A    ; ID_Continue # Lo  [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
 0870..0887    ; ID_Continue # Lo  [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT
 0889..088E    ; ID_Continue # Lo   [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL
-0898..089F    ; ID_Continue # Mn   [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
+0897..089F    ; ID_Continue # Mn   [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA
 08A0..08C8    ; ID_Continue # Lo  [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF
 08C9          ; ID_Continue # Lm       ARABIC SMALL FARSI YEH
 08CA..08E1    ; ID_Continue # Mn  [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
@@ -7399,7 +7502,7 @@ FFDA..FFDC    ; ID_Start # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
 1C50..1C59    ; ID_Continue # Nd  [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE
 1C5A..1C77    ; ID_Continue # Lo  [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
 1C78..1C7D    ; ID_Continue # Lm   [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
-1C80..1C88    ; ID_Continue # L&   [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C80..1C8A    ; ID_Continue # L&  [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE
 1C90..1CBA    ; ID_Continue # L&  [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
 1CBD..1CBF    ; ID_Continue # L&   [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
 1CD0..1CD2    ; ID_Continue # Mn   [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
@@ -7543,10 +7646,10 @@ A771..A787    ; ID_Continue # L&  [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTE
 A788          ; ID_Continue # Lm       MODIFIER LETTER LOW CIRCUMFLEX ACCENT
 A78B..A78E    ; ID_Continue # L&   [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
 A78F          ; ID_Continue # Lo       LATIN LETTER SINOLOGICAL DOT
-A790..A7CA    ; ID_Continue # L&  [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
+A790..A7CD    ; ID_Continue # L&  [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE
 A7D0..A7D1    ; ID_Continue # L&   [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G
 A7D3          ; ID_Continue # L&       LATIN SMALL LETTER DOUBLE THORN
-A7D5..A7D9    ; ID_Continue # L&   [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S
+A7D5..A7DC    ; ID_Continue # L&   [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE
 A7F2..A7F4    ; ID_Continue # Lm   [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q
 A7F5..A7F6    ; ID_Continue # L&   [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H
 A7F7          ; ID_Continue # Lo       LATIN EPIGRAPHIC LETTER SIDEWAYS I
@@ -7735,6 +7838,7 @@ FFDA..FFDC    ; ID_Continue # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
 105A3..105B1  ; ID_Continue # L&  [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE
 105B3..105B9  ; ID_Continue # L&   [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
 105BB..105BC  ; ID_Continue # L&   [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
+105C0..105F3  ; ID_Continue # Lo  [52] TODHRI LETTER A..TODHRI LETTER OO
 10600..10736  ; ID_Continue # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664
 10740..10755  ; ID_Continue # Lo  [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE
 10760..10767  ; ID_Continue # Lo   [8] LINEAR A SIGN A800..LINEAR A SIGN A807
@@ -7779,10 +7883,19 @@ FFDA..FFDC    ; ID_Continue # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
 10D00..10D23  ; ID_Continue # Lo  [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA
 10D24..10D27  ; ID_Continue # Mn   [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
 10D30..10D39  ; ID_Continue # Nd  [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE
+10D40..10D49  ; ID_Continue # Nd  [10] GARAY DIGIT ZERO..GARAY DIGIT NINE
+10D4A..10D4D  ; ID_Continue # Lo   [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE
+10D4E         ; ID_Continue # Lm       GARAY VOWEL LENGTH MARK
+10D4F         ; ID_Continue # Lo       GARAY SUKUN
+10D50..10D65  ; ID_Continue # L&  [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA
+10D69..10D6D  ; ID_Continue # Mn   [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK
+10D6F         ; ID_Continue # Lm       GARAY REDUPLICATION MARK
+10D70..10D85  ; ID_Continue # L&  [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA
 10E80..10EA9  ; ID_Continue # Lo  [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
 10EAB..10EAC  ; ID_Continue # Mn   [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
 10EB0..10EB1  ; ID_Continue # Lo   [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
-10EFD..10EFF  ; ID_Continue # Mn   [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
+10EC2..10EC4  ; ID_Continue # Lo   [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW
+10EFC..10EFF  ; ID_Continue # Mn   [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA
 10F00..10F1C  ; ID_Continue # Lo  [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
 10F27         ; ID_Continue # Lo       OLD SOGDIAN LIGATURE AYIN-DALETH
 10F30..10F45  ; ID_Continue # Lo  [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
@@ -7878,6 +7991,24 @@ FFDA..FFDC    ; ID_Continue # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
 11362..11363  ; ID_Continue # Mc   [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL
 11366..1136C  ; ID_Continue # Mn   [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX
 11370..11374  ; ID_Continue # Mn   [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA
+11380..11389  ; ID_Continue # Lo  [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL
+1138B         ; ID_Continue # Lo       TULU-TIGALARI LETTER EE
+1138E         ; ID_Continue # Lo       TULU-TIGALARI LETTER AI
+11390..113B5  ; ID_Continue # Lo  [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA
+113B7         ; ID_Continue # Lo       TULU-TIGALARI SIGN AVAGRAHA
+113B8..113BA  ; ID_Continue # Mc   [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II
+113BB..113C0  ; ID_Continue # Mn   [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL
+113C2         ; ID_Continue # Mc       TULU-TIGALARI VOWEL SIGN EE
+113C5         ; ID_Continue # Mc       TULU-TIGALARI VOWEL SIGN AI
+113C7..113CA  ; ID_Continue # Mc   [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA
+113CC..113CD  ; ID_Continue # Mc   [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA
+113CE         ; ID_Continue # Mn       TULU-TIGALARI SIGN VIRAMA
+113CF         ; ID_Continue # Mc       TULU-TIGALARI SIGN LOOPED VIRAMA
+113D0         ; ID_Continue # Mn       TULU-TIGALARI CONJOINER
+113D1         ; ID_Continue # Lo       TULU-TIGALARI REPHA
+113D2         ; ID_Continue # Mn       TULU-TIGALARI GEMINATION MARK
+113D3         ; ID_Continue # Lo       TULU-TIGALARI SIGN PLUTA
+113E1..113E2  ; ID_Continue # Mn   [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA
 11400..11434  ; ID_Continue # Lo  [53] NEWA LETTER A..NEWA LETTER HA
 11435..11437  ; ID_Continue # Mc   [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II
 11438..1143F  ; ID_Continue # Mn   [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI
@@ -7929,8 +8060,11 @@ FFDA..FFDC    ; ID_Continue # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
 116B7         ; ID_Continue # Mn       TAKRI SIGN NUKTA
 116B8         ; ID_Continue # Lo       TAKRI LETTER ARCHAIC KHA
 116C0..116C9  ; ID_Continue # Nd  [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
+116D0..116E3  ; ID_Continue # Nd  [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE
 11700..1171A  ; ID_Continue # Lo  [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA
-1171D..1171F  ; ID_Continue # Mn   [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA
+1171D         ; ID_Continue # Mn       AHOM CONSONANT SIGN MEDIAL LA
+1171E         ; ID_Continue # Mc       AHOM CONSONANT SIGN MEDIAL RA
+1171F         ; ID_Continue # Mn       AHOM CONSONANT SIGN MEDIAL LIGATING RA
 11720..11721  ; ID_Continue # Mc   [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA
 11722..11725  ; ID_Continue # Mn   [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU
 11726         ; ID_Continue # Mc       AHOM VOWEL SIGN E
@@ -7988,6 +8122,8 @@ FFDA..FFDC    ; ID_Continue # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
 11A98..11A99  ; ID_Continue # Mn   [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER
 11A9D         ; ID_Continue # Lo       SOYOMBO MARK PLUTA
 11AB0..11AF8  ; ID_Continue # Lo  [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL
+11BC0..11BE0  ; ID_Continue # Lo  [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO
+11BF0..11BF9  ; ID_Continue # Nd  [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE
 11C00..11C08  ; ID_Continue # Lo   [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
 11C0A..11C2E  ; ID_Continue # Lo  [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
 11C2F         ; ID_Continue # Mc       BHAIKSUKI VOWEL SIGN AA
@@ -8041,6 +8177,7 @@ FFDA..FFDC    ; ID_Continue # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
 11F41         ; ID_Continue # Mc       KAWI SIGN KILLER
 11F42         ; ID_Continue # Mn       KAWI CONJOINER
 11F50..11F59  ; ID_Continue # Nd  [10] KAWI DIGIT ZERO..KAWI DIGIT NINE
+11F5A         ; ID_Continue # Mn       KAWI SIGN NUKTA
 11FB0         ; ID_Continue # Lo       LISU LETTER YHA
 12000..12399  ; ID_Continue # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U
 12400..1246E  ; ID_Continue # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM
@@ -8050,7 +8187,13 @@ FFDA..FFDC    ; ID_Continue # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
 13440         ; ID_Continue # Mn       EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY
 13441..13446  ; ID_Continue # Lo   [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN
 13447..13455  ; ID_Continue # Mn  [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED
+13460..143FA  ; ID_Continue # Lo [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA
 14400..14646  ; ID_Continue # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530
+16100..1611D  ; ID_Continue # Lo  [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA
+1611E..16129  ; ID_Continue # Mn  [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK
+1612A..1612C  ; ID_Continue # Mc   [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA
+1612D..1612F  ; ID_Continue # Mn   [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA
+16130..16139  ; ID_Continue # Nd  [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE
 16800..16A38  ; ID_Continue # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ
 16A40..16A5E  ; ID_Continue # Lo  [31] MRO LETTER TA..MRO LETTER TEK
 16A60..16A69  ; ID_Continue # Nd  [10] MRO DIGIT ZERO..MRO DIGIT NINE
@@ -8064,6 +8207,10 @@ FFDA..FFDC    ; ID_Continue # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
 16B50..16B59  ; ID_Continue # Nd  [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE
 16B63..16B77  ; ID_Continue # Lo  [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS
 16B7D..16B8F  ; ID_Continue # Lo  [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ
+16D40..16D42  ; ID_Continue # Lm   [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA
+16D43..16D6A  ; ID_Continue # Lo  [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU
+16D6B..16D6C  ; ID_Continue # Lm   [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT
+16D70..16D79  ; ID_Continue # Nd  [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE
 16E40..16E7F  ; ID_Continue # L&  [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y
 16F00..16F4A  ; ID_Continue # Lo  [75] MIAO LETTER PA..MIAO LETTER RTE
 16F4F         ; ID_Continue # Mn       MIAO SIGN CONSONANT MODIFIER BAR
@@ -8077,7 +8224,7 @@ FFDA..FFDC    ; ID_Continue # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
 16FF0..16FF1  ; ID_Continue # Mc   [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
 17000..187F7  ; ID_Continue # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7
 18800..18CD5  ; ID_Continue # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5
-18D00..18D08  ; ID_Continue # Lo   [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08
+18CFF..18D08  ; ID_Continue # Lo  [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08
 1AFF0..1AFF3  ; ID_Continue # Lm   [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5
 1AFF5..1AFFB  ; ID_Continue # Lm   [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5
 1AFFD..1AFFE  ; ID_Continue # Lm   [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8
@@ -8092,6 +8239,7 @@ FFDA..FFDC    ; ID_Continue # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
 1BC80..1BC88  ; ID_Continue # Lo   [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL
 1BC90..1BC99  ; ID_Continue # Lo  [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW
 1BC9D..1BC9E  ; ID_Continue # Mn   [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
+1CCF0..1CCF9  ; ID_Continue # Nd  [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE
 1CF00..1CF2D  ; ID_Continue # Mn  [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT
 1CF30..1CF46  ; ID_Continue # Mn  [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG
 1D165..1D166  ; ID_Continue # Mc   [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
@@ -8163,6 +8311,10 @@ FFDA..FFDC    ; ID_Continue # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
 1E4EB         ; ID_Continue # Lm       NAG MUNDARI SIGN OJOD
 1E4EC..1E4EF  ; ID_Continue # Mn   [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH
 1E4F0..1E4F9  ; ID_Continue # Nd  [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE
+1E5D0..1E5ED  ; ID_Continue # Lo  [30] OL ONAL LETTER O..OL ONAL LETTER EG
+1E5EE..1E5EF  ; ID_Continue # Mn   [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR
+1E5F0         ; ID_Continue # Lo       OL ONAL SIGN HODDOND
+1E5F1..1E5FA  ; ID_Continue # Nd  [10] OL ONAL DIGIT ZERO..OL ONAL DIGIT NINE
 1E7E0..1E7E6  ; ID_Continue # Lo   [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO
 1E7E8..1E7EB  ; ID_Continue # Lo   [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE
 1E7ED..1E7EE  ; ID_Continue # Lo   [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE
@@ -8218,7 +8370,7 @@ FFDA..FFDC    ; ID_Continue # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
 31350..323AF  ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
 E0100..E01EF  ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
 
-# Total code points: 140108
+# Total code points: 144541
 
 # ================================================
 
@@ -8474,7 +8626,7 @@ E0100..E01EF  ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR
 1C4D..1C4F    ; XID_Start # Lo   [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA
 1C5A..1C77    ; XID_Start # Lo  [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
 1C78..1C7D    ; XID_Start # Lm   [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
-1C80..1C88    ; XID_Start # L&   [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C80..1C8A    ; XID_Start # L&  [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE
 1C90..1CBA    ; XID_Start # L&  [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
 1CBD..1CBF    ; XID_Start # L&   [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
 1CE9..1CEC    ; XID_Start # Lo   [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL
@@ -8590,10 +8742,10 @@ A771..A787    ; XID_Start # L&  [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER
 A788          ; XID_Start # Lm       MODIFIER LETTER LOW CIRCUMFLEX ACCENT
 A78B..A78E    ; XID_Start # L&   [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
 A78F          ; XID_Start # Lo       LATIN LETTER SINOLOGICAL DOT
-A790..A7CA    ; XID_Start # L&  [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
+A790..A7CD    ; XID_Start # L&  [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE
 A7D0..A7D1    ; XID_Start # L&   [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G
 A7D3          ; XID_Start # L&       LATIN SMALL LETTER DOUBLE THORN
-A7D5..A7D9    ; XID_Start # L&   [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S
+A7D5..A7DC    ; XID_Start # L&   [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE
 A7F2..A7F4    ; XID_Start # Lm   [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q
 A7F5..A7F6    ; XID_Start # L&   [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H
 A7F7          ; XID_Start # Lo       LATIN EPIGRAPHIC LETTER SIDEWAYS I
@@ -8717,6 +8869,7 @@ FFDA..FFDC    ; XID_Start # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU
 105A3..105B1  ; XID_Start # L&  [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE
 105B3..105B9  ; XID_Start # L&   [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
 105BB..105BC  ; XID_Start # L&   [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
+105C0..105F3  ; XID_Start # Lo  [52] TODHRI LETTER A..TODHRI LETTER OO
 10600..10736  ; XID_Start # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664
 10740..10755  ; XID_Start # Lo  [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE
 10760..10767  ; XID_Start # Lo   [8] LINEAR A SIGN A800..LINEAR A SIGN A807
@@ -8753,8 +8906,15 @@ FFDA..FFDC    ; XID_Start # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU
 10C80..10CB2  ; XID_Start # L&  [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US
 10CC0..10CF2  ; XID_Start # L&  [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US
 10D00..10D23  ; XID_Start # Lo  [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA
+10D4A..10D4D  ; XID_Start # Lo   [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE
+10D4E         ; XID_Start # Lm       GARAY VOWEL LENGTH MARK
+10D4F         ; XID_Start # Lo       GARAY SUKUN
+10D50..10D65  ; XID_Start # L&  [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA
+10D6F         ; XID_Start # Lm       GARAY REDUPLICATION MARK
+10D70..10D85  ; XID_Start # L&  [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA
 10E80..10EA9  ; XID_Start # Lo  [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
 10EB0..10EB1  ; XID_Start # Lo   [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
+10EC2..10EC4  ; XID_Start # Lo   [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW
 10F00..10F1C  ; XID_Start # Lo  [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
 10F27         ; XID_Start # Lo       OLD SOGDIAN LIGATURE AYIN-DALETH
 10F30..10F45  ; XID_Start # Lo  [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
@@ -8793,6 +8953,13 @@ FFDA..FFDC    ; XID_Start # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU
 1133D         ; XID_Start # Lo       GRANTHA SIGN AVAGRAHA
 11350         ; XID_Start # Lo       GRANTHA OM
 1135D..11361  ; XID_Start # Lo   [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL
+11380..11389  ; XID_Start # Lo  [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL
+1138B         ; XID_Start # Lo       TULU-TIGALARI LETTER EE
+1138E         ; XID_Start # Lo       TULU-TIGALARI LETTER AI
+11390..113B5  ; XID_Start # Lo  [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA
+113B7         ; XID_Start # Lo       TULU-TIGALARI SIGN AVAGRAHA
+113D1         ; XID_Start # Lo       TULU-TIGALARI REPHA
+113D3         ; XID_Start # Lo       TULU-TIGALARI SIGN PLUTA
 11400..11434  ; XID_Start # Lo  [53] NEWA LETTER A..NEWA LETTER HA
 11447..1144A  ; XID_Start # Lo   [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI
 1145F..11461  ; XID_Start # Lo   [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA
@@ -8827,6 +8994,7 @@ FFDA..FFDC    ; XID_Start # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU
 11A5C..11A89  ; XID_Start # Lo  [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA
 11A9D         ; XID_Start # Lo       SOYOMBO MARK PLUTA
 11AB0..11AF8  ; XID_Start # Lo  [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL
+11BC0..11BE0  ; XID_Start # Lo  [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO
 11C00..11C08  ; XID_Start # Lo   [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
 11C0A..11C2E  ; XID_Start # Lo  [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
 11C40         ; XID_Start # Lo       BHAIKSUKI SIGN AVAGRAHA
@@ -8850,7 +9018,9 @@ FFDA..FFDC    ; XID_Start # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU
 12F90..12FF0  ; XID_Start # Lo  [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114
 13000..1342F  ; XID_Start # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D
 13441..13446  ; XID_Start # Lo   [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN
+13460..143FA  ; XID_Start # Lo [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA
 14400..14646  ; XID_Start # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530
+16100..1611D  ; XID_Start # Lo  [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA
 16800..16A38  ; XID_Start # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ
 16A40..16A5E  ; XID_Start # Lo  [31] MRO LETTER TA..MRO LETTER TEK
 16A70..16ABE  ; XID_Start # Lo  [79] TANGSA LETTER OZ..TANGSA LETTER ZA
@@ -8859,6 +9029,9 @@ FFDA..FFDC    ; XID_Start # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU
 16B40..16B43  ; XID_Start # Lm   [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM
 16B63..16B77  ; XID_Start # Lo  [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS
 16B7D..16B8F  ; XID_Start # Lo  [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ
+16D40..16D42  ; XID_Start # Lm   [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA
+16D43..16D6A  ; XID_Start # Lo  [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU
+16D6B..16D6C  ; XID_Start # Lm   [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT
 16E40..16E7F  ; XID_Start # L&  [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y
 16F00..16F4A  ; XID_Start # Lo  [75] MIAO LETTER PA..MIAO LETTER RTE
 16F50         ; XID_Start # Lo       MIAO LETTER NASALIZATION
@@ -8867,7 +9040,7 @@ FFDA..FFDC    ; XID_Start # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU
 16FE3         ; XID_Start # Lm       OLD CHINESE ITERATION MARK
 17000..187F7  ; XID_Start # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7
 18800..18CD5  ; XID_Start # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5
-18D00..18D08  ; XID_Start # Lo   [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08
+18CFF..18D08  ; XID_Start # Lo  [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08
 1AFF0..1AFF3  ; XID_Start # Lm   [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5
 1AFF5..1AFFB  ; XID_Start # Lm   [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5
 1AFFD..1AFFE  ; XID_Start # Lm   [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8
@@ -8923,6 +9096,8 @@ FFDA..FFDC    ; XID_Start # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU
 1E2C0..1E2EB  ; XID_Start # Lo  [44] WANCHO LETTER AA..WANCHO LETTER YIH
 1E4D0..1E4EA  ; XID_Start # Lo  [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL
 1E4EB         ; XID_Start # Lm       NAG MUNDARI SIGN OJOD
+1E5D0..1E5ED  ; XID_Start # Lo  [30] OL ONAL LETTER O..OL ONAL LETTER EG
+1E5F0         ; XID_Start # Lo       OL ONAL SIGN HODDOND
 1E7E0..1E7E6  ; XID_Start # Lo   [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO
 1E7E8..1E7EB  ; XID_Start # Lo   [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE
 1E7ED..1E7EE  ; XID_Start # Lo   [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE
@@ -8973,7 +9148,7 @@ FFDA..FFDC    ; XID_Start # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU
 30000..3134A  ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
 31350..323AF  ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
 
-# Total code points: 136944
+# Total code points: 141246
 
 # ================================================
 
@@ -9076,7 +9251,7 @@ FFDA..FFDC    ; XID_Start # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU
 0860..086A    ; XID_Continue # Lo  [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
 0870..0887    ; XID_Continue # Lo  [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT
 0889..088E    ; XID_Continue # Lo   [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL
-0898..089F    ; XID_Continue # Mn   [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
+0897..089F    ; XID_Continue # Mn   [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA
 08A0..08C8    ; XID_Continue # Lo  [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF
 08C9          ; XID_Continue # Lm       ARABIC SMALL FARSI YEH
 08CA..08E1    ; XID_Continue # Mn  [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
@@ -9509,7 +9684,7 @@ FFDA..FFDC    ; XID_Start # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU
 1C50..1C59    ; XID_Continue # Nd  [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE
 1C5A..1C77    ; XID_Continue # Lo  [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
 1C78..1C7D    ; XID_Continue # Lm   [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
-1C80..1C88    ; XID_Continue # L&   [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C80..1C8A    ; XID_Continue # L&  [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE
 1C90..1CBA    ; XID_Continue # L&  [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
 1CBD..1CBF    ; XID_Continue # L&   [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
 1CD0..1CD2    ; XID_Continue # Mn   [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
@@ -9652,10 +9827,10 @@ A771..A787    ; XID_Continue # L&  [23] LATIN SMALL LETTER DUM..LATIN SMALL LETT
 A788          ; XID_Continue # Lm       MODIFIER LETTER LOW CIRCUMFLEX ACCENT
 A78B..A78E    ; XID_Continue # L&   [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
 A78F          ; XID_Continue # Lo       LATIN LETTER SINOLOGICAL DOT
-A790..A7CA    ; XID_Continue # L&  [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
+A790..A7CD    ; XID_Continue # L&  [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE
 A7D0..A7D1    ; XID_Continue # L&   [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G
 A7D3          ; XID_Continue # L&       LATIN SMALL LETTER DOUBLE THORN
-A7D5..A7D9    ; XID_Continue # L&   [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S
+A7D5..A7DC    ; XID_Continue # L&   [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE
 A7F2..A7F4    ; XID_Continue # Lm   [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q
 A7F5..A7F6    ; XID_Continue # L&   [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H
 A7F7          ; XID_Continue # Lo       LATIN EPIGRAPHIC LETTER SIDEWAYS I
@@ -9850,6 +10025,7 @@ FFDA..FFDC    ; XID_Continue # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA
 105A3..105B1  ; XID_Continue # L&  [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE
 105B3..105B9  ; XID_Continue # L&   [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
 105BB..105BC  ; XID_Continue # L&   [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
+105C0..105F3  ; XID_Continue # Lo  [52] TODHRI LETTER A..TODHRI LETTER OO
 10600..10736  ; XID_Continue # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664
 10740..10755  ; XID_Continue # Lo  [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE
 10760..10767  ; XID_Continue # Lo   [8] LINEAR A SIGN A800..LINEAR A SIGN A807
@@ -9894,10 +10070,19 @@ FFDA..FFDC    ; XID_Continue # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA
 10D00..10D23  ; XID_Continue # Lo  [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA
 10D24..10D27  ; XID_Continue # Mn   [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
 10D30..10D39  ; XID_Continue # Nd  [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE
+10D40..10D49  ; XID_Continue # Nd  [10] GARAY DIGIT ZERO..GARAY DIGIT NINE
+10D4A..10D4D  ; XID_Continue # Lo   [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE
+10D4E         ; XID_Continue # Lm       GARAY VOWEL LENGTH MARK
+10D4F         ; XID_Continue # Lo       GARAY SUKUN
+10D50..10D65  ; XID_Continue # L&  [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA
+10D69..10D6D  ; XID_Continue # Mn   [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK
+10D6F         ; XID_Continue # Lm       GARAY REDUPLICATION MARK
+10D70..10D85  ; XID_Continue # L&  [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA
 10E80..10EA9  ; XID_Continue # Lo  [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
 10EAB..10EAC  ; XID_Continue # Mn   [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
 10EB0..10EB1  ; XID_Continue # Lo   [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
-10EFD..10EFF  ; XID_Continue # Mn   [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
+10EC2..10EC4  ; XID_Continue # Lo   [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW
+10EFC..10EFF  ; XID_Continue # Mn   [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA
 10F00..10F1C  ; XID_Continue # Lo  [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
 10F27         ; XID_Continue # Lo       OLD SOGDIAN LIGATURE AYIN-DALETH
 10F30..10F45  ; XID_Continue # Lo  [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
@@ -9993,6 +10178,24 @@ FFDA..FFDC    ; XID_Continue # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA
 11362..11363  ; XID_Continue # Mc   [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL
 11366..1136C  ; XID_Continue # Mn   [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX
 11370..11374  ; XID_Continue # Mn   [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA
+11380..11389  ; XID_Continue # Lo  [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL
+1138B         ; XID_Continue # Lo       TULU-TIGALARI LETTER EE
+1138E         ; XID_Continue # Lo       TULU-TIGALARI LETTER AI
+11390..113B5  ; XID_Continue # Lo  [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA
+113B7         ; XID_Continue # Lo       TULU-TIGALARI SIGN AVAGRAHA
+113B8..113BA  ; XID_Continue # Mc   [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II
+113BB..113C0  ; XID_Continue # Mn   [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL
+113C2         ; XID_Continue # Mc       TULU-TIGALARI VOWEL SIGN EE
+113C5         ; XID_Continue # Mc       TULU-TIGALARI VOWEL SIGN AI
+113C7..113CA  ; XID_Continue # Mc   [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA
+113CC..113CD  ; XID_Continue # Mc   [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA
+113CE         ; XID_Continue # Mn       TULU-TIGALARI SIGN VIRAMA
+113CF         ; XID_Continue # Mc       TULU-TIGALARI SIGN LOOPED VIRAMA
+113D0         ; XID_Continue # Mn       TULU-TIGALARI CONJOINER
+113D1         ; XID_Continue # Lo       TULU-TIGALARI REPHA
+113D2         ; XID_Continue # Mn       TULU-TIGALARI GEMINATION MARK
+113D3         ; XID_Continue # Lo       TULU-TIGALARI SIGN PLUTA
+113E1..113E2  ; XID_Continue # Mn   [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA
 11400..11434  ; XID_Continue # Lo  [53] NEWA LETTER A..NEWA LETTER HA
 11435..11437  ; XID_Continue # Mc   [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II
 11438..1143F  ; XID_Continue # Mn   [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI
@@ -10044,8 +10247,11 @@ FFDA..FFDC    ; XID_Continue # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA
 116B7         ; XID_Continue # Mn       TAKRI SIGN NUKTA
 116B8         ; XID_Continue # Lo       TAKRI LETTER ARCHAIC KHA
 116C0..116C9  ; XID_Continue # Nd  [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
+116D0..116E3  ; XID_Continue # Nd  [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE
 11700..1171A  ; XID_Continue # Lo  [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA
-1171D..1171F  ; XID_Continue # Mn   [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA
+1171D         ; XID_Continue # Mn       AHOM CONSONANT SIGN MEDIAL LA
+1171E         ; XID_Continue # Mc       AHOM CONSONANT SIGN MEDIAL RA
+1171F         ; XID_Continue # Mn       AHOM CONSONANT SIGN MEDIAL LIGATING RA
 11720..11721  ; XID_Continue # Mc   [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA
 11722..11725  ; XID_Continue # Mn   [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU
 11726         ; XID_Continue # Mc       AHOM VOWEL SIGN E
@@ -10103,6 +10309,8 @@ FFDA..FFDC    ; XID_Continue # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA
 11A98..11A99  ; XID_Continue # Mn   [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER
 11A9D         ; XID_Continue # Lo       SOYOMBO MARK PLUTA
 11AB0..11AF8  ; XID_Continue # Lo  [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL
+11BC0..11BE0  ; XID_Continue # Lo  [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO
+11BF0..11BF9  ; XID_Continue # Nd  [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE
 11C00..11C08  ; XID_Continue # Lo   [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
 11C0A..11C2E  ; XID_Continue # Lo  [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
 11C2F         ; XID_Continue # Mc       BHAIKSUKI VOWEL SIGN AA
@@ -10156,6 +10364,7 @@ FFDA..FFDC    ; XID_Continue # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA
 11F41         ; XID_Continue # Mc       KAWI SIGN KILLER
 11F42         ; XID_Continue # Mn       KAWI CONJOINER
 11F50..11F59  ; XID_Continue # Nd  [10] KAWI DIGIT ZERO..KAWI DIGIT NINE
+11F5A         ; XID_Continue # Mn       KAWI SIGN NUKTA
 11FB0         ; XID_Continue # Lo       LISU LETTER YHA
 12000..12399  ; XID_Continue # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U
 12400..1246E  ; XID_Continue # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM
@@ -10165,7 +10374,13 @@ FFDA..FFDC    ; XID_Continue # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA
 13440         ; XID_Continue # Mn       EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY
 13441..13446  ; XID_Continue # Lo   [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN
 13447..13455  ; XID_Continue # Mn  [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED
+13460..143FA  ; XID_Continue # Lo [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA
 14400..14646  ; XID_Continue # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530
+16100..1611D  ; XID_Continue # Lo  [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA
+1611E..16129  ; XID_Continue # Mn  [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK
+1612A..1612C  ; XID_Continue # Mc   [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA
+1612D..1612F  ; XID_Continue # Mn   [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA
+16130..16139  ; XID_Continue # Nd  [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE
 16800..16A38  ; XID_Continue # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ
 16A40..16A5E  ; XID_Continue # Lo  [31] MRO LETTER TA..MRO LETTER TEK
 16A60..16A69  ; XID_Continue # Nd  [10] MRO DIGIT ZERO..MRO DIGIT NINE
@@ -10179,6 +10394,10 @@ FFDA..FFDC    ; XID_Continue # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA
 16B50..16B59  ; XID_Continue # Nd  [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE
 16B63..16B77  ; XID_Continue # Lo  [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS
 16B7D..16B8F  ; XID_Continue # Lo  [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ
+16D40..16D42  ; XID_Continue # Lm   [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA
+16D43..16D6A  ; XID_Continue # Lo  [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU
+16D6B..16D6C  ; XID_Continue # Lm   [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT
+16D70..16D79  ; XID_Continue # Nd  [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE
 16E40..16E7F  ; XID_Continue # L&  [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y
 16F00..16F4A  ; XID_Continue # Lo  [75] MIAO LETTER PA..MIAO LETTER RTE
 16F4F         ; XID_Continue # Mn       MIAO SIGN CONSONANT MODIFIER BAR
@@ -10192,7 +10411,7 @@ FFDA..FFDC    ; XID_Continue # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA
 16FF0..16FF1  ; XID_Continue # Mc   [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
 17000..187F7  ; XID_Continue # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7
 18800..18CD5  ; XID_Continue # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5
-18D00..18D08  ; XID_Continue # Lo   [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08
+18CFF..18D08  ; XID_Continue # Lo  [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08
 1AFF0..1AFF3  ; XID_Continue # Lm   [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5
 1AFF5..1AFFB  ; XID_Continue # Lm   [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5
 1AFFD..1AFFE  ; XID_Continue # Lm   [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8
@@ -10207,6 +10426,7 @@ FFDA..FFDC    ; XID_Continue # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA
 1BC80..1BC88  ; XID_Continue # Lo   [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL
 1BC90..1BC99  ; XID_Continue # Lo  [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW
 1BC9D..1BC9E  ; XID_Continue # Mn   [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
+1CCF0..1CCF9  ; XID_Continue # Nd  [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE
 1CF00..1CF2D  ; XID_Continue # Mn  [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT
 1CF30..1CF46  ; XID_Continue # Mn  [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG
 1D165..1D166  ; XID_Continue # Mc   [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
@@ -10278,6 +10498,10 @@ FFDA..FFDC    ; XID_Continue # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA
 1E4EB         ; XID_Continue # Lm       NAG MUNDARI SIGN OJOD
 1E4EC..1E4EF  ; XID_Continue # Mn   [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH
 1E4F0..1E4F9  ; XID_Continue # Nd  [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE
+1E5D0..1E5ED  ; XID_Continue # Lo  [30] OL ONAL LETTER O..OL ONAL LETTER EG
+1E5EE..1E5EF  ; XID_Continue # Mn   [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR
+1E5F0         ; XID_Continue # Lo       OL ONAL SIGN HODDOND
+1E5F1..1E5FA  ; XID_Continue # Nd  [10] OL ONAL DIGIT ZERO..OL ONAL DIGIT NINE
 1E7E0..1E7E6  ; XID_Continue # Lo   [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO
 1E7E8..1E7EB  ; XID_Continue # Lo   [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE
 1E7ED..1E7EE  ; XID_Continue # Lo   [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE
@@ -10333,7 +10557,7 @@ FFDA..FFDC    ; XID_Continue # Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA
 31350..323AF  ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
 E0100..E01EF  ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
 
-# Total code points: 140089
+# Total code points: 144522
 
 # ================================================
 
@@ -10418,7 +10642,7 @@ E01F0..E0FFF  ; Default_Ignorable_Code_Point # Cn [3600] <reserved-E01F0>..<rese
 0825..0827    ; Grapheme_Extend # Mn   [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
 0829..082D    ; Grapheme_Extend # Mn   [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
 0859..085B    ; Grapheme_Extend # Mn   [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
-0898..089F    ; Grapheme_Extend # Mn   [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
+0897..089F    ; Grapheme_Extend # Mn   [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA
 08CA..08E1    ; Grapheme_Extend # Mn  [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
 08E3..0902    ; Grapheme_Extend # Mn  [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA
 093A          ; Grapheme_Extend # Mn       DEVANAGARI VOWEL SIGN OE
@@ -10475,8 +10699,11 @@ E01F0..E0FFF  ; Default_Ignorable_Code_Point # Cn [3600] <reserved-E01F0>..<rese
 0C81          ; Grapheme_Extend # Mn       KANNADA SIGN CANDRABINDU
 0CBC          ; Grapheme_Extend # Mn       KANNADA SIGN NUKTA
 0CBF          ; Grapheme_Extend # Mn       KANNADA VOWEL SIGN I
+0CC0          ; Grapheme_Extend # Mc       KANNADA VOWEL SIGN II
 0CC2          ; Grapheme_Extend # Mc       KANNADA VOWEL SIGN UU
 0CC6          ; Grapheme_Extend # Mn       KANNADA VOWEL SIGN E
+0CC7..0CC8    ; Grapheme_Extend # Mc   [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI
+0CCA..0CCB    ; Grapheme_Extend # Mc   [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
 0CCC..0CCD    ; Grapheme_Extend # Mn   [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
 0CD5..0CD6    ; Grapheme_Extend # Mc   [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
 0CE2..0CE3    ; Grapheme_Extend # Mn   [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
@@ -10522,7 +10749,9 @@ E01F0..E0FFF  ; Default_Ignorable_Code_Point # Cn [3600] <reserved-E01F0>..<rese
 109D          ; Grapheme_Extend # Mn       MYANMAR VOWEL SIGN AITON AI
 135D..135F    ; Grapheme_Extend # Mn   [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK
 1712..1714    ; Grapheme_Extend # Mn   [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
+1715          ; Grapheme_Extend # Mc       TAGALOG SIGN PAMUDPOD
 1732..1733    ; Grapheme_Extend # Mn   [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U
+1734          ; Grapheme_Extend # Mc       HANUNOO SIGN PAMUDPOD
 1752..1753    ; Grapheme_Extend # Mn   [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U
 1772..1773    ; Grapheme_Extend # Mn   [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U
 17B4..17B5    ; Grapheme_Extend # Mn   [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
@@ -10554,17 +10783,22 @@ E01F0..E0FFF  ; Default_Ignorable_Code_Point # Cn [3600] <reserved-E01F0>..<rese
 1B34          ; Grapheme_Extend # Mn       BALINESE SIGN REREKAN
 1B35          ; Grapheme_Extend # Mc       BALINESE VOWEL SIGN TEDUNG
 1B36..1B3A    ; Grapheme_Extend # Mn   [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA
+1B3B          ; Grapheme_Extend # Mc       BALINESE VOWEL SIGN RA REPA TEDUNG
 1B3C          ; Grapheme_Extend # Mn       BALINESE VOWEL SIGN LA LENGA
+1B3D          ; Grapheme_Extend # Mc       BALINESE VOWEL SIGN LA LENGA TEDUNG
 1B42          ; Grapheme_Extend # Mn       BALINESE VOWEL SIGN PEPET
+1B43..1B44    ; Grapheme_Extend # Mc   [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG
 1B6B..1B73    ; Grapheme_Extend # Mn   [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
 1B80..1B81    ; Grapheme_Extend # Mn   [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR
 1BA2..1BA5    ; Grapheme_Extend # Mn   [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU
 1BA8..1BA9    ; Grapheme_Extend # Mn   [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG
+1BAA          ; Grapheme_Extend # Mc       SUNDANESE SIGN PAMAAEH
 1BAB..1BAD    ; Grapheme_Extend # Mn   [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA
 1BE6          ; Grapheme_Extend # Mn       BATAK SIGN TOMPI
 1BE8..1BE9    ; Grapheme_Extend # Mn   [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE
 1BED          ; Grapheme_Extend # Mn       BATAK VOWEL SIGN KARO O
 1BEF..1BF1    ; Grapheme_Extend # Mn   [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H
+1BF2..1BF3    ; Grapheme_Extend # Mc   [2] BATAK PANGOLAT..BATAK PANONGONAN
 1C2C..1C33    ; Grapheme_Extend # Mn   [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T
 1C36..1C37    ; Grapheme_Extend # Mn   [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA
 1CD0..1CD2    ; Grapheme_Extend # Mn   [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
@@ -10601,10 +10835,12 @@ A8E0..A8F1    ; Grapheme_Extend # Mn  [18] COMBINING DEVANAGARI DIGIT ZERO..COMB
 A8FF          ; Grapheme_Extend # Mn       DEVANAGARI VOWEL SIGN AY
 A926..A92D    ; Grapheme_Extend # Mn   [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU
 A947..A951    ; Grapheme_Extend # Mn  [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R
+A953          ; Grapheme_Extend # Mc       REJANG VIRAMA
 A980..A982    ; Grapheme_Extend # Mn   [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR
 A9B3          ; Grapheme_Extend # Mn       JAVANESE SIGN CECAK TELU
 A9B6..A9B9    ; Grapheme_Extend # Mn   [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT
 A9BC..A9BD    ; Grapheme_Extend # Mn   [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET
+A9C0          ; Grapheme_Extend # Mc       JAVANESE PANGKON
 A9E5          ; Grapheme_Extend # Mn       MYANMAR SIGN SHAN SAW
 AA29..AA2E    ; Grapheme_Extend # Mn   [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE
 AA31..AA32    ; Grapheme_Extend # Mn   [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE
@@ -10636,8 +10872,9 @@ FF9E..FF9F    ; Grapheme_Extend # Lm   [2] HALFWIDTH KATAKANA VOICED SOUND MARK.
 10A3F         ; Grapheme_Extend # Mn       KHAROSHTHI VIRAMA
 10AE5..10AE6  ; Grapheme_Extend # Mn   [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
 10D24..10D27  ; Grapheme_Extend # Mn   [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
+10D69..10D6D  ; Grapheme_Extend # Mn   [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK
 10EAB..10EAC  ; Grapheme_Extend # Mn   [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
-10EFD..10EFF  ; Grapheme_Extend # Mn   [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
+10EFC..10EFF  ; Grapheme_Extend # Mn   [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA
 10F46..10F50  ; Grapheme_Extend # Mn  [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
 10F82..10F85  ; Grapheme_Extend # Mn   [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
 11001         ; Grapheme_Extend # Mn       BRAHMI SIGN ANUSVARA
@@ -10654,10 +10891,12 @@ FF9E..FF9F    ; Grapheme_Extend # Lm   [2] HALFWIDTH KATAKANA VOICED SOUND MARK.
 11173         ; Grapheme_Extend # Mn       MAHAJANI SIGN NUKTA
 11180..11181  ; Grapheme_Extend # Mn   [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA
 111B6..111BE  ; Grapheme_Extend # Mn   [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O
+111C0         ; Grapheme_Extend # Mc       SHARADA SIGN VIRAMA
 111C9..111CC  ; Grapheme_Extend # Mn   [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK
 111CF         ; Grapheme_Extend # Mn       SHARADA SIGN INVERTED CANDRABINDU
 1122F..11231  ; Grapheme_Extend # Mn   [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI
 11234         ; Grapheme_Extend # Mn       KHOJKI SIGN ANUSVARA
+11235         ; Grapheme_Extend # Mc       KHOJKI SIGN VIRAMA
 11236..11237  ; Grapheme_Extend # Mn   [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA
 1123E         ; Grapheme_Extend # Mn       KHOJKI SIGN SUKUN
 11241         ; Grapheme_Extend # Mn       KHOJKI VOWEL SIGN VOCALIC R
@@ -10667,9 +10906,20 @@ FF9E..FF9F    ; Grapheme_Extend # Lm   [2] HALFWIDTH KATAKANA VOICED SOUND MARK.
 1133B..1133C  ; Grapheme_Extend # Mn   [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA
 1133E         ; Grapheme_Extend # Mc       GRANTHA VOWEL SIGN AA
 11340         ; Grapheme_Extend # Mn       GRANTHA VOWEL SIGN II
+1134D         ; Grapheme_Extend # Mc       GRANTHA SIGN VIRAMA
 11357         ; Grapheme_Extend # Mc       GRANTHA AU LENGTH MARK
 11366..1136C  ; Grapheme_Extend # Mn   [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX
 11370..11374  ; Grapheme_Extend # Mn   [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA
+113B8         ; Grapheme_Extend # Mc       TULU-TIGALARI VOWEL SIGN AA
+113BB..113C0  ; Grapheme_Extend # Mn   [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL
+113C2         ; Grapheme_Extend # Mc       TULU-TIGALARI VOWEL SIGN EE
+113C5         ; Grapheme_Extend # Mc       TULU-TIGALARI VOWEL SIGN AI
+113C7..113C9  ; Grapheme_Extend # Mc   [3] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI AU LENGTH MARK
+113CE         ; Grapheme_Extend # Mn       TULU-TIGALARI SIGN VIRAMA
+113CF         ; Grapheme_Extend # Mc       TULU-TIGALARI SIGN LOOPED VIRAMA
+113D0         ; Grapheme_Extend # Mn       TULU-TIGALARI CONJOINER
+113D2         ; Grapheme_Extend # Mn       TULU-TIGALARI GEMINATION MARK
+113E1..113E2  ; Grapheme_Extend # Mn   [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA
 11438..1143F  ; Grapheme_Extend # Mn   [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI
 11442..11444  ; Grapheme_Extend # Mn   [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA
 11446         ; Grapheme_Extend # Mn       NEWA SIGN NUKTA
@@ -10691,14 +10941,17 @@ FF9E..FF9F    ; Grapheme_Extend # Lm   [2] HALFWIDTH KATAKANA VOICED SOUND MARK.
 116AB         ; Grapheme_Extend # Mn       TAKRI SIGN ANUSVARA
 116AD         ; Grapheme_Extend # Mn       TAKRI VOWEL SIGN AA
 116B0..116B5  ; Grapheme_Extend # Mn   [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU
+116B6         ; Grapheme_Extend # Mc       TAKRI SIGN VIRAMA
 116B7         ; Grapheme_Extend # Mn       TAKRI SIGN NUKTA
-1171D..1171F  ; Grapheme_Extend # Mn   [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA
+1171D         ; Grapheme_Extend # Mn       AHOM CONSONANT SIGN MEDIAL LA
+1171F         ; Grapheme_Extend # Mn       AHOM CONSONANT SIGN MEDIAL LIGATING RA
 11722..11725  ; Grapheme_Extend # Mn   [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU
 11727..1172B  ; Grapheme_Extend # Mn   [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER
 1182F..11837  ; Grapheme_Extend # Mn   [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA
 11839..1183A  ; Grapheme_Extend # Mn   [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA
 11930         ; Grapheme_Extend # Mc       DIVES AKURU VOWEL SIGN AA
 1193B..1193C  ; Grapheme_Extend # Mn   [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU
+1193D         ; Grapheme_Extend # Mc       DIVES AKURU SIGN HALANTA
 1193E         ; Grapheme_Extend # Mn       DIVES AKURU VIRAMA
 11943         ; Grapheme_Extend # Mn       DIVES AKURU SIGN NUKTA
 119D4..119D7  ; Grapheme_Extend # Mn   [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR
@@ -10731,20 +10984,25 @@ FF9E..FF9F    ; Grapheme_Extend # Lm   [2] HALFWIDTH KATAKANA VOICED SOUND MARK.
 11F00..11F01  ; Grapheme_Extend # Mn   [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA
 11F36..11F3A  ; Grapheme_Extend # Mn   [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R
 11F40         ; Grapheme_Extend # Mn       KAWI VOWEL SIGN EU
+11F41         ; Grapheme_Extend # Mc       KAWI SIGN KILLER
 11F42         ; Grapheme_Extend # Mn       KAWI CONJOINER
+11F5A         ; Grapheme_Extend # Mn       KAWI SIGN NUKTA
 13440         ; Grapheme_Extend # Mn       EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY
 13447..13455  ; Grapheme_Extend # Mn  [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED
+1611E..16129  ; Grapheme_Extend # Mn  [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK
+1612D..1612F  ; Grapheme_Extend # Mn   [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA
 16AF0..16AF4  ; Grapheme_Extend # Mn   [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE
 16B30..16B36  ; Grapheme_Extend # Mn   [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
 16F4F         ; Grapheme_Extend # Mn       MIAO SIGN CONSONANT MODIFIER BAR
 16F8F..16F92  ; Grapheme_Extend # Mn   [4] MIAO TONE RIGHT..MIAO TONE BELOW
 16FE4         ; Grapheme_Extend # Mn       KHITAN SMALL SCRIPT FILLER
+16FF0..16FF1  ; Grapheme_Extend # Mc   [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
 1BC9D..1BC9E  ; Grapheme_Extend # Mn   [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
 1CF00..1CF2D  ; Grapheme_Extend # Mn  [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT
 1CF30..1CF46  ; Grapheme_Extend # Mn  [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG
-1D165         ; Grapheme_Extend # Mc       MUSICAL SYMBOL COMBINING STEM
+1D165..1D166  ; Grapheme_Extend # Mc   [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
 1D167..1D169  ; Grapheme_Extend # Mn   [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
-1D16E..1D172  ; Grapheme_Extend # Mc   [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5
+1D16D..1D172  ; Grapheme_Extend # Mc   [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5
 1D17B..1D182  ; Grapheme_Extend # Mn   [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
 1D185..1D18B  ; Grapheme_Extend # Mn   [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
 1D1AA..1D1AD  ; Grapheme_Extend # Mn   [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
@@ -10765,12 +11023,13 @@ FF9E..FF9F    ; Grapheme_Extend # Lm   [2] HALFWIDTH KATAKANA VOICED SOUND MARK.
 1E2AE         ; Grapheme_Extend # Mn       TOTO SIGN RISING TONE
 1E2EC..1E2EF  ; Grapheme_Extend # Mn   [4] WANCHO TONE TUP..WANCHO TONE KOINI
 1E4EC..1E4EF  ; Grapheme_Extend # Mn   [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH
+1E5EE..1E5EF  ; Grapheme_Extend # Mn   [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR
 1E8D0..1E8D6  ; Grapheme_Extend # Mn   [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
 1E944..1E94A  ; Grapheme_Extend # Mn   [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
 E0020..E007F  ; Grapheme_Extend # Cf  [96] TAG SPACE..CANCEL TAG
 E0100..E01EF  ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
 
-# Total code points: 2125
+# Total code points: 2193
 
 # ================================================
 
@@ -11062,10 +11321,8 @@ E0100..E01EF  ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE
 0CB5..0CB9    ; Grapheme_Base # Lo   [5] KANNADA LETTER VA..KANNADA LETTER HA
 0CBD          ; Grapheme_Base # Lo       KANNADA SIGN AVAGRAHA
 0CBE          ; Grapheme_Base # Mc       KANNADA VOWEL SIGN AA
-0CC0..0CC1    ; Grapheme_Base # Mc   [2] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN U
+0CC1          ; Grapheme_Base # Mc       KANNADA VOWEL SIGN U
 0CC3..0CC4    ; Grapheme_Base # Mc   [2] KANNADA VOWEL SIGN VOCALIC R..KANNADA VOWEL SIGN VOCALIC RR
-0CC7..0CC8    ; Grapheme_Base # Mc   [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI
-0CCA..0CCB    ; Grapheme_Base # Mc   [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
 0CDD..0CDE    ; Grapheme_Base # Lo   [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA
 0CE0..0CE1    ; Grapheme_Base # Lo   [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
 0CE6..0CEF    ; Grapheme_Base # Nd  [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
@@ -11214,9 +11471,7 @@ E0100..E01EF  ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE
 16EE..16F0    ; Grapheme_Base # Nl   [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL
 16F1..16F8    ; Grapheme_Base # Lo   [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC
 1700..1711    ; Grapheme_Base # Lo  [18] TAGALOG LETTER A..TAGALOG LETTER HA
-1715          ; Grapheme_Base # Mc       TAGALOG SIGN PAMUDPOD
 171F..1731    ; Grapheme_Base # Lo  [19] TAGALOG LETTER ARCHAIC RA..HANUNOO LETTER HA
-1734          ; Grapheme_Base # Mc       HANUNOO SIGN PAMUDPOD
 1735..1736    ; Grapheme_Base # Po   [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION
 1740..1751    ; Grapheme_Base # Lo  [18] BUHID LETTER A..BUHID LETTER HA
 1760..176C    ; Grapheme_Base # Lo  [13] TAGBANWA LETTER A..TAGBANWA LETTER YA
@@ -11274,27 +11529,24 @@ E0100..E01EF  ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE
 1AA8..1AAD    ; Grapheme_Base # Po   [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG
 1B04          ; Grapheme_Base # Mc       BALINESE SIGN BISAH
 1B05..1B33    ; Grapheme_Base # Lo  [47] BALINESE LETTER AKARA..BALINESE LETTER HA
-1B3B          ; Grapheme_Base # Mc       BALINESE VOWEL SIGN RA REPA TEDUNG
-1B3D..1B41    ; Grapheme_Base # Mc   [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG
-1B43..1B44    ; Grapheme_Base # Mc   [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG
+1B3E..1B41    ; Grapheme_Base # Mc   [4] BALINESE VOWEL SIGN TALING..BALINESE VOWEL SIGN TALING REPA TEDUNG
 1B45..1B4C    ; Grapheme_Base # Lo   [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA
+1B4E..1B4F    ; Grapheme_Base # Po   [2] BALINESE INVERTED CARIK SIKI..BALINESE INVERTED CARIK PAREREN
 1B50..1B59    ; Grapheme_Base # Nd  [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE
 1B5A..1B60    ; Grapheme_Base # Po   [7] BALINESE PANTI..BALINESE PAMENENG
 1B61..1B6A    ; Grapheme_Base # So  [10] BALINESE MUSICAL SYMBOL DONG..BALINESE MUSICAL SYMBOL DANG GEDE
 1B74..1B7C    ; Grapheme_Base # So   [9] BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING
-1B7D..1B7E    ; Grapheme_Base # Po   [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG
+1B7D..1B7F    ; Grapheme_Base # Po   [3] BALINESE PANTI LANTANG..BALINESE PANTI BAWAK
 1B82          ; Grapheme_Base # Mc       SUNDANESE SIGN PANGWISAD
 1B83..1BA0    ; Grapheme_Base # Lo  [30] SUNDANESE LETTER A..SUNDANESE LETTER HA
 1BA1          ; Grapheme_Base # Mc       SUNDANESE CONSONANT SIGN PAMINGKAL
 1BA6..1BA7    ; Grapheme_Base # Mc   [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG
-1BAA          ; Grapheme_Base # Mc       SUNDANESE SIGN PAMAAEH
 1BAE..1BAF    ; Grapheme_Base # Lo   [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA
 1BB0..1BB9    ; Grapheme_Base # Nd  [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE
 1BBA..1BE5    ; Grapheme_Base # Lo  [44] SUNDANESE AVAGRAHA..BATAK LETTER U
 1BE7          ; Grapheme_Base # Mc       BATAK VOWEL SIGN E
 1BEA..1BEC    ; Grapheme_Base # Mc   [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O
 1BEE          ; Grapheme_Base # Mc       BATAK VOWEL SIGN U
-1BF2..1BF3    ; Grapheme_Base # Mc   [2] BATAK PANGOLAT..BATAK PANONGONAN
 1BFC..1BFF    ; Grapheme_Base # Po   [4] BATAK SYMBOL BINDU NA METEK..BATAK SYMBOL BINDU PANGOLAT
 1C00..1C23    ; Grapheme_Base # Lo  [36] LEPCHA LETTER KA..LEPCHA LETTER A
 1C24..1C2B    ; Grapheme_Base # Mc   [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU
@@ -11306,7 +11558,7 @@ E0100..E01EF  ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE
 1C5A..1C77    ; Grapheme_Base # Lo  [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
 1C78..1C7D    ; Grapheme_Base # Lm   [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
 1C7E..1C7F    ; Grapheme_Base # Po   [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
-1C80..1C88    ; Grapheme_Base # L&   [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C80..1C8A    ; Grapheme_Base # L&  [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE
 1C90..1CBA    ; Grapheme_Base # L&  [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
 1CBD..1CBF    ; Grapheme_Base # L&   [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
 1CC0..1CC7    ; Grapheme_Base # Po   [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA
@@ -11461,7 +11713,7 @@ E0100..E01EF  ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE
 239B..23B3    ; Grapheme_Base # Sm  [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM
 23B4..23DB    ; Grapheme_Base # So  [40] TOP SQUARE BRACKET..FUSE
 23DC..23E1    ; Grapheme_Base # Sm   [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET
-23E2..2426    ; Grapheme_Base # So  [69] WHITE TRAPEZIUM..SYMBOL FOR SUBSTITUTE FORM TWO
+23E2..2429    ; Grapheme_Base # So  [72] WHITE TRAPEZIUM..SYMBOL FOR DELETE MEDIUM SHADE FORM
 2440..244A    ; Grapheme_Base # So  [11] OCR HOOK..OCR DOUBLE BACKSLASH
 2460..249B    ; Grapheme_Base # No  [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP
 249C..24E9    ; Grapheme_Base # So  [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z
@@ -11676,7 +11928,7 @@ E0100..E01EF  ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE
 3192..3195    ; Grapheme_Base # No   [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK
 3196..319F    ; Grapheme_Base # So  [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK
 31A0..31BF    ; Grapheme_Base # Lo  [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH
-31C0..31E3    ; Grapheme_Base # So  [36] CJK STROKE T..CJK STROKE Q
+31C0..31E5    ; Grapheme_Base # So  [38] CJK STROKE T..CJK STROKE SZP
 31EF          ; Grapheme_Base # So       IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION
 31F0..31FF    ; Grapheme_Base # Lo  [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
 3200..321E    ; Grapheme_Base # So  [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU
@@ -11725,10 +11977,10 @@ A788          ; Grapheme_Base # Lm       MODIFIER LETTER LOW CIRCUMFLEX ACCENT
 A789..A78A    ; Grapheme_Base # Sk   [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN
 A78B..A78E    ; Grapheme_Base # L&   [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
 A78F          ; Grapheme_Base # Lo       LATIN LETTER SINOLOGICAL DOT
-A790..A7CA    ; Grapheme_Base # L&  [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
+A790..A7CD    ; Grapheme_Base # L&  [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE
 A7D0..A7D1    ; Grapheme_Base # L&   [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G
 A7D3          ; Grapheme_Base # L&       LATIN SMALL LETTER DOUBLE THORN
-A7D5..A7D9    ; Grapheme_Base # L&   [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S
+A7D5..A7DC    ; Grapheme_Base # L&   [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE
 A7F2..A7F4    ; Grapheme_Base # Lm   [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q
 A7F5..A7F6    ; Grapheme_Base # L&   [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H
 A7F7          ; Grapheme_Base # Lo       LATIN EPIGRAPHIC LETTER SIDEWAYS I
@@ -11761,14 +12013,14 @@ A900..A909    ; Grapheme_Base # Nd  [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NIN
 A90A..A925    ; Grapheme_Base # Lo  [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO
 A92E..A92F    ; Grapheme_Base # Po   [2] KAYAH LI SIGN CWI..KAYAH LI SIGN SHYA
 A930..A946    ; Grapheme_Base # Lo  [23] REJANG LETTER KA..REJANG LETTER A
-A952..A953    ; Grapheme_Base # Mc   [2] REJANG CONSONANT SIGN H..REJANG VIRAMA
+A952          ; Grapheme_Base # Mc       REJANG CONSONANT SIGN H
 A95F          ; Grapheme_Base # Po       REJANG SECTION MARK
 A960..A97C    ; Grapheme_Base # Lo  [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH
 A983          ; Grapheme_Base # Mc       JAVANESE SIGN WIGNYAN
 A984..A9B2    ; Grapheme_Base # Lo  [47] JAVANESE LETTER A..JAVANESE LETTER HA
 A9B4..A9B5    ; Grapheme_Base # Mc   [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG
 A9BA..A9BB    ; Grapheme_Base # Mc   [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE
-A9BE..A9C0    ; Grapheme_Base # Mc   [3] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE PANGKON
+A9BE..A9BF    ; Grapheme_Base # Mc   [2] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE CONSONANT SIGN CAKRA
 A9C1..A9CD    ; Grapheme_Base # Po  [13] JAVANESE LEFT RERENGGAN..JAVANESE TURNED PADA PISELEH
 A9CF          ; Grapheme_Base # Lm       JAVANESE PANGRANGKEP
 A9D0..A9D9    ; Grapheme_Base # Nd  [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE
@@ -12000,6 +12252,7 @@ FFFC..FFFD    ; Grapheme_Base # So   [2] OBJECT REPLACEMENT CHARACTER..REPLACEME
 105A3..105B1  ; Grapheme_Base # L&  [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE
 105B3..105B9  ; Grapheme_Base # L&   [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
 105BB..105BC  ; Grapheme_Base # L&   [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
+105C0..105F3  ; Grapheme_Base # Lo  [52] TODHRI LETTER A..TODHRI LETTER OO
 10600..10736  ; Grapheme_Base # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664
 10740..10755  ; Grapheme_Base # Lo  [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE
 10760..10767  ; Grapheme_Base # Lo   [8] LINEAR A SIGN A800..LINEAR A SIGN A807
@@ -12063,10 +12316,20 @@ FFFC..FFFD    ; Grapheme_Base # So   [2] OBJECT REPLACEMENT CHARACTER..REPLACEME
 10CFA..10CFF  ; Grapheme_Base # No   [6] OLD HUNGARIAN NUMBER ONE..OLD HUNGARIAN NUMBER ONE THOUSAND
 10D00..10D23  ; Grapheme_Base # Lo  [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA
 10D30..10D39  ; Grapheme_Base # Nd  [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE
+10D40..10D49  ; Grapheme_Base # Nd  [10] GARAY DIGIT ZERO..GARAY DIGIT NINE
+10D4A..10D4D  ; Grapheme_Base # Lo   [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE
+10D4E         ; Grapheme_Base # Lm       GARAY VOWEL LENGTH MARK
+10D4F         ; Grapheme_Base # Lo       GARAY SUKUN
+10D50..10D65  ; Grapheme_Base # L&  [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA
+10D6E         ; Grapheme_Base # Pd       GARAY HYPHEN
+10D6F         ; Grapheme_Base # Lm       GARAY REDUPLICATION MARK
+10D70..10D85  ; Grapheme_Base # L&  [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA
+10D8E..10D8F  ; Grapheme_Base # Sm   [2] GARAY PLUS SIGN..GARAY MINUS SIGN
 10E60..10E7E  ; Grapheme_Base # No  [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS
 10E80..10EA9  ; Grapheme_Base # Lo  [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
 10EAD         ; Grapheme_Base # Pd       YEZIDI HYPHENATION MARK
 10EB0..10EB1  ; Grapheme_Base # Lo   [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
+10EC2..10EC4  ; Grapheme_Base # Lo   [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW
 10F00..10F1C  ; Grapheme_Base # Lo  [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
 10F1D..10F26  ; Grapheme_Base # No  [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF
 10F27         ; Grapheme_Base # Lo       OLD SOGDIAN LIGATURE AYIN-DALETH
@@ -12107,7 +12370,7 @@ FFFC..FFFD    ; Grapheme_Base # So   [2] OBJECT REPLACEMENT CHARACTER..REPLACEME
 11182         ; Grapheme_Base # Mc       SHARADA SIGN VISARGA
 11183..111B2  ; Grapheme_Base # Lo  [48] SHARADA LETTER A..SHARADA LETTER HA
 111B3..111B5  ; Grapheme_Base # Mc   [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II
-111BF..111C0  ; Grapheme_Base # Mc   [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA
+111BF         ; Grapheme_Base # Mc       SHARADA VOWEL SIGN AU
 111C1..111C4  ; Grapheme_Base # Lo   [4] SHARADA SIGN AVAGRAHA..SHARADA OM
 111C5..111C8  ; Grapheme_Base # Po   [4] SHARADA DANDA..SHARADA SEPARATOR
 111CD         ; Grapheme_Base # Po       SHARADA SUTRA MARK
@@ -12122,7 +12385,6 @@ FFFC..FFFD    ; Grapheme_Base # So   [2] OBJECT REPLACEMENT CHARACTER..REPLACEME
 11213..1122B  ; Grapheme_Base # Lo  [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA
 1122C..1122E  ; Grapheme_Base # Mc   [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II
 11232..11233  ; Grapheme_Base # Mc   [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU
-11235         ; Grapheme_Base # Mc       KHOJKI SIGN VIRAMA
 11238..1123D  ; Grapheme_Base # Po   [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN
 1123F..11240  ; Grapheme_Base # Lo   [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I
 11280..11286  ; Grapheme_Base # Lo   [7] MULTANI LETTER A..MULTANI LETTER GA
@@ -12145,10 +12407,22 @@ FFFC..FFFD    ; Grapheme_Base # So   [2] OBJECT REPLACEMENT CHARACTER..REPLACEME
 1133F         ; Grapheme_Base # Mc       GRANTHA VOWEL SIGN I
 11341..11344  ; Grapheme_Base # Mc   [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR
 11347..11348  ; Grapheme_Base # Mc   [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI
-1134B..1134D  ; Grapheme_Base # Mc   [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA
+1134B..1134C  ; Grapheme_Base # Mc   [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU
 11350         ; Grapheme_Base # Lo       GRANTHA OM
 1135D..11361  ; Grapheme_Base # Lo   [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL
 11362..11363  ; Grapheme_Base # Mc   [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL
+11380..11389  ; Grapheme_Base # Lo  [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL
+1138B         ; Grapheme_Base # Lo       TULU-TIGALARI LETTER EE
+1138E         ; Grapheme_Base # Lo       TULU-TIGALARI LETTER AI
+11390..113B5  ; Grapheme_Base # Lo  [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA
+113B7         ; Grapheme_Base # Lo       TULU-TIGALARI SIGN AVAGRAHA
+113B9..113BA  ; Grapheme_Base # Mc   [2] TULU-TIGALARI VOWEL SIGN I..TULU-TIGALARI VOWEL SIGN II
+113CA         ; Grapheme_Base # Mc       TULU-TIGALARI SIGN CANDRA ANUNASIKA
+113CC..113CD  ; Grapheme_Base # Mc   [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA
+113D1         ; Grapheme_Base # Lo       TULU-TIGALARI REPHA
+113D3         ; Grapheme_Base # Lo       TULU-TIGALARI SIGN PLUTA
+113D4..113D5  ; Grapheme_Base # Po   [2] TULU-TIGALARI DANDA..TULU-TIGALARI DOUBLE DANDA
+113D7..113D8  ; Grapheme_Base # Po   [2] TULU-TIGALARI SIGN OM PUSHPIKA..TULU-TIGALARI SIGN SHRII PUSHPIKA
 11400..11434  ; Grapheme_Base # Lo  [53] NEWA LETTER A..NEWA LETTER HA
 11435..11437  ; Grapheme_Base # Mc   [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II
 11440..11441  ; Grapheme_Base # Mc   [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU
@@ -12186,11 +12460,12 @@ FFFC..FFFD    ; Grapheme_Base # So   [2] OBJECT REPLACEMENT CHARACTER..REPLACEME
 11680..116AA  ; Grapheme_Base # Lo  [43] TAKRI LETTER A..TAKRI LETTER RRA
 116AC         ; Grapheme_Base # Mc       TAKRI SIGN VISARGA
 116AE..116AF  ; Grapheme_Base # Mc   [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II
-116B6         ; Grapheme_Base # Mc       TAKRI SIGN VIRAMA
 116B8         ; Grapheme_Base # Lo       TAKRI LETTER ARCHAIC KHA
 116B9         ; Grapheme_Base # Po       TAKRI ABBREVIATION SIGN
 116C0..116C9  ; Grapheme_Base # Nd  [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
+116D0..116E3  ; Grapheme_Base # Nd  [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE
 11700..1171A  ; Grapheme_Base # Lo  [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA
+1171E         ; Grapheme_Base # Mc       AHOM CONSONANT SIGN MEDIAL RA
 11720..11721  ; Grapheme_Base # Mc   [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA
 11726         ; Grapheme_Base # Mc       AHOM VOWEL SIGN E
 11730..11739  ; Grapheme_Base # Nd  [10] AHOM DIGIT ZERO..AHOM DIGIT NINE
@@ -12212,7 +12487,6 @@ FFFC..FFFD    ; Grapheme_Base # So   [2] OBJECT REPLACEMENT CHARACTER..REPLACEME
 11918..1192F  ; Grapheme_Base # Lo  [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA
 11931..11935  ; Grapheme_Base # Mc   [5] DIVES AKURU VOWEL SIGN I..DIVES AKURU VOWEL SIGN E
 11937..11938  ; Grapheme_Base # Mc   [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O
-1193D         ; Grapheme_Base # Mc       DIVES AKURU SIGN HALANTA
 1193F         ; Grapheme_Base # Lo       DIVES AKURU PREFIXED NASAL SIGN
 11940         ; Grapheme_Base # Mc       DIVES AKURU MEDIAL YA
 11941         ; Grapheme_Base # Lo       DIVES AKURU INITIAL RA
@@ -12241,6 +12515,9 @@ FFFC..FFFD    ; Grapheme_Base # So   [2] OBJECT REPLACEMENT CHARACTER..REPLACEME
 11A9E..11AA2  ; Grapheme_Base # Po   [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2
 11AB0..11AF8  ; Grapheme_Base # Lo  [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL
 11B00..11B09  ; Grapheme_Base # Po  [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU
+11BC0..11BE0  ; Grapheme_Base # Lo  [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO
+11BE1         ; Grapheme_Base # Po       SUNUWAR SIGN PVO
+11BF0..11BF9  ; Grapheme_Base # Nd  [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE
 11C00..11C08  ; Grapheme_Base # Lo   [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
 11C0A..11C2E  ; Grapheme_Base # Lo  [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
 11C2F         ; Grapheme_Base # Mc       BHAIKSUKI VOWEL SIGN AA
@@ -12276,7 +12553,6 @@ FFFC..FFFD    ; Grapheme_Base # So   [2] OBJECT REPLACEMENT CHARACTER..REPLACEME
 11F12..11F33  ; Grapheme_Base # Lo  [34] KAWI LETTER KA..KAWI LETTER JNYA
 11F34..11F35  ; Grapheme_Base # Mc   [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA
 11F3E..11F3F  ; Grapheme_Base # Mc   [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI
-11F41         ; Grapheme_Base # Mc       KAWI SIGN KILLER
 11F43..11F4F  ; Grapheme_Base # Po  [13] KAWI DANDA..KAWI PUNCTUATION CLOSING SPIRAL
 11F50..11F59  ; Grapheme_Base # Nd  [10] KAWI DIGIT ZERO..KAWI DIGIT NINE
 11FB0         ; Grapheme_Base # Lo       LISU LETTER YHA
@@ -12293,7 +12569,11 @@ FFFC..FFFD    ; Grapheme_Base # So   [2] OBJECT REPLACEMENT CHARACTER..REPLACEME
 12FF1..12FF2  ; Grapheme_Base # Po   [2] CYPRO-MINOAN SIGN CM301..CYPRO-MINOAN SIGN CM302
 13000..1342F  ; Grapheme_Base # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D
 13441..13446  ; Grapheme_Base # Lo   [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN
+13460..143FA  ; Grapheme_Base # Lo [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA
 14400..14646  ; Grapheme_Base # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530
+16100..1611D  ; Grapheme_Base # Lo  [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA
+1612A..1612C  ; Grapheme_Base # Mc   [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA
+16130..16139  ; Grapheme_Base # Nd  [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE
 16800..16A38  ; Grapheme_Base # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ
 16A40..16A5E  ; Grapheme_Base # Lo  [31] MRO LETTER TA..MRO LETTER TEK
 16A60..16A69  ; Grapheme_Base # Nd  [10] MRO DIGIT ZERO..MRO DIGIT NINE
@@ -12312,6 +12592,11 @@ FFFC..FFFD    ; Grapheme_Base # So   [2] OBJECT REPLACEMENT CHARACTER..REPLACEME
 16B5B..16B61  ; Grapheme_Base # No   [7] PAHAWH HMONG NUMBER TENS..PAHAWH HMONG NUMBER TRILLIONS
 16B63..16B77  ; Grapheme_Base # Lo  [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS
 16B7D..16B8F  ; Grapheme_Base # Lo  [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ
+16D40..16D42  ; Grapheme_Base # Lm   [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA
+16D43..16D6A  ; Grapheme_Base # Lo  [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU
+16D6B..16D6C  ; Grapheme_Base # Lm   [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT
+16D6D..16D6F  ; Grapheme_Base # Po   [3] KIRAT RAI SIGN YUPI..KIRAT RAI DOUBLE DANDA
+16D70..16D79  ; Grapheme_Base # Nd  [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE
 16E40..16E7F  ; Grapheme_Base # L&  [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y
 16E80..16E96  ; Grapheme_Base # No  [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM
 16E97..16E9A  ; Grapheme_Base # Po   [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH
@@ -12322,10 +12607,9 @@ FFFC..FFFD    ; Grapheme_Base # So   [2] OBJECT REPLACEMENT CHARACTER..REPLACEME
 16FE0..16FE1  ; Grapheme_Base # Lm   [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK
 16FE2         ; Grapheme_Base # Po       OLD CHINESE HOOK MARK
 16FE3         ; Grapheme_Base # Lm       OLD CHINESE ITERATION MARK
-16FF0..16FF1  ; Grapheme_Base # Mc   [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
 17000..187F7  ; Grapheme_Base # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7
 18800..18CD5  ; Grapheme_Base # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5
-18D00..18D08  ; Grapheme_Base # Lo   [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08
+18CFF..18D08  ; Grapheme_Base # Lo  [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08
 1AFF0..1AFF3  ; Grapheme_Base # Lm   [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5
 1AFF5..1AFFB  ; Grapheme_Base # Lm   [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5
 1AFFD..1AFFE  ; Grapheme_Base # Lm   [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8
@@ -12341,13 +12625,14 @@ FFFC..FFFD    ; Grapheme_Base # So   [2] OBJECT REPLACEMENT CHARACTER..REPLACEME
 1BC90..1BC99  ; Grapheme_Base # Lo  [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW
 1BC9C         ; Grapheme_Base # So       DUPLOYAN SIGN O WITH CROSS
 1BC9F         ; Grapheme_Base # Po       DUPLOYAN PUNCTUATION CHINOOK FULL STOP
+1CC00..1CCEF  ; Grapheme_Base # So [240] UP-POINTING GO-KART..OUTLINED LATIN CAPITAL LETTER Z
+1CCF0..1CCF9  ; Grapheme_Base # Nd  [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE
+1CD00..1CEB3  ; Grapheme_Base # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET
 1CF50..1CFC3  ; Grapheme_Base # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK
 1D000..1D0F5  ; Grapheme_Base # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO
 1D100..1D126  ; Grapheme_Base # So  [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2
 1D129..1D164  ; Grapheme_Base # So  [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
-1D166         ; Grapheme_Base # Mc       MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
 1D16A..1D16C  ; Grapheme_Base # So   [3] MUSICAL SYMBOL FINGERED TREMOLO-1..MUSICAL SYMBOL FINGERED TREMOLO-3
-1D16D         ; Grapheme_Base # Mc       MUSICAL SYMBOL COMBINING AUGMENTATION DOT
 1D183..1D184  ; Grapheme_Base # So   [2] MUSICAL SYMBOL ARPEGGIATO UP..MUSICAL SYMBOL ARPEGGIATO DOWN
 1D18C..1D1A9  ; Grapheme_Base # So  [30] MUSICAL SYMBOL RINFORZANDO..MUSICAL SYMBOL DEGREE SLASH
 1D1AE..1D1EA  ; Grapheme_Base # So  [61] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KORON
@@ -12421,6 +12706,10 @@ FFFC..FFFD    ; Grapheme_Base # So   [2] OBJECT REPLACEMENT CHARACTER..REPLACEME
 1E4D0..1E4EA  ; Grapheme_Base # Lo  [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL
 1E4EB         ; Grapheme_Base # Lm       NAG MUNDARI SIGN OJOD
 1E4F0..1E4F9  ; Grapheme_Base # Nd  [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE
+1E5D0..1E5ED  ; Grapheme_Base # Lo  [30] OL ONAL LETTER O..OL ONAL LETTER EG
+1E5F0         ; Grapheme_Base # Lo       OL ONAL SIGN HODDOND
+1E5F1..1E5FA  ; Grapheme_Base # Nd  [10] OL ONAL DIGIT ZERO..OL ONAL DIGIT NINE
+1E5FF         ; Grapheme_Base # Po       OL ONAL ABBREVIATION SIGN
 1E7E0..1E7E6  ; Grapheme_Base # Lo   [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO
 1E7E8..1E7EB  ; Grapheme_Base # Lo   [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE
 1E7ED..1E7EE  ; Grapheme_Base # Lo   [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE
@@ -12500,18 +12789,18 @@ FFFC..FFFD    ; Grapheme_Base # So   [2] OBJECT REPLACEMENT CHARACTER..REPLACEME
 1F850..1F859  ; Grapheme_Base # So  [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW
 1F860..1F887  ; Grapheme_Base # So  [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
 1F890..1F8AD  ; Grapheme_Base # So  [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS
-1F8B0..1F8B1  ; Grapheme_Base # So   [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST
+1F8B0..1F8BB  ; Grapheme_Base # So  [12] ARROW POINTING UPWARDS THEN NORTH WEST..SOUTH WEST ARROW FROM BAR
+1F8C0..1F8C1  ; Grapheme_Base # So   [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW
 1F900..1FA53  ; Grapheme_Base # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP
 1FA60..1FA6D  ; Grapheme_Base # So  [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER
 1FA70..1FA7C  ; Grapheme_Base # So  [13] BALLET SHOES..CRUTCH
-1FA80..1FA88  ; Grapheme_Base # So   [9] YO-YO..FLUTE
-1FA90..1FABD  ; Grapheme_Base # So  [46] RINGED PLANET..WING
-1FABF..1FAC5  ; Grapheme_Base # So   [7] GOOSE..PERSON WITH CROWN
-1FACE..1FADB  ; Grapheme_Base # So  [14] MOOSE..PEA POD
-1FAE0..1FAE8  ; Grapheme_Base # So   [9] MELTING FACE..SHAKING FACE
+1FA80..1FA89  ; Grapheme_Base # So  [10] YO-YO..HARP
+1FA8F..1FAC6  ; Grapheme_Base # So  [56] SHOVEL..FINGERPRINT
+1FACE..1FADC  ; Grapheme_Base # So  [15] MOOSE..ROOT VEGETABLE
+1FADF..1FAE9  ; Grapheme_Base # So  [11] SPLATTER..FACE WITH BAGS UNDER EYES
 1FAF0..1FAF8  ; Grapheme_Base # So   [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND
 1FB00..1FB92  ; Grapheme_Base # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK
-1FB94..1FBCA  ; Grapheme_Base # So  [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON
+1FB94..1FBEF  ; Grapheme_Base # So  [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE
 1FBF0..1FBF9  ; Grapheme_Base # Nd  [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
 20000..2A6DF  ; Grapheme_Base # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF
 2A700..2B739  ; Grapheme_Base # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739
@@ -12523,7 +12812,7 @@ FFFC..FFFD    ; Grapheme_Base # So   [2] OBJECT REPLACEMENT CHARACTER..REPLACEME
 30000..3134A  ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
 31350..323AF  ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
 
-# Total code points: 147613
+# Total code points: 152730
 
 # ================================================
 
@@ -12573,6 +12862,9 @@ ABED          ; Grapheme_Link # Mn       MEETEI MAYEK APUN IYEK
 11235         ; Grapheme_Link # Mc       KHOJKI SIGN VIRAMA
 112EA         ; Grapheme_Link # Mn       KHUDAWADI SIGN VIRAMA
 1134D         ; Grapheme_Link # Mc       GRANTHA SIGN VIRAMA
+113CE         ; Grapheme_Link # Mn       TULU-TIGALARI SIGN VIRAMA
+113CF         ; Grapheme_Link # Mc       TULU-TIGALARI SIGN LOOPED VIRAMA
+113D0         ; Grapheme_Link # Mn       TULU-TIGALARI CONJOINER
 11442         ; Grapheme_Link # Mn       NEWA SIGN VIRAMA
 114C2         ; Grapheme_Link # Mn       TIRHUTA SIGN VIRAMA
 115BF         ; Grapheme_Link # Mn       SIDDHAM SIGN VIRAMA
@@ -12591,8 +12883,9 @@ ABED          ; Grapheme_Link # Mn       MEETEI MAYEK APUN IYEK
 11D97         ; Grapheme_Link # Mn       GUNJALA GONDI VIRAMA
 11F41         ; Grapheme_Link # Mc       KAWI SIGN KILLER
 11F42         ; Grapheme_Link # Mn       KAWI CONJOINER
+1612F         ; Grapheme_Link # Mn       GURUNG KHEMA SIGN THOLHOMA
 
-# Total code points: 65
+# Total code points: 69
 
 # ================================================
 
@@ -12656,9 +12949,9 @@ ABED          ; Grapheme_Link # Mn       MEETEI MAYEK APUN IYEK
 
 # Indic_Conjunct_Break=Extend
 
-0300..034E    ; InCB; Extend # Mn  [79] COMBINING GRAVE ACCENT..COMBINING UPWARDS ARROW BELOW
-0350..036F    ; InCB; Extend # Mn  [32] COMBINING RIGHT ARROWHEAD ABOVE..COMBINING LATIN SMALL LETTER X
+0300..036F    ; InCB; Extend # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
 0483..0487    ; InCB; Extend # Mn   [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE
+0488..0489    ; InCB; Extend # Me   [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
 0591..05BD    ; InCB; Extend # Mn  [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
 05BF          ; InCB; Extend # Mn       HEBREW POINT RAFE
 05C1..05C2    ; InCB; Extend # Mn   [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
@@ -12673,6 +12966,7 @@ ABED          ; Grapheme_Link # Mn       MEETEI MAYEK APUN IYEK
 06EA..06ED    ; InCB; Extend # Mn   [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
 0711          ; InCB; Extend # Mn       SYRIAC LETTER SUPERSCRIPT ALAPH
 0730..074A    ; InCB; Extend # Mn  [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
+07A6..07B0    ; InCB; Extend # Mn  [11] THAANA ABAFILI..THAANA SUKUN
 07EB..07F3    ; InCB; Extend # Mn   [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
 07FD          ; InCB; Extend # Mn       NKO DANTAYALAN
 0816..0819    ; InCB; Extend # Mn   [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH
@@ -12680,55 +12974,160 @@ ABED          ; Grapheme_Link # Mn       MEETEI MAYEK APUN IYEK
 0825..0827    ; InCB; Extend # Mn   [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
 0829..082D    ; InCB; Extend # Mn   [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
 0859..085B    ; InCB; Extend # Mn   [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
-0898..089F    ; InCB; Extend # Mn   [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
+0897..089F    ; InCB; Extend # Mn   [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA
 08CA..08E1    ; InCB; Extend # Mn  [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
-08E3..08FF    ; InCB; Extend # Mn  [29] ARABIC TURNED DAMMA BELOW..ARABIC MARK SIDEWAYS NOON GHUNNA
+08E3..0902    ; InCB; Extend # Mn  [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA
+093A          ; InCB; Extend # Mn       DEVANAGARI VOWEL SIGN OE
 093C          ; InCB; Extend # Mn       DEVANAGARI SIGN NUKTA
-0951..0954    ; InCB; Extend # Mn   [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT
+0941..0948    ; InCB; Extend # Mn   [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
+0951..0957    ; InCB; Extend # Mn   [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE
+0962..0963    ; InCB; Extend # Mn   [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
+0981          ; InCB; Extend # Mn       BENGALI SIGN CANDRABINDU
 09BC          ; InCB; Extend # Mn       BENGALI SIGN NUKTA
+09BE          ; InCB; Extend # Mc       BENGALI VOWEL SIGN AA
+09C1..09C4    ; InCB; Extend # Mn   [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR
+09D7          ; InCB; Extend # Mc       BENGALI AU LENGTH MARK
+09E2..09E3    ; InCB; Extend # Mn   [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL
 09FE          ; InCB; Extend # Mn       BENGALI SANDHI MARK
+0A01..0A02    ; InCB; Extend # Mn   [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI
 0A3C          ; InCB; Extend # Mn       GURMUKHI SIGN NUKTA
+0A41..0A42    ; InCB; Extend # Mn   [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU
+0A47..0A48    ; InCB; Extend # Mn   [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI
+0A4B..0A4D    ; InCB; Extend # Mn   [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA
+0A51          ; InCB; Extend # Mn       GURMUKHI SIGN UDAAT
+0A70..0A71    ; InCB; Extend # Mn   [2] GURMUKHI TIPPI..GURMUKHI ADDAK
+0A75          ; InCB; Extend # Mn       GURMUKHI SIGN YAKASH
+0A81..0A82    ; InCB; Extend # Mn   [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA
 0ABC          ; InCB; Extend # Mn       GUJARATI SIGN NUKTA
+0AC1..0AC5    ; InCB; Extend # Mn   [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E
+0AC7..0AC8    ; InCB; Extend # Mn   [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI
+0AE2..0AE3    ; InCB; Extend # Mn   [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL
+0AFA..0AFF    ; InCB; Extend # Mn   [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE
+0B01          ; InCB; Extend # Mn       ORIYA SIGN CANDRABINDU
 0B3C          ; InCB; Extend # Mn       ORIYA SIGN NUKTA
+0B3E          ; InCB; Extend # Mc       ORIYA VOWEL SIGN AA
+0B3F          ; InCB; Extend # Mn       ORIYA VOWEL SIGN I
+0B41..0B44    ; InCB; Extend # Mn   [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR
+0B55..0B56    ; InCB; Extend # Mn   [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK
+0B57          ; InCB; Extend # Mc       ORIYA AU LENGTH MARK
+0B62..0B63    ; InCB; Extend # Mn   [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL
+0B82          ; InCB; Extend # Mn       TAMIL SIGN ANUSVARA
+0BBE          ; InCB; Extend # Mc       TAMIL VOWEL SIGN AA
+0BC0          ; InCB; Extend # Mn       TAMIL VOWEL SIGN II
+0BCD          ; InCB; Extend # Mn       TAMIL SIGN VIRAMA
+0BD7          ; InCB; Extend # Mc       TAMIL AU LENGTH MARK
+0C00          ; InCB; Extend # Mn       TELUGU SIGN COMBINING CANDRABINDU ABOVE
+0C04          ; InCB; Extend # Mn       TELUGU SIGN COMBINING ANUSVARA ABOVE
 0C3C          ; InCB; Extend # Mn       TELUGU SIGN NUKTA
+0C3E..0C40    ; InCB; Extend # Mn   [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
+0C46..0C48    ; InCB; Extend # Mn   [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI
+0C4A..0C4C    ; InCB; Extend # Mn   [3] TELUGU VOWEL SIGN O..TELUGU VOWEL SIGN AU
 0C55..0C56    ; InCB; Extend # Mn   [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
+0C62..0C63    ; InCB; Extend # Mn   [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL
+0C81          ; InCB; Extend # Mn       KANNADA SIGN CANDRABINDU
 0CBC          ; InCB; Extend # Mn       KANNADA SIGN NUKTA
+0CBF          ; InCB; Extend # Mn       KANNADA VOWEL SIGN I
+0CC0          ; InCB; Extend # Mc       KANNADA VOWEL SIGN II
+0CC2          ; InCB; Extend # Mc       KANNADA VOWEL SIGN UU
+0CC6          ; InCB; Extend # Mn       KANNADA VOWEL SIGN E
+0CC7..0CC8    ; InCB; Extend # Mc   [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI
+0CCA..0CCB    ; InCB; Extend # Mc   [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
+0CCC..0CCD    ; InCB; Extend # Mn   [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
+0CD5..0CD6    ; InCB; Extend # Mc   [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
+0CE2..0CE3    ; InCB; Extend # Mn   [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
+0D00..0D01    ; InCB; Extend # Mn   [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
 0D3B..0D3C    ; InCB; Extend # Mn   [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA
-0E38..0E3A    ; InCB; Extend # Mn   [3] THAI CHARACTER SARA U..THAI CHARACTER PHINTHU
-0E48..0E4B    ; InCB; Extend # Mn   [4] THAI CHARACTER MAI EK..THAI CHARACTER MAI CHATTAWA
-0EB8..0EBA    ; InCB; Extend # Mn   [3] LAO VOWEL SIGN U..LAO SIGN PALI VIRAMA
-0EC8..0ECB    ; InCB; Extend # Mn   [4] LAO TONE MAI EK..LAO TONE MAI CATAWA
+0D3E          ; InCB; Extend # Mc       MALAYALAM VOWEL SIGN AA
+0D41..0D44    ; InCB; Extend # Mn   [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
+0D57          ; InCB; Extend # Mc       MALAYALAM AU LENGTH MARK
+0D62..0D63    ; InCB; Extend # Mn   [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL
+0D81          ; InCB; Extend # Mn       SINHALA SIGN CANDRABINDU
+0DCA          ; InCB; Extend # Mn       SINHALA SIGN AL-LAKUNA
+0DCF          ; InCB; Extend # Mc       SINHALA VOWEL SIGN AELA-PILLA
+0DD2..0DD4    ; InCB; Extend # Mn   [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA
+0DD6          ; InCB; Extend # Mn       SINHALA VOWEL SIGN DIGA PAA-PILLA
+0DDF          ; InCB; Extend # Mc       SINHALA VOWEL SIGN GAYANUKITTA
+0E31          ; InCB; Extend # Mn       THAI CHARACTER MAI HAN-AKAT
+0E34..0E3A    ; InCB; Extend # Mn   [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU
+0E47..0E4E    ; InCB; Extend # Mn   [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN
+0EB1          ; InCB; Extend # Mn       LAO VOWEL SIGN MAI KAN
+0EB4..0EBC    ; InCB; Extend # Mn   [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO
+0EC8..0ECE    ; InCB; Extend # Mn   [7] LAO TONE MAI EK..LAO YAMAKKAN
 0F18..0F19    ; InCB; Extend # Mn   [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS
 0F35          ; InCB; Extend # Mn       TIBETAN MARK NGAS BZUNG NYI ZLA
 0F37          ; InCB; Extend # Mn       TIBETAN MARK NGAS BZUNG SGOR RTAGS
 0F39          ; InCB; Extend # Mn       TIBETAN MARK TSA -PHRU
-0F71..0F72    ; InCB; Extend # Mn   [2] TIBETAN VOWEL SIGN AA..TIBETAN VOWEL SIGN I
-0F74          ; InCB; Extend # Mn       TIBETAN VOWEL SIGN U
-0F7A..0F7D    ; InCB; Extend # Mn   [4] TIBETAN VOWEL SIGN E..TIBETAN VOWEL SIGN OO
-0F80          ; InCB; Extend # Mn       TIBETAN VOWEL SIGN REVERSED I
-0F82..0F84    ; InCB; Extend # Mn   [3] TIBETAN SIGN NYI ZLA NAA DA..TIBETAN MARK HALANTA
+0F71..0F7E    ; InCB; Extend # Mn  [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO
+0F80..0F84    ; InCB; Extend # Mn   [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA
 0F86..0F87    ; InCB; Extend # Mn   [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
+0F8D..0F97    ; InCB; Extend # Mn  [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA
+0F99..0FBC    ; InCB; Extend # Mn  [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA
 0FC6          ; InCB; Extend # Mn       TIBETAN SYMBOL PADMA GDAN
-1037          ; InCB; Extend # Mn       MYANMAR SIGN DOT BELOW
+102D..1030    ; InCB; Extend # Mn   [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU
+1032..1037    ; InCB; Extend # Mn   [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW
 1039..103A    ; InCB; Extend # Mn   [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT
+103D..103E    ; InCB; Extend # Mn   [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA
+1058..1059    ; InCB; Extend # Mn   [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL
+105E..1060    ; InCB; Extend # Mn   [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA
+1071..1074    ; InCB; Extend # Mn   [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE
+1082          ; InCB; Extend # Mn       MYANMAR CONSONANT SIGN SHAN MEDIAL WA
+1085..1086    ; InCB; Extend # Mn   [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y
 108D          ; InCB; Extend # Mn       MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE
+109D          ; InCB; Extend # Mn       MYANMAR VOWEL SIGN AITON AI
 135D..135F    ; InCB; Extend # Mn   [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK
-1714          ; InCB; Extend # Mn       TAGALOG SIGN VIRAMA
-17D2          ; InCB; Extend # Mn       KHMER SIGN COENG
+1712..1714    ; InCB; Extend # Mn   [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
+1715          ; InCB; Extend # Mc       TAGALOG SIGN PAMUDPOD
+1732..1733    ; InCB; Extend # Mn   [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U
+1734          ; InCB; Extend # Mc       HANUNOO SIGN PAMUDPOD
+1752..1753    ; InCB; Extend # Mn   [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U
+1772..1773    ; InCB; Extend # Mn   [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U
+17B4..17B5    ; InCB; Extend # Mn   [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
+17B7..17BD    ; InCB; Extend # Mn   [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA
+17C6          ; InCB; Extend # Mn       KHMER SIGN NIKAHIT
+17C9..17D3    ; InCB; Extend # Mn  [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
 17DD          ; InCB; Extend # Mn       KHMER SIGN ATTHACAN
+180B..180D    ; InCB; Extend # Mn   [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
+180F          ; InCB; Extend # Mn       MONGOLIAN FREE VARIATION SELECTOR FOUR
+1885..1886    ; InCB; Extend # Mn   [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
 18A9          ; InCB; Extend # Mn       MONGOLIAN LETTER ALI GALI DAGALGA
+1920..1922    ; InCB; Extend # Mn   [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U
+1927..1928    ; InCB; Extend # Mn   [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O
+1932          ; InCB; Extend # Mn       LIMBU SMALL LETTER ANUSVARA
 1939..193B    ; InCB; Extend # Mn   [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I
 1A17..1A18    ; InCB; Extend # Mn   [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
+1A1B          ; InCB; Extend # Mn       BUGINESE VOWEL SIGN AE
+1A56          ; InCB; Extend # Mn       TAI THAM CONSONANT SIGN MEDIAL LA
+1A58..1A5E    ; InCB; Extend # Mn   [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA
 1A60          ; InCB; Extend # Mn       TAI THAM SIGN SAKOT
-1A75..1A7C    ; InCB; Extend # Mn   [8] TAI THAM SIGN TONE-1..TAI THAM SIGN KHUEN-LUE KARAN
+1A62          ; InCB; Extend # Mn       TAI THAM VOWEL SIGN MAI SAT
+1A65..1A6C    ; InCB; Extend # Mn   [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW
+1A73..1A7C    ; InCB; Extend # Mn  [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN
 1A7F          ; InCB; Extend # Mn       TAI THAM COMBINING CRYPTOGRAMMIC DOT
 1AB0..1ABD    ; InCB; Extend # Mn  [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
+1ABE          ; InCB; Extend # Me       COMBINING PARENTHESES OVERLAY
 1ABF..1ACE    ; InCB; Extend # Mn  [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T
+1B00..1B03    ; InCB; Extend # Mn   [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
 1B34          ; InCB; Extend # Mn       BALINESE SIGN REREKAN
+1B35          ; InCB; Extend # Mc       BALINESE VOWEL SIGN TEDUNG
+1B36..1B3A    ; InCB; Extend # Mn   [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA
+1B3B          ; InCB; Extend # Mc       BALINESE VOWEL SIGN RA REPA TEDUNG
+1B3C          ; InCB; Extend # Mn       BALINESE VOWEL SIGN LA LENGA
+1B3D          ; InCB; Extend # Mc       BALINESE VOWEL SIGN LA LENGA TEDUNG
+1B42          ; InCB; Extend # Mn       BALINESE VOWEL SIGN PEPET
+1B43..1B44    ; InCB; Extend # Mc   [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG
 1B6B..1B73    ; InCB; Extend # Mn   [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
-1BAB          ; InCB; Extend # Mn       SUNDANESE SIGN VIRAMA
+1B80..1B81    ; InCB; Extend # Mn   [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR
+1BA2..1BA5    ; InCB; Extend # Mn   [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU
+1BA8..1BA9    ; InCB; Extend # Mn   [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG
+1BAA          ; InCB; Extend # Mc       SUNDANESE SIGN PAMAAEH
+1BAB..1BAD    ; InCB; Extend # Mn   [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA
 1BE6          ; InCB; Extend # Mn       BATAK SIGN TOMPI
-1C37          ; InCB; Extend # Mn       LEPCHA SIGN NUKTA
+1BE8..1BE9    ; InCB; Extend # Mn   [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE
+1BED          ; InCB; Extend # Mn       BATAK VOWEL SIGN KARO O
+1BEF..1BF1    ; InCB; Extend # Mn   [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H
+1BF2..1BF3    ; InCB; Extend # Mc   [2] BATAK PANGOLAT..BATAK PANONGONAN
+1C2C..1C33    ; InCB; Extend # Mn   [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T
+1C36..1C37    ; InCB; Extend # Mn   [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA
 1CD0..1CD2    ; InCB; Extend # Mn   [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
 1CD4..1CE0    ; InCB; Extend # Mn  [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
 1CE2..1CE8    ; InCB; Extend # Mn   [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
@@ -12738,7 +13137,9 @@ ABED          ; Grapheme_Link # Mn       MEETEI MAYEK APUN IYEK
 1DC0..1DFF    ; InCB; Extend # Mn  [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
 200D          ; InCB; Extend # Cf       ZERO WIDTH JOINER
 20D0..20DC    ; InCB; Extend # Mn  [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
+20DD..20E0    ; InCB; Extend # Me   [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
 20E1          ; InCB; Extend # Mn       COMBINING LEFT RIGHT ARROW ABOVE
+20E2..20E4    ; InCB; Extend # Me   [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE
 20E5..20F0    ; InCB; Extend # Mn  [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE
 2CEF..2CF1    ; InCB; Extend # Mn   [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS
 2D7F          ; InCB; Extend # Mn       TIFINAGH CONSONANT JOINER
@@ -12747,73 +13148,198 @@ ABED          ; Grapheme_Link # Mn       MEETEI MAYEK APUN IYEK
 302E..302F    ; InCB; Extend # Mc   [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK
 3099..309A    ; InCB; Extend # Mn   [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
 A66F          ; InCB; Extend # Mn       COMBINING CYRILLIC VZMET
+A670..A672    ; InCB; Extend # Me   [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN
 A674..A67D    ; InCB; Extend # Mn  [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK
 A69E..A69F    ; InCB; Extend # Mn   [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E
 A6F0..A6F1    ; InCB; Extend # Mn   [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
+A802          ; InCB; Extend # Mn       SYLOTI NAGRI SIGN DVISVARA
+A806          ; InCB; Extend # Mn       SYLOTI NAGRI SIGN HASANTA
+A80B          ; InCB; Extend # Mn       SYLOTI NAGRI SIGN ANUSVARA
+A825..A826    ; InCB; Extend # Mn   [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
 A82C          ; InCB; Extend # Mn       SYLOTI NAGRI SIGN ALTERNATE HASANTA
+A8C4..A8C5    ; InCB; Extend # Mn   [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU
 A8E0..A8F1    ; InCB; Extend # Mn  [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
-A92B..A92D    ; InCB; Extend # Mn   [3] KAYAH LI TONE PLOPHU..KAYAH LI TONE CALYA PLOPHU
+A8FF          ; InCB; Extend # Mn       DEVANAGARI VOWEL SIGN AY
+A926..A92D    ; InCB; Extend # Mn   [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU
+A947..A951    ; InCB; Extend # Mn  [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R
+A953          ; InCB; Extend # Mc       REJANG VIRAMA
+A980..A982    ; InCB; Extend # Mn   [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR
 A9B3          ; InCB; Extend # Mn       JAVANESE SIGN CECAK TELU
+A9B6..A9B9    ; InCB; Extend # Mn   [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT
+A9BC..A9BD    ; InCB; Extend # Mn   [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET
+A9C0          ; InCB; Extend # Mc       JAVANESE PANGKON
+A9E5          ; InCB; Extend # Mn       MYANMAR SIGN SHAN SAW
+AA29..AA2E    ; InCB; Extend # Mn   [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE
+AA31..AA32    ; InCB; Extend # Mn   [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE
+AA35..AA36    ; InCB; Extend # Mn   [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA
+AA43          ; InCB; Extend # Mn       CHAM CONSONANT SIGN FINAL NG
+AA4C          ; InCB; Extend # Mn       CHAM CONSONANT SIGN FINAL M
+AA7C          ; InCB; Extend # Mn       MYANMAR SIGN TAI LAING TONE-2
 AAB0          ; InCB; Extend # Mn       TAI VIET MAI KANG
 AAB2..AAB4    ; InCB; Extend # Mn   [3] TAI VIET VOWEL I..TAI VIET VOWEL U
 AAB7..AAB8    ; InCB; Extend # Mn   [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA
 AABE..AABF    ; InCB; Extend # Mn   [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK
 AAC1          ; InCB; Extend # Mn       TAI VIET TONE MAI THO
+AAEC..AAED    ; InCB; Extend # Mn   [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI
 AAF6          ; InCB; Extend # Mn       MEETEI MAYEK VIRAMA
+ABE5          ; InCB; Extend # Mn       MEETEI MAYEK VOWEL SIGN ANAP
+ABE8          ; InCB; Extend # Mn       MEETEI MAYEK VOWEL SIGN UNAP
 ABED          ; InCB; Extend # Mn       MEETEI MAYEK APUN IYEK
 FB1E          ; InCB; Extend # Mn       HEBREW POINT JUDEO-SPANISH VARIKA
+FE00..FE0F    ; InCB; Extend # Mn  [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
 FE20..FE2F    ; InCB; Extend # Mn  [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF
+FF9E..FF9F    ; InCB; Extend # Lm   [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
 101FD         ; InCB; Extend # Mn       PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE
 102E0         ; InCB; Extend # Mn       COPTIC EPACT THOUSANDS MARK
 10376..1037A  ; InCB; Extend # Mn   [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII
-10A0D         ; InCB; Extend # Mn       KHAROSHTHI SIGN DOUBLE RING BELOW
-10A0F         ; InCB; Extend # Mn       KHAROSHTHI SIGN VISARGA
+10A01..10A03  ; InCB; Extend # Mn   [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R
+10A05..10A06  ; InCB; Extend # Mn   [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O
+10A0C..10A0F  ; InCB; Extend # Mn   [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA
 10A38..10A3A  ; InCB; Extend # Mn   [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
 10A3F         ; InCB; Extend # Mn       KHAROSHTHI VIRAMA
 10AE5..10AE6  ; InCB; Extend # Mn   [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
 10D24..10D27  ; InCB; Extend # Mn   [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
+10D69..10D6D  ; InCB; Extend # Mn   [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK
 10EAB..10EAC  ; InCB; Extend # Mn   [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
-10EFD..10EFF  ; InCB; Extend # Mn   [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
+10EFC..10EFF  ; InCB; Extend # Mn   [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA
 10F46..10F50  ; InCB; Extend # Mn  [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
 10F82..10F85  ; InCB; Extend # Mn   [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
+11001         ; InCB; Extend # Mn       BRAHMI SIGN ANUSVARA
+11038..11046  ; InCB; Extend # Mn  [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA
 11070         ; InCB; Extend # Mn       BRAHMI SIGN OLD TAMIL VIRAMA
-1107F         ; InCB; Extend # Mn       BRAHMI NUMBER JOINER
-110BA         ; InCB; Extend # Mn       KAITHI SIGN NUKTA
+11073..11074  ; InCB; Extend # Mn   [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O
+1107F..11081  ; InCB; Extend # Mn   [3] BRAHMI NUMBER JOINER..KAITHI SIGN ANUSVARA
+110B3..110B6  ; InCB; Extend # Mn   [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI
+110B9..110BA  ; InCB; Extend # Mn   [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA
+110C2         ; InCB; Extend # Mn       KAITHI VOWEL SIGN VOCALIC R
 11100..11102  ; InCB; Extend # Mn   [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA
-11133..11134  ; InCB; Extend # Mn   [2] CHAKMA VIRAMA..CHAKMA MAAYYAA
+11127..1112B  ; InCB; Extend # Mn   [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU
+1112D..11134  ; InCB; Extend # Mn   [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA
 11173         ; InCB; Extend # Mn       MAHAJANI SIGN NUKTA
-111CA         ; InCB; Extend # Mn       SHARADA SIGN NUKTA
-11236         ; InCB; Extend # Mn       KHOJKI SIGN NUKTA
-112E9..112EA  ; InCB; Extend # Mn   [2] KHUDAWADI SIGN NUKTA..KHUDAWADI SIGN VIRAMA
+11180..11181  ; InCB; Extend # Mn   [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA
+111B6..111BE  ; InCB; Extend # Mn   [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O
+111C0         ; InCB; Extend # Mc       SHARADA SIGN VIRAMA
+111C9..111CC  ; InCB; Extend # Mn   [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK
+111CF         ; InCB; Extend # Mn       SHARADA SIGN INVERTED CANDRABINDU
+1122F..11231  ; InCB; Extend # Mn   [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI
+11234         ; InCB; Extend # Mn       KHOJKI SIGN ANUSVARA
+11235         ; InCB; Extend # Mc       KHOJKI SIGN VIRAMA
+11236..11237  ; InCB; Extend # Mn   [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA
+1123E         ; InCB; Extend # Mn       KHOJKI SIGN SUKUN
+11241         ; InCB; Extend # Mn       KHOJKI VOWEL SIGN VOCALIC R
+112DF         ; InCB; Extend # Mn       KHUDAWADI SIGN ANUSVARA
+112E3..112EA  ; InCB; Extend # Mn   [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA
+11300..11301  ; InCB; Extend # Mn   [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU
 1133B..1133C  ; InCB; Extend # Mn   [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA
+1133E         ; InCB; Extend # Mc       GRANTHA VOWEL SIGN AA
+11340         ; InCB; Extend # Mn       GRANTHA VOWEL SIGN II
+1134D         ; InCB; Extend # Mc       GRANTHA SIGN VIRAMA
+11357         ; InCB; Extend # Mc       GRANTHA AU LENGTH MARK
 11366..1136C  ; InCB; Extend # Mn   [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX
 11370..11374  ; InCB; Extend # Mn   [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA
+113B8         ; InCB; Extend # Mc       TULU-TIGALARI VOWEL SIGN AA
+113BB..113C0  ; InCB; Extend # Mn   [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL
+113C2         ; InCB; Extend # Mc       TULU-TIGALARI VOWEL SIGN EE
+113C5         ; InCB; Extend # Mc       TULU-TIGALARI VOWEL SIGN AI
+113C7..113C9  ; InCB; Extend # Mc   [3] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI AU LENGTH MARK
+113CE         ; InCB; Extend # Mn       TULU-TIGALARI SIGN VIRAMA
+113CF         ; InCB; Extend # Mc       TULU-TIGALARI SIGN LOOPED VIRAMA
+113D0         ; InCB; Extend # Mn       TULU-TIGALARI CONJOINER
+113D2         ; InCB; Extend # Mn       TULU-TIGALARI GEMINATION MARK
+113E1..113E2  ; InCB; Extend # Mn   [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA
+11438..1143F  ; InCB; Extend # Mn   [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI
+11442..11444  ; InCB; Extend # Mn   [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA
 11446         ; InCB; Extend # Mn       NEWA SIGN NUKTA
 1145E         ; InCB; Extend # Mn       NEWA SANDHI MARK
-114C3         ; InCB; Extend # Mn       TIRHUTA SIGN NUKTA
-115C0         ; InCB; Extend # Mn       SIDDHAM SIGN NUKTA
+114B0         ; InCB; Extend # Mc       TIRHUTA VOWEL SIGN AA
+114B3..114B8  ; InCB; Extend # Mn   [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL
+114BA         ; InCB; Extend # Mn       TIRHUTA VOWEL SIGN SHORT E
+114BD         ; InCB; Extend # Mc       TIRHUTA VOWEL SIGN SHORT O
+114BF..114C0  ; InCB; Extend # Mn   [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA
+114C2..114C3  ; InCB; Extend # Mn   [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA
+115AF         ; InCB; Extend # Mc       SIDDHAM VOWEL SIGN AA
+115B2..115B5  ; InCB; Extend # Mn   [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR
+115BC..115BD  ; InCB; Extend # Mn   [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA
+115BF..115C0  ; InCB; Extend # Mn   [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA
+115DC..115DD  ; InCB; Extend # Mn   [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU
+11633..1163A  ; InCB; Extend # Mn   [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI
+1163D         ; InCB; Extend # Mn       MODI SIGN ANUSVARA
+1163F..11640  ; InCB; Extend # Mn   [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA
+116AB         ; InCB; Extend # Mn       TAKRI SIGN ANUSVARA
+116AD         ; InCB; Extend # Mn       TAKRI VOWEL SIGN AA
+116B0..116B5  ; InCB; Extend # Mn   [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU
+116B6         ; InCB; Extend # Mc       TAKRI SIGN VIRAMA
 116B7         ; InCB; Extend # Mn       TAKRI SIGN NUKTA
-1172B         ; InCB; Extend # Mn       AHOM SIGN KILLER
-1183A         ; InCB; Extend # Mn       DOGRA SIGN NUKTA
+1171D         ; InCB; Extend # Mn       AHOM CONSONANT SIGN MEDIAL LA
+1171F         ; InCB; Extend # Mn       AHOM CONSONANT SIGN MEDIAL LIGATING RA
+11722..11725  ; InCB; Extend # Mn   [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU
+11727..1172B  ; InCB; Extend # Mn   [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER
+1182F..11837  ; InCB; Extend # Mn   [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA
+11839..1183A  ; InCB; Extend # Mn   [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA
+11930         ; InCB; Extend # Mc       DIVES AKURU VOWEL SIGN AA
+1193B..1193C  ; InCB; Extend # Mn   [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU
+1193D         ; InCB; Extend # Mc       DIVES AKURU SIGN HALANTA
 1193E         ; InCB; Extend # Mn       DIVES AKURU VIRAMA
 11943         ; InCB; Extend # Mn       DIVES AKURU SIGN NUKTA
-11A34         ; InCB; Extend # Mn       ZANABAZAR SQUARE SIGN VIRAMA
+119D4..119D7  ; InCB; Extend # Mn   [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR
+119DA..119DB  ; InCB; Extend # Mn   [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI
+119E0         ; InCB; Extend # Mn       NANDINAGARI SIGN VIRAMA
+11A01..11A0A  ; InCB; Extend # Mn  [10] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL LENGTH MARK
+11A33..11A38  ; InCB; Extend # Mn   [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA
+11A3B..11A3E  ; InCB; Extend # Mn   [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA
 11A47         ; InCB; Extend # Mn       ZANABAZAR SQUARE SUBJOINER
-11A99         ; InCB; Extend # Mn       SOYOMBO SUBJOINER
-11D42         ; InCB; Extend # Mn       MASARAM GONDI SIGN NUKTA
-11D44..11D45  ; InCB; Extend # Mn   [2] MASARAM GONDI SIGN HALANTA..MASARAM GONDI VIRAMA
+11A51..11A56  ; InCB; Extend # Mn   [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE
+11A59..11A5B  ; InCB; Extend # Mn   [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK
+11A8A..11A96  ; InCB; Extend # Mn  [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA
+11A98..11A99  ; InCB; Extend # Mn   [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER
+11C30..11C36  ; InCB; Extend # Mn   [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L
+11C38..11C3D  ; InCB; Extend # Mn   [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA
+11C3F         ; InCB; Extend # Mn       BHAIKSUKI SIGN VIRAMA
+11C92..11CA7  ; InCB; Extend # Mn  [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA
+11CAA..11CB0  ; InCB; Extend # Mn   [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA
+11CB2..11CB3  ; InCB; Extend # Mn   [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E
+11CB5..11CB6  ; InCB; Extend # Mn   [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU
+11D31..11D36  ; InCB; Extend # Mn   [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R
+11D3A         ; InCB; Extend # Mn       MASARAM GONDI VOWEL SIGN E
+11D3C..11D3D  ; InCB; Extend # Mn   [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O
+11D3F..11D45  ; InCB; Extend # Mn   [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA
+11D47         ; InCB; Extend # Mn       MASARAM GONDI RA-KARA
+11D90..11D91  ; InCB; Extend # Mn   [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI
+11D95         ; InCB; Extend # Mn       GUNJALA GONDI SIGN ANUSVARA
 11D97         ; InCB; Extend # Mn       GUNJALA GONDI VIRAMA
+11EF3..11EF4  ; InCB; Extend # Mn   [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U
+11F00..11F01  ; InCB; Extend # Mn   [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA
+11F36..11F3A  ; InCB; Extend # Mn   [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R
+11F40         ; InCB; Extend # Mn       KAWI VOWEL SIGN EU
+11F41         ; InCB; Extend # Mc       KAWI SIGN KILLER
 11F42         ; InCB; Extend # Mn       KAWI CONJOINER
+11F5A         ; InCB; Extend # Mn       KAWI SIGN NUKTA
+13440         ; InCB; Extend # Mn       EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY
+13447..13455  ; InCB; Extend # Mn  [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED
+1611E..16129  ; InCB; Extend # Mn  [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK
+1612D..1612F  ; InCB; Extend # Mn   [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA
 16AF0..16AF4  ; InCB; Extend # Mn   [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE
 16B30..16B36  ; InCB; Extend # Mn   [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
-1BC9E         ; InCB; Extend # Mn       DUPLOYAN DOUBLE MARK
-1D165         ; InCB; Extend # Mc       MUSICAL SYMBOL COMBINING STEM
+16F4F         ; InCB; Extend # Mn       MIAO SIGN CONSONANT MODIFIER BAR
+16F8F..16F92  ; InCB; Extend # Mn   [4] MIAO TONE RIGHT..MIAO TONE BELOW
+16FE4         ; InCB; Extend # Mn       KHITAN SMALL SCRIPT FILLER
+16FF0..16FF1  ; InCB; Extend # Mc   [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
+1BC9D..1BC9E  ; InCB; Extend # Mn   [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
+1CF00..1CF2D  ; InCB; Extend # Mn  [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT
+1CF30..1CF46  ; InCB; Extend # Mn  [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG
+1D165..1D166  ; InCB; Extend # Mc   [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
 1D167..1D169  ; InCB; Extend # Mn   [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
-1D16E..1D172  ; InCB; Extend # Mc   [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5
+1D16D..1D172  ; InCB; Extend # Mc   [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5
 1D17B..1D182  ; InCB; Extend # Mn   [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
 1D185..1D18B  ; InCB; Extend # Mn   [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
 1D1AA..1D1AD  ; InCB; Extend # Mn   [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
 1D242..1D244  ; InCB; Extend # Mn   [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
+1DA00..1DA36  ; InCB; Extend # Mn  [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN
+1DA3B..1DA6C  ; InCB; Extend # Mn  [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT
+1DA75         ; InCB; Extend # Mn       SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS
+1DA84         ; InCB; Extend # Mn       SIGNWRITING LOCATION HEAD NECK
+1DA9B..1DA9F  ; InCB; Extend # Mn   [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6
+1DAA1..1DAAF  ; InCB; Extend # Mn  [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16
 1E000..1E006  ; InCB; Extend # Mn   [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
 1E008..1E018  ; InCB; Extend # Mn  [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
 1E01B..1E021  ; InCB; Extend # Mn   [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
@@ -12824,9 +13350,13 @@ FE20..FE2F    ; InCB; Extend # Mn  [16] COMBINING LIGATURE LEFT HALF..COMBINING
 1E2AE         ; InCB; Extend # Mn       TOTO SIGN RISING TONE
 1E2EC..1E2EF  ; InCB; Extend # Mn   [4] WANCHO TONE TUP..WANCHO TONE KOINI
 1E4EC..1E4EF  ; InCB; Extend # Mn   [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH
+1E5EE..1E5EF  ; InCB; Extend # Mn   [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR
 1E8D0..1E8D6  ; InCB; Extend # Mn   [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
 1E944..1E94A  ; InCB; Extend # Mn   [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
+1F3FB..1F3FF  ; InCB; Extend # Sk   [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6
+E0020..E007F  ; InCB; Extend # Cf  [96] TAG SPACE..CANCEL TAG
+E0100..E01EF  ; InCB; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
 
-# Total code points: 884
+# Total code points: 2192
 
 # EOF
diff --git a/libcxx/utils/data/unicode/DerivedGeneralCategory.txt b/libcxx/utils/data/unicode/DerivedGeneralCategory.txt
index 285ffa8..07bf7bc 100644
--- a/libcxx/utils/data/unicode/DerivedGeneralCategory.txt
+++ b/libcxx/utils/data/unicode/DerivedGeneralCategory.txt
@@ -1,8 +1,8 @@
-# DerivedGeneralCategory-15.1.0.txt
-# Date: 2023-07-28, 23:34:02 GMT
-# © 2023 Unicode®, Inc.
+# DerivedGeneralCategory-16.0.0.txt
+# Date: 2024-04-30, 21:48:17 GMT
+# © 2024 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
-# For terms of use, see https://www.unicode.org/terms_of_use.html
+# For terms of use and license, see https://www.unicode.org/terms_of_use.html
 #
 # Unicode Character Database
 #   For documentation, see https://www.unicode.org/reports/tr44/
@@ -37,7 +37,7 @@
 085F          ; Cn #       <reserved-085F>
 086B..086F    ; Cn #   [5] <reserved-086B>..<reserved-086F>
 088F          ; Cn #       <reserved-088F>
-0892..0897    ; Cn #   [6] <reserved-0892>..<reserved-0897>
+0892..0896    ; Cn #   [5] <reserved-0892>..<reserved-0896>
 0984          ; Cn #       <reserved-0984>
 098D..098E    ; Cn #   [2] <reserved-098D>..<reserved-098E>
 0991..0992    ; Cn #   [2] <reserved-0991>..<reserved-0992>
@@ -229,12 +229,11 @@
 1A9A..1A9F    ; Cn #   [6] <reserved-1A9A>..<reserved-1A9F>
 1AAE..1AAF    ; Cn #   [2] <reserved-1AAE>..<reserved-1AAF>
 1ACF..1AFF    ; Cn #  [49] <reserved-1ACF>..<reserved-1AFF>
-1B4D..1B4F    ; Cn #   [3] <reserved-1B4D>..<reserved-1B4F>
-1B7F          ; Cn #       <reserved-1B7F>
+1B4D          ; Cn #       <reserved-1B4D>
 1BF4..1BFB    ; Cn #   [8] <reserved-1BF4>..<reserved-1BFB>
 1C38..1C3A    ; Cn #   [3] <reserved-1C38>..<reserved-1C3A>
 1C4A..1C4C    ; Cn #   [3] <reserved-1C4A>..<reserved-1C4C>
-1C89..1C8F    ; Cn #   [7] <reserved-1C89>..<reserved-1C8F>
+1C8B..1C8F    ; Cn #   [5] <reserved-1C8B>..<reserved-1C8F>
 1CBB..1CBC    ; Cn #   [2] <reserved-1CBB>..<reserved-1CBC>
 1CC8..1CCF    ; Cn #   [8] <reserved-1CC8>..<reserved-1CCF>
 1CFB..1CFF    ; Cn #   [5] <reserved-1CFB>..<reserved-1CFF>
@@ -261,7 +260,7 @@
 20C1..20CF    ; Cn #  [15] <reserved-20C1>..<reserved-20CF>
 20F1..20FF    ; Cn #  [15] <reserved-20F1>..<reserved-20FF>
 218C..218F    ; Cn #   [4] <reserved-218C>..<reserved-218F>
-2427..243F    ; Cn #  [25] <reserved-2427>..<reserved-243F>
+242A..243F    ; Cn #  [22] <reserved-242A>..<reserved-243F>
 244B..245F    ; Cn #  [21] <reserved-244B>..<reserved-245F>
 2B74..2B75    ; Cn #   [2] <reserved-2B74>..<reserved-2B75>
 2B96          ; Cn #       <reserved-2B96>
@@ -289,16 +288,16 @@
 3100..3104    ; Cn #   [5] <reserved-3100>..<reserved-3104>
 3130          ; Cn #       <reserved-3130>
 318F          ; Cn #       <reserved-318F>
-31E4..31EE    ; Cn #  [11] <reserved-31E4>..<reserved-31EE>
+31E6..31EE    ; Cn #   [9] <reserved-31E6>..<reserved-31EE>
 321F          ; Cn #       <reserved-321F>
 A48D..A48F    ; Cn #   [3] <reserved-A48D>..<reserved-A48F>
 A4C7..A4CF    ; Cn #   [9] <reserved-A4C7>..<reserved-A4CF>
 A62C..A63F    ; Cn #  [20] <reserved-A62C>..<reserved-A63F>
 A6F8..A6FF    ; Cn #   [8] <reserved-A6F8>..<reserved-A6FF>
-A7CB..A7CF    ; Cn #   [5] <reserved-A7CB>..<reserved-A7CF>
+A7CE..A7CF    ; Cn #   [2] <reserved-A7CE>..<reserved-A7CF>
 A7D2          ; Cn #       <reserved-A7D2>
 A7D4          ; Cn #       <reserved-A7D4>
-A7DA..A7F1    ; Cn #  [24] <reserved-A7DA>..<reserved-A7F1>
+A7DD..A7F1    ; Cn #  [21] <reserved-A7DD>..<reserved-A7F1>
 A82D..A82F    ; Cn #   [3] <reserved-A82D>..<reserved-A82F>
 A83A..A83F    ; Cn #   [6] <reserved-A83A>..<reserved-A83F>
 A878..A87F    ; Cn #   [8] <reserved-A878>..<reserved-A87F>
@@ -388,7 +387,8 @@ FFFE..FFFF    ; Cn #   [2] <noncharacter-FFFE>..<noncharacter-FFFF>
 105A2         ; Cn #       <reserved-105A2>
 105B2         ; Cn #       <reserved-105B2>
 105BA         ; Cn #       <reserved-105BA>
-105BD..105FF  ; Cn #  [67] <reserved-105BD>..<reserved-105FF>
+105BD..105BF  ; Cn #   [3] <reserved-105BD>..<reserved-105BF>
+105F4..105FF  ; Cn #  [12] <reserved-105F4>..<reserved-105FF>
 10737..1073F  ; Cn #   [9] <reserved-10737>..<reserved-1073F>
 10756..1075F  ; Cn #  [10] <reserved-10756>..<reserved-1075F>
 10768..1077F  ; Cn #  [24] <reserved-10768>..<reserved-1077F>
@@ -431,11 +431,15 @@ FFFE..FFFF    ; Cn #   [2] <noncharacter-FFFE>..<noncharacter-FFFF>
 10CB3..10CBF  ; Cn #  [13] <reserved-10CB3>..<reserved-10CBF>
 10CF3..10CF9  ; Cn #   [7] <reserved-10CF3>..<reserved-10CF9>
 10D28..10D2F  ; Cn #   [8] <reserved-10D28>..<reserved-10D2F>
-10D3A..10E5F  ; Cn # [294] <reserved-10D3A>..<reserved-10E5F>
+10D3A..10D3F  ; Cn #   [6] <reserved-10D3A>..<reserved-10D3F>
+10D66..10D68  ; Cn #   [3] <reserved-10D66>..<reserved-10D68>
+10D86..10D8D  ; Cn #   [8] <reserved-10D86>..<reserved-10D8D>
+10D90..10E5F  ; Cn # [208] <reserved-10D90>..<reserved-10E5F>
 10E7F         ; Cn #       <reserved-10E7F>
 10EAA         ; Cn #       <reserved-10EAA>
 10EAE..10EAF  ; Cn #   [2] <reserved-10EAE>..<reserved-10EAF>
-10EB2..10EFC  ; Cn #  [75] <reserved-10EB2>..<reserved-10EFC>
+10EB2..10EC1  ; Cn #  [16] <reserved-10EB2>..<reserved-10EC1>
+10EC5..10EFB  ; Cn #  [55] <reserved-10EC5>..<reserved-10EFB>
 10F28..10F2F  ; Cn #   [8] <reserved-10F28>..<reserved-10F2F>
 10F5A..10F6F  ; Cn #  [22] <reserved-10F5A>..<reserved-10F6F>
 10F8A..10FAF  ; Cn #  [38] <reserved-10F8A>..<reserved-10FAF>
@@ -475,7 +479,18 @@ FFFE..FFFF    ; Cn #   [2] <noncharacter-FFFE>..<noncharacter-FFFF>
 11358..1135C  ; Cn #   [5] <reserved-11358>..<reserved-1135C>
 11364..11365  ; Cn #   [2] <reserved-11364>..<reserved-11365>
 1136D..1136F  ; Cn #   [3] <reserved-1136D>..<reserved-1136F>
-11375..113FF  ; Cn # [139] <reserved-11375>..<reserved-113FF>
+11375..1137F  ; Cn #  [11] <reserved-11375>..<reserved-1137F>
+1138A         ; Cn #       <reserved-1138A>
+1138C..1138D  ; Cn #   [2] <reserved-1138C>..<reserved-1138D>
+1138F         ; Cn #       <reserved-1138F>
+113B6         ; Cn #       <reserved-113B6>
+113C1         ; Cn #       <reserved-113C1>
+113C3..113C4  ; Cn #   [2] <reserved-113C3>..<reserved-113C4>
+113C6         ; Cn #       <reserved-113C6>
+113CB         ; Cn #       <reserved-113CB>
+113D6         ; Cn #       <reserved-113D6>
+113D9..113E0  ; Cn #   [8] <reserved-113D9>..<reserved-113E0>
+113E3..113FF  ; Cn #  [29] <reserved-113E3>..<reserved-113FF>
 1145C         ; Cn #       <reserved-1145C>
 11462..1147F  ; Cn #  [30] <reserved-11462>..<reserved-1147F>
 114C8..114CF  ; Cn #   [8] <reserved-114C8>..<reserved-114CF>
@@ -486,7 +501,8 @@ FFFE..FFFF    ; Cn #   [2] <noncharacter-FFFE>..<noncharacter-FFFF>
 1165A..1165F  ; Cn #   [6] <reserved-1165A>..<reserved-1165F>
 1166D..1167F  ; Cn #  [19] <reserved-1166D>..<reserved-1167F>
 116BA..116BF  ; Cn #   [6] <reserved-116BA>..<reserved-116BF>
-116CA..116FF  ; Cn #  [54] <reserved-116CA>..<reserved-116FF>
+116CA..116CF  ; Cn #   [6] <reserved-116CA>..<reserved-116CF>
+116E4..116FF  ; Cn #  [28] <reserved-116E4>..<reserved-116FF>
 1171B..1171C  ; Cn #   [2] <reserved-1171B>..<reserved-1171C>
 1172C..1172F  ; Cn #   [4] <reserved-1172C>..<reserved-1172F>
 11747..117FF  ; Cn # [185] <reserved-11747>..<reserved-117FF>
@@ -506,7 +522,9 @@ FFFE..FFFF    ; Cn #   [2] <noncharacter-FFFE>..<noncharacter-FFFF>
 11A48..11A4F  ; Cn #   [8] <reserved-11A48>..<reserved-11A4F>
 11AA3..11AAF  ; Cn #  [13] <reserved-11AA3>..<reserved-11AAF>
 11AF9..11AFF  ; Cn #   [7] <reserved-11AF9>..<reserved-11AFF>
-11B0A..11BFF  ; Cn # [246] <reserved-11B0A>..<reserved-11BFF>
+11B0A..11BBF  ; Cn # [182] <reserved-11B0A>..<reserved-11BBF>
+11BE2..11BEF  ; Cn #  [14] <reserved-11BE2>..<reserved-11BEF>
+11BFA..11BFF  ; Cn #   [6] <reserved-11BFA>..<reserved-11BFF>
 11C09         ; Cn #       <reserved-11C09>
 11C37         ; Cn #       <reserved-11C37>
 11C46..11C4F  ; Cn #  [10] <reserved-11C46>..<reserved-11C4F>
@@ -530,7 +548,7 @@ FFFE..FFFF    ; Cn #   [2] <noncharacter-FFFE>..<noncharacter-FFFF>
 11EF9..11EFF  ; Cn #   [7] <reserved-11EF9>..<reserved-11EFF>
 11F11         ; Cn #       <reserved-11F11>
 11F3B..11F3D  ; Cn #   [3] <reserved-11F3B>..<reserved-11F3D>
-11F5A..11FAF  ; Cn #  [86] <reserved-11F5A>..<reserved-11FAF>
+11F5B..11FAF  ; Cn #  [85] <reserved-11F5B>..<reserved-11FAF>
 11FB1..11FBF  ; Cn #  [15] <reserved-11FB1>..<reserved-11FBF>
 11FF2..11FFE  ; Cn #  [13] <reserved-11FF2>..<reserved-11FFE>
 1239A..123FF  ; Cn # [102] <reserved-1239A>..<reserved-123FF>
@@ -538,8 +556,10 @@ FFFE..FFFF    ; Cn #   [2] <noncharacter-FFFE>..<noncharacter-FFFF>
 12475..1247F  ; Cn #  [11] <reserved-12475>..<reserved-1247F>
 12544..12F8F  ; Cn # [2636] <reserved-12544>..<reserved-12F8F>
 12FF3..12FFF  ; Cn #  [13] <reserved-12FF3>..<reserved-12FFF>
-13456..143FF  ; Cn # [4010] <reserved-13456>..<reserved-143FF>
-14647..167FF  ; Cn # [8633] <reserved-14647>..<reserved-167FF>
+13456..1345F  ; Cn #  [10] <reserved-13456>..<reserved-1345F>
+143FB..143FF  ; Cn #   [5] <reserved-143FB>..<reserved-143FF>
+14647..160FF  ; Cn # [6841] <reserved-14647>..<reserved-160FF>
+1613A..167FF  ; Cn # [1734] <reserved-1613A>..<reserved-167FF>
 16A39..16A3F  ; Cn #   [7] <reserved-16A39>..<reserved-16A3F>
 16A5F         ; Cn #       <reserved-16A5F>
 16A6A..16A6D  ; Cn #   [4] <reserved-16A6A>..<reserved-16A6D>
@@ -551,7 +571,8 @@ FFFE..FFFF    ; Cn #   [2] <noncharacter-FFFE>..<noncharacter-FFFF>
 16B5A         ; Cn #       <reserved-16B5A>
 16B62         ; Cn #       <reserved-16B62>
 16B78..16B7C  ; Cn #   [5] <reserved-16B78>..<reserved-16B7C>
-16B90..16E3F  ; Cn # [688] <reserved-16B90>..<reserved-16E3F>
+16B90..16D3F  ; Cn # [432] <reserved-16B90>..<reserved-16D3F>
+16D7A..16E3F  ; Cn # [198] <reserved-16D7A>..<reserved-16E3F>
 16E9B..16EFF  ; Cn # [101] <reserved-16E9B>..<reserved-16EFF>
 16F4B..16F4E  ; Cn #   [4] <reserved-16F4B>..<reserved-16F4E>
 16F88..16F8E  ; Cn #   [7] <reserved-16F88>..<reserved-16F8E>
@@ -559,7 +580,7 @@ FFFE..FFFF    ; Cn #   [2] <noncharacter-FFFE>..<noncharacter-FFFF>
 16FE5..16FEF  ; Cn #  [11] <reserved-16FE5>..<reserved-16FEF>
 16FF2..16FFF  ; Cn #  [14] <reserved-16FF2>..<reserved-16FFF>
 187F8..187FF  ; Cn #   [8] <reserved-187F8>..<reserved-187FF>
-18CD6..18CFF  ; Cn #  [42] <reserved-18CD6>..<reserved-18CFF>
+18CD6..18CFE  ; Cn #  [41] <reserved-18CD6>..<reserved-18CFE>
 18D09..1AFEF  ; Cn # [8935] <reserved-18D09>..<reserved-1AFEF>
 1AFF4         ; Cn #       <reserved-1AFF4>
 1AFFC         ; Cn #       <reserved-1AFFC>
@@ -574,7 +595,9 @@ FFFE..FFFF    ; Cn #   [2] <noncharacter-FFFE>..<noncharacter-FFFF>
 1BC7D..1BC7F  ; Cn #   [3] <reserved-1BC7D>..<reserved-1BC7F>
 1BC89..1BC8F  ; Cn #   [7] <reserved-1BC89>..<reserved-1BC8F>
 1BC9A..1BC9B  ; Cn #   [2] <reserved-1BC9A>..<reserved-1BC9B>
-1BCA4..1CEFF  ; Cn # [4700] <reserved-1BCA4>..<reserved-1CEFF>
+1BCA4..1CBFF  ; Cn # [3932] <reserved-1BCA4>..<reserved-1CBFF>
+1CCFA..1CCFF  ; Cn #   [6] <reserved-1CCFA>..<reserved-1CCFF>
+1CEB4..1CEFF  ; Cn #  [76] <reserved-1CEB4>..<reserved-1CEFF>
 1CF2E..1CF2F  ; Cn #   [2] <reserved-1CF2E>..<reserved-1CF2F>
 1CF47..1CF4F  ; Cn #   [9] <reserved-1CF47>..<reserved-1CF4F>
 1CFC4..1CFFF  ; Cn #  [60] <reserved-1CFC4>..<reserved-1CFFF>
@@ -625,7 +648,9 @@ FFFE..FFFF    ; Cn #   [2] <noncharacter-FFFE>..<noncharacter-FFFF>
 1E2AF..1E2BF  ; Cn #  [17] <reserved-1E2AF>..<reserved-1E2BF>
 1E2FA..1E2FE  ; Cn #   [5] <reserved-1E2FA>..<reserved-1E2FE>
 1E300..1E4CF  ; Cn # [464] <reserved-1E300>..<reserved-1E4CF>
-1E4FA..1E7DF  ; Cn # [742] <reserved-1E4FA>..<reserved-1E7DF>
+1E4FA..1E5CF  ; Cn # [214] <reserved-1E4FA>..<reserved-1E5CF>
+1E5FB..1E5FE  ; Cn #   [4] <reserved-1E5FB>..<reserved-1E5FE>
+1E600..1E7DF  ; Cn # [480] <reserved-1E600>..<reserved-1E7DF>
 1E7E7         ; Cn #       <reserved-1E7E7>
 1E7EC         ; Cn #       <reserved-1E7EC>
 1E7EF         ; Cn #       <reserved-1E7EF>
@@ -695,18 +720,17 @@ FFFE..FFFF    ; Cn #   [2] <noncharacter-FFFE>..<noncharacter-FFFF>
 1F85A..1F85F  ; Cn #   [6] <reserved-1F85A>..<reserved-1F85F>
 1F888..1F88F  ; Cn #   [8] <reserved-1F888>..<reserved-1F88F>
 1F8AE..1F8AF  ; Cn #   [2] <reserved-1F8AE>..<reserved-1F8AF>
-1F8B2..1F8FF  ; Cn #  [78] <reserved-1F8B2>..<reserved-1F8FF>
+1F8BC..1F8BF  ; Cn #   [4] <reserved-1F8BC>..<reserved-1F8BF>
+1F8C2..1F8FF  ; Cn #  [62] <reserved-1F8C2>..<reserved-1F8FF>
 1FA54..1FA5F  ; Cn #  [12] <reserved-1FA54>..<reserved-1FA5F>
 1FA6E..1FA6F  ; Cn #   [2] <reserved-1FA6E>..<reserved-1FA6F>
 1FA7D..1FA7F  ; Cn #   [3] <reserved-1FA7D>..<reserved-1FA7F>
-1FA89..1FA8F  ; Cn #   [7] <reserved-1FA89>..<reserved-1FA8F>
-1FABE         ; Cn #       <reserved-1FABE>
-1FAC6..1FACD  ; Cn #   [8] <reserved-1FAC6>..<reserved-1FACD>
-1FADC..1FADF  ; Cn #   [4] <reserved-1FADC>..<reserved-1FADF>
-1FAE9..1FAEF  ; Cn #   [7] <reserved-1FAE9>..<reserved-1FAEF>
+1FA8A..1FA8E  ; Cn #   [5] <reserved-1FA8A>..<reserved-1FA8E>
+1FAC7..1FACD  ; Cn #   [7] <reserved-1FAC7>..<reserved-1FACD>
+1FADD..1FADE  ; Cn #   [2] <reserved-1FADD>..<reserved-1FADE>
+1FAEA..1FAEF  ; Cn #   [6] <reserved-1FAEA>..<reserved-1FAEF>
 1FAF9..1FAFF  ; Cn #   [7] <reserved-1FAF9>..<reserved-1FAFF>
 1FB93         ; Cn #       <reserved-1FB93>
-1FBCB..1FBEF  ; Cn #  [37] <reserved-1FBCB>..<reserved-1FBEF>
 1FBFA..1FFFF  ; Cn # [1030] <reserved-1FBFA>..<noncharacter-1FFFF>
 2A6E0..2A6FF  ; Cn #  [32] <reserved-2A6E0>..<reserved-2A6FF>
 2B73A..2B73F  ; Cn #   [6] <reserved-2B73A>..<reserved-2B73F>
@@ -723,7 +747,7 @@ E01F0..EFFFF  ; Cn # [65040] <reserved-E01F0>..<noncharacter-EFFFF>
 FFFFE..FFFFF  ; Cn #   [2] <noncharacter-FFFFE>..<noncharacter-FFFFF>
 10FFFE..10FFFF; Cn #   [2] <noncharacter-10FFFE>..<noncharacter-10FFFF>
 
-# Total code points: 824718
+# Total code points: 819533
 
 # ================================================
 
@@ -1005,6 +1029,7 @@ FFFFE..FFFFF  ; Cn #   [2] <noncharacter-FFFFE>..<noncharacter-FFFFF>
 10C7          ; Lu #       GEORGIAN CAPITAL LETTER YN
 10CD          ; Lu #       GEORGIAN CAPITAL LETTER AEN
 13A0..13F5    ; Lu #  [86] CHEROKEE LETTER A..CHEROKEE LETTER MV
+1C89          ; Lu #       CYRILLIC CAPITAL LETTER TJE
 1C90..1CBA    ; Lu #  [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
 1CBD..1CBF    ; Lu #   [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
 1E00          ; Lu #       LATIN CAPITAL LETTER A WITH RING BELOW
@@ -1329,9 +1354,12 @@ A7C0          ; Lu #       LATIN CAPITAL LETTER OLD POLISH O
 A7C2          ; Lu #       LATIN CAPITAL LETTER ANGLICANA W
 A7C4..A7C7    ; Lu #   [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY
 A7C9          ; Lu #       LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY
+A7CB..A7CC    ; Lu #   [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE
 A7D0          ; Lu #       LATIN CAPITAL LETTER CLOSED INSULAR G
 A7D6          ; Lu #       LATIN CAPITAL LETTER MIDDLE SCOTS S
 A7D8          ; Lu #       LATIN CAPITAL LETTER SIGMOID S
+A7DA          ; Lu #       LATIN CAPITAL LETTER LAMBDA
+A7DC          ; Lu #       LATIN CAPITAL LETTER LAMBDA WITH STROKE
 A7F5          ; Lu #       LATIN CAPITAL LETTER REVERSED HALF H
 FF21..FF3A    ; Lu #  [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
 10400..10427  ; Lu #  [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW
@@ -1341,6 +1369,7 @@ FF21..FF3A    ; Lu #  [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP
 1058C..10592  ; Lu #   [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE
 10594..10595  ; Lu #   [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE
 10C80..10CB2  ; Lu #  [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US
+10D50..10D65  ; Lu #  [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA
 118A0..118BF  ; Lu #  [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO
 16E40..16E5F  ; Lu #  [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y
 1D400..1D419  ; Lu #  [26] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL BOLD CAPITAL Z
@@ -1376,7 +1405,7 @@ FF21..FF3A    ; Lu #  [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP
 1D7CA         ; Lu #       MATHEMATICAL BOLD CAPITAL DIGAMMA
 1E900..1E921  ; Lu #  [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA
 
-# Total code points: 1831
+# Total code points: 1858
 
 # ================================================
 
@@ -1656,6 +1685,7 @@ FF21..FF3A    ; Lu #  [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP
 10FD..10FF    ; Ll #   [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN
 13F8..13FD    ; Ll #   [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV
 1C80..1C88    ; Ll #   [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C8A          ; Ll #       CYRILLIC SMALL LETTER TJE
 1D00..1D2B    ; Ll #  [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL
 1D6B..1D77    ; Ll #  [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G
 1D79..1D9A    ; Ll #  [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK
@@ -1986,11 +2016,13 @@ A7C1          ; Ll #       LATIN SMALL LETTER OLD POLISH O
 A7C3          ; Ll #       LATIN SMALL LETTER ANGLICANA W
 A7C8          ; Ll #       LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY
 A7CA          ; Ll #       LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
+A7CD          ; Ll #       LATIN SMALL LETTER S WITH DIAGONAL STROKE
 A7D1          ; Ll #       LATIN SMALL LETTER CLOSED INSULAR G
 A7D3          ; Ll #       LATIN SMALL LETTER DOUBLE THORN
 A7D5          ; Ll #       LATIN SMALL LETTER DOUBLE WYNN
 A7D7          ; Ll #       LATIN SMALL LETTER MIDDLE SCOTS S
 A7D9          ; Ll #       LATIN SMALL LETTER SIGMOID S
+A7DB          ; Ll #       LATIN SMALL LETTER LAMBDA
 A7F6          ; Ll #       LATIN SMALL LETTER REVERSED HALF H
 A7FA          ; Ll #       LATIN LETTER SMALL CAPITAL TURNED M
 AB30..AB5A    ; Ll #  [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG
@@ -2006,6 +2038,7 @@ FF41..FF5A    ; Ll #  [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL
 105B3..105B9  ; Ll #   [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
 105BB..105BC  ; Ll #   [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
 10CC0..10CF2  ; Ll #  [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US
+10D70..10D85  ; Ll #  [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA
 118C0..118DF  ; Ll #  [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO
 16E60..16E7F  ; Ll #  [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y
 1D41A..1D433  ; Ll #  [26] MATHEMATICAL BOLD SMALL A..MATHEMATICAL BOLD SMALL Z
@@ -2041,7 +2074,7 @@ FF41..FF5A    ; Ll #  [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL
 1DF25..1DF2A  ; Ll #   [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK
 1E922..1E943  ; Ll #  [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA
 
-# Total code points: 2233
+# Total code points: 2258
 
 # ================================================
 
@@ -2124,7 +2157,11 @@ FF9E..FF9F    ; Lm #   [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK
 10780..10785  ; Lm #   [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK
 10787..107B0  ; Lm #  [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK
 107B2..107BA  ; Lm #   [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL
+10D4E         ; Lm #       GARAY VOWEL LENGTH MARK
+10D6F         ; Lm #       GARAY REDUPLICATION MARK
 16B40..16B43  ; Lm #   [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM
+16D40..16D42  ; Lm #   [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA
+16D6B..16D6C  ; Lm #   [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT
 16F93..16F9F  ; Lm #  [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8
 16FE0..16FE1  ; Lm #   [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK
 16FE3         ; Lm #       OLD CHINESE ITERATION MARK
@@ -2136,7 +2173,7 @@ FF9E..FF9F    ; Lm #   [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK
 1E4EB         ; Lm #       NAG MUNDARI SIGN OJOD
 1E94B         ; Lm #       ADLAM NASALIZATION MARK
 
-# Total code points: 397
+# Total code points: 404
 
 # ================================================
 
@@ -2451,6 +2488,7 @@ FFDA..FFDC    ; Lo #   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
 10450..1049D  ; Lo #  [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO
 10500..10527  ; Lo #  [40] ELBASAN LETTER A..ELBASAN LETTER KHE
 10530..10563  ; Lo #  [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW
+105C0..105F3  ; Lo #  [52] TODHRI LETTER A..TODHRI LETTER OO
 10600..10736  ; Lo # [311] LINEAR A SIGN AB001..LINEAR A SIGN A664
 10740..10755  ; Lo #  [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE
 10760..10767  ; Lo #   [8] LINEAR A SIGN A800..LINEAR A SIGN A807
@@ -2482,8 +2520,11 @@ FFDA..FFDC    ; Lo #   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
 10B80..10B91  ; Lo #  [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW
 10C00..10C48  ; Lo #  [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH
 10D00..10D23  ; Lo #  [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA
+10D4A..10D4D  ; Lo #   [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE
+10D4F         ; Lo #       GARAY SUKUN
 10E80..10EA9  ; Lo #  [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
 10EB0..10EB1  ; Lo #   [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
+10EC2..10EC4  ; Lo #   [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW
 10F00..10F1C  ; Lo #  [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
 10F27         ; Lo #       OLD SOGDIAN LIGATURE AYIN-DALETH
 10F30..10F45  ; Lo #  [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
@@ -2522,6 +2563,13 @@ FFDA..FFDC    ; Lo #   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
 1133D         ; Lo #       GRANTHA SIGN AVAGRAHA
 11350         ; Lo #       GRANTHA OM
 1135D..11361  ; Lo #   [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL
+11380..11389  ; Lo #  [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL
+1138B         ; Lo #       TULU-TIGALARI LETTER EE
+1138E         ; Lo #       TULU-TIGALARI LETTER AI
+11390..113B5  ; Lo #  [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA
+113B7         ; Lo #       TULU-TIGALARI SIGN AVAGRAHA
+113D1         ; Lo #       TULU-TIGALARI REPHA
+113D3         ; Lo #       TULU-TIGALARI SIGN PLUTA
 11400..11434  ; Lo #  [53] NEWA LETTER A..NEWA LETTER HA
 11447..1144A  ; Lo #   [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI
 1145F..11461  ; Lo #   [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA
@@ -2555,6 +2603,7 @@ FFDA..FFDC    ; Lo #   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
 11A5C..11A89  ; Lo #  [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA
 11A9D         ; Lo #       SOYOMBO MARK PLUTA
 11AB0..11AF8  ; Lo #  [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL
+11BC0..11BE0  ; Lo #  [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO
 11C00..11C08  ; Lo #   [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
 11C0A..11C2E  ; Lo #  [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
 11C40         ; Lo #       BHAIKSUKI SIGN AVAGRAHA
@@ -2577,7 +2626,9 @@ FFDA..FFDC    ; Lo #   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
 12F90..12FF0  ; Lo #  [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114
 13000..1342F  ; Lo # [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D
 13441..13446  ; Lo #   [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN
+13460..143FA  ; Lo # [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA
 14400..14646  ; Lo # [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530
+16100..1611D  ; Lo #  [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA
 16800..16A38  ; Lo # [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ
 16A40..16A5E  ; Lo #  [31] MRO LETTER TA..MRO LETTER TEK
 16A70..16ABE  ; Lo #  [79] TANGSA LETTER OZ..TANGSA LETTER ZA
@@ -2585,11 +2636,12 @@ FFDA..FFDC    ; Lo #   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
 16B00..16B2F  ; Lo #  [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU
 16B63..16B77  ; Lo #  [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS
 16B7D..16B8F  ; Lo #  [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ
+16D43..16D6A  ; Lo #  [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU
 16F00..16F4A  ; Lo #  [75] MIAO LETTER PA..MIAO LETTER RTE
 16F50         ; Lo #       MIAO LETTER NASALIZATION
 17000..187F7  ; Lo # [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7
 18800..18CD5  ; Lo # [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5
-18D00..18D08  ; Lo #   [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08
+18CFF..18D08  ; Lo #  [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08
 1B000..1B122  ; Lo # [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU
 1B132         ; Lo #       HIRAGANA LETTER SMALL KO
 1B150..1B152  ; Lo #   [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO
@@ -2606,6 +2658,8 @@ FFDA..FFDC    ; Lo #   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
 1E290..1E2AD  ; Lo #  [30] TOTO LETTER PA..TOTO LETTER A
 1E2C0..1E2EB  ; Lo #  [44] WANCHO LETTER AA..WANCHO LETTER YIH
 1E4D0..1E4EA  ; Lo #  [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL
+1E5D0..1E5ED  ; Lo #  [30] OL ONAL LETTER O..OL ONAL LETTER EG
+1E5F0         ; Lo #       OL ONAL SIGN HODDOND
 1E7E0..1E7E6  ; Lo #   [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO
 1E7E8..1E7EB  ; Lo #   [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE
 1E7ED..1E7EE  ; Lo #   [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE
@@ -2654,7 +2708,7 @@ FFDA..FFDC    ; Lo #   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
 30000..3134A  ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
 31350..323AF  ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
 
-# Total code points: 132234
+# Total code points: 136477
 
 # ================================================
 
@@ -2684,7 +2738,7 @@ FFDA..FFDC    ; Lo #   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
 0825..0827    ; Mn #   [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
 0829..082D    ; Mn #   [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
 0859..085B    ; Mn #   [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
-0898..089F    ; Mn #   [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
+0897..089F    ; Mn #   [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA
 08CA..08E1    ; Mn #  [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
 08E3..0902    ; Mn #  [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA
 093A          ; Mn #       DEVANAGARI VOWEL SIGN OE
@@ -2882,8 +2936,9 @@ FE20..FE2F    ; Mn #  [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL
 10A3F         ; Mn #       KHAROSHTHI VIRAMA
 10AE5..10AE6  ; Mn #   [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
 10D24..10D27  ; Mn #   [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
+10D69..10D6D  ; Mn #   [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK
 10EAB..10EAC  ; Mn #   [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
-10EFD..10EFF  ; Mn #   [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
+10EFC..10EFF  ; Mn #   [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA
 10F46..10F50  ; Mn #  [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
 10F82..10F85  ; Mn #   [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
 11001         ; Mn #       BRAHMI SIGN ANUSVARA
@@ -2914,6 +2969,11 @@ FE20..FE2F    ; Mn #  [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL
 11340         ; Mn #       GRANTHA VOWEL SIGN II
 11366..1136C  ; Mn #   [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX
 11370..11374  ; Mn #   [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA
+113BB..113C0  ; Mn #   [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL
+113CE         ; Mn #       TULU-TIGALARI SIGN VIRAMA
+113D0         ; Mn #       TULU-TIGALARI CONJOINER
+113D2         ; Mn #       TULU-TIGALARI GEMINATION MARK
+113E1..113E2  ; Mn #   [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA
 11438..1143F  ; Mn #   [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI
 11442..11444  ; Mn #   [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA
 11446         ; Mn #       NEWA SIGN NUKTA
@@ -2933,7 +2993,8 @@ FE20..FE2F    ; Mn #  [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL
 116AD         ; Mn #       TAKRI VOWEL SIGN AA
 116B0..116B5  ; Mn #   [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU
 116B7         ; Mn #       TAKRI SIGN NUKTA
-1171D..1171F  ; Mn #   [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA
+1171D         ; Mn #       AHOM CONSONANT SIGN MEDIAL LA
+1171F         ; Mn #       AHOM CONSONANT SIGN MEDIAL LIGATING RA
 11722..11725  ; Mn #   [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU
 11727..1172B  ; Mn #   [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER
 1182F..11837  ; Mn #   [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA
@@ -2972,8 +3033,11 @@ FE20..FE2F    ; Mn #  [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL
 11F36..11F3A  ; Mn #   [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R
 11F40         ; Mn #       KAWI VOWEL SIGN EU
 11F42         ; Mn #       KAWI CONJOINER
+11F5A         ; Mn #       KAWI SIGN NUKTA
 13440         ; Mn #       EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY
 13447..13455  ; Mn #  [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED
+1611E..16129  ; Mn #  [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK
+1612D..1612F  ; Mn #   [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA
 16AF0..16AF4  ; Mn #   [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE
 16B30..16B36  ; Mn #   [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
 16F4F         ; Mn #       MIAO SIGN CONSONANT MODIFIER BAR
@@ -3003,11 +3067,12 @@ FE20..FE2F    ; Mn #  [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL
 1E2AE         ; Mn #       TOTO SIGN RISING TONE
 1E2EC..1E2EF  ; Mn #   [4] WANCHO TONE TUP..WANCHO TONE KOINI
 1E4EC..1E4EF  ; Mn #   [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH
+1E5EE..1E5EF  ; Mn #   [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR
 1E8D0..1E8D6  ; Mn #   [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
 1E944..1E94A  ; Mn #   [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
 E0100..E01EF  ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
 
-# Total code points: 1985
+# Total code points: 2020
 
 # ================================================
 
@@ -3159,6 +3224,12 @@ ABEC          ; Mc #       MEETEI MAYEK LUM IYEK
 1134B..1134D  ; Mc #   [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA
 11357         ; Mc #       GRANTHA AU LENGTH MARK
 11362..11363  ; Mc #   [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL
+113B8..113BA  ; Mc #   [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II
+113C2         ; Mc #       TULU-TIGALARI VOWEL SIGN EE
+113C5         ; Mc #       TULU-TIGALARI VOWEL SIGN AI
+113C7..113CA  ; Mc #   [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA
+113CC..113CD  ; Mc #   [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA
+113CF         ; Mc #       TULU-TIGALARI SIGN LOOPED VIRAMA
 11435..11437  ; Mc #   [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II
 11440..11441  ; Mc #   [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU
 11445         ; Mc #       NEWA SIGN VISARGA
@@ -3175,6 +3246,7 @@ ABEC          ; Mc #       MEETEI MAYEK LUM IYEK
 116AC         ; Mc #       TAKRI SIGN VISARGA
 116AE..116AF  ; Mc #   [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II
 116B6         ; Mc #       TAKRI SIGN VIRAMA
+1171E         ; Mc #       AHOM CONSONANT SIGN MEDIAL RA
 11720..11721  ; Mc #   [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA
 11726         ; Mc #       AHOM VOWEL SIGN E
 1182C..1182E  ; Mc #   [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II
@@ -3203,12 +3275,13 @@ ABEC          ; Mc #       MEETEI MAYEK LUM IYEK
 11F34..11F35  ; Mc #   [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA
 11F3E..11F3F  ; Mc #   [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI
 11F41         ; Mc #       KAWI SIGN KILLER
+1612A..1612C  ; Mc #   [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA
 16F51..16F87  ; Mc #  [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI
 16FF0..16FF1  ; Mc #   [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
 1D165..1D166  ; Mc #   [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
 1D16D..1D172  ; Mc #   [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5
 
-# Total code points: 452
+# Total code points: 468
 
 # ================================================
 
@@ -3253,6 +3326,7 @@ ABF0..ABF9    ; Nd #  [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE
 FF10..FF19    ; Nd #  [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
 104A0..104A9  ; Nd #  [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE
 10D30..10D39  ; Nd #  [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE
+10D40..10D49  ; Nd #  [10] GARAY DIGIT ZERO..GARAY DIGIT NINE
 11066..1106F  ; Nd #  [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE
 110F0..110F9  ; Nd #  [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE
 11136..1113F  ; Nd #  [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE
@@ -3262,24 +3336,30 @@ FF10..FF19    ; Nd #  [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
 114D0..114D9  ; Nd #  [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE
 11650..11659  ; Nd #  [10] MODI DIGIT ZERO..MODI DIGIT NINE
 116C0..116C9  ; Nd #  [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
+116D0..116E3  ; Nd #  [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE
 11730..11739  ; Nd #  [10] AHOM DIGIT ZERO..AHOM DIGIT NINE
 118E0..118E9  ; Nd #  [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE
 11950..11959  ; Nd #  [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE
+11BF0..11BF9  ; Nd #  [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE
 11C50..11C59  ; Nd #  [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE
 11D50..11D59  ; Nd #  [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE
 11DA0..11DA9  ; Nd #  [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE
 11F50..11F59  ; Nd #  [10] KAWI DIGIT ZERO..KAWI DIGIT NINE
+16130..16139  ; Nd #  [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE
 16A60..16A69  ; Nd #  [10] MRO DIGIT ZERO..MRO DIGIT NINE
 16AC0..16AC9  ; Nd #  [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE
 16B50..16B59  ; Nd #  [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE
+16D70..16D79  ; Nd #  [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE
+1CCF0..1CCF9  ; Nd #  [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE
 1D7CE..1D7FF  ; Nd #  [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
 1E140..1E149  ; Nd #  [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE
 1E2F0..1E2F9  ; Nd #  [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE
 1E4F0..1E4F9  ; Nd #  [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE
+1E5F1..1E5FA  ; Nd #  [10] OL ONAL DIGIT ZERO..OL ONAL DIGIT NINE
 1E950..1E959  ; Nd #  [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
 1FBF0..1FBF9  ; Nd #  [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
 
-# Total code points: 680
+# Total code points: 760
 
 # ================================================
 
@@ -3486,9 +3566,10 @@ FE31..FE32    ; Pd #   [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION
 FE58          ; Pd #       SMALL EM DASH
 FE63          ; Pd #       SMALL HYPHEN-MINUS
 FF0D          ; Pd #       FULLWIDTH HYPHEN-MINUS
+10D6E         ; Pd #       GARAY HYPHEN
 10EAD         ; Pd #       YEZIDI HYPHENATION MARK
 
-# Total code points: 26
+# Total code points: 27
 
 # ================================================
 
@@ -3735,8 +3816,9 @@ FF3F          ; Pc #       FULLWIDTH LOW LINE
 1A1E..1A1F    ; Po #   [2] BUGINESE PALLAWA..BUGINESE END OF SECTION
 1AA0..1AA6    ; Po #   [7] TAI THAM SIGN WIANG..TAI THAM SIGN REVERSED ROTATED RANA
 1AA8..1AAD    ; Po #   [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG
+1B4E..1B4F    ; Po #   [2] BALINESE INVERTED CARIK SIKI..BALINESE INVERTED CARIK PAREREN
 1B5A..1B60    ; Po #   [7] BALINESE PANTI..BALINESE PAMENENG
-1B7D..1B7E    ; Po #   [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG
+1B7D..1B7F    ; Po #   [3] BALINESE PANTI LANTANG..BALINESE PANTI BAWAK
 1BFC..1BFF    ; Po #   [4] BATAK SYMBOL BINDU NA METEK..BATAK SYMBOL BINDU PANGOLAT
 1C3B..1C3F    ; Po #   [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK
 1C7E..1C7F    ; Po #   [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
@@ -3831,6 +3913,8 @@ FF64..FF65    ; Po #   [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL
 111DD..111DF  ; Po #   [3] SHARADA CONTINUATION SIGN..SHARADA SECTION MARK-2
 11238..1123D  ; Po #   [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN
 112A9         ; Po #       MULTANI SECTION MARK
+113D4..113D5  ; Po #   [2] TULU-TIGALARI DANDA..TULU-TIGALARI DOUBLE DANDA
+113D7..113D8  ; Po #   [2] TULU-TIGALARI SIGN OM PUSHPIKA..TULU-TIGALARI SIGN SHRII PUSHPIKA
 1144B..1144F  ; Po #   [5] NEWA DANDA..NEWA ABBREVIATION SIGN
 1145A..1145B  ; Po #   [2] NEWA DOUBLE COMMA..NEWA PLACEHOLDER MARK
 1145D         ; Po #       NEWA INSERTION SIGN
@@ -3847,6 +3931,7 @@ FF64..FF65    ; Po #   [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL
 11A9A..11A9C  ; Po #   [3] SOYOMBO MARK TSHEG..SOYOMBO MARK DOUBLE SHAD
 11A9E..11AA2  ; Po #   [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2
 11B00..11B09  ; Po #  [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU
+11BE1         ; Po #       SUNUWAR SIGN PVO
 11C41..11C45  ; Po #   [5] BHAIKSUKI DANDA..BHAIKSUKI GAP FILLER-2
 11C70..11C71  ; Po #   [2] MARCHEN HEAD MARK..MARCHEN MARK SHAD
 11EF7..11EF8  ; Po #   [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION
@@ -3858,13 +3943,15 @@ FF64..FF65    ; Po #   [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL
 16AF5         ; Po #       BASSA VAH FULL STOP
 16B37..16B3B  ; Po #   [5] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS FEEM
 16B44         ; Po #       PAHAWH HMONG SIGN XAUS
+16D6D..16D6F  ; Po #   [3] KIRAT RAI SIGN YUPI..KIRAT RAI DOUBLE DANDA
 16E97..16E9A  ; Po #   [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH
 16FE2         ; Po #       OLD CHINESE HOOK MARK
 1BC9F         ; Po #       DUPLOYAN PUNCTUATION CHINOOK FULL STOP
 1DA87..1DA8B  ; Po #   [5] SIGNWRITING COMMA..SIGNWRITING PARENTHESIS
+1E5FF         ; Po #       OL ONAL ABBREVIATION SIGN
 1E95E..1E95F  ; Po #   [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK
 
-# Total code points: 628
+# Total code points: 640
 
 # ================================================
 
@@ -3923,6 +4010,7 @@ FF5C          ; Sm #       FULLWIDTH VERTICAL LINE
 FF5E          ; Sm #       FULLWIDTH TILDE
 FFE2          ; Sm #       FULLWIDTH NOT SIGN
 FFE9..FFEC    ; Sm #   [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW
+10D8E..10D8F  ; Sm #   [2] GARAY PLUS SIGN..GARAY MINUS SIGN
 1D6C1         ; Sm #       MATHEMATICAL BOLD NABLA
 1D6DB         ; Sm #       MATHEMATICAL BOLD PARTIAL DIFFERENTIAL
 1D6FB         ; Sm #       MATHEMATICAL ITALIC NABLA
@@ -3935,7 +4023,7 @@ FFE9..FFEC    ; Sm #   [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW
 1D7C3         ; Sm #       MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL
 1EEF0..1EEF1  ; Sm #   [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL
 
-# Total code points: 948
+# Total code points: 950
 
 # ================================================
 
@@ -4073,7 +4161,7 @@ FFE3          ; Sk #       FULLWIDTH MACRON
 232B..237B    ; So #  [81] ERASE TO THE LEFT..NOT CHECK MARK
 237D..239A    ; So #  [30] SHOULDERED OPEN BOX..CLEAR SCREEN SYMBOL
 23B4..23DB    ; So #  [40] TOP SQUARE BRACKET..FUSE
-23E2..2426    ; So #  [69] WHITE TRAPEZIUM..SYMBOL FOR SUBSTITUTE FORM TWO
+23E2..2429    ; So #  [72] WHITE TRAPEZIUM..SYMBOL FOR DELETE MEDIUM SHADE FORM
 2440..244A    ; So #  [11] OCR HOOK..OCR DOUBLE BACKSLASH
 249C..24E9    ; So #  [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z
 2500..25B6    ; So # [183] BOX DRAWINGS LIGHT HORIZONTAL..BLACK RIGHT-POINTING TRIANGLE
@@ -4101,7 +4189,7 @@ FFE3          ; Sk #       FULLWIDTH MACRON
 303E..303F    ; So #   [2] IDEOGRAPHIC VARIATION INDICATOR..IDEOGRAPHIC HALF FILL SPACE
 3190..3191    ; So #   [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK
 3196..319F    ; So #  [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK
-31C0..31E3    ; So #  [36] CJK STROKE T..CJK STROKE Q
+31C0..31E5    ; So #  [38] CJK STROKE T..CJK STROKE SZP
 31EF          ; So #       IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION
 3200..321E    ; So #  [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU
 322A..3247    ; So #  [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO
@@ -4136,6 +4224,8 @@ FFFC..FFFD    ; So #   [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER
 16B3C..16B3F  ; So #   [4] PAHAWH HMONG SIGN XYEEM NTXIV..PAHAWH HMONG SIGN XYEEM FAIB
 16B45         ; So #       PAHAWH HMONG SIGN CIM TSOV ROG
 1BC9C         ; So #       DUPLOYAN SIGN O WITH CROSS
+1CC00..1CCEF  ; So # [240] UP-POINTING GO-KART..OUTLINED LATIN CAPITAL LETTER Z
+1CD00..1CEB3  ; So # [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET
 1CF50..1CFC3  ; So # [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK
 1D000..1D0F5  ; So # [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO
 1D100..1D126  ; So #  [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2
@@ -4180,20 +4270,20 @@ FFFC..FFFD    ; So #   [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER
 1F850..1F859  ; So #  [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW
 1F860..1F887  ; So #  [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
 1F890..1F8AD  ; So #  [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS
-1F8B0..1F8B1  ; So #   [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST
+1F8B0..1F8BB  ; So #  [12] ARROW POINTING UPWARDS THEN NORTH WEST..SOUTH WEST ARROW FROM BAR
+1F8C0..1F8C1  ; So #   [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW
 1F900..1FA53  ; So # [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP
 1FA60..1FA6D  ; So #  [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER
 1FA70..1FA7C  ; So #  [13] BALLET SHOES..CRUTCH
-1FA80..1FA88  ; So #   [9] YO-YO..FLUTE
-1FA90..1FABD  ; So #  [46] RINGED PLANET..WING
-1FABF..1FAC5  ; So #   [7] GOOSE..PERSON WITH CROWN
-1FACE..1FADB  ; So #  [14] MOOSE..PEA POD
-1FAE0..1FAE8  ; So #   [9] MELTING FACE..SHAKING FACE
+1FA80..1FA89  ; So #  [10] YO-YO..HARP
+1FA8F..1FAC6  ; So #  [56] SHOVEL..FINGERPRINT
+1FACE..1FADC  ; So #  [15] MOOSE..ROOT VEGETABLE
+1FADF..1FAE9  ; So #  [11] SPLATTER..FACE WITH BAGS UNDER EYES
 1FAF0..1FAF8  ; So #   [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND
 1FB00..1FB92  ; So # [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK
-1FB94..1FBCA  ; So #  [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON
+1FB94..1FBEF  ; So #  [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE
 
-# Total code points: 6639
+# Total code points: 7376
 
 # ================================================
 
diff --git a/libcxx/utils/data/unicode/EastAsianWidth.txt b/libcxx/utils/data/unicode/EastAsianWidth.txt
index 02df4df..99f7a31 100644
--- a/libcxx/utils/data/unicode/EastAsianWidth.txt
+++ b/libcxx/utils/data/unicode/EastAsianWidth.txt
@@ -1,8 +1,8 @@
-# EastAsianWidth-15.1.0.txt
-# Date: 2023-07-28, 23:34:08 GMT
-# © 2023 Unicode®, Inc.
+# EastAsianWidth-16.0.0.txt
+# Date: 2024-04-30, 21:48:20 GMT
+# © 2024 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
-# For terms of use, see https://www.unicode.org/terms_of_use.html
+# For terms of use and license, see https://www.unicode.org/terms_of_use.html
 #
 # Unicode Character Database
 #   For documentation, see https://www.unicode.org/reports/tr44/
@@ -334,7 +334,7 @@
 0888           ; N  # Sk         ARABIC RAISED ROUND DOT
 0889..088E     ; N  # Lo     [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL
 0890..0891     ; N  # Cf     [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE
-0898..089F     ; N  # Mn     [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
+0897..089F     ; N  # Mn     [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA
 08A0..08C8     ; N  # Lo    [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF
 08C9           ; N  # Lm         ARABIC SMALL FARSI YEH
 08CA..08E1     ; N  # Mn    [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
@@ -819,12 +819,13 @@
 1B42           ; N  # Mn         BALINESE VOWEL SIGN PEPET
 1B43..1B44     ; N  # Mc     [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG
 1B45..1B4C     ; N  # Lo     [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA
+1B4E..1B4F     ; N  # Po     [2] BALINESE INVERTED CARIK SIKI..BALINESE INVERTED CARIK PAREREN
 1B50..1B59     ; N  # Nd    [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE
 1B5A..1B60     ; N  # Po     [7] BALINESE PANTI..BALINESE PAMENENG
 1B61..1B6A     ; N  # So    [10] BALINESE MUSICAL SYMBOL DONG..BALINESE MUSICAL SYMBOL DANG GEDE
 1B6B..1B73     ; N  # Mn     [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
 1B74..1B7C     ; N  # So     [9] BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING
-1B7D..1B7E     ; N  # Po     [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG
+1B7D..1B7F     ; N  # Po     [3] BALINESE PANTI LANTANG..BALINESE PANTI BAWAK
 1B80..1B81     ; N  # Mn     [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR
 1B82           ; N  # Mc         SUNDANESE SIGN PANGWISAD
 1B83..1BA0     ; N  # Lo    [30] SUNDANESE LETTER A..SUNDANESE LETTER HA
@@ -859,7 +860,7 @@
 1C5A..1C77     ; N  # Lo    [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
 1C78..1C7D     ; N  # Lm     [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
 1C7E..1C7F     ; N  # Po     [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
-1C80..1C88     ; N  # Ll     [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C80..1C8A     ; N  # L&    [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE
 1C90..1CBA     ; N  # Lu    [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
 1CBD..1CBF     ; N  # Lu     [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
 1CC0..1CC7     ; N  # Po     [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA
@@ -1142,7 +1143,7 @@
 23F1..23F2     ; N  # So     [2] STOPWATCH..TIMER CLOCK
 23F3           ; W  # So         HOURGLASS WITH FLOWING SAND
 23F4..23FF     ; N  # So    [12] BLACK MEDIUM LEFT-POINTING TRIANGLE..OBSERVER EYE SYMBOL
-2400..2426     ; N  # So    [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO
+2400..2429     ; N  # So    [42] SYMBOL FOR NULL..SYMBOL FOR DELETE MEDIUM SHADE FORM
 2440..244A     ; N  # So    [11] OCR HOOK..OCR DOUBLE BACKSLASH
 2460..249B     ; A  # No    [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP
 249C..24E9     ; A  # So    [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z
@@ -1195,7 +1196,9 @@
 261C           ; A  # So         WHITE LEFT POINTING INDEX
 261D           ; N  # So         WHITE UP POINTING INDEX
 261E           ; A  # So         WHITE RIGHT POINTING INDEX
-261F..263F     ; N  # So    [33] WHITE DOWN POINTING INDEX..MERCURY
+261F..262F     ; N  # So    [17] WHITE DOWN POINTING INDEX..YIN YANG
+2630..2637     ; W  # So     [8] TRIGRAM FOR HEAVEN..TRIGRAM FOR EARTH
+2638..263F     ; N  # So     [8] WHEEL OF DHARMA..MERCURY
 2640           ; A  # So         FEMALE SIGN
 2641           ; N  # So         EARTH
 2642           ; A  # So         MALE SIGN
@@ -1213,7 +1216,9 @@
 266F           ; A  # Sm         MUSIC SHARP SIGN
 2670..267E     ; N  # So    [15] WEST SYRIAC CROSS..PERMANENT PAPER SIGN
 267F           ; W  # So         WHEELCHAIR SYMBOL
-2680..2692     ; N  # So    [19] DIE FACE-1..HAMMER AND PICK
+2680..2689     ; N  # So    [10] DIE FACE-1..BLACK CIRCLE WITH TWO WHITE DOTS
+268A..268F     ; W  # So     [6] MONOGRAM FOR YANG..DIGRAM FOR GREATER YIN
+2690..2692     ; N  # So     [3] WHITE FLAG..HAMMER AND PICK
 2693           ; W  # So         ANCHOR
 2694..269D     ; N  # So    [10] CROSSED SWORDS..OUTLINED WHITE STAR
 269E..269F     ; A  # So     [2] THREE LINES CONVERGING RIGHT..THREE LINES CONVERGING LEFT
@@ -1487,7 +1492,7 @@
 3192..3195     ; W  # No     [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK
 3196..319F     ; W  # So    [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK
 31A0..31BF     ; W  # Lo    [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH
-31C0..31E3     ; W  # So    [36] CJK STROKE T..CJK STROKE Q
+31C0..31E5     ; W  # So    [38] CJK STROKE T..CJK STROKE SZP
 31EF           ; W  # So         IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION
 31F0..31FF     ; W  # Lo    [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
 3200..321E     ; W  # So    [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU
@@ -1503,7 +1508,7 @@
 32C0..32FF     ; W  # So    [64] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..SQUARE ERA NAME REIWA
 3300..33FF     ; W  # So   [256] SQUARE APAATO..SQUARE GAL
 3400..4DBF     ; W  # Lo  [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF
-4DC0..4DFF     ; N  # So    [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION
+4DC0..4DFF     ; W  # So    [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION
 4E00..9FFF     ; W  # Lo [20992] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFF
 A000..A014     ; W  # Lo    [21] YI SYLLABLE IT..YI SYLLABLE E
 A015           ; W  # Lm         YI SYLLABLE WU
@@ -1543,10 +1548,10 @@ A788           ; N  # Lm         MODIFIER LETTER LOW CIRCUMFLEX ACCENT
 A789..A78A     ; N  # Sk     [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN
 A78B..A78E     ; N  # L&     [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
 A78F           ; N  # Lo         LATIN LETTER SINOLOGICAL DOT
-A790..A7CA     ; N  # L&    [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
+A790..A7CD     ; N  # L&    [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE
 A7D0..A7D1     ; N  # L&     [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G
 A7D3           ; N  # Ll         LATIN SMALL LETTER DOUBLE THORN
-A7D5..A7D9     ; N  # L&     [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S
+A7D5..A7DC     ; N  # L&     [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE
 A7F2..A7F4     ; N  # Lm     [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q
 A7F5..A7F6     ; N  # L&     [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H
 A7F7           ; N  # Lo         LATIN EPIGRAPHIC LETTER SIDEWAYS I
@@ -1870,6 +1875,7 @@ FFFD           ; A  # So         REPLACEMENT CHARACTER
 105A3..105B1   ; N  # Ll    [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE
 105B3..105B9   ; N  # Ll     [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
 105BB..105BC   ; N  # Ll     [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
+105C0..105F3   ; N  # Lo    [52] TODHRI LETTER A..TODHRI LETTER OO
 10600..10736   ; N  # Lo   [311] LINEAR A SIGN AB001..LINEAR A SIGN A664
 10740..10755   ; N  # Lo    [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE
 10760..10767   ; N  # Lo     [8] LINEAR A SIGN A800..LINEAR A SIGN A807
@@ -1942,12 +1948,23 @@ FFFD           ; A  # So         REPLACEMENT CHARACTER
 10D00..10D23   ; N  # Lo    [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA
 10D24..10D27   ; N  # Mn     [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
 10D30..10D39   ; N  # Nd    [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE
+10D40..10D49   ; N  # Nd    [10] GARAY DIGIT ZERO..GARAY DIGIT NINE
+10D4A..10D4D   ; N  # Lo     [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE
+10D4E          ; N  # Lm         GARAY VOWEL LENGTH MARK
+10D4F          ; N  # Lo         GARAY SUKUN
+10D50..10D65   ; N  # Lu    [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA
+10D69..10D6D   ; N  # Mn     [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK
+10D6E          ; N  # Pd         GARAY HYPHEN
+10D6F          ; N  # Lm         GARAY REDUPLICATION MARK
+10D70..10D85   ; N  # Ll    [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA
+10D8E..10D8F   ; N  # Sm     [2] GARAY PLUS SIGN..GARAY MINUS SIGN
 10E60..10E7E   ; N  # No    [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS
 10E80..10EA9   ; N  # Lo    [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
 10EAB..10EAC   ; N  # Mn     [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
 10EAD          ; N  # Pd         YEZIDI HYPHENATION MARK
 10EB0..10EB1   ; N  # Lo     [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
-10EFD..10EFF   ; N  # Mn     [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
+10EC2..10EC4   ; N  # Lo     [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW
+10EFC..10EFF   ; N  # Mn     [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA
 10F00..10F1C   ; N  # Lo    [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
 10F1D..10F26   ; N  # No    [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF
 10F27          ; N  # Lo         OLD SOGDIAN LIGATURE AYIN-DALETH
@@ -2064,6 +2081,26 @@ FFFD           ; A  # So         REPLACEMENT CHARACTER
 11362..11363   ; N  # Mc     [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL
 11366..1136C   ; N  # Mn     [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX
 11370..11374   ; N  # Mn     [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA
+11380..11389   ; N  # Lo    [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL
+1138B          ; N  # Lo         TULU-TIGALARI LETTER EE
+1138E          ; N  # Lo         TULU-TIGALARI LETTER AI
+11390..113B5   ; N  # Lo    [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA
+113B7          ; N  # Lo         TULU-TIGALARI SIGN AVAGRAHA
+113B8..113BA   ; N  # Mc     [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II
+113BB..113C0   ; N  # Mn     [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL
+113C2          ; N  # Mc         TULU-TIGALARI VOWEL SIGN EE
+113C5          ; N  # Mc         TULU-TIGALARI VOWEL SIGN AI
+113C7..113CA   ; N  # Mc     [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA
+113CC..113CD   ; N  # Mc     [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA
+113CE          ; N  # Mn         TULU-TIGALARI SIGN VIRAMA
+113CF          ; N  # Mc         TULU-TIGALARI SIGN LOOPED VIRAMA
+113D0          ; N  # Mn         TULU-TIGALARI CONJOINER
+113D1          ; N  # Lo         TULU-TIGALARI REPHA
+113D2          ; N  # Mn         TULU-TIGALARI GEMINATION MARK
+113D3          ; N  # Lo         TULU-TIGALARI SIGN PLUTA
+113D4..113D5   ; N  # Po     [2] TULU-TIGALARI DANDA..TULU-TIGALARI DOUBLE DANDA
+113D7..113D8   ; N  # Po     [2] TULU-TIGALARI SIGN OM PUSHPIKA..TULU-TIGALARI SIGN SHRII PUSHPIKA
+113E1..113E2   ; N  # Mn     [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA
 11400..11434   ; N  # Lo    [53] NEWA LETTER A..NEWA LETTER HA
 11435..11437   ; N  # Mc     [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II
 11438..1143F   ; N  # Mn     [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI
@@ -2123,8 +2160,11 @@ FFFD           ; A  # So         REPLACEMENT CHARACTER
 116B8          ; N  # Lo         TAKRI LETTER ARCHAIC KHA
 116B9          ; N  # Po         TAKRI ABBREVIATION SIGN
 116C0..116C9   ; N  # Nd    [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
+116D0..116E3   ; N  # Nd    [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE
 11700..1171A   ; N  # Lo    [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA
-1171D..1171F   ; N  # Mn     [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA
+1171D          ; N  # Mn         AHOM CONSONANT SIGN MEDIAL LA
+1171E          ; N  # Mc         AHOM CONSONANT SIGN MEDIAL RA
+1171F          ; N  # Mn         AHOM CONSONANT SIGN MEDIAL LIGATING RA
 11720..11721   ; N  # Mc     [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA
 11722..11725   ; N  # Mn     [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU
 11726          ; N  # Mc         AHOM VOWEL SIGN E
@@ -2195,6 +2235,9 @@ FFFD           ; A  # So         REPLACEMENT CHARACTER
 11AB0..11ABF   ; N  # Lo    [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA
 11AC0..11AF8   ; N  # Lo    [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL
 11B00..11B09   ; N  # Po    [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU
+11BC0..11BE0   ; N  # Lo    [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO
+11BE1          ; N  # Po         SUNUWAR SIGN PVO
+11BF0..11BF9   ; N  # Nd    [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE
 11C00..11C08   ; N  # Lo     [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
 11C0A..11C2E   ; N  # Lo    [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
 11C2F          ; N  # Mc         BHAIKSUKI VOWEL SIGN AA
@@ -2253,6 +2296,7 @@ FFFD           ; A  # So         REPLACEMENT CHARACTER
 11F42          ; N  # Mn         KAWI CONJOINER
 11F43..11F4F   ; N  # Po    [13] KAWI DANDA..KAWI PUNCTUATION CLOSING SPIRAL
 11F50..11F59   ; N  # Nd    [10] KAWI DIGIT ZERO..KAWI DIGIT NINE
+11F5A          ; N  # Mn         KAWI SIGN NUKTA
 11FB0          ; N  # Lo         LISU LETTER YHA
 11FC0..11FD4   ; N  # No    [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH
 11FD5..11FDC   ; N  # So     [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI
@@ -2270,7 +2314,13 @@ FFFD           ; A  # So         REPLACEMENT CHARACTER
 13440          ; N  # Mn         EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY
 13441..13446   ; N  # Lo     [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN
 13447..13455   ; N  # Mn    [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED
+13460..143FA   ; N  # Lo  [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA
 14400..14646   ; N  # Lo   [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530
+16100..1611D   ; N  # Lo    [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA
+1611E..16129   ; N  # Mn    [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK
+1612A..1612C   ; N  # Mc     [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA
+1612D..1612F   ; N  # Mn     [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA
+16130..16139   ; N  # Nd    [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE
 16800..16A38   ; N  # Lo   [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ
 16A40..16A5E   ; N  # Lo    [31] MRO LETTER TA..MRO LETTER TEK
 16A60..16A69   ; N  # Nd    [10] MRO DIGIT ZERO..MRO DIGIT NINE
@@ -2291,6 +2341,11 @@ FFFD           ; A  # So         REPLACEMENT CHARACTER
 16B5B..16B61   ; N  # No     [7] PAHAWH HMONG NUMBER TENS..PAHAWH HMONG NUMBER TRILLIONS
 16B63..16B77   ; N  # Lo    [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS
 16B7D..16B8F   ; N  # Lo    [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ
+16D40..16D42   ; N  # Lm     [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA
+16D43..16D6A   ; N  # Lo    [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU
+16D6B..16D6C   ; N  # Lm     [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT
+16D6D..16D6F   ; N  # Po     [3] KIRAT RAI SIGN YUPI..KIRAT RAI DOUBLE DANDA
+16D70..16D79   ; N  # Nd    [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE
 16E40..16E7F   ; N  # L&    [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y
 16E80..16E96   ; N  # No    [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM
 16E97..16E9A   ; N  # Po     [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH
@@ -2308,6 +2363,7 @@ FFFD           ; A  # So         REPLACEMENT CHARACTER
 17000..187F7   ; W  # Lo  [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7
 18800..18AFF   ; W  # Lo   [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768
 18B00..18CD5   ; W  # Lo   [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5
+18CFF          ; W  # Lo         KHITAN SMALL SCRIPT CHARACTER-18CFF
 18D00..18D08   ; W  # Lo     [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08
 1AFF0..1AFF3   ; W  # Lm     [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5
 1AFF5..1AFFB   ; W  # Lm     [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5
@@ -2327,6 +2383,9 @@ FFFD           ; A  # So         REPLACEMENT CHARACTER
 1BC9D..1BC9E   ; N  # Mn     [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
 1BC9F          ; N  # Po         DUPLOYAN PUNCTUATION CHINOOK FULL STOP
 1BCA0..1BCA3   ; N  # Cf     [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP
+1CC00..1CCEF   ; N  # So   [240] UP-POINTING GO-KART..OUTLINED LATIN CAPITAL LETTER Z
+1CCF0..1CCF9   ; N  # Nd    [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE
+1CD00..1CEB3   ; N  # So   [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET
 1CF00..1CF2D   ; N  # Mn    [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT
 1CF30..1CF46   ; N  # Mn    [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG
 1CF50..1CFC3   ; N  # So   [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK
@@ -2349,8 +2408,9 @@ FFFD           ; A  # So         REPLACEMENT CHARACTER
 1D245          ; N  # So         GREEK MUSICAL LEIMMA
 1D2C0..1D2D3   ; N  # No    [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN
 1D2E0..1D2F3   ; N  # No    [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN
-1D300..1D356   ; N  # So    [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING
-1D360..1D378   ; N  # No    [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE
+1D300..1D356   ; W  # So    [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING
+1D360..1D376   ; W  # No    [23] COUNTING ROD UNIT DIGIT ONE..IDEOGRAPHIC TALLY MARK FIVE
+1D377..1D378   ; N  # No     [2] TALLY MARK ONE..TALLY MARK FIVE
 1D400..1D454   ; N  # L&    [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G
 1D456..1D49C   ; N  # L&    [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A
 1D49E..1D49F   ; N  # Lu     [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D
@@ -2431,6 +2491,11 @@ FFFD           ; A  # So         REPLACEMENT CHARACTER
 1E4EB          ; N  # Lm         NAG MUNDARI SIGN OJOD
 1E4EC..1E4EF   ; N  # Mn     [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH
 1E4F0..1E4F9   ; N  # Nd    [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE
+1E5D0..1E5ED   ; N  # Lo    [30] OL ONAL LETTER O..OL ONAL LETTER EG
+1E5EE..1E5EF   ; N  # Mn     [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR
+1E5F0          ; N  # Lo         OL ONAL SIGN HODDOND
+1E5F1..1E5FA   ; N  # Nd    [10] OL ONAL DIGIT ZERO..OL ONAL DIGIT NINE
+1E5FF          ; N  # Po         OL ONAL ABBREVIATION SIGN
 1E7E0..1E7E6   ; N  # Lo     [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO
 1E7E8..1E7EB   ; N  # Lo     [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE
 1E7ED..1E7EE   ; N  # Lo     [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE
@@ -2574,7 +2639,8 @@ FFFD           ; A  # So         REPLACEMENT CHARACTER
 1F850..1F859   ; N  # So    [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW
 1F860..1F887   ; N  # So    [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
 1F890..1F8AD   ; N  # So    [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS
-1F8B0..1F8B1   ; N  # So     [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST
+1F8B0..1F8BB   ; N  # So    [12] ARROW POINTING UPWARDS THEN NORTH WEST..SOUTH WEST ARROW FROM BAR
+1F8C0..1F8C1   ; N  # So     [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW
 1F900..1F90B   ; N  # So    [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT
 1F90C..1F93A   ; W  # So    [47] PINCHED FINGERS..FENCER
 1F93B          ; N  # So         MODERN PENTATHLON
@@ -2584,14 +2650,13 @@ FFFD           ; A  # So         REPLACEMENT CHARACTER
 1FA00..1FA53   ; N  # So    [84] NEUTRAL CHESS KING..BLACK CHESS KNIGHT-BISHOP
 1FA60..1FA6D   ; N  # So    [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER
 1FA70..1FA7C   ; W  # So    [13] BALLET SHOES..CRUTCH
-1FA80..1FA88   ; W  # So     [9] YO-YO..FLUTE
-1FA90..1FABD   ; W  # So    [46] RINGED PLANET..WING
-1FABF..1FAC5   ; W  # So     [7] GOOSE..PERSON WITH CROWN
-1FACE..1FADB   ; W  # So    [14] MOOSE..PEA POD
-1FAE0..1FAE8   ; W  # So     [9] MELTING FACE..SHAKING FACE
+1FA80..1FA89   ; W  # So    [10] YO-YO..HARP
+1FA8F..1FAC6   ; W  # So    [56] SHOVEL..FINGERPRINT
+1FACE..1FADC   ; W  # So    [15] MOOSE..ROOT VEGETABLE
+1FADF..1FAE9   ; W  # So    [11] SPLATTER..FACE WITH BAGS UNDER EYES
 1FAF0..1FAF8   ; W  # So     [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND
 1FB00..1FB92   ; N  # So   [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK
-1FB94..1FBCA   ; N  # So    [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON
+1FB94..1FBEF   ; N  # So    [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE
 1FBF0..1FBF9   ; N  # Nd    [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
 20000..2A6DF   ; W  # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF
 2A6E0..2A6FF   ; W  # Cn    [32] <reserved-2A6E0>..<reserved-2A6FF>
diff --git a/libcxx/utils/data/unicode/GraphemeBreakProperty.txt b/libcxx/utils/data/unicode/GraphemeBreakProperty.txt
index 12453cb..a863397 100644
--- a/libcxx/utils/data/unicode/GraphemeBreakProperty.txt
+++ b/libcxx/utils/data/unicode/GraphemeBreakProperty.txt
@@ -1,8 +1,8 @@
-# GraphemeBreakProperty-15.1.0.txt
-# Date: 2023-01-05, 20:34:41 GMT
-# © 2023 Unicode®, Inc.
+# GraphemeBreakProperty-16.0.0.txt
+# Date: 2024-05-31, 18:09:38 GMT
+# © 2024 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
-# For terms of use, see https://www.unicode.org/terms_of_use.html
+# For terms of use and license, see https://www.unicode.org/terms_of_use.html
 #
 # Unicode Character Database
 #   For documentation, see https://www.unicode.org/reports/tr44/
@@ -27,6 +27,7 @@
 110BD         ; Prepend # Cf       KAITHI NUMBER SIGN
 110CD         ; Prepend # Cf       KAITHI NUMBER SIGN ABOVE
 111C2..111C3  ; Prepend # Lo   [2] SHARADA SIGN JIHVAMULIYA..SHARADA SIGN UPADHMANIYA
+113D1         ; Prepend # Lo       TULU-TIGALARI REPHA
 1193F         ; Prepend # Lo       DIVES AKURU PREFIXED NASAL SIGN
 11941         ; Prepend # Lo       DIVES AKURU INITIAL RA
 11A3A         ; Prepend # Lo       ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA
@@ -34,7 +35,7 @@
 11D46         ; Prepend # Lo       MASARAM GONDI REPHA
 11F02         ; Prepend # Lo       KAWI SIGN REPHA
 
-# Total code points: 27
+# Total code points: 28
 
 # ================================================
 
@@ -106,7 +107,7 @@ E01F0..E0FFF  ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
 0825..0827    ; Extend # Mn   [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
 0829..082D    ; Extend # Mn   [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
 0859..085B    ; Extend # Mn   [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
-0898..089F    ; Extend # Mn   [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
+0897..089F    ; Extend # Mn   [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA
 08CA..08E1    ; Extend # Mn  [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
 08E3..0902    ; Extend # Mn  [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA
 093A          ; Extend # Mn       DEVANAGARI VOWEL SIGN OE
@@ -163,8 +164,11 @@ E01F0..E0FFF  ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
 0C81          ; Extend # Mn       KANNADA SIGN CANDRABINDU
 0CBC          ; Extend # Mn       KANNADA SIGN NUKTA
 0CBF          ; Extend # Mn       KANNADA VOWEL SIGN I
+0CC0          ; Extend # Mc       KANNADA VOWEL SIGN II
 0CC2          ; Extend # Mc       KANNADA VOWEL SIGN UU
 0CC6          ; Extend # Mn       KANNADA VOWEL SIGN E
+0CC7..0CC8    ; Extend # Mc   [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI
+0CCA..0CCB    ; Extend # Mc   [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
 0CCC..0CCD    ; Extend # Mn   [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
 0CD5..0CD6    ; Extend # Mc   [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
 0CE2..0CE3    ; Extend # Mn   [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
@@ -210,7 +214,9 @@ E01F0..E0FFF  ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
 109D          ; Extend # Mn       MYANMAR VOWEL SIGN AITON AI
 135D..135F    ; Extend # Mn   [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK
 1712..1714    ; Extend # Mn   [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
+1715          ; Extend # Mc       TAGALOG SIGN PAMUDPOD
 1732..1733    ; Extend # Mn   [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U
+1734          ; Extend # Mc       HANUNOO SIGN PAMUDPOD
 1752..1753    ; Extend # Mn   [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U
 1772..1773    ; Extend # Mn   [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U
 17B4..17B5    ; Extend # Mn   [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
@@ -242,17 +248,22 @@ E01F0..E0FFF  ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
 1B34          ; Extend # Mn       BALINESE SIGN REREKAN
 1B35          ; Extend # Mc       BALINESE VOWEL SIGN TEDUNG
 1B36..1B3A    ; Extend # Mn   [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA
+1B3B          ; Extend # Mc       BALINESE VOWEL SIGN RA REPA TEDUNG
 1B3C          ; Extend # Mn       BALINESE VOWEL SIGN LA LENGA
+1B3D          ; Extend # Mc       BALINESE VOWEL SIGN LA LENGA TEDUNG
 1B42          ; Extend # Mn       BALINESE VOWEL SIGN PEPET
+1B43..1B44    ; Extend # Mc   [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG
 1B6B..1B73    ; Extend # Mn   [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
 1B80..1B81    ; Extend # Mn   [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR
 1BA2..1BA5    ; Extend # Mn   [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU
 1BA8..1BA9    ; Extend # Mn   [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG
+1BAA          ; Extend # Mc       SUNDANESE SIGN PAMAAEH
 1BAB..1BAD    ; Extend # Mn   [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA
 1BE6          ; Extend # Mn       BATAK SIGN TOMPI
 1BE8..1BE9    ; Extend # Mn   [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE
 1BED          ; Extend # Mn       BATAK VOWEL SIGN KARO O
 1BEF..1BF1    ; Extend # Mn   [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H
+1BF2..1BF3    ; Extend # Mc   [2] BATAK PANGOLAT..BATAK PANONGONAN
 1C2C..1C33    ; Extend # Mn   [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T
 1C36..1C37    ; Extend # Mn   [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA
 1CD0..1CD2    ; Extend # Mn   [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
@@ -289,10 +300,12 @@ A8E0..A8F1    ; Extend # Mn  [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEV
 A8FF          ; Extend # Mn       DEVANAGARI VOWEL SIGN AY
 A926..A92D    ; Extend # Mn   [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU
 A947..A951    ; Extend # Mn  [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R
+A953          ; Extend # Mc       REJANG VIRAMA
 A980..A982    ; Extend # Mn   [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR
 A9B3          ; Extend # Mn       JAVANESE SIGN CECAK TELU
 A9B6..A9B9    ; Extend # Mn   [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT
 A9BC..A9BD    ; Extend # Mn   [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET
+A9C0          ; Extend # Mc       JAVANESE PANGKON
 A9E5          ; Extend # Mn       MYANMAR SIGN SHAN SAW
 AA29..AA2E    ; Extend # Mn   [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE
 AA31..AA32    ; Extend # Mn   [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE
@@ -324,8 +337,9 @@ FF9E..FF9F    ; Extend # Lm   [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
 10A3F         ; Extend # Mn       KHAROSHTHI VIRAMA
 10AE5..10AE6  ; Extend # Mn   [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
 10D24..10D27  ; Extend # Mn   [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
+10D69..10D6D  ; Extend # Mn   [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK
 10EAB..10EAC  ; Extend # Mn   [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
-10EFD..10EFF  ; Extend # Mn   [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
+10EFC..10EFF  ; Extend # Mn   [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA
 10F46..10F50  ; Extend # Mn  [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
 10F82..10F85  ; Extend # Mn   [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
 11001         ; Extend # Mn       BRAHMI SIGN ANUSVARA
@@ -342,10 +356,12 @@ FF9E..FF9F    ; Extend # Lm   [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
 11173         ; Extend # Mn       MAHAJANI SIGN NUKTA
 11180..11181  ; Extend # Mn   [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA
 111B6..111BE  ; Extend # Mn   [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O
+111C0         ; Extend # Mc       SHARADA SIGN VIRAMA
 111C9..111CC  ; Extend # Mn   [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK
 111CF         ; Extend # Mn       SHARADA SIGN INVERTED CANDRABINDU
 1122F..11231  ; Extend # Mn   [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI
 11234         ; Extend # Mn       KHOJKI SIGN ANUSVARA
+11235         ; Extend # Mc       KHOJKI SIGN VIRAMA
 11236..11237  ; Extend # Mn   [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA
 1123E         ; Extend # Mn       KHOJKI SIGN SUKUN
 11241         ; Extend # Mn       KHOJKI VOWEL SIGN VOCALIC R
@@ -355,9 +371,20 @@ FF9E..FF9F    ; Extend # Lm   [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
 1133B..1133C  ; Extend # Mn   [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA
 1133E         ; Extend # Mc       GRANTHA VOWEL SIGN AA
 11340         ; Extend # Mn       GRANTHA VOWEL SIGN II
+1134D         ; Extend # Mc       GRANTHA SIGN VIRAMA
 11357         ; Extend # Mc       GRANTHA AU LENGTH MARK
 11366..1136C  ; Extend # Mn   [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX
 11370..11374  ; Extend # Mn   [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA
+113B8         ; Extend # Mc       TULU-TIGALARI VOWEL SIGN AA
+113BB..113C0  ; Extend # Mn   [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL
+113C2         ; Extend # Mc       TULU-TIGALARI VOWEL SIGN EE
+113C5         ; Extend # Mc       TULU-TIGALARI VOWEL SIGN AI
+113C7..113C9  ; Extend # Mc   [3] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI AU LENGTH MARK
+113CE         ; Extend # Mn       TULU-TIGALARI SIGN VIRAMA
+113CF         ; Extend # Mc       TULU-TIGALARI SIGN LOOPED VIRAMA
+113D0         ; Extend # Mn       TULU-TIGALARI CONJOINER
+113D2         ; Extend # Mn       TULU-TIGALARI GEMINATION MARK
+113E1..113E2  ; Extend # Mn   [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA
 11438..1143F  ; Extend # Mn   [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI
 11442..11444  ; Extend # Mn   [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA
 11446         ; Extend # Mn       NEWA SIGN NUKTA
@@ -379,14 +406,17 @@ FF9E..FF9F    ; Extend # Lm   [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
 116AB         ; Extend # Mn       TAKRI SIGN ANUSVARA
 116AD         ; Extend # Mn       TAKRI VOWEL SIGN AA
 116B0..116B5  ; Extend # Mn   [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU
+116B6         ; Extend # Mc       TAKRI SIGN VIRAMA
 116B7         ; Extend # Mn       TAKRI SIGN NUKTA
-1171D..1171F  ; Extend # Mn   [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA
+1171D         ; Extend # Mn       AHOM CONSONANT SIGN MEDIAL LA
+1171F         ; Extend # Mn       AHOM CONSONANT SIGN MEDIAL LIGATING RA
 11722..11725  ; Extend # Mn   [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU
 11727..1172B  ; Extend # Mn   [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER
 1182F..11837  ; Extend # Mn   [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA
 11839..1183A  ; Extend # Mn   [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA
 11930         ; Extend # Mc       DIVES AKURU VOWEL SIGN AA
 1193B..1193C  ; Extend # Mn   [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU
+1193D         ; Extend # Mc       DIVES AKURU SIGN HALANTA
 1193E         ; Extend # Mn       DIVES AKURU VIRAMA
 11943         ; Extend # Mn       DIVES AKURU SIGN NUKTA
 119D4..119D7  ; Extend # Mn   [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR
@@ -419,20 +449,25 @@ FF9E..FF9F    ; Extend # Lm   [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
 11F00..11F01  ; Extend # Mn   [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA
 11F36..11F3A  ; Extend # Mn   [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R
 11F40         ; Extend # Mn       KAWI VOWEL SIGN EU
+11F41         ; Extend # Mc       KAWI SIGN KILLER
 11F42         ; Extend # Mn       KAWI CONJOINER
+11F5A         ; Extend # Mn       KAWI SIGN NUKTA
 13440         ; Extend # Mn       EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY
 13447..13455  ; Extend # Mn  [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED
+1611E..16129  ; Extend # Mn  [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK
+1612D..1612F  ; Extend # Mn   [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA
 16AF0..16AF4  ; Extend # Mn   [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE
 16B30..16B36  ; Extend # Mn   [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
 16F4F         ; Extend # Mn       MIAO SIGN CONSONANT MODIFIER BAR
 16F8F..16F92  ; Extend # Mn   [4] MIAO TONE RIGHT..MIAO TONE BELOW
 16FE4         ; Extend # Mn       KHITAN SMALL SCRIPT FILLER
+16FF0..16FF1  ; Extend # Mc   [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
 1BC9D..1BC9E  ; Extend # Mn   [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
 1CF00..1CF2D  ; Extend # Mn  [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT
 1CF30..1CF46  ; Extend # Mn  [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG
-1D165         ; Extend # Mc       MUSICAL SYMBOL COMBINING STEM
+1D165..1D166  ; Extend # Mc   [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
 1D167..1D169  ; Extend # Mn   [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
-1D16E..1D172  ; Extend # Mc   [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5
+1D16D..1D172  ; Extend # Mc   [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5
 1D17B..1D182  ; Extend # Mn   [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
 1D185..1D18B  ; Extend # Mn   [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
 1D1AA..1D1AD  ; Extend # Mn   [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
@@ -453,13 +488,14 @@ FF9E..FF9F    ; Extend # Lm   [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
 1E2AE         ; Extend # Mn       TOTO SIGN RISING TONE
 1E2EC..1E2EF  ; Extend # Mn   [4] WANCHO TONE TUP..WANCHO TONE KOINI
 1E4EC..1E4EF  ; Extend # Mn   [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH
+1E5EE..1E5EF  ; Extend # Mn   [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR
 1E8D0..1E8D6  ; Extend # Mn   [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
 1E944..1E94A  ; Extend # Mn   [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
 1F3FB..1F3FF  ; Extend # Sk   [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6
 E0020..E007F  ; Extend # Cf  [96] TAG SPACE..CANCEL TAG
 E0100..E01EF  ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
 
-# Total code points: 2130
+# Total code points: 2198
 
 # ================================================
 
@@ -496,10 +532,8 @@ E0100..E01EF  ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
 0C41..0C44    ; SpacingMark # Mc   [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR
 0C82..0C83    ; SpacingMark # Mc   [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA
 0CBE          ; SpacingMark # Mc       KANNADA VOWEL SIGN AA
-0CC0..0CC1    ; SpacingMark # Mc   [2] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN U
+0CC1          ; SpacingMark # Mc       KANNADA VOWEL SIGN U
 0CC3..0CC4    ; SpacingMark # Mc   [2] KANNADA VOWEL SIGN VOCALIC R..KANNADA VOWEL SIGN VOCALIC RR
-0CC7..0CC8    ; SpacingMark # Mc   [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI
-0CCA..0CCB    ; SpacingMark # Mc   [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
 0CF3          ; SpacingMark # Mc       KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT
 0D02..0D03    ; SpacingMark # Mc   [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
 0D3F..0D40    ; SpacingMark # Mc   [2] MALAYALAM VOWEL SIGN I..MALAYALAM VOWEL SIGN II
@@ -517,8 +551,6 @@ E0100..E01EF  ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
 103B..103C    ; SpacingMark # Mc   [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA
 1056..1057    ; SpacingMark # Mc   [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR
 1084          ; SpacingMark # Mc       MYANMAR VOWEL SIGN SHAN E
-1715          ; SpacingMark # Mc       TAGALOG SIGN PAMUDPOD
-1734          ; SpacingMark # Mc       HANUNOO SIGN PAMUDPOD
 17B6          ; SpacingMark # Mc       KHMER VOWEL SIGN AA
 17BE..17C5    ; SpacingMark # Mc   [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU
 17C7..17C8    ; SpacingMark # Mc   [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU
@@ -531,17 +563,13 @@ E0100..E01EF  ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
 1A57          ; SpacingMark # Mc       TAI THAM CONSONANT SIGN LA TANG LAI
 1A6D..1A72    ; SpacingMark # Mc   [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI
 1B04          ; SpacingMark # Mc       BALINESE SIGN BISAH
-1B3B          ; SpacingMark # Mc       BALINESE VOWEL SIGN RA REPA TEDUNG
-1B3D..1B41    ; SpacingMark # Mc   [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG
-1B43..1B44    ; SpacingMark # Mc   [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG
+1B3E..1B41    ; SpacingMark # Mc   [4] BALINESE VOWEL SIGN TALING..BALINESE VOWEL SIGN TALING REPA TEDUNG
 1B82          ; SpacingMark # Mc       SUNDANESE SIGN PANGWISAD
 1BA1          ; SpacingMark # Mc       SUNDANESE CONSONANT SIGN PAMINGKAL
 1BA6..1BA7    ; SpacingMark # Mc   [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG
-1BAA          ; SpacingMark # Mc       SUNDANESE SIGN PAMAAEH
 1BE7          ; SpacingMark # Mc       BATAK VOWEL SIGN E
 1BEA..1BEC    ; SpacingMark # Mc   [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O
 1BEE          ; SpacingMark # Mc       BATAK VOWEL SIGN U
-1BF2..1BF3    ; SpacingMark # Mc   [2] BATAK PANGOLAT..BATAK PANONGONAN
 1C24..1C2B    ; SpacingMark # Mc   [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU
 1C34..1C35    ; SpacingMark # Mc   [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG
 1CE1          ; SpacingMark # Mc       VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA
@@ -550,11 +578,11 @@ A823..A824    ; SpacingMark # Mc   [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI V
 A827          ; SpacingMark # Mc       SYLOTI NAGRI VOWEL SIGN OO
 A880..A881    ; SpacingMark # Mc   [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA
 A8B4..A8C3    ; SpacingMark # Mc  [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU
-A952..A953    ; SpacingMark # Mc   [2] REJANG CONSONANT SIGN H..REJANG VIRAMA
+A952          ; SpacingMark # Mc       REJANG CONSONANT SIGN H
 A983          ; SpacingMark # Mc       JAVANESE SIGN WIGNYAN
 A9B4..A9B5    ; SpacingMark # Mc   [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG
 A9BA..A9BB    ; SpacingMark # Mc   [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE
-A9BE..A9C0    ; SpacingMark # Mc   [3] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE PANGKON
+A9BE..A9BF    ; SpacingMark # Mc   [2] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE CONSONANT SIGN CAKRA
 AA2F..AA30    ; SpacingMark # Mc   [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI
 AA33..AA34    ; SpacingMark # Mc   [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA
 AA4D          ; SpacingMark # Mc       CHAM CONSONANT SIGN FINAL H
@@ -574,18 +602,20 @@ ABEC          ; SpacingMark # Mc       MEETEI MAYEK LUM IYEK
 11145..11146  ; SpacingMark # Mc   [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI
 11182         ; SpacingMark # Mc       SHARADA SIGN VISARGA
 111B3..111B5  ; SpacingMark # Mc   [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II
-111BF..111C0  ; SpacingMark # Mc   [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA
+111BF         ; SpacingMark # Mc       SHARADA VOWEL SIGN AU
 111CE         ; SpacingMark # Mc       SHARADA VOWEL SIGN PRISHTHAMATRA E
 1122C..1122E  ; SpacingMark # Mc   [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II
 11232..11233  ; SpacingMark # Mc   [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU
-11235         ; SpacingMark # Mc       KHOJKI SIGN VIRAMA
 112E0..112E2  ; SpacingMark # Mc   [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II
 11302..11303  ; SpacingMark # Mc   [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA
 1133F         ; SpacingMark # Mc       GRANTHA VOWEL SIGN I
 11341..11344  ; SpacingMark # Mc   [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR
 11347..11348  ; SpacingMark # Mc   [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI
-1134B..1134D  ; SpacingMark # Mc   [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA
+1134B..1134C  ; SpacingMark # Mc   [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU
 11362..11363  ; SpacingMark # Mc   [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL
+113B9..113BA  ; SpacingMark # Mc   [2] TULU-TIGALARI VOWEL SIGN I..TULU-TIGALARI VOWEL SIGN II
+113CA         ; SpacingMark # Mc       TULU-TIGALARI SIGN CANDRA ANUNASIKA
+113CC..113CD  ; SpacingMark # Mc   [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA
 11435..11437  ; SpacingMark # Mc   [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II
 11440..11441  ; SpacingMark # Mc   [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU
 11445         ; SpacingMark # Mc       NEWA SIGN VISARGA
@@ -602,13 +632,12 @@ ABEC          ; SpacingMark # Mc       MEETEI MAYEK LUM IYEK
 1163E         ; SpacingMark # Mc       MODI SIGN VISARGA
 116AC         ; SpacingMark # Mc       TAKRI SIGN VISARGA
 116AE..116AF  ; SpacingMark # Mc   [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II
-116B6         ; SpacingMark # Mc       TAKRI SIGN VIRAMA
+1171E         ; SpacingMark # Mc       AHOM CONSONANT SIGN MEDIAL RA
 11726         ; SpacingMark # Mc       AHOM VOWEL SIGN E
 1182C..1182E  ; SpacingMark # Mc   [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II
 11838         ; SpacingMark # Mc       DOGRA SIGN VISARGA
 11931..11935  ; SpacingMark # Mc   [5] DIVES AKURU VOWEL SIGN I..DIVES AKURU VOWEL SIGN E
 11937..11938  ; SpacingMark # Mc   [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O
-1193D         ; SpacingMark # Mc       DIVES AKURU SIGN HALANTA
 11940         ; SpacingMark # Mc       DIVES AKURU MEDIAL YA
 11942         ; SpacingMark # Mc       DIVES AKURU MEDIAL RA
 119D1..119D3  ; SpacingMark # Mc   [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II
@@ -629,13 +658,10 @@ ABEC          ; SpacingMark # Mc       MEETEI MAYEK LUM IYEK
 11F03         ; SpacingMark # Mc       KAWI SIGN VISARGA
 11F34..11F35  ; SpacingMark # Mc   [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA
 11F3E..11F3F  ; SpacingMark # Mc   [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI
-11F41         ; SpacingMark # Mc       KAWI SIGN KILLER
+1612A..1612C  ; SpacingMark # Mc   [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA
 16F51..16F87  ; SpacingMark # Mc  [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI
-16FF0..16FF1  ; SpacingMark # Mc   [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
-1D166         ; SpacingMark # Mc       MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
-1D16D         ; SpacingMark # Mc       MUSICAL SYMBOL COMBINING AUGMENTATION DOT
 
-# Total code points: 395
+# Total code points: 378
 
 # ================================================
 
@@ -648,8 +674,10 @@ A960..A97C    ; L # Lo  [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANG
 
 1160..11A7    ; V # Lo  [72] HANGUL JUNGSEONG FILLER..HANGUL JUNGSEONG O-YAE
 D7B0..D7C6    ; V # Lo  [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E
+16D63         ; V # Lo       KIRAT RAI VOWEL SIGN AA
+16D67..16D6A  ; V # Lo   [4] KIRAT RAI VOWEL SIGN E..KIRAT RAI VOWEL SIGN AU
 
-# Total code points: 95
+# Total code points: 100
 
 # ================================================
 
diff --git a/libcxx/utils/data/unicode/GraphemeBreakTest.txt b/libcxx/utils/data/unicode/GraphemeBreakTest.txt
index 4c1ed51..d10c174 100644
--- a/libcxx/utils/data/unicode/GraphemeBreakTest.txt
+++ b/libcxx/utils/data/unicode/GraphemeBreakTest.txt
@@ -1,8 +1,8 @@
-# GraphemeBreakTest-15.1.0.txt
-# Date: 2023-08-07, 15:52:55 GMT
-# © 2023 Unicode®, Inc.
+# GraphemeBreakTest-16.0.0.txt
+# Date: 2024-05-02, 15:02:48 GMT
+# © 2024 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
-# For terms of use, see https://www.unicode.org/terms_of_use.html
+# For terms of use and license, see https://www.unicode.org/terms_of_use.html
 #
 # Unicode Character Database
 #   For documentation, see https://www.unicode.org/reports/tr44/
@@ -30,8 +30,8 @@
 ÷ 0020 × 0308 ÷ 000A ÷	#  ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
 ÷ 0020 ÷ 0001 ÷	#  ÷ [0.2] SPACE (Other) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
 ÷ 0020 × 0308 ÷ 0001 ÷	#  ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
-÷ 0020 × 034F ÷	#  ÷ [0.2] SPACE (Other) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
-÷ 0020 × 0308 × 034F ÷	#  ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0020 × 200C ÷	#  ÷ [0.2] SPACE (Other) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
+÷ 0020 × 0308 × 200C ÷	#  ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
 ÷ 0020 ÷ 1F1E6 ÷	#  ÷ [0.2] SPACE (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 0020 × 0308 ÷ 1F1E6 ÷	#  ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 0020 ÷ 0600 ÷	#  ÷ [0.2] SPACE (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
@@ -48,8 +48,6 @@
 ÷ 0020 × 0308 ÷ AC00 ÷	#  ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
 ÷ 0020 ÷ AC01 ÷	#  ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
 ÷ 0020 × 0308 ÷ AC01 ÷	#  ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
-÷ 0020 × 0900 ÷	#  ÷ [0.2] SPACE (Other) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
-÷ 0020 × 0308 × 0900 ÷	#  ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 0020 × 0903 ÷	#  ÷ [0.2] SPACE (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 0020 × 0308 × 0903 ÷	#  ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 0020 ÷ 0904 ÷	#  ÷ [0.2] SPACE (Other) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
@@ -62,8 +60,8 @@
 ÷ 0020 × 0308 ÷ 231A ÷	#  ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ 0020 × 0300 ÷	#  ÷ [0.2] SPACE (Other) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
 ÷ 0020 × 0308 × 0300 ÷	#  ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
-÷ 0020 × 093C ÷	#  ÷ [0.2] SPACE (Other) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
-÷ 0020 × 0308 × 093C ÷	#  ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0020 × 0900 ÷	#  ÷ [0.2] SPACE (Other) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0020 × 0308 × 0900 ÷	#  ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
 ÷ 0020 × 094D ÷	#  ÷ [0.2] SPACE (Other) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 0020 × 0308 × 094D ÷	#  ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 0020 × 200D ÷	#  ÷ [0.2] SPACE (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
@@ -78,8 +76,8 @@
 ÷ 000D ÷ 0308 ÷ 000A ÷	#  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
 ÷ 000D ÷ 0001 ÷	#  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3]
 ÷ 000D ÷ 0308 ÷ 0001 ÷	#  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
-÷ 000D ÷ 034F ÷	#  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
-÷ 000D ÷ 0308 × 034F ÷	#  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 000D ÷ 200C ÷	#  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
+÷ 000D ÷ 0308 × 200C ÷	#  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
 ÷ 000D ÷ 1F1E6 ÷	#  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 000D ÷ 0308 ÷ 1F1E6 ÷	#  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 000D ÷ 0600 ÷	#  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
@@ -96,8 +94,6 @@
 ÷ 000D ÷ 0308 ÷ AC00 ÷	#  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
 ÷ 000D ÷ AC01 ÷	#  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
 ÷ 000D ÷ 0308 ÷ AC01 ÷	#  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
-÷ 000D ÷ 0900 ÷	#  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
-÷ 000D ÷ 0308 × 0900 ÷	#  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 000D ÷ 0903 ÷	#  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 000D ÷ 0308 × 0903 ÷	#  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 000D ÷ 0904 ÷	#  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
@@ -110,8 +106,8 @@
 ÷ 000D ÷ 0308 ÷ 231A ÷	#  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ 000D ÷ 0300 ÷	#  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
 ÷ 000D ÷ 0308 × 0300 ÷	#  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
-÷ 000D ÷ 093C ÷	#  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
-÷ 000D ÷ 0308 × 093C ÷	#  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 000D ÷ 0900 ÷	#  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 000D ÷ 0308 × 0900 ÷	#  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
 ÷ 000D ÷ 094D ÷	#  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 000D ÷ 0308 × 094D ÷	#  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 000D ÷ 200D ÷	#  ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
@@ -126,8 +122,8 @@
 ÷ 000A ÷ 0308 ÷ 000A ÷	#  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
 ÷ 000A ÷ 0001 ÷	#  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3]
 ÷ 000A ÷ 0308 ÷ 0001 ÷	#  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
-÷ 000A ÷ 034F ÷	#  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
-÷ 000A ÷ 0308 × 034F ÷	#  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 000A ÷ 200C ÷	#  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
+÷ 000A ÷ 0308 × 200C ÷	#  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
 ÷ 000A ÷ 1F1E6 ÷	#  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 000A ÷ 0308 ÷ 1F1E6 ÷	#  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 000A ÷ 0600 ÷	#  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
@@ -144,8 +140,6 @@
 ÷ 000A ÷ 0308 ÷ AC00 ÷	#  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
 ÷ 000A ÷ AC01 ÷	#  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
 ÷ 000A ÷ 0308 ÷ AC01 ÷	#  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
-÷ 000A ÷ 0900 ÷	#  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
-÷ 000A ÷ 0308 × 0900 ÷	#  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 000A ÷ 0903 ÷	#  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 000A ÷ 0308 × 0903 ÷	#  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 000A ÷ 0904 ÷	#  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
@@ -158,8 +152,8 @@
 ÷ 000A ÷ 0308 ÷ 231A ÷	#  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ 000A ÷ 0300 ÷	#  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
 ÷ 000A ÷ 0308 × 0300 ÷	#  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
-÷ 000A ÷ 093C ÷	#  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
-÷ 000A ÷ 0308 × 093C ÷	#  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 000A ÷ 0900 ÷	#  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 000A ÷ 0308 × 0900 ÷	#  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
 ÷ 000A ÷ 094D ÷	#  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 000A ÷ 0308 × 094D ÷	#  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 000A ÷ 200D ÷	#  ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
@@ -174,8 +168,8 @@
 ÷ 0001 ÷ 0308 ÷ 000A ÷	#  ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
 ÷ 0001 ÷ 0001 ÷	#  ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3]
 ÷ 0001 ÷ 0308 ÷ 0001 ÷	#  ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
-÷ 0001 ÷ 034F ÷	#  ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
-÷ 0001 ÷ 0308 × 034F ÷	#  ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0001 ÷ 200C ÷	#  ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
+÷ 0001 ÷ 0308 × 200C ÷	#  ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
 ÷ 0001 ÷ 1F1E6 ÷	#  ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 0001 ÷ 0308 ÷ 1F1E6 ÷	#  ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 0001 ÷ 0600 ÷	#  ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
@@ -192,8 +186,6 @@
 ÷ 0001 ÷ 0308 ÷ AC00 ÷	#  ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
 ÷ 0001 ÷ AC01 ÷	#  ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
 ÷ 0001 ÷ 0308 ÷ AC01 ÷	#  ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
-÷ 0001 ÷ 0900 ÷	#  ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
-÷ 0001 ÷ 0308 × 0900 ÷	#  ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 0001 ÷ 0903 ÷	#  ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 0001 ÷ 0308 × 0903 ÷	#  ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 0001 ÷ 0904 ÷	#  ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
@@ -206,62 +198,60 @@
 ÷ 0001 ÷ 0308 ÷ 231A ÷	#  ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ 0001 ÷ 0300 ÷	#  ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
 ÷ 0001 ÷ 0308 × 0300 ÷	#  ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
-÷ 0001 ÷ 093C ÷	#  ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
-÷ 0001 ÷ 0308 × 093C ÷	#  ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0001 ÷ 0900 ÷	#  ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0001 ÷ 0308 × 0900 ÷	#  ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
 ÷ 0001 ÷ 094D ÷	#  ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 0001 ÷ 0308 × 094D ÷	#  ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 0001 ÷ 200D ÷	#  ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
 ÷ 0001 ÷ 0308 × 200D ÷	#  ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
 ÷ 0001 ÷ 0378 ÷	#  ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
 ÷ 0001 ÷ 0308 ÷ 0378 ÷	#  ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
-÷ 034F ÷ 0020 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
-÷ 034F × 0308 ÷ 0020 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
-÷ 034F ÷ 000D ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
-÷ 034F × 0308 ÷ 000D ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
-÷ 034F ÷ 000A ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
-÷ 034F × 0308 ÷ 000A ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
-÷ 034F ÷ 0001 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
-÷ 034F × 0308 ÷ 0001 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
-÷ 034F × 034F ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
-÷ 034F × 0308 × 034F ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
-÷ 034F ÷ 1F1E6 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
-÷ 034F × 0308 ÷ 1F1E6 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
-÷ 034F ÷ 0600 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
-÷ 034F × 0308 ÷ 0600 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
-÷ 034F × 0A03 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
-÷ 034F × 0308 × 0A03 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
-÷ 034F ÷ 1100 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
-÷ 034F × 0308 ÷ 1100 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
-÷ 034F ÷ 1160 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
-÷ 034F × 0308 ÷ 1160 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
-÷ 034F ÷ 11A8 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
-÷ 034F × 0308 ÷ 11A8 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
-÷ 034F ÷ AC00 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
-÷ 034F × 0308 ÷ AC00 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
-÷ 034F ÷ AC01 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
-÷ 034F × 0308 ÷ AC01 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
-÷ 034F × 0900 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
-÷ 034F × 0308 × 0900 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
-÷ 034F × 0903 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
-÷ 034F × 0308 × 0903 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
-÷ 034F ÷ 0904 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
-÷ 034F × 0308 ÷ 0904 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
-÷ 034F ÷ 0D4E ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
-÷ 034F × 0308 ÷ 0D4E ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
-÷ 034F ÷ 0915 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
-÷ 034F × 0308 ÷ 0915 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
-÷ 034F ÷ 231A ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
-÷ 034F × 0308 ÷ 231A ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
-÷ 034F × 0300 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
-÷ 034F × 0308 × 0300 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
-÷ 034F × 093C ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
-÷ 034F × 0308 × 093C ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
-÷ 034F × 094D ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
-÷ 034F × 0308 × 094D ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
-÷ 034F × 200D ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
-÷ 034F × 0308 × 200D ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
-÷ 034F ÷ 0378 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
-÷ 034F × 0308 ÷ 0378 ÷	#  ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 200C ÷ 0020 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 200C × 0308 ÷ 0020 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 200C ÷ 000D ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 200C × 0308 ÷ 000D ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 200C ÷ 000A ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 200C × 0308 ÷ 000A ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 200C ÷ 0001 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 200C × 0308 ÷ 0001 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 200C × 200C ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
+÷ 200C × 0308 × 200C ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
+÷ 200C ÷ 1F1E6 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 200C × 0308 ÷ 1F1E6 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 200C ÷ 0600 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 200C × 0308 ÷ 0600 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 200C × 0A03 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 200C × 0308 × 0A03 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 200C ÷ 1100 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 200C × 0308 ÷ 1100 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 200C ÷ 1160 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 200C × 0308 ÷ 1160 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 200C ÷ 11A8 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 200C × 0308 ÷ 11A8 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 200C ÷ AC00 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 200C × 0308 ÷ AC00 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 200C ÷ AC01 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 200C × 0308 ÷ AC01 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 200C × 0903 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
+÷ 200C × 0308 × 0903 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
+÷ 200C ÷ 0904 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
+÷ 200C × 0308 ÷ 0904 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
+÷ 200C ÷ 0D4E ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 200C × 0308 ÷ 0D4E ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 200C ÷ 0915 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
+÷ 200C × 0308 ÷ 0915 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
+÷ 200C ÷ 231A ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 200C × 0308 ÷ 231A ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 200C × 0300 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 200C × 0308 × 0300 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 200C × 0900 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 200C × 0308 × 0900 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 200C × 094D ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
+÷ 200C × 0308 × 094D ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
+÷ 200C × 200D ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 200C × 0308 × 200D ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 200C ÷ 0378 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 200C × 0308 ÷ 0378 ÷	#  ÷ [0.2] ZERO WIDTH NON-JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
 ÷ 1F1E6 ÷ 0020 ÷	#  ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] SPACE (Other) ÷ [0.3]
 ÷ 1F1E6 × 0308 ÷ 0020 ÷	#  ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
 ÷ 1F1E6 ÷ 000D ÷	#  ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
@@ -270,8 +260,8 @@
 ÷ 1F1E6 × 0308 ÷ 000A ÷	#  ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
 ÷ 1F1E6 ÷ 0001 ÷	#  ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
 ÷ 1F1E6 × 0308 ÷ 0001 ÷	#  ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
-÷ 1F1E6 × 034F ÷	#  ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
-÷ 1F1E6 × 0308 × 034F ÷	#  ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 1F1E6 × 200C ÷	#  ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
+÷ 1F1E6 × 0308 × 200C ÷	#  ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
 ÷ 1F1E6 × 1F1E6 ÷	#  ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [12.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 1F1E6 × 0308 ÷ 1F1E6 ÷	#  ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 1F1E6 ÷ 0600 ÷	#  ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
@@ -288,8 +278,6 @@
 ÷ 1F1E6 × 0308 ÷ AC00 ÷	#  ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
 ÷ 1F1E6 ÷ AC01 ÷	#  ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
 ÷ 1F1E6 × 0308 ÷ AC01 ÷	#  ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
-÷ 1F1E6 × 0900 ÷	#  ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
-÷ 1F1E6 × 0308 × 0900 ÷	#  ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 1F1E6 × 0903 ÷	#  ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 1F1E6 × 0308 × 0903 ÷	#  ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 1F1E6 ÷ 0904 ÷	#  ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
@@ -302,8 +290,8 @@
 ÷ 1F1E6 × 0308 ÷ 231A ÷	#  ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ 1F1E6 × 0300 ÷	#  ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
 ÷ 1F1E6 × 0308 × 0300 ÷	#  ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
-÷ 1F1E6 × 093C ÷	#  ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
-÷ 1F1E6 × 0308 × 093C ÷	#  ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 1F1E6 × 0900 ÷	#  ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 1F1E6 × 0308 × 0900 ÷	#  ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
 ÷ 1F1E6 × 094D ÷	#  ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 1F1E6 × 0308 × 094D ÷	#  ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 1F1E6 × 200D ÷	#  ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
@@ -318,8 +306,8 @@
 ÷ 0600 × 0308 ÷ 000A ÷	#  ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
 ÷ 0600 ÷ 0001 ÷	#  ÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
 ÷ 0600 × 0308 ÷ 0001 ÷	#  ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
-÷ 0600 × 034F ÷	#  ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
-÷ 0600 × 0308 × 034F ÷	#  ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0600 × 200C ÷	#  ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
+÷ 0600 × 0308 × 200C ÷	#  ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
 ÷ 0600 × 1F1E6 ÷	#  ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 0600 × 0308 ÷ 1F1E6 ÷	#  ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 0600 × 0600 ÷	#  ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
@@ -336,8 +324,6 @@
 ÷ 0600 × 0308 ÷ AC00 ÷	#  ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
 ÷ 0600 × AC01 ÷	#  ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
 ÷ 0600 × 0308 ÷ AC01 ÷	#  ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
-÷ 0600 × 0900 ÷	#  ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
-÷ 0600 × 0308 × 0900 ÷	#  ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 0600 × 0903 ÷	#  ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 0600 × 0308 × 0903 ÷	#  ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 0600 × 0904 ÷	#  ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
@@ -350,8 +336,8 @@
 ÷ 0600 × 0308 ÷ 231A ÷	#  ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ 0600 × 0300 ÷	#  ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
 ÷ 0600 × 0308 × 0300 ÷	#  ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
-÷ 0600 × 093C ÷	#  ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
-÷ 0600 × 0308 × 093C ÷	#  ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0600 × 0900 ÷	#  ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0600 × 0308 × 0900 ÷	#  ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
 ÷ 0600 × 094D ÷	#  ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 0600 × 0308 × 094D ÷	#  ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 0600 × 200D ÷	#  ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
@@ -366,8 +352,8 @@
 ÷ 0A03 × 0308 ÷ 000A ÷	#  ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
 ÷ 0A03 ÷ 0001 ÷	#  ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
 ÷ 0A03 × 0308 ÷ 0001 ÷	#  ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
-÷ 0A03 × 034F ÷	#  ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
-÷ 0A03 × 0308 × 034F ÷	#  ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0A03 × 200C ÷	#  ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
+÷ 0A03 × 0308 × 200C ÷	#  ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
 ÷ 0A03 ÷ 1F1E6 ÷	#  ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 0A03 × 0308 ÷ 1F1E6 ÷	#  ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 0A03 ÷ 0600 ÷	#  ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
@@ -384,8 +370,6 @@
 ÷ 0A03 × 0308 ÷ AC00 ÷	#  ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
 ÷ 0A03 ÷ AC01 ÷	#  ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
 ÷ 0A03 × 0308 ÷ AC01 ÷	#  ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
-÷ 0A03 × 0900 ÷	#  ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
-÷ 0A03 × 0308 × 0900 ÷	#  ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 0A03 × 0903 ÷	#  ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 0A03 × 0308 × 0903 ÷	#  ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 0A03 ÷ 0904 ÷	#  ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
@@ -398,8 +382,8 @@
 ÷ 0A03 × 0308 ÷ 231A ÷	#  ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ 0A03 × 0300 ÷	#  ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
 ÷ 0A03 × 0308 × 0300 ÷	#  ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
-÷ 0A03 × 093C ÷	#  ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
-÷ 0A03 × 0308 × 093C ÷	#  ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0A03 × 0900 ÷	#  ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0A03 × 0308 × 0900 ÷	#  ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
 ÷ 0A03 × 094D ÷	#  ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 0A03 × 0308 × 094D ÷	#  ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 0A03 × 200D ÷	#  ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
@@ -414,8 +398,8 @@
 ÷ 1100 × 0308 ÷ 000A ÷	#  ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
 ÷ 1100 ÷ 0001 ÷	#  ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
 ÷ 1100 × 0308 ÷ 0001 ÷	#  ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
-÷ 1100 × 034F ÷	#  ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
-÷ 1100 × 0308 × 034F ÷	#  ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 1100 × 200C ÷	#  ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
+÷ 1100 × 0308 × 200C ÷	#  ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
 ÷ 1100 ÷ 1F1E6 ÷	#  ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 1100 × 0308 ÷ 1F1E6 ÷	#  ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 1100 ÷ 0600 ÷	#  ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
@@ -432,8 +416,6 @@
 ÷ 1100 × 0308 ÷ AC00 ÷	#  ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
 ÷ 1100 × AC01 ÷	#  ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
 ÷ 1100 × 0308 ÷ AC01 ÷	#  ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
-÷ 1100 × 0900 ÷	#  ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
-÷ 1100 × 0308 × 0900 ÷	#  ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 1100 × 0903 ÷	#  ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 1100 × 0308 × 0903 ÷	#  ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 1100 ÷ 0904 ÷	#  ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
@@ -446,8 +428,8 @@
 ÷ 1100 × 0308 ÷ 231A ÷	#  ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ 1100 × 0300 ÷	#  ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
 ÷ 1100 × 0308 × 0300 ÷	#  ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
-÷ 1100 × 093C ÷	#  ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
-÷ 1100 × 0308 × 093C ÷	#  ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 1100 × 0900 ÷	#  ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 1100 × 0308 × 0900 ÷	#  ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
 ÷ 1100 × 094D ÷	#  ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 1100 × 0308 × 094D ÷	#  ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 1100 × 200D ÷	#  ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
@@ -462,8 +444,8 @@
 ÷ 1160 × 0308 ÷ 000A ÷	#  ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
 ÷ 1160 ÷ 0001 ÷	#  ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
 ÷ 1160 × 0308 ÷ 0001 ÷	#  ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
-÷ 1160 × 034F ÷	#  ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
-÷ 1160 × 0308 × 034F ÷	#  ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 1160 × 200C ÷	#  ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
+÷ 1160 × 0308 × 200C ÷	#  ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
 ÷ 1160 ÷ 1F1E6 ÷	#  ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 1160 × 0308 ÷ 1F1E6 ÷	#  ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 1160 ÷ 0600 ÷	#  ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
@@ -480,8 +462,6 @@
 ÷ 1160 × 0308 ÷ AC00 ÷	#  ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
 ÷ 1160 ÷ AC01 ÷	#  ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
 ÷ 1160 × 0308 ÷ AC01 ÷	#  ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
-÷ 1160 × 0900 ÷	#  ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
-÷ 1160 × 0308 × 0900 ÷	#  ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 1160 × 0903 ÷	#  ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 1160 × 0308 × 0903 ÷	#  ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 1160 ÷ 0904 ÷	#  ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
@@ -494,8 +474,8 @@
 ÷ 1160 × 0308 ÷ 231A ÷	#  ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ 1160 × 0300 ÷	#  ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
 ÷ 1160 × 0308 × 0300 ÷	#  ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
-÷ 1160 × 093C ÷	#  ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
-÷ 1160 × 0308 × 093C ÷	#  ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 1160 × 0900 ÷	#  ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 1160 × 0308 × 0900 ÷	#  ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
 ÷ 1160 × 094D ÷	#  ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 1160 × 0308 × 094D ÷	#  ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 1160 × 200D ÷	#  ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
@@ -510,8 +490,8 @@
 ÷ 11A8 × 0308 ÷ 000A ÷	#  ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
 ÷ 11A8 ÷ 0001 ÷	#  ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
 ÷ 11A8 × 0308 ÷ 0001 ÷	#  ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
-÷ 11A8 × 034F ÷	#  ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
-÷ 11A8 × 0308 × 034F ÷	#  ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 11A8 × 200C ÷	#  ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
+÷ 11A8 × 0308 × 200C ÷	#  ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
 ÷ 11A8 ÷ 1F1E6 ÷	#  ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 11A8 × 0308 ÷ 1F1E6 ÷	#  ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 11A8 ÷ 0600 ÷	#  ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
@@ -528,8 +508,6 @@
 ÷ 11A8 × 0308 ÷ AC00 ÷	#  ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
 ÷ 11A8 ÷ AC01 ÷	#  ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
 ÷ 11A8 × 0308 ÷ AC01 ÷	#  ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
-÷ 11A8 × 0900 ÷	#  ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
-÷ 11A8 × 0308 × 0900 ÷	#  ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 11A8 × 0903 ÷	#  ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 11A8 × 0308 × 0903 ÷	#  ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 11A8 ÷ 0904 ÷	#  ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
@@ -542,8 +520,8 @@
 ÷ 11A8 × 0308 ÷ 231A ÷	#  ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ 11A8 × 0300 ÷	#  ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
 ÷ 11A8 × 0308 × 0300 ÷	#  ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
-÷ 11A8 × 093C ÷	#  ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
-÷ 11A8 × 0308 × 093C ÷	#  ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 11A8 × 0900 ÷	#  ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 11A8 × 0308 × 0900 ÷	#  ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
 ÷ 11A8 × 094D ÷	#  ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 11A8 × 0308 × 094D ÷	#  ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 11A8 × 200D ÷	#  ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
@@ -558,8 +536,8 @@
 ÷ AC00 × 0308 ÷ 000A ÷	#  ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
 ÷ AC00 ÷ 0001 ÷	#  ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
 ÷ AC00 × 0308 ÷ 0001 ÷	#  ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
-÷ AC00 × 034F ÷	#  ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
-÷ AC00 × 0308 × 034F ÷	#  ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ AC00 × 200C ÷	#  ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
+÷ AC00 × 0308 × 200C ÷	#  ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
 ÷ AC00 ÷ 1F1E6 ÷	#  ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ AC00 × 0308 ÷ 1F1E6 ÷	#  ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ AC00 ÷ 0600 ÷	#  ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
@@ -576,8 +554,6 @@
 ÷ AC00 × 0308 ÷ AC00 ÷	#  ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
 ÷ AC00 ÷ AC01 ÷	#  ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
 ÷ AC00 × 0308 ÷ AC01 ÷	#  ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
-÷ AC00 × 0900 ÷	#  ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
-÷ AC00 × 0308 × 0900 ÷	#  ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
 ÷ AC00 × 0903 ÷	#  ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ AC00 × 0308 × 0903 ÷	#  ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ AC00 ÷ 0904 ÷	#  ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
@@ -590,8 +566,8 @@
 ÷ AC00 × 0308 ÷ 231A ÷	#  ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ AC00 × 0300 ÷	#  ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
 ÷ AC00 × 0308 × 0300 ÷	#  ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
-÷ AC00 × 093C ÷	#  ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
-÷ AC00 × 0308 × 093C ÷	#  ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ AC00 × 0900 ÷	#  ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ AC00 × 0308 × 0900 ÷	#  ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
 ÷ AC00 × 094D ÷	#  ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ AC00 × 0308 × 094D ÷	#  ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ AC00 × 200D ÷	#  ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
@@ -606,8 +582,8 @@
 ÷ AC01 × 0308 ÷ 000A ÷	#  ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
 ÷ AC01 ÷ 0001 ÷	#  ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
 ÷ AC01 × 0308 ÷ 0001 ÷	#  ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
-÷ AC01 × 034F ÷	#  ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
-÷ AC01 × 0308 × 034F ÷	#  ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ AC01 × 200C ÷	#  ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
+÷ AC01 × 0308 × 200C ÷	#  ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
 ÷ AC01 ÷ 1F1E6 ÷	#  ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ AC01 × 0308 ÷ 1F1E6 ÷	#  ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ AC01 ÷ 0600 ÷	#  ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
@@ -624,8 +600,6 @@
 ÷ AC01 × 0308 ÷ AC00 ÷	#  ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
 ÷ AC01 ÷ AC01 ÷	#  ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
 ÷ AC01 × 0308 ÷ AC01 ÷	#  ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
-÷ AC01 × 0900 ÷	#  ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
-÷ AC01 × 0308 × 0900 ÷	#  ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
 ÷ AC01 × 0903 ÷	#  ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ AC01 × 0308 × 0903 ÷	#  ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ AC01 ÷ 0904 ÷	#  ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
@@ -638,62 +612,14 @@
 ÷ AC01 × 0308 ÷ 231A ÷	#  ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ AC01 × 0300 ÷	#  ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
 ÷ AC01 × 0308 × 0300 ÷	#  ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
-÷ AC01 × 093C ÷	#  ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
-÷ AC01 × 0308 × 093C ÷	#  ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ AC01 × 0900 ÷	#  ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ AC01 × 0308 × 0900 ÷	#  ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
 ÷ AC01 × 094D ÷	#  ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ AC01 × 0308 × 094D ÷	#  ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ AC01 × 200D ÷	#  ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
 ÷ AC01 × 0308 × 200D ÷	#  ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
 ÷ AC01 ÷ 0378 ÷	#  ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
 ÷ AC01 × 0308 ÷ 0378 ÷	#  ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
-÷ 0900 ÷ 0020 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] SPACE (Other) ÷ [0.3]
-÷ 0900 × 0308 ÷ 0020 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
-÷ 0900 ÷ 000D ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
-÷ 0900 × 0308 ÷ 000D ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
-÷ 0900 ÷ 000A ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
-÷ 0900 × 0308 ÷ 000A ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
-÷ 0900 ÷ 0001 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
-÷ 0900 × 0308 ÷ 0001 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
-÷ 0900 × 034F ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
-÷ 0900 × 0308 × 034F ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
-÷ 0900 ÷ 1F1E6 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
-÷ 0900 × 0308 ÷ 1F1E6 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
-÷ 0900 ÷ 0600 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
-÷ 0900 × 0308 ÷ 0600 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
-÷ 0900 × 0A03 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
-÷ 0900 × 0308 × 0A03 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
-÷ 0900 ÷ 1100 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
-÷ 0900 × 0308 ÷ 1100 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
-÷ 0900 ÷ 1160 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
-÷ 0900 × 0308 ÷ 1160 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
-÷ 0900 ÷ 11A8 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
-÷ 0900 × 0308 ÷ 11A8 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
-÷ 0900 ÷ AC00 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
-÷ 0900 × 0308 ÷ AC00 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
-÷ 0900 ÷ AC01 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
-÷ 0900 × 0308 ÷ AC01 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
-÷ 0900 × 0900 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
-÷ 0900 × 0308 × 0900 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
-÷ 0900 × 0903 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
-÷ 0900 × 0308 × 0903 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
-÷ 0900 ÷ 0904 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
-÷ 0900 × 0308 ÷ 0904 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
-÷ 0900 ÷ 0D4E ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
-÷ 0900 × 0308 ÷ 0D4E ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
-÷ 0900 ÷ 0915 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
-÷ 0900 × 0308 ÷ 0915 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
-÷ 0900 ÷ 231A ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
-÷ 0900 × 0308 ÷ 231A ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
-÷ 0900 × 0300 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
-÷ 0900 × 0308 × 0300 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
-÷ 0900 × 093C ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
-÷ 0900 × 0308 × 093C ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
-÷ 0900 × 094D ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
-÷ 0900 × 0308 × 094D ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
-÷ 0900 × 200D ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
-÷ 0900 × 0308 × 200D ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
-÷ 0900 ÷ 0378 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
-÷ 0900 × 0308 ÷ 0378 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
 ÷ 0903 ÷ 0020 ÷	#  ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] SPACE (Other) ÷ [0.3]
 ÷ 0903 × 0308 ÷ 0020 ÷	#  ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
 ÷ 0903 ÷ 000D ÷	#  ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
@@ -702,8 +628,8 @@
 ÷ 0903 × 0308 ÷ 000A ÷	#  ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
 ÷ 0903 ÷ 0001 ÷	#  ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
 ÷ 0903 × 0308 ÷ 0001 ÷	#  ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
-÷ 0903 × 034F ÷	#  ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
-÷ 0903 × 0308 × 034F ÷	#  ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0903 × 200C ÷	#  ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
+÷ 0903 × 0308 × 200C ÷	#  ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
 ÷ 0903 ÷ 1F1E6 ÷	#  ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 0903 × 0308 ÷ 1F1E6 ÷	#  ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 0903 ÷ 0600 ÷	#  ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
@@ -720,8 +646,6 @@
 ÷ 0903 × 0308 ÷ AC00 ÷	#  ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
 ÷ 0903 ÷ AC01 ÷	#  ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
 ÷ 0903 × 0308 ÷ AC01 ÷	#  ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
-÷ 0903 × 0900 ÷	#  ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
-÷ 0903 × 0308 × 0900 ÷	#  ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 0903 × 0903 ÷	#  ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 0903 × 0308 × 0903 ÷	#  ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 0903 ÷ 0904 ÷	#  ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
@@ -734,8 +658,8 @@
 ÷ 0903 × 0308 ÷ 231A ÷	#  ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ 0903 × 0300 ÷	#  ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
 ÷ 0903 × 0308 × 0300 ÷	#  ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
-÷ 0903 × 093C ÷	#  ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
-÷ 0903 × 0308 × 093C ÷	#  ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0903 × 0900 ÷	#  ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0903 × 0308 × 0900 ÷	#  ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
 ÷ 0903 × 094D ÷	#  ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 0903 × 0308 × 094D ÷	#  ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 0903 × 200D ÷	#  ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
@@ -750,8 +674,8 @@
 ÷ 0904 × 0308 ÷ 000A ÷	#  ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
 ÷ 0904 ÷ 0001 ÷	#  ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
 ÷ 0904 × 0308 ÷ 0001 ÷	#  ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
-÷ 0904 × 034F ÷	#  ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
-÷ 0904 × 0308 × 034F ÷	#  ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0904 × 200C ÷	#  ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
+÷ 0904 × 0308 × 200C ÷	#  ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
 ÷ 0904 ÷ 1F1E6 ÷	#  ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 0904 × 0308 ÷ 1F1E6 ÷	#  ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 0904 ÷ 0600 ÷	#  ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
@@ -768,8 +692,6 @@
 ÷ 0904 × 0308 ÷ AC00 ÷	#  ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
 ÷ 0904 ÷ AC01 ÷	#  ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
 ÷ 0904 × 0308 ÷ AC01 ÷	#  ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
-÷ 0904 × 0900 ÷	#  ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
-÷ 0904 × 0308 × 0900 ÷	#  ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 0904 × 0903 ÷	#  ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 0904 × 0308 × 0903 ÷	#  ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 0904 ÷ 0904 ÷	#  ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
@@ -782,8 +704,8 @@
 ÷ 0904 × 0308 ÷ 231A ÷	#  ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ 0904 × 0300 ÷	#  ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
 ÷ 0904 × 0308 × 0300 ÷	#  ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
-÷ 0904 × 093C ÷	#  ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
-÷ 0904 × 0308 × 093C ÷	#  ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0904 × 0900 ÷	#  ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0904 × 0308 × 0900 ÷	#  ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
 ÷ 0904 × 094D ÷	#  ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 0904 × 0308 × 094D ÷	#  ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 0904 × 200D ÷	#  ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
@@ -798,8 +720,8 @@
 ÷ 0D4E × 0308 ÷ 000A ÷	#  ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
 ÷ 0D4E ÷ 0001 ÷	#  ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
 ÷ 0D4E × 0308 ÷ 0001 ÷	#  ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
-÷ 0D4E × 034F ÷	#  ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
-÷ 0D4E × 0308 × 034F ÷	#  ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0D4E × 200C ÷	#  ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
+÷ 0D4E × 0308 × 200C ÷	#  ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
 ÷ 0D4E × 1F1E6 ÷	#  ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 0D4E × 0308 ÷ 1F1E6 ÷	#  ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 0D4E × 0600 ÷	#  ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
@@ -816,8 +738,6 @@
 ÷ 0D4E × 0308 ÷ AC00 ÷	#  ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
 ÷ 0D4E × AC01 ÷	#  ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
 ÷ 0D4E × 0308 ÷ AC01 ÷	#  ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
-÷ 0D4E × 0900 ÷	#  ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
-÷ 0D4E × 0308 × 0900 ÷	#  ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 0D4E × 0903 ÷	#  ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 0D4E × 0308 × 0903 ÷	#  ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 0D4E × 0904 ÷	#  ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
@@ -830,8 +750,8 @@
 ÷ 0D4E × 0308 ÷ 231A ÷	#  ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ 0D4E × 0300 ÷	#  ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
 ÷ 0D4E × 0308 × 0300 ÷	#  ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
-÷ 0D4E × 093C ÷	#  ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
-÷ 0D4E × 0308 × 093C ÷	#  ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0D4E × 0900 ÷	#  ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0D4E × 0308 × 0900 ÷	#  ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
 ÷ 0D4E × 094D ÷	#  ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 0D4E × 0308 × 094D ÷	#  ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 0D4E × 200D ÷	#  ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
@@ -846,8 +766,8 @@
 ÷ 0915 × 0308 ÷ 000A ÷	#  ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
 ÷ 0915 ÷ 0001 ÷	#  ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
 ÷ 0915 × 0308 ÷ 0001 ÷	#  ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
-÷ 0915 × 034F ÷	#  ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
-÷ 0915 × 0308 × 034F ÷	#  ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0915 × 200C ÷	#  ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
+÷ 0915 × 0308 × 200C ÷	#  ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
 ÷ 0915 ÷ 1F1E6 ÷	#  ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 0915 × 0308 ÷ 1F1E6 ÷	#  ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 0915 ÷ 0600 ÷	#  ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
@@ -864,8 +784,6 @@
 ÷ 0915 × 0308 ÷ AC00 ÷	#  ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
 ÷ 0915 ÷ AC01 ÷	#  ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
 ÷ 0915 × 0308 ÷ AC01 ÷	#  ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
-÷ 0915 × 0900 ÷	#  ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
-÷ 0915 × 0308 × 0900 ÷	#  ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 0915 × 0903 ÷	#  ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 0915 × 0308 × 0903 ÷	#  ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 0915 ÷ 0904 ÷	#  ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
@@ -878,8 +796,8 @@
 ÷ 0915 × 0308 ÷ 231A ÷	#  ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ 0915 × 0300 ÷	#  ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
 ÷ 0915 × 0308 × 0300 ÷	#  ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
-÷ 0915 × 093C ÷	#  ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
-÷ 0915 × 0308 × 093C ÷	#  ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0915 × 0900 ÷	#  ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0915 × 0308 × 0900 ÷	#  ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
 ÷ 0915 × 094D ÷	#  ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 0915 × 0308 × 094D ÷	#  ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 0915 × 200D ÷	#  ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
@@ -894,8 +812,8 @@
 ÷ 231A × 0308 ÷ 000A ÷	#  ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
 ÷ 231A ÷ 0001 ÷	#  ÷ [0.2] WATCH (ExtPict) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
 ÷ 231A × 0308 ÷ 0001 ÷	#  ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
-÷ 231A × 034F ÷	#  ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
-÷ 231A × 0308 × 034F ÷	#  ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 231A × 200C ÷	#  ÷ [0.2] WATCH (ExtPict) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
+÷ 231A × 0308 × 200C ÷	#  ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
 ÷ 231A ÷ 1F1E6 ÷	#  ÷ [0.2] WATCH (ExtPict) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 231A × 0308 ÷ 1F1E6 ÷	#  ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 231A ÷ 0600 ÷	#  ÷ [0.2] WATCH (ExtPict) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
@@ -912,8 +830,6 @@
 ÷ 231A × 0308 ÷ AC00 ÷	#  ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
 ÷ 231A ÷ AC01 ÷	#  ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
 ÷ 231A × 0308 ÷ AC01 ÷	#  ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
-÷ 231A × 0900 ÷	#  ÷ [0.2] WATCH (ExtPict) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
-÷ 231A × 0308 × 0900 ÷	#  ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 231A × 0903 ÷	#  ÷ [0.2] WATCH (ExtPict) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 231A × 0308 × 0903 ÷	#  ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 231A ÷ 0904 ÷	#  ÷ [0.2] WATCH (ExtPict) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
@@ -926,8 +842,8 @@
 ÷ 231A × 0308 ÷ 231A ÷	#  ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ 231A × 0300 ÷	#  ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
 ÷ 231A × 0308 × 0300 ÷	#  ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
-÷ 231A × 093C ÷	#  ÷ [0.2] WATCH (ExtPict) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
-÷ 231A × 0308 × 093C ÷	#  ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 231A × 0900 ÷	#  ÷ [0.2] WATCH (ExtPict) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 231A × 0308 × 0900 ÷	#  ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
 ÷ 231A × 094D ÷	#  ÷ [0.2] WATCH (ExtPict) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 231A × 0308 × 094D ÷	#  ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 231A × 200D ÷	#  ÷ [0.2] WATCH (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
@@ -942,8 +858,8 @@
 ÷ 0300 × 0308 ÷ 000A ÷	#  ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
 ÷ 0300 ÷ 0001 ÷	#  ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
 ÷ 0300 × 0308 ÷ 0001 ÷	#  ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
-÷ 0300 × 034F ÷	#  ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
-÷ 0300 × 0308 × 034F ÷	#  ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0300 × 200C ÷	#  ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
+÷ 0300 × 0308 × 200C ÷	#  ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
 ÷ 0300 ÷ 1F1E6 ÷	#  ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 0300 × 0308 ÷ 1F1E6 ÷	#  ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 0300 ÷ 0600 ÷	#  ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
@@ -960,8 +876,6 @@
 ÷ 0300 × 0308 ÷ AC00 ÷	#  ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
 ÷ 0300 ÷ AC01 ÷	#  ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
 ÷ 0300 × 0308 ÷ AC01 ÷	#  ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
-÷ 0300 × 0900 ÷	#  ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
-÷ 0300 × 0308 × 0900 ÷	#  ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 0300 × 0903 ÷	#  ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 0300 × 0308 × 0903 ÷	#  ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 0300 ÷ 0904 ÷	#  ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
@@ -974,62 +888,60 @@
 ÷ 0300 × 0308 ÷ 231A ÷	#  ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ 0300 × 0300 ÷	#  ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
 ÷ 0300 × 0308 × 0300 ÷	#  ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
-÷ 0300 × 093C ÷	#  ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
-÷ 0300 × 0308 × 093C ÷	#  ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0300 × 0900 ÷	#  ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0300 × 0308 × 0900 ÷	#  ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
 ÷ 0300 × 094D ÷	#  ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 0300 × 0308 × 094D ÷	#  ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 0300 × 200D ÷	#  ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
 ÷ 0300 × 0308 × 200D ÷	#  ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
 ÷ 0300 ÷ 0378 ÷	#  ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
 ÷ 0300 × 0308 ÷ 0378 ÷	#  ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
-÷ 093C ÷ 0020 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
-÷ 093C × 0308 ÷ 0020 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
-÷ 093C ÷ 000D ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
-÷ 093C × 0308 ÷ 000D ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
-÷ 093C ÷ 000A ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
-÷ 093C × 0308 ÷ 000A ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
-÷ 093C ÷ 0001 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
-÷ 093C × 0308 ÷ 0001 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
-÷ 093C × 034F ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
-÷ 093C × 0308 × 034F ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
-÷ 093C ÷ 1F1E6 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
-÷ 093C × 0308 ÷ 1F1E6 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
-÷ 093C ÷ 0600 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
-÷ 093C × 0308 ÷ 0600 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
-÷ 093C × 0A03 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
-÷ 093C × 0308 × 0A03 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
-÷ 093C ÷ 1100 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
-÷ 093C × 0308 ÷ 1100 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
-÷ 093C ÷ 1160 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
-÷ 093C × 0308 ÷ 1160 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
-÷ 093C ÷ 11A8 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
-÷ 093C × 0308 ÷ 11A8 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
-÷ 093C ÷ AC00 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
-÷ 093C × 0308 ÷ AC00 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
-÷ 093C ÷ AC01 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
-÷ 093C × 0308 ÷ AC01 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
-÷ 093C × 0900 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
-÷ 093C × 0308 × 0900 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
-÷ 093C × 0903 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
-÷ 093C × 0308 × 0903 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
-÷ 093C ÷ 0904 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
-÷ 093C × 0308 ÷ 0904 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
-÷ 093C ÷ 0D4E ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
-÷ 093C × 0308 ÷ 0D4E ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
-÷ 093C ÷ 0915 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
-÷ 093C × 0308 ÷ 0915 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
-÷ 093C ÷ 231A ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
-÷ 093C × 0308 ÷ 231A ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
-÷ 093C × 0300 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
-÷ 093C × 0308 × 0300 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
-÷ 093C × 093C ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
-÷ 093C × 0308 × 093C ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
-÷ 093C × 094D ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
-÷ 093C × 0308 × 094D ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
-÷ 093C × 200D ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
-÷ 093C × 0308 × 200D ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
-÷ 093C ÷ 0378 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
-÷ 093C × 0308 ÷ 0378 ÷	#  ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0900 ÷ 0020 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0900 × 0308 ÷ 0020 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0900 ÷ 000D ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0900 × 0308 ÷ 000D ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0900 ÷ 000A ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0900 × 0308 ÷ 000A ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0900 ÷ 0001 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0900 × 0308 ÷ 0001 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0900 × 200C ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
+÷ 0900 × 0308 × 200C ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
+÷ 0900 ÷ 1F1E6 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0900 × 0308 ÷ 1F1E6 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0900 ÷ 0600 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0900 × 0308 ÷ 0600 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0900 × 0A03 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0900 × 0308 × 0A03 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0900 ÷ 1100 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0900 × 0308 ÷ 1100 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0900 ÷ 1160 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0900 × 0308 ÷ 1160 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0900 ÷ 11A8 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0900 × 0308 ÷ 11A8 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0900 ÷ AC00 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0900 × 0308 ÷ AC00 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0900 ÷ AC01 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0900 × 0308 ÷ AC01 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0900 × 0903 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0900 × 0308 × 0903 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0900 ÷ 0904 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
+÷ 0900 × 0308 ÷ 0904 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
+÷ 0900 ÷ 0D4E ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0900 × 0308 ÷ 0D4E ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0900 ÷ 0915 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
+÷ 0900 × 0308 ÷ 0915 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
+÷ 0900 ÷ 231A ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0900 × 0308 ÷ 231A ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0900 × 0300 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0900 × 0308 × 0300 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0900 × 0900 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0900 × 0308 × 0900 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0900 × 094D ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
+÷ 0900 × 0308 × 094D ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
+÷ 0900 × 200D ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0900 × 0308 × 200D ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0900 ÷ 0378 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0900 × 0308 ÷ 0378 ÷	#  ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
 ÷ 094D ÷ 0020 ÷	#  ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
 ÷ 094D × 0308 ÷ 0020 ÷	#  ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
 ÷ 094D ÷ 000D ÷	#  ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
@@ -1038,8 +950,8 @@
 ÷ 094D × 0308 ÷ 000A ÷	#  ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
 ÷ 094D ÷ 0001 ÷	#  ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
 ÷ 094D × 0308 ÷ 0001 ÷	#  ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
-÷ 094D × 034F ÷	#  ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
-÷ 094D × 0308 × 034F ÷	#  ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 094D × 200C ÷	#  ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
+÷ 094D × 0308 × 200C ÷	#  ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
 ÷ 094D ÷ 1F1E6 ÷	#  ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 094D × 0308 ÷ 1F1E6 ÷	#  ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 094D ÷ 0600 ÷	#  ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
@@ -1056,8 +968,6 @@
 ÷ 094D × 0308 ÷ AC00 ÷	#  ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
 ÷ 094D ÷ AC01 ÷	#  ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
 ÷ 094D × 0308 ÷ AC01 ÷	#  ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
-÷ 094D × 0900 ÷	#  ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
-÷ 094D × 0308 × 0900 ÷	#  ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 094D × 0903 ÷	#  ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 094D × 0308 × 0903 ÷	#  ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 094D ÷ 0904 ÷	#  ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
@@ -1070,8 +980,8 @@
 ÷ 094D × 0308 ÷ 231A ÷	#  ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ 094D × 0300 ÷	#  ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
 ÷ 094D × 0308 × 0300 ÷	#  ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
-÷ 094D × 093C ÷	#  ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
-÷ 094D × 0308 × 093C ÷	#  ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 094D × 0900 ÷	#  ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 094D × 0308 × 0900 ÷	#  ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
 ÷ 094D × 094D ÷	#  ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 094D × 0308 × 094D ÷	#  ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 094D × 200D ÷	#  ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
@@ -1086,8 +996,8 @@
 ÷ 200D × 0308 ÷ 000A ÷	#  ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
 ÷ 200D ÷ 0001 ÷	#  ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
 ÷ 200D × 0308 ÷ 0001 ÷	#  ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
-÷ 200D × 034F ÷	#  ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
-÷ 200D × 0308 × 034F ÷	#  ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 200D × 200C ÷	#  ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
+÷ 200D × 0308 × 200C ÷	#  ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
 ÷ 200D ÷ 1F1E6 ÷	#  ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 200D × 0308 ÷ 1F1E6 ÷	#  ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 200D ÷ 0600 ÷	#  ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
@@ -1104,8 +1014,6 @@
 ÷ 200D × 0308 ÷ AC00 ÷	#  ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
 ÷ 200D ÷ AC01 ÷	#  ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
 ÷ 200D × 0308 ÷ AC01 ÷	#  ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
-÷ 200D × 0900 ÷	#  ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
-÷ 200D × 0308 × 0900 ÷	#  ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 200D × 0903 ÷	#  ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 200D × 0308 × 0903 ÷	#  ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 200D ÷ 0904 ÷	#  ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
@@ -1118,8 +1026,8 @@
 ÷ 200D × 0308 ÷ 231A ÷	#  ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ 200D × 0300 ÷	#  ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
 ÷ 200D × 0308 × 0300 ÷	#  ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
-÷ 200D × 093C ÷	#  ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
-÷ 200D × 0308 × 093C ÷	#  ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 200D × 0900 ÷	#  ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 200D × 0308 × 0900 ÷	#  ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
 ÷ 200D × 094D ÷	#  ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 200D × 0308 × 094D ÷	#  ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 200D × 200D ÷	#  ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
@@ -1134,8 +1042,8 @@
 ÷ 0378 × 0308 ÷ 000A ÷	#  ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
 ÷ 0378 ÷ 0001 ÷	#  ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
 ÷ 0378 × 0308 ÷ 0001 ÷	#  ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
-÷ 0378 × 034F ÷	#  ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
-÷ 0378 × 0308 × 034F ÷	#  ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0378 × 200C ÷	#  ÷ [0.2] <reserved-0378> (Other) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
+÷ 0378 × 0308 × 200C ÷	#  ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH NON-JOINER (Extend) ÷ [0.3]
 ÷ 0378 ÷ 1F1E6 ÷	#  ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 0378 × 0308 ÷ 1F1E6 ÷	#  ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 0378 ÷ 0600 ÷	#  ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
@@ -1152,8 +1060,6 @@
 ÷ 0378 × 0308 ÷ AC00 ÷	#  ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
 ÷ 0378 ÷ AC01 ÷	#  ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
 ÷ 0378 × 0308 ÷ AC01 ÷	#  ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
-÷ 0378 × 0900 ÷	#  ÷ [0.2] <reserved-0378> (Other) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
-÷ 0378 × 0308 × 0900 ÷	#  ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 0378 × 0903 ÷	#  ÷ [0.2] <reserved-0378> (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 0378 × 0308 × 0903 ÷	#  ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
 ÷ 0378 ÷ 0904 ÷	#  ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
@@ -1166,8 +1072,8 @@
 ÷ 0378 × 0308 ÷ 231A ÷	#  ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ 0378 × 0300 ÷	#  ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
 ÷ 0378 × 0308 × 0300 ÷	#  ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
-÷ 0378 × 093C ÷	#  ÷ [0.2] <reserved-0378> (Other) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
-÷ 0378 × 0308 × 093C ÷	#  ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0378 × 0900 ÷	#  ÷ [0.2] <reserved-0378> (Other) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0378 × 0308 × 0900 ÷	#  ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
 ÷ 0378 × 094D ÷	#  ÷ [0.2] <reserved-0378> (Other) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 0378 × 0308 × 094D ÷	#  ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 0378 × 200D ÷	#  ÷ [0.2] <reserved-0378> (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
@@ -1190,10 +1096,10 @@
 ÷ 0061 × 0308 ÷ 0062 ÷	#  ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
 ÷ 0061 × 0903 ÷ 0062 ÷	#  ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
 ÷ 0061 ÷ 0600 × 0062 ÷	#  ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) × [9.2] LATIN SMALL LETTER B (Other) ÷ [0.3]
-÷ 1F476 × 1F3FF ÷ 1F476 ÷	#  ÷ [0.2] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) ÷ [0.3]
-÷ 0061 × 1F3FF ÷ 1F476 ÷	#  ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) ÷ [0.3]
-÷ 0061 × 1F3FF ÷ 1F476 × 200D × 1F6D1 ÷	#  ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
-÷ 1F476 × 1F3FF × 0308 × 200D × 1F476 × 1F3FF ÷	#  ÷ [0.2] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [0.3]
+÷ 1F476 × 1F3FF ÷ 1F476 ÷	#  ÷ [0.2] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_ExtCccZwj) ÷ [999.0] BABY (ExtPict) ÷ [0.3]
+÷ 0061 × 1F3FF ÷ 1F476 ÷	#  ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_ExtCccZwj) ÷ [999.0] BABY (ExtPict) ÷ [0.3]
+÷ 0061 × 1F3FF ÷ 1F476 × 200D × 1F6D1 ÷	#  ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_ExtCccZwj) ÷ [999.0] BABY (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
+÷ 1F476 × 1F3FF × 0308 × 200D × 1F476 × 1F3FF ÷	#  ÷ [0.2] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_ExtCccZwj) ÷ [0.3]
 ÷ 1F6D1 × 200D × 1F6D1 ÷	#  ÷ [0.2] OCTAGONAL SIGN (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
 ÷ 0061 × 200D ÷ 1F6D1 ÷	#  ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
 ÷ 2701 × 200D × 2701 ÷	#  ÷ [0.2] UPPER BLADE SCISSORS (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] UPPER BLADE SCISSORS (Other) ÷ [0.3]
@@ -1210,6 +1116,6 @@
 ÷ 003F × 094D ÷ 0924 ÷	#  ÷ [0.2] QUESTION MARK (Other) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
 ÷ 0915 × 094D × 094D × 0924 ÷	#  ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
 #
-# Lines: 1187
+# Lines: 1093
 #
 # EOF
diff --git a/libcxx/utils/data/unicode/emoji-data.txt b/libcxx/utils/data/unicode/emoji-data.txt
index 0ba10e9..ff99028 100644
--- a/libcxx/utils/data/unicode/emoji-data.txt
+++ b/libcxx/utils/data/unicode/emoji-data.txt
@@ -1,11 +1,11 @@
 # emoji-data.txt
-# Date: 2023-02-01, 02:22:54 GMT
-# © 2023 Unicode®, Inc.
+# Date: 2024-05-01, 21:25:24 GMT
+# © 2024 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
-# For terms of use, see https://www.unicode.org/terms_of_use.html
+# For terms of use and license, see https://www.unicode.org/terms_of_use.html
 #
 # Emoji Data for UTS #51
-# Used with Emoji Version 15.1 and subsequent minor revisions (if any)
+# Used with Emoji Version 16.0 and subsequent minor revisions (if any)
 #
 # For documentation and usage, see https://www.unicode.org/reports/tr51
 #
@@ -407,6 +407,8 @@
 1FA80..1FA82  ; Emoji                # E12.0  [3] (🪀..🪂)    yo-yo..parachute
 1FA83..1FA86  ; Emoji                # E13.0  [4] (🪃..🪆)    boomerang..nesting dolls
 1FA87..1FA88  ; Emoji                # E15.0  [2] (🪇..🪈)    maracas..flute
+1FA89         ; Emoji                # E16.0  [1] (🪉)       harp
+1FA8F         ; Emoji                # E16.0  [1] (🪏)       shovel
 1FA90..1FA95  ; Emoji                # E12.0  [6] (🪐..🪕)    ringed planet..banjo
 1FA96..1FAA8  ; Emoji                # E13.0 [19] (🪖..🪨)    military helmet..rock
 1FAA9..1FAAC  ; Emoji                # E14.0  [4] (🪩..🪬)    mirror ball..hamsa
@@ -414,19 +416,24 @@
 1FAB0..1FAB6  ; Emoji                # E13.0  [7] (🪰..🪶)    fly..feather
 1FAB7..1FABA  ; Emoji                # E14.0  [4] (🪷..🪺)    lotus..nest with eggs
 1FABB..1FABD  ; Emoji                # E15.0  [3] (🪻..🪽)    hyacinth..wing
+1FABE         ; Emoji                # E16.0  [1] (🪾)       leafless tree
 1FABF         ; Emoji                # E15.0  [1] (🪿)       goose
 1FAC0..1FAC2  ; Emoji                # E13.0  [3] (🫀..🫂)    anatomical heart..people hugging
 1FAC3..1FAC5  ; Emoji                # E14.0  [3] (🫃..🫅)    pregnant man..person with crown
+1FAC6         ; Emoji                # E16.0  [1] (🫆)       fingerprint
 1FACE..1FACF  ; Emoji                # E15.0  [2] (🫎..🫏)    moose..donkey
 1FAD0..1FAD6  ; Emoji                # E13.0  [7] (🫐..🫖)    blueberries..teapot
 1FAD7..1FAD9  ; Emoji                # E14.0  [3] (🫗..🫙)    pouring liquid..jar
 1FADA..1FADB  ; Emoji                # E15.0  [2] (🫚..🫛)    ginger root..pea pod
+1FADC         ; Emoji                # E16.0  [1] (🫜)       root vegetable
+1FADF         ; Emoji                # E16.0  [1] (🫟)       splatter
 1FAE0..1FAE7  ; Emoji                # E14.0  [8] (🫠..🫧)    melting face..bubbles
 1FAE8         ; Emoji                # E15.0  [1] (🫨)       shaking face
+1FAE9         ; Emoji                # E16.0  [1] (🫩)       face with bags under eyes
 1FAF0..1FAF6  ; Emoji                # E14.0  [7] (🫰..🫶)    hand with index finger and thumb crossed..heart hands
 1FAF7..1FAF8  ; Emoji                # E15.0  [2] (🫷..🫸)    leftwards pushing hand..rightwards pushing hand
 
-# Total elements: 1424
+# Total elements: 1431
 
 # ================================================
 
@@ -696,6 +703,8 @@
 1FA80..1FA82  ; Emoji_Presentation   # E12.0  [3] (🪀..🪂)    yo-yo..parachute
 1FA83..1FA86  ; Emoji_Presentation   # E13.0  [4] (🪃..🪆)    boomerang..nesting dolls
 1FA87..1FA88  ; Emoji_Presentation   # E15.0  [2] (🪇..🪈)    maracas..flute
+1FA89         ; Emoji_Presentation   # E16.0  [1] (🪉)       harp
+1FA8F         ; Emoji_Presentation   # E16.0  [1] (🪏)       shovel
 1FA90..1FA95  ; Emoji_Presentation   # E12.0  [6] (🪐..🪕)    ringed planet..banjo
 1FA96..1FAA8  ; Emoji_Presentation   # E13.0 [19] (🪖..🪨)    military helmet..rock
 1FAA9..1FAAC  ; Emoji_Presentation   # E14.0  [4] (🪩..🪬)    mirror ball..hamsa
@@ -703,19 +712,24 @@
 1FAB0..1FAB6  ; Emoji_Presentation   # E13.0  [7] (🪰..🪶)    fly..feather
 1FAB7..1FABA  ; Emoji_Presentation   # E14.0  [4] (🪷..🪺)    lotus..nest with eggs
 1FABB..1FABD  ; Emoji_Presentation   # E15.0  [3] (🪻..🪽)    hyacinth..wing
+1FABE         ; Emoji_Presentation   # E16.0  [1] (🪾)       leafless tree
 1FABF         ; Emoji_Presentation   # E15.0  [1] (🪿)       goose
 1FAC0..1FAC2  ; Emoji_Presentation   # E13.0  [3] (🫀..🫂)    anatomical heart..people hugging
 1FAC3..1FAC5  ; Emoji_Presentation   # E14.0  [3] (🫃..🫅)    pregnant man..person with crown
+1FAC6         ; Emoji_Presentation   # E16.0  [1] (🫆)       fingerprint
 1FACE..1FACF  ; Emoji_Presentation   # E15.0  [2] (🫎..🫏)    moose..donkey
 1FAD0..1FAD6  ; Emoji_Presentation   # E13.0  [7] (🫐..🫖)    blueberries..teapot
 1FAD7..1FAD9  ; Emoji_Presentation   # E14.0  [3] (🫗..🫙)    pouring liquid..jar
 1FADA..1FADB  ; Emoji_Presentation   # E15.0  [2] (🫚..🫛)    ginger root..pea pod
+1FADC         ; Emoji_Presentation   # E16.0  [1] (🫜)       root vegetable
+1FADF         ; Emoji_Presentation   # E16.0  [1] (🫟)       splatter
 1FAE0..1FAE7  ; Emoji_Presentation   # E14.0  [8] (🫠..🫧)    melting face..bubbles
 1FAE8         ; Emoji_Presentation   # E15.0  [1] (🫨)       shaking face
+1FAE9         ; Emoji_Presentation   # E16.0  [1] (🫩)       face with bags under eyes
 1FAF0..1FAF6  ; Emoji_Presentation   # E14.0  [7] (🫰..🫶)    hand with index finger and thumb crossed..heart hands
 1FAF7..1FAF8  ; Emoji_Presentation   # E15.0  [2] (🫷..🫸)    leftwards pushing hand..rightwards pushing hand
 
-# Total elements: 1205
+# Total elements: 1212
 
 # ================================================
 
@@ -1289,7 +1303,9 @@ E0020..E007F  ; Emoji_Component      # E0.0  [96] (󠀠..󠁿)      tag space..c
 1FA80..1FA82  ; Extended_Pictographic# E12.0  [3] (🪀..🪂)    yo-yo..parachute
 1FA83..1FA86  ; Extended_Pictographic# E13.0  [4] (🪃..🪆)    boomerang..nesting dolls
 1FA87..1FA88  ; Extended_Pictographic# E15.0  [2] (🪇..🪈)    maracas..flute
-1FA89..1FA8F  ; Extended_Pictographic# E0.0   [7] (🪉..🪏)    <reserved-1FA89>..<reserved-1FA8F>
+1FA89         ; Extended_Pictographic# E16.0  [1] (🪉)       harp
+1FA8A..1FA8E  ; Extended_Pictographic# E0.0   [5] (🪊..🪎)    <reserved-1FA8A>..<reserved-1FA8E>
+1FA8F         ; Extended_Pictographic# E16.0  [1] (🪏)       shovel
 1FA90..1FA95  ; Extended_Pictographic# E12.0  [6] (🪐..🪕)    ringed planet..banjo
 1FA96..1FAA8  ; Extended_Pictographic# E13.0 [19] (🪖..🪨)    military helmet..rock
 1FAA9..1FAAC  ; Extended_Pictographic# E14.0  [4] (🪩..🪬)    mirror ball..hamsa
@@ -1297,19 +1313,23 @@ E0020..E007F  ; Emoji_Component      # E0.0  [96] (󠀠..󠁿)      tag space..c
 1FAB0..1FAB6  ; Extended_Pictographic# E13.0  [7] (🪰..🪶)    fly..feather
 1FAB7..1FABA  ; Extended_Pictographic# E14.0  [4] (🪷..🪺)    lotus..nest with eggs
 1FABB..1FABD  ; Extended_Pictographic# E15.0  [3] (🪻..🪽)    hyacinth..wing
-1FABE         ; Extended_Pictographic# E0.0   [1] (🪾)       <reserved-1FABE>
+1FABE         ; Extended_Pictographic# E16.0  [1] (🪾)       leafless tree
 1FABF         ; Extended_Pictographic# E15.0  [1] (🪿)       goose
 1FAC0..1FAC2  ; Extended_Pictographic# E13.0  [3] (🫀..🫂)    anatomical heart..people hugging
 1FAC3..1FAC5  ; Extended_Pictographic# E14.0  [3] (🫃..🫅)    pregnant man..person with crown
-1FAC6..1FACD  ; Extended_Pictographic# E0.0   [8] (🫆..🫍)    <reserved-1FAC6>..<reserved-1FACD>
+1FAC6         ; Extended_Pictographic# E16.0  [1] (🫆)       fingerprint
+1FAC7..1FACD  ; Extended_Pictographic# E0.0   [7] (🫇..🫍)    <reserved-1FAC7>..<reserved-1FACD>
 1FACE..1FACF  ; Extended_Pictographic# E15.0  [2] (🫎..🫏)    moose..donkey
 1FAD0..1FAD6  ; Extended_Pictographic# E13.0  [7] (🫐..🫖)    blueberries..teapot
 1FAD7..1FAD9  ; Extended_Pictographic# E14.0  [3] (🫗..🫙)    pouring liquid..jar
 1FADA..1FADB  ; Extended_Pictographic# E15.0  [2] (🫚..🫛)    ginger root..pea pod
-1FADC..1FADF  ; Extended_Pictographic# E0.0   [4] (🫜..🫟)    <reserved-1FADC>..<reserved-1FADF>
+1FADC         ; Extended_Pictographic# E16.0  [1] (🫜)       root vegetable
+1FADD..1FADE  ; Extended_Pictographic# E0.0   [2] (🫝..🫞)    <reserved-1FADD>..<reserved-1FADE>
+1FADF         ; Extended_Pictographic# E16.0  [1] (🫟)       splatter
 1FAE0..1FAE7  ; Extended_Pictographic# E14.0  [8] (🫠..🫧)    melting face..bubbles
 1FAE8         ; Extended_Pictographic# E15.0  [1] (🫨)       shaking face
-1FAE9..1FAEF  ; Extended_Pictographic# E0.0   [7] (🫩..🫯)    <reserved-1FAE9>..<reserved-1FAEF>
+1FAE9         ; Extended_Pictographic# E16.0  [1] (🫩)       face with bags under eyes
+1FAEA..1FAEF  ; Extended_Pictographic# E0.0   [6] (🫪..🫯)    <reserved-1FAEA>..<reserved-1FAEF>
 1FAF0..1FAF6  ; Extended_Pictographic# E14.0  [7] (🫰..🫶)    hand with index finger and thumb crossed..heart hands
 1FAF7..1FAF8  ; Extended_Pictographic# E15.0  [2] (🫷..🫸)    leftwards pushing hand..rightwards pushing hand
 1FAF9..1FAFF  ; Extended_Pictographic# E0.0   [7] (🫹..🫿)    <reserved-1FAF9>..<reserved-1FAFF>
diff --git a/libcxx/utils/libcxx/test/params.py b/libcxx/utils/libcxx/test/params.py
index 8fd3872..7dba39b 100644
--- a/libcxx/utils/libcxx/test/params.py
+++ b/libcxx/utils/libcxx/test/params.py
@@ -31,6 +31,7 @@ _warningFlags = [
     "-Wno-reserved-module-identifier",
     '-Wdeprecated-copy',
     '-Wdeprecated-copy-dtor',
+    "-Wshift-negative-value",
     # GCC warns about places where we might want to add sized allocation/deallocation
     # functions, but we know better what we're doing/testing in the test suite.
     "-Wno-sized-deallocation",
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index b848e0d..979c0ae 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -2692,7 +2692,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
 
   // Handle /output-def (MinGW specific).
   if (auto *arg = args.getLastArg(OPT_output_def))
-    writeDefFile(ctx, arg->getValue(), ctx.symtab.exports);
+    writeDefFile(ctx, arg->getValue(), mainSymtab.exports);
 
   // Set extra alignment for .comm symbols
   for (auto pair : config->alignComm) {
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 2bdaeb5..484ac9cd 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -1191,10 +1191,12 @@ void Writer::createMiscChunks() {
   }
 
   // Create thunks for locally-dllimported symbols.
-  if (!ctx.symtab.localImportChunks.empty()) {
-    for (Chunk *c : ctx.symtab.localImportChunks)
-      rdataSec->addChunk(c);
-  }
+  ctx.forEachSymtab([&](SymbolTable &symtab) {
+    if (!symtab.localImportChunks.empty()) {
+      for (Chunk *c : symtab.localImportChunks)
+        rdataSec->addChunk(c);
+    }
+  });
 
   // Create Debug Information Chunks
   debugInfoSec = config->mingw ? buildidSec : rdataSec;
diff --git a/lld/test/COFF/arm64x-export.test b/lld/test/COFF/arm64x-export.test
index bae40af..c83db2a 100644
--- a/lld/test/COFF/arm64x-export.test
+++ b/lld/test/COFF/arm64x-export.test
@@ -14,7 +14,7 @@ RUN: llvm-mc -filetype=obj -triple=aarch64-windows %S/Inputs/loadconfig-arm64.s
 # A command-line export applies only to EC exports.
 
 RUN: lld-link -machine:arm64x -dll -out:out-cmd.dll arm64ec-func.obj arm64-func.obj \
-RUN:          loadconfig-arm64.obj loadconfig-arm64ec.obj -noentry -export:func
+RUN:          loadconfig-arm64.obj loadconfig-arm64ec.obj -noentry -export:func -output-def:out.def
 
 RUN: llvm-objdump -d out-cmd.dll | FileCheck --check-prefix=DISASM-EC %s
 DISASM-EC:      Disassembly of section .text:
@@ -74,6 +74,10 @@ IMPLIB-EC-NEXT: Symbol: func
 IMPLIB-EC-NEXT: Symbol: __imp_aux_func
 IMPLIB-EC-NEXT: Symbol: #func
 
+RUN: FileCheck --check-prefix=OUT-DEF %s < out.def
+OUT-DEF:      EXPORTS
+OUT-DEF-NEXT:     func @1
+
 
 # Export using the EC .drectve section.
 
diff --git a/lld/test/COFF/locally-imported-arm64x.s b/lld/test/COFF/locally-imported-arm64x.s
new file mode 100644
index 0000000..6091af2
--- /dev/null
+++ b/lld/test/COFF/locally-imported-arm64x.s
@@ -0,0 +1,32 @@
+// REQUIRES: aarch64
+
+// RUN: llvm-mc -filetype=obj -triple=aarch64-windows %s -o %t.arm64.obj
+// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %s -o %t.arm64ec.obj
+
+// RUN: lld-link -machine:arm64x -dll -noentry %t.arm64.obj %t.arm64ec.obj -out:%t.dll 2>&1 | FileCheck --check-prefix=WARN %s
+// WARN:      lld-link: warning: {{.*}}.arm64.obj: locally defined symbol imported: func
+// WARN-NEXT: lld-link: warning: {{.*}}.arm64ec.obj: locally defined symbol imported: func
+
+// RUN: llvm-readobj --hex-dump=.test %t.dll | FileCheck --check-prefix=TEST %s
+// TEST: 0x180005000 00300000 08300000
+
+// RUN: llvm-readobj --coff-basereloc %t.dll | FileCheck --check-prefix=RELOCS %s
+// RELOCS:      Entry {
+// RELOCS-NEXT:   Type: DIR64
+// RELOCS-NEXT:   Address: 0x3000
+// RELOCS-NEXT: }
+// RELOCS-NEXT: Entry {
+// RELOCS-NEXT:   Type: DIR64
+// RELOCS-NEXT:   Address: 0x3008
+// RELOCS-NEXT: }
+
+// RUN: llvm-readobj --hex-dump=.rdata %t.dll | FileCheck --check-prefix=RDATA %s
+// RDATA: 0x180003000 00100080 01000000 00200080 01000000
+
+    .text
+    .globl func
+func:
+    ret
+
+    .section .test, "r"
+    .rva __imp_func
diff --git a/lldb/docs/resources/lldbgdbremote.md b/lldb/docs/resources/lldbgdbremote.md
index 5cac373..1628151 100644
--- a/lldb/docs/resources/lldbgdbremote.md
+++ b/lldb/docs/resources/lldbgdbremote.md
@@ -2412,6 +2412,11 @@ value of errno if unlink failed.
 
 ## "x" - Binary memory read
 
+> **Warning:** The format of this packet was decided before GDB 16
+> introduced its own format for `x`. Future versions of LLDB may not
+> support the format described here, and new code should produce and
+> expect the format used by GDB.
+
 Like the `m` (read) and `M` (write) packets, this is a partner to the
 `X` (write binary data) packet, `x`.
 
diff --git a/lldb/include/lldb/Host/Host.h b/lldb/include/lldb/Host/Host.h
index d8113a5..4e19d15 100644
--- a/lldb/include/lldb/Host/Host.h
+++ b/lldb/include/lldb/Host/Host.h
@@ -259,6 +259,8 @@ public:
                                               const FileSpec &file_spec,
                                               uint32_t line_no);
 
+  static llvm::Error OpenURL(llvm::StringRef url);
+
   /// Check if we're running in an interactive graphical session.
   ///
   /// \return
diff --git a/lldb/source/API/SBTarget.cpp b/lldb/source/API/SBTarget.cpp
index 2a33161..dd9caa7 100644
--- a/lldb/source/API/SBTarget.cpp
+++ b/lldb/source/API/SBTarget.cpp
@@ -1342,7 +1342,8 @@ lldb::SBWatchpoint SBTarget::WatchAddress(lldb::addr_t addr, size_t size,
 
   SBWatchpointOptions options;
   options.SetWatchpointTypeRead(read);
-  options.SetWatchpointTypeWrite(eWatchpointWriteTypeOnModify);
+  if (modify)
+    options.SetWatchpointTypeWrite(eWatchpointWriteTypeOnModify);
   return WatchpointCreateByAddress(addr, size, options, error);
 }
 
diff --git a/lldb/source/Host/macosx/objcxx/Host.mm b/lldb/source/Host/macosx/objcxx/Host.mm
index a99ffbc..5b3d04a 100644
--- a/lldb/source/Host/macosx/objcxx/Host.mm
+++ b/lldb/source/Host/macosx/objcxx/Host.mm
@@ -452,6 +452,38 @@ llvm::Error Host::OpenFileInExternalEditor(llvm::StringRef editor,
 #endif // TARGET_OS_OSX
 }
 
+llvm::Error Host::OpenURL(llvm::StringRef url) {
+#if !TARGET_OS_OSX
+  return llvm::errorCodeToError(
+      std::error_code(ENOTSUP, std::system_category()));
+#else  // !TARGET_OS_OSX
+  if (url.empty())
+    return llvm::createStringError("Cannot open empty URL.");
+
+  LLDB_LOG(GetLog(LLDBLog::Host), "Opening URL: {0}", url);
+
+  CFCString url_cfstr(url.data(), kCFStringEncodingUTF8);
+  CFCReleaser<CFURLRef> cfurl = ::CFURLCreateWithString(
+      /*allocator=*/NULL,
+      /*URLString*/ url_cfstr.get(),
+      /*baseURL=*/NULL);
+
+  if (!cfurl.get())
+    return llvm::createStringError(
+        llvm::formatv("could not create CFURL from URL \"{0}\"", url));
+
+  OSStatus error = ::LSOpenCFURLRef(
+      /*inURL=*/cfurl.get(),
+      /*outLaunchedURL=*/NULL);
+
+  if (error != noErr)
+    return llvm::createStringError(
+        llvm::formatv("LSOpenCFURLRef failed: error {0:x}", error));
+
+  return llvm::Error::success();
+#endif // TARGET_OS_OSX
+}
+
 bool Host::IsInteractiveGraphicSession() {
 #if !TARGET_OS_OSX
   return false;
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp b/lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp
index c201153..3ae32d4 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp
@@ -280,7 +280,8 @@ std::optional<Decl *> CxxModuleHandler::tryInstantiateStdTemplate(Decl *d) {
       new_class_template->getDeclContext(),
       new_class_template->getTemplatedDecl()->getLocation(),
       new_class_template->getLocation(), new_class_template, imported_args,
-      nullptr);
+      td->hasStrictPackMatch(),
+      /*PrevDecl=*/nullptr);
 
   new_class_template->AddSpecialization(result, InsertPos);
   if (new_class_template->isOutOfLine())
diff --git a/lldb/test/API/python_api/watchpoint/TestWatchpointRead.py b/lldb/test/API/python_api/watchpoint/TestWatchpointRead.py
new file mode 100644
index 0000000..f482ebe
--- /dev/null
+++ b/lldb/test/API/python_api/watchpoint/TestWatchpointRead.py
@@ -0,0 +1,129 @@
+"""
+Use lldb Python SBTarget API to set read watchpoints
+"""
+
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+
+class SetReadOnlyWatchpointTestCase(TestBase):
+    NO_DEBUG_INFO_TESTCASE = True
+
+    def setUp(self):
+        # Call super's setUp().
+        TestBase.setUp(self)
+        # Our simple source filename.
+        self.source = "main.c"
+        # Find the line number to break inside main().
+        self.line = line_number(self.source, "// Set break point at this line.")
+        self.build()
+
+    # Intel hardware does not support read-only watchpoints
+    @expectedFailureAll(archs=["i386", "x86_64"])
+    def test_read_watchpoint_watch_address(self):
+        exe = self.getBuildArtifact("a.out")
+
+        target = self.dbg.CreateTarget(exe)
+        self.assertTrue(target, VALID_TARGET)
+
+        # Now create a breakpoint on main.c.
+        breakpoint = target.BreakpointCreateByLocation(self.source, self.line)
+        self.assertTrue(
+            breakpoint and breakpoint.GetNumLocations() == 1, VALID_BREAKPOINT
+        )
+
+        # Now launch the process, and do not stop at the entry point.
+        process = target.LaunchSimple(None, None, self.get_process_working_directory())
+
+        # We should be stopped due to the breakpoint.  Get frame #0.
+        process = target.GetProcess()
+        self.assertState(process.GetState(), lldb.eStateStopped, PROCESS_STOPPED)
+        thread = lldbutil.get_stopped_thread(process, lldb.eStopReasonBreakpoint)
+        frame0 = thread.GetFrameAtIndex(0)
+
+        value = frame0.FindValue("global", lldb.eValueTypeVariableGlobal)
+        local = frame0.FindValue("local", lldb.eValueTypeVariableLocal)
+        error = lldb.SBError()
+
+        watchpoint = target.WatchAddress(value.GetLoadAddress(), 1, True, False, error)
+        self.assertTrue(
+            value and local and watchpoint,
+            "Successfully found the values and set a watchpoint",
+        )
+        self.DebugSBValue(value)
+        self.DebugSBValue(local)
+
+        # Hide stdout if not running with '-t' option.
+        if not self.TraceOn():
+            self.HideStdout()
+
+        print(watchpoint)
+
+        # Continue.  Expect the program to stop due to the variable being
+        # read, but *not* written to.
+        process.Continue()
+
+        if self.TraceOn():
+            lldbutil.print_stacktraces(process)
+
+        self.assertTrue(
+            local.GetValueAsSigned() > 0, "The local variable has been incremented"
+        )
+
+    # Intel hardware does not support read-only watchpoints
+    @expectedFailureAll(archs=["i386", "x86_64"])
+    def test_read_watchpoint_watch_create_by_address(self):
+        exe = self.getBuildArtifact("a.out")
+
+        target = self.dbg.CreateTarget(exe)
+        self.assertTrue(target, VALID_TARGET)
+
+        # Now create a breakpoint on main.c.
+        breakpoint = target.BreakpointCreateByLocation(self.source, self.line)
+        self.assertTrue(
+            breakpoint and breakpoint.GetNumLocations() == 1, VALID_BREAKPOINT
+        )
+
+        # Now launch the process, and do not stop at the entry point.
+        process = target.LaunchSimple(None, None, self.get_process_working_directory())
+
+        # We should be stopped due to the breakpoint.  Get frame #0.
+        process = target.GetProcess()
+        self.assertState(process.GetState(), lldb.eStateStopped, PROCESS_STOPPED)
+        thread = lldbutil.get_stopped_thread(process, lldb.eStopReasonBreakpoint)
+        frame0 = thread.GetFrameAtIndex(0)
+
+        value = frame0.FindValue("global", lldb.eValueTypeVariableGlobal)
+        local = frame0.FindValue("local", lldb.eValueTypeVariableLocal)
+        error = lldb.SBError()
+
+        wp_opts = lldb.SBWatchpointOptions()
+        wp_opts.SetWatchpointTypeRead(True)
+        watchpoint = target.WatchpointCreateByAddress(
+            value.GetLoadAddress(), 1, wp_opts, error
+        )
+        self.assertTrue(
+            value and local and watchpoint,
+            "Successfully found the values and set a watchpoint",
+        )
+        self.DebugSBValue(value)
+        self.DebugSBValue(local)
+
+        # Hide stdout if not running with '-t' option.
+        if not self.TraceOn():
+            self.HideStdout()
+
+        print(watchpoint)
+
+        # Continue.  Expect the program to stop due to the variable being
+        # read, but *not* written to.
+        process.Continue()
+
+        if self.TraceOn():
+            lldbutil.print_stacktraces(process)
+
+        self.assertTrue(
+            local.GetValueAsSigned() > 0, "The local variable has been incremented"
+        )
diff --git a/lldb/test/API/python_api/watchpoint/watchlocation/TestTargetWatchAddress.py b/lldb/test/API/python_api/watchpoint/watchlocation/TestTargetWatchAddress.py
index cbab3c6..7a0e42a 100644
--- a/lldb/test/API/python_api/watchpoint/watchlocation/TestTargetWatchAddress.py
+++ b/lldb/test/API/python_api/watchpoint/watchlocation/TestTargetWatchAddress.py
@@ -21,7 +21,7 @@ class TargetWatchpointCreateByAddressPITestCase(TestBase):
         # This is for verifying that watch location works.
         self.violating_func = "do_bad_thing_with_location"
 
-    def test_watch_address(self):
+    def test_watch_create_by_address(self):
         """Exercise SBTarget.WatchpointCreateByAddress() API to set a watchpoint."""
         self.build()
         exe = self.getBuildArtifact("a.out")
@@ -88,6 +88,75 @@ class TargetWatchpointCreateByAddressPITestCase(TestBase):
 
         # This finishes our test.
 
+    def test_watch_address(self):
+        """Exercise SBTarget.WatchAddress() API to set a watchpoint.
+        Same as test_watch_create_by_address, but uses the simpler API.
+        """
+        self.build()
+        exe = self.getBuildArtifact("a.out")
+
+        # Create a target by the debugger.
+        target = self.dbg.CreateTarget(exe)
+        self.assertTrue(target, VALID_TARGET)
+
+        # Now create a breakpoint on main.c.
+        breakpoint = target.BreakpointCreateByLocation(self.source, self.line)
+        self.assertTrue(
+            breakpoint and breakpoint.GetNumLocations() == 1, VALID_BREAKPOINT
+        )
+
+        # Now launch the process, and do not stop at the entry point.
+        process = target.LaunchSimple(None, None, self.get_process_working_directory())
+
+        # We should be stopped due to the breakpoint.  Get frame #0.
+        process = target.GetProcess()
+        self.assertState(process.GetState(), lldb.eStateStopped, PROCESS_STOPPED)
+        thread = lldbutil.get_stopped_thread(process, lldb.eStopReasonBreakpoint)
+        frame0 = thread.GetFrameAtIndex(0)
+
+        value = frame0.FindValue("g_char_ptr", lldb.eValueTypeVariableGlobal)
+        pointee = value.CreateValueFromAddress(
+            "pointee", value.GetValueAsUnsigned(0), value.GetType().GetPointeeType()
+        )
+        # Watch for write to *g_char_ptr.
+        error = lldb.SBError()
+        watch_read = False
+        watch_write = True
+        watchpoint = target.WatchAddress(
+            value.GetValueAsUnsigned(), 1, watch_read, watch_write, error
+        )
+        self.assertTrue(
+            value and watchpoint, "Successfully found the pointer and set a watchpoint"
+        )
+        self.DebugSBValue(value)
+        self.DebugSBValue(pointee)
+
+        # Hide stdout if not running with '-t' option.
+        if not self.TraceOn():
+            self.HideStdout()
+
+        print(watchpoint)
+
+        # Continue.  Expect the program to stop due to the variable being
+        # written to.
+        process.Continue()
+
+        if self.TraceOn():
+            lldbutil.print_stacktraces(process)
+
+        thread = lldbutil.get_stopped_thread(process, lldb.eStopReasonWatchpoint)
+        self.assertTrue(thread, "The thread stopped due to watchpoint")
+        self.DebugSBValue(value)
+        self.DebugSBValue(pointee)
+
+        self.expect(
+            lldbutil.print_stacktrace(thread, string_buffer=True),
+            exe=False,
+            substrs=[self.violating_func],
+        )
+
+        # This finishes our test.
+
     # No size constraint on MIPS for watches
     @skipIf(archs=["mips", "mipsel", "mips64", "mips64el"])
     @skipIf(archs=["s390x"])  # Likewise on SystemZ
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index b646621..84980d0 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -583,11 +583,11 @@ Generic processor code objects are versioned. See :ref:`amdgpu-generic-processor
                                                                                                   - ``v_dot2_f32_f16``
 
 
-     ``gfx9-4-generic``   ``amdgcn``     - ``gfx940``      - xnack            - Absolute flat   FP8 and BF8 instructions,
-                                         - ``gfx941``      - sramecc            scratch         FP8 and BF8 conversion instructions,
-                                         - ``gfx942``                                           as well as instructions with XF32 format support
-                                         - ``gfx950``                                           are not available.
-
+     ``gfx9-4-generic``   ``amdgcn``     - ``gfx940``      - sramecc          - Architected     FP8 and BF8 instructions,
+                                         - ``gfx941``      - tgsplit            flat scratch    FP8 and BF8 conversion
+                                         - ``gfx942``      - xnack            - Packed          instructions, as well as
+                                         - ``gfx950``      - kernarg preload    work-item       instructions with XF32 format
+                                                                                IDs             support are not available.
 
      ``gfx10-1-generic``  ``amdgcn``     - ``gfx1010``     - xnack            - Absolute flat   - The following instructions are
                                          - ``gfx1011``     - wavefrontsize64    scratch           not available on ``gfx1011``
diff --git a/llvm/docs/TableGen/ProgRef.rst b/llvm/docs/TableGen/ProgRef.rst
index f260564..edb9710 100644
--- a/llvm/docs/TableGen/ProgRef.rst
+++ b/llvm/docs/TableGen/ProgRef.rst
@@ -367,7 +367,16 @@ Simple values
 The :token:`SimpleValue` has a number of forms.
 
 .. productionlist::
-   SimpleValue: `TokInteger` | `TokString`+ | `TokCode`
+   SimpleValue: `SimpleValue1`
+              :| `SimpleValue2`
+              :| `SimpleValue3`
+              :| `SimpleValue4`
+              :| `SimpleValue5`
+              :| `SimpleValue6`
+              :| `SimpleValue7`
+              :| `SimpleValue8`
+              :| `SimpleValue9`
+   SimpleValue1: `TokInteger` | `TokString`+ | `TokCode`
 
 A value can be an integer literal, a string literal, or a code literal.
 Multiple adjacent string literals are concatenated as in C/C++; the simple
@@ -1307,8 +1316,9 @@ output. It is intended for debugging purpose.
   instantiation point of the containing record.
 
 .. productionlist::
-   Dump: "dump"  `string` ";"
+   Dump: "dump" `Value` ";"
 
+The :token:`Value` is an arbitrary string expression.
 For example, it can be used in combination with `!repr` to investigate
 the values passed to a multiclass:
 
@@ -1355,11 +1365,12 @@ The ``assert`` statement checks a boolean condition to be sure that it is true
 and prints an error message if it is not.
 
 .. productionlist::
-   Assert: "assert" `condition` "," `message` ";"
+   Assert: "assert" `Value` "," `Value` ";"
 
-If the boolean condition is true, the statement does nothing. If the
-condition is false, it prints a nonfatal error message. The **message**, which
-can be an arbitrary string expression, is included in the error message as a
+The first :token:`Value` is a boolean condition. If it is true, the
+statement does nothing. If the condition is false, it prints a nonfatal
+error message. The second :token:`Value` is a message, which can be an
+arbitrary string expression. It is included in the error message as a
 note. The exact behavior of the ``assert`` statement depends on its
 placement.
 
diff --git a/llvm/include/llvm/ADT/GenericCycleImpl.h b/llvm/include/llvm/ADT/GenericCycleImpl.h
index 41ba8bf..4b2e01b 100644
--- a/llvm/include/llvm/ADT/GenericCycleImpl.h
+++ b/llvm/include/llvm/ADT/GenericCycleImpl.h
@@ -454,7 +454,9 @@ void GenericCycleInfoCompute<ContextT>::dfs(BlockT *EntryBlock) {
     BlockT *Block = TraverseStack.back();
     LLVM_DEBUG(errs() << "DFS visiting block: " << Info.Context.print(Block)
                       << "\n");
-    if (!BlockDFSInfo.count(Block)) {
+    if (BlockDFSInfo.try_emplace(Block, Counter + 1).second) {
+      ++Counter;
+
       // We're visiting the block for the first time. Open its DFSInfo, add
       // successors to the traversal stack, and remember the traversal stack
       // depth at which the block was opened, so that we can correctly record
@@ -465,9 +467,6 @@ void GenericCycleInfoCompute<ContextT>::dfs(BlockT *EntryBlock) {
       DFSTreeStack.emplace_back(TraverseStack.size());
       llvm::append_range(TraverseStack, successors(Block));
 
-      bool Added = BlockDFSInfo.try_emplace(Block, ++Counter).second;
-      (void)Added;
-      assert(Added);
       BlockPreorder.push_back(Block);
       LLVM_DEBUG(errs() << "  preorder number: " << Counter << "\n");
     } else {
diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h
index 5d41d1c..f21594c 100644
--- a/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/llvm/include/llvm/Analysis/VectorUtils.h
@@ -748,12 +748,11 @@ private:
   /// \returns the newly created interleave group.
   InterleaveGroup<Instruction> *
   createInterleaveGroup(Instruction *Instr, int Stride, Align Alignment) {
-    assert(!InterleaveGroupMap.count(Instr) &&
-           "Already in an interleaved access group");
-    InterleaveGroupMap[Instr] =
-        new InterleaveGroup<Instruction>(Instr, Stride, Alignment);
-    InterleaveGroups.insert(InterleaveGroupMap[Instr]);
-    return InterleaveGroupMap[Instr];
+    auto [It, Inserted] = InterleaveGroupMap.try_emplace(Instr);
+    assert(Inserted && "Already in an interleaved access group");
+    It->second = new InterleaveGroup<Instruction>(Instr, Stride, Alignment);
+    InterleaveGroups.insert(It->second);
+    return It->second;
   }
 
   /// Release the group and remove all the relationships.
diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h
index c3eb27b..f1e595c 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -918,6 +918,13 @@ public:
   bool verify(Pass *p = nullptr, const char *Banner = nullptr,
               raw_ostream *OS = nullptr, bool AbortOnError = true) const;
 
+  /// For New Pass Manager: Run the current MachineFunction through the machine
+  /// code verifier, useful for debugger use.
+  /// \returns true if no problems were found.
+  bool verify(MachineFunctionAnalysisManager &MFAM,
+              const char *Banner = nullptr, raw_ostream *OS = nullptr,
+              bool AbortOnError = true) const;
+
   /// Run the current MachineFunction through the machine code verifier, useful
   /// for debugger use.
   /// \returns true if no problems were found.
diff --git a/llvm/include/llvm/CodeGen/MachinePipeliner.h b/llvm/include/llvm/CodeGen/MachinePipeliner.h
index 8e47d0c..f95a02a 100644
--- a/llvm/include/llvm/CodeGen/MachinePipeliner.h
+++ b/llvm/include/llvm/CodeGen/MachinePipeliner.h
@@ -390,6 +390,9 @@ public:
 
   const SwingSchedulerDDG *getDDG() const { return DDG.get(); }
 
+  bool mayOverlapInLaterIter(const MachineInstr *BaseMI,
+                             const MachineInstr *OtherMI) const;
+
 private:
   void addLoopCarriedDependences(AAResults *AA);
   void updatePhiDependences();
@@ -409,7 +412,7 @@ private:
   void computeNodeOrder(NodeSetType &NodeSets);
   void checkValidNodeOrder(const NodeSetType &Circuits) const;
   bool schedulePipeline(SMSchedule &Schedule);
-  bool computeDelta(MachineInstr &MI, unsigned &Delta) const;
+  bool computeDelta(const MachineInstr &MI, int &Delta) const;
   MachineInstr *findDefInLoop(Register Reg);
   bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
                              unsigned &OffsetPos, unsigned &NewBase,
diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h
index 660670c..e1f1a1e 100644
--- a/llvm/include/llvm/CodeGen/MachineScheduler.h
+++ b/llvm/include/llvm/CodeGen/MachineScheduler.h
@@ -17,7 +17,7 @@
 // scheduled. Targets can override the DAG builder and scheduler without
 // replacing the pass as follows:
 //
-// ScheduleDAGInstrs *<Target>PassConfig::
+// ScheduleDAGInstrs *<Target>TargetMachine::
 // createMachineScheduler(MachineSchedContext *C) {
 //   return new CustomMachineScheduler(C);
 // }
@@ -29,7 +29,7 @@
 // plugin an alternate MachineSchedStrategy. The strategy is responsible for
 // selecting the highest priority node from the list:
 //
-// ScheduleDAGInstrs *<Target>PassConfig::
+// ScheduleDAGInstrs *<Target>TargetMachine::
 // createMachineScheduler(MachineSchedContext *C) {
 //   return new ScheduleDAGMILive(C, CustomStrategy(C));
 // }
@@ -39,7 +39,7 @@
 // can adjust dependencies based on target-specific knowledge or add weak edges
 // to aid heuristics:
 //
-// ScheduleDAGInstrs *<Target>PassConfig::
+// ScheduleDAGInstrs *<Target>TargetMachine::
 // createMachineScheduler(MachineSchedContext *C) {
 //   ScheduleDAGMI *DAG = createGenericSchedLive(C);
 //   DAG->addMutation(new CustomDAGMutation(...));
@@ -137,7 +137,7 @@ struct MachineSchedContext {
   MachineFunction *MF = nullptr;
   const MachineLoopInfo *MLI = nullptr;
   const MachineDominatorTree *MDT = nullptr;
-  const TargetPassConfig *PassConfig = nullptr;
+  const TargetMachine *TM = nullptr;
   AAResults *AA = nullptr;
   LiveIntervals *LIS = nullptr;
 
@@ -1385,6 +1385,24 @@ std::unique_ptr<ScheduleDAGMutation>
 createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
                                const TargetRegisterInfo *TRI);
 
+class MachineSchedulerPass : public PassInfoMixin<MachineSchedulerPass> {
+  const TargetMachine *TM;
+
+public:
+  MachineSchedulerPass(const TargetMachine *TM) : TM(TM) {}
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+};
+
+class PostMachineSchedulerPass
+    : public PassInfoMixin<PostMachineSchedulerPass> {
+  const TargetMachine *TM;
+
+public:
+  PostMachineSchedulerPass(const TargetMachine *TM) : TM(TM) {}
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+};
 } // end namespace llvm
 
 #endif // LLVM_CODEGEN_MACHINESCHEDULER_H
diff --git a/llvm/include/llvm/CodeGen/PostRASchedulerList.h b/llvm/include/llvm/CodeGen/PostRASchedulerList.h
new file mode 100644
index 0000000..06043be
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/PostRASchedulerList.h
@@ -0,0 +1,32 @@
+//===- llvm/CodeGen/PostRASchedulerList.h ------------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_POSTRASCHEDULERLIST_H
+#define LLVM_CODEGEN_POSTRASCHEDULERLIST_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class PostRASchedulerPass : public PassInfoMixin<PostRASchedulerPass> {
+  const TargetMachine *TM;
+
+public:
+  PostRASchedulerPass(const TargetMachine *TM) : TM(TM) {}
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+
+  MachineFunctionProperties getRequiredProperties() const {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::NoVRegs);
+  }
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_POSTRASCHEDULERLIST_H
diff --git a/llvm/include/llvm/CodeGen/RenameIndependentSubregs.h b/llvm/include/llvm/CodeGen/RenameIndependentSubregs.h
new file mode 100644
index 0000000..2f6afe6
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/RenameIndependentSubregs.h
@@ -0,0 +1,25 @@
+//===- llvm/CodeGen/RenameIndependentSubregs.h ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_RENAME_INDEPENDENT_SUBREGS_H
+#define LLVM_CODEGEN_RENAME_INDEPENDENT_SUBREGS_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class RenameIndependentSubregsPass
+    : public PassInfoMixin<RenameIndependentSubregsPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_RENAME_INDEPENDENT_SUBREGS_H
diff --git a/llvm/include/llvm/CodeGen/StackSlotColoring.h b/llvm/include/llvm/CodeGen/StackSlotColoring.h
new file mode 100644
index 0000000..8db59a3
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/StackSlotColoring.h
@@ -0,0 +1,24 @@
+//===- llvm/CodeGen/StackSlotColoring.h -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_STACKSLOTCOLORING_H
+#define LLVM_CODEGEN_STACKSLOTCOLORING_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class StackSlotColoringPass : public PassInfoMixin<StackSlotColoringPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_STACKSLOTCOLORING_H
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index a91cb0d4..c8eba71 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1566,7 +1566,7 @@ public:
   ///   DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
   /// or
   ///   DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
-  /// to TargetPassConfig::createMachineScheduler() to have an effect.
+  /// to TargetMachine::createMachineScheduler() to have an effect.
   ///
   /// \p BaseOps1 and \p BaseOps2 are memory operands of two memory operations.
   /// \p Offset1 and \p Offset2 are the byte offsets for the memory
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 04ee24c..bbecc7a 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -94,6 +94,7 @@ class TargetRegisterClass;
 class TargetRegisterInfo;
 class TargetTransformInfo;
 class Value;
+class VPIntrinsic;
 
 namespace Sched {
 
@@ -3156,6 +3157,30 @@ public:
     return false;
   }
 
+  /// Lower an interleaved load to target specific intrinsics. Return
+  /// true on success.
+  ///
+  /// \p Load is a vp.load instruction.
+  /// \p Mask is a mask value
+  /// \p DeinterleaveRes is a list of deinterleaved results.
+  virtual bool
+  lowerDeinterleavedIntrinsicToVPLoad(VPIntrinsic *Load, Value *Mask,
+                                      ArrayRef<Value *> DeinterleaveRes) const {
+    return false;
+  }
+
+  /// Lower an interleaved store to target specific intrinsics. Return
+  /// true on success.
+  ///
+  /// \p Store is the vp.store instruction.
+  /// \p Mask is a mask value
+  /// \p InterleaveOps is a list of values being interleaved.
+  virtual bool
+  lowerInterleavedIntrinsicToVPStore(VPIntrinsic *Store, Value *Mask,
+                                     ArrayRef<Value *> InterleaveOps) const {
+    return false;
+  }
+
   /// Lower a deinterleave intrinsic to a target specific load intrinsic.
   /// Return true on success. Currently only supports
   /// llvm.vector.deinterleave2
diff --git a/llvm/include/llvm/CodeGen/TargetPassConfig.h b/llvm/include/llvm/CodeGen/TargetPassConfig.h
index 66c79c7..1af7267 100644
--- a/llvm/include/llvm/CodeGen/TargetPassConfig.h
+++ b/llvm/include/llvm/CodeGen/TargetPassConfig.h
@@ -22,9 +22,7 @@
 namespace llvm {
 
 class TargetMachine;
-struct MachineSchedContext;
 class PassConfigImpl;
-class ScheduleDAGInstrs;
 class CSEConfigBase;
 class PassInstrumentationCallbacks;
 
@@ -300,27 +298,6 @@ public:
   /// Fully developed targets will not generally override this.
   virtual void addMachinePasses();
 
-  /// Create an instance of ScheduleDAGInstrs to be run within the standard
-  /// MachineScheduler pass for this function and target at the current
-  /// optimization level.
-  ///
-  /// This can also be used to plug a new MachineSchedStrategy into an instance
-  /// of the standard ScheduleDAGMI:
-  ///   return new ScheduleDAGMI(C, std::make_unique<MyStrategy>(C), /*RemoveKillFlags=*/false)
-  ///
-  /// Return NULL to select the default (generic) machine scheduler.
-  virtual ScheduleDAGInstrs *
-  createMachineScheduler(MachineSchedContext *C) const {
-    return nullptr;
-  }
-
-  /// Similar to createMachineScheduler but used when postRA machine scheduling
-  /// is enabled.
-  virtual ScheduleDAGInstrs *
-  createPostMachineScheduler(MachineSchedContext *C) const {
-    return nullptr;
-  }
-
   /// printAndVerify - Add a pass to dump then verify the machine function, if
   /// those steps are enabled.
   void printAndVerify(const std::string &Banner);
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
index db85336..3eddaf4 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
@@ -1204,8 +1204,13 @@ private:
 
   JITDylib(ExecutionSession &ES, std::string Name);
 
-  std::pair<AsynchronousSymbolQuerySet, std::shared_ptr<SymbolDependenceMap>>
-  IL_removeTracker(ResourceTracker &RT);
+  struct RemoveTrackerResult {
+    AsynchronousSymbolQuerySet QueriesToFail;
+    std::shared_ptr<SymbolDependenceMap> FailedSymbols;
+    std::vector<std::unique_ptr<MaterializationUnit>> DefunctMUs;
+  };
+
+  RemoveTrackerResult IL_removeTracker(ResourceTracker &RT);
 
   void transferTracker(ResourceTracker &DstRT, ResourceTracker &SrcRT);
 
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h b/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h
index dcf5592..86e98e7 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h
@@ -20,6 +20,7 @@
 #include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h"
 #include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"
 #include "llvm/ExecutionEngine/Orc/SymbolStringPool.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/UnwindInfoManager.h"
 #include "llvm/ExecutionEngine/Orc/TaskDispatch.h"
 #include "llvm/Support/DynamicLibrary.h"
 #include "llvm/Support/MSVCErrorWorkarounds.h"
@@ -507,6 +508,9 @@ private:
                           SymbolLookupCompleteFn F) override;
 
   std::unique_ptr<jitlink::JITLinkMemoryManager> OwnedMemMgr;
+#ifdef __APPLE__
+  std::unique_ptr<UnwindInfoManager> UnwindInfoMgr;
+#endif // __APPLE__
   char GlobalManglingPrefix = 0;
 };
 
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/MachOObjectFormat.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/MachOObjectFormat.h
index 31d0ecc..8eb8c1b 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/MachOObjectFormat.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/MachOObjectFormat.h
@@ -53,6 +53,7 @@ extern StringRef MachOTextTextSectionName;
 extern StringRef MachOThreadBSSSectionName;
 extern StringRef MachOThreadDataSectionName;
 extern StringRef MachOThreadVarsSectionName;
+extern StringRef MachOUnwindInfoSectionName;
 
 extern StringRef MachOInitSectionNames[22];
 
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h
index aed43f6..db5ff13 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h
@@ -88,6 +88,15 @@ using SPSRunAsMainSignature = int64_t(shared::SPSExecutorAddr,
 using SPSRunAsVoidFunctionSignature = int32_t(shared::SPSExecutorAddr);
 using SPSRunAsIntFunctionSignature = int32_t(shared::SPSExecutorAddr, int32_t);
 } // end namespace rt
+
+namespace rt_alt {
+extern const char *UnwindInfoManagerInstanceName;
+extern const char *UnwindInfoManagerFindSectionsHelperName;
+extern const char *UnwindInfoManagerEnableWrapperName;
+extern const char *UnwindInfoManagerDisableWrapperName;
+extern const char *UnwindInfoManagerRegisterActionName;
+extern const char *UnwindInfoManagerDeregisterActionName;
+} // end namespace rt_alt
 } // end namespace orc
 } // end namespace llvm
 
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/UnwindInfoManager.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/UnwindInfoManager.h
new file mode 100644
index 0000000..fc7719f
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/UnwindInfoManager.h
@@ -0,0 +1,78 @@
+//===--- UnwindInfoManager.h -- Register unwind info sections ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Utilities for managing eh-frame and compact-unwind registration and lookup
+// through libunwind's find_dynamic_unwind_sections mechanism.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_UNWINDINFOMANAGER_H
+#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_UNWINDINFOMANAGER_H
+
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/ExecutorBootstrapService.h"
+#include "llvm/Support/Error.h"
+#include <map>
+#include <mutex>
+
+namespace llvm::orc {
+
+class UnwindInfoManager : public ExecutorBootstrapService {
+public:
+  // This struct's layout should match the unw_dynamic_unwind_sections struct
+  // from libunwind/src/libunwid_ext.h.
+  struct UnwindSections {
+    uintptr_t dso_base;
+    uintptr_t dwarf_section;
+    size_t dwarf_section_length;
+    uintptr_t compact_unwind_section;
+    size_t compact_unwind_section_length;
+  };
+
+  /// If the libunwind find-dynamic-unwind-info callback registration APIs are
+  /// available then this method will return an UnwindInfoManager instance,
+  /// otherwise it will return nullptr.
+  static std::unique_ptr<UnwindInfoManager> TryCreate();
+
+  Error shutdown() override;
+  void addBootstrapSymbols(StringMap<ExecutorAddr> &M) override;
+
+  Error enable(void *FindDynamicUnwindSections);
+  Error disable(void);
+
+  Error registerSections(ArrayRef<orc::ExecutorAddrRange> CodeRanges,
+                         orc::ExecutorAddr DSOBase,
+                         orc::ExecutorAddrRange DWARFEHFrame,
+                         orc::ExecutorAddrRange CompactUnwind);
+
+  Error deregisterSections(ArrayRef<orc::ExecutorAddrRange> CodeRanges);
+
+  int findSections(uintptr_t Addr, UnwindSections *Info);
+
+private:
+  UnwindInfoManager(int (*AddFindDynamicUnwindSections)(void *),
+                    int (*RemoveFindDynamicUnwindSections)(void *))
+      : AddFindDynamicUnwindSections(AddFindDynamicUnwindSections),
+        RemoveFindDynamicUnwindSections(RemoveFindDynamicUnwindSections) {}
+
+  static int findSectionsHelper(UnwindInfoManager *Instance, uintptr_t Addr,
+                                UnwindSections *Info);
+
+  std::mutex M;
+  std::map<uintptr_t, UnwindSections> UWSecs;
+
+  int (*AddFindDynamicUnwindSections)(void *) = nullptr;
+  int (*RemoveFindDynamicUnwindSections)(void *) = nullptr;
+  void *FindDynamicUnwindSections = nullptr;
+
+  static const char *AddFnName, *RemoveFnName;
+};
+
+} // namespace llvm::orc
+
+#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_UNWINDINFOMANAGER_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/UnwindInfoRegistrationPlugin.h b/llvm/include/llvm/ExecutionEngine/Orc/UnwindInfoRegistrationPlugin.h
new file mode 100644
index 0000000..eb883a7
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/UnwindInfoRegistrationPlugin.h
@@ -0,0 +1,70 @@
+//===- UnwindInfoRegistrationPlugin.h -- libunwind registration -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Register eh-frame and compact-unwind sections with libunwind
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_UNWINDINFOREGISTRATIONPLUGIN_H
+#define LLVM_EXECUTIONENGINE_ORC_UNWINDINFOREGISTRATIONPLUGIN_H
+
+#include "llvm/ExecutionEngine/Orc/LinkGraphLinkingLayer.h"
+
+namespace llvm::orc {
+
+class UnwindInfoRegistrationPlugin : public LinkGraphLinkingLayer::Plugin {
+public:
+  static Expected<std::shared_ptr<UnwindInfoRegistrationPlugin>>
+  Create(IRLayer &IRL, JITDylib &PlatformJD, ExecutorAddr Instance,
+         ExecutorAddr FindHelper, ExecutorAddr Enable, ExecutorAddr Disable,
+         ExecutorAddr Register, ExecutorAddr Deregister);
+
+  static Expected<std::shared_ptr<UnwindInfoRegistrationPlugin>>
+  Create(IRLayer &IRL, JITDylib &PlatformJD);
+
+  ~UnwindInfoRegistrationPlugin();
+
+  void modifyPassConfig(MaterializationResponsibility &MR,
+                        jitlink::LinkGraph &G,
+                        jitlink::PassConfiguration &PassConfig) override;
+
+  Error notifyEmitted(MaterializationResponsibility &MR) override {
+    return Error::success();
+  }
+
+  Error notifyFailed(MaterializationResponsibility &MR) override {
+    return Error::success();
+  }
+
+  Error notifyRemovingResources(JITDylib &JD, ResourceKey K) override {
+    return Error::success();
+  }
+
+  void notifyTransferringResources(JITDylib &JD, ResourceKey DstKey,
+                                   ResourceKey SrcKey) override {}
+
+private:
+  UnwindInfoRegistrationPlugin(ExecutionSession &ES, ExecutorAddr Instance,
+                               ExecutorAddr Disable, ExecutorAddr Register,
+                               ExecutorAddr Deregister)
+      : ES(ES), Instance(Instance), Disable(Disable), Register(Register),
+        Deregister(Deregister) {
+    DSOBaseName = ES.intern("__jitlink$libunwind_dso_base");
+  }
+
+  static Expected<ThreadSafeModule> makeBouncerModule(ExecutionSession &ES);
+  Error addUnwindInfoRegistrationActions(jitlink::LinkGraph &G);
+
+  ExecutionSession &ES;
+  SymbolStringPtr DSOBaseName;
+  ExecutorAddr Instance, Disable, Register, Deregister;
+};
+
+} // namespace llvm::orc
+
+#endif // LLVM_EXECUTIONENGINE_ORC_UNWINDINFOREGISTRATIONPLUGIN_H
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPContext.h b/llvm/include/llvm/Frontend/OpenMP/OMPContext.h
index b13b74c..a501eaf 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPContext.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPContext.h
@@ -62,7 +62,7 @@ StringRef getOpenMPContextTraitSetName(TraitSet Kind);
 
 /// Parse \p Str and return the trait set it matches or
 /// TraitSelector::invalid.
-TraitSelector getOpenMPContextTraitSelectorKind(StringRef Str);
+TraitSelector getOpenMPContextTraitSelectorKind(StringRef Str, TraitSet Set);
 
 /// Return the trait selector for which \p Property is a property.
 TraitSelector getOpenMPContextTraitSelectorForProperty(TraitProperty Property);
@@ -139,6 +139,8 @@ struct VariantMatchInfo {
     // the raw string.
     if (Property == TraitProperty::device_isa___ANY)
       ISATraits.push_back(RawString);
+    if (Property == TraitProperty::target_device_isa___ANY)
+      ISATraits.push_back(RawString);
 
     RequiredTraits.set(unsigned(Property));
     if (Set == TraitSet::construct)
@@ -155,7 +157,8 @@ struct VariantMatchInfo {
 /// e.g., device={kind(host)}, and constructs traits which describe the nesting
 /// in OpenMP constructs at the location.
 struct OMPContext {
-  OMPContext(bool IsDeviceCompilation, Triple TargetTriple);
+  OMPContext(bool IsDeviceCompilation, Triple TargetTriple,
+             Triple TargetOffloadTriple, int DeviceNum);
   virtual ~OMPContext() = default;
 
   void addTrait(TraitProperty Property) {
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index ee761cb..8ea3af1 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -1270,6 +1270,38 @@ __OMP_TRAIT_PROPERTY(device, arch, amdgcn)
 __OMP_TRAIT_PROPERTY(device, arch, nvptx)
 __OMP_TRAIT_PROPERTY(device, arch, nvptx64)
 
+__OMP_TRAIT_SET(target_device)
+
+__OMP_TRAIT_SELECTOR(target_device, kind, true)
+
+__OMP_TRAIT_PROPERTY(target_device, kind, host)
+__OMP_TRAIT_PROPERTY(target_device, kind, nohost)
+__OMP_TRAIT_PROPERTY(target_device, kind, cpu)
+__OMP_TRAIT_PROPERTY(target_device, kind, gpu)
+__OMP_TRAIT_PROPERTY(target_device, kind, fpga)
+__OMP_TRAIT_PROPERTY(target_device, kind, any)
+
+__OMP_TRAIT_SELECTOR(target_device, device_num, true)
+
+__OMP_TRAIT_PROPERTY(target_device, device_num, number)
+
+__OMP_TRAIT_SELECTOR(target_device, arch, true)
+
+__OMP_TRAIT_PROPERTY(target_device, arch, arm)
+__OMP_TRAIT_PROPERTY(target_device, arch, armeb)
+__OMP_TRAIT_PROPERTY(target_device, arch, aarch64)
+__OMP_TRAIT_PROPERTY(target_device, arch, aarch64_be)
+__OMP_TRAIT_PROPERTY(target_device, arch, aarch64_32)
+__OMP_TRAIT_PROPERTY(target_device, arch, ppc)
+__OMP_TRAIT_PROPERTY(target_device, arch, ppcle)
+__OMP_TRAIT_PROPERTY(target_device, arch, ppc64)
+__OMP_TRAIT_PROPERTY(target_device, arch, ppc64le)
+__OMP_TRAIT_PROPERTY(target_device, arch, x86)
+__OMP_TRAIT_PROPERTY(target_device, arch, x86_64)
+__OMP_TRAIT_PROPERTY(target_device, arch, amdgcn)
+__OMP_TRAIT_PROPERTY(target_device, arch, nvptx)
+__OMP_TRAIT_PROPERTY(target_device, arch, nvptx64)
+
 __OMP_TRAIT_SET(implementation)
 
 __OMP_TRAIT_SELECTOR(implementation, vendor, true)
@@ -1311,12 +1343,16 @@ __OMP_TRAIT_SELECTOR_AND_PROPERTY(construct, dispatch)
 // This allows us to issue warnings wrt. isa only if we match otherwise.
 __OMP_TRAIT_SELECTOR(device, isa, true)
 
+__OMP_TRAIT_SELECTOR(target_device, isa, true)
+
 // We use "__ANY" as a placeholder in the isa property to denote the
 // conceptual "any", not the literal `any` used in kind. The string we
 // we use is not important except that it will show up in diagnostics.
 OMP_TRAIT_PROPERTY(device_isa___ANY, device, device_isa,
                    "<any, entirely target dependent>")
 
+OMP_TRAIT_PROPERTY(target_device_isa___ANY, target_device, target_device_isa,
+                   "<any, entirely target dependent>")
 
 #undef OMP_TRAIT_SET
 #undef __OMP_TRAIT_SET
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index 6ccbb6b..93750d6e 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -1882,6 +1882,11 @@ public:
   bool isLoop() const {
     return getIntrinsicID() == Intrinsic::experimental_convergence_loop;
   }
+
+  static ConvergenceControlInst *CreateAnchor(BasicBlock &BB);
+  static ConvergenceControlInst *CreateEntry(BasicBlock &BB);
+  static ConvergenceControlInst *CreateLoop(BasicBlock &BB,
+                                            ConvergenceControlInst *Parent);
 };
 
 } // end namespace llvm
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 053f955..b8df4d1 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -209,7 +209,7 @@ void initializeMachinePipelinerPass(PassRegistry &);
 void initializeMachinePostDominatorTreeWrapperPassPass(PassRegistry &);
 void initializeMachineRegionInfoPassPass(PassRegistry &);
 void initializeMachineSanitizerBinaryMetadataPass(PassRegistry &);
-void initializeMachineSchedulerPass(PassRegistry &);
+void initializeMachineSchedulerLegacyPass(PassRegistry &);
 void initializeMachineSinkingPass(PassRegistry &);
 void initializeMachineTraceMetricsWrapperPassPass(PassRegistry &);
 void initializeMachineUniformityInfoPrinterPassPass(PassRegistry &);
@@ -238,10 +238,10 @@ void initializePostDomPrinterWrapperPassPass(PassRegistry &);
 void initializePostDomViewerWrapperPassPass(PassRegistry &);
 void initializePostDominatorTreeWrapperPassPass(PassRegistry &);
 void initializePostInlineEntryExitInstrumenterPass(PassRegistry &);
-void initializePostMachineSchedulerPass(PassRegistry &);
+void initializePostMachineSchedulerLegacyPass(PassRegistry &);
 void initializePostRAHazardRecognizerPass(PassRegistry &);
 void initializePostRAMachineSinkingPass(PassRegistry &);
-void initializePostRASchedulerPass(PassRegistry &);
+void initializePostRASchedulerLegacyPass(PassRegistry &);
 void initializePreISelIntrinsicLoweringLegacyPassPass(PassRegistry &);
 void initializePrintFunctionPassWrapperPass(PassRegistry &);
 void initializePrintModulePassWrapperPass(PassRegistry &);
@@ -269,7 +269,7 @@ void initializeRegionViewerPass(PassRegistry &);
 void initializeRegisterCoalescerLegacyPass(PassRegistry &);
 void initializeRemoveLoadsIntoFakeUsesPass(PassRegistry &);
 void initializeRemoveRedundantDebugValuesPass(PassRegistry &);
-void initializeRenameIndependentSubregsPass(PassRegistry &);
+void initializeRenameIndependentSubregsLegacyPass(PassRegistry &);
 void initializeReplaceWithVeclibLegacyPass(PassRegistry &);
 void initializeResetMachineFunctionPass(PassRegistry &);
 void initializeSCEVAAWrapperPassPass(PassRegistry &);
@@ -298,7 +298,7 @@ void initializeStackMapLivenessPass(PassRegistry &);
 void initializeStackProtectorPass(PassRegistry &);
 void initializeStackSafetyGlobalInfoWrapperPassPass(PassRegistry &);
 void initializeStackSafetyInfoWrapperPassPass(PassRegistry &);
-void initializeStackSlotColoringPass(PassRegistry &);
+void initializeStackSlotColoringLegacyPass(PassRegistry &);
 void initializeStraightLineStrengthReduceLegacyPassPass(PassRegistry &);
 void initializeStripDebugMachineModulePass(PassRegistry &);
 void initializeStructurizeCFGLegacyPassPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 2e89875..1458318 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -50,16 +50,19 @@
 #include "llvm/CodeGen/MachineLICM.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachinePassManager.h"
+#include "llvm/CodeGen/MachineScheduler.h"
 #include "llvm/CodeGen/MachineVerifier.h"
 #include "llvm/CodeGen/OptimizePHIs.h"
 #include "llvm/CodeGen/PHIElimination.h"
 #include "llvm/CodeGen/PeepholeOptimizer.h"
+#include "llvm/CodeGen/PostRASchedulerList.h"
 #include "llvm/CodeGen/PreISelIntrinsicLowering.h"
 #include "llvm/CodeGen/RegAllocFast.h"
 #include "llvm/CodeGen/RegUsageInfoCollector.h"
 #include "llvm/CodeGen/RegUsageInfoPropagate.h"
 #include "llvm/CodeGen/RegisterCoalescerPass.h"
 #include "llvm/CodeGen/RegisterUsageInfo.h"
+#include "llvm/CodeGen/RenameIndependentSubregs.h"
 #include "llvm/CodeGen/ReplaceWithVeclib.h"
 #include "llvm/CodeGen/SafeStack.h"
 #include "llvm/CodeGen/SelectOptimize.h"
@@ -67,6 +70,7 @@
 #include "llvm/CodeGen/SjLjEHPrepare.h"
 #include "llvm/CodeGen/StackColoring.h"
 #include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/StackSlotColoring.h"
 #include "llvm/CodeGen/TailDuplication.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/CodeGen/TwoAddressInstructionPass.h"
@@ -957,9 +961,9 @@ Error CodeGenPassBuilder<Derived, TargetMachineT>::addMachinePasses(
   if (getOptLevel() != CodeGenOptLevel::None &&
       !TM.targetSchedulesPostRAScheduling()) {
     if (Opt.MISchedPostRA)
-      addPass(PostMachineSchedulerPass());
+      addPass(PostMachineSchedulerPass(&TM));
     else
-      addPass(PostRASchedulerPass());
+      addPass(PostRASchedulerPass(&TM));
   }
 
   // GC
@@ -1141,7 +1145,7 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addOptimizedRegAlloc(
   addPass(RenameIndependentSubregsPass());
 
   // PreRA instruction scheduling.
-  addPass(MachineSchedulerPass());
+  addPass(MachineSchedulerPass(&TM));
 
   if (derived().addRegAssignmentOptimized(addPass)) {
     // Allow targets to expand pseudo instructions depending on the choice of
diff --git a/llvm/include/llvm/Passes/DroppedVariableStats.h b/llvm/include/llvm/Passes/DroppedVariableStats.h
index c4de849..e2e9189 100644
--- a/llvm/include/llvm/Passes/DroppedVariableStats.h
+++ b/llvm/include/llvm/Passes/DroppedVariableStats.h
@@ -96,6 +96,8 @@ protected:
     DenseSet<VarID> &DebugVariablesBeforeSet =
         DbgVariables.DebugVariablesBefore;
     DenseSet<VarID> &DebugVariablesAfterSet = DbgVariables.DebugVariablesAfter;
+    if (InlinedAts.back().find(FuncName) == InlinedAts.back().end())
+      return;
     DenseMap<VarID, DILocation *> &InlinedAtsMap = InlinedAts.back()[FuncName];
     // Find an Instruction that shares the same scope as the dropped #dbg_value
     // or has a scope that is the child of the scope of the #dbg_value, and has
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 3519910..e6b4a4b 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -142,11 +142,14 @@ MACHINE_FUNCTION_PASS("finalize-isel", FinalizeISelPass())
 MACHINE_FUNCTION_PASS("localstackalloc", LocalStackSlotAllocationPass())
 MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass())
 MACHINE_FUNCTION_PASS("machine-cse", MachineCSEPass())
+MACHINE_FUNCTION_PASS("machine-scheduler", MachineSchedulerPass(TM))
 MACHINE_FUNCTION_PASS("machinelicm", MachineLICMPass())
 MACHINE_FUNCTION_PASS("no-op-machine-function", NoOpMachineFunctionPass())
 MACHINE_FUNCTION_PASS("opt-phis", OptimizePHIsPass())
 MACHINE_FUNCTION_PASS("peephole-opt", PeepholeOptimizerPass())
 MACHINE_FUNCTION_PASS("phi-node-elimination", PHIEliminationPass())
+MACHINE_FUNCTION_PASS("postmisched", PostMachineSchedulerPass(TM))
+MACHINE_FUNCTION_PASS("post-RA-sched", PostRASchedulerPass(TM))
 MACHINE_FUNCTION_PASS("print", PrintMIRPass())
 MACHINE_FUNCTION_PASS("print<livedebugvars>", LiveDebugVariablesPrinterPass(errs()))
 MACHINE_FUNCTION_PASS("print<live-intervals>", LiveIntervalsPrinterPass(errs()))
@@ -166,9 +169,11 @@ MACHINE_FUNCTION_PASS("print<virtregmap>", VirtRegMapPrinterPass(errs()))
 MACHINE_FUNCTION_PASS("reg-usage-collector", RegUsageInfoCollectorPass())
 MACHINE_FUNCTION_PASS("reg-usage-propagation", RegUsageInfoPropagationPass())
 MACHINE_FUNCTION_PASS("register-coalescer", RegisterCoalescerPass())
+MACHINE_FUNCTION_PASS("rename-independent-subregs", RenameIndependentSubregsPass())
 MACHINE_FUNCTION_PASS("require-all-machine-function-properties",
                       RequireAllMachineFunctionPropertiesPass())
 MACHINE_FUNCTION_PASS("stack-coloring", StackColoringPass())
+MACHINE_FUNCTION_PASS("stack-slot-coloring", StackSlotColoringPass())
 MACHINE_FUNCTION_PASS("tailduplication", TailDuplicatePass())
 MACHINE_FUNCTION_PASS("trigger-verifier-error", TriggerVerifierErrorPass())
 MACHINE_FUNCTION_PASS("two-address-instruction", TwoAddressInstructionPass())
@@ -240,14 +245,11 @@ DUMMY_MACHINE_FUNCTION_PASS("static-data-splitter", StaticDataSplitter)
 DUMMY_MACHINE_FUNCTION_PASS("machine-function-splitter", MachineFunctionSplitterPass)
 DUMMY_MACHINE_FUNCTION_PASS("machine-latecleanup", MachineLateInstrsCleanupPass)
 DUMMY_MACHINE_FUNCTION_PASS("machine-sanmd", MachineSanitizerBinaryMetadata)
-DUMMY_MACHINE_FUNCTION_PASS("machine-scheduler", MachineSchedulerPass)
 DUMMY_MACHINE_FUNCTION_PASS("machine-sink", MachineSinkingPass)
 DUMMY_MACHINE_FUNCTION_PASS("machine-uniformity", MachineUniformityInfoWrapperPass)
 DUMMY_MACHINE_FUNCTION_PASS("machineinstr-printer", MachineFunctionPrinterPass)
 DUMMY_MACHINE_FUNCTION_PASS("mirfs-discriminators", MIRAddFSDiscriminatorsPass)
 DUMMY_MACHINE_FUNCTION_PASS("patchable-function", PatchableFunctionPass)
-DUMMY_MACHINE_FUNCTION_PASS("post-RA-sched", PostRASchedulerPass)
-DUMMY_MACHINE_FUNCTION_PASS("postmisched", PostMachineSchedulerPass)
 DUMMY_MACHINE_FUNCTION_PASS("postra-machine-sink", PostRAMachineSinkingPass)
 DUMMY_MACHINE_FUNCTION_PASS("postrapseudos", ExpandPostRAPseudosPass)
 DUMMY_MACHINE_FUNCTION_PASS("print-machine-cycles", MachineCycleInfoPrinterPass)
@@ -263,11 +265,9 @@ DUMMY_MACHINE_FUNCTION_PASS("regallocscoringpass", RegAllocScoringPass)
 DUMMY_MACHINE_FUNCTION_PASS("regbankselect", RegBankSelectPass)
 DUMMY_MACHINE_FUNCTION_PASS("remove-loads-into-fake-uses", RemoveLoadsIntoFakeUsesPass)
 DUMMY_MACHINE_FUNCTION_PASS("removeredundantdebugvalues", RemoveRedundantDebugValuesPass)
-DUMMY_MACHINE_FUNCTION_PASS("rename-independent-subregs", RenameIndependentSubregsPass)
 DUMMY_MACHINE_FUNCTION_PASS("reset-machine-function", ResetMachineFunctionPass)
 DUMMY_MACHINE_FUNCTION_PASS("shrink-wrap", ShrinkWrapPass)
 DUMMY_MACHINE_FUNCTION_PASS("stack-frame-layout", StackFrameLayoutAnalysisPass)
-DUMMY_MACHINE_FUNCTION_PASS("stack-slot-coloring", StackSlotColoringPass)
 DUMMY_MACHINE_FUNCTION_PASS("stackmap-liveness", StackMapLivenessPass)
 DUMMY_MACHINE_FUNCTION_PASS("unpack-mi-bundles", UnpackMachineBundlesPass)
 DUMMY_MACHINE_FUNCTION_PASS("virtregrewriter", VirtRegRewriterPass)
diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h
index fe1dbbd..b1ec0b9 100644
--- a/llvm/include/llvm/Target/TargetMachine.h
+++ b/llvm/include/llvm/Target/TargetMachine.h
@@ -39,6 +39,7 @@ using ModulePassManager = PassManager<Module>;
 class Function;
 class GlobalValue;
 class MachineModuleInfoWrapperPass;
+struct MachineSchedContext;
 class Mangler;
 class MCAsmInfo;
 class MCContext;
@@ -50,6 +51,7 @@ class raw_pwrite_stream;
 class PassBuilder;
 class PassInstrumentationCallbacks;
 struct PerFunctionMIParsingState;
+class ScheduleDAGInstrs;
 class SMDiagnostic;
 class SMRange;
 class Target;
@@ -147,6 +149,28 @@ public:
     return nullptr;
   }
 
+  /// Create an instance of ScheduleDAGInstrs to be run within the standard
+  /// MachineScheduler pass for this function and target at the current
+  /// optimization level.
+  ///
+  /// This can also be used to plug a new MachineSchedStrategy into an instance
+  /// of the standard ScheduleDAGMI:
+  ///   return new ScheduleDAGMI(C, std::make_unique<MyStrategy>(C),
+  ///   /*RemoveKillFlags=*/false)
+  ///
+  /// Return NULL to select the default (generic) machine scheduler.
+  virtual ScheduleDAGInstrs *
+  createMachineScheduler(MachineSchedContext *C) const {
+    return nullptr;
+  }
+
+  /// Similar to createMachineScheduler but used when postRA machine scheduling
+  /// is enabled.
+  virtual ScheduleDAGInstrs *
+  createPostMachineScheduler(MachineSchedContext *C) const {
+    return nullptr;
+  }
+
   /// Allocate and return a default initialized instance of the YAML
   /// representation for the MachineFunctionInfo.
   virtual yaml::MachineFunctionInfo *createDefaultFuncInfoYAML() const {
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 55feb15..6eba6c0 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -683,33 +683,30 @@ static void computeKnownBitsFromCmp(const Value *V, CmpInst::Predicate Pred,
 
   Value *Y;
   const APInt *Mask, *C;
+  if (!match(RHS, m_APInt(C)))
+    return;
+
   uint64_t ShAmt;
   switch (Pred) {
   case ICmpInst::ICMP_EQ:
     // assume(V = C)
-    if (match(LHS, m_V) && match(RHS, m_APInt(C))) {
+    if (match(LHS, m_V)) {
       Known = Known.unionWith(KnownBits::makeConstant(*C));
       // assume(V & Mask = C)
-    } else if (match(LHS, m_c_And(m_V, m_Value(Y))) &&
-               match(RHS, m_APInt(C))) {
+    } else if (match(LHS, m_c_And(m_V, m_Value(Y)))) {
       // For one bits in Mask, we can propagate bits from C to V.
       Known.One |= *C;
       if (match(Y, m_APInt(Mask)))
         Known.Zero |= ~*C & *Mask;
       // assume(V | Mask = C)
-    } else if (match(LHS, m_c_Or(m_V, m_Value(Y))) && match(RHS, m_APInt(C))) {
+    } else if (match(LHS, m_c_Or(m_V, m_Value(Y)))) {
       // For zero bits in Mask, we can propagate bits from C to V.
       Known.Zero |= ~*C;
       if (match(Y, m_APInt(Mask)))
         Known.One |= *C & ~*Mask;
-      // assume(V ^ Mask = C)
-    } else if (match(LHS, m_Xor(m_V, m_APInt(Mask))) &&
-               match(RHS, m_APInt(C))) {
-      // Equivalent to assume(V == Mask ^ C)
-      Known = Known.unionWith(KnownBits::makeConstant(*C ^ *Mask));
       // assume(V << ShAmt = C)
     } else if (match(LHS, m_Shl(m_V, m_ConstantInt(ShAmt))) &&
-               match(RHS, m_APInt(C)) && ShAmt < BitWidth) {
+               ShAmt < BitWidth) {
       // For those bits in C that are known, we can propagate them to known
       // bits in V shifted to the right by ShAmt.
       KnownBits RHSKnown = KnownBits::makeConstant(*C);
@@ -718,7 +715,7 @@ static void computeKnownBitsFromCmp(const Value *V, CmpInst::Predicate Pred,
       Known = Known.unionWith(RHSKnown);
       // assume(V >> ShAmt = C)
     } else if (match(LHS, m_Shr(m_V, m_ConstantInt(ShAmt))) &&
-               match(RHS, m_APInt(C)) && ShAmt < BitWidth) {
+               ShAmt < BitWidth) {
       KnownBits RHSKnown = KnownBits::makeConstant(*C);
       // For those bits in RHS that are known, we can propagate them to known
       // bits in V shifted to the right by C.
@@ -729,38 +726,36 @@ static void computeKnownBitsFromCmp(const Value *V, CmpInst::Predicate Pred,
   case ICmpInst::ICMP_NE: {
     // assume (V & B != 0) where B is a power of 2
     const APInt *BPow2;
-    if (match(LHS, m_And(m_V, m_Power2(BPow2))) && match(RHS, m_Zero()))
+    if (C->isZero() && match(LHS, m_And(m_V, m_Power2(BPow2))))
       Known.One |= *BPow2;
     break;
   }
-  default:
-    if (match(RHS, m_APInt(C))) {
-      const APInt *Offset = nullptr;
-      if (match(LHS, m_CombineOr(m_V, m_AddLike(m_V, m_APInt(Offset))))) {
-        ConstantRange LHSRange = ConstantRange::makeAllowedICmpRegion(Pred, *C);
-        if (Offset)
-          LHSRange = LHSRange.sub(*Offset);
-        Known = Known.unionWith(LHSRange.toKnownBits());
-      }
-      if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
-        // X & Y u> C     -> X u> C && Y u> C
-        // X nuw- Y u> C  -> X u> C
-        if (match(LHS, m_c_And(m_V, m_Value())) ||
-            match(LHS, m_NUWSub(m_V, m_Value())))
-          Known.One.setHighBits(
-              (*C + (Pred == ICmpInst::ICMP_UGT)).countLeadingOnes());
-      }
-      if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
-        // X | Y u< C    -> X u< C && Y u< C
-        // X nuw+ Y u< C -> X u< C && Y u< C
-        if (match(LHS, m_c_Or(m_V, m_Value())) ||
-            match(LHS, m_c_NUWAdd(m_V, m_Value()))) {
-          Known.Zero.setHighBits(
-              (*C - (Pred == ICmpInst::ICMP_ULT)).countLeadingZeros());
-        }
+  default: {
+    const APInt *Offset = nullptr;
+    if (match(LHS, m_CombineOr(m_V, m_AddLike(m_V, m_APInt(Offset))))) {
+      ConstantRange LHSRange = ConstantRange::makeAllowedICmpRegion(Pred, *C);
+      if (Offset)
+        LHSRange = LHSRange.sub(*Offset);
+      Known = Known.unionWith(LHSRange.toKnownBits());
+    }
+    if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
+      // X & Y u> C     -> X u> C && Y u> C
+      // X nuw- Y u> C  -> X u> C
+      if (match(LHS, m_c_And(m_V, m_Value())) ||
+          match(LHS, m_NUWSub(m_V, m_Value())))
+        Known.One.setHighBits(
+            (*C + (Pred == ICmpInst::ICMP_UGT)).countLeadingOnes());
+    }
+    if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
+      // X | Y u< C    -> X u< C && Y u< C
+      // X nuw+ Y u< C -> X u< C && Y u< C
+      if (match(LHS, m_c_Or(m_V, m_Value())) ||
+          match(LHS, m_c_NUWAdd(m_V, m_Value()))) {
+        Known.Zero.setHighBits(
+            (*C - (Pred == ICmpInst::ICMP_ULT)).countLeadingZeros());
       }
     }
-    break;
+  } break;
   }
 }
 
@@ -10217,10 +10212,9 @@ void llvm::findValuesAffectedByCondition(
       if (ICmpInst::isEquality(Pred)) {
         if (HasRHSC) {
           Value *Y;
-          // (X & C) or (X | C) or (X ^ C).
+          // (X & C) or (X | C).
           // (X << C) or (X >>_s C) or (X >>_u C).
-          if (match(A, m_BitwiseLogic(m_Value(X), m_ConstantInt())) ||
-              match(A, m_Shift(m_Value(X), m_ConstantInt())))
+          if (match(A, m_Shift(m_Value(X), m_ConstantInt())))
             AddAffected(X);
           else if (match(A, m_And(m_Value(X), m_Value(Y))) ||
                    match(A, m_Or(m_Value(X), m_Value(Y)))) {
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index e9518c8..07fe589 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2854,8 +2854,8 @@ void AsmPrinter::emitConstantPool() {
 // function.
 void AsmPrinter::emitJumpTableInfo() {
   const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
-  if (!MJTI)
-    return;
+  if (!MJTI) return;
+
   const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
   if (JT.empty()) return;
 
@@ -2908,14 +2908,12 @@ void AsmPrinter::emitJumpTableImpl(const MachineJumpTableInfo &MJTI,
   }
 
   const DataLayout &DL = MF->getDataLayout();
+  emitAlignment(Align(MJTI.getEntryAlignment(DL)));
 
-  emitAlignment(Align(MJTI.getEntryAlignment(MF->getDataLayout())));
-
-  if (!JTInDiffSection) {
-    // Jump tables in code sections are marked with a data_region directive
-    // where that's supported.
+  // Jump tables in code sections are marked with a data_region directive
+  // where that's supported.
+  if (!JTInDiffSection)
     OutStreamer->emitDataRegion(MCDR_DataRegionJT32);
-  }
 
   for (const unsigned JumpTableIndex : JumpTableIndices) {
     ArrayRef<MachineBasicBlock *> JTBBs = JT[JumpTableIndex].MBBs;
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 6cf05fd..ddf0275 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -1757,8 +1757,11 @@ void DwarfCompileUnit::createBaseTypeDIEs() {
 DIE *DwarfCompileUnit::getLexicalBlockDIE(const DILexicalBlock *LB) {
   // Assume if there is an abstract tree all the DIEs are already emitted.
   bool isAbstract = getAbstractScopeDIEs().count(LB->getSubprogram());
-  if (isAbstract && getAbstractScopeDIEs().count(LB))
-    return getAbstractScopeDIEs()[LB];
+  if (isAbstract) {
+    auto &DIEs = getAbstractScopeDIEs();
+    if (auto It = DIEs.find(LB); It != DIEs.end())
+      return It->second;
+  }
   assert(!isAbstract && "Missed lexical block DIE in abstract tree!");
 
   // Return a concrete DIE if it exists or nullptr otherwise.
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 0a8a1ad..46ea5fe 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -232,6 +232,42 @@ void DwarfUnit::addUInt(DIEValueList &Block, dwarf::Form Form,
   addUInt(Block, (dwarf::Attribute)0, Form, Integer);
 }
 
+void DwarfUnit::addIntAsBlock(DIE &Die, dwarf::Attribute Attribute, const APInt &Val) {
+  DIEBlock *Block = new (DIEValueAllocator) DIEBlock;
+
+  // Get the raw data form of the large APInt.
+  const uint64_t *Ptr64 = Val.getRawData();
+
+  int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte.
+  bool LittleEndian = Asm->getDataLayout().isLittleEndian();
+
+  // Output the constant to DWARF one byte at a time.
+  for (int i = 0; i < NumBytes; i++) {
+    uint8_t c;
+    if (LittleEndian)
+      c = Ptr64[i / 8] >> (8 * (i & 7));
+    else
+      c = Ptr64[(NumBytes - 1 - i) / 8] >> (8 * ((NumBytes - 1 - i) & 7));
+    addUInt(*Block, dwarf::DW_FORM_data1, c);
+  }
+
+  addBlock(Die, Attribute, Block);
+}
+
+void DwarfUnit::addInt(DIE &Die, dwarf::Attribute Attribute,
+		       const APInt &Val, bool Unsigned) {
+  unsigned CIBitWidth = Val.getBitWidth();
+  if (CIBitWidth <= 64) {
+    if (Unsigned)
+      addUInt(Die, Attribute, std::nullopt, Val.getZExtValue());
+    else
+      addSInt(Die, Attribute, std::nullopt, Val.getSExtValue());
+    return;
+  }
+
+  addIntAsBlock(Die, Attribute, Val);
+}
+
 void DwarfUnit::addSInt(DIEValueList &Die, dwarf::Attribute Attribute,
                         std::optional<dwarf::Form> Form, int64_t Integer) {
   if (!Form)
@@ -484,25 +520,7 @@ void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) {
     return;
   }
 
-  DIEBlock *Block = new (DIEValueAllocator) DIEBlock;
-
-  // Get the raw data form of the large APInt.
-  const uint64_t *Ptr64 = Val.getRawData();
-
-  int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte.
-  bool LittleEndian = Asm->getDataLayout().isLittleEndian();
-
-  // Output the constant to DWARF one byte at a time.
-  for (int i = 0; i < NumBytes; i++) {
-    uint8_t c;
-    if (LittleEndian)
-      c = Ptr64[i / 8] >> (8 * (i & 7));
-    else
-      c = Ptr64[(NumBytes - 1 - i) / 8] >> (8 * ((NumBytes - 1 - i) & 7));
-    addUInt(*Block, dwarf::DW_FORM_data1, c);
-  }
-
-  addBlock(Die, dwarf::DW_AT_const_value, Block);
+  addIntAsBlock(Die, dwarf::DW_AT_const_value, Val);
 }
 
 void DwarfUnit::addLinkageName(DIE &Die, StringRef LinkageName) {
@@ -972,12 +990,8 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
           DIE &Variant = createAndAddDIE(dwarf::DW_TAG_variant, Buffer);
           if (const ConstantInt *CI =
               dyn_cast_or_null<ConstantInt>(DDTy->getDiscriminantValue())) {
-            if (DD->isUnsignedDIType(Discriminator->getBaseType()))
-              addUInt(Variant, dwarf::DW_AT_discr_value, std::nullopt,
-                      CI->getZExtValue());
-            else
-              addSInt(Variant, dwarf::DW_AT_discr_value, std::nullopt,
-                      CI->getSExtValue());
+	    addInt(Variant, dwarf::DW_AT_discr_value, CI->getValue(),
+		   DD->isUnsignedDIType(Discriminator->getBaseType()));
           }
           constructMemberDIE(Variant, DDTy);
         } else {
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 1632053..9ddd6f8 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -167,6 +167,10 @@ public:
 
   void addSInt(DIELoc &Die, std::optional<dwarf::Form> Form, int64_t Integer);
 
+  /// Add an integer attribute data and value; value may be any width.
+  void addInt(DIE &Die, dwarf::Attribute Attribute, const APInt &Integer,
+	      bool Unsigned);
+
   /// Add a string attribute data and value.
   ///
   /// We always emit a reference to the string pool instead of immediate
@@ -334,6 +338,10 @@ protected:
   void emitCommonHeader(bool UseOffsets, dwarf::UnitType UT);
 
 private:
+  /// A helper to add a wide integer constant to a DIE using a block
+  /// form.
+  void addIntAsBlock(DIE &Die, dwarf::Attribute Attribute, const APInt &Val);
+
   void constructTypeDIE(DIE &Buffer, const DIBasicType *BTy);
   void constructTypeDIE(DIE &Buffer, const DIStringType *BTy);
   void constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy);
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 0a7937e..35df2a47 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -94,7 +94,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeModuloScheduleTestPass(Registry);
   initializeMachinePostDominatorTreeWrapperPassPass(Registry);
   initializeMachineRegionInfoPassPass(Registry);
-  initializeMachineSchedulerPass(Registry);
+  initializeMachineSchedulerLegacyPass(Registry);
   initializeMachineSinkingPass(Registry);
   initializeMachineUniformityAnalysisPassPass(Registry);
   initializeMachineUniformityInfoPrinterPassPass(Registry);
@@ -105,10 +105,10 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializePHIEliminationPass(Registry);
   initializePatchableFunctionPass(Registry);
   initializePeepholeOptimizerLegacyPass(Registry);
-  initializePostMachineSchedulerPass(Registry);
+  initializePostMachineSchedulerLegacyPass(Registry);
   initializePostRAHazardRecognizerPass(Registry);
   initializePostRAMachineSinkingPass(Registry);
-  initializePostRASchedulerPass(Registry);
+  initializePostRASchedulerLegacyPass(Registry);
   initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
   initializeProcessImplicitDefsPass(Registry);
   initializeRABasicPass(Registry);
@@ -119,7 +119,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeRegisterCoalescerLegacyPass(Registry);
   initializeRemoveLoadsIntoFakeUsesPass(Registry);
   initializeRemoveRedundantDebugValuesPass(Registry);
-  initializeRenameIndependentSubregsPass(Registry);
+  initializeRenameIndependentSubregsLegacyPass(Registry);
   initializeSafeStackLegacyPassPass(Registry);
   initializeSelectOptimizePass(Registry);
   initializeShadowStackGCLoweringPass(Registry);
@@ -130,7 +130,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeStackFrameLayoutAnalysisPassPass(Registry);
   initializeStackMapLivenessPass(Registry);
   initializeStackProtectorPass(Registry);
-  initializeStackSlotColoringPass(Registry);
+  initializeStackSlotColoringLegacyPass(Registry);
   initializeStaticDataSplitterPass(Registry);
   initializeStripDebugMachineModulePass(Registry);
   initializeTailDuplicateLegacyPass(Registry);
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 3f6a69e..3261f28 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -630,11 +630,37 @@ getVectorDeinterleaveFactor(IntrinsicInst *II,
   return true;
 }
 
+// Return the corresponded deinterleaved mask, or nullptr if there is no valid
+// mask.
+static Value *getMask(Value *WideMask, unsigned Factor,
+                      VectorType *LeafValueTy) {
+  using namespace llvm::PatternMatch;
+  if (auto *IMI = dyn_cast<IntrinsicInst>(WideMask)) {
+    SmallVector<Value *, 8> Operands;
+    SmallVector<Instruction *, 8> DeadInsts;
+    if (getVectorInterleaveFactor(IMI, Operands, DeadInsts)) {
+      assert(!Operands.empty());
+      if (Operands.size() == Factor && llvm::all_equal(Operands))
+        return Operands[0];
+    }
+  }
+
+  if (match(WideMask, m_AllOnes())) {
+    // Scale the vector length of all-ones mask.
+    ElementCount OrigEC =
+        cast<VectorType>(WideMask->getType())->getElementCount();
+    assert(OrigEC.getKnownMinValue() % Factor == 0);
+    return ConstantVector::getSplat(OrigEC.divideCoefficientBy(Factor),
+                                    cast<Constant>(WideMask)->getSplatValue());
+  }
+
+  return nullptr;
+}
+
 bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
     IntrinsicInst *DI, SmallSetVector<Instruction *, 32> &DeadInsts) {
-  LoadInst *LI = dyn_cast<LoadInst>(DI->getOperand(0));
-
-  if (!LI || !LI->hasOneUse() || !LI->isSimple())
+  Value *LoadedVal = DI->getOperand(0);
+  if (!LoadedVal->hasOneUse() || !isa<LoadInst, VPIntrinsic>(LoadedVal))
     return false;
 
   SmallVector<Value *, 8> DeinterleaveValues;
@@ -643,16 +669,43 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
                                    DeinterleaveDeadInsts))
     return false;
 
-  LLVM_DEBUG(dbgs() << "IA: Found a deinterleave intrinsic: " << *DI
-                    << " with factor = " << DeinterleaveValues.size() << "\n");
+  const unsigned Factor = DeinterleaveValues.size();
 
-  // Try and match this with target specific intrinsics.
-  if (!TLI->lowerDeinterleaveIntrinsicToLoad(LI, DeinterleaveValues))
-    return false;
+  if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadedVal)) {
+    if (VPLoad->getIntrinsicID() != Intrinsic::vp_load)
+      return false;
+    // Check mask operand. Handle both all-true and interleaved mask.
+    Value *WideMask = VPLoad->getOperand(1);
+    Value *Mask = getMask(WideMask, Factor,
+                          cast<VectorType>(DeinterleaveValues[0]->getType()));
+    if (!Mask)
+      return false;
+
+    LLVM_DEBUG(dbgs() << "IA: Found a vp.load with deinterleave intrinsic "
+                      << *DI << " and factor = " << Factor << "\n");
+
+    // Since lowerInterleaveLoad expects Shuffles and LoadInst, use special
+    // TLI function to emit target-specific interleaved instruction.
+    if (!TLI->lowerDeinterleavedIntrinsicToVPLoad(VPLoad, Mask,
+                                                  DeinterleaveValues))
+      return false;
+
+  } else {
+    auto *LI = cast<LoadInst>(LoadedVal);
+    if (!LI->isSimple())
+      return false;
+
+    LLVM_DEBUG(dbgs() << "IA: Found a load with deinterleave intrinsic " << *DI
+                      << " and factor = " << Factor << "\n");
+
+    // Try and match this with target specific intrinsics.
+    if (!TLI->lowerDeinterleaveIntrinsicToLoad(LI, DeinterleaveValues))
+      return false;
+  }
 
   DeadInsts.insert(DeinterleaveDeadInsts.begin(), DeinterleaveDeadInsts.end());
   // We now have a target-specific load, so delete the old one.
-  DeadInsts.insert(LI);
+  DeadInsts.insert(cast<Instruction>(LoadedVal));
   return true;
 }
 
@@ -660,10 +713,8 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
     IntrinsicInst *II, SmallSetVector<Instruction *, 32> &DeadInsts) {
   if (!II->hasOneUse())
     return false;
-
-  StoreInst *SI = dyn_cast<StoreInst>(*(II->users().begin()));
-
-  if (!SI || !SI->isSimple())
+  Value *StoredBy = II->user_back();
+  if (!isa<StoreInst, VPIntrinsic>(StoredBy))
     return false;
 
   SmallVector<Value *, 8> InterleaveValues;
@@ -671,15 +722,41 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
   if (!getVectorInterleaveFactor(II, InterleaveValues, InterleaveDeadInsts))
     return false;
 
-  LLVM_DEBUG(dbgs() << "IA: Found an interleave intrinsic: " << *II
-                    << " with factor = " << InterleaveValues.size() << "\n");
+  const unsigned Factor = InterleaveValues.size();
 
-  // Try and match this with target specific intrinsics.
-  if (!TLI->lowerInterleaveIntrinsicToStore(SI, InterleaveValues))
-    return false;
+  if (auto *VPStore = dyn_cast<VPIntrinsic>(StoredBy)) {
+    if (VPStore->getIntrinsicID() != Intrinsic::vp_store)
+      return false;
+
+    Value *WideMask = VPStore->getOperand(2);
+    Value *Mask = getMask(WideMask, Factor,
+                          cast<VectorType>(InterleaveValues[0]->getType()));
+    if (!Mask)
+      return false;
+
+    LLVM_DEBUG(dbgs() << "IA: Found a vp.store with interleave intrinsic "
+                      << *II << " and factor = " << Factor << "\n");
+
+    // Since lowerInterleavedStore expects Shuffle and StoreInst, use special
+    // TLI function to emit target-specific interleaved instruction.
+    if (!TLI->lowerInterleavedIntrinsicToVPStore(VPStore, Mask,
+                                                 InterleaveValues))
+      return false;
+  } else {
+    auto *SI = cast<StoreInst>(StoredBy);
+    if (!SI->isSimple())
+      return false;
+
+    LLVM_DEBUG(dbgs() << "IA: Found a store with interleave intrinsic " << *II
+                      << " and factor = " << Factor << "\n");
+
+    // Try and match this with target specific intrinsics.
+    if (!TLI->lowerInterleaveIntrinsicToStore(SI, InterleaveValues))
+      return false;
+  }
 
   // We now have a target-specific store, so delete the old one.
-  DeadInsts.insert(SI);
+  DeadInsts.insert(cast<Instruction>(StoredBy));
   DeadInsts.insert(InterleaveDeadInsts.begin(), InterleaveDeadInsts.end());
   return true;
 }
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 0d5dc96..064b0a0 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -511,7 +511,7 @@ bool MachinePipeliner::runWindowScheduler(MachineLoop &L) {
   Context.MF = MF;
   Context.MLI = MLI;
   Context.MDT = MDT;
-  Context.PassConfig = &getAnalysis<TargetPassConfig>();
+  Context.TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
   Context.AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
   Context.LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
   Context.RegClassInfo->runOnMachineFunction(*MF);
@@ -2521,9 +2521,104 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
   return scheduleFound && Schedule.getMaxStageCount() > 0;
 }
 
+static Register findUniqueOperandDefinedInLoop(const MachineInstr &MI) {
+  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+  Register Result;
+  for (const MachineOperand &Use : MI.all_uses()) {
+    Register Reg = Use.getReg();
+    if (!Reg.isVirtual())
+      return Register();
+    if (MRI.getVRegDef(Reg)->getParent() != MI.getParent())
+      continue;
+    if (Result)
+      return Register();
+    Result = Reg;
+  }
+  return Result;
+}
+
+/// When Op is a value that is incremented recursively in a loop and there is a
+/// unique instruction that increments it, returns true and sets Value.
+static bool findLoopIncrementValue(const MachineOperand &Op, int &Value) {
+  if (!Op.isReg() || !Op.getReg().isVirtual())
+    return false;
+
+  Register OrgReg = Op.getReg();
+  Register CurReg = OrgReg;
+  const MachineBasicBlock *LoopBB = Op.getParent()->getParent();
+  const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
+
+  const TargetInstrInfo *TII =
+      LoopBB->getParent()->getSubtarget().getInstrInfo();
+  const TargetRegisterInfo *TRI =
+      LoopBB->getParent()->getSubtarget().getRegisterInfo();
+
+  MachineInstr *Phi = nullptr;
+  MachineInstr *Increment = nullptr;
+
+  // Traverse definitions until it reaches Op or an instruction that does not
+  // satisfy the condition.
+  // Acceptable example:
+  //   bb.0:
+  //     %0 = PHI %3, %bb.0, ...
+  //     %2 = ADD %0, Value
+  //     ... = LOAD %2(Op)
+  //     %3 = COPY %2
+  while (true) {
+    if (!CurReg.isValid() || !CurReg.isVirtual())
+      return false;
+    MachineInstr *Def = MRI.getVRegDef(CurReg);
+    if (Def->getParent() != LoopBB)
+      return false;
+
+    if (Def->isCopy()) {
+      // Ignore copy instructions unless they contain subregisters
+      if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
+        return false;
+      CurReg = Def->getOperand(1).getReg();
+    } else if (Def->isPHI()) {
+      // There must be just one Phi
+      if (Phi)
+        return false;
+      Phi = Def;
+      CurReg = getLoopPhiReg(*Def, LoopBB);
+    } else if (TII->getIncrementValue(*Def, Value)) {
+      // Potentially a unique increment
+      if (Increment)
+        // Multiple increments exist
+        return false;
+
+      const MachineOperand *BaseOp;
+      int64_t Offset;
+      bool OffsetIsScalable;
+      if (TII->getMemOperandWithOffset(*Def, BaseOp, Offset, OffsetIsScalable,
+                                       TRI)) {
+        // Pre/post increment instruction
+        CurReg = BaseOp->getReg();
+      } else {
+        // If only one of the operands is defined within the loop, it is assumed
+        // to be an incremented value.
+        CurReg = findUniqueOperandDefinedInLoop(*Def);
+        if (!CurReg.isValid())
+          return false;
+      }
+      Increment = Def;
+    } else {
+      return false;
+    }
+    if (CurReg == OrgReg)
+      break;
+  }
+
+  if (!Phi || !Increment)
+    return false;
+
+  return true;
+}
+
 /// Return true if we can compute the amount the instruction changes
 /// during each iteration. Set Delta to the amount of the change.
-bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) const {
+bool SwingSchedulerDAG::computeDelta(const MachineInstr &MI, int &Delta) const {
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
   const MachineOperand *BaseOp;
   int64_t Offset;
@@ -2538,24 +2633,7 @@ bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) const {
   if (!BaseOp->isReg())
     return false;
 
-  Register BaseReg = BaseOp->getReg();
-
-  MachineRegisterInfo &MRI = MF.getRegInfo();
-  // Check if there is a Phi. If so, get the definition in the loop.
-  MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
-  if (BaseDef && BaseDef->isPHI()) {
-    BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
-    BaseDef = MRI.getVRegDef(BaseReg);
-  }
-  if (!BaseDef)
-    return false;
-
-  int D = 0;
-  if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
-    return false;
-
-  Delta = D;
-  return true;
+  return findLoopIncrementValue(*BaseOp, Delta);
 }
 
 /// Check if we can change the instruction to use an offset value from the
@@ -2673,6 +2751,100 @@ MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) {
   return Def;
 }
 
+/// Return false if there is no overlap between the region accessed by BaseMI in
+/// an iteration and the region accessed by OtherMI in subsequent iterations.
+bool SwingSchedulerDAG::mayOverlapInLaterIter(
+    const MachineInstr *BaseMI, const MachineInstr *OtherMI) const {
+  int DeltaB, DeltaO, Delta;
+  if (!computeDelta(*BaseMI, DeltaB) || !computeDelta(*OtherMI, DeltaO) ||
+      DeltaB != DeltaO)
+    return true;
+  Delta = DeltaB;
+
+  const MachineOperand *BaseOpB, *BaseOpO;
+  int64_t OffsetB, OffsetO;
+  bool OffsetBIsScalable, OffsetOIsScalable;
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  if (!TII->getMemOperandWithOffset(*BaseMI, BaseOpB, OffsetB,
+                                    OffsetBIsScalable, TRI) ||
+      !TII->getMemOperandWithOffset(*OtherMI, BaseOpO, OffsetO,
+                                    OffsetOIsScalable, TRI))
+    return true;
+
+  if (OffsetBIsScalable || OffsetOIsScalable)
+    return true;
+
+  if (!BaseOpB->isIdenticalTo(*BaseOpO)) {
+    // Pass cases with different base operands but same initial values.
+    // Typically for when pre/post increment is used.
+
+    if (!BaseOpB->isReg() || !BaseOpO->isReg())
+      return true;
+    Register RegB = BaseOpB->getReg(), RegO = BaseOpO->getReg();
+    if (!RegB.isVirtual() || !RegO.isVirtual())
+      return true;
+
+    MachineInstr *DefB = MRI.getVRegDef(BaseOpB->getReg());
+    MachineInstr *DefO = MRI.getVRegDef(BaseOpO->getReg());
+    if (!DefB || !DefO || !DefB->isPHI() || !DefO->isPHI())
+      return true;
+
+    unsigned InitValB = 0;
+    unsigned LoopValB = 0;
+    unsigned InitValO = 0;
+    unsigned LoopValO = 0;
+    getPhiRegs(*DefB, BB, InitValB, LoopValB);
+    getPhiRegs(*DefO, BB, InitValO, LoopValO);
+    MachineInstr *InitDefB = MRI.getVRegDef(InitValB);
+    MachineInstr *InitDefO = MRI.getVRegDef(InitValO);
+
+    if (!InitDefB->isIdenticalTo(*InitDefO))
+      return true;
+  }
+
+  LocationSize AccessSizeB = (*BaseMI->memoperands_begin())->getSize();
+  LocationSize AccessSizeO = (*OtherMI->memoperands_begin())->getSize();
+
+  // This is the main test, which checks the offset values and the loop
+  // increment value to determine if the accesses may be loop carried.
+  if (!AccessSizeB.hasValue() || !AccessSizeO.hasValue())
+    return true;
+
+  LLVM_DEBUG({
+    dbgs() << "Overlap check:\n";
+    dbgs() << "  BaseMI: ";
+    BaseMI->dump();
+    dbgs() << "    Base + " << OffsetB << " + I * " << Delta
+           << ", Len: " << AccessSizeB.getValue() << "\n";
+    dbgs() << "  OtherMI: ";
+    OtherMI->dump();
+    dbgs() << "    Base + " << OffsetO << " + I * " << Delta
+           << ", Len: " << AccessSizeO.getValue() << "\n";
+  });
+
+  // Excessive overlap may be detected in strided patterns.
+  // For example, the memory addresses of the store and the load in
+  //   for (i=0; i<n; i+=2) a[i+1] = a[i];
+  // are assumed to overlap.
+  if (Delta < 0) {
+    int64_t BaseMinAddr = OffsetB;
+    int64_t OhterNextIterMaxAddr = OffsetO + Delta + AccessSizeO.getValue() - 1;
+    if (BaseMinAddr > OhterNextIterMaxAddr) {
+      LLVM_DEBUG(dbgs() << "  Result: No overlap\n");
+      return false;
+    }
+  } else {
+    int64_t BaseMaxAddr = OffsetB + AccessSizeB.getValue() - 1;
+    int64_t OtherNextIterMinAddr = OffsetO + Delta;
+    if (BaseMaxAddr < OtherNextIterMinAddr) {
+      LLVM_DEBUG(dbgs() << "  Result: No overlap\n");
+      return false;
+    }
+  }
+  LLVM_DEBUG(dbgs() << "  Result: Overlap\n");
+  return true;
+}
+
 /// Return true for an order or output dependence that is loop carried
 /// potentially. A dependence is loop carried if the destination defines a value
 /// that may be used or defined by the source in a subsequent iteration.
@@ -2704,61 +2876,7 @@ bool SwingSchedulerDAG::isLoopCarriedDep(
   // The conservative assumption is that a dependence between memory operations
   // may be loop carried. The following code checks when it can be proved that
   // there is no loop carried dependence.
-  unsigned DeltaS, DeltaD;
-  if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
-    return true;
-
-  const MachineOperand *BaseOpS, *BaseOpD;
-  int64_t OffsetS, OffsetD;
-  bool OffsetSIsScalable, OffsetDIsScalable;
-  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
-  if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, OffsetSIsScalable,
-                                    TRI) ||
-      !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, OffsetDIsScalable,
-                                    TRI))
-    return true;
-
-  assert(!OffsetSIsScalable && !OffsetDIsScalable &&
-         "Expected offsets to be byte offsets");
-
-  MachineInstr *DefS = MRI.getVRegDef(BaseOpS->getReg());
-  MachineInstr *DefD = MRI.getVRegDef(BaseOpD->getReg());
-  if (!DefS || !DefD || !DefS->isPHI() || !DefD->isPHI())
-    return true;
-
-  unsigned InitValS = 0;
-  unsigned LoopValS = 0;
-  unsigned InitValD = 0;
-  unsigned LoopValD = 0;
-  getPhiRegs(*DefS, BB, InitValS, LoopValS);
-  getPhiRegs(*DefD, BB, InitValD, LoopValD);
-  MachineInstr *InitDefS = MRI.getVRegDef(InitValS);
-  MachineInstr *InitDefD = MRI.getVRegDef(InitValD);
-
-  if (!InitDefS->isIdenticalTo(*InitDefD))
-    return true;
-
-  // Check that the base register is incremented by a constant value for each
-  // iteration.
-  MachineInstr *LoopDefS = MRI.getVRegDef(LoopValS);
-  int D = 0;
-  if (!LoopDefS || !TII->getIncrementValue(*LoopDefS, D))
-    return true;
-
-  LocationSize AccessSizeS = (*SI->memoperands_begin())->getSize();
-  LocationSize AccessSizeD = (*DI->memoperands_begin())->getSize();
-
-  // This is the main test, which checks the offset values and the loop
-  // increment value to determine if the accesses may be loop carried.
-  if (!AccessSizeS.hasValue() || !AccessSizeD.hasValue())
-    return true;
-
-  if (DeltaS != DeltaD || DeltaS < AccessSizeS.getValue() ||
-      DeltaD < AccessSizeD.getValue())
-    return true;
-
-  return (OffsetS + (int64_t)AccessSizeS.getValue() <
-          OffsetD + (int64_t)AccessSizeD.getValue());
+  return mayOverlapInLaterIter(DI, SI);
 }
 
 void SwingSchedulerDAG::postProcessDAG() {
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 393530f..df90077 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -58,6 +58,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/GraphWriter.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -215,69 +216,85 @@ MachineSchedContext::~MachineSchedContext() {
 
 namespace {
 
-/// Base class for a machine scheduler class that can run at any point.
-class MachineSchedulerBase : public MachineSchedContext,
-                             public MachineFunctionPass {
+/// Base class for the machine scheduler classes.
+class MachineSchedulerBase : public MachineSchedContext {
+protected:
+  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
+};
+
+/// Impl class for MachineScheduler.
+class MachineSchedulerImpl : public MachineSchedulerBase {
+  MachineFunctionPass *P = nullptr;
+  MachineFunctionAnalysisManager *MFAM = nullptr;
+
 public:
-  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
+  MachineSchedulerImpl(MachineFunction &Func, MachineFunctionPass *P);
+  MachineSchedulerImpl(MachineFunction &Func,
+                       MachineFunctionAnalysisManager &MFAM,
+                       const TargetMachine *TargetM);
+  bool run();
+
+protected:
+  ScheduleDAGInstrs *createMachineScheduler();
+};
 
-  void print(raw_ostream &O, const Module* = nullptr) const override;
+/// Impl class for PostMachineScheduler.
+class PostMachineSchedulerImpl : public MachineSchedulerBase {
+  MachineFunctionPass *P = nullptr;
+  MachineFunctionAnalysisManager *MFAM = nullptr;
+
+public:
+  PostMachineSchedulerImpl(MachineFunction &Func, MachineFunctionPass *P);
+  PostMachineSchedulerImpl(MachineFunction &Func,
+                           MachineFunctionAnalysisManager &MFAM,
+                           const TargetMachine *TargetM);
+  bool run();
 
 protected:
-  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
+  ScheduleDAGInstrs *createPostMachineScheduler();
 };
 
 /// MachineScheduler runs after coalescing and before register allocation.
-class MachineScheduler : public MachineSchedulerBase {
+class MachineSchedulerLegacy : public MachineFunctionPass {
 public:
-  MachineScheduler();
-
+  MachineSchedulerLegacy();
   void getAnalysisUsage(AnalysisUsage &AU) const override;
-
   bool runOnMachineFunction(MachineFunction&) override;
 
   static char ID; // Class identification, replacement for typeinfo
-
-protected:
-  ScheduleDAGInstrs *createMachineScheduler();
 };
 
 /// PostMachineScheduler runs after shortly before code emission.
-class PostMachineScheduler : public MachineSchedulerBase {
+class PostMachineSchedulerLegacy : public MachineFunctionPass {
 public:
-  PostMachineScheduler();
-
+  PostMachineSchedulerLegacy();
   void getAnalysisUsage(AnalysisUsage &AU) const override;
-
   bool runOnMachineFunction(MachineFunction&) override;
 
   static char ID; // Class identification, replacement for typeinfo
-
-protected:
-  ScheduleDAGInstrs *createPostMachineScheduler();
 };
 
 } // end anonymous namespace
 
-char MachineScheduler::ID = 0;
+char MachineSchedulerLegacy::ID = 0;
 
-char &llvm::MachineSchedulerID = MachineScheduler::ID;
+char &llvm::MachineSchedulerID = MachineSchedulerLegacy::ID;
 
-INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(MachineSchedulerLegacy, DEBUG_TYPE,
                       "Machine Instruction Scheduler", false, false)
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
-INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
+INITIALIZE_PASS_END(MachineSchedulerLegacy, DEBUG_TYPE,
                     "Machine Instruction Scheduler", false, false)
 
-MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
-  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
+MachineSchedulerLegacy::MachineSchedulerLegacy() : MachineFunctionPass(ID) {
+  initializeMachineSchedulerLegacyPass(*PassRegistry::getPassRegistry());
 }
 
-void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
+void MachineSchedulerLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesCFG();
   AU.addRequired<MachineDominatorTreeWrapperPass>();
   AU.addRequired<MachineLoopInfoWrapperPass>();
@@ -290,23 +307,24 @@ void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
-char PostMachineScheduler::ID = 0;
+char PostMachineSchedulerLegacy::ID = 0;
 
-char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
+char &llvm::PostMachineSchedulerID = PostMachineSchedulerLegacy::ID;
 
-INITIALIZE_PASS_BEGIN(PostMachineScheduler, "postmisched",
+INITIALIZE_PASS_BEGIN(PostMachineSchedulerLegacy, "postmisched",
                       "PostRA Machine Instruction Scheduler", false, false)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(PostMachineScheduler, "postmisched",
+INITIALIZE_PASS_END(PostMachineSchedulerLegacy, "postmisched",
                     "PostRA Machine Instruction Scheduler", false, false)
 
-PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
-  initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
+PostMachineSchedulerLegacy::PostMachineSchedulerLegacy()
+    : MachineFunctionPass(ID) {
+  initializePostMachineSchedulerLegacyPass(*PassRegistry::getPassRegistry());
 }
 
-void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
+void PostMachineSchedulerLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesCFG();
   AU.addRequired<MachineDominatorTreeWrapperPass>();
   AU.addRequired<MachineLoopInfoWrapperPass>();
@@ -385,15 +403,40 @@ nextIfDebug(MachineBasicBlock::iterator I,
       .getNonConstIterator();
 }
 
+MachineSchedulerImpl::MachineSchedulerImpl(MachineFunction &Func,
+                                           MachineFunctionPass *P)
+    : P(P) {
+  MF = &Func;
+  MLI = &P->getAnalysis<MachineLoopInfoWrapperPass>().getLI();
+  MDT = &P->getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
+  TM = &P->getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+  AA = &P->getAnalysis<AAResultsWrapperPass>().getAAResults();
+  LIS = &P->getAnalysis<LiveIntervalsWrapperPass>().getLIS();
+}
+
+MachineSchedulerImpl::MachineSchedulerImpl(MachineFunction &Func,
+                                           MachineFunctionAnalysisManager &MFAM,
+                                           const TargetMachine *TargetM)
+    : MFAM(&MFAM) {
+  MF = &Func;
+  TM = TargetM;
+  MLI = &MFAM.getResult<MachineLoopAnalysis>(Func);
+  MDT = &MFAM.getResult<MachineDominatorTreeAnalysis>(Func);
+  auto &FAM = MFAM.getResult<FunctionAnalysisManagerMachineFunctionProxy>(Func)
+                  .getManager();
+  AA = &FAM.getResult<AAManager>(Func.getFunction());
+  LIS = &MFAM.getResult<LiveIntervalsAnalysis>(Func);
+}
+
 /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
-ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
+ScheduleDAGInstrs *MachineSchedulerImpl::createMachineScheduler() {
   // Select the scheduler, or set the default.
   MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
   if (Ctor != useDefaultMachineSched)
     return Ctor(this);
 
   // Get the default scheduler set by the target for this function.
-  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
+  ScheduleDAGInstrs *Scheduler = TM->createMachineScheduler(this);
   if (Scheduler)
     return Scheduler;
 
@@ -401,12 +444,60 @@ ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
   return createGenericSchedLive(this);
 }
 
+bool MachineSchedulerImpl::run() {
+  if (VerifyScheduling) {
+    LLVM_DEBUG(LIS->dump());
+    const char *MSchedBanner = "Before machine scheduling.";
+    if (P)
+      MF->verify(P, MSchedBanner, &errs());
+    else
+      MF->verify(*MFAM, MSchedBanner, &errs());
+  }
+  RegClassInfo->runOnMachineFunction(*MF);
+
+  // Instantiate the selected scheduler for this target, function, and
+  // optimization level.
+  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
+  scheduleRegions(*Scheduler, false);
+
+  LLVM_DEBUG(LIS->dump());
+  if (VerifyScheduling) {
+    const char *MSchedBanner = "After machine scheduling.";
+    if (P)
+      MF->verify(P, MSchedBanner, &errs());
+    else
+      MF->verify(*MFAM, MSchedBanner, &errs());
+  }
+  return true;
+}
+
+PostMachineSchedulerImpl::PostMachineSchedulerImpl(MachineFunction &Func,
+                                                   MachineFunctionPass *P)
+    : P(P) {
+  MF = &Func;
+  MLI = &P->getAnalysis<MachineLoopInfoWrapperPass>().getLI();
+  TM = &P->getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+  AA = &P->getAnalysis<AAResultsWrapperPass>().getAAResults();
+}
+
+PostMachineSchedulerImpl::PostMachineSchedulerImpl(
+    MachineFunction &Func, MachineFunctionAnalysisManager &MFAM,
+    const TargetMachine *TargetM)
+    : MFAM(&MFAM) {
+  MF = &Func;
+  TM = TargetM;
+  MLI = &MFAM.getResult<MachineLoopAnalysis>(Func);
+  auto &FAM = MFAM.getResult<FunctionAnalysisManagerMachineFunctionProxy>(Func)
+                  .getManager();
+  AA = &FAM.getResult<AAManager>(Func.getFunction());
+}
+
 /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
 /// the caller. We don't have a command line option to override the postRA
 /// scheduler. The Target must configure it.
-ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
+ScheduleDAGInstrs *PostMachineSchedulerImpl::createPostMachineScheduler() {
   // Get the postRA scheduler set by the target for this function.
-  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
+  ScheduleDAGInstrs *Scheduler = TM->createPostMachineScheduler(this);
   if (Scheduler)
     return Scheduler;
 
@@ -414,6 +505,30 @@ ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
   return createGenericSchedPostRA(this);
 }
 
+bool PostMachineSchedulerImpl::run() {
+  if (VerifyScheduling) {
+    const char *PostMSchedBanner = "Before post machine scheduling.";
+    if (P)
+      MF->verify(P, PostMSchedBanner, &errs());
+    else
+      MF->verify(*MFAM, PostMSchedBanner, &errs());
+  }
+
+  // Instantiate the selected scheduler for this target, function, and
+  // optimization level.
+  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
+  scheduleRegions(*Scheduler, true);
+
+  if (VerifyScheduling) {
+    const char *PostMSchedBanner = "After post machine scheduling.";
+    if (P)
+      MF->verify(P, PostMSchedBanner, &errs());
+    else
+      MF->verify(*MFAM, PostMSchedBanner, &errs());
+  }
+  return true;
+}
+
 /// Top-level MachineScheduler pass driver.
 ///
 /// Visit blocks in function order. Divide each block into scheduling regions
@@ -430,74 +545,84 @@ ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
 /// design would be to split blocks at scheduling boundaries, but LLVM has a
 /// general bias against block splitting purely for implementation simplicity.
-bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
-  if (skipFunction(mf.getFunction()))
+bool MachineSchedulerLegacy::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction()))
     return false;
 
   if (EnableMachineSched.getNumOccurrences()) {
     if (!EnableMachineSched)
       return false;
-  } else if (!mf.getSubtarget().enableMachineScheduler())
+  } else if (!MF.getSubtarget().enableMachineScheduler()) {
     return false;
+  }
 
-  LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
-
-  // Initialize the context of the pass.
-  MF = &mf;
-  MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
-  MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
-  PassConfig = &getAnalysis<TargetPassConfig>();
-  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+  LLVM_DEBUG(dbgs() << "Before MISched:\n"; MF.print(dbgs()));
 
-  LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
+  MachineSchedulerImpl Impl(MF, this);
+  return Impl.run();
+}
 
-  if (VerifyScheduling) {
-    LLVM_DEBUG(LIS->dump());
-    MF->verify(this, "Before machine scheduling.", &errs());
+PreservedAnalyses
+MachineSchedulerPass::run(MachineFunction &MF,
+                          MachineFunctionAnalysisManager &MFAM) {
+  if (EnableMachineSched.getNumOccurrences()) {
+    if (!EnableMachineSched)
+      return PreservedAnalyses::all();
+  } else if (!MF.getSubtarget().enableMachineScheduler()) {
+    return PreservedAnalyses::all();
   }
-  RegClassInfo->runOnMachineFunction(*MF);
 
-  // Instantiate the selected scheduler for this target, function, and
-  // optimization level.
-  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
-  scheduleRegions(*Scheduler, false);
+  LLVM_DEBUG(dbgs() << "Before MISched:\n"; MF.print(dbgs()));
 
-  LLVM_DEBUG(LIS->dump());
-  if (VerifyScheduling)
-    MF->verify(this, "After machine scheduling.", &errs());
-  return true;
+  MachineSchedulerImpl Impl(MF, MFAM, TM);
+  bool Changed = Impl.run();
+  if (!Changed)
+    return PreservedAnalyses::all();
+
+  PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
+  PA.preserveSet<CFGAnalyses>();
+  PA.preserve<SlotIndexesAnalysis>();
+  PA.preserve<LiveIntervalsAnalysis>();
+  return PA;
 }
 
-bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
-  if (skipFunction(mf.getFunction()))
+bool PostMachineSchedulerLegacy::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction()))
     return false;
 
   if (EnablePostRAMachineSched.getNumOccurrences()) {
     if (!EnablePostRAMachineSched)
       return false;
-  } else if (!mf.getSubtarget().enablePostRAMachineScheduler()) {
+  } else if (!MF.getSubtarget().enablePostRAMachineScheduler()) {
     LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
     return false;
   }
-  LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
+  LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; MF.print(dbgs()));
 
-  // Initialize the context of the pass.
-  MF = &mf;
-  MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
-  PassConfig = &getAnalysis<TargetPassConfig>();
-  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+  PostMachineSchedulerImpl Impl(MF, this);
+  return Impl.run();
+}
 
-  if (VerifyScheduling)
-    MF->verify(this, "Before post machine scheduling.", &errs());
+PreservedAnalyses
+PostMachineSchedulerPass::run(MachineFunction &MF,
+                              MachineFunctionAnalysisManager &MFAM) {
+  if (EnablePostRAMachineSched.getNumOccurrences()) {
+    if (!EnablePostRAMachineSched)
+      return PreservedAnalyses::all();
+  } else if (!MF.getSubtarget().enablePostRAMachineScheduler()) {
+    LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
+    return PreservedAnalyses::all();
+  }
+  LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; MF.print(dbgs()));
 
-  // Instantiate the selected scheduler for this target, function, and
-  // optimization level.
-  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
-  scheduleRegions(*Scheduler, true);
+  PostMachineSchedulerImpl Impl(MF, MFAM, TM);
+  bool Changed = Impl.run();
+  if (!Changed)
+    return PreservedAnalyses::all();
 
-  if (VerifyScheduling)
-    MF->verify(this, "After post machine scheduling.", &errs());
-  return true;
+  PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
+  PA.preserveSet<CFGAnalyses>();
+  return PA;
 }
 
 /// Return true of the given instruction should not be included in a scheduling
@@ -662,10 +787,6 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
   Scheduler.finalizeSchedule();
 }
 
-void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
-  // unimplemented
-}
-
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 LLVM_DUMP_METHOD void ReadyQueue::dump() const {
   dbgs() << "Queue " << Name << ": ";
@@ -1293,9 +1414,9 @@ void ScheduleDAGMILive::initRegPressure() {
     updatePressureDiffs(LiveUses);
   }
 
-  LLVM_DEBUG(dbgs() << "Top Pressure:\n";
+  LLVM_DEBUG(dbgs() << "Top Pressure: ";
              dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
-             dbgs() << "Bottom Pressure:\n";
+             dbgs() << "Bottom Pressure: ";
              dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););
 
   assert((BotRPTracker.getPos() == RegionEnd ||
@@ -1316,11 +1437,14 @@ void ScheduleDAGMILive::initRegPressure() {
       RegionCriticalPSets.push_back(PressureChange(i));
     }
   }
-  LLVM_DEBUG(dbgs() << "Excess PSets: ";
-             for (const PressureChange &RCPS
-                  : RegionCriticalPSets) dbgs()
-             << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
-             dbgs() << "\n");
+  LLVM_DEBUG({
+    if (RegionCriticalPSets.size() > 0) {
+      dbgs() << "Excess PSets: ";
+      for (const PressureChange &RCPS : RegionCriticalPSets)
+        dbgs() << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
+      dbgs() << "\n";
+    }
+  });
 }
 
 void ScheduleDAGMILive::
@@ -1374,10 +1498,14 @@ void ScheduleDAGMILive::updatePressureDiffs(ArrayRef<VRegMaskOrUnit> LiveUses) {
 
         PressureDiff &PDiff = getPressureDiff(&SU);
         PDiff.addPressureChange(Reg, Decrement, &MRI);
-        LLVM_DEBUG(dbgs() << "  UpdateRegP: SU(" << SU.NodeNum << ") "
-                          << printReg(Reg, TRI) << ':'
-                          << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
-                   dbgs() << "              to "; PDiff.dump(*TRI););
+        if (llvm::any_of(PDiff, [](const PressureChange &Change) {
+              return Change.isValid();
+            }))
+          LLVM_DEBUG(dbgs()
+                         << "  UpdateRegPressure: SU(" << SU.NodeNum << ") "
+                         << printReg(Reg, TRI) << ':'
+                         << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
+                     dbgs() << "                     to "; PDiff.dump(*TRI););
       }
     } else {
       assert(P.LaneMask.any());
@@ -1409,9 +1537,13 @@ void ScheduleDAGMILive::updatePressureDiffs(ArrayRef<VRegMaskOrUnit> LiveUses) {
           if (LRQ.valueIn() == VNI) {
             PressureDiff &PDiff = getPressureDiff(SU);
             PDiff.addPressureChange(Reg, true, &MRI);
-            LLVM_DEBUG(dbgs() << "  UpdateRegP: SU(" << SU->NodeNum << ") "
-                              << *SU->getInstr();
-                       dbgs() << "              to "; PDiff.dump(*TRI););
+            if (llvm::any_of(PDiff, [](const PressureChange &Change) {
+                  return Change.isValid();
+                }))
+              LLVM_DEBUG(dbgs() << "  UpdateRegPressure: SU(" << SU->NodeNum
+                                << ") " << *SU->getInstr();
+                         dbgs() << "                     to ";
+                         PDiff.dump(*TRI););
           }
         }
       }
@@ -1671,7 +1803,7 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
 
       TopRPTracker.advance(RegOpers);
       assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
-      LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
+      LLVM_DEBUG(dbgs() << "Top Pressure: "; dumpRegSetPressure(
                      TopRPTracker.getRegSetPressureAtPos(), TRI););
 
       updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
@@ -1709,7 +1841,7 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
       SmallVector<VRegMaskOrUnit, 8> LiveUses;
       BotRPTracker.recede(RegOpers, &LiveUses);
       assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
-      LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
+      LLVM_DEBUG(dbgs() << "Bottom Pressure: "; dumpRegSetPressure(
                      BotRPTracker.getRegSetPressureAtPos(), TRI););
 
       updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index 8509369..05afcbe 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -431,6 +431,12 @@ bool MachineFunction::verify(Pass *p, const char *Banner, raw_ostream *OS,
   return MachineVerifier(p, Banner, OS, AbortOnError).verify(*this);
 }
 
+bool MachineFunction::verify(MachineFunctionAnalysisManager &MFAM,
+                             const char *Banner, raw_ostream *OS,
+                             bool AbortOnError) const {
+  return MachineVerifier(MFAM, Banner, OS, AbortOnError).verify(*this);
+}
+
 bool MachineFunction::verify(LiveIntervals *LiveInts, SlotIndexes *Indexes,
                              const char *Banner, raw_ostream *OS,
                              bool AbortOnError) const {
diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index e0053fb..745c0d4 100644
--- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -465,7 +465,8 @@ private:
   bool optimizeUncoalescableCopy(MachineInstr &MI,
                                  SmallPtrSetImpl<MachineInstr *> &LocalMIs);
   bool optimizeRecurrence(MachineInstr &PHI);
-  bool findNextSource(RegSubRegPair RegSubReg, RewriteMapTy &RewriteMap);
+  bool findNextSource(const TargetRegisterClass *DefRC, unsigned DefSubReg,
+                      RegSubRegPair RegSubReg, RewriteMapTy &RewriteMap);
   bool isMoveImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
                        DenseMap<Register, MachineInstr *> &ImmDefMIs);
   bool foldImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
@@ -991,8 +992,10 @@ bool PeepholeOptimizer::optimizeCondBranch(MachineInstr &MI) {
   return TII->optimizeCondBranch(MI);
 }
 
-/// Try to find the next source that share the same register file
-/// for the value defined by \p Reg and \p SubReg.
+/// Try to find a better source value that shares the same register file to
+/// replace \p RegSubReg in an instruction like
+/// `DefRC.DefSubReg = COPY RegSubReg`
+///
 /// When true is returned, the \p RewriteMap can be used by the client to
 /// retrieve all Def -> Use along the way up to the next source. Any found
 /// Use that is not itself a key for another entry, is the next source to
@@ -1002,17 +1005,15 @@ bool PeepholeOptimizer::optimizeCondBranch(MachineInstr &MI) {
 /// share the same register file as \p Reg and \p SubReg. The client should
 /// then be capable to rewrite all intermediate PHIs to get the next source.
 /// \return False if no alternative sources are available. True otherwise.
-bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg,
+bool PeepholeOptimizer::findNextSource(const TargetRegisterClass *DefRC,
+                                       unsigned DefSubReg,
+                                       RegSubRegPair RegSubReg,
                                        RewriteMapTy &RewriteMap) {
   // Do not try to find a new source for a physical register.
   // So far we do not have any motivating example for doing that.
   // Thus, instead of maintaining untested code, we will revisit that if
   // that changes at some point.
   Register Reg = RegSubReg.Reg;
-  if (Reg.isPhysical())
-    return false;
-  const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
-
   SmallVector<RegSubRegPair, 4> SrcToLook;
   RegSubRegPair CurSrcPair = RegSubReg;
   SrcToLook.push_back(CurSrcPair);
@@ -1076,7 +1077,7 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg,
 
       // Keep following the chain if the value isn't any better yet.
       const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg);
-      if (!TRI->shouldRewriteCopySrc(DefRC, RegSubReg.SubReg, SrcRC,
+      if (!TRI->shouldRewriteCopySrc(DefRC, DefSubReg, SrcRC,
                                      CurSrcPair.SubReg))
         continue;
 
@@ -1184,21 +1185,33 @@ bool PeepholeOptimizer::optimizeCoalescableCopyImpl(Rewriter &&CpyRewriter) {
   bool Changed = false;
   // Get the right rewriter for the current copy.
   // Rewrite each rewritable source.
-  RegSubRegPair Src;
+  RegSubRegPair Dst;
   RegSubRegPair TrackPair;
-  while (CpyRewriter.getNextRewritableSource(Src, TrackPair)) {
+  while (CpyRewriter.getNextRewritableSource(TrackPair, Dst)) {
+    if (Dst.Reg.isPhysical()) {
+      // Do not try to find a new source for a physical register.
+      // So far we do not have any motivating example for doing that.
+      // Thus, instead of maintaining untested code, we will revisit that if
+      // that changes at some point.
+      continue;
+    }
+
+    const TargetRegisterClass *DefRC = MRI->getRegClass(Dst.Reg);
+
     // Keep track of PHI nodes and its incoming edges when looking for sources.
     RewriteMapTy RewriteMap;
     // Try to find a more suitable source. If we failed to do so, or get the
     // actual source, move to the next source.
-    if (!findNextSource(TrackPair, RewriteMap))
+    if (!findNextSource(DefRC, Dst.SubReg, TrackPair, RewriteMap))
       continue;
 
     // Get the new source to rewrite. TODO: Only enable handling of multiple
     // sources (PHIs) once we have a motivating example and testcases for it.
     RegSubRegPair NewSrc = getNewSource(MRI, TII, TrackPair, RewriteMap,
                                         /*HandleMultipleSources=*/false);
-    if (Src.Reg == NewSrc.Reg || NewSrc.Reg == 0)
+    assert(TrackPair.Reg != NewSrc.Reg &&
+           "should not rewrite source to original value");
+    if (!NewSrc.Reg)
       continue;
 
     // Rewrite source.
@@ -1325,9 +1338,14 @@ bool PeepholeOptimizer::optimizeUncoalescableCopy(
     if (Def.Reg.isPhysical())
       return false;
 
+    // FIXME: Uncoalescable copies are treated differently by
+    // UncoalescableRewriter, and this probably should not share
+    // API. getNextRewritableSource really finds rewritable defs.
+    const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);
+
     // If we do not know how to rewrite this definition, there is no point
     // in trying to kill this instruction.
-    if (!findNextSource(Def, RewriteMap))
+    if (!findNextSource(DefRC, Def.SubReg, Def, RewriteMap))
       return false;
 
     RewritePairs.push_back(Def);
diff --git a/llvm/lib/CodeGen/PostRASchedulerList.cpp b/llvm/lib/CodeGen/PostRASchedulerList.cpp
index badfd9a6..039a473 100644
--- a/llvm/lib/CodeGen/PostRASchedulerList.cpp
+++ b/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -17,6 +17,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/PostRASchedulerList.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/AntiDepBreaker.h"
@@ -39,6 +40,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "post-RA-sched"
@@ -73,124 +75,134 @@ DebugMod("postra-sched-debugmod",
 AntiDepBreaker::~AntiDepBreaker() = default;
 
 namespace {
-  class PostRAScheduler : public MachineFunctionPass {
-    const TargetInstrInfo *TII = nullptr;
-    RegisterClassInfo RegClassInfo;
-
-  public:
-    static char ID;
-    PostRAScheduler() : MachineFunctionPass(ID) {}
-
-    void getAnalysisUsage(AnalysisUsage &AU) const override {
-      AU.setPreservesCFG();
-      AU.addRequired<AAResultsWrapperPass>();
-      AU.addRequired<TargetPassConfig>();
-      AU.addRequired<MachineDominatorTreeWrapperPass>();
-      AU.addPreserved<MachineDominatorTreeWrapperPass>();
-      AU.addRequired<MachineLoopInfoWrapperPass>();
-      AU.addPreserved<MachineLoopInfoWrapperPass>();
-      MachineFunctionPass::getAnalysisUsage(AU);
-    }
+class PostRAScheduler {
+  const TargetInstrInfo *TII = nullptr;
+  MachineLoopInfo *MLI = nullptr;
+  AliasAnalysis *AA = nullptr;
+  const TargetMachine *TM = nullptr;
+  RegisterClassInfo RegClassInfo;
+
+public:
+  PostRAScheduler(MachineFunction &MF, MachineLoopInfo *MLI, AliasAnalysis *AA,
+                  const TargetMachine *TM)
+      : TII(MF.getSubtarget().getInstrInfo()), MLI(MLI), AA(AA), TM(TM) {}
+  bool run(MachineFunction &MF);
+};
+
+class PostRASchedulerLegacy : public MachineFunctionPass {
+public:
+  static char ID;
+  PostRASchedulerLegacy() : MachineFunctionPass(ID) {}
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<AAResultsWrapperPass>();
+    AU.addRequired<TargetPassConfig>();
+    AU.addRequired<MachineDominatorTreeWrapperPass>();
+    AU.addPreserved<MachineDominatorTreeWrapperPass>();
+    AU.addRequired<MachineLoopInfoWrapperPass>();
+    AU.addPreserved<MachineLoopInfoWrapperPass>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
 
-    MachineFunctionProperties getRequiredProperties() const override {
-      return MachineFunctionProperties().set(
-          MachineFunctionProperties::Property::NoVRegs);
-    }
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::NoVRegs);
+  }
 
-    bool runOnMachineFunction(MachineFunction &Fn) override;
-  };
-  char PostRAScheduler::ID = 0;
+  bool runOnMachineFunction(MachineFunction &Fn) override;
+};
+char PostRASchedulerLegacy::ID = 0;
 
-  class SchedulePostRATDList : public ScheduleDAGInstrs {
-    /// AvailableQueue - The priority queue to use for the available SUnits.
-    ///
-    LatencyPriorityQueue AvailableQueue;
+class SchedulePostRATDList : public ScheduleDAGInstrs {
+  /// AvailableQueue - The priority queue to use for the available SUnits.
+  ///
+  LatencyPriorityQueue AvailableQueue;
 
-    /// PendingQueue - This contains all of the instructions whose operands have
-    /// been issued, but their results are not ready yet (due to the latency of
-    /// the operation).  Once the operands becomes available, the instruction is
-    /// added to the AvailableQueue.
-    std::vector<SUnit*> PendingQueue;
+  /// PendingQueue - This contains all of the instructions whose operands have
+  /// been issued, but their results are not ready yet (due to the latency of
+  /// the operation).  Once the operands becomes available, the instruction is
+  /// added to the AvailableQueue.
+  std::vector<SUnit *> PendingQueue;
 
-    /// HazardRec - The hazard recognizer to use.
-    ScheduleHazardRecognizer *HazardRec;
+  /// HazardRec - The hazard recognizer to use.
+  ScheduleHazardRecognizer *HazardRec;
 
-    /// AntiDepBreak - Anti-dependence breaking object, or NULL if none
-    AntiDepBreaker *AntiDepBreak;
+  /// AntiDepBreak - Anti-dependence breaking object, or NULL if none
+  AntiDepBreaker *AntiDepBreak;
 
-    /// AA - AliasAnalysis for making memory reference queries.
-    AliasAnalysis *AA;
+  /// AA - AliasAnalysis for making memory reference queries.
+  AliasAnalysis *AA;
 
-    /// The schedule. Null SUnit*'s represent noop instructions.
-    std::vector<SUnit*> Sequence;
+  /// The schedule. Null SUnit*'s represent noop instructions.
+  std::vector<SUnit *> Sequence;
 
-    /// Ordered list of DAG postprocessing steps.
-    std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
+  /// Ordered list of DAG postprocessing steps.
+  std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
 
-    /// The index in BB of RegionEnd.
-    ///
-    /// This is the instruction number from the top of the current block, not
-    /// the SlotIndex. It is only used by the AntiDepBreaker.
-    unsigned EndIndex = 0;
+  /// The index in BB of RegionEnd.
+  ///
+  /// This is the instruction number from the top of the current block, not
+  /// the SlotIndex. It is only used by the AntiDepBreaker.
+  unsigned EndIndex = 0;
 
-  public:
-    SchedulePostRATDList(
-        MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA,
-        const RegisterClassInfo &,
-        TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
-        SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs);
+public:
+  SchedulePostRATDList(
+      MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA,
+      const RegisterClassInfo &,
+      TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
+      SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs);
 
-    ~SchedulePostRATDList() override;
+  ~SchedulePostRATDList() override;
 
-    /// startBlock - Initialize register live-range state for scheduling in
-    /// this block.
-    ///
-    void startBlock(MachineBasicBlock *BB) override;
+  /// startBlock - Initialize register live-range state for scheduling in
+  /// this block.
+  ///
+  void startBlock(MachineBasicBlock *BB) override;
 
-    // Set the index of RegionEnd within the current BB.
-    void setEndIndex(unsigned EndIdx) { EndIndex = EndIdx; }
+  // Set the index of RegionEnd within the current BB.
+  void setEndIndex(unsigned EndIdx) { EndIndex = EndIdx; }
 
-    /// Initialize the scheduler state for the next scheduling region.
-    void enterRegion(MachineBasicBlock *bb,
-                     MachineBasicBlock::iterator begin,
-                     MachineBasicBlock::iterator end,
-                     unsigned regioninstrs) override;
+  /// Initialize the scheduler state for the next scheduling region.
+  void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin,
+                   MachineBasicBlock::iterator end,
+                   unsigned regioninstrs) override;
 
-    /// Notify that the scheduler has finished scheduling the current region.
-    void exitRegion() override;
+  /// Notify that the scheduler has finished scheduling the current region.
+  void exitRegion() override;
 
-    /// Schedule - Schedule the instruction range using list scheduling.
-    ///
-    void schedule() override;
+  /// Schedule - Schedule the instruction range using list scheduling.
+  ///
+  void schedule() override;
 
-    void EmitSchedule();
+  void EmitSchedule();
 
-    /// Observe - Update liveness information to account for the current
-    /// instruction, which will not be scheduled.
-    ///
-    void Observe(MachineInstr &MI, unsigned Count);
+  /// Observe - Update liveness information to account for the current
+  /// instruction, which will not be scheduled.
+  ///
+  void Observe(MachineInstr &MI, unsigned Count);
 
-    /// finishBlock - Clean up register live-range state.
-    ///
-    void finishBlock() override;
+  /// finishBlock - Clean up register live-range state.
+  ///
+  void finishBlock() override;
 
-  private:
-    /// Apply each ScheduleDAGMutation step in order.
-    void postProcessDAG();
+private:
+  /// Apply each ScheduleDAGMutation step in order.
+  void postProcessDAG();
 
-    void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
-    void ReleaseSuccessors(SUnit *SU);
-    void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
-    void ListScheduleTopDown();
+  void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
+  void ReleaseSuccessors(SUnit *SU);
+  void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+  void ListScheduleTopDown();
 
-    void dumpSchedule() const;
-    void emitNoop(unsigned CurCycle);
-  };
-}
+  void dumpSchedule() const;
+  void emitNoop(unsigned CurCycle);
+};
+} // namespace
 
-char &llvm::PostRASchedulerID = PostRAScheduler::ID;
+char &llvm::PostRASchedulerID = PostRASchedulerLegacy::ID;
 
-INITIALIZE_PASS(PostRAScheduler, DEBUG_TYPE,
+INITIALIZE_PASS(PostRASchedulerLegacy, DEBUG_TYPE,
                 "Post RA top-down list latency scheduler", false, false)
 
 SchedulePostRATDList::SchedulePostRATDList(
@@ -263,19 +275,12 @@ static bool enablePostRAScheduler(const TargetSubtargetInfo &ST,
          OptLevel >= ST.getOptLevelToEnablePostRAScheduler();
 }
 
-bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
-  if (skipFunction(Fn.getFunction()))
-    return false;
-
-  const auto &Subtarget = Fn.getSubtarget();
-  TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
+bool PostRAScheduler::run(MachineFunction &MF) {
+  const auto &Subtarget = MF.getSubtarget();
   // Check that post-RA scheduling is enabled for this target.
-  if (!enablePostRAScheduler(Subtarget, PassConfig->getOptLevel()))
+  if (!enablePostRAScheduler(Subtarget, TM->getOptLevel()))
     return false;
 
-  TII = Subtarget.getInstrInfo();
-  MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
-  AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
   TargetSubtargetInfo::AntiDepBreakMode AntiDepMode =
       Subtarget.getAntiDepBreakMode();
   if (EnableAntiDepBreaking.getPosition() > 0) {
@@ -287,22 +292,22 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
   }
   SmallVector<const TargetRegisterClass *, 4> CriticalPathRCs;
   Subtarget.getCriticalPathRCs(CriticalPathRCs);
-  RegClassInfo.runOnMachineFunction(Fn);
+  RegClassInfo.runOnMachineFunction(MF);
 
   LLVM_DEBUG(dbgs() << "PostRAScheduler\n");
 
-  SchedulePostRATDList Scheduler(Fn, MLI, AA, RegClassInfo, AntiDepMode,
+  SchedulePostRATDList Scheduler(MF, *MLI, AA, RegClassInfo, AntiDepMode,
                                  CriticalPathRCs);
 
   // Loop over all of the basic blocks
-  for (auto &MBB : Fn) {
+  for (auto &MBB : MF) {
 #ifndef NDEBUG
     // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
     if (DebugDiv > 0) {
       static int bbcnt = 0;
       if (bbcnt++ % DebugDiv != DebugMod)
         continue;
-      dbgs() << "*** DEBUG scheduling " << Fn.getName() << ":"
+      dbgs() << "*** DEBUG scheduling " << MF.getName() << ":"
              << printMBBReference(MBB) << " ***\n";
     }
 #endif
@@ -320,7 +325,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
       // Calls are not scheduling boundaries before register allocation, but
       // post-ra we don't gain anything by scheduling across calls since we
       // don't need to worry about register pressure.
-      if (MI.isCall() || TII->isSchedulingBoundary(MI, &MBB, Fn)) {
+      if (MI.isCall() || TII->isSchedulingBoundary(MI, &MBB, MF)) {
         Scheduler.enterRegion(&MBB, I, Current, CurrentCount - Count);
         Scheduler.setEndIndex(CurrentCount);
         Scheduler.schedule();
@@ -353,6 +358,39 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
   return true;
 }
 
+bool PostRASchedulerLegacy::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction()))
+    return false;
+
+  MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
+  AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+  const TargetMachine *TM =
+      &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+  PostRAScheduler Impl(MF, MLI, AA, TM);
+  return Impl.run(MF);
+}
+
+PreservedAnalyses
+PostRASchedulerPass::run(MachineFunction &MF,
+                         MachineFunctionAnalysisManager &MFAM) {
+  MFPropsModifier _(*this, MF);
+
+  MachineLoopInfo *MLI = &MFAM.getResult<MachineLoopAnalysis>(MF);
+  auto &FAM = MFAM.getResult<FunctionAnalysisManagerMachineFunctionProxy>(MF)
+                  .getManager();
+  AliasAnalysis *AA = &FAM.getResult<AAManager>(MF.getFunction());
+  PostRAScheduler Impl(MF, MLI, AA, TM);
+  bool Changed = Impl.run(MF);
+  if (!Changed)
+    return PreservedAnalyses::all();
+
+  PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
+  PA.preserveSet<CFGAnalyses>();
+  PA.preserve<MachineDominatorTreeAnalysis>();
+  PA.preserve<MachineLoopAnalysis>();
+  return PA;
+}
+
 /// StartBlock - Initialize register live-range state for scheduling in
 /// this block.
 ///
diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp
index e1f0540..51e047b 100644
--- a/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -135,7 +135,7 @@ INITIALIZE_PASS_DEPENDENCY(LiveDebugVariablesWrapperLegacy)
 INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(RegisterCoalescerLegacy)
-INITIALIZE_PASS_DEPENDENCY(MachineScheduler)
+INITIALIZE_PASS_DEPENDENCY(MachineSchedulerLegacy)
 INITIALIZE_PASS_DEPENDENCY(LiveStacksWrapperLegacy)
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 465c4e8..2e43ad7 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -155,7 +155,7 @@ INITIALIZE_PASS_DEPENDENCY(LiveDebugVariablesWrapperLegacy)
 INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(RegisterCoalescerLegacy)
-INITIALIZE_PASS_DEPENDENCY(MachineScheduler)
+INITIALIZE_PASS_DEPENDENCY(MachineSchedulerLegacy)
 INITIALIZE_PASS_DEPENDENCY(LiveStacksWrapperLegacy)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
diff --git a/llvm/lib/CodeGen/RegisterPressure.cpp b/llvm/lib/CodeGen/RegisterPressure.cpp
index e8e6db1..ca51b67 100644
--- a/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -79,15 +79,12 @@ static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure,
 LLVM_DUMP_METHOD
 void llvm::dumpRegSetPressure(ArrayRef<unsigned> SetPressure,
                               const TargetRegisterInfo *TRI) {
-  bool Empty = true;
   for (unsigned i = 0, e = SetPressure.size(); i < e; ++i) {
     if (SetPressure[i] != 0) {
-      dbgs() << TRI->getRegPressureSetName(i) << "=" << SetPressure[i] << '\n';
-      Empty = false;
+      dbgs() << TRI->getRegPressureSetName(i) << "=" << SetPressure[i] << ' ';
     }
   }
-  if (Empty)
-    dbgs() << "\n";
+  dbgs() << "\n";
 }
 
 LLVM_DUMP_METHOD
diff --git a/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
index 0128f87..58f212e 100644
--- a/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
+++ b/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -26,6 +26,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/RenameIndependentSubregs.h"
 #include "LiveRangeUtils.h"
 #include "PHIEliminationUtils.h"
 #include "llvm/CodeGen/LiveInterval.h"
@@ -43,25 +44,11 @@ using namespace llvm;
 
 namespace {
 
-class RenameIndependentSubregs : public MachineFunctionPass {
+class RenameIndependentSubregs {
 public:
-  static char ID;
-  RenameIndependentSubregs() : MachineFunctionPass(ID) {}
-
-  StringRef getPassName() const override {
-    return "Rename Disconnected Subregister Components";
-  }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.setPreservesCFG();
-    AU.addRequired<LiveIntervalsWrapperPass>();
-    AU.addPreserved<LiveIntervalsWrapperPass>();
-    AU.addRequired<SlotIndexesWrapperPass>();
-    AU.addPreserved<SlotIndexesWrapperPass>();
-    MachineFunctionPass::getAnalysisUsage(AU);
-  }
+  RenameIndependentSubregs(LiveIntervals *LIS) : LIS(LIS) {}
 
-  bool runOnMachineFunction(MachineFunction &MF) override;
+  bool run(MachineFunction &MF);
 
 private:
   struct SubRangeInfo {
@@ -106,17 +93,36 @@ private:
   const TargetInstrInfo *TII = nullptr;
 };
 
+class RenameIndependentSubregsLegacy : public MachineFunctionPass {
+public:
+  static char ID;
+  RenameIndependentSubregsLegacy() : MachineFunctionPass(ID) {}
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  StringRef getPassName() const override {
+    return "Rename Disconnected Subregister Components";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<LiveIntervalsWrapperPass>();
+    AU.addPreserved<LiveIntervalsWrapperPass>();
+    AU.addRequired<SlotIndexesWrapperPass>();
+    AU.addPreserved<SlotIndexesWrapperPass>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+
 } // end anonymous namespace
 
-char RenameIndependentSubregs::ID;
+char RenameIndependentSubregsLegacy::ID;
 
-char &llvm::RenameIndependentSubregsID = RenameIndependentSubregs::ID;
+char &llvm::RenameIndependentSubregsID = RenameIndependentSubregsLegacy::ID;
 
-INITIALIZE_PASS_BEGIN(RenameIndependentSubregs, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(RenameIndependentSubregsLegacy, DEBUG_TYPE,
                       "Rename Independent Subregisters", false, false)
 INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
-INITIALIZE_PASS_END(RenameIndependentSubregs, DEBUG_TYPE,
+INITIALIZE_PASS_END(RenameIndependentSubregsLegacy, DEBUG_TYPE,
                     "Rename Independent Subregisters", false, false)
 
 bool RenameIndependentSubregs::renameComponents(LiveInterval &LI) const {
@@ -381,7 +387,25 @@ void RenameIndependentSubregs::computeMainRangesFixFlags(
   }
 }
 
-bool RenameIndependentSubregs::runOnMachineFunction(MachineFunction &MF) {
+PreservedAnalyses
+RenameIndependentSubregsPass::run(MachineFunction &MF,
+                                  MachineFunctionAnalysisManager &MFAM) {
+  auto &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF);
+  if (!RenameIndependentSubregs(&LIS).run(MF))
+    return PreservedAnalyses::all();
+  auto PA = getMachineFunctionPassPreservedAnalyses();
+  PA.preserveSet<CFGAnalyses>();
+  PA.preserve<LiveIntervalsAnalysis>();
+  PA.preserve<SlotIndexesAnalysis>();
+  return PA;
+}
+
+bool RenameIndependentSubregsLegacy::runOnMachineFunction(MachineFunction &MF) {
+  auto &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
+  return RenameIndependentSubregs(&LIS).run(MF);
+}
+
+bool RenameIndependentSubregs::run(MachineFunction &MF) {
   // Skip renaming if liveness of subregister is not tracked.
   MRI = &MF.getRegInfo();
   if (!MRI->subRegLivenessEnabled())
@@ -390,7 +414,6 @@ bool RenameIndependentSubregs::runOnMachineFunction(MachineFunction &MF) {
   LLVM_DEBUG(dbgs() << "Renaming independent subregister live ranges in "
                     << MF.getName() << '\n');
 
-  LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
   TII = MF.getSubtarget().getInstrInfo();
 
   // Iterate over all vregs. Note that we query getNumVirtRegs() the newly
diff --git a/llvm/lib/CodeGen/StackSlotColoring.cpp b/llvm/lib/CodeGen/StackSlotColoring.cpp
index 3e57ee0..22c5c2e 100644
--- a/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -10,6 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/StackSlotColoring.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
@@ -20,6 +21,7 @@
 #include "llvm/CodeGen/LiveStacks.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -59,136 +61,145 @@ STATISTIC(NumDead,       "Number of trivially dead stack accesses eliminated");
 
 namespace {
 
-  class StackSlotColoring : public MachineFunctionPass {
-    LiveStacks *LS = nullptr;
-    MachineFrameInfo *MFI = nullptr;
-    const TargetInstrInfo *TII = nullptr;
-    const MachineBlockFrequencyInfo *MBFI = nullptr;
-    SlotIndexes *Indexes = nullptr;
-
-    // SSIntervals - Spill slot intervals.
-    std::vector<LiveInterval*> SSIntervals;
-
-    // SSRefs - Keep a list of MachineMemOperands for each spill slot.
-    // MachineMemOperands can be shared between instructions, so we need
-    // to be careful that renames like [FI0, FI1] -> [FI1, FI2] do not
-    // become FI0 -> FI1 -> FI2.
-    SmallVector<SmallVector<MachineMemOperand *, 8>, 16> SSRefs;
-
-    // OrigAlignments - Alignments of stack objects before coloring.
-    SmallVector<Align, 16> OrigAlignments;
-
-    // OrigSizes - Sizes of stack objects before coloring.
-    SmallVector<unsigned, 16> OrigSizes;
-
-    // AllColors - If index is set, it's a spill slot, i.e. color.
-    // FIXME: This assumes PEI locate spill slot with smaller indices
-    // closest to stack pointer / frame pointer. Therefore, smaller
-    // index == better color. This is per stack ID.
-    SmallVector<BitVector, 2> AllColors;
-
-    // NextColor - Next "color" that's not yet used. This is per stack ID.
-    SmallVector<int, 2> NextColors = { -1 };
-
-    // UsedColors - "Colors" that have been assigned. This is per stack ID
-    SmallVector<BitVector, 2> UsedColors;
-
-    // Join all intervals sharing one color into a single LiveIntervalUnion to
-    // speedup range overlap test.
-    class ColorAssignmentInfo {
-      // Single liverange (used to avoid creation of LiveIntervalUnion).
-      LiveInterval *SingleLI = nullptr;
-      // LiveIntervalUnion to perform overlap test.
-      LiveIntervalUnion *LIU = nullptr;
-      // LiveIntervalUnion has a parameter in its constructor so doing this
-      // dirty magic.
-      uint8_t LIUPad[sizeof(LiveIntervalUnion)];
-
-    public:
-      ~ColorAssignmentInfo() {
-        if (LIU)
-          LIU->~LiveIntervalUnion(); // Dirty magic again.
-      }
-
-      // Return true if LiveInterval overlaps with any
-      // intervals that have already been assigned to this color.
-      bool overlaps(LiveInterval *LI) const {
-        if (LIU)
-          return LiveIntervalUnion::Query(*LI, *LIU).checkInterference();
-        return SingleLI ? SingleLI->overlaps(*LI) : false;
-      }
-
-      // Add new LiveInterval to this color.
-      void add(LiveInterval *LI, LiveIntervalUnion::Allocator &Alloc) {
-        assert(!overlaps(LI));
-        if (LIU) {
-          LIU->unify(*LI, *LI);
-        } else if (SingleLI) {
-          LIU = new (LIUPad) LiveIntervalUnion(Alloc);
-          LIU->unify(*SingleLI, *SingleLI);
-          LIU->unify(*LI, *LI);
-          SingleLI = nullptr;
-        } else
-          SingleLI = LI;
-      }
-    };
-
-    LiveIntervalUnion::Allocator LIUAlloc;
-
-    // Assignments - Color to intervals mapping.
-    SmallVector<ColorAssignmentInfo, 16> Assignments;
+class StackSlotColoring {
+  MachineFrameInfo *MFI = nullptr;
+  const TargetInstrInfo *TII = nullptr;
+  LiveStacks *LS = nullptr;
+  const MachineBlockFrequencyInfo *MBFI = nullptr;
+  SlotIndexes *Indexes = nullptr;
+
+  // SSIntervals - Spill slot intervals.
+  std::vector<LiveInterval *> SSIntervals;
+
+  // SSRefs - Keep a list of MachineMemOperands for each spill slot.
+  // MachineMemOperands can be shared between instructions, so we need
+  // to be careful that renames like [FI0, FI1] -> [FI1, FI2] do not
+  // become FI0 -> FI1 -> FI2.
+  SmallVector<SmallVector<MachineMemOperand *, 8>, 16> SSRefs;
+
+  // OrigAlignments - Alignments of stack objects before coloring.
+  SmallVector<Align, 16> OrigAlignments;
+
+  // OrigSizes - Sizes of stack objects before coloring.
+  SmallVector<unsigned, 16> OrigSizes;
+
+  // AllColors - If index is set, it's a spill slot, i.e. color.
+  // FIXME: This assumes PEI locate spill slot with smaller indices
+  // closest to stack pointer / frame pointer. Therefore, smaller
+  // index == better color. This is per stack ID.
+  SmallVector<BitVector, 2> AllColors;
+
+  // NextColor - Next "color" that's not yet used. This is per stack ID.
+  SmallVector<int, 2> NextColors = {-1};
+
+  // UsedColors - "Colors" that have been assigned. This is per stack ID
+  SmallVector<BitVector, 2> UsedColors;
+
+  // Join all intervals sharing one color into a single LiveIntervalUnion to
+  // speedup range overlap test.
+  class ColorAssignmentInfo {
+    // Single liverange (used to avoid creation of LiveIntervalUnion).
+    LiveInterval *SingleLI = nullptr;
+    // LiveIntervalUnion to perform overlap test.
+    LiveIntervalUnion *LIU = nullptr;
+    // LiveIntervalUnion has a parameter in its constructor so doing this
+    // dirty magic.
+    uint8_t LIUPad[sizeof(LiveIntervalUnion)];
 
   public:
-    static char ID; // Pass identification
+    ~ColorAssignmentInfo() {
+      if (LIU)
+        LIU->~LiveIntervalUnion(); // Dirty magic again.
+    }
 
-    StackSlotColoring() : MachineFunctionPass(ID) {
-      initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
+    // Return true if LiveInterval overlaps with any
+    // intervals that have already been assigned to this color.
+    bool overlaps(LiveInterval *LI) const {
+      if (LIU)
+        return LiveIntervalUnion::Query(*LI, *LIU).checkInterference();
+      return SingleLI ? SingleLI->overlaps(*LI) : false;
     }
 
-    void getAnalysisUsage(AnalysisUsage &AU) const override {
-      AU.setPreservesCFG();
-      AU.addRequired<SlotIndexesWrapperPass>();
-      AU.addPreserved<SlotIndexesWrapperPass>();
-      AU.addRequired<LiveStacksWrapperLegacy>();
-      AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
-      AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
-      AU.addPreservedID(MachineDominatorsID);
-
-      // In some Target's pipeline, register allocation (RA) might be
-      // split into multiple phases based on register class. So, this pass
-      // may be invoked multiple times requiring it to save these analyses to be
-      // used by RA later.
-      AU.addPreserved<LiveIntervalsWrapperPass>();
-      AU.addPreserved<LiveDebugVariablesWrapperLegacy>();
-
-      MachineFunctionPass::getAnalysisUsage(AU);
+    // Add new LiveInterval to this color.
+    void add(LiveInterval *LI, LiveIntervalUnion::Allocator &Alloc) {
+      assert(!overlaps(LI));
+      if (LIU) {
+        LIU->unify(*LI, *LI);
+      } else if (SingleLI) {
+        LIU = new (LIUPad) LiveIntervalUnion(Alloc);
+        LIU->unify(*SingleLI, *SingleLI);
+        LIU->unify(*LI, *LI);
+        SingleLI = nullptr;
+      } else
+        SingleLI = LI;
     }
+  };
+
+  LiveIntervalUnion::Allocator LIUAlloc;
+
+  // Assignments - Color to intervals mapping.
+  SmallVector<ColorAssignmentInfo, 16> Assignments;
+
+public:
+  StackSlotColoring(MachineFunction &MF, LiveStacks *LS,
+                    MachineBlockFrequencyInfo *MBFI, SlotIndexes *Indexes)
+      : MFI(&MF.getFrameInfo()), TII(MF.getSubtarget().getInstrInfo()), LS(LS),
+        MBFI(MBFI), Indexes(Indexes) {}
+  bool run(MachineFunction &MF);
+
+private:
+  void InitializeSlots();
+  void ScanForSpillSlotRefs(MachineFunction &MF);
+  int ColorSlot(LiveInterval *li);
+  bool ColorSlots(MachineFunction &MF);
+  void RewriteInstruction(MachineInstr &MI, SmallVectorImpl<int> &SlotMapping,
+                          MachineFunction &MF);
+  bool RemoveDeadStores(MachineBasicBlock *MBB);
+};
 
-    bool runOnMachineFunction(MachineFunction &MF) override;
+class StackSlotColoringLegacy : public MachineFunctionPass {
+public:
+  static char ID; // Pass identification
 
-  private:
-    void InitializeSlots();
-    void ScanForSpillSlotRefs(MachineFunction &MF);
-    int ColorSlot(LiveInterval *li);
-    bool ColorSlots(MachineFunction &MF);
-    void RewriteInstruction(MachineInstr &MI, SmallVectorImpl<int> &SlotMapping,
-                            MachineFunction &MF);
-    bool RemoveDeadStores(MachineBasicBlock* MBB);
-  };
+  StackSlotColoringLegacy() : MachineFunctionPass(ID) {
+    initializeStackSlotColoringLegacyPass(*PassRegistry::getPassRegistry());
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<SlotIndexesWrapperPass>();
+    AU.addPreserved<SlotIndexesWrapperPass>();
+    AU.addRequired<LiveStacksWrapperLegacy>();
+    AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
+    AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
+    AU.addPreservedID(MachineDominatorsID);
+
+    // In some Target's pipeline, register allocation (RA) might be
+    // split into multiple phases based on register class. So, this pass
+    // may be invoked multiple times requiring it to save these analyses to be
+    // used by RA later.
+    AU.addPreserved<LiveIntervalsWrapperPass>();
+    AU.addPreserved<LiveDebugVariablesWrapperLegacy>();
+
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
 
 } // end anonymous namespace
 
-char StackSlotColoring::ID = 0;
+char StackSlotColoringLegacy::ID = 0;
 
-char &llvm::StackSlotColoringID = StackSlotColoring::ID;
+char &llvm::StackSlotColoringID = StackSlotColoringLegacy::ID;
 
-INITIALIZE_PASS_BEGIN(StackSlotColoring, DEBUG_TYPE,
-                "Stack Slot Coloring", false, false)
+INITIALIZE_PASS_BEGIN(StackSlotColoringLegacy, DEBUG_TYPE,
+                      "Stack Slot Coloring", false, false)
 INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LiveStacksWrapperLegacy)
 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
-INITIALIZE_PASS_END(StackSlotColoring, DEBUG_TYPE,
-                "Stack Slot Coloring", false, false)
+INITIALIZE_PASS_END(StackSlotColoringLegacy, DEBUG_TYPE, "Stack Slot Coloring",
+                    false, false)
 
 namespace {
 
@@ -511,21 +522,12 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
   return changed;
 }
 
-bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
+bool StackSlotColoring::run(MachineFunction &MF) {
   LLVM_DEBUG({
     dbgs() << "********** Stack Slot Coloring **********\n"
            << "********** Function: " << MF.getName() << '\n';
   });
 
-  if (skipFunction(MF.getFunction()))
-    return false;
-
-  MFI = &MF.getFrameInfo();
-  TII = MF.getSubtarget().getInstrInfo();
-  LS = &getAnalysis<LiveStacksWrapperLegacy>().getLS();
-  MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
-  Indexes = &getAnalysis<SlotIndexesWrapperPass>().getSI();
-
   bool Changed = false;
 
   unsigned NumSlots = LS->getNumIntervals();
@@ -559,3 +561,37 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
 
   return Changed;
 }
+
+bool StackSlotColoringLegacy::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction()))
+    return false;
+
+  LiveStacks *LS = &getAnalysis<LiveStacksWrapperLegacy>().getLS();
+  MachineBlockFrequencyInfo *MBFI =
+      &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
+  SlotIndexes *Indexes = &getAnalysis<SlotIndexesWrapperPass>().getSI();
+  StackSlotColoring Impl(MF, LS, MBFI, Indexes);
+  return Impl.run(MF);
+}
+
+PreservedAnalyses
+StackSlotColoringPass::run(MachineFunction &MF,
+                           MachineFunctionAnalysisManager &MFAM) {
+  LiveStacks *LS = &MFAM.getResult<LiveStacksAnalysis>(MF);
+  MachineBlockFrequencyInfo *MBFI =
+      &MFAM.getResult<MachineBlockFrequencyAnalysis>(MF);
+  SlotIndexes *Indexes = &MFAM.getResult<SlotIndexesAnalysis>(MF);
+  StackSlotColoring Impl(MF, LS, MBFI, Indexes);
+  bool Changed = Impl.run(MF);
+  if (!Changed)
+    return PreservedAnalyses::all();
+
+  auto PA = getMachineFunctionPassPreservedAnalyses();
+  PA.preserveSet<CFGAnalyses>();
+  PA.preserve<SlotIndexesAnalysis>();
+  PA.preserve<MachineBlockFrequencyAnalysis>();
+  PA.preserve<MachineDominatorTreeAnalysis>();
+  PA.preserve<LiveIntervalsAnalysis>();
+  PA.preserve<LiveDebugVariablesAnalysis>();
+  return PA;
+}
diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
index e5bf0a5..0965fe8 100644
--- a/llvm/lib/CodeGen/StaticDataSplitter.cpp
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -46,12 +46,13 @@ class StaticDataSplitter : public MachineFunctionPass {
   const MachineBlockFrequencyInfo *MBFI = nullptr;
   const ProfileSummaryInfo *PSI = nullptr;
 
-  // Returns true iff any jump table is hot-cold categorized.
-  bool splitJumpTables(MachineFunction &MF);
+  // Update LLVM statistics for a machine function without profiles.
+  void updateStatsWithoutProfiles(const MachineFunction &MF);
+  // Update LLVM statistics for a machine function with profiles.
+  void updateStatsWithProfiles(const MachineFunction &MF);
 
-  // Same as above but works on functions with profile information.
-  bool splitJumpTablesWithProfiles(const MachineFunction &MF,
-                                   MachineJumpTableInfo &MJTI);
+  // Use profiles to partition static data.
+  bool partitionStaticDataWithProfiles(MachineFunction &MF);
 
 public:
   static char ID;
@@ -77,13 +78,25 @@ bool StaticDataSplitter::runOnMachineFunction(MachineFunction &MF) {
   MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
   PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
 
-  return splitJumpTables(MF);
+  const bool ProfileAvailable = PSI && PSI->hasProfileSummary() && MBFI &&
+                                MF.getFunction().hasProfileData();
+
+  if (!ProfileAvailable) {
+    updateStatsWithoutProfiles(MF);
+    return false;
+  }
+
+  bool Changed = partitionStaticDataWithProfiles(MF);
+
+  updateStatsWithProfiles(MF);
+  return Changed;
 }
 
-bool StaticDataSplitter::splitJumpTablesWithProfiles(
-    const MachineFunction &MF, MachineJumpTableInfo &MJTI) {
+bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
   int NumChangedJumpTables = 0;
 
+  MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+
   // Jump table could be used by either terminating instructions or
   // non-terminating ones, so we walk all instructions and use
   // `MachineOperand::isJTI()` to identify jump table operands.
@@ -92,63 +105,55 @@ bool StaticDataSplitter::splitJumpTablesWithProfiles(
   for (const auto &MBB : MF) {
     for (const MachineInstr &I : MBB) {
       for (const MachineOperand &Op : I.operands()) {
-        if (!Op.isJTI())
-          continue;
-        const int JTI = Op.getIndex();
-        // This is not a source block of jump table.
-        if (JTI == -1)
-          continue;
-
-        auto Hotness = MachineFunctionDataHotness::Hot;
-
-        // Hotness is based on source basic block hotness.
-        // TODO: PSI APIs are about instruction hotness. Introduce API for data
-        // access hotness.
-        if (PSI->isColdBlock(&MBB, MBFI))
-          Hotness = MachineFunctionDataHotness::Cold;
-
-        if (MJTI.updateJumpTableEntryHotness(JTI, Hotness))
-          ++NumChangedJumpTables;
+        if (Op.isJTI()) {
+          assert(MJTI != nullptr && "Jump table info is not available.");
+          const int JTI = Op.getIndex();
+          // This is not a source block of jump table.
+          if (JTI == -1)
+            continue;
+
+          auto Hotness = MachineFunctionDataHotness::Hot;
+
+          // Hotness is based on source basic block hotness.
+          // TODO: PSI APIs are about instruction hotness. Introduce API for
+          // data access hotness.
+          if (PSI->isColdBlock(&MBB, MBFI))
+            Hotness = MachineFunctionDataHotness::Cold;
+
+          if (MJTI->updateJumpTableEntryHotness(JTI, Hotness))
+            ++NumChangedJumpTables;
+        }
       }
     }
   }
   return NumChangedJumpTables > 0;
 }
 
-bool StaticDataSplitter::splitJumpTables(MachineFunction &MF) {
-  MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
-  if (!MJTI || MJTI->getJumpTables().empty())
-    return false;
+void StaticDataSplitter::updateStatsWithProfiles(const MachineFunction &MF) {
+  if (!AreStatisticsEnabled())
+    return;
 
-  const bool ProfileAvailable = PSI && PSI->hasProfileSummary() && MBFI &&
-                                MF.getFunction().hasProfileData();
-  auto statOnExit = llvm::make_scope_exit([&] {
-    if (!AreStatisticsEnabled())
-      return;
-
-    if (!ProfileAvailable) {
-      NumUnknownJumpTables += MJTI->getJumpTables().size();
-      return;
-    }
-
-    for (size_t JTI = 0; JTI < MJTI->getJumpTables().size(); JTI++) {
-      auto Hotness = MJTI->getJumpTables()[JTI].Hotness;
-      if (Hotness == MachineFunctionDataHotness::Hot) {
+  if (const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo()) {
+    for (const auto &JumpTable : MJTI->getJumpTables()) {
+      if (JumpTable.Hotness == MachineFunctionDataHotness::Hot) {
         ++NumHotJumpTables;
       } else {
-        assert(Hotness == MachineFunctionDataHotness::Cold &&
+        assert(JumpTable.Hotness == MachineFunctionDataHotness::Cold &&
                "A jump table is either hot or cold when profile information is "
                "available.");
         ++NumColdJumpTables;
       }
     }
-  });
+  }
+}
 
-  // Place jump tables according to block hotness if function has profile data.
-  if (ProfileAvailable)
-    return splitJumpTablesWithProfiles(MF, *MJTI);
+void StaticDataSplitter::updateStatsWithoutProfiles(const MachineFunction &MF) {
+  if (!AreStatisticsEnabled())
+    return;
 
-  return true;
+  if (const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo()) {
+    NumUnknownJumpTables += MJTI->getJumpTables().size();
+  }
 }
 
 char StaticDataSplitter::ID = 0;
diff --git a/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/llvm/lib/CodeGen/TargetRegisterInfo.cpp
index 77a4c74..e735c90 100644
--- a/llvm/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -420,7 +420,10 @@ static bool shareSameRegisterFile(const TargetRegisterInfo &TRI,
                                   const TargetRegisterClass *SrcRC,
                                   unsigned SrcSubReg) {
   // Same register class.
-  if (DefRC == SrcRC)
+  //
+  // When processing uncoalescable copies / bitcasts, it is possible we reach
+  // here with the same register class, but mismatched subregister indices.
+  if (DefRC == SrcRC && DefSubReg == SrcSubReg)
     return true;
 
   // Both operands are sub registers. Check if they share a register class.
diff --git a/llvm/lib/CodeGen/WindowScheduler.cpp b/llvm/lib/CodeGen/WindowScheduler.cpp
index e7fc0d9..379740ca 100644
--- a/llvm/lib/CodeGen/WindowScheduler.cpp
+++ b/llvm/lib/CodeGen/WindowScheduler.cpp
@@ -45,6 +45,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/TimeProfiler.h"
+#include "llvm/Target/TargetMachine.h"
 
 using namespace llvm;
 
@@ -167,7 +168,7 @@ WindowScheduler::createMachineScheduler(bool OnlyBuildGraph) {
              ? new ScheduleDAGMI(
                    Context, std::make_unique<PostGenericScheduler>(Context),
                    true)
-             : Context->PassConfig->createMachineScheduler(Context);
+             : Context->TM->createMachineScheduler(Context);
 }
 
 bool WindowScheduler::initialize() {
diff --git a/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt b/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt
index e5f5a99..65dd0c7 100644
--- a/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt
+++ b/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt
@@ -3,6 +3,7 @@ tablegen(LLVM COFFOptions.inc -gen-opt-parser-defs)
 add_public_tablegen_target(JITLinkTableGen)
 
 add_llvm_component_library(LLVMJITLink
+  CompactUnwindSupport.cpp
   DWARFRecordSectionSplitter.cpp
   EHFrameSupport.cpp
   JITLink.cpp
diff --git a/llvm/lib/ExecutionEngine/JITLink/CompactUnwindSupport.cpp b/llvm/lib/ExecutionEngine/JITLink/CompactUnwindSupport.cpp
new file mode 100644
index 0000000..51e3d26
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/JITLink/CompactUnwindSupport.cpp
@@ -0,0 +1,103 @@
+//=------- CompactUnwindSupport.cpp - Compact Unwind format support -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Compact Unwind support.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CompactUnwindSupport.h"
+
+#include "llvm/ADT/Sequence.h"
+
+#define DEBUG_TYPE "jitlink"
+
+namespace llvm {
+namespace jitlink {
+
+Error splitCompactUnwindBlocks(LinkGraph &G, Section &CompactUnwindSection,
+                               size_t RecordSize) {
+
+  std::vector<Block *> OriginalBlocks(CompactUnwindSection.blocks().begin(),
+                                      CompactUnwindSection.blocks().end());
+  LLVM_DEBUG({
+    dbgs() << "In " << G.getName() << " splitting compact unwind section "
+           << CompactUnwindSection.getName() << " containing "
+           << OriginalBlocks.size() << " initial blocks...\n";
+  });
+
+  while (!OriginalBlocks.empty()) {
+    auto *B = OriginalBlocks.back();
+    OriginalBlocks.pop_back();
+
+    if (B->getSize() == 0) {
+      LLVM_DEBUG({
+        dbgs() << "  Skipping empty block at "
+               << formatv("{0:x16}", B->getAddress()) << "\n";
+      });
+      continue;
+    }
+
+    unsigned NumBlocks = B->getSize() / RecordSize;
+
+    LLVM_DEBUG({
+      dbgs() << "  Splitting block at " << formatv("{0:x16}", B->getAddress())
+             << " into " << NumBlocks << " compact unwind record(s)\n";
+    });
+
+    if (B->getSize() % RecordSize)
+      return make_error<JITLinkError>(
+          "Error splitting compact unwind record in " + G.getName() +
+          ": block at " + formatv("{0:x}", B->getAddress()) + " has size " +
+          formatv("{0:x}", B->getSize()) +
+          " (not a multiple of CU record size of " +
+          formatv("{0:x}", RecordSize) + ")");
+
+    auto Blocks =
+        G.splitBlock(*B, map_range(seq(1U, NumBlocks), [=](Edge::OffsetT Idx) {
+          return Idx * RecordSize;
+        }));
+
+    for (auto *CURec : Blocks) {
+      bool AddedKeepAlive = false;
+
+      for (auto &E : CURec->edges()) {
+        if (E.getOffset() == 0) {
+          LLVM_DEBUG({
+            dbgs() << "    Updating compact unwind record at "
+                   << CURec->getAddress() << " to point to "
+                   << (E.getTarget().hasName() ? *E.getTarget().getName()
+                                               : StringRef())
+                   << " (at " << E.getTarget().getAddress() << ")\n";
+          });
+
+          if (E.getTarget().isExternal())
+            return make_error<JITLinkError>(
+                "Error adding keep-alive edge for compact unwind record at " +
+                formatv("{0:x}", CURec->getAddress()) + ": target " +
+                *E.getTarget().getName() + " is an external symbol");
+          auto &TgtBlock = E.getTarget().getBlock();
+          auto &CURecSym =
+              G.addAnonymousSymbol(*CURec, 0, RecordSize, false, false);
+          TgtBlock.addEdge(Edge::KeepAlive, 0, CURecSym, 0);
+          AddedKeepAlive = true;
+        }
+      }
+
+      if (!AddedKeepAlive)
+        return make_error<JITLinkError>(
+            "Error adding keep-alive edge for compact unwind record at " +
+            formatv("{0:x}", CURec->getAddress()) +
+            ": no outgoing target edge at offset 0");
+    }
+  }
+
+  return Error::success();
+}
+
+} // end namespace jitlink
+} // end namespace llvm
diff --git a/llvm/lib/ExecutionEngine/JITLink/CompactUnwindSupport.h b/llvm/lib/ExecutionEngine/JITLink/CompactUnwindSupport.h
new file mode 100644
index 0000000..dc3ed94
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/JITLink/CompactUnwindSupport.h
@@ -0,0 +1,653 @@
+//===- CompactUnwindSupportImpl.h - Compact Unwind format impl --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Compact Unwind format support implementation details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIB_EXECUTIONENGINE_JITLINK_COMPACTUNWINDSUPPORTIMPL_H
+#define LIB_EXECUTIONENGINE_JITLINK_COMPACTUNWINDSUPPORTIMPL_H
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ExecutionEngine/JITLink/MachO.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Endian.h"
+
+#define DEBUG_TYPE "jitlink_cu"
+
+namespace llvm {
+namespace jitlink {
+
+/// Split blocks in an __LD,__compact_unwind section on record boundaries.
+/// When this function returns edges within each record are guaranteed to be
+/// sorted by offset.
+Error splitCompactUnwindBlocks(LinkGraph &G, Section &CompactUnwindSection,
+                               size_t RecordSize);
+
+/// CRTP base for compact unwind traits classes. Automatically provides derived
+/// constants.
+///
+/// FIXME: Passing PtrSize as a template parameter is a hack to work around a
+///        bug in older MSVC compilers (until at least MSVC 15) where constexpr
+///        fields in the CRTP impl class were not visible to the base class.
+///        Once we no longer need to support these compilers the PtrSize
+///        template argument should be removed and PointerSize should be
+///        defined as a member in the CRTP Impl classes.
+template <typename CRTPImpl, size_t PtrSize> struct CompactUnwindTraits {
+  static constexpr size_t PointerSize = PtrSize;
+  static constexpr size_t Size = 3 * PointerSize + 2 * 4;
+  static constexpr size_t FnFieldOffset = 0;
+  static constexpr size_t SizeFieldOffset = FnFieldOffset + PointerSize;
+  static constexpr size_t EncodingFieldOffset = SizeFieldOffset + 4;
+  static constexpr size_t PersonalityFieldOffset = EncodingFieldOffset + 4;
+  static constexpr size_t LSDAFieldOffset =
+      PersonalityFieldOffset + PointerSize;
+
+  static uint32_t readPCRangeSize(ArrayRef<char> RecordContent) {
+    assert(SizeFieldOffset + 4 <= RecordContent.size() &&
+           "Truncated CU record?");
+    return support::endian::read32<CRTPImpl::Endianness>(RecordContent.data() +
+                                                         SizeFieldOffset);
+  }
+
+  static uint32_t readEncoding(ArrayRef<char> RecordContent) {
+    assert(EncodingFieldOffset + 4 <= RecordContent.size() &&
+           "Truncated CU record?");
+    return support::endian::read32<CRTPImpl::Endianness>(RecordContent.data() +
+                                                         EncodingFieldOffset);
+  }
+};
+
+/// Architecture specific implementation of CompactUnwindManager.
+template <typename CURecTraits> class CompactUnwindManager {
+public:
+  CompactUnwindManager(StringRef CompactUnwindSectionName,
+                       StringRef UnwindInfoSectionName,
+                       StringRef EHFrameSectionName)
+      : CompactUnwindSectionName(CompactUnwindSectionName),
+        UnwindInfoSectionName(UnwindInfoSectionName),
+        EHFrameSectionName(EHFrameSectionName) {}
+
+  // Split compact unwind records, add keep-alive edges from functions to
+  // compact unwind records, and from compact unwind records to FDEs where
+  // needed.
+  //
+  // This method must be called *after* __eh_frame has been processed: it
+  // assumes that eh-frame records have been split up and keep-alive edges have
+  // been inserted.
+  Error prepareForPrune(LinkGraph &G) {
+    Section *CUSec = G.findSectionByName(CompactUnwindSectionName);
+    if (!CUSec || CUSec->empty()) {
+      LLVM_DEBUG({
+        dbgs() << "Compact unwind: No compact unwind info for " << G.getName()
+               << "\n";
+      });
+      return Error::success();
+    }
+
+    LLVM_DEBUG({
+      dbgs() << "Compact unwind: preparing " << G.getName() << " for prune\n";
+    });
+
+    Section *EHFrameSec = G.findSectionByName(EHFrameSectionName);
+
+    if (auto Err = splitCompactUnwindBlocks(G, *CUSec, CURecTraits::Size))
+      return Err;
+
+    LLVM_DEBUG({
+      dbgs() << "  Preparing " << CUSec->blocks_size() << " blocks in "
+             << CompactUnwindSectionName << "\n";
+    });
+
+    for (auto *B : CUSec->blocks()) {
+
+      // Find target function edge.
+      Edge *PCBeginEdge = nullptr;
+      for (auto &E : B->edges_at(CURecTraits::FnFieldOffset)) {
+        PCBeginEdge = &E;
+        break;
+      }
+
+      if (!PCBeginEdge)
+        return make_error<JITLinkError>(
+            "In " + G.getName() + ", compact unwind record at " +
+            formatv("{0:x}", B->getAddress()) + " has no pc-begin edge");
+
+      if (!PCBeginEdge->getTarget().isDefined())
+        return make_error<JITLinkError>(
+            "In " + G.getName() + ", compact unwind record at " +
+            formatv("{0:x}", B->getAddress()) + " points at external symbol " +
+            *PCBeginEdge->getTarget().getName());
+
+      auto &Fn = PCBeginEdge->getTarget();
+
+      if (!Fn.isDefined()) {
+        LLVM_DEBUG({
+          dbgs() << "In " << CompactUnwindSectionName << " for " << G.getName()
+                 << " encountered unexpected pc-edge to undefined symbol "
+                 << Fn.getName() << "\n";
+        });
+        continue;
+      } else {
+        LLVM_DEBUG({
+          dbgs() << "    Found record for function ";
+          if (Fn.hasName())
+            dbgs() << Fn.getName();
+          else
+            dbgs() << "<anon @ " << Fn.getAddress() << '>';
+          dbgs() << '\n';
+        });
+      }
+
+      bool NeedsDWARF = CURecTraits::encodingSpecifiesDWARF(
+          CURecTraits::readEncoding(B->getContent()));
+
+      auto &CURecSym =
+          G.addAnonymousSymbol(*B, 0, CURecTraits::Size, false, false);
+
+      bool KeepAliveAlreadyPresent = false;
+      if (EHFrameSec) {
+        Edge *KeepAliveEdge = nullptr;
+        for (auto &E : Fn.getBlock().edges_at(0)) {
+          if (E.getKind() == Edge::KeepAlive && E.getTarget().isDefined() &&
+              &E.getTarget().getBlock().getSection() == EHFrameSec) {
+            KeepAliveEdge = &E;
+            break;
+          }
+        }
+
+        if (KeepAliveEdge) {
+          // Found a keep-alive edge to an FDE in the eh-frame. Switch the keep
+          // alive edge to point to the CU and if the CU needs DWARF then add
+          // an extra keep-alive edge from the CU to the FDE.
+          auto &FDE = KeepAliveEdge->getTarget();
+          KeepAliveEdge->setTarget(CURecSym);
+          KeepAliveAlreadyPresent = true;
+          if (NeedsDWARF) {
+            LLVM_DEBUG({
+              dbgs() << "      Needs DWARF: adding keep-alive edge to FDE at "
+                     << FDE.getAddress() << "\n";
+            });
+            B->addEdge(Edge::KeepAlive, 0, FDE, 0);
+          }
+        } else {
+          if (NeedsDWARF)
+            return make_error<JITLinkError>(
+                "In " + G.getName() + ", compact unwind recard ot " +
+                formatv("{0:x}", B->getAddress()) +
+                " needs DWARF, but no FDE was found");
+        }
+      } else {
+        if (NeedsDWARF)
+          return make_error<JITLinkError>(
+              "In " + G.getName() + ", compact unwind recard ot " +
+              formatv("{0:x}", B->getAddress()) + " needs DWARF, but no " +
+              EHFrameSectionName + " section exists");
+      }
+
+      if (!KeepAliveAlreadyPresent) {
+        // No FDE edge. We'll need to add a new edge from the function back
+        // to the CU record.
+        Fn.getBlock().addEdge(Edge::KeepAlive, 0, CURecSym, 0);
+      }
+    }
+
+    return Error::success();
+  }
+
+  /// Process all __compact_unwind records and reserve space for __unwind_info.
+  Error processAndReserveUnwindInfo(LinkGraph &G) {
+    // Bail out early if no unwind info.
+    Section *CUSec = G.findSectionByName(CompactUnwindSectionName);
+    if (!CUSec)
+      return Error::success();
+
+    // The __LD/__compact_unwind section is only used as input for the linker.
+    // We'll create a new __TEXT,__unwind_info section for unwind info output.
+    CUSec->setMemLifetime(orc::MemLifetime::NoAlloc);
+
+    // Find / make a mach-header to act as the base for unwind-info offsets
+    // (and to report the arch / subarch to libunwind).
+    if (auto Err = getOrCreateCompactUnwindBase(G))
+      return Err;
+
+    // Error out if there's already unwind-info in the graph: We have no idea
+    // how to merge unwind-info sections.
+    if (G.findSectionByName(UnwindInfoSectionName))
+      return make_error<JITLinkError>("In " + G.getName() + ", " +
+                                      UnwindInfoSectionName +
+                                      " already exists");
+
+    // Process the __compact_unwind section to build the Records vector that
+    // we'll use for writing the __unwind_info section.
+    if (auto Err = processCompactUnwind(G, *CUSec))
+      return Err;
+
+    // Calculate the size of __unwind_info.
+    size_t UnwindInfoSectionSize =
+        UnwindInfoSectionHeaderSize +
+        Personalities.size() * PersonalityEntrySize +
+        (NumSecondLevelPages + 1) * IndexEntrySize + NumLSDAs * LSDAEntrySize +
+        NumSecondLevelPages * SecondLevelPageHeaderSize +
+        Records.size() * SecondLevelPageEntrySize;
+
+    LLVM_DEBUG({
+      dbgs() << "In " << G.getName() << ", reserving "
+             << formatv("{0:x}", UnwindInfoSectionSize) << " bytes for "
+             << UnwindInfoSectionName << "\n";
+    });
+
+    // Create the __unwind_info section and reserve space for it.
+    Section &UnwindInfoSec =
+        G.createSection(UnwindInfoSectionName, orc::MemProt::Read);
+
+    auto UnwindInfoSectionContent = G.allocateBuffer(UnwindInfoSectionSize);
+    memset(UnwindInfoSectionContent.data(), 0, UnwindInfoSectionContent.size());
+    auto &B = G.createMutableContentBlock(
+        UnwindInfoSec, UnwindInfoSectionContent, orc::ExecutorAddr(), 8, 0);
+
+    // Add Keep-alive edges from the __unwind_info block to all of the target
+    // functions.
+    for (auto &R : Records)
+      B.addEdge(Edge::KeepAlive, 0, *R.Fn, 0);
+
+    return Error::success();
+  }
+
+  Error writeUnwindInfo(LinkGraph &G) {
+    Section *CUSec = G.findSectionByName(CompactUnwindSectionName);
+    if (!CUSec || CUSec->empty())
+      return Error::success();
+
+    Section *UnwindInfoSec = G.findSectionByName(UnwindInfoSectionName);
+    if (!UnwindInfoSec)
+      return make_error<JITLinkError>("In " + G.getName() + ", " +
+                                      UnwindInfoSectionName +
+                                      " missing after allocation");
+
+    if (UnwindInfoSec->blocks_size() != 1)
+      return make_error<JITLinkError>(
+          "In " + G.getName() + ", " + UnwindInfoSectionName +
+          " contains more than one block post-allocation");
+
+    LLVM_DEBUG(
+        { dbgs() << "Writing unwind info for " << G.getName() << "...\n"; });
+
+    mergeRecords();
+
+    auto &UnwindInfoBlock = **UnwindInfoSec->blocks().begin();
+    auto Content = UnwindInfoBlock.getMutableContent(G);
+    BinaryStreamWriter Writer(
+        {reinterpret_cast<uint8_t *>(Content.data()), Content.size()},
+        CURecTraits::Endianness);
+
+    // __unwind_info format, from mach-o/compact_unwind_encoding.h on Darwin:
+    //
+    // #define UNWIND_SECTION_VERSION 1
+    // struct unwind_info_section_header
+    // {
+    //     uint32_t    version;            // UNWIND_SECTION_VERSION
+    //     uint32_t    commonEncodingsArraySectionOffset;
+    //     uint32_t    commonEncodingsArrayCount;
+    //     uint32_t    personalityArraySectionOffset;
+    //     uint32_t    personalityArrayCount;
+    //     uint32_t    indexSectionOffset;
+    //     uint32_t    indexCount;
+    //     // compact_unwind_encoding_t[]
+    //     // uint32_t personalities[]
+    //     // unwind_info_section_header_index_entry[]
+    //     // unwind_info_section_header_lsda_index_entry[]
+    // };
+
+    if (auto Err = writeHeader(G, Writer))
+      return Err;
+
+    // Skip common encodings: JITLink doesn't use them.
+
+    if (auto Err = writePersonalities(G, Writer))
+      return Err;
+
+    // Calculate the offset to the LSDAs.
+    size_t SectionOffsetToLSDAs =
+        Writer.getOffset() + (NumSecondLevelPages + 1) * IndexEntrySize;
+
+    // Calculate offset to the 1st second-level page.
+    size_t SectionOffsetToSecondLevelPages =
+        SectionOffsetToLSDAs + NumLSDAs * LSDAEntrySize;
+
+    if (auto Err = writeIndexes(G, Writer, SectionOffsetToLSDAs,
+                                SectionOffsetToSecondLevelPages))
+      return Err;
+
+    if (auto Err = writeLSDAs(G, Writer))
+      return Err;
+
+    if (auto Err = writeSecondLevelPages(G, Writer))
+      return Err;
+
+    LLVM_DEBUG({
+      dbgs() << "    Wrote " << formatv("{0:x}", Writer.getOffset())
+             << " bytes of unwind info.\n";
+    });
+
+    return Error::success();
+  }
+
+private:
+  // Calculate the size of unwind-info.
+  static constexpr size_t MaxPersonalities = 4;
+  static constexpr size_t PersonalityShift = 28;
+
+  static constexpr size_t UnwindInfoSectionHeaderSize = 4 * 7;
+  static constexpr size_t PersonalityEntrySize = 4;
+  static constexpr size_t IndexEntrySize = 3 * 4;
+  static constexpr size_t LSDAEntrySize = 2 * 4;
+  static constexpr size_t SecondLevelPageSize = 4096;
+  static constexpr size_t SecondLevelPageHeaderSize = 8;
+  static constexpr size_t SecondLevelPageEntrySize = 8;
+  static constexpr size_t NumRecordsPerSecondLevelPage =
+      (SecondLevelPageSize - SecondLevelPageHeaderSize) /
+      SecondLevelPageEntrySize;
+
+  struct CompactUnwindRecord {
+    Symbol *Fn = nullptr;
+    uint32_t Size = 0;
+    uint32_t Encoding = 0;
+    Symbol *LSDA = nullptr;
+    Symbol *FDE = nullptr;
+  };
+
+  Error processCompactUnwind(LinkGraph &G, Section &CUSec) {
+    // TODO: Reset NumLSDAs, Personalities and CompactUnwindRecords if
+    // processing more than once.
+    assert(NumLSDAs == 0 && "NumLSDAs should be zero");
+    assert(Records.empty() && "CompactUnwindRecords vector should be empty.");
+    assert(Personalities.empty() && "Personalities vector should be empty.");
+
+    SmallVector<CompactUnwindRecord> NonUniquedRecords;
+    NonUniquedRecords.reserve(CUSec.blocks_size());
+
+    // Process __compact_unwind blocks.
+    for (auto *B : CUSec.blocks()) {
+      CompactUnwindRecord R;
+      R.Encoding = CURecTraits::readEncoding(B->getContent());
+      for (auto &E : B->edges()) {
+        switch (E.getOffset()) {
+        case CURecTraits::FnFieldOffset:
+          // This could be the function-pointer, or the FDE keep-alive. Check
+          // the type to decide.
+          if (E.getKind() == Edge::KeepAlive)
+            R.FDE = &E.getTarget();
+          else
+            R.Fn = &E.getTarget();
+          break;
+        case CURecTraits::PersonalityFieldOffset: {
+          // Add the Personality to the Personalities map and update the
+          // encoding.
+          size_t PersonalityIdx = 0;
+          for (; PersonalityIdx != Personalities.size(); ++PersonalityIdx)
+            if (Personalities[PersonalityIdx] == &E.getTarget())
+              break;
+          if (PersonalityIdx == MaxPersonalities)
+            return make_error<JITLinkError>(
+                "In " + G.getName() +
+                ", __compact_unwind contains too many personalities (max " +
+                formatv("{}", MaxPersonalities) + ")");
+          if (PersonalityIdx == Personalities.size())
+            Personalities.push_back(&E.getTarget());
+
+          R.Encoding |= (PersonalityIdx + 1) << PersonalityShift;
+          break;
+        }
+        case CURecTraits::LSDAFieldOffset:
+          ++NumLSDAs;
+          R.LSDA = &E.getTarget();
+          break;
+        default:
+          return make_error<JITLinkError>("In " + G.getName() +
+                                          ", compact unwind record at " +
+                                          formatv("{0:x}", B->getAddress()) +
+                                          " has unrecognized edge at offset " +
+                                          formatv("{0:x}", E.getOffset()));
+        }
+      }
+      Records.push_back(R);
+    }
+
+    // Sort the records into ascending order.
+    llvm::sort(Records, [](const CompactUnwindRecord &LHS,
+                           const CompactUnwindRecord &RHS) {
+      return LHS.Fn->getAddress() < RHS.Fn->getAddress();
+    });
+
+    // Calculate the number of second-level pages required.
+    NumSecondLevelPages = (Records.size() + NumRecordsPerSecondLevelPage - 1) /
+                          NumRecordsPerSecondLevelPage;
+
+    // Convert personality symbols to GOT entry pointers.
+    typename CURecTraits::GOTManager GOT(G);
+    for (auto &Personality : Personalities)
+      Personality = &GOT.getEntryForTarget(G, *Personality);
+
+    LLVM_DEBUG({
+      dbgs() << "  In " << G.getName() << ", " << CompactUnwindSectionName
+             << ": raw records = " << Records.size()
+             << ", personalities = " << Personalities.size()
+             << ", lsdas = " << NumLSDAs << "\n";
+    });
+
+    return Error::success();
+  }
+
+  void mergeRecords() {
+    SmallVector<CompactUnwindRecord> NonUniqued = std::move(Records);
+    Records.reserve(NonUniqued.size());
+
+    Records.push_back(NonUniqued.front());
+    for (size_t I = 1; I != NonUniqued.size(); ++I) {
+      auto &Next = NonUniqued[I];
+      auto &Last = Records.back();
+
+      bool NextNeedsDWARF = CURecTraits::encodingSpecifiesDWARF(Next.Encoding);
+      bool CannotBeMerged = CURecTraits::encodingCannotBeMerged(Next.Encoding);
+      if (NextNeedsDWARF || (Next.Encoding != Last.Encoding) ||
+          CannotBeMerged || Next.LSDA || Last.LSDA)
+        Records.push_back(Next);
+    }
+
+    // Recalculate derived values that may have changed.
+    NumSecondLevelPages = (Records.size() + NumRecordsPerSecondLevelPage - 1) /
+                          NumRecordsPerSecondLevelPage;
+  }
+
+  Error writeHeader(LinkGraph &G, BinaryStreamWriter &W) {
+    if (!isUInt<32>(NumSecondLevelPages + 1))
+      return make_error<JITLinkError>("In " + G.getName() + ", too many " +
+                                      UnwindInfoSectionName +
+                                      "second-level pages required");
+
+    // Write __unwind_info header.
+    size_t IndexArrayOffset = UnwindInfoSectionHeaderSize +
+                              Personalities.size() * PersonalityEntrySize;
+
+    cantFail(W.writeInteger<uint32_t>(1));
+    cantFail(W.writeInteger<uint32_t>(UnwindInfoSectionHeaderSize));
+    cantFail(W.writeInteger<uint32_t>(0));
+    cantFail(W.writeInteger<uint32_t>(UnwindInfoSectionHeaderSize));
+    cantFail(W.writeInteger<uint32_t>(Personalities.size()));
+    cantFail(W.writeInteger<uint32_t>(IndexArrayOffset));
+    cantFail(W.writeInteger<uint32_t>(NumSecondLevelPages + 1));
+
+    return Error::success();
+  }
+
+  Error writePersonalities(LinkGraph &G, BinaryStreamWriter &W) {
+    // Write personalities.
+    for (auto *PSym : Personalities) {
+      auto Delta = PSym->getAddress() - CompactUnwindBase->getAddress();
+      if (!isUInt<32>(Delta))
+        return makePersonalityRangeError(G, *PSym);
+      cantFail(W.writeInteger<uint32_t>(Delta));
+    }
+    return Error::success();
+  }
+
+  Error writeIndexes(LinkGraph &G, BinaryStreamWriter &W,
+                     size_t SectionOffsetToLSDAs,
+                     size_t SectionOffsetToSecondLevelPages) {
+    // Assume that function deltas are ok in this method -- we'll error
+    // check all of them when we write the second level pages.
+
+    // Write the header index entries.
+    size_t RecordIdx = 0;
+    size_t NumPreviousLSDAs = 0;
+    for (auto &R : Records) {
+      // If this record marks the start of a new second level page.
+      if (RecordIdx % NumRecordsPerSecondLevelPage == 0) {
+        auto FnDelta = R.Fn->getAddress() - CompactUnwindBase->getAddress();
+        auto SecondLevelPageOffset = SectionOffsetToSecondLevelPages +
+                                     (RecordIdx / NumRecordsPerSecondLevelPage);
+        auto LSDAOffset =
+            SectionOffsetToLSDAs + NumPreviousLSDAs * LSDAEntrySize;
+
+        cantFail(W.writeInteger<uint32_t>(FnDelta));
+        cantFail(W.writeInteger<uint32_t>(SecondLevelPageOffset));
+        cantFail(W.writeInteger<uint32_t>(LSDAOffset));
+      }
+      if (R.LSDA)
+        ++NumPreviousLSDAs;
+      ++RecordIdx;
+    }
+
+    // Write the index array terminator.
+    {
+      auto FnEndDelta =
+          Records.back().Fn->getRange().End - CompactUnwindBase->getAddress();
+
+      if (LLVM_UNLIKELY(!isUInt<32>(FnEndDelta)))
+        return make_error<JITLinkError>(
+            "In " + G.getName() + " " + UnwindInfoSectionName +
+            ", delta to end of functions  " +
+            formatv("{0:x}", Records.back().Fn->getRange().End) +
+            " exceeds 32 bits");
+
+      cantFail(W.writeInteger<uint32_t>(FnEndDelta));
+      cantFail(W.writeInteger<uint32_t>(0));
+      cantFail(W.writeInteger<uint32_t>(SectionOffsetToSecondLevelPages));
+    }
+
+    return Error::success();
+  }
+
+  Error writeLSDAs(LinkGraph &G, BinaryStreamWriter &W) {
+    // As with writeIndexes, assume that function deltas are ok for now.
+    for (auto &R : Records) {
+      if (R.LSDA) {
+        auto FnDelta = R.Fn->getAddress() - CompactUnwindBase->getAddress();
+        auto LSDADelta = R.LSDA->getAddress() - CompactUnwindBase->getAddress();
+
+        if (LLVM_UNLIKELY(!isUInt<32>(LSDADelta)))
+          return make_error<JITLinkError>(
+              "In " + G.getName() + " " + UnwindInfoSectionName +
+              ", delta to lsda at " + formatv("{0:x}", R.LSDA->getAddress()) +
+              " exceeds 32 bits");
+
+        cantFail(W.writeInteger<uint32_t>(FnDelta));
+        cantFail(W.writeInteger<uint32_t>(LSDADelta));
+      }
+    }
+
+    return Error::success();
+  }
+
+  Error writeSecondLevelPages(LinkGraph &G, BinaryStreamWriter &W) {
+    size_t RecordIdx = 0;
+
+    for (auto &R : Records) {
+      // When starting a new second-level page, write the page header:
+      //
+      //   2     : uint32_t    -- UNWIND_SECOND_LEVEL_REGULAR
+      //   8     : uint16_t    -- size of second level page table header
+      //   count : uint16_t    -- num entries in this second-level page
+      if (RecordIdx % NumRecordsPerSecondLevelPage == 0) {
+        constexpr uint32_t SecondLevelPageHeaderKind = 2;
+        constexpr uint16_t SecondLevelPageHeaderSize = 8;
+        uint16_t SecondLevelPageNumEntries =
+            std::min(Records.size() - RecordIdx, NumRecordsPerSecondLevelPage);
+
+        cantFail(W.writeInteger<uint32_t>(SecondLevelPageHeaderKind));
+        cantFail(W.writeInteger<uint16_t>(SecondLevelPageHeaderSize));
+        cantFail(W.writeInteger<uint16_t>(SecondLevelPageNumEntries));
+      }
+
+      // Write entry.
+      auto FnDelta = R.Fn->getAddress() - CompactUnwindBase->getAddress();
+
+      if (LLVM_UNLIKELY(!isUInt<32>(FnDelta)))
+        return make_error<JITLinkError>(
+            "In " + G.getName() + " " + UnwindInfoSectionName +
+            ", delta to function at " + formatv("{0:x}", R.Fn->getAddress()) +
+            " exceeds 32 bits");
+
+      cantFail(W.writeInteger<uint32_t>(FnDelta));
+      cantFail(W.writeInteger<uint32_t>(R.Encoding));
+
+      ++RecordIdx;
+    }
+
+    return Error::success();
+  }
+
+  Error getOrCreateCompactUnwindBase(LinkGraph &G) {
+    auto Name = G.intern("__jitlink$libunwind_dso_base");
+    CompactUnwindBase = G.findAbsoluteSymbolByName(Name);
+    if (!CompactUnwindBase) {
+      if (auto LocalCUBase = getOrCreateLocalMachOHeader(G)) {
+        CompactUnwindBase = &*LocalCUBase;
+        auto &B = LocalCUBase->getBlock();
+        G.addDefinedSymbol(B, 0, *Name, B.getSize(), Linkage::Strong,
+                           Scope::Local, false, true);
+      } else
+        return LocalCUBase.takeError();
+    }
+    CompactUnwindBase->setLive(true);
+    return Error::success();
+  }
+
+  Error makePersonalityRangeError(LinkGraph &G, Symbol &PSym) {
+    std::string ErrMsg;
+    {
+      raw_string_ostream ErrStream(ErrMsg);
+      ErrStream << "In " << G.getName() << " " << UnwindInfoSectionName
+                << ", personality ";
+      if (PSym.hasName())
+        ErrStream << PSym.getName() << " ";
+      ErrStream << "at " << PSym.getAddress()
+                << " is out of 32-bit delta range of compact-unwind base at "
+                << CompactUnwindBase->getAddress();
+    }
+    return make_error<JITLinkError>(std::move(ErrMsg));
+  }
+
+  StringRef CompactUnwindSectionName;
+  StringRef UnwindInfoSectionName;
+  StringRef EHFrameSectionName;
+  Symbol *CompactUnwindBase = nullptr;
+
+  size_t NumLSDAs = 0;
+  size_t NumSecondLevelPages = 0;
+  SmallVector<Symbol *, MaxPersonalities> Personalities;
+  SmallVector<CompactUnwindRecord> Records;
+};
+
+} // end namespace jitlink
+} // end namespace llvm
+
+#undef DEBUG_TYPE
+
+#endif // LIB_EXECUTIONENGINE_JITLINK_COMPACTUNWINDSUPPORTIMPL_H
diff --git a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp
index 3e757f7..179e458c 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp
@@ -733,121 +733,5 @@ Error MachOLinkGraphBuilder::graphifyCStringSection(
   return Error::success();
 }
 
-Error CompactUnwindSplitter::operator()(LinkGraph &G) {
-  auto *CUSec = G.findSectionByName(CompactUnwindSectionName);
-  if (!CUSec)
-    return Error::success();
-
-  if (!G.getTargetTriple().isOSBinFormatMachO())
-    return make_error<JITLinkError>(
-        "Error linking " + G.getName() +
-        ": compact unwind splitting not supported on non-macho target " +
-        G.getTargetTriple().str());
-
-  unsigned CURecordSize = 0;
-  unsigned PersonalityEdgeOffset = 0;
-  unsigned LSDAEdgeOffset = 0;
-  switch (G.getTargetTriple().getArch()) {
-  case Triple::aarch64:
-  case Triple::x86_64:
-    // 64-bit compact-unwind record format:
-    // Range start: 8 bytes.
-    // Range size:  4 bytes.
-    // CU encoding: 4 bytes.
-    // Personality: 8 bytes.
-    // LSDA:        8 bytes.
-    CURecordSize = 32;
-    PersonalityEdgeOffset = 16;
-    LSDAEdgeOffset = 24;
-    break;
-  default:
-    return make_error<JITLinkError>(
-        "Error linking " + G.getName() +
-        ": compact unwind splitting not supported on " +
-        G.getTargetTriple().getArchName());
-  }
-
-  std::vector<Block *> OriginalBlocks(CUSec->blocks().begin(),
-                                      CUSec->blocks().end());
-  LLVM_DEBUG({
-    dbgs() << "In " << G.getName() << " splitting compact unwind section "
-           << CompactUnwindSectionName << " containing "
-           << OriginalBlocks.size() << " initial blocks...\n";
-  });
-
-  while (!OriginalBlocks.empty()) {
-    auto *B = OriginalBlocks.back();
-    OriginalBlocks.pop_back();
-
-    if (B->getSize() == 0) {
-      LLVM_DEBUG({
-        dbgs() << "  Skipping empty block at "
-               << formatv("{0:x16}", B->getAddress()) << "\n";
-      });
-      continue;
-    }
-
-    unsigned NumBlocks = B->getSize() / CURecordSize;
-
-    LLVM_DEBUG({
-      dbgs() << "  Splitting block at " << formatv("{0:x16}", B->getAddress())
-             << " into " << NumBlocks << " compact unwind record(s)\n";
-    });
-
-    if (B->getSize() % CURecordSize)
-      return make_error<JITLinkError>(
-          "Error splitting compact unwind record in " + G.getName() +
-          ": block at " + formatv("{0:x}", B->getAddress()) + " has size " +
-          formatv("{0:x}", B->getSize()) +
-          " (not a multiple of CU record size of " +
-          formatv("{0:x}", CURecordSize) + ")");
-
-    auto Blocks =
-        G.splitBlock(*B, map_range(seq(1U, NumBlocks), [=](Edge::OffsetT Idx) {
-          return Idx * CURecordSize;
-        }));
-
-    for (auto *CURec : Blocks) {
-      bool AddedKeepAlive = false;
-
-      for (auto &E : CURec->edges()) {
-        if (E.getOffset() == 0) {
-          LLVM_DEBUG({
-            dbgs() << "    Updating compact unwind record at "
-                   << CURec->getAddress() << " to point to "
-                   << (E.getTarget().hasName() ? *E.getTarget().getName()
-                                               : StringRef())
-                   << " (at " << E.getTarget().getAddress() << ")\n";
-          });
-
-          if (E.getTarget().isExternal())
-            return make_error<JITLinkError>(
-                "Error adding keep-alive edge for compact unwind record at " +
-                formatv("{0:x}", CURec->getAddress()) + ": target " +
-                *E.getTarget().getName() + " is an external symbol");
-          auto &TgtBlock = E.getTarget().getBlock();
-          auto &CURecSym =
-              G.addAnonymousSymbol(*CURec, 0, CURecordSize, false, false);
-          TgtBlock.addEdge(Edge::KeepAlive, 0, CURecSym, 0);
-          AddedKeepAlive = true;
-        } else if (E.getOffset() != PersonalityEdgeOffset &&
-                   E.getOffset() != LSDAEdgeOffset)
-          return make_error<JITLinkError>(
-              "Unexpected edge at offset " + formatv("{0:x}", E.getOffset()) +
-              " in compact unwind record at " +
-              formatv("{0:x}", CURec->getAddress()));
-      }
-
-      if (!AddedKeepAlive)
-        return make_error<JITLinkError>(
-            "Error adding keep-alive edge for compact unwind record at " +
-            formatv("{0:x}", CURec->getAddress()) +
-            ": no outgoing target edge at offset 0");
-    }
-  }
-
-  return Error::success();
-}
-
 } // end namespace jitlink
 } // end namespace llvm
diff --git a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
index 6afa012..343218e 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
+++ b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
@@ -236,17 +236,6 @@ private:
   StringMap<SectionParserFunction> CustomSectionParserFunctions;
 };
 
-/// A pass to split up __LD,__compact_unwind sections.
-class CompactUnwindSplitter {
-public:
-  CompactUnwindSplitter(StringRef CompactUnwindSectionName)
-      : CompactUnwindSectionName(CompactUnwindSectionName) {}
-  Error operator()(LinkGraph &G);
-
-private:
-  StringRef CompactUnwindSectionName;
-};
-
 } // end namespace jitlink
 } // end namespace llvm
 
diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
index 29061ff..4860db4 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
@@ -13,7 +13,9 @@
 #include "llvm/ExecutionEngine/JITLink/MachO_arm64.h"
 #include "llvm/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.h"
 #include "llvm/ExecutionEngine/JITLink/aarch64.h"
+#include "llvm/ExecutionEngine/Orc/Shared/MachOObjectFormat.h"
 
+#include "CompactUnwindSupport.h"
 #include "DefineExternalSectionStartAndEndSymbols.h"
 #include "MachOLinkGraphBuilder.h"
 
@@ -625,6 +627,27 @@ static Error applyPACSigningToModInitPointers(LinkGraph &G) {
   return Error::success();
 }
 
+struct CompactUnwindTraits_MachO_arm64
+    : public CompactUnwindTraits<CompactUnwindTraits_MachO_arm64,
+                                 /* PointerSize = */ 8> {
+  // FIXME: Reinstate once we no longer need the MSVC workaround. See
+  //        FIXME for CompactUnwindTraits in CompactUnwindSupport.h.
+  // constexpr static size_t PointerSize = 8;
+
+  constexpr static endianness Endianness = endianness::little;
+
+  constexpr static uint32_t EncodingModeMask = 0x0f000000;
+
+  using GOTManager = aarch64::GOTTableManager;
+
+  static bool encodingSpecifiesDWARF(uint32_t Encoding) {
+    constexpr uint32_t DWARFMode = 0x03000000;
+    return (Encoding & EncodingModeMask) == DWARFMode;
+  }
+
+  static bool encodingCannotBeMerged(uint32_t Encoding) { return false; }
+};
+
 void link_MachO_arm64(std::unique_ptr<LinkGraph> G,
                       std::unique_ptr<JITLinkContext> Ctx) {
 
@@ -637,16 +660,21 @@ void link_MachO_arm64(std::unique_ptr<LinkGraph> G,
     else
       Config.PrePrunePasses.push_back(markAllSymbolsLive);
 
-    // Add compact unwind splitter pass.
-    Config.PrePrunePasses.push_back(
-        CompactUnwindSplitter("__LD,__compact_unwind"));
-
     // Add eh-frame passes.
-    // FIXME: Prune eh-frames for which compact-unwind is available once
-    // we support compact-unwind registration with libunwind.
     Config.PrePrunePasses.push_back(createEHFrameSplitterPass_MachO_arm64());
     Config.PrePrunePasses.push_back(createEHFrameEdgeFixerPass_MachO_arm64());
 
+    // Create a compact-unwind manager for use in passes below.
+    auto CompactUnwindMgr =
+        std::make_shared<CompactUnwindManager<CompactUnwindTraits_MachO_arm64>>(
+            orc::MachOCompactUnwindSectionName, orc::MachOUnwindInfoSectionName,
+            orc::MachOEHFrameSectionName);
+
+    // Add compact unwind prepare pass.
+    Config.PrePrunePasses.push_back([CompactUnwindMgr](LinkGraph &G) {
+      return CompactUnwindMgr->prepareForPrune(G);
+    });
+
     // Resolve any external section start / end symbols.
     Config.PostAllocationPasses.push_back(
         createDefineExternalSectionStartAndEndSymbolsPass(
@@ -663,6 +691,16 @@ void link_MachO_arm64(std::unique_ptr<LinkGraph> G,
       Config.PreFixupPasses.push_back(
           aarch64::lowerPointer64AuthEdgesToSigningFunction);
     }
+
+    // Reserve unwind-info space.
+    Config.PostPrunePasses.push_back([CompactUnwindMgr](LinkGraph &G) {
+      return CompactUnwindMgr->processAndReserveUnwindInfo(G);
+    });
+
+    // Translate compact-unwind to unwind-info.
+    Config.PreFixupPasses.push_back([CompactUnwindMgr](LinkGraph &G) {
+      return CompactUnwindMgr->writeUnwindInfo(G);
+    });
   }
 
   if (auto Err = Ctx->modifyPassConfig(*G, Config))
@@ -673,11 +711,11 @@ void link_MachO_arm64(std::unique_ptr<LinkGraph> G,
 }
 
 LinkGraphPassFunction createEHFrameSplitterPass_MachO_arm64() {
-  return DWARFRecordSectionSplitter("__TEXT,__eh_frame");
+  return DWARFRecordSectionSplitter(orc::MachOEHFrameSectionName);
 }
 
 LinkGraphPassFunction createEHFrameEdgeFixerPass_MachO_arm64() {
-  return EHFrameEdgeFixer("__TEXT,__eh_frame", aarch64::PointerSize,
+  return EHFrameEdgeFixer(orc::MachOEHFrameSectionName, aarch64::PointerSize,
                           aarch64::Pointer32, aarch64::Pointer64,
                           aarch64::Delta32, aarch64::Delta64,
                           aarch64::NegDelta32);
diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
index 9547266..d56dfdc 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
@@ -13,7 +13,9 @@
 #include "llvm/ExecutionEngine/JITLink/MachO_x86_64.h"
 #include "llvm/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.h"
 #include "llvm/ExecutionEngine/JITLink/x86_64.h"
+#include "llvm/ExecutionEngine/Orc/Shared/MachOObjectFormat.h"
 
+#include "CompactUnwindSupport.h"
 #include "DefineExternalSectionStartAndEndSymbols.h"
 #include "MachOLinkGraphBuilder.h"
 
@@ -500,26 +502,57 @@ Expected<std::unique_ptr<LinkGraph>> createLinkGraphFromMachOObject_x86_64(
       .buildGraph();
 }
 
+struct CompactUnwindTraits_MachO_x86_64
+    : public CompactUnwindTraits<CompactUnwindTraits_MachO_x86_64,
+                                 /* PointerSize = */ 8> {
+  // FIXME: Reinstate once we no longer need the MSVC workaround. See
+  //        FIXME for CompactUnwindTraits in CompactUnwindSupport.h.
+  // constexpr static size_t PointerSize = 8;
+
+  constexpr static endianness Endianness = endianness::little;
+
+  constexpr static uint32_t EncodingModeMask = 0x0f000000;
+
+  using GOTManager = x86_64::GOTTableManager;
+
+  static bool encodingSpecifiesDWARF(uint32_t Encoding) {
+    constexpr uint32_t DWARFMode = 0x04000000;
+    return (Encoding & EncodingModeMask) == DWARFMode;
+  }
+
+  static bool encodingCannotBeMerged(uint32_t Encoding) {
+    constexpr uint32_t StackIndirectMode = 0x03000000;
+    return (Encoding & EncodingModeMask) == StackIndirectMode;
+  }
+};
+
 void link_MachO_x86_64(std::unique_ptr<LinkGraph> G,
                        std::unique_ptr<JITLinkContext> Ctx) {
 
   PassConfiguration Config;
 
   if (Ctx->shouldAddDefaultTargetPasses(G->getTargetTriple())) {
-    // Add eh-frame passes.
-    Config.PrePrunePasses.push_back(createEHFrameSplitterPass_MachO_x86_64());
-    Config.PrePrunePasses.push_back(createEHFrameEdgeFixerPass_MachO_x86_64());
-
-    // Add compact unwind splitter pass.
-    Config.PrePrunePasses.push_back(
-        CompactUnwindSplitter("__LD,__compact_unwind"));
-
     // Add a mark-live pass.
     if (auto MarkLive = Ctx->getMarkLivePass(G->getTargetTriple()))
       Config.PrePrunePasses.push_back(std::move(MarkLive));
     else
       Config.PrePrunePasses.push_back(markAllSymbolsLive);
 
+    // Add eh-frame passes.
+    Config.PrePrunePasses.push_back(createEHFrameSplitterPass_MachO_x86_64());
+    Config.PrePrunePasses.push_back(createEHFrameEdgeFixerPass_MachO_x86_64());
+
+    // Create a compact-unwind manager for use in passes below.
+    auto CompactUnwindMgr = std::make_shared<
+        CompactUnwindManager<CompactUnwindTraits_MachO_x86_64>>(
+        orc::MachOCompactUnwindSectionName, orc::MachOUnwindInfoSectionName,
+        orc::MachOEHFrameSectionName);
+
+    // Add compact unwind prepare pass.
+    Config.PrePrunePasses.push_back([CompactUnwindMgr](LinkGraph &G) {
+      return CompactUnwindMgr->prepareForPrune(G);
+    });
+
     // Resolve any external section start / end symbols.
     Config.PostAllocationPasses.push_back(
         createDefineExternalSectionStartAndEndSymbolsPass(
@@ -528,6 +561,16 @@ void link_MachO_x86_64(std::unique_ptr<LinkGraph> G,
     // Add an in-place GOT/Stubs pass.
     Config.PostPrunePasses.push_back(buildGOTAndStubs_MachO_x86_64);
 
+    // Reserve space for unwind-info.
+    Config.PostPrunePasses.push_back([CompactUnwindMgr](LinkGraph &G) {
+      return CompactUnwindMgr->processAndReserveUnwindInfo(G);
+    });
+
+    // Translate compact-unwind to unwind-info.
+    Config.PreFixupPasses.push_back([CompactUnwindMgr](LinkGraph &G) {
+      return CompactUnwindMgr->writeUnwindInfo(G);
+    });
+
     // Add GOT/Stubs optimizer pass.
     Config.PreFixupPasses.push_back(x86_64::optimizeGOTAndStubAccesses);
   }
@@ -540,11 +583,11 @@ void link_MachO_x86_64(std::unique_ptr<LinkGraph> G,
 }
 
 LinkGraphPassFunction createEHFrameSplitterPass_MachO_x86_64() {
-  return DWARFRecordSectionSplitter("__TEXT,__eh_frame");
+  return DWARFRecordSectionSplitter(orc::MachOEHFrameSectionName);
 }
 
 LinkGraphPassFunction createEHFrameEdgeFixerPass_MachO_x86_64() {
-  return EHFrameEdgeFixer("__TEXT,__eh_frame", x86_64::PointerSize,
+  return EHFrameEdgeFixer(orc::MachOEHFrameSectionName, x86_64::PointerSize,
                           x86_64::Pointer32, x86_64::Pointer64, x86_64::Delta32,
                           x86_64::Delta64, x86_64::NegDelta32);
 }
diff --git a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt
index 2ab5d6d..8a86629 100644
--- a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt
+++ b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt
@@ -57,6 +57,7 @@ add_llvm_component_library(LLVMOrcJIT
   ExecutorProcessControl.cpp
   TaskDispatch.cpp
   ThreadSafeModule.cpp
+  UnwindInfoRegistrationPlugin.cpp
   RedirectionManager.cpp
   JITLinkRedirectableSymbolManager.cpp
   ReOptimizeLayer.cpp
diff --git a/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp b/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp
index 5d2f3cd..c4d65af 100644
--- a/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp
@@ -33,6 +33,9 @@ irManglingOptionsFromTargetOptions(const TargetOptions &Opts) {
 
 /// Compile a Module to an ObjectFile.
 Expected<SimpleCompiler::CompileResult> SimpleCompiler::operator()(Module &M) {
+  if (M.getDataLayout().isDefault())
+    M.setDataLayout(TM.createDataLayout());
+
   CompileResult CachedObject = tryToLoadFromObjectCache(M);
   if (CachedObject)
     return std::move(CachedObject);
diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp
index d47eb44..9f466e7 100644
--- a/llvm/lib/ExecutionEngine/Orc/Core.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp
@@ -1251,9 +1251,7 @@ JITDylib::JITDylib(ExecutionSession &ES, std::string Name)
   LinkOrder.push_back({this, JITDylibLookupFlags::MatchAllSymbols});
 }
 
-std::pair<JITDylib::AsynchronousSymbolQuerySet,
-          std::shared_ptr<SymbolDependenceMap>>
-JITDylib::IL_removeTracker(ResourceTracker &RT) {
+JITDylib::RemoveTrackerResult JITDylib::IL_removeTracker(ResourceTracker &RT) {
   // Note: Should be called under the session lock.
   assert(State != Closed && "JD is defunct");
 
@@ -1292,7 +1290,10 @@ JITDylib::IL_removeTracker(ResourceTracker &RT) {
       SymbolsToFail.push_back(Sym);
   }
 
-  auto Result = ES.IL_failSymbols(*this, std::move(SymbolsToFail));
+  auto [QueriesToFail, FailedSymbols] =
+      ES.IL_failSymbols(*this, std::move(SymbolsToFail));
+
+  std::vector<std::unique_ptr<MaterializationUnit>> DefunctMUs;
 
   // Removed symbols should be taken out of the table altogether.
   for (auto &Sym : SymbolsToRemove) {
@@ -1302,7 +1303,12 @@ JITDylib::IL_removeTracker(ResourceTracker &RT) {
     // Remove Materializer if present.
     if (I->second.hasMaterializerAttached()) {
       // FIXME: Should this discard the symbols?
-      UnmaterializedInfos.erase(Sym);
+      auto J = UnmaterializedInfos.find(Sym);
+      assert(J != UnmaterializedInfos.end() &&
+             "Symbol table indicates MU present, but no UMI record");
+      if (J->second->MU)
+        DefunctMUs.push_back(std::move(J->second->MU));
+      UnmaterializedInfos.erase(J);
     } else {
       assert(!UnmaterializedInfos.count(Sym) &&
              "Symbol has materializer attached");
@@ -1313,7 +1319,8 @@ JITDylib::IL_removeTracker(ResourceTracker &RT) {
 
   shrinkMaterializationInfoMemory();
 
-  return Result;
+  return {std::move(QueriesToFail), std::move(FailedSymbols),
+          std::move(DefunctMUs)};
 }
 
 void JITDylib::transferTracker(ResourceTracker &DstRT, ResourceTracker &SrcRT) {
@@ -2180,16 +2187,17 @@ Error ExecutionSession::removeResourceTracker(ResourceTracker &RT) {
   });
   std::vector<ResourceManager *> CurrentResourceManagers;
 
-  JITDylib::AsynchronousSymbolQuerySet QueriesToFail;
-  std::shared_ptr<SymbolDependenceMap> FailedSymbols;
+  JITDylib::RemoveTrackerResult R;
 
   runSessionLocked([&] {
     CurrentResourceManagers = ResourceManagers;
     RT.makeDefunct();
-    std::tie(QueriesToFail, FailedSymbols) =
-        RT.getJITDylib().IL_removeTracker(RT);
+    R = RT.getJITDylib().IL_removeTracker(RT);
   });
 
+  // Release any defunct MaterializationUnits.
+  R.DefunctMUs.clear();
+
   Error Err = Error::success();
 
   auto &JD = RT.getJITDylib();
@@ -2197,9 +2205,9 @@ Error ExecutionSession::removeResourceTracker(ResourceTracker &RT) {
     Err = joinErrors(std::move(Err),
                      L->handleRemoveResources(JD, RT.getKeyUnsafe()));
 
-  for (auto &Q : QueriesToFail)
-    Q->handleFailed(
-        make_error<FailedToMaterialize>(getSymbolStringPool(), FailedSymbols));
+  for (auto &Q : R.QueriesToFail)
+    Q->handleFailed(make_error<FailedToMaterialize>(getSymbolStringPool(),
+                                                    R.FailedSymbols));
 
   return Err;
 }
diff --git a/llvm/lib/ExecutionEngine/Orc/EHFrameRegistrationPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/EHFrameRegistrationPlugin.cpp
index 217c693..161bd68 100644
--- a/llvm/lib/ExecutionEngine/Orc/EHFrameRegistrationPlugin.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/EHFrameRegistrationPlugin.cpp
@@ -9,6 +9,7 @@
 #include "llvm/ExecutionEngine/Orc/EHFrameRegistrationPlugin.h"
 
 #include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h"
+#include "llvm/ExecutionEngine/Orc/Shared/MachOObjectFormat.h"
 
 #define DEBUG_TYPE "orc"
 
@@ -21,11 +22,19 @@ EHFrameRegistrationPlugin::EHFrameRegistrationPlugin(
     : ES(ES), Registrar(std::move(Registrar)) {}
 
 void EHFrameRegistrationPlugin::modifyPassConfig(
-    MaterializationResponsibility &MR, LinkGraph &G,
+    MaterializationResponsibility &MR, LinkGraph &LG,
     PassConfiguration &PassConfig) {
 
+  if (LG.getTargetTriple().isOSBinFormatMachO())
+    PassConfig.PrePrunePasses.insert(
+        PassConfig.PrePrunePasses.begin(), [](LinkGraph &G) {
+          if (auto *CUSec = G.findSectionByName(MachOCompactUnwindSectionName))
+            G.removeSection(*CUSec);
+          return Error::success();
+        });
+
   PassConfig.PostFixupPasses.push_back(createEHFrameRecorderPass(
-      G.getTargetTriple(), [this, &MR](ExecutorAddr Addr, size_t Size) {
+      LG.getTargetTriple(), [this, &MR](ExecutorAddr Addr, size_t Size) {
         if (Addr) {
           std::lock_guard<std::mutex> Lock(EHFramePluginMutex);
           assert(!InProcessLinks.count(&MR) &&
diff --git a/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp b/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp
index aa79968..b51fa24b 100644
--- a/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp
@@ -45,6 +45,7 @@ SelfExecutorProcessControl::SelfExecutorProcessControl(
   this->DylibMgr = this;
   this->JDI = {ExecutorAddr::fromPtr(jitDispatchViaWrapperFunctionManager),
                ExecutorAddr::fromPtr(this)};
+
   if (this->TargetTriple.isOSBinFormatMachO())
     GlobalManglingPrefix = '_';
 
@@ -52,6 +53,12 @@ SelfExecutorProcessControl::SelfExecutorProcessControl(
       ExecutorAddr::fromPtr(&llvm_orc_registerEHFrameSectionWrapper);
   this->BootstrapSymbols[rt::DeregisterEHFrameSectionWrapperName] =
       ExecutorAddr::fromPtr(&llvm_orc_deregisterEHFrameSectionWrapper);
+
+#ifdef __APPLE__
+  this->UnwindInfoMgr = UnwindInfoManager::TryCreate();
+  if (this->UnwindInfoMgr)
+    this->UnwindInfoMgr->addBootstrapSymbols(this->BootstrapSymbols);
+#endif // __APPLE__
 }
 
 Expected<std::unique_ptr<SelfExecutorProcessControl>>
diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
index 80500d0..938fe58ef 100644
--- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
@@ -21,6 +21,7 @@
 #include "llvm/ExecutionEngine/Orc/ObjectTransformLayer.h"
 #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
 #include "llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h"
+#include "llvm/ExecutionEngine/Orc/UnwindInfoRegistrationPlugin.h"
 #include "llvm/ExecutionEngine/SectionMemoryManager.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/IRBuilder.h"
@@ -1220,12 +1221,30 @@ Expected<JITDylibSP> setUpGenericLLVMIRPlatform(LLJIT &J) {
 
   if (auto *OLL = dyn_cast<ObjectLinkingLayer>(&J.getObjLinkingLayer())) {
 
-    auto &ES = J.getExecutionSession();
-    if (auto EHFrameRegistrar = EPCEHFrameRegistrar::Create(ES))
-      OLL->addPlugin(std::make_unique<EHFrameRegistrationPlugin>(
-          ES, std::move(*EHFrameRegistrar)));
-    else
-      return EHFrameRegistrar.takeError();
+    bool CompactUnwindInfoSupported = false;
+
+    // Enable compact-unwind support if possible.
+    if (J.getTargetTriple().isOSDarwin() ||
+        J.getTargetTriple().isOSBinFormatMachO()) {
+      if (auto UIRP = UnwindInfoRegistrationPlugin::Create(
+              J.getIRCompileLayer(), PlatformJD)) {
+        CompactUnwindInfoSupported = true;
+        OLL->addPlugin(std::move(*UIRP));
+        LLVM_DEBUG(dbgs() << "Enabled compact-unwind support.\n");
+      } else
+        consumeError(UIRP.takeError());
+    }
+
+    // Otherwise fall back to standard unwind registration.
+    if (!CompactUnwindInfoSupported) {
+      auto &ES = J.getExecutionSession();
+      if (auto EHFrameRegistrar = EPCEHFrameRegistrar::Create(ES)) {
+        OLL->addPlugin(std::make_unique<EHFrameRegistrationPlugin>(
+            ES, std::move(*EHFrameRegistrar)));
+        LLVM_DEBUG(dbgs() << "Enabled eh-frame support.\n");
+      } else
+        return EHFrameRegistrar.takeError();
+    }
   }
 
   J.setPlatformSupport(
diff --git a/llvm/lib/ExecutionEngine/Orc/LinkGraphLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/LinkGraphLinkingLayer.cpp
index b4f78c6..1bb4440 100644
--- a/llvm/lib/ExecutionEngine/Orc/LinkGraphLinkingLayer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/LinkGraphLinkingLayer.cpp
@@ -499,7 +499,10 @@ LinkGraphLinkingLayer::LinkGraphLinkingLayer(
 }
 
 LinkGraphLinkingLayer::~LinkGraphLinkingLayer() {
-  assert(Allocs.empty() && "Layer destroyed with resources still attached");
+  assert(Allocs.empty() &&
+         "Layer destroyed with resources still attached "
+         "(ExecutionSession::endSession() must be called prior to "
+         "destruction)");
   getExecutionSession().deregisterResourceManager(*this);
 }
 
diff --git a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
index 9479a69..44ae7367 100644
--- a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
@@ -9,6 +9,7 @@
 #include "llvm/ExecutionEngine/Orc/MachOPlatform.h"
 
 #include "llvm/BinaryFormat/MachO.h"
+#include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h"
 #include "llvm/ExecutionEngine/JITLink/MachO.h"
 #include "llvm/ExecutionEngine/JITLink/aarch64.h"
 #include "llvm/ExecutionEngine/JITLink/x86_64.h"
@@ -1256,7 +1257,8 @@ MachOPlatform::MachOPlatformPlugin::findUnwindSectionInfo(
   // ScanSection records a section range and adds any executable blocks that
   // that section points to to the CodeBlocks vector.
   SmallVector<Block *> CodeBlocks;
-  auto ScanUnwindInfoSection = [&](Section &Sec, ExecutorAddrRange &SecRange) {
+  auto ScanUnwindInfoSection = [&](Section &Sec, ExecutorAddrRange &SecRange,
+                                   auto AddCodeBlocks) {
     if (Sec.blocks().empty())
       return;
     SecRange = (*Sec.blocks().begin())->getRange();
@@ -1264,22 +1266,31 @@ MachOPlatform::MachOPlatformPlugin::findUnwindSectionInfo(
       auto R = B->getRange();
       SecRange.Start = std::min(SecRange.Start, R.Start);
       SecRange.End = std::max(SecRange.End, R.End);
-      for (auto &E : B->edges()) {
-        if (E.getKind() != Edge::KeepAlive || !E.getTarget().isDefined())
-          continue;
-        auto &TargetBlock = E.getTarget().getBlock();
-        auto &TargetSection = TargetBlock.getSection();
-        if ((TargetSection.getMemProt() & MemProt::Exec) == MemProt::Exec)
-          CodeBlocks.push_back(&TargetBlock);
-      }
+      AddCodeBlocks(*B);
     }
   };
 
-  if (Section *EHFrameSec = G.findSectionByName(MachOEHFrameSectionName))
-    ScanUnwindInfoSection(*EHFrameSec, US.DwarfSection);
+  if (Section *EHFrameSec = G.findSectionByName(MachOEHFrameSectionName)) {
+    ScanUnwindInfoSection(*EHFrameSec, US.DwarfSection, [&](Block &B) {
+      if (auto *Fn = jitlink::EHFrameCFIBlockInspector::FromEdgeScan(B)
+                         .getPCBeginEdge())
+        if (Fn->getTarget().isDefined())
+          CodeBlocks.push_back(&Fn->getTarget().getBlock());
+    });
+  }
 
-  if (Section *CUInfoSec = G.findSectionByName(MachOCompactUnwindSectionName))
-    ScanUnwindInfoSection(*CUInfoSec, US.CompactUnwindSection);
+  if (Section *CUInfoSec = G.findSectionByName(MachOUnwindInfoSectionName)) {
+    ScanUnwindInfoSection(
+        *CUInfoSec, US.CompactUnwindSection, [&](Block &B) {
+          for (auto &E : B.edges()) {
+            assert(E.getTarget().isDefined() &&
+                   "unwind-info record edge has external target");
+            assert(E.getKind() == Edge::KeepAlive &&
+                   "unwind-info record has unexpected edge kind");
+            CodeBlocks.push_back(&E.getTarget().getBlock());
+          }
+        });
+  }
 
   // If we didn't find any pointed-to code-blocks then there's no need to
   // register any info.
diff --git a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
index a73b231..88cceac 100644
--- a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
@@ -88,7 +88,10 @@ RTDyldObjectLinkingLayer::RTDyldObjectLinkingLayer(
 }
 
 RTDyldObjectLinkingLayer::~RTDyldObjectLinkingLayer() {
-  assert(MemMgrs.empty() && "Layer destroyed with resources still attached");
+  assert(MemMgrs.empty() &&
+         "Layer destroyed with resources still attached"
+         "(ExecutionSession::endSession() must be called prior to "
+         "destruction)");
 }
 
 void RTDyldObjectLinkingLayer::emit(
diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/MachOObjectFormat.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/MachOObjectFormat.cpp
index be92acd..53488b75 100644
--- a/llvm/lib/ExecutionEngine/Orc/Shared/MachOObjectFormat.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Shared/MachOObjectFormat.cpp
@@ -18,7 +18,7 @@ namespace orc {
 StringRef MachODataCommonSectionName = "__DATA,__common";
 StringRef MachODataDataSectionName = "__DATA,__data";
 StringRef MachOEHFrameSectionName = "__TEXT,__eh_frame";
-StringRef MachOCompactUnwindSectionName = "__TEXT,__unwind_info";
+StringRef MachOCompactUnwindSectionName = "__LD,__compact_unwind";
 StringRef MachOCStringSectionName = "__TEXT,__cstring";
 StringRef MachOModInitFuncSectionName = "__DATA,__mod_init_func";
 StringRef MachOObjCCatListSectionName = "__DATA,__objc_catlist";
@@ -46,6 +46,7 @@ StringRef MachOTextTextSectionName = "__TEXT,__text";
 StringRef MachOThreadBSSSectionName = "__DATA,__thread_bss";
 StringRef MachOThreadDataSectionName = "__DATA,__thread_data";
 StringRef MachOThreadVarsSectionName = "__DATA,__thread_vars";
+StringRef MachOUnwindInfoSectionName = "__TEXT,__unwind_info";
 
 StringRef MachOInitSectionNames[22] = {
     MachOModInitFuncSectionName,         MachOObjCCatListSectionName,
diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp
index 54a25c0..fef3ff9 100644
--- a/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp
@@ -64,5 +64,19 @@ const char *RunAsIntFunctionWrapperName =
     "__llvm_orc_bootstrap_run_as_int_function_wrapper";
 
 } // end namespace rt
+namespace rt_alt {
+const char *UnwindInfoManagerInstanceName =
+    "orc_rt_alt_UnwindInfoManager_Instance";
+const char *UnwindInfoManagerFindSectionsHelperName =
+    "orc_rt_alt_UnwindInfoManager_findSectionsHelper";
+const char *UnwindInfoManagerEnableWrapperName =
+    "orc_rt_alt_UnwindInfoManager_enable";
+const char *UnwindInfoManagerDisableWrapperName =
+    "orc_rt_alt_UnwindInfoManager_disable";
+const char *UnwindInfoManagerRegisterActionName =
+    "orc_rt_alt_UnwindInfoManager_register";
+const char *UnwindInfoManagerDeregisterActionName =
+    "orc_rt_alt_UnwindInfoManager_deregister";
+} // end namespace rt_alt
 } // end namespace orc
 } // end namespace llvm
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt
index 03677d6..1d29a89 100644
--- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt
@@ -23,6 +23,7 @@ add_llvm_component_library(LLVMOrcTargetProcess
   SimpleExecutorMemoryManager.cpp
   SimpleRemoteEPCServer.cpp
   TargetExecutionUtils.cpp
+  UnwindInfoManager.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${LLVM_MAIN_INCLUDE_DIR}/llvm/ExecutionEngine/Orc
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/UnwindInfoManager.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/UnwindInfoManager.cpp
new file mode 100644
index 0000000..9f74815
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/UnwindInfoManager.cpp
@@ -0,0 +1,188 @@
+//===------- UnwindInfoManager.cpp - Register unwind info sections --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/TargetProcess/UnwindInfoManager.h"
+#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
+#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"
+#include "llvm/Support/DynamicLibrary.h"
+
+#define DEBUG_TYPE "orc"
+
+using namespace llvm;
+using namespace llvm::orc;
+using namespace llvm::orc::shared;
+
+static orc::shared::CWrapperFunctionResult
+llvm_orc_rt_alt_UnwindInfoManager_enable(const char *Data, uint64_t Size) {
+  return WrapperFunction<SPSError(SPSExecutorAddr, SPSExecutorAddr)>::handle(
+             Data, Size,
+             [](ExecutorAddr Instance, ExecutorAddr FindFn) {
+               return Instance.toPtr<UnwindInfoManager *>()->enable(
+                   FindFn.toPtr<void *>());
+             })
+      .release();
+}
+
+static orc::shared::CWrapperFunctionResult
+llvm_orc_rt_alt_UnwindInfoManager_disable(const char *Data, uint64_t Size) {
+  return WrapperFunction<SPSError(SPSExecutorAddr)>::handle(
+             Data, Size,
+             [](ExecutorAddr Instance) {
+               return Instance.toPtr<UnwindInfoManager *>()->disable();
+             })
+      .release();
+}
+
+static orc::shared::CWrapperFunctionResult
+llvm_orc_rt_alt_UnwindInfoManager_register(const char *Data, uint64_t Size) {
+  using SPSSig =
+      SPSError(SPSExecutorAddr, SPSSequence<SPSExecutorAddrRange>,
+               SPSExecutorAddr, SPSExecutorAddrRange, SPSExecutorAddrRange);
+
+  return WrapperFunction<SPSSig>::handle(
+             Data, Size,
+             [](ExecutorAddr Instance,
+                std::vector<ExecutorAddrRange> CodeRanges, ExecutorAddr DSOBase,
+                ExecutorAddrRange DWARFRange,
+                ExecutorAddrRange CompactUnwindRange) {
+               return Instance.toPtr<UnwindInfoManager *>()->registerSections(
+                   CodeRanges, DSOBase, DWARFRange, CompactUnwindRange);
+             })
+      .release();
+}
+
+static orc::shared::CWrapperFunctionResult
+llvm_orc_rt_alt_UnwindInfoManager_deregister(const char *Data, uint64_t Size) {
+  using SPSSig = SPSError(SPSExecutorAddr, SPSSequence<SPSExecutorAddrRange>);
+
+  return WrapperFunction<SPSSig>::handle(
+             Data, Size,
+             [](ExecutorAddr Instance,
+                std::vector<ExecutorAddrRange> CodeRanges) {
+               return Instance.toPtr<UnwindInfoManager *>()->deregisterSections(
+                   CodeRanges);
+             })
+      .release();
+}
+
+namespace llvm::orc {
+
+const char *UnwindInfoManager::AddFnName =
+    "__unw_add_find_dynamic_unwind_sections";
+const char *UnwindInfoManager::RemoveFnName =
+    "__unw_remove_find_dynamic_unwind_sections";
+
+std::unique_ptr<UnwindInfoManager> UnwindInfoManager::TryCreate() {
+  std::string ErrMsg;
+  auto DL = sys::DynamicLibrary::getPermanentLibrary(nullptr, &ErrMsg);
+  if (!DL.isValid())
+    return nullptr;
+
+  auto AddFindDynamicUnwindSections =
+      (int (*)(void *))DL.getAddressOfSymbol(AddFnName);
+  if (!AddFindDynamicUnwindSections)
+    return nullptr;
+
+  auto RemoveFindDynamicUnwindSections =
+      (int (*)(void *))DL.getAddressOfSymbol(RemoveFnName);
+  if (!RemoveFindDynamicUnwindSections)
+    return nullptr;
+
+  return std::unique_ptr<UnwindInfoManager>(new UnwindInfoManager(
+      AddFindDynamicUnwindSections, RemoveFindDynamicUnwindSections));
+}
+
+Error UnwindInfoManager::shutdown() { return Error::success(); }
+
+void UnwindInfoManager::addBootstrapSymbols(StringMap<ExecutorAddr> &M) {
+  M[rt_alt::UnwindInfoManagerInstanceName] = ExecutorAddr::fromPtr(this);
+  M[rt_alt::UnwindInfoManagerFindSectionsHelperName] =
+      ExecutorAddr::fromPtr(&findSectionsHelper);
+  M[rt_alt::UnwindInfoManagerEnableWrapperName] =
+      ExecutorAddr::fromPtr(llvm_orc_rt_alt_UnwindInfoManager_enable);
+  M[rt_alt::UnwindInfoManagerDisableWrapperName] =
+      ExecutorAddr::fromPtr(llvm_orc_rt_alt_UnwindInfoManager_disable);
+  M[rt_alt::UnwindInfoManagerRegisterActionName] =
+      ExecutorAddr::fromPtr(llvm_orc_rt_alt_UnwindInfoManager_register);
+  M[rt_alt::UnwindInfoManagerDeregisterActionName] =
+      ExecutorAddr::fromPtr(llvm_orc_rt_alt_UnwindInfoManager_deregister);
+}
+
+Error UnwindInfoManager::enable(void *FindDynamicUnwindSections) {
+  LLVM_DEBUG(dbgs() << "Enabling UnwindInfoManager.\n");
+
+  if (auto Err = AddFindDynamicUnwindSections(FindDynamicUnwindSections))
+    return make_error<StringError>(Twine("Could not register function via ") +
+                                       AddFnName +
+                                       ", error code = " + Twine(Err),
+                                   inconvertibleErrorCode());
+
+  this->FindDynamicUnwindSections = FindDynamicUnwindSections;
+  return Error::success();
+}
+
+Error UnwindInfoManager::disable(void) {
+  LLVM_DEBUG(dbgs() << "Disabling UnwindInfoManager.\n");
+
+  if (FindDynamicUnwindSections)
+    if (auto Err = RemoveFindDynamicUnwindSections(FindDynamicUnwindSections))
+      return make_error<StringError>(
+          Twine("Could not deregister function via ") + RemoveFnName +
+              "error code = " + Twine(Err),
+          inconvertibleErrorCode());
+
+  FindDynamicUnwindSections = nullptr;
+  return Error::success();
+}
+
+Error UnwindInfoManager::registerSections(
+    ArrayRef<ExecutorAddrRange> CodeRanges, ExecutorAddr DSOBase,
+    ExecutorAddrRange DWARFEHFrame, ExecutorAddrRange CompactUnwind) {
+  std::lock_guard<std::mutex> Lock(M);
+  for (auto &R : CodeRanges)
+    UWSecs[R.Start.getValue()] =
+        UnwindSections{static_cast<uintptr_t>(DSOBase.getValue()),
+                       static_cast<uintptr_t>(DWARFEHFrame.Start.getValue()),
+                       static_cast<size_t>(DWARFEHFrame.size()),
+                       static_cast<uintptr_t>(CompactUnwind.Start.getValue()),
+                       static_cast<size_t>(CompactUnwind.size())};
+  return Error::success();
+}
+
+Error UnwindInfoManager::deregisterSections(
+    ArrayRef<ExecutorAddrRange> CodeRanges) {
+  std::lock_guard<std::mutex> Lock(M);
+  for (auto &R : CodeRanges) {
+    auto I = UWSecs.find(R.Start.getValue());
+    if (I == UWSecs.end())
+      return make_error<StringError>(
+          "No unwind-info sections registered for range " +
+              formatv("{0:x} - {1:x}", R.Start, R.End),
+          inconvertibleErrorCode());
+    UWSecs.erase(I);
+  }
+  return Error::success();
+}
+
+int UnwindInfoManager::findSections(uintptr_t Addr, UnwindSections *Info) {
+  std::lock_guard<std::mutex> Lock(M);
+  auto I = UWSecs.upper_bound(Addr);
+  if (I == UWSecs.begin())
+    return 0;
+  --I;
+  *Info = I->second;
+  return 1;
+}
+
+int UnwindInfoManager::findSectionsHelper(UnwindInfoManager *Instance,
+                                          uintptr_t Addr,
+                                          UnwindSections *Info) {
+  return Instance->findSections(Addr, Info);
+}
+
+} // namespace llvm::orc
diff --git a/llvm/lib/ExecutionEngine/Orc/UnwindInfoRegistrationPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/UnwindInfoRegistrationPlugin.cpp
new file mode 100644
index 0000000..ae1f3f9
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/UnwindInfoRegistrationPlugin.cpp
@@ -0,0 +1,238 @@
+//===----- UnwindInfoRegistrationPlugin.cpp - libunwind registration ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/UnwindInfoRegistrationPlugin.h"
+
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ExecutionEngine/Orc/AbsoluteSymbols.h"
+#include "llvm/ExecutionEngine/Orc/Shared/MachOObjectFormat.h"
+#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+
+#define DEBUG_TYPE "orc"
+
+using namespace llvm::jitlink;
+
+static const char *FindDynamicUnwindSectionsFunctionName =
+    "_orc_rt_alt_find_dynamic_unwind_sections";
+
+namespace llvm::orc {
+
+Expected<std::shared_ptr<UnwindInfoRegistrationPlugin>>
+UnwindInfoRegistrationPlugin::Create(IRLayer &IRL, JITDylib &PlatformJD,
+                                     ExecutorAddr Instance,
+                                     ExecutorAddr FindHelper,
+                                     ExecutorAddr Enable, ExecutorAddr Disable,
+                                     ExecutorAddr Register,
+                                     ExecutorAddr Deregister) {
+
+  auto &ES = IRL.getExecutionSession();
+
+  // Build bouncer module.
+  auto M = makeBouncerModule(ES);
+  if (!M)
+    return M.takeError();
+
+  auto BouncerRT = PlatformJD.createResourceTracker();
+  auto RemoveBouncerModule = make_scope_exit([&]() {
+    if (auto Err = BouncerRT->remove())
+      ES.reportError(std::move(Err));
+  });
+
+  if (auto Err = PlatformJD.define(absoluteSymbols(
+          {{ES.intern(rt_alt::UnwindInfoManagerInstanceName),
+            ExecutorSymbolDef(Instance, JITSymbolFlags())},
+           {ES.intern(rt_alt::UnwindInfoManagerFindSectionsHelperName),
+            ExecutorSymbolDef(FindHelper, JITSymbolFlags::Callable)}})))
+    return std::move(Err);
+
+  if (auto Err = IRL.add(BouncerRT, std::move(*M)))
+    return Err;
+
+  auto FindUnwindSections =
+      ES.lookup({&PlatformJD}, FindDynamicUnwindSectionsFunctionName);
+  if (!FindUnwindSections)
+    return FindUnwindSections.takeError();
+
+  using namespace shared;
+  using SPSEnableSig = SPSError(SPSExecutorAddr, SPSExecutorAddr);
+  Error CallErr = Error::success();
+  if (auto Err = ES.callSPSWrapper<SPSEnableSig>(
+          Enable, CallErr, Instance, FindUnwindSections->getAddress())) {
+    consumeError(std::move(CallErr));
+    return std::move(Err);
+  }
+
+  if (CallErr)
+    return std::move(CallErr);
+
+  RemoveBouncerModule.release();
+
+  return std::shared_ptr<UnwindInfoRegistrationPlugin>(
+      new UnwindInfoRegistrationPlugin(ES, Instance, Disable, Register,
+                                       Deregister));
+}
+
+Expected<std::shared_ptr<UnwindInfoRegistrationPlugin>>
+UnwindInfoRegistrationPlugin::Create(IRLayer &IRL, JITDylib &PlatformJD) {
+
+  ExecutorAddr Instance, FindHelper, Enable, Disable, Register, Deregister;
+
+  auto &EPC = IRL.getExecutionSession().getExecutorProcessControl();
+  if (auto Err = EPC.getBootstrapSymbols(
+          {{Instance, rt_alt::UnwindInfoManagerInstanceName},
+           {FindHelper, rt_alt::UnwindInfoManagerFindSectionsHelperName},
+           {Enable, rt_alt::UnwindInfoManagerEnableWrapperName},
+           {Disable, rt_alt::UnwindInfoManagerDisableWrapperName},
+           {Register, rt_alt::UnwindInfoManagerRegisterActionName},
+           {Deregister, rt_alt::UnwindInfoManagerDeregisterActionName}}))
+    return std::move(Err);
+
+  return Create(IRL, PlatformJD, Instance, FindHelper, Enable, Disable,
+                Register, Deregister);
+}
+
+UnwindInfoRegistrationPlugin::~UnwindInfoRegistrationPlugin() {
+  using namespace shared;
+  using SPSDisableSig = SPSError(SPSExecutorAddr);
+  Error CallErr = Error::success();
+  if (auto Err = ES.callSPSWrapper<SPSDisableSig>(Disable, CallErr, Instance)) {
+    consumeError(std::move(CallErr));
+    ES.reportError(std::move(Err));
+  }
+  if (CallErr)
+    ES.reportError(std::move(CallErr));
+}
+
+void UnwindInfoRegistrationPlugin::modifyPassConfig(
+    MaterializationResponsibility &MR, LinkGraph &G,
+    PassConfiguration &PassConfig) {
+
+  PassConfig.PostFixupPasses.push_back(
+      [this](LinkGraph &G) { return addUnwindInfoRegistrationActions(G); });
+}
+
+Expected<ThreadSafeModule>
+UnwindInfoRegistrationPlugin::makeBouncerModule(ExecutionSession &ES) {
+  auto Ctx = std::make_unique<LLVMContext>();
+  auto M = std::make_unique<Module>("__libunwind_find_unwind_bouncer", *Ctx);
+  M->setTargetTriple(ES.getTargetTriple().str());
+
+  auto EscapeName = [](const char *N) { return std::string("\01") + N; };
+
+  auto *PtrTy = PointerType::getUnqual(*Ctx);
+  auto *OpaqueStructTy = StructType::create(*Ctx, "UnwindInfoMgr");
+  auto *UnwindMgrInstance = new GlobalVariable(
+      *M, OpaqueStructTy, true, GlobalValue::ExternalLinkage, nullptr,
+      EscapeName(rt_alt::UnwindInfoManagerInstanceName));
+
+  auto *Int64Ty = Type::getInt64Ty(*Ctx);
+  auto *FindHelperTy = FunctionType::get(Int64Ty, {PtrTy, PtrTy, PtrTy}, false);
+  auto *FindHelperFn = Function::Create(
+      FindHelperTy, GlobalValue::ExternalLinkage,
+      EscapeName(rt_alt::UnwindInfoManagerFindSectionsHelperName), *M);
+
+  auto *FindFnTy = FunctionType::get(Int64Ty, {PtrTy, PtrTy}, false);
+  auto *FindFn =
+      Function::Create(FindFnTy, GlobalValue::ExternalLinkage,
+                       EscapeName(FindDynamicUnwindSectionsFunctionName), *M);
+  auto *EntryBlock = BasicBlock::Create(M->getContext(), StringRef(), FindFn);
+  IRBuilder<> IB(EntryBlock);
+
+  std::vector<Value *> FindHelperArgs;
+  FindHelperArgs.push_back(UnwindMgrInstance);
+  for (auto &Arg : FindFn->args())
+    FindHelperArgs.push_back(&Arg);
+
+  IB.CreateRet(IB.CreateCall(FindHelperFn, FindHelperArgs));
+
+  return ThreadSafeModule(std::move(M), std::move(Ctx));
+}
+
+Error UnwindInfoRegistrationPlugin::addUnwindInfoRegistrationActions(
+    LinkGraph &G) {
+  ExecutorAddrRange EHFrameRange, UnwindInfoRange;
+
+  std::vector<Block *> CodeBlocks;
+
+  auto ScanUnwindInfoSection = [&](Section &Sec, ExecutorAddrRange &SecRange) {
+    if (Sec.empty())
+      return;
+
+    SecRange.Start = (*Sec.blocks().begin())->getAddress();
+    for (auto *B : Sec.blocks()) {
+      auto R = B->getRange();
+      SecRange.Start = std::min(SecRange.Start, R.Start);
+      SecRange.End = std::max(SecRange.End, R.End);
+      for (auto &E : B->edges()) {
+        if (E.getKind() != Edge::KeepAlive || !E.getTarget().isDefined())
+          continue;
+        auto &TargetBlock = E.getTarget().getBlock();
+        auto &TargetSection = TargetBlock.getSection();
+        if ((TargetSection.getMemProt() & MemProt::Exec) == MemProt::Exec)
+          CodeBlocks.push_back(&TargetBlock);
+      }
+    }
+  };
+
+  if (auto *EHFrame = G.findSectionByName(MachOEHFrameSectionName))
+    ScanUnwindInfoSection(*EHFrame, EHFrameRange);
+
+  if (auto *UnwindInfo = G.findSectionByName(MachOUnwindInfoSectionName))
+    ScanUnwindInfoSection(*UnwindInfo, UnwindInfoRange);
+
+  if (CodeBlocks.empty())
+    return Error::success();
+
+  if ((EHFrameRange == ExecutorAddrRange() &&
+       UnwindInfoRange == ExecutorAddrRange()))
+    return Error::success();
+
+  llvm::sort(CodeBlocks, [](const Block *LHS, const Block *RHS) {
+    return LHS->getAddress() < RHS->getAddress();
+  });
+
+  SmallVector<ExecutorAddrRange> CodeRanges;
+  for (auto *B : CodeBlocks) {
+    if (CodeRanges.empty() || CodeRanges.back().End != B->getAddress())
+      CodeRanges.push_back(B->getRange());
+    else
+      CodeRanges.back().End = B->getRange().End;
+  }
+
+  ExecutorAddr DSOBase;
+  if (auto *DSOBaseSym = G.findAbsoluteSymbolByName(DSOBaseName))
+    DSOBase = DSOBaseSym->getAddress();
+  else if (auto *DSOBaseSym = G.findExternalSymbolByName(DSOBaseName))
+    DSOBase = DSOBaseSym->getAddress();
+  else if (auto *DSOBaseSym = G.findDefinedSymbolByName(DSOBaseName))
+    DSOBase = DSOBaseSym->getAddress();
+  else
+    return make_error<StringError>("In " + G.getName() +
+                                       " could not find dso base symbol",
+                                   inconvertibleErrorCode());
+
+  using namespace shared;
+  using SPSRegisterArgs =
+      SPSArgList<SPSExecutorAddr, SPSSequence<SPSExecutorAddrRange>,
+                 SPSExecutorAddr, SPSExecutorAddrRange, SPSExecutorAddrRange>;
+  using SPSDeregisterArgs =
+      SPSArgList<SPSExecutorAddr, SPSSequence<SPSExecutorAddrRange>>;
+
+  G.allocActions().push_back(
+      {cantFail(WrapperFunctionCall::Create<SPSRegisterArgs>(
+           Register, Instance, CodeRanges, DSOBase, EHFrameRange,
+           UnwindInfoRange)),
+       cantFail(WrapperFunctionCall::Create<SPSDeregisterArgs>(
+           Deregister, Instance, CodeRanges))});
+
+  return Error::success();
+}
+
+} // namespace llvm::orc
diff --git a/llvm/lib/Frontend/OpenMP/OMPContext.cpp b/llvm/lib/Frontend/OpenMP/OMPContext.cpp
index c26f5b4..cca5f88 100644
--- a/llvm/lib/Frontend/OpenMP/OMPContext.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPContext.cpp
@@ -24,44 +24,91 @@
 using namespace llvm;
 using namespace omp;
 
-OMPContext::OMPContext(bool IsDeviceCompilation, Triple TargetTriple) {
-  // Add the appropriate device kind trait based on the triple and the
-  // IsDeviceCompilation flag.
-  ActiveTraits.set(unsigned(IsDeviceCompilation
-                                ? TraitProperty::device_kind_nohost
-                                : TraitProperty::device_kind_host));
-  switch (TargetTriple.getArch()) {
-  case Triple::arm:
-  case Triple::armeb:
-  case Triple::aarch64:
-  case Triple::aarch64_be:
-  case Triple::aarch64_32:
-  case Triple::loongarch64:
-  case Triple::mips:
-  case Triple::mipsel:
-  case Triple::mips64:
-  case Triple::mips64el:
-  case Triple::ppc:
-  case Triple::ppcle:
-  case Triple::ppc64:
-  case Triple::ppc64le:
-  case Triple::systemz:
-  case Triple::x86:
-  case Triple::x86_64:
-    ActiveTraits.set(unsigned(TraitProperty::device_kind_cpu));
-    break;
-  case Triple::amdgcn:
-  case Triple::nvptx:
-  case Triple::nvptx64:
-    ActiveTraits.set(unsigned(TraitProperty::device_kind_gpu));
-    break;
-  default:
-    break;
+OMPContext::OMPContext(bool IsDeviceCompilation, Triple TargetTriple,
+                       Triple TargetOffloadTriple, int DeviceNum) {
+  // Add the appropriate target device kind trait based on the target triple
+  if (!TargetOffloadTriple.getTriple().empty() && DeviceNum > -1) {
+    // If target triple is present, then target device is not a host
+    ActiveTraits.set(unsigned(TraitProperty::target_device_kind_nohost));
+    switch (TargetOffloadTriple.getArch()) {
+    case Triple::arm:
+    case Triple::armeb:
+    case Triple::aarch64:
+    case Triple::aarch64_be:
+    case Triple::aarch64_32:
+    case Triple::mips:
+    case Triple::mipsel:
+    case Triple::mips64:
+    case Triple::mips64el:
+    case Triple::ppc:
+    case Triple::ppcle:
+    case Triple::ppc64:
+    case Triple::ppc64le:
+    case Triple::systemz:
+    case Triple::x86:
+    case Triple::x86_64:
+      ActiveTraits.set(unsigned(TraitProperty::target_device_kind_cpu));
+      break;
+    case Triple::amdgcn:
+    case Triple::nvptx:
+    case Triple::nvptx64:
+      ActiveTraits.set(unsigned(TraitProperty::target_device_kind_gpu));
+      break;
+    default:
+      break;
+    }
+    // Add the appropriate device architecture trait based on the triple.
+#define OMP_TRAIT_PROPERTY(Enum, TraitSetEnum, TraitSelectorEnum, Str)         \
+  if (TraitSelector::TraitSelectorEnum == TraitSelector::target_device_arch) { \
+    if (TargetOffloadTriple.getArch() ==                                       \
+        TargetOffloadTriple.getArchTypeForLLVMName(Str))                       \
+      ActiveTraits.set(unsigned(TraitProperty::Enum));                         \
+    if (StringRef(Str) == "x86_64" &&                                          \
+        TargetOffloadTriple.getArch() == Triple::x86_64)                       \
+      ActiveTraits.set(unsigned(TraitProperty::Enum));                         \
   }
+#undef OMP_TRAIT_PROPERTY
+  } else {
+    // Add the appropriate device kind trait based on the triple and the
+    // IsDeviceCompilation flag.
+    ActiveTraits.set(unsigned(IsDeviceCompilation
+                                  ? TraitProperty::device_kind_nohost
+                                  : TraitProperty::device_kind_host));
+    ActiveTraits.set(unsigned(TraitProperty::target_device_kind_host));
+    switch (TargetTriple.getArch()) {
+    case Triple::arm:
+    case Triple::armeb:
+    case Triple::aarch64:
+    case Triple::aarch64_be:
+    case Triple::aarch64_32:
+    case Triple::mips:
+    case Triple::mipsel:
+    case Triple::mips64:
+    case Triple::mips64el:
+    case Triple::ppc:
+    case Triple::ppcle:
+    case Triple::ppc64:
+    case Triple::ppc64le:
+    case Triple::systemz:
+    case Triple::x86:
+    case Triple::x86_64:
+      ActiveTraits.set(unsigned(TraitProperty::device_kind_cpu));
+      ActiveTraits.set(unsigned(TraitProperty::target_device_kind_cpu));
+      break;
+    case Triple::amdgcn:
+    case Triple::nvptx:
+    case Triple::nvptx64:
+      ActiveTraits.set(unsigned(TraitProperty::device_kind_gpu));
+      ActiveTraits.set(unsigned(TraitProperty::target_device_kind_gpu));
+      break;
+    default:
+      break;
+    }
 
-  // Add the appropriate device architecture trait based on the triple.
+    // Add the appropriate device architecture trait based on the triple.
 #define OMP_TRAIT_PROPERTY(Enum, TraitSetEnum, TraitSelectorEnum, Str)         \
-  if (TraitSelector::TraitSelectorEnum == TraitSelector::device_arch) {        \
+  if (TraitSelector::TraitSelectorEnum == TraitSelector::device_arch ||        \
+      TraitSelector::TraitSelectorEnum == TraitSelector::target_device_arch) { \
     if (TargetTriple.getArch() == TargetTriple.getArchTypeForLLVMName(Str))    \
       ActiveTraits.set(unsigned(TraitProperty::Enum));                         \
     if (StringRef(Str) == "x86_64" &&                                          \
@@ -70,29 +117,30 @@ OMPContext::OMPContext(bool IsDeviceCompilation, Triple TargetTriple) {
   }
 #include "llvm/Frontend/OpenMP/OMPKinds.def"
 
-  // TODO: What exactly do we want to see as device ISA trait?
-  //       The discussion on the list did not seem to have come to an agreed
-  //       upon solution.
+    // TODO: What exactly do we want to see as device ISA trait?
+    //       The discussion on the list did not seem to have come to an agreed
+    //       upon solution.
 
-  // LLVM is the "OpenMP vendor" but we could also interpret vendor as the
-  // target vendor.
-  ActiveTraits.set(unsigned(TraitProperty::implementation_vendor_llvm));
+    // LLVM is the "OpenMP vendor" but we could also interpret vendor as the
+    // target vendor.
+    ActiveTraits.set(unsigned(TraitProperty::implementation_vendor_llvm));
 
-  // The user condition true is accepted but not false.
-  ActiveTraits.set(unsigned(TraitProperty::user_condition_true));
+    // The user condition true is accepted but not false.
+    ActiveTraits.set(unsigned(TraitProperty::user_condition_true));
 
-  // This is for sure some device.
-  ActiveTraits.set(unsigned(TraitProperty::device_kind_any));
+    // This is for sure some device.
+    ActiveTraits.set(unsigned(TraitProperty::device_kind_any));
 
-  LLVM_DEBUG({
-    dbgs() << "[" << DEBUG_TYPE
-           << "] New OpenMP context with the following properties:\n";
-    for (unsigned Bit : ActiveTraits.set_bits()) {
-      TraitProperty Property = TraitProperty(Bit);
-      dbgs() << "\t " << getOpenMPContextTraitPropertyFullName(Property)
-             << "\n";
-    }
-  });
+    LLVM_DEBUG({
+      dbgs() << "[" << DEBUG_TYPE
+             << "] New OpenMP context with the following properties:\n";
+      for (unsigned Bit : ActiveTraits.set_bits()) {
+        TraitProperty Property = TraitProperty(Bit);
+        dbgs() << "\t " << getOpenMPContextTraitPropertyFullName(Property)
+               << "\n";
+      }
+    });
+  }
 }
 
 /// Return true if \p C0 is a subset of \p C1. Note that both arrays are
@@ -213,6 +261,10 @@ static int isVariantApplicableInContextHelper(
       IsActiveTrait = llvm::all_of(VMI.ISATraits, [&](StringRef RawString) {
         return Ctx.matchesISATrait(RawString);
       });
+    if (Property == TraitProperty::target_device_isa___ANY)
+      IsActiveTrait = llvm::all_of(VMI.ISATraits, [&](StringRef RawString) {
+        return Ctx.matchesISATrait(RawString);
+      });
 
     if (std::optional<bool> Result = HandleTrait(Property, IsActiveTrait))
       return *Result;
@@ -298,6 +350,9 @@ static APInt getVariantMatchScore(const VariantMatchInfo &VMI,
     case TraitSet::device:
       // Handled separately below.
       break;
+    case TraitSet::target_device:
+      // TODO: Handling separately.
+      break;
     case TraitSet::invalid:
       llvm_unreachable("Unknown trait set is not to be used!");
     }
@@ -305,6 +360,8 @@ static APInt getVariantMatchScore(const VariantMatchInfo &VMI,
     // device={kind(any)} is "as if" no kind selector was specified.
     if (Property == TraitProperty::device_kind_any)
       continue;
+    if (Property == TraitProperty::target_device_kind_any)
+      continue;
 
     switch (getOpenMPContextTraitSelectorForProperty(Property)) {
     case TraitSelector::device_kind:
@@ -316,6 +373,15 @@ static APInt getVariantMatchScore(const VariantMatchInfo &VMI,
     case TraitSelector::device_isa:
       Score += (1ULL << (NoConstructTraits + 2));
       continue;
+    case TraitSelector::target_device_kind:
+      Score += (1ULL << (NoConstructTraits + 0));
+      continue;
+    case TraitSelector::target_device_arch:
+      Score += (1ULL << (NoConstructTraits + 1));
+      continue;
+    case TraitSelector::target_device_isa:
+      Score += (1ULL << (NoConstructTraits + 2));
+      continue;
     default:
       continue;
     }
@@ -412,7 +478,14 @@ StringRef llvm::omp::getOpenMPContextTraitSetName(TraitSet Kind) {
   llvm_unreachable("Unknown trait set!");
 }
 
-TraitSelector llvm::omp::getOpenMPContextTraitSelectorKind(StringRef S) {
+TraitSelector llvm::omp::getOpenMPContextTraitSelectorKind(StringRef S,
+                                                           TraitSet Set) {
+  if (Set == TraitSet::target_device && S == "kind")
+    return TraitSelector::target_device_kind;
+  if (Set == TraitSet::target_device && S == "arch")
+    return TraitSelector::target_device_arch;
+  if (Set == TraitSet::target_device && S == "isa")
+    return TraitSelector::target_device_isa;
   return StringSwitch<TraitSelector>(S)
 #define OMP_TRAIT_SELECTOR(Enum, TraitSetEnum, Str, ReqProp)                   \
   .Case(Str, TraitSelector::Enum)
@@ -445,6 +518,9 @@ TraitProperty llvm::omp::getOpenMPContextTraitPropertyKind(
   // up to the target to decide if the feature is available.
   if (Set == TraitSet::device && Selector == TraitSelector::device_isa)
     return TraitProperty::device_isa___ANY;
+  if (Set == TraitSet::target_device &&
+      Selector == TraitSelector::target_device_isa)
+    return TraitProperty::target_device_isa___ANY;
 #define OMP_TRAIT_PROPERTY(Enum, TraitSetEnum, TraitSelectorEnum, Str)         \
   if (Set == TraitSet::TraitSetEnum && Str == S)                               \
     return TraitProperty::Enum;
@@ -466,6 +542,8 @@ StringRef llvm::omp::getOpenMPContextTraitPropertyName(TraitProperty Kind,
                                                        StringRef RawString) {
   if (Kind == TraitProperty::device_isa___ANY)
     return RawString;
+  if (Kind == TraitProperty::target_device_isa___ANY)
+    return RawString;
   switch (Kind) {
 #define OMP_TRAIT_PROPERTY(Enum, TraitSetEnum, TraitSelectorEnum, Str)         \
   case TraitProperty::Enum:                                                    \
@@ -488,7 +566,8 @@ bool llvm::omp::isValidTraitSelectorForTraitSet(TraitSelector Selector,
                                                 TraitSet Set,
                                                 bool &AllowsTraitScore,
                                                 bool &RequiresProperty) {
-  AllowsTraitScore = Set != TraitSet::construct && Set != TraitSet::device;
+  AllowsTraitScore = Set != TraitSet::construct && Set != TraitSet::device &&
+                     Set != TraitSet::target_device;
   switch (Selector) {
 #define OMP_TRAIT_SELECTOR(Enum, TraitSetEnum, Str, ReqProp)                   \
   case TraitSelector::Enum:                                                    \
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index 1fdf0ea..e2d6073 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -2702,8 +2702,7 @@ BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name,
 
 // Exchange the two operands to this instruction. This instruction is safe to
 // use on any binary instruction and does not modify the semantics of the
-// instruction. If the instruction is order-dependent (SetLT f.e.), the opcode
-// is changed.
+// instruction.
 bool BinaryOperator::swapOperands() {
   if (!isCommutative())
     return true; // Can't commute operands
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index ad174b1..256bce1 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -885,3 +885,31 @@ Value *GCRelocateInst::getDerivedPtr() const {
     return *(Opt->Inputs.begin() + getDerivedPtrIndex());
   return *(GCInst->arg_begin() + getDerivedPtrIndex());
 }
+
+ConvergenceControlInst *ConvergenceControlInst::CreateAnchor(BasicBlock &BB) {
+  Module *M = BB.getModule();
+  Function *Fn = Intrinsic::getOrInsertDeclaration(
+      M, llvm::Intrinsic::experimental_convergence_anchor);
+  auto *Call = CallInst::Create(Fn, "", BB.getFirstInsertionPt());
+  return cast<ConvergenceControlInst>(Call);
+}
+
+ConvergenceControlInst *ConvergenceControlInst::CreateEntry(BasicBlock &BB) {
+  Module *M = BB.getModule();
+  Function *Fn = Intrinsic::getOrInsertDeclaration(
+      M, llvm::Intrinsic::experimental_convergence_entry);
+  auto *Call = CallInst::Create(Fn, "", BB.getFirstInsertionPt());
+  return cast<ConvergenceControlInst>(Call);
+}
+
+ConvergenceControlInst *
+ConvergenceControlInst::CreateLoop(BasicBlock &BB,
+                                   ConvergenceControlInst *ParentToken) {
+  Module *M = BB.getModule();
+  Function *Fn = Intrinsic::getOrInsertDeclaration(
+      M, llvm::Intrinsic::experimental_convergence_loop);
+  llvm::Value *BundleArgs[] = {ParentToken};
+  llvm::OperandBundleDef OB("convergencectrl", BundleArgs);
+  auto *Call = CallInst::Create(Fn, {}, {OB}, "", BB.getFirstInsertionPt());
+  return cast<ConvergenceControlInst>(Call);
+}
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 176caa2..650d23a 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -119,17 +119,20 @@
 #include "llvm/CodeGen/MachinePassManager.h"
 #include "llvm/CodeGen/MachinePostDominators.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineScheduler.h"
 #include "llvm/CodeGen/MachineTraceMetrics.h"
 #include "llvm/CodeGen/MachineVerifier.h"
 #include "llvm/CodeGen/OptimizePHIs.h"
 #include "llvm/CodeGen/PHIElimination.h"
 #include "llvm/CodeGen/PeepholeOptimizer.h"
+#include "llvm/CodeGen/PostRASchedulerList.h"
 #include "llvm/CodeGen/PreISelIntrinsicLowering.h"
 #include "llvm/CodeGen/RegAllocFast.h"
 #include "llvm/CodeGen/RegUsageInfoCollector.h"
 #include "llvm/CodeGen/RegUsageInfoPropagate.h"
 #include "llvm/CodeGen/RegisterCoalescerPass.h"
 #include "llvm/CodeGen/RegisterUsageInfo.h"
+#include "llvm/CodeGen/RenameIndependentSubregs.h"
 #include "llvm/CodeGen/SafeStack.h"
 #include "llvm/CodeGen/SelectOptimize.h"
 #include "llvm/CodeGen/ShadowStackGCLowering.h"
@@ -138,6 +141,7 @@
 #include "llvm/CodeGen/SpillPlacement.h"
 #include "llvm/CodeGen/StackColoring.h"
 #include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/StackSlotColoring.h"
 #include "llvm/CodeGen/TailDuplication.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/CodeGen/TwoAddressInstructionPass.h"
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 5d52664..0593c1c 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -112,6 +112,8 @@ public:
   const MCExpr *lowerBlockAddressConstant(const BlockAddress &BA) override;
 
   void emitStartOfAsmFile(Module &M) override;
+  void emitJumpTableImpl(const MachineJumpTableInfo &MJTI,
+                         ArrayRef<unsigned> JumpTableIndices) override;
   std::tuple<const MCSymbol *, uint64_t, const MCSymbol *,
              codeview::JumpTableEntrySize>
   getCodeViewJumpTableInfo(int JTI, const MachineInstr *BranchInstr,
@@ -268,8 +270,6 @@ private:
 
   void emitFunctionBodyEnd() override;
   void emitGlobalAlias(const Module &M, const GlobalAlias &GA) override;
-  void emitJumpTableImpl(const MachineJumpTableInfo &MJTI,
-                         ArrayRef<unsigned> JumpTableIndices) override;
 
   MCSymbol *GetCPISymbol(unsigned CPID) const override;
   void emitEndOfAsmFile(Module &M) override;
@@ -1276,9 +1276,9 @@ void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
 
 void AArch64AsmPrinter::emitJumpTableImpl(const MachineJumpTableInfo &MJTI,
                                           ArrayRef<unsigned> JumpTableIndices) {
+  // Fast return if there is nothing to emit to avoid creating empty sections.
   if (JumpTableIndices.empty())
     return;
-
   const TargetLoweringObjectFile &TLOF = getObjFileLowering();
   const auto &F = MF->getFunction();
   const std::vector<MachineJumpTableEntry> &JT = MJTI.getJumpTables();
diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.h b/llvm/lib/Target/AArch64/AArch64MacroFusion.h
index 2999e7a..62da054 100644
--- a/llvm/lib/Target/AArch64/AArch64MacroFusion.h
+++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.h
@@ -20,7 +20,7 @@ namespace llvm {
 
 /// Note that you have to add:
 ///   DAG.addMutation(createAArch64MacroFusionDAGMutation());
-/// to AArch64PassConfig::createMachineScheduler() to have an effect.
+/// to AArch64TargetMachine::createMachineScheduler() to have an effect.
 std::unique_ptr<ScheduleDAGMutation> createAArch64MacroFusionDAGMutation();
 
 } // llvm
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 07f0724..d10a0c0 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -480,6 +480,33 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
   return I.get();
 }
 
+ScheduleDAGInstrs *
+AArch64TargetMachine::createMachineScheduler(MachineSchedContext *C) const {
+  const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
+  ScheduleDAGMILive *DAG = createGenericSchedLive(C);
+  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
+  if (ST.hasFusion())
+    DAG->addMutation(createAArch64MacroFusionDAGMutation());
+  return DAG;
+}
+
+ScheduleDAGInstrs *
+AArch64TargetMachine::createPostMachineScheduler(MachineSchedContext *C) const {
+  const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
+  ScheduleDAGMI *DAG =
+      new ScheduleDAGMI(C, std::make_unique<AArch64PostRASchedStrategy>(C),
+                        /* RemoveKillFlags=*/true);
+  if (ST.hasFusion()) {
+    // Run the Macro Fusion after RA again since literals are expanded from
+    // pseudos then (v. addPreSched2()).
+    DAG->addMutation(createAArch64MacroFusionDAGMutation());
+    return DAG;
+  }
+
+  return DAG;
+}
+
 void AArch64leTargetMachine::anchor() { }
 
 AArch64leTargetMachine::AArch64leTargetMachine(
@@ -512,33 +539,6 @@ public:
     return getTM<AArch64TargetMachine>();
   }
 
-  ScheduleDAGInstrs *
-  createMachineScheduler(MachineSchedContext *C) const override {
-    const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
-    ScheduleDAGMILive *DAG = createGenericSchedLive(C);
-    DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
-    DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
-    if (ST.hasFusion())
-      DAG->addMutation(createAArch64MacroFusionDAGMutation());
-    return DAG;
-  }
-
-  ScheduleDAGInstrs *
-  createPostMachineScheduler(MachineSchedContext *C) const override {
-    const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
-    ScheduleDAGMI *DAG =
-        new ScheduleDAGMI(C, std::make_unique<AArch64PostRASchedStrategy>(C),
-                          /* RemoveKillFlags=*/true);
-    if (ST.hasFusion()) {
-      // Run the Macro Fusion after RA again since literals are expanded from
-      // pseudos then (v. addPreSched2()).
-      DAG->addMutation(createAArch64MacroFusionDAGMutation());
-      return DAG;
-    }
-
-    return DAG;
-  }
-
   void addIRPasses()  override;
   bool addPreISel() override;
   void addCodeGenPrepare() override;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.h b/llvm/lib/Target/AArch64/AArch64TargetMachine.h
index 621adb38..f8ba41f 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.h
@@ -70,6 +70,11 @@ public:
   bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
     return getPointerSize(SrcAS) == getPointerSize(DestAS);
   }
+  ScheduleDAGInstrs *
+  createMachineScheduler(MachineSchedContext *C) const override;
+
+  ScheduleDAGInstrs *
+  createPostMachineScheduler(MachineSchedContext *C) const override;
 
 private:
   bool isLittle;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index a6edcf1..cc2f097 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -4692,13 +4692,24 @@ InstructionCost AArch64TTIImpl::getPartialReductionCost(
   EVT InputEVT = EVT::getEVT(InputTypeA);
   EVT AccumEVT = EVT::getEVT(AccumType);
 
-  if (VF.isScalable() && !ST->isSVEorStreamingSVEAvailable())
-    return Invalid;
+  unsigned VFMinValue = VF.getKnownMinValue();
+
+  if (VF.isScalable()) {
+    if (!ST->isSVEorStreamingSVEAvailable())
+      return Invalid;
+
+    // Don't accept a partial reduction if the scaled accumulator is vscale x 1,
+    // since we can't lower that type.
+    unsigned Scale =
+        AccumEVT.getScalarSizeInBits() / InputEVT.getScalarSizeInBits();
+    if (VFMinValue == Scale)
+      return Invalid;
+  }
   if (VF.isFixed() && (!ST->isNeonAvailable() || !ST->hasDotProd()))
     return Invalid;
 
   if (InputEVT == MVT::i8) {
-    switch (VF.getKnownMinValue()) {
+    switch (VFMinValue) {
     default:
       return Invalid;
     case 8:
@@ -4717,7 +4728,7 @@ InstructionCost AArch64TTIImpl::getPartialReductionCost(
   } else if (InputEVT == MVT::i16) {
     // FIXME: Allow i32 accumulator but increase cost, as we would extend
     //        it to i64.
-    if (VF.getKnownMinValue() != 8 || AccumEVT != MVT::i64)
+    if (VFMinValue != 8 || AccumEVT != MVT::i64)
       return Invalid;
   } else
     return Invalid;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 31656c9..2c6b882 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -41,7 +41,7 @@ FunctionPass *createSIPeepholeSDWALegacyPass();
 FunctionPass *createSILowerI1CopiesLegacyPass();
 FunctionPass *createSIShrinkInstructionsLegacyPass();
 FunctionPass *createSILoadStoreOptimizerLegacyPass();
-FunctionPass *createSIWholeQuadModePass();
+FunctionPass *createSIWholeQuadModeLegacyPass();
 FunctionPass *createSIFixControlFlowLiveIntervalsPass();
 FunctionPass *createSIOptimizeExecMaskingPreRAPass();
 FunctionPass *createSIOptimizeVGPRLiveRangeLegacyPass();
@@ -204,7 +204,7 @@ extern char &SILowerSGPRSpillsLegacyID;
 void initializeSILoadStoreOptimizerLegacyPass(PassRegistry &);
 extern char &SILoadStoreOptimizerLegacyID;
 
-void initializeSIWholeQuadModePass(PassRegistry &);
+void initializeSIWholeQuadModeLegacyPass(PassRegistry &);
 extern char &SIWholeQuadModeID;
 
 void initializeSILowerControlFlowLegacyPass(PassRegistry &);
@@ -451,7 +451,7 @@ extern char &AMDGPUOpenCLEnqueuedBlockLoweringLegacyID;
 void initializeGCNNSAReassignPass(PassRegistry &);
 extern char &GCNNSAReassignID;
 
-void initializeGCNPreRALongBranchRegPass(PassRegistry &);
+void initializeGCNPreRALongBranchRegLegacyPass(PassRegistry &);
 extern char &GCNPreRALongBranchRegID;
 
 void initializeGCNPreRAOptimizationsPass(PassRegistry &);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.h b/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.h
index ad198a3..8aee23d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.h
@@ -16,7 +16,7 @@ namespace llvm {
 
 /// Note that you have to add:
 ///   DAG.addMutation(createAMDGPUMacroFusionDAGMutation());
-/// to AMDGPUPassConfig::createMachineScheduler() to have an effect.
+/// to AMDGPUTargetMachine::createMachineScheduler() to have an effect.
 std::unique_ptr<ScheduleDAGMutation> createAMDGPUMacroFusionDAGMutation();
 
 } // llvm
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 45e2f0d..41ad144 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -97,6 +97,7 @@ FUNCTION_PASS_WITH_PARAMS(
 #define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
 #endif
 MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this))
+MACHINE_FUNCTION_PASS("amdgpu-pre-ra-long-branch-reg", GCNPreRALongBranchRegPass())
 MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
 MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass())
 MACHINE_FUNCTION_PASS("si-fix-vgpr-copies", SIFixVGPRCopiesPass())
@@ -111,12 +112,12 @@ MACHINE_FUNCTION_PASS("si-optimize-exec-masking", SIOptimizeExecMaskingPass())
 MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
 MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
 MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
+MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
 #undef MACHINE_FUNCTION_PASS
 
 #define DUMMY_MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-insert-delay-alu", AMDGPUInsertDelayAluPass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-nsa-reassign", GCNNSAReassignPass())
-DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-long-branch-reg", GCNPreRALongBranchRegPass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass())
@@ -140,7 +141,6 @@ DUMMY_MACHINE_FUNCTION_PASS("amdgpu-global-isel-divergence-lowering", AMDGPUGlob
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-regbankselect", AMDGPURegBankSelectPass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-regbanklegalize", AMDGPURegBankLegalizePass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-regbank-combiner", AMDGPURegBankCombinerPass())
-DUMMY_MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
 
 #undef DUMMY_MACHINE_FUNCTION_PASS
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 96062b3..fffd30b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -32,6 +32,7 @@
 #include "AMDGPUWaitSGPRHazards.h"
 #include "GCNDPPCombine.h"
 #include "GCNIterativeScheduler.h"
+#include "GCNPreRALongBranchReg.h"
 #include "GCNSchedStrategy.h"
 #include "GCNVOPDUtils.h"
 #include "R600.h"
@@ -50,6 +51,7 @@
 #include "SIPeepholeSDWA.h"
 #include "SIPreAllocateWWMRegs.h"
 #include "SIShrinkInstructions.h"
+#include "SIWholeQuadMode.h"
 #include "TargetInfo/AMDGPUTargetInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/Analysis/CGSCCPassManager.h"
@@ -67,6 +69,7 @@
 #include "llvm/CodeGen/MIRParser/MIParser.h"
 #include "llvm/CodeGen/MachineCSE.h"
 #include "llvm/CodeGen/MachineLICM.h"
+#include "llvm/CodeGen/MachineScheduler.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
@@ -528,7 +531,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeSIInsertHardClausesPass(*PR);
   initializeSIInsertWaitcntsPass(*PR);
   initializeSIModeRegisterPass(*PR);
-  initializeSIWholeQuadModePass(*PR);
+  initializeSIWholeQuadModeLegacyPass(*PR);
   initializeSILowerControlFlowLegacyPass(*PR);
   initializeSIPreEmitPeepholePass(*PR);
   initializeSILateBranchLoweringPass(*PR);
@@ -546,7 +549,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeAMDGPUResourceUsageAnalysisPass(*PR);
   initializeGCNNSAReassignPass(*PR);
   initializeGCNPreRAOptimizationsPass(*PR);
-  initializeGCNPreRALongBranchRegPass(*PR);
+  initializeGCNPreRALongBranchRegLegacyPass(*PR);
   initializeGCNRewritePartialRegUsesPass(*PR);
   initializeGCNRegPressurePrinterPass(*PR);
   initializeAMDGPUPreloadKernArgPrologLegacyPass(*PR);
@@ -729,6 +732,16 @@ StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
                           : getTargetFeatureString();
 }
 
+llvm::ScheduleDAGInstrs *
+AMDGPUTargetMachine::createMachineScheduler(MachineSchedContext *C) const {
+  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
+  ScheduleDAGMILive *DAG = createGenericSchedLive(C);
+  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+  if (ST.shouldClusterStores())
+    DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
+  return DAG;
+}
+
 /// Predicate for Internalize pass.
 static bool mustPreserveGV(const GlobalValue &GV) {
   if (const Function *F = dyn_cast<Function>(&GV))
@@ -1046,6 +1059,43 @@ Error GCNTargetMachine::buildCodeGenPipeline(
   return CGPB.buildPipeline(MPM, Out, DwoOut, FileType);
 }
 
+ScheduleDAGInstrs *
+GCNTargetMachine::createMachineScheduler(MachineSchedContext *C) const {
+  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
+  if (ST.enableSIScheduler())
+    return createSIMachineScheduler(C);
+
+  Attribute SchedStrategyAttr =
+      C->MF->getFunction().getFnAttribute("amdgpu-sched-strategy");
+  StringRef SchedStrategy = SchedStrategyAttr.isValid()
+                                ? SchedStrategyAttr.getValueAsString()
+                                : AMDGPUSchedStrategy;
+
+  if (SchedStrategy == "max-ilp")
+    return createGCNMaxILPMachineScheduler(C);
+
+  if (SchedStrategy == "max-memory-clause")
+    return createGCNMaxMemoryClauseMachineScheduler(C);
+
+  return createGCNMaxOccupancyMachineScheduler(C);
+}
+
+ScheduleDAGInstrs *
+GCNTargetMachine::createPostMachineScheduler(MachineSchedContext *C) const {
+  ScheduleDAGMI *DAG =
+      new GCNPostScheduleDAGMILive(C, std::make_unique<PostGenericScheduler>(C),
+                                   /*RemoveKillFlags=*/true);
+  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
+  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+  if (ST.shouldClusterStores())
+    DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
+  DAG->addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA));
+  if ((EnableVOPD.getNumOccurrences() ||
+       getOptLevel() >= CodeGenOptLevel::Less) &&
+      EnableVOPD)
+    DAG->addMutation(createVOPDPairingMutation());
+  return DAG;
+}
 //===----------------------------------------------------------------------===//
 // AMDGPU Legacy Pass Setup
 //===----------------------------------------------------------------------===//
@@ -1071,25 +1121,6 @@ public:
     return getTM<GCNTargetMachine>();
   }
 
-  ScheduleDAGInstrs *
-  createMachineScheduler(MachineSchedContext *C) const override;
-
-  ScheduleDAGInstrs *
-  createPostMachineScheduler(MachineSchedContext *C) const override {
-    ScheduleDAGMI *DAG = new GCNPostScheduleDAGMILive(
-        C, std::make_unique<PostGenericScheduler>(C),
-        /*RemoveKillFlags=*/true);
-    const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
-    DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
-    if (ST.shouldClusterStores())
-      DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
-    DAG->addMutation(
-        createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA));
-    if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less))
-      DAG->addMutation(createVOPDPairingMutation());
-    return DAG;
-  }
-
   bool addPreISel() override;
   void addMachineSSAOptimization() override;
   bool addILPOpts() override;
@@ -1316,41 +1347,10 @@ bool AMDGPUPassConfig::addGCPasses() {
   return false;
 }
 
-llvm::ScheduleDAGInstrs *
-AMDGPUPassConfig::createMachineScheduler(MachineSchedContext *C) const {
-  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
-  ScheduleDAGMILive *DAG = createGenericSchedLive(C);
-  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
-  if (ST.shouldClusterStores())
-    DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
-  return DAG;
-}
-
 //===----------------------------------------------------------------------===//
 // GCN Legacy Pass Setup
 //===----------------------------------------------------------------------===//
 
-ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
-  MachineSchedContext *C) const {
-  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
-  if (ST.enableSIScheduler())
-    return createSIMachineScheduler(C);
-
-  Attribute SchedStrategyAttr =
-      C->MF->getFunction().getFnAttribute("amdgpu-sched-strategy");
-  StringRef SchedStrategy = SchedStrategyAttr.isValid()
-                                ? SchedStrategyAttr.getValueAsString()
-                                : AMDGPUSchedStrategy;
-
-  if (SchedStrategy == "max-ilp")
-    return createGCNMaxILPMachineScheduler(C);
-
-  if (SchedStrategy == "max-memory-clause")
-    return createGCNMaxMemoryClauseMachineScheduler(C);
-
-  return createGCNMaxOccupancyMachineScheduler(C);
-}
-
 bool GCNPassConfig::addPreISel() {
   AMDGPUPassConfig::addPreISel();
 
@@ -1932,6 +1932,7 @@ AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder(
     GCNTargetMachine &TM, const CGPassBuilderOption &Opts,
     PassInstrumentationCallbacks *PIC)
     : CodeGenPassBuilder(TM, Opts, PIC) {
+  Opt.MISchedPostRA = true;
   Opt.RequiresCodeGenSCCOrder = true;
   // Exceptions and StackMaps are not supported, so these passes will never do
   // anything.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 24b4da3..1455494 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -71,6 +71,8 @@ public:
   bool splitModule(Module &M, unsigned NumParts,
                    function_ref<void(std::unique_ptr<Module> MPart)>
                        ModuleCallback) override;
+  ScheduleDAGInstrs *
+  createMachineScheduler(MachineSchedContext *C) const override;
 };
 
 //===----------------------------------------------------------------------===//
@@ -115,6 +117,10 @@ public:
                                 PerFunctionMIParsingState &PFS,
                                 SMDiagnostic &Error,
                                 SMRange &SourceRange) const override;
+  ScheduleDAGInstrs *
+  createMachineScheduler(MachineSchedContext *C) const override;
+  ScheduleDAGInstrs *
+  createPostMachineScheduler(MachineSchedContext *C) const override;
 };
 
 //===----------------------------------------------------------------------===//
@@ -128,10 +134,6 @@ public:
   AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
     return getTM<AMDGPUTargetMachine>();
   }
-
-  ScheduleDAGInstrs *
-  createMachineScheduler(MachineSchedContext *C) const override;
-
   void addEarlyCSEOrGVNPass();
   void addStraightLineScalarOptimizationPasses();
   void addIRPasses() override;
diff --git a/llvm/lib/Target/AMDGPU/GCNPreRALongBranchReg.cpp b/llvm/lib/Target/AMDGPU/GCNPreRALongBranchReg.cpp
index 76a4148..355bbeb 100644
--- a/llvm/lib/Target/AMDGPU/GCNPreRALongBranchReg.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNPreRALongBranchReg.cpp
@@ -14,6 +14,7 @@
 // distrance threshold tuning of what is considered "long" is handled through
 // amdgpu-long-branch-factor cl argument which sets LongBranchFactor.
 //===----------------------------------------------------------------------===//
+#include "GCNPreRALongBranchReg.h"
 #include "AMDGPU.h"
 #include "GCNSubtarget.h"
 #include "SIMachineFunctionInfo.h"
@@ -36,7 +37,7 @@ static cl::opt<double> LongBranchFactor(
              "reserved. We lean towards always reserving a register for  "
              "long jumps"));
 
-class GCNPreRALongBranchReg : public MachineFunctionPass {
+class GCNPreRALongBranchReg {
 
   struct BasicBlockInfo {
     // Offset - Distance from the beginning of the function to the beginning
@@ -49,26 +50,38 @@ class GCNPreRALongBranchReg : public MachineFunctionPass {
                          SmallVectorImpl<BasicBlockInfo> &BlockInfo);
 
 public:
+  GCNPreRALongBranchReg() = default;
+  bool run(MachineFunction &MF);
+};
+
+class GCNPreRALongBranchRegLegacy : public MachineFunctionPass {
+public:
   static char ID;
-  GCNPreRALongBranchReg() : MachineFunctionPass(ID) {
-    initializeGCNPreRALongBranchRegPass(*PassRegistry::getPassRegistry());
+  GCNPreRALongBranchRegLegacy() : MachineFunctionPass(ID) {
+    initializeGCNPreRALongBranchRegLegacyPass(*PassRegistry::getPassRegistry());
   }
-  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    return GCNPreRALongBranchReg().run(MF);
+  }
+
   StringRef getPassName() const override {
     return "AMDGPU Pre-RA Long Branch Reg";
   }
+
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesAll();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
 };
 } // End anonymous namespace.
-char GCNPreRALongBranchReg::ID = 0;
 
-INITIALIZE_PASS(GCNPreRALongBranchReg, DEBUG_TYPE,
+char GCNPreRALongBranchRegLegacy::ID = 0;
+
+INITIALIZE_PASS(GCNPreRALongBranchRegLegacy, DEBUG_TYPE,
                 "AMDGPU Pre-RA Long Branch Reg", false, false)
 
-char &llvm::GCNPreRALongBranchRegID = GCNPreRALongBranchReg::ID;
+char &llvm::GCNPreRALongBranchRegID = GCNPreRALongBranchRegLegacy::ID;
 void GCNPreRALongBranchReg::generateBlockInfo(
     MachineFunction &MF, SmallVectorImpl<BasicBlockInfo> &BlockInfo) {
 
@@ -99,7 +112,8 @@ void GCNPreRALongBranchReg::generateBlockInfo(
     PrevNum = Num;
   }
 }
-bool GCNPreRALongBranchReg::runOnMachineFunction(MachineFunction &MF) {
+
+bool GCNPreRALongBranchReg::run(MachineFunction &MF) {
   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = STM.getInstrInfo();
   const SIRegisterInfo *TRI = STM.getRegisterInfo();
@@ -136,3 +150,10 @@ bool GCNPreRALongBranchReg::runOnMachineFunction(MachineFunction &MF) {
   }
   return false;
 }
+
+PreservedAnalyses
+GCNPreRALongBranchRegPass::run(MachineFunction &MF,
+                               MachineFunctionAnalysisManager &MFAM) {
+  GCNPreRALongBranchReg().run(MF);
+  return PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNPreRALongBranchReg.h b/llvm/lib/Target/AMDGPU/GCNPreRALongBranchReg.h
new file mode 100644
index 0000000..4cd7dea
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/GCNPreRALongBranchReg.h
@@ -0,0 +1,23 @@
+//===- GCNPreRALongBranchReg.h ----------------------------------*- C++- *-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_GCNPRERALONGBRANCHREG_H
+#define LLVM_LIB_TARGET_AMDGPU_GCNPRERALONGBRANCHREG_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+class GCNPreRALongBranchRegPass
+    : public PassInfoMixin<GCNPreRALongBranchRegPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_GCNPRERALONGBRANCHREG_H
diff --git a/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp b/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp
index a9e5327..10552a1 100644
--- a/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp
@@ -90,17 +90,17 @@ R600TargetMachine::getTargetTransformInfo(const Function &F) const {
   return TargetTransformInfo(R600TTIImpl(this, F));
 }
 
+ScheduleDAGInstrs *
+R600TargetMachine::createMachineScheduler(MachineSchedContext *C) const {
+  return createR600MachineScheduler(C);
+}
+
 namespace {
 class R600PassConfig final : public AMDGPUPassConfig {
 public:
   R600PassConfig(TargetMachine &TM, PassManagerBase &PM)
       : AMDGPUPassConfig(TM, PM) {}
 
-  ScheduleDAGInstrs *
-  createMachineScheduler(MachineSchedContext *C) const override {
-    return createR600MachineScheduler(C);
-  }
-
   bool addPreISel() override;
   bool addInstSelector() override;
   void addPreRegAlloc() override;
diff --git a/llvm/lib/Target/AMDGPU/R600TargetMachine.h b/llvm/lib/Target/AMDGPU/R600TargetMachine.h
index b7f123a..eb4cb91 100644
--- a/llvm/lib/Target/AMDGPU/R600TargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/R600TargetMachine.h
@@ -53,6 +53,8 @@ public:
   MachineFunctionInfo *
   createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F,
                             const TargetSubtargetInfo *STI) const override;
+  ScheduleDAGInstrs *
+  createMachineScheduler(MachineSchedContext *C) const override;
 };
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 87eb6d9..3293602 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -67,6 +67,7 @@
 ///
 //===----------------------------------------------------------------------===//
 
+#include "SIWholeQuadMode.h"
 #include "AMDGPU.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -148,11 +149,19 @@ struct WorkItem {
   WorkItem(MachineInstr *MI) : MI(MI) {}
 };
 
-class SIWholeQuadMode : public MachineFunctionPass {
+class SIWholeQuadMode {
+public:
+  SIWholeQuadMode(MachineFunction &MF, LiveIntervals *LIS,
+                  MachineDominatorTree *MDT, MachinePostDominatorTree *PDT)
+      : ST(&MF.getSubtarget<GCNSubtarget>()), TII(ST->getInstrInfo()),
+        TRI(&TII->getRegisterInfo()), MRI(&MF.getRegInfo()), LIS(LIS), MDT(MDT),
+        PDT(PDT) {}
+  bool run(MachineFunction &MF);
+
 private:
+  const GCNSubtarget *ST;
   const SIInstrInfo *TII;
   const SIRegisterInfo *TRI;
-  const GCNSubtarget *ST;
   MachineRegisterInfo *MRI;
   LiveIntervals *LIS;
   MachineDominatorTree *MDT;
@@ -225,12 +234,13 @@ private:
   void lowerInitExec(MachineInstr &MI);
   MachineBasicBlock::iterator lowerInitExecInstrs(MachineBasicBlock &Entry,
                                                   bool &Changed);
+};
 
+class SIWholeQuadModeLegacy : public MachineFunctionPass {
 public:
   static char ID;
 
-  SIWholeQuadMode() :
-    MachineFunctionPass(ID) { }
+  SIWholeQuadModeLegacy() : MachineFunctionPass(ID) {}
 
   bool runOnMachineFunction(MachineFunction &MF) override;
 
@@ -250,23 +260,22 @@ public:
         MachineFunctionProperties::Property::IsSSA);
   }
 };
-
 } // end anonymous namespace
 
-char SIWholeQuadMode::ID = 0;
+char SIWholeQuadModeLegacy::ID = 0;
 
-INITIALIZE_PASS_BEGIN(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false,
-                      false)
+INITIALIZE_PASS_BEGIN(SIWholeQuadModeLegacy, DEBUG_TYPE, "SI Whole Quad Mode",
+                      false, false)
 INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
-INITIALIZE_PASS_END(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false,
-                    false)
+INITIALIZE_PASS_END(SIWholeQuadModeLegacy, DEBUG_TYPE, "SI Whole Quad Mode",
+                    false, false)
 
-char &llvm::SIWholeQuadModeID = SIWholeQuadMode::ID;
+char &llvm::SIWholeQuadModeID = SIWholeQuadModeLegacy::ID;
 
-FunctionPass *llvm::createSIWholeQuadModePass() {
-  return new SIWholeQuadMode;
+FunctionPass *llvm::createSIWholeQuadModeLegacyPass() {
+  return new SIWholeQuadModeLegacy;
 }
 
 #ifndef NDEBUG
@@ -1689,7 +1698,7 @@ SIWholeQuadMode::lowerInitExecInstrs(MachineBasicBlock &Entry, bool &Changed) {
   return InsertPt;
 }
 
-bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
+bool SIWholeQuadMode::run(MachineFunction &MF) {
   LLVM_DEBUG(dbgs() << "SI Whole Quad Mode on " << MF.getName()
                     << " ------------- \n");
   LLVM_DEBUG(MF.dump(););
@@ -1704,18 +1713,6 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
   SetInactiveInstrs.clear();
   StateTransition.clear();
 
-  ST = &MF.getSubtarget<GCNSubtarget>();
-
-  TII = ST->getInstrInfo();
-  TRI = &TII->getRegisterInfo();
-  MRI = &MF.getRegInfo();
-  LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
-  auto *MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
-  MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr;
-  auto *PDTWrapper =
-      getAnalysisIfAvailable<MachinePostDominatorTreeWrapperPass>();
-  PDT = PDTWrapper ? &PDTWrapper->getPostDomTree() : nullptr;
-
   if (ST->isWave32()) {
     AndOpc = AMDGPU::S_AND_B32;
     AndTermOpc = AMDGPU::S_AND_B32_term;
@@ -1816,3 +1813,38 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
 
   return Changed;
 }
+
+bool SIWholeQuadModeLegacy::runOnMachineFunction(MachineFunction &MF) {
+  LiveIntervals *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
+  auto *MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
+  MachineDominatorTree *MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr;
+  auto *PDTWrapper =
+      getAnalysisIfAvailable<MachinePostDominatorTreeWrapperPass>();
+  MachinePostDominatorTree *PDT =
+      PDTWrapper ? &PDTWrapper->getPostDomTree() : nullptr;
+  SIWholeQuadMode Impl(MF, LIS, MDT, PDT);
+  return Impl.run(MF);
+}
+
+PreservedAnalyses
+SIWholeQuadModePass::run(MachineFunction &MF,
+                         MachineFunctionAnalysisManager &MFAM) {
+  MFPropsModifier _(*this, MF);
+
+  LiveIntervals *LIS = &MFAM.getResult<LiveIntervalsAnalysis>(MF);
+  MachineDominatorTree *MDT =
+      MFAM.getCachedResult<MachineDominatorTreeAnalysis>(MF);
+  MachinePostDominatorTree *PDT =
+      MFAM.getCachedResult<MachinePostDominatorTreeAnalysis>(MF);
+  SIWholeQuadMode Impl(MF, LIS, MDT, PDT);
+  bool Changed = Impl.run(MF);
+  if (!Changed)
+    return PreservedAnalyses::all();
+
+  PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
+  PA.preserve<SlotIndexesAnalysis>();
+  PA.preserve<LiveIntervalsAnalysis>();
+  PA.preserve<MachineDominatorTreeAnalysis>();
+  PA.preserve<MachinePostDominatorTreeAnalysis>();
+  return PA;
+}
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.h b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.h
new file mode 100644
index 0000000..e30b467
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.h
@@ -0,0 +1,27 @@
+//===- SIWholeQuadMode.h ----------------------------------------*- C++- *-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_SIWHOLEQUADMODE_H
+#define LLVM_LIB_TARGET_AMDGPU_SIWHOLEQUADMODE_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+class SIWholeQuadModePass : public PassInfoMixin<SIWholeQuadModePass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+
+  MachineFunctionProperties getClearedProperties() const {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::IsSSA);
+  }
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_SIWHOLEQUADMODE_H
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index 00a3381..ca5ed5c 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -389,10 +389,10 @@ multiclass VOPC_Pseudos <string opName,
     let SchedRW = P.Schedule;
     let isCompare = 1;
     let isCommutable = 1;
-    let AsmMatchConverter =
-        !if (P.HasOpSel, "cvtVOP3OpSel",
-             !if (!or(P.HasModifiers, P.HasOMod, P.HasIntClamp), "cvtVOP3",
-                  ""));
+    let AsmMatchConverter = !cond(
+        P.HasOpSel : "cvtVOP3OpSel",
+        !or(P.HasModifiers, P.HasOMod, P.HasIntClamp) : "cvtVOP3",
+        1 : "");
   }
 
   if P.HasExtSDWA then
@@ -454,6 +454,10 @@ multiclass VOPCX_Pseudos <string opName,
     let isCommutable = 1;
     let SubtargetPredicate = HasNoSdstCMPX;
     let IsVCMPX = 1;
+    let AsmMatchConverter = !cond(
+        P_NoSDst.HasOpSel : "cvtVOP3OpSel",
+        !or(P_NoSDst.HasModifiers, P_NoSDst.HasOMod, P_NoSDst.HasIntClamp) : "cvtVOP3",
+        1 : "");
   }
 
   if P_NoSDst.HasExtSDWA then
@@ -1079,6 +1083,10 @@ multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec,
              VCMPXNoSDstTable<1, opName#"_e64"> {
     let Defs = !if(DefExec, [EXEC], []);
     let SchedRW = p.Schedule;
+    let AsmMatchConverter = !cond(
+        p.HasOpSel : "cvtVOP3OpSel",
+        !or(p.HasModifiers, p.HasOMod, p.HasIntClamp) : "cvtVOP3",
+        1 : "");
   }
 
   if p.HasExtSDWA then
@@ -1127,6 +1135,10 @@ multiclass VOPCX_Class_Pseudos <string opName,
     let Defs = [EXEC];
     let SchedRW = P_NoSDst.Schedule;
     let SubtargetPredicate = HasNoSdstCMPX;
+    let AsmMatchConverter = !cond(
+       P_NoSDst.HasOpSel : "cvtVOP3OpSel",
+       !or(P_NoSDst.HasModifiers, P_NoSDst.HasOMod, P_NoSDst.HasIntClamp) : "cvtVOP3",
+       1 : "");
   }
 
   if P_NoSDst.HasExtSDWA then
diff --git a/llvm/lib/Target/ARM/ARMLatencyMutations.h b/llvm/lib/Target/ARM/ARMLatencyMutations.h
index a4b8de0..b22025a 100644
--- a/llvm/lib/Target/ARM/ARMLatencyMutations.h
+++ b/llvm/lib/Target/ARM/ARMLatencyMutations.h
@@ -47,7 +47,7 @@ protected:
 
 /// Note that you have to add:
 ///   DAG.addMutation(createARMLatencyMutation(ST, AA));
-/// to ARMPassConfig::createMachineScheduler() to have an effect.
+/// to ARMTargetMachine::createMachineScheduler() to have an effect.
 std::unique_ptr<ScheduleDAGMutation>
 createARMLatencyMutations(const class ARMSubtarget &, AAResults *AA);
 
diff --git a/llvm/lib/Target/ARM/ARMMacroFusion.h b/llvm/lib/Target/ARM/ARMMacroFusion.h
index 4896a4a..b8642dc 100644
--- a/llvm/lib/Target/ARM/ARMMacroFusion.h
+++ b/llvm/lib/Target/ARM/ARMMacroFusion.h
@@ -20,7 +20,7 @@ namespace llvm {
 
 /// Note that you have to add:
 ///   DAG.addMutation(createARMMacroFusionDAGMutation());
-/// to ARMPassConfig::createMachineScheduler() to have an effect.
+/// to ARMTargetMachine::createMachineScheduler() to have an effect.
 std::unique_ptr<ScheduleDAGMutation> createARMMacroFusionDAGMutation();
 
 } // llvm
diff --git a/llvm/lib/Target/ARM/ARMProcessors.td b/llvm/lib/Target/ARM/ARMProcessors.td
index 213b104..7453727 100644
--- a/llvm/lib/Target/ARM/ARMProcessors.td
+++ b/llvm/lib/Target/ARM/ARMProcessors.td
@@ -377,6 +377,7 @@ def : ProcessorModel<"star-mc1", CortexM4Model,         [ARMv8mMainline,
                                                          FeatureHasSlowFPVFMx,
                                                          FeatureUseMISched,
                                                          FeatureHasNoBranchPredictor,
+                                                         FeatureAvoidMULS,
                                                          FeatureFixCMSE_CVE_2021_35465]>;
 
 def : ProcessorModel<"cortex-m35p", CortexM4Model,      [ARMv8mMainline,
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 8e7b756..98bdf310 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -325,6 +325,28 @@ ARMBaseTargetMachine::getTargetTransformInfo(const Function &F) const {
   return TargetTransformInfo(ARMTTIImpl(this, F));
 }
 
+ScheduleDAGInstrs *
+ARMBaseTargetMachine::createMachineScheduler(MachineSchedContext *C) const {
+  ScheduleDAGMILive *DAG = createGenericSchedLive(C);
+  // add DAG Mutations here.
+  const ARMSubtarget &ST = C->MF->getSubtarget<ARMSubtarget>();
+  if (ST.hasFusion())
+    DAG->addMutation(createARMMacroFusionDAGMutation());
+  return DAG;
+}
+
+ScheduleDAGInstrs *
+ARMBaseTargetMachine::createPostMachineScheduler(MachineSchedContext *C) const {
+  ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
+  // add DAG Mutations here.
+  const ARMSubtarget &ST = C->MF->getSubtarget<ARMSubtarget>();
+  if (ST.hasFusion())
+    DAG->addMutation(createARMMacroFusionDAGMutation());
+  if (auto Mutation = createARMLatencyMutations(ST, C->AA))
+    DAG->addMutation(std::move(Mutation));
+  return DAG;
+}
+
 ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT,
                                        StringRef CPU, StringRef FS,
                                        const TargetOptions &Options,
@@ -353,28 +375,6 @@ public:
     return getTM<ARMBaseTargetMachine>();
   }
 
-  ScheduleDAGInstrs *
-  createMachineScheduler(MachineSchedContext *C) const override {
-    ScheduleDAGMILive *DAG = createGenericSchedLive(C);
-    // add DAG Mutations here.
-    const ARMSubtarget &ST = C->MF->getSubtarget<ARMSubtarget>();
-    if (ST.hasFusion())
-      DAG->addMutation(createARMMacroFusionDAGMutation());
-    return DAG;
-  }
-
-  ScheduleDAGInstrs *
-  createPostMachineScheduler(MachineSchedContext *C) const override {
-    ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
-    // add DAG Mutations here.
-    const ARMSubtarget &ST = C->MF->getSubtarget<ARMSubtarget>();
-    if (ST.hasFusion())
-      DAG->addMutation(createARMMacroFusionDAGMutation());
-    if (auto Mutation = createARMLatencyMutations(ST, C->AA))
-      DAG->addMutation(std::move(Mutation));
-    return DAG;
-  }
-
   void addIRPasses() override;
   void addCodeGenPrepare() override;
   bool addPreISel() override;
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.h b/llvm/lib/Target/ARM/ARMTargetMachine.h
index 94d48fc..99fd817 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.h
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.h
@@ -96,6 +96,10 @@ public:
                                 PerFunctionMIParsingState &PFS,
                                 SMDiagnostic &Error,
                                 SMRange &SourceRange) const override;
+  ScheduleDAGInstrs *
+  createMachineScheduler(MachineSchedContext *C) const override;
+  ScheduleDAGInstrs *
+  createPostMachineScheduler(MachineSchedContext *C) const override;
 };
 
 /// ARM/Thumb little endian target machine.
diff --git a/llvm/lib/Target/Hexagon/BitTracker.h b/llvm/lib/Target/Hexagon/BitTracker.h
index 08c0359..6d52070 100644
--- a/llvm/lib/Target/Hexagon/BitTracker.h
+++ b/llvm/lib/Target/Hexagon/BitTracker.h
@@ -100,9 +100,9 @@ private:
       bool operator()(const MachineInstr *MI, const MachineInstr *MJ) const;
       DenseMap<const MachineInstr*,unsigned> &Dist;
     };
-    std::priority_queue<MachineInstr*, std::vector<MachineInstr*>, Cmp> Uses;
     DenseSet<const MachineInstr*> Set; // Set to avoid adding duplicate entries.
     DenseMap<const MachineInstr*,unsigned> Dist;
+    std::priority_queue<MachineInstr *, std::vector<MachineInstr *>, Cmp> Uses;
   };
 
   void reset();
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index cfb552c..a195609 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -262,8 +262,7 @@ bool Coloring::color() {
 
   // Explicitly assign "None" to all uncolored nodes.
   for (unsigned I = 0; I != Order.size(); ++I)
-    if (Colors.count(I) == 0)
-      Colors[I] = ColorKind::None;
+    Colors.try_emplace(I, ColorKind::None);
 
   return true;
 }
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index a97bc19..ff4cb3c 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -351,6 +351,11 @@ MachineFunctionInfo *HexagonTargetMachine::createMachineFunctionInfo(
 
 HexagonTargetMachine::~HexagonTargetMachine() = default;
 
+ScheduleDAGInstrs *
+HexagonTargetMachine::createMachineScheduler(MachineSchedContext *C) const {
+  return createVLIWMachineSched(C);
+}
+
 namespace {
 /// Hexagon Code Generator Pass Configuration Options.
 class HexagonPassConfig : public TargetPassConfig {
@@ -362,11 +367,6 @@ public:
     return getTM<HexagonTargetMachine>();
   }
 
-  ScheduleDAGInstrs *
-  createMachineScheduler(MachineSchedContext *C) const override {
-    return createVLIWMachineSched(C);
-  }
-
   void addIRPasses() override;
   bool addInstSelector() override;
   void addPreRegAlloc() override;
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.h b/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
index 65f5b6e..48e0c08 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -50,6 +50,8 @@ public:
   bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
     return true;
   }
+  ScheduleDAGInstrs *
+  createMachineScheduler(MachineSchedContext *C) const override;
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index ac8ce05..ec654e0 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -648,9 +648,50 @@ static unsigned int getFenceOp(NVPTX::Ordering O, NVPTX::Scope S,
   if (S == NVPTX::Scope::Cluster)
     T->failIfClustersUnsupported(".cluster scope fence");
 
+  // Fall back to .acq_rel if .acquire, .release is not supported.
+  if (!T->hasSplitAcquireAndReleaseFences() &&
+      (O == NVPTX::Ordering::Acquire || O == NVPTX::Ordering::Release))
+    O = NVPTX::Ordering::AcquireRelease;
+
   switch (O) {
   case NVPTX::Ordering::Acquire:
+    switch (S) {
+    case NVPTX::Scope::System:
+      return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acquire_sys
+                                    : NVPTX::INT_MEMBAR_SYS;
+    case NVPTX::Scope::Block:
+      return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acquire_cta
+                                    : NVPTX::INT_MEMBAR_CTA;
+    case NVPTX::Scope::Cluster:
+      return NVPTX::atomic_thread_fence_acquire_cluster;
+    case NVPTX::Scope::Device:
+      return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_acquire_gpu
+                                    : NVPTX::INT_MEMBAR_GL;
+    case NVPTX::Scope::Thread:
+      report_fatal_error(
+          formatv("Unsupported scope \"{}\" for acquire/release/acq_rel fence.",
+                  ScopeToString(S)));
+    }
+    break;
   case NVPTX::Ordering::Release:
+    switch (S) {
+    case NVPTX::Scope::System:
+      return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_release_sys
+                                    : NVPTX::INT_MEMBAR_SYS;
+    case NVPTX::Scope::Block:
+      return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_release_cta
+                                    : NVPTX::INT_MEMBAR_CTA;
+    case NVPTX::Scope::Cluster:
+      return NVPTX::atomic_thread_fence_release_cluster;
+    case NVPTX::Scope::Device:
+      return T->hasMemoryOrdering() ? NVPTX::atomic_thread_fence_release_gpu
+                                    : NVPTX::INT_MEMBAR_GL;
+    case NVPTX::Scope::Thread:
+      report_fatal_error(
+          formatv("Unsupported scope \"{}\" for acquire/release/acq_rel fence.",
+                  ScopeToString(S)));
+    }
+    break;
   case NVPTX::Ordering::AcquireRelease: {
     switch (S) {
     case NVPTX::Scope::System:
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 74def43..f94d549 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -3867,33 +3867,16 @@ def : Pat <
 // PTX Fence instructions
 ////////////////////////////////////////////////////////////////////////////////
 
-def atomic_thread_fence_seq_cst_sys :
-  NVPTXInst<(outs), (ins), "fence.sc.sys;", []>,
-  Requires<[hasPTX<60>, hasSM<70>]>;
-def atomic_thread_fence_acq_rel_sys :
-  NVPTXInst<(outs), (ins), "fence.acq_rel.sys;", []>,
-  Requires<[hasPTX<60>, hasSM<70>]>;
-
-def atomic_thread_fence_seq_cst_gpu :
-  NVPTXInst<(outs), (ins), "fence.sc.gpu;", []>,
-  Requires<[hasPTX<60>, hasSM<70>]>;
-def atomic_thread_fence_acq_rel_gpu :
-  NVPTXInst<(outs), (ins), "fence.acq_rel.gpu;", []>,
-  Requires<[hasPTX<60>, hasSM<70>]>;
-
-def atomic_thread_fence_seq_cst_cluster :
-  NVPTXInst<(outs), (ins), "fence.sc.cluster;", []>,
-  Requires<[hasPTX<78>, hasSM<90>]>;
-def atomic_thread_fence_acq_rel_cluster :
-  NVPTXInst<(outs), (ins), "fence.acq_rel.cluster;", []>,
-  Requires<[hasPTX<78>, hasSM<90>]>;
-
-def atomic_thread_fence_seq_cst_cta :
-  NVPTXInst<(outs), (ins), "fence.sc.cta;", []>,
-  Requires<[hasPTX<60>, hasSM<70>]>;
-def atomic_thread_fence_acq_rel_cta :
-  NVPTXInst<(outs), (ins), "fence.acq_rel.cta;", []>,
-  Requires<[hasPTX<60>, hasSM<70>]>;
+class NVPTXFenceInst<string scope, string sem, Predicate ptx>:
+    NVPTXInst<(outs), (ins), "fence."#sem#"."#scope#";", []>,
+    Requires<[ptx, hasSM<70>]>;
+
+foreach scope = ["sys", "gpu", "cluster", "cta"] in {
+  def atomic_thread_fence_seq_cst_#scope: NVPTXFenceInst<scope, "sc", hasPTX<60>>;
+  def atomic_thread_fence_acq_rel_#scope: NVPTXFenceInst<scope, "acq_rel", hasPTX<60>>;
+  def atomic_thread_fence_acquire_#scope: NVPTXFenceInst<scope, "acquire", hasPTX<87>>;
+  def atomic_thread_fence_release_#scope: NVPTXFenceInst<scope, "release", hasPTX<87>>;
+}
 
 def fpimm_any_zero : FPImmLeaf<fAny, [{
   return Imm.isZero();
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index 0c4420b..851c915 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -88,6 +88,10 @@ public:
   // Does SM & PTX support memory orderings (weak and atomic: relaxed, acquire,
   // release, acq_rel, sc) ?
   bool hasMemoryOrdering() const { return SmVersion >= 70 && PTXVersion >= 60; }
+  // Does SM & PTX support .acquire and .release qualifiers for fence?
+  bool hasSplitAcquireAndReleaseFences() const {
+    return SmVersion >= 90 && PTXVersion >= 86;
+  }
   // Does SM & PTX support atomic relaxed MMIO operations ?
   bool hasRelaxedMMIO() const { return SmVersion >= 70 && PTXVersion >= 82; }
   bool hasDotInstructions() const {
diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.h b/llvm/lib/Target/PowerPC/PPCMacroFusion.h
index cbf49ee..22d15ca 100644
--- a/llvm/lib/Target/PowerPC/PPCMacroFusion.h
+++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.h
@@ -20,7 +20,7 @@ namespace llvm {
 
 /// Note that you have to add:
 ///   DAG.addMutation(createPowerPCMacroFusionDAGMutation());
-/// to PPCPassConfig::createMachineScheduler() to have an effect.
+/// to PPCTargetMachine::createMachineScheduler() to have an effect.
 std::unique_ptr<ScheduleDAGMutation> createPowerPCMacroFusionDAGMutation();
 } // llvm
 
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index b1ad041..5ee13a9 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -403,6 +403,16 @@ PPCTargetMachine::getSubtargetImpl(const Function &F) const {
   return I.get();
 }
 
+ScheduleDAGInstrs *
+PPCTargetMachine::createMachineScheduler(MachineSchedContext *C) const {
+  return createPPCMachineScheduler(C);
+}
+
+ScheduleDAGInstrs *
+PPCTargetMachine::createPostMachineScheduler(MachineSchedContext *C) const {
+  return createPPCPostMachineScheduler(C);
+}
+
 //===----------------------------------------------------------------------===//
 // Pass Pipeline Configuration
 //===----------------------------------------------------------------------===//
@@ -438,15 +448,6 @@ public:
   bool addLegalizeMachineIR() override;
   bool addRegBankSelect() override;
   bool addGlobalInstructionSelect() override;
-
-  ScheduleDAGInstrs *
-  createMachineScheduler(MachineSchedContext *C) const override {
-    return createPPCMachineScheduler(C);
-  }
-  ScheduleDAGInstrs *
-  createPostMachineScheduler(MachineSchedContext *C) const override {
-    return createPPCPostMachineScheduler(C);
-  }
 };
 
 } // end anonymous namespace
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/llvm/lib/Target/PowerPC/PPCTargetMachine.h
index 026bf2f2..cb02b44 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.h
@@ -63,6 +63,10 @@ public:
   MachineFunctionInfo *
   createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F,
                             const TargetSubtargetInfo *STI) const override;
+  ScheduleDAGInstrs *
+  createMachineScheduler(MachineSchedContext *C) const override;
+  ScheduleDAGInstrs *
+  createPostMachineScheduler(MachineSchedContext *C) const override;
 
   bool isELFv2ABI() const { return TargetABI == PPC_ABI_ELFv2; }
   bool hasGlibcHWCAPAccess() const { return HasGlibcHWCAPAccess; }
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 63864dd..82fb8fb 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -119,6 +119,19 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() {
                                            MachineMemOperand::MOLoad);
       break;
     }
+    case ISD::FP_EXTEND: {
+      // We only have vector patterns for riscv_fpextend_vl in isel.
+      SDLoc DL(N);
+      MVT VT = N->getSimpleValueType(0);
+      if (!VT.isVector())
+        break;
+      SDValue VLMAX = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
+      SDValue TrueMask = CurDAG->getNode(
+          RISCVISD::VMSET_VL, DL, VT.changeVectorElementType(MVT::i1), VLMAX);
+      Result = CurDAG->getNode(RISCVISD::FP_EXTEND_VL, DL, VT, N->getOperand(0),
+                               TrueMask, VLMAX);
+      break;
+    }
     }
 
     if (Result) {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7c3b583..ddda844 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -22,6 +22,7 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -5213,17 +5214,21 @@ static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
     SmallDenseMap<unsigned, SDValue, 4> Values;
     for (unsigned I : seq<unsigned>(Data.size())) {
       const auto &[Idx1, Idx2, _] = Data[I];
-      if (Values.contains(Idx1)) {
-        assert(Idx2 != UINT_MAX && Values.contains(Idx2) &&
-               "Expected both indices to be extracted already.");
-        break;
+      // If the shuffle contains permutation of odd number of elements,
+      // Idx1 might be used already in the first iteration.
+      //
+      // Idx1 = shuffle Idx1, Idx2
+      // Idx1 = shuffle Idx1, Idx3
+      SDValue &V = Values.try_emplace(Idx1).first->getSecond();
+      if (!V)
+        V = ExtractValue(Idx1 >= NumOfSrcRegs ? V2 : V1,
+                         (Idx1 % NumOfSrcRegs) * NumOpElts);
+      if (Idx2 != UINT_MAX) {
+        SDValue &V = Values.try_emplace(Idx2).first->getSecond();
+        if (!V)
+          V = ExtractValue(Idx2 >= NumOfSrcRegs ? V2 : V1,
+                           (Idx2 % NumOfSrcRegs) * NumOpElts);
       }
-      SDValue V = ExtractValue(Idx1 >= NumOfSrcRegs ? V2 : V1,
-                               (Idx1 % NumOfSrcRegs) * NumOpElts);
-      Values[Idx1] = V;
-      if (Idx2 != UINT_MAX)
-        Values[Idx2] = ExtractValue(Idx2 >= NumOfSrcRegs ? V2 : V1,
-                                    (Idx2 % NumOfSrcRegs) * NumOpElts);
     }
     SDValue V;
     for (const auto &[Idx1, Idx2, Mask] : Data) {
@@ -5324,6 +5329,21 @@ static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN,
   return DAG.getVectorShuffle(VT, DL, Select, DAG.getUNDEF(VT), NewMask);
 }
 
+/// Is this mask local (i.e. elements only move within their local span), and
+/// repeating (that is, the same rearrangement is being done within each span)?
+static bool isLocalRepeatingShuffle(ArrayRef<int> Mask, int Span) {
+  // TODO: Could improve the case where undef elements exist in the first span.
+  for (auto [I, M] : enumerate(Mask)) {
+    if (M == -1)
+      continue;
+    int ChunkLo = I - (I % Span);
+    int ChunkHi = ChunkLo + Span;
+    if (M < ChunkLo || M >= ChunkHi || M - ChunkLo != Mask[I % Span])
+      return false;
+  }
+  return true;
+}
+
 /// Try to widen element type to get a new mask value for a better permutation
 /// sequence.  This doesn't try to inspect the widened mask for profitability;
 /// we speculate the widened form is equal or better.  This has the effect of
@@ -5685,10 +5705,43 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
                                  : DAG.getUNDEF(XLenVT));
     }
     SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
-    LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
-                                         Subtarget);
-    SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
-                                 DAG.getUNDEF(ContainerVT), TrueMask, VL);
+    LHSIndices =
+        convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
+
+    SDValue Gather;
+    // If we have a locally repeating mask, then we can reuse the first register
+    // in the index register group for all registers within the source register
+    // group.  TODO: This generalizes to m2, and m4.  Also, this is currently
+    // picking up cases with a fully undef tail which could be more directly
+    // handled with fewer redundant vrgathers
+    const MVT M1VT = getLMUL1VT(ContainerVT);
+    auto VLMAX = RISCVTargetLowering::computeVLMAXBounds(M1VT, Subtarget).first;
+    if (ContainerVT.bitsGT(M1VT) && isLocalRepeatingShuffle(Mask, VLMAX)) {
+      EVT SubIndexVT = M1VT.changeVectorElementType(IndexVT.getScalarType());
+      SDValue SubIndex =
+          DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubIndexVT, LHSIndices,
+                      DAG.getVectorIdxConstant(0, DL));
+      auto [InnerTrueMask, InnerVL] =
+          getDefaultScalableVLOps(M1VT, DL, DAG, Subtarget);
+      int N = ContainerVT.getVectorMinNumElements() /
+              M1VT.getVectorMinNumElements();
+      assert(isPowerOf2_32(N) && N <= 8);
+      Gather = DAG.getUNDEF(ContainerVT);
+      for (int i = 0; i < N; i++) {
+        SDValue SubIdx =
+            DAG.getVectorIdxConstant(M1VT.getVectorMinNumElements() * i, DL);
+        SDValue SubV1 =
+            DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, V1, SubIdx);
+        SDValue SubVec =
+            DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
+                        DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
+        Gather = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Gather,
+                             SubVec, SubIdx);
+      }
+    } else {
+      Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
+                           DAG.getUNDEF(ContainerVT), TrueMask, VL);
+    }
     return convertFromScalableVector(VT, Gather, DAG, Subtarget);
   }
 
@@ -9011,6 +9064,10 @@ RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
 
   bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
 
+  // We have regular SD node patterns for direct non-VL extends.
+  if (VT.isScalableVector() && IsDirectConv && !IsVP)
+    return Op;
+
   // Prepare any fixed-length vector operands.
   MVT ContainerVT = VT;
   SDValue Mask, VL;
@@ -22773,6 +22830,231 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
   return true;
 }
 
+static bool isMultipleOfN(const Value *V, const DataLayout &DL, unsigned N) {
+  assert(N);
+  if (N == 1)
+    return true;
+
+  if (isPowerOf2_32(N)) {
+    KnownBits KB = llvm::computeKnownBits(V, DL);
+    return KB.countMinTrailingZeros() >= Log2_32(N);
+  }
+
+  using namespace PatternMatch;
+  // Right now we're only recognizing the simplest pattern.
+  uint64_t C;
+  return match(V, m_c_Mul(m_Value(), m_ConstantInt(C))) && C && C % N == 0;
+}
+
+/// Lower an interleaved vp.load into a vlsegN intrinsic.
+///
+/// E.g. Lower an interleaved vp.load (Factor = 2):
+///   %l = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr %ptr,
+///                                                         %mask,
+///                                                         i32 %wide.rvl)
+///   %dl = tail call { <vscale x 32 x i8>, <vscale x 32 x i8> }
+///             @llvm.vector.deinterleave2.nxv64i8(
+///               <vscale x 64 x i8> %l)
+///   %r0 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %dl, 0
+///   %r1 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %dl, 1
+///
+/// Into:
+///   %rvl = udiv %wide.rvl, 2
+///   %sl = call { <vscale x 32 x i8>, <vscale x 32 x i8> }
+///             @llvm.riscv.vlseg2.mask.nxv32i8.i64(<vscale x 32 x i8> undef,
+///                                                 <vscale x 32 x i8> undef,
+///                                                 ptr %ptr,
+///                                                 %mask,
+///                                                 i64 %rvl,
+///                                                 i64 1)
+///   %r0 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %sl, 0
+///   %r1 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %sl, 1
+///
+/// NOTE: the deinterleave2 intrinsic won't be touched and is expected to be
+/// removed by the caller
+/// TODO: We probably can loosen the dependency on matching extractvalue when
+/// dealing with factor of 2 (extractvalue is still required for most of other
+/// factors though).
+bool RISCVTargetLowering::lowerDeinterleavedIntrinsicToVPLoad(
+    VPIntrinsic *Load, Value *Mask,
+    ArrayRef<Value *> DeinterleaveResults) const {
+  assert(Mask && "Expect a valid mask");
+  assert(Load->getIntrinsicID() == Intrinsic::vp_load &&
+         "Unexpected intrinsic");
+
+  const unsigned Factor = DeinterleaveResults.size();
+
+  auto *WideVTy = dyn_cast<ScalableVectorType>(Load->getType());
+  // TODO: Support fixed vectors.
+  if (!WideVTy)
+    return false;
+
+  unsigned WideNumElements = WideVTy->getElementCount().getKnownMinValue();
+  assert(WideNumElements % Factor == 0 &&
+         "ElementCount of a wide load must be divisible by interleave factor");
+  auto *VTy =
+      VectorType::get(WideVTy->getScalarType(), WideNumElements / Factor,
+                      WideVTy->isScalableTy());
+  auto &DL = Load->getModule()->getDataLayout();
+  Align Alignment = Load->getParamAlign(0).value_or(
+      DL.getABITypeAlign(WideVTy->getElementType()));
+  if (!isLegalInterleavedAccessType(
+          VTy, Factor, Alignment,
+          Load->getArgOperand(0)->getType()->getPointerAddressSpace(), DL))
+    return false;
+
+  IRBuilder<> Builder(Load);
+  Value *WideEVL = Load->getArgOperand(2);
+  // Conservatively check if EVL is a multiple of factor, otherwise some
+  // (trailing) elements might be lost after the transformation.
+  if (!isMultipleOfN(WideEVL, Load->getDataLayout(), Factor))
+    return false;
+
+  auto *XLenTy = Type::getIntNTy(Load->getContext(), Subtarget.getXLen());
+  Value *EVL = Builder.CreateZExt(
+      Builder.CreateUDiv(WideEVL, ConstantInt::get(WideEVL->getType(), Factor)),
+      XLenTy);
+
+  static const Intrinsic::ID IntrMaskIds[] = {
+      Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
+      Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
+      Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
+      Intrinsic::riscv_vlseg8_mask,
+  };
+
+  unsigned SEW = DL.getTypeSizeInBits(VTy->getElementType());
+  unsigned NumElts = VTy->getElementCount().getKnownMinValue();
+  Type *VecTupTy = TargetExtType::get(
+      Load->getContext(), "riscv.vector.tuple",
+      ScalableVectorType::get(Type::getInt8Ty(Load->getContext()),
+                              NumElts * SEW / 8),
+      Factor);
+
+  Value *PoisonVal = PoisonValue::get(VecTupTy);
+
+  Function *VlsegNFunc = Intrinsic::getOrInsertDeclaration(
+      Load->getModule(), IntrMaskIds[Factor - 2],
+      {VecTupTy, Mask->getType(), EVL->getType()});
+
+  Value *Operands[] = {
+      PoisonVal,
+      Load->getArgOperand(0),
+      Mask,
+      EVL,
+      ConstantInt::get(XLenTy, RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC),
+      ConstantInt::get(XLenTy, Log2_64(SEW))};
+
+  CallInst *VlsegN = Builder.CreateCall(VlsegNFunc, Operands);
+
+  SmallVector<Type *, 8> AggrTypes{Factor, VTy};
+  Value *Return =
+      PoisonValue::get(StructType::get(Load->getContext(), AggrTypes));
+  Function *VecExtractFunc = Intrinsic::getOrInsertDeclaration(
+      Load->getModule(), Intrinsic::riscv_tuple_extract, {VTy, VecTupTy});
+  for (unsigned i = 0; i < Factor; ++i) {
+    Value *VecExtract =
+        Builder.CreateCall(VecExtractFunc, {VlsegN, Builder.getInt32(i)});
+    Return = Builder.CreateInsertValue(Return, VecExtract, i);
+  }
+
+  for (auto [Idx, DIO] : enumerate(DeinterleaveResults)) {
+    // We have to create a brand new ExtractValue to replace each
+    // of these old ExtractValue instructions.
+    Value *NewEV =
+        Builder.CreateExtractValue(Return, {static_cast<unsigned>(Idx)});
+    DIO->replaceAllUsesWith(NewEV);
+  }
+
+  return true;
+}
+
+/// Lower an interleaved vp.store into a vssegN intrinsic.
+///
+/// E.g. Lower an interleaved vp.store (Factor = 2):
+///
+///   %is = tail call <vscale x 64 x i8>
+///             @llvm.vector.interleave2.nxv64i8(
+///                               <vscale x 32 x i8> %load0,
+///                               <vscale x 32 x i8> %load1
+///   %wide.rvl = shl nuw nsw i32 %rvl, 1
+///   tail call void @llvm.vp.store.nxv64i8.p0(
+///                               <vscale x 64 x i8> %is, ptr %ptr,
+///                               %mask,
+///                               i32 %wide.rvl)
+///
+/// Into:
+///   call void @llvm.riscv.vsseg2.mask.nxv32i8.i64(
+///                               <vscale x 32 x i8> %load1,
+///                               <vscale x 32 x i8> %load2, ptr %ptr,
+///                               %mask,
+///                               i64 %rvl)
+bool RISCVTargetLowering::lowerInterleavedIntrinsicToVPStore(
+    VPIntrinsic *Store, Value *Mask,
+    ArrayRef<Value *> InterleaveOperands) const {
+  assert(Mask && "Expect a valid mask");
+  assert(Store->getIntrinsicID() == Intrinsic::vp_store &&
+         "Unexpected intrinsic");
+
+  const unsigned Factor = InterleaveOperands.size();
+
+  auto *VTy = dyn_cast<ScalableVectorType>(InterleaveOperands[0]->getType());
+  // TODO: Support fixed vectors.
+  if (!VTy)
+    return false;
+
+  const DataLayout &DL = Store->getDataLayout();
+  Align Alignment = Store->getParamAlign(1).value_or(
+      DL.getABITypeAlign(VTy->getElementType()));
+  if (!isLegalInterleavedAccessType(
+          VTy, Factor, Alignment,
+          Store->getArgOperand(1)->getType()->getPointerAddressSpace(), DL))
+    return false;
+
+  IRBuilder<> Builder(Store);
+  Value *WideEVL = Store->getArgOperand(3);
+  // Conservatively check if EVL is a multiple of factor, otherwise some
+  // (trailing) elements might be lost after the transformation.
+  if (!isMultipleOfN(WideEVL, Store->getDataLayout(), Factor))
+    return false;
+
+  auto *XLenTy = Type::getIntNTy(Store->getContext(), Subtarget.getXLen());
+  Value *EVL = Builder.CreateZExt(
+      Builder.CreateUDiv(WideEVL, ConstantInt::get(WideEVL->getType(), Factor)),
+      XLenTy);
+
+  static const Intrinsic::ID IntrMaskIds[] = {
+      Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
+      Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
+      Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
+      Intrinsic::riscv_vsseg8_mask,
+  };
+
+  unsigned SEW = DL.getTypeSizeInBits(VTy->getElementType());
+  unsigned NumElts = VTy->getElementCount().getKnownMinValue();
+  Type *VecTupTy = TargetExtType::get(
+      Store->getContext(), "riscv.vector.tuple",
+      ScalableVectorType::get(Type::getInt8Ty(Store->getContext()),
+                              NumElts * SEW / 8),
+      Factor);
+
+  Function *VecInsertFunc = Intrinsic::getOrInsertDeclaration(
+      Store->getModule(), Intrinsic::riscv_tuple_insert, {VecTupTy, VTy});
+  Value *StoredVal = PoisonValue::get(VecTupTy);
+  for (unsigned i = 0; i < Factor; ++i)
+    StoredVal = Builder.CreateCall(
+        VecInsertFunc, {StoredVal, InterleaveOperands[i], Builder.getInt32(i)});
+
+  Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
+      Store->getModule(), IntrMaskIds[Factor - 2],
+      {VecTupTy, Mask->getType(), EVL->getType()});
+
+  Value *Operands[] = {StoredVal, Store->getArgOperand(1), Mask, EVL,
+                       ConstantInt::get(XLenTy, Log2_64(SEW))};
+
+  Builder.CreateCall(VssegNFunc, Operands);
+  return true;
+}
+
 MachineInstr *
 RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
                                    MachineBasicBlock::instr_iterator &MBBI,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 77605a3..e9dd8ff 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -910,6 +910,14 @@ public:
   bool lowerInterleaveIntrinsicToStore(
       StoreInst *SI, ArrayRef<Value *> InterleaveValues) const override;
 
+  bool lowerDeinterleavedIntrinsicToVPLoad(
+      VPIntrinsic *Load, Value *Mask,
+      ArrayRef<Value *> DeinterleaveRes) const override;
+
+  bool lowerInterleavedIntrinsicToVPStore(
+      VPIntrinsic *Store, Value *Mask,
+      ArrayRef<Value *> InterleaveOps) const override;
+
   bool supportKCFIBundles() const override { return true; }
 
   SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index f35dc6e..333ae52 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -1412,29 +1412,6 @@ multiclass VPatWidenReductionVL<SDNode vop, PatFrags extop, string instruction_n
   }
 }
 
-multiclass VPatWidenReductionVL_RM<SDNode vop, PatFrags extop, string instruction_name, bit is_float> {
-  foreach vtiToWti = !if(is_float, AllWidenableFloatVectors, AllWidenableIntVectors) in {
-    defvar vti = vtiToWti.Vti;
-    defvar wti = vtiToWti.Wti;
-    defvar wti_m1 = !cast<VTypeInfo>(!if(is_float, "VF", "VI") # wti.SEW # "M1");
-    let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
-                                 GetVTypePredicates<wti>.Predicates) in {
-      def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$passthru),
-                                   (wti.Vector (extop (vti.Vector vti.RegClass:$rs1))),
-                                   VR:$rs2, (vti.Mask V0), VLOpFrag,
-                                   (XLenVT timm:$policy))),
-               (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK")
-                  (wti_m1.Vector VR:$passthru), (vti.Vector vti.RegClass:$rs1),
-                  (wti_m1.Vector VR:$rs2), (vti.Mask V0),
-                  // Value to indicate no rounding mode change in
-                  // RISCVInsertReadWriteCSR
-                  FRM_DYN,
-                  GPR:$vl, vti.Log2SEW,
-                  (XLenVT timm:$policy))>;
-    }
-  }
-}
-
 multiclass VPatWidenReductionVL_Ext_VL<SDNode vop, PatFrags extop, string instruction_name, bit is_float> {
   foreach vtiToWti = !if(is_float, AllWidenableFloatVectors, AllWidenableIntVectors) in {
     defvar vti = vtiToWti.Vti;
@@ -1443,7 +1420,7 @@ multiclass VPatWidenReductionVL_Ext_VL<SDNode vop, PatFrags extop, string instru
     let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
                                  GetVTypePredicates<wti>.Predicates) in {
       def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$passthru),
-                                   (wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), VLOpFrag)),
+                                   (wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), (XLenVT srcvalue))),
                                    VR:$rs2, (vti.Mask V0), VLOpFrag,
                                    (XLenVT timm:$policy))),
                (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK")
@@ -1462,7 +1439,7 @@ multiclass VPatWidenReductionVL_Ext_VL_RM<SDNode vop, PatFrags extop, string ins
     let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
                                  GetVTypePredicates<wti>.Predicates) in {
       def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$passthru),
-                                   (wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), VLOpFrag)),
+                                   (wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), (XLenVT srcvalue))),
                                    VR:$rs2, (vti.Mask V0), VLOpFrag,
                                    (XLenVT timm:$policy))),
                (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK")
@@ -2653,13 +2630,9 @@ defm : VPatReductionVL<rvv_vecreduce_FMIN_vl,     "PseudoVFREDMIN", is_float=1>;
 defm : VPatReductionVL<rvv_vecreduce_FMAX_vl,     "PseudoVFREDMAX", is_float=1>;
 
 // 14.4. Vector Widening Floating-Point Reduction Instructions
-defm : VPatWidenReductionVL_RM<rvv_vecreduce_SEQ_FADD_vl, fpext_oneuse,
-                               "PseudoVFWREDOSUM", is_float=1>;
 defm : VPatWidenReductionVL_Ext_VL_RM<rvv_vecreduce_SEQ_FADD_vl,
                                       riscv_fpextend_vl_oneuse,
                                       "PseudoVFWREDOSUM", is_float=1>;
-defm : VPatWidenReductionVL_RM<rvv_vecreduce_FADD_vl, fpext_oneuse,
-                               "PseudoVFWREDUSUM", is_float=1>;
 defm : VPatWidenReductionVL_Ext_VL_RM<rvv_vecreduce_FADD_vl,
                                       riscv_fpextend_vl_oneuse,
                                       "PseudoVFWREDUSUM", is_float=1>;
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index dde808a..3e49492 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -287,6 +287,39 @@ bool RISCVTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
   return true;
 }
 
+ScheduleDAGInstrs *
+RISCVTargetMachine::createMachineScheduler(MachineSchedContext *C) const {
+  ScheduleDAGMILive *DAG = nullptr;
+  if (EnableMISchedLoadStoreClustering) {
+    DAG = createGenericSchedLive(C);
+    DAG->addMutation(createLoadClusterDAGMutation(
+        DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
+    DAG->addMutation(createStoreClusterDAGMutation(
+        DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
+  }
+
+  const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
+  if (!DisableVectorMaskMutation && ST.hasVInstructions()) {
+    DAG = DAG ? DAG : createGenericSchedLive(C);
+    DAG->addMutation(createRISCVVectorMaskDAGMutation(DAG->TRI));
+  }
+  return DAG;
+}
+
+ScheduleDAGInstrs *
+RISCVTargetMachine::createPostMachineScheduler(MachineSchedContext *C) const {
+  ScheduleDAGMI *DAG = nullptr;
+  if (EnablePostMISchedLoadStoreClustering) {
+    DAG = createGenericSchedPostRA(C);
+    DAG->addMutation(createLoadClusterDAGMutation(
+        DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
+    DAG->addMutation(createStoreClusterDAGMutation(
+        DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
+  }
+
+  return DAG;
+}
+
 namespace {
 
 class RVVRegisterRegAlloc : public RegisterRegAllocBase<RVVRegisterRegAlloc> {
@@ -360,39 +393,6 @@ public:
     return getTM<RISCVTargetMachine>();
   }
 
-  ScheduleDAGInstrs *
-  createMachineScheduler(MachineSchedContext *C) const override {
-    ScheduleDAGMILive *DAG = nullptr;
-    if (EnableMISchedLoadStoreClustering) {
-      DAG = createGenericSchedLive(C);
-      DAG->addMutation(createLoadClusterDAGMutation(
-          DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
-      DAG->addMutation(createStoreClusterDAGMutation(
-          DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
-    }
-
-    const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
-    if (!DisableVectorMaskMutation && ST.hasVInstructions()) {
-      DAG = DAG ? DAG : createGenericSchedLive(C);
-      DAG->addMutation(createRISCVVectorMaskDAGMutation(DAG->TRI));
-    }
-    return DAG;
-  }
-
-  ScheduleDAGInstrs *
-  createPostMachineScheduler(MachineSchedContext *C) const override {
-    ScheduleDAGMI *DAG = nullptr;
-    if (EnablePostMISchedLoadStoreClustering) {
-      DAG = createGenericSchedPostRA(C);
-      DAG->addMutation(createLoadClusterDAGMutation(
-          DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
-      DAG->addMutation(createStoreClusterDAGMutation(
-          DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
-    }
-
-    return DAG;
-  }
-  
   void addIRPasses() override;
   bool addPreISel() override;
   void addCodeGenPrepare() override;
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.h b/llvm/lib/Target/RISCV/RISCVTargetMachine.h
index b1610e3..c85c2b3 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.h
@@ -59,6 +59,10 @@ public:
                                 SMDiagnostic &Error,
                                 SMRange &SourceRange) const override;
   void registerPassBuilderCallbacks(PassBuilder &PB) override;
+  ScheduleDAGInstrs *
+  createMachineScheduler(MachineSchedContext *C) const override;
+  ScheduleDAGInstrs *
+  createPostMachineScheduler(MachineSchedContext *C) const override;
 };
 
 std::unique_ptr<ScheduleDAGMutation>
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index cb2ec1d..8125923 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1176,6 +1176,14 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     }
     break;
   }
+  case Intrinsic::fmuladd: {
+    // TODO: handle promotion with f16/bf16 with zvfhmin/zvfbfmin
+    auto LT = getTypeLegalizationCost(RetTy);
+    if (ST->hasVInstructions() && LT.second.isVector())
+      return LT.first *
+             getRISCVInstructionCost(RISCV::VFMADD_VV, LT.second, CostKind);
+    break;
+  }
   case Intrinsic::fabs: {
     auto LT = getTypeLegalizationCost(RetTy);
     if (ST->hasVInstructions() && LT.second.isVector()) {
@@ -1375,6 +1383,14 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                                   : RISCV::VMV_V_X,
                                               LT.second, CostKind);
   }
+  case Intrinsic::experimental_vp_splice: {
+    // To support type-based query from vectorizer, set the index to 0.
+    // Note that index only change the cost from vslide.vx to vslide.vi and in
+    // current implementations they have same costs.
+    return getShuffleCost(TTI::SK_Splice,
+                          cast<VectorType>(ICA.getArgTypes()[0]), {}, CostKind,
+                          0, cast<VectorType>(ICA.getReturnType()));
+  }
   }
 
   if (ST->hasVInstructions() && RetTy->isVectorTy()) {
@@ -2750,7 +2766,12 @@ bool RISCVTTIImpl::isProfitableToSinkOperands(
         return false;
     }
 
-    Ops.push_back(&Op->getOperandUse(0));
+    Use *InsertEltUse = &Op->getOperandUse(0);
+    // Sink any fpexts since they might be used in a widening fp pattern.
+    auto *InsertElt = cast<InsertElementInst>(InsertEltUse);
+    if (isa<FPExtInst>(InsertElt->getOperand(1)))
+      Ops.push_back(&InsertElt->getOperandUse(1));
+    Ops.push_back(InsertEltUse);
     Ops.push_back(&OpIdx.value());
   }
   return true;
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 0960245..d4829bc 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -451,6 +451,23 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
   case RISCV::VFDIV_VF:
   case RISCV::VFDIV_VV:
   case RISCV::VFRDIV_VF:
+  // Vector Single-Width Floating-Point Fused Multiply-Add Instructions
+  case RISCV::VFMACC_VV:
+  case RISCV::VFMACC_VF:
+  case RISCV::VFNMACC_VV:
+  case RISCV::VFNMACC_VF:
+  case RISCV::VFMSAC_VV:
+  case RISCV::VFMSAC_VF:
+  case RISCV::VFNMSAC_VV:
+  case RISCV::VFNMSAC_VF:
+  case RISCV::VFMADD_VV:
+  case RISCV::VFMADD_VF:
+  case RISCV::VFNMADD_VV:
+  case RISCV::VFNMADD_VF:
+  case RISCV::VFMSUB_VV:
+  case RISCV::VFMSUB_VF:
+  case RISCV::VFNMSUB_VV:
+  case RISCV::VFNMSUB_VF:
   // Vector Floating-Point Square-Root Instruction
   case RISCV::VFSQRT_V:
   // Vector Floating-Point Reciprocal Square-Root Estimate Instruction
@@ -1016,6 +1033,23 @@ static bool isSupportedInstr(const MachineInstr &MI) {
   // Vector Widening Floating-Point Multiply
   case RISCV::VFWMUL_VF:
   case RISCV::VFWMUL_VV:
+  // Vector Single-Width Floating-Point Fused Multiply-Add Instructions
+  case RISCV::VFMACC_VV:
+  case RISCV::VFMACC_VF:
+  case RISCV::VFNMACC_VV:
+  case RISCV::VFNMACC_VF:
+  case RISCV::VFMSAC_VV:
+  case RISCV::VFMSAC_VF:
+  case RISCV::VFNMSAC_VV:
+  case RISCV::VFNMSAC_VF:
+  case RISCV::VFMADD_VV:
+  case RISCV::VFMADD_VF:
+  case RISCV::VFNMADD_VV:
+  case RISCV::VFNMADD_VF:
+  case RISCV::VFMSUB_VV:
+  case RISCV::VFMSUB_VF:
+  case RISCV::VFNMSUB_VV:
+  case RISCV::VFNMSUB_VF:
   // Vector Floating-Point MIN/MAX Instructions
   case RISCV::VFMIN_VF:
   case RISCV::VFMIN_VV:
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
index 95fa7bc..08ee94a 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
@@ -132,6 +132,15 @@ struct ImageQueryBuiltin {
 #define GET_ImageQueryBuiltins_DECL
 #define GET_ImageQueryBuiltins_IMPL
 
+struct IntegerDotProductBuiltin {
+  StringRef Name;
+  uint32_t Opcode;
+  bool IsSwapReq;
+};
+
+#define GET_IntegerDotProductBuiltins_DECL
+#define GET_IntegerDotProductBuiltins_IMPL
+
 struct ConvertBuiltin {
   StringRef Name;
   InstructionSet::InstructionSet Set;
@@ -1579,20 +1588,84 @@ static bool generateCastToPtrInst(const SPIRV::IncomingCall *Call,
   return true;
 }
 
-static bool generateDotOrFMulInst(const SPIRV::IncomingCall *Call,
+static bool generateDotOrFMulInst(const StringRef DemangledCall,
+                                  const SPIRV::IncomingCall *Call,
                                   MachineIRBuilder &MIRBuilder,
                                   SPIRVGlobalRegistry *GR) {
   if (Call->isSpirvOp())
     return buildOpFromWrapper(MIRBuilder, SPIRV::OpDot, Call,
                               GR->getSPIRVTypeID(Call->ReturnType));
-  unsigned Opcode = GR->getSPIRVTypeForVReg(Call->Arguments[0])->getOpcode();
-  bool IsVec = Opcode == SPIRV::OpTypeVector;
+
+  bool IsVec = GR->getSPIRVTypeForVReg(Call->Arguments[0])->getOpcode() ==
+               SPIRV::OpTypeVector;
   // Use OpDot only in case of vector args and OpFMul in case of scalar args.
-  MIRBuilder.buildInstr(IsVec ? SPIRV::OpDot : SPIRV::OpFMulS)
-      .addDef(Call->ReturnRegister)
-      .addUse(GR->getSPIRVTypeID(Call->ReturnType))
-      .addUse(Call->Arguments[0])
-      .addUse(Call->Arguments[1]);
+  uint32_t OC = IsVec ? SPIRV::OpDot : SPIRV::OpFMulS;
+  bool IsSwapReq = false;
+
+  const auto *ST =
+      static_cast<const SPIRVSubtarget *>(&MIRBuilder.getMF().getSubtarget());
+  if (GR->isScalarOrVectorOfType(Call->ReturnRegister, SPIRV::OpTypeInt) &&
+      (ST->canUseExtension(SPIRV::Extension::SPV_KHR_integer_dot_product) ||
+       ST->isAtLeastSPIRVVer(VersionTuple(1, 6)))) {
+    const SPIRV::DemangledBuiltin *Builtin = Call->Builtin;
+    const SPIRV::IntegerDotProductBuiltin *IntDot =
+        SPIRV::lookupIntegerDotProductBuiltin(Builtin->Name);
+    if (IntDot) {
+      OC = IntDot->Opcode;
+      IsSwapReq = IntDot->IsSwapReq;
+    } else if (IsVec) {
+      // Handling "dot" and "dot_acc_sat" builtins which use vectors of
+      // integers.
+      LLVMContext &Ctx = MIRBuilder.getContext();
+      SmallVector<StringRef, 10> TypeStrs;
+      SPIRV::parseBuiltinTypeStr(TypeStrs, DemangledCall, Ctx);
+      bool IsFirstSigned = TypeStrs[0].trim()[0] != 'u';
+      bool IsSecondSigned = TypeStrs[1].trim()[0] != 'u';
+
+      if (Call->BuiltinName == "dot") {
+        if (IsFirstSigned && IsSecondSigned)
+          OC = SPIRV::OpSDot;
+        else if (!IsFirstSigned && !IsSecondSigned)
+          OC = SPIRV::OpUDot;
+        else {
+          OC = SPIRV::OpSUDot;
+          if (!IsFirstSigned)
+            IsSwapReq = true;
+        }
+      } else if (Call->BuiltinName == "dot_acc_sat") {
+        if (IsFirstSigned && IsSecondSigned)
+          OC = SPIRV::OpSDotAccSat;
+        else if (!IsFirstSigned && !IsSecondSigned)
+          OC = SPIRV::OpUDotAccSat;
+        else {
+          OC = SPIRV::OpSUDotAccSat;
+          if (!IsFirstSigned)
+            IsSwapReq = true;
+        }
+      }
+    }
+  }
+
+  MachineInstrBuilder MIB = MIRBuilder.buildInstr(OC)
+                                .addDef(Call->ReturnRegister)
+                                .addUse(GR->getSPIRVTypeID(Call->ReturnType));
+
+  if (IsSwapReq) {
+    MIB.addUse(Call->Arguments[1]);
+    MIB.addUse(Call->Arguments[0]);
+    // needed for dot_acc_sat* builtins
+    for (size_t i = 2; i < Call->Arguments.size(); ++i)
+      MIB.addUse(Call->Arguments[i]);
+  } else {
+    for (size_t i = 0; i < Call->Arguments.size(); ++i)
+      MIB.addUse(Call->Arguments[i]);
+  }
+
+  // Add Packed Vector Format for Integer dot product builtins if arguments are
+  // scalar
+  if (!IsVec && OC != SPIRV::OpFMulS)
+    MIB.addImm(0);
+
   return true;
 }
 
@@ -2576,6 +2649,11 @@ mapBuiltinToOpcode(const StringRef DemangledCall,
     if (const auto *R = SPIRV::lookupGroupUniformBuiltin(Call->Builtin->Name))
       return std::make_tuple(Call->Builtin->Group, R->Opcode, 0);
     break;
+  case SPIRV::IntegerDot:
+    if (const auto *R =
+            SPIRV::lookupIntegerDotProductBuiltin(Call->Builtin->Name))
+      return std::make_tuple(Call->Builtin->Group, R->Opcode, 0);
+    break;
   case SPIRV::WriteImage:
     return std::make_tuple(Call->Builtin->Group, SPIRV::OpImageWrite, 0);
   case SPIRV::Select:
@@ -2635,7 +2713,8 @@ std::optional<bool> lowerBuiltin(const StringRef DemangledCall,
   case SPIRV::CastToPtr:
     return generateCastToPtrInst(Call.get(), MIRBuilder);
   case SPIRV::Dot:
-    return generateDotOrFMulInst(Call.get(), MIRBuilder, GR);
+  case SPIRV::IntegerDot:
+    return generateDotOrFMulInst(DemangledCall, Call.get(), MIRBuilder, GR);
   case SPIRV::Wave:
     return generateWaveInst(Call.get(), MIRBuilder, GR);
   case SPIRV::ICarryBorrow:
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
index af3901c..8125e12 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
@@ -42,6 +42,7 @@ def Variable : BuiltinGroup;
 def Atomic : BuiltinGroup;
 def Barrier : BuiltinGroup;
 def Dot : BuiltinGroup;
+def IntegerDot : BuiltinGroup;
 def Wave : BuiltinGroup;
 def GetQuery : BuiltinGroup;
 def ImageSizeQuery : BuiltinGroup;
@@ -101,6 +102,8 @@ def lookupBuiltin : SearchIndex {
 // Dot builtin record:
 def : DemangledBuiltin<"dot", OpenCL_std, Dot, 2, 2>;
 def : DemangledBuiltin<"__spirv_Dot", OpenCL_std, Dot, 2, 2>;
+def : DemangledBuiltin<"dot_acc_sat", OpenCL_std, IntegerDot, 3, 3>;
+def : DemangledBuiltin<"__spirv_DotAccSat", OpenCL_std, IntegerDot, 3, 3>;
 
 // Image builtin records:
 def : DemangledBuiltin<"read_imagei", OpenCL_std, ReadImage, 2, 4>;
@@ -1715,3 +1718,47 @@ class CLMemoryFenceFlags<bits<32> value> {
 def CLK_LOCAL_MEM_FENCE : CLMemoryFenceFlags<0x1>;
 def CLK_GLOBAL_MEM_FENCE : CLMemoryFenceFlags<0x2>;
 def CLK_IMAGE_MEM_FENCE : CLMemoryFenceFlags<0x4>;
+
+//===----------------------------------------------------------------------===//
+// Class defining dot builtins that should be translated into a
+// SPIR-V instruction using SPIR-V 1.6 or SPV_KHR_integer_dot_product extension.
+//
+// name is the demangled name of the given builtin.
+// opcode specifies the SPIR-V operation code of the generated instruction.
+// isSwapRequired specifies if the operands need to be swapped (the SPIR-V extension
+// has only one instruction for arguments of different signedness).
+//===----------------------------------------------------------------------===//
+class IntegerDotProductBuiltin<string name, Op operation> {
+  string Name = name;
+  Op Opcode = operation;
+  bit IsSwapReq = !not(!eq(!find(name, "_us"), -1));
+}
+
+// Table gathering all the integer dot product builtins.
+def IntegerDotProductBuiltins : GenericTable {
+  let FilterClass = "IntegerDotProductBuiltin";
+  let Fields = ["Name", "Opcode", "IsSwapReq"];
+}
+
+// Function to lookup group builtins by their name and set.
+def lookupIntegerDotProductBuiltin : SearchIndex {
+  let Table = IntegerDotProductBuiltins;
+  let Key = ["Name"];
+}
+
+// Multiclass used to define incoming builtin records for the SPV_KHR_integer_dot_product extension.
+multiclass DemangledIntegerDotProductBuiltin<string name, bits<8> minNumArgs, bits<8> maxNumArgs, Op operation> {
+  def : DemangledBuiltin<!strconcat("dot", name), OpenCL_std, IntegerDot, minNumArgs, maxNumArgs>;
+  def : IntegerDotProductBuiltin<!strconcat("dot", name), operation>;
+}
+
+// cl_khr_integer_dot_product
+defm : DemangledIntegerDotProductBuiltin<"_4x8packed_uu_uint", 2, 3, OpUDot>;
+defm : DemangledIntegerDotProductBuiltin<"_4x8packed_ss_int", 2, 3, OpSDot>;
+defm : DemangledIntegerDotProductBuiltin<"_4x8packed_us_int", 2, 3, OpSUDot>;
+defm : DemangledIntegerDotProductBuiltin<"_4x8packed_su_int", 2, 3, OpSUDot>;
+
+defm : DemangledIntegerDotProductBuiltin<"_acc_sat_4x8packed_uu_uint", 3, 4, OpUDotAccSat>;
+defm : DemangledIntegerDotProductBuiltin<"_acc_sat_4x8packed_ss_int", 3, 4, OpSDotAccSat>;
+defm : DemangledIntegerDotProductBuiltin<"_acc_sat_4x8packed_us_int", 3, 4, OpSUDotAccSat>;
+defm : DemangledIntegerDotProductBuiltin<"_acc_sat_4x8packed_su_int", 3, 4, OpSUDotAccSat>;
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
index 1bc35c6..981e224 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
@@ -530,8 +530,18 @@ defm OpISubBorrow: BinOpTypedGen<"OpISubBorrow", 150, subc, 0, 1>;
 def OpUMulExtended: BinOp<"OpUMulExtended", 151>;
 def OpSMulExtended: BinOp<"OpSMulExtended", 152>;
 
-def OpSDot: BinOp<"OpSDot", 4450>;
-def OpUDot: BinOp<"OpUDot", 4451>;
+def OpSDot: Op<4450, (outs ID:$res), (ins TYPE:$type, ID:$vec1, ID:$vec2, variable_ops),
+                  "$res = OpSDot $type $vec1 $vec2">;
+def OpUDot: Op<4451, (outs ID:$res), (ins TYPE:$type, ID:$vec1, ID:$vec2, variable_ops),
+                  "$res = OpUDot $type $vec1 $vec2">;
+def OpSUDot: Op<4452, (outs ID:$res), (ins TYPE:$type, ID:$vec1, ID:$vec2, variable_ops),
+                  "$res = OpSUDot $type $vec1 $vec2">;
+def OpSDotAccSat: Op<4453, (outs ID:$res), (ins TYPE:$type, ID:$vec1, ID:$vec2, ID:$acc, variable_ops),
+                  "$res = OpSDotAccSat $type $vec1 $vec2 $acc">;
+def OpUDotAccSat: Op<4454, (outs ID:$res), (ins TYPE:$type, ID:$vec1, ID:$vec2, ID:$acc, variable_ops),
+                  "$res = OpUDotAccSat $type $vec1 $vec2 $acc">;
+def OpSUDotAccSat: Op<4455, (outs ID:$res), (ins TYPE:$type, ID:$vec1, ID:$vec2, ID:$acc, variable_ops),
+                  "$res = OpSUDotAccSat $type $vec1 $vec2 $acc">;
 
 // 3.42.14 Bit Instructions
 
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index d3afaf42..a7a5ece 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -1692,6 +1692,10 @@ void addInstrRequirements(const MachineInstr &MI,
     break;
   case SPIRV::OpSDot:
   case SPIRV::OpUDot:
+  case SPIRV::OpSUDot:
+  case SPIRV::OpSDotAccSat:
+  case SPIRV::OpUDotAccSat:
+  case SPIRV::OpSUDotAccSat:
     AddDotProductRequirements(MI, Reqs, ST);
     break;
   case SPIRV::OpImageRead: {
diff --git a/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index bc23948..3e9fc31 100644
--- a/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -130,6 +130,9 @@ class SparcAsmParser : public MCTargetAsmParser {
   bool expandSET(MCInst &Inst, SMLoc IDLoc,
                  SmallVectorImpl<MCInst> &Instructions);
 
+  bool expandSETSW(MCInst &Inst, SMLoc IDLoc,
+                   SmallVectorImpl<MCInst> &Instructions);
+
   bool expandSETX(MCInst &Inst, SMLoc IDLoc,
                   SmallVectorImpl<MCInst> &Instructions);
 
@@ -734,6 +737,69 @@ bool SparcAsmParser::expandSET(MCInst &Inst, SMLoc IDLoc,
   return false;
 }
 
+bool SparcAsmParser::expandSETSW(MCInst &Inst, SMLoc IDLoc,
+                                 SmallVectorImpl<MCInst> &Instructions) {
+  MCOperand MCRegOp = Inst.getOperand(0);
+  MCOperand MCValOp = Inst.getOperand(1);
+  assert(MCRegOp.isReg());
+  assert(MCValOp.isImm() || MCValOp.isExpr());
+
+  // The imm operand can be either an expression or an immediate.
+  bool IsImm = Inst.getOperand(1).isImm();
+  int64_t ImmValue = IsImm ? MCValOp.getImm() : 0;
+  const MCExpr *ValExpr = IsImm ? MCConstantExpr::create(ImmValue, getContext())
+                                : MCValOp.getExpr();
+
+  bool IsSmallImm = IsImm && isInt<13>(ImmValue);
+  bool NoLowBitsImm = IsImm && ((ImmValue & 0x3FF) == 0);
+
+  MCOperand PrevReg = MCOperand::createReg(Sparc::G0);
+
+  if (!isInt<32>(ImmValue)) {
+    return Error(IDLoc,
+                 "set: argument must be between -2147483648 and 2147483647");
+  }
+
+  // Very small immediates can be expressed without emitting a sethi.
+  if (!IsSmallImm) {
+    // sethi %hi(val), rd
+    Instructions.push_back(
+        MCInstBuilder(SP::SETHIi)
+            .addReg(MCRegOp.getReg())
+            .addExpr(adjustPICRelocation(SparcMCExpr::VK_Sparc_HI, ValExpr)));
+
+    PrevReg = MCRegOp;
+  }
+
+  // If the immediate has the lower bits set or is small, we need to emit an or.
+  if (!NoLowBitsImm || IsSmallImm) {
+    const MCExpr *Expr =
+        IsSmallImm ? ValExpr
+                   : adjustPICRelocation(SparcMCExpr::VK_Sparc_LO, ValExpr);
+
+    // or rd, %lo(val), rd
+    Instructions.push_back(MCInstBuilder(SP::ORri)
+                               .addReg(MCRegOp.getReg())
+                               .addReg(PrevReg.getReg())
+                               .addExpr(Expr));
+
+    // If it's a small immediate there's nothing more to do.
+    if (IsSmallImm)
+      return false;
+  }
+
+  // Large negative or non-immediate expressions would need an sra.
+  if (!IsImm || ImmValue < 0) {
+    // sra rd, %g0, rd
+    Instructions.push_back(MCInstBuilder(SP::SRArr)
+                               .addReg(MCRegOp.getReg())
+                               .addReg(MCRegOp.getReg())
+                               .addReg(Sparc::G0));
+  }
+
+  return false;
+}
+
 bool SparcAsmParser::expandSETX(MCInst &Inst, SMLoc IDLoc,
                                 SmallVectorImpl<MCInst> &Instructions) {
   MCOperand MCRegOp = Inst.getOperand(0);
@@ -826,6 +892,10 @@ bool SparcAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
       if (expandSET(Inst, IDLoc, Instructions))
         return true;
       break;
+    case SP::SETSW:
+      if (expandSETSW(Inst, IDLoc, Instructions))
+        return true;
+      break;
     case SP::SETX:
       if (expandSETX(Inst, IDLoc, Instructions))
         return true;
diff --git a/llvm/lib/Target/Sparc/SparcInstrAliases.td b/llvm/lib/Target/Sparc/SparcInstrAliases.td
index 906f51b..bc57ddb 100644
--- a/llvm/lib/Target/Sparc/SparcInstrAliases.td
+++ b/llvm/lib/Target/Sparc/SparcInstrAliases.td
@@ -450,6 +450,10 @@ def : InstAlias<"save", (SAVErr G0, G0, G0)>;
 // def : InstAlias<"set $val, $rd", (ORri IntRegs:$rd, (SETHIi (HI22 imm:$val)), (LO10 imm:$val))>;
 def SET : AsmPseudoInst<(outs IntRegs:$rd), (ins i32imm:$val), "set $val, $rd">;
 
+// setsw value, rd
+// (turns into a sequence of sethi+or+sra, depending on the value)
+def SETSW : AsmPseudoInst<(outs IntRegs:$rd), (ins i32imm:$val), "setsw $val, $rd">, Requires<[HasV9]>;
+
 // setx value, tmp, rd
 // (turns into a sequence of sethi+or+shift, depending on the value)
 def SETX : AsmPseudoInst<(outs I64Regs:$rd),
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 9000df2..092515e 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -205,6 +205,12 @@ SystemZTargetMachine::getSubtargetImpl(const Function &F) const {
   return I.get();
 }
 
+ScheduleDAGInstrs *
+SystemZTargetMachine::createPostMachineScheduler(MachineSchedContext *C) const {
+  return new ScheduleDAGMI(C, std::make_unique<SystemZPostRASchedStrategy>(C),
+                           /*RemoveKillFlags=*/true);
+}
+
 namespace {
 
 /// SystemZ Code Generator Pass Configuration Options.
@@ -217,13 +223,6 @@ public:
     return getTM<SystemZTargetMachine>();
   }
 
-  ScheduleDAGInstrs *
-  createPostMachineScheduler(MachineSchedContext *C) const override {
-    return new ScheduleDAGMI(C,
-                             std::make_unique<SystemZPostRASchedStrategy>(C),
-                             /*RemoveKillFlags=*/true);
-  }
-
   void addIRPasses() override;
   bool addInstSelector() override;
   bool addILPOpts() override;
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.h b/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
index e8eeb85..cced57a 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -55,6 +55,8 @@ public:
   MachineFunctionInfo *
   createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F,
                             const TargetSubtargetInfo *STI) const override;
+  ScheduleDAGInstrs *
+  createPostMachineScheduler(MachineSchedContext *C) const override;
 
   bool targetSchedulesPostRAScheduling() const override { return true; };
 };
diff --git a/llvm/lib/Target/X86/X86MacroFusion.h b/llvm/lib/Target/X86/X86MacroFusion.h
index 05388b2..ee07977 100644
--- a/llvm/lib/Target/X86/X86MacroFusion.h
+++ b/llvm/lib/Target/X86/X86MacroFusion.h
@@ -22,7 +22,7 @@ class ScheduleDAGMutation;
 
 /// Note that you have to add:
 ///   DAG.addMutation(createX86MacroFusionDAGMutation());
-/// to X86PassConfig::createMachineScheduler() to have an effect.
+/// to X86TargetMachine::createMachineScheduler() to have an effect.
 std::unique_ptr<ScheduleDAGMutation>
 createX86MacroFusionDAGMutation();
 
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 4faf8bca..af10605 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -224,21 +224,6 @@ X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
   }
 }
 
-bool X86RegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
-                                           unsigned DefSubReg,
-                                           const TargetRegisterClass *SrcRC,
-                                           unsigned SrcSubReg) const {
-  // Prevent rewriting a copy where the destination size is larger than the
-  // input size. See PR41619.
-  // FIXME: Should this be factored into the base implementation somehow.
-  if (DefRC->hasSuperClassEq(&X86::GR64RegClass) && DefSubReg == 0 &&
-      SrcRC->hasSuperClassEq(&X86::GR64RegClass) && SrcSubReg == X86::sub_32bit)
-    return false;
-
-  return TargetRegisterInfo::shouldRewriteCopySrc(DefRC, DefSubReg,
-                                                  SrcRC, SrcSubReg);
-}
-
 const TargetRegisterClass *
 X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const {
   const Function &F = MF.getFunction();
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h
index 68ee372..009d2a8 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -70,11 +70,6 @@ public:
   getLargestLegalSuperClass(const TargetRegisterClass *RC,
                             const MachineFunction &MF) const override;
 
-  bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
-                            unsigned DefSubReg,
-                            const TargetRegisterClass *SrcRC,
-                            unsigned SrcSubReg) const override;
-
   /// getPointerRegClass - Returns a TargetRegisterClass used for pointer
   /// values.
   const TargetRegisterClass *
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 20dfdd2..0430279 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -374,6 +374,20 @@ bool X86TargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
 
 void X86TargetMachine::reset() { SubtargetMap.clear(); }
 
+ScheduleDAGInstrs *
+X86TargetMachine::createMachineScheduler(MachineSchedContext *C) const {
+  ScheduleDAGMILive *DAG = createGenericSchedLive(C);
+  DAG->addMutation(createX86MacroFusionDAGMutation());
+  return DAG;
+}
+
+ScheduleDAGInstrs *
+X86TargetMachine::createPostMachineScheduler(MachineSchedContext *C) const {
+  ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
+  DAG->addMutation(createX86MacroFusionDAGMutation());
+  return DAG;
+}
+
 //===----------------------------------------------------------------------===//
 // X86 TTI query.
 //===----------------------------------------------------------------------===//
@@ -399,20 +413,6 @@ public:
     return getTM<X86TargetMachine>();
   }
 
-  ScheduleDAGInstrs *
-  createMachineScheduler(MachineSchedContext *C) const override {
-    ScheduleDAGMILive *DAG = createGenericSchedLive(C);
-    DAG->addMutation(createX86MacroFusionDAGMutation());
-    return DAG;
-  }
-
-  ScheduleDAGInstrs *
-  createPostMachineScheduler(MachineSchedContext *C) const override {
-    ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
-    DAG->addMutation(createX86MacroFusionDAGMutation());
-    return DAG;
-  }
-
   void addIRPasses() override;
   bool addInstSelector() override;
   bool addIRTranslator() override;
diff --git a/llvm/lib/Target/X86/X86TargetMachine.h b/llvm/lib/Target/X86/X86TargetMachine.h
index b8d84a8..ced0a9c 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.h
+++ b/llvm/lib/Target/X86/X86TargetMachine.h
@@ -79,6 +79,10 @@ public:
   bool isJIT() const { return IsJIT; }
 
   bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
+  ScheduleDAGInstrs *
+  createMachineScheduler(MachineSchedContext *C) const override;
+  ScheduleDAGInstrs *
+  createPostMachineScheduler(MachineSchedContext *C) const override;
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index ca8a20b..8701f7c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -4150,9 +4150,6 @@ Value *InstCombinerImpl::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS,
     }
   }
 
-  // TODO: This can be generalized to compares of non-signbits using
-  // decomposeBitTestICmp(). It could be enhanced more by using (something like)
-  // foldLogOpOfMaskedICmps().
   const APInt *LC, *RC;
   if (match(LHS1, m_APInt(LC)) && match(RHS1, m_APInt(RC)) &&
       LHS0->getType() == RHS0->getType() &&
@@ -4200,6 +4197,21 @@ Value *InstCombinerImpl::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS,
           }
         }
     }
+
+    // Fold (icmp eq/ne (X & Pow2), 0) ^ (icmp eq/ne (Y & Pow2), 0) into
+    // (icmp eq/ne ((X ^ Y) & Pow2), 0)
+    Value *X, *Y;
+    const APInt *Mask;
+    if (ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) &&
+        LC->isZero() && RC->isZero() && LHS->hasOneUse() && RHS->hasOneUse() &&
+        match(LHS0, m_And(m_Value(X), m_Power2(Mask))) &&
+        match(RHS0, m_And(m_Value(Y), m_SpecificInt(*Mask)))) {
+      Value *Xor = Builder.CreateXor(X, Y);
+      Value *And = Builder.CreateAnd(Xor, *Mask);
+      return Builder.CreateICmp(PredL == PredR ? ICmpInst::ICMP_NE
+                                               : ICmpInst::ICMP_EQ,
+                                And, ConstantInt::getNullValue(Xor->getType()));
+    }
   }
 
   // Instead of trying to imitate the folds for and/or, decompose this 'xor'
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 382078e..2e14145 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3432,7 +3432,7 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {
 static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
                                         const APInt *Cond1, Value *CtlzOp,
                                         unsigned BitWidth,
-                                        bool &ShouldDropNUW) {
+                                        bool &ShouldDropNoWrap) {
   // The challenge in recognizing std::bit_ceil(X) is that the operand is used
   // for the CTLZ proper and select condition, each possibly with some
   // operation like add and sub.
@@ -3455,7 +3455,7 @@ static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
   ConstantRange CR = ConstantRange::makeExactICmpRegion(
       CmpInst::getInversePredicate(Pred), *Cond1);
 
-  ShouldDropNUW = false;
+  ShouldDropNoWrap = false;
 
   // Match the operation that's used to compute CtlzOp from CommonAncestor.  If
   // CtlzOp == CommonAncestor, return true as no operation is needed.  If a
@@ -3466,11 +3466,12 @@ static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
     if (CtlzOp == CommonAncestor)
       return true;
     if (match(CtlzOp, m_Add(m_Specific(CommonAncestor), m_APInt(C)))) {
+      ShouldDropNoWrap = true;
       CR = CR.add(*C);
       return true;
     }
     if (match(CtlzOp, m_Sub(m_APInt(C), m_Specific(CommonAncestor)))) {
-      ShouldDropNUW = true;
+      ShouldDropNoWrap = true;
       CR = ConstantRange(*C).sub(CR);
       return true;
     }
@@ -3541,19 +3542,21 @@ static Instruction *foldBitCeil(SelectInst &SI, IRBuilderBase &Builder,
     Pred = CmpInst::getInversePredicate(Pred);
   }
 
-  bool ShouldDropNUW;
+  bool ShouldDropNoWrap;
 
   if (!match(FalseVal, m_One()) ||
       !match(TrueVal,
              m_OneUse(m_Shl(m_One(), m_OneUse(m_Sub(m_SpecificInt(BitWidth),
                                                     m_Value(Ctlz)))))) ||
-      !match(Ctlz, m_Intrinsic<Intrinsic::ctlz>(m_Value(CtlzOp), m_Zero())) ||
+      !match(Ctlz, m_Intrinsic<Intrinsic::ctlz>(m_Value(CtlzOp), m_Value())) ||
       !isSafeToRemoveBitCeilSelect(Pred, Cond0, Cond1, CtlzOp, BitWidth,
-                                   ShouldDropNUW))
+                                   ShouldDropNoWrap))
     return nullptr;
 
-  if (ShouldDropNUW)
+  if (ShouldDropNoWrap) {
     cast<Instruction>(CtlzOp)->setHasNoUnsignedWrap(false);
+    cast<Instruction>(CtlzOp)->setHasNoSignedWrap(false);
+  }
 
   // Build 1 << (-CTLZ & (BitWidth-1)).  The negation likely corresponds to a
   // single hardware instruction as opposed to BitWidth - CTLZ, where BitWidth
@@ -3562,6 +3565,8 @@ static Instruction *foldBitCeil(SelectInst &SI, IRBuilderBase &Builder,
 
   // Drop range attributes and re-infer them in the next iteration.
   cast<Instruction>(Ctlz)->dropPoisonGeneratingAnnotations();
+  // Set is_zero_poison to false and re-infer them in the next iteration.
+  cast<Instruction>(Ctlz)->setOperand(1, Builder.getFalse());
   IC.addToWorklist(cast<Instruction>(Ctlz));
   Value *Neg = Builder.CreateNeg(Ctlz);
   Value *Masked =
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index ca125d2..d88fdf4 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -276,6 +276,27 @@ static bool hasSupportedLoopDepth(SmallVectorImpl<Loop *> &LoopList,
   }
   return true;
 }
+
+static bool isComputableLoopNest(ScalarEvolution *SE,
+                                 ArrayRef<Loop *> LoopList) {
+  for (Loop *L : LoopList) {
+    const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
+    if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
+      LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");
+      return false;
+    }
+    if (L->getNumBackEdges() != 1) {
+      LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
+      return false;
+    }
+    if (!L->getExitingBlock()) {
+      LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n");
+      return false;
+    }
+  }
+  return true;
+}
+
 namespace {
 
 /// LoopInterchangeLegality checks if it is legal to interchange the loop.
@@ -431,25 +452,6 @@ struct LoopInterchange {
     return processLoopList(LoopList);
   }
 
-  bool isComputableLoopNest(ArrayRef<Loop *> LoopList) {
-    for (Loop *L : LoopList) {
-      const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
-      if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
-        LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");
-        return false;
-      }
-      if (L->getNumBackEdges() != 1) {
-        LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
-        return false;
-      }
-      if (!L->getExitingBlock()) {
-        LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n");
-        return false;
-      }
-    }
-    return true;
-  }
-
   unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) {
     // TODO: Add a better heuristic to select the loop to be interchanged based
     // on the dependence matrix. Currently we select the innermost loop.
@@ -464,10 +466,6 @@ struct LoopInterchange {
            "Unsupported depth of loop nest.");
 
     unsigned LoopNestDepth = LoopList.size();
-    if (!isComputableLoopNest(LoopList)) {
-      LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
-      return false;
-    }
 
     LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth
                       << "\n");
@@ -1761,10 +1759,23 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
   // Ensure minimum depth of the loop nest to do the interchange.
   if (!hasSupportedLoopDepth(LoopList, ORE))
     return PreservedAnalyses::all();
+  // Ensure computable loop nest.
+  if (!isComputableLoopNest(&AR.SE, LoopList)) {
+    LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
+    return PreservedAnalyses::all();
+  }
+
+  ORE.emit([&]() {
+    return OptimizationRemarkAnalysis(DEBUG_TYPE, "Dependence",
+                                      LN.getOutermostLoop().getStartLoc(),
+                                      LN.getOutermostLoop().getHeader())
+           << "Computed dependence info, invoking the transform.";
+  });
+
   DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
   std::unique_ptr<CacheCost> CC =
       CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI);
-  
+
   if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN))
     return PreservedAnalyses::all();
   U.markLoopNestChanged(true);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 660a6ef5..02b79f2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7552,7 +7552,14 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
   VPCostContext CostCtx(CM.TTI, *CM.TLI, Legal->getWidestInductionType(), CM,
                         CM.CostKind);
   precomputeCosts(BestPlan, BestFactor.Width, CostCtx);
-  assert((BestFactor.Width == LegacyVF.Width ||
+  // Set PlanForEarlyExitLoop to true if the BestPlan has been built from a
+  // loop with an uncountable early exit. The legacy cost model doesn't
+  // properly model costs for such loops.
+  bool PlanForEarlyExitLoop =
+      BestPlan.getVectorLoopRegion() &&
+      BestPlan.getVectorLoopRegion()->getSingleSuccessor() !=
+          BestPlan.getMiddleBlock();
+  assert((BestFactor.Width == LegacyVF.Width || PlanForEarlyExitLoop ||
           planContainsAdditionalSimplifications(getPlanFor(BestFactor.Width),
                                                 CostCtx, OrigLoop) ||
           planContainsAdditionalSimplifications(getPlanFor(LegacyVF.Width),
@@ -8542,8 +8549,7 @@ bool VPRecipeBuilder::shouldWiden(Instruction *I, VFRange &Range) const {
 }
 
 VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
-                                           ArrayRef<VPValue *> Operands,
-                                           VPBasicBlock *VPBB) {
+                                           ArrayRef<VPValue *> Operands) {
   switch (I->getOpcode()) {
   default:
     return nullptr;
@@ -8823,10 +8829,8 @@ bool VPRecipeBuilder::getScaledReductions(
   return false;
 }
 
-VPRecipeBase *
-VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
-                                        ArrayRef<VPValue *> Operands,
-                                        VFRange &Range, VPBasicBlock *VPBB) {
+VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
+    Instruction *Instr, ArrayRef<VPValue *> Operands, VFRange &Range) {
   // First, check for specific widening recipes that deal with inductions, Phi
   // nodes, calls and memory operations.
   VPRecipeBase *Recipe;
@@ -8905,7 +8909,7 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
                                  *CI);
   }
 
-  return tryToWiden(Instr, Operands, VPBB);
+  return tryToWiden(Instr, Operands);
 }
 
 VPRecipeBase *
@@ -8933,14 +8937,15 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
   for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
     VFRange SubRange = {VF, MaxVFTimes2};
     if (auto Plan = tryToBuildVPlanWithVPRecipes(SubRange)) {
+      bool HasScalarVF = Plan->hasVF(ElementCount::getFixed(1));
       // Now optimize the initial VPlan.
-      if (!Plan->hasVF(ElementCount::getFixed(1)))
+      if (!HasScalarVF)
         VPlanTransforms::runPass(VPlanTransforms::truncateToMinimalBitwidths,
                                  *Plan, CM.getMinimalBitwidths());
       VPlanTransforms::optimize(*Plan);
       // TODO: try to put it close to addActiveLaneMask().
       // Discard the plan if it is not EVL-compatible
-      if (CM.foldTailWithEVL() &&
+      if (CM.foldTailWithEVL() && !HasScalarVF &&
           !VPlanTransforms::runPass(VPlanTransforms::tryAddExplicitVectorLength,
                                     *Plan, CM.getMaxSafeElements()))
         break;
@@ -9373,7 +9378,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
       }
 
       VPRecipeBase *Recipe =
-          RecipeBuilder.tryToCreateWidenRecipe(Instr, Operands, Range, VPBB);
+          RecipeBuilder.tryToCreateWidenRecipe(Instr, Operands, Range);
       if (!Recipe)
         Recipe = RecipeBuilder.handleReplication(Instr, Operands, Range);
 
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 558d75c..e1c0807 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7659,32 +7659,38 @@ buildIntrinsicArgTypes(const CallInst *CI, const Intrinsic::ID ID,
 }
 
 /// Calculates the costs of vectorized intrinsic (if possible) and vectorized
-/// function (if possible) calls.
+/// function (if possible) calls. Returns invalid cost for the corresponding
+/// calls, if they cannot be vectorized/will be scalarized.
 static std::pair<InstructionCost, InstructionCost>
 getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
                    TargetTransformInfo *TTI, TargetLibraryInfo *TLI,
                    ArrayRef<Type *> ArgTys) {
-  Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
-
-  // Calculate the cost of the scalar and vector calls.
-  FastMathFlags FMF;
-  if (auto *FPCI = dyn_cast<FPMathOperator>(CI))
-    FMF = FPCI->getFastMathFlags();
-  IntrinsicCostAttributes CostAttrs(ID, VecTy, ArgTys, FMF);
-  auto IntrinsicCost =
-    TTI->getIntrinsicInstrCost(CostAttrs, TTI::TCK_RecipThroughput);
-
   auto Shape = VFShape::get(CI->getFunctionType(),
                             ElementCount::getFixed(VecTy->getNumElements()),
                             false /*HasGlobalPred*/);
   Function *VecFunc = VFDatabase(*CI).getVectorizedFunction(Shape);
-  auto LibCost = IntrinsicCost;
+  auto LibCost = InstructionCost::getInvalid();
   if (!CI->isNoBuiltin() && VecFunc) {
     // Calculate the cost of the vector library call.
     // If the corresponding vector call is cheaper, return its cost.
     LibCost =
         TTI->getCallInstrCost(nullptr, VecTy, ArgTys, TTI::TCK_RecipThroughput);
   }
+  Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
+
+  // Calculate the cost of the vector intrinsic call.
+  FastMathFlags FMF;
+  if (auto *FPCI = dyn_cast<FPMathOperator>(CI))
+    FMF = FPCI->getFastMathFlags();
+  const InstructionCost ScalarLimit = 10000;
+  IntrinsicCostAttributes CostAttrs(ID, VecTy, ArgTys, FMF, nullptr,
+                                    LibCost.isValid() ? LibCost : ScalarLimit);
+  auto IntrinsicCost =
+      TTI->getIntrinsicInstrCost(CostAttrs, TTI::TCK_RecipThroughput);
+  if ((LibCost.isValid() && IntrinsicCost > LibCost) ||
+      (!LibCost.isValid() && IntrinsicCost > ScalarLimit))
+    IntrinsicCost = InstructionCost::getInvalid();
+
   return {IntrinsicCost, LibCost};
 }
 
@@ -8028,6 +8034,12 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
         return TreeEntry::NeedToGather;
       }
     }
+    SmallVector<Type *> ArgTys =
+        buildIntrinsicArgTypes(CI, ID, VL.size(), 0, TTI);
+    auto *VecTy = getWidenedType(S.getMainOp()->getType(), VL.size());
+    auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI, ArgTys);
+    if (!VecCallCosts.first.isValid() && !VecCallCosts.second.isValid())
+      return TreeEntry::NeedToGather;
 
     return TreeEntry::Vectorize;
   }
@@ -10706,6 +10718,7 @@ public:
         });
     SmallPtrSet<Value *, 4> UniqueBases;
     unsigned SliceSize = getPartNumElems(VL.size(), NumParts);
+    SmallDenseMap<Value *, APInt, 4> VectorOpsToExtracts;
     for (unsigned Part : seq<unsigned>(NumParts)) {
       unsigned Limit = getNumElems(VL.size(), SliceSize, Part);
       ArrayRef<int> SubMask = Mask.slice(Part * SliceSize, Limit);
@@ -10756,10 +10769,18 @@ public:
             continue;
           }
         }
-        Cost -= TTI.getVectorInstrCost(*EE, EE->getVectorOperandType(),
-                                       CostKind, Idx);
-      }
-    }
+        APInt &DemandedElts =
+            VectorOpsToExtracts
+                .try_emplace(VecBase,
+                             APInt::getZero(getNumElements(VecBase->getType())))
+                .first->getSecond();
+        DemandedElts.setBit(Idx);
+      }
+    }
+    for (const auto &[Vec, DemandedElts] : VectorOpsToExtracts)
+      Cost -= TTI.getScalarizationOverhead(cast<VectorType>(Vec->getType()),
+                                           DemandedElts, /*Insert=*/false,
+                                           /*Extract=*/true, CostKind);
     // Check that gather of extractelements can be represented as just a
     // shuffle of a single/two vectors the scalars are extracted from.
     // Found the bunch of extractelement instructions that must be gathered
@@ -11283,24 +11304,27 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
   }
   case Instruction::ExtractValue:
   case Instruction::ExtractElement: {
+    APInt DemandedElts;
+    VectorType *SrcVecTy = nullptr;
     auto GetScalarCost = [&](unsigned Idx) {
       if (isa<PoisonValue>(UniqueValues[Idx]))
         return InstructionCost(TTI::TCC_Free);
 
       auto *I = cast<Instruction>(UniqueValues[Idx]);
-      VectorType *SrcVecTy;
-      if (ShuffleOrOp == Instruction::ExtractElement) {
-        auto *EE = cast<ExtractElementInst>(I);
-        SrcVecTy = EE->getVectorOperandType();
-      } else {
-        auto *EV = cast<ExtractValueInst>(I);
-        Type *AggregateTy = EV->getAggregateOperand()->getType();
-        unsigned NumElts;
-        if (auto *ATy = dyn_cast<ArrayType>(AggregateTy))
-          NumElts = ATy->getNumElements();
-        else
-          NumElts = AggregateTy->getStructNumElements();
-        SrcVecTy = getWidenedType(OrigScalarTy, NumElts);
+      if (!SrcVecTy) {
+        if (ShuffleOrOp == Instruction::ExtractElement) {
+          auto *EE = cast<ExtractElementInst>(I);
+          SrcVecTy = EE->getVectorOperandType();
+        } else {
+          auto *EV = cast<ExtractValueInst>(I);
+          Type *AggregateTy = EV->getAggregateOperand()->getType();
+          unsigned NumElts;
+          if (auto *ATy = dyn_cast<ArrayType>(AggregateTy))
+            NumElts = ATy->getNumElements();
+          else
+            NumElts = AggregateTy->getStructNumElements();
+          SrcVecTy = getWidenedType(OrigScalarTy, NumElts);
+        }
       }
       if (I->hasOneUse()) {
         Instruction *Ext = I->user_back();
@@ -11317,10 +11341,18 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
           return Cost;
         }
       }
-      return TTI->getVectorInstrCost(Instruction::ExtractElement, SrcVecTy,
-                                     CostKind, *getExtractIndex(I));
+      if (DemandedElts.isZero())
+        DemandedElts = APInt::getZero(getNumElements(SrcVecTy));
+      DemandedElts.setBit(*getExtractIndex(I));
+      return InstructionCost(TTI::TCC_Free);
+    };
+    auto GetVectorCost = [&, &TTI = *TTI](InstructionCost CommonCost) {
+      return CommonCost - (DemandedElts.isZero()
+                               ? TTI::TCC_Free
+                               : TTI.getScalarizationOverhead(
+                                     SrcVecTy, DemandedElts, /*Insert=*/false,
+                                     /*Extract=*/true, CostKind));
     };
-    auto GetVectorCost = [](InstructionCost CommonCost) { return CommonCost; };
     return GetCostDiff(GetScalarCost, GetVectorCost);
   }
   case Instruction::InsertElement: {
@@ -13663,6 +13695,7 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
   // Check if the same elements are inserted several times and count them as
   // shuffle candidates.
   APInt ShuffledElements = APInt::getZero(VL.size());
+  APInt DemandedElements = APInt::getZero(VL.size());
   DenseMap<Value *, unsigned> UniqueElements;
   constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   InstructionCost Cost;
@@ -13673,9 +13706,7 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
       V = nullptr;
     }
     if (!ForPoisonSrc)
-      Cost +=
-          TTI->getVectorInstrCost(Instruction::InsertElement, VecTy, CostKind,
-                                  I, Constant::getNullValue(VecTy), V);
+      DemandedElements.setBit(I);
   };
   SmallVector<int> ShuffleMask(VL.size(), PoisonMaskElem);
   for (unsigned I = 0, E = VL.size(); I < E; ++I) {
@@ -13698,6 +13729,10 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
     ShuffledElements.setBit(I);
     ShuffleMask[I] = Res.first->second;
   }
+  if (!DemandedElements.isZero())
+    Cost +=
+        TTI->getScalarizationOverhead(VecTy, DemandedElements, /*Insert=*/true,
+                                      /*Extract=*/false, CostKind, VL);
   if (ForPoisonSrc) {
     if (isa<FixedVectorType>(ScalarTy)) {
       assert(SLPReVec && "Only supported by REVEC.");
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index e81247c..e8d3ad8 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -128,8 +128,7 @@ class VPRecipeBuilder {
   /// Check if \p I has an opcode that can be widened and return a VPWidenRecipe
   /// if it can. The function should only be called if the cost-model indicates
   /// that widening should be performed.
-  VPWidenRecipe *tryToWiden(Instruction *I, ArrayRef<VPValue *> Operands,
-                            VPBasicBlock *VPBB);
+  VPWidenRecipe *tryToWiden(Instruction *I, ArrayRef<VPValue *> Operands);
 
   /// Makes Histogram count operations safe for vectorization, by emitting a
   /// llvm.experimental.vector.histogram.add intrinsic in place of the
@@ -174,7 +173,7 @@ public:
   /// the given VF \p Range.
   VPRecipeBase *tryToCreateWidenRecipe(Instruction *Instr,
                                        ArrayRef<VPValue *> Operands,
-                                       VFRange &Range, VPBasicBlock *VPBB);
+                                       VFRange &Range);
 
   /// Create and return a partial reduction recipe for a reduction instruction
   /// along with binary operation and reduction phi operands.
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index fac2072..5b9dcf6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -972,10 +972,7 @@ public:
 
   /// Return the cost of this VPInstruction.
   InstructionCost computeCost(ElementCount VF,
-                              VPCostContext &Ctx) const override {
-    // TODO: Compute accurate cost after retiring the legacy cost model.
-    return 0;
-  }
+                              VPCostContext &Ctx) const override;
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   /// Print the VPInstruction to \p O.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index c84a93d..b734ddf 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -711,6 +711,20 @@ Value *VPInstruction::generate(VPTransformState &State) {
   }
 }
 
+InstructionCost VPInstruction::computeCost(ElementCount VF,
+                                           VPCostContext &Ctx) const {
+  switch (getOpcode()) {
+  case VPInstruction::AnyOf: {
+    auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
+    return Ctx.TTI.getArithmeticReductionCost(
+        Instruction::Or, cast<VectorType>(VecTy), std::nullopt, Ctx.CostKind);
+  }
+  default:
+    // TODO: Fill out other opcodes!
+    return 0;
+  }
+}
+
 bool VPInstruction::isVectorToScalar() const {
   return getOpcode() == VPInstruction::ExtractFromEnd ||
          getOpcode() == VPInstruction::ExtractFirstActive ||
diff --git a/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll b/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll
index d85c9e2..0928935 100644
--- a/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll
@@ -1508,30 +1508,30 @@ define void @fmuladd() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <2 x bfloat> @llvm.fmuladd.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x bfloat> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <4 x bfloat> @llvm.fmuladd.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x bfloat> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <8 x bfloat> @llvm.fmuladd.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef, <8 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <16 x bfloat> @llvm.fmuladd.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef, <16 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %7 = call <16 x bfloat> @llvm.fmuladd.v16bf16(<16 x bfloat> undef, <16 x bfloat> undef, <16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %11 = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %12 = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %11 = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %15 = call <16 x double> @llvm.fmuladd.v16f64(<16 x double> undef, <16 x double> undef, <16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x double> @llvm.fmuladd.v16f64(<16 x double> undef, <16 x double> undef, <16 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x bfloat> @llvm.fmuladd.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef, <vscale x 1 x bfloat> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x bfloat> @llvm.fmuladd.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef, <vscale x 2 x bfloat> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x bfloat> @llvm.fmuladd.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef, <vscale x 4 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x bfloat> @llvm.fmuladd.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %20 = call <vscale x 16 x bfloat> @llvm.fmuladd.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %19 = call <vscale x 8 x bfloat> @llvm.fmuladd.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef, <vscale x 8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %20 = call <vscale x 16 x bfloat> @llvm.fmuladd.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef, <vscale x 16 x bfloat> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %21 = call <vscale x 1 x float> @llvm.fmuladd.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef, <vscale x 1 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = call <vscale x 2 x float> @llvm.fmuladd.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %23 = call <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %24 = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = call <vscale x 16 x float> @llvm.fmuladd.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %23 = call <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %24 = call <vscale x 8 x float> @llvm.fmuladd.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef, <vscale x 8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %25 = call <vscale x 16 x float> @llvm.fmuladd.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef, <vscale x 16 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %26 = call <vscale x 1 x double> @llvm.fmuladd.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef, <vscale x 1 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %27 = call <vscale x 2 x double> @llvm.fmuladd.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %28 = call <vscale x 4 x double> @llvm.fmuladd.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %29 = call <vscale x 8 x double> @llvm.fmuladd.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %30 = call <vscale x 16 x double> @llvm.fmuladd.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x double> undef, <vscale x 16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %27 = call <vscale x 2 x double> @llvm.fmuladd.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef, <vscale x 2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %28 = call <vscale x 4 x double> @llvm.fmuladd.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef, <vscale x 4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %29 = call <vscale x 8 x double> @llvm.fmuladd.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x double> undef, <vscale x 8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %30 = call <vscale x 16 x double> @llvm.fmuladd.nxv16f64(<vscale x 16 x double> undef, <vscale x 16 x double> undef, <vscale x 16 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call bfloat @llvm.fmuladd.bf16(bfloat undef, bfloat undef, bfloat undef)
@@ -1568,31 +1568,18 @@ define void @fmuladd() {
 }
 
 define void @fmuladd_f16() {
-; ZVFH-LABEL: 'fmuladd_f16'
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fmuladd.f16(half undef, half undef, half undef)
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef)
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %3 = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x half> @llvm.fmuladd.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x half> undef)
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x half> @llvm.fmuladd.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef)
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x half> @llvm.fmuladd.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef)
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef)
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x half> @llvm.fmuladd.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef)
-; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; ZVFHMIN-LABEL: 'fmuladd_f16'
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fmuladd.f16(half undef, half undef, half undef)
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef)
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x half> @llvm.fmuladd.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x half> undef)
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x half> @llvm.fmuladd.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef)
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x half> @llvm.fmuladd.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef)
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef)
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x half> @llvm.fmuladd.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef)
-; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; CHECK-LABEL: 'fmuladd_f16'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fmuladd.f16(half undef, half undef, half undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x half> @llvm.fmuladd.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef, <vscale x 1 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x half> @llvm.fmuladd.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x half> @llvm.fmuladd.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x half> undef, <vscale x 4 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %9 = call <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x half> undef, <vscale x 8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = call <vscale x 16 x half> @llvm.fmuladd.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef, <vscale x 16 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call half @llvm.fmuladd.f16(half undef, half undef, half undef)
   call <2 x half> @llvm.fmuladd.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll
index 698cce6..7fc91e7 100644
--- a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll
@@ -1614,6 +1614,41 @@ define void @splat() {
   ret void
 }
 
+define void @splice() {
+; CHECK-LABEL: 'splice'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv16i8 = call <vscale x 16 x i8> @llvm.experimental.vp.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 1, <vscale x 16 x i1> zeroinitializer, i32 poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %splice_nxv32i8 = call <vscale x 32 x i8> @llvm.experimental.vp.splice.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 1, <vscale x 32 x i1> zeroinitializer, i32 poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv2i16 = call <vscale x 2 x i16> @llvm.experimental.vp.splice.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 1, <vscale x 2 x i1> zeroinitializer, i32 poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv4i16 = call <vscale x 4 x i16> @llvm.experimental.vp.splice.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 1, <vscale x 4 x i1> zeroinitializer, i32 poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv8i16 = call <vscale x 8 x i16> @llvm.experimental.vp.splice.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 1, <vscale x 8 x i1> zeroinitializer, i32 poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %splice_nxv16i16 = call <vscale x 16 x i16> @llvm.experimental.vp.splice.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 1, <vscale x 16 x i1> zeroinitializer, i32 poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv4i32 = call <vscale x 4 x i32> @llvm.experimental.vp.splice.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 1, <vscale x 4 x i1> zeroinitializer, i32 poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %splice_nxv8i32 = call <vscale x 8 x i32> @llvm.experimental.vp.splice.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 1, <vscale x 8 x i1> zeroinitializer, i32 poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv2i64 = call <vscale x 2 x i64> @llvm.experimental.vp.splice.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 1, <vscale x 2 x i1> zeroinitializer, i32 poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %splice_nxv4i64 = call <vscale x 4 x i64> @llvm.experimental.vp.splice.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 1, <vscale x 4 x i1> zeroinitializer, i32 poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %splice_nxv16i1 = call <vscale x 16 x i1> @llvm.experimental.vp.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1, <vscale x 16 x i1> zeroinitializer, i32 poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv8i1 = call <vscale x 8 x i1> @llvm.experimental.vp.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1, <vscale x 8 x i1> zeroinitializer, i32 poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv4i1 = call <vscale x 4 x i1> @llvm.experimental.vp.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1, <vscale x 4 x i1> zeroinitializer, i32 poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %splice_nxv2i1 = call <vscale x 2 x i1> @llvm.experimental.vp.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1, <vscale x 2 x i1> zeroinitializer, i32 poison, i32 poison)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %splice_nxv16i8 = call <vscale x 16 x i8> @llvm.experimental.vp.splice.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer, i32 1, <vscale x 16 x i1> zeroinitializer, i32 poison, i32 poison)
+  %splice_nxv32i8 = call <vscale x 32 x i8> @llvm.experimental.vp.splice.nxv32i8(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i32 1, <vscale x 32 x i1> zeroinitializer, i32 poison, i32 poison)
+  %splice_nxv2i16 = call <vscale x 2 x i16> @llvm.experimental.vp.splice.nxv2i16(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i32 1, <vscale x 2 x i1> zeroinitializer, i32 poison, i32 poison)
+  %splice_nxv4i16 = call <vscale x 4 x i16> @llvm.experimental.vp.splice.nxv4i16(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x i16> zeroinitializer, i32 1, <vscale x 4 x i1> zeroinitializer, i32 poison, i32 poison)
+  %splice_nxv8i16 = call <vscale x 8 x i16> @llvm.experimental.vp.splice.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer, i32 1, <vscale x 8 x i1> zeroinitializer, i32 poison, i32 poison)
+  %splice_nxv16i16 = call <vscale x 16 x i16> @llvm.experimental.vp.splice.nxv16i16(<vscale x 16 x i16> zeroinitializer, <vscale x 16 x i16> zeroinitializer, i32 1, <vscale x 16 x i1> zeroinitializer, i32 poison, i32 poison)
+  %splice_nxv4i32 = call <vscale x 4 x i32> @llvm.experimental.vp.splice.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer, i32 1, <vscale x 4 x i1> zeroinitializer, i32 poison, i32 poison)
+  %splice_nxv8i32 = call <vscale x 8 x i32> @llvm.experimental.vp.splice.nxv8i32(<vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> zeroinitializer, i32 1, <vscale x 8 x i1> zeroinitializer, i32 poison, i32 poison)
+  %splice_nxv2i64 = call <vscale x 2 x i64> @llvm.experimental.vp.splice.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> zeroinitializer, i32 1, <vscale x 2 x i1> zeroinitializer, i32 poison, i32 poison)
+  %splice_nxv4i64 = call <vscale x 4 x i64> @llvm.experimental.vp.splice.nxv4i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i32 1, <vscale x 4 x i1> zeroinitializer, i32 poison, i32 poison)
+  %splice_nxv16i1 = call <vscale x 16 x i1> @llvm.experimental.vp.splice.nxv16i1(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer, i32 1, <vscale x 16 x i1> zeroinitializer, i32 poison, i32 poison)
+  %splice_nxv8i1 =  call <vscale x 8 x i1> @llvm.experimental.vp.splice.nxv8i1(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i1> zeroinitializer, i32 1, <vscale x 8 x i1> zeroinitializer, i32 poison, i32 poison)
+  %splice_nxv4i1 = call <vscale x 4 x i1> @llvm.experimental.vp.splice.nxv4i1(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, i32 1, <vscale x 4 x i1> zeroinitializer, i32 poison, i32 poison)
+  %splice_nxv2i1 = call <vscale x 2 x i1> @llvm.experimental.vp.splice.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer, i32 1, <vscale x 2 x i1> zeroinitializer, i32 poison, i32 poison)
+  ret void
+}
+
 declare <2 x i8> @llvm.vp.add.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32)
 declare <4 x i8> @llvm.vp.add.v4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32)
 declare <8 x i8> @llvm.vp.add.v8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32)
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
index d93ef6f8..94d4614 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
@@ -12,8 +12,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_monotonic:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value monotonic, align 1
     ret i8 %r
 }
@@ -27,8 +27,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acquire:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acquire, align 1
     ret i8 %r
 }
@@ -42,8 +42,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_release:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value release, align 1
     ret i8 %r
 }
@@ -57,8 +57,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acq_rel:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acq_rel, align 1
     ret i8 %r
 }
@@ -72,8 +72,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_seq_cst:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value seq_cst, align 1
     ret i8 %r
 }
@@ -86,8 +86,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_monotonic(ptr %ptr, i16 %value)
 ; -O0:    subs w8, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_monotonic:
-; -O1:    ldxrh w8, [x0]
-; -O1:    stxrh w9, w1, [x0]
+; -O1:    ldxrh w0, [x8]
+; -O1:    stxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value monotonic, align 2
     ret i16 %r
 }
@@ -100,8 +100,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0:    subs w8, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acquire:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stxrh w9, w1, [x0]
+; -O1:    ldaxrh w0, [x8]
+; -O1:    stxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value acquire, align 2
     ret i16 %r
 }
@@ -114,8 +114,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0:    subs w8, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_release:
-; -O1:    ldxrh w8, [x0]
-; -O1:    stlxrh w9, w1, [x0]
+; -O1:    ldxrh w0, [x8]
+; -O1:    stlxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value release, align 2
     ret i16 %r
 }
@@ -128,8 +128,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0:    subs w8, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acq_rel:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stlxrh w9, w1, [x0]
+; -O1:    ldaxrh w0, [x8]
+; -O1:    stlxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value acq_rel, align 2
     ret i16 %r
 }
@@ -142,8 +142,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0:    subs w8, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_seq_cst:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stlxrh w9, w1, [x0]
+; -O1:    ldaxrh w0, [x8]
+; -O1:    stlxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value seq_cst, align 2
     ret i16 %r
 }
@@ -392,8 +392,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_monotonic:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value monotonic, align 1
     ret i8 %r
 }
@@ -407,8 +407,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acquire:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acquire, align 1
     ret i8 %r
 }
@@ -422,8 +422,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_release:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value release, align 1
     ret i8 %r
 }
@@ -437,8 +437,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acq_rel:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acq_rel, align 1
     ret i8 %r
 }
@@ -452,8 +452,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_seq_cst:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value seq_cst, align 1
     ret i8 %r
 }
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll
index 912d87d..57cfeb7 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll
@@ -12,8 +12,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_monotonic:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value monotonic, align 1
     ret i8 %r
 }
@@ -27,8 +27,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acquire:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acquire, align 1
     ret i8 %r
 }
@@ -42,8 +42,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_release:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value release, align 1
     ret i8 %r
 }
@@ -57,8 +57,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acq_rel:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acq_rel, align 1
     ret i8 %r
 }
@@ -72,8 +72,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_seq_cst:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value seq_cst, align 1
     ret i8 %r
 }
@@ -86,8 +86,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_monotonic(ptr %ptr, i16 %value)
 ; -O0:    subs w8, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_monotonic:
-; -O1:    ldxrh w8, [x0]
-; -O1:    stxrh w9, w1, [x0]
+; -O1:    ldxrh w0, [x8]
+; -O1:    stxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value monotonic, align 2
     ret i16 %r
 }
@@ -100,8 +100,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0:    subs w8, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acquire:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stxrh w9, w1, [x0]
+; -O1:    ldaxrh w0, [x8]
+; -O1:    stxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value acquire, align 2
     ret i16 %r
 }
@@ -114,8 +114,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0:    subs w8, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_release:
-; -O1:    ldxrh w8, [x0]
-; -O1:    stlxrh w9, w1, [x0]
+; -O1:    ldxrh w0, [x8]
+; -O1:    stlxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value release, align 2
     ret i16 %r
 }
@@ -128,8 +128,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0:    subs w8, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acq_rel:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stlxrh w9, w1, [x0]
+; -O1:    ldaxrh w0, [x8]
+; -O1:    stlxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value acq_rel, align 2
     ret i16 %r
 }
@@ -142,8 +142,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0:    subs w8, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_seq_cst:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stlxrh w9, w1, [x0]
+; -O1:    ldaxrh w0, [x8]
+; -O1:    stlxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value seq_cst, align 2
     ret i16 %r
 }
@@ -392,8 +392,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_monotonic:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value monotonic, align 1
     ret i8 %r
 }
@@ -407,8 +407,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acquire:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acquire, align 1
     ret i8 %r
 }
@@ -422,8 +422,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_release:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value release, align 1
     ret i8 %r
 }
@@ -437,8 +437,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acq_rel:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acq_rel, align 1
     ret i8 %r
 }
@@ -452,8 +452,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_seq_cst:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value seq_cst, align 1
     ret i8 %r
 }
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll
index 725558f..28ee1a2 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll
@@ -12,8 +12,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_monotonic:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value monotonic, align 1
     ret i8 %r
 }
@@ -27,8 +27,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acquire:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acquire, align 1
     ret i8 %r
 }
@@ -42,8 +42,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_release:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value release, align 1
     ret i8 %r
 }
@@ -57,8 +57,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acq_rel:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acq_rel, align 1
     ret i8 %r
 }
@@ -72,8 +72,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_seq_cst:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value seq_cst, align 1
     ret i8 %r
 }
@@ -86,8 +86,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_monotonic(ptr %ptr, i16 %value)
 ; -O0:    subs w8, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_monotonic:
-; -O1:    ldxrh w8, [x0]
-; -O1:    stxrh w9, w1, [x0]
+; -O1:    ldxrh w0, [x8]
+; -O1:    stxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value monotonic, align 2
     ret i16 %r
 }
@@ -100,8 +100,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0:    subs w8, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acquire:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stxrh w9, w1, [x0]
+; -O1:    ldaxrh w0, [x8]
+; -O1:    stxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value acquire, align 2
     ret i16 %r
 }
@@ -114,8 +114,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0:    subs w8, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_release:
-; -O1:    ldxrh w8, [x0]
-; -O1:    stlxrh w9, w1, [x0]
+; -O1:    ldxrh w0, [x8]
+; -O1:    stlxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value release, align 2
     ret i16 %r
 }
@@ -128,8 +128,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0:    subs w8, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acq_rel:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stlxrh w9, w1, [x0]
+; -O1:    ldaxrh w0, [x8]
+; -O1:    stlxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value acq_rel, align 2
     ret i16 %r
 }
@@ -142,8 +142,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0:    subs w8, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_seq_cst:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stlxrh w9, w1, [x0]
+; -O1:    ldaxrh w0, [x8]
+; -O1:    stlxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value seq_cst, align 2
     ret i16 %r
 }
@@ -392,8 +392,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_monotonic:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value monotonic, align 1
     ret i8 %r
 }
@@ -407,8 +407,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acquire:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acquire, align 1
     ret i8 %r
 }
@@ -422,8 +422,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_release:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value release, align 1
     ret i8 %r
 }
@@ -437,8 +437,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acq_rel:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acq_rel, align 1
     ret i8 %r
 }
@@ -452,8 +452,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_seq_cst:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value seq_cst, align 1
     ret i8 %r
 }
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll
index 004e433..69220a6 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll
@@ -12,8 +12,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_monotonic:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value monotonic, align 1
     ret i8 %r
 }
@@ -27,8 +27,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acquire:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acquire, align 1
     ret i8 %r
 }
@@ -42,8 +42,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_release:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value release, align 1
     ret i8 %r
 }
@@ -57,8 +57,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acq_rel:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acq_rel, align 1
     ret i8 %r
 }
@@ -72,8 +72,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_seq_cst:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value seq_cst, align 1
     ret i8 %r
 }
@@ -86,8 +86,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_monotonic(ptr %ptr, i16 %value)
 ; -O0:    subs w8, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_monotonic:
-; -O1:    ldxrh w8, [x0]
-; -O1:    stxrh w9, w1, [x0]
+; -O1:    ldxrh w0, [x8]
+; -O1:    stxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value monotonic, align 2
     ret i16 %r
 }
@@ -100,8 +100,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0:    subs w8, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acquire:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stxrh w9, w1, [x0]
+; -O1:    ldaxrh w0, [x8]
+; -O1:    stxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value acquire, align 2
     ret i16 %r
 }
@@ -114,8 +114,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0:    subs w8, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_release:
-; -O1:    ldxrh w8, [x0]
-; -O1:    stlxrh w9, w1, [x0]
+; -O1:    ldxrh w0, [x8]
+; -O1:    stlxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value release, align 2
     ret i16 %r
 }
@@ -128,8 +128,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0:    subs w8, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acq_rel:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stlxrh w9, w1, [x0]
+; -O1:    ldaxrh w0, [x8]
+; -O1:    stlxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value acq_rel, align 2
     ret i16 %r
 }
@@ -142,8 +142,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0:    subs w8, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_seq_cst:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stlxrh w9, w1, [x0]
+; -O1:    ldaxrh w0, [x8]
+; -O1:    stlxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value seq_cst, align 2
     ret i16 %r
 }
@@ -392,8 +392,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_monotonic:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value monotonic, align 1
     ret i8 %r
 }
@@ -407,8 +407,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acquire:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acquire, align 1
     ret i8 %r
 }
@@ -422,8 +422,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_release:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value release, align 1
     ret i8 %r
 }
@@ -437,8 +437,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acq_rel:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acq_rel, align 1
     ret i8 %r
 }
@@ -452,8 +452,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0:    subs w8, w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_seq_cst:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value seq_cst, align 1
     ret i8 %r
 }
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2.ll
index 01317e0..d92899d 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2.ll
@@ -12,8 +12,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_monotonic:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value monotonic, align 1
     ret i8 %r
 }
@@ -27,8 +27,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acquire:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acquire, align 1
     ret i8 %r
 }
@@ -42,8 +42,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_release:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value release, align 1
     ret i8 %r
 }
@@ -57,8 +57,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acq_rel:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acq_rel, align 1
     ret i8 %r
 }
@@ -72,8 +72,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_seq_cst:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value seq_cst, align 1
     ret i8 %r
 }
@@ -87,8 +87,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_monotonic(ptr %ptr, i16 %value)
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_monotonic:
-; -O1:    ldxrh w8, [x0]
-; -O1:    stxrh w9, w1, [x0]
+; -O1:    ldxrh w0, [x8]
+; -O1:    stxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value monotonic, align 2
     ret i16 %r
 }
@@ -102,8 +102,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acquire:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stxrh w9, w1, [x0]
+; -O1:    ldaxrh w0, [x8]
+; -O1:    stxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value acquire, align 2
     ret i16 %r
 }
@@ -117,8 +117,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_release:
-; -O1:    ldxrh w8, [x0]
-; -O1:    stlxrh w9, w1, [x0]
+; -O1:    ldxrh w0, [x8]
+; -O1:    stlxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value release, align 2
     ret i16 %r
 }
@@ -132,8 +132,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acq_rel:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stlxrh w9, w1, [x0]
+; -O1:    ldaxrh w0, [x8]
+; -O1:    stlxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value acq_rel, align 2
     ret i16 %r
 }
@@ -147,8 +147,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_seq_cst:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stlxrh w9, w1, [x0]
+; -O1:    ldaxrh w0, [x8]
+; -O1:    stlxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value seq_cst, align 2
     ret i16 %r
 }
@@ -397,8 +397,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_monotonic:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value monotonic, align 1
     ret i8 %r
 }
@@ -412,8 +412,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acquire:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acquire, align 1
     ret i8 %r
 }
@@ -427,8 +427,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_release:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value release, align 1
     ret i8 %r
 }
@@ -442,8 +442,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acq_rel:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acq_rel, align 1
     ret i8 %r
 }
@@ -457,8 +457,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_seq_cst:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value seq_cst, align 1
     ret i8 %r
 }
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc.ll
index 1bead6d..c09a2c5 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc.ll
@@ -12,8 +12,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_monotonic:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value monotonic, align 1
     ret i8 %r
 }
@@ -27,8 +27,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acquire:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acquire, align 1
     ret i8 %r
 }
@@ -42,8 +42,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_release:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value release, align 1
     ret i8 %r
 }
@@ -57,8 +57,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acq_rel:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acq_rel, align 1
     ret i8 %r
 }
@@ -72,8 +72,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_seq_cst:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value seq_cst, align 1
     ret i8 %r
 }
@@ -87,8 +87,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_monotonic(ptr %ptr, i16 %value)
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_monotonic:
-; -O1:    ldxrh w8, [x0]
-; -O1:    stxrh w9, w1, [x0]
+; -O1:    ldxrh w0, [x8]
+; -O1:    stxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value monotonic, align 2
     ret i16 %r
 }
@@ -102,8 +102,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acquire:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stxrh w9, w1, [x0]
+; -O1:    ldaxrh w0, [x8]
+; -O1:    stxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value acquire, align 2
     ret i16 %r
 }
@@ -117,8 +117,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_release:
-; -O1:    ldxrh w8, [x0]
-; -O1:    stlxrh w9, w1, [x0]
+; -O1:    ldxrh w0, [x8]
+; -O1:    stlxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value release, align 2
     ret i16 %r
 }
@@ -132,8 +132,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acq_rel:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stlxrh w9, w1, [x0]
+; -O1:    ldaxrh w0, [x8]
+; -O1:    stlxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value acq_rel, align 2
     ret i16 %r
 }
@@ -147,8 +147,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_seq_cst:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stlxrh w9, w1, [x0]
+; -O1:    ldaxrh w0, [x8]
+; -O1:    stlxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value seq_cst, align 2
     ret i16 %r
 }
@@ -397,8 +397,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_monotonic:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value monotonic, align 1
     ret i8 %r
 }
@@ -412,8 +412,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acquire:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acquire, align 1
     ret i8 %r
 }
@@ -427,8 +427,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_release:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value release, align 1
     ret i8 %r
 }
@@ -442,8 +442,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acq_rel:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acq_rel, align 1
     ret i8 %r
 }
@@ -457,8 +457,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_seq_cst:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value seq_cst, align 1
     ret i8 %r
 }
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc3.ll
index 51d9766..30166b4 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc3.ll
@@ -12,8 +12,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_monotonic:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value monotonic, align 1
     ret i8 %r
 }
@@ -27,8 +27,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acquire:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acquire, align 1
     ret i8 %r
 }
@@ -42,8 +42,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_release:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value release, align 1
     ret i8 %r
 }
@@ -57,8 +57,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acq_rel:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acq_rel, align 1
     ret i8 %r
 }
@@ -72,8 +72,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_seq_cst:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value seq_cst, align 1
     ret i8 %r
 }
@@ -87,8 +87,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_monotonic(ptr %ptr, i16 %value)
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_monotonic:
-; -O1:    ldxrh w8, [x0]
-; -O1:    stxrh w9, w1, [x0]
+; -O1:    ldxrh w0, [x8]
+; -O1:    stxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value monotonic, align 2
     ret i16 %r
 }
@@ -102,8 +102,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acquire:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stxrh w9, w1, [x0]
+; -O1:    ldaxrh w0, [x8]
+; -O1:    stxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value acquire, align 2
     ret i16 %r
 }
@@ -117,8 +117,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_release:
-; -O1:    ldxrh w8, [x0]
-; -O1:    stlxrh w9, w1, [x0]
+; -O1:    ldxrh w0, [x8]
+; -O1:    stlxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value release, align 2
     ret i16 %r
 }
@@ -132,8 +132,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acq_rel:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stlxrh w9, w1, [x0]
+; -O1:    ldaxrh w0, [x8]
+; -O1:    stlxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value acq_rel, align 2
     ret i16 %r
 }
@@ -147,8 +147,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_seq_cst:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stlxrh w9, w1, [x0]
+; -O1:    ldaxrh w0, [x8]
+; -O1:    stlxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value seq_cst, align 2
     ret i16 %r
 }
@@ -397,8 +397,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_monotonic:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value monotonic, align 1
     ret i8 %r
 }
@@ -412,8 +412,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acquire:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acquire, align 1
     ret i8 %r
 }
@@ -427,8 +427,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_release:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value release, align 1
     ret i8 %r
 }
@@ -442,8 +442,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acq_rel:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acq_rel, align 1
     ret i8 %r
 }
@@ -457,8 +457,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_seq_cst:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value seq_cst, align 1
     ret i8 %r
 }
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8a.ll
index a58e5a9..e9c1b33 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8a.ll
@@ -12,8 +12,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_monotonic:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value monotonic, align 1
     ret i8 %r
 }
@@ -27,8 +27,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acquire:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acquire, align 1
     ret i8 %r
 }
@@ -42,8 +42,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_release:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value release, align 1
     ret i8 %r
 }
@@ -57,8 +57,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acq_rel:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acq_rel, align 1
     ret i8 %r
 }
@@ -72,8 +72,8 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_seq_cst:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value seq_cst, align 1
     ret i8 %r
 }
@@ -87,8 +87,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_monotonic(ptr %ptr, i16 %value)
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_monotonic:
-; -O1:    ldxrh w8, [x0]
-; -O1:    stxrh w9, w1, [x0]
+; -O1:    ldxrh w0, [x8]
+; -O1:    stxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value monotonic, align 2
     ret i16 %r
 }
@@ -102,8 +102,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acquire:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stxrh w9, w1, [x0]
+; -O1:    ldaxrh w0, [x8]
+; -O1:    stxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value acquire, align 2
     ret i16 %r
 }
@@ -117,8 +117,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_release:
-; -O1:    ldxrh w8, [x0]
-; -O1:    stlxrh w9, w1, [x0]
+; -O1:    ldxrh w0, [x8]
+; -O1:    stlxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value release, align 2
     ret i16 %r
 }
@@ -132,8 +132,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acq_rel:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stlxrh w9, w1, [x0]
+; -O1:    ldaxrh w0, [x8]
+; -O1:    stlxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value acq_rel, align 2
     ret i16 %r
 }
@@ -147,8 +147,8 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_seq_cst:
-; -O1:    ldaxrh w8, [x0]
-; -O1:    stlxrh w9, w1, [x0]
+; -O1:    ldaxrh w0, [x8]
+; -O1:    stlxrh w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i16 %value seq_cst, align 2
     ret i16 %r
 }
@@ -397,8 +397,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_monotonic:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value monotonic, align 1
     ret i8 %r
 }
@@ -412,8 +412,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acquire:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acquire, align 1
     ret i8 %r
 }
@@ -427,8 +427,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_release:
-; -O1:    ldxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value release, align 1
     ret i8 %r
 }
@@ -442,8 +442,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acq_rel:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value acq_rel, align 1
     ret i8 %r
 }
@@ -457,8 +457,8 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0:    subs w9, w9, #1
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_seq_cst:
-; -O1:    ldaxrb w8, [x0]
-; -O1:    stlxrb w9, w1, [x0]
+; -O1:    ldaxrb w0, [x8]
+; -O1:    stlxrb w9, w1, [x8]
     %r = atomicrmw xchg ptr %ptr, i8 %value seq_cst, align 1
     ret i8 %r
 }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
index de3f323..5bc041a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -1938,7 +1938,6 @@ define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) {
 define i8 @atomicrmw_xchg_i8(ptr %ptr, i8 %rhs) {
 ; CHECK-NOLSE-O1-LABEL: atomicrmw_xchg_i8:
 ; CHECK-NOLSE-O1:       ; %bb.0:
-; CHECK-NOLSE-O1-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; CHECK-NOLSE-O1-NEXT:  LBB28_1: ; %atomicrmw.start
 ; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NOLSE-O1-NEXT:    ldxrb w8, [x0]
@@ -2993,7 +2992,6 @@ define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) {
 define i16 @atomicrmw_xchg_i16(ptr %ptr, i16 %rhs) {
 ; CHECK-NOLSE-O1-LABEL: atomicrmw_xchg_i16:
 ; CHECK-NOLSE-O1:       ; %bb.0:
-; CHECK-NOLSE-O1-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; CHECK-NOLSE-O1-NEXT:  LBB38_1: ; %atomicrmw.start
 ; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NOLSE-O1-NEXT:    ldxrh w8, [x0]
@@ -5996,7 +5994,6 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
 ; CHECK-NOLSE-O1-LABEL: cmpxchg_i8:
 ; CHECK-NOLSE-O1:       ; %bb.0:
 ; CHECK-NOLSE-O1-NEXT:    mov x8, x0
-; CHECK-NOLSE-O1-NEXT:    ; kill: def $w2 killed $w2 def $x2
 ; CHECK-NOLSE-O1-NEXT:  LBB67_1: ; %cmpxchg.start
 ; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NOLSE-O1-NEXT:    ldxrb w0, [x8]
@@ -6103,7 +6100,6 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
 ; CHECK-NOLSE-O1-LABEL: cmpxchg_i16:
 ; CHECK-NOLSE-O1:       ; %bb.0:
 ; CHECK-NOLSE-O1-NEXT:    mov x8, x0
-; CHECK-NOLSE-O1-NEXT:    ; kill: def $w2 killed $w2 def $x2
 ; CHECK-NOLSE-O1-NEXT:  LBB68_1: ; %cmpxchg.start
 ; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NOLSE-O1-NEXT:    ldxrh w0, [x8]
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
index c6819ff..2779e89 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
@@ -727,8 +727,8 @@ define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
-  ; CHECK-NEXT:   $w9 = ADDWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
+  ; CHECK-NEXT:   $w9 = ADDWrs renamable $w8, renamable $w1, 0, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
@@ -746,11 +746,9 @@ define i8 @atomicrmw_xchg_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w1 = KILL $w1, implicit-def $x1
-  ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.atomicrmw.start:
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
-  ; CHECK-NEXT:   liveins: $x0, $x1
+  ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
   ; CHECK-NEXT:   early-clobber renamable $w9 = STXRB renamable $w1, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
@@ -776,8 +774,8 @@ define i8 @atomicrmw_sub_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
-  ; CHECK-NEXT:   $w9 = SUBWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
+  ; CHECK-NEXT:   $w9 = SUBWrs renamable $w8, renamable $w1, 0, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
@@ -800,8 +798,8 @@ define i8 @atomicrmw_and_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
-  ; CHECK-NEXT:   $w9 = ANDWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
+  ; CHECK-NEXT:   $w9 = ANDWrs renamable $w8, renamable $w1, 0, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
@@ -824,8 +822,8 @@ define i8 @atomicrmw_or_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
-  ; CHECK-NEXT:   $w9 = ORRWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
+  ; CHECK-NEXT:   $w9 = ORRWrs renamable $w8, renamable $w1, 0, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
@@ -848,8 +846,8 @@ define i8 @atomicrmw_xor_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
-  ; CHECK-NEXT:   $w9 = EORWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
+  ; CHECK-NEXT:   $w9 = EORWrs renamable $w8, renamable $w1, 0, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
@@ -874,8 +872,8 @@ define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w9 = SBFMWri renamable $w8, 0, 7, pcsections !0
   ; CHECK-NEXT:   dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 32, implicit-def $nzcv, pcsections !0
-  ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, implicit-def $x9, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
@@ -900,8 +898,8 @@ define i8 @atomicrmw_max_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w9 = SBFMWri renamable $w8, 0, 7, pcsections !0
   ; CHECK-NEXT:   dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 32, implicit-def $nzcv, pcsections !0
-  ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, implicit-def $x9, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
@@ -980,8 +978,8 @@ define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
-  ; CHECK-NEXT:   $w9 = ADDWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
+  ; CHECK-NEXT:   $w9 = ADDWrs renamable $w8, renamable $w1, 0, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
@@ -999,11 +997,9 @@ define i16 @atomicrmw_xchg_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w1 = KILL $w1, implicit-def $x1
-  ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.atomicrmw.start:
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
-  ; CHECK-NEXT:   liveins: $x0, $x1
+  ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
   ; CHECK-NEXT:   early-clobber renamable $w9 = STXRH renamable $w1, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
@@ -1029,8 +1025,8 @@ define i16 @atomicrmw_sub_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
-  ; CHECK-NEXT:   $w9 = SUBWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
+  ; CHECK-NEXT:   $w9 = SUBWrs renamable $w8, renamable $w1, 0, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
@@ -1053,8 +1049,8 @@ define i16 @atomicrmw_and_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
-  ; CHECK-NEXT:   $w9 = ANDWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
+  ; CHECK-NEXT:   $w9 = ANDWrs renamable $w8, renamable $w1, 0, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
@@ -1077,8 +1073,8 @@ define i16 @atomicrmw_or_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
-  ; CHECK-NEXT:   $w9 = ORRWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
+  ; CHECK-NEXT:   $w9 = ORRWrs renamable $w8, renamable $w1, 0, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
@@ -1101,8 +1097,8 @@ define i16 @atomicrmw_xor_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
-  ; CHECK-NEXT:   $w9 = EORWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
+  ; CHECK-NEXT:   $w9 = EORWrs renamable $w8, renamable $w1, 0, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
@@ -1127,8 +1123,8 @@ define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w9 = SBFMWri renamable $w8, 0, 15, pcsections !0
   ; CHECK-NEXT:   dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 40, implicit-def $nzcv, pcsections !0
-  ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, implicit-def $x9, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
@@ -1153,8 +1149,8 @@ define i16 @atomicrmw_max_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w9 = SBFMWri renamable $w8, 0, 15, pcsections !0
   ; CHECK-NEXT:   dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 40, implicit-def $nzcv, pcsections !0
-  ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, implicit-def $x9, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
@@ -1229,11 +1225,10 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
   ; CHECK-NEXT:   liveins: $w1, $w2, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $x8 = ORRXrs $xzr, $x0, 0
-  ; CHECK-NEXT:   renamable $w2 = KILL $w2, implicit-def $x2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.cmpxchg.start:
   ; CHECK-NEXT:   successors: %bb.2(0x7c000000), %bb.4(0x04000000)
-  ; CHECK-NEXT:   liveins: $w1, $x2, $x8
+  ; CHECK-NEXT:   liveins: $w1, $w2, $x8
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w0 = LDXRB renamable $x8, implicit-def $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w9 = ANDWri renamable $w0, 7, pcsections !0
@@ -1242,7 +1237,7 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.cmpxchg.trystore:
   ; CHECK-NEXT:   successors: %bb.3(0x04000000), %bb.1(0x7c000000)
-  ; CHECK-NEXT:   liveins: $w1, $x0, $x2, $x8
+  ; CHECK-NEXT:   liveins: $w1, $w2, $x0, $x8
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   early-clobber renamable $w9 = STXRB renamable $w2, renamable $x8, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w9, %bb.1
@@ -1272,11 +1267,10 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
   ; CHECK-NEXT:   liveins: $w1, $w2, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $x8 = ORRXrs $xzr, $x0, 0
-  ; CHECK-NEXT:   renamable $w2 = KILL $w2, implicit-def $x2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.cmpxchg.start:
   ; CHECK-NEXT:   successors: %bb.2(0x7c000000), %bb.4(0x04000000)
-  ; CHECK-NEXT:   liveins: $w1, $x2, $x8
+  ; CHECK-NEXT:   liveins: $w1, $w2, $x8
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w0 = LDXRH renamable $x8, implicit-def $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w9 = ANDWri renamable $w0, 15, pcsections !0
@@ -1285,7 +1279,7 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.cmpxchg.trystore:
   ; CHECK-NEXT:   successors: %bb.3(0x04000000), %bb.1(0x7c000000)
-  ; CHECK-NEXT:   liveins: $w1, $x0, $x2, $x8
+  ; CHECK-NEXT:   liveins: $w1, $w2, $x0, $x8
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   early-clobber renamable $w9 = STXRH renamable $w2, renamable $x8, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w9, %bb.1
diff --git a/llvm/test/CodeGen/AArch64/a55-fuse-address.mir b/llvm/test/CodeGen/AArch64/a55-fuse-address.mir
index 4edff04..3e1b607 100644
--- a/llvm/test/CodeGen/AArch64/a55-fuse-address.mir
+++ b/llvm/test/CodeGen/AArch64/a55-fuse-address.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -o - %s -mtriple=aarch64 -run-pass=machine-scheduler -verify-machineinstrs | FileCheck %s
+# RUN: llc -o - %s -mtriple=aarch64 -passes=machine-scheduler | FileCheck %s
 --- |
   target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
   target triple = "aarch64"
diff --git a/llvm/test/CodeGen/AArch64/addsub_ext.ll b/llvm/test/CodeGen/AArch64/addsub_ext.ll
index 04a98bd..6c35643 100644
--- a/llvm/test/CodeGen/AArch64/addsub_ext.ll
+++ b/llvm/test/CodeGen/AArch64/addsub_ext.ll
@@ -26,7 +26,6 @@ define i32 @add_z_shli8i32(i8 %v, i32 %lhs) minsize {
 define i64 @add_z_i8i64(i8 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: add_z_i8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    add x0, x1, w0, uxtb
 ; CHECK-NEXT:    ret
   %vz = zext i8 %v to i64
@@ -37,7 +36,6 @@ define i64 @add_z_i8i64(i8 %v, i64 %lhs) minsize {
 define i64 @add_z_shli8i64(i8 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: add_z_shli8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    add x0, x1, w0, uxtb #3
 ; CHECK-NEXT:    ret
   %vz = zext i8 %v to i64
@@ -70,7 +68,6 @@ define i32 @add_s_shli8i32(i8 %v, i32 %lhs) minsize {
 define i64 @add_s_i8i64(i8 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: add_s_i8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    add x0, x1, w0, sxtb
 ; CHECK-NEXT:    ret
   %vz = sext i8 %v to i64
@@ -81,7 +78,6 @@ define i64 @add_s_i8i64(i8 %v, i64 %lhs) minsize {
 define i64 @add_s_shli8i64(i8 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: add_s_shli8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    add x0, x1, w0, sxtb #3
 ; CHECK-NEXT:    ret
   %vz = sext i8 %v to i64
@@ -114,7 +110,6 @@ define i32 @add_z_shli16i32(i16 %v, i32 %lhs) minsize {
 define i64 @add_z_i16i64(i16 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: add_z_i16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    add x0, x1, w0, uxth
 ; CHECK-NEXT:    ret
   %vz = zext i16 %v to i64
@@ -125,7 +120,6 @@ define i64 @add_z_i16i64(i16 %v, i64 %lhs) minsize {
 define i64 @add_z_shli16i64(i16 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: add_z_shli16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    add x0, x1, w0, uxth #3
 ; CHECK-NEXT:    ret
   %vz = zext i16 %v to i64
@@ -179,7 +173,6 @@ define i32 @add_s_shli16i32(i16 %v, i32 %lhs) minsize {
 define i64 @add_s_i16i64(i16 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: add_s_i16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    add x0, x1, w0, sxth
 ; CHECK-NEXT:    ret
   %vz = sext i16 %v to i64
@@ -190,7 +183,6 @@ define i64 @add_s_i16i64(i16 %v, i64 %lhs) minsize {
 define i64 @add_s_shli16i64(i16 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: add_s_shli16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    add x0, x1, w0, sxth #3
 ; CHECK-NEXT:    ret
   %vz = sext i16 %v to i64
@@ -244,7 +236,6 @@ define i32 @sub_z_shli8i32(i8 %v, i32 %lhs) minsize {
 define i64 @sub_z_i8i64(i8 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: sub_z_i8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sub x0, x1, w0, uxtb
 ; CHECK-NEXT:    ret
   %vz = zext i8 %v to i64
@@ -255,7 +246,6 @@ define i64 @sub_z_i8i64(i8 %v, i64 %lhs) minsize {
 define i64 @sub_z_shli8i64(i8 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: sub_z_shli8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sub x0, x1, w0, uxtb #3
 ; CHECK-NEXT:    ret
   %vz = zext i8 %v to i64
@@ -288,7 +278,6 @@ define i32 @sub_s_shli8i32(i8 %v, i32 %lhs) minsize {
 define i64 @sub_s_i8i64(i8 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: sub_s_i8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sub x0, x1, w0, sxtb
 ; CHECK-NEXT:    ret
   %vz = sext i8 %v to i64
@@ -299,7 +288,6 @@ define i64 @sub_s_i8i64(i8 %v, i64 %lhs) minsize {
 define i64 @sub_s_shli8i64(i8 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: sub_s_shli8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sub x0, x1, w0, sxtb #3
 ; CHECK-NEXT:    ret
   %vz = sext i8 %v to i64
@@ -332,7 +320,6 @@ define i32 @sub_z_shli16i32(i16 %v, i32 %lhs) minsize {
 define i64 @sub_z_i16i64(i16 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: sub_z_i16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sub x0, x1, w0, uxth
 ; CHECK-NEXT:    ret
   %vz = zext i16 %v to i64
@@ -343,7 +330,6 @@ define i64 @sub_z_i16i64(i16 %v, i64 %lhs) minsize {
 define i64 @sub_z_shli16i64(i16 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: sub_z_shli16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sub x0, x1, w0, uxth #3
 ; CHECK-NEXT:    ret
   %vz = zext i16 %v to i64
@@ -397,7 +383,6 @@ define i32 @sub_s_shli16i32(i16 %v, i32 %lhs) minsize {
 define i64 @sub_s_i16i64(i16 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: sub_s_i16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sub x0, x1, w0, sxth
 ; CHECK-NEXT:    ret
   %vz = sext i16 %v to i64
@@ -408,7 +393,6 @@ define i64 @sub_s_i16i64(i16 %v, i64 %lhs) minsize {
 define i64 @sub_s_shli16i64(i16 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: sub_s_shli16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sub x0, x1, w0, sxth #3
 ; CHECK-NEXT:    ret
   %vz = sext i16 %v to i64
@@ -444,7 +428,7 @@ define i32 @cmp_s_i8i32(i8 %v, i32 %lhs) minsize {
 ; CHECK-NEXT:    cmp w1, w0, uxtb
 ; CHECK-NEXT:    b.ge .LBB40_2
 ; CHECK-NEXT:  // %bb.1: // %then
-; CHECK-NEXT:    mov w0, #1
+; CHECK-NEXT:    mov w0, #1 // =0x1
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB40_2: // %end
 ; CHECK-NEXT:    mov w0, w1
@@ -461,11 +445,10 @@ end:
 define i64 @cmp_s_i8i64(i8 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: cmp_s_i8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    cmp x1, w0, sxtb
 ; CHECK-NEXT:    b.ge .LBB41_2
 ; CHECK-NEXT:  // %bb.1: // %then
-; CHECK-NEXT:    mov w0, #1
+; CHECK-NEXT:    mov w0, #1 // =0x1
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB41_2: // %end
 ; CHECK-NEXT:    mov x0, x1
@@ -485,7 +468,7 @@ define i32 @cmp_s_i16i32(i16 %v, i32 %lhs) minsize {
 ; CHECK-NEXT:    cmp w1, w0, uxth
 ; CHECK-NEXT:    b.ge .LBB42_2
 ; CHECK-NEXT:  // %bb.1: // %then
-; CHECK-NEXT:    mov w0, #1
+; CHECK-NEXT:    mov w0, #1 // =0x1
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB42_2: // %end
 ; CHECK-NEXT:    mov w0, w1
@@ -502,11 +485,10 @@ end:
 define i64 @cmp_s_i16i64(i16 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: cmp_s_i16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    cmp x1, w0, sxth
 ; CHECK-NEXT:    b.ge .LBB43_2
 ; CHECK-NEXT:  // %bb.1: // %then
-; CHECK-NEXT:    mov w0, #1
+; CHECK-NEXT:    mov w0, #1 // =0x1
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB43_2: // %end
 ; CHECK-NEXT:    mov x0, x1
diff --git a/llvm/test/CodeGen/AArch64/ampere1-sched-add.mir b/llvm/test/CodeGen/AArch64/ampere1-sched-add.mir
index e578b5d..3a33291 100644
--- a/llvm/test/CodeGen/AArch64/ampere1-sched-add.mir
+++ b/llvm/test/CodeGen/AArch64/ampere1-sched-add.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
 # RUN: llc -run-pass=machine-scheduler %s -o - | FileCheck %s
+# RUN: llc -passes=machine-scheduler %s -o - | FileCheck %s
 
 --- |
   target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
diff --git a/llvm/test/CodeGen/AArch64/and-mask-removal.ll b/llvm/test/CodeGen/AArch64/and-mask-removal.ll
index 09f00b3..5046c05 100644
--- a/llvm/test/CodeGen/AArch64/and-mask-removal.ll
+++ b/llvm/test/CodeGen/AArch64/and-mask-removal.ll
@@ -11,7 +11,6 @@ define void @new_position(i32 %pos) {
 ; CHECK-SD-LABEL: new_position:
 ; CHECK-SD:       ; %bb.0: ; %entry
 ; CHECK-SD-NEXT:    adrp x8, _board@GOTPAGE
-; CHECK-SD-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    ldr x8, [x8, _board@GOTPAGEOFF]
 ; CHECK-SD-NEXT:    ldrb w8, [x8, w0, sxtw]
 ; CHECK-SD-NEXT:    sub w8, w8, #1
diff --git a/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll b/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
index d69d1b6..ccd191f 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
@@ -127,7 +127,6 @@ declare i64 @llvm.aarch64.ldxr.p0(ptr) nounwind
 define dso_local i32 @test_store_i8(i32, i8 %val, ptr %addr) {
 ; CHECK-LABEL: test_store_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    stxrb w0, w1, [x2]
 ; CHECK-NEXT:    ret
   %extval = zext i8 %val to i64
@@ -138,7 +137,6 @@ define dso_local i32 @test_store_i8(i32, i8 %val, ptr %addr) {
 define dso_local i32 @test_store_i16(i32, i16 %val, ptr %addr) {
 ; CHECK-LABEL: test_store_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    stxrh w0, w1, [x2]
 ; CHECK-NEXT:    ret
   %extval = zext i16 %val to i64
@@ -299,7 +297,6 @@ declare i64 @llvm.aarch64.ldaxr.p0(ptr) nounwind
 define dso_local i32 @test_store_release_i8(i32, i8 %val, ptr %addr) {
 ; CHECK-LABEL: test_store_release_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    stlxrb w0, w1, [x2]
 ; CHECK-NEXT:    ret
   %extval = zext i8 %val to i64
@@ -310,7 +307,6 @@ define dso_local i32 @test_store_release_i8(i32, i8 %val, ptr %addr) {
 define dso_local i32 @test_store_release_i16(i32, i16 %val, ptr %addr) {
 ; CHECK-LABEL: test_store_release_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    stlxrh w0, w1, [x2]
 ; CHECK-NEXT:    ret
   %extval = zext i16 %val to i64
diff --git a/llvm/test/CodeGen/AArch64/arm64-vaddv.ll b/llvm/test/CodeGen/AArch64/arm64-vaddv.ll
index adfe28e..d60b6d7 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vaddv.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vaddv.ll
@@ -267,7 +267,6 @@ define <1 x i64> @test_vaddv_u64_to_vec(<2 x i64> %a1) {
 ; CHECK-LABEL: test_vaddv_u64_to_vec:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    addp d0, v0.2d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %vaddv.i = tail call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a1)
diff --git a/llvm/test/CodeGen/AArch64/arm64_32-addrs.ll b/llvm/test/CodeGen/AArch64/arm64_32-addrs.ll
index ad073d9..cc70dc44 100644
--- a/llvm/test/CodeGen/AArch64/arm64_32-addrs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64_32-addrs.ll
@@ -43,7 +43,6 @@ define i8 @test_valid_wrap_optimizable2(ptr %base, i32 %offset) {
 ; CHECK-LABEL: test_valid_wrap_optimizable2:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    mov w8, #-100 ; =0xffffff9c
-; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    ldrb w0, [x8, w1, sxtw]
 ; CHECK-NEXT:    ret
 
diff --git a/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll b/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll
index fdb1460..42cb3d4 100644
--- a/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll
+++ b/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll
@@ -374,7 +374,6 @@ define dso_local i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
 define dso_local i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    adrp x9, var8
 ; CHECK-NEXT:    add x9, x9, :lo12:var8
 ; CHECK-NEXT:  .LBB20_1: // %atomicrmw.start
@@ -392,16 +391,16 @@ define dso_local i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
 define dso_local i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    adrp x9, var16
 ; CHECK-NEXT:    add x9, x9, :lo12:var16
 ; CHECK-NEXT:  .LBB21_1: // %atomicrmw.start
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxrh w8, [x9]
-; CHECK-NEXT:    stlxrh w10, w0, [x9]
+; CHECK-NEXT:    ldaxrh w0, [x9]
+; CHECK-NEXT:    stlxrh w10, w8, [x9]
 ; CHECK-NEXT:    cbnz w10, .LBB21_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    dmb ish
 ; CHECK-NEXT:    ret
    %old = atomicrmw xchg ptr @var16, i16 %offset seq_cst
@@ -763,7 +762,6 @@ define dso_local i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
 define dso_local i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    and w8, w0, #0xff
 ; CHECK-NEXT:    adrp x9, var8
 ; CHECK-NEXT:    add x9, x9, :lo12:var8
@@ -791,7 +789,6 @@ define dso_local i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 define dso_local i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    and w8, w0, #0xffff
 ; CHECK-NEXT:    adrp x9, var16
 ; CHECK-NEXT:    add x9, x9, :lo12:var16
diff --git a/llvm/test/CodeGen/AArch64/atomic-ops.ll b/llvm/test/CodeGen/AArch64/atomic-ops.ll
index 6790655..d8ac89f 100644
--- a/llvm/test/CodeGen/AArch64/atomic-ops.ll
+++ b/llvm/test/CodeGen/AArch64/atomic-ops.ll
@@ -558,7 +558,6 @@ define dso_local i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
 define dso_local i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
 ; INLINE_ATOMICS-LABEL: test_atomic_load_xchg_i8:
 ; INLINE_ATOMICS:       // %bb.0:
-; INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; INLINE_ATOMICS-NEXT:    adrp x9, var8
 ; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
 ; INLINE_ATOMICS-NEXT:  .LBB20_1: // %atomicrmw.start
@@ -585,7 +584,6 @@ define dso_local i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
 define dso_local i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
 ; INLINE_ATOMICS-LABEL: test_atomic_load_xchg_i16:
 ; INLINE_ATOMICS:       // %bb.0:
-; INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; INLINE_ATOMICS-NEXT:    adrp x9, var16
 ; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var16
 ; INLINE_ATOMICS-NEXT:  .LBB21_1: // %atomicrmw.start
@@ -978,7 +976,6 @@ define dso_local i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
 define dso_local i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 ; INLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i8:
 ; INLINE_ATOMICS:       // %bb.0:
-; INLINE_ATOMICS-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; INLINE_ATOMICS-NEXT:    and w8, w0, #0xff
 ; INLINE_ATOMICS-NEXT:    adrp x9, var8
 ; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
@@ -1015,7 +1012,6 @@ define dso_local i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 define dso_local i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 ; INLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i16:
 ; INLINE_ATOMICS:       // %bb.0:
-; INLINE_ATOMICS-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; INLINE_ATOMICS-NEXT:    and w8, w0, #0xffff
 ; INLINE_ATOMICS-NEXT:    adrp x9, var16
 ; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var16
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
index fb40dfc..0c3a40d 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
@@ -476,40 +476,38 @@ define double @test_atomicrmw_fadd_f32_seq_cst_align8(ptr %ptr, double %value) #
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_f32_seq_cst_align8:
 ; SOFTFP-NOLSE:       // %bb.0:
 ; SOFTFP-NOLSE-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT:    ldr x21, [x0]
 ; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
 ; SOFTFP-NOLSE-NEXT:    mov x19, x0
+; SOFTFP-NOLSE-NEXT:    ldr x0, [x0]
 ; SOFTFP-NOLSE-NEXT:    mov x20, x1
 ; SOFTFP-NOLSE-NEXT:    b .LBB5_2
 ; SOFTFP-NOLSE-NEXT:  .LBB5_1: // %cmpxchg.nostore
 ; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w9, wzr
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
 ; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    mov x21, x8
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB5_6
+; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB5_6
 ; SOFTFP-NOLSE-NEXT:  .LBB5_2: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
 ; SOFTFP-NOLSE-NEXT:    // Child Loop BB5_3 Depth 2
-; SOFTFP-NOLSE-NEXT:    mov x0, x21
 ; SOFTFP-NOLSE-NEXT:    mov x1, x20
+; SOFTFP-NOLSE-NEXT:    mov x21, x0
 ; SOFTFP-NOLSE-NEXT:    bl __adddf3
+; SOFTFP-NOLSE-NEXT:    mov x8, x0
 ; SOFTFP-NOLSE-NEXT:  .LBB5_3: // %cmpxchg.start
 ; SOFTFP-NOLSE-NEXT:    // Parent Loop BB5_2 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT:    ldaxr x8, [x19]
-; SOFTFP-NOLSE-NEXT:    cmp x8, x21
+; SOFTFP-NOLSE-NEXT:    ldaxr x0, [x19]
+; SOFTFP-NOLSE-NEXT:    cmp x0, x21
 ; SOFTFP-NOLSE-NEXT:    b.ne .LBB5_1
 ; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
 ; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_3 Depth=2
-; SOFTFP-NOLSE-NEXT:    stlxr w9, x0, [x19]
+; SOFTFP-NOLSE-NEXT:    stlxr w9, x8, [x19]
 ; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB5_3
 ; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB5_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w9, #1 // =0x1
-; SOFTFP-NOLSE-NEXT:    mov x21, x8
-; SOFTFP-NOLSE-NEXT:    cbz w9, .LBB5_2
+; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB5_2
 ; SOFTFP-NOLSE-NEXT:  .LBB5_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT:    mov x0, x21
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fadd ptr %ptr, double %value seq_cst, align 8
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
index 818dcf3..2408899 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
@@ -478,40 +478,38 @@ define double @test_atomicrmw_fmax_f32_seq_cst_align8(ptr %ptr, double %value) #
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_f32_seq_cst_align8:
 ; SOFTFP-NOLSE:       // %bb.0:
 ; SOFTFP-NOLSE-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT:    ldr x21, [x0]
 ; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
 ; SOFTFP-NOLSE-NEXT:    mov x19, x0
+; SOFTFP-NOLSE-NEXT:    ldr x0, [x0]
 ; SOFTFP-NOLSE-NEXT:    mov x20, x1
 ; SOFTFP-NOLSE-NEXT:    b .LBB5_2
 ; SOFTFP-NOLSE-NEXT:  .LBB5_1: // %cmpxchg.nostore
 ; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w9, wzr
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
 ; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    mov x21, x8
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB5_6
+; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB5_6
 ; SOFTFP-NOLSE-NEXT:  .LBB5_2: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
 ; SOFTFP-NOLSE-NEXT:    // Child Loop BB5_3 Depth 2
-; SOFTFP-NOLSE-NEXT:    mov x0, x21
 ; SOFTFP-NOLSE-NEXT:    mov x1, x20
+; SOFTFP-NOLSE-NEXT:    mov x21, x0
 ; SOFTFP-NOLSE-NEXT:    bl fmax
+; SOFTFP-NOLSE-NEXT:    mov x8, x0
 ; SOFTFP-NOLSE-NEXT:  .LBB5_3: // %cmpxchg.start
 ; SOFTFP-NOLSE-NEXT:    // Parent Loop BB5_2 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT:    ldaxr x8, [x19]
-; SOFTFP-NOLSE-NEXT:    cmp x8, x21
+; SOFTFP-NOLSE-NEXT:    ldaxr x0, [x19]
+; SOFTFP-NOLSE-NEXT:    cmp x0, x21
 ; SOFTFP-NOLSE-NEXT:    b.ne .LBB5_1
 ; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
 ; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_3 Depth=2
-; SOFTFP-NOLSE-NEXT:    stlxr w9, x0, [x19]
+; SOFTFP-NOLSE-NEXT:    stlxr w9, x8, [x19]
 ; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB5_3
 ; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB5_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w9, #1 // =0x1
-; SOFTFP-NOLSE-NEXT:    mov x21, x8
-; SOFTFP-NOLSE-NEXT:    cbz w9, .LBB5_2
+; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB5_2
 ; SOFTFP-NOLSE-NEXT:  .LBB5_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT:    mov x0, x21
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fmax ptr %ptr, double %value seq_cst, align 8
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
index b969241e..65f1f48 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
@@ -478,40 +478,38 @@ define double @test_atomicrmw_fmin_f32_seq_cst_align8(ptr %ptr, double %value) #
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmin_f32_seq_cst_align8:
 ; SOFTFP-NOLSE:       // %bb.0:
 ; SOFTFP-NOLSE-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT:    ldr x21, [x0]
 ; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
 ; SOFTFP-NOLSE-NEXT:    mov x19, x0
+; SOFTFP-NOLSE-NEXT:    ldr x0, [x0]
 ; SOFTFP-NOLSE-NEXT:    mov x20, x1
 ; SOFTFP-NOLSE-NEXT:    b .LBB5_2
 ; SOFTFP-NOLSE-NEXT:  .LBB5_1: // %cmpxchg.nostore
 ; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w9, wzr
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
 ; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    mov x21, x8
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB5_6
+; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB5_6
 ; SOFTFP-NOLSE-NEXT:  .LBB5_2: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
 ; SOFTFP-NOLSE-NEXT:    // Child Loop BB5_3 Depth 2
-; SOFTFP-NOLSE-NEXT:    mov x0, x21
 ; SOFTFP-NOLSE-NEXT:    mov x1, x20
+; SOFTFP-NOLSE-NEXT:    mov x21, x0
 ; SOFTFP-NOLSE-NEXT:    bl fmin
+; SOFTFP-NOLSE-NEXT:    mov x8, x0
 ; SOFTFP-NOLSE-NEXT:  .LBB5_3: // %cmpxchg.start
 ; SOFTFP-NOLSE-NEXT:    // Parent Loop BB5_2 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT:    ldaxr x8, [x19]
-; SOFTFP-NOLSE-NEXT:    cmp x8, x21
+; SOFTFP-NOLSE-NEXT:    ldaxr x0, [x19]
+; SOFTFP-NOLSE-NEXT:    cmp x0, x21
 ; SOFTFP-NOLSE-NEXT:    b.ne .LBB5_1
 ; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
 ; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_3 Depth=2
-; SOFTFP-NOLSE-NEXT:    stlxr w9, x0, [x19]
+; SOFTFP-NOLSE-NEXT:    stlxr w9, x8, [x19]
 ; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB5_3
 ; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB5_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w9, #1 // =0x1
-; SOFTFP-NOLSE-NEXT:    mov x21, x8
-; SOFTFP-NOLSE-NEXT:    cbz w9, .LBB5_2
+; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB5_2
 ; SOFTFP-NOLSE-NEXT:  .LBB5_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT:    mov x0, x21
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fmin ptr %ptr, double %value seq_cst, align 8
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll
index e603337..0f1a2f0 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll
@@ -476,40 +476,38 @@ define double @test_atomicrmw_fsub_f32_seq_cst_align8(ptr %ptr, double %value) #
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fsub_f32_seq_cst_align8:
 ; SOFTFP-NOLSE:       // %bb.0:
 ; SOFTFP-NOLSE-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT:    ldr x21, [x0]
 ; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
 ; SOFTFP-NOLSE-NEXT:    mov x19, x0
+; SOFTFP-NOLSE-NEXT:    ldr x0, [x0]
 ; SOFTFP-NOLSE-NEXT:    mov x20, x1
 ; SOFTFP-NOLSE-NEXT:    b .LBB5_2
 ; SOFTFP-NOLSE-NEXT:  .LBB5_1: // %cmpxchg.nostore
 ; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w9, wzr
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
 ; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    mov x21, x8
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB5_6
+; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB5_6
 ; SOFTFP-NOLSE-NEXT:  .LBB5_2: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
 ; SOFTFP-NOLSE-NEXT:    // Child Loop BB5_3 Depth 2
-; SOFTFP-NOLSE-NEXT:    mov x0, x21
 ; SOFTFP-NOLSE-NEXT:    mov x1, x20
+; SOFTFP-NOLSE-NEXT:    mov x21, x0
 ; SOFTFP-NOLSE-NEXT:    bl __subdf3
+; SOFTFP-NOLSE-NEXT:    mov x8, x0
 ; SOFTFP-NOLSE-NEXT:  .LBB5_3: // %cmpxchg.start
 ; SOFTFP-NOLSE-NEXT:    // Parent Loop BB5_2 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
-; SOFTFP-NOLSE-NEXT:    ldaxr x8, [x19]
-; SOFTFP-NOLSE-NEXT:    cmp x8, x21
+; SOFTFP-NOLSE-NEXT:    ldaxr x0, [x19]
+; SOFTFP-NOLSE-NEXT:    cmp x0, x21
 ; SOFTFP-NOLSE-NEXT:    b.ne .LBB5_1
 ; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
 ; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_3 Depth=2
-; SOFTFP-NOLSE-NEXT:    stlxr w9, x0, [x19]
+; SOFTFP-NOLSE-NEXT:    stlxr w9, x8, [x19]
 ; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB5_3
 ; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB5_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w9, #1 // =0x1
-; SOFTFP-NOLSE-NEXT:    mov x21, x8
-; SOFTFP-NOLSE-NEXT:    cbz w9, .LBB5_2
+; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB5_2
 ; SOFTFP-NOLSE-NEXT:  .LBB5_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; SOFTFP-NOLSE-NEXT:    mov x0, x21
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fsub ptr %ptr, double %value seq_cst, align 8
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll b/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll
index 98033a8..0e728cd 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --force-update
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=NOLSE
 ; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -mattr=+lse -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=LSE
 
@@ -6,14 +6,14 @@ define half @test_rmw_xchg_f16(ptr %dst, half %new) {
 ; NOLSE-LABEL: test_rmw_xchg_f16:
 ; NOLSE:       // %bb.0:
 ; NOLSE-NEXT:    // kill: def $h0 killed $h0 def $s0
-; NOLSE-NEXT:    fmov w8, s0
+; NOLSE-NEXT:    fmov w9, s0
 ; NOLSE-NEXT:  .LBB0_1: // %atomicrmw.start
 ; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
-; NOLSE-NEXT:    ldaxrh w9, [x0]
-; NOLSE-NEXT:    stlxrh w10, w8, [x0]
+; NOLSE-NEXT:    ldaxrh w8, [x0]
+; NOLSE-NEXT:    stlxrh w10, w9, [x0]
 ; NOLSE-NEXT:    cbnz w10, .LBB0_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
-; NOLSE-NEXT:    fmov s0, w9
+; NOLSE-NEXT:    fmov s0, w8
 ; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/AArch64/cluster-frame-index.mir b/llvm/test/CodeGen/AArch64/cluster-frame-index.mir
index 37ab941..5d761f1 100644
--- a/llvm/test/CodeGen/AArch64/cluster-frame-index.mir
+++ b/llvm/test/CodeGen/AArch64/cluster-frame-index.mir
@@ -1,4 +1,5 @@
 #RUN: llc -mtriple=aarch64-- -mcpu=cyclone -run-pass machine-scheduler -o - %s | FileCheck %s
+#RUN: llc -mtriple=aarch64-- -mcpu=cyclone -passes=machine-scheduler -o - %s | FileCheck %s
 ---
 name:            merge_stack
 # CHECK-LABEL: name: merge_stack
diff --git a/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll b/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll
index 1cc194e..e87d431 100644
--- a/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll
+++ b/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll
@@ -31,7 +31,6 @@ entry:
 define i1 @test_EQ_IlsEbT(i64 %a, i16 %b) {
 ; CHECK-LABEL: test_EQ_IlsEbT:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    cmn x0, w1, sxth
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
@@ -45,7 +44,6 @@ entry:
 define i1 @test_EQ_IlcEbT(i64 %a, i8 %b) {
 ; CHECK-LABEL: test_EQ_IlcEbT:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    cmn x0, w1, uxtb
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
@@ -110,7 +108,6 @@ entry:
 define i1 @test_EQ_IslEbT(i16 %a, i64 %b) {
 ; CHECK-LABEL: test_EQ_IslEbT:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    cmn x1, w0, sxth
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
@@ -167,7 +164,6 @@ entry:
 define i1 @test_EQ_IclEbT(i8 %a, i64 %b) {
 ; CHECK-LABEL: test_EQ_IclEbT:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    cmn x1, w0, uxtb
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
@@ -249,7 +245,6 @@ entry:
 define i1 @test_NE_IlsEbT(i64 %a, i16 %b) {
 ; CHECK-LABEL: test_NE_IlsEbT:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    cmn x0, w1, sxth
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
@@ -263,7 +258,6 @@ entry:
 define i1 @test_NE_IlcEbT(i64 %a, i8 %b) {
 ; CHECK-LABEL: test_NE_IlcEbT:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    cmn x0, w1, uxtb
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
@@ -328,7 +322,6 @@ entry:
 define i1 @test_NE_IslEbT(i16 %a, i64 %b) {
 ; CHECK-LABEL: test_NE_IslEbT:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    cmn x1, w0, sxth
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
@@ -385,7 +378,6 @@ entry:
 define i1 @test_NE_IclEbT(i8 %a, i64 %b) {
 ; CHECK-LABEL: test_NE_IclEbT:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    cmn x1, w0, uxtb
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
index 186d191..b7817eb 100644
--- a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
+++ b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
@@ -53,7 +53,6 @@ define i1 @test_return_bool(ptr %value, i8 %oldValue, i8 %newValue) {
 ; CHECK-LABEL: test_return_bool:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    and w8, w1, #0xff
-; CHECK-NEXT:    ; kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:  LBB1_1: ; %cmpxchg.start
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ldaxrb w9, [x0]
diff --git a/llvm/test/CodeGen/AArch64/dump-reserved-cycles.mir b/llvm/test/CodeGen/AArch64/dump-reserved-cycles.mir
index 4bf8aff..5655bfa 100644
--- a/llvm/test/CodeGen/AArch64/dump-reserved-cycles.mir
+++ b/llvm/test/CodeGen/AArch64/dump-reserved-cycles.mir
@@ -1,9 +1,15 @@
 # RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a55 -misched-dump-reserved-cycles=true \
 # RUN: -run-pass=machine-scheduler -debug-only=machine-scheduler -o - %s 2>&1 | FileCheck %s
 
+# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a55 -misched-dump-reserved-cycles=true \
+# RUN: -passes=machine-scheduler -debug-only=machine-scheduler -o - %s 2>&1 | FileCheck %s
+
 # RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a55 -misched-dump-reserved-cycles=false\
 # RUN: -run-pass=machine-scheduler -debug-only=machine-scheduler -o - %s 2>&1 | FileCheck %s --check-prefix=NODUMP
 
+# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a55 -misched-dump-reserved-cycles=false\
+# RUN: -passes=machine-scheduler -debug-only=machine-scheduler -o - %s 2>&1 | FileCheck %s --check-prefix=NODUMP
+
 # REQUIRES: asserts
 ---
 name: f
diff --git a/llvm/test/CodeGen/AArch64/dump-schedule-trace.mir b/llvm/test/CodeGen/AArch64/dump-schedule-trace.mir
index bff6d1d..c90d6bd 100644
--- a/llvm/test/CodeGen/AArch64/dump-schedule-trace.mir
+++ b/llvm/test/CodeGen/AArch64/dump-schedule-trace.mir
@@ -5,16 +5,33 @@
 # RUN:  2>&1 | FileCheck %s --check-prefix=TOP --strict-whitespace
 
 # RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a55  \
+# RUN:  -passes=machine-scheduler -debug-only=machine-scheduler -o - %s \
+# RUN:  -misched-prera-direction=topdown -sched-print-cycles=true \
+# RUN:  -misched-dump-schedule-trace=true -misched-dump-schedule-trace-col-header-width=21 \
+# RUN:  2>&1 | FileCheck %s --check-prefix=TOP --strict-whitespace
+
+# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a55  \
 # RUN:  -run-pass=machine-scheduler -debug-only=machine-scheduler -o - %s \
 # RUN:  -misched-prera-direction=bottomup -sched-print-cycles=true \
 # RUN:  -misched-dump-schedule-trace=true -misched-dump-schedule-trace-col-width=4 \
 # RUN:  2>&1 | FileCheck %s --check-prefix=BOTTOM  --strict-whitespace
 
 # RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a55  \
+# RUN:  -passes=machine-scheduler -debug-only=machine-scheduler -o - %s \
+# RUN:  -misched-prera-direction=bottomup -sched-print-cycles=true \
+# RUN:  -misched-dump-schedule-trace=true -misched-dump-schedule-trace-col-width=4 \
+# RUN:  2>&1 | FileCheck %s --check-prefix=BOTTOM  --strict-whitespace
+
+# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a55  \
 # RUN:  -run-pass=machine-scheduler -debug-only=machine-scheduler -o - %s \
 # RUN:  -sched-print-cycles=true -misched-dump-schedule-trace=true \
 # RUN:  2>&1 | FileCheck %s --check-prefix=BIDIRECTIONAL
 
+# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a55  \
+# RUN:  -passes=machine-scheduler -debug-only=machine-scheduler -o - %s \
+# RUN:  -sched-print-cycles=true -misched-dump-schedule-trace=true \
+# RUN:  2>&1 | FileCheck %s --check-prefix=BIDIRECTIONAL
+
 # REQUIRES: asserts, aarch64-registered-target
 ---
 name: f
diff --git a/llvm/test/CodeGen/AArch64/extract-bits.ll b/llvm/test/CodeGen/AArch64/extract-bits.ll
index aaa6c7e..8e822d1 100644
--- a/llvm/test/CodeGen/AArch64/extract-bits.ll
+++ b/llvm/test/CodeGen/AArch64/extract-bits.ll
@@ -492,7 +492,6 @@ define i32 @bextr64_32_b1(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr64_32_b1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    lsr x9, x0, x1
 ; CHECK-NEXT:    lsl w8, w8, w2
 ; CHECK-NEXT:    bic w0, w9, w8
@@ -512,7 +511,6 @@ define i32 @bextr64_32_b2(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr64_32_b2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    lsr x9, x0, x1
 ; CHECK-NEXT:    lsl w8, w8, w2
 ; CHECK-NEXT:    bic w0, w9, w8
@@ -552,7 +550,6 @@ define i32 @bextr32_c1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #32 // =0x20
 ; CHECK-NEXT:    mov w9, #-1 // =0xffffffff
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w10, w0, w1
 ; CHECK-NEXT:    sub w8, w8, w2
 ; CHECK-NEXT:    lsr w8, w9, w8
@@ -592,7 +589,6 @@ define i32 @bextr32_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 ; CHECK-NEXT:    mov w9, #32 // =0x20
 ; CHECK-NEXT:    mov w10, #-1 // =0xffffffff
 ; CHECK-NEXT:    sub w9, w9, w2
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w8, w8, w1
 ; CHECK-NEXT:    lsr w9, w10, w9
 ; CHECK-NEXT:    and w0, w9, w8
@@ -797,7 +793,6 @@ define i32 @bextr32_d0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 define i32 @bextr32_d1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr32_d1_indexzext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w8, w0, w1
 ; CHECK-NEXT:    mov w9, #32 // =0x20
 ; CHECK-NEXT:    sub w9, w9, w2
@@ -834,7 +829,6 @@ define i32 @bextr32_d3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 ; CHECK-LABEL: bextr32_d3_load_indexzext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr w8, [x0]
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    mov w9, #32 // =0x20
 ; CHECK-NEXT:    sub w9, w9, w2
 ; CHECK-NEXT:    lsr w8, w8, w1
diff --git a/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
index a78addc..b40c065 100644
--- a/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
+++ b/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
@@ -224,7 +224,6 @@ define double @fmul_pow_mul_max_pow2(i16 %cnt) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #2 // =0x2
 ; CHECK-NEXT:    mov w9, #1 // =0x1
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    fmov d1, #3.00000000
 ; CHECK-NEXT:    lsl w8, w8, w0
 ; CHECK-NEXT:    lsl w9, w9, w0
@@ -433,7 +432,6 @@ define double @fmul_pow_shl_cnt_safe(i16 %cnt) nounwind {
 ; CHECK-LABEL: fmul_pow_shl_cnt_safe:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #1 // =0x1
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    lsl w8, w8, w0
 ; CHECK-NEXT:    and w8, w8, #0xffff
 ; CHECK-NEXT:    ucvtf d0, w8
diff --git a/llvm/test/CodeGen/AArch64/force-enable-intervals.mir b/llvm/test/CodeGen/AArch64/force-enable-intervals.mir
index a53d4e7..8d47eee 100644
--- a/llvm/test/CodeGen/AArch64/force-enable-intervals.mir
+++ b/llvm/test/CodeGen/AArch64/force-enable-intervals.mir
@@ -4,10 +4,20 @@
 # RUN:  -o - %s 2>&1 -misched-prera-direction=topdown | FileCheck %s 
 
 # RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a55 \
+# RUN:  -misched-dump-reserved-cycles=true \
+# RUN:  -passes=machine-scheduler -debug-only=machine-scheduler \
+# RUN:  -o - %s 2>&1 -misched-prera-direction=topdown | FileCheck %s 
+
+# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a55 \
 # RUN:  -misched-dump-reserved-cycles=true -sched-model-force-enable-intervals=true \
 # RUN:  -run-pass=machine-scheduler -debug-only=machine-scheduler \
 # RUN:  -o - %s 2>&1 -misched-prera-direction=topdown | FileCheck %s  --check-prefix=FORCE
 
+# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a55 \
+# RUN:  -misched-dump-reserved-cycles=true -sched-model-force-enable-intervals=true \
+# RUN:  -passes=machine-scheduler -debug-only=machine-scheduler \
+# RUN:  -o - %s 2>&1 -misched-prera-direction=topdown | FileCheck %s  --check-prefix=FORCE
+
 # REQUIRES: asserts, aarch64-registered-target
 ---
 name: f
diff --git a/llvm/test/CodeGen/AArch64/fsh.ll b/llvm/test/CodeGen/AArch64/fsh.ll
index 9196d51..c084813 100644
--- a/llvm/test/CodeGen/AArch64/fsh.ll
+++ b/llvm/test/CodeGen/AArch64/fsh.ll
@@ -394,7 +394,6 @@ define i32 @fshl_i32(i32 %a, i32 %b, i32 %c) {
 ; CHECK-SD-LABEL: fshl_i32:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    lsr w8, w1, #1
-; CHECK-SD-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-SD-NEXT:    mvn w9, w2
 ; CHECK-SD-NEXT:    lsl w10, w0, w2
 ; CHECK-SD-NEXT:    lsr w8, w8, w9
@@ -420,7 +419,6 @@ define i32 @fshr_i32(i32 %a, i32 %b, i32 %c) {
 ; CHECK-SD-LABEL: fshr_i32:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    lsl w8, w0, #1
-; CHECK-SD-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-SD-NEXT:    mvn w9, w2
 ; CHECK-SD-NEXT:    lsr w10, w1, w2
 ; CHECK-SD-NEXT:    lsl w8, w8, w9
diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll
index 3037a95..e5aa360 100644
--- a/llvm/test/CodeGen/AArch64/funnel-shift.ll
+++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll
@@ -21,7 +21,6 @@ define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
 ; CHECK-SD-LABEL: fshl_i32:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    lsr w8, w1, #1
-; CHECK-SD-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-SD-NEXT:    mvn w9, w2
 ; CHECK-SD-NEXT:    lsl w10, w0, w2
 ; CHECK-SD-NEXT:    lsr w8, w8, w9
@@ -266,7 +265,6 @@ define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
 ; CHECK-SD-LABEL: fshr_i32:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    lsl w8, w0, #1
-; CHECK-SD-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-SD-NEXT:    mvn w9, w2
 ; CHECK-SD-NEXT:    lsr w10, w1, w2
 ; CHECK-SD-NEXT:    lsl w8, w8, w9
@@ -667,7 +665,6 @@ define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) {
 ; CHECK-SD-LABEL: or_shl_fshl_simplify:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    lsr w8, w0, #1
-; CHECK-SD-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-SD-NEXT:    mvn w9, w2
 ; CHECK-SD-NEXT:    lsl w10, w1, w2
 ; CHECK-SD-NEXT:    lsr w8, w8, w9
@@ -696,7 +693,6 @@ define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) {
 ; CHECK-SD-LABEL: or_lshr_fshr_simplify:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    lsl w8, w0, #1
-; CHECK-SD-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-SD-NEXT:    mvn w9, w2
 ; CHECK-SD-NEXT:    lsr w10, w1, w2
 ; CHECK-SD-NEXT:    lsl w8, w8, w9
diff --git a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
index cb9f04a..c3fdc7d 100644
--- a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
@@ -15,7 +15,6 @@
 define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_signbit_eq:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl w8, w0, w1
 ; CHECK-NEXT:    tst w8, #0x80
 ; CHECK-NEXT:    cset w0, eq
@@ -29,7 +28,6 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
 define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_lowestbit_eq:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl w8, w0, w1
 ; CHECK-NEXT:    tst w8, #0x1
 ; CHECK-NEXT:    cset w0, eq
@@ -43,7 +41,6 @@ define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
 define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_bitsinmiddle_eq:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl w8, w0, w1
 ; CHECK-NEXT:    tst w8, #0x18
 ; CHECK-NEXT:    cset w0, eq
@@ -59,7 +56,6 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
 define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
 ; CHECK-LABEL: scalar_i16_signbit_eq:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl w8, w0, w1
 ; CHECK-NEXT:    tst w8, #0x8000
 ; CHECK-NEXT:    cset w0, eq
@@ -73,7 +69,6 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
 define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
 ; CHECK-LABEL: scalar_i16_lowestbit_eq:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl w8, w0, w1
 ; CHECK-NEXT:    tst w8, #0x1
 ; CHECK-NEXT:    cset w0, eq
@@ -87,7 +82,6 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
 define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
 ; CHECK-LABEL: scalar_i16_bitsinmiddle_eq:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl w8, w0, w1
 ; CHECK-NEXT:    tst w8, #0xff0
 ; CHECK-NEXT:    cset w0, eq
@@ -268,7 +262,6 @@ define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwi
 define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_signbit_ne:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl w8, w0, w1
 ; CHECK-NEXT:    ubfx w0, w8, #7, #1
 ; CHECK-NEXT:    ret
@@ -325,7 +318,6 @@ define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_signbit_eq_with_nonzero:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #128 // =0x80
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w8, w8, w1
 ; CHECK-NEXT:    and w8, w8, w0
 ; CHECK-NEXT:    cmp w8, #1
diff --git a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
index 32a6245..4a73b10 100644
--- a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
@@ -16,7 +16,6 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_signbit_eq:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xff
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w8, w8, w1
 ; CHECK-NEXT:    tst w8, #0x80
 ; CHECK-NEXT:    cset w0, eq
@@ -31,7 +30,6 @@ define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_lowestbit_eq:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xff
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w8, w8, w1
 ; CHECK-NEXT:    tst w8, #0x1
 ; CHECK-NEXT:    cset w0, eq
@@ -46,7 +44,6 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_bitsinmiddle_eq:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xff
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w8, w8, w1
 ; CHECK-NEXT:    tst w8, #0x18
 ; CHECK-NEXT:    cset w0, eq
@@ -63,7 +60,6 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
 ; CHECK-LABEL: scalar_i16_signbit_eq:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xffff
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w8, w8, w1
 ; CHECK-NEXT:    tst w8, #0x8000
 ; CHECK-NEXT:    cset w0, eq
@@ -78,7 +74,6 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
 ; CHECK-LABEL: scalar_i16_lowestbit_eq:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xffff
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w8, w8, w1
 ; CHECK-NEXT:    tst w8, #0x1
 ; CHECK-NEXT:    cset w0, eq
@@ -93,7 +88,6 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
 ; CHECK-LABEL: scalar_i16_bitsinmiddle_eq:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xffff
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w8, w8, w1
 ; CHECK-NEXT:    tst w8, #0xff0
 ; CHECK-NEXT:    cset w0, eq
@@ -272,7 +266,6 @@ define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_signbit_ne:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xff
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w8, w8, w1
 ; CHECK-NEXT:    lsr w0, w8, #7
 ; CHECK-NEXT:    ret
@@ -289,7 +282,7 @@ define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
 define i1 @scalar_i32_x_is_const_eq(i32 %y) nounwind {
 ; CHECK-LABEL: scalar_i32_x_is_const_eq:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #43605
+; CHECK-NEXT:    mov w8, #43605 // =0xaa55
 ; CHECK-NEXT:    movk w8, #43605, lsl #16
 ; CHECK-NEXT:    lsl w8, w8, w0
 ; CHECK-NEXT:    tst w8, #0x1
@@ -303,8 +296,8 @@ define i1 @scalar_i32_x_is_const_eq(i32 %y) nounwind {
 define i1 @scalar_i32_x_is_const2_eq(i32 %y) nounwind {
 ; CHECK-LABEL: scalar_i32_x_is_const2_eq:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #1
-; CHECK-NEXT:    mov w9, #43605
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    mov w9, #43605 // =0xaa55
 ; CHECK-NEXT:    lsl w8, w8, w0
 ; CHECK-NEXT:    movk w9, #43605, lsl #16
 ; CHECK-NEXT:    tst w8, w9
@@ -319,8 +312,7 @@ define i1 @scalar_i32_x_is_const2_eq(i32 %y) nounwind {
 define i1 @scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_bitsinmiddle_slt:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #24
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    mov w8, #24 // =0x18
 ; CHECK-NEXT:    lsl w8, w8, w1
 ; CHECK-NEXT:    and w8, w8, w0
 ; CHECK-NEXT:    ubfx w0, w8, #7, #1
@@ -334,8 +326,7 @@ define i1 @scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind {
 define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_signbit_eq_with_nonzero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #-128
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    mov w8, #-128 // =0xffffff80
 ; CHECK-NEXT:    lsl w8, w8, w1
 ; CHECK-NEXT:    and w8, w8, w0
 ; CHECK-NEXT:    and w8, w8, #0x80
diff --git a/llvm/test/CodeGen/AArch64/jump-table-partition.ll b/llvm/test/CodeGen/AArch64/jump-table-partition.ll
index 5def613..d282c36 100644
--- a/llvm/test/CodeGen/AArch64/jump-table-partition.ll
+++ b/llvm/test/CodeGen/AArch64/jump-table-partition.ll
@@ -1,21 +1,23 @@
-; -stats requires asserts
-; REQUIRES: asserts
-
-; Stop after 'finalize-isel' for simpler MIR.
-; Override 'aarch64-enable-atomic-cfg-tidy' to false to turn off simplifycfg
-; pass, which can simplify away switch instructions before isel lowers switch
-; into jump tables.
-; Override 'aarch64-min-jump-table-entries' so 'switch' needs fewer cases to
-; generate a jump table.
-; RUN: llc -mtriple=aarch64-unknown-linux-gnu -stop-after=finalize-isel -aarch64-enable-atomic-cfg-tidy=false -aarch64-min-jump-table-entries=2 %s -o %t.mir
-; RUN: llc -mtriple=aarch64-unknown-linux-gnu --run-pass=static-data-splitter -stats -x mir %t.mir -o - 2>&1 | FileCheck %s --check-prefix=STAT
-
- ; @foo has 2 hot and 2 cold jump tables.
- ; The two jump tables with unknown hotness come from @func_without_profile and
- ; @bar respectively.
-; STAT: 2 static-data-splitter - Number of cold jump tables seen
-; STAT: 2 static-data-splitter - Number of hot jump tables seen
-; STAT: 2 static-data-splitter - Number of jump tables with unknown hotness
+; The llc commands override two options
+; - 'aarch64-enable-atomic-cfg-tidy' to false to turn off simplifycfg pass,
+;    which can simplify away switch instructions before isel lowers switch instructions.
+; - 'aarch64-min-jump-table-entries' so 'switch' needs fewer cases to generate
+;    a jump table.
+
+; The static-data-splitter pass doesn't run.
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu -function-sections=true \
+; RUN:     -aarch64-enable-atomic-cfg-tidy=false -aarch64-min-jump-table-entries=2 \
+; RUN:     -unique-section-names=true %s -o - 2>&1 | FileCheck %s --check-prefixes=DEFAULT
+
+; DEFAULT: .section .rodata.hot.foo,"a",@progbits
+; DEFAULT:   .LJTI0_0:
+; DEFAULT:   .LJTI0_1:
+; DEFAULT:   .LJTI0_2:
+; DEFAULT:   .LJTI0_3:
+; DEFAULT: .section .rodata.func_without_profile,"a",@progbits
+; DEFAULT:   .LJTI1_0:
+; DEFAULT: .section .rodata.bar_prefix.bar,"a",@progbits
+; DEFAULT: .LJTI2_0
 
 ; RUN: llc -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions \
 ; RUN:     -partition-static-data-sections=true -function-sections=true \
@@ -41,25 +43,24 @@
 ; FUNCLESS: .section .rodata.hot.,"a",@progbits
 ; JT: .LJTI0_0:
 ; JT: .LJTI0_2:
-; NUM:    	.section	.rodata.unlikely.,"a",@progbits,unique,3
-; FUNC:       .section .rodata.unlikely.foo,"a",@progbits
+; NUM:        .section .rodata.unlikely.,"a",@progbits,unique,3
+; FUNC:       .section .rodata.unlikely.foo,
 ; FUNCLESS:   .section .rodata.unlikely.,"a",@progbits
 ; JT: .LJTI0_1:
 ; JT: .LJTI0_3:
 
-; @func_without_profile simulates the functions without profile information
-; (e.g., not instrumented or not profiled), its jump tables are placed in
-; sections without hot or unlikely prefixes.
-; NUM: .section .rodata,"a",@progbits,unique,5
-; FUNC: .section .rodata.func_without_profile,"a",@progbits
-; FUNCLESS: .section .rodata,"a",@progbits
+; func_without_profile doesn't have profiles, so its jumptable doesn't have
+; hotness-based prefix.
+; NUM:        .section .rodata,"a",@progbits,unique,5
+; FUNC:       .section .rodata.func_without_profile,"a",@progbits
+; FUNCLESS:   .section .rodata,"a",@progbits
 ; JT: .LJTI1_0:
 
 ; @bar doesn't have profile information and it has a section prefix.
 ; Tests that its jump tables are placed in sections with function prefixes.
-; NUM: .section .rodata.bar_prefix.,"a",@progbits,unique,7
-; FUNC: .section .rodata.bar_prefix.bar
-; FUNCLESS: .section .rodata.bar_prefix.,"a"
+; NUM:        .section .rodata.bar_prefix.,"a",@progbits,unique,7
+; FUNC:       .section .rodata.bar_prefix.bar
+; FUNCLESS:   .section .rodata.bar_prefix.,"a"
 ; JT: .LJTI2_0
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
diff --git a/llvm/test/CodeGen/AArch64/logic-shift.ll b/llvm/test/CodeGen/AArch64/logic-shift.ll
index 3104795..0a62f80 100644
--- a/llvm/test/CodeGen/AArch64/logic-shift.ll
+++ b/llvm/test/CodeGen/AArch64/logic-shift.ll
@@ -5,7 +5,6 @@ define i8 @or_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 ; CHECK-LABEL: or_lshr_commute0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    orr w8, w0, w1
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    and w8, w8, #0xff
 ; CHECK-NEXT:    lsr w8, w8, w2
 ; CHECK-NEXT:    orr w0, w8, w3
@@ -65,7 +64,6 @@ define i16 @or_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) {
 ; CHECK-LABEL: or_ashr_commute0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    orr w8, w0, w1
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    sxth w8, w8
 ; CHECK-NEXT:    asr w8, w8, w2
 ; CHECK-NEXT:    orr w0, w8, w3
@@ -139,7 +137,6 @@ define i8 @or_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 ; CHECK-LABEL: or_shl_commute1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    orr w8, w0, w1
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    lsl w8, w8, w2
 ; CHECK-NEXT:    orr w0, w8, w3
 ; CHECK-NEXT:    ret
@@ -233,7 +230,6 @@ define i8 @xor_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 ; CHECK-LABEL: xor_lshr_commute0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    eor w8, w0, w1
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    and w8, w8, #0xff
 ; CHECK-NEXT:    lsr w8, w8, w2
 ; CHECK-NEXT:    eor w0, w8, w3
@@ -293,7 +289,6 @@ define i16 @xor_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) {
 ; CHECK-LABEL: xor_ashr_commute0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    eor w8, w0, w1
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    sxth w8, w8
 ; CHECK-NEXT:    asr w8, w8, w2
 ; CHECK-NEXT:    eor w0, w8, w3
@@ -367,7 +362,6 @@ define i8 @xor_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 ; CHECK-LABEL: xor_shl_commute1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    eor w8, w0, w1
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    lsl w8, w8, w2
 ; CHECK-NEXT:    eor w0, w8, w3
 ; CHECK-NEXT:    ret
@@ -461,7 +455,6 @@ define i8 @and_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 ; CHECK-LABEL: and_lshr_commute0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, w1
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    and w8, w8, #0xff
 ; CHECK-NEXT:    lsr w8, w8, w2
 ; CHECK-NEXT:    and w0, w8, w3
@@ -521,7 +514,6 @@ define i16 @and_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) {
 ; CHECK-LABEL: and_ashr_commute0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, w1
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    sxth w8, w8
 ; CHECK-NEXT:    asr w8, w8, w2
 ; CHECK-NEXT:    and w0, w8, w3
@@ -595,7 +587,6 @@ define i8 @and_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 ; CHECK-LABEL: and_shl_commute1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, w1
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    lsl w8, w8, w2
 ; CHECK-NEXT:    and w0, w8, w3
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/machine-scheduler.mir b/llvm/test/CodeGen/AArch64/machine-scheduler.mir
index 6c0222f..ba2c2b3 100644
--- a/llvm/test/CodeGen/AArch64/machine-scheduler.mir
+++ b/llvm/test/CodeGen/AArch64/machine-scheduler.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass machine-scheduler  -verify-machineinstrs  -o - %s | FileCheck %s
+# RUN: llc -mtriple=aarch64-none-linux-gnu -passes=machine-scheduler -o - %s | FileCheck %s
 
 --- |
   define i64 @load_imp-def(ptr nocapture %P, i32 %v) {
diff --git a/llvm/test/CodeGen/AArch64/macro-fusion-addsub-2reg-const1.mir b/llvm/test/CodeGen/AArch64/macro-fusion-addsub-2reg-const1.mir
index 8c5a85a..2f0d19f 100644
--- a/llvm/test/CodeGen/AArch64/macro-fusion-addsub-2reg-const1.mir
+++ b/llvm/test/CodeGen/AArch64/macro-fusion-addsub-2reg-const1.mir
@@ -1,5 +1,7 @@
 # RUN: llc -o - %s -mtriple=aarch64-- -mattr=+fuse-addsub-2reg-const1 -run-pass postmisched | FileCheck %s --check-prefixes=CHECK,FUSION
+# RUN: llc -o - %s -mtriple=aarch64-- -mattr=+fuse-addsub-2reg-const1 -passes=postmisched | FileCheck %s --check-prefixes=CHECK,FUSION
 # RUN: llc -o - %s -mtriple=aarch64-- -mattr=-fuse-addsub-2reg-const1 -run-pass postmisched | FileCheck %s --check-prefixes=CHECK,NOFUSION
+# RUN: llc -o - %s -mtriple=aarch64-- -mattr=-fuse-addsub-2reg-const1 -passes=postmisched | FileCheck %s --check-prefixes=CHECK,NOFUSION
 ---
 # CHECK-LABEL: name: addsub2reg
 # CHECK: $w8 = ADDWrr killed renamable $w0, killed renamable $w1
diff --git a/llvm/test/CodeGen/AArch64/macro-fusion-last.mir b/llvm/test/CodeGen/AArch64/macro-fusion-last.mir
index 14937a4..affd2bb 100644
--- a/llvm/test/CodeGen/AArch64/macro-fusion-last.mir
+++ b/llvm/test/CodeGen/AArch64/macro-fusion-last.mir
@@ -1,5 +1,7 @@
 # RUN: llc -o - %s -mtriple=aarch64-- -mattr=+arith-bcc-fusion -run-pass postmisched | FileCheck %s --check-prefixes=CHECK,FUSION
+# RUN: llc -o - %s -mtriple=aarch64-- -mattr=+arith-bcc-fusion -passes=postmisched | FileCheck %s --check-prefixes=CHECK,FUSION
 # RUN: llc -o - %s -mtriple=aarch64-- -mattr=-arith-bcc-fusion -run-pass postmisched | FileCheck %s --check-prefixes=CHECK,NOFUSION
+# RUN: llc -o - %s -mtriple=aarch64-- -mattr=-arith-bcc-fusion -passes=postmisched | FileCheck %s --check-prefixes=CHECK,NOFUSION
 # Make sure the last instruction is correctly macro-fused when scheduling
 # top-down (post-ra).
 ---
diff --git a/llvm/test/CodeGen/AArch64/misched-branch-targets.mir b/llvm/test/CodeGen/AArch64/misched-branch-targets.mir
index 40f1484..9540826 100644
--- a/llvm/test/CodeGen/AArch64/misched-branch-targets.mir
+++ b/llvm/test/CodeGen/AArch64/misched-branch-targets.mir
@@ -1,6 +1,9 @@
 # RUN: llc -o - -run-pass=machine-scheduler -misched=shuffle %s | FileCheck %s
 # RUN: llc -o - -run-pass=postmisched %s | FileCheck %s
 
+# RUN: llc -o - -passes=machine-scheduler -misched=shuffle %s | FileCheck %s
+# RUN: llc -o - -passes=postmisched %s | FileCheck %s
+
 # REQUIRES: asserts
 # -misched=shuffle is only available with assertions enabled
 
diff --git a/llvm/test/CodeGen/AArch64/misched-bundle.mir b/llvm/test/CodeGen/AArch64/misched-bundle.mir
index ac6112e..8463cb0 100644
--- a/llvm/test/CodeGen/AArch64/misched-bundle.mir
+++ b/llvm/test/CodeGen/AArch64/misched-bundle.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
 # RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a510 -run-pass=machine-scheduler -debug-only=machine-scheduler %s -o - 2>&1 | FileCheck  %s
+# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a510 -passes=machine-scheduler -debug-only=machine-scheduler %s -o - 2>&1 | FileCheck  %s
 # REQUIRES: asserts
 
 # CHECK:      SU(0):   renamable $z0 = LD1H renamable $p0, renamable $x1, renamable $x10 :: (load unknown-size, align 1)
diff --git a/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-01.mir b/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-01.mir
index ea40f9e..ca92fa1 100644
--- a/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-01.mir
+++ b/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-01.mir
@@ -6,6 +6,14 @@
 # RUN:   -misched-dump-schedule-trace=true -misched-dump-schedule-trace-col-header-width=21 \
 # RUN: | FileCheck  %s
 
+# RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon  -mcpu=cortex-a55 %s -o - 2>&1 \
+# RUN:   -misched-dump-reserved-cycles=true \
+# RUN:   -passes=machine-scheduler -debug-only=machine-scheduler \
+# RUN:   -misched-prera-direction=bottomup -sched-print-cycles=true \
+# RUN:   -misched-detail-resource-booking=true \
+# RUN:   -misched-dump-schedule-trace=true -misched-dump-schedule-trace-col-header-width=21 \
+# RUN: | FileCheck  %s
+
 # REQUIRES: asserts, aarch64-registered-target
 
 --- |
diff --git a/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-02.mir b/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-02.mir
index 9be91b8..2b34ca5 100644
--- a/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-02.mir
+++ b/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-02.mir
@@ -5,6 +5,13 @@
 # RUN:  -misched-dump-schedule-trace=true -misched-dump-schedule-trace-col-width=4 \
 # RUN:  2>&1 | FileCheck %s
 
+# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a55  \
+# RUN:  -passes=machine-scheduler -debug-only=machine-scheduler -o - %s \
+# RUN:  -misched-prera-direction=bottomup -sched-print-cycles=true \
+# RUN:  -misched-dump-reserved-cycles=true -misched-detail-resource-booking=true\
+# RUN:  -misched-dump-schedule-trace=true -misched-dump-schedule-trace-col-width=4 \
+# RUN:  2>&1 | FileCheck %s
+
 # REQUIRES: asserts, aarch64-registered-target
 ---
 name: f
diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-arith-logic.mir b/llvm/test/CodeGen/AArch64/misched-fusion-arith-logic.mir
index 6227677..60c0026 100644
--- a/llvm/test/CodeGen/AArch64/misched-fusion-arith-logic.mir
+++ b/llvm/test/CodeGen/AArch64/misched-fusion-arith-logic.mir
@@ -1,5 +1,7 @@
 # RUN: llc -o /dev/null 2>&1 %s -mtriple aarch64-unknown -mattr=fuse-arith-logic -run-pass=machine-scheduler -misched-print-dags | FileCheck %s
+# RUN: llc -o /dev/null 2>&1 %s -mtriple aarch64-unknown -mattr=fuse-arith-logic -passes=machine-scheduler -misched-print-dags | FileCheck %s
 # RUN: llc -o /dev/null 2>&1 %s -mtriple aarch64-unknown -mcpu=exynos-m4         -run-pass=machine-scheduler -misched-print-dags | FileCheck %s
+# RUN: llc -o /dev/null 2>&1 %s -mtriple aarch64-unknown -mcpu=exynos-m4         -passes=machine-scheduler -misched-print-dags | FileCheck %s
 # REQUIRES: asserts
 
 ---
diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-cmp.mir b/llvm/test/CodeGen/AArch64/misched-fusion-cmp.mir
index b0450c5..8249816 100644
--- a/llvm/test/CodeGen/AArch64/misched-fusion-cmp.mir
+++ b/llvm/test/CodeGen/AArch64/misched-fusion-cmp.mir
@@ -1,4 +1,5 @@
 # RUN: llc -o /dev/null 2>&1 %s -mtriple aarch64-unknown -mcpu=cortex-x1 -run-pass=machine-scheduler
+# RUN: llc -o /dev/null 2>&1 %s -mtriple aarch64-unknown -mcpu=cortex-x1 -passes=machine-scheduler
 # Just ensure this doesn't crash.
 
 ---
diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-crypto-eor.mir b/llvm/test/CodeGen/AArch64/misched-fusion-crypto-eor.mir
index 623a822..e661353 100644
--- a/llvm/test/CodeGen/AArch64/misched-fusion-crypto-eor.mir
+++ b/llvm/test/CodeGen/AArch64/misched-fusion-crypto-eor.mir
@@ -1,6 +1,9 @@
 # RUN: llc -o /dev/null %s -run-pass=machine-scheduler -mtriple aarch64-- -mattr=-fuse-aes,+crypto -misched-print-dags 2>&1 | FileCheck %s --check-prefixes=CHECK,NOFUSE
 # RUN: llc -o /dev/null %s -run-pass=machine-scheduler -mtriple aarch64-- -mattr=+fuse-aes,+crypto -misched-print-dags 2>&1 | FileCheck %s --check-prefixes=CHECK,FUSEAES
 # RUN: llc -o /dev/null %s -run-pass=machine-scheduler -mtriple aarch64-- -mattr=+fuse-aes,+fuse-crypto-eor,+crypto -misched-print-dags 2>&1 | FileCheck %s --check-prefixes=CHECK,FUSEAES,FUSECRYPTO
+# RUN: llc -o /dev/null %s -passes=machine-scheduler -mtriple aarch64-- -mattr=-fuse-aes,+crypto -misched-print-dags 2>&1 | FileCheck %s --check-prefixes=CHECK,NOFUSE
+# RUN: llc -o /dev/null %s -passes=machine-scheduler -mtriple aarch64-- -mattr=+fuse-aes,+crypto -misched-print-dags 2>&1 | FileCheck %s --check-prefixes=CHECK,FUSEAES
+# RUN: llc -o /dev/null %s -passes=machine-scheduler -mtriple aarch64-- -mattr=+fuse-aes,+fuse-crypto-eor,+crypto -misched-print-dags 2>&1 | FileCheck %s --check-prefixes=CHECK,FUSEAES,FUSECRYPTO
 # REQUIRES: asserts
 
 name: func
diff --git a/llvm/test/CodeGen/AArch64/misched-move-imm.mir b/llvm/test/CodeGen/AArch64/misched-move-imm.mir
index b5ff01b..65608bb 100644
--- a/llvm/test/CodeGen/AArch64/misched-move-imm.mir
+++ b/llvm/test/CodeGen/AArch64/misched-move-imm.mir
@@ -1,4 +1,5 @@
 # RUN: llc -run-pass=machine-scheduler -mtriple=aarch64-linux-gnu -mcpu=neoverse-v2 %s -o /dev/null 2>&1
+# RUN: llc -passes=machine-scheduler -mtriple=aarch64-linux-gnu -mcpu=neoverse-v2 %s -o /dev/null 2>&1
 # Just ensure this doesn't crash. Ensures in the neoverse-v2
 # scheduling model we don't attempt to treat the first input
 # operand of MOVZXi as an immediate operand.
diff --git a/llvm/test/CodeGen/AArch64/misched-predicate-virtreg.mir b/llvm/test/CodeGen/AArch64/misched-predicate-virtreg.mir
index 0b14cee..17a6cf7 100644
--- a/llvm/test/CodeGen/AArch64/misched-predicate-virtreg.mir
+++ b/llvm/test/CodeGen/AArch64/misched-predicate-virtreg.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mcpu=exynos-m5 -mtriple=aarch64 -enable-misched -run-pass=machine-scheduler -debug-only=machine-scheduler %s -o /dev/null 2>&1 | FileCheck %s
+# RUN: llc -mcpu=exynos-m5 -mtriple=aarch64 -enable-misched -passes=machine-scheduler -debug-only=machine-scheduler %s -o /dev/null 2>&1 | FileCheck %s
 # REQUIRES: asserts
 
 # CHECK-LABEL: ********** MI Scheduling **********
diff --git a/llvm/test/CodeGen/AArch64/misched-sort-resource-in-trace.mir b/llvm/test/CodeGen/AArch64/misched-sort-resource-in-trace.mir
index b04fd89b..b652d24 100644
--- a/llvm/test/CodeGen/AArch64/misched-sort-resource-in-trace.mir
+++ b/llvm/test/CodeGen/AArch64/misched-sort-resource-in-trace.mir
@@ -4,10 +4,20 @@
 # RUN:  -misched-dump-schedule-trace=true --misched-sort-resources-in-trace=true 2>&1 | FileCheck --check-prefix=SORTED %s
 
 # RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=exynos-m3 -verify-machineinstrs \
+# RUN: -passes=machine-scheduler -debug-only=machine-scheduler -o - %s \
+# RUN:  -misched-prera-direction=topdown -sched-print-cycles=true \
+# RUN:  -misched-dump-schedule-trace=true --misched-sort-resources-in-trace=true 2>&1 | FileCheck --check-prefix=SORTED %s
+
+# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=exynos-m3 -verify-machineinstrs \
 # RUN: -run-pass=machine-scheduler -debug-only=machine-scheduler -o - %s \
 # RUN:  -misched-prera-direction=topdown -sched-print-cycles=true \
 # RUN:  -misched-dump-schedule-trace=true --misched-sort-resources-in-trace=false 2>&1 | FileCheck --check-prefix=UNSORTED %s
 
+# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=exynos-m3 -verify-machineinstrs \
+# RUN: -passes=machine-scheduler -debug-only=machine-scheduler -o - %s \
+# RUN:  -misched-prera-direction=topdown -sched-print-cycles=true \
+# RUN:  -misched-dump-schedule-trace=true --misched-sort-resources-in-trace=false 2>&1 | FileCheck --check-prefix=UNSORTED %s
+
 # REQUIRES: asserts, aarch64-registered-target
 ---
 name: test
diff --git a/llvm/test/CodeGen/AArch64/neon-insextbitcast.ll b/llvm/test/CodeGen/AArch64/neon-insextbitcast.ll
index 2896137..ebff3f1 100644
--- a/llvm/test/CodeGen/AArch64/neon-insextbitcast.ll
+++ b/llvm/test/CodeGen/AArch64/neon-insextbitcast.ll
@@ -163,8 +163,6 @@ entry:
 define double @test_vext_v1i64(<1 x i64> %a) {
 ; CHECK-LABEL: test_vext_v1i64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %b = extractelement <1 x i64> %a, i32 0
diff --git a/llvm/test/CodeGen/AArch64/sched-postidxalias.mir b/llvm/test/CodeGen/AArch64/sched-postidxalias.mir
index 98ee0fa..02256ca 100644
--- a/llvm/test/CodeGen/AArch64/sched-postidxalias.mir
+++ b/llvm/test/CodeGen/AArch64/sched-postidxalias.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
 # RUN: llc -mtriple=aarch64 -mcpu=cortex-a55  -run-pass=machine-scheduler -debug-only=machine-scheduler -o - %s 2>&1 | FileCheck %s
+# RUN: llc -mtriple=aarch64 -mcpu=cortex-a55  -passes=machine-scheduler -debug-only=machine-scheduler -o - %s 2>&1 | FileCheck %s
 # REQUIRES: asserts
 
 # Both the accesses should have an offset of 0
diff --git a/llvm/test/CodeGen/AArch64/sched-print-cycle.mir b/llvm/test/CodeGen/AArch64/sched-print-cycle.mir
index 59c5157..d58037e 100644
--- a/llvm/test/CodeGen/AArch64/sched-print-cycle.mir
+++ b/llvm/test/CodeGen/AArch64/sched-print-cycle.mir
@@ -1,9 +1,15 @@
 # RUN: llc -mtriple=arm64-apple-macos -mcpu=apple-m1 -sched-print-cycles=true \
 # RUN: -run-pass=machine-scheduler -debug-only=machine-scheduler -o - %s 2>&1 | FileCheck %s
 
+# RUN: llc -mtriple=arm64-apple-macos -mcpu=apple-m1 -sched-print-cycles=true \
+# RUN: -passes=machine-scheduler -debug-only=machine-scheduler -o - %s 2>&1 | FileCheck %s
+
 # RUN: llc -mtriple=arm64-apple-macos -mcpu=apple-m1 -sched-print-cycles=false \
 # RUN: -run-pass=machine-scheduler -debug-only=machine-scheduler -o - %s 2>&1 | FileCheck %s --check-prefix=NOCYCLES
 
+# RUN: llc -mtriple=arm64-apple-macos -mcpu=apple-m1 -sched-print-cycles=false \
+# RUN: -passes=machine-scheduler -debug-only=machine-scheduler -o - %s 2>&1 | FileCheck %s --check-prefix=NOCYCLES
+
 # REQUIRES: asserts
 ---
 name: mul_mul
diff --git a/llvm/test/CodeGen/AArch64/scheduledag-constreg.mir b/llvm/test/CodeGen/AArch64/scheduledag-constreg.mir
index 65ec434..66680af 100644
--- a/llvm/test/CodeGen/AArch64/scheduledag-constreg.mir
+++ b/llvm/test/CodeGen/AArch64/scheduledag-constreg.mir
@@ -1,4 +1,5 @@
 # RUN: llc -o /dev/null %s -mtriple=aarch64-- -run-pass=machine-scheduler -enable-misched -debug-only=machine-scheduler 2>&1 | FileCheck %s
+# RUN: llc -o /dev/null %s -mtriple=aarch64-- -passes=machine-scheduler -enable-misched -debug-only=machine-scheduler 2>&1 | FileCheck %s
 # REQUIRES: asserts
 --- |
   define void @func() { ret void }
diff --git a/llvm/test/CodeGen/AArch64/shift-by-signext.ll b/llvm/test/CodeGen/AArch64/shift-by-signext.ll
index 67e2da9..3bcaee5 100644
--- a/llvm/test/CodeGen/AArch64/shift-by-signext.ll
+++ b/llvm/test/CodeGen/AArch64/shift-by-signext.ll
@@ -81,7 +81,6 @@ define i32 @n6_fshl(i32 %x, i32 %y, i8 %shamt) nounwind {
 ; CHECK-LABEL: n6_fshl:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    lsr w8, w1, #1
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    mvn w9, w2
 ; CHECK-NEXT:    lsl w10, w0, w2
 ; CHECK-NEXT:    lsr w8, w8, w9
@@ -95,7 +94,6 @@ define i32 @n7_fshr(i32 %x, i32 %y, i8 %shamt) nounwind {
 ; CHECK-LABEL: n7_fshr:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    lsl w8, w0, #1
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    mvn w9, w2
 ; CHECK-NEXT:    lsr w10, w1, w2
 ; CHECK-NEXT:    lsl w8, w8, w9
diff --git a/llvm/test/CodeGen/AArch64/shift.ll b/llvm/test/CodeGen/AArch64/shift.ll
index 0669286..5f24af8 100644
--- a/llvm/test/CodeGen/AArch64/shift.ll
+++ b/llvm/test/CodeGen/AArch64/shift.ll
@@ -21,7 +21,6 @@ define i1 @shl_i1(i1 %0, i1 %1){
 define i8 @shl_i8(i8 %0, i8 %1){
 ; CHECK-SD-LABEL: shl_i8:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    lsl w0, w0, w1
 ; CHECK-SD-NEXT:    ret
 ;
@@ -37,7 +36,6 @@ define i8 @shl_i8(i8 %0, i8 %1){
 define i16 @shl_i16(i16 %0, i16 %1){
 ; CHECK-SD-LABEL: shl_i16:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    lsl w0, w0, w1
 ; CHECK-SD-NEXT:    ret
 ;
@@ -123,7 +121,6 @@ define i8 @ashr_i8(i8 %0, i8 %1){
 ; CHECK-SD-LABEL: ashr_i8:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    sxtb w8, w0
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    asr w0, w8, w1
 ; CHECK-SD-NEXT:    ret
 ;
@@ -141,7 +138,6 @@ define i16 @ashr_i16(i16 %0, i16 %1){
 ; CHECK-SD-LABEL: ashr_i16:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    sxth w8, w0
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    asr w0, w8, w1
 ; CHECK-SD-NEXT:    ret
 ;
@@ -230,7 +226,6 @@ define i8 @lshr_i8(i8 %0, i8 %1){
 ; CHECK-SD-LABEL: lshr_i8:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    and w8, w0, #0xff
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    lsr w0, w8, w1
 ; CHECK-SD-NEXT:    ret
 ;
@@ -248,7 +243,6 @@ define i16 @lshr_i16(i16 %0, i16 %1){
 ; CHECK-SD-LABEL: lshr_i16:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    and w8, w0, #0xffff
-; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    lsr w0, w8, w1
 ; CHECK-SD-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/AArch64/sink-and-fold.ll b/llvm/test/CodeGen/AArch64/sink-and-fold.ll
index f65a08a..4d383fe 100644
--- a/llvm/test/CodeGen/AArch64/sink-and-fold.ll
+++ b/llvm/test/CodeGen/AArch64/sink-and-fold.ll
@@ -298,7 +298,6 @@ exit:
 define i32 @f6(i1 %c, ptr %a, i32 %i) {
 ; CHECK-LABEL: f6:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    tbz w0, #0, .LBB6_2
 ; CHECK-NEXT:  // %bb.1: // %if.then
 ; CHECK-NEXT:    mov w0, wzr
diff --git a/llvm/test/CodeGen/AArch64/sve-aliasing.mir b/llvm/test/CodeGen/AArch64/sve-aliasing.mir
index 3b7c9fe..34a08ad 100644
--- a/llvm/test/CodeGen/AArch64/sve-aliasing.mir
+++ b/llvm/test/CodeGen/AArch64/sve-aliasing.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -o - %s -mtriple=aarch64 -run-pass=machine-scheduler -verify-machineinstrs | FileCheck %s
+# RUN: llc -o - %s -mtriple=aarch64 -passes=machine-scheduler | FileCheck %s
 
 ---
 name:            scalable_v16i1
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll
index aef19d2..3e6a7ce 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll
@@ -48,10 +48,10 @@ define void @subvector_v32i16(ptr %in, ptr %out) #0 {
 ; VBITS_GE_256:       // %bb.0:
 ; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
 ; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
-; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
-; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
-; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x1, x8, lsl #1]
-; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x1]
+; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0, x8, lsl #1]
+; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0]
+; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x1, x8, lsl #1]
+; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x1]
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: subvector_v32i16:
@@ -103,10 +103,10 @@ define void @subvector_v16i32(ptr %in, ptr %out) #0 {
 ; VBITS_GE_256:       // %bb.0:
 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
-; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
-; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
-; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
-; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1]
+; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0, x8, lsl #2]
+; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0]
+; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1, x8, lsl #2]
+; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x1]
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: subvector_v16i32:
@@ -159,10 +159,10 @@ define void @subvector_v8i64(ptr %in, ptr %out) vscale_range(2,0) #0 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    mov x8, #4 // =0x4
-; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
-; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0]
-; CHECK-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
-; CHECK-NEXT:    st1d { z1.d }, p0, [x1]
+; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0, x8, lsl #3]
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    st1d { z1.d }, p0, [x1, x8, lsl #3]
+; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
 ; CHECK-NEXT:    ret
   %a = load <8 x i64>, ptr %in
   br label %bb1
@@ -236,10 +236,10 @@ define void @subvector_v32f16(ptr %in, ptr %out) #0 {
 ; VBITS_GE_256:       // %bb.0:
 ; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
 ; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
-; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
-; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
-; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x1, x8, lsl #1]
-; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x1]
+; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0, x8, lsl #1]
+; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0]
+; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x1, x8, lsl #1]
+; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x1]
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: subvector_v32f16:
@@ -291,10 +291,10 @@ define void @subvector_v16f32(ptr %in, ptr %out) #0 {
 ; VBITS_GE_256:       // %bb.0:
 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
-; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
-; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
-; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
-; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1]
+; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0, x8, lsl #2]
+; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0]
+; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1, x8, lsl #2]
+; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x1]
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: subvector_v16f32:
@@ -345,10 +345,10 @@ define void @subvector_v8f64(ptr %in, ptr %out) #0 {
 ; VBITS_GE_256:       // %bb.0:
 ; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
 ; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
-; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
-; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
-; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
-; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x1]
+; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0, x8, lsl #3]
+; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0]
+; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x1, x8, lsl #3]
+; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x1]
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: subvector_v8f64:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
index b61c30a..94d756a 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
@@ -1335,50 +1335,50 @@ define <8 x i16> @fcvtzu_v8f64_v8i16(ptr %a) {
 define void @fcvtzu_v16f64_v16i16(ptr %a, ptr %b) {
 ; CHECK-LABEL: fcvtzu_v16f64_v16i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldp q5, q6, [x0, #96]
+; CHECK-NEXT:    ldp q0, q1, [x0, #96]
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    ldp q0, q4, [x0, #32]
-; CHECK-NEXT:    ldp q2, q7, [x0, #64]
-; CHECK-NEXT:    ldp q1, q3, [x0]
-; CHECK-NEXT:    fcvtzs z6.d, p0/m, z6.d
-; CHECK-NEXT:    fcvtzs z4.d, p0/m, z4.d
-; CHECK-NEXT:    fcvtzs z5.d, p0/m, z5.d
+; CHECK-NEXT:    ldp q2, q3, [x0, #32]
+; CHECK-NEXT:    ldp q4, q5, [x0, #64]
+; CHECK-NEXT:    ldp q6, q7, [x0]
+; CHECK-NEXT:    fcvtzs z1.d, p0/m, z1.d
+; CHECK-NEXT:    fcvtzs z3.d, p0/m, z3.d
 ; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
-; CHECK-NEXT:    fcvtzs z7.d, p0/m, z7.d
 ; CHECK-NEXT:    fcvtzs z2.d, p0/m, z2.d
-; CHECK-NEXT:    fcvtzs z3.d, p0/m, z3.d
-; CHECK-NEXT:    fcvtzs z1.d, p0/m, z1.d
-; CHECK-NEXT:    uzp1 z6.s, z6.s, z6.s
-; CHECK-NEXT:    uzp1 z4.s, z4.s, z4.s
-; CHECK-NEXT:    uzp1 z5.s, z5.s, z5.s
+; CHECK-NEXT:    fcvtzs z5.d, p0/m, z5.d
+; CHECK-NEXT:    fcvtzs z4.d, p0/m, z4.d
+; CHECK-NEXT:    fcvtzs z7.d, p0/m, z7.d
+; CHECK-NEXT:    fcvtzs z6.d, p0/m, z6.d
+; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
+; CHECK-NEXT:    uzp1 z3.s, z3.s, z3.s
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT:    uzp1 z7.s, z7.s, z7.s
 ; CHECK-NEXT:    uzp1 z2.s, z2.s, z2.s
-; CHECK-NEXT:    uzp1 z3.s, z3.s, z3.s
-; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT:    mov z17.s, z6.s[1]
-; CHECK-NEXT:    mov z16.s, z4.s[1]
-; CHECK-NEXT:    mov z18.s, z5.s[1]
-; CHECK-NEXT:    mov z21.s, z0.s[1]
-; CHECK-NEXT:    mov z19.s, z7.s[1]
-; CHECK-NEXT:    mov z20.s, z2.s[1]
-; CHECK-NEXT:    mov z22.s, z3.s[1]
-; CHECK-NEXT:    mov z23.s, z1.s[1]
-; CHECK-NEXT:    zip1 z6.h, z6.h, z17.h
-; CHECK-NEXT:    zip1 z4.h, z4.h, z16.h
-; CHECK-NEXT:    zip1 z5.h, z5.h, z18.h
-; CHECK-NEXT:    zip1 z0.h, z0.h, z21.h
-; CHECK-NEXT:    zip1 z7.h, z7.h, z19.h
-; CHECK-NEXT:    zip1 z2.h, z2.h, z20.h
-; CHECK-NEXT:    zip1 z3.h, z3.h, z22.h
-; CHECK-NEXT:    zip1 z1.h, z1.h, z23.h
-; CHECK-NEXT:    zip1 z5.s, z5.s, z6.s
-; CHECK-NEXT:    zip1 z0.s, z0.s, z4.s
-; CHECK-NEXT:    zip1 z2.s, z2.s, z7.s
-; CHECK-NEXT:    zip1 z1.s, z1.s, z3.s
-; CHECK-NEXT:    zip1 z2.d, z2.d, z5.d
+; CHECK-NEXT:    uzp1 z5.s, z5.s, z5.s
+; CHECK-NEXT:    uzp1 z4.s, z4.s, z4.s
+; CHECK-NEXT:    uzp1 z7.s, z7.s, z7.s
+; CHECK-NEXT:    uzp1 z6.s, z6.s, z6.s
+; CHECK-NEXT:    mov z17.s, z1.s[1]
+; CHECK-NEXT:    mov z16.s, z3.s[1]
+; CHECK-NEXT:    mov z18.s, z0.s[1]
+; CHECK-NEXT:    mov z21.s, z2.s[1]
+; CHECK-NEXT:    mov z19.s, z5.s[1]
+; CHECK-NEXT:    mov z20.s, z4.s[1]
+; CHECK-NEXT:    mov z22.s, z7.s[1]
+; CHECK-NEXT:    mov z23.s, z6.s[1]
+; CHECK-NEXT:    zip1 z1.h, z1.h, z17.h
+; CHECK-NEXT:    zip1 z3.h, z3.h, z16.h
+; CHECK-NEXT:    zip1 z0.h, z0.h, z18.h
+; CHECK-NEXT:    zip1 z2.h, z2.h, z21.h
+; CHECK-NEXT:    zip1 z5.h, z5.h, z19.h
+; CHECK-NEXT:    zip1 z4.h, z4.h, z20.h
+; CHECK-NEXT:    zip1 z7.h, z7.h, z22.h
+; CHECK-NEXT:    zip1 z6.h, z6.h, z23.h
+; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    zip1 z2.s, z2.s, z3.s
+; CHECK-NEXT:    zip1 z1.s, z4.s, z5.s
+; CHECK-NEXT:    zip1 z3.s, z6.s, z7.s
 ; CHECK-NEXT:    zip1 z0.d, z1.d, z0.d
-; CHECK-NEXT:    stp q0, q2, [x1]
+; CHECK-NEXT:    zip1 z1.d, z3.d, z2.d
+; CHECK-NEXT:    stp q1, q0, [x1]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v16f64_v16i16:
@@ -3033,50 +3033,50 @@ define <8 x i16> @fcvtzs_v8f64_v8i16(ptr %a) {
 define void @fcvtzs_v16f64_v16i16(ptr %a, ptr %b) {
 ; CHECK-LABEL: fcvtzs_v16f64_v16i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldp q5, q6, [x0, #96]
+; CHECK-NEXT:    ldp q0, q1, [x0, #96]
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    ldp q0, q4, [x0, #32]
-; CHECK-NEXT:    ldp q2, q7, [x0, #64]
-; CHECK-NEXT:    ldp q1, q3, [x0]
-; CHECK-NEXT:    fcvtzs z6.d, p0/m, z6.d
-; CHECK-NEXT:    fcvtzs z4.d, p0/m, z4.d
-; CHECK-NEXT:    fcvtzs z5.d, p0/m, z5.d
+; CHECK-NEXT:    ldp q2, q3, [x0, #32]
+; CHECK-NEXT:    ldp q4, q5, [x0, #64]
+; CHECK-NEXT:    ldp q6, q7, [x0]
+; CHECK-NEXT:    fcvtzs z1.d, p0/m, z1.d
+; CHECK-NEXT:    fcvtzs z3.d, p0/m, z3.d
 ; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
-; CHECK-NEXT:    fcvtzs z7.d, p0/m, z7.d
 ; CHECK-NEXT:    fcvtzs z2.d, p0/m, z2.d
-; CHECK-NEXT:    fcvtzs z3.d, p0/m, z3.d
-; CHECK-NEXT:    fcvtzs z1.d, p0/m, z1.d
-; CHECK-NEXT:    uzp1 z6.s, z6.s, z6.s
-; CHECK-NEXT:    uzp1 z4.s, z4.s, z4.s
-; CHECK-NEXT:    uzp1 z5.s, z5.s, z5.s
+; CHECK-NEXT:    fcvtzs z5.d, p0/m, z5.d
+; CHECK-NEXT:    fcvtzs z4.d, p0/m, z4.d
+; CHECK-NEXT:    fcvtzs z7.d, p0/m, z7.d
+; CHECK-NEXT:    fcvtzs z6.d, p0/m, z6.d
+; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
+; CHECK-NEXT:    uzp1 z3.s, z3.s, z3.s
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT:    uzp1 z7.s, z7.s, z7.s
 ; CHECK-NEXT:    uzp1 z2.s, z2.s, z2.s
-; CHECK-NEXT:    uzp1 z3.s, z3.s, z3.s
-; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT:    mov z17.s, z6.s[1]
-; CHECK-NEXT:    mov z16.s, z4.s[1]
-; CHECK-NEXT:    mov z18.s, z5.s[1]
-; CHECK-NEXT:    mov z21.s, z0.s[1]
-; CHECK-NEXT:    mov z19.s, z7.s[1]
-; CHECK-NEXT:    mov z20.s, z2.s[1]
-; CHECK-NEXT:    mov z22.s, z3.s[1]
-; CHECK-NEXT:    mov z23.s, z1.s[1]
-; CHECK-NEXT:    zip1 z6.h, z6.h, z17.h
-; CHECK-NEXT:    zip1 z4.h, z4.h, z16.h
-; CHECK-NEXT:    zip1 z5.h, z5.h, z18.h
-; CHECK-NEXT:    zip1 z0.h, z0.h, z21.h
-; CHECK-NEXT:    zip1 z7.h, z7.h, z19.h
-; CHECK-NEXT:    zip1 z2.h, z2.h, z20.h
-; CHECK-NEXT:    zip1 z3.h, z3.h, z22.h
-; CHECK-NEXT:    zip1 z1.h, z1.h, z23.h
-; CHECK-NEXT:    zip1 z5.s, z5.s, z6.s
-; CHECK-NEXT:    zip1 z0.s, z0.s, z4.s
-; CHECK-NEXT:    zip1 z2.s, z2.s, z7.s
-; CHECK-NEXT:    zip1 z1.s, z1.s, z3.s
-; CHECK-NEXT:    zip1 z2.d, z2.d, z5.d
+; CHECK-NEXT:    uzp1 z5.s, z5.s, z5.s
+; CHECK-NEXT:    uzp1 z4.s, z4.s, z4.s
+; CHECK-NEXT:    uzp1 z7.s, z7.s, z7.s
+; CHECK-NEXT:    uzp1 z6.s, z6.s, z6.s
+; CHECK-NEXT:    mov z17.s, z1.s[1]
+; CHECK-NEXT:    mov z16.s, z3.s[1]
+; CHECK-NEXT:    mov z18.s, z0.s[1]
+; CHECK-NEXT:    mov z21.s, z2.s[1]
+; CHECK-NEXT:    mov z19.s, z5.s[1]
+; CHECK-NEXT:    mov z20.s, z4.s[1]
+; CHECK-NEXT:    mov z22.s, z7.s[1]
+; CHECK-NEXT:    mov z23.s, z6.s[1]
+; CHECK-NEXT:    zip1 z1.h, z1.h, z17.h
+; CHECK-NEXT:    zip1 z3.h, z3.h, z16.h
+; CHECK-NEXT:    zip1 z0.h, z0.h, z18.h
+; CHECK-NEXT:    zip1 z2.h, z2.h, z21.h
+; CHECK-NEXT:    zip1 z5.h, z5.h, z19.h
+; CHECK-NEXT:    zip1 z4.h, z4.h, z20.h
+; CHECK-NEXT:    zip1 z7.h, z7.h, z22.h
+; CHECK-NEXT:    zip1 z6.h, z6.h, z23.h
+; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    zip1 z2.s, z2.s, z3.s
+; CHECK-NEXT:    zip1 z1.s, z4.s, z5.s
+; CHECK-NEXT:    zip1 z3.s, z6.s, z7.s
 ; CHECK-NEXT:    zip1 z0.d, z1.d, z0.d
-; CHECK-NEXT:    stp q0, q2, [x1]
+; CHECK-NEXT:    zip1 z1.d, z3.d, z2.d
+; CHECK-NEXT:    stp q1, q0, [x1]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v16f64_v16i16:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll
index 8b296d9..168ad6a 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll
@@ -151,20 +151,20 @@ define void @zip_v32i16(ptr %a, ptr %b) {
 ; CHECK-NEXT:    .cfi_offset b13, -48
 ; CHECK-NEXT:    .cfi_offset b14, -56
 ; CHECK-NEXT:    .cfi_offset b15, -64
-; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ldp q1, q0, [x0]
 ; CHECK-NEXT:    ldp q2, q3, [x1]
-; CHECK-NEXT:    mov z5.h, z1.h[7]
-; CHECK-NEXT:    mov z7.h, z1.h[6]
-; CHECK-NEXT:    mov z17.h, z1.h[5]
+; CHECK-NEXT:    mov z5.h, z0.h[7]
+; CHECK-NEXT:    mov z7.h, z0.h[6]
+; CHECK-NEXT:    mov z17.h, z0.h[5]
 ; CHECK-NEXT:    mov z4.h, z3.h[7]
 ; CHECK-NEXT:    mov z6.h, z3.h[6]
 ; CHECK-NEXT:    mov z16.h, z3.h[5]
 ; CHECK-NEXT:    mov z20.h, z2.h[7]
-; CHECK-NEXT:    mov z21.h, z0.h[7]
+; CHECK-NEXT:    mov z21.h, z1.h[7]
 ; CHECK-NEXT:    mov z18.h, z3.h[4]
-; CHECK-NEXT:    mov z19.h, z1.h[4]
+; CHECK-NEXT:    mov z19.h, z0.h[4]
 ; CHECK-NEXT:    mov z22.h, z2.h[6]
-; CHECK-NEXT:    mov z23.h, z0.h[6]
+; CHECK-NEXT:    mov z23.h, z1.h[6]
 ; CHECK-NEXT:    zip1 z24.h, z5.h, z4.h
 ; CHECK-NEXT:    zip1 z25.h, z7.h, z6.h
 ; CHECK-NEXT:    zip1 z17.h, z17.h, z16.h
@@ -176,10 +176,10 @@ define void @zip_v32i16(ptr %a, ptr %b) {
 ; CHECK-NEXT:    zip1 z22.h, z23.h, z22.h
 ; CHECK-NEXT:    mov z23.h, z2.h[5]
 ; CHECK-NEXT:    mov z21.h, z6.h[7]
-; CHECK-NEXT:    mov z24.h, z0.h[5]
+; CHECK-NEXT:    mov z24.h, z1.h[5]
 ; CHECK-NEXT:    mov z25.h, z2.h[4]
 ; CHECK-NEXT:    mov z20.h, z7.h[7]
-; CHECK-NEXT:    mov z26.h, z0.h[4]
+; CHECK-NEXT:    mov z26.h, z1.h[4]
 ; CHECK-NEXT:    mov z27.h, z6.h[6]
 ; CHECK-NEXT:    mov z28.h, z7.h[5]
 ; CHECK-NEXT:    mov z29.h, z6.h[5]
@@ -212,22 +212,22 @@ define void @zip_v32i16(ptr %a, ptr %b) {
 ; CHECK-NEXT:    zip1 z20.s, z28.s, z27.s
 ; CHECK-NEXT:    zip1 z16.s, z22.s, z16.s
 ; CHECK-NEXT:    zip1 z21.s, z24.s, z23.s
-; CHECK-NEXT:    zip1 z1.h, z1.h, z3.h
+; CHECK-NEXT:    zip1 z0.h, z0.h, z3.h
 ; CHECK-NEXT:    zip1 z3.s, z26.s, z25.s
 ; CHECK-NEXT:    zip1 z22.s, z30.s, z29.s
 ; CHECK-NEXT:    zip1 z6.h, z6.h, z7.h
 ; CHECK-NEXT:    zip1 z7.d, z17.d, z19.d
 ; CHECK-NEXT:    zip1 z17.d, z20.d, z18.d
-; CHECK-NEXT:    zip1 z0.h, z0.h, z2.h
+; CHECK-NEXT:    zip1 z1.h, z1.h, z2.h
 ; CHECK-NEXT:    zip1 z2.h, z4.h, z5.h
 ; CHECK-NEXT:    zip1 z4.d, z21.d, z16.d
 ; CHECK-NEXT:    zip1 z3.d, z22.d, z3.d
-; CHECK-NEXT:    add z1.h, z1.h, z6.h
+; CHECK-NEXT:    add z0.h, z0.h, z6.h
 ; CHECK-NEXT:    add z5.h, z7.h, z17.h
-; CHECK-NEXT:    add z0.h, z0.h, z2.h
+; CHECK-NEXT:    add z1.h, z1.h, z2.h
 ; CHECK-NEXT:    add z2.h, z4.h, z3.h
-; CHECK-NEXT:    stp q1, q5, [x0, #32]
-; CHECK-NEXT:    stp q0, q2, [x0]
+; CHECK-NEXT:    stp q0, q5, [x0, #32]
+; CHECK-NEXT:    stp q1, q2, [x0]
 ; CHECK-NEXT:    ldp d15, d14, [sp], #64 // 16-byte Folded Reload
 ; CHECK-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
index c9fe258..b0a30b7 100644
--- a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
+++ b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
@@ -378,36 +378,36 @@ define i4 @convert_to_bitmask_with_unknown_type_in_long_chain(<4 x i32> %vec1, <
 ; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-GI-NEXT:    mov w8, #1 ; =0x1
 ; CHECK-GI-NEXT:    mov w9, #0 ; =0x0
-; CHECK-GI-NEXT:    cmeq.4s v5, v0, #0
+; CHECK-GI-NEXT:    cmeq.4s v0, v0, #0
 ; CHECK-GI-NEXT:    fmov s2, w8
 ; CHECK-GI-NEXT:    fmov s4, w9
 ; CHECK-GI-NEXT:    cmeq.4s v1, v1, #0
 ; CHECK-GI-NEXT:    mov.16b v3, v2
-; CHECK-GI-NEXT:    mov.16b v0, v4
+; CHECK-GI-NEXT:    mov.16b v5, v4
 ; CHECK-GI-NEXT:    mov.h v4[1], w8
-; CHECK-GI-NEXT:    bic.16b v1, v1, v5
-; CHECK-GI-NEXT:    mov.16b v5, v2
+; CHECK-GI-NEXT:    bic.16b v0, v1, v0
+; CHECK-GI-NEXT:    mov.16b v1, v2
 ; CHECK-GI-NEXT:    mov.h v2[1], w8
 ; CHECK-GI-NEXT:    mov.h v3[1], w8
-; CHECK-GI-NEXT:    mov.h v0[1], w8
 ; CHECK-GI-NEXT:    mov.h v5[1], w8
+; CHECK-GI-NEXT:    mov.h v1[1], w8
 ; CHECK-GI-NEXT:    mov.h v4[2], w8
-; CHECK-GI-NEXT:    xtn.4h v1, v1
+; CHECK-GI-NEXT:    xtn.4h v0, v0
 ; CHECK-GI-NEXT:    mov.h v2[2], w8
 ; CHECK-GI-NEXT:    mov.h v3[2], w9
-; CHECK-GI-NEXT:    mov.h v0[2], w9
 ; CHECK-GI-NEXT:    mov.h v5[2], w9
+; CHECK-GI-NEXT:    mov.h v1[2], w9
 ; CHECK-GI-NEXT:    mov.h v4[3], w9
 ; CHECK-GI-NEXT:    mov.h v2[3], w9
 ; CHECK-GI-NEXT:    mov.h v3[3], w9
-; CHECK-GI-NEXT:    mov.h v0[3], w8
 ; CHECK-GI-NEXT:    mov.h v5[3], w8
-; CHECK-GI-NEXT:    orr.8b v1, v1, v3
-; CHECK-GI-NEXT:    eor.8b v0, v1, v0
-; CHECK-GI-NEXT:    eor.8b v1, v4, v1
-; CHECK-GI-NEXT:    and.8b v0, v0, v5
-; CHECK-GI-NEXT:    orr.8b v1, v2, v1
-; CHECK-GI-NEXT:    orr.8b v0, v0, v1
+; CHECK-GI-NEXT:    mov.h v1[3], w8
+; CHECK-GI-NEXT:    orr.8b v0, v0, v3
+; CHECK-GI-NEXT:    eor.8b v3, v0, v5
+; CHECK-GI-NEXT:    eor.8b v0, v4, v0
+; CHECK-GI-NEXT:    and.8b v1, v3, v1
+; CHECK-GI-NEXT:    orr.8b v0, v2, v0
+; CHECK-GI-NEXT:    orr.8b v0, v1, v0
 ; CHECK-GI-NEXT:    ushll.4s v0, v0, #0
 ; CHECK-GI-NEXT:    mov.s w8, v0[1]
 ; CHECK-GI-NEXT:    mov.s w9, v0[2]
diff --git a/llvm/test/CodeGen/AMDGPU/at-least-one-def-value-assert.mir b/llvm/test/CodeGen/AMDGPU/at-least-one-def-value-assert.mir
index 82ee173..1c4093b 100644
--- a/llvm/test/CodeGen/AMDGPU/at-least-one-def-value-assert.mir
+++ b/llvm/test/CodeGen/AMDGPU/at-least-one-def-value-assert.mir
@@ -1,5 +1,7 @@
 # RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs  -run-pass=machine-scheduler -verify-misched -o /dev/null %s  2>&1  | FileCheck %s
 
+# RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -passes=machine-scheduler -verify-misched -o /dev/null %s  2>&1  | FileCheck %s
+
 # CHECK: *** Bad machine code: No live subrange at use ***
 # CHECK-NEXT: - function:    at_least_one_value_should_be_defined_by_this_mask
 # CHECK-NEXT: - basic block: %bb.0
diff --git a/llvm/test/CodeGen/AMDGPU/bundle-latency.mir b/llvm/test/CodeGen/AMDGPU/bundle-latency.mir
index d2846fd..9f2c6d1 100644
--- a/llvm/test/CodeGen/AMDGPU/bundle-latency.mir
+++ b/llvm/test/CodeGen/AMDGPU/bundle-latency.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=post-RA-sched %s -o - | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes=post-RA-sched %s -o - | FileCheck -check-prefix=GCN %s
 
 # Check that we move consumer further from producer, even if one of them is in a bundle.
 
diff --git a/llvm/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir b/llvm/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir
index 2b7b03d..e42a1d1 100644
--- a/llvm/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir
+++ b/llvm/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-xnack -run-pass post-RA-sched -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-xnack -passes=post-RA-sched -o - %s | FileCheck -check-prefix=GCN %s
 
 # GCN:      FLAT_LOAD_DWORD
 # GCN-NEXT: FLAT_LOAD_DWORD
diff --git a/llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir b/llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir
index 0d84dc0..1ae544f 100644
--- a/llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir
+++ b/llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass machine-scheduler %s -o - | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes=machine-scheduler %s -o - | FileCheck -check-prefix=GCN %s
 
 # GCN-LABEL: name: cluster_flat_loads
 # GCN:      FLAT_LOAD_DWORD %0, 0
diff --git a/llvm/test/CodeGen/AMDGPU/coalescing-with-subregs-in-loop-bug.mir b/llvm/test/CodeGen/AMDGPU/coalescing-with-subregs-in-loop-bug.mir
index e066a48..dcc5d9d 100644
--- a/llvm/test/CodeGen/AMDGPU/coalescing-with-subregs-in-loop-bug.mir
+++ b/llvm/test/CodeGen/AMDGPU/coalescing-with-subregs-in-loop-bug.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx803 -run-pass=register-coalescer,rename-independent-subregs %s -o - | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx803 -passes=register-coalescer,rename-independent-subregs %s -o - | FileCheck -check-prefix=GCN %s
 
 # This test is for a bug where the following happens:
 #
diff --git a/llvm/test/CodeGen/AMDGPU/dbg-value-ends-sched-region.mir b/llvm/test/CodeGen/AMDGPU/dbg-value-ends-sched-region.mir
index 4945c70..b38dc4d 100644
--- a/llvm/test/CodeGen/AMDGPU/dbg-value-ends-sched-region.mir
+++ b/llvm/test/CodeGen/AMDGPU/dbg-value-ends-sched-region.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -run-pass=machine-scheduler -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -passes=machine-scheduler -o - %s | FileCheck %s
 # The DBG_VALUE in bb.5 ends a scheduling region, and its uses should
 # not be tracked like a normal instruction.
 
diff --git a/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir b/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir
index 8a1c68b..156979d 100644
--- a/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir
+++ b/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=machine-scheduler -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=machine-scheduler %s -o - | FileCheck %s
 --- |
 
   declare void @llvm.dbg.value(metadata, metadata, metadata) #0
diff --git a/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-liveins.mir b/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-liveins.mir
index 19071be..d415346 100644
--- a/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-liveins.mir
+++ b/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-liveins.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=amdgcn -mcpu=gfx908 -run-pass=machine-scheduler %s -o - -debug-only=machine-scheduler 2>&1 | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -passes=machine-scheduler %s -o - -debug-only=machine-scheduler 2>&1 | FileCheck %s
 # REQUIRES: asserts
 
 # CHECK: ********** MI Scheduling **********
diff --git a/llvm/test/CodeGen/AMDGPU/debug-value-scheduler.mir b/llvm/test/CodeGen/AMDGPU/debug-value-scheduler.mir
index 4f15e0e..170672d 100644
--- a/llvm/test/CodeGen/AMDGPU/debug-value-scheduler.mir
+++ b/llvm/test/CodeGen/AMDGPU/debug-value-scheduler.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=amdgcn -mcpu=gfx908 -run-pass=machine-scheduler %s -o - -debug-only=machine-scheduler 2>&1 | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -passes=machine-scheduler %s -o - -debug-only=machine-scheduler 2>&1 | FileCheck %s
 # REQUIRES: asserts
 
 # CHECK: All regions recorded, starting actual scheduling.
diff --git a/llvm/test/CodeGen/AMDGPU/flat-load-clustering.mir b/llvm/test/CodeGen/AMDGPU/flat-load-clustering.mir
index 962d49d..204912b 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-load-clustering.mir
+++ b/llvm/test/CodeGen/AMDGPU/flat-load-clustering.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs -run-pass machine-scheduler -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn -mcpu=tonga -passes=machine-scheduler -o - %s | FileCheck -check-prefix=GCN %s
 
 # GCN-LABEL: name: flat_load_clustering
 # GCN:      FLAT_LOAD_DWORD
diff --git a/llvm/test/CodeGen/AMDGPU/hazard-kill.mir b/llvm/test/CodeGen/AMDGPU/hazard-kill.mir
index 8327ac2..0547487 100644
--- a/llvm/test/CodeGen/AMDGPU/hazard-kill.mir
+++ b/llvm/test/CodeGen/AMDGPU/hazard-kill.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass post-RA-sched %s -o - | FileCheck -check-prefix=GFX90 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes=post-RA-sched %s -o - | FileCheck -check-prefix=GFX90 %s
 
 # This tests that a KILL isn't considered as a valid instruction for a hazard
 # slot (e.g. m0 def followed by V_INTERP for gfx9)
diff --git a/llvm/test/CodeGen/AMDGPU/high-RP-reschedule.mir b/llvm/test/CodeGen/AMDGPU/high-RP-reschedule.mir
index d57450b..78f21ef 100644
--- a/llvm/test/CodeGen/AMDGPU/high-RP-reschedule.mir
+++ b/llvm/test/CodeGen/AMDGPU/high-RP-reschedule.mir
@@ -1,6 +1,8 @@
 # REQUIRES: asserts
-# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-misched -run-pass=machine-scheduler -verify-misched -debug-only=machine-scheduler -o - %s 2>&1 | FileCheck -check-prefix=GCN %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-misched -run-pass=machine-scheduler -amdgpu-use-amdgpu-trackers=1 -verify-misched -debug-only=machine-scheduler -o - %s 2>&1 | FileCheck -check-prefix=GCN-GCNTRACKER %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-misched -run-pass=machine-scheduler -debug-only=machine-scheduler -o - %s 2>&1 | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-misched -passes=machine-scheduler -debug-only=machine-scheduler -o - %s 2>&1 | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-misched -run-pass=machine-scheduler -amdgpu-use-amdgpu-trackers=1 -debug-only=machine-scheduler -o - %s 2>&1 | FileCheck -check-prefix=GCN-GCNTRACKER %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-misched -passes=machine-scheduler -amdgpu-use-amdgpu-trackers=1 -debug-only=machine-scheduler -o - %s 2>&1 | FileCheck -check-prefix=GCN-GCNTRACKER %s
 
 --- |
   define amdgpu_kernel void @high-RP-reschedule() { ret void }
diff --git a/llvm/test/CodeGen/AMDGPU/licm-wwm.mir b/llvm/test/CodeGen/AMDGPU/licm-wwm.mir
index fc20674..85525aa 100644
--- a/llvm/test/CodeGen/AMDGPU/licm-wwm.mir
+++ b/llvm/test/CodeGen/AMDGPU/licm-wwm.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -run-pass=early-machinelicm,si-wqm -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=early-machinelicm,si-wqm -o - %s | FileCheck -check-prefix=GCN %s
 
 # Machine LICM may hoist an intruction from a WWM region, which will force SI-WQM pass
 # to create a second WWM region. This is an unwanted hoisting.
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.whole.wave-w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.whole.wave-w32.ll
index 990a606..1bdaa4c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.whole.wave-w32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.whole.wave-w32.ll
@@ -428,18 +428,17 @@ define amdgpu_cs_chain void @control_flow(<3 x i32> inreg %sgpr, ptr inreg %call
 ; DAGISEL12-NEXT:    s_or_saveexec_b32 s8, -1
 ; DAGISEL12-NEXT:    s_wait_alu 0xfffe
 ; DAGISEL12-NEXT:    v_cndmask_b32_e64 v0, 0x47, v1, s8
-; DAGISEL12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; DAGISEL12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; DAGISEL12-NEXT:    v_cmp_ne_u32_e64 s9, 0, v0
 ; DAGISEL12-NEXT:    s_mov_b32 exec_lo, s8
 ; DAGISEL12-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v13, v1
-; DAGISEL12-NEXT:    v_mov_b32_e32 v11, s9
 ; DAGISEL12-NEXT:    s_or_b32 s4, vcc_lo, s4
 ; DAGISEL12-NEXT:    s_wait_alu 0xfffe
 ; DAGISEL12-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
 ; DAGISEL12-NEXT:    s_cbranch_execnz .LBB3_2
 ; DAGISEL12-NEXT:  ; %bb.3: ; %tail.loopexit
 ; DAGISEL12-NEXT:    s_or_b32 exec_lo, exec_lo, s4
-; DAGISEL12-NEXT:    v_add_nc_u32_e32 v10, 42, v1
+; DAGISEL12-NEXT:    v_dual_mov_b32 v11, s9 :: v_dual_add_nc_u32 v10, 42, v1
 ; DAGISEL12-NEXT:  .LBB3_4: ; %Flow1
 ; DAGISEL12-NEXT:    s_wait_alu 0xfffe
 ; DAGISEL12-NEXT:    s_or_b32 exec_lo, exec_lo, s3
@@ -530,13 +529,13 @@ define amdgpu_cs_chain void @control_flow(<3 x i32> inreg %sgpr, ptr inreg %call
 ; DAGISEL10-NEXT:    v_cmp_ne_u32_e64 s9, 0, v0
 ; DAGISEL10-NEXT:    s_mov_b32 exec_lo, s8
 ; DAGISEL10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v13, v1
-; DAGISEL10-NEXT:    v_mov_b32_e32 v11, s9
 ; DAGISEL10-NEXT:    s_or_b32 s4, vcc_lo, s4
 ; DAGISEL10-NEXT:    s_andn2_b32 exec_lo, exec_lo, s4
 ; DAGISEL10-NEXT:    s_cbranch_execnz .LBB3_2
 ; DAGISEL10-NEXT:  ; %bb.3: ; %tail.loopexit
 ; DAGISEL10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; DAGISEL10-NEXT:    v_add_nc_u32_e32 v10, 42, v1
+; DAGISEL10-NEXT:    v_mov_b32_e32 v11, s9
 ; DAGISEL10-NEXT:  .LBB3_4: ; %Flow1
 ; DAGISEL10-NEXT:    s_or_b32 exec_lo, exec_lo, s3
 ; DAGISEL10-NEXT:    s_mov_b32 s3, exec_lo
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-debug.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-debug.mir
index e32de1e..5dc6d2e 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-debug.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-debug.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=amdgcn -mcpu=gfx908 -run-pass machine-scheduler -amdgpu-disable-unclustered-high-rp-reschedule -verify-machineinstrs %s -o - -debug-only=machine-scheduler 2>&1 | FileCheck -check-prefix=DEBUG %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -passes=machine-scheduler -amdgpu-disable-unclustered-high-rp-reschedule %s -o - -debug-only=machine-scheduler 2>&1 | FileCheck -check-prefix=DEBUG %s
 # REQUIRES: asserts
 
 --- |
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
index 7662abc..71288d7 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -mcpu=gfx908 -run-pass=machine-scheduler -amdgpu-disable-unclustered-high-rp-reschedule -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX908 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -passes=machine-scheduler -amdgpu-disable-unclustered-high-rp-reschedule %s -o - | FileCheck -check-prefix=GFX908 %s
 
 ---
 name:            test_occ_10_max_occ_no_sink
diff --git a/llvm/test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir b/llvm/test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir
index 2aa4304..ffc86dc 100644
--- a/llvm/test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir
+++ b/llvm/test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs -run-pass machine-scheduler -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn -mcpu=tahiti -passes=machine-scheduler -o - %s | FileCheck -check-prefix=GCN %s
 
 # GCN-LABEL: name: cluster_add_addc
 # GCN: S_NOP 0, implicit-def $vcc
diff --git a/llvm/test/CodeGen/AMDGPU/misched-killflags.mir b/llvm/test/CodeGen/AMDGPU/misched-killflags.mir
index 9d0c322..a7fa8ad 100644
--- a/llvm/test/CodeGen/AMDGPU/misched-killflags.mir
+++ b/llvm/test/CodeGen/AMDGPU/misched-killflags.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs -run-pass=post-RA-sched -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -passes=post-RA-sched -o - %s | FileCheck %s
 # Make sure ScheduleDAGInstrs::fixupKills does not produce invalid kill flags.
 ---
 name: func0
diff --git a/llvm/test/CodeGen/AMDGPU/movrels-bug.mir b/llvm/test/CodeGen/AMDGPU/movrels-bug.mir
index 9c86273..e20de8c 100644
--- a/llvm/test/CodeGen/AMDGPU/movrels-bug.mir
+++ b/llvm/test/CodeGen/AMDGPU/movrels-bug.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass post-RA-sched  %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -passes=post-RA-sched  %s -o - | FileCheck %s
 
 # This tests a situation where a sub-register of a killed super-register operand
 # of V_MOVRELS happens to have an undef use later on. This leads to the post RA
diff --git a/llvm/test/CodeGen/AMDGPU/mul_int24.ll b/llvm/test/CodeGen/AMDGPU/mul_int24.ll
index 4302810..5452c80 100644
--- a/llvm/test/CodeGen/AMDGPU/mul_int24.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul_int24.ll
@@ -740,7 +740,7 @@ define amdgpu_kernel void @simplify_i24_crash(ptr addrspace(1) %out, i32 %arg0,
 ; EG:       ; %bb.0: ; %bb
 ; EG-NEXT:    ALU_PUSH_BEFORE 1, @6, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    JUMP @5 POP:1
-; EG-NEXT:    ALU 14, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 12, @8, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 0
 ; EG-NEXT:    POP @5 POP:1
 ; EG-NEXT:    CF_END
@@ -748,27 +748,25 @@ define amdgpu_kernel void @simplify_i24_crash(ptr addrspace(1) %out, i32 %arg0,
 ; EG-NEXT:     SETNE_INT * T0.W, KC0[2].Z, 0.0,
 ; EG-NEXT:     PRED_SETE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0,
 ; EG-NEXT:    ALU clause starting at 8:
-; EG-NEXT:     MOV T0.X, KC0[3].Y,
-; EG-NEXT:     MOV * T1.X, KC0[2].W,
-; EG-NEXT:     LSHL T0.W, PS, literal.x,
-; EG-NEXT:     LSHL * T1.W, PV.X, literal.x,
+; EG-NEXT:     MOV T0.W, KC0[3].Y,
+; EG-NEXT:     MOV * T1.W, KC0[2].W,
+; EG-NEXT:     LSHL T1.W, PS, literal.x,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT:     ASHR T1.W, PS, literal.x,
-; EG-NEXT:     ASHR * T0.W, PV.W, literal.x,
+; EG-NEXT:     ASHR T0.W, PS, literal.x,
+; EG-NEXT:     ASHR * T1.W, PV.W, literal.x,
 ; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
 ; EG-NEXT:     MOV T2.W, KC0[2].Y,
 ; EG-NEXT:     MULLO_INT * T0.X, PS, PV.W,
 ; EG-NEXT:     LSHR T1.X, PV.W, literal.x,
-; EG-NEXT:     MOV T0.Y, PS,
-; EG-NEXT:     MOV T0.W, KC0[3].X,
-; EG-NEXT:     MOV * T0.W, KC0[3].Z,
+; EG-NEXT:     MOV * T0.Y, PS,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
 ;
 ; CM-LABEL: simplify_i24_crash:
 ; CM:       ; %bb.0: ; %bb
 ; CM-NEXT:    ALU_PUSH_BEFORE 1, @6, KC0[CB0:0-32], KC1[]
 ; CM-NEXT:    JUMP @5 POP:1
-; CM-NEXT:    ALU 17, @8, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    ALU 15, @8, KC0[CB0:0-32], KC1[]
 ; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
 ; CM-NEXT:    POP @5 POP:1
 ; CM-NEXT:    CF_END
@@ -776,23 +774,21 @@ define amdgpu_kernel void @simplify_i24_crash(ptr addrspace(1) %out, i32 %arg0,
 ; CM-NEXT:     SETNE_INT * T0.W, KC0[2].Z, 0.0,
 ; CM-NEXT:     PRED_SETE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0,
 ; CM-NEXT:    ALU clause starting at 8:
-; CM-NEXT:     MOV * T0.X, KC0[3].Y,
-; CM-NEXT:     MOV * T1.X, KC0[2].W,
-; CM-NEXT:     LSHL T0.Z, PV.X, literal.x,
-; CM-NEXT:     LSHL * T0.W, T0.X, literal.x,
+; CM-NEXT:     MOV T0.Z, KC0[3].Y,
+; CM-NEXT:     MOV * T0.W, KC0[2].W,
+; CM-NEXT:     LSHL T1.Z, PV.W, literal.x,
+; CM-NEXT:     LSHL * T0.W, PV.Z, literal.x,
 ; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
 ; CM-NEXT:     MOV T0.Y, KC0[2].Y,
-; CM-NEXT:     ASHR T1.Z, PV.W, literal.x,
+; CM-NEXT:     ASHR T0.Z, PV.W, literal.x,
 ; CM-NEXT:     ASHR * T0.W, PV.Z, literal.x,
 ; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT:     MULLO_INT T0.X, T0.W, T1.Z,
-; CM-NEXT:     MULLO_INT T0.Y (MASKED), T0.W, T1.Z,
-; CM-NEXT:     MULLO_INT T0.Z (MASKED), T0.W, T1.Z,
-; CM-NEXT:     MULLO_INT * T0.W (MASKED), T0.W, T1.Z,
+; CM-NEXT:     MULLO_INT T0.X, T0.W, T0.Z,
+; CM-NEXT:     MULLO_INT T0.Y (MASKED), T0.W, T0.Z,
+; CM-NEXT:     MULLO_INT T0.Z (MASKED), T0.W, T0.Z,
+; CM-NEXT:     MULLO_INT * T0.W (MASKED), T0.W, T0.Z,
 ; CM-NEXT:     LSHR T1.X, T0.Y, literal.x,
-; CM-NEXT:     MOV T0.Y, PV.X,
-; CM-NEXT:     MOV T0.Z, KC0[3].X,
-; CM-NEXT:     MOV * T0.W, KC0[3].Z,
+; CM-NEXT:     MOV * T0.Y, PV.X,
 ; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
 bb:
   %cmp = icmp eq i32 %arg0, 0
diff --git a/llvm/test/CodeGen/AMDGPU/peephole-opt-fold-reg-sequence-subreg.mir b/llvm/test/CodeGen/AMDGPU/peephole-opt-fold-reg-sequence-subreg.mir
new file mode 100644
index 0000000..d32163b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/peephole-opt-fold-reg-sequence-subreg.mir
@@ -0,0 +1,189 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -run-pass=peephole-opt -o - %s | FileCheck %s
+
+---
+name:            reg_sequence_extract_subreg_sub0_from_regsequence_sub0_sub1
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: reg_sequence_extract_subreg_sub0_from_regsequence_sub0_sub1
+    ; CHECK: liveins: $vgpr0_vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]]
+    %0:vreg_64_align2 = COPY $vgpr0_vgpr1
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %2:vreg_64_align2 = REG_SEQUENCE %0.sub0, %subreg.sub0, %1, %subreg.sub1
+    %3:vgpr_32 = COPY %2.sub0
+    S_ENDPGM 0, implicit %3
+
+...
+
+---
+name:            reg_sequence_extract_subreg_sub1_from_regsequence_sub0_sub1
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: reg_sequence_extract_subreg_sub1_from_regsequence_sub0_sub1
+    ; CHECK: liveins: $vgpr0_vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B32_e32_]]
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]]
+    %0:vreg_64_align2 = COPY $vgpr0_vgpr1
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %2:vreg_64_align2 = REG_SEQUENCE %0.sub0, %subreg.sub0, %1, %subreg.sub1
+    %3:vgpr_32 = COPY %2.sub1
+    S_ENDPGM 0, implicit %3
+
+...
+
+---
+name:            reg_sequence_extract_subreg_sub0_from_regsequence_sub1_sub0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: reg_sequence_extract_subreg_sub0_from_regsequence_sub1_sub0
+    ; CHECK: liveins: $vgpr0_vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub1, [[COPY]].sub0, %subreg.sub0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]]
+    %0:vreg_64_align2 = COPY $vgpr0_vgpr1
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %2:vreg_64_align2 = REG_SEQUENCE %1, %subreg.sub1, %0.sub0, %subreg.sub0
+    %3:vgpr_32 = COPY %2.sub0
+    S_ENDPGM 0, implicit %3
+
+...
+
+---
+name:            reg_sequence_extract_subreg_sub1_from_regsequence_sub1_sub0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: reg_sequence_extract_subreg_sub1_from_regsequence_sub1_sub0
+    ; CHECK: liveins: $vgpr0_vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub1, [[COPY]].sub0, %subreg.sub0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B32_e32_]]
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]]
+    %0:vreg_64_align2 = COPY $vgpr0_vgpr1
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %2:vreg_64_align2 = REG_SEQUENCE %1, %subreg.sub1, %0.sub0, %subreg.sub0
+    %3:vgpr_32 = COPY %2.sub1
+    S_ENDPGM 0, implicit %3
+
+...
+
+---
+name:            reg_sequence_extract_subreg_sub0_from_vreg96
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr1_vgpr2
+
+    ; CHECK-LABEL: name: reg_sequence_extract_subreg_sub0_from_vreg96
+    ; CHECK: liveins: $vgpr1_vgpr2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2
+    ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1, [[COPY]].sub1, %subreg.sub2
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]]
+    %0:vreg_64 = COPY $vgpr1_vgpr2
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %2:vreg_96 = REG_SEQUENCE %0.sub0, %subreg.sub0, %1, %subreg.sub1, %0.sub1, %subreg.sub2
+    %3:vgpr_32 = COPY %2.sub0
+    S_ENDPGM 0, implicit %3
+
+...
+
+---
+name:            reg_sequence_extract_subreg_sub1_from_vreg96
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr1_vgpr2
+
+    ; CHECK-LABEL: name: reg_sequence_extract_subreg_sub1_from_vreg96
+    ; CHECK: liveins: $vgpr1_vgpr2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2
+    ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]].sub0, %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1, [[COPY]].sub1, %subreg.sub2
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]]
+    %0:vreg_64 = COPY $vgpr1_vgpr2
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %2:vreg_96 = REG_SEQUENCE %0.sub0, %subreg.sub0, %1, %subreg.sub1, %0.sub1, %subreg.sub2
+    %3:vgpr_32 = COPY %2.sub0
+    S_ENDPGM 0, implicit %3
+
+...
+
+---
+name:            reg_sequence_compose_0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr1_vgpr2
+
+    ; CHECK-LABEL: name: reg_sequence_compose_0
+    ; CHECK: liveins: $vgpr1_vgpr2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2
+    ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[V_MOV_B32_e32_]], %subreg.sub2
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]]
+    %0:vreg_64 = COPY $vgpr1_vgpr2
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %2:vreg_96 = REG_SEQUENCE %0, %subreg.sub0_sub1, %1, %subreg.sub2
+    %3:vgpr_32 = COPY %2.sub1
+    S_ENDPGM 0, implicit %3
+
+...
+
+---
+name:            reg_sequence_compose_1
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr1_vgpr2_vgpr3_vgpr4
+
+    ; CHECK-LABEL: name: reg_sequence_compose_1
+    ; CHECK: liveins: $vgpr1_vgpr2_vgpr3_vgpr4
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
+    ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]].sub2_sub3, %subreg.sub0_sub1, [[COPY]].sub0, %subreg.sub2
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]]
+    %0:vreg_128 = COPY $vgpr1_vgpr2_vgpr3_vgpr4
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %2:vreg_96 = REG_SEQUENCE %0.sub2_sub3, %subreg.sub0_sub1, %0.sub0, %subreg.sub2
+    %3:vgpr_32 = COPY %2.sub1
+    S_ENDPGM 0, implicit %3
+
+...
+
+
+
diff --git a/llvm/test/CodeGen/AMDGPU/post-ra-sched-kill-bundle-use-inst.mir b/llvm/test/CodeGen/AMDGPU/post-ra-sched-kill-bundle-use-inst.mir
index d707291..a74f1ad 100644
--- a/llvm/test/CodeGen/AMDGPU/post-ra-sched-kill-bundle-use-inst.mir
+++ b/llvm/test/CodeGen/AMDGPU/post-ra-sched-kill-bundle-use-inst.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=post-RA-sched -verify-machineinstrs -o -  %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -passes=post-RA-sched -o -  %s | FileCheck %s
 
 # The scheduler was not inspecting the first instruction in the bundle
 # when adding kill flags, so it would incorrectly mark the first use
diff --git a/llvm/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir b/llvm/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir
index bb86f65..daa9997 100644
--- a/llvm/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir
+++ b/llvm/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass=register-coalescer,rename-independent-subregs -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn -passes=register-coalescer,rename-independent-subregs -o - %s | FileCheck -check-prefix=GCN %s
 ---
 
 # GCN-LABEL: name: mac_invalid_operands
diff --git a/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir b/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir
index 3f88f98..ac9ef16 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs  -run-pass=machine-scheduler -verify-misched -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -run-pass=machine-scheduler -verify-misched -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -passes=machine-scheduler -verify-misched -o - %s | FileCheck %s
 
 # This would assert that a dead def should have no uses, but the dead
 # def and use have different subreg indices.
diff --git a/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir b/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
index add7825..2cd7806 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=machine-scheduler -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=machine-scheduler %s -o - | FileCheck %s
 
 # The sequence of DBG_VALUEs forms a scheduling region with 0 real
 # instructions. The RegPressure tracker would end up skipping over any
diff --git a/llvm/test/CodeGen/AMDGPU/sched-barrier-hang-weak-dep.mir b/llvm/test/CodeGen/AMDGPU/sched-barrier-hang-weak-dep.mir
index 3fdb0c7..f797b01 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-barrier-hang-weak-dep.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-barrier-hang-weak-dep.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -mcpu=gfx908 -run-pass=machine-scheduler -verify-misched -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -passes=machine-scheduler -verify-misched -o - %s | FileCheck %s
 
 # This would hang after removing edges from the SCHED_BARRIER since the number
 # of Preds/Succs would be left in an inconsistent state.
diff --git a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
index 0903770..3254f5e 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=machine-scheduler -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -passes=machine-scheduler -o - %s | FileCheck %s
 
 --- |
   %struct.widget.0 = type { float, i32, i32 }
diff --git a/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir b/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir
index 6796391..3ca61d2 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -verify-misched -run-pass=machine-scheduler -o - %s  | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-misched -passes=machine-scheduler -o - %s  | FileCheck %s
 
 ---
 name:            handleMoveUp_incorrect_interval
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-barrier-fpmode.mir b/llvm/test/CodeGen/AMDGPU/schedule-barrier-fpmode.mir
index 0b1fd44..099cfc4 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-barrier-fpmode.mir
+++ b/llvm/test/CodeGen/AMDGPU/schedule-barrier-fpmode.mir
@@ -1,6 +1,8 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=machine-scheduler -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=machine-scheduler -o - %s | FileCheck %s
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=machine-scheduler -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -passes=machine-scheduler -o - %s | FileCheck %s
 # Make sure FP mode is not a hard scheduling boundary
 
 ---
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-barrier.mir b/llvm/test/CodeGen/AMDGPU/schedule-barrier.mir
index e67036f..88e11c9 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-barrier.mir
+++ b/llvm/test/CodeGen/AMDGPU/schedule-barrier.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=machine-scheduler -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes=machine-scheduler -o - %s | FileCheck %s
 
 ---
 # Check that the high latency loads are both scheduled first, before the
diff --git a/llvm/test/CodeGen/AMDGPU/si-init-whole-wave.mir b/llvm/test/CodeGen/AMDGPU/si-init-whole-wave.mir
index a4a9c04..c023014 100644
--- a/llvm/test/CodeGen/AMDGPU/si-init-whole-wave.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-init-whole-wave.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-wqm -o -  %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -passes=si-wqm -o -  %s | FileCheck %s
 
 ---
 # Test that we don't do silly things when there is no whole wave mode in the
diff --git a/llvm/test/CodeGen/AMDGPU/sreg-xnull-regclass-bitwidth.mir b/llvm/test/CodeGen/AMDGPU/sreg-xnull-regclass-bitwidth.mir
index d8d4f5d..3091fe8 100644
--- a/llvm/test/CodeGen/AMDGPU/sreg-xnull-regclass-bitwidth.mir
+++ b/llvm/test/CodeGen/AMDGPU/sreg-xnull-regclass-bitwidth.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=postmisched -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=postmisched -o - %s | FileCheck %s
 ---
 name:            test_xnull_256
 body:             |
diff --git a/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir b/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir
index 8d75bb3..7656629 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir
+++ b/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-wqm -o -  %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=si-wqm -o -  %s | FileCheck %s
 
 --- |
   define amdgpu_ps void @exit_to_exact() {
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.mir b/llvm/test/CodeGen/AMDGPU/wqm.mir
index 4762760..99327e1 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.mir
+++ b/llvm/test/CodeGen/AMDGPU/wqm.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass si-wqm -o -  %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -passes=si-wqm -o -  %s | FileCheck %s
 
 --- |
   define amdgpu_ps void @test_strict_wwm_scc() {
diff --git a/llvm/test/CodeGen/ARM/aes-erratum-fix.ll b/llvm/test/CodeGen/ARM/aes-erratum-fix.ll
index 43c403f..82f5bfd 100644
--- a/llvm/test/CodeGen/ARM/aes-erratum-fix.ll
+++ b/llvm/test/CodeGen/ARM/aes-erratum-fix.ll
@@ -1447,12 +1447,13 @@ define arm_aapcs_vfpcc void @aese_setf16_cond_via_ptr(i1 zeroext %0, ptr %1, <16
 ; CHECK-CORTEX-FIX-NEXT:    .pad #24
 ; CHECK-CORTEX-FIX-NEXT:    sub sp, sp, #24
 ; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
-; CHECK-CORTEX-FIX-NEXT:    beq .LBB36_2
+; CHECK-CORTEX-FIX-NEXT:    beq .LBB36_3
 ; CHECK-CORTEX-FIX-NEXT:  @ %bb.1:
 ; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
 ; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d16[1]
 ; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r6, d17[0]
 ; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r7, d17[2]
+; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r4, d17[3]
 ; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #20] @ 4-byte Spill
 ; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d16[2]
 ; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #8] @ 4-byte Spill
@@ -1463,37 +1464,37 @@ define arm_aapcs_vfpcc void @aese_setf16_cond_via_ptr(i1 zeroext %0, ptr %1, <16
 ; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r1]
 ; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #16] @ 4-byte Spill
 ; CHECK-CORTEX-FIX-NEXT:    mov r3, r6
-; CHECK-CORTEX-FIX-NEXT:    b .LBB36_3
+; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
+; CHECK-CORTEX-FIX-NEXT:    bne .LBB36_4
 ; CHECK-CORTEX-FIX-NEXT:  .LBB36_2:
-; CHECK-CORTEX-FIX-NEXT:    add r3, r2, #8
+; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r0, d0[0]
+; CHECK-CORTEX-FIX-NEXT:    b .LBB36_5
+; CHECK-CORTEX-FIX-NEXT:  .LBB36_3:
 ; CHECK-CORTEX-FIX-NEXT:    vld1.32 {d16[0]}, [r2:32]
+; CHECK-CORTEX-FIX-NEXT:    add r3, r2, #8
 ; CHECK-CORTEX-FIX-NEXT:    add r7, r2, #4
 ; CHECK-CORTEX-FIX-NEXT:    vld1.32 {d17[0]}, [r3:32]
 ; CHECK-CORTEX-FIX-NEXT:    add r3, r2, #12
 ; CHECK-CORTEX-FIX-NEXT:    vld1.32 {d16[1]}, [r7:32]
 ; CHECK-CORTEX-FIX-NEXT:    vld1.32 {d17[1]}, [r3:32]
 ; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d16[0]
+; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r7, d17[1]
+; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r4, d17[3]
 ; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #16] @ 4-byte Spill
 ; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d16[1]
-; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r7, d17[1]
-; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #20] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d16[2]
 ; CHECK-CORTEX-FIX-NEXT:    str r7, [sp, #12] @ 4-byte Spill
 ; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r7, d17[2]
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #20] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d16[2]
 ; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #8] @ 4-byte Spill
 ; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d16[3]
 ; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #4] @ 4-byte Spill
 ; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d17[0]
-; CHECK-CORTEX-FIX-NEXT:  .LBB36_3:
-; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r4, d17[3]
 ; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
-; CHECK-CORTEX-FIX-NEXT:    beq .LBB36_5
-; CHECK-CORTEX-FIX-NEXT:  @ %bb.4:
+; CHECK-CORTEX-FIX-NEXT:    beq .LBB36_2
+; CHECK-CORTEX-FIX-NEXT:  .LBB36_4:
 ; CHECK-CORTEX-FIX-NEXT:    ldrh r0, [r1]
-; CHECK-CORTEX-FIX-NEXT:    b .LBB36_6
 ; CHECK-CORTEX-FIX-NEXT:  .LBB36_5:
-; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r0, d0[0]
-; CHECK-CORTEX-FIX-NEXT:  .LBB36_6:
 ; CHECK-CORTEX-FIX-NEXT:    str r0, [sp] @ 4-byte Spill
 ; CHECK-CORTEX-FIX-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
 ; CHECK-CORTEX-FIX-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
@@ -1695,8 +1696,8 @@ define arm_aapcs_vfpcc void @aese_setf16_cond_via_val(i1 zeroext %0, half %1, <1
 ; CHECK-CORTEX-FIX-NEXT:    vmov s0, lr
 ; CHECK-CORTEX-FIX-NEXT:    b .LBB37_5
 ; CHECK-CORTEX-FIX-NEXT:  .LBB37_3:
-; CHECK-CORTEX-FIX-NEXT:    add r2, r1, #8
 ; CHECK-CORTEX-FIX-NEXT:    vld1.32 {d16[0]}, [r1:32]
+; CHECK-CORTEX-FIX-NEXT:    add r2, r1, #8
 ; CHECK-CORTEX-FIX-NEXT:    add r3, r1, #4
 ; CHECK-CORTEX-FIX-NEXT:    vld1.32 {d17[0]}, [r2:32]
 ; CHECK-CORTEX-FIX-NEXT:    add r2, r1, #12
@@ -1706,11 +1707,11 @@ define arm_aapcs_vfpcc void @aese_setf16_cond_via_val(i1 zeroext %0, half %1, <1
 ; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r2, d16[0]
 ; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r7, d16[2]
 ; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r10, d16[3]
-; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #8] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT:    vmov s2, r2
-; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d17[0]
 ; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r11, d17[2]
 ; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r6, d17[3]
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #8] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d17[0]
+; CHECK-CORTEX-FIX-NEXT:    vmov s2, r2
 ; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #4] @ 4-byte Spill
 ; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d17[1]
 ; CHECK-CORTEX-FIX-NEXT:    str r3, [sp] @ 4-byte Spill
@@ -3600,12 +3601,13 @@ define arm_aapcs_vfpcc void @aesd_setf16_cond_via_ptr(i1 zeroext %0, ptr %1, <16
 ; CHECK-CORTEX-FIX-NEXT:    .pad #24
 ; CHECK-CORTEX-FIX-NEXT:    sub sp, sp, #24
 ; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
-; CHECK-CORTEX-FIX-NEXT:    beq .LBB82_2
+; CHECK-CORTEX-FIX-NEXT:    beq .LBB82_3
 ; CHECK-CORTEX-FIX-NEXT:  @ %bb.1:
 ; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
 ; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d16[1]
 ; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r6, d17[0]
 ; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r7, d17[2]
+; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r4, d17[3]
 ; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #20] @ 4-byte Spill
 ; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d16[2]
 ; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #8] @ 4-byte Spill
@@ -3616,37 +3618,37 @@ define arm_aapcs_vfpcc void @aesd_setf16_cond_via_ptr(i1 zeroext %0, ptr %1, <16
 ; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r1]
 ; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #16] @ 4-byte Spill
 ; CHECK-CORTEX-FIX-NEXT:    mov r3, r6
-; CHECK-CORTEX-FIX-NEXT:    b .LBB82_3
+; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
+; CHECK-CORTEX-FIX-NEXT:    bne .LBB82_4
 ; CHECK-CORTEX-FIX-NEXT:  .LBB82_2:
-; CHECK-CORTEX-FIX-NEXT:    add r3, r2, #8
+; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r0, d0[0]
+; CHECK-CORTEX-FIX-NEXT:    b .LBB82_5
+; CHECK-CORTEX-FIX-NEXT:  .LBB82_3:
 ; CHECK-CORTEX-FIX-NEXT:    vld1.32 {d16[0]}, [r2:32]
+; CHECK-CORTEX-FIX-NEXT:    add r3, r2, #8
 ; CHECK-CORTEX-FIX-NEXT:    add r7, r2, #4
 ; CHECK-CORTEX-FIX-NEXT:    vld1.32 {d17[0]}, [r3:32]
 ; CHECK-CORTEX-FIX-NEXT:    add r3, r2, #12
 ; CHECK-CORTEX-FIX-NEXT:    vld1.32 {d16[1]}, [r7:32]
 ; CHECK-CORTEX-FIX-NEXT:    vld1.32 {d17[1]}, [r3:32]
 ; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d16[0]
+; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r7, d17[1]
+; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r4, d17[3]
 ; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #16] @ 4-byte Spill
 ; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d16[1]
-; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r7, d17[1]
-; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #20] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d16[2]
 ; CHECK-CORTEX-FIX-NEXT:    str r7, [sp, #12] @ 4-byte Spill
 ; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r7, d17[2]
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #20] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d16[2]
 ; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #8] @ 4-byte Spill
 ; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d16[3]
 ; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #4] @ 4-byte Spill
 ; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d17[0]
-; CHECK-CORTEX-FIX-NEXT:  .LBB82_3:
-; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r4, d17[3]
 ; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
-; CHECK-CORTEX-FIX-NEXT:    beq .LBB82_5
-; CHECK-CORTEX-FIX-NEXT:  @ %bb.4:
+; CHECK-CORTEX-FIX-NEXT:    beq .LBB82_2
+; CHECK-CORTEX-FIX-NEXT:  .LBB82_4:
 ; CHECK-CORTEX-FIX-NEXT:    ldrh r0, [r1]
-; CHECK-CORTEX-FIX-NEXT:    b .LBB82_6
 ; CHECK-CORTEX-FIX-NEXT:  .LBB82_5:
-; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r0, d0[0]
-; CHECK-CORTEX-FIX-NEXT:  .LBB82_6:
 ; CHECK-CORTEX-FIX-NEXT:    str r0, [sp] @ 4-byte Spill
 ; CHECK-CORTEX-FIX-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
 ; CHECK-CORTEX-FIX-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
@@ -3848,8 +3850,8 @@ define arm_aapcs_vfpcc void @aesd_setf16_cond_via_val(i1 zeroext %0, half %1, <1
 ; CHECK-CORTEX-FIX-NEXT:    vmov s0, lr
 ; CHECK-CORTEX-FIX-NEXT:    b .LBB83_5
 ; CHECK-CORTEX-FIX-NEXT:  .LBB83_3:
-; CHECK-CORTEX-FIX-NEXT:    add r2, r1, #8
 ; CHECK-CORTEX-FIX-NEXT:    vld1.32 {d16[0]}, [r1:32]
+; CHECK-CORTEX-FIX-NEXT:    add r2, r1, #8
 ; CHECK-CORTEX-FIX-NEXT:    add r3, r1, #4
 ; CHECK-CORTEX-FIX-NEXT:    vld1.32 {d17[0]}, [r2:32]
 ; CHECK-CORTEX-FIX-NEXT:    add r2, r1, #12
@@ -3859,11 +3861,11 @@ define arm_aapcs_vfpcc void @aesd_setf16_cond_via_val(i1 zeroext %0, half %1, <1
 ; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r2, d16[0]
 ; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r7, d16[2]
 ; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r10, d16[3]
-; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #8] @ 4-byte Spill
-; CHECK-CORTEX-FIX-NEXT:    vmov s2, r2
-; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d17[0]
 ; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r11, d17[2]
 ; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r6, d17[3]
+; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #8] @ 4-byte Spill
+; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d17[0]
+; CHECK-CORTEX-FIX-NEXT:    vmov s2, r2
 ; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #4] @ 4-byte Spill
 ; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d17[1]
 ; CHECK-CORTEX-FIX-NEXT:    str r3, [sp] @ 4-byte Spill
diff --git a/llvm/test/CodeGen/ARM/arm-bf16-dotprod-intrinsics.ll b/llvm/test/CodeGen/ARM/arm-bf16-dotprod-intrinsics.ll
index 522726c..f325fef 100644
--- a/llvm/test/CodeGen/ARM/arm-bf16-dotprod-intrinsics.ll
+++ b/llvm/test/CodeGen/ARM/arm-bf16-dotprod-intrinsics.ll
@@ -64,7 +64,6 @@ entry:
 define <4 x float> @test_vbfdotq_lane_f32(<4 x float> %r, <8 x bfloat> %a, <4 x bfloat> %b) {
 ; CHECK-LABEL: test_vbfdotq_lane_f32:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    @ kill: def $d4 killed $d4 def $q2
 ; CHECK-NEXT:    vdot.bf16 q0, q1, d4[0]
 ; CHECK-NEXT:    bx lr
 entry:
@@ -108,7 +107,6 @@ entry:
 define <4 x float> @test_vbfmlalbq_lane_f32(<4 x float> %r, <8 x bfloat> %a, <4 x bfloat> %b) {
 ; CHECK-LABEL: test_vbfmlalbq_lane_f32:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    @ kill: def $d4 killed $d4 def $q2
 ; CHECK-NEXT:    vfmab.bf16 q0, q1, d4[0]
 ; CHECK-NEXT:    bx lr
 entry:
@@ -131,7 +129,6 @@ entry:
 define <4 x float> @test_vbfmlaltq_lane_f32(<4 x float> %r, <8 x bfloat> %a, <4 x bfloat> %b) {
 ; CHECK-LABEL: test_vbfmlaltq_lane_f32:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    @ kill: def $d4 killed $d4 def $q2
 ; CHECK-NEXT:    vfmat.bf16 q0, q1, d4[0]
 ; CHECK-NEXT:    bx lr
 entry:
diff --git a/llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll b/llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
index 9570c70..7b130f0 100644
--- a/llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
+++ b/llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
@@ -1167,7 +1167,6 @@ entry:
 define dso_local <8 x half> @test_vmulq_lane_f16(<8 x half> %a, <4 x half> %b) {
 ; CHECK-LABEL: test_vmulq_lane_f16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    @ kill: def $d2 killed $d2 def $q1
 ; CHECK-NEXT:    vmul.f16 q0, q0, d2[3]
 ; CHECK-NEXT:    bx lr
 entry:
@@ -1389,7 +1388,6 @@ entry:
 define dso_local <8 x half> @test_vdupq_lane_f16(<4 x half> %a) {
 ; CHECK-LABEL: test_vdupq_lane_f16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    @ kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    vdup.16 q0, d0[3]
 ; CHECK-NEXT:    bx lr
 entry:
diff --git a/llvm/test/CodeGen/ARM/bf16-create-get-set-dup.ll b/llvm/test/CodeGen/ARM/bf16-create-get-set-dup.ll
index b16993d..39416cc 100644
--- a/llvm/test/CodeGen/ARM/bf16-create-get-set-dup.ll
+++ b/llvm/test/CodeGen/ARM/bf16-create-get-set-dup.ll
@@ -53,7 +53,6 @@ entry:
 define arm_aapcs_vfpcc <8 x bfloat> @test_vdupq_lane_bf16(<4 x bfloat> %v) {
 ; CHECK-LABEL: test_vdupq_lane_bf16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    @ kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    vdup.16 q0, d0[1]
 ; CHECK-NEXT:    bx lr
 entry:
diff --git a/llvm/test/CodeGen/ARM/bf16-shuffle.ll b/llvm/test/CodeGen/ARM/bf16-shuffle.ll
index 9968e78..a45ad8f 100644
--- a/llvm/test/CodeGen/ARM/bf16-shuffle.ll
+++ b/llvm/test/CodeGen/ARM/bf16-shuffle.ll
@@ -229,7 +229,6 @@ entry:
 define dso_local <8 x bfloat> @test_vdupq_lane_bf16(<4 x bfloat> %a) {
 ; CHECK-LABEL: test_vdupq_lane_bf16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    @ kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    vdup.16 q0, d0[3]
 ; CHECK-NEXT:    bx lr
 entry:
diff --git a/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir b/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir
index 0a47b87..1bee32f 100644
--- a/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir
+++ b/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple arm-arm-eabi -mcpu=cortex-m7 -verify-machineinstrs -run-pass=postmisched %s -o - | FileCheck %s
+# RUN: llc -mtriple arm-arm-eabi -mcpu=cortex-m7 -passes=postmisched %s -o - | FileCheck %s
 ---
 name:            test_groups
 alignment:       2
diff --git a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
index dd33b09..ec75165 100644
--- a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
+++ b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
@@ -610,20 +610,20 @@ define <4 x float> @fminnumv432_one_zero_intrinsic(<4 x float> %x) {
 ; ARMV7-LABEL: fminnumv432_one_zero_intrinsic:
 ; ARMV7:       @ %bb.0:
 ; ARMV7-NEXT:    vmov d1, r2, r3
-; ARMV7-NEXT:    vldr s8, .LCPI18_0
+; ARMV7-NEXT:    vldr s4, .LCPI18_0
 ; ARMV7-NEXT:    vmov d0, r0, r1
-; ARMV7-NEXT:    vmov.f32 s10, #-1.000000e+00
+; ARMV7-NEXT:    vmov.f32 s6, #-1.000000e+00
 ; ARMV7-NEXT:    vcmp.f32 s1, #0
 ; ARMV7-NEXT:    vmrs APSR_nzcv, fpscr
-; ARMV7-NEXT:    vmov.f32 s4, s3
-; ARMV7-NEXT:    vmin.f32 d6, d2, d5
-; ARMV7-NEXT:    vmin.f32 d3, d1, d5
-; ARMV7-NEXT:    vmin.f32 d2, d0, d5
-; ARMV7-NEXT:    vmovlt.f32 s8, s1
-; ARMV7-NEXT:    vmov.f32 s5, s8
-; ARMV7-NEXT:    vmov.f32 s7, s12
-; ARMV7-NEXT:    vmov r0, r1, d2
-; ARMV7-NEXT:    vmov r2, r3, d3
+; ARMV7-NEXT:    vmov.f32 s8, s3
+; ARMV7-NEXT:    vmin.f32 d7, d1, d3
+; ARMV7-NEXT:    vmin.f32 d6, d0, d3
+; ARMV7-NEXT:    vmin.f32 d4, d4, d3
+; ARMV7-NEXT:    vmovlt.f32 s4, s1
+; ARMV7-NEXT:    vmov.f32 s13, s4
+; ARMV7-NEXT:    vmov.f32 s15, s8
+; ARMV7-NEXT:    vmov r0, r1, d6
+; ARMV7-NEXT:    vmov r2, r3, d7
 ; ARMV7-NEXT:    bx lr
 ; ARMV7-NEXT:    .p2align 2
 ; ARMV7-NEXT:  @ %bb.1:
diff --git a/llvm/test/CodeGen/ARM/misched-branch-targets.mir b/llvm/test/CodeGen/ARM/misched-branch-targets.mir
index d828d9e..610344f 100644
--- a/llvm/test/CodeGen/ARM/misched-branch-targets.mir
+++ b/llvm/test/CodeGen/ARM/misched-branch-targets.mir
@@ -1,5 +1,7 @@
 # RUN: llc -o - -run-pass=machine-scheduler -misched=shuffle %s | FileCheck %s
+# RUN: llc -o - -passes=machine-scheduler -misched=shuffle %s | FileCheck %s
 # RUN: llc -o - -run-pass=postmisched %s | FileCheck %s
+# RUN: llc -o - -passes=postmisched %s | FileCheck %s
 
 # REQUIRES: asserts
 # -misched=shuffle is only available with assertions enabled
diff --git a/llvm/test/CodeGen/ARM/neon-copy.ll b/llvm/test/CodeGen/ARM/neon-copy.ll
index e356b7e..9fdadab 100644
--- a/llvm/test/CodeGen/ARM/neon-copy.ll
+++ b/llvm/test/CodeGen/ARM/neon-copy.ll
@@ -775,7 +775,6 @@ define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
 define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
 ; CHECK-LABEL: test_vdupq_lane_s8:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    @ kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    vdup.8 q0, d0[5]
 ; CHECK-NEXT:    bx lr
   %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -785,7 +784,6 @@ define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
 define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
 ; CHECK-LABEL: test_vdupq_lane_s16:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    @ kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    vdup.16 q0, d0[2]
 ; CHECK-NEXT:    bx lr
   %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
@@ -795,7 +793,6 @@ define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
 define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
 ; CHECK-LABEL: test_vdupq_lane_s32:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    @ kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    vdup.32 q0, d0[1]
 ; CHECK-NEXT:    bx lr
   %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
diff --git a/llvm/test/CodeGen/ARM/neon-v8.1a.ll b/llvm/test/CodeGen/ARM/neon-v8.1a.ll
index 2b37d04..6f4ceba 100644
--- a/llvm/test/CodeGen/ARM/neon-v8.1a.ll
+++ b/llvm/test/CodeGen/ARM/neon-v8.1a.ll
@@ -138,7 +138,6 @@ entry:
 define arm_aapcs_vfpcc <8 x i16> @test_vqrdmulahq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqrdmulahq_lane_s16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    @ kill: def $d4 killed $d4 def $q2
 ; CHECK-NEXT:    vqrdmulh.s16 q8, q1, d4[2]
 ; CHECK-NEXT:    vqadd.s16 q0, q0, q8
 ; CHECK-NEXT:    bx lr
@@ -165,7 +164,6 @@ entry:
 define arm_aapcs_vfpcc <4 x i32> @test_vqrdmulahq_lane_s32(<4 x i32> %acc,<4 x i32> %x, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqrdmulahq_lane_s32:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    @ kill: def $d4 killed $d4 def $q2
 ; CHECK-NEXT:    vqrdmulh.s32 q8, q1, d4[0]
 ; CHECK-NEXT:    vqadd.s32 q0, q0, q8
 ; CHECK-NEXT:    bx lr
@@ -192,7 +190,6 @@ entry:
 define arm_aapcs_vfpcc <8 x i16> @test_vqrdmulshq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqrdmulshq_lane_s16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    @ kill: def $d4 killed $d4 def $q2
 ; CHECK-NEXT:    vqrdmulh.s16 q8, q1, d4[2]
 ; CHECK-NEXT:    vqsub.s16 q0, q0, q8
 ; CHECK-NEXT:    bx lr
@@ -219,7 +216,6 @@ entry:
 define arm_aapcs_vfpcc <4 x i32> @test_vqrdmulshq_lane_s32(<4 x i32> %acc,<4 x i32> %x, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqrdmulshq_lane_s32:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    @ kill: def $d4 killed $d4 def $q2
 ; CHECK-NEXT:    vqrdmulh.s32 q8, q1, d4[0]
 ; CHECK-NEXT:    vqsub.s32 q0, q0, q8
 ; CHECK-NEXT:    bx lr
@@ -297,7 +293,6 @@ entry:
 define arm_aapcs_vfpcc <8 x i16> @test_vqrdmlahq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %c) {
 ; CHECK-LABEL: test_vqrdmlahq_lane_s16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    @ kill: def $d4 killed $d4 def $q2
 ; CHECK-NEXT:    vqrdmlah.s16 q0, q1, d4[3]
 ; CHECK-NEXT:    bx lr
 entry:
@@ -309,7 +304,6 @@ entry:
 define arm_aapcs_vfpcc <4 x i32> @test_vqrdmlahq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %c) {
 ; CHECK-LABEL: test_vqrdmlahq_lane_s32:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    @ kill: def $d4 killed $d4 def $q2
 ; CHECK-NEXT:    vqrdmlah.s32 q0, q1, d4[1]
 ; CHECK-NEXT:    bx lr
 entry:
@@ -383,7 +377,6 @@ entry:
 define arm_aapcs_vfpcc <8 x i16> @test_vqrdmlshq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %c) {
 ; CHECK-LABEL: test_vqrdmlshq_lane_s16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    @ kill: def $d4 killed $d4 def $q2
 ; CHECK-NEXT:    vqrdmlsh.s16 q0, q1, d4[3]
 ; CHECK-NEXT:    bx lr
 entry:
@@ -395,7 +388,6 @@ entry:
 define arm_aapcs_vfpcc <4 x i32> @test_vqrdmlshq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %c) {
 ; CHECK-LABEL: test_vqrdmlshq_lane_s32:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    @ kill: def $d4 killed $d4 def $q2
 ; CHECK-NEXT:    vqrdmlsh.s32 q0, q1, d4[1]
 ; CHECK-NEXT:    bx lr
 entry:
diff --git a/llvm/test/CodeGen/ARM/vdup.ll b/llvm/test/CodeGen/ARM/vdup.ll
index 3f6d38c..7d06801 100644
--- a/llvm/test/CodeGen/ARM/vdup.ll
+++ b/llvm/test/CodeGen/ARM/vdup.ll
@@ -258,7 +258,6 @@ define arm_aapcs_vfpcc <2 x float> @vduplanefloat(<2 x float> %A) nounwind {
 define arm_aapcs_vfpcc <16 x i8> @vduplaneQ8(<8 x i8> %A) nounwind {
 ; CHECK-LABEL: vduplaneQ8:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    @ kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    vdup.8 q0, d0[1]
 ; CHECK-NEXT:    mov pc, lr
 	%tmp2 = shufflevector <8 x i8> %A, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
@@ -268,7 +267,6 @@ define arm_aapcs_vfpcc <16 x i8> @vduplaneQ8(<8 x i8> %A) nounwind {
 define arm_aapcs_vfpcc <8 x i16> @vduplaneQ16(<4 x i16> %A) nounwind {
 ; CHECK-LABEL: vduplaneQ16:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    @ kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    vdup.16 q0, d0[1]
 ; CHECK-NEXT:    mov pc, lr
 	%tmp2 = shufflevector <4 x i16> %A, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
@@ -278,7 +276,6 @@ define arm_aapcs_vfpcc <8 x i16> @vduplaneQ16(<4 x i16> %A) nounwind {
 define arm_aapcs_vfpcc <4 x i32> @vduplaneQ32(<2 x i32> %A) nounwind {
 ; CHECK-LABEL: vduplaneQ32:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    @ kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    vdup.32 q0, d0[1]
 ; CHECK-NEXT:    mov pc, lr
 	%tmp2 = shufflevector <2 x i32> %A, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
@@ -288,7 +285,6 @@ define arm_aapcs_vfpcc <4 x i32> @vduplaneQ32(<2 x i32> %A) nounwind {
 define arm_aapcs_vfpcc <4 x float> @vduplaneQfloat(<2 x float> %A) nounwind {
 ; CHECK-LABEL: vduplaneQfloat:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    @ kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    vdup.32 q0, d0[1]
 ; CHECK-NEXT:    mov pc, lr
 	%tmp2 = shufflevector <2 x float> %A, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
diff --git a/llvm/test/CodeGen/ARM/vext.ll b/llvm/test/CodeGen/ARM/vext.ll
index 7ddf1d0..46f778d 100644
--- a/llvm/test/CodeGen/ARM/vext.ll
+++ b/llvm/test/CodeGen/ARM/vext.ll
@@ -257,22 +257,22 @@ define <8 x i16> @test_illegal(ptr %A, ptr %B) nounwind {
 ; CHECK-LABEL: test_illegal:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
-; CHECK-NEXT:    vorr d22, d16, d16
+; CHECK-NEXT:    vorr d21, d16, d16
 ; CHECK-NEXT:    vmov.u16 r0, d16[0]
-; CHECK-NEXT:    vorr d23, d16, d16
+; CHECK-NEXT:    vorr d22, d16, d16
 ; CHECK-NEXT:    vmov.u16 r2, d17[3]
 ; CHECK-NEXT:    vmov.u16 r3, d17[1]
 ; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
 ; CHECK-NEXT:    vmov.u16 r1, d19[1]
-; CHECK-NEXT:    vuzp.16 d22, d23
-; CHECK-NEXT:    vuzp.16 d22, d18
+; CHECK-NEXT:    vuzp.16 d21, d22
+; CHECK-NEXT:    vuzp.16 d21, d18
+; CHECK-NEXT:    vext.16 d16, d16, d18, #3
 ; CHECK-NEXT:    vmov.16 d20[0], r0
 ; CHECK-NEXT:    vmov.16 d20[1], r2
 ; CHECK-NEXT:    vmov.16 d20[2], r3
+; CHECK-NEXT:    vmov r2, r3, d16
 ; CHECK-NEXT:    vmov.16 d20[3], r1
-; CHECK-NEXT:    vext.16 d21, d16, d18, #3
 ; CHECK-NEXT:    vmov r0, r1, d20
-; CHECK-NEXT:    vmov r2, r3, d21
 ; CHECK-NEXT:    mov pc, lr
        %tmp1 = load <8 x i16>, ptr %A
        %tmp2 = load <8 x i16>, ptr %B
diff --git a/llvm/test/CodeGen/ARM/vldmia-sched.mir b/llvm/test/CodeGen/ARM/vldmia-sched.mir
index 9a38d28..e0cc088 100644
--- a/llvm/test/CodeGen/ARM/vldmia-sched.mir
+++ b/llvm/test/CodeGen/ARM/vldmia-sched.mir
@@ -1,4 +1,5 @@
 # RUN: llc -run-pass=post-RA-sched %s -o - | FileCheck %s
+# RUN: llc -passes=post-RA-sched %s -o - | FileCheck %s
 # CHECK: VLDMDIA
 --- |
   target triple = "thumbv7-w64-windows-gnu"
diff --git a/llvm/test/CodeGen/ARM/vmul.ll b/llvm/test/CodeGen/ARM/vmul.ll
index 9915e05..6f6386a 100644
--- a/llvm/test/CodeGen/ARM/vmul.ll
+++ b/llvm/test/CodeGen/ARM/vmul.ll
@@ -130,7 +130,6 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @test_vmulQ_lanef32(<4 x float> %arg0_float32x4_t, <2 x float> %arg1_float32x2_t) nounwind readnone {
 ; CHECK-LABEL: test_vmulQ_lanef32:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    @ kill: def $d2 killed $d2 def $q1
 ; CHECK-NEXT:    vmul.f32 q0, q0, d2[1]
 ; CHECK-NEXT:    bx lr
 entry:
@@ -142,7 +141,6 @@ entry:
 define arm_aapcs_vfpcc <8 x i16> @test_vmulQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
 ; CHECK-LABEL: test_vmulQ_lanes16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    @ kill: def $d2 killed $d2 def $q1
 ; CHECK-NEXT:    vmul.i16 q0, q0, d2[1]
 ; CHECK-NEXT:    bx lr
 entry:
@@ -154,7 +152,6 @@ entry:
 define arm_aapcs_vfpcc <4 x i32> @test_vmulQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
 ; CHECK-LABEL: test_vmulQ_lanes32:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    @ kill: def $d2 killed $d2 def $q1
 ; CHECK-NEXT:    vmul.i32 q0, q0, d2[1]
 ; CHECK-NEXT:    bx lr
 entry:
diff --git a/llvm/test/CodeGen/ARM/vpadd.ll b/llvm/test/CodeGen/ARM/vpadd.ll
index cc1d1e6a..0597d44b 100644
--- a/llvm/test/CodeGen/ARM/vpadd.ll
+++ b/llvm/test/CodeGen/ARM/vpadd.ll
@@ -508,14 +508,14 @@ define <2 x i16> @fromExtendingExtractVectorElt_2i16(<8 x i16> %in) {
 ; CHECK-LABEL: fromExtendingExtractVectorElt_2i16:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vmov d16, r0, r1
-; CHECK-NEXT:    vmov.u16 r0, d16[0]
 ; CHECK-NEXT:    vmov.u16 r1, d16[1]
-; CHECK-NEXT:    vmov.u16 r3, d16[3]
+; CHECK-NEXT:    vmov.u16 r0, d16[0]
 ; CHECK-NEXT:    vmov.u16 r2, d16[2]
-; CHECK-NEXT:    vmov.32 d16[0], r0
+; CHECK-NEXT:    vmov.u16 r3, d16[3]
 ; CHECK-NEXT:    vmov.32 d17[0], r1
-; CHECK-NEXT:    vmov.32 d16[1], r2
+; CHECK-NEXT:    vmov.32 d16[0], r0
 ; CHECK-NEXT:    vmov.32 d17[1], r3
+; CHECK-NEXT:    vmov.32 d16[1], r2
 ; CHECK-NEXT:    vadd.i32 d16, d17, d16
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    mov pc, lr
diff --git a/llvm/test/CodeGen/ARM/vuzp.ll b/llvm/test/CodeGen/ARM/vuzp.ll
index d54446a..7e1dfba 100644
--- a/llvm/test/CodeGen/ARM/vuzp.ll
+++ b/llvm/test/CodeGen/ARM/vuzp.ll
@@ -21,11 +21,11 @@ define <8 x i8> @vuzpi8(ptr %A, ptr %B) nounwind {
 define <16 x i8> @vuzpi8_Qres(ptr %A, ptr %B) nounwind {
 ; CHECK-LABEL: vuzpi8_Qres:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vldr d17, [r1]
-; CHECK-NEXT:    vldr d16, [r0]
-; CHECK-NEXT:    vuzp.8 d16, d17
-; CHECK-NEXT:    vmov r0, r1, d16
-; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vuzp.8 d17, d16
+; CHECK-NEXT:    vmov r0, r1, d17
+; CHECK-NEXT:    vmov r2, r3, d16
 ; CHECK-NEXT:    mov pc, lr
 	%tmp1 = load <8 x i8>, ptr %A
 	%tmp2 = load <8 x i8>, ptr %B
@@ -53,11 +53,11 @@ define <4 x i16> @vuzpi16(ptr %A, ptr %B) nounwind {
 define <8 x i16> @vuzpi16_Qres(ptr %A, ptr %B) nounwind {
 ; CHECK-LABEL: vuzpi16_Qres:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vldr d17, [r1]
-; CHECK-NEXT:    vldr d16, [r0]
-; CHECK-NEXT:    vuzp.16 d16, d17
-; CHECK-NEXT:    vmov r0, r1, d16
-; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vuzp.16 d17, d16
+; CHECK-NEXT:    vmov r0, r1, d17
+; CHECK-NEXT:    vmov r2, r3, d16
 ; CHECK-NEXT:    mov pc, lr
 	%tmp1 = load <4 x i16>, ptr %A
 	%tmp2 = load <4 x i16>, ptr %B
@@ -221,11 +221,11 @@ define <8 x i8> @vuzpi8_undef(ptr %A, ptr %B) nounwind {
 define <16 x i8> @vuzpi8_undef_Qres(ptr %A, ptr %B) nounwind {
 ; CHECK-LABEL: vuzpi8_undef_Qres:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vldr d17, [r1]
-; CHECK-NEXT:    vldr d16, [r0]
-; CHECK-NEXT:    vuzp.8 d16, d17
-; CHECK-NEXT:    vmov r0, r1, d16
-; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vuzp.8 d17, d16
+; CHECK-NEXT:    vmov r0, r1, d17
+; CHECK-NEXT:    vmov r2, r3, d16
 ; CHECK-NEXT:    mov pc, lr
 	%tmp1 = load <8 x i8>, ptr %A
 	%tmp2 = load <8 x i8>, ptr %B
@@ -269,11 +269,11 @@ define <16 x i16> @vuzpQi16_undef_QQres(ptr %A, ptr %B) nounwind {
 define <8 x i16> @vuzp_lower_shufflemask_undef(ptr %A, ptr %B) {
 ; CHECK-LABEL: vuzp_lower_shufflemask_undef:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldr d17, [r1]
-; CHECK-NEXT:    vldr d18, [r0]
-; CHECK-NEXT:    vuzp.16 d18, d17
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vuzp.16 d17, d16
 ; CHECK-NEXT:    vmov r0, r1, d16
-; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    vmov r2, r3, d16
 ; CHECK-NEXT:    mov pc, lr
 entry:
 	%tmp1 = load <4 x i16>, ptr %A
@@ -285,13 +285,13 @@ entry:
 define <4 x i32> @vuzp_lower_shufflemask_zeroed(ptr %A, ptr %B) {
 ; CHECK-LABEL: vuzp_lower_shufflemask_zeroed:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldr d18, [r0]
-; CHECK-NEXT:    vorr d19, d18, d18
-; CHECK-NEXT:    vldr d17, [r1]
-; CHECK-NEXT:    vtrn.32 d19, d17
-; CHECK-NEXT:    vdup.32 d16, d18[0]
-; CHECK-NEXT:    vmov r2, r3, d17
-; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vorr d18, d17, d17
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vdup.32 d17, d17[0]
+; CHECK-NEXT:    vtrn.32 d18, d16
+; CHECK-NEXT:    vmov r0, r1, d17
+; CHECK-NEXT:    vmov r2, r3, d16
 ; CHECK-NEXT:    mov pc, lr
 entry:
   %tmp1 = load <2 x i32>, ptr %A
diff --git a/llvm/test/CodeGen/ARM/vzip.ll b/llvm/test/CodeGen/ARM/vzip.ll
index 68f00a2..dda774a 100644
--- a/llvm/test/CodeGen/ARM/vzip.ll
+++ b/llvm/test/CodeGen/ARM/vzip.ll
@@ -21,11 +21,11 @@ define <8 x i8> @vzipi8(ptr %A, ptr %B) nounwind {
 define <16 x i8> @vzipi8_Qres(ptr %A, ptr %B) nounwind {
 ; CHECK-LABEL: vzipi8_Qres:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vldr d17, [r1]
-; CHECK-NEXT:    vldr d16, [r0]
-; CHECK-NEXT:    vzip.8 d16, d17
-; CHECK-NEXT:    vmov r0, r1, d16
-; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vzip.8 d17, d16
+; CHECK-NEXT:    vmov r0, r1, d17
+; CHECK-NEXT:    vmov r2, r3, d16
 ; CHECK-NEXT:    mov pc, lr
 	%tmp1 = load <8 x i8>, ptr %A
 	%tmp2 = load <8 x i8>, ptr %B
@@ -53,11 +53,11 @@ define <4 x i16> @vzipi16(ptr %A, ptr %B) nounwind {
 define <8 x i16> @vzipi16_Qres(ptr %A, ptr %B) nounwind {
 ; CHECK-LABEL: vzipi16_Qres:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vldr d17, [r1]
-; CHECK-NEXT:    vldr d16, [r0]
-; CHECK-NEXT:    vzip.16 d16, d17
-; CHECK-NEXT:    vmov r0, r1, d16
-; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vzip.16 d17, d16
+; CHECK-NEXT:    vmov r0, r1, d17
+; CHECK-NEXT:    vmov r2, r3, d16
 ; CHECK-NEXT:    mov pc, lr
 	%tmp1 = load <4 x i16>, ptr %A
 	%tmp2 = load <4 x i16>, ptr %B
@@ -221,11 +221,11 @@ define <8 x i8> @vzipi8_undef(ptr %A, ptr %B) nounwind {
 define <16 x i8> @vzipi8_undef_Qres(ptr %A, ptr %B) nounwind {
 ; CHECK-LABEL: vzipi8_undef_Qres:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vldr d17, [r1]
-; CHECK-NEXT:    vldr d16, [r0]
-; CHECK-NEXT:    vzip.8 d16, d17
-; CHECK-NEXT:    vmov r0, r1, d16
-; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vzip.8 d17, d16
+; CHECK-NEXT:    vmov r0, r1, d17
+; CHECK-NEXT:    vmov r2, r3, d16
 ; CHECK-NEXT:    mov pc, lr
 	%tmp1 = load <8 x i8>, ptr %A
 	%tmp2 = load <8 x i8>, ptr %B
@@ -269,11 +269,11 @@ define <32 x i8> @vzipQi8_undef_QQres(ptr %A, ptr %B) nounwind {
 define <8 x i16> @vzip_lower_shufflemask_undef(ptr %A, ptr %B) {
 ; CHECK-LABEL: vzip_lower_shufflemask_undef:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vldr d17, [r1]
-; CHECK-NEXT:    vldr d18, [r0]
-; CHECK-NEXT:    vzip.16 d18, d17
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vzip.16 d17, d16
 ; CHECK-NEXT:    vmov r0, r1, d16
-; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    vmov r2, r3, d16
 ; CHECK-NEXT:    mov pc, lr
 entry:
 	%tmp1 = load <4 x i16>, ptr %A
@@ -289,10 +289,10 @@ define <8 x i16> @vzip_lower_shufflemask_undef_rev(ptr %A, ptr %B) {
 ; CHECK-LABEL: vzip_lower_shufflemask_undef_rev:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vldr d16, [r1]
-; CHECK-NEXT:    vldr d19, [r0]
-; CHECK-NEXT:    vtrn.16 d19, d16
-; CHECK-NEXT:    vmov r0, r1, d18
-; CHECK-NEXT:    vmov r2, r3, d19
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vtrn.16 d17, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    mov pc, lr
 entry:
   %tmp1 = load <4 x i16>, ptr %A
diff --git a/llvm/test/CodeGen/AVR/return.ll b/llvm/test/CodeGen/AVR/return.ll
index 8cb9f14..207ad2f 100644
--- a/llvm/test/CodeGen/AVR/return.ll
+++ b/llvm/test/CodeGen/AVR/return.ll
@@ -187,26 +187,26 @@ define i64 @return64_imm() {
 ; TINY-NEXT:    st Z+, r21
 ; TINY-NEXT:    subi r30, 8
 ; TINY-NEXT:    sbci r31, 0
-; TINY-NEXT:    ldi r24, 25
-; TINY-NEXT:    ldi r25, 22
+; TINY-NEXT:    ldi r20, 25
+; TINY-NEXT:    ldi r21, 22
 ; TINY-NEXT:    subi r30, 252
 ; TINY-NEXT:    sbci r31, 255
-; TINY-NEXT:    st Z+, r24
-; TINY-NEXT:    st Z+, r25
+; TINY-NEXT:    st Z+, r20
+; TINY-NEXT:    st Z+, r21
 ; TINY-NEXT:    subi r30, 6
 ; TINY-NEXT:    sbci r31, 0
-; TINY-NEXT:    ldi r24, 104
-; TINY-NEXT:    ldi r25, 37
+; TINY-NEXT:    ldi r20, 104
+; TINY-NEXT:    ldi r21, 37
 ; TINY-NEXT:    subi r30, 254
 ; TINY-NEXT:    sbci r31, 255
-; TINY-NEXT:    st Z+, r24
-; TINY-NEXT:    st Z+, r25
+; TINY-NEXT:    st Z+, r20
+; TINY-NEXT:    st Z+, r21
 ; TINY-NEXT:    subi r30, 4
 ; TINY-NEXT:    sbci r31, 0
-; TINY-NEXT:    ldi r24, 204
-; TINY-NEXT:    ldi r25, 204
-; TINY-NEXT:    st Z+, r24
-; TINY-NEXT:    st Z+, r25
+; TINY-NEXT:    ldi r20, 204
+; TINY-NEXT:    ldi r21, 204
+; TINY-NEXT:    st Z+, r20
+; TINY-NEXT:    st Z+, r21
 ; TINY-NEXT:    ret
     ret i64 13757395258967641292
 }
@@ -243,8 +243,8 @@ define i64 @return64_arg(i64 %x) {
 ; TINY-NEXT:    in r16, 63
 ; TINY-NEXT:    subi r28, 247
 ; TINY-NEXT:    sbci r29, 255
-; TINY-NEXT:    ld r24, Y+
-; TINY-NEXT:    ld r25, Y+
+; TINY-NEXT:    ld r20, Y+
+; TINY-NEXT:    ld r21, Y+
 ; TINY-NEXT:    subi r28, 2
 ; TINY-NEXT:    sbci r29, 0
 ; TINY-NEXT:    subi r28, 9
@@ -252,15 +252,15 @@ define i64 @return64_arg(i64 %x) {
 ; TINY-NEXT:    out 63, r16
 ; TINY-NEXT:    subi r30, 252
 ; TINY-NEXT:    sbci r31, 255
-; TINY-NEXT:    st Z+, r24
-; TINY-NEXT:    st Z+, r25
+; TINY-NEXT:    st Z+, r20
+; TINY-NEXT:    st Z+, r21
 ; TINY-NEXT:    subi r30, 6
 ; TINY-NEXT:    sbci r31, 0
 ; TINY-NEXT:    in r16, 63
 ; TINY-NEXT:    subi r28, 249
 ; TINY-NEXT:    sbci r29, 255
-; TINY-NEXT:    ld r24, Y+
-; TINY-NEXT:    ld r25, Y+
+; TINY-NEXT:    ld r20, Y+
+; TINY-NEXT:    ld r21, Y+
 ; TINY-NEXT:    subi r28, 2
 ; TINY-NEXT:    sbci r29, 0
 ; TINY-NEXT:    subi r28, 7
@@ -268,22 +268,22 @@ define i64 @return64_arg(i64 %x) {
 ; TINY-NEXT:    out 63, r16
 ; TINY-NEXT:    subi r30, 254
 ; TINY-NEXT:    sbci r31, 255
-; TINY-NEXT:    st Z+, r24
-; TINY-NEXT:    st Z+, r25
+; TINY-NEXT:    st Z+, r20
+; TINY-NEXT:    st Z+, r21
 ; TINY-NEXT:    subi r30, 4
 ; TINY-NEXT:    sbci r31, 0
 ; TINY-NEXT:    in r16, 63
 ; TINY-NEXT:    subi r28, 251
 ; TINY-NEXT:    sbci r29, 255
-; TINY-NEXT:    ld r24, Y+
-; TINY-NEXT:    ld r25, Y+
+; TINY-NEXT:    ld r20, Y+
+; TINY-NEXT:    ld r21, Y+
 ; TINY-NEXT:    subi r28, 2
 ; TINY-NEXT:    sbci r29, 0
 ; TINY-NEXT:    subi r28, 5
 ; TINY-NEXT:    sbci r29, 0
 ; TINY-NEXT:    out 63, r16
-; TINY-NEXT:    st Z+, r24
-; TINY-NEXT:    st Z+, r25
+; TINY-NEXT:    st Z+, r20
+; TINY-NEXT:    st Z+, r21
 ; TINY-NEXT:    pop r29
 ; TINY-NEXT:    pop r28
 ; TINY-NEXT:    ret
@@ -336,8 +336,8 @@ define i64 @return64_arg2(i64 %x, i64 %y, i64 %z) {
 ; TINY-NEXT:    in r16, 63
 ; TINY-NEXT:    subi r28, 231
 ; TINY-NEXT:    sbci r29, 255
-; TINY-NEXT:    ld r24, Y+
-; TINY-NEXT:    ld r25, Y+
+; TINY-NEXT:    ld r20, Y+
+; TINY-NEXT:    ld r21, Y+
 ; TINY-NEXT:    subi r28, 2
 ; TINY-NEXT:    sbci r29, 0
 ; TINY-NEXT:    subi r28, 25
@@ -345,15 +345,15 @@ define i64 @return64_arg2(i64 %x, i64 %y, i64 %z) {
 ; TINY-NEXT:    out 63, r16
 ; TINY-NEXT:    subi r30, 252
 ; TINY-NEXT:    sbci r31, 255
-; TINY-NEXT:    st Z+, r24
-; TINY-NEXT:    st Z+, r25
+; TINY-NEXT:    st Z+, r20
+; TINY-NEXT:    st Z+, r21
 ; TINY-NEXT:    subi r30, 6
 ; TINY-NEXT:    sbci r31, 0
 ; TINY-NEXT:    in r16, 63
 ; TINY-NEXT:    subi r28, 233
 ; TINY-NEXT:    sbci r29, 255
-; TINY-NEXT:    ld r24, Y+
-; TINY-NEXT:    ld r25, Y+
+; TINY-NEXT:    ld r20, Y+
+; TINY-NEXT:    ld r21, Y+
 ; TINY-NEXT:    subi r28, 2
 ; TINY-NEXT:    sbci r29, 0
 ; TINY-NEXT:    subi r28, 23
@@ -361,22 +361,22 @@ define i64 @return64_arg2(i64 %x, i64 %y, i64 %z) {
 ; TINY-NEXT:    out 63, r16
 ; TINY-NEXT:    subi r30, 254
 ; TINY-NEXT:    sbci r31, 255
-; TINY-NEXT:    st Z+, r24
-; TINY-NEXT:    st Z+, r25
+; TINY-NEXT:    st Z+, r20
+; TINY-NEXT:    st Z+, r21
 ; TINY-NEXT:    subi r30, 4
 ; TINY-NEXT:    sbci r31, 0
 ; TINY-NEXT:    in r16, 63
 ; TINY-NEXT:    subi r28, 235
 ; TINY-NEXT:    sbci r29, 255
-; TINY-NEXT:    ld r24, Y+
-; TINY-NEXT:    ld r25, Y+
+; TINY-NEXT:    ld r20, Y+
+; TINY-NEXT:    ld r21, Y+
 ; TINY-NEXT:    subi r28, 2
 ; TINY-NEXT:    sbci r29, 0
 ; TINY-NEXT:    subi r28, 21
 ; TINY-NEXT:    sbci r29, 0
 ; TINY-NEXT:    out 63, r16
-; TINY-NEXT:    st Z+, r24
-; TINY-NEXT:    st Z+, r25
+; TINY-NEXT:    st Z+, r20
+; TINY-NEXT:    st Z+, r21
 ; TINY-NEXT:    pop r29
 ; TINY-NEXT:    pop r28
 ; TINY-NEXT:    ret
diff --git a/llvm/test/CodeGen/BPF/is_trunc_free.ll b/llvm/test/CodeGen/BPF/is_trunc_free.ll
index 21c8a2a..fe00731 100644
--- a/llvm/test/CodeGen/BPF/is_trunc_free.ll
+++ b/llvm/test/CodeGen/BPF/is_trunc_free.ll
@@ -58,7 +58,6 @@ cleanup:                                          ; preds = %entry, %if.end10
 }
 
 ; CHECK: w{{[0-9]+}} = *(u32 *)(r{{[0-9]+}} + 0)
-; CHECK-NOT: w{{[0-9]+}} = w{{[0-9]+}}
 
 declare dso_local i32 @work(ptr, i32) local_unnamed_addr #1
 
diff --git a/llvm/test/CodeGen/Hexagon/bank-conflict-load.mir b/llvm/test/CodeGen/Hexagon/bank-conflict-load.mir
index 01cc0ee..7c741aa 100644
--- a/llvm/test/CodeGen/Hexagon/bank-conflict-load.mir
+++ b/llvm/test/CodeGen/Hexagon/bank-conflict-load.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=hexagon -run-pass post-RA-sched %s -o - | FileCheck %s
+# RUN: llc -mtriple=hexagon -passes=post-RA-sched %s -o - | FileCheck %s
 
 # The two loads from %a ($r0) can cause a bank conflict. Check that they
 # are not scheduled next to each other.
diff --git a/llvm/test/CodeGen/Hexagon/bank-conflict.mir b/llvm/test/CodeGen/Hexagon/bank-conflict.mir
index 12d7838..0add375 100644
--- a/llvm/test/CodeGen/Hexagon/bank-conflict.mir
+++ b/llvm/test/CodeGen/Hexagon/bank-conflict.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=hexagon -run-pass post-RA-sched %s -o - | FileCheck %s
+# RUN: llc -mtriple=hexagon -passes=post-RA-sched %s -o - | FileCheck %s
 
 # Test that the Post RA scheduler does not schedule back-to-back loads
 # when there is another instruction to schedule. The scheduler avoids
diff --git a/llvm/test/CodeGen/Hexagon/swp-carried-dep1.mir b/llvm/test/CodeGen/Hexagon/swp-carried-dep1.mir
index c333f1b..afc989c 100644
--- a/llvm/test/CodeGen/Hexagon/swp-carried-dep1.mir
+++ b/llvm/test/CodeGen/Hexagon/swp-carried-dep1.mir
@@ -3,6 +3,13 @@
 
 # Test that the loop carried dependence check correctly identifies a recurrence.
 
+# CHECK: Overlap check:
+# CHECK-NEXT:   BaseMI:   S2_storerh_io %{{[0-9]+}}:intregs, 0, %{{[0-9]+}}:intregs :: (store (s16) into %ir.lsr.iv24)
+# CHECK-NEXT:     Base + 0 + I * 4, Len: 2
+# CHECK-NEXT:   OtherMI:   %{{[0-9]+}}:intregs = L2_loadrh_io %{{[0-9]+}}:intregs, -8 :: (load (s16) from %ir.cgep10)
+# CHECK-NEXT:     Base + -8 + I * 4, Len: 2
+# CHECK-NEXT:   Result: Overlap
+
 # CHECK: Rec NodeSet
 # CHECK: Rec NodeSet
 # CHECK: Rec NodeSet
diff --git a/llvm/test/CodeGen/Hexagon/swp-carried-dep2.mir b/llvm/test/CodeGen/Hexagon/swp-carried-dep2.mir
index 16ff599..e16334b 100644
--- a/llvm/test/CodeGen/Hexagon/swp-carried-dep2.mir
+++ b/llvm/test/CodeGen/Hexagon/swp-carried-dep2.mir
@@ -1,14 +1,26 @@
 # RUN: llc -mtriple=hexagon -run-pass pipeliner -debug-only=pipeliner %s -o /dev/null 2>&1 -pipeliner-experimental-cg=true | FileCheck %s
 # REQUIRES: asserts
 
-# Test that the loop carried dependence check correctly identifies a recurrence
+# Test that the loop carried dependence check correctly identifies dependences
 # when the loop variable decreases and the array index offset is negative.
 
-# CHECK: Rec NodeSet
-# CHECK: Rec NodeSet
-# CHECK: SU(3)
-# CHECK: SU(4)
-# CHECK: SU(5)
+# No dependence from the store to the load.
+# CHECK: Overlap check:
+# CHECK-NEXT:   BaseMI:   S2_storeri_io %{{[0-9]+}}:intregs, 0, %{{[0-9]+}}:intregs :: (store (s32) into %ir.lsr.iv1)
+# CHECK-NEXT:     Base + 0 + I * -4, Len: 4
+# CHECK-NEXT:   OtherMI:   %{{[0-9]+}}:intregs = L2_loadri_io %{{[0-9]+}}:intregs, -8 :: (load (s32) from %ir.cgep)
+# CHECK-NEXT:     Base + -8 + I * -4, Len: 4
+# CHECK-NEXT:   Result: No overlap
+
+# TODO: There is a loop carried dependence from the load to the store but it
+#   is not recognised. addLoopCarriedDependences() should be modified to
+#   recognise the dependence and enable the following checks.
+# CHECK-AFTER-FIX: Overlap check:
+# CHECK-AFTER-FIX-NEXT:   BaseMI:   %{{[0-9]+}}:intregs = L2_loadri_io %{{[0-9]+}}:intregs, -8 :: (load (s32) from %ir.cgep)
+# CHECK-AFTER-FIX-NEXT:     Base + -8 + I * -4, Len: 4
+# CHECK-AFTER-FIX-NEXT:   OtherMI:   S2_storeri_io %{{[0-9]+}}:intregs, 0, %{{[0-9]+}}:intregs :: (store (s32) into %ir.lsr.iv1)
+# CHECK-AFTER-FIX-NEXT:     Base + 0 + I * -4, Len: 4
+# CHECK-AFTER-FIX-NEXT:   Result: Overlap!
 
 --- |
 
diff --git a/llvm/test/CodeGen/Hexagon/swp-carried-dep3.mir b/llvm/test/CodeGen/Hexagon/swp-carried-dep3.mir
index a1b0aec..91eb225 100644
--- a/llvm/test/CodeGen/Hexagon/swp-carried-dep3.mir
+++ b/llvm/test/CodeGen/Hexagon/swp-carried-dep3.mir
@@ -7,6 +7,13 @@
 # requires to use a single CHECK-NOT to match such a Rec NodeSet. Fortunately
 # the atom '.' does not match a newline but anything else on a line.
 
+# CHECK: Overlap check:
+# CHECK-NEXT:   BaseMI:   %13:intregs = S2_storerh_pi %12:intregs(tied-def 0), 2, %20:intregs :: (store (s16))
+# CHECK-NEXT:     Base + 0 + I * 2, Len: 2
+# CHECK-NEXT:   OtherMI:   %19:intregs, %15:intregs = L2_loadrh_pi %14:intregs(tied-def 1), 2 :: (load (s16))
+# CHECK-NEXT:     Base + 0 + I * 2, Len: 2
+# CHECK-NEXT:   Result: No overlap
+
 # CHECK-NOT: Rec NodeSet{{.+[[:space:]]}} SU(5){{.+[[:space:]]}} SU(7)
 
 ...
diff --git a/llvm/test/CodeGen/Hexagon/swp-carried-dep4.mir b/llvm/test/CodeGen/Hexagon/swp-carried-dep4.mir
new file mode 100644
index 0000000..ff04886b
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/swp-carried-dep4.mir
@@ -0,0 +1,37 @@
+# RUN: llc -mtriple=hexagon -run-pass pipeliner -debug-only=pipeliner %s -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# Loop carried dependence is assumed in cases where increment value cannot be recognized
+# (Not supported for multiple increment instruction)
+
+# CHECK:  Rec NodeSet
+# CHECK:  Rec NodeSet
+# CHECK-NEXT:    SU(1)
+# CHECK-NEXT:    SU(2)
+
+---
+name:            test
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+    successors: %bb.1
+
+    %10:intregs = IMPLICIT_DEF
+    %11:intregs = IMPLICIT_DEF
+    J2_loop0i %bb.1, 6, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
+
+  bb.1 (machine-block-address-taken):
+    successors: %bb.1, %bb.2
+
+    %0:intregs = PHI %11, %bb.0, %6, %bb.1
+    %4:intregs = L2_loadri_io %0, 0 :: (load (s32))
+    S2_storeri_io %0, 0, %10 :: (store (s32))
+    %7:intregs = A2_addi %0, -8
+    %6:intregs = A2_addi %7, 4
+    ENDLOOP0 %bb.1, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
+    J2_jump %bb.2, implicit-def dead $pc
+
+  bb.2:
+
+...
diff --git a/llvm/test/CodeGen/Hexagon/swp-carried-dep5.mir b/llvm/test/CodeGen/Hexagon/swp-carried-dep5.mir
new file mode 100644
index 0000000..bf3cd95
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/swp-carried-dep5.mir
@@ -0,0 +1,146 @@
+# RUN: llc -mtriple=hexagon -run-pass pipeliner -debug-only=pipeliner %s -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# Test boundary cases for loop carried dependence analysis
+
+#CHECK: Overlap check:
+#CHECK:   BaseMI:   S2_storeri_io %10:intregs, 0, %0:intregs :: (store (s32))
+#CHECK:     Base + 0 + I * 3, Len: 4
+#CHECK:   OtherMI:   dead %30:intregs = L2_loadri_io %10:intregs, 0 :: (load (s32))
+#CHECK:     Base + 0 + I * 3, Len: 4
+#CHECK:   Result: Overlap
+#CHECK: Overlap check:
+#CHECK:   BaseMI:   S2_storeri_io %12:intregs, 0, %1:intregs :: (store (s32))
+#CHECK:     Base + 0 + I * 4, Len: 4
+#CHECK:   OtherMI:   dead %31:intregs = L2_loadri_io %12:intregs, 0 :: (load (s32))
+#CHECK:     Base + 0 + I * 4, Len: 4
+#CHECK:   Result: No overlap
+#CHECK: Overlap check:
+#CHECK:   BaseMI:   S2_storeri_io %14:intregs, 0, %2:intregs :: (store (s64))
+#CHECK:     Base + 0 + I * 4, Len: 8
+#CHECK:   OtherMI:   dead %32:intregs = L2_loadri_io %14:intregs, 0 :: (load (s32))
+#CHECK:     Base + 0 + I * 4, Len: 4
+#CHECK:   Result: Overlap
+#CHECK: Overlap check:
+#CHECK:   BaseMI:   S2_storeri_io %16:intregs, 0, %3:intregs :: (store (s32))
+#CHECK:     Base + 0 + I * 4, Len: 4
+#CHECK:   OtherMI:   dead %33:intregs = L2_loadri_io %16:intregs, -1 :: (load (s32))
+#CHECK:     Base + -1 + I * 4, Len: 4
+#CHECK:   Result: Overlap
+#CHECK: Overlap check:
+#CHECK:   BaseMI:   S2_storeri_io %18:intregs, 1, %4:intregs :: (store (s32))
+#CHECK:     Base + 1 + I * 4, Len: 4
+#CHECK:   OtherMI:   dead %34:intregs = L2_loadri_io %18:intregs, 0 :: (load (s32))
+#CHECK:     Base + 0 + I * 4, Len: 4
+#CHECK:   Result: Overlap
+#CHECK: Overlap check:
+#CHECK:   BaseMI:   S2_storeri_io %20:intregs, 0, %5:intregs :: (store (s32))
+#CHECK:     Base + 0 + I * -3, Len: 4
+#CHECK:   OtherMI:   dead %35:intregs = L2_loadri_io %20:intregs, 0 :: (load (s32))
+#CHECK:     Base + 0 + I * -3, Len: 4
+#CHECK:   Result: Overlap
+#CHECK: Overlap check:
+#CHECK:   BaseMI:   S2_storeri_io %22:intregs, 0, %6:intregs :: (store (s32))
+#CHECK:     Base + 0 + I * -4, Len: 4
+#CHECK:   OtherMI:   dead %36:intregs = L2_loadri_io %22:intregs, 0 :: (load (s32))
+#CHECK:     Base + 0 + I * -4, Len: 4
+#CHECK:   Result: No overlap
+#CHECK: Overlap check:
+#CHECK:   BaseMI:   S2_storeri_io %24:intregs, 0, %7:intregs :: (store (s32))
+#CHECK:     Base + 0 + I * -4, Len: 4
+#CHECK:   OtherMI:   dead %37:intregs = L2_loadri_io %24:intregs, 0 :: (load (s64))
+#CHECK:     Base + 0 + I * -4, Len: 8
+#CHECK:   Result: Overlap
+#CHECK: Overlap check:
+#CHECK:   BaseMI:   S2_storeri_io %26:intregs, 0, %8:intregs :: (store (s32))
+#CHECK:     Base + 0 + I * -4, Len: 4
+#CHECK:   OtherMI:   dead %38:intregs = L2_loadri_io %26:intregs, 1 :: (load (s32))
+#CHECK:     Base + 1 + I * -4, Len: 4
+#CHECK:   Result: Overlap
+#CHECK: Overlap check:
+#CHECK:   BaseMI:   S2_storeri_io %28:intregs, -1, %9:intregs :: (store (s32))
+#CHECK:     Base + -1 + I * -4, Len: 4
+#CHECK:   OtherMI:   dead %39:intregs = L2_loadri_io %28:intregs, 0 :: (load (s32))
+#CHECK:     Base + 0 + I * -4, Len: 4
+#CHECK:   Result: Overlap
+
+---
+name:            test
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+    successors: %bb.1
+
+    %100:intregs = IMPLICIT_DEF
+    %200:intregs = IMPLICIT_DEF
+    %300:intregs = IMPLICIT_DEF
+    %400:intregs = IMPLICIT_DEF
+    %500:intregs = IMPLICIT_DEF
+    %600:intregs = IMPLICIT_DEF
+    %700:intregs = IMPLICIT_DEF
+    %800:intregs = IMPLICIT_DEF
+    %900:intregs = IMPLICIT_DEF
+    %1000:intregs = IMPLICIT_DEF
+    J2_loop0i %bb.1, 6, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
+
+  bb.1 (machine-block-address-taken):
+    successors: %bb.1, %bb.2
+
+    %0:intregs = PHI %100, %bb.0, %1, %bb.1
+    %2:intregs = PHI %200, %bb.0, %3, %bb.1
+    %4:intregs = PHI %300, %bb.0, %5, %bb.1
+    %6:intregs = PHI %400, %bb.0, %7, %bb.1
+    %8:intregs = PHI %500, %bb.0, %9, %bb.1
+    %10:intregs = PHI %600, %bb.0, %11, %bb.1
+    %12:intregs = PHI %700, %bb.0, %13, %bb.1
+    %14:intregs = PHI %800, %bb.0, %15, %bb.1
+    %16:intregs = PHI %900, %bb.0, %17, %bb.1
+    %18:intregs = PHI %1000, %bb.0, %19, %bb.1
+
+    %1:intregs = A2_addi %0, 3
+    %101:intregs = L2_loadri_io %0, 0 :: (load (s32))
+    S2_storeri_io %0, 0, %100 :: (store (s32))
+
+    %3:intregs = A2_addi %2, 4
+    %201:intregs = L2_loadri_io %2, 0 :: (load (s32))
+    S2_storeri_io %2, 0, %200 :: (store (s32))
+
+    %5:intregs = A2_addi %4, 4
+    %301:intregs = L2_loadri_io %4, 0 :: (load (s32))
+    S2_storeri_io %4, 0, %300 :: (store (s64))
+
+    %7:intregs = A2_addi %6, 4
+    %401:intregs = L2_loadri_io %6, -1 :: (load (s32))
+    S2_storeri_io %6, 0, %400 :: (store (s32))
+
+    %9:intregs = A2_addi %8, 4
+    %501:intregs = L2_loadri_io %8, 0 :: (load (s32))
+    S2_storeri_io %8, 1, %500 :: (store (s32))
+
+    %11:intregs = A2_addi %10, -3
+    %601:intregs = L2_loadri_io %10, 0 :: (load (s32))
+    S2_storeri_io %10, 0, %600 :: (store (s32))
+
+    %13:intregs = A2_addi %12, -4
+    %701:intregs = L2_loadri_io %12, 0 :: (load (s32))
+    S2_storeri_io %12, 0, %700 :: (store (s32))
+
+    %15:intregs = A2_addi %14, -4
+    %801:intregs = L2_loadri_io %14, 0 :: (load (s64))
+    S2_storeri_io %14, 0, %800 :: (store (s32))
+
+    %17:intregs = A2_addi %16, -4
+    %901:intregs = L2_loadri_io %16, 1 :: (load (s32))
+    S2_storeri_io %16, 0, %900 :: (store (s32))
+
+    %19:intregs = A2_addi %18, -4
+    %1001:intregs = L2_loadri_io %18, 0 :: (load (s32))
+    S2_storeri_io %18, -1, %1000 :: (store (s32))
+
+    ENDLOOP0 %bb.1, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
+    J2_jump %bb.2, implicit-def dead $pc
+
+  bb.2:
+
+...
diff --git a/llvm/test/CodeGen/Hexagon/swp-epilog-phi8.ll b/llvm/test/CodeGen/Hexagon/swp-epilog-phi8.ll
index 76434e7..e9384e8 100644
--- a/llvm/test/CodeGen/Hexagon/swp-epilog-phi8.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-epilog-phi8.ll
@@ -31,7 +31,7 @@ b3:                                               ; preds = %b3, %b2
   %v6 = load i32, ptr %v5, align 4
   %v7 = tail call i32 @llvm.hexagon.A2.subsat(i32 %v2, i32 %v6)
   store i32 %v7, ptr %v3, align 4
-  %v8 = add i32 %v0, -1
+  %v8 = add i32 %v0, 1
   %v9 = icmp sgt i32 %v8, 1
   br i1 %v9, label %b3, label %b4
 
diff --git a/llvm/test/CodeGen/NVPTX/fence-cluster.ll b/llvm/test/CodeGen/NVPTX/fence-cluster.ll
new file mode 100644
index 0000000..697dce4
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/fence-cluster.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx87 | FileCheck %s --check-prefix=SM90
+; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_90 -mattr=+ptx87 | %ptxas-verify %}
+
+define void @fence_acquire_cluster() {
+; SM90-LABEL: fence_acquire_cluster(
+; SM90:       {
+; SM90-EMPTY:
+; SM90-EMPTY:
+; SM90-NEXT:  // %bb.0:
+; SM90-NEXT:    fence.acquire.cluster;
+; SM90-NEXT:    ret;
+    fence syncscope("cluster") acquire
+    ret void
+}
+
+
+define void @fence_release_cluster() {
+; SM90-LABEL: fence_release_cluster(
+; SM90:       {
+; SM90-EMPTY:
+; SM90-EMPTY:
+; SM90-NEXT:  // %bb.0:
+; SM90-NEXT:    fence.release.cluster;
+; SM90-NEXT:    ret;
+    fence syncscope("cluster") release
+    ret void
+}
+
+
+define void @fence_acq_rel_cluster() {
+; SM90-LABEL: fence_acq_rel_cluster(
+; SM90:       {
+; SM90-EMPTY:
+; SM90-EMPTY:
+; SM90-NEXT:  // %bb.0:
+; SM90-NEXT:    fence.acq_rel.cluster;
+; SM90-NEXT:    ret;
+    fence syncscope("cluster") acq_rel
+    ret void
+}
+
+
+define void @fence_seq_cst_cluster() {
+; SM90-LABEL: fence_seq_cst_cluster(
+; SM90:       {
+; SM90-EMPTY:
+; SM90-EMPTY:
+; SM90-NEXT:  // %bb.0:
+; SM90-NEXT:    fence.sc.cluster;
+; SM90-NEXT:    ret;
+    fence syncscope("cluster") seq_cst
+    ret void
+}
+
diff --git a/llvm/test/CodeGen/NVPTX/fence-nocluster.ll b/llvm/test/CodeGen/NVPTX/fence-nocluster.ll
new file mode 100644
index 0000000..e2bec72
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/fence-nocluster.ll
@@ -0,0 +1,355 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx50 | FileCheck %s --check-prefix=SM30
+; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_30 -mattr=+ptx50 | %ptxas-verify %}
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | FileCheck %s --check-prefix=SM70
+; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_70 -mattr=+ptx60 | %ptxas-verify %}
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx87 | FileCheck %s --check-prefix=SM90
+; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_90 -mattr=+ptx87 | %ptxas-verify %}
+
+define void @fence_acquire_sys() {
+; SM30-LABEL: fence_acquire_sys(
+; SM30:       {
+; SM30-EMPTY:
+; SM30-EMPTY:
+; SM30-NEXT:  // %bb.0:
+; SM30-NEXT:    membar.sys;
+; SM30-NEXT:    ret;
+;
+; SM70-LABEL: fence_acquire_sys(
+; SM70:       {
+; SM70-EMPTY:
+; SM70-EMPTY:
+; SM70-NEXT:  // %bb.0:
+; SM70-NEXT:    fence.acq_rel.sys;
+; SM70-NEXT:    ret;
+;
+; SM90-LABEL: fence_acquire_sys(
+; SM90:       {
+; SM90-EMPTY:
+; SM90-EMPTY:
+; SM90-NEXT:  // %bb.0:
+; SM90-NEXT:    fence.acquire.sys;
+; SM90-NEXT:    ret;
+    fence syncscope("") acquire
+    ret void
+}
+
+
+define void @fence_acquire_cta() {
+; SM30-LABEL: fence_acquire_cta(
+; SM30:       {
+; SM30-EMPTY:
+; SM30-EMPTY:
+; SM30-NEXT:  // %bb.0:
+; SM30-NEXT:    membar.cta;
+; SM30-NEXT:    ret;
+;
+; SM70-LABEL: fence_acquire_cta(
+; SM70:       {
+; SM70-EMPTY:
+; SM70-EMPTY:
+; SM70-NEXT:  // %bb.0:
+; SM70-NEXT:    fence.acq_rel.cta;
+; SM70-NEXT:    ret;
+;
+; SM90-LABEL: fence_acquire_cta(
+; SM90:       {
+; SM90-EMPTY:
+; SM90-EMPTY:
+; SM90-NEXT:  // %bb.0:
+; SM90-NEXT:    fence.acquire.cta;
+; SM90-NEXT:    ret;
+    fence syncscope("block") acquire
+    ret void
+}
+
+
+define void @fence_acquire_gpu() {
+; SM30-LABEL: fence_acquire_gpu(
+; SM30:       {
+; SM30-EMPTY:
+; SM30-EMPTY:
+; SM30-NEXT:  // %bb.0:
+; SM30-NEXT:    membar.gl;
+; SM30-NEXT:    ret;
+;
+; SM70-LABEL: fence_acquire_gpu(
+; SM70:       {
+; SM70-EMPTY:
+; SM70-EMPTY:
+; SM70-NEXT:  // %bb.0:
+; SM70-NEXT:    fence.acq_rel.gpu;
+; SM70-NEXT:    ret;
+;
+; SM90-LABEL: fence_acquire_gpu(
+; SM90:       {
+; SM90-EMPTY:
+; SM90-EMPTY:
+; SM90-NEXT:  // %bb.0:
+; SM90-NEXT:    fence.acquire.gpu;
+; SM90-NEXT:    ret;
+    fence syncscope("device") acquire
+    ret void
+}
+
+
+define void @fence_release_sys() {
+; SM30-LABEL: fence_release_sys(
+; SM30:       {
+; SM30-EMPTY:
+; SM30-EMPTY:
+; SM30-NEXT:  // %bb.0:
+; SM30-NEXT:    membar.sys;
+; SM30-NEXT:    ret;
+;
+; SM70-LABEL: fence_release_sys(
+; SM70:       {
+; SM70-EMPTY:
+; SM70-EMPTY:
+; SM70-NEXT:  // %bb.0:
+; SM70-NEXT:    fence.acq_rel.sys;
+; SM70-NEXT:    ret;
+;
+; SM90-LABEL: fence_release_sys(
+; SM90:       {
+; SM90-EMPTY:
+; SM90-EMPTY:
+; SM90-NEXT:  // %bb.0:
+; SM90-NEXT:    fence.release.sys;
+; SM90-NEXT:    ret;
+    fence syncscope("") release
+    ret void
+}
+
+
+define void @fence_release_cta() {
+; SM30-LABEL: fence_release_cta(
+; SM30:       {
+; SM30-EMPTY:
+; SM30-EMPTY:
+; SM30-NEXT:  // %bb.0:
+; SM30-NEXT:    membar.cta;
+; SM30-NEXT:    ret;
+;
+; SM70-LABEL: fence_release_cta(
+; SM70:       {
+; SM70-EMPTY:
+; SM70-EMPTY:
+; SM70-NEXT:  // %bb.0:
+; SM70-NEXT:    fence.acq_rel.cta;
+; SM70-NEXT:    ret;
+;
+; SM90-LABEL: fence_release_cta(
+; SM90:       {
+; SM90-EMPTY:
+; SM90-EMPTY:
+; SM90-NEXT:  // %bb.0:
+; SM90-NEXT:    fence.release.cta;
+; SM90-NEXT:    ret;
+    fence syncscope("block") release
+    ret void
+}
+
+
+define void @fence_release_gpu() {
+; SM30-LABEL: fence_release_gpu(
+; SM30:       {
+; SM30-EMPTY:
+; SM30-EMPTY:
+; SM30-NEXT:  // %bb.0:
+; SM30-NEXT:    membar.gl;
+; SM30-NEXT:    ret;
+;
+; SM70-LABEL: fence_release_gpu(
+; SM70:       {
+; SM70-EMPTY:
+; SM70-EMPTY:
+; SM70-NEXT:  // %bb.0:
+; SM70-NEXT:    fence.acq_rel.gpu;
+; SM70-NEXT:    ret;
+;
+; SM90-LABEL: fence_release_gpu(
+; SM90:       {
+; SM90-EMPTY:
+; SM90-EMPTY:
+; SM90-NEXT:  // %bb.0:
+; SM90-NEXT:    fence.release.gpu;
+; SM90-NEXT:    ret;
+    fence syncscope("device") release
+    ret void
+}
+
+
+define void @fence_acq_rel_sys() {
+; SM30-LABEL: fence_acq_rel_sys(
+; SM30:       {
+; SM30-EMPTY:
+; SM30-EMPTY:
+; SM30-NEXT:  // %bb.0:
+; SM30-NEXT:    membar.sys;
+; SM30-NEXT:    ret;
+;
+; SM70-LABEL: fence_acq_rel_sys(
+; SM70:       {
+; SM70-EMPTY:
+; SM70-EMPTY:
+; SM70-NEXT:  // %bb.0:
+; SM70-NEXT:    fence.acq_rel.sys;
+; SM70-NEXT:    ret;
+;
+; SM90-LABEL: fence_acq_rel_sys(
+; SM90:       {
+; SM90-EMPTY:
+; SM90-EMPTY:
+; SM90-NEXT:  // %bb.0:
+; SM90-NEXT:    fence.acq_rel.sys;
+; SM90-NEXT:    ret;
+    fence syncscope("") acq_rel
+    ret void
+}
+
+
+define void @fence_acq_rel_cta() {
+; SM30-LABEL: fence_acq_rel_cta(
+; SM30:       {
+; SM30-EMPTY:
+; SM30-EMPTY:
+; SM30-NEXT:  // %bb.0:
+; SM30-NEXT:    membar.cta;
+; SM30-NEXT:    ret;
+;
+; SM70-LABEL: fence_acq_rel_cta(
+; SM70:       {
+; SM70-EMPTY:
+; SM70-EMPTY:
+; SM70-NEXT:  // %bb.0:
+; SM70-NEXT:    fence.acq_rel.cta;
+; SM70-NEXT:    ret;
+;
+; SM90-LABEL: fence_acq_rel_cta(
+; SM90:       {
+; SM90-EMPTY:
+; SM90-EMPTY:
+; SM90-NEXT:  // %bb.0:
+; SM90-NEXT:    fence.acq_rel.cta;
+; SM90-NEXT:    ret;
+    fence syncscope("block") acq_rel
+    ret void
+}
+
+
+define void @fence_acq_rel_gpu() {
+; SM30-LABEL: fence_acq_rel_gpu(
+; SM30:       {
+; SM30-EMPTY:
+; SM30-EMPTY:
+; SM30-NEXT:  // %bb.0:
+; SM30-NEXT:    membar.gl;
+; SM30-NEXT:    ret;
+;
+; SM70-LABEL: fence_acq_rel_gpu(
+; SM70:       {
+; SM70-EMPTY:
+; SM70-EMPTY:
+; SM70-NEXT:  // %bb.0:
+; SM70-NEXT:    fence.acq_rel.gpu;
+; SM70-NEXT:    ret;
+;
+; SM90-LABEL: fence_acq_rel_gpu(
+; SM90:       {
+; SM90-EMPTY:
+; SM90-EMPTY:
+; SM90-NEXT:  // %bb.0:
+; SM90-NEXT:    fence.acq_rel.gpu;
+; SM90-NEXT:    ret;
+    fence syncscope("device") acq_rel
+    ret void
+}
+
+
+define void @fence_seq_cst_sys() {
+; SM30-LABEL: fence_seq_cst_sys(
+; SM30:       {
+; SM30-EMPTY:
+; SM30-EMPTY:
+; SM30-NEXT:  // %bb.0:
+; SM30-NEXT:    membar.sys;
+; SM30-NEXT:    ret;
+;
+; SM70-LABEL: fence_seq_cst_sys(
+; SM70:       {
+; SM70-EMPTY:
+; SM70-EMPTY:
+; SM70-NEXT:  // %bb.0:
+; SM70-NEXT:    fence.sc.sys;
+; SM70-NEXT:    ret;
+;
+; SM90-LABEL: fence_seq_cst_sys(
+; SM90:       {
+; SM90-EMPTY:
+; SM90-EMPTY:
+; SM90-NEXT:  // %bb.0:
+; SM90-NEXT:    fence.sc.sys;
+; SM90-NEXT:    ret;
+    fence syncscope("") seq_cst
+    ret void
+}
+
+
+define void @fence_seq_cst_cta() {
+; SM30-LABEL: fence_seq_cst_cta(
+; SM30:       {
+; SM30-EMPTY:
+; SM30-EMPTY:
+; SM30-NEXT:  // %bb.0:
+; SM30-NEXT:    membar.cta;
+; SM30-NEXT:    ret;
+;
+; SM70-LABEL: fence_seq_cst_cta(
+; SM70:       {
+; SM70-EMPTY:
+; SM70-EMPTY:
+; SM70-NEXT:  // %bb.0:
+; SM70-NEXT:    fence.sc.cta;
+; SM70-NEXT:    ret;
+;
+; SM90-LABEL: fence_seq_cst_cta(
+; SM90:       {
+; SM90-EMPTY:
+; SM90-EMPTY:
+; SM90-NEXT:  // %bb.0:
+; SM90-NEXT:    fence.sc.cta;
+; SM90-NEXT:    ret;
+    fence syncscope("block") seq_cst
+    ret void
+}
+
+
+define void @fence_seq_cst_gpu() {
+; SM30-LABEL: fence_seq_cst_gpu(
+; SM30:       {
+; SM30-EMPTY:
+; SM30-EMPTY:
+; SM30-NEXT:  // %bb.0:
+; SM30-NEXT:    membar.gl;
+; SM30-NEXT:    ret;
+;
+; SM70-LABEL: fence_seq_cst_gpu(
+; SM70:       {
+; SM70-EMPTY:
+; SM70-EMPTY:
+; SM70-NEXT:  // %bb.0:
+; SM70-NEXT:    fence.sc.gpu;
+; SM70-NEXT:    ret;
+;
+; SM90-LABEL: fence_seq_cst_gpu(
+; SM90:       {
+; SM90-EMPTY:
+; SM90-EMPTY:
+; SM90-NEXT:  // %bb.0:
+; SM90-NEXT:    fence.sc.gpu;
+; SM90-NEXT:    ret;
+    fence syncscope("device") seq_cst
+    ret void
+}
+
diff --git a/llvm/test/CodeGen/NVPTX/fence-sm-90.ll b/llvm/test/CodeGen/NVPTX/fence-sm-90.ll
deleted file mode 100644
index dce39bf..0000000
--- a/llvm/test/CodeGen/NVPTX/fence-sm-90.ll
+++ /dev/null
@@ -1,30 +0,0 @@
-; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 | FileCheck %s
-; RUN: %if ptxas-12.2 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 | %ptxas-verify -arch=sm_90 %}
-
-; CHECK-LABEL: fence_sc_cluster
-define void @fence_sc_cluster() local_unnamed_addr {
-  ; CHECK: fence.sc.cluster
-  fence syncscope("cluster") seq_cst
-  ret void
-}
-
-; CHECK-LABEL: fence_acq_rel_cluster
-define void @fence_acq_rel_cluster() local_unnamed_addr {
-  ; CHECK: fence.acq_rel.cluster
-  fence syncscope("cluster") acq_rel
-  ret void
-}
-
-; CHECK-LABEL: fence_release_cluster
-define void @fence_release_cluster() local_unnamed_addr {
-  ; CHECK: fence.acq_rel.cluster
-  fence syncscope("cluster") release
-  ret void
-}
-
-; CHECK-LABEL: fence_acquire_cluster
-define void @fence_acquire_cluster() local_unnamed_addr {
-  ; CHECK: fence.acq_rel.cluster
-  fence syncscope("cluster") acquire
-  ret void
-}
diff --git a/llvm/test/CodeGen/NVPTX/fence.ll b/llvm/test/CodeGen/NVPTX/fence.ll
deleted file mode 100644
index e094ddf..0000000
--- a/llvm/test/CodeGen/NVPTX/fence.ll
+++ /dev/null
@@ -1,102 +0,0 @@
-; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=SM60
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
-; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | FileCheck %s --check-prefix=SM70
-; RUN: %if ptxas-12.2 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | %ptxas-verify -arch=sm_70 %}
-
-; TODO: implement and test thread scope.
-
-; CHECK-LABEL: fence_sc_sys
-define void @fence_sc_sys() local_unnamed_addr {
-  ; SM60: membar.sys
-  ; SM70: fence.sc.sys
-  fence seq_cst
-  ret void
-}
-
-; CHECK-LABEL: fence_acq_rel_sys
-define void @fence_acq_rel_sys() local_unnamed_addr {
-  ; SM60: membar.sys
-  ; SM70: fence.acq_rel.sys
-  fence acq_rel
-  ret void
-}
-
-; CHECK-LABEL: fence_release_sys
-define void @fence_release_sys() local_unnamed_addr {
-  ; SM60: membar.sys
-  ; SM70: fence.acq_rel.sys
-  fence release
-  ret void
-}
-
-; CHECK-LABEL: fence_acquire_sys
-define void @fence_acquire_sys() local_unnamed_addr {
-  ; SM60: membar.sys
-  ; SM70: fence.acq_rel.sys
-  fence acquire
-  ret void
-}
-
-; CHECK-LABEL: fence_sc_gpu
-define void @fence_sc_gpu() local_unnamed_addr {
-  ; SM60: membar.gl
-  ; SM70: fence.sc.gpu
-  fence syncscope("device") seq_cst
-  ret void
-}
-
-; CHECK-LABEL: fence_acq_rel_gpu
-define void @fence_acq_rel_gpu() local_unnamed_addr {
-  ; SM60: membar.gl
-  ; SM70: fence.acq_rel.gpu
-  fence syncscope("device") acq_rel
-  ret void
-}
-
-; CHECK-LABEL: fence_release_gpu
-define void @fence_release_gpu() local_unnamed_addr {
-  ; SM60: membar.gl
-  ; SM70: fence.acq_rel.gpu
-  fence syncscope("device") release
-  ret void
-}
-
-; CHECK-LABEL: fence_acquire_gpu
-define void @fence_acquire_gpu() local_unnamed_addr {
-  ; SM60: membar.gl
-  ; SM70: fence.acq_rel.gpu
-  fence syncscope("device") acquire
-  ret void
-}
-
-; CHECK-LABEL: fence_sc_cta
-define void @fence_sc_cta() local_unnamed_addr {
-  ; SM60: membar.cta
-  ; SM70: fence.sc.cta
-  fence syncscope("block") seq_cst
-  ret void
-}
-
-; CHECK-LABEL: fence_acq_rel_cta
-define void @fence_acq_rel_cta() local_unnamed_addr {
-  ; SM60: membar.cta
-  ; SM70: fence.acq_rel.cta
-  fence syncscope("block") acq_rel
-  ret void
-}
-
-; CHECK-LABEL: fence_release_cta
-define void @fence_release_cta() local_unnamed_addr {
-  ; SM60: membar.cta
-  ; SM70: fence.acq_rel.cta
-  fence syncscope("block") release
-  ret void
-}
-
-; CHECK-LABEL: fence_acquire_cta
-define void @fence_acquire_cta() local_unnamed_addr {
-  ; SM60: membar.cta
-  ; SM70: fence.acq_rel.cta
-  fence syncscope("block") acquire
-  ret void
-}
-\ No newline at end of file
diff --git a/llvm/test/CodeGen/NVPTX/fence.py b/llvm/test/CodeGen/NVPTX/fence.py
new file mode 100644
index 0000000..b9f9d29
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/fence.py
@@ -0,0 +1,56 @@
+# For manual usage, not as a part of lit tests. Used for generating the following tests:
+# fence-sm30.ll, fence-sm70.ll, fence-sm90.ll
+
+from string import Template
+from itertools import product
+
+fence_func = Template(
+    """
+define void @fence_${ordering}_${ptx_scope}() {
+    fence syncscope(\"${llvm_scope}\") ${ordering}
+    ret void
+}
+"""
+)
+
+run_statement = Template(
+    """; RUN: llc < %s -march=nvptx64 -mcpu=sm_${sm} -mattr=+ptx${ptx} | FileCheck %s --check-prefix=SM${sm}
+; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_${sm} -mattr=+ptx${ptx} | %ptxas-verify %}"""
+)
+
+# (sm, ptx)
+TESTS = [(30, 50), (70, 60), (90, 87)]
+
+LLVM_SCOPES_NO_CLUSTER = ["", "block", "device"]
+
+SCOPE_LLVM_TO_PTX = {"": "sys", "block": "cta", "cluster": "cluster", "device": "gpu"}
+
+ORDERINGS = ["acquire", "release", "acq_rel", "seq_cst"]
+
+if __name__ == "__main__":
+    # non-cluster orderings are supported on SM30, SM70 and SM90
+    with open("fence-nocluster.ll", "w") as fp:
+        for sm, ptx in TESTS:
+            print(run_statement.substitute(sm=sm, ptx=ptx), file=fp)
+        for ordering, llvm_scope in product(ORDERINGS, LLVM_SCOPES_NO_CLUSTER):
+            print(
+                fence_func.substitute(
+                    llvm_scope=llvm_scope,
+                    ptx_scope=SCOPE_LLVM_TO_PTX[llvm_scope],
+                    ordering=ordering,
+                ),
+                file=fp,
+            )
+
+    # cluster ordering only supported on SM90
+    with open("fence-cluster.ll", "w") as fp:
+        print(run_statement.substitute(sm=90, ptx=87), file=fp)
+        for ordering in ORDERINGS:
+            print(
+                fence_func.substitute(
+                    llvm_scope="cluster",
+                    ptx_scope=SCOPE_LLVM_TO_PTX["cluster"],
+                    ordering=ordering,
+                ),
+                file=fp,
+            )
diff --git a/llvm/test/CodeGen/NVPTX/lit.local.cfg b/llvm/test/CodeGen/NVPTX/lit.local.cfg
index e3f06d1..54a6c33 100644
--- a/llvm/test/CodeGen/NVPTX/lit.local.cfg
+++ b/llvm/test/CodeGen/NVPTX/lit.local.cfg
@@ -1,3 +1,4 @@
 if not "NVPTX" in config.root.targets:
     config.unsupported = True
 config.suffixes.add(".py")
+config.excludes = ["fence.py"]
diff --git a/llvm/test/CodeGen/PowerPC/aggressive-anti-dep-breaker-subreg.ll b/llvm/test/CodeGen/PowerPC/aggressive-anti-dep-breaker-subreg.ll
index 1dac1b0..0846f72 100644
--- a/llvm/test/CodeGen/PowerPC/aggressive-anti-dep-breaker-subreg.ll
+++ b/llvm/test/CodeGen/PowerPC/aggressive-anti-dep-breaker-subreg.ll
@@ -9,7 +9,7 @@ entry:
 
 lnext:
   %elementArray = load ptr, ptr %elementArrayPtr, align 8
-; CHECK: lwz [[LDREG:[0-9]+]], 140(1)                   # 4-byte Folded Reload
+; CHECK: ld [[LDREG:[0-9]+]], 120(1)                   # 8-byte Folded Reload
 ; CHECK: # implicit-def: $x[[TEMPREG:[0-9]+]]
   %element = load i32, ptr %elementArray, align 4
 ; CHECK: mr [[TEMPREG]], [[LDREG]]
diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-copy-hints.ll b/llvm/test/CodeGen/PowerPC/mma-acc-copy-hints.ll
index 6b188d7..5decd9a 100644
--- a/llvm/test/CodeGen/PowerPC/mma-acc-copy-hints.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-acc-copy-hints.ll
@@ -26,14 +26,18 @@ define void @testMultiply(ptr nocapture noundef readonly %a, ptr nocapture nound
 ; CHECK-NEXT:    std r29, -24(r30) # 8-byte Folded Spill
 ; CHECK-NEXT:    mr r29, r5
 ; CHECK-NEXT:    bl _Z15buildVectorPairPu13__vector_pairDv16_hS0_@notoc
-; CHECK-NEXT:    xxsetaccz acc0
-; CHECK-NEXT:    xvf32gerpp acc0, v31, v30
+; CHECK-NEXT:    xxsetaccz acc1
+; CHECK-NEXT:    xvf32gerpp acc1, v31, v30
 ; CHECK-NEXT:    lxv v3, 32(r1)
-; CHECK-NEXT:    lxv v2, 48(r1)
-; CHECK-NEXT:    xvf32gerpp acc0, v3, v2
+; CHECK-NEXT:    lxv vs0, 48(r1)
+; CHECK-NEXT:    xvf32gerpp acc1, v3, vs0
 ; CHECK-NEXT:    lxv v31, -48(r30) # 16-byte Folded Reload
 ; CHECK-NEXT:    lxv v30, -64(r30) # 16-byte Folded Reload
-; CHECK-NEXT:    xxmfacc acc0
+; CHECK-NEXT:    xxmfacc acc1
+; CHECK-NEXT:    xxlor vs0, vs4, vs4
+; CHECK-NEXT:    xxlor vs1, vs5, vs5
+; CHECK-NEXT:    xxlor vs2, vs6, vs6
+; CHECK-NEXT:    xxlor vs3, vs7, vs7
 ; CHECK-NEXT:    stxv vs3, 0(r29)
 ; CHECK-NEXT:    pstxv vs2, 8(r29), 0
 ; CHECK-NEXT:    stxv vs1, 16(r29)
@@ -68,8 +72,8 @@ define void @testMultiply(ptr nocapture noundef readonly %a, ptr nocapture nound
 ; CHECK-BE-NEXT:    xxsetaccz acc1
 ; CHECK-BE-NEXT:    xvf32gerpp acc1, v31, v30
 ; CHECK-BE-NEXT:    lxv v3, 144(r1)
-; CHECK-BE-NEXT:    lxv v2, 128(r1)
-; CHECK-BE-NEXT:    xvf32gerpp acc1, v2, v3
+; CHECK-BE-NEXT:    lxv vs0, 128(r1)
+; CHECK-BE-NEXT:    xvf32gerpp acc1, vs0, v3
 ; CHECK-BE-NEXT:    lxv v31, -48(r30) # 16-byte Folded Reload
 ; CHECK-BE-NEXT:    lxv v30, -64(r30) # 16-byte Folded Reload
 ; CHECK-BE-NEXT:    xxmfacc acc1
diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
index 7c19744..31ddc61 100644
--- a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
@@ -376,8 +376,8 @@ define dso_local void @testLdStPair(i64 %SrcIdx, i64 %DstIdx) {
 ; LE-PAIRED-LABEL: testLdStPair:
 ; LE-PAIRED:       # %bb.0: # %entry
 ; LE-PAIRED-NEXT:    plxv v3, g@PCREL+32(0), 1
-; LE-PAIRED-NEXT:    plxv v2, g@PCREL+48(0), 1
-; LE-PAIRED-NEXT:    pstxv v2, g@PCREL+80(0), 1
+; LE-PAIRED-NEXT:    plxv vs0, g@PCREL+48(0), 1
+; LE-PAIRED-NEXT:    pstxv vs0, g@PCREL+80(0), 1
 ; LE-PAIRED-NEXT:    pstxv v3, g@PCREL+64(0), 1
 ; LE-PAIRED-NEXT:    blr
 ;
@@ -386,9 +386,9 @@ define dso_local void @testLdStPair(i64 %SrcIdx, i64 %DstIdx) {
 ; BE-PAIRED-NEXT:    addis r3, r2, g@toc@ha
 ; BE-PAIRED-NEXT:    addi r3, r3, g@toc@l
 ; BE-PAIRED-NEXT:    lxv v3, 48(r3)
-; BE-PAIRED-NEXT:    lxv v2, 32(r3)
+; BE-PAIRED-NEXT:    lxv vs0, 32(r3)
 ; BE-PAIRED-NEXT:    stxv v3, 80(r3)
-; BE-PAIRED-NEXT:    stxv v2, 64(r3)
+; BE-PAIRED-NEXT:    stxv vs0, 64(r3)
 ; BE-PAIRED-NEXT:    blr
 ;
 ; LE-PWR9-LABEL: testLdStPair:
@@ -455,9 +455,9 @@ define dso_local void @testXLdStPair(i64 %SrcIdx, i64 %DstIdx) {
 ; LE-PAIRED-NEXT:    lxvx v3, r5, r3
 ; LE-PAIRED-NEXT:    sldi r3, r4, 5
 ; LE-PAIRED-NEXT:    add r4, r5, r3
-; LE-PAIRED-NEXT:    lxv v2, 16(r6)
+; LE-PAIRED-NEXT:    lxv vs0, 16(r6)
 ; LE-PAIRED-NEXT:    stxvx v3, r5, r3
-; LE-PAIRED-NEXT:    stxv v2, 16(r4)
+; LE-PAIRED-NEXT:    stxv vs0, 16(r4)
 ; LE-PAIRED-NEXT:    blr
 ;
 ; BE-PAIRED-LABEL: testXLdStPair:
@@ -466,11 +466,11 @@ define dso_local void @testXLdStPair(i64 %SrcIdx, i64 %DstIdx) {
 ; BE-PAIRED-NEXT:    sldi r3, r3, 5
 ; BE-PAIRED-NEXT:    addi r5, r5, g@toc@l
 ; BE-PAIRED-NEXT:    add r6, r5, r3
-; BE-PAIRED-NEXT:    lxvx v2, r5, r3
+; BE-PAIRED-NEXT:    lxvx vs0, r5, r3
 ; BE-PAIRED-NEXT:    sldi r3, r4, 5
 ; BE-PAIRED-NEXT:    add r4, r5, r3
 ; BE-PAIRED-NEXT:    lxv v3, 16(r6)
-; BE-PAIRED-NEXT:    stxvx v2, r5, r3
+; BE-PAIRED-NEXT:    stxvx vs0, r5, r3
 ; BE-PAIRED-NEXT:    stxv v3, 16(r4)
 ; BE-PAIRED-NEXT:    blr
 ;
@@ -543,8 +543,8 @@ define dso_local void @testUnalignedLdStPair() {
 ; LE-PAIRED-LABEL: testUnalignedLdStPair:
 ; LE-PAIRED:       # %bb.0: # %entry
 ; LE-PAIRED-NEXT:    plxv v3, g@PCREL+11(0), 1
-; LE-PAIRED-NEXT:    plxv v2, g@PCREL+27(0), 1
-; LE-PAIRED-NEXT:    pstxv v2, g@PCREL+35(0), 1
+; LE-PAIRED-NEXT:    plxv vs0, g@PCREL+27(0), 1
+; LE-PAIRED-NEXT:    pstxv vs0, g@PCREL+35(0), 1
 ; LE-PAIRED-NEXT:    pstxv v3, g@PCREL+19(0), 1
 ; LE-PAIRED-NEXT:    blr
 ;
@@ -553,9 +553,9 @@ define dso_local void @testUnalignedLdStPair() {
 ; BE-PAIRED-NEXT:    addis r3, r2, g@toc@ha
 ; BE-PAIRED-NEXT:    addi r3, r3, g@toc@l
 ; BE-PAIRED-NEXT:    plxv v3, 27(r3), 0
-; BE-PAIRED-NEXT:    plxv v2, 11(r3), 0
+; BE-PAIRED-NEXT:    plxv vs0, 11(r3), 0
 ; BE-PAIRED-NEXT:    pstxv v3, 35(r3), 0
-; BE-PAIRED-NEXT:    pstxv v2, 19(r3), 0
+; BE-PAIRED-NEXT:    pstxv vs0, 19(r3), 0
 ; BE-PAIRED-NEXT:    blr
 ;
 ; LE-PWR9-LABEL: testUnalignedLdStPair:
diff --git a/llvm/test/CodeGen/PowerPC/paired-vector-intrinsics.ll b/llvm/test/CodeGen/PowerPC/paired-vector-intrinsics.ll
index cfa9315..dd62b0d 100644
--- a/llvm/test/CodeGen/PowerPC/paired-vector-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/paired-vector-intrinsics.ll
@@ -20,14 +20,14 @@ declare <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8>, <16 x i8>)
 define void @ass_pair(ptr %ptr, <16 x i8> %vc) {
 ; CHECK-LABEL: ass_pair:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    stxv v2, 0(r3)
 ; CHECK-NEXT:    stxv v2, 16(r3)
+; CHECK-NEXT:    stxv v2, 0(r3)
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-NOMMA-LABEL: ass_pair:
 ; CHECK-NOMMA:       # %bb.0: # %entry
-; CHECK-NOMMA-NEXT:    stxv v2, 0(r3)
 ; CHECK-NOMMA-NEXT:    stxv v2, 16(r3)
+; CHECK-NOMMA-NEXT:    stxv v2, 0(r3)
 ; CHECK-NOMMA-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: ass_pair:
@@ -53,32 +53,32 @@ define void @disass_pair(ptr %ptr1, ptr %ptr2, ptr %ptr3) {
 ; CHECK-LABEL: disass_pair:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    lxv v3, 0(r3)
-; CHECK-NEXT:    lxv v2, 16(r3)
+; CHECK-NEXT:    lxv vs0, 16(r3)
 ; CHECK-NEXT:    stxv v3, 0(r4)
-; CHECK-NEXT:    stxv v2, 0(r5)
+; CHECK-NEXT:    stxv vs0, 0(r5)
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-NOMMA-LABEL: disass_pair:
 ; CHECK-NOMMA:       # %bb.0: # %entry
 ; CHECK-NOMMA-NEXT:    lxv v3, 0(r3)
-; CHECK-NOMMA-NEXT:    lxv v2, 16(r3)
+; CHECK-NOMMA-NEXT:    lxv vs0, 16(r3)
 ; CHECK-NOMMA-NEXT:    stxv v3, 0(r4)
-; CHECK-NOMMA-NEXT:    stxv v2, 0(r5)
+; CHECK-NOMMA-NEXT:    stxv vs0, 0(r5)
 ; CHECK-NOMMA-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: disass_pair:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    lxv v3, 16(r3)
-; CHECK-BE-NEXT:    lxv v2, 0(r3)
-; CHECK-BE-NEXT:    stxv v2, 0(r4)
+; CHECK-BE-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r4)
 ; CHECK-BE-NEXT:    stxv v3, 0(r5)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-BE-NOMMA-LABEL: disass_pair:
 ; CHECK-BE-NOMMA:       # %bb.0: # %entry
 ; CHECK-BE-NOMMA-NEXT:    lxv v3, 16(r3)
-; CHECK-BE-NOMMA-NEXT:    lxv v2, 0(r3)
-; CHECK-BE-NOMMA-NEXT:    stxv v2, 0(r4)
+; CHECK-BE-NOMMA-NEXT:    lxv vs0, 0(r3)
+; CHECK-BE-NOMMA-NEXT:    stxv vs0, 0(r4)
 ; CHECK-BE-NOMMA-NEXT:    stxv v3, 0(r5)
 ; CHECK-BE-NOMMA-NEXT:    blr
 entry:
diff --git a/llvm/test/CodeGen/PowerPC/peephole-subreg-def.mir b/llvm/test/CodeGen/PowerPC/peephole-subreg-def.mir
index e89e206..2a234e8 100644
--- a/llvm/test/CodeGen/PowerPC/peephole-subreg-def.mir
+++ b/llvm/test/CodeGen/PowerPC/peephole-subreg-def.mir
@@ -18,16 +18,17 @@ body:             |
 
     ; CHECK-LABEL: name: test_peephole_subreg_def
     ; CHECK: liveins: $x3
-    ; CHECK: [[COPY:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x3
-    ; CHECK: [[ADDI8_:%[0-9]+]]:g8rc = ADDI8 [[COPY]], 1
-    ; CHECK: [[EXTSW:%[0-9]+]]:g8rc_and_g8rc_nox0 = EXTSW [[ADDI8_]]
-    ; CHECK: [[LI8_:%[0-9]+]]:g8rc = LI8 0
-    ; CHECK: STB8 [[LI8_]], 0, [[EXTSW]]
-    ; CHECK: [[COPY1:%[0-9]+]]:gprc_and_gprc_nor0 = COPY [[EXTSW]].sub_32
-    ; CHECK: [[COPY2:%[0-9]+]]:gprc_and_gprc_nor0 = COPY [[COPY1]]
-    ; CHECK: [[ADDI:%[0-9]+]]:gprc = ADDI killed [[COPY2]], 1
-    ; CHECK: [[EXTSW_32_64_:%[0-9]+]]:g8rc_and_g8rc_nox0 = EXTSW_32_64 killed [[ADDI]]
-    ; CHECK: STB8 [[LI8_]], 0, killed [[EXTSW_32_64_]]
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x3
+    ; CHECK-NEXT: [[ADDI8_:%[0-9]+]]:g8rc = ADDI8 [[COPY]], 1
+    ; CHECK-NEXT: [[EXTSW:%[0-9]+]]:g8rc_and_g8rc_nox0 = EXTSW [[ADDI8_]]
+    ; CHECK-NEXT: [[LI8_:%[0-9]+]]:g8rc = LI8 0
+    ; CHECK-NEXT: STB8 [[LI8_]], 0, [[EXTSW]]
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprc_and_gprc_nor0 = COPY [[EXTSW]].sub_32
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gprc_and_gprc_nor0 = COPY [[EXTSW]].sub_32
+    ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gprc = ADDI killed [[COPY2]], 1
+    ; CHECK-NEXT: [[EXTSW_32_64_:%[0-9]+]]:g8rc_and_g8rc_nox0 = EXTSW_32_64 killed [[ADDI]]
+    ; CHECK-NEXT: STB8 [[LI8_]], 0, killed [[EXTSW_32_64_]]
     %0:g8rc_and_g8rc_nox0 = COPY $x3
     %1:g8rc = ADDI8 %0, 1
     %2:g8rc_and_g8rc_nox0 = EXTSW %1
diff --git a/llvm/test/CodeGen/PowerPC/topdepthreduce-postra.mir b/llvm/test/CodeGen/PowerPC/topdepthreduce-postra.mir
index 627e553..8bdbe28 100644
--- a/llvm/test/CodeGen/PowerPC/topdepthreduce-postra.mir
+++ b/llvm/test/CodeGen/PowerPC/topdepthreduce-postra.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -run-pass=postmisched -o - %s | FileCheck %s
+# RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -passes=postmisched -o - %s | FileCheck %s
 ---
 # Check that postmisched's TopDepthReduce heuristic moves the MULLD later
 # because of the dependency on x5
diff --git a/llvm/test/CodeGen/RISCV/memmove.ll b/llvm/test/CodeGen/RISCV/memmove.ll
new file mode 100644
index 0000000..62915bd4
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/memmove.ll
@@ -0,0 +1,670 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=riscv32 \
+; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32
+; RUN: llc < %s -mtriple=riscv64 \
+; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64
+; RUN: llc < %s -mtriple=riscv32 -mattr=+unaligned-scalar-mem \
+; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST
+; RUN: llc < %s -mtriple=riscv64 -mattr=+unaligned-scalar-mem \
+; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST
+
+; ----------------------------------------------------------------------
+; Fully unaligned cases
+
+define void @unaligned_memmove0(ptr nocapture %dest, ptr %src) nounwind {
+; RV32-BOTH-LABEL: unaligned_memmove0:
+; RV32-BOTH:       # %bb.0: # %entry
+; RV32-BOTH-NEXT:    ret
+;
+; RV64-BOTH-LABEL: unaligned_memmove0:
+; RV64-BOTH:       # %bb.0: # %entry
+; RV64-BOTH-NEXT:    ret
+entry:
+  tail call void @llvm.memmove.p0.p0.i64(ptr %dest, ptr %src, i64 0, i1 false)
+  ret void
+}
+
+define void @unaligned_memmove1(ptr nocapture %dest, ptr %src) nounwind {
+; RV32-BOTH-LABEL: unaligned_memmove1:
+; RV32-BOTH:       # %bb.0: # %entry
+; RV32-BOTH-NEXT:    lbu a1, 0(a1)
+; RV32-BOTH-NEXT:    sb a1, 0(a0)
+; RV32-BOTH-NEXT:    ret
+;
+; RV64-BOTH-LABEL: unaligned_memmove1:
+; RV64-BOTH:       # %bb.0: # %entry
+; RV64-BOTH-NEXT:    lbu a1, 0(a1)
+; RV64-BOTH-NEXT:    sb a1, 0(a0)
+; RV64-BOTH-NEXT:    ret
+entry:
+  tail call void @llvm.memmove.p0.p0.i64(ptr %dest, ptr %src, i64 1, i1 false)
+  ret void
+}
+
+define void @unaligned_memmove2(ptr nocapture %dest, ptr %src) nounwind {
+; RV32-LABEL: unaligned_memmove2:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    lbu a2, 0(a1)
+; RV32-NEXT:    lbu a1, 1(a1)
+; RV32-NEXT:    sb a2, 0(a0)
+; RV32-NEXT:    sb a1, 1(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: unaligned_memmove2:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    lbu a2, 0(a1)
+; RV64-NEXT:    lbu a1, 1(a1)
+; RV64-NEXT:    sb a2, 0(a0)
+; RV64-NEXT:    sb a1, 1(a0)
+; RV64-NEXT:    ret
+;
+; RV32-FAST-LABEL: unaligned_memmove2:
+; RV32-FAST:       # %bb.0: # %entry
+; RV32-FAST-NEXT:    lh a1, 0(a1)
+; RV32-FAST-NEXT:    sh a1, 0(a0)
+; RV32-FAST-NEXT:    ret
+;
+; RV64-FAST-LABEL: unaligned_memmove2:
+; RV64-FAST:       # %bb.0: # %entry
+; RV64-FAST-NEXT:    lh a1, 0(a1)
+; RV64-FAST-NEXT:    sh a1, 0(a0)
+; RV64-FAST-NEXT:    ret
+entry:
+  tail call void @llvm.memmove.p0.p0.i64(ptr %dest, ptr %src, i64 2, i1 false)
+  ret void
+}
+
+define void @unaligned_memmove3(ptr nocapture %dest, ptr %src) nounwind {
+; RV32-LABEL: unaligned_memmove3:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    lbu a2, 0(a1)
+; RV32-NEXT:    lbu a3, 1(a1)
+; RV32-NEXT:    lbu a1, 2(a1)
+; RV32-NEXT:    sb a2, 0(a0)
+; RV32-NEXT:    sb a3, 1(a0)
+; RV32-NEXT:    sb a1, 2(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: unaligned_memmove3:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    lbu a2, 0(a1)
+; RV64-NEXT:    lbu a3, 1(a1)
+; RV64-NEXT:    lbu a1, 2(a1)
+; RV64-NEXT:    sb a2, 0(a0)
+; RV64-NEXT:    sb a3, 1(a0)
+; RV64-NEXT:    sb a1, 2(a0)
+; RV64-NEXT:    ret
+;
+; RV32-FAST-LABEL: unaligned_memmove3:
+; RV32-FAST:       # %bb.0: # %entry
+; RV32-FAST-NEXT:    lh a2, 0(a1)
+; RV32-FAST-NEXT:    lbu a1, 2(a1)
+; RV32-FAST-NEXT:    sh a2, 0(a0)
+; RV32-FAST-NEXT:    sb a1, 2(a0)
+; RV32-FAST-NEXT:    ret
+;
+; RV64-FAST-LABEL: unaligned_memmove3:
+; RV64-FAST:       # %bb.0: # %entry
+; RV64-FAST-NEXT:    lh a2, 0(a1)
+; RV64-FAST-NEXT:    lbu a1, 2(a1)
+; RV64-FAST-NEXT:    sh a2, 0(a0)
+; RV64-FAST-NEXT:    sb a1, 2(a0)
+; RV64-FAST-NEXT:    ret
+entry:
+  tail call void @llvm.memmove.p0.p0.i64(ptr %dest, ptr %src, i64 3, i1 false)
+  ret void
+}
+
+define void @unaligned_memmove4(ptr nocapture %dest, ptr %src) nounwind {
+; RV32-LABEL: unaligned_memmove4:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    lbu a2, 0(a1)
+; RV32-NEXT:    lbu a3, 1(a1)
+; RV32-NEXT:    lbu a4, 2(a1)
+; RV32-NEXT:    lbu a1, 3(a1)
+; RV32-NEXT:    sb a2, 0(a0)
+; RV32-NEXT:    sb a3, 1(a0)
+; RV32-NEXT:    sb a4, 2(a0)
+; RV32-NEXT:    sb a1, 3(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: unaligned_memmove4:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    lbu a2, 0(a1)
+; RV64-NEXT:    lbu a3, 1(a1)
+; RV64-NEXT:    lbu a4, 2(a1)
+; RV64-NEXT:    lbu a1, 3(a1)
+; RV64-NEXT:    sb a2, 0(a0)
+; RV64-NEXT:    sb a3, 1(a0)
+; RV64-NEXT:    sb a4, 2(a0)
+; RV64-NEXT:    sb a1, 3(a0)
+; RV64-NEXT:    ret
+;
+; RV32-FAST-LABEL: unaligned_memmove4:
+; RV32-FAST:       # %bb.0: # %entry
+; RV32-FAST-NEXT:    lw a1, 0(a1)
+; RV32-FAST-NEXT:    sw a1, 0(a0)
+; RV32-FAST-NEXT:    ret
+;
+; RV64-FAST-LABEL: unaligned_memmove4:
+; RV64-FAST:       # %bb.0: # %entry
+; RV64-FAST-NEXT:    lw a1, 0(a1)
+; RV64-FAST-NEXT:    sw a1, 0(a0)
+; RV64-FAST-NEXT:    ret
+entry:
+  tail call void @llvm.memmove.p0.p0.i64(ptr %dest, ptr %src, i64 4, i1 false)
+  ret void
+}
+
+define void @unaligned_memmove7(ptr nocapture %dest, ptr %src) nounwind {
+; RV32-LABEL: unaligned_memmove7:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    lbu a2, 4(a1)
+; RV32-NEXT:    lbu a3, 5(a1)
+; RV32-NEXT:    lbu a4, 6(a1)
+; RV32-NEXT:    lbu a5, 0(a1)
+; RV32-NEXT:    lbu a6, 1(a1)
+; RV32-NEXT:    lbu a7, 2(a1)
+; RV32-NEXT:    lbu a1, 3(a1)
+; RV32-NEXT:    sb a2, 4(a0)
+; RV32-NEXT:    sb a3, 5(a0)
+; RV32-NEXT:    sb a4, 6(a0)
+; RV32-NEXT:    sb a5, 0(a0)
+; RV32-NEXT:    sb a6, 1(a0)
+; RV32-NEXT:    sb a7, 2(a0)
+; RV32-NEXT:    sb a1, 3(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: unaligned_memmove7:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    lbu a2, 4(a1)
+; RV64-NEXT:    lbu a3, 5(a1)
+; RV64-NEXT:    lbu a4, 6(a1)
+; RV64-NEXT:    lbu a5, 0(a1)
+; RV64-NEXT:    lbu a6, 1(a1)
+; RV64-NEXT:    lbu a7, 2(a1)
+; RV64-NEXT:    lbu a1, 3(a1)
+; RV64-NEXT:    sb a2, 4(a0)
+; RV64-NEXT:    sb a3, 5(a0)
+; RV64-NEXT:    sb a4, 6(a0)
+; RV64-NEXT:    sb a5, 0(a0)
+; RV64-NEXT:    sb a6, 1(a0)
+; RV64-NEXT:    sb a7, 2(a0)
+; RV64-NEXT:    sb a1, 3(a0)
+; RV64-NEXT:    ret
+;
+; RV32-FAST-LABEL: unaligned_memmove7:
+; RV32-FAST:       # %bb.0: # %entry
+; RV32-FAST-NEXT:    lw a2, 0(a1)
+; RV32-FAST-NEXT:    lh a3, 4(a1)
+; RV32-FAST-NEXT:    lbu a1, 6(a1)
+; RV32-FAST-NEXT:    sw a2, 0(a0)
+; RV32-FAST-NEXT:    sh a3, 4(a0)
+; RV32-FAST-NEXT:    sb a1, 6(a0)
+; RV32-FAST-NEXT:    ret
+;
+; RV64-FAST-LABEL: unaligned_memmove7:
+; RV64-FAST:       # %bb.0: # %entry
+; RV64-FAST-NEXT:    lw a2, 0(a1)
+; RV64-FAST-NEXT:    lh a3, 4(a1)
+; RV64-FAST-NEXT:    lbu a1, 6(a1)
+; RV64-FAST-NEXT:    sw a2, 0(a0)
+; RV64-FAST-NEXT:    sh a3, 4(a0)
+; RV64-FAST-NEXT:    sb a1, 6(a0)
+; RV64-FAST-NEXT:    ret
+entry:
+  tail call void @llvm.memmove.p0.p0.i64(ptr %dest, ptr %src, i64 7, i1 false)
+  ret void
+}
+
+define void @unaligned_memmove8(ptr nocapture %dest, ptr %src) nounwind {
+; RV32-LABEL: unaligned_memmove8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    lbu a2, 0(a1)
+; RV32-NEXT:    lbu a3, 1(a1)
+; RV32-NEXT:    lbu a4, 2(a1)
+; RV32-NEXT:    lbu a5, 3(a1)
+; RV32-NEXT:    lbu a6, 4(a1)
+; RV32-NEXT:    lbu a7, 5(a1)
+; RV32-NEXT:    lbu t0, 6(a1)
+; RV32-NEXT:    lbu a1, 7(a1)
+; RV32-NEXT:    sb a6, 4(a0)
+; RV32-NEXT:    sb a7, 5(a0)
+; RV32-NEXT:    sb t0, 6(a0)
+; RV32-NEXT:    sb a1, 7(a0)
+; RV32-NEXT:    sb a2, 0(a0)
+; RV32-NEXT:    sb a3, 1(a0)
+; RV32-NEXT:    sb a4, 2(a0)
+; RV32-NEXT:    sb a5, 3(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: unaligned_memmove8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    lbu a2, 0(a1)
+; RV64-NEXT:    lbu a3, 1(a1)
+; RV64-NEXT:    lbu a4, 2(a1)
+; RV64-NEXT:    lbu a5, 3(a1)
+; RV64-NEXT:    lbu a6, 4(a1)
+; RV64-NEXT:    lbu a7, 5(a1)
+; RV64-NEXT:    lbu t0, 6(a1)
+; RV64-NEXT:    lbu a1, 7(a1)
+; RV64-NEXT:    sb a6, 4(a0)
+; RV64-NEXT:    sb a7, 5(a0)
+; RV64-NEXT:    sb t0, 6(a0)
+; RV64-NEXT:    sb a1, 7(a0)
+; RV64-NEXT:    sb a2, 0(a0)
+; RV64-NEXT:    sb a3, 1(a0)
+; RV64-NEXT:    sb a4, 2(a0)
+; RV64-NEXT:    sb a5, 3(a0)
+; RV64-NEXT:    ret
+;
+; RV32-FAST-LABEL: unaligned_memmove8:
+; RV32-FAST:       # %bb.0: # %entry
+; RV32-FAST-NEXT:    lw a2, 0(a1)
+; RV32-FAST-NEXT:    lw a1, 4(a1)
+; RV32-FAST-NEXT:    sw a2, 0(a0)
+; RV32-FAST-NEXT:    sw a1, 4(a0)
+; RV32-FAST-NEXT:    ret
+;
+; RV64-FAST-LABEL: unaligned_memmove8:
+; RV64-FAST:       # %bb.0: # %entry
+; RV64-FAST-NEXT:    ld a1, 0(a1)
+; RV64-FAST-NEXT:    sd a1, 0(a0)
+; RV64-FAST-NEXT:    ret
+entry:
+  tail call void @llvm.memmove.p0.p0.i64(ptr %dest, ptr %src, i64 8, i1 false)
+  ret void
+}
+
+define void @unaligned_memmove15(ptr nocapture %dest, ptr %src) nounwind {
+; RV32-LABEL: unaligned_memmove15:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    li a2, 15
+; RV32-NEXT:    tail memmove
+;
+; RV64-LABEL: unaligned_memmove15:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    li a2, 15
+; RV64-NEXT:    tail memmove
+;
+; RV32-FAST-LABEL: unaligned_memmove15:
+; RV32-FAST:       # %bb.0: # %entry
+; RV32-FAST-NEXT:    lbu a2, 14(a1)
+; RV32-FAST-NEXT:    lw a3, 0(a1)
+; RV32-FAST-NEXT:    lw a4, 4(a1)
+; RV32-FAST-NEXT:    lw a5, 8(a1)
+; RV32-FAST-NEXT:    lh a1, 12(a1)
+; RV32-FAST-NEXT:    sb a2, 14(a0)
+; RV32-FAST-NEXT:    sw a3, 0(a0)
+; RV32-FAST-NEXT:    sw a4, 4(a0)
+; RV32-FAST-NEXT:    sw a5, 8(a0)
+; RV32-FAST-NEXT:    sh a1, 12(a0)
+; RV32-FAST-NEXT:    ret
+;
+; RV64-FAST-LABEL: unaligned_memmove15:
+; RV64-FAST:       # %bb.0: # %entry
+; RV64-FAST-NEXT:    ld a2, 0(a1)
+; RV64-FAST-NEXT:    lw a3, 8(a1)
+; RV64-FAST-NEXT:    lh a4, 12(a1)
+; RV64-FAST-NEXT:    lbu a1, 14(a1)
+; RV64-FAST-NEXT:    sd a2, 0(a0)
+; RV64-FAST-NEXT:    sw a3, 8(a0)
+; RV64-FAST-NEXT:    sh a4, 12(a0)
+; RV64-FAST-NEXT:    sb a1, 14(a0)
+; RV64-FAST-NEXT:    ret
+entry:
+  tail call void @llvm.memmove.p0.p0.i64(ptr %dest, ptr %src, i64 15, i1 false)
+  ret void
+}
+
+define void @unaligned_memmove16(ptr nocapture %dest, ptr %src) nounwind {
+; RV32-LABEL: unaligned_memmove16:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:    tail memmove
+;
+; RV64-LABEL: unaligned_memmove16:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    tail memmove
+;
+; RV32-FAST-LABEL: unaligned_memmove16:
+; RV32-FAST:       # %bb.0: # %entry
+; RV32-FAST-NEXT:    lw a2, 0(a1)
+; RV32-FAST-NEXT:    lw a3, 4(a1)
+; RV32-FAST-NEXT:    lw a4, 8(a1)
+; RV32-FAST-NEXT:    lw a1, 12(a1)
+; RV32-FAST-NEXT:    sw a2, 0(a0)
+; RV32-FAST-NEXT:    sw a3, 4(a0)
+; RV32-FAST-NEXT:    sw a4, 8(a0)
+; RV32-FAST-NEXT:    sw a1, 12(a0)
+; RV32-FAST-NEXT:    ret
+;
+; RV64-FAST-LABEL: unaligned_memmove16:
+; RV64-FAST:       # %bb.0: # %entry
+; RV64-FAST-NEXT:    ld a2, 0(a1)
+; RV64-FAST-NEXT:    ld a1, 8(a1)
+; RV64-FAST-NEXT:    sd a2, 0(a0)
+; RV64-FAST-NEXT:    sd a1, 8(a0)
+; RV64-FAST-NEXT:    ret
+entry:
+  tail call void @llvm.memmove.p0.p0.i64(ptr %dest, ptr %src, i64 16, i1 false)
+  ret void
+}
+
+define void @unaligned_memmove31(ptr nocapture %dest, ptr %src) nounwind {
+; RV32-BOTH-LABEL: unaligned_memmove31:
+; RV32-BOTH:       # %bb.0: # %entry
+; RV32-BOTH-NEXT:    li a2, 31
+; RV32-BOTH-NEXT:    tail memmove
+;
+; RV64-LABEL: unaligned_memmove31:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    li a2, 31
+; RV64-NEXT:    tail memmove
+;
+; RV64-FAST-LABEL: unaligned_memmove31:
+; RV64-FAST:       # %bb.0: # %entry
+; RV64-FAST-NEXT:    lh a2, 28(a1)
+; RV64-FAST-NEXT:    lbu a3, 30(a1)
+; RV64-FAST-NEXT:    ld a4, 0(a1)
+; RV64-FAST-NEXT:    ld a5, 8(a1)
+; RV64-FAST-NEXT:    ld a6, 16(a1)
+; RV64-FAST-NEXT:    lw a1, 24(a1)
+; RV64-FAST-NEXT:    sh a2, 28(a0)
+; RV64-FAST-NEXT:    sb a3, 30(a0)
+; RV64-FAST-NEXT:    sd a4, 0(a0)
+; RV64-FAST-NEXT:    sd a5, 8(a0)
+; RV64-FAST-NEXT:    sd a6, 16(a0)
+; RV64-FAST-NEXT:    sw a1, 24(a0)
+; RV64-FAST-NEXT:    ret
+entry:
+  tail call void @llvm.memmove.p0.p0.i64(ptr %dest, ptr %src, i64 31, i1 false)
+  ret void
+}
+
+; ----------------------------------------------------------------------
+; Fully aligned cases
+
+define void @aligned_memmove0(ptr nocapture %dest, ptr %src) nounwind {
+; RV32-BOTH-LABEL: aligned_memmove0:
+; RV32-BOTH:       # %bb.0: # %entry
+; RV32-BOTH-NEXT:    ret
+;
+; RV64-BOTH-LABEL: aligned_memmove0:
+; RV64-BOTH:       # %bb.0: # %entry
+; RV64-BOTH-NEXT:    ret
+entry:
+  tail call void @llvm.memmove.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 0, i1 false)
+  ret void
+}
+
+define void @aligned_memmove1(ptr nocapture %dest, ptr %src) nounwind {
+; RV32-BOTH-LABEL: aligned_memmove1:
+; RV32-BOTH:       # %bb.0: # %entry
+; RV32-BOTH-NEXT:    lbu a1, 0(a1)
+; RV32-BOTH-NEXT:    sb a1, 0(a0)
+; RV32-BOTH-NEXT:    ret
+;
+; RV64-BOTH-LABEL: aligned_memmove1:
+; RV64-BOTH:       # %bb.0: # %entry
+; RV64-BOTH-NEXT:    lbu a1, 0(a1)
+; RV64-BOTH-NEXT:    sb a1, 0(a0)
+; RV64-BOTH-NEXT:    ret
+entry:
+  tail call void @llvm.memmove.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 1, i1 false)
+  ret void
+}
+
+define void @aligned_memmove2(ptr nocapture %dest, ptr %src) nounwind {
+; RV32-BOTH-LABEL: aligned_memmove2:
+; RV32-BOTH:       # %bb.0: # %entry
+; RV32-BOTH-NEXT:    lh a1, 0(a1)
+; RV32-BOTH-NEXT:    sh a1, 0(a0)
+; RV32-BOTH-NEXT:    ret
+;
+; RV64-BOTH-LABEL: aligned_memmove2:
+; RV64-BOTH:       # %bb.0: # %entry
+; RV64-BOTH-NEXT:    lh a1, 0(a1)
+; RV64-BOTH-NEXT:    sh a1, 0(a0)
+; RV64-BOTH-NEXT:    ret
+entry:
+  tail call void @llvm.memmove.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 2, i1 false)
+  ret void
+}
+
+define void @aligned_memmove3(ptr nocapture %dest, ptr %src) nounwind {
+; RV32-BOTH-LABEL: aligned_memmove3:
+; RV32-BOTH:       # %bb.0: # %entry
+; RV32-BOTH-NEXT:    lh a2, 0(a1)
+; RV32-BOTH-NEXT:    lbu a1, 2(a1)
+; RV32-BOTH-NEXT:    sh a2, 0(a0)
+; RV32-BOTH-NEXT:    sb a1, 2(a0)
+; RV32-BOTH-NEXT:    ret
+;
+; RV64-BOTH-LABEL: aligned_memmove3:
+; RV64-BOTH:       # %bb.0: # %entry
+; RV64-BOTH-NEXT:    lh a2, 0(a1)
+; RV64-BOTH-NEXT:    lbu a1, 2(a1)
+; RV64-BOTH-NEXT:    sh a2, 0(a0)
+; RV64-BOTH-NEXT:    sb a1, 2(a0)
+; RV64-BOTH-NEXT:    ret
+entry:
+  tail call void @llvm.memmove.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 3, i1 false)
+  ret void
+}
+
+define void @aligned_memmove4(ptr nocapture %dest, ptr %src) nounwind {
+; RV32-BOTH-LABEL: aligned_memmove4:
+; RV32-BOTH:       # %bb.0: # %entry
+; RV32-BOTH-NEXT:    lw a1, 0(a1)
+; RV32-BOTH-NEXT:    sw a1, 0(a0)
+; RV32-BOTH-NEXT:    ret
+;
+; RV64-BOTH-LABEL: aligned_memmove4:
+; RV64-BOTH:       # %bb.0: # %entry
+; RV64-BOTH-NEXT:    lw a1, 0(a1)
+; RV64-BOTH-NEXT:    sw a1, 0(a0)
+; RV64-BOTH-NEXT:    ret
+entry:
+  tail call void @llvm.memmove.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 4, i1 false)
+  ret void
+}
+
+define void @aligned_memmove7(ptr nocapture %dest, ptr %src) nounwind {
+; RV32-BOTH-LABEL: aligned_memmove7:
+; RV32-BOTH:       # %bb.0: # %entry
+; RV32-BOTH-NEXT:    lw a2, 0(a1)
+; RV32-BOTH-NEXT:    lh a3, 4(a1)
+; RV32-BOTH-NEXT:    lbu a1, 6(a1)
+; RV32-BOTH-NEXT:    sw a2, 0(a0)
+; RV32-BOTH-NEXT:    sh a3, 4(a0)
+; RV32-BOTH-NEXT:    sb a1, 6(a0)
+; RV32-BOTH-NEXT:    ret
+;
+; RV64-BOTH-LABEL: aligned_memmove7:
+; RV64-BOTH:       # %bb.0: # %entry
+; RV64-BOTH-NEXT:    lw a2, 0(a1)
+; RV64-BOTH-NEXT:    lh a3, 4(a1)
+; RV64-BOTH-NEXT:    lbu a1, 6(a1)
+; RV64-BOTH-NEXT:    sw a2, 0(a0)
+; RV64-BOTH-NEXT:    sh a3, 4(a0)
+; RV64-BOTH-NEXT:    sb a1, 6(a0)
+; RV64-BOTH-NEXT:    ret
+entry:
+  tail call void @llvm.memmove.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 7, i1 false)
+  ret void
+}
+
+define void @aligned_memmove8(ptr nocapture %dest, ptr %src) nounwind {
+; RV32-BOTH-LABEL: aligned_memmove8:
+; RV32-BOTH:       # %bb.0: # %entry
+; RV32-BOTH-NEXT:    lw a2, 0(a1)
+; RV32-BOTH-NEXT:    sw a2, 0(a0)
+; RV32-BOTH-NEXT:    lw a1, 4(a1)
+; RV32-BOTH-NEXT:    sw a1, 4(a0)
+; RV32-BOTH-NEXT:    ret
+;
+; RV64-BOTH-LABEL: aligned_memmove8:
+; RV64-BOTH:       # %bb.0: # %entry
+; RV64-BOTH-NEXT:    ld a1, 0(a1)
+; RV64-BOTH-NEXT:    sd a1, 0(a0)
+; RV64-BOTH-NEXT:    ret
+entry:
+  tail call void @llvm.memmove.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 8, i1 false)
+  ret void
+}
+
+define void @aligned_memmove15(ptr nocapture %dest, ptr %src) nounwind {
+; RV32-BOTH-LABEL: aligned_memmove15:
+; RV32-BOTH:       # %bb.0: # %entry
+; RV32-BOTH-NEXT:    lw a2, 0(a1)
+; RV32-BOTH-NEXT:    lw a3, 8(a1)
+; RV32-BOTH-NEXT:    lh a4, 12(a1)
+; RV32-BOTH-NEXT:    lbu a5, 14(a1)
+; RV32-BOTH-NEXT:    sw a2, 0(a0)
+; RV32-BOTH-NEXT:    lw a1, 4(a1)
+; RV32-BOTH-NEXT:    sw a1, 4(a0)
+; RV32-BOTH-NEXT:    sw a3, 8(a0)
+; RV32-BOTH-NEXT:    sh a4, 12(a0)
+; RV32-BOTH-NEXT:    sb a5, 14(a0)
+; RV32-BOTH-NEXT:    ret
+;
+; RV64-BOTH-LABEL: aligned_memmove15:
+; RV64-BOTH:       # %bb.0: # %entry
+; RV64-BOTH-NEXT:    ld a2, 0(a1)
+; RV64-BOTH-NEXT:    lw a3, 8(a1)
+; RV64-BOTH-NEXT:    lh a4, 12(a1)
+; RV64-BOTH-NEXT:    lbu a1, 14(a1)
+; RV64-BOTH-NEXT:    sd a2, 0(a0)
+; RV64-BOTH-NEXT:    sw a3, 8(a0)
+; RV64-BOTH-NEXT:    sh a4, 12(a0)
+; RV64-BOTH-NEXT:    sb a1, 14(a0)
+; RV64-BOTH-NEXT:    ret
+entry:
+  tail call void @llvm.memmove.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 15, i1 false)
+  ret void
+}
+
+define void @aligned_memmove16(ptr nocapture %dest, ptr %src) nounwind {
+; RV32-BOTH-LABEL: aligned_memmove16:
+; RV32-BOTH:       # %bb.0: # %entry
+; RV32-BOTH-NEXT:    lw a2, 0(a1)
+; RV32-BOTH-NEXT:    lw a3, 8(a1)
+; RV32-BOTH-NEXT:    sw a2, 0(a0)
+; RV32-BOTH-NEXT:    lw a2, 4(a1)
+; RV32-BOTH-NEXT:    lw a1, 12(a1)
+; RV32-BOTH-NEXT:    sw a2, 4(a0)
+; RV32-BOTH-NEXT:    sw a3, 8(a0)
+; RV32-BOTH-NEXT:    sw a1, 12(a0)
+; RV32-BOTH-NEXT:    ret
+;
+; RV64-BOTH-LABEL: aligned_memmove16:
+; RV64-BOTH:       # %bb.0: # %entry
+; RV64-BOTH-NEXT:    ld a2, 0(a1)
+; RV64-BOTH-NEXT:    ld a1, 8(a1)
+; RV64-BOTH-NEXT:    sd a2, 0(a0)
+; RV64-BOTH-NEXT:    sd a1, 8(a0)
+; RV64-BOTH-NEXT:    ret
+entry:
+  tail call void @llvm.memmove.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 16, i1 false)
+  ret void
+}
+
+define void @aligned_memmove31(ptr nocapture %dest, ptr %src) nounwind {
+; RV32-BOTH-LABEL: aligned_memmove31:
+; RV32-BOTH:       # %bb.0: # %entry
+; RV32-BOTH-NEXT:    li a2, 31
+; RV32-BOTH-NEXT:    tail memmove
+;
+; RV64-BOTH-LABEL: aligned_memmove31:
+; RV64-BOTH:       # %bb.0: # %entry
+; RV64-BOTH-NEXT:    lh a2, 28(a1)
+; RV64-BOTH-NEXT:    lbu a3, 30(a1)
+; RV64-BOTH-NEXT:    ld a4, 0(a1)
+; RV64-BOTH-NEXT:    ld a5, 8(a1)
+; RV64-BOTH-NEXT:    ld a6, 16(a1)
+; RV64-BOTH-NEXT:    lw a1, 24(a1)
+; RV64-BOTH-NEXT:    sh a2, 28(a0)
+; RV64-BOTH-NEXT:    sb a3, 30(a0)
+; RV64-BOTH-NEXT:    sd a4, 0(a0)
+; RV64-BOTH-NEXT:    sd a5, 8(a0)
+; RV64-BOTH-NEXT:    sd a6, 16(a0)
+; RV64-BOTH-NEXT:    sw a1, 24(a0)
+; RV64-BOTH-NEXT:    ret
+entry:
+  tail call void @llvm.memmove.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 31, i1 false)
+  ret void
+}
+
+; ------------------------------------------------------------------------
+; A few partially aligned cases
+
+
+define void @memmove16_align4(ptr nocapture %dest, ptr nocapture %src) nounwind {
+; RV32-BOTH-LABEL: memmove16_align4:
+; RV32-BOTH:       # %bb.0: # %entry
+; RV32-BOTH-NEXT:    lw a2, 0(a1)
+; RV32-BOTH-NEXT:    lw a3, 4(a1)
+; RV32-BOTH-NEXT:    lw a4, 8(a1)
+; RV32-BOTH-NEXT:    lw a1, 12(a1)
+; RV32-BOTH-NEXT:    sw a2, 0(a0)
+; RV32-BOTH-NEXT:    sw a3, 4(a0)
+; RV32-BOTH-NEXT:    sw a4, 8(a0)
+; RV32-BOTH-NEXT:    sw a1, 12(a0)
+; RV32-BOTH-NEXT:    ret
+;
+; RV64-LABEL: memmove16_align4:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    lw a2, 0(a1)
+; RV64-NEXT:    lw a3, 4(a1)
+; RV64-NEXT:    lw a4, 8(a1)
+; RV64-NEXT:    lw a1, 12(a1)
+; RV64-NEXT:    sw a2, 0(a0)
+; RV64-NEXT:    sw a3, 4(a0)
+; RV64-NEXT:    sw a4, 8(a0)
+; RV64-NEXT:    sw a1, 12(a0)
+; RV64-NEXT:    ret
+;
+; RV64-FAST-LABEL: memmove16_align4:
+; RV64-FAST:       # %bb.0: # %entry
+; RV64-FAST-NEXT:    ld a2, 0(a1)
+; RV64-FAST-NEXT:    ld a1, 8(a1)
+; RV64-FAST-NEXT:    sd a2, 0(a0)
+; RV64-FAST-NEXT:    sd a1, 8(a0)
+; RV64-FAST-NEXT:    ret
+entry:
+  tail call void @llvm.memmove.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src, i64 16, i1 false)
+  ret void
+}
+
+define i32 @memmove11_align8(ptr nocapture %dest, ptr %src) {
+; RV32-BOTH-LABEL: memmove11_align8:
+; RV32-BOTH:       # %bb.0: # %entry
+; RV32-BOTH-NEXT:    lw a2, 0(a1)
+; RV32-BOTH-NEXT:    lh a3, 8(a1)
+; RV32-BOTH-NEXT:    lbu a4, 10(a1)
+; RV32-BOTH-NEXT:    sw a2, 0(a0)
+; RV32-BOTH-NEXT:    lw a1, 4(a1)
+; RV32-BOTH-NEXT:    sw a1, 4(a0)
+; RV32-BOTH-NEXT:    sh a3, 8(a0)
+; RV32-BOTH-NEXT:    sb a4, 10(a0)
+; RV32-BOTH-NEXT:    li a0, 0
+; RV32-BOTH-NEXT:    ret
+;
+; RV64-BOTH-LABEL: memmove11_align8:
+; RV64-BOTH:       # %bb.0: # %entry
+; RV64-BOTH-NEXT:    ld a2, 0(a1)
+; RV64-BOTH-NEXT:    lh a3, 8(a1)
+; RV64-BOTH-NEXT:    lbu a1, 10(a1)
+; RV64-BOTH-NEXT:    sd a2, 0(a0)
+; RV64-BOTH-NEXT:    sh a3, 8(a0)
+; RV64-BOTH-NEXT:    sb a1, 10(a0)
+; RV64-BOTH-NEXT:    li a0, 0
+; RV64-BOTH-NEXT:    ret
+entry:
+  call void @llvm.memmove.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 11, i1 false)
+  ret i32 0
+}
+
+declare void @llvm.memmove.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind
diff --git a/llvm/test/CodeGen/RISCV/misched-postra-direction.mir b/llvm/test/CodeGen/RISCV/misched-postra-direction.mir
index 2cca042..e4b934c 100644
--- a/llvm/test/CodeGen/RISCV/misched-postra-direction.mir
+++ b/llvm/test/CodeGen/RISCV/misched-postra-direction.mir
@@ -11,6 +11,19 @@
 # RUN:   -misched-dump-schedule-trace -misched-postra-direction=bidirectional \
 # RUN:   -o - %s 2>&1 | FileCheck --check-prefix=BIDIRECTIONAL %s
 
+# RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -passes=postmisched \
+# RUN:   -enable-post-misched -debug-only=machine-scheduler \
+# RUN:  -misched-dump-schedule-trace -misched-postra-direction=topdown \
+# RUN:  -o - %s 2>&1 | FileCheck --check-prefix=TOPDOWN %s
+# RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -passes=postmisched \
+# RUN:   -enable-post-misched -debug-only=machine-scheduler \
+# RUN:   -misched-dump-schedule-trace -misched-postra-direction=bottomup \
+# RUN:   -o - %s 2>&1 | FileCheck --check-prefix=BOTTOMUP %s
+# RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -passes=postmisched \
+# RUN:   -enable-post-misched -debug-only=machine-scheduler \
+# RUN:   -misched-dump-schedule-trace -misched-postra-direction=bidirectional \
+# RUN:   -o - %s 2>&1 | FileCheck --check-prefix=BIDIRECTIONAL %s
+
 # REQUIRES: asserts
 
 ---
diff --git a/llvm/test/CodeGen/RISCV/rvv/abd.ll b/llvm/test/CodeGen/RISCV/rvv/abd.ll
index 5e610c4..583d872 100644
--- a/llvm/test/CodeGen/RISCV/rvv/abd.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/abd.ll
@@ -103,15 +103,21 @@ define <vscale x 4 x i32> @sabd_s_promoted_ops(<vscale x 4 x i16> %a, <vscale x
   ret <vscale x 4 x i32> %abs
 }
 
-; FIXME: Crashes legalization if enabled
-;; define <vscale x 2 x i64> @sabd_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
-;;   %a.sext = sext <vscale x 2 x i64> %a to <vscale x 2 x i128>
-;;   %b.sext = sext <vscale x 2 x i64> %b to <vscale x 2 x i128>
-;;   %sub = sub <vscale x 2 x i128> %a.sext, %b.sext
-;;   %abs = call <vscale x 2 x i128> @llvm.abs.nxv2i128(<vscale x 2 x i128> %sub, i1 true)
-;;   %trunc = trunc <vscale x 2 x i128> %abs to <vscale x 2 x i64>
-;;   ret <vscale x 2 x i64> %trunc
-;; }
+define <vscale x 2 x i64> @sabd_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sabd_d:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vmin.vv v12, v8, v10
+; CHECK-NEXT:    vmax.vv v8, v8, v10
+; CHECK-NEXT:    vsub.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %a.sext = sext <vscale x 2 x i64> %a to <vscale x 2 x i128>
+  %b.sext = sext <vscale x 2 x i64> %b to <vscale x 2 x i128>
+  %sub = sub <vscale x 2 x i128> %a.sext, %b.sext
+  %abs = call <vscale x 2 x i128> @llvm.abs.nxv2i128(<vscale x 2 x i128> %sub, i1 true)
+  %trunc = trunc <vscale x 2 x i128> %abs to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %trunc
+}
 
 define <vscale x 2 x i64> @sabd_d_promoted_ops(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: sabd_d_promoted_ops:
@@ -231,15 +237,21 @@ define <vscale x 4 x i32> @uabd_s_promoted_ops(<vscale x 4 x i16> %a, <vscale x
   ret <vscale x 4 x i32> %abs
 }
 
-; FIXME: Crashes legalization if enabled
-;; define <vscale x 2 x i64> @uabd_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
-;;   %a.zext = zext <vscale x 2 x i64> %a to <vscale x 2 x i128>
-;;   %b.zext = zext <vscale x 2 x i64> %b to <vscale x 2 x i128>
-;;   %sub = sub <vscale x 2 x i128> %a.zext, %b.zext
-;;   %abs = call <vscale x 2 x i128> @llvm.abs.nxv2i128(<vscale x 2 x i128> %sub, i1 true)
-;;   %trunc = trunc <vscale x 2 x i128> %abs to <vscale x 2 x i64>
-;;   ret <vscale x 2 x i64> %trunc
-;; }
+define <vscale x 2 x i64> @uabd_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uabd_d:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vminu.vv v12, v8, v10
+; CHECK-NEXT:    vmaxu.vv v8, v8, v10
+; CHECK-NEXT:    vsub.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %a.zext = zext <vscale x 2 x i64> %a to <vscale x 2 x i128>
+  %b.zext = zext <vscale x 2 x i64> %b to <vscale x 2 x i128>
+  %sub = sub <vscale x 2 x i128> %a.zext, %b.zext
+  %abs = call <vscale x 2 x i128> @llvm.abs.nxv2i128(<vscale x 2 x i128> %sub, i1 true)
+  %trunc = trunc <vscale x 2 x i128> %abs to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %trunc
+}
 
 define <vscale x 2 x i64> @uabd_d_promoted_ops(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: uabd_d_promoted_ops:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
index cd4b19f..5f275da 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
@@ -1735,7 +1735,8 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
 ; RV32-NEXT:    sub sp, sp, a1
 ; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    li a2, 24
+; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 48
 ; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
@@ -1770,8 +1771,7 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    li a5, 24
-; RV32-NEXT:    mul a4, a4, a5
+; RV32-NEXT:    slli a4, a4, 5
 ; RV32-NEXT:    add a4, sp, a4
 ; RV32-NEXT:    addi a4, a4, 48
 ; RV32-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
@@ -1786,13 +1786,12 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
 ; RV32-NEXT:    addi a3, sp, 32
 ; RV32-NEXT:    vlse64.v v8, (a3), zero
 ; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 5
+; RV32-NEXT:    slli a3, a3, 4
 ; RV32-NEXT:    add a3, sp, a3
 ; RV32-NEXT:    addi a3, a3, 48
 ; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 24
-; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    slli a3, a3, 5
 ; RV32-NEXT:    add a3, sp, a3
 ; RV32-NEXT:    addi a3, a3, 48
 ; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
@@ -1806,24 +1805,21 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
 ; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v24, v24, v16, v0.t
 ; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 24
-; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    slli a3, a3, 5
 ; RV32-NEXT:    add a3, sp, a3
 ; RV32-NEXT:    addi a3, a3, 48
 ; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsub.vv v24, v16, v24, v0.t
 ; RV32-NEXT:    vand.vv v16, v24, v8, v0.t
 ; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 24
-; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    slli a3, a3, 5
 ; RV32-NEXT:    add a3, sp, a3
 ; RV32-NEXT:    addi a3, a3, 48
 ; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsrl.vi v16, v24, 2, v0.t
 ; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
 ; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 24
-; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    slli a3, a3, 5
 ; RV32-NEXT:    add a3, sp, a3
 ; RV32-NEXT:    addi a3, a3, 48
 ; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
@@ -1841,8 +1837,7 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
 ; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT:    vlse64.v v8, (a4), zero
 ; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 24
-; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    slli a3, a3, 5
 ; RV32-NEXT:    add a3, sp, a3
 ; RV32-NEXT:    addi a3, a3, 48
 ; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
@@ -1856,8 +1851,7 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
 ; RV32-NEXT:    vadd.vv v8, v8, v24, v0.t
 ; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
 ; RV32-NEXT:    csrr a2, vlenb
-; RV32-NEXT:    li a3, 24
-; RV32-NEXT:    mul a2, a2, a3
+; RV32-NEXT:    slli a2, a2, 5
 ; RV32-NEXT:    add a2, sp, a2
 ; RV32-NEXT:    addi a2, a2, 48
 ; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
@@ -1875,7 +1869,8 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
 ; RV32-NEXT:    and a0, a0, a3
 ; RV32-NEXT:    vmv1r.v v0, v7
 ; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 4
+; RV32-NEXT:    li a4, 24
+; RV32-NEXT:    mul a3, a3, a4
 ; RV32-NEXT:    add a3, sp, a3
 ; RV32-NEXT:    addi a3, a3, 48
 ; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
@@ -1891,31 +1886,44 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
 ; RV32-NEXT:    addi a0, a0, 48
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v16, v24, v16, v0.t
-; RV32-NEXT:    vsub.vv v24, v8, v16, v0.t
+; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    li a1, 40
+; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v16, v24, v8, v0.t
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    li a1, 40
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
-; RV32-NEXT:    vsrl.vi v8, v24, 2, v0.t
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    li a1, 40
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 48
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
@@ -1924,8 +1932,7 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 24
-; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 48
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
@@ -3921,7 +3928,8 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
 ; RV32-NEXT:    sub sp, sp, a1
 ; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    li a2, 24
+; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 48
 ; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
@@ -3956,8 +3964,7 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    li a5, 24
-; RV32-NEXT:    mul a4, a4, a5
+; RV32-NEXT:    slli a4, a4, 5
 ; RV32-NEXT:    add a4, sp, a4
 ; RV32-NEXT:    addi a4, a4, 48
 ; RV32-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
@@ -3972,13 +3979,12 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
 ; RV32-NEXT:    addi a3, sp, 32
 ; RV32-NEXT:    vlse64.v v8, (a3), zero
 ; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 5
+; RV32-NEXT:    slli a3, a3, 4
 ; RV32-NEXT:    add a3, sp, a3
 ; RV32-NEXT:    addi a3, a3, 48
 ; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 24
-; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    slli a3, a3, 5
 ; RV32-NEXT:    add a3, sp, a3
 ; RV32-NEXT:    addi a3, a3, 48
 ; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
@@ -3992,24 +3998,21 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
 ; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v24, v24, v16, v0.t
 ; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 24
-; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    slli a3, a3, 5
 ; RV32-NEXT:    add a3, sp, a3
 ; RV32-NEXT:    addi a3, a3, 48
 ; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsub.vv v24, v16, v24, v0.t
 ; RV32-NEXT:    vand.vv v16, v24, v8, v0.t
 ; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 24
-; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    slli a3, a3, 5
 ; RV32-NEXT:    add a3, sp, a3
 ; RV32-NEXT:    addi a3, a3, 48
 ; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsrl.vi v16, v24, 2, v0.t
 ; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
 ; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 24
-; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    slli a3, a3, 5
 ; RV32-NEXT:    add a3, sp, a3
 ; RV32-NEXT:    addi a3, a3, 48
 ; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
@@ -4027,8 +4030,7 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
 ; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT:    vlse64.v v8, (a4), zero
 ; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 24
-; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    slli a3, a3, 5
 ; RV32-NEXT:    add a3, sp, a3
 ; RV32-NEXT:    addi a3, a3, 48
 ; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
@@ -4042,8 +4044,7 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
 ; RV32-NEXT:    vadd.vv v8, v8, v24, v0.t
 ; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
 ; RV32-NEXT:    csrr a2, vlenb
-; RV32-NEXT:    li a3, 24
-; RV32-NEXT:    mul a2, a2, a3
+; RV32-NEXT:    slli a2, a2, 5
 ; RV32-NEXT:    add a2, sp, a2
 ; RV32-NEXT:    addi a2, a2, 48
 ; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
@@ -4061,7 +4062,8 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
 ; RV32-NEXT:    and a0, a0, a3
 ; RV32-NEXT:    vmv1r.v v0, v7
 ; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 4
+; RV32-NEXT:    li a4, 24
+; RV32-NEXT:    mul a3, a3, a4
 ; RV32-NEXT:    add a3, sp, a3
 ; RV32-NEXT:    addi a3, a3, 48
 ; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
@@ -4077,31 +4079,44 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
 ; RV32-NEXT:    addi a0, a0, 48
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v16, v24, v16, v0.t
-; RV32-NEXT:    vsub.vv v24, v8, v16, v0.t
+; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    li a1, 40
+; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v16, v24, v8, v0.t
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    li a1, 40
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
-; RV32-NEXT:    vsrl.vi v8, v24, 2, v0.t
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    li a1, 40
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 48
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
@@ -4110,8 +4125,7 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 24
-; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 48
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index 9dbe261..585a331 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -1444,13 +1444,12 @@ define void @fma_v6bf16(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vle16.v v8, (a2)
 ; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    vle16.v v10, (a1)
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmadd.vv v8, v14, v12
-; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
 ; CHECK-NEXT:    vse16.v v10, (a0)
 ; CHECK-NEXT:    ret
@@ -1513,13 +1512,12 @@ define void @fma_v6f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFHMIN-NEXT:    vle16.v v8, (a2)
 ; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v8, v14, v12
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
 ; ZVFHMIN-NEXT:    vse16.v v10, (a0)
 ; ZVFHMIN-NEXT:    ret
@@ -1602,14 +1600,13 @@ define void @fmsub_v6bf16(ptr %x, ptr %y, ptr %z) {
 ; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    vle16.v v10, (a1)
 ; CHECK-NEXT:    lui a1, 8
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vxor.vx v8, v8, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v8
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmadd.vv v8, v12, v14
-; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
 ; CHECK-NEXT:    vse16.v v10, (a0)
 ; CHECK-NEXT:    ret
@@ -1677,14 +1674,13 @@ define void @fmsub_v6f16(ptr %x, ptr %y, ptr %z) {
 ; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    vle16.v v10, (a1)
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v8, v12, v14
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
 ; ZVFHMIN-NEXT:    vse16.v v10, (a0)
 ; ZVFHMIN-NEXT:    ret
@@ -3381,14 +3377,13 @@ define void @fma_vf_v6bf16(ptr %x, ptr %y, bfloat %z) {
 ; CHECK-NEXT:    vle16.v v8, (a1)
 ; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    fmv.x.w a1, fa0
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmadd.vv v8, v14, v12
-; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
 ; CHECK-NEXT:    vse16.v v10, (a0)
 ; CHECK-NEXT:    ret
@@ -3452,14 +3447,13 @@ define void @fma_vf_v6f16(ptr %x, ptr %y, half %z) {
 ; ZVFHMIN-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v8, v14, v12
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
 ; ZVFHMIN-NEXT:    vse16.v v10, (a0)
 ; ZVFHMIN-NEXT:    ret
@@ -3541,14 +3535,13 @@ define void @fma_fv_v6bf16(ptr %x, ptr %y, bfloat %z) {
 ; CHECK-NEXT:    vle16.v v8, (a1)
 ; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    fmv.x.w a1, fa0
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v9
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmadd.vv v8, v14, v12
-; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
 ; CHECK-NEXT:    vse16.v v10, (a0)
 ; CHECK-NEXT:    ret
@@ -3612,14 +3605,13 @@ define void @fma_fv_v6f16(ptr %x, ptr %y, half %z) {
 ; ZVFHMIN-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v8, v14, v12
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
 ; ZVFHMIN-NEXT:    vse16.v v10, (a0)
 ; ZVFHMIN-NEXT:    ret
@@ -3705,7 +3697,6 @@ define void @fmsub_vf_v6bf16(ptr %x, ptr %y, bfloat %z) {
 ; CHECK-NEXT:    vle16.v v8, (a1)
 ; CHECK-NEXT:    vle16.v v9, (a0)
 ; CHECK-NEXT:    lui a1, 8
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.x v10, a2
 ; CHECK-NEXT:    vxor.vx v8, v8, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
@@ -3713,7 +3704,7 @@ define void @fmsub_vf_v6bf16(ptr %x, ptr %y, bfloat %z) {
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmadd.vv v8, v12, v14
-; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
 ; CHECK-NEXT:    vse16.v v10, (a0)
 ; CHECK-NEXT:    ret
@@ -3782,7 +3773,6 @@ define void @fmsub_vf_v6f16(ptr %x, ptr %y, half %z) {
 ; ZVFHMIN-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vmv.v.x v10, a2
 ; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
@@ -3790,7 +3780,7 @@ define void @fmsub_vf_v6f16(ptr %x, ptr %y, half %z) {
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v8, v12, v14
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
 ; ZVFHMIN-NEXT:    vse16.v v10, (a0)
 ; ZVFHMIN-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index 0c7d792..1516c67 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -183,10 +183,10 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    addi sp, sp, -16
 ; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    csrr a2, vlenb
-; RV32-NEXT:    li a3, 92
+; RV32-NEXT:    li a3, 88
 ; RV32-NEXT:    mul a2, a2, a3
 ; RV32-NEXT:    sub sp, sp, a2
-; RV32-NEXT:    .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xdc, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 92 * vlenb
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 88 * vlenb
 ; RV32-NEXT:    addi a3, a1, 256
 ; RV32-NEXT:    addi a4, a1, 128
 ; RV32-NEXT:    li a2, 32
@@ -198,15 +198,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
 ; RV32-NEXT:    vle32.v v16, (a1)
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li t1, 76
+; RV32-NEXT:    li t1, 72
 ; RV32-NEXT:    mul a1, a1, t1
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vle32.v v8, (a4)
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a4, 68
-; RV32-NEXT:    mul a1, a1, a4
+; RV32-NEXT:    slli a1, a1, 6
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
@@ -221,7 +220,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT:    vrgatherei16.vv v24, v16, v6
 ; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    li a5, 52
+; RV32-NEXT:    li a5, 48
 ; RV32-NEXT:    mul a4, a4, a5
 ; RV32-NEXT:    add a4, sp, a4
 ; RV32-NEXT:    addi a4, a4, 16
@@ -236,13 +235,13 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
 ; RV32-NEXT:    vslidedown.vi v24, v16, 16
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 60
+; RV32-NEXT:    li a3, 56
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 84
+; RV32-NEXT:    li a3, 80
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
@@ -250,14 +249,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; RV32-NEXT:    vmerge.vvm v20, v24, v16, v0
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 40
+; RV32-NEXT:    li a3, 36
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs4r.v v20, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vmv1r.v v0, v7
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 76
+; RV32-NEXT:    li a3, 72
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
@@ -267,7 +266,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT:    vrgatherei16.vv v8, v24, v4
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 44
+; RV32-NEXT:    li a3, 40
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
@@ -282,14 +281,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vle16.v v14, (a4)
 ; RV32-NEXT:    vmv.s.x v12, a3
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 84
+; RV32-NEXT:    li a3, 80
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vmv4r.v v8, v24
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 60
+; RV32-NEXT:    li a3, 56
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
@@ -304,8 +303,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vs4r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vmv1r.v v0, v12
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 68
-; RV32-NEXT:    mul a1, a1, a3
+; RV32-NEXT:    slli a1, a1, 6
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
@@ -314,7 +312,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT:    vrgatherei16.vv v16, v24, v14
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    li a3, 28
+; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
@@ -330,22 +329,22 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    addi a4, a4, 12
 ; RV32-NEXT:    addi a5, a5, 768
 ; RV32-NEXT:    addi a7, a7, -1024
-; RV32-NEXT:    vmv.s.x v1, a6
-; RV32-NEXT:    vmv.s.x v12, t0
+; RV32-NEXT:    vmv.s.x v13, a6
+; RV32-NEXT:    vmv.s.x v2, t0
 ; RV32-NEXT:    vmv.s.x v0, a1
-; RV32-NEXT:    vmv.s.x v3, a3
-; RV32-NEXT:    vmv.s.x v2, a4
-; RV32-NEXT:    vmv.s.x v13, a5
-; RV32-NEXT:    vmv.s.x v14, a7
+; RV32-NEXT:    vmv.s.x v12, a3
+; RV32-NEXT:    vmv.s.x v3, a4
+; RV32-NEXT:    vmv.s.x v14, a5
+; RV32-NEXT:    vmv.s.x v1, a7
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 60
+; RV32-NEXT:    li a3, 56
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vmv4r.v v8, v16
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 84
+; RV32-NEXT:    li a3, 80
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
@@ -354,15 +353,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vmerge.vvm v20, v8, v16, v0
 ; RV32-NEXT:    addi a1, sp, 16
 ; RV32-NEXT:    vs4r.v v20, (a1) # Unknown-size Folded Spill
-; RV32-NEXT:    vmv1r.v v0, v3
+; RV32-NEXT:    vmv1r.v v0, v12
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 68
-; RV32-NEXT:    mul a1, a1, a3
+; RV32-NEXT:    slli a1, a1, 6
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 76
+; RV32-NEXT:    li a3, 72
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
@@ -370,28 +368,23 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
 ; RV32-NEXT:    vmerge.vvm v24, v16, v24, v0
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    li a3, 12
+; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
-; RV32-NEXT:    vmv1r.v v0, v2
+; RV32-NEXT:    vmv1r.v v0, v3
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 84
+; RV32-NEXT:    li a3, 80
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmerge.vvm v24, v8, v24, v0
-; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 12
-; RV32-NEXT:    mul a1, a1, a3
-; RV32-NEXT:    add a1, sp, a1
-; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vs4r.v v24, (a1) # Unknown-size Folded Spill
-; RV32-NEXT:    vmv1r.v v0, v13
+; RV32-NEXT:    vmerge.vvm v4, v8, v24, v0
+; RV32-NEXT:    vmv1r.v v0, v14
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 76
+; RV32-NEXT:    li a3, 72
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
@@ -403,45 +396,44 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
-; RV32-NEXT:    vmv1r.v v0, v1
+; RV32-NEXT:    vmv1r.v v0, v13
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 84
+; RV32-NEXT:    li a3, 80
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmerge.vvm v4, v8, v24, v0
+; RV32-NEXT:    vmerge.vvm v12, v8, v24, v0
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 28
+; RV32-NEXT:    li a3, 20
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vs4r.v v4, (a1) # Unknown-size Folded Spill
-; RV32-NEXT:    vmv1r.v v0, v14
+; RV32-NEXT:    vs4r.v v12, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vmv1r.v v0, v1
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 76
+; RV32-NEXT:    li a3, 72
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 68
-; RV32-NEXT:    mul a1, a1, a3
+; RV32-NEXT:    slli a1, a1, 6
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
 ; RV32-NEXT:    vmerge.vvm v16, v24, v16, v0
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 76
+; RV32-NEXT:    li a2, 72
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vmv1r.v v0, v2
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 84
+; RV32-NEXT:    li a2, 80
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
@@ -449,190 +441,208 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; RV32-NEXT:    vmerge.vvm v8, v8, v16, v0
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 68
-; RV32-NEXT:    mul a1, a1, a2
+; RV32-NEXT:    slli a1, a1, 6
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs4r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    lui a1, 32
 ; RV32-NEXT:    addi a1, a1, 4
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
+; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 40
+; RV32-NEXT:    li a2, 36
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl4r.v v8, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vrgatherei16.vv v20, v8, v16
+; RV32-NEXT:    vrgatherei16.vv v16, v8, v12
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 52
+; RV32-NEXT:    li a2, 48
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsetivli zero, 12, e32, m4, tu, ma
-; RV32-NEXT:    vmv.v.v v20, v8
+; RV32-NEXT:    vmv.v.v v16, v8
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 84
+; RV32-NEXT:    li a2, 80
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vs4r.v v20, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vs4r.v v16, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    lui a1, 48
 ; RV32-NEXT:    lui a2, %hi(.LCPI8_3)
 ; RV32-NEXT:    addi a2, a2, %lo(.LCPI8_3)
 ; RV32-NEXT:    addi a1, a1, 5
 ; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v28, (a2)
+; RV32-NEXT:    vle16.v v24, (a2)
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v20, a1
+; RV32-NEXT:    vmv.v.x v25, a1
 ; RV32-NEXT:    csrr a1, vlenb
 ; RV32-NEXT:    li a2, 24
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl4r.v v12, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vl4r.v v8, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vrgatherei16.vv v8, v12, v20
+; RV32-NEXT:    vrgatherei16.vv v16, v8, v25
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 44
+; RV32-NEXT:    li a2, 40
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsetivli zero, 12, e32, m4, tu, ma
-; RV32-NEXT:    vmv.v.v v8, v16
+; RV32-NEXT:    vmv.v.v v16, v8
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 52
+; RV32-NEXT:    li a2, 56
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vs4r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vs4r.v v16, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vl4r.v v12, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vl4r.v v8, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vrgatherei16.vv v24, v12, v28
+; RV32-NEXT:    vrgatherei16.vv v16, v8, v24
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    li a2, 28
+; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsetivli zero, 10, e32, m4, tu, ma
-; RV32-NEXT:    vmv.v.v v24, v16
+; RV32-NEXT:    vmv.v.v v16, v8
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    li a2, 40
+; RV32-NEXT:    mul a1, a1, a2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs4r.v v16, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    lui a1, %hi(.LCPI8_4)
 ; RV32-NEXT:    addi a1, a1, %lo(.LCPI8_4)
 ; RV32-NEXT:    lui a2, %hi(.LCPI8_5)
 ; RV32-NEXT:    addi a2, a2, %lo(.LCPI8_5)
 ; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; RV32-NEXT:    vle16.v v12, (a1)
+; RV32-NEXT:    vle16.v v26, (a1)
 ; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v28, (a2)
+; RV32-NEXT:    vle16.v v24, (a2)
 ; RV32-NEXT:    lui a1, %hi(.LCPI8_6)
 ; RV32-NEXT:    addi a1, a1, %lo(.LCPI8_6)
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vle16.v v30, (a1)
-; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 4
-; RV32-NEXT:    add a1, sp, a1
-; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
-; RV32-NEXT:    vrgatherei16.vv v16, v0, v12
+; RV32-NEXT:    vle16.v v2, (a1)
 ; RV32-NEXT:    csrr a1, vlenb
 ; RV32-NEXT:    li a2, 12
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl4r.v v20, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vrgatherei16.vv v8, v16, v26
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vrgatherei16.vv v12, v20, v28
+; RV32-NEXT:    vrgatherei16.vv v20, v4, v24
 ; RV32-NEXT:    vsetivli zero, 10, e32, m4, tu, ma
-; RV32-NEXT:    vmv.v.v v12, v16
+; RV32-NEXT:    vmv.v.v v20, v8
 ; RV32-NEXT:    csrr a1, vlenb
 ; RV32-NEXT:    slli a1, a1, 2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vrgatherei16.vv v16, v0, v30
+; RV32-NEXT:    vrgatherei16.vv v24, v8, v2
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    li a2, 48
+; RV32-NEXT:    mul a1, a1, a2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    lui a1, %hi(.LCPI8_7)
 ; RV32-NEXT:    addi a1, a1, %lo(.LCPI8_7)
 ; RV32-NEXT:    lui a2, %hi(.LCPI8_8)
 ; RV32-NEXT:    addi a2, a2, %lo(.LCPI8_8)
 ; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v20, (a1)
+; RV32-NEXT:    vle16.v v12, (a1)
 ; RV32-NEXT:    lui a1, %hi(.LCPI8_9)
 ; RV32-NEXT:    addi a1, a1, %lo(.LCPI8_9)
 ; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; RV32-NEXT:    vle16.v v8, (a2)
+; RV32-NEXT:    vle16.v v16, (a2)
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vle16.v v10, (a1)
+; RV32-NEXT:    vle16.v v18, (a1)
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 28
+; RV32-NEXT:    li a2, 20
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl4r.v v0, (a1) # Unknown-size Folded Reload
-; RV32-NEXT:    vrgatherei16.vv v28, v0, v20
+; RV32-NEXT:    vrgatherei16.vv v24, v0, v12
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    li a2, 48
+; RV32-NEXT:    mul a1, a1, a2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsetivli zero, 10, e32, m4, tu, ma
-; RV32-NEXT:    vmv.v.v v28, v16
+; RV32-NEXT:    vmv.v.v v24, v0
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 76
+; RV32-NEXT:    li a2, 72
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vrgatherei16.vv v16, v0, v8
+; RV32-NEXT:    vrgatherei16.vv v8, v0, v16
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 60
+; RV32-NEXT:    li a2, 48
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 68
-; RV32-NEXT:    mul a1, a1, a2
+; RV32-NEXT:    slli a1, a1, 6
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl4r.v v4, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vrgatherei16.vv v16, v4, v10
+; RV32-NEXT:    vrgatherei16.vv v8, v4, v18
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 60
+; RV32-NEXT:    li a2, 48
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsetivli zero, 10, e32, m4, tu, ma
-; RV32-NEXT:    vmv.v.v v16, v0
+; RV32-NEXT:    vmv.v.v v8, v0
 ; RV32-NEXT:    addi a1, a0, 320
 ; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vse32.v v16, (a1)
+; RV32-NEXT:    vse32.v v8, (a1)
 ; RV32-NEXT:    addi a1, a0, 256
-; RV32-NEXT:    vse32.v v28, (a1)
+; RV32-NEXT:    vse32.v v24, (a1)
 ; RV32-NEXT:    addi a1, a0, 192
-; RV32-NEXT:    vse32.v v12, (a1)
+; RV32-NEXT:    vse32.v v20, (a1)
 ; RV32-NEXT:    addi a1, a0, 128
-; RV32-NEXT:    vse32.v v24, (a1)
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    li a3, 40
+; RV32-NEXT:    mul a2, a2, a3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl4r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vse32.v v8, (a1)
 ; RV32-NEXT:    addi a1, a0, 64
 ; RV32-NEXT:    csrr a2, vlenb
-; RV32-NEXT:    li a3, 52
+; RV32-NEXT:    li a3, 56
 ; RV32-NEXT:    mul a2, a2, a3
 ; RV32-NEXT:    add a2, sp, a2
 ; RV32-NEXT:    addi a2, a2, 16
 ; RV32-NEXT:    vl4r.v v8, (a2) # Unknown-size Folded Reload
 ; RV32-NEXT:    vse32.v v8, (a1)
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 84
+; RV32-NEXT:    li a2, 80
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl4r.v v8, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vse32.v v8, (a0)
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 92
+; RV32-NEXT:    li a1, 88
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add sp, sp, a0
 ; RV32-NEXT:    .cfi_def_cfa sp, 16
@@ -665,145 +675,154 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    vmv.v.x v17, a6
 ; RV64-NEXT:    addi a6, a2, 65
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT:    vrgather.vi v24, v8, 4
+; RV64-NEXT:    vrgather.vi v4, v8, 4
 ; RV64-NEXT:    vrgather.vi v20, v8, 5
 ; RV64-NEXT:    csrr a7, vlenb
-; RV64-NEXT:    li t0, 68
+; RV64-NEXT:    li t0, 84
 ; RV64-NEXT:    mul a7, a7, t0
 ; RV64-NEXT:    add a7, sp, a7
 ; RV64-NEXT:    addi a7, a7, 16
 ; RV64-NEXT:    vs4r.v v20, (a7) # Unknown-size Folded Spill
 ; RV64-NEXT:    vrgatherei16.vv v20, v8, v16
 ; RV64-NEXT:    csrr a7, vlenb
-; RV64-NEXT:    li t0, 84
-; RV64-NEXT:    mul a7, a7, t0
+; RV64-NEXT:    slli a7, a7, 6
 ; RV64-NEXT:    add a7, sp, a7
 ; RV64-NEXT:    addi a7, a7, 16
 ; RV64-NEXT:    vs4r.v v20, (a7) # Unknown-size Folded Spill
 ; RV64-NEXT:    vrgatherei16.vv v20, v8, v17
 ; RV64-NEXT:    csrr a7, vlenb
-; RV64-NEXT:    li t0, 72
+; RV64-NEXT:    li t0, 56
 ; RV64-NEXT:    mul a7, a7, t0
 ; RV64-NEXT:    add a7, sp, a7
 ; RV64-NEXT:    addi a7, a7, 16
 ; RV64-NEXT:    vs4r.v v20, (a7) # Unknown-size Folded Spill
 ; RV64-NEXT:    vrgather.vi v16, v8, 2
 ; RV64-NEXT:    csrr a7, vlenb
-; RV64-NEXT:    slli a7, a7, 6
+; RV64-NEXT:    li t0, 72
+; RV64-NEXT:    mul a7, a7, t0
 ; RV64-NEXT:    add a7, sp, a7
 ; RV64-NEXT:    addi a7, a7, 16
 ; RV64-NEXT:    vs4r.v v16, (a7) # Unknown-size Folded Spill
 ; RV64-NEXT:    vrgather.vi v16, v8, 3
 ; RV64-NEXT:    csrr a7, vlenb
-; RV64-NEXT:    li t0, 56
+; RV64-NEXT:    li t0, 68
 ; RV64-NEXT:    mul a7, a7, t0
 ; RV64-NEXT:    add a7, sp, a7
 ; RV64-NEXT:    addi a7, a7, 16
 ; RV64-NEXT:    vs4r.v v16, (a7) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetivli zero, 8, e64, m8, ta, ma
-; RV64-NEXT:    vslidedown.vi v16, v8, 8
+; RV64-NEXT:    vslidedown.vi v8, v8, 8
 ; RV64-NEXT:    csrr a7, vlenb
-; RV64-NEXT:    li t0, 48
+; RV64-NEXT:    li t0, 40
 ; RV64-NEXT:    mul a7, a7, t0
 ; RV64-NEXT:    add a7, sp, a7
 ; RV64-NEXT:    addi a7, a7, 16
-; RV64-NEXT:    vs8r.v v16, (a7) # Unknown-size Folded Spill
-; RV64-NEXT:    vmv.s.x v21, a4
+; RV64-NEXT:    vs8r.v v8, (a7) # Unknown-size Folded Spill
+; RV64-NEXT:    vmv.s.x v0, a4
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    slli a4, a4, 5
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 16
+; RV64-NEXT:    vs1r.v v0, (a4) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT:    vle64.v v8, (a1)
-; RV64-NEXT:    vle64.v v0, (a3)
+; RV64-NEXT:    vle64.v v24, (a1)
+; RV64-NEXT:    vle64.v v16, (a3)
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    li a3, 40
+; RV64-NEXT:    li a3, 76
 ; RV64-NEXT:    mul a1, a1, a3
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vs8r.v v0, (a1) # Unknown-size Folded Spill
-; RV64-NEXT:    vle16.v v2, (a5)
-; RV64-NEXT:    vmv.s.x v20, a6
-; RV64-NEXT:    vmv1r.v v0, v21
-; RV64-NEXT:    vmv1r.v v7, v21
+; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vle16.v v12, (a5)
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs2r.v v12, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vmv.s.x v2, a6
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; RV64-NEXT:    vrgather.vi v24, v16, 2, v0.t
+; RV64-NEXT:    vrgather.vi v4, v8, 2, v0.t
 ; RV64-NEXT:    csrr a1, vlenb
 ; RV64-NEXT:    li a3, 60
 ; RV64-NEXT:    mul a1, a1, a3
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vs4r.v v24, (a1) # Unknown-size Folded Spill
-; RV64-NEXT:    vmv1r.v v0, v20
+; RV64-NEXT:    vs4r.v v4, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vmv1r.v v0, v2
+; RV64-NEXT:    vmv8r.v v8, v24
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    li a3, 40
+; RV64-NEXT:    li a3, 48
 ; RV64-NEXT:    mul a1, a1, a3
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT:    vmerge.vvm v24, v16, v8, v0
-; RV64-NEXT:    vmv8r.v v16, v8
+; RV64-NEXT:    vmerge.vvm v24, v16, v24, v0
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    li a3, 76
-; RV64-NEXT:    mul a1, a1, a3
+; RV64-NEXT:    slli a1, a1, 4
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
-; RV64-NEXT:    vrgatherei16.vv v8, v24, v2
+; RV64-NEXT:    vl2r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vrgatherei16.vv v0, v24, v16
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 5
+; RV64-NEXT:    li a3, 24
+; RV64-NEXT:    mul a1, a1, a3
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vs8r.v v0, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT:    lui a1, 2
 ; RV64-NEXT:    lui a3, %hi(.LCPI8_1)
 ; RV64-NEXT:    addi a3, a3, %lo(.LCPI8_1)
 ; RV64-NEXT:    addi a1, a1, 130
-; RV64-NEXT:    vle16.v v8, (a3)
+; RV64-NEXT:    vle16.v v16, (a3)
 ; RV64-NEXT:    csrr a3, vlenb
-; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    slli a3, a3, 3
 ; RV64-NEXT:    add a3, sp, a3
 ; RV64-NEXT:    addi a3, a3, 16
-; RV64-NEXT:    vs2r.v v8, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    vs2r.v v16, (a3) # Unknown-size Folded Spill
 ; RV64-NEXT:    vmv.s.x v2, a1
-; RV64-NEXT:    vmv1r.v v0, v7
-; RV64-NEXT:    addi a1, sp, 16
-; RV64-NEXT:    vs1r.v v7, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    li a3, 68
+; RV64-NEXT:    slli a1, a1, 5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vl1r.v v0, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    li a3, 84
 ; RV64-NEXT:    mul a1, a1, a3
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
 ; RV64-NEXT:    vl4r.v v24, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    li a3, 48
+; RV64-NEXT:    li a3, 40
 ; RV64-NEXT:    mul a1, a1, a3
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; RV64-NEXT:    vrgather.vi v24, v8, 3, v0.t
+; RV64-NEXT:    vrgather.vi v24, v16, 3, v0.t
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    li a3, 68
+; RV64-NEXT:    li a3, 84
 ; RV64-NEXT:    mul a1, a1, a3
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
 ; RV64-NEXT:    vs4r.v v24, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT:    vmv1r.v v0, v2
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    li a3, 40
+; RV64-NEXT:    li a3, 76
 ; RV64-NEXT:    mul a1, a1, a3
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT:    vmerge.vvm v24, v8, v16, v0
+; RV64-NEXT:    vmerge.vvm v24, v16, v8, v0
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    slli a1, a1, 3
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vl2r.v v16, (a1) # Unknown-size Folded Reload
-; RV64-NEXT:    vrgatherei16.vv v0, v24, v16
+; RV64-NEXT:    vl2r.v v8, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vrgatherei16.vv v0, v24, v8
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    li a3, 24
-; RV64-NEXT:    mul a1, a1, a3
+; RV64-NEXT:    slli a1, a1, 4
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
 ; RV64-NEXT:    vs8r.v v0, (a1) # Unknown-size Folded Spill
@@ -814,40 +833,39 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    vmv.s.x v0, a1
 ; RV64-NEXT:    vmv.s.x v2, a3
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    li a3, 76
+; RV64-NEXT:    li a3, 48
 ; RV64-NEXT:    mul a1, a1, a3
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV64-NEXT:    vmerge.vvm v24, v8, v16, v0
+; RV64-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vmerge.vvm v8, v16, v24, v0
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    slli a1, a1, 5
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
-; RV64-NEXT:    addi a1, sp, 16
 ; RV64-NEXT:    vl1r.v v7, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT:    vmv1r.v v0, v7
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    li a3, 84
-; RV64-NEXT:    mul a1, a1, a3
+; RV64-NEXT:    slli a1, a1, 6
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vl4r.v v24, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vl4r.v v12, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    li a3, 48
+; RV64-NEXT:    li a3, 40
 ; RV64-NEXT:    mul a1, a1, a3
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
 ; RV64-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vmv4r.v v8, v16
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; RV64-NEXT:    vrgather.vi v24, v16, 4, v0.t
+; RV64-NEXT:    vrgather.vi v12, v16, 4, v0.t
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    li a3, 84
-; RV64-NEXT:    mul a1, a1, a3
+; RV64-NEXT:    slli a1, a1, 6
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vs4r.v v24, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vs4r.v v12, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT:    vmv1r.v v0, v2
 ; RV64-NEXT:    csrr a1, vlenb
 ; RV64-NEXT:    li a3, 76
@@ -856,53 +874,48 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    addi a1, a1, 16
 ; RV64-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT:    vmerge.vvm v24, v8, v16, v0
+; RV64-NEXT:    vmerge.vvm v16, v16, v24, v0
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    slli a1, a1, 3
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
-; RV64-NEXT:    vmv8r.v v16, v8
+; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT:    vmv1r.v v0, v7
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    li a3, 72
-; RV64-NEXT:    mul a1, a1, a3
-; RV64-NEXT:    add a1, sp, a1
-; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vl4r.v v12, (a1) # Unknown-size Folded Reload
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    li a3, 48
+; RV64-NEXT:    li a3, 56
 ; RV64-NEXT:    mul a1, a1, a3
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
-; RV64-NEXT:    vmv4r.v v8, v24
+; RV64-NEXT:    vl4r.v v24, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; RV64-NEXT:    vrgather.vi v12, v24, 5, v0.t
+; RV64-NEXT:    vrgather.vi v24, v8, 5, v0.t
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    li a3, 72
+; RV64-NEXT:    li a3, 56
 ; RV64-NEXT:    mul a1, a1, a3
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vs4r.v v12, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vs4r.v v24, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT:    lui a1, 96
 ; RV64-NEXT:    li a3, 192
 ; RV64-NEXT:    vmv.s.x v3, a3
 ; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vmv.v.x v12, a1
+; RV64-NEXT:    vmv.v.x v24, a1
 ; RV64-NEXT:    vmv1r.v v0, v3
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 6
+; RV64-NEXT:    li a3, 72
+; RV64-NEXT:    mul a1, a1, a3
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vl4r.v v24, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vl4r.v v28, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; RV64-NEXT:    vrgatherei16.vv v24, v8, v12, v0.t
+; RV64-NEXT:    vrgatherei16.vv v28, v8, v24, v0.t
+; RV64-NEXT:    vmv4r.v v16, v8
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 6
+; RV64-NEXT:    li a3, 72
+; RV64-NEXT:    mul a1, a1, a3
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vs4r.v v24, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vs4r.v v28, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT:    lui a1, %hi(.LCPI8_2)
 ; RV64-NEXT:    addi a1, a1, %lo(.LCPI8_2)
 ; RV64-NEXT:    li a3, 1040
@@ -910,50 +923,67 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    addi a4, a4, 1
 ; RV64-NEXT:    vmv.s.x v0, a3
 ; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vmv.v.x v12, a4
+; RV64-NEXT:    vmv.v.x v5, a4
 ; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV64-NEXT:    vle16.v v6, (a1)
-; RV64-NEXT:    vmv8r.v v24, v16
 ; RV64-NEXT:    csrr a1, vlenb
 ; RV64-NEXT:    li a3, 76
 ; RV64-NEXT:    mul a1, a1, a3
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV64-NEXT:    vmerge.vvm v16, v24, v16, v0
-; RV64-NEXT:    addi a1, sp, 16
-; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    li a3, 48
+; RV64-NEXT:    mul a1, a1, a3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vmerge.vvm v24, v8, v24, v0
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT:    vmv1r.v v0, v3
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    li a3, 56
+; RV64-NEXT:    li a3, 68
 ; RV64-NEXT:    mul a1, a1, a3
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vl4r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vl4r.v v28, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; RV64-NEXT:    vrgatherei16.vv v16, v8, v12, v0.t
+; RV64-NEXT:    vrgatherei16.vv v28, v16, v5, v0.t
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    li a3, 56
+; RV64-NEXT:    li a3, 68
 ; RV64-NEXT:    mul a1, a1, a3
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vs4r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vs4r.v v28, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT:    addi a1, a2, -2016
 ; RV64-NEXT:    vmv.s.x v0, a1
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vrgatherei16.vv v16, v24, v6
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    li a2, 40
+; RV64-NEXT:    mul a1, a1, a2
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    li a2, 48
+; RV64-NEXT:    mul a1, a1, a2
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
 ; RV64-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT:    vrgatherei16.vv v16, v8, v6
 ; RV64-NEXT:    csrr a1, vlenb
 ; RV64-NEXT:    li a2, 76
 ; RV64-NEXT:    mul a1, a1, a2
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-NEXT:    vmerge.vvm v8, v24, v8, v0
+; RV64-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vmerge.vvm v8, v16, v8, v0
 ; RV64-NEXT:    csrr a1, vlenb
 ; RV64-NEXT:    li a2, 76
 ; RV64-NEXT:    mul a1, a1, a2
@@ -962,80 +992,94 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT:    lui a1, %hi(.LCPI8_3)
 ; RV64-NEXT:    addi a1, a1, %lo(.LCPI8_3)
-; RV64-NEXT:    vle16.v v24, (a1)
+; RV64-NEXT:    vle16.v v8, (a1)
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 5
+; RV64-NEXT:    li a2, 60
+; RV64-NEXT:    mul a1, a1, a2
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vl4r.v v12, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    li a2, 60
+; RV64-NEXT:    li a2, 24
 ; RV64-NEXT:    mul a1, a1, a2
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vl4r.v v8, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT:    vsetivli zero, 6, e64, m4, tu, ma
-; RV64-NEXT:    vmv.v.v v8, v0
+; RV64-NEXT:    vmv.v.v v12, v0
 ; RV64-NEXT:    csrr a1, vlenb
 ; RV64-NEXT:    li a2, 60
 ; RV64-NEXT:    mul a1, a1, a2
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vs4r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vs4r.v v12, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    li a2, 68
+; RV64-NEXT:    li a2, 84
 ; RV64-NEXT:    mul a1, a1, a2
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vl4r.v v0, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vl4r.v v12, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    li a2, 24
-; RV64-NEXT:    mul a1, a1, a2
+; RV64-NEXT:    slli a1, a1, 4
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-NEXT:    vmv.v.v v0, v8
+; RV64-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vmv.v.v v12, v24
 ; RV64-NEXT:    csrr a1, vlenb
 ; RV64-NEXT:    li a2, 84
 ; RV64-NEXT:    mul a1, a1, a2
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vl4r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-NEXT:    vsetivli zero, 5, e64, m4, tu, ma
-; RV64-NEXT:    vmv.v.v v8, v16
+; RV64-NEXT:    vs4r.v v12, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    li a2, 84
+; RV64-NEXT:    slli a1, a1, 6
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vl4r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    li a2, 40
 ; RV64-NEXT:    mul a1, a1, a2
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vs4r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vsetivli zero, 5, e64, m4, tu, ma
+; RV64-NEXT:    vmv.v.v v16, v24
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    slli a1, a1, 3
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT:    vrgatherei16.vv v16, v8, v24
+; RV64-NEXT:    vrgatherei16.vv v0, v24, v8
 ; RV64-NEXT:    lui a1, %hi(.LCPI8_4)
 ; RV64-NEXT:    addi a1, a1, %lo(.LCPI8_4)
 ; RV64-NEXT:    vle16.v v8, (a1)
 ; RV64-NEXT:    lui a1, %hi(.LCPI8_5)
 ; RV64-NEXT:    addi a1, a1, %lo(.LCPI8_5)
-; RV64-NEXT:    vle16.v v6, (a1)
+; RV64-NEXT:    vle16.v v10, (a1)
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    li a2, 72
+; RV64-NEXT:    slli a1, a1, 6
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs2r.v v10, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    li a2, 56
 ; RV64-NEXT:    mul a1, a1, a2
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
 ; RV64-NEXT:    vl4r.v v12, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT:    vsetivli zero, 5, e64, m4, tu, ma
-; RV64-NEXT:    vmv.v.v v12, v16
-; RV64-NEXT:    addi a1, sp, 16
-; RV64-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vmv.v.v v12, v0
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT:    vrgatherei16.vv v24, v16, v8
+; RV64-NEXT:    vrgatherei16.vv v24, v0, v8
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 6
+; RV64-NEXT:    li a2, 72
+; RV64-NEXT:    mul a1, a1, a2
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
 ; RV64-NEXT:    vl4r.v v8, (a1) # Unknown-size Folded Reload
@@ -1046,25 +1090,32 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    mul a1, a1, a2
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 6
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vl2r.v v20, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT:    vrgatherei16.vv v24, v16, v6
+; RV64-NEXT:    vrgatherei16.vv v24, v0, v20
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    li a2, 56
+; RV64-NEXT:    li a2, 68
 ; RV64-NEXT:    mul a1, a1, a2
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vl4r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vl4r.v v28, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT:    vsetivli zero, 5, e64, m4, tu, ma
-; RV64-NEXT:    vmv.v.v v16, v24
+; RV64-NEXT:    vmv.v.v v28, v24
 ; RV64-NEXT:    addi a1, a0, 256
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT:    vse64.v v8, (a1)
 ; RV64-NEXT:    addi a1, a0, 320
-; RV64-NEXT:    vse64.v v16, (a1)
+; RV64-NEXT:    vse64.v v28, (a1)
 ; RV64-NEXT:    addi a1, a0, 192
 ; RV64-NEXT:    vse64.v v12, (a1)
 ; RV64-NEXT:    addi a1, a0, 128
+; RV64-NEXT:    vse64.v v16, (a1)
+; RV64-NEXT:    addi a1, a0, 64
 ; RV64-NEXT:    csrr a2, vlenb
 ; RV64-NEXT:    li a3, 84
 ; RV64-NEXT:    mul a2, a2, a3
@@ -1072,8 +1123,6 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    addi a2, a2, 16
 ; RV64-NEXT:    vl4r.v v8, (a2) # Unknown-size Folded Reload
 ; RV64-NEXT:    vse64.v v8, (a1)
-; RV64-NEXT:    addi a1, a0, 64
-; RV64-NEXT:    vse64.v v0, (a1)
 ; RV64-NEXT:    csrr a1, vlenb
 ; RV64-NEXT:    li a2, 60
 ; RV64-NEXT:    mul a1, a1, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
index 7fe1406..7354f9a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -4032,7 +4032,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
 ;
 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i64:
 ; RV64ZVE32F:       # %bb.0:
-; RV64ZVE32F-NEXT:    ld a4, 40(a0)
+; RV64ZVE32F-NEXT:    ld a5, 40(a0)
 ; RV64ZVE32F-NEXT:    ld a3, 48(a0)
 ; RV64ZVE32F-NEXT:    ld a2, 56(a0)
 ; RV64ZVE32F-NEXT:    ld t1, 8(a0)
@@ -4040,8 +4040,8 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
 ; RV64ZVE32F-NEXT:    ld a7, 24(a0)
 ; RV64ZVE32F-NEXT:    ld a6, 32(a0)
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT:    vmv.x.s a5, v0
-; RV64ZVE32F-NEXT:    andi t2, a5, 1
+; RV64ZVE32F-NEXT:    vmv.x.s a4, v0
+; RV64ZVE32F-NEXT:    andi t2, a4, 1
 ; RV64ZVE32F-NEXT:    beqz t2, .LBB44_2
 ; RV64ZVE32F-NEXT:  # %bb.1: # %cond.store
 ; RV64ZVE32F-NEXT:    ld a0, 0(a0)
@@ -4051,7 +4051,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
 ; RV64ZVE32F-NEXT:    add t2, a1, t2
 ; RV64ZVE32F-NEXT:    sd a0, 0(t2)
 ; RV64ZVE32F-NEXT:  .LBB44_2: # %else
-; RV64ZVE32F-NEXT:    andi a0, a5, 2
+; RV64ZVE32F-NEXT:    andi a0, a4, 2
 ; RV64ZVE32F-NEXT:    beqz a0, .LBB44_4
 ; RV64ZVE32F-NEXT:  # %bb.3: # %cond.store1
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
@@ -4064,18 +4064,18 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
 ; RV64ZVE32F-NEXT:  .LBB44_4: # %else2
 ; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
-; RV64ZVE32F-NEXT:    andi a0, a5, 4
+; RV64ZVE32F-NEXT:    andi a0, a4, 4
 ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
 ; RV64ZVE32F-NEXT:    bnez a0, .LBB44_12
 ; RV64ZVE32F-NEXT:  # %bb.5: # %else4
-; RV64ZVE32F-NEXT:    andi a0, a5, 8
+; RV64ZVE32F-NEXT:    andi a0, a4, 8
 ; RV64ZVE32F-NEXT:    bnez a0, .LBB44_13
 ; RV64ZVE32F-NEXT:  .LBB44_6: # %else6
-; RV64ZVE32F-NEXT:    andi a0, a5, 16
+; RV64ZVE32F-NEXT:    andi a0, a4, 16
 ; RV64ZVE32F-NEXT:    bnez a0, .LBB44_14
 ; RV64ZVE32F-NEXT:  .LBB44_7: # %else8
-; RV64ZVE32F-NEXT:    andi a0, a5, 32
+; RV64ZVE32F-NEXT:    andi a0, a4, 32
 ; RV64ZVE32F-NEXT:    beqz a0, .LBB44_9
 ; RV64ZVE32F-NEXT:  .LBB44_8: # %cond.store9
 ; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
@@ -4083,13 +4083,13 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
 ; RV64ZVE32F-NEXT:    andi a0, a0, 255
 ; RV64ZVE32F-NEXT:    slli a0, a0, 3
 ; RV64ZVE32F-NEXT:    add a0, a1, a0
-; RV64ZVE32F-NEXT:    sd a4, 0(a0)
+; RV64ZVE32F-NEXT:    sd a5, 0(a0)
 ; RV64ZVE32F-NEXT:  .LBB44_9: # %else10
-; RV64ZVE32F-NEXT:    andi a0, a5, 64
+; RV64ZVE32F-NEXT:    andi a0, a4, 64
 ; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
 ; RV64ZVE32F-NEXT:    bnez a0, .LBB44_15
 ; RV64ZVE32F-NEXT:  # %bb.10: # %else12
-; RV64ZVE32F-NEXT:    andi a0, a5, -128
+; RV64ZVE32F-NEXT:    andi a0, a4, -128
 ; RV64ZVE32F-NEXT:    bnez a0, .LBB44_16
 ; RV64ZVE32F-NEXT:  .LBB44_11: # %else14
 ; RV64ZVE32F-NEXT:    ret
@@ -4099,7 +4099,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
 ; RV64ZVE32F-NEXT:    slli a0, a0, 3
 ; RV64ZVE32F-NEXT:    add a0, a1, a0
 ; RV64ZVE32F-NEXT:    sd t0, 0(a0)
-; RV64ZVE32F-NEXT:    andi a0, a5, 8
+; RV64ZVE32F-NEXT:    andi a0, a4, 8
 ; RV64ZVE32F-NEXT:    beqz a0, .LBB44_6
 ; RV64ZVE32F-NEXT:  .LBB44_13: # %cond.store5
 ; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
@@ -4108,7 +4108,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
 ; RV64ZVE32F-NEXT:    slli a0, a0, 3
 ; RV64ZVE32F-NEXT:    add a0, a1, a0
 ; RV64ZVE32F-NEXT:    sd a7, 0(a0)
-; RV64ZVE32F-NEXT:    andi a0, a5, 16
+; RV64ZVE32F-NEXT:    andi a0, a4, 16
 ; RV64ZVE32F-NEXT:    beqz a0, .LBB44_7
 ; RV64ZVE32F-NEXT:  .LBB44_14: # %cond.store7
 ; RV64ZVE32F-NEXT:    vmv.x.s a0, v9
@@ -4116,7 +4116,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
 ; RV64ZVE32F-NEXT:    slli a0, a0, 3
 ; RV64ZVE32F-NEXT:    add a0, a1, a0
 ; RV64ZVE32F-NEXT:    sd a6, 0(a0)
-; RV64ZVE32F-NEXT:    andi a0, a5, 32
+; RV64ZVE32F-NEXT:    andi a0, a4, 32
 ; RV64ZVE32F-NEXT:    bnez a0, .LBB44_8
 ; RV64ZVE32F-NEXT:    j .LBB44_9
 ; RV64ZVE32F-NEXT:  .LBB44_15: # %cond.store11
@@ -4125,7 +4125,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
 ; RV64ZVE32F-NEXT:    slli a0, a0, 3
 ; RV64ZVE32F-NEXT:    add a0, a1, a0
 ; RV64ZVE32F-NEXT:    sd a3, 0(a0)
-; RV64ZVE32F-NEXT:    andi a0, a5, -128
+; RV64ZVE32F-NEXT:    andi a0, a4, -128
 ; RV64ZVE32F-NEXT:    beqz a0, .LBB44_11
 ; RV64ZVE32F-NEXT:  .LBB44_16: # %cond.store13
 ; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
@@ -4798,7 +4798,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
 ;
 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i64:
 ; RV64ZVE32F:       # %bb.0:
-; RV64ZVE32F-NEXT:    ld a4, 40(a0)
+; RV64ZVE32F-NEXT:    ld a5, 40(a0)
 ; RV64ZVE32F-NEXT:    ld a3, 48(a0)
 ; RV64ZVE32F-NEXT:    ld a2, 56(a0)
 ; RV64ZVE32F-NEXT:    ld t1, 8(a0)
@@ -4806,8 +4806,8 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
 ; RV64ZVE32F-NEXT:    ld a7, 24(a0)
 ; RV64ZVE32F-NEXT:    ld a6, 32(a0)
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT:    vmv.x.s a5, v0
-; RV64ZVE32F-NEXT:    andi t2, a5, 1
+; RV64ZVE32F-NEXT:    vmv.x.s a4, v0
+; RV64ZVE32F-NEXT:    andi t2, a4, 1
 ; RV64ZVE32F-NEXT:    beqz t2, .LBB47_2
 ; RV64ZVE32F-NEXT:  # %bb.1: # %cond.store
 ; RV64ZVE32F-NEXT:    ld a0, 0(a0)
@@ -4818,7 +4818,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
 ; RV64ZVE32F-NEXT:    add t2, a1, t2
 ; RV64ZVE32F-NEXT:    sd a0, 0(t2)
 ; RV64ZVE32F-NEXT:  .LBB47_2: # %else
-; RV64ZVE32F-NEXT:    andi a0, a5, 2
+; RV64ZVE32F-NEXT:    andi a0, a4, 2
 ; RV64ZVE32F-NEXT:    beqz a0, .LBB47_4
 ; RV64ZVE32F-NEXT:  # %bb.3: # %cond.store1
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
@@ -4831,18 +4831,18 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
 ; RV64ZVE32F-NEXT:  .LBB47_4: # %else2
 ; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
-; RV64ZVE32F-NEXT:    andi a0, a5, 4
+; RV64ZVE32F-NEXT:    andi a0, a4, 4
 ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
 ; RV64ZVE32F-NEXT:    bnez a0, .LBB47_12
 ; RV64ZVE32F-NEXT:  # %bb.5: # %else4
-; RV64ZVE32F-NEXT:    andi a0, a5, 8
+; RV64ZVE32F-NEXT:    andi a0, a4, 8
 ; RV64ZVE32F-NEXT:    bnez a0, .LBB47_13
 ; RV64ZVE32F-NEXT:  .LBB47_6: # %else6
-; RV64ZVE32F-NEXT:    andi a0, a5, 16
+; RV64ZVE32F-NEXT:    andi a0, a4, 16
 ; RV64ZVE32F-NEXT:    bnez a0, .LBB47_14
 ; RV64ZVE32F-NEXT:  .LBB47_7: # %else8
-; RV64ZVE32F-NEXT:    andi a0, a5, 32
+; RV64ZVE32F-NEXT:    andi a0, a4, 32
 ; RV64ZVE32F-NEXT:    beqz a0, .LBB47_9
 ; RV64ZVE32F-NEXT:  .LBB47_8: # %cond.store9
 ; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
@@ -4850,13 +4850,13 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
 ; RV64ZVE32F-NEXT:    slli a0, a0, 48
 ; RV64ZVE32F-NEXT:    srli a0, a0, 45
 ; RV64ZVE32F-NEXT:    add a0, a1, a0
-; RV64ZVE32F-NEXT:    sd a4, 0(a0)
+; RV64ZVE32F-NEXT:    sd a5, 0(a0)
 ; RV64ZVE32F-NEXT:  .LBB47_9: # %else10
-; RV64ZVE32F-NEXT:    andi a0, a5, 64
+; RV64ZVE32F-NEXT:    andi a0, a4, 64
 ; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
 ; RV64ZVE32F-NEXT:    bnez a0, .LBB47_15
 ; RV64ZVE32F-NEXT:  # %bb.10: # %else12
-; RV64ZVE32F-NEXT:    andi a0, a5, -128
+; RV64ZVE32F-NEXT:    andi a0, a4, -128
 ; RV64ZVE32F-NEXT:    bnez a0, .LBB47_16
 ; RV64ZVE32F-NEXT:  .LBB47_11: # %else14
 ; RV64ZVE32F-NEXT:    ret
@@ -4866,7 +4866,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
 ; RV64ZVE32F-NEXT:    srli a0, a0, 45
 ; RV64ZVE32F-NEXT:    add a0, a1, a0
 ; RV64ZVE32F-NEXT:    sd t0, 0(a0)
-; RV64ZVE32F-NEXT:    andi a0, a5, 8
+; RV64ZVE32F-NEXT:    andi a0, a4, 8
 ; RV64ZVE32F-NEXT:    beqz a0, .LBB47_6
 ; RV64ZVE32F-NEXT:  .LBB47_13: # %cond.store5
 ; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
@@ -4875,7 +4875,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
 ; RV64ZVE32F-NEXT:    srli a0, a0, 45
 ; RV64ZVE32F-NEXT:    add a0, a1, a0
 ; RV64ZVE32F-NEXT:    sd a7, 0(a0)
-; RV64ZVE32F-NEXT:    andi a0, a5, 16
+; RV64ZVE32F-NEXT:    andi a0, a4, 16
 ; RV64ZVE32F-NEXT:    beqz a0, .LBB47_7
 ; RV64ZVE32F-NEXT:  .LBB47_14: # %cond.store7
 ; RV64ZVE32F-NEXT:    vmv.x.s a0, v9
@@ -4883,7 +4883,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
 ; RV64ZVE32F-NEXT:    srli a0, a0, 45
 ; RV64ZVE32F-NEXT:    add a0, a1, a0
 ; RV64ZVE32F-NEXT:    sd a6, 0(a0)
-; RV64ZVE32F-NEXT:    andi a0, a5, 32
+; RV64ZVE32F-NEXT:    andi a0, a4, 32
 ; RV64ZVE32F-NEXT:    bnez a0, .LBB47_8
 ; RV64ZVE32F-NEXT:    j .LBB47_9
 ; RV64ZVE32F-NEXT:  .LBB47_15: # %cond.store11
@@ -4892,7 +4892,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
 ; RV64ZVE32F-NEXT:    srli a0, a0, 45
 ; RV64ZVE32F-NEXT:    add a0, a1, a0
 ; RV64ZVE32F-NEXT:    sd a3, 0(a0)
-; RV64ZVE32F-NEXT:    andi a0, a5, -128
+; RV64ZVE32F-NEXT:    andi a0, a4, -128
 ; RV64ZVE32F-NEXT:    beqz a0, .LBB47_11
 ; RV64ZVE32F-NEXT:  .LBB47_16: # %cond.store13
 ; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
index afd560f..c0c17d4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
@@ -431,3 +431,31 @@ define void @shuffle_i256_ldst(ptr %p) vscale_range(2,2) {
   store <4 x i256> %res, ptr %p
   ret void
 }
+
+define void @shuffle_3_input_vectors() vscale_range(4,4) {
+; CHECK-LABEL: shuffle_3_input_vectors:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 1
+; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.i v0, 6
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vmv.v.i v16, 0
+; CHECK-NEXT:    vsetivli zero, 4, e64, m1, ta, mu
+; CHECK-NEXT:    vslidedown.vi v20, v8, 1, v0.t
+; CHECK-NEXT:    vslideup.vi v20, v9, 3
+; CHECK-NEXT:    vslidedown.vi v21, v9, 1
+; CHECK-NEXT:    vmv1r.v v22, v8
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vmsgt.vi v8, v16, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    sb a0, 0(zero)
+; CHECK-NEXT:    ret
+  %1 = shufflevector <32 x i64> zeroinitializer, <32 x i64> splat (i64 1), <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 poison, i32 poison, i32 33, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %2 = icmp slt <32 x i64> zeroinitializer, %1
+  %3 = bitcast <32 x i1> %2 to i32
+  %4 = trunc i32 %3 to i8
+  store i8 %4, ptr null, align 1
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll
index 5fd7e47..71a1507 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll
@@ -874,27 +874,30 @@ define <16 x i8> @reverse_v16i8_2(<8 x i8> %a, <8 x i8> %b) {
 define <32 x i8> @reverse_v32i8_2(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: reverse_v32i8_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    vid.v v12
-; CHECK-NEXT:    addi a1, a0, -1
-; CHECK-NEXT:    vrsub.vx v12, v12, a1
-; CHECK-NEXT:    lui a1, 16
-; CHECK-NEXT:    addi a1, a1, -1
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    addi a2, a0, -1
+; CHECK-NEXT:    vrsub.vx v10, v10, a2
+; CHECK-NEXT:    lui a2, 16
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vrgatherei16.vv v15, v8, v12
-; CHECK-NEXT:    vrgatherei16.vv v14, v9, v12
+; CHECK-NEXT:    vrgatherei16.vv v15, v8, v10
+; CHECK-NEXT:    vrgatherei16.vv v14, v12, v10
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    addi a2, a2, -1
+; CHECK-NEXT:    vrsub.vi v10, v10, 15
+; CHECK-NEXT:    vsetvli a3, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vrgather.vv v17, v13, v10
+; CHECK-NEXT:    vrgather.vv v16, v9, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmv.s.x v0, a1
-; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vmv.s.x v0, a2
 ; CHECK-NEXT:    slli a0, a0, 1
-; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
-; CHECK-NEXT:    vid.v v8
 ; CHECK-NEXT:    addi a0, a0, -32
-; CHECK-NEXT:    vrsub.vi v12, v8, 15
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
 ; CHECK-NEXT:    vslidedown.vx v8, v14, a0
-; CHECK-NEXT:    vrgather.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
 ; CHECK-NEXT:    ret
   %res = shufflevector <16 x i8> %a, <16 x i8> %b,  <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
   ret <32 x i8> %res
@@ -943,23 +946,25 @@ define <8 x i16> @reverse_v8i16_2(<4 x i16> %a, <4 x i16> %b) {
 define <16 x i16> @reverse_v16i16_2(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: reverse_v16i16_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vid.v v10
 ; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    vrsub.vi v10, v10, 7
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vrgather.vv v13, v12, v10
+; CHECK-NEXT:    vrgather.vv v12, v9, v10
 ; CHECK-NEXT:    vid.v v9
 ; CHECK-NEXT:    srli a1, a0, 1
 ; CHECK-NEXT:    addi a1, a1, -1
 ; CHECK-NEXT:    vrsub.vx v9, v9, a1
-; CHECK-NEXT:    vrgather.vv v13, v8, v9
-; CHECK-NEXT:    vrgather.vv v12, v11, v9
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; CHECK-NEXT:    vid.v v8
 ; CHECK-NEXT:    li a1, 255
 ; CHECK-NEXT:    addi a0, a0, -16
-; CHECK-NEXT:    vrsub.vi v14, v8, 7
+; CHECK-NEXT:    vrgather.vv v15, v8, v9
+; CHECK-NEXT:    vrgather.vv v14, v10, v9
 ; CHECK-NEXT:    vmv.s.x v0, a1
-; CHECK-NEXT:    vslidedown.vx v8, v12, a0
-; CHECK-NEXT:    vrgather.vv v8, v10, v14, v0.t
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vslidedown.vx v8, v14, a0
+; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
 ; CHECK-NEXT:    ret
   %res = shufflevector <8 x i16> %a, <8 x i16> %b,  <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
   ret <16 x i16> %res
@@ -1024,24 +1029,27 @@ define <4 x i32> @reverse_v4i32_2(<2 x i32> %a, < 2 x i32> %b) {
 define <8 x i32> @reverse_v8i32_2(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: reverse_v8i32_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vid.v v10
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    vid.v v9
-; CHECK-NEXT:    srli a1, a0, 2
-; CHECK-NEXT:    addi a1, a1, -1
-; CHECK-NEXT:    vrsub.vx v9, v9, a1
-; CHECK-NEXT:    vrgather.vv v13, v8, v9
-; CHECK-NEXT:    vrgather.vv v12, v11, v9
+; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vid.v v12
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    vmv.v.i v0, 15
+; CHECK-NEXT:    vrsub.vi v10, v10, 3
+; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vrgatherei16.vv v15, v11, v10
+; CHECK-NEXT:    vrgatherei16.vv v14, v9, v10
+; CHECK-NEXT:    srli a1, a0, 2
 ; CHECK-NEXT:    srli a0, a0, 1
-; CHECK-NEXT:    vrsub.vi v14, v8, 3
+; CHECK-NEXT:    addi a1, a1, -1
 ; CHECK-NEXT:    addi a0, a0, -8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vrsub.vx v10, v12, a1
+; CHECK-NEXT:    vrgather.vv v13, v8, v10
+; CHECK-NEXT:    vrgather.vv v12, v9, v10
+; CHECK-NEXT:    vmv.v.i v0, 15
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vslidedown.vx v8, v12, a0
-; CHECK-NEXT:    vrgatherei16.vv v8, v10, v14, v0.t
+; CHECK-NEXT:    vmerge.vvm v8, v8, v14, v0
 ; CHECK-NEXT:    ret
   %res = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
   ret <8 x i32> %res
@@ -1197,23 +1205,25 @@ define <8 x half> @reverse_v8f16_2(<4 x half> %a, <4 x half> %b) {
 define <16 x half> @reverse_v16f16_2(<8 x half> %a, <8 x half> %b) {
 ; CHECK-LABEL: reverse_v16f16_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vid.v v10
 ; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    vrsub.vi v10, v10, 7
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vrgather.vv v13, v12, v10
+; CHECK-NEXT:    vrgather.vv v12, v9, v10
 ; CHECK-NEXT:    vid.v v9
 ; CHECK-NEXT:    srli a1, a0, 1
 ; CHECK-NEXT:    addi a1, a1, -1
 ; CHECK-NEXT:    vrsub.vx v9, v9, a1
-; CHECK-NEXT:    vrgather.vv v13, v8, v9
-; CHECK-NEXT:    vrgather.vv v12, v11, v9
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; CHECK-NEXT:    vid.v v8
 ; CHECK-NEXT:    li a1, 255
 ; CHECK-NEXT:    addi a0, a0, -16
-; CHECK-NEXT:    vrsub.vi v14, v8, 7
+; CHECK-NEXT:    vrgather.vv v15, v8, v9
+; CHECK-NEXT:    vrgather.vv v14, v10, v9
 ; CHECK-NEXT:    vmv.s.x v0, a1
-; CHECK-NEXT:    vslidedown.vx v8, v12, a0
-; CHECK-NEXT:    vrgather.vv v8, v10, v14, v0.t
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vslidedown.vx v8, v14, a0
+; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
 ; CHECK-NEXT:    ret
   %res = shufflevector <8 x half> %a, <8 x half> %b,  <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
   ret <16 x half> %res
@@ -1269,24 +1279,27 @@ define <4 x float> @reverse_v4f32_2(<2 x float> %a, <2 x float> %b) {
 define <8 x float> @reverse_v8f32_2(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: reverse_v8f32_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vmv1r.v v10, v9
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vid.v v10
 ; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    vid.v v9
-; CHECK-NEXT:    srli a1, a0, 2
-; CHECK-NEXT:    addi a1, a1, -1
-; CHECK-NEXT:    vrsub.vx v9, v9, a1
-; CHECK-NEXT:    vrgather.vv v13, v8, v9
-; CHECK-NEXT:    vrgather.vv v12, v11, v9
+; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vid.v v12
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    vmv.v.i v0, 15
+; CHECK-NEXT:    vrsub.vi v10, v10, 3
+; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vrgatherei16.vv v15, v11, v10
+; CHECK-NEXT:    vrgatherei16.vv v14, v9, v10
+; CHECK-NEXT:    srli a1, a0, 2
 ; CHECK-NEXT:    srli a0, a0, 1
-; CHECK-NEXT:    vrsub.vi v14, v8, 3
+; CHECK-NEXT:    addi a1, a1, -1
 ; CHECK-NEXT:    addi a0, a0, -8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vrsub.vx v10, v12, a1
+; CHECK-NEXT:    vrgather.vv v13, v8, v10
+; CHECK-NEXT:    vrgather.vv v12, v9, v10
+; CHECK-NEXT:    vmv.v.i v0, 15
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vslidedown.vx v8, v12, a0
-; CHECK-NEXT:    vrgatherei16.vv v8, v10, v14, v0.t
+; CHECK-NEXT:    vmerge.vvm v8, v8, v14, v0
 ; CHECK-NEXT:    ret
   %res = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
   ret <8 x float> %res
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll
index 464b4ec..86d8a27 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll
@@ -515,8 +515,10 @@ define <8 x i16> @shuffle_v8i16_as_i64_16(<8 x i16> %v) {
 ; ZVKB-ZVE32X-NEXT:    vsetivli zero, 8, e16, m2, ta, ma
 ; ZVKB-ZVE32X-NEXT:    vle8.v v10, (a0)
 ; ZVKB-ZVE32X-NEXT:    vsext.vf2 v12, v10
+; ZVKB-ZVE32X-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVKB-ZVE32X-NEXT:    vrgather.vv v11, v9, v12
 ; ZVKB-ZVE32X-NEXT:    vrgather.vv v10, v8, v12
-; ZVKB-ZVE32X-NEXT:    vmv.v.v v8, v10
+; ZVKB-ZVE32X-NEXT:    vmv2r.v v8, v10
 ; ZVKB-ZVE32X-NEXT:    ret
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> poison, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4>
   ret <8 x i16> %shuffle
@@ -562,9 +564,10 @@ define <8 x i16> @shuffle_v8i16_as_i64_32(<8 x i16> %v) {
 ; ZVKB-ZVE32X-NEXT:    vmv.s.x v10, a0
 ; ZVKB-ZVE32X-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
 ; ZVKB-ZVE32X-NEXT:    vsext.vf2 v12, v10
-; ZVKB-ZVE32X-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVKB-ZVE32X-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; ZVKB-ZVE32X-NEXT:    vrgatherei16.vv v11, v9, v12
 ; ZVKB-ZVE32X-NEXT:    vrgatherei16.vv v10, v8, v12
-; ZVKB-ZVE32X-NEXT:    vmv.v.v v8, v10
+; ZVKB-ZVE32X-NEXT:    vmv2r.v v8, v10
 ; ZVKB-ZVE32X-NEXT:    ret
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
   ret <8 x i16> %shuffle
@@ -609,8 +612,10 @@ define <8 x i16> @shuffle_v8i16_as_i64_48(<8 x i16> %v) {
 ; ZVKB-ZVE32X-NEXT:    vsetivli zero, 8, e16, m2, ta, ma
 ; ZVKB-ZVE32X-NEXT:    vle8.v v10, (a0)
 ; ZVKB-ZVE32X-NEXT:    vsext.vf2 v12, v10
+; ZVKB-ZVE32X-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVKB-ZVE32X-NEXT:    vrgather.vv v11, v9, v12
 ; ZVKB-ZVE32X-NEXT:    vrgather.vv v10, v8, v12
-; ZVKB-ZVE32X-NEXT:    vmv.v.v v8, v10
+; ZVKB-ZVE32X-NEXT:    vmv2r.v v8, v10
 ; ZVKB-ZVE32X-NEXT:    ret
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> poison, <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6>
   ret <8 x i16> %shuffle
@@ -655,9 +660,12 @@ define <8 x i32> @shuffle_v8i32_as_i64(<8 x i32> %v) {
 ; ZVKB-ZVE32X-NEXT:    vsetivli zero, 8, e16, m2, ta, ma
 ; ZVKB-ZVE32X-NEXT:    vle8.v v12, (a0)
 ; ZVKB-ZVE32X-NEXT:    vsext.vf2 v16, v12
-; ZVKB-ZVE32X-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVKB-ZVE32X-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; ZVKB-ZVE32X-NEXT:    vrgatherei16.vv v13, v9, v16
 ; ZVKB-ZVE32X-NEXT:    vrgatherei16.vv v12, v8, v16
-; ZVKB-ZVE32X-NEXT:    vmv.v.v v8, v12
+; ZVKB-ZVE32X-NEXT:    vrgatherei16.vv v14, v10, v16
+; ZVKB-ZVE32X-NEXT:    vrgatherei16.vv v15, v11, v16
+; ZVKB-ZVE32X-NEXT:    vmv4r.v v8, v12
 ; ZVKB-ZVE32X-NEXT:    ret
   %shuffle = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
   ret <8 x i32> %shuffle
@@ -726,8 +734,10 @@ define <8 x half> @shuffle_v8f16_as_i64_16(<8 x half> %v) {
 ; ZVKB-ZVE32X-NEXT:    vsetivli zero, 8, e16, m2, ta, ma
 ; ZVKB-ZVE32X-NEXT:    vle8.v v10, (a0)
 ; ZVKB-ZVE32X-NEXT:    vsext.vf2 v12, v10
+; ZVKB-ZVE32X-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVKB-ZVE32X-NEXT:    vrgather.vv v11, v9, v12
 ; ZVKB-ZVE32X-NEXT:    vrgather.vv v10, v8, v12
-; ZVKB-ZVE32X-NEXT:    vmv.v.v v8, v10
+; ZVKB-ZVE32X-NEXT:    vmv2r.v v8, v10
 ; ZVKB-ZVE32X-NEXT:    ret
   %shuffle = shufflevector <8 x half> %v, <8 x half> poison, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4>
   ret <8 x half> %shuffle
@@ -773,9 +783,10 @@ define <8 x half> @shuffle_v8f16_as_i64_32(<8 x half> %v) {
 ; ZVKB-ZVE32X-NEXT:    vmv.s.x v10, a0
 ; ZVKB-ZVE32X-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
 ; ZVKB-ZVE32X-NEXT:    vsext.vf2 v12, v10
-; ZVKB-ZVE32X-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVKB-ZVE32X-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; ZVKB-ZVE32X-NEXT:    vrgatherei16.vv v11, v9, v12
 ; ZVKB-ZVE32X-NEXT:    vrgatherei16.vv v10, v8, v12
-; ZVKB-ZVE32X-NEXT:    vmv.v.v v8, v10
+; ZVKB-ZVE32X-NEXT:    vmv2r.v v8, v10
 ; ZVKB-ZVE32X-NEXT:    ret
   %shuffle = shufflevector <8 x half> %v, <8 x half> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
   ret <8 x half> %shuffle
@@ -820,8 +831,10 @@ define <8 x half> @shuffle_v8f16_as_i64_48(<8 x half> %v) {
 ; ZVKB-ZVE32X-NEXT:    vsetivli zero, 8, e16, m2, ta, ma
 ; ZVKB-ZVE32X-NEXT:    vle8.v v10, (a0)
 ; ZVKB-ZVE32X-NEXT:    vsext.vf2 v12, v10
+; ZVKB-ZVE32X-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVKB-ZVE32X-NEXT:    vrgather.vv v11, v9, v12
 ; ZVKB-ZVE32X-NEXT:    vrgather.vv v10, v8, v12
-; ZVKB-ZVE32X-NEXT:    vmv.v.v v8, v10
+; ZVKB-ZVE32X-NEXT:    vmv2r.v v8, v10
 ; ZVKB-ZVE32X-NEXT:    ret
   %shuffle = shufflevector <8 x half> %v, <8 x half> poison, <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6>
   ret <8 x half> %shuffle
@@ -866,9 +879,12 @@ define <8 x float> @shuffle_v8f32_as_i64(<8 x float> %v) {
 ; ZVKB-ZVE32X-NEXT:    vsetivli zero, 8, e16, m2, ta, ma
 ; ZVKB-ZVE32X-NEXT:    vle8.v v12, (a0)
 ; ZVKB-ZVE32X-NEXT:    vsext.vf2 v16, v12
-; ZVKB-ZVE32X-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; ZVKB-ZVE32X-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; ZVKB-ZVE32X-NEXT:    vrgatherei16.vv v13, v9, v16
 ; ZVKB-ZVE32X-NEXT:    vrgatherei16.vv v12, v8, v16
-; ZVKB-ZVE32X-NEXT:    vmv.v.v v8, v12
+; ZVKB-ZVE32X-NEXT:    vrgatherei16.vv v14, v10, v16
+; ZVKB-ZVE32X-NEXT:    vrgatherei16.vv v15, v11, v16
+; ZVKB-ZVE32X-NEXT:    vmv4r.v v8, v12
 ; ZVKB-ZVE32X-NEXT:    ret
   %shuffle = shufflevector <8 x float> %v, <8 x float> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
   ret <8 x float> %shuffle
@@ -920,3 +936,87 @@ define <8 x float> @shuffle_v8f32_as_i64_exact(<8 x float> %v) vscale_range(2,2)
   %shuffle = shufflevector <8 x float> %v, <8 x float> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
   ret <8 x float> %shuffle
 }
+
+define <8 x i64> @shuffle_v8i64_as_i128(<8 x i64> %v) {
+; CHECK-LABEL: shuffle_v8i64_as_i128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI29_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI29_0)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v16, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vrgatherei16.vv v13, v9, v16
+; CHECK-NEXT:    vrgatherei16.vv v12, v8, v16
+; CHECK-NEXT:    vrgatherei16.vv v14, v10, v16
+; CHECK-NEXT:    vrgatherei16.vv v15, v11, v16
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+;
+; ZVKB-V-LABEL: shuffle_v8i64_as_i128:
+; ZVKB-V:       # %bb.0:
+; ZVKB-V-NEXT:    lui a0, %hi(.LCPI29_0)
+; ZVKB-V-NEXT:    addi a0, a0, %lo(.LCPI29_0)
+; ZVKB-V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVKB-V-NEXT:    vle16.v v16, (a0)
+; ZVKB-V-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; ZVKB-V-NEXT:    vrgatherei16.vv v13, v9, v16
+; ZVKB-V-NEXT:    vrgatherei16.vv v12, v8, v16
+; ZVKB-V-NEXT:    vrgatherei16.vv v14, v10, v16
+; ZVKB-V-NEXT:    vrgatherei16.vv v15, v11, v16
+; ZVKB-V-NEXT:    vmv4r.v v8, v12
+; ZVKB-V-NEXT:    ret
+  %shuffle = shufflevector <8 x i64> %v, <8 x i64> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_as_i256(<8 x i64> %v) {
+; CHECK-LABEL: shuffle_v8i64_as_i256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI30_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI30_0)
+; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT:    vle16.v v16, (a0)
+; CHECK-NEXT:    vrgatherei16.vv v12, v8, v16
+; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    ret
+;
+; ZVKB-V-LABEL: shuffle_v8i64_as_i256:
+; ZVKB-V:       # %bb.0:
+; ZVKB-V-NEXT:    lui a0, %hi(.LCPI30_0)
+; ZVKB-V-NEXT:    addi a0, a0, %lo(.LCPI30_0)
+; ZVKB-V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; ZVKB-V-NEXT:    vle16.v v16, (a0)
+; ZVKB-V-NEXT:    vrgatherei16.vv v12, v8, v16
+; ZVKB-V-NEXT:    vmv.v.v v8, v12
+; ZVKB-V-NEXT:    ret
+  %shuffle = shufflevector <8 x i64> %v, <8 x i64> poison, <8 x i32> <i32 2, i32 1, i32 0, i32 3, i32 6, i32 5, i32 4, i32 7>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_as_i256_zvl256b(<8 x i64> %v) vscale_range(4,0) {
+; CHECK-LABEL: shuffle_v8i64_as_i256_zvl256b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI31_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI31_0)
+; CHECK-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vrgatherei16.vv v11, v9, v12
+; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12
+; CHECK-NEXT:    vmv2r.v v8, v10
+; CHECK-NEXT:    ret
+;
+; ZVKB-V-LABEL: shuffle_v8i64_as_i256_zvl256b:
+; ZVKB-V:       # %bb.0:
+; ZVKB-V-NEXT:    lui a0, %hi(.LCPI31_0)
+; ZVKB-V-NEXT:    addi a0, a0, %lo(.LCPI31_0)
+; ZVKB-V-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVKB-V-NEXT:    vle16.v v12, (a0)
+; ZVKB-V-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; ZVKB-V-NEXT:    vrgatherei16.vv v11, v9, v12
+; ZVKB-V-NEXT:    vrgatherei16.vv v10, v8, v12
+; ZVKB-V-NEXT:    vmv2r.v v8, v10
+; ZVKB-V-NEXT:    ret
+  %shuffle = shufflevector <8 x i64> %v, <8 x i64> poison, <8 x i32> <i32 2, i32 1, i32 0, i32 3, i32 6, i32 5, i32 4, i32 7>
+  ret <8 x i64> %shuffle
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
index 8b527fb..7fad68d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
@@ -755,14 +755,12 @@ define <vscale x 1 x half> @vfmax_nxv1f16_vv_nnana(<vscale x 1 x half> %a, <vsca
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfadd.vv v9, v10, v10
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
+; ZVFHMIN-NEXT:    vmv1r.v v9, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v10, v8, v9, v0
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v9, v9
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v9, v8, v0
-; ZVFHMIN-NEXT:    vfmax.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
@@ -789,14 +787,12 @@ define <vscale x 1 x half> @vfmax_nxv1f16_vv_nnanb(<vscale x 1 x half> %a, <vsca
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v10
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v9, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vmv1r.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v10, v9, v8, v0
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v9, v0
-; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v10
+; ZVFHMIN-NEXT:    vfmax.vv v9, v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
index 16c119d..8cae0bb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
@@ -755,14 +755,12 @@ define <vscale x 1 x half> @vfmin_nxv1f16_vv_nnana(<vscale x 1 x half> %a, <vsca
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfadd.vv v9, v10, v10
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
+; ZVFHMIN-NEXT:    vmv1r.v v9, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v10, v8, v9, v0
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v9, v9
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v9, v8, v0
-; ZVFHMIN-NEXT:    vfmin.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfmin.vv v9, v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
@@ -789,14 +787,12 @@ define <vscale x 1 x half> @vfmin_nxv1f16_vv_nnanb(<vscale x 1 x half> %a, <vsca
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v10
 ; ZVFHMIN-NEXT:    vmfeq.vv v0, v9, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    vmv1r.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmerge.vvm v10, v9, v8, v0
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
-; ZVFHMIN-NEXT:    vmerge.vvm v8, v8, v9, v0
-; ZVFHMIN-NEXT:    vfmin.vv v9, v8, v10
+; ZVFHMIN-NEXT:    vfmin.vv v9, v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
index 30ef3dc..e627270 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
@@ -30,13 +30,11 @@ define <vscale x 1 x i1> @fcmp_oeq_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscal
 define <vscale x 1 x i1> @fcmp_oeq_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_oeq_vf_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmfeq.vf v0, v9, fa5, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -47,13 +45,11 @@ define <vscale x 1 x i1> @fcmp_oeq_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat
 define <vscale x 1 x i1> @fcmp_oeq_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_oeq_vf_swap_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v8, v10, v0.t
+; CHECK-NEXT:    vmfeq.vf v0, v9, fa5, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -77,13 +73,11 @@ define <vscale x 1 x i1> @fcmp_ogt_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscal
 define <vscale x 1 x i1> @fcmp_ogt_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ogt_vf_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v9, fa5, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -94,13 +88,11 @@ define <vscale x 1 x i1> @fcmp_ogt_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat
 define <vscale x 1 x i1> @fcmp_ogt_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ogt_vf_swap_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -124,13 +116,11 @@ define <vscale x 1 x i1> @fcmp_oge_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscal
 define <vscale x 1 x i1> @fcmp_oge_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_oge_vf_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfle.vv v0, v8, v10, v0.t
+; CHECK-NEXT:    vmfge.vf v0, v9, fa5, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -141,13 +131,11 @@ define <vscale x 1 x i1> @fcmp_oge_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat
 define <vscale x 1 x i1> @fcmp_oge_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_oge_vf_swap_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfle.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmfle.vf v0, v9, fa5, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -171,13 +159,11 @@ define <vscale x 1 x i1> @fcmp_olt_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscal
 define <vscale x 1 x i1> @fcmp_olt_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_olt_vf_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -188,13 +174,11 @@ define <vscale x 1 x i1> @fcmp_olt_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat
 define <vscale x 1 x i1> @fcmp_olt_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_olt_vf_swap_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v8, v10, v0.t
+; CHECK-NEXT:    vmfgt.vf v0, v9, fa5, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -218,13 +202,11 @@ define <vscale x 1 x i1> @fcmp_ole_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscal
 define <vscale x 1 x i1> @fcmp_ole_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ole_vf_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfle.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmfle.vf v0, v9, fa5, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -235,13 +217,11 @@ define <vscale x 1 x i1> @fcmp_ole_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat
 define <vscale x 1 x i1> @fcmp_ole_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ole_vf_swap_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfle.vv v0, v8, v10, v0.t
+; CHECK-NEXT:    vmfge.vf v0, v9, fa5, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -267,14 +247,12 @@ define <vscale x 1 x i1> @fcmp_one_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscal
 define <vscale x 1 x i1> @fcmp_one_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_one_vf_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    vmflt.vv v9, v10, v9, v0.t
+; CHECK-NEXT:    vmflt.vf v8, v9, fa5, v0.t
+; CHECK-NEXT:    vmfgt.vf v9, v9, fa5, v0.t
 ; CHECK-NEXT:    vmor.mm v0, v9, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
@@ -286,14 +264,12 @@ define <vscale x 1 x i1> @fcmp_one_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat
 define <vscale x 1 x i1> @fcmp_one_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_one_vf_swap_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v10, v9, v0.t
-; CHECK-NEXT:    vmflt.vv v9, v9, v10, v0.t
+; CHECK-NEXT:    vmfgt.vf v8, v9, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v9, v9, fa5, v0.t
 ; CHECK-NEXT:    vmor.mm v0, v9, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
@@ -320,16 +296,14 @@ define <vscale x 1 x i1> @fcmp_ord_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscal
 define <vscale x 1 x i1> @fcmp_ord_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ord_vf_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vfmv.v.f v8, fa5
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vmfeq.vf v8, v8, fa5, v0.t
 ; CHECK-NEXT:    vmfeq.vv v9, v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10, v0.t
 ; CHECK-NEXT:    vmand.mm v0, v9, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
@@ -341,16 +315,14 @@ define <vscale x 1 x i1> @fcmp_ord_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat
 define <vscale x 1 x i1> @fcmp_ord_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ord_vf_swap_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vfmv.v.f v8, fa5
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vmfeq.vf v8, v8, fa5, v0.t
 ; CHECK-NEXT:    vmfeq.vv v9, v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfeq.vv v8, v10, v10, v0.t
 ; CHECK-NEXT:    vmand.mm v0, v8, v9
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
@@ -377,14 +349,12 @@ define <vscale x 1 x i1> @fcmp_ueq_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscal
 define <vscale x 1 x i1> @fcmp_ueq_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ueq_vf_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    vmflt.vv v9, v10, v9, v0.t
+; CHECK-NEXT:    vmflt.vf v8, v9, fa5, v0.t
+; CHECK-NEXT:    vmfgt.vf v9, v9, fa5, v0.t
 ; CHECK-NEXT:    vmnor.mm v0, v9, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
@@ -396,14 +366,12 @@ define <vscale x 1 x i1> @fcmp_ueq_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat
 define <vscale x 1 x i1> @fcmp_ueq_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ueq_vf_swap_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v10, v9, v0.t
-; CHECK-NEXT:    vmflt.vv v9, v9, v10, v0.t
+; CHECK-NEXT:    vmfgt.vf v8, v9, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v9, v9, fa5, v0.t
 ; CHECK-NEXT:    vmnor.mm v0, v9, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
@@ -429,13 +397,11 @@ define <vscale x 1 x i1> @fcmp_ugt_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscal
 define <vscale x 1 x i1> @fcmp_ugt_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ugt_vf_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfle.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vmfle.vf v8, v9, fa5, v0.t
 ; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
@@ -447,13 +413,11 @@ define <vscale x 1 x i1> @fcmp_ugt_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat
 define <vscale x 1 x i1> @fcmp_ugt_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ugt_vf_swap_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfle.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vmfge.vf v8, v9, fa5, v0.t
 ; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
@@ -479,13 +443,11 @@ define <vscale x 1 x i1> @fcmp_uge_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscal
 define <vscale x 1 x i1> @fcmp_uge_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_uge_vf_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vmflt.vf v8, v9, fa5, v0.t
 ; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
@@ -497,13 +459,11 @@ define <vscale x 1 x i1> @fcmp_uge_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat
 define <vscale x 1 x i1> @fcmp_uge_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_uge_vf_swap_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vmfgt.vf v8, v9, fa5, v0.t
 ; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
@@ -529,13 +489,11 @@ define <vscale x 1 x i1> @fcmp_ult_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscal
 define <vscale x 1 x i1> @fcmp_ult_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ult_vf_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfle.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vmfge.vf v8, v9, fa5, v0.t
 ; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
@@ -547,13 +505,11 @@ define <vscale x 1 x i1> @fcmp_ult_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat
 define <vscale x 1 x i1> @fcmp_ult_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ult_vf_swap_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfle.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vmfle.vf v8, v9, fa5, v0.t
 ; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
@@ -579,13 +535,11 @@ define <vscale x 1 x i1> @fcmp_ule_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscal
 define <vscale x 1 x i1> @fcmp_ule_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ule_vf_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    vmfgt.vf v8, v9, fa5, v0.t
 ; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
@@ -597,13 +551,11 @@ define <vscale x 1 x i1> @fcmp_ule_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat
 define <vscale x 1 x i1> @fcmp_ule_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ule_vf_swap_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vmflt.vf v8, v9, fa5, v0.t
 ; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
@@ -628,13 +580,11 @@ define <vscale x 1 x i1> @fcmp_une_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscal
 define <vscale x 1 x i1> @fcmp_une_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_une_vf_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v10, v8, v0.t
+; CHECK-NEXT:    vmfne.vf v0, v9, fa5, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -645,13 +595,11 @@ define <vscale x 1 x i1> @fcmp_une_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat
 define <vscale x 1 x i1> @fcmp_une_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_une_vf_swap_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v8, v10, v0.t
+; CHECK-NEXT:    vmfne.vf v0, v9, fa5, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -677,16 +625,14 @@ define <vscale x 1 x i1> @fcmp_uno_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscal
 define <vscale x 1 x i1> @fcmp_uno_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_uno_vf_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vfmv.v.f v8, fa5
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vmfne.vf v8, v8, fa5, v0.t
 ; CHECK-NEXT:    vmfne.vv v9, v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfne.vv v8, v10, v10, v0.t
 ; CHECK-NEXT:    vmor.mm v0, v9, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
@@ -698,16 +644,14 @@ define <vscale x 1 x i1> @fcmp_uno_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat
 define <vscale x 1 x i1> @fcmp_uno_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_uno_vf_swap_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vfmv.v.f v8, fa5
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vmfne.vf v8, v8, fa5, v0.t
 ; CHECK-NEXT:    vmfne.vv v9, v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vmfne.vv v8, v10, v10, v0.t
 ; CHECK-NEXT:    vmor.mm v0, v8, v9
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
@@ -751,13 +695,11 @@ define <vscale x 8 x i1> @fcmp_oeq_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 define <vscale x 8 x i1> @fcmp_oeq_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_oeq_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v8, v12, v16, v0.t
+; CHECK-NEXT:    vmfeq.vf v8, v12, fa5, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -769,13 +711,11 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 define <vscale x 8 x i1> @fcmp_oeq_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_oeq_vf_swap_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v8, v16, v12, v0.t
+; CHECK-NEXT:    vmfeq.vf v8, v12, fa5, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -801,13 +741,11 @@ define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ogt_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v16, v12, v0.t
+; CHECK-NEXT:    vmfgt.vf v8, v12, fa5, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -819,13 +757,11 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 define <vscale x 8 x i1> @fcmp_ogt_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ogt_vf_swap_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v12, v16, v0.t
+; CHECK-NEXT:    vmflt.vf v8, v12, fa5, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -851,13 +787,11 @@ define <vscale x 8 x i1> @fcmp_oge_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 define <vscale x 8 x i1> @fcmp_oge_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_oge_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v8, v16, v12, v0.t
+; CHECK-NEXT:    vmfge.vf v8, v12, fa5, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -869,13 +803,11 @@ define <vscale x 8 x i1> @fcmp_oge_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 define <vscale x 8 x i1> @fcmp_oge_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_oge_vf_swap_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v8, v12, v16, v0.t
+; CHECK-NEXT:    vmfle.vf v8, v12, fa5, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -901,13 +833,11 @@ define <vscale x 8 x i1> @fcmp_olt_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 define <vscale x 8 x i1> @fcmp_olt_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_olt_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v12, v16, v0.t
+; CHECK-NEXT:    vmflt.vf v8, v12, fa5, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -919,13 +849,11 @@ define <vscale x 8 x i1> @fcmp_olt_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 define <vscale x 8 x i1> @fcmp_olt_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_olt_vf_swap_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v16, v12, v0.t
+; CHECK-NEXT:    vmfgt.vf v8, v12, fa5, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -951,13 +879,11 @@ define <vscale x 8 x i1> @fcmp_ole_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 define <vscale x 8 x i1> @fcmp_ole_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ole_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v8, v12, v16, v0.t
+; CHECK-NEXT:    vmfle.vf v8, v12, fa5, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -969,13 +895,11 @@ define <vscale x 8 x i1> @fcmp_ole_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 define <vscale x 8 x i1> @fcmp_ole_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ole_vf_swap_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v8, v16, v12, v0.t
+; CHECK-NEXT:    vmfge.vf v8, v12, fa5, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -1002,14 +926,12 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 define <vscale x 8 x i1> @fcmp_one_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_one_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    vmflt.vv v9, v16, v12, v0.t
+; CHECK-NEXT:    vmflt.vf v8, v12, fa5, v0.t
+; CHECK-NEXT:    vmfgt.vf v9, v12, fa5, v0.t
 ; CHECK-NEXT:    vmor.mm v0, v9, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -1021,14 +943,12 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 define <vscale x 8 x i1> @fcmp_one_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_one_vf_swap_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v16, v12, v0.t
-; CHECK-NEXT:    vmflt.vv v9, v12, v16, v0.t
+; CHECK-NEXT:    vmfgt.vf v8, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v9, v12, fa5, v0.t
 ; CHECK-NEXT:    vmor.mm v0, v9, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -1057,17 +977,16 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 define <vscale x 8 x i1> @fcmp_ord_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ord_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v10, v12, v12, v0.t
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vfmv.v.f v12, fa5
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vmfeq.vf v10, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v8, v12, v12, v0.t
-; CHECK-NEXT:    vmand.mm v0, v10, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -1078,17 +997,16 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 define <vscale x 8 x i1> @fcmp_ord_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ord_vf_swap_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v10, v12, v12, v0.t
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vfmv.v.f v12, fa5
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vmfeq.vf v10, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v8, v12, v12, v0.t
-; CHECK-NEXT:    vmand.mm v0, v8, v10
+; CHECK-NEXT:    vmand.mm v0, v10, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -1114,14 +1032,12 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ueq_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    vmflt.vv v9, v16, v12, v0.t
+; CHECK-NEXT:    vmflt.vf v8, v12, fa5, v0.t
+; CHECK-NEXT:    vmfgt.vf v9, v12, fa5, v0.t
 ; CHECK-NEXT:    vmnor.mm v0, v9, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -1133,14 +1049,12 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 define <vscale x 8 x i1> @fcmp_ueq_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ueq_vf_swap_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v16, v12, v0.t
-; CHECK-NEXT:    vmflt.vv v9, v12, v16, v0.t
+; CHECK-NEXT:    vmfgt.vf v8, v12, fa5, v0.t
+; CHECK-NEXT:    vmflt.vf v9, v12, fa5, v0.t
 ; CHECK-NEXT:    vmnor.mm v0, v9, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -1166,13 +1080,11 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ugt_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v8, v12, v16, v0.t
+; CHECK-NEXT:    vmfle.vf v8, v12, fa5, v0.t
 ; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -1184,13 +1096,11 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 define <vscale x 8 x i1> @fcmp_ugt_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ugt_vf_swap_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v8, v16, v12, v0.t
+; CHECK-NEXT:    vmfge.vf v8, v12, fa5, v0.t
 ; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -1216,13 +1126,11 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 define <vscale x 8 x i1> @fcmp_uge_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_uge_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v12, v16, v0.t
+; CHECK-NEXT:    vmflt.vf v8, v12, fa5, v0.t
 ; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -1234,13 +1142,11 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 define <vscale x 8 x i1> @fcmp_uge_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_uge_vf_swap_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v16, v12, v0.t
+; CHECK-NEXT:    vmfgt.vf v8, v12, fa5, v0.t
 ; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -1266,13 +1172,11 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 define <vscale x 8 x i1> @fcmp_ult_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ult_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v8, v16, v12, v0.t
+; CHECK-NEXT:    vmfge.vf v8, v12, fa5, v0.t
 ; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -1284,13 +1188,11 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 define <vscale x 8 x i1> @fcmp_ult_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ult_vf_swap_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v8, v12, v16, v0.t
+; CHECK-NEXT:    vmfle.vf v8, v12, fa5, v0.t
 ; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -1316,13 +1218,11 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 define <vscale x 8 x i1> @fcmp_ule_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ule_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v16, v12, v0.t
+; CHECK-NEXT:    vmfgt.vf v8, v12, fa5, v0.t
 ; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -1334,13 +1234,11 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 define <vscale x 8 x i1> @fcmp_ule_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ule_vf_swap_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v12, v16, v0.t
+; CHECK-NEXT:    vmflt.vf v8, v12, fa5, v0.t
 ; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -1366,13 +1264,11 @@ define <vscale x 8 x i1> @fcmp_une_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 define <vscale x 8 x i1> @fcmp_une_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_une_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v8, v12, v16, v0.t
+; CHECK-NEXT:    vmfne.vf v8, v12, fa5, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -1384,13 +1280,11 @@ define <vscale x 8 x i1> @fcmp_une_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 define <vscale x 8 x i1> @fcmp_une_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_une_vf_swap_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a1
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v8, v16, v12, v0.t
+; CHECK-NEXT:    vmfne.vf v8, v12, fa5, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -1419,17 +1313,16 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 define <vscale x 8 x i1> @fcmp_uno_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_uno_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v10, v12, v12, v0.t
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vfmv.v.f v12, fa5
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vmfne.vf v10, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v8, v12, v12, v0.t
-; CHECK-NEXT:    vmor.mm v0, v10, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -1440,17 +1333,16 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 define <vscale x 8 x i1> @fcmp_uno_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_uno_vf_swap_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a1, fa0
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v10, v12, v12, v0.t
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vfmv.v.f v12, fa5
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT:    vmfne.vf v10, v12, fa5, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v8, v12, v12, v0.t
-; CHECK-NEXT:    vmor.mm v0, v8, v10
+; CHECK-NEXT:    vmor.mm v0, v10, v8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -1677,13 +1569,11 @@ define <vscale x 1 x i1> @fcmp_oeq_vf_nxv1f16(<vscale x 1 x half> %va, half %b,
 ;
 ; ZVFHMIN-LABEL: fcmp_oeq_vf_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vf v0, v9, fa5, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -1700,13 +1590,11 @@ define <vscale x 1 x i1> @fcmp_oeq_vf_swap_nxv1f16(<vscale x 1 x half> %va, half
 ;
 ; ZVFHMIN-LABEL: fcmp_oeq_vf_swap_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vf v0, v9, fa5, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -1742,13 +1630,11 @@ define <vscale x 1 x i1> @fcmp_ogt_vf_nxv1f16(<vscale x 1 x half> %va, half %b,
 ;
 ; ZVFHMIN-LABEL: fcmp_ogt_vf_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v0, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmfgt.vf v0, v9, fa5, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -1765,13 +1651,11 @@ define <vscale x 1 x i1> @fcmp_ogt_vf_swap_nxv1f16(<vscale x 1 x half> %va, half
 ;
 ; ZVFHMIN-LABEL: fcmp_ogt_vf_swap_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -1807,13 +1691,11 @@ define <vscale x 1 x i1> @fcmp_oge_vf_nxv1f16(<vscale x 1 x half> %va, half %b,
 ;
 ; ZVFHMIN-LABEL: fcmp_oge_vf_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v0, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmfge.vf v0, v9, fa5, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -1830,13 +1712,11 @@ define <vscale x 1 x i1> @fcmp_oge_vf_swap_nxv1f16(<vscale x 1 x half> %va, half
 ;
 ; ZVFHMIN-LABEL: fcmp_oge_vf_swap_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v0, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vmfle.vf v0, v9, fa5, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -1872,13 +1752,11 @@ define <vscale x 1 x i1> @fcmp_olt_vf_nxv1f16(<vscale x 1 x half> %va, half %b,
 ;
 ; ZVFHMIN-LABEL: fcmp_olt_vf_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v0, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v0, v9, fa5, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -1895,13 +1773,11 @@ define <vscale x 1 x i1> @fcmp_olt_vf_swap_nxv1f16(<vscale x 1 x half> %va, half
 ;
 ; ZVFHMIN-LABEL: fcmp_olt_vf_swap_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v0, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmfgt.vf v0, v9, fa5, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -1937,13 +1813,11 @@ define <vscale x 1 x i1> @fcmp_ole_vf_nxv1f16(<vscale x 1 x half> %va, half %b,
 ;
 ; ZVFHMIN-LABEL: fcmp_ole_vf_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v0, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vmfle.vf v0, v9, fa5, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -1960,13 +1834,11 @@ define <vscale x 1 x i1> @fcmp_ole_vf_swap_nxv1f16(<vscale x 1 x half> %va, half
 ;
 ; ZVFHMIN-LABEL: fcmp_ole_vf_swap_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v0, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmfge.vf v0, v9, fa5, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -2008,14 +1880,12 @@ define <vscale x 1 x i1> @fcmp_one_vf_nxv1f16(<vscale x 1 x half> %va, half %b,
 ;
 ; ZVFHMIN-LABEL: fcmp_one_vf_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vmv.v.x v8, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v9, v10, v0.t
-; ZVFHMIN-NEXT:    vmflt.vv v9, v10, v9, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v8, v9, fa5, v0.t
+; ZVFHMIN-NEXT:    vmfgt.vf v9, v9, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmor.mm v0, v9, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
@@ -2035,14 +1905,12 @@ define <vscale x 1 x i1> @fcmp_one_vf_swap_nxv1f16(<vscale x 1 x half> %va, half
 ;
 ; ZVFHMIN-LABEL: fcmp_one_vf_swap_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vmv.v.x v8, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v10, v9, v0.t
-; ZVFHMIN-NEXT:    vmflt.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT:    vmfgt.vf v8, v9, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v9, v9, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmor.mm v0, v9, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
@@ -2087,16 +1955,14 @@ define <vscale x 1 x i1> @fcmp_ord_vf_nxv1f16(<vscale x 1 x half> %va, half %b,
 ;
 ; ZVFHMIN-LABEL: fcmp_ord_vf_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vmv.v.x v8, a1
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfmv.v.f v8, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vmfeq.vf v8, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmfeq.vv v9, v9, v9, v0.t
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v10, v10, v0.t
 ; ZVFHMIN-NEXT:    vmand.mm v0, v9, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
@@ -2118,16 +1984,14 @@ define <vscale x 1 x i1> @fcmp_ord_vf_swap_nxv1f16(<vscale x 1 x half> %va, half
 ;
 ; ZVFHMIN-LABEL: fcmp_ord_vf_swap_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vmv.v.x v8, a1
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfmv.v.f v8, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vmfeq.vf v8, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmfeq.vv v9, v9, v9, v0.t
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v10, v10, v0.t
 ; ZVFHMIN-NEXT:    vmand.mm v0, v8, v9
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
@@ -2170,14 +2034,12 @@ define <vscale x 1 x i1> @fcmp_ueq_vf_nxv1f16(<vscale x 1 x half> %va, half %b,
 ;
 ; ZVFHMIN-LABEL: fcmp_ueq_vf_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vmv.v.x v8, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v9, v10, v0.t
-; ZVFHMIN-NEXT:    vmflt.vv v9, v10, v9, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v8, v9, fa5, v0.t
+; ZVFHMIN-NEXT:    vmfgt.vf v9, v9, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmnor.mm v0, v9, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
@@ -2197,14 +2059,12 @@ define <vscale x 1 x i1> @fcmp_ueq_vf_swap_nxv1f16(<vscale x 1 x half> %va, half
 ;
 ; ZVFHMIN-LABEL: fcmp_ueq_vf_swap_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vmv.v.x v8, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v10, v9, v0.t
-; ZVFHMIN-NEXT:    vmflt.vv v9, v9, v10, v0.t
+; ZVFHMIN-NEXT:    vmfgt.vf v8, v9, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v9, v9, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmnor.mm v0, v9, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
@@ -2244,13 +2104,11 @@ define <vscale x 1 x i1> @fcmp_ugt_vf_nxv1f16(<vscale x 1 x half> %va, half %b,
 ;
 ; ZVFHMIN-LABEL: fcmp_ugt_vf_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vmfle.vf v8, v9, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
@@ -2269,13 +2127,11 @@ define <vscale x 1 x i1> @fcmp_ugt_vf_swap_nxv1f16(<vscale x 1 x half> %va, half
 ;
 ; ZVFHMIN-LABEL: fcmp_ugt_vf_swap_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmfge.vf v8, v9, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
@@ -2315,13 +2171,11 @@ define <vscale x 1 x i1> @fcmp_uge_vf_nxv1f16(<vscale x 1 x half> %va, half %b,
 ;
 ; ZVFHMIN-LABEL: fcmp_uge_vf_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v8, v9, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
@@ -2340,13 +2194,11 @@ define <vscale x 1 x i1> @fcmp_uge_vf_swap_nxv1f16(<vscale x 1 x half> %va, half
 ;
 ; ZVFHMIN-LABEL: fcmp_uge_vf_swap_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmfgt.vf v8, v9, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
@@ -2386,13 +2238,11 @@ define <vscale x 1 x i1> @fcmp_ult_vf_nxv1f16(<vscale x 1 x half> %va, half %b,
 ;
 ; ZVFHMIN-LABEL: fcmp_ult_vf_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmfge.vf v8, v9, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
@@ -2411,13 +2261,11 @@ define <vscale x 1 x i1> @fcmp_ult_vf_swap_nxv1f16(<vscale x 1 x half> %va, half
 ;
 ; ZVFHMIN-LABEL: fcmp_ult_vf_swap_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vmfle.vf v8, v9, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
@@ -2457,13 +2305,11 @@ define <vscale x 1 x i1> @fcmp_ule_vf_nxv1f16(<vscale x 1 x half> %va, half %b,
 ;
 ; ZVFHMIN-LABEL: fcmp_ule_vf_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmfgt.vf v8, v9, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
@@ -2482,13 +2328,11 @@ define <vscale x 1 x i1> @fcmp_ule_vf_swap_nxv1f16(<vscale x 1 x half> %va, half
 ;
 ; ZVFHMIN-LABEL: fcmp_ule_vf_swap_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v8, v9, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
@@ -2525,13 +2369,11 @@ define <vscale x 1 x i1> @fcmp_une_vf_nxv1f16(<vscale x 1 x half> %va, half %b,
 ;
 ; ZVFHMIN-LABEL: fcmp_une_vf_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v0, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vmfne.vf v0, v9, fa5, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -2548,13 +2390,11 @@ define <vscale x 1 x i1> @fcmp_une_vf_swap_nxv1f16(<vscale x 1 x half> %va, half
 ;
 ; ZVFHMIN-LABEL: fcmp_une_vf_swap_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v0, v8, v10, v0.t
+; ZVFHMIN-NEXT:    vmfne.vf v0, v9, fa5, v0.t
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -2598,16 +2438,14 @@ define <vscale x 1 x i1> @fcmp_uno_vf_nxv1f16(<vscale x 1 x half> %va, half %b,
 ;
 ; ZVFHMIN-LABEL: fcmp_uno_vf_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vmv.v.x v8, a1
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfmv.v.f v8, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vmfne.vf v8, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmfne.vv v9, v9, v9, v0.t
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v8, v10, v10, v0.t
 ; ZVFHMIN-NEXT:    vmor.mm v0, v9, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
@@ -2629,16 +2467,14 @@ define <vscale x 1 x i1> @fcmp_uno_vf_swap_nxv1f16(<vscale x 1 x half> %va, half
 ;
 ; ZVFHMIN-LABEL: fcmp_uno_vf_swap_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vmv.v.x v8, a1
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfmv.v.f v8, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vmfne.vf v8, v8, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmfne.vv v9, v9, v9, v0.t
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v8, v10, v10, v0.t
 ; ZVFHMIN-NEXT:    vmor.mm v0, v8, v9
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
@@ -2702,13 +2538,11 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ;
 ; ZVFHMIN-LABEL: fcmp_oeq_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v16, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vf v8, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -2727,13 +2561,11 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ;
 ; ZVFHMIN-LABEL: fcmp_oeq_vf_swap_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v8, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vmfeq.vf v8, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -2773,13 +2605,11 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ;
 ; ZVFHMIN-LABEL: fcmp_ogt_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vmfgt.vf v8, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -2798,13 +2628,11 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ;
 ; ZVFHMIN-LABEL: fcmp_ogt_vf_swap_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v16, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v8, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -2844,13 +2672,11 @@ define <vscale x 8 x i1> @fcmp_oge_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ;
 ; ZVFHMIN-LABEL: fcmp_oge_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vmfge.vf v8, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -2869,13 +2695,11 @@ define <vscale x 8 x i1> @fcmp_oge_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ;
 ; ZVFHMIN-LABEL: fcmp_oge_vf_swap_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v12, v16, v0.t
+; ZVFHMIN-NEXT:    vmfle.vf v8, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -2915,13 +2739,11 @@ define <vscale x 8 x i1> @fcmp_olt_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ;
 ; ZVFHMIN-LABEL: fcmp_olt_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v16, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v8, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -2940,13 +2762,11 @@ define <vscale x 8 x i1> @fcmp_olt_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ;
 ; ZVFHMIN-LABEL: fcmp_olt_vf_swap_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vmfgt.vf v8, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -2986,13 +2806,11 @@ define <vscale x 8 x i1> @fcmp_ole_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ;
 ; ZVFHMIN-LABEL: fcmp_ole_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v12, v16, v0.t
+; ZVFHMIN-NEXT:    vmfle.vf v8, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -3011,13 +2829,11 @@ define <vscale x 8 x i1> @fcmp_ole_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ;
 ; ZVFHMIN-LABEL: fcmp_ole_vf_swap_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vmfge.vf v8, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -3060,14 +2876,12 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ;
 ; ZVFHMIN-LABEL: fcmp_one_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vmv.v.x v8, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v16, v0.t
-; ZVFHMIN-NEXT:    vmflt.vv v9, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v8, v12, fa5, v0.t
+; ZVFHMIN-NEXT:    vmfgt.vf v9, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmor.mm v0, v9, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -3087,14 +2901,12 @@ define <vscale x 8 x i1> @fcmp_one_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ;
 ; ZVFHMIN-LABEL: fcmp_one_vf_swap_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vmv.v.x v8, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmflt.vv v9, v12, v16, v0.t
+; ZVFHMIN-NEXT:    vmfgt.vf v8, v12, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v9, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmor.mm v0, v9, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -3141,17 +2953,16 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ;
 ; ZVFHMIN-LABEL: fcmp_ord_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vmv.v.x v8, a1
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v10, v12, v12, v0.t
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vfmv.v.f v12, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vmfeq.vf v10, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12, v0.t
-; ZVFHMIN-NEXT:    vmand.mm v0, v10, v8
+; ZVFHMIN-NEXT:    vmand.mm v0, v8, v10
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -3172,17 +2983,16 @@ define <vscale x 8 x i1> @fcmp_ord_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ;
 ; ZVFHMIN-LABEL: fcmp_ord_vf_swap_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vmv.v.x v8, a1
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v10, v12, v12, v0.t
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vfmv.v.f v12, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vmfeq.vf v10, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12, v0.t
-; ZVFHMIN-NEXT:    vmand.mm v0, v8, v10
+; ZVFHMIN-NEXT:    vmand.mm v0, v10, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -3224,14 +3034,12 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ;
 ; ZVFHMIN-LABEL: fcmp_ueq_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vmv.v.x v8, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v16, v0.t
-; ZVFHMIN-NEXT:    vmflt.vv v9, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v8, v12, fa5, v0.t
+; ZVFHMIN-NEXT:    vmfgt.vf v9, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmnor.mm v0, v9, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -3251,14 +3059,12 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ;
 ; ZVFHMIN-LABEL: fcmp_ueq_vf_swap_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vmv.v.x v8, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12, v0.t
-; ZVFHMIN-NEXT:    vmflt.vv v9, v12, v16, v0.t
+; ZVFHMIN-NEXT:    vmfgt.vf v8, v12, fa5, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v9, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmnor.mm v0, v9, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -3298,13 +3104,11 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ;
 ; ZVFHMIN-LABEL: fcmp_ugt_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v12, v16, v0.t
+; ZVFHMIN-NEXT:    vmfle.vf v8, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -3323,13 +3127,11 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ;
 ; ZVFHMIN-LABEL: fcmp_ugt_vf_swap_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vmfge.vf v8, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -3369,13 +3171,11 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ;
 ; ZVFHMIN-LABEL: fcmp_uge_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v16, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v8, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -3394,13 +3194,11 @@ define <vscale x 8 x i1> @fcmp_uge_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ;
 ; ZVFHMIN-LABEL: fcmp_uge_vf_swap_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vmfgt.vf v8, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -3440,13 +3238,11 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ;
 ; ZVFHMIN-LABEL: fcmp_ult_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vmfge.vf v8, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -3465,13 +3261,11 @@ define <vscale x 8 x i1> @fcmp_ult_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ;
 ; ZVFHMIN-LABEL: fcmp_ult_vf_swap_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v12, v16, v0.t
+; ZVFHMIN-NEXT:    vmfle.vf v8, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -3511,13 +3305,11 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ;
 ; ZVFHMIN-LABEL: fcmp_ule_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vmfgt.vf v8, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -3536,13 +3328,11 @@ define <vscale x 8 x i1> @fcmp_ule_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ;
 ; ZVFHMIN-LABEL: fcmp_ule_vf_swap_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v16, v0.t
+; ZVFHMIN-NEXT:    vmflt.vf v8, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -3582,13 +3372,11 @@ define <vscale x 8 x i1> @fcmp_une_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ;
 ; ZVFHMIN-LABEL: fcmp_une_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v8, v12, v16, v0.t
+; ZVFHMIN-NEXT:    vmfne.vf v8, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -3607,13 +3395,11 @@ define <vscale x 8 x i1> @fcmp_une_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ;
 ; ZVFHMIN-LABEL: fcmp_une_vf_swap_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v8, v16, v12, v0.t
+; ZVFHMIN-NEXT:    vmfne.vf v8, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    vmv1r.v v0, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -3660,17 +3446,16 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f16(<vscale x 8 x half> %va, half %b,
 ;
 ; ZVFHMIN-LABEL: fcmp_uno_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vmv.v.x v8, a1
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v10, v12, v12, v0.t
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vfmv.v.f v12, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vmfne.vf v10, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmfne.vv v8, v12, v12, v0.t
-; ZVFHMIN-NEXT:    vmor.mm v0, v10, v8
+; ZVFHMIN-NEXT:    vmor.mm v0, v8, v10
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -3691,17 +3476,16 @@ define <vscale x 8 x i1> @fcmp_uno_vf_swap_nxv8f16(<vscale x 8 x half> %va, half
 ;
 ; ZVFHMIN-LABEL: fcmp_uno_vf_swap_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vmv.v.x v8, a1
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v10, v12, v12, v0.t
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vfmv.v.f v12, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vmfne.vf v10, v12, fa5, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmfne.vv v8, v12, v12, v0.t
-; ZVFHMIN-NEXT:    vmor.mm v0, v8, v10
+; ZVFHMIN-NEXT:    vmor.mm v0, v10, v8
 ; ZVFHMIN-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll
index 7ef46a7..ae868fe 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll
@@ -31,13 +31,11 @@ define <vscale x 8 x i1> @fcmp_oeq_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 define <vscale x 8 x i1> @fcmp_oeq_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: fcmp_oeq_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v16
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -48,13 +46,11 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 define <vscale x 8 x i1> @fcmp_oeq_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: fcmp_oeq_fv_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v16, v12
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -78,13 +74,11 @@ define <vscale x 8 x i1> @fcmp_oeq_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va,
 define <vscale x 8 x i1> @fcmp_oeq_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 {
 ; CHECK-LABEL: fcmp_oeq_vf_nxv8bf16_nonans:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v16
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -108,13 +102,11 @@ define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: fcmp_ogt_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v16, v12
+; CHECK-NEXT:    vmfgt.vf v0, v12, fa5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -125,13 +117,11 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 define <vscale x 8 x i1> @fcmp_ogt_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: fcmp_ogt_fv_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v12, v16
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -155,13 +145,11 @@ define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va,
 define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 {
 ; CHECK-LABEL: fcmp_ogt_vf_nxv8bf16_nonans:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v16, v12
+; CHECK-NEXT:    vmfgt.vf v0, v12, fa5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -185,13 +173,11 @@ define <vscale x 8 x i1> @fcmp_oge_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 define <vscale x 8 x i1> @fcmp_oge_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: fcmp_oge_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v0, v16, v12
+; CHECK-NEXT:    vmfge.vf v0, v12, fa5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -202,13 +188,11 @@ define <vscale x 8 x i1> @fcmp_oge_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 define <vscale x 8 x i1> @fcmp_oge_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: fcmp_oge_fv_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v0, v12, v16
+; CHECK-NEXT:    vmfle.vf v0, v12, fa5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -232,13 +216,11 @@ define <vscale x 8 x i1> @fcmp_oge_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va,
 define <vscale x 8 x i1> @fcmp_oge_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 {
 ; CHECK-LABEL: fcmp_oge_vf_nxv8bf16_nonans:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v0, v16, v12
+; CHECK-NEXT:    vmfge.vf v0, v12, fa5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -262,13 +244,11 @@ define <vscale x 8 x i1> @fcmp_olt_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 define <vscale x 8 x i1> @fcmp_olt_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: fcmp_olt_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v12, v16
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -279,13 +259,11 @@ define <vscale x 8 x i1> @fcmp_olt_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 define <vscale x 8 x i1> @fcmp_olt_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: fcmp_olt_fv_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v16, v12
+; CHECK-NEXT:    vmfgt.vf v0, v12, fa5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -309,13 +287,11 @@ define <vscale x 8 x i1> @fcmp_olt_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va,
 define <vscale x 8 x i1> @fcmp_olt_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 {
 ; CHECK-LABEL: fcmp_olt_vf_nxv8bf16_nonans:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v12, v16
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -339,13 +315,11 @@ define <vscale x 8 x i1> @fcmp_ole_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 define <vscale x 8 x i1> @fcmp_ole_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: fcmp_ole_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v0, v12, v16
+; CHECK-NEXT:    vmfle.vf v0, v12, fa5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -356,13 +330,11 @@ define <vscale x 8 x i1> @fcmp_ole_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 define <vscale x 8 x i1> @fcmp_ole_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: fcmp_ole_fv_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v0, v16, v12
+; CHECK-NEXT:    vmfge.vf v0, v12, fa5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -386,13 +358,11 @@ define <vscale x 8 x i1> @fcmp_ole_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va,
 define <vscale x 8 x i1> @fcmp_ole_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 {
 ; CHECK-LABEL: fcmp_ole_vf_nxv8bf16_nonans:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v0, v12, v16
+; CHECK-NEXT:    vmfle.vf v0, v12, fa5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -418,14 +388,12 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 define <vscale x 8 x i1> @fcmp_one_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: fcmp_one_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v12, v16
-; CHECK-NEXT:    vmflt.vv v9, v16, v12
+; CHECK-NEXT:    vmflt.vf v8, v12, fa5
+; CHECK-NEXT:    vmfgt.vf v9, v12, fa5
 ; CHECK-NEXT:    vmor.mm v0, v9, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -437,14 +405,12 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 define <vscale x 8 x i1> @fcmp_one_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: fcmp_one_fv_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v16, v12
-; CHECK-NEXT:    vmflt.vv v9, v12, v16
+; CHECK-NEXT:    vmfgt.vf v8, v12, fa5
+; CHECK-NEXT:    vmflt.vf v9, v12, fa5
 ; CHECK-NEXT:    vmor.mm v0, v9, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -469,13 +435,11 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va,
 define <vscale x 8 x i1> @fcmp_one_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 {
 ; CHECK-LABEL: fcmp_one_vf_nxv8bf16_nonans:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v12, v16
+; CHECK-NEXT:    vmfne.vf v0, v12, fa5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -503,17 +467,15 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 define <vscale x 8 x i1> @fcmp_ord_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: fcmp_ord_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v10, v12, v12
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vfmv.v.f v12, fa5
+; CHECK-NEXT:    vmfeq.vf v10, v12, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmand.mm v0, v10, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -524,17 +486,15 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 define <vscale x 8 x i1> @fcmp_ord_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: fcmp_ord_fv_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v10, v12, v12
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vfmv.v.f v12, fa5
+; CHECK-NEXT:    vmfeq.vf v10, v12, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmand.mm v0, v8, v10
+; CHECK-NEXT:    vmand.mm v0, v10, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -562,17 +522,15 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va,
 define <vscale x 8 x i1> @fcmp_ord_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 {
 ; CHECK-LABEL: fcmp_ord_vf_nxv8bf16_nonans:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v10, v12, v12
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vfmv.v.f v12, fa5
+; CHECK-NEXT:    vmfeq.vf v10, v12, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfeq.vv v8, v12, v12
-; CHECK-NEXT:    vmand.mm v0, v10, v8
+; CHECK-NEXT:    vmand.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -598,14 +556,12 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: fcmp_ueq_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v12, v16
-; CHECK-NEXT:    vmflt.vv v9, v16, v12
+; CHECK-NEXT:    vmflt.vf v8, v12, fa5
+; CHECK-NEXT:    vmfgt.vf v9, v12, fa5
 ; CHECK-NEXT:    vmnor.mm v0, v9, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -617,14 +573,12 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 define <vscale x 8 x i1> @fcmp_ueq_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: fcmp_ueq_fv_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v16, v12
-; CHECK-NEXT:    vmflt.vv v9, v12, v16
+; CHECK-NEXT:    vmfgt.vf v8, v12, fa5
+; CHECK-NEXT:    vmflt.vf v9, v12, fa5
 ; CHECK-NEXT:    vmnor.mm v0, v9, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -649,13 +603,11 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va,
 define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 {
 ; CHECK-LABEL: fcmp_ueq_vf_nxv8bf16_nonans:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfeq.vv v0, v12, v16
+; CHECK-NEXT:    vmfeq.vf v0, v12, fa5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -680,13 +632,11 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: fcmp_ugt_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v8, v12, v16
+; CHECK-NEXT:    vmfle.vf v8, v12, fa5
 ; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -698,13 +648,11 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 define <vscale x 8 x i1> @fcmp_ugt_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: fcmp_ugt_fv_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v8, v16, v12
+; CHECK-NEXT:    vmfge.vf v8, v12, fa5
 ; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -729,13 +677,11 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va,
 define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 {
 ; CHECK-LABEL: fcmp_ugt_vf_nxv8bf16_nonans:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v16, v12
+; CHECK-NEXT:    vmfgt.vf v0, v12, fa5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -760,13 +706,11 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 define <vscale x 8 x i1> @fcmp_uge_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: fcmp_uge_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v12, v16
+; CHECK-NEXT:    vmflt.vf v8, v12, fa5
 ; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -778,13 +722,11 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 define <vscale x 8 x i1> @fcmp_uge_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: fcmp_uge_fv_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v16, v12
+; CHECK-NEXT:    vmfgt.vf v8, v12, fa5
 ; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -809,13 +751,11 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va,
 define <vscale x 8 x i1> @fcmp_uge_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 {
 ; CHECK-LABEL: fcmp_uge_vf_nxv8bf16_nonans:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v0, v16, v12
+; CHECK-NEXT:    vmfge.vf v0, v12, fa5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -840,13 +780,11 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 define <vscale x 8 x i1> @fcmp_ult_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: fcmp_ult_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v8, v16, v12
+; CHECK-NEXT:    vmfge.vf v8, v12, fa5
 ; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -858,13 +796,11 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 define <vscale x 8 x i1> @fcmp_ult_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: fcmp_ult_fv_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v8, v12, v16
+; CHECK-NEXT:    vmfle.vf v8, v12, fa5
 ; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -889,13 +825,11 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va,
 define <vscale x 8 x i1> @fcmp_ult_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 {
 ; CHECK-LABEL: fcmp_ult_vf_nxv8bf16_nonans:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v0, v12, v16
+; CHECK-NEXT:    vmflt.vf v0, v12, fa5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -920,13 +854,11 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 define <vscale x 8 x i1> @fcmp_ule_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: fcmp_ule_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v16, v12
+; CHECK-NEXT:    vmfgt.vf v8, v12, fa5
 ; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -938,13 +870,11 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 define <vscale x 8 x i1> @fcmp_ule_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: fcmp_ule_fv_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmflt.vv v8, v12, v16
+; CHECK-NEXT:    vmflt.vf v8, v12, fa5
 ; CHECK-NEXT:    vmnot.m v0, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
@@ -969,13 +899,11 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va,
 define <vscale x 8 x i1> @fcmp_ule_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 {
 ; CHECK-LABEL: fcmp_ule_vf_nxv8bf16_nonans:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfle.vv v0, v12, v16
+; CHECK-NEXT:    vmfle.vf v0, v12, fa5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -999,13 +927,11 @@ define <vscale x 8 x i1> @fcmp_une_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 define <vscale x 8 x i1> @fcmp_une_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: fcmp_une_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v12, v16
+; CHECK-NEXT:    vmfne.vf v0, v12, fa5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -1016,13 +942,11 @@ define <vscale x 8 x i1> @fcmp_une_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 define <vscale x 8 x i1> @fcmp_une_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: fcmp_une_fv_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v16, v12
+; CHECK-NEXT:    vmfne.vf v0, v12, fa5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -1046,13 +970,11 @@ define <vscale x 8 x i1> @fcmp_une_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va,
 define <vscale x 8 x i1> @fcmp_une_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 {
 ; CHECK-LABEL: fcmp_une_vf_nxv8bf16_nonans:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v0, v12, v16
+; CHECK-NEXT:    vmfne.vf v0, v12, fa5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -1080,17 +1002,15 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscal
 define <vscale x 8 x i1> @fcmp_uno_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: fcmp_uno_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v10, v12, v12
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vfmv.v.f v12, fa5
+; CHECK-NEXT:    vmfne.vf v10, v12, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v8, v12, v12
-; CHECK-NEXT:    vmor.mm v0, v10, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -1101,17 +1021,15 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat
 define <vscale x 8 x i1> @fcmp_uno_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: fcmp_uno_fv_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v10, v12, v12
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vfmv.v.f v12, fa5
+; CHECK-NEXT:    vmfne.vf v10, v12, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v8, v12, v12
-; CHECK-NEXT:    vmor.mm v0, v8, v10
+; CHECK-NEXT:    vmor.mm v0, v10, v8
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -1139,17 +1057,15 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va,
 define <vscale x 8 x i1> @fcmp_uno_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 {
 ; CHECK-LABEL: fcmp_uno_vf_nxv8bf16_nonans:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vmfne.vv v10, v12, v12
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vfmv.v.f v12, fa5
+; CHECK-NEXT:    vmfne.vf v10, v12, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmfne.vv v8, v12, v12
-; CHECK-NEXT:    vmor.mm v0, v10, v8
+; CHECK-NEXT:    vmor.mm v0, v8, v10
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -1185,13 +1101,11 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ;
 ; ZVFHMIN-LABEL: fcmp_oeq_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v16
+; ZVFHMIN-NEXT:    vmfeq.vf v0, v12, fa5
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1208,13 +1122,11 @@ define <vscale x 8 x i1> @fcmp_oeq_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ;
 ; ZVFHMIN-LABEL: fcmp_oeq_fv_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vmfeq.vf v0, v12, fa5
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1250,13 +1162,11 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_nxv8f16_nonans(<vscale x 8 x half> %va, ha
 ;
 ; ZVFHMIN-LABEL: fcmp_oeq_vf_nxv8f16_nonans:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v16
+; ZVFHMIN-NEXT:    vmfeq.vf v0, v12, fa5
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1292,13 +1202,11 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ;
 ; ZVFHMIN-LABEL: fcmp_ogt_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vmfgt.vf v0, v12, fa5
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1315,13 +1223,11 @@ define <vscale x 8 x i1> @fcmp_ogt_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ;
 ; ZVFHMIN-LABEL: fcmp_ogt_fv_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v0, v12, v16
+; ZVFHMIN-NEXT:    vmflt.vf v0, v12, fa5
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1357,13 +1263,11 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8f16_nonans(<vscale x 8 x half> %va, ha
 ;
 ; ZVFHMIN-LABEL: fcmp_ogt_vf_nxv8f16_nonans:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vmfgt.vf v0, v12, fa5
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1399,13 +1303,11 @@ define <vscale x 8 x i1> @fcmp_oge_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ;
 ; ZVFHMIN-LABEL: fcmp_oge_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vmfge.vf v0, v12, fa5
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1422,13 +1324,11 @@ define <vscale x 8 x i1> @fcmp_oge_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ;
 ; ZVFHMIN-LABEL: fcmp_oge_fv_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v0, v12, v16
+; ZVFHMIN-NEXT:    vmfle.vf v0, v12, fa5
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1464,13 +1364,11 @@ define <vscale x 8 x i1> @fcmp_oge_vf_nxv8f16_nonans(<vscale x 8 x half> %va, ha
 ;
 ; ZVFHMIN-LABEL: fcmp_oge_vf_nxv8f16_nonans:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vmfge.vf v0, v12, fa5
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1506,13 +1404,11 @@ define <vscale x 8 x i1> @fcmp_olt_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ;
 ; ZVFHMIN-LABEL: fcmp_olt_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v0, v12, v16
+; ZVFHMIN-NEXT:    vmflt.vf v0, v12, fa5
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1529,13 +1425,11 @@ define <vscale x 8 x i1> @fcmp_olt_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ;
 ; ZVFHMIN-LABEL: fcmp_olt_fv_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vmfgt.vf v0, v12, fa5
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1571,13 +1465,11 @@ define <vscale x 8 x i1> @fcmp_olt_vf_nxv8f16_nonans(<vscale x 8 x half> %va, ha
 ;
 ; ZVFHMIN-LABEL: fcmp_olt_vf_nxv8f16_nonans:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v0, v12, v16
+; ZVFHMIN-NEXT:    vmflt.vf v0, v12, fa5
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1613,13 +1505,11 @@ define <vscale x 8 x i1> @fcmp_ole_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ;
 ; ZVFHMIN-LABEL: fcmp_ole_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v0, v12, v16
+; ZVFHMIN-NEXT:    vmfle.vf v0, v12, fa5
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1636,13 +1526,11 @@ define <vscale x 8 x i1> @fcmp_ole_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ;
 ; ZVFHMIN-LABEL: fcmp_ole_fv_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vmfge.vf v0, v12, fa5
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1678,13 +1566,11 @@ define <vscale x 8 x i1> @fcmp_ole_vf_nxv8f16_nonans(<vscale x 8 x half> %va, ha
 ;
 ; ZVFHMIN-LABEL: fcmp_ole_vf_nxv8f16_nonans:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v0, v12, v16
+; ZVFHMIN-NEXT:    vmfle.vf v0, v12, fa5
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1726,14 +1612,12 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ;
 ; ZVFHMIN-LABEL: fcmp_one_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vmv.v.x v8, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v16
-; ZVFHMIN-NEXT:    vmflt.vv v9, v16, v12
+; ZVFHMIN-NEXT:    vmflt.vf v8, v12, fa5
+; ZVFHMIN-NEXT:    vmfgt.vf v9, v12, fa5
 ; ZVFHMIN-NEXT:    vmor.mm v0, v9, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -1753,14 +1637,12 @@ define <vscale x 8 x i1> @fcmp_one_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ;
 ; ZVFHMIN-LABEL: fcmp_one_fv_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vmv.v.x v8, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12
-; ZVFHMIN-NEXT:    vmflt.vv v9, v12, v16
+; ZVFHMIN-NEXT:    vmfgt.vf v8, v12, fa5
+; ZVFHMIN-NEXT:    vmflt.vf v9, v12, fa5
 ; ZVFHMIN-NEXT:    vmor.mm v0, v9, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -1797,13 +1679,11 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f16_nonans(<vscale x 8 x half> %va, ha
 ;
 ; ZVFHMIN-LABEL: fcmp_one_vf_nxv8f16_nonans:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v0, v12, v16
+; ZVFHMIN-NEXT:    vmfne.vf v0, v12, fa5
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1848,17 +1728,15 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ;
 ; ZVFHMIN-LABEL: fcmp_ord_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vmv.v.x v8, a0
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v10, v12, v12
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vfmv.v.f v12, fa5
+; ZVFHMIN-NEXT:    vmfeq.vf v10, v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12
-; ZVFHMIN-NEXT:    vmand.mm v0, v10, v8
+; ZVFHMIN-NEXT:    vmand.mm v0, v8, v10
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1878,17 +1756,15 @@ define <vscale x 8 x i1> @fcmp_ord_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ;
 ; ZVFHMIN-LABEL: fcmp_ord_fv_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vmv.v.x v8, a0
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v10, v12, v12
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vfmv.v.f v12, fa5
+; ZVFHMIN-NEXT:    vmfeq.vf v10, v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12
-; ZVFHMIN-NEXT:    vmand.mm v0, v8, v10
+; ZVFHMIN-NEXT:    vmand.mm v0, v10, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1933,17 +1809,15 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f16_nonans(<vscale x 8 x half> %va, ha
 ;
 ; ZVFHMIN-LABEL: fcmp_ord_vf_nxv8f16_nonans:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vmv.v.x v8, a0
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v10, v12, v12
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vfmv.v.f v12, fa5
+; ZVFHMIN-NEXT:    vmfeq.vf v10, v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmfeq.vv v8, v12, v12
-; ZVFHMIN-NEXT:    vmand.mm v0, v10, v8
+; ZVFHMIN-NEXT:    vmand.mm v0, v8, v10
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -1985,14 +1859,12 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ;
 ; ZVFHMIN-LABEL: fcmp_ueq_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vmv.v.x v8, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v16
-; ZVFHMIN-NEXT:    vmflt.vv v9, v16, v12
+; ZVFHMIN-NEXT:    vmflt.vf v8, v12, fa5
+; ZVFHMIN-NEXT:    vmfgt.vf v9, v12, fa5
 ; ZVFHMIN-NEXT:    vmnor.mm v0, v9, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -2012,14 +1884,12 @@ define <vscale x 8 x i1> @fcmp_ueq_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ;
 ; ZVFHMIN-LABEL: fcmp_ueq_fv_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vmv.v.x v8, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12
-; ZVFHMIN-NEXT:    vmflt.vv v9, v12, v16
+; ZVFHMIN-NEXT:    vmfgt.vf v8, v12, fa5
+; ZVFHMIN-NEXT:    vmflt.vf v9, v12, fa5
 ; ZVFHMIN-NEXT:    vmnor.mm v0, v9, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -2056,13 +1926,11 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f16_nonans(<vscale x 8 x half> %va, ha
 ;
 ; ZVFHMIN-LABEL: fcmp_ueq_vf_nxv8f16_nonans:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfeq.vv v0, v12, v16
+; ZVFHMIN-NEXT:    vmfeq.vf v0, v12, fa5
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2101,13 +1969,11 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ;
 ; ZVFHMIN-LABEL: fcmp_ugt_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v12, v16
+; ZVFHMIN-NEXT:    vmfle.vf v8, v12, fa5
 ; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -2126,13 +1992,11 @@ define <vscale x 8 x i1> @fcmp_ugt_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ;
 ; ZVFHMIN-LABEL: fcmp_ugt_fv_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v16, v12
+; ZVFHMIN-NEXT:    vmfge.vf v8, v12, fa5
 ; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -2169,13 +2033,11 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f16_nonans(<vscale x 8 x half> %va, ha
 ;
 ; ZVFHMIN-LABEL: fcmp_ugt_vf_nxv8f16_nonans:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vmfgt.vf v0, v12, fa5
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2214,13 +2076,11 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ;
 ; ZVFHMIN-LABEL: fcmp_uge_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v16
+; ZVFHMIN-NEXT:    vmflt.vf v8, v12, fa5
 ; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -2239,13 +2099,11 @@ define <vscale x 8 x i1> @fcmp_uge_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ;
 ; ZVFHMIN-LABEL: fcmp_uge_fv_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12
+; ZVFHMIN-NEXT:    vmfgt.vf v8, v12, fa5
 ; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -2282,13 +2140,11 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f16_nonans(<vscale x 8 x half> %va, ha
 ;
 ; ZVFHMIN-LABEL: fcmp_uge_vf_nxv8f16_nonans:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vmfge.vf v0, v12, fa5
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2327,13 +2183,11 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ;
 ; ZVFHMIN-LABEL: fcmp_ult_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v16, v12
+; ZVFHMIN-NEXT:    vmfge.vf v8, v12, fa5
 ; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -2352,13 +2206,11 @@ define <vscale x 8 x i1> @fcmp_ult_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ;
 ; ZVFHMIN-LABEL: fcmp_ult_fv_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v8, v12, v16
+; ZVFHMIN-NEXT:    vmfle.vf v8, v12, fa5
 ; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -2395,13 +2247,11 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f16_nonans(<vscale x 8 x half> %va, ha
 ;
 ; ZVFHMIN-LABEL: fcmp_ult_vf_nxv8f16_nonans:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v0, v12, v16
+; ZVFHMIN-NEXT:    vmflt.vf v0, v12, fa5
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2440,13 +2290,11 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ;
 ; ZVFHMIN-LABEL: fcmp_ule_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v16, v12
+; ZVFHMIN-NEXT:    vmfgt.vf v8, v12, fa5
 ; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -2465,13 +2313,11 @@ define <vscale x 8 x i1> @fcmp_ule_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ;
 ; ZVFHMIN-LABEL: fcmp_ule_fv_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmflt.vv v8, v12, v16
+; ZVFHMIN-NEXT:    vmflt.vf v8, v12, fa5
 ; ZVFHMIN-NEXT:    vmnot.m v0, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
@@ -2508,13 +2354,11 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f16_nonans(<vscale x 8 x half> %va, ha
 ;
 ; ZVFHMIN-LABEL: fcmp_ule_vf_nxv8f16_nonans:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfle.vv v0, v12, v16
+; ZVFHMIN-NEXT:    vmfle.vf v0, v12, fa5
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2550,13 +2394,11 @@ define <vscale x 8 x i1> @fcmp_une_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ;
 ; ZVFHMIN-LABEL: fcmp_une_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v0, v12, v16
+; ZVFHMIN-NEXT:    vmfne.vf v0, v12, fa5
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2573,13 +2415,11 @@ define <vscale x 8 x i1> @fcmp_une_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ;
 ; ZVFHMIN-LABEL: fcmp_une_fv_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v0, v16, v12
+; ZVFHMIN-NEXT:    vmfne.vf v0, v12, fa5
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2615,13 +2455,11 @@ define <vscale x 8 x i1> @fcmp_une_vf_nxv8f16_nonans(<vscale x 8 x half> %va, ha
 ;
 ; ZVFHMIN-LABEL: fcmp_une_vf_nxv8f16_nonans:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v0, v12, v16
+; ZVFHMIN-NEXT:    vmfne.vf v0, v12, fa5
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2666,17 +2504,15 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f16(<vscale x 8 x half> %va, half %b)
 ;
 ; ZVFHMIN-LABEL: fcmp_uno_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vmv.v.x v8, a0
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v10, v12, v12
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vfmv.v.f v12, fa5
+; ZVFHMIN-NEXT:    vmfne.vf v10, v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmfne.vv v8, v12, v12
-; ZVFHMIN-NEXT:    vmor.mm v0, v10, v8
+; ZVFHMIN-NEXT:    vmor.mm v0, v8, v10
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2696,17 +2532,15 @@ define <vscale x 8 x i1> @fcmp_uno_fv_nxv8f16(<vscale x 8 x half> %va, half %b)
 ;
 ; ZVFHMIN-LABEL: fcmp_uno_fv_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vmv.v.x v8, a0
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v10, v12, v12
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vfmv.v.f v12, fa5
+; ZVFHMIN-NEXT:    vmfne.vf v10, v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmfne.vv v8, v12, v12
-; ZVFHMIN-NEXT:    vmor.mm v0, v8, v10
+; ZVFHMIN-NEXT:    vmor.mm v0, v10, v8
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -2751,17 +2585,15 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f16_nonans(<vscale x 8 x half> %va, ha
 ;
 ; ZVFHMIN-LABEL: fcmp_uno_vf_nxv8f16_nonans:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vmv.v.x v8, a0
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vmfne.vv v10, v12, v12
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT:    vfmv.v.f v12, fa5
+; ZVFHMIN-NEXT:    vmfne.vf v10, v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; ZVFHMIN-NEXT:    vmfne.vv v8, v12, v12
-; ZVFHMIN-NEXT:    vmor.mm v0, v10, v8
+; ZVFHMIN-NEXT:    vmor.mm v0, v8, v10
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %b, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
index 735621a..1948675 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
@@ -5741,3 +5741,152 @@ vector.body:
 for.cond.cleanup:
   ret void
 }
+
+define void @sink_splat_vfwadd_vf(ptr nocapture %a, ptr nocapture %b, float %f) {
+; CHECK-LABEL: sink_splat_vfwadd_vf:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li a1, 0
+; CHECK-NEXT:    li a2, 1020
+; CHECK-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
+; CHECK-NEXT:  .LBB125_1: # %vector.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vl1re32.v v8, (a0)
+; CHECK-NEXT:    addi a1, a1, 4
+; CHECK-NEXT:    addi a2, a2, -4
+; CHECK-NEXT:    vfwadd.vf v10, v8, fa0
+; CHECK-NEXT:    vs2r.v v10, (a0)
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    j .LBB125_1
+entry:
+  %f.ext = fpext float %f to double
+  %broadcast.splatinsert = insertelement <vscale x 2 x double> poison, double %f.ext, i32 0
+  %broadcast.splat = shufflevector <vscale x 2 x double> %broadcast.splatinsert, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %0 = getelementptr float, ptr %a, i64 %index
+  %wide.load = load <vscale x 2 x float>, ptr %0
+  %ext = fpext <vscale x 2 x float> %wide.load to <vscale x 2 x double>
+  %1 = fadd <vscale x 2 x double> %ext, %broadcast.splat
+  %2 = getelementptr double, ptr %b, i64 %index
+  store <vscale x 2 x double> %1, ptr %0
+  %index.next = add i64 %index, 4
+  %3 = icmp eq i64 %index.next, 1024
+  br i1 32, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:                                 ; preds = %vector.body
+  ret void
+}
+
+define void @sink_splat_vfwadd_wf(ptr nocapture %a, ptr nocapture %b, float %f) {
+; CHECK-LABEL: sink_splat_vfwadd_wf:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li a1, 0
+; CHECK-NEXT:    li a2, 1020
+; CHECK-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
+; CHECK-NEXT:  .LBB126_1: # %vector.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vl2re64.v v8, (a0)
+; CHECK-NEXT:    addi a1, a1, 4
+; CHECK-NEXT:    addi a2, a2, -4
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
+; CHECK-NEXT:    vs2r.v v8, (a0)
+; CHECK-NEXT:    addi a0, a0, 32
+; CHECK-NEXT:    j .LBB126_1
+entry:
+  %f.ext = fpext float %f to double
+  %broadcast.splatinsert = insertelement <vscale x 2 x double> poison, double %f.ext, i32 0
+  %broadcast.splat = shufflevector <vscale x 2 x double> %broadcast.splatinsert, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %0 = getelementptr double, ptr %a, i64 %index
+  %wide.load = load <vscale x 2 x double>, ptr %0
+  %1 = fadd <vscale x 2 x double> %wide.load, %broadcast.splat
+  %2 = getelementptr double, ptr %b, i64 %index
+  store <vscale x 2 x double> %1, ptr %0
+  %index.next = add i64 %index, 4
+  %3 = icmp eq i64 %index.next, 1024
+  br i1 32, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:                                 ; preds = %vector.body
+  ret void
+}
+
+define void @sink_splat_vfwmul_vf(ptr nocapture %a, ptr nocapture %b, float %f) {
+; CHECK-LABEL: sink_splat_vfwmul_vf:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li a1, 0
+; CHECK-NEXT:    li a2, 1020
+; CHECK-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
+; CHECK-NEXT:  .LBB127_1: # %vector.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vl1re32.v v8, (a0)
+; CHECK-NEXT:    addi a1, a1, 4
+; CHECK-NEXT:    addi a2, a2, -4
+; CHECK-NEXT:    vfwmul.vf v10, v8, fa0
+; CHECK-NEXT:    vs2r.v v10, (a0)
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    j .LBB127_1
+entry:
+  %f.ext = fpext float %f to double
+  %broadcast.splatinsert = insertelement <vscale x 2 x double> poison, double %f.ext, i32 0
+  %broadcast.splat = shufflevector <vscale x 2 x double> %broadcast.splatinsert, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %0 = getelementptr float, ptr %a, i64 %index
+  %wide.load = load <vscale x 2 x float>, ptr %0
+  %ext = fpext <vscale x 2 x float> %wide.load to <vscale x 2 x double>
+  %1 = fmul <vscale x 2 x double> %ext, %broadcast.splat
+  %2 = getelementptr double, ptr %b, i64 %index
+  store <vscale x 2 x double> %1, ptr %0
+  %index.next = add i64 %index, 4
+  %3 = icmp eq i64 %index.next, 1024
+  br i1 32, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:                                 ; preds = %vector.body
+  ret void
+}
+
+; Even though there's no vfwmul.wf we'll sink the fcvt.d.s. Make sure
+; early-machinelicm undos the sink after isel.
+define void @sink_splat_vfwmul_wf(ptr nocapture %a, ptr nocapture %b, float %f) {
+; CHECK-LABEL: sink_splat_vfwmul_wf:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li a1, 0
+; CHECK-NEXT:    li a2, 1020
+; CHECK-NEXT:    fcvt.d.s fa5, fa0
+; CHECK-NEXT:    vsetvli a3, zero, e64, m2, ta, ma
+; CHECK-NEXT:  .LBB128_1: # %vector.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vl2re64.v v8, (a0)
+; CHECK-NEXT:    addi a1, a1, 4
+; CHECK-NEXT:    addi a2, a2, -4
+; CHECK-NEXT:    vfmul.vf v8, v8, fa5
+; CHECK-NEXT:    vs2r.v v8, (a0)
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    j .LBB128_1
+entry:
+  %f.ext = fpext float %f to double
+  %broadcast.splatinsert = insertelement <vscale x 2 x double> poison, double %f.ext, i32 0
+  %broadcast.splat = shufflevector <vscale x 2 x double> %broadcast.splatinsert, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %0 = getelementptr float, ptr %a, i64 %index
+  %wide.load = load <vscale x 2 x double>, ptr %0
+  %1 = fmul <vscale x 2 x double> %wide.load, %broadcast.splat
+  %2 = getelementptr double, ptr %b, i64 %index
+  store <vscale x 2 x double> %1, ptr %0
+  %index.next = add i64 %index, 4
+  %3 = icmp eq i64 %index.next, 1024
+  br i1 32, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:                                 ; preds = %vector.body
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
index 864acb3..53929d1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
@@ -216,11 +216,11 @@ define <vscale x 128 x i8> @vector_interleave_nxv128i8_nxv64i8(<vscale x 64 x i8
 ; ZVBB-LABEL: vector_interleave_nxv128i8_nxv64i8:
 ; ZVBB:       # %bb.0:
 ; ZVBB-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
-; ZVBB-NEXT:    vmv8r.v v24, v8
-; ZVBB-NEXT:    vwsll.vi v8, v16, 8
+; ZVBB-NEXT:    vwsll.vi v24, v16, 8
 ; ZVBB-NEXT:    vwsll.vi v0, v20, 8
-; ZVBB-NEXT:    vwaddu.wv v8, v8, v24
-; ZVBB-NEXT:    vwaddu.wv v0, v0, v28
+; ZVBB-NEXT:    vwaddu.wv v24, v24, v8
+; ZVBB-NEXT:    vwaddu.wv v0, v0, v12
+; ZVBB-NEXT:    vmv8r.v v8, v24
 ; ZVBB-NEXT:    vmv8r.v v16, v0
 ; ZVBB-NEXT:    ret
   %res = call <vscale x 128 x i8> @llvm.vector.interleave2.nxv128i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b)
@@ -243,11 +243,11 @@ define <vscale x 64 x i16> @vector_interleave_nxv64i16_nxv32i16(<vscale x 32 x i
 ; ZVBB-LABEL: vector_interleave_nxv64i16_nxv32i16:
 ; ZVBB:       # %bb.0:
 ; ZVBB-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVBB-NEXT:    vmv8r.v v24, v8
-; ZVBB-NEXT:    vwsll.vi v8, v16, 16
+; ZVBB-NEXT:    vwsll.vi v24, v16, 16
 ; ZVBB-NEXT:    vwsll.vi v0, v20, 16
-; ZVBB-NEXT:    vwaddu.wv v8, v8, v24
-; ZVBB-NEXT:    vwaddu.wv v0, v0, v28
+; ZVBB-NEXT:    vwaddu.wv v24, v24, v8
+; ZVBB-NEXT:    vwaddu.wv v0, v0, v12
+; ZVBB-NEXT:    vmv8r.v v8, v24
 ; ZVBB-NEXT:    vmv8r.v v16, v0
 ; ZVBB-NEXT:    ret
   %res = call <vscale x 64 x i16> @llvm.vector.interleave2.nxv64i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b)
@@ -269,13 +269,13 @@ define <vscale x 32 x i32> @vector_interleave_nxv32i32_nxv16i32(<vscale x 16 x i
 ;
 ; ZVBB-LABEL: vector_interleave_nxv32i32_nxv16i32:
 ; ZVBB:       # %bb.0:
-; ZVBB-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; ZVBB-NEXT:    vmv8r.v v24, v8
 ; ZVBB-NEXT:    li a0, 32
-; ZVBB-NEXT:    vwsll.vx v8, v16, a0
+; ZVBB-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; ZVBB-NEXT:    vwsll.vx v24, v16, a0
 ; ZVBB-NEXT:    vwsll.vx v0, v20, a0
-; ZVBB-NEXT:    vwaddu.wv v8, v8, v24
-; ZVBB-NEXT:    vwaddu.wv v0, v0, v28
+; ZVBB-NEXT:    vwaddu.wv v24, v24, v8
+; ZVBB-NEXT:    vwaddu.wv v0, v0, v12
+; ZVBB-NEXT:    vmv8r.v v8, v24
 ; ZVBB-NEXT:    vmv8r.v v16, v0
 ; ZVBB-NEXT:    ret
   %res = call <vscale x 32 x i32> @llvm.vector.interleave2.nxv32i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b)
@@ -588,11 +588,11 @@ define <vscale x 64 x bfloat> @vector_interleave_nxv64bf16_nxv32bf16(<vscale x 3
 ; ZVBB-LABEL: vector_interleave_nxv64bf16_nxv32bf16:
 ; ZVBB:       # %bb.0:
 ; ZVBB-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVBB-NEXT:    vmv8r.v v24, v8
-; ZVBB-NEXT:    vwsll.vi v8, v16, 16
+; ZVBB-NEXT:    vwsll.vi v24, v16, 16
 ; ZVBB-NEXT:    vwsll.vi v0, v20, 16
-; ZVBB-NEXT:    vwaddu.wv v8, v8, v24
-; ZVBB-NEXT:    vwaddu.wv v0, v0, v28
+; ZVBB-NEXT:    vwaddu.wv v24, v24, v8
+; ZVBB-NEXT:    vwaddu.wv v0, v0, v12
+; ZVBB-NEXT:    vmv8r.v v8, v24
 ; ZVBB-NEXT:    vmv8r.v v16, v0
 ; ZVBB-NEXT:    ret
   %res = call <vscale x 64 x bfloat> @llvm.vector.interleave2.nxv64bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b)
@@ -615,11 +615,11 @@ define <vscale x 64 x half> @vector_interleave_nxv64f16_nxv32f16(<vscale x 32 x
 ; ZVBB-LABEL: vector_interleave_nxv64f16_nxv32f16:
 ; ZVBB:       # %bb.0:
 ; ZVBB-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
-; ZVBB-NEXT:    vmv8r.v v24, v8
-; ZVBB-NEXT:    vwsll.vi v8, v16, 16
+; ZVBB-NEXT:    vwsll.vi v24, v16, 16
 ; ZVBB-NEXT:    vwsll.vi v0, v20, 16
-; ZVBB-NEXT:    vwaddu.wv v8, v8, v24
-; ZVBB-NEXT:    vwaddu.wv v0, v0, v28
+; ZVBB-NEXT:    vwaddu.wv v24, v24, v8
+; ZVBB-NEXT:    vwaddu.wv v0, v0, v12
+; ZVBB-NEXT:    vmv8r.v v8, v24
 ; ZVBB-NEXT:    vmv8r.v v16, v0
 ; ZVBB-NEXT:    ret
   %res = call <vscale x 64 x half> @llvm.vector.interleave2.nxv64f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b)
@@ -641,13 +641,13 @@ define <vscale x 32 x float> @vector_interleave_nxv32f32_nxv16f32(<vscale x 16 x
 ;
 ; ZVBB-LABEL: vector_interleave_nxv32f32_nxv16f32:
 ; ZVBB:       # %bb.0:
-; ZVBB-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
-; ZVBB-NEXT:    vmv8r.v v24, v8
 ; ZVBB-NEXT:    li a0, 32
-; ZVBB-NEXT:    vwsll.vx v8, v16, a0
+; ZVBB-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; ZVBB-NEXT:    vwsll.vx v24, v16, a0
 ; ZVBB-NEXT:    vwsll.vx v0, v20, a0
-; ZVBB-NEXT:    vwaddu.wv v8, v8, v24
-; ZVBB-NEXT:    vwaddu.wv v0, v0, v28
+; ZVBB-NEXT:    vwaddu.wv v24, v24, v8
+; ZVBB-NEXT:    vwaddu.wv v0, v0, v12
+; ZVBB-NEXT:    vmv8r.v v8, v24
 ; ZVBB-NEXT:    vmv8r.v v16, v0
 ; ZVBB-NEXT:    ret
   %res = call <vscale x 32 x float> @llvm.vector.interleave2.nxv32f32(<vscale x 16 x float> %a, <vscale x 16 x float> %b)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll
index 19c5ee4..a21918e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll
@@ -30,13 +30,11 @@ define <vscale x 1 x bfloat> @vfadd_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vsca
 define <vscale x 1 x bfloat> @vfadd_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: vfadd_vf_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfadd.vv v9, v10, v8
+; CHECK-NEXT:    vfadd.vf v9, v9, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
 ; CHECK-NEXT:    ret
@@ -64,13 +62,11 @@ define <vscale x 2 x bfloat> @vfadd_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vsca
 define <vscale x 2 x bfloat> @vfadd_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: vfadd_vf_nxv2bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfadd.vv v9, v10, v8
+; CHECK-NEXT:    vfadd.vf v9, v9, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
 ; CHECK-NEXT:    ret
@@ -98,13 +94,11 @@ define <vscale x 4 x bfloat> @vfadd_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vsca
 define <vscale x 4 x bfloat> @vfadd_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: vfadd_vf_nxv4bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfadd.vv v10, v10, v12
+; CHECK-NEXT:    vfadd.vf v10, v10, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
 ; CHECK-NEXT:    ret
@@ -132,13 +126,11 @@ define <vscale x 8 x bfloat> @vfadd_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vsca
 define <vscale x 8 x bfloat> @vfadd_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: vfadd_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfadd.vv v12, v12, v16
+; CHECK-NEXT:    vfadd.vf v12, v12, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
 ; CHECK-NEXT:    ret
@@ -151,13 +143,11 @@ define <vscale x 8 x bfloat> @vfadd_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 define <vscale x 8 x bfloat> @vfadd_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: vfadd_fv_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfadd.vv v12, v16, v12
+; CHECK-NEXT:    vfadd.vf v12, v12, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
 ; CHECK-NEXT:    ret
@@ -185,13 +175,11 @@ define <vscale x 16 x bfloat> @vfadd_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <v
 define <vscale x 16 x bfloat> @vfadd_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: vfadd_vf_nxv16bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfadd.vv v16, v16, v24
+; CHECK-NEXT:    vfadd.vf v16, v16, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
 ; CHECK-NEXT:    ret
@@ -310,13 +298,11 @@ define <vscale x 1 x half> @vfadd_vf_nxv1f16(<vscale x 1 x half> %va, half %b) {
 ;
 ; ZVFHMIN-LABEL: vfadd_vf_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfadd.vf v9, v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
@@ -356,13 +342,11 @@ define <vscale x 2 x half> @vfadd_vf_nxv2f16(<vscale x 2 x half> %va, half %b) {
 ;
 ; ZVFHMIN-LABEL: vfadd_vf_nxv2f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfadd.vf v9, v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
@@ -402,13 +386,11 @@ define <vscale x 4 x half> @vfadd_vf_nxv4f16(<vscale x 4 x half> %va, half %b) {
 ;
 ; ZVFHMIN-LABEL: vfadd_vf_nxv4f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v10, v10, v12
+; ZVFHMIN-NEXT:    vfadd.vf v10, v10, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
@@ -448,13 +430,11 @@ define <vscale x 8 x half> @vfadd_vf_nxv8f16(<vscale x 8 x half> %va, half %b) {
 ;
 ; ZVFHMIN-LABEL: vfadd_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v12, v12, v16
+; ZVFHMIN-NEXT:    vfadd.vf v12, v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -473,13 +453,11 @@ define <vscale x 8 x half> @vfadd_fv_nxv8f16(<vscale x 8 x half> %va, half %b) {
 ;
 ; ZVFHMIN-LABEL: vfadd_fv_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfadd.vf v12, v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -519,13 +497,11 @@ define <vscale x 16 x half> @vfadd_vf_nxv16f16(<vscale x 16 x half> %va, half %b
 ;
 ; ZVFHMIN-LABEL: vfadd_vf_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v16, v16, v24
+; ZVFHMIN-NEXT:    vfadd.vf v16, v16, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll
index b4a9b1f..e671ba8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll
@@ -26,13 +26,11 @@ define <vscale x 1 x bfloat> @vfdiv_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vsca
 define <vscale x 1 x bfloat> @vfdiv_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: vfdiv_vf_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfdiv.vv v9, v10, v8
+; CHECK-NEXT:    vfdiv.vf v9, v9, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
 ; CHECK-NEXT:    ret
@@ -60,13 +58,11 @@ define <vscale x 2 x bfloat> @vfdiv_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vsca
 define <vscale x 2 x bfloat> @vfdiv_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: vfdiv_vf_nxv2bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfdiv.vv v9, v10, v8
+; CHECK-NEXT:    vfdiv.vf v9, v9, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
 ; CHECK-NEXT:    ret
@@ -94,13 +90,11 @@ define <vscale x 4 x bfloat> @vfdiv_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vsca
 define <vscale x 4 x bfloat> @vfdiv_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: vfdiv_vf_nxv4bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfdiv.vv v10, v10, v12
+; CHECK-NEXT:    vfdiv.vf v10, v10, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
 ; CHECK-NEXT:    ret
@@ -128,13 +122,11 @@ define <vscale x 8 x bfloat> @vfdiv_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vsca
 define <vscale x 8 x bfloat> @vfdiv_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: vfdiv_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfdiv.vv v12, v12, v16
+; CHECK-NEXT:    vfdiv.vf v12, v12, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
 ; CHECK-NEXT:    ret
@@ -147,13 +139,11 @@ define <vscale x 8 x bfloat> @vfdiv_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 define <vscale x 8 x bfloat> @vfdiv_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: vfdiv_fv_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfdiv.vv v12, v16, v12
+; CHECK-NEXT:    vfrdiv.vf v12, v12, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
 ; CHECK-NEXT:    ret
@@ -181,13 +171,11 @@ define <vscale x 16 x bfloat> @vfdiv_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <v
 define <vscale x 16 x bfloat> @vfdiv_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: vfdiv_vf_nxv16bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfdiv.vv v16, v16, v24
+; CHECK-NEXT:    vfdiv.vf v16, v16, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
 ; CHECK-NEXT:    ret
@@ -306,13 +294,11 @@ define <vscale x 1 x half> @vfdiv_vf_nxv1f16(<vscale x 1 x half> %va, half %b) {
 ;
 ; ZVFHMIN-LABEL: vfdiv_vf_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfdiv.vf v9, v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
@@ -352,13 +338,11 @@ define <vscale x 2 x half> @vfdiv_vf_nxv2f16(<vscale x 2 x half> %va, half %b) {
 ;
 ; ZVFHMIN-LABEL: vfdiv_vf_nxv2f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfdiv.vf v9, v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
@@ -398,13 +382,11 @@ define <vscale x 4 x half> @vfdiv_vf_nxv4f16(<vscale x 4 x half> %va, half %b) {
 ;
 ; ZVFHMIN-LABEL: vfdiv_vf_nxv4f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v10, v10, v12
+; ZVFHMIN-NEXT:    vfdiv.vf v10, v10, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
@@ -444,13 +426,11 @@ define <vscale x 8 x half> @vfdiv_vf_nxv8f16(<vscale x 8 x half> %va, half %b) {
 ;
 ; ZVFHMIN-LABEL: vfdiv_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v12, v12, v16
+; ZVFHMIN-NEXT:    vfdiv.vf v12, v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -469,13 +449,11 @@ define <vscale x 8 x half> @vfdiv_fv_nxv8f16(<vscale x 8 x half> %va, half %b) {
 ;
 ; ZVFHMIN-LABEL: vfdiv_fv_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfrdiv.vf v12, v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -515,13 +493,11 @@ define <vscale x 16 x half> @vfdiv_vf_nxv16f16(<vscale x 16 x half> %va, half %b
 ;
 ; ZVFHMIN-LABEL: vfdiv_vf_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v16, v16, v24
+; ZVFHMIN-NEXT:    vfdiv.vf v16, v16, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll
index 1090230..42166be 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll
@@ -46,16 +46,14 @@ define <vscale x 1 x bfloat> @vfmadd_vv_nxv1bf16_commuted(<vscale x 1 x bfloat>
 define <vscale x 1 x bfloat> @vfmadd_vf_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, bfloat %c) {
 ; CHECK-LABEL: vfmadd_vf_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
-; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmadd.vv v12, v11, v10
+; CHECK-NEXT:    vfmadd.vf v9, fa5, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x bfloat> poison, bfloat %c, i32 0
   %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -84,16 +82,14 @@ define <vscale x 2 x bfloat> @vfmadd_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vsc
 define <vscale x 2 x bfloat> @vfmadd_vf_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, bfloat %c) {
 ; CHECK-LABEL: vfmadd_vf_nxv2bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vmv.v.x v8, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v11, v9
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmadd.vv v9, v11, v10
+; CHECK-NEXT:    vfmadd.vf v11, fa5, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v11
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x bfloat> poison, bfloat %c, i32 0
   %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
@@ -122,16 +118,14 @@ define <vscale x 4 x bfloat> @vfmadd_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vsc
 define <vscale x 4 x bfloat> @vfmadd_vf_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, bfloat %c) {
 ; CHECK-LABEL: vfmadd_vf_nxv4bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
-; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v14, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmadd.vv v14, v12, v10
+; CHECK-NEXT:    vfmadd.vf v12, fa5, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v14
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x bfloat> poison, bfloat %c, i32 0
   %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
@@ -160,16 +154,14 @@ define <vscale x 8 x bfloat> @vfmadd_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vsc
 define <vscale x 8 x bfloat> @vfmadd_vf_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, bfloat %c) {
 ; CHECK-LABEL: vfmadd_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vmv.v.x v8, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v20, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmadd.vv v20, v16, v12
+; CHECK-NEXT:    vfmadd.vf v16, fa5, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v20
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x bfloat> poison, bfloat %c, i32 0
   %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -198,16 +190,14 @@ define <vscale x 16 x bfloat> @vfmadd_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <
 define <vscale x 16 x bfloat> @vfmadd_vf_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, bfloat %c) {
 ; CHECK-LABEL: vfmadd_vf_nxv16bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmadd.vv v0, v24, v16
+; CHECK-NEXT:    vfmadd.vf v24, fa5, v16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v24
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x bfloat> poison, bfloat %c, i32 0
   %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
@@ -640,16 +630,14 @@ define <vscale x 1 x half> @vfmadd_vf_nxv1f16(<vscale x 1 x half> %va, <vscale x
 ;
 ; ZVFHMIN-LABEL: vfmadd_vf_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT:    vmv.v.x v9, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v12, v11, v10
+; ZVFHMIN-NEXT:    vfmadd.vf v9, fa5, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 1 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
@@ -690,16 +678,14 @@ define <vscale x 2 x half> @vfmadd_vf_nxv2f16(<vscale x 2 x half> %va, <vscale x
 ;
 ; ZVFHMIN-LABEL: vfmadd_vf_nxv2f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vmv.v.x v8, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v9, v11, v10
+; ZVFHMIN-NEXT:    vfmadd.vf v11, fa5, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v11
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 2 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
@@ -740,16 +726,14 @@ define <vscale x 4 x half> @vfmadd_vf_nxv4f16(<vscale x 4 x half> %va, <vscale x
 ;
 ; ZVFHMIN-LABEL: vfmadd_vf_nxv4f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
-; ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v14, v12, v10
+; ZVFHMIN-NEXT:    vfmadd.vf v12, fa5, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 4 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
@@ -790,16 +774,14 @@ define <vscale x 8 x half> @vfmadd_vf_nxv8f16(<vscale x 8 x half> %va, <vscale x
 ;
 ; ZVFHMIN-LABEL: vfmadd_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vmv.v.x v8, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v20, v16, v12
+; ZVFHMIN-NEXT:    vfmadd.vf v16, fa5, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v20
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 8 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
@@ -840,16 +822,14 @@ define <vscale x 16 x half> @vfmadd_vf_nxv16f16(<vscale x 16 x half> %va, <vscal
 ;
 ; ZVFHMIN-LABEL: vfmadd_vf_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT:    vmv.v.x v12, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v0, v24, v16
+; ZVFHMIN-NEXT:    vfmadd.vf v24, fa5, v16
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
 ; ZVFHMIN-NEXT:    ret
   %head = insertelement <vscale x 16 x half> poison, half %c, i32 0
   %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll
index db034fb..395eebb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll
@@ -30,13 +30,11 @@ define <vscale x 1 x bfloat> @vfmax_nxv1bf16_vv(<vscale x 1 x bfloat> %a, <vscal
 define <vscale x 1 x bfloat> @vfmax_nxv1bf16_vf(<vscale x 1 x bfloat> %a, bfloat %b) {
 ; CHECK-LABEL: vfmax_nxv1bf16_vf:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmax.vv v9, v10, v8
+; CHECK-NEXT:    vfmax.vf v9, v9, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
 ; CHECK-NEXT:    ret
@@ -66,13 +64,11 @@ define <vscale x 2 x bfloat> @vfmax_nxv2bf16_vv(<vscale x 2 x bfloat> %a, <vscal
 define <vscale x 2 x bfloat> @vfmax_nxv2bf16_vf(<vscale x 2 x bfloat> %a, bfloat %b) {
 ; CHECK-LABEL: vfmax_nxv2bf16_vf:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmax.vv v9, v10, v8
+; CHECK-NEXT:    vfmax.vf v9, v9, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
 ; CHECK-NEXT:    ret
@@ -102,13 +98,11 @@ define <vscale x 4 x bfloat> @vfmax_nxv4bf16_vv(<vscale x 4 x bfloat> %a, <vscal
 define <vscale x 4 x bfloat> @vfmax_nxv4bf16_vf(<vscale x 4 x bfloat> %a, bfloat %b) {
 ; CHECK-LABEL: vfmax_nxv4bf16_vf:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmax.vv v10, v10, v12
+; CHECK-NEXT:    vfmax.vf v10, v10, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
 ; CHECK-NEXT:    ret
@@ -138,13 +132,11 @@ define <vscale x 8 x bfloat> @vfmax_nxv8bf16_vv(<vscale x 8 x bfloat> %a, <vscal
 define <vscale x 8 x bfloat> @vfmax_nxv8bf16_vf(<vscale x 8 x bfloat> %a, bfloat %b) {
 ; CHECK-LABEL: vfmax_nxv8bf16_vf:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmax.vv v12, v12, v16
+; CHECK-NEXT:    vfmax.vf v12, v12, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
 ; CHECK-NEXT:    ret
@@ -174,13 +166,11 @@ define <vscale x 16 x bfloat> @vfmax_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vs
 define <vscale x 16 x bfloat> @vfmax_nxv16bf16_vf(<vscale x 16 x bfloat> %a, bfloat %b) {
 ; CHECK-LABEL: vfmax_nxv16bf16_vf:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmax.vv v16, v16, v24
+; CHECK-NEXT:    vfmax.vf v16, v16, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
 ; CHECK-NEXT:    ret
@@ -303,13 +293,11 @@ define <vscale x 1 x half> @vfmax_nxv1f16_vf(<vscale x 1 x half> %a, half %b) {
 ;
 ; ZVFHMIN-LABEL: vfmax_nxv1f16_vf:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfmax.vf v9, v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
@@ -351,13 +339,11 @@ define <vscale x 2 x half> @vfmax_nxv2f16_vf(<vscale x 2 x half> %a, half %b) {
 ;
 ; ZVFHMIN-LABEL: vfmax_nxv2f16_vf:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfmax.vf v9, v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
@@ -399,13 +385,11 @@ define <vscale x 4 x half> @vfmax_nxv4f16_vf(<vscale x 4 x half> %a, half %b) {
 ;
 ; ZVFHMIN-LABEL: vfmax_nxv4f16_vf:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v10, v10, v12
+; ZVFHMIN-NEXT:    vfmax.vf v10, v10, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
@@ -447,13 +431,11 @@ define <vscale x 8 x half> @vfmax_nxv8f16_vf(<vscale x 8 x half> %a, half %b) {
 ;
 ; ZVFHMIN-LABEL: vfmax_nxv8f16_vf:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v12, v12, v16
+; ZVFHMIN-NEXT:    vfmax.vf v12, v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -495,13 +477,11 @@ define <vscale x 16 x half> @vfmax_nxv16f16_vf(<vscale x 16 x half> %a, half %b)
 ;
 ; ZVFHMIN-LABEL: vfmax_nxv16f16_vf:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmax.vv v16, v16, v24
+; ZVFHMIN-NEXT:    vfmax.vf v16, v16, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll
index 3ee82c3..283c510 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll
@@ -30,13 +30,11 @@ define <vscale x 1 x bfloat> @vfmin_nxv1bf16_vv(<vscale x 1 x bfloat> %a, <vscal
 define <vscale x 1 x bfloat> @vfmin_nxv1bf16_vf(<vscale x 1 x bfloat> %a, bfloat %b) {
 ; CHECK-LABEL: vfmin_nxv1bf16_vf:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmin.vv v9, v10, v8
+; CHECK-NEXT:    vfmin.vf v9, v9, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
 ; CHECK-NEXT:    ret
@@ -66,13 +64,11 @@ define <vscale x 2 x bfloat> @vfmin_nxv2bf16_vv(<vscale x 2 x bfloat> %a, <vscal
 define <vscale x 2 x bfloat> @vfmin_nxv2bf16_vf(<vscale x 2 x bfloat> %a, bfloat %b) {
 ; CHECK-LABEL: vfmin_nxv2bf16_vf:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmin.vv v9, v10, v8
+; CHECK-NEXT:    vfmin.vf v9, v9, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
 ; CHECK-NEXT:    ret
@@ -102,13 +98,11 @@ define <vscale x 4 x bfloat> @vfmin_nxv4bf16_vv(<vscale x 4 x bfloat> %a, <vscal
 define <vscale x 4 x bfloat> @vfmin_nxv4bf16_vf(<vscale x 4 x bfloat> %a, bfloat %b) {
 ; CHECK-LABEL: vfmin_nxv4bf16_vf:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmin.vv v10, v10, v12
+; CHECK-NEXT:    vfmin.vf v10, v10, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
 ; CHECK-NEXT:    ret
@@ -138,13 +132,11 @@ define <vscale x 8 x bfloat> @vfmin_nxv8bf16_vv(<vscale x 8 x bfloat> %a, <vscal
 define <vscale x 8 x bfloat> @vfmin_nxv8bf16_vf(<vscale x 8 x bfloat> %a, bfloat %b) {
 ; CHECK-LABEL: vfmin_nxv8bf16_vf:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmin.vv v12, v12, v16
+; CHECK-NEXT:    vfmin.vf v12, v12, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
 ; CHECK-NEXT:    ret
@@ -174,13 +166,11 @@ define <vscale x 16 x bfloat> @vfmin_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vs
 define <vscale x 16 x bfloat> @vfmin_nxv16bf16_vf(<vscale x 16 x bfloat> %a, bfloat %b) {
 ; CHECK-LABEL: vfmin_nxv16bf16_vf:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmin.vv v16, v16, v24
+; CHECK-NEXT:    vfmin.vf v16, v16, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
 ; CHECK-NEXT:    ret
@@ -303,13 +293,11 @@ define <vscale x 1 x half> @vfmin_nxv1f16_vf(<vscale x 1 x half> %a, half %b) {
 ;
 ; ZVFHMIN-LABEL: vfmin_nxv1f16_vf:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfmin.vf v9, v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
@@ -351,13 +339,11 @@ define <vscale x 2 x half> @vfmin_nxv2f16_vf(<vscale x 2 x half> %a, half %b) {
 ;
 ; ZVFHMIN-LABEL: vfmin_nxv2f16_vf:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfmin.vf v9, v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
@@ -399,13 +385,11 @@ define <vscale x 4 x half> @vfmin_nxv4f16_vf(<vscale x 4 x half> %a, half %b) {
 ;
 ; ZVFHMIN-LABEL: vfmin_nxv4f16_vf:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v10, v10, v12
+; ZVFHMIN-NEXT:    vfmin.vf v10, v10, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
@@ -447,13 +431,11 @@ define <vscale x 8 x half> @vfmin_nxv8f16_vf(<vscale x 8 x half> %a, half %b) {
 ;
 ; ZVFHMIN-LABEL: vfmin_nxv8f16_vf:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v12, v12, v16
+; ZVFHMIN-NEXT:    vfmin.vf v12, v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -495,13 +477,11 @@ define <vscale x 16 x half> @vfmin_nxv16f16_vf(<vscale x 16 x half> %a, half %b)
 ;
 ; ZVFHMIN-LABEL: vfmin_nxv16f16_vf:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmin.vv v16, v16, v24
+; ZVFHMIN-NEXT:    vfmin.vf v16, v16, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll
index fb8ed3f..6fc9ccb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll
@@ -30,13 +30,11 @@ define <vscale x 1 x bfloat> @vfmul_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vsca
 define <vscale x 1 x bfloat> @vfmul_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: vfmul_vf_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmul.vv v9, v10, v8
+; CHECK-NEXT:    vfmul.vf v9, v9, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
 ; CHECK-NEXT:    ret
@@ -64,13 +62,11 @@ define <vscale x 2 x bfloat> @vfmul_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vsca
 define <vscale x 2 x bfloat> @vfmul_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: vfmul_vf_nxv2bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmul.vv v9, v10, v8
+; CHECK-NEXT:    vfmul.vf v9, v9, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
 ; CHECK-NEXT:    ret
@@ -98,13 +94,11 @@ define <vscale x 4 x bfloat> @vfmul_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vsca
 define <vscale x 4 x bfloat> @vfmul_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: vfmul_vf_nxv4bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfmul.vv v10, v10, v12
+; CHECK-NEXT:    vfmul.vf v10, v10, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
 ; CHECK-NEXT:    ret
@@ -132,13 +126,11 @@ define <vscale x 8 x bfloat> @vfmul_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vsca
 define <vscale x 8 x bfloat> @vfmul_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: vfmul_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmul.vv v12, v12, v16
+; CHECK-NEXT:    vfmul.vf v12, v12, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
 ; CHECK-NEXT:    ret
@@ -151,13 +143,11 @@ define <vscale x 8 x bfloat> @vfmul_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 define <vscale x 8 x bfloat> @vfmul_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: vfmul_fv_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfmul.vv v12, v16, v12
+; CHECK-NEXT:    vfmul.vf v12, v12, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
 ; CHECK-NEXT:    ret
@@ -185,13 +175,11 @@ define <vscale x 16 x bfloat> @vfmul_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <v
 define <vscale x 16 x bfloat> @vfmul_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: vfmul_vf_nxv16bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfmul.vv v16, v16, v24
+; CHECK-NEXT:    vfmul.vf v16, v16, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
 ; CHECK-NEXT:    ret
@@ -310,13 +298,11 @@ define <vscale x 1 x half> @vfmul_vf_nxv1f16(<vscale x 1 x half> %va, half %b) {
 ;
 ; ZVFHMIN-LABEL: vfmul_vf_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfmul.vf v9, v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
@@ -356,13 +342,11 @@ define <vscale x 2 x half> @vfmul_vf_nxv2f16(<vscale x 2 x half> %va, half %b) {
 ;
 ; ZVFHMIN-LABEL: vfmul_vf_nxv2f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfmul.vf v9, v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
@@ -402,13 +386,11 @@ define <vscale x 4 x half> @vfmul_vf_nxv4f16(<vscale x 4 x half> %va, half %b) {
 ;
 ; ZVFHMIN-LABEL: vfmul_vf_nxv4f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v10, v10, v12
+; ZVFHMIN-NEXT:    vfmul.vf v10, v10, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
@@ -448,13 +430,11 @@ define <vscale x 8 x half> @vfmul_vf_nxv8f16(<vscale x 8 x half> %va, half %b) {
 ;
 ; ZVFHMIN-LABEL: vfmul_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v12, v12, v16
+; ZVFHMIN-NEXT:    vfmul.vf v12, v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -473,13 +453,11 @@ define <vscale x 8 x half> @vfmul_fv_nxv8f16(<vscale x 8 x half> %va, half %b) {
 ;
 ; ZVFHMIN-LABEL: vfmul_fv_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfmul.vf v12, v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -519,13 +497,11 @@ define <vscale x 16 x half> @vfmul_vf_nxv16f16(<vscale x 16 x half> %va, half %b
 ;
 ; ZVFHMIN-LABEL: vfmul_vf_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v16, v16, v24
+; ZVFHMIN-NEXT:    vfmul.vf v16, v16, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll
index f806447..9ab41ce 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll
@@ -30,13 +30,11 @@ define <vscale x 1 x bfloat> @vfsub_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vsca
 define <vscale x 1 x bfloat> @vfsub_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: vfsub_vf_nxv1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfsub.vv v9, v10, v8
+; CHECK-NEXT:    vfsub.vf v9, v9, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
 ; CHECK-NEXT:    ret
@@ -64,13 +62,11 @@ define <vscale x 2 x bfloat> @vfsub_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vsca
 define <vscale x 2 x bfloat> @vfsub_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: vfsub_vf_nxv2bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a0
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfsub.vv v9, v10, v8
+; CHECK-NEXT:    vfsub.vf v9, v9, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
 ; CHECK-NEXT:    ret
@@ -98,13 +94,11 @@ define <vscale x 4 x bfloat> @vfsub_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vsca
 define <vscale x 4 x bfloat> @vfsub_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: vfsub_vf_nxv4bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vfsub.vv v10, v10, v12
+; CHECK-NEXT:    vfsub.vf v10, v10, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
 ; CHECK-NEXT:    ret
@@ -132,13 +126,11 @@ define <vscale x 8 x bfloat> @vfsub_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vsca
 define <vscale x 8 x bfloat> @vfsub_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: vfsub_vf_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfsub.vv v12, v12, v16
+; CHECK-NEXT:    vfsub.vf v12, v12, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
 ; CHECK-NEXT:    ret
@@ -151,13 +143,11 @@ define <vscale x 8 x bfloat> @vfsub_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
 define <vscale x 8 x bfloat> @vfsub_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: vfsub_fv_nxv8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT:    vfsub.vv v12, v16, v12
+; CHECK-NEXT:    vfrsub.vf v12, v12, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
 ; CHECK-NEXT:    ret
@@ -185,13 +175,11 @@ define <vscale x 16 x bfloat> @vfsub_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <v
 define <vscale x 16 x bfloat> @vfsub_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b) {
 ; CHECK-LABEL: vfsub_vf_nxv16bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.x.h a0, fa0
-; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vmv.v.x v12, a0
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT:    vfsub.vv v16, v16, v24
+; CHECK-NEXT:    vfsub.vf v16, v16, fa5
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
 ; CHECK-NEXT:    ret
@@ -310,13 +298,11 @@ define <vscale x 1 x half> @vfsub_vf_nxv1f16(<vscale x 1 x half> %va, half %b) {
 ;
 ; ZVFHMIN-LABEL: vfsub_vf_nxv1f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfsub.vf v9, v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
@@ -356,13 +342,11 @@ define <vscale x 2 x half> @vfsub_vf_nxv2f16(<vscale x 2 x half> %va, half %b) {
 ;
 ; ZVFHMIN-LABEL: vfsub_vf_nxv2f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a0
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v9, v10, v8
+; ZVFHMIN-NEXT:    vfsub.vf v9, v9, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
@@ -402,13 +386,11 @@ define <vscale x 4 x half> @vfsub_vf_nxv4f16(<vscale x 4 x half> %va, half %b) {
 ;
 ; ZVFHMIN-LABEL: vfsub_vf_nxv4f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v10, v10, v12
+; ZVFHMIN-NEXT:    vfsub.vf v10, v10, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    ret
@@ -448,13 +430,11 @@ define <vscale x 8 x half> @vfsub_vf_nxv8f16(<vscale x 8 x half> %va, half %b) {
 ;
 ; ZVFHMIN-LABEL: vfsub_vf_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v12, v12, v16
+; ZVFHMIN-NEXT:    vfsub.vf v12, v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -473,13 +453,11 @@ define <vscale x 8 x half> @vfsub_fv_nxv8f16(<vscale x 8 x half> %va, half %b) {
 ;
 ; ZVFHMIN-LABEL: vfsub_fv_nxv8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v12, v16, v12
+; ZVFHMIN-NEXT:    vfrsub.vf v12, v12, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT:    ret
@@ -519,13 +497,11 @@ define <vscale x 16 x half> @vfsub_vf_nxv16f16(<vscale x 16 x half> %va, half %b
 ;
 ; ZVFHMIN-LABEL: vfsub_vf_nxv16f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.h a0, fa0
-; ZVFHMIN-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v12, a0
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v16, v16, v24
+; ZVFHMIN-NEXT:    vfsub.vf v16, v16, fa5
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
 ; ZVFHMIN-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 163166c..053f120 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -4031,3 +4031,323 @@ define <vscale x 4 x float> @vfsgnjx_vf(<vscale x 4 x float> %a, float %b, iXLen
   %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl)
   ret <vscale x 4 x float> %2
 }
+
+define <vscale x 4 x float> @vfmacc_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmacc_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfmacc.vv v8, v12, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfmacc_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfmacc.vv v8, v12, v10
+; VLOPT-NEXT:    vfadd.vv v8, v8, v12
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfmacc(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 3)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmacc_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmacc_vf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfmacc.vf v8, fa0, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfmacc_vf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfmacc.vf v8, fa0, v10
+; VLOPT-NEXT:    vfadd.vv v8, v8, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfmacc(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 3)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmacc_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmacc_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfnmacc.vv v8, v12, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfnmacc_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfnmacc.vv v8, v12, v10
+; VLOPT-NEXT:    vfadd.vv v8, v8, v12
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfnmacc(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 3)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmacc_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmacc_vf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfnmacc.vf v8, fa0, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfnmacc_vf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfnmacc.vf v8, fa0, v10
+; VLOPT-NEXT:    vfadd.vv v8, v8, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfnmacc(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 3)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmsac_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmsac_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfmsac.vv v8, v12, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfmsac_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfmsac.vv v8, v12, v10
+; VLOPT-NEXT:    vfadd.vv v8, v8, v12
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfmsac(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 3)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmsac_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmsac_vf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfmsac.vf v8, fa0, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfmsac_vf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfmsac.vf v8, fa0, v10
+; VLOPT-NEXT:    vfadd.vv v8, v8, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfmsac(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 3)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmsac_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmsac_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfnmsac.vv v8, v12, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfnmsac_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfnmsac.vv v8, v12, v10
+; VLOPT-NEXT:    vfadd.vv v8, v8, v12
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfnmsac(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 3)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmsac_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmsac_vf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfnmsac.vf v8, fa0, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfnmsac_vf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfnmsac.vf v8, fa0, v10
+; VLOPT-NEXT:    vfadd.vv v8, v8, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfnmsac(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 3)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmadd_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmadd_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfmadd.vv v8, v10, v12
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfmadd_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfmadd.vv v8, v10, v12
+; VLOPT-NEXT:    vfadd.vv v8, v8, v12
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfmadd(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 3)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmadd_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmadd_vf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfmadd.vf v8, fa0, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfmadd_vf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfmadd.vf v8, fa0, v10
+; VLOPT-NEXT:    vfadd.vv v8, v8, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfmadd(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 3)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmadd_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmadd_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfnmadd.vv v8, v10, v12
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfnmadd_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfnmadd.vv v8, v10, v12
+; VLOPT-NEXT:    vfadd.vv v8, v8, v12
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfnmadd(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 3)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmadd_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmadd_vf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfnmadd.vf v8, fa0, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfnmadd_vf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfnmadd.vf v8, fa0, v10
+; VLOPT-NEXT:    vfadd.vv v8, v8, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfnmadd(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 3)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmsub_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmsub_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfmsub.vv v8, v10, v12
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfmsub_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfmsub.vv v8, v10, v12
+; VLOPT-NEXT:    vfadd.vv v8, v8, v12
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfmsub(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 3)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmsub_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmsub_vf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfmsub.vf v8, fa0, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfmsub_vf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfmsub.vf v8, fa0, v10
+; VLOPT-NEXT:    vfadd.vv v8, v8, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfmsub(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 3)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmsub_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmsub_vv:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfnmsub.vv v8, v10, v12
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfnmsub_vv:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfnmsub.vv v8, v10, v12
+; VLOPT-NEXT:    vfadd.vv v8, v8, v12
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfnmsub(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 3)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmsub_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmsub_vf:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfnmsub.vf v8, fa0, v10
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vfnmsub_vf:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vfnmsub.vf v8, fa0, v10
+; VLOPT-NEXT:    vfadd.vv v8, v8, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x float> @llvm.riscv.vfnmsub(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 7, iXLen -1, iXLen 3)
+  %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 7, iXLen %vl)
+  ret <vscale x 4 x float> %2
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
new file mode 100644
index 0000000..e481891
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
@@ -0,0 +1,816 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,m -O2 | FileCheck -check-prefixes=CHECK,RV32 %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,m -O2 | FileCheck -check-prefixes=CHECK,RV64 %s
+
+define {<vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor2_v2(ptr %ptr, i32 %evl) {
+; RV32-LABEL: load_factor2_v2:
+; RV32:       # %bb.0:
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    srli a1, a1, 1
+; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; RV32-NEXT:    vlseg2e32.v v8, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: load_factor2_v2:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a1, a1, 33
+; RV64-NEXT:    srli a1, a1, 33
+; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; RV64-NEXT:    vlseg2e32.v v8, (a0)
+; RV64-NEXT:    ret
+  %rvl = mul i32 %evl, 2
+  %wide.masked.load = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %rvl)
+  %deinterleaved.results = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %wide.masked.load)
+  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 0
+  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 1
+  %res0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> %t0, 0
+  %res1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %res0, <vscale x 2 x i32> %t1, 1
+  ret { <vscale x 2 x i32>, <vscale x 2 x i32> } %res1
+}
+
+define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor4_v2(ptr %ptr, i32 %evl) {
+; RV32-LABEL: load_factor4_v2:
+; RV32:       # %bb.0:
+; RV32-NEXT:    slli a1, a1, 2
+; RV32-NEXT:    srli a1, a1, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; RV32-NEXT:    vlseg4e32.v v8, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: load_factor4_v2:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a1, a1, 34
+; RV64-NEXT:    srli a1, a1, 34
+; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; RV64-NEXT:    vlseg4e32.v v8, (a0)
+; RV64-NEXT:    ret
+  %rvl = mul i32 %evl, 4
+  %wide.masked.load = call <vscale x 8 x i32> @llvm.vp.load.nxv8i32.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %rvl)
+  %d0 = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %wide.masked.load)
+  %d0.0 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %d0, 0
+  %d0.1 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %d0, 1
+  %d1 = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %d0.0)
+  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %d1, 0
+  %t2 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %d1, 1
+  %d2 = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %d0.1)
+  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %d2, 0
+  %t3 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %d2, 1
+
+  %res0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> %t0, 0
+  %res1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res0, <vscale x 2 x i32> %t1, 1
+  %res2 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res1, <vscale x 2 x i32> %t2, 2
+  %res3 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res2, <vscale x 2 x i32> %t3, 3
+  ret { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res3
+}
+
+define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor8_v2(ptr %ptr, i32 %evl) {
+; RV32-LABEL: load_factor8_v2:
+; RV32:       # %bb.0:
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    srli a1, a1, 3
+; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; RV32-NEXT:    vlseg8e32.v v8, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: load_factor8_v2:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a1, a1, 35
+; RV64-NEXT:    srli a1, a1, 35
+; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; RV64-NEXT:    vlseg8e32.v v8, (a0)
+; RV64-NEXT:    ret
+  %rvl = mul i32 %evl, 8
+  %wide.masked.load = call <vscale x 16 x i32> @llvm.vp.load.nxv16i32.p0(ptr %ptr, <vscale x 16 x i1> splat (i1 true), i32 %rvl)
+  %d0 = call { <vscale x 8 x i32>, <vscale x 8 x i32> } @llvm.vector.deinterleave2.nxv16i32(<vscale x 16 x i32> %wide.masked.load)
+  %d0.0 = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } %d0, 0
+  %d0.1 = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } %d0, 1
+  %d1 = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %d0.0)
+  %d1.0 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %d1, 0
+  %d1.1 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %d1, 1
+  %d2 = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %d0.1)
+  %d2.0 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %d2, 0
+  %d2.1 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %d2, 1
+
+  %d3 = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %d1.0)
+  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %d3, 0
+  %t4 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %d3, 1
+  %d4 = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %d1.1)
+  %t2 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %d4, 0
+  %t6 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %d4, 1
+  %d5 = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %d2.0)
+  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %d5, 0
+  %t5 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %d5, 1
+  %d6 = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %d2.1)
+  %t3 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %d6, 0
+  %t7 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %d6, 1
+
+  %res0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> %t0, 0
+  %res1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res0, <vscale x 2 x i32> %t1, 1
+  %res2 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res1, <vscale x 2 x i32> %t2, 2
+  %res3 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res2, <vscale x 2 x i32> %t3, 3
+  %res4 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res3, <vscale x 2 x i32> %t4, 4
+  %res5 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res4, <vscale x 2 x i32> %t5, 5
+  %res6 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res5, <vscale x 2 x i32> %t6, 6
+  %res7 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res6, <vscale x 2 x i32> %t7, 7
+  ret { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res7
+}
+
+define void @store_factor2_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 %evl) {
+; RV32-LABEL: store_factor2_v2:
+; RV32:       # %bb.0:
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    srli a1, a1, 1
+; RV32-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; RV32-NEXT:    vsseg2e32.v v8, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: store_factor2_v2:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a1, a1, 33
+; RV64-NEXT:    srli a1, a1, 33
+; RV64-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; RV64-NEXT:    vsseg2e32.v v8, (a0)
+; RV64-NEXT:    ret
+  %rvl = mul i32 %evl, 2
+  %interleaved.vec = call <vscale x 2 x i32> @llvm.vector.interleave2.nxv2i32(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1)
+  call void @llvm.vp.store.nxv2i32.p0(<vscale x 2 x i32> %interleaved.vec, ptr %ptr, <vscale x 2 x i1> splat (i1 true), i32 %rvl)
+  ret void
+}
+
+define void @store_factor4_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 %evl) {
+; RV32-LABEL: store_factor4_v2:
+; RV32:       # %bb.0:
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    srli a1, a1, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; RV32-NEXT:    vmv1r.v v10, v8
+; RV32-NEXT:    vmv1r.v v11, v9
+; RV32-NEXT:    vsseg4e32.v v8, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: store_factor4_v2:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a1, a1, 35
+; RV64-NEXT:    srli a1, a1, 34
+; RV64-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; RV64-NEXT:    vmv1r.v v10, v8
+; RV64-NEXT:    vmv1r.v v11, v9
+; RV64-NEXT:    vsseg4e32.v v8, (a0)
+; RV64-NEXT:    ret
+  %rvl = mul i32 %evl, 8
+  %interleaved.vec0 = call <vscale x 2 x i32> @llvm.vector.interleave2.nxv2i32(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v0)
+  %interleaved.vec1 = call <vscale x 2 x i32> @llvm.vector.interleave2.nxv2i32(<vscale x 1 x i32> %v1, <vscale x 1 x i32> %v1)
+  %interleaved.vec2 = call <vscale x 4 x i32> @llvm.vector.interleave2.nxv4i32(<vscale x 2 x i32> %interleaved.vec0, <vscale x 2 x i32> %interleaved.vec1)
+  call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %interleaved.vec2, ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %rvl)
+  ret void
+}
+
+define void @store_factor8_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 %evl) {
+; RV32-LABEL: store_factor8_v2:
+; RV32:       # %bb.0:
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    srli a1, a1, 3
+; RV32-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; RV32-NEXT:    vmv1r.v v10, v8
+; RV32-NEXT:    vmv1r.v v11, v9
+; RV32-NEXT:    vmv1r.v v12, v8
+; RV32-NEXT:    vmv1r.v v13, v9
+; RV32-NEXT:    vmv1r.v v14, v8
+; RV32-NEXT:    vmv1r.v v15, v9
+; RV32-NEXT:    vsseg8e32.v v8, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: store_factor8_v2:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a1, a1, 35
+; RV64-NEXT:    srli a1, a1, 35
+; RV64-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; RV64-NEXT:    vmv1r.v v10, v8
+; RV64-NEXT:    vmv1r.v v11, v9
+; RV64-NEXT:    vmv1r.v v12, v8
+; RV64-NEXT:    vmv1r.v v13, v9
+; RV64-NEXT:    vmv1r.v v14, v8
+; RV64-NEXT:    vmv1r.v v15, v9
+; RV64-NEXT:    vsseg8e32.v v8, (a0)
+; RV64-NEXT:    ret
+  %rvl = mul i32 %evl, 8
+  %interleaved.vec0 = call <vscale x 2 x i32> @llvm.vector.interleave2.nxv2i32(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v0)
+  %interleaved.vec1 = call <vscale x 2 x i32> @llvm.vector.interleave2.nxv2i32(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v0)
+  %interleaved.vec2 = call <vscale x 4 x i32> @llvm.vector.interleave2.nxv4i32(<vscale x 2 x i32> %interleaved.vec0, <vscale x 2 x i32> %interleaved.vec1)
+  %interleaved.vec3 = call <vscale x 2 x i32> @llvm.vector.interleave2.nxv2i32(<vscale x 1 x i32> %v1, <vscale x 1 x i32> %v1)
+  %interleaved.vec4 = call <vscale x 2 x i32> @llvm.vector.interleave2.nxv2i32(<vscale x 1 x i32> %v1, <vscale x 1 x i32> %v1)
+  %interleaved.vec5 = call <vscale x 4 x i32> @llvm.vector.interleave2.nxv4i32(<vscale x 2 x i32> %interleaved.vec3, <vscale x 2 x i32> %interleaved.vec4)
+  %interleaved.vec6 = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> %interleaved.vec2, <vscale x 4 x i32> %interleaved.vec5)
+  call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %interleaved.vec6, ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %rvl)
+  ret void
+}
+
+define {<vscale x 2 x i32>, <vscale x 2 x i32>} @masked_load_factor2_v2(<vscale x 2 x i1> %mask, ptr %ptr, i32 %evl) {
+; RV32-LABEL: masked_load_factor2_v2:
+; RV32:       # %bb.0:
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    srli a1, a1, 1
+; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; RV32-NEXT:    vlseg2e32.v v8, (a0), v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: masked_load_factor2_v2:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a1, a1, 33
+; RV64-NEXT:    srli a1, a1, 33
+; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; RV64-NEXT:    vlseg2e32.v v8, (a0), v0.t
+; RV64-NEXT:    ret
+  %rvl = mul i32 %evl, 2
+  %interleaved.mask = tail call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %mask, <vscale x 2 x i1> %mask)
+  %wide.masked.load = tail call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> %interleaved.mask, i32 %rvl)
+  %deinterleaved.results = tail call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv16i32(<vscale x 4 x i32> %wide.masked.load)
+  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 0
+  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 1
+  %res0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> %t0, 0
+  %res1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %res0, <vscale x 2 x i32> %t1, 1
+  ret { <vscale x 2 x i32>, <vscale x 2 x i32> } %res1
+}
+
+define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @masked_load_factor4_v2(<vscale x 2 x i1> %mask, ptr %ptr, i32 %evl) {
+; RV32-LABEL: masked_load_factor4_v2:
+; RV32:       # %bb.0:
+; RV32-NEXT:    slli a1, a1, 2
+; RV32-NEXT:    srli a1, a1, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; RV32-NEXT:    vlseg4e32.v v8, (a0), v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: masked_load_factor4_v2:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a1, a1, 34
+; RV64-NEXT:    srli a1, a1, 34
+; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; RV64-NEXT:    vlseg4e32.v v8, (a0), v0.t
+; RV64-NEXT:    ret
+  %rvl = mul i32 %evl, 4
+  %interleaved.mask0 = call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %mask, <vscale x 2 x i1> %mask)
+  %interleaved.mask1 = call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %mask, <vscale x 2 x i1> %mask)
+  %interleaved.mask2 = call <vscale x 8 x i1> @llvm.vector.interleave2.nxv8i1(<vscale x 4 x i1> %interleaved.mask0, <vscale x 4 x i1> %interleaved.mask1)
+  %wide.masked.load = call <vscale x 8 x i32> @llvm.vp.load.nxv8i32.p0(ptr %ptr, <vscale x 8 x i1> %interleaved.mask2, i32 %rvl)
+  %d0 = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %wide.masked.load)
+  %d0.0 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %d0, 0
+  %d0.1 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %d0, 1
+  %d1 = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %d0.0)
+  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %d1, 0
+  %t2 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %d1, 1
+  %d2 = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %d0.1)
+  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %d2, 0
+  %t3 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %d2, 1
+
+  %res0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> %t0, 0
+  %res1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res0, <vscale x 2 x i32> %t1, 1
+  %res2 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res1, <vscale x 2 x i32> %t2, 2
+  %res3 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res2, <vscale x 2 x i32> %t3, 3
+  ret { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res3
+}
+
+define void @masked_store_factor2_v2(<vscale x 1 x i1> %mask, <vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 %evl) {
+; RV32-LABEL: masked_store_factor2_v2:
+; RV32:       # %bb.0:
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT:    vmv1r.v v9, v8
+; RV32-NEXT:    srli a1, a1, 1
+; RV32-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; RV32-NEXT:    vsseg2e32.v v8, (a0), v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: masked_store_factor2_v2:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a1, a1, 33
+; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT:    vmv1r.v v9, v8
+; RV64-NEXT:    srli a1, a1, 33
+; RV64-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; RV64-NEXT:    vsseg2e32.v v8, (a0), v0.t
+; RV64-NEXT:    ret
+  %rvl = mul i32 %evl, 2
+  %interleaved.mask = tail call <vscale x 2 x i1> @llvm.vector.interleave2.nxv2i1(<vscale x 1 x i1> %mask, <vscale x 1 x i1> %mask)
+  %interleaved.vec = tail call <vscale x 2 x i32> @llvm.vector.interleave2.nxv2i32(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v0)
+  tail call void @llvm.vp.store.nxv2i32.p0(<vscale x 2 x i32> %interleaved.vec, ptr %ptr, <vscale x 2 x i1> %interleaved.mask, i32 %rvl)
+  ret void
+}
+
+define void @masked_load_store_factor2_v2_shared_mask(<vscale x 2 x i1> %mask, ptr %ptr, i32 %evl) {
+; RV32-LABEL: masked_load_store_factor2_v2_shared_mask:
+; RV32:       # %bb.0:
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    srli a1, a1, 1
+; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; RV32-NEXT:    vlseg2e32.v v8, (a0), v0.t
+; RV32-NEXT:    vsseg2e32.v v8, (a0), v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: masked_load_store_factor2_v2_shared_mask:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a1, a1, 33
+; RV64-NEXT:    srli a1, a1, 33
+; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; RV64-NEXT:    vlseg2e32.v v8, (a0), v0.t
+; RV64-NEXT:    vsseg2e32.v v8, (a0), v0.t
+; RV64-NEXT:    ret
+  %rvl = mul i32 %evl, 2
+  %interleaved.mask = tail call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %mask, <vscale x 2 x i1> %mask)
+  %wide.masked.load = tail call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> %interleaved.mask, i32 %rvl)
+  %deinterleaved.results = tail call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv16i32(<vscale x 4 x i32> %wide.masked.load)
+  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 0
+  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 1
+  %interleaved.vec = tail call <vscale x 4 x i32> @llvm.vector.interleave2.nxv4i32(<vscale x 2 x i32> %t0, <vscale x 2 x i32> %t1)
+  tail call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %interleaved.vec, ptr %ptr, <vscale x 4 x i1> %interleaved.mask, i32 %rvl)
+  ret void
+}
+
+define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> %mask, ptr %ptr, i32 %evl) {
+; RV32-LABEL: masked_load_store_factor2_v2_shared_mask_extract:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
+; RV32-NEXT:    vmv1r.v v8, v0
+; RV32-NEXT:    vmv.v.i v9, 0
+; RV32-NEXT:    li a2, -1
+; RV32-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
+; RV32-NEXT:    vmv.v.i v10, 0
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    vsetvli a4, zero, e8, mf4, ta, ma
+; RV32-NEXT:    vmerge.vim v11, v9, 1, v0
+; RV32-NEXT:    srli a3, a3, 2
+; RV32-NEXT:    vwaddu.vv v12, v11, v11
+; RV32-NEXT:    vwmaccu.vx v12, a2, v11
+; RV32-NEXT:    vmsne.vi v0, v12, 0
+; RV32-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
+; RV32-NEXT:    vslidedown.vx v11, v12, a3
+; RV32-NEXT:    vmerge.vim v10, v10, 1, v0
+; RV32-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
+; RV32-NEXT:    vmsne.vi v0, v11, 0
+; RV32-NEXT:    add a2, a3, a3
+; RV32-NEXT:    vmerge.vim v9, v9, 1, v0
+; RV32-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
+; RV32-NEXT:    vslideup.vx v10, v9, a3
+; RV32-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
+; RV32-NEXT:    vmsne.vi v0, v10, 0
+; RV32-NEXT:    slli a2, a1, 1
+; RV32-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
+; RV32-NEXT:    vle32.v v10, (a0), v0.t
+; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
+; RV32-NEXT:    vnsrl.wx v13, v10, a1
+; RV32-NEXT:    vmv.x.s a1, v10
+; RV32-NEXT:    vnsrl.wi v12, v10, 0
+; RV32-NEXT:    srli a2, a2, 1
+; RV32-NEXT:    vmv1r.v v0, v8
+; RV32-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
+; RV32-NEXT:    vsseg2e32.v v12, (a0), v0.t
+; RV32-NEXT:    mv a0, a1
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: masked_load_store_factor2_v2_shared_mask_extract:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
+; RV64-NEXT:    vmv1r.v v8, v0
+; RV64-NEXT:    vmv.v.i v9, 0
+; RV64-NEXT:    li a2, -1
+; RV64-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
+; RV64-NEXT:    vmv.v.i v10, 0
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a4, a1, 33
+; RV64-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
+; RV64-NEXT:    vmerge.vim v11, v9, 1, v0
+; RV64-NEXT:    srli a3, a3, 2
+; RV64-NEXT:    vwaddu.vv v12, v11, v11
+; RV64-NEXT:    vwmaccu.vx v12, a2, v11
+; RV64-NEXT:    vmsne.vi v0, v12, 0
+; RV64-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
+; RV64-NEXT:    vslidedown.vx v11, v12, a3
+; RV64-NEXT:    vmerge.vim v10, v10, 1, v0
+; RV64-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
+; RV64-NEXT:    vmsne.vi v0, v11, 0
+; RV64-NEXT:    add a1, a3, a3
+; RV64-NEXT:    vmerge.vim v9, v9, 1, v0
+; RV64-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; RV64-NEXT:    vslideup.vx v10, v9, a3
+; RV64-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
+; RV64-NEXT:    vmsne.vi v0, v10, 0
+; RV64-NEXT:    srli a1, a4, 32
+; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; RV64-NEXT:    vle32.v v10, (a0), v0.t
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV64-NEXT:    vnsrl.wx v13, v10, a1
+; RV64-NEXT:    vmv.x.s a1, v10
+; RV64-NEXT:    vnsrl.wi v12, v10, 0
+; RV64-NEXT:    srli a4, a4, 33
+; RV64-NEXT:    vmv1r.v v0, v8
+; RV64-NEXT:    vsetvli zero, a4, e32, m1, ta, ma
+; RV64-NEXT:    vsseg2e32.v v12, (a0), v0.t
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:    ret
+  %rvl = mul i32 %evl, 2
+  %interleaved.mask = tail call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %mask, <vscale x 2 x i1> %mask)
+  %wide.masked.load = tail call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> %interleaved.mask, i32 %rvl)
+  %deinterleaved.results = tail call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv16i32(<vscale x 4 x i32> %wide.masked.load)
+  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 0
+  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 1
+  %r0 = extractelement <vscale x 4 x i32> %wide.masked.load, i32 0
+  %interleaved.vec = tail call <vscale x 4 x i32> @llvm.vector.interleave2.nxv4i32(<vscale x 2 x i32> %t0, <vscale x 2 x i32> %t1)
+  tail call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %interleaved.vec, ptr %ptr, <vscale x 4 x i1> %interleaved.mask, i32 %rvl)
+  ret i32 %r0
+}
+
+define void @masked_store_factor4_v2(<vscale x 1 x i1> %mask, <vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 %evl) {
+; RV32-LABEL: masked_store_factor4_v2:
+; RV32:       # %bb.0:
+; RV32-NEXT:    slli a1, a1, 2
+; RV32-NEXT:    srli a1, a1, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; RV32-NEXT:    vmv1r.v v10, v8
+; RV32-NEXT:    vmv1r.v v11, v9
+; RV32-NEXT:    vsseg4e32.v v8, (a0), v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: masked_store_factor4_v2:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a1, a1, 34
+; RV64-NEXT:    srli a1, a1, 34
+; RV64-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; RV64-NEXT:    vmv1r.v v10, v8
+; RV64-NEXT:    vmv1r.v v11, v9
+; RV64-NEXT:    vsseg4e32.v v8, (a0), v0.t
+; RV64-NEXT:    ret
+  %rvl = mul i32 %evl, 4
+  %interleaved.mask0 = call <vscale x 2 x i1> @llvm.vector.interleave2.nxv2i1(<vscale x 1 x i1> %mask, <vscale x 1 x i1> %mask)
+  %interleaved.mask1 = call <vscale x 2 x i1> @llvm.vector.interleave2.nxv2i1(<vscale x 1 x i1> %mask, <vscale x 1 x i1> %mask)
+  %interleaved.mask2 = call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %interleaved.mask0, <vscale x 2 x i1> %interleaved.mask1)
+  %interleaved.vec0 = call <vscale x 2 x i32> @llvm.vector.interleave2.nxv2i32(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v0)
+  %interleaved.vec1 = call <vscale x 2 x i32> @llvm.vector.interleave2.nxv2i32(<vscale x 1 x i32> %v1, <vscale x 1 x i32> %v1)
+  %interleaved.vec2 = call <vscale x 4 x i32> @llvm.vector.interleave2.nxv4i32(<vscale x 2 x i32> %interleaved.vec0, <vscale x 2 x i32> %interleaved.vec1)
+  call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %interleaved.vec2, ptr %ptr, <vscale x 4 x i1> %interleaved.mask2, i32 %rvl)
+  ret void
+}
+
+; Negative tests
+
+; We should not transform this function because the deinterleave tree is not in a desired form.
+define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @incorrect_extract_value_index(ptr %ptr, i32 %evl) {
+; RV32-LABEL: incorrect_extract_value_index:
+; RV32:       # %bb.0:
+; RV32-NEXT:    slli a1, a1, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vle32.v v8, (a0)
+; RV32-NEXT:    li a0, 32
+; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; RV32-NEXT:    vnsrl.wi v12, v8, 0
+; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; RV32-NEXT:    vnsrl.wx v9, v12, a0
+; RV32-NEXT:    vnsrl.wi v8, v12, 0
+; RV32-NEXT:    vmv.v.v v10, v9
+; RV32-NEXT:    vmv.v.v v11, v9
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: incorrect_extract_value_index:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a1, a1, 34
+; RV64-NEXT:    srli a1, a1, 32
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vle32.v v8, (a0)
+; RV64-NEXT:    li a0, 32
+; RV64-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; RV64-NEXT:    vnsrl.wi v12, v8, 0
+; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; RV64-NEXT:    vnsrl.wx v9, v12, a0
+; RV64-NEXT:    vnsrl.wi v8, v12, 0
+; RV64-NEXT:    vmv.v.v v10, v9
+; RV64-NEXT:    vmv.v.v v11, v9
+; RV64-NEXT:    ret
+  %rvl = mul i32 %evl, 4
+  %wide.masked.load = call <vscale x 8 x i32> @llvm.vp.load.nxv8i32.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %rvl)
+  %d0 = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %wide.masked.load)
+  %d0.0 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %d0, 0
+  %d0.1 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %d0, 0
+  %d1 = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %d0.0)
+  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %d1, 0
+  %t2 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %d1, 1
+  %d2 = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %d0.1)
+  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %d2, 1
+  %t3 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %d2, 1
+
+  %res0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> %t0, 0
+  %res1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res0, <vscale x 2 x i32> %t1, 1
+  %res2 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res1, <vscale x 2 x i32> %t2, 2
+  %res3 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res2, <vscale x 2 x i32> %t3, 3
+  ret { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res3
+}
+
+; We should not transform this function because the expression is not a balanced tree.
+define {<vscale x 4 x i32>, <vscale x 2 x i32>, <vscale x 1 x i32>, <vscale x 1 x i32>} @not_balanced_load_tree(ptr %ptr, i32 %evl) {
+; RV32-LABEL: not_balanced_load_tree:
+; RV32:       # %bb.0:
+; RV32-NEXT:    slli a1, a1, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vle32.v v12, (a0)
+; RV32-NEXT:    li a0, 32
+; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; RV32-NEXT:    vnsrl.wx v8, v12, a0
+; RV32-NEXT:    vnsrl.wi v16, v12, 0
+; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; RV32-NEXT:    vnsrl.wi v10, v16, 0
+; RV32-NEXT:    vnsrl.wx v11, v16, a0
+; RV32-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; RV32-NEXT:    vnsrl.wx v12, v11, a0
+; RV32-NEXT:    vnsrl.wi v11, v11, 0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: not_balanced_load_tree:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a1, a1, 34
+; RV64-NEXT:    srli a1, a1, 32
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vle32.v v12, (a0)
+; RV64-NEXT:    li a0, 32
+; RV64-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; RV64-NEXT:    vnsrl.wx v8, v12, a0
+; RV64-NEXT:    vnsrl.wi v16, v12, 0
+; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; RV64-NEXT:    vnsrl.wi v10, v16, 0
+; RV64-NEXT:    vnsrl.wx v11, v16, a0
+; RV64-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; RV64-NEXT:    vnsrl.wx v12, v11, a0
+; RV64-NEXT:    vnsrl.wi v11, v11, 0
+; RV64-NEXT:    ret
+  %rvl = mul i32 %evl, 4
+  %wide.masked.load = call <vscale x 8 x i32> @llvm.vp.load.nxv8i32.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %rvl)
+  %d0 = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %wide.masked.load)
+  %d0.0 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %d0, 0
+  %t0 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %d0, 1
+  %d1 = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %d0.0)
+  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %d1, 0
+  %d1.1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %d1, 1
+  %d2 = call { <vscale x 1 x i32>, <vscale x 1 x i32> } @llvm.vector.deinterleave2.nxv4i32(<vscale x 2 x i32> %d1.1)
+  %t2 = extractvalue { <vscale x 1 x i32>, <vscale x 1 x i32> } %d2, 0
+  %t3 = extractvalue { <vscale x 1 x i32>, <vscale x 1 x i32> } %d2, 1
+
+  %res0 = insertvalue { <vscale x 4 x i32>, <vscale x 2 x i32>, <vscale x 1 x i32>, <vscale x 1 x i32> } poison, <vscale x 4 x i32> %t0, 0
+  %res1 = insertvalue { <vscale x 4 x i32>, <vscale x 2 x i32>, <vscale x 1 x i32>, <vscale x 1 x i32> } %res0, <vscale x 2 x i32> %t1, 1
+  %res2 = insertvalue { <vscale x 4 x i32>, <vscale x 2 x i32>, <vscale x 1 x i32>, <vscale x 1 x i32> } %res1, <vscale x 1 x i32> %t2, 2
+  %res3 = insertvalue { <vscale x 4 x i32>, <vscale x 2 x i32>, <vscale x 1 x i32>, <vscale x 1 x i32> } %res2, <vscale x 1 x i32> %t3, 3
+  ret { <vscale x 4 x i32>, <vscale x 2 x i32>, <vscale x 1 x i32>, <vscale x 1 x i32> } %res3
+}
+
+define void @not_balanced_store_tree(<vscale x 1 x i32> %v0, <vscale x 2 x i32> %v1, <vscale x 4 x i32> %v2, ptr %ptr, i32 %evl) {
+; RV32-LABEL: not_balanced_store_tree:
+; RV32:       # %bb.0:
+; RV32-NEXT:    slli a1, a1, 2
+; RV32-NEXT:    vsetvli a2, zero, e32, mf2, ta, ma
+; RV32-NEXT:    vwaddu.vv v12, v8, v8
+; RV32-NEXT:    li a2, -1
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    vwmaccu.vx v12, a2, v8
+; RV32-NEXT:    srli a3, a3, 3
+; RV32-NEXT:    vsetvli a4, zero, e32, m1, ta, ma
+; RV32-NEXT:    vslidedown.vx v8, v12, a3
+; RV32-NEXT:    add a4, a3, a3
+; RV32-NEXT:    vsetvli zero, a4, e32, m1, ta, ma
+; RV32-NEXT:    vslideup.vx v12, v8, a3
+; RV32-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
+; RV32-NEXT:    vwaddu.vv v14, v12, v9
+; RV32-NEXT:    vwmaccu.vx v14, a2, v9
+; RV32-NEXT:    vsetvli a3, zero, e32, m2, ta, ma
+; RV32-NEXT:    vwaddu.vv v16, v14, v10
+; RV32-NEXT:    vwmaccu.vx v16, a2, v10
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vse32.v v16, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: not_balanced_store_tree:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a2, zero, e32, mf2, ta, ma
+; RV64-NEXT:    vwaddu.vv v12, v8, v8
+; RV64-NEXT:    li a2, -1
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a1, a1, 34
+; RV64-NEXT:    vwmaccu.vx v12, a2, v8
+; RV64-NEXT:    srli a3, a3, 3
+; RV64-NEXT:    vsetvli a4, zero, e32, m1, ta, ma
+; RV64-NEXT:    vslidedown.vx v8, v12, a3
+; RV64-NEXT:    add a4, a3, a3
+; RV64-NEXT:    vsetvli zero, a4, e32, m1, ta, ma
+; RV64-NEXT:    vslideup.vx v12, v8, a3
+; RV64-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
+; RV64-NEXT:    vwaddu.vv v14, v12, v9
+; RV64-NEXT:    vwmaccu.vx v14, a2, v9
+; RV64-NEXT:    vsetvli a3, zero, e32, m2, ta, ma
+; RV64-NEXT:    vwaddu.vv v16, v14, v10
+; RV64-NEXT:    vwmaccu.vx v16, a2, v10
+; RV64-NEXT:    srli a1, a1, 32
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vse32.v v16, (a0)
+; RV64-NEXT:    ret
+  %rvl = mul i32 %evl, 4
+  %interleaved.vec0 = call <vscale x 2 x i32> @llvm.vector.interleave2.nxv2i32(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v0)
+  %interleaved.vec1 = call <vscale x 4 x i32> @llvm.vector.interleave2.nxv2i32(<vscale x 2 x i32> %interleaved.vec0, <vscale x 2 x i32> %v1)
+  %interleaved.vec2 = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv4i32(<vscale x 4 x i32> %interleaved.vec1, <vscale x 4 x i32> %v2)
+  call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %interleaved.vec2, ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %rvl)
+  ret void
+}
+
+; We only support scalable vectors for now.
+define {<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>} @not_scalable_vectors(ptr %ptr, i32 %evl) {
+; RV32-LABEL: not_scalable_vectors:
+; RV32:       # %bb.0:
+; RV32-NEXT:    slli a1, a1, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; RV32-NEXT:    vle32.v v8, (a0)
+; RV32-NEXT:    li a0, 32
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT:    vnsrl.wx v12, v8, a0
+; RV32-NEXT:    vnsrl.wi v11, v8, 0
+; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-NEXT:    vnsrl.wx v10, v11, a0
+; RV32-NEXT:    vnsrl.wi v8, v11, 0
+; RV32-NEXT:    vnsrl.wx v11, v12, a0
+; RV32-NEXT:    vnsrl.wi v9, v12, 0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: not_scalable_vectors:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a1, a1, 34
+; RV64-NEXT:    srli a1, a1, 32
+; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; RV64-NEXT:    vle32.v v8, (a0)
+; RV64-NEXT:    li a0, 32
+; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT:    vnsrl.wx v12, v8, a0
+; RV64-NEXT:    vnsrl.wi v11, v8, 0
+; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; RV64-NEXT:    vnsrl.wx v10, v11, a0
+; RV64-NEXT:    vnsrl.wi v8, v11, 0
+; RV64-NEXT:    vnsrl.wx v11, v12, a0
+; RV64-NEXT:    vnsrl.wi v9, v12, 0
+; RV64-NEXT:    ret
+  %rvl = mul i32 %evl, 4
+  %wide.masked.load = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %rvl)
+  %d0 = call { <4 x i32>, <4 x i32> } @llvm.vector.deinterleave2.v8i32(<8 x i32> %wide.masked.load)
+  %d0.0 = extractvalue { <4 x i32>, <4 x i32> } %d0, 0
+  %d0.1 = extractvalue { <4 x i32>, <4 x i32> } %d0, 1
+  %d1 = call { <2 x i32>, <2 x i32> } @llvm.vector.deinterleave2.v4i32(<4 x i32> %d0.0)
+  %t0 = extractvalue { <2 x i32>, <2 x i32> } %d1, 0
+  %t2 = extractvalue { <2 x i32>, <2 x i32> } %d1, 1
+  %d2 = call { <2 x i32>, <2 x i32> } @llvm.vector.deinterleave2.v4i32(<4 x i32> %d0.1)
+  %t1 = extractvalue { <2 x i32>, <2 x i32> } %d2, 0
+  %t3 = extractvalue { <2 x i32>, <2 x i32> } %d2, 1
+
+  %res0 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } poison, <2 x i32> %t0, 0
+  %res1 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res0, <2 x i32> %t1, 1
+  %res2 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res1, <2 x i32> %t2, 2
+  %res3 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res2, <2 x i32> %t3, 3
+  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res3
+}
+
+define {<vscale x 2 x i32>, <vscale x 2 x i32>} @not_same_mask(<vscale x 2 x i1> %mask0, <vscale x 2 x i1> %mask1, ptr %ptr, i32 %evl) {
+; RV32-LABEL: not_same_mask:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
+; RV32-NEXT:    vmv1r.v v9, v0
+; RV32-NEXT:    vmv1r.v v0, v8
+; RV32-NEXT:    vmv.v.i v8, 0
+; RV32-NEXT:    li a2, -1
+; RV32-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
+; RV32-NEXT:    vmv.v.i v10, 0
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    vsetvli a4, zero, e8, mf4, ta, ma
+; RV32-NEXT:    vmerge.vim v11, v8, 1, v0
+; RV32-NEXT:    vmv1r.v v0, v9
+; RV32-NEXT:    vmerge.vim v9, v8, 1, v0
+; RV32-NEXT:    srli a3, a3, 2
+; RV32-NEXT:    vwaddu.vv v12, v9, v11
+; RV32-NEXT:    vwmaccu.vx v12, a2, v11
+; RV32-NEXT:    vmsne.vi v0, v12, 0
+; RV32-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
+; RV32-NEXT:    vslidedown.vx v9, v12, a3
+; RV32-NEXT:    vmerge.vim v10, v10, 1, v0
+; RV32-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
+; RV32-NEXT:    vmsne.vi v0, v9, 0
+; RV32-NEXT:    add a2, a3, a3
+; RV32-NEXT:    vmerge.vim v8, v8, 1, v0
+; RV32-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
+; RV32-NEXT:    vslideup.vx v10, v8, a3
+; RV32-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
+; RV32-NEXT:    vmsne.vi v0, v10, 0
+; RV32-NEXT:    slli a1, a1, 1
+; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; RV32-NEXT:    vle32.v v10, (a0), v0.t
+; RV32-NEXT:    li a0, 32
+; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; RV32-NEXT:    vnsrl.wx v9, v10, a0
+; RV32-NEXT:    vnsrl.wi v8, v10, 0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: not_same_mask:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
+; RV64-NEXT:    vmv1r.v v9, v0
+; RV64-NEXT:    vmv1r.v v0, v8
+; RV64-NEXT:    vmv.v.i v8, 0
+; RV64-NEXT:    li a2, -1
+; RV64-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
+; RV64-NEXT:    vmv.v.i v10, 0
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a1, a1, 33
+; RV64-NEXT:    vsetvli a4, zero, e8, mf4, ta, ma
+; RV64-NEXT:    vmerge.vim v11, v8, 1, v0
+; RV64-NEXT:    vmv1r.v v0, v9
+; RV64-NEXT:    vmerge.vim v9, v8, 1, v0
+; RV64-NEXT:    srli a3, a3, 2
+; RV64-NEXT:    vwaddu.vv v12, v9, v11
+; RV64-NEXT:    vwmaccu.vx v12, a2, v11
+; RV64-NEXT:    vmsne.vi v0, v12, 0
+; RV64-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
+; RV64-NEXT:    vslidedown.vx v9, v12, a3
+; RV64-NEXT:    vmerge.vim v10, v10, 1, v0
+; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
+; RV64-NEXT:    vmsne.vi v0, v9, 0
+; RV64-NEXT:    add a2, a3, a3
+; RV64-NEXT:    vmerge.vim v8, v8, 1, v0
+; RV64-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
+; RV64-NEXT:    vslideup.vx v10, v8, a3
+; RV64-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
+; RV64-NEXT:    vmsne.vi v0, v10, 0
+; RV64-NEXT:    srli a1, a1, 32
+; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; RV64-NEXT:    vle32.v v10, (a0), v0.t
+; RV64-NEXT:    li a0, 32
+; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; RV64-NEXT:    vnsrl.wx v9, v10, a0
+; RV64-NEXT:    vnsrl.wi v8, v10, 0
+; RV64-NEXT:    ret
+  %rvl = mul i32 %evl, 2
+  %interleaved.mask = tail call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %mask0, <vscale x 2 x i1> %mask1)
+  %wide.masked.load = tail call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> %interleaved.mask, i32 %rvl)
+  %deinterleaved.results = tail call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv16i32(<vscale x 4 x i32> %wide.masked.load)
+  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 0
+  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 1
+  %res0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> %t0, 0
+  %res1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %res0, <vscale x 2 x i32> %t1, 1
+  ret { <vscale x 2 x i32>, <vscale x 2 x i32> } %res1
+}
+
+; EVL should be a multiple of factor
+define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @invalid_evl(ptr %ptr, i32 %evl) {
+; RV32-LABEL: invalid_evl:
+; RV32:       # %bb.0:
+; RV32-NEXT:    ori a1, a1, 1
+; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT:    vle32.v v8, (a0)
+; RV32-NEXT:    li a0, 32
+; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; RV32-NEXT:    vnsrl.wx v12, v8, a0
+; RV32-NEXT:    vnsrl.wi v14, v8, 0
+; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; RV32-NEXT:    vnsrl.wx v10, v14, a0
+; RV32-NEXT:    vnsrl.wi v8, v14, 0
+; RV32-NEXT:    vnsrl.wx v11, v12, a0
+; RV32-NEXT:    vnsrl.wi v9, v12, 0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: invalid_evl:
+; RV64:       # %bb.0:
+; RV64-NEXT:    ori a1, a1, 1
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    srli a1, a1, 32
+; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT:    vle32.v v8, (a0)
+; RV64-NEXT:    li a0, 32
+; RV64-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; RV64-NEXT:    vnsrl.wx v12, v8, a0
+; RV64-NEXT:    vnsrl.wi v14, v8, 0
+; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; RV64-NEXT:    vnsrl.wx v10, v14, a0
+; RV64-NEXT:    vnsrl.wi v8, v14, 0
+; RV64-NEXT:    vnsrl.wx v11, v12, a0
+; RV64-NEXT:    vnsrl.wi v9, v12, 0
+; RV64-NEXT:    ret
+  %rvl = or i32 %evl, 1
+  %wide.masked.load = call <vscale x 8 x i32> @llvm.vp.load.nxv8i32.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %rvl)
+  %d0 = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %wide.masked.load)
+  %d0.0 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %d0, 0
+  %d0.1 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %d0, 1
+  %d1 = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %d0.0)
+  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %d1, 0
+  %t2 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %d1, 1
+  %d2 = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %d0.1)
+  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %d2, 0
+  %t3 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %d2, 1
+
+  %res0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> %t0, 0
+  %res1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res0, <vscale x 2 x i32> %t1, 1
+  %res2 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res1, <vscale x 2 x i32> %t2, 2
+  %res3 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res2, <vscale x 2 x i32> %t3, 3
+  ret { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res3
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
index b47edf9..ccea5b0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
@@ -533,13 +533,12 @@ define double @vpreduce_ord_fadd_fpext_vp_fpext_nxv1f32_nxv1f64(double %s, <vsca
 define float @vpreduce_fadd_fpext_nxv1f16_nxv1f32(float %s, <vscale x 1 x half> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_fadd_fpext_nxv1f16_nxv1f32:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT:    vfmv.s.f v9, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmv.s.f v8, fa0
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT:    vfredusum.vs v8, v9, v8, v0.t
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vfwredusum.vs v9, v8, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vfmv.f.s fa0, v9
 ; CHECK-NEXT:    ret
   %w = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
   %r = call reassoc float @llvm.vp.reduce.fadd(float %s, <vscale x 1 x float> %w, <vscale x 1 x i1> %m, i32 %evl)
@@ -549,13 +548,12 @@ define float @vpreduce_fadd_fpext_nxv1f16_nxv1f32(float %s, <vscale x 1 x half>
 define float @vpreduce_ord_fadd_fpext_nxv1f16_nxv1f32(float %s, <vscale x 1 x half> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_ord_fadd_fpext_nxv1f16_nxv1f32:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT:    vfmv.s.f v9, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
-; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vfmv.s.f v8, fa0
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT:    vfredosum.vs v8, v9, v8, v0.t
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vfwredosum.vs v9, v8, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vfmv.f.s fa0, v9
 ; CHECK-NEXT:    ret
   %w = fpext <vscale x 1 x half> %v to <vscale x 1 x float>
   %r = call float @llvm.vp.reduce.fadd(float %s, <vscale x 1 x float> %w, <vscale x 1 x i1> %m, i32 %evl)
@@ -565,13 +563,12 @@ define float @vpreduce_ord_fadd_fpext_nxv1f16_nxv1f32(float %s, <vscale x 1 x ha
 define double @vpreduce_fadd_fpext_nxv1f32_nxv1f64(double %s, <vscale x 1 x float> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_fadd_fpext_nxv1f32_nxv1f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
 ; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v8, fa0
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfredusum.vs v8, v9, v8, v0.t
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vfmv.s.f v9, fa0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vfwredusum.vs v9, v8, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vfmv.f.s fa0, v9
 ; CHECK-NEXT:    ret
   %w = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
   %r = call reassoc double @llvm.vp.reduce.fadd(double %s, <vscale x 1 x double> %w, <vscale x 1 x i1> %m, i32 %evl)
@@ -581,13 +578,12 @@ define double @vpreduce_fadd_fpext_nxv1f32_nxv1f64(double %s, <vscale x 1 x floa
 define double @vpreduce_ord_fadd_fpext_nxv1f32_nxv1f64(double %s, <vscale x 1 x float> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_ord_fadd_fpext_nxv1f32_nxv1f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
 ; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v8, fa0
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vfredosum.vs v8, v9, v8, v0.t
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vfmv.s.f v9, fa0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vfwredosum.vs v9, v8, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vfmv.f.s fa0, v9
 ; CHECK-NEXT:    ret
   %w = fpext <vscale x 1 x float> %v to <vscale x 1 x double>
   %r = call double @llvm.vp.reduce.fadd(double %s, <vscale x 1 x double> %w, <vscale x 1 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/SPARC/fmuladd-soft-float.ll b/llvm/test/CodeGen/SPARC/fmuladd-soft-float.ll
index a9e666e..b2ea38f2 100644
--- a/llvm/test/CodeGen/SPARC/fmuladd-soft-float.ll
+++ b/llvm/test/CodeGen/SPARC/fmuladd-soft-float.ll
@@ -165,7 +165,7 @@ define <4 x float> @fmuladd_contract_v4f32(<4 x float> %a, <4 x float> %b, <4 x
 ; SOFT-FLOAT-32-NEXT:    mov %i0, %o0
 ; SOFT-FLOAT-32-NEXT:    call __mulsf3
 ; SOFT-FLOAT-32-NEXT:    mov %i4, %o1
-; SOFT-FLOAT-32-NEXT:    mov %o0, %l6
+; SOFT-FLOAT-32-NEXT:    mov %o0, %i0
 ; SOFT-FLOAT-32-NEXT:    mov %i1, %o0
 ; SOFT-FLOAT-32-NEXT:    call __mulsf3
 ; SOFT-FLOAT-32-NEXT:    mov %i5, %o1
@@ -173,26 +173,28 @@ define <4 x float> @fmuladd_contract_v4f32(<4 x float> %a, <4 x float> %b, <4 x
 ; SOFT-FLOAT-32-NEXT:    mov %i2, %o0
 ; SOFT-FLOAT-32-NEXT:    call __mulsf3
 ; SOFT-FLOAT-32-NEXT:    mov %l5, %o1
-; SOFT-FLOAT-32-NEXT:    mov %o0, %i4
+; SOFT-FLOAT-32-NEXT:    mov %o0, %i2
 ; SOFT-FLOAT-32-NEXT:    mov %i3, %o0
 ; SOFT-FLOAT-32-NEXT:    call __mulsf3
 ; SOFT-FLOAT-32-NEXT:    mov %l4, %o1
 ; SOFT-FLOAT-32-NEXT:    call __addsf3
 ; SOFT-FLOAT-32-NEXT:    mov %l3, %o1
 ; SOFT-FLOAT-32-NEXT:    mov %o0, %i3
-; SOFT-FLOAT-32-NEXT:    mov %i4, %o0
+; SOFT-FLOAT-32-NEXT:    mov %i2, %o0
 ; SOFT-FLOAT-32-NEXT:    call __addsf3
 ; SOFT-FLOAT-32-NEXT:    mov %l2, %o1
-; SOFT-FLOAT-32-NEXT:    mov %o0, %i2
+; SOFT-FLOAT-32-NEXT:    mov %o0, %i4
 ; SOFT-FLOAT-32-NEXT:    mov %i1, %o0
 ; SOFT-FLOAT-32-NEXT:    call __addsf3
 ; SOFT-FLOAT-32-NEXT:    mov %l1, %o1
 ; SOFT-FLOAT-32-NEXT:    mov %o0, %i1
-; SOFT-FLOAT-32-NEXT:    mov %l6, %o0
+; SOFT-FLOAT-32-NEXT:    mov %i0, %o0
 ; SOFT-FLOAT-32-NEXT:    call __addsf3
 ; SOFT-FLOAT-32-NEXT:    mov %l0, %o1
+; SOFT-FLOAT-32-NEXT:    ! kill: def $o0 killed $o0 def $o0_o1
+; SOFT-FLOAT-32-NEXT:    mov %o0, %i0
 ; SOFT-FLOAT-32-NEXT:    ret
-; SOFT-FLOAT-32-NEXT:    restore %g0, %o0, %o0
+; SOFT-FLOAT-32-NEXT:    restore %g0, %i4, %o2
 ;
 ; SOFT-FLOAT-64-LABEL: fmuladd_contract_v4f32:
 ; SOFT-FLOAT-64:         .cfi_startproc
diff --git a/llvm/test/CodeGen/SPARC/fp128.ll b/llvm/test/CodeGen/SPARC/fp128.ll
index 521e333..99bfb8d 100644
--- a/llvm/test/CodeGen/SPARC/fp128.ll
+++ b/llvm/test/CodeGen/SPARC/fp128.ll
@@ -54,11 +54,11 @@ entry:
 
 ; CHECK-LABEL: f128_spill_large:
 ; CHECK:       sethi 4, %g1
-; CHECK:       std %f{{.+}}, [%fp+-16]
-; CHECK-NEXT:  std %f{{.+}}, [%fp+-8]
-; CHECK:       ldd [%fp+-16], %f{{.+}}
-; CHECK-NEXT:  ldd [%fp+-8], %f{{.+}}
 
+; CHECK:       std %f{{.+}}, [%[[S0:.+]]]
+; CHECK:       std %f{{.+}}, [%[[S1:.+]]]
+; CHECK-DAG:   ldd [%[[S0]]], %f{{.+}}
+; CHECK-DAG:   ldd [%[[S1]]], %f{{.+}}
 define void @f128_spill_large(ptr noalias sret(<251 x fp128>) %scalar.result, ptr byval(<251 x fp128>) %a) {
 entry:
   %0 = load <251 x fp128>, ptr %a, align 8
@@ -102,10 +102,10 @@ entry:
 
 
 ; CHECK-LABEL: f128_abs:
-; CHECK:       ldd [%o0], %f0
-; CHECK:       ldd [%o0+8], %f2
-; BE:          fabss %f0, %f0
-; EL:          fabss %f3, %f3
+; CHECK-DAG:       ldd [%o0], [[REG:%f[0-9]+]]
+; CHECK-DAG:       ldd [%o0+8], %f{{[0-9]+}}
+; BE:          fabss [[REG]], [[REG]]
+; EL:          fabss %f1, %f1
 
 define void @f128_abs(ptr noalias sret(fp128) %scalar.result, ptr byval(fp128) %a) {
 entry:
@@ -229,10 +229,10 @@ entry:
 }
 
 ; CHECK-LABEL: f128_neg:
-; CHECK:       ldd [%o0], %f0
-; CHECK:       ldd [%o0+8], %f2
-; BE:          fnegs %f0, %f0
-; EL:          fnegs %f3, %f3
+; CHECK-DAG:       ldd [%o0], [[REG:%f[0-9]+]]
+; CHECK-DAG:       ldd [%o0+8], %f{{[0-9]+}}
+; BE:          fnegs [[REG]], [[REG]]
+; LE:          fnegs [[REG]], [[REG]]
 
 define void @f128_neg(ptr noalias sret(fp128) %scalar.result, ptr byval(fp128) %a) {
 entry:
diff --git a/llvm/test/CodeGen/SPARC/fp16-promote.ll b/llvm/test/CodeGen/SPARC/fp16-promote.ll
index f09c37b..a15104c 100644
--- a/llvm/test/CodeGen/SPARC/fp16-promote.ll
+++ b/llvm/test/CodeGen/SPARC/fp16-promote.ll
@@ -89,10 +89,10 @@ define void @test_fpextend_fp128(ptr %p, ptr %out) nounwind {
 ; V8-OPT-NEXT:    call _Q_stoq
 ; V8-OPT-NEXT:    ld [%fp+-20], %o0
 ; V8-OPT-NEXT:    unimp 16
-; V8-OPT-NEXT:    ldd [%fp+-16], %f0
-; V8-OPT-NEXT:    ldd [%fp+-8], %f2
-; V8-OPT-NEXT:    std %f2, [%i1+8]
-; V8-OPT-NEXT:    std %f0, [%i1]
+; V8-OPT-NEXT:    ldd [%fp+-8], %f0
+; V8-OPT-NEXT:    ldd [%fp+-16], %f4
+; V8-OPT-NEXT:    std %f0, [%i1+8]
+; V8-OPT-NEXT:    std %f4, [%i1]
 ; V8-OPT-NEXT:    ret
 ; V8-OPT-NEXT:    restore
 ;
@@ -133,10 +133,10 @@ define void @test_fpextend_fp128(ptr %p, ptr %out) nounwind {
 ; V9-NEXT:    call _Q_stoq
 ; V9-NEXT:    ld [%fp+-20], %o0
 ; V9-NEXT:    unimp 16
-; V9-NEXT:    ldd [%fp+-16], %f0
-; V9-NEXT:    ldd [%fp+-8], %f2
-; V9-NEXT:    std %f2, [%i1+8]
-; V9-NEXT:    std %f0, [%i1]
+; V9-NEXT:    ldd [%fp+-8], %f0
+; V9-NEXT:    ldd [%fp+-16], %f4
+; V9-NEXT:    std %f0, [%i1+8]
+; V9-NEXT:    std %f4, [%i1]
 ; V9-NEXT:    ret
 ; V9-NEXT:    restore
 ;
@@ -149,10 +149,10 @@ define void @test_fpextend_fp128(ptr %p, ptr %out) nounwind {
 ; SPARC64-NEXT:    fmovs %f0, %f3
 ; SPARC64-NEXT:    call _Qp_stoq
 ; SPARC64-NEXT:    nop
-; SPARC64-NEXT:    ldd [%fp+2031], %f0
-; SPARC64-NEXT:    ldd [%fp+2039], %f2
-; SPARC64-NEXT:    std %f2, [%i1+8]
-; SPARC64-NEXT:    std %f0, [%i1]
+; SPARC64-NEXT:    ldd [%fp+2039], %f0
+; SPARC64-NEXT:    ldd [%fp+2031], %f4
+; SPARC64-NEXT:    std %f0, [%i1+8]
+; SPARC64-NEXT:    std %f4, [%i1]
 ; SPARC64-NEXT:    ret
 ; SPARC64-NEXT:    restore
   %a = load half, ptr %p
@@ -270,8 +270,8 @@ define void @test_fptrunc_fp128(ptr %dp, ptr %p) nounwind {
 ; V8-OPT:       ! %bb.0:
 ; V8-OPT-NEXT:    save %sp, -104, %sp
 ; V8-OPT-NEXT:    ldd [%i0], %f0
-; V8-OPT-NEXT:    ldd [%i0+8], %f2
-; V8-OPT-NEXT:    std %f2, [%sp+100]
+; V8-OPT-NEXT:    ldd [%i0+8], %f4
+; V8-OPT-NEXT:    std %f4, [%sp+100]
 ; V8-OPT-NEXT:    call __trunctfhf2
 ; V8-OPT-NEXT:    std %f0, [%sp+92]
 ; V8-OPT-NEXT:    sth %o0, [%i1]
@@ -302,8 +302,8 @@ define void @test_fptrunc_fp128(ptr %dp, ptr %p) nounwind {
 ; V9:       ! %bb.0:
 ; V9-NEXT:    save %sp, -104, %sp
 ; V9-NEXT:    ldd [%i0], %f0
-; V9-NEXT:    ldd [%i0+8], %f2
-; V9-NEXT:    std %f2, [%sp+100]
+; V9-NEXT:    ldd [%i0+8], %f4
+; V9-NEXT:    std %f4, [%sp+100]
 ; V9-NEXT:    call __trunctfhf2
 ; V9-NEXT:    std %f0, [%sp+92]
 ; V9-NEXT:    sth %o0, [%i1]
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_integer_dot_product/SPV_KHR_integer_dot_product_OCLtoSPIRV_char4.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_integer_dot_product/SPV_KHR_integer_dot_product_OCLtoSPIRV_char4.ll
new file mode 100644
index 0000000..8c68017
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_integer_dot_product/SPV_KHR_integer_dot_product_OCLtoSPIRV_char4.ll
@@ -0,0 +1,53 @@
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32v1.6-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32v1.6-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_KHR_integer_dot_product %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-EXT
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_KHR_integer_dot_product %s -o - -filetype=obj | spirv-val %}
+
+; CHECK: Capability DotProduct
+; CHECK: Capability DotProductInput4x8Bit
+; CHECK-EXT: OpExtension "SPV_KHR_integer_dot_product"
+; CHECK-NOT: OpExtension "SPV_KHR_integer_dot_product"
+
+; CHECK: Name %[[#SignedA:]] "ia"
+; CHECK: Name %[[#UnsignedA:]] "ua"
+; CHECK: Name %[[#SignedB:]] "ib"
+; CHECK: Name %[[#UnsignedB:]] "ub"
+
+; CHECK: SDot %[[#]] %[[#SignedA]] %[[#SignedB]]
+; CHECK: SUDot %[[#]] %[[#SignedA]] %[[#UnsignedB]]
+; CHECK: SUDot %[[#]] %[[#SignedB]] %[[#UnsignedA]]
+; CHECK: UDot %[[#]] %[[#UnsignedA]] %[[#UnsignedB]]
+
+; CHECK: SDotAccSat %[[#]] %[[#SignedA]] %[[#SignedB]] %[[#]]
+; CHECK: SUDotAccSat %[[#]] %[[#SignedA]] %[[#UnsignedB]] %[[#]]
+; CHECK: SUDotAccSat %[[#]] %[[#SignedB]] %[[#UnsignedA]] %[[#]]
+; CHECK: UDotAccSat %[[#]] %[[#UnsignedA]] %[[#UnsignedB]] %[[#]]
+
+define spir_kernel void @test(<4 x i8> %ia, <4 x i8> %ua, <4 x i8> %ib, <4 x i8> %ub, <4 x i8> %ires, <4 x i8> %ures) {
+entry:
+  %call = tail call spir_func i32 @_Z3dotDv4_cS_(<4 x i8> %ia, <4 x i8> %ib) #2
+  %call1 = tail call spir_func i32 @_Z3dotDv4_cDv4_h(<4 x i8> %ia, <4 x i8> %ub) #2
+  %call2 = tail call spir_func i32 @_Z3dotDv4_hDv4_c(<4 x i8> %ua, <4 x i8> %ib) #2
+  %call3 = tail call spir_func i32 @_Z3dotDv4_hS_(<4 x i8> %ua, <4 x i8> %ub) #2
+  %call4 = tail call spir_func i32 @_Z11dot_acc_satDv4_cS_i(<4 x i8> %ia, <4 x i8> %ib, i32 %call2) #2
+  %call5 = tail call spir_func i32 @_Z11dot_acc_satDv4_cDv4_hi(<4 x i8> %ia, <4 x i8> %ub, i32 %call4) #2
+  %call6 = tail call spir_func i32 @_Z11dot_acc_satDv4_hDv4_ci(<4 x i8> %ua, <4 x i8> %ib, i32 %call5) #2
+  %call7 = tail call spir_func i32 @_Z11dot_acc_satDv4_hS_j(<4 x i8> %ua, <4 x i8> %ub, i32 %call3) #2
+  ret void
+}
+
+declare spir_func i32 @_Z3dotDv4_cS_(<4 x i8>, <4 x i8>)
+declare spir_func i32 @_Z3dotDv4_cDv4_h(<4 x i8>, <4 x i8>)
+declare spir_func i32 @_Z3dotDv4_hDv4_c(<4 x i8>, <4 x i8>)
+declare spir_func i32 @_Z3dotDv4_hS_(<4 x i8>, <4 x i8>)
+declare spir_func i32 @_Z11dot_acc_satDv4_cS_i(<4 x i8>, <4 x i8>, i32)
+declare spir_func i32 @_Z11dot_acc_satDv4_cDv4_hi(<4 x i8>, <4 x i8>, i32)
+declare spir_func i32 @_Z11dot_acc_satDv4_hDv4_ci(<4 x i8>, <4 x i8>, i32)
+declare spir_func i32 @_Z11dot_acc_satDv4_hS_j(<4 x i8>, <4 x i8>, i32)
+
+!llvm.module.flags = !{!0}
+!opencl.ocl.version = !{!1}
+!opencl.spir.version = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 2, i32 0}
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_integer_dot_product/SPV_KHR_integer_dot_product_OCLtoSPIRV_int.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_integer_dot_product/SPV_KHR_integer_dot_product_OCLtoSPIRV_int.ll
new file mode 100644
index 0000000..284f5c3
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_integer_dot_product/SPV_KHR_integer_dot_product_OCLtoSPIRV_int.ll
@@ -0,0 +1,53 @@
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32v1.6-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32v1.6-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_KHR_integer_dot_product %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-EXT
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_KHR_integer_dot_product %s -o - -filetype=obj | spirv-val %}
+
+; CHECK: Capability DotProduct
+; CHECK: Capability DotProductInput4x8BitPacked
+; CHECK-EXT: OpExtension "SPV_KHR_integer_dot_product"
+; CHECK-NOT: OpExtension "SPV_KHR_integer_dot_product"
+
+; CHECK: Name %[[#SignedA:]] "ia"
+; CHECK: Name %[[#UnsignedA:]] "ua"
+; CHECK: Name %[[#SignedB:]] "ib"
+; CHECK: Name %[[#UnsignedB:]] "ub"
+
+; CHECK: SDot %[[#]] %[[#SignedA]] %[[#SignedB]] 0
+; CHECK: SUDot %[[#]] %[[#SignedA]] %[[#UnsignedB]] 0
+; CHECK: SUDot %[[#]] %[[#SignedB]] %[[#UnsignedA]] 0
+; CHECK: UDot %[[#]] %[[#UnsignedA]] %[[#UnsignedB]] 0
+
+; CHECK: SDotAccSat %[[#]] %[[#SignedA]] %[[#SignedB]] %[[#]] 0
+; CHECK: SUDotAccSat %[[#]] %[[#SignedA]] %[[#UnsignedB]] %[[#]] 0
+; CHECK: SUDotAccSat %[[#]] %[[#SignedB]] %[[#UnsignedA]] %[[#]] 0
+; CHECK: UDotAccSat %[[#]] %[[#UnsignedA]] %[[#UnsignedB]] %[[#]] 0
+
+define spir_kernel void @test(i32 %ia, i32 %ua, i32 %ib, i32 %ub, i32 %ires, i32 %ures) {
+entry:
+  %call = tail call spir_func i32 @_Z20dot_4x8packed_ss_intjj(i32 %ia, i32 %ib) #2
+  %call1 = tail call spir_func i32 @_Z20dot_4x8packed_su_intjj(i32 %ia, i32 %ub) #2
+  %call2 = tail call spir_func i32 @_Z20dot_4x8packed_us_intjj(i32 %ua, i32 %ib) #2
+  %call3 = tail call spir_func i32 @_Z21dot_4x8packed_uu_uintjj(i32 %ua, i32 %ub) #2
+  %call4 = tail call spir_func i32 @_Z28dot_acc_sat_4x8packed_ss_intjji(i32 %ia, i32 %ib, i32 %ires) #2
+  %call5 = tail call spir_func i32 @_Z28dot_acc_sat_4x8packed_su_intjji(i32 %ia, i32 %ub, i32 %ires) #2
+  %call6 = tail call spir_func i32 @_Z28dot_acc_sat_4x8packed_us_intjji(i32 %ua, i32 %ib, i32 %ires) #2
+  %call7 = tail call spir_func i32 @_Z29dot_acc_sat_4x8packed_uu_uintjjj(i32 %ua, i32 %ub, i32 %ures) #2
+  ret void
+}
+
+declare spir_func i32 @_Z20dot_4x8packed_ss_intjj(i32, i32)
+declare spir_func i32 @_Z20dot_4x8packed_su_intjj(i32, i32)
+declare spir_func i32 @_Z20dot_4x8packed_us_intjj(i32, i32)
+declare spir_func i32 @_Z21dot_4x8packed_uu_uintjj(i32, i32)
+declare spir_func i32 @_Z28dot_acc_sat_4x8packed_ss_intjji(i32, i32, i32)
+declare spir_func i32 @_Z28dot_acc_sat_4x8packed_su_intjji(i32, i32, i32)
+declare spir_func i32 @_Z28dot_acc_sat_4x8packed_us_intjji(i32, i32, i32)
+declare spir_func i32 @_Z29dot_acc_sat_4x8packed_uu_uintjjj(i32, i32, i32)
+
+!llvm.module.flags = !{!0}
+!opencl.ocl.version = !{!1}
+!opencl.spir.version = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 2, i32 0}
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_integer_dot_product/SPV_KHR_integer_dot_product_OCLtoSPIRV_short2.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_integer_dot_product/SPV_KHR_integer_dot_product_OCLtoSPIRV_short2.ll
new file mode 100644
index 0000000..8849453
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_integer_dot_product/SPV_KHR_integer_dot_product_OCLtoSPIRV_short2.ll
@@ -0,0 +1,53 @@
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32v1.6-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32v1.6-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_KHR_integer_dot_product %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-EXT
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_KHR_integer_dot_product %s -o - -filetype=obj | spirv-val %}
+
+; CHECK: Capability DotProduct
+; CHECK: Capability DotProductInputAll
+; CHECK-EXT: OpExtension "SPV_KHR_integer_dot_product"
+; CHECK-NOT: OpExtension "SPV_KHR_integer_dot_product"
+
+; CHECK: Name %[[#SignedA:]] "ia"
+; CHECK: Name %[[#UnsignedA:]] "ua"
+; CHECK: Name %[[#SignedB:]] "ib"
+; CHECK: Name %[[#UnsignedB:]] "ub"
+
+; CHECK: SDot %[[#]] %[[#SignedA]] %[[#SignedB]]
+; CHECK: SUDot %[[#]] %[[#SignedA]] %[[#UnsignedB]]
+; CHECK: SUDot %[[#]] %[[#SignedB]] %[[#UnsignedA]]
+; CHECK: UDot %[[#]] %[[#UnsignedA]] %[[#UnsignedB]]
+
+; CHECK: SDotAccSat %[[#]] %[[#SignedA]] %[[#SignedB]] %[[#]]
+; CHECK: SUDotAccSat %[[#]] %[[#SignedA]] %[[#UnsignedB]] %[[#]]
+; CHECK: SUDotAccSat %[[#]] %[[#SignedB]] %[[#UnsignedA]] %[[#]]
+; CHECK: UDotAccSat %[[#]] %[[#UnsignedA]] %[[#UnsignedB]] %[[#]]
+
+define spir_kernel void @test(<2 x i16> %ia, <2 x i16> %ua, <2 x i16> %ib, <2 x i16> %ub, <2 x i16> %ires, <2 x i16> %ures) {
+entry:
+  %call = tail call spir_func i32 @_Z3dotDv2_sS_(<2 x i16> %ia, <2 x i16> %ib) #2
+  %call1 = tail call spir_func i32 @_Z3dotDv2_sDv2_t(<2 x i16> %ia, <2 x i16> %ub) #2
+  %call2 = tail call spir_func i32 @_Z3dotDv2_tDv2_s(<2 x i16> %ua, <2 x i16> %ib) #2
+  %call3 = tail call spir_func i32 @_Z3dotDv2_tS_(<2 x i16> %ua, <2 x i16> %ub) #2
+  %call4 = tail call spir_func i32 @_Z11dot_acc_satDv2_sS_i(<2 x i16> %ia, <2 x i16> %ib, i32 %call2) #2
+  %call5 = tail call spir_func i32 @_Z11dot_acc_satDv2_sDv2_ti(<2 x i16> %ia, <2 x i16> %ub, i32 %call4) #2
+  %call6 = tail call spir_func i32 @_Z11dot_acc_satDv2_tDv2_si(<2 x i16> %ua, <2 x i16> %ib, i32 %call5) #2
+  %call7 = tail call spir_func i32 @_Z11dot_acc_satDv2_tS_j(<2 x i16> %ua, <2 x i16> %ub, i32 %call3) #2
+  ret void
+}
+
+declare spir_func i32 @_Z3dotDv2_sS_(<2 x i16>, <2 x i16>)
+declare spir_func i32 @_Z3dotDv2_sDv2_t(<2 x i16>, <2 x i16>)
+declare spir_func i32 @_Z3dotDv2_tDv2_s(<2 x i16>, <2 x i16>)
+declare spir_func i32 @_Z3dotDv2_tS_(<2 x i16>, <2 x i16>)
+declare spir_func i32 @_Z11dot_acc_satDv2_sS_i(<2 x i16>, <2 x i16>, i32)
+declare spir_func i32 @_Z11dot_acc_satDv2_sDv2_ti(<2 x i16>, <2 x i16>, i32)
+declare spir_func i32 @_Z11dot_acc_satDv2_tDv2_si(<2 x i16>, <2 x i16>, i32)
+declare spir_func i32 @_Z11dot_acc_satDv2_tS_j(<2 x i16>, <2 x i16>, i32)
+
+!llvm.module.flags = !{!0}
+!opencl.ocl.version = !{!1}
+!opencl.spir.version = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 2, i32 0}
diff --git a/llvm/test/CodeGen/SystemZ/int-uadd-01.ll b/llvm/test/CodeGen/SystemZ/int-uadd-01.ll
index 7d4aa9f..ec36065 100644
--- a/llvm/test/CodeGen/SystemZ/int-uadd-01.ll
+++ b/llvm/test/CodeGen/SystemZ/int-uadd-01.ll
@@ -271,10 +271,10 @@ define zeroext i1 @f14(ptr %ptr0) {
 ; CHECK-NEXT:    .cfi_offset %r15, -40
 ; CHECK-NEXT:    aghi %r15, -168
 ; CHECK-NEXT:    .cfi_def_cfa_offset 328
-; CHECK-NEXT:    l %r6, 0(%r2)
-; CHECK-NEXT:    l %r13, 8(%r2)
-; CHECK-NEXT:    l %r12, 16(%r2)
-; CHECK-NEXT:    l %r7, 24(%r2)
+; CHECK-NEXT:    l %r7, 0(%r2)
+; CHECK-NEXT:    l %r6, 8(%r2)
+; CHECK-NEXT:    l %r13, 16(%r2)
+; CHECK-NEXT:    l %r12, 24(%r2)
 ; CHECK-NEXT:    l %r8, 32(%r2)
 ; CHECK-NEXT:    l %r9, 40(%r2)
 ; CHECK-NEXT:    l %r10, 48(%r2)
@@ -282,16 +282,16 @@ define zeroext i1 @f14(ptr %ptr0) {
 ; CHECK-NEXT:    mvc 160(4,%r15), 64(%r2) # 4-byte Folded Spill
 ; CHECK-NEXT:    mvc 164(4,%r15), 72(%r2) # 4-byte Folded Spill
 ; CHECK-NEXT:    brasl %r14, foo@PLT
-; CHECK-NEXT:    alr %r2, %r6
+; CHECK-NEXT:    alr %r2, %r7
 ; CHECK-NEXT:    ipm %r0
 ; CHECK-NEXT:    risbg %r0, %r0, 63, 191, 35
-; CHECK-NEXT:    alr %r2, %r13
+; CHECK-NEXT:    alr %r2, %r6
 ; CHECK-NEXT:    ipm %r1
 ; CHECK-NEXT:    rosbg %r0, %r1, 63, 63, 35
-; CHECK-NEXT:    alr %r2, %r12
+; CHECK-NEXT:    alr %r2, %r13
 ; CHECK-NEXT:    ipm %r1
 ; CHECK-NEXT:    rosbg %r0, %r1, 63, 63, 35
-; CHECK-NEXT:    alr %r2, %r7
+; CHECK-NEXT:    alr %r2, %r12
 ; CHECK-NEXT:    ipm %r1
 ; CHECK-NEXT:    rosbg %r0, %r1, 63, 63, 35
 ; CHECK-NEXT:    alr %r2, %r8
diff --git a/llvm/test/CodeGen/SystemZ/int-uadd-02.ll b/llvm/test/CodeGen/SystemZ/int-uadd-02.ll
index 46c5b4f..ad6aa79 100644
--- a/llvm/test/CodeGen/SystemZ/int-uadd-02.ll
+++ b/llvm/test/CodeGen/SystemZ/int-uadd-02.ll
@@ -215,10 +215,10 @@ define zeroext i1 @f11(ptr %ptr0) {
 ; CHECK-NEXT:    .cfi_offset %r15, -40
 ; CHECK-NEXT:    aghi %r15, -176
 ; CHECK-NEXT:    .cfi_def_cfa_offset 336
-; CHECK-NEXT:    lg %r6, 0(%r2)
-; CHECK-NEXT:    lg %r13, 16(%r2)
-; CHECK-NEXT:    lg %r12, 32(%r2)
-; CHECK-NEXT:    lg %r7, 48(%r2)
+; CHECK-NEXT:    lg %r7, 0(%r2)
+; CHECK-NEXT:    lg %r6, 16(%r2)
+; CHECK-NEXT:    lg %r13, 32(%r2)
+; CHECK-NEXT:    lg %r12, 48(%r2)
 ; CHECK-NEXT:    lg %r8, 64(%r2)
 ; CHECK-NEXT:    lg %r9, 80(%r2)
 ; CHECK-NEXT:    lg %r10, 96(%r2)
@@ -226,16 +226,16 @@ define zeroext i1 @f11(ptr %ptr0) {
 ; CHECK-NEXT:    mvc 160(8,%r15), 128(%r2) # 8-byte Folded Spill
 ; CHECK-NEXT:    mvc 168(8,%r15), 144(%r2) # 8-byte Folded Spill
 ; CHECK-NEXT:    brasl %r14, foo@PLT
-; CHECK-NEXT:    algr %r2, %r6
+; CHECK-NEXT:    algr %r2, %r7
 ; CHECK-NEXT:    ipm %r0
 ; CHECK-NEXT:    risbg %r0, %r0, 63, 191, 35
-; CHECK-NEXT:    algr %r2, %r13
+; CHECK-NEXT:    algr %r2, %r6
 ; CHECK-NEXT:    ipm %r1
 ; CHECK-NEXT:    rosbg %r0, %r1, 63, 63, 35
-; CHECK-NEXT:    algr %r2, %r12
+; CHECK-NEXT:    algr %r2, %r13
 ; CHECK-NEXT:    ipm %r1
 ; CHECK-NEXT:    rosbg %r0, %r1, 63, 63, 35
-; CHECK-NEXT:    algr %r2, %r7
+; CHECK-NEXT:    algr %r2, %r12
 ; CHECK-NEXT:    ipm %r1
 ; CHECK-NEXT:    rosbg %r0, %r1, 63, 63, 35
 ; CHECK-NEXT:    algr %r2, %r8
diff --git a/llvm/test/CodeGen/SystemZ/pr60413.ll b/llvm/test/CodeGen/SystemZ/pr60413.ll
index 62f5d49..6dee3d3 100644
--- a/llvm/test/CodeGen/SystemZ/pr60413.ll
+++ b/llvm/test/CodeGen/SystemZ/pr60413.ll
@@ -40,38 +40,38 @@ define dso_local void @m() local_unnamed_addr #1 {
 ; CHECK-NEXT:    vn %v0, %v0, %v6
 ; CHECK-NEXT:    vn %v4, %v4, %v6
 ; CHECK-NEXT:    vperm %v1, %v1, %v1, %v5
-; CHECK-NEXT:    vn %v5, %v1, %v6
-; CHECK-NEXT:    vperm %v1, %v0, %v3, %v2
-; CHECK-NEXT:    vn %v2, %v1, %v6
-; CHECK-NEXT:    vrepif %v1, 127
-; CHECK-NEXT:    vchlf %v3, %v5, %v1
-; CHECK-NEXT:    vlgvf %r3, %v3, 1
-; CHECK-NEXT:    vlgvf %r2, %v3, 0
+; CHECK-NEXT:    vn %v1, %v1, %v6
+; CHECK-NEXT:    vperm %v2, %v0, %v3, %v2
+; CHECK-NEXT:    vn %v2, %v2, %v6
+; CHECK-NEXT:    vrepif %v3, 127
+; CHECK-NEXT:    vchlf %v1, %v1, %v3
+; CHECK-NEXT:    vlgvf %r3, %v1, 1
+; CHECK-NEXT:    vlgvf %r2, %v1, 0
 ; CHECK-NEXT:    risbg %r2, %r2, 48, 176, 15
 ; CHECK-NEXT:    rosbg %r2, %r3, 49, 49, 14
-; CHECK-NEXT:    vlgvf %r3, %v3, 2
+; CHECK-NEXT:    vlgvf %r3, %v1, 2
 ; CHECK-NEXT:    rosbg %r2, %r3, 50, 50, 13
-; CHECK-NEXT:    vlgvf %r3, %v3, 3
+; CHECK-NEXT:    vlgvf %r3, %v1, 3
 ; CHECK-NEXT:    rosbg %r2, %r3, 51, 51, 12
-; CHECK-NEXT:    vchlf %v3, %v4, %v1
-; CHECK-NEXT:    vlgvf %r3, %v3, 0
+; CHECK-NEXT:    vchlf %v1, %v4, %v3
+; CHECK-NEXT:    vlgvf %r3, %v1, 0
 ; CHECK-NEXT:    rosbg %r2, %r3, 52, 52, 11
-; CHECK-NEXT:    vlgvf %r3, %v3, 1
+; CHECK-NEXT:    vlgvf %r3, %v1, 1
 ; CHECK-NEXT:    rosbg %r2, %r3, 53, 53, 10
-; CHECK-NEXT:    vlgvf %r3, %v3, 2
+; CHECK-NEXT:    vlgvf %r3, %v1, 2
 ; CHECK-NEXT:    rosbg %r2, %r3, 54, 54, 9
-; CHECK-NEXT:    vlgvf %r3, %v3, 3
+; CHECK-NEXT:    vlgvf %r3, %v1, 3
 ; CHECK-NEXT:    rosbg %r2, %r3, 55, 55, 8
-; CHECK-NEXT:    vchlf %v2, %v2, %v1
-; CHECK-NEXT:    vlgvf %r3, %v2, 0
+; CHECK-NEXT:    vchlf %v1, %v2, %v3
+; CHECK-NEXT:    vlgvf %r3, %v1, 0
 ; CHECK-NEXT:    rosbg %r2, %r3, 56, 56, 7
-; CHECK-NEXT:    vlgvf %r3, %v2, 1
+; CHECK-NEXT:    vlgvf %r3, %v1, 1
 ; CHECK-NEXT:    rosbg %r2, %r3, 57, 57, 6
-; CHECK-NEXT:    vlgvf %r3, %v2, 2
+; CHECK-NEXT:    vlgvf %r3, %v1, 2
 ; CHECK-NEXT:    rosbg %r2, %r3, 58, 58, 5
-; CHECK-NEXT:    vlgvf %r3, %v2, 3
+; CHECK-NEXT:    vlgvf %r3, %v1, 3
 ; CHECK-NEXT:    rosbg %r2, %r3, 59, 59, 4
-; CHECK-NEXT:    vchlf %v0, %v0, %v1
+; CHECK-NEXT:    vchlf %v0, %v0, %v3
 ; CHECK-NEXT:    vlgvf %r3, %v0, 0
 ; CHECK-NEXT:    rosbg %r2, %r3, 60, 60, 3
 ; CHECK-NEXT:    vlgvf %r3, %v0, 1
diff --git a/llvm/test/CodeGen/Thumb2/avoidmuls.mir b/llvm/test/CodeGen/Thumb2/avoidmuls.mir
index 8651520..7bfa4b9 100644
--- a/llvm/test/CodeGen/Thumb2/avoidmuls.mir
+++ b/llvm/test/CodeGen/Thumb2/avoidmuls.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=thumbv7m-none-eabi -mcpu=cortex-m33 -run-pass=thumb2-reduce-size %s -o - | FileCheck %s --check-prefix=MUL
+# RUN: llc -mtriple=thumbv7m-none-eabi -mcpu=star-mc1 -run-pass=thumb2-reduce-size %s -o - | FileCheck %s --check-prefix=MUL
 # RUN: llc -mtriple=thumbv7m-none-eabi --run-pass=thumb2-reduce-size %s -o - | FileCheck %s --check-prefix=MULS
 
 ---
@@ -17,4 +18,4 @@ body:             |
 
 # MULS-LABEL: test
 # MULS: tMUL
-# MULS-NOT: t2MUL
-\ No newline at end of file
+# MULS-NOT: t2MUL
diff --git a/llvm/test/CodeGen/Thumb2/mve-complex-deinterleaving-uniform-cases.ll b/llvm/test/CodeGen/Thumb2/mve-complex-deinterleaving-uniform-cases.ll
index c5f61b7..dc67abc 100644
--- a/llvm/test/CodeGen/Thumb2/mve-complex-deinterleaving-uniform-cases.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-complex-deinterleaving-uniform-cases.ll
@@ -212,70 +212,70 @@ define arm_aapcs_vfpcc <12 x float> @abp90c12(<12 x float> %a, <12 x float> %b,
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    .pad #64
 ; CHECK-NEXT:    sub sp, #64
-; CHECK-NEXT:    vldr s23, [sp, #140]
-; CHECK-NEXT:    vmov.f32 s20, s13
-; CHECK-NEXT:    vldr s22, [sp, #132]
-; CHECK-NEXT:    vmov.f32 s25, s11
-; CHECK-NEXT:    vmov.f32 s13, s10
+; CHECK-NEXT:    vmov.f32 s21, s11
+; CHECK-NEXT:    vldr s31, [sp, #140]
+; CHECK-NEXT:    vmov.f32 s20, s9
+; CHECK-NEXT:    vldr s30, [sp, #132]
+; CHECK-NEXT:    vstrw.32 q5, [sp, #32] @ 16-byte Spill
+; CHECK-NEXT:    vmov.f32 s28, s13
+; CHECK-NEXT:    vmov.f32 s23, s6
 ; CHECK-NEXT:    vldr s19, [sp, #136]
-; CHECK-NEXT:    vmov.f32 s11, s6
-; CHECK-NEXT:    vldr s18, [sp, #128]
 ; CHECK-NEXT:    vmov.f32 s6, s5
-; CHECK-NEXT:    vldr s31, [sp, #188]
-; CHECK-NEXT:    vmov.f32 s10, s4
-; CHECK-NEXT:    vldr s30, [sp, #180]
-; CHECK-NEXT:    vmov.f32 s21, s15
-; CHECK-NEXT:    vldr s29, [sp, #172]
+; CHECK-NEXT:    vldr s18, [sp, #128]
+; CHECK-NEXT:    vmov.f32 s22, s4
+; CHECK-NEXT:    vldr s27, [sp, #184]
+; CHECK-NEXT:    vmov.f32 s29, s15
+; CHECK-NEXT:    vldr s26, [sp, #176]
 ; CHECK-NEXT:    vmov.f32 s5, s3
-; CHECK-NEXT:    vldr s28, [sp, #164]
+; CHECK-NEXT:    vldr s25, [sp, #168]
 ; CHECK-NEXT:    vmov.f32 s4, s1
-; CHECK-NEXT:    vmov.f32 s24, s9
+; CHECK-NEXT:    vldr s24, [sp, #160]
 ; CHECK-NEXT:    vmov.f32 s16, s12
-; CHECK-NEXT:    vstrw.32 q6, [sp, #32] @ 16-byte Spill
-; CHECK-NEXT:    vmov.f32 s12, s8
-; CHECK-NEXT:    vldr s27, [sp, #184]
+; CHECK-NEXT:    vldr s11, [sp, #188]
 ; CHECK-NEXT:    vmov.f32 s17, s14
-; CHECK-NEXT:    vldr s26, [sp, #176]
-; CHECK-NEXT:    vmov.f32 s9, s2
-; CHECK-NEXT:    vldr s25, [sp, #168]
-; CHECK-NEXT:    vmov.f32 s8, s0
-; CHECK-NEXT:    vmul.f32 q0, q5, q1
+; CHECK-NEXT:    vldr s9, [sp, #172]
+; CHECK-NEXT:    vmov.f32 s21, s2
+; CHECK-NEXT:    vmov.f32 s20, s0
+; CHECK-NEXT:    vmul.f32 q0, q7, q1
 ; CHECK-NEXT:    vmul.f32 q1, q4, q1
 ; CHECK-NEXT:    vneg.f32 q0, q0
-; CHECK-NEXT:    vldr s24, [sp, #160]
-; CHECK-NEXT:    vfma.f32 q1, q5, q2
 ; CHECK-NEXT:    vstrw.32 q0, [sp, #16] @ 16-byte Spill
-; CHECK-NEXT:    vstrw.32 q3, [sp, #48] @ 16-byte Spill
-; CHECK-NEXT:    vsub.f32 q6, q6, q1
+; CHECK-NEXT:    vfma.f32 q1, q7, q5
+; CHECK-NEXT:    vsub.f32 q7, q6, q1
 ; CHECK-NEXT:    vldrw.u32 q1, [sp, #16] @ 16-byte Reload
+; CHECK-NEXT:    vmov.f32 s13, s10
+; CHECK-NEXT:    vldr s10, [sp, #180]
+; CHECK-NEXT:    vmov.f32 s12, s8
+; CHECK-NEXT:    vfma.f32 q1, q4, q5
+; CHECK-NEXT:    vstrw.32 q3, [sp, #48] @ 16-byte Spill
+; CHECK-NEXT:    vldr s8, [sp, #164]
 ; CHECK-NEXT:    vldr s13, [sp, #156]
-; CHECK-NEXT:    vfma.f32 q1, q4, q2
+; CHECK-NEXT:    vldrw.u32 q5, [sp, #32] @ 16-byte Reload
 ; CHECK-NEXT:    vldr s12, [sp, #148]
-; CHECK-NEXT:    vadd.f32 q1, q7, q1
-; CHECK-NEXT:    vldrw.u32 q7, [sp, #32] @ 16-byte Reload
+; CHECK-NEXT:    vadd.f32 q1, q2, q1
 ; CHECK-NEXT:    vldr s1, [sp, #152]
+; CHECK-NEXT:    vmul.f32 q2, q3, q5
 ; CHECK-NEXT:    vstrw.32 q3, [sp] @ 16-byte Spill
-; CHECK-NEXT:    vmul.f32 q2, q3, q7
 ; CHECK-NEXT:    vldr s0, [sp, #144]
 ; CHECK-NEXT:    vldrw.u32 q3, [sp, #48] @ 16-byte Reload
 ; CHECK-NEXT:    vneg.f32 q2, q2
-; CHECK-NEXT:    vldr s21, [sp, #200]
+; CHECK-NEXT:    vldr s25, [sp, #200]
 ; CHECK-NEXT:    vfma.f32 q2, q0, q3
-; CHECK-NEXT:    vmul.f32 q0, q0, q7
-; CHECK-NEXT:    vldrw.u32 q7, [sp] @ 16-byte Reload
-; CHECK-NEXT:    vldr s20, [sp, #192]
+; CHECK-NEXT:    vmul.f32 q0, q0, q5
+; CHECK-NEXT:    vldrw.u32 q5, [sp] @ 16-byte Reload
+; CHECK-NEXT:    vldr s24, [sp, #192]
 ; CHECK-NEXT:    vldr s17, [sp, #204]
 ; CHECK-NEXT:    vldr s16, [sp, #196]
-; CHECK-NEXT:    vfma.f32 q0, q7, q3
-; CHECK-NEXT:    vsub.f32 q3, q5, q0
+; CHECK-NEXT:    vfma.f32 q0, q5, q3
+; CHECK-NEXT:    vsub.f32 q3, q6, q0
 ; CHECK-NEXT:    vmov.f32 s1, s4
 ; CHECK-NEXT:    vadd.f32 q4, q4, q2
 ; CHECK-NEXT:    vmov.f32 s3, s5
 ; CHECK-NEXT:    vmov.f32 s5, s6
-; CHECK-NEXT:    vmov.f32 s0, s24
-; CHECK-NEXT:    vmov.f32 s2, s25
-; CHECK-NEXT:    vmov.f32 s4, s26
-; CHECK-NEXT:    vmov.f32 s6, s27
+; CHECK-NEXT:    vmov.f32 s0, s28
+; CHECK-NEXT:    vmov.f32 s2, s29
+; CHECK-NEXT:    vmov.f32 s4, s30
+; CHECK-NEXT:    vmov.f32 s6, s31
 ; CHECK-NEXT:    vmov.f32 s8, s12
 ; CHECK-NEXT:    vmov.f32 s9, s16
 ; CHECK-NEXT:    vmov.f32 s10, s13
diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
index eb52b5a..28166e4 100644
--- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
@@ -1912,11 +1912,11 @@ define void @arm_biquad_cascade_df2T_f32(ptr nocapture readonly %S, ptr nocaptur
 ; CHECK-NEXT:    b .LBB20_3
 ; CHECK-NEXT:  .LBB20_1: @ %if.else
 ; CHECK-NEXT:    @ in Loop: Header=BB20_3 Depth=1
-; CHECK-NEXT:    vmov.f32 s6, s5
-; CHECK-NEXT:    vstr s4, [r6]
+; CHECK-NEXT:    vmov.f32 s14, s13
+; CHECK-NEXT:    vstr s12, [r6]
 ; CHECK-NEXT:  .LBB20_2: @ %if.end
 ; CHECK-NEXT:    @ in Loop: Header=BB20_3 Depth=1
-; CHECK-NEXT:    vstr s6, [r6, #4]
+; CHECK-NEXT:    vstr s14, [r6, #4]
 ; CHECK-NEXT:    add.w r12, r12, #20
 ; CHECK-NEXT:    adds r6, #8
 ; CHECK-NEXT:    subs r0, #1
@@ -1925,41 +1925,41 @@ define void @arm_biquad_cascade_df2T_f32(ptr nocapture readonly %S, ptr nocaptur
 ; CHECK-NEXT:  .LBB20_3: @ %do.body
 ; CHECK-NEXT:    @ =>This Loop Header: Depth=1
 ; CHECK-NEXT:    @ Child Loop BB20_5 Depth 2
-; CHECK-NEXT:    vldrw.u32 q3, [r12]
+; CHECK-NEXT:    vldrw.u32 q2, [r12]
 ; CHECK-NEXT:    movs r5, #0
-; CHECK-NEXT:    vmov q4, q3
+; CHECK-NEXT:    vmov q4, q2
 ; CHECK-NEXT:    vshlc q4, r5, #32
-; CHECK-NEXT:    vldrw.u32 q2, [r12, #8]
-; CHECK-NEXT:    vmov q5, q2
+; CHECK-NEXT:    vldrw.u32 q1, [r12, #8]
+; CHECK-NEXT:    vmov q5, q1
 ; CHECK-NEXT:    vshlc q5, r5, #32
-; CHECK-NEXT:    vldrw.u32 q1, [r6]
-; CHECK-NEXT:    vmov.f32 s6, s0
+; CHECK-NEXT:    vldrw.u32 q3, [r6]
+; CHECK-NEXT:    vmov.f32 s14, s0
 ; CHECK-NEXT:    mov r5, r2
-; CHECK-NEXT:    vmov.f32 s7, s0
+; CHECK-NEXT:    vmov.f32 s15, s0
 ; CHECK-NEXT:    wls lr, r8, .LBB20_6
 ; CHECK-NEXT:  @ %bb.4: @ %while.body.preheader
 ; CHECK-NEXT:    @ in Loop: Header=BB20_3 Depth=1
-; CHECK-NEXT:    vmov q6, q1
+; CHECK-NEXT:    vmov q6, q3
 ; CHECK-NEXT:    mov r5, r2
 ; CHECK-NEXT:  .LBB20_5: @ %while.body
 ; CHECK-NEXT:    @ Parent Loop BB20_3 Depth=1
 ; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    ldrd r7, r4, [r1], #8
-; CHECK-NEXT:    vfma.f32 q6, q3, r7
+; CHECK-NEXT:    vfma.f32 q6, q2, r7
 ; CHECK-NEXT:    vmov r7, s24
-; CHECK-NEXT:    vmov q1, q6
-; CHECK-NEXT:    vfma.f32 q1, q2, r7
+; CHECK-NEXT:    vmov q3, q6
+; CHECK-NEXT:    vfma.f32 q3, q1, r7
 ; CHECK-NEXT:    vstr s24, [r5]
-; CHECK-NEXT:    vmov.f32 s7, s0
-; CHECK-NEXT:    vfma.f32 q1, q4, r4
-; CHECK-NEXT:    vmov r4, s5
-; CHECK-NEXT:    vstr s5, [r5, #4]
-; CHECK-NEXT:    vfma.f32 q1, q5, r4
+; CHECK-NEXT:    vmov.f32 s15, s0
+; CHECK-NEXT:    vfma.f32 q3, q4, r4
+; CHECK-NEXT:    vmov r4, s13
+; CHECK-NEXT:    vstr s13, [r5, #4]
+; CHECK-NEXT:    vfma.f32 q3, q5, r4
 ; CHECK-NEXT:    adds r5, #8
-; CHECK-NEXT:    vmov.f32 s4, s6
-; CHECK-NEXT:    vmov.f32 s5, s7
-; CHECK-NEXT:    vmov.f32 s6, s0
-; CHECK-NEXT:    vmov q6, q1
+; CHECK-NEXT:    vmov.f32 s12, s14
+; CHECK-NEXT:    vmov.f32 s13, s15
+; CHECK-NEXT:    vmov.f32 s14, s0
+; CHECK-NEXT:    vmov q6, q3
 ; CHECK-NEXT:    le lr, .LBB20_5
 ; CHECK-NEXT:  .LBB20_6: @ %while.end
 ; CHECK-NEXT:    @ in Loop: Header=BB20_3 Depth=1
@@ -1968,11 +1968,11 @@ define void @arm_biquad_cascade_df2T_f32(ptr nocapture readonly %S, ptr nocaptur
 ; CHECK-NEXT:  @ %bb.7: @ %if.then
 ; CHECK-NEXT:    @ in Loop: Header=BB20_3 Depth=1
 ; CHECK-NEXT:    ldr r1, [r1]
-; CHECK-NEXT:    vfma.f32 q1, q3, r1
-; CHECK-NEXT:    vmov r1, s4
-; CHECK-NEXT:    vstr s4, [r5]
-; CHECK-NEXT:    vfma.f32 q1, q2, r1
-; CHECK-NEXT:    vstr s5, [r6]
+; CHECK-NEXT:    vfma.f32 q3, q2, r1
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    vstr s12, [r5]
+; CHECK-NEXT:    vfma.f32 q3, q1, r1
+; CHECK-NEXT:    vstr s13, [r6]
 ; CHECK-NEXT:    b .LBB20_2
 ; CHECK-NEXT:  .LBB20_8: @ %do.end
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll b/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll
index 4934d22..67910e4 100644
--- a/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll
@@ -762,10 +762,10 @@ define void @foo_v4f32_v4f16(ptr %dest, ptr %mask, ptr %src) {
 ; CHECK-LE-NEXT:  .LBB18_5: @ %else8
 ; CHECK-LE-NEXT:    vmrs r2, p0
 ; CHECK-LE-NEXT:    movs r1, #0
-; CHECK-LE-NEXT:    vcvtt.f32.f16 s3, s1
-; CHECK-LE-NEXT:    vcvtb.f32.f16 s2, s1
-; CHECK-LE-NEXT:    vcvtt.f32.f16 s1, s0
+; CHECK-LE-NEXT:    vcvtt.f32.f16 s6, s0
 ; CHECK-LE-NEXT:    vcvtb.f32.f16 s0, s0
+; CHECK-LE-NEXT:    vcvtt.f32.f16 s2, s1
+; CHECK-LE-NEXT:    vcvtb.f32.f16 s4, s1
 ; CHECK-LE-NEXT:    and r3, r2, #1
 ; CHECK-LE-NEXT:    rsbs r3, r3, #0
 ; CHECK-LE-NEXT:    bfi r1, r3, #0, #1
@@ -784,15 +784,15 @@ define void @foo_v4f32_v4f16(ptr %dest, ptr %mask, ptr %src) {
 ; CHECK-LE-NEXT:    strne r2, [r0]
 ; CHECK-LE-NEXT:    lsls r2, r1, #30
 ; CHECK-LE-NEXT:    itt mi
-; CHECK-LE-NEXT:    vmovmi r2, s1
+; CHECK-LE-NEXT:    vmovmi r2, s6
 ; CHECK-LE-NEXT:    strmi r2, [r0, #4]
 ; CHECK-LE-NEXT:    lsls r2, r1, #29
 ; CHECK-LE-NEXT:    itt mi
-; CHECK-LE-NEXT:    vmovmi r2, s2
+; CHECK-LE-NEXT:    vmovmi r2, s4
 ; CHECK-LE-NEXT:    strmi r2, [r0, #8]
 ; CHECK-LE-NEXT:    lsls r1, r1, #28
 ; CHECK-LE-NEXT:    itt mi
-; CHECK-LE-NEXT:    vmovmi r1, s3
+; CHECK-LE-NEXT:    vmovmi r1, s2
 ; CHECK-LE-NEXT:    strmi r1, [r0, #12]
 ; CHECK-LE-NEXT:    add sp, #4
 ; CHECK-LE-NEXT:    pop {r7, pc}
@@ -853,10 +853,10 @@ define void @foo_v4f32_v4f16(ptr %dest, ptr %mask, ptr %src) {
 ; CHECK-BE-NEXT:  .LBB18_5: @ %else8
 ; CHECK-BE-NEXT:    vmrs r2, p0
 ; CHECK-BE-NEXT:    movs r1, #0
-; CHECK-BE-NEXT:    vcvtt.f32.f16 s3, s1
-; CHECK-BE-NEXT:    vcvtb.f32.f16 s2, s1
-; CHECK-BE-NEXT:    vcvtt.f32.f16 s1, s0
+; CHECK-BE-NEXT:    vcvtt.f32.f16 s6, s0
 ; CHECK-BE-NEXT:    vcvtb.f32.f16 s0, s0
+; CHECK-BE-NEXT:    vcvtt.f32.f16 s2, s1
+; CHECK-BE-NEXT:    vcvtb.f32.f16 s4, s1
 ; CHECK-BE-NEXT:    ubfx r3, r2, #12, #1
 ; CHECK-BE-NEXT:    rsbs r3, r3, #0
 ; CHECK-BE-NEXT:    bfi r1, r3, #0, #1
@@ -875,15 +875,15 @@ define void @foo_v4f32_v4f16(ptr %dest, ptr %mask, ptr %src) {
 ; CHECK-BE-NEXT:    strmi r2, [r0]
 ; CHECK-BE-NEXT:    lsls r2, r1, #29
 ; CHECK-BE-NEXT:    itt mi
-; CHECK-BE-NEXT:    vmovmi r2, s1
+; CHECK-BE-NEXT:    vmovmi r2, s6
 ; CHECK-BE-NEXT:    strmi r2, [r0, #4]
 ; CHECK-BE-NEXT:    lsls r2, r1, #30
 ; CHECK-BE-NEXT:    itt mi
-; CHECK-BE-NEXT:    vmovmi r2, s2
+; CHECK-BE-NEXT:    vmovmi r2, s4
 ; CHECK-BE-NEXT:    strmi r2, [r0, #8]
 ; CHECK-BE-NEXT:    lsls r1, r1, #31
 ; CHECK-BE-NEXT:    itt ne
-; CHECK-BE-NEXT:    vmovne r1, s3
+; CHECK-BE-NEXT:    vmovne r1, s2
 ; CHECK-BE-NEXT:    strne r1, [r0, #12]
 ; CHECK-BE-NEXT:    add sp, #4
 ; CHECK-BE-NEXT:    pop {r7, pc}
@@ -953,10 +953,10 @@ define void @foo_v4f32_v4f16_unaligned(ptr %dest, ptr %mask, ptr %src) {
 ; CHECK-LE-NEXT:  .LBB19_5: @ %else8
 ; CHECK-LE-NEXT:    vmrs r2, p0
 ; CHECK-LE-NEXT:    movs r1, #0
-; CHECK-LE-NEXT:    vcvtt.f32.f16 s3, s1
-; CHECK-LE-NEXT:    vcvtb.f32.f16 s2, s1
-; CHECK-LE-NEXT:    vcvtt.f32.f16 s1, s0
+; CHECK-LE-NEXT:    vcvtt.f32.f16 s6, s0
 ; CHECK-LE-NEXT:    vcvtb.f32.f16 s0, s0
+; CHECK-LE-NEXT:    vcvtt.f32.f16 s2, s1
+; CHECK-LE-NEXT:    vcvtb.f32.f16 s4, s1
 ; CHECK-LE-NEXT:    and r3, r2, #1
 ; CHECK-LE-NEXT:    rsbs r3, r3, #0
 ; CHECK-LE-NEXT:    bfi r1, r3, #0, #1
@@ -975,15 +975,15 @@ define void @foo_v4f32_v4f16_unaligned(ptr %dest, ptr %mask, ptr %src) {
 ; CHECK-LE-NEXT:    strne r2, [r0]
 ; CHECK-LE-NEXT:    lsls r2, r1, #30
 ; CHECK-LE-NEXT:    itt mi
-; CHECK-LE-NEXT:    vmovmi r2, s1
+; CHECK-LE-NEXT:    vmovmi r2, s6
 ; CHECK-LE-NEXT:    strmi r2, [r0, #4]
 ; CHECK-LE-NEXT:    lsls r2, r1, #29
 ; CHECK-LE-NEXT:    itt mi
-; CHECK-LE-NEXT:    vmovmi r2, s2
+; CHECK-LE-NEXT:    vmovmi r2, s4
 ; CHECK-LE-NEXT:    strmi r2, [r0, #8]
 ; CHECK-LE-NEXT:    lsls r1, r1, #28
 ; CHECK-LE-NEXT:    itt mi
-; CHECK-LE-NEXT:    vmovmi r1, s3
+; CHECK-LE-NEXT:    vmovmi r1, s2
 ; CHECK-LE-NEXT:    strmi r1, [r0, #12]
 ; CHECK-LE-NEXT:    add sp, #4
 ; CHECK-LE-NEXT:    pop {r7, pc}
@@ -1044,10 +1044,10 @@ define void @foo_v4f32_v4f16_unaligned(ptr %dest, ptr %mask, ptr %src) {
 ; CHECK-BE-NEXT:  .LBB19_5: @ %else8
 ; CHECK-BE-NEXT:    vmrs r2, p0
 ; CHECK-BE-NEXT:    movs r1, #0
-; CHECK-BE-NEXT:    vcvtt.f32.f16 s3, s1
-; CHECK-BE-NEXT:    vcvtb.f32.f16 s2, s1
-; CHECK-BE-NEXT:    vcvtt.f32.f16 s1, s0
+; CHECK-BE-NEXT:    vcvtt.f32.f16 s6, s0
 ; CHECK-BE-NEXT:    vcvtb.f32.f16 s0, s0
+; CHECK-BE-NEXT:    vcvtt.f32.f16 s2, s1
+; CHECK-BE-NEXT:    vcvtb.f32.f16 s4, s1
 ; CHECK-BE-NEXT:    ubfx r3, r2, #12, #1
 ; CHECK-BE-NEXT:    rsbs r3, r3, #0
 ; CHECK-BE-NEXT:    bfi r1, r3, #0, #1
@@ -1066,15 +1066,15 @@ define void @foo_v4f32_v4f16_unaligned(ptr %dest, ptr %mask, ptr %src) {
 ; CHECK-BE-NEXT:    strmi r2, [r0]
 ; CHECK-BE-NEXT:    lsls r2, r1, #29
 ; CHECK-BE-NEXT:    itt mi
-; CHECK-BE-NEXT:    vmovmi r2, s1
+; CHECK-BE-NEXT:    vmovmi r2, s6
 ; CHECK-BE-NEXT:    strmi r2, [r0, #4]
 ; CHECK-BE-NEXT:    lsls r2, r1, #30
 ; CHECK-BE-NEXT:    itt mi
-; CHECK-BE-NEXT:    vmovmi r2, s2
+; CHECK-BE-NEXT:    vmovmi r2, s4
 ; CHECK-BE-NEXT:    strmi r2, [r0, #8]
 ; CHECK-BE-NEXT:    lsls r1, r1, #31
 ; CHECK-BE-NEXT:    itt ne
-; CHECK-BE-NEXT:    vmovne r1, s3
+; CHECK-BE-NEXT:    vmovne r1, s2
 ; CHECK-BE-NEXT:    strne r1, [r0, #12]
 ; CHECK-BE-NEXT:    add sp, #4
 ; CHECK-BE-NEXT:    pop {r7, pc}
diff --git a/llvm/test/CodeGen/Thumb2/mve-shuffle.ll b/llvm/test/CodeGen/Thumb2/mve-shuffle.ll
index f4643f8..33816fe 100644
--- a/llvm/test/CodeGen/Thumb2/mve-shuffle.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-shuffle.ll
@@ -225,15 +225,15 @@ entry:
 define arm_aapcs_vfpcc <8 x i16> @shuffle3_i16(<8 x i16> %src) {
 ; CHECK-LABEL: shuffle3_i16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov q1, q0
-; CHECK-NEXT:    vmovx.f16 s2, s5
-; CHECK-NEXT:    vmovx.f16 s0, s4
-; CHECK-NEXT:    vins.f16 s5, s4
-; CHECK-NEXT:    vins.f16 s2, s0
-; CHECK-NEXT:    vmov.f32 s3, s5
-; CHECK-NEXT:    vmovx.f16 s1, s7
-; CHECK-NEXT:    vmov.f32 s0, s6
-; CHECK-NEXT:    vins.f16 s1, s7
+; CHECK-NEXT:    vmovx.f16 s5, s3
+; CHECK-NEXT:    vmovx.f16 s6, s1
+; CHECK-NEXT:    vmovx.f16 s4, s0
+; CHECK-NEXT:    vins.f16 s1, s0
+; CHECK-NEXT:    vins.f16 s6, s4
+; CHECK-NEXT:    vins.f16 s5, s3
+; CHECK-NEXT:    vmov.f32 s4, s2
+; CHECK-NEXT:    vmov.f32 s7, s1
+; CHECK-NEXT:    vmov q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 7, i32 6, i32 3, i32 1, i32 2, i32 0>
@@ -357,22 +357,22 @@ define arm_aapcs_vfpcc <8 x i16> @shuffle3step_i16(<32 x i16> %src) {
 ; CHECK-NEXT:    vmov.f32 s17, s4
 ; CHECK-NEXT:    vmovx.f16 s13, s3
 ; CHECK-NEXT:    vins.f16 s17, s2
-; CHECK-NEXT:    vmov.f32 s18, s7
 ; CHECK-NEXT:    vmovx.f16 s2, s8
-; CHECK-NEXT:    vmov.f32 s19, s10
+; CHECK-NEXT:    vmov.f32 s18, s7
+; CHECK-NEXT:    vmovx.f16 s14, s6
 ; CHECK-NEXT:    vins.f16 s18, s2
 ; CHECK-NEXT:    vmovx.f16 s2, s11
+; CHECK-NEXT:    vmov.f32 s19, s10
+; CHECK-NEXT:    vmovx.f16 s15, s9
 ; CHECK-NEXT:    vins.f16 s19, s2
 ; CHECK-NEXT:    vmovx.f16 s2, s1
 ; CHECK-NEXT:    vins.f16 s0, s2
 ; CHECK-NEXT:    vmovx.f16 s2, s4
 ; CHECK-NEXT:    vins.f16 s3, s2
 ; CHECK-NEXT:    vmovx.f16 s2, s7
-; CHECK-NEXT:    vmovx.f16 s4, s10
-; CHECK-NEXT:    vmovx.f16 s14, s6
-; CHECK-NEXT:    vmovx.f16 s15, s9
 ; CHECK-NEXT:    vins.f16 s6, s2
-; CHECK-NEXT:    vins.f16 s9, s4
+; CHECK-NEXT:    vmovx.f16 s2, s10
+; CHECK-NEXT:    vins.f16 s9, s2
 ; CHECK-NEXT:    vmov.f32 s1, s3
 ; CHECK-NEXT:    vins.f16 s14, s8
 ; CHECK-NEXT:    vins.f16 s15, s11
@@ -416,8 +416,8 @@ define arm_aapcs_vfpcc <8 x i16> @shuffle4step_i16(<32 x i16> %src) {
 ; CHECK-NEXT:    vmov.f32 s20, s1
 ; CHECK-NEXT:    vmovx.f16 s1, s10
 ; CHECK-NEXT:    vmov.f32 s22, s9
-; CHECK-NEXT:    vmov.f32 s23, s13
 ; CHECK-NEXT:    vmov.f32 s21, s5
+; CHECK-NEXT:    vmov.f32 s23, s13
 ; CHECK-NEXT:    vadd.i16 q4, q5, q4
 ; CHECK-NEXT:    vmovx.f16 s22, s8
 ; CHECK-NEXT:    vins.f16 s22, s1
@@ -434,9 +434,9 @@ define arm_aapcs_vfpcc <8 x i16> @shuffle4step_i16(<32 x i16> %src) {
 ; CHECK-NEXT:    vins.f16 s4, s6
 ; CHECK-NEXT:    vins.f16 s21, s1
 ; CHECK-NEXT:    vins.f16 s0, s2
-; CHECK-NEXT:    vmov.f32 s3, s12
 ; CHECK-NEXT:    vmov.f32 s1, s4
 ; CHECK-NEXT:    vmov.f32 s2, s8
+; CHECK-NEXT:    vmov.f32 s3, s12
 ; CHECK-NEXT:    vadd.i16 q0, q0, q5
 ; CHECK-NEXT:    vadd.i16 q0, q0, q4
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11}
@@ -1147,15 +1147,15 @@ entry:
 define arm_aapcs_vfpcc <8 x half> @shuffle3_f16(<8 x half> %src) {
 ; CHECK-LABEL: shuffle3_f16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov q1, q0
-; CHECK-NEXT:    vmovx.f16 s2, s5
-; CHECK-NEXT:    vmovx.f16 s0, s4
-; CHECK-NEXT:    vins.f16 s5, s4
-; CHECK-NEXT:    vins.f16 s2, s0
-; CHECK-NEXT:    vmov.f32 s3, s5
-; CHECK-NEXT:    vmovx.f16 s1, s7
-; CHECK-NEXT:    vmov.f32 s0, s6
-; CHECK-NEXT:    vins.f16 s1, s7
+; CHECK-NEXT:    vmovx.f16 s5, s3
+; CHECK-NEXT:    vmovx.f16 s6, s1
+; CHECK-NEXT:    vmovx.f16 s4, s0
+; CHECK-NEXT:    vins.f16 s1, s0
+; CHECK-NEXT:    vins.f16 s6, s4
+; CHECK-NEXT:    vins.f16 s5, s3
+; CHECK-NEXT:    vmov.f32 s4, s2
+; CHECK-NEXT:    vmov.f32 s7, s1
+; CHECK-NEXT:    vmov q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 4, i32 5, i32 7, i32 6, i32 3, i32 1, i32 2, i32 0>
@@ -1348,12 +1348,12 @@ define arm_aapcs_vfpcc <8 x half> @shuffle4step_f16(<32 x half> %src) {
 ; CHECKFP-NEXT:    vins.f16 s0, s2
 ; CHECKFP-NEXT:    vmov.f32 s24, s1
 ; CHECKFP-NEXT:    vmov.f32 s26, s9
-; CHECKFP-NEXT:    vmov.f32 s27, s13
 ; CHECKFP-NEXT:    vmov.f32 s25, s5
+; CHECKFP-NEXT:    vmov.f32 s27, s13
 ; CHECKFP-NEXT:    vmov.f32 s2, s8
 ; CHECKFP-NEXT:    vadd.f16 q4, q6, q4
-; CHECKFP-NEXT:    vmov.f32 s3, s12
 ; CHECKFP-NEXT:    vmov.f32 s1, s4
+; CHECKFP-NEXT:    vmov.f32 s3, s12
 ; CHECKFP-NEXT:    vadd.f16 q0, q0, q5
 ; CHECKFP-NEXT:    vadd.f16 q0, q0, q4
 ; CHECKFP-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
diff --git a/llvm/test/CodeGen/Thumb2/mve-vcvt16.ll b/llvm/test/CodeGen/Thumb2/mve-vcvt16.ll
index a5725a2..cdaf446 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcvt16.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcvt16.ll
@@ -327,11 +327,12 @@ define arm_aapcs_vfpcc <4 x float> @load_ext_4(ptr %src) {
 ; CHECK-MVE:       @ %bb.0: @ %entry
 ; CHECK-MVE-NEXT:    ldrd r0, r1, [r0]
 ; CHECK-MVE-NEXT:    vmov.32 q0[0], r0
-; CHECK-MVE-NEXT:    vmov.32 q0[1], r1
-; CHECK-MVE-NEXT:    vcvtt.f32.f16 s3, s1
-; CHECK-MVE-NEXT:    vcvtb.f32.f16 s2, s1
+; CHECK-MVE-NEXT:    vmov q1, q0
 ; CHECK-MVE-NEXT:    vcvtt.f32.f16 s1, s0
+; CHECK-MVE-NEXT:    vmov.32 q1[1], r1
 ; CHECK-MVE-NEXT:    vcvtb.f32.f16 s0, s0
+; CHECK-MVE-NEXT:    vcvtt.f32.f16 s3, s5
+; CHECK-MVE-NEXT:    vcvtb.f32.f16 s2, s5
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: load_ext_4:
diff --git a/llvm/test/CodeGen/Thumb2/mve-vld2.ll b/llvm/test/CodeGen/Thumb2/mve-vld2.ll
index 633aef4..36a035d 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vld2.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vld2.ll
@@ -514,11 +514,12 @@ define void @vld2_v2f16(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    ldr r2, [r0]
 ; CHECK-NEXT:    ldr r0, [r0, #4]
 ; CHECK-NEXT:    vmov.32 q0[0], r2
-; CHECK-NEXT:    vmov.32 q0[1], r0
+; CHECK-NEXT:    vmov q1, q0
+; CHECK-NEXT:    vmov.32 q1[1], r0
 ; CHECK-NEXT:    vmovx.f16 s4, s0
-; CHECK-NEXT:    vmovx.f16 s2, s1
+; CHECK-NEXT:    vmovx.f16 s2, s5
 ; CHECK-NEXT:    vins.f16 s4, s2
-; CHECK-NEXT:    vins.f16 s0, s1
+; CHECK-NEXT:    vins.f16 s0, s5
 ; CHECK-NEXT:    vadd.f16 q0, q0, q1
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    str r0, [r1]
diff --git a/llvm/test/CodeGen/Thumb2/mve-vld3.ll b/llvm/test/CodeGen/Thumb2/mve-vld3.ll
index ccdc996..b207ce7 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vld3.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vld3.ll
@@ -313,11 +313,11 @@ define void @vld3_v8i16(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    vmovx.f16 s2, s8
 ; CHECK-NEXT:    vins.f16 s3, s2
 ; CHECK-NEXT:    vmovx.f16 s2, s11
-; CHECK-NEXT:    vmovx.f16 s8, s14
 ; CHECK-NEXT:    vmovx.f16 s18, s10
-; CHECK-NEXT:    vmovx.f16 s19, s13
 ; CHECK-NEXT:    vins.f16 s10, s2
-; CHECK-NEXT:    vins.f16 s13, s8
+; CHECK-NEXT:    vmovx.f16 s2, s14
+; CHECK-NEXT:    vmovx.f16 s19, s13
+; CHECK-NEXT:    vins.f16 s13, s2
 ; CHECK-NEXT:    vmov.f32 s1, s3
 ; CHECK-NEXT:    vins.f16 s18, s12
 ; CHECK-NEXT:    vins.f16 s19, s15
@@ -368,55 +368,55 @@ define void @vld3_v16i16(ptr %src, ptr %dst) {
 ; CHECK-LV-NEXT:    vmovx.f16 s2, s8
 ; CHECK-LV-NEXT:    vins.f16 s3, s2
 ; CHECK-LV-NEXT:    vmovx.f16 s2, s11
-; CHECK-LV-NEXT:    vmovx.f16 s8, s14
 ; CHECK-LV-NEXT:    vmovx.f16 s18, s10
-; CHECK-LV-NEXT:    vmovx.f16 s19, s13
 ; CHECK-LV-NEXT:    vins.f16 s10, s2
-; CHECK-LV-NEXT:    vins.f16 s13, s8
+; CHECK-LV-NEXT:    vmovx.f16 s2, s14
+; CHECK-LV-NEXT:    vmovx.f16 s19, s13
+; CHECK-LV-NEXT:    vins.f16 s13, s2
 ; CHECK-LV-NEXT:    vmov.f32 s1, s3
 ; CHECK-LV-NEXT:    vins.f16 s18, s12
 ; CHECK-LV-NEXT:    vins.f16 s19, s15
-; CHECK-LV-NEXT:    vmov.f32 s3, s13
-; CHECK-LV-NEXT:    vldrw.u32 q3, [r0, #16]
 ; CHECK-LV-NEXT:    vins.f16 s17, s9
 ; CHECK-LV-NEXT:    vmov.f32 s2, s10
+; CHECK-LV-NEXT:    vmov.f32 s3, s13
+; CHECK-LV-NEXT:    vldrw.u32 q2, [r0, #16]
 ; CHECK-LV-NEXT:    vadd.i16 q0, q0, q4
-; CHECK-LV-NEXT:    vldrw.u32 q2, [r0, #32]
-; CHECK-LV-NEXT:    vadd.i16 q0, q0, q1
-; CHECK-LV-NEXT:    vmovx.f16 s6, s14
 ; CHECK-LV-NEXT:    vldrw.u32 q4, [r0]
-; CHECK-LV-NEXT:    vins.f16 s6, s8
-; CHECK-LV-NEXT:    vmov.f32 s22, s15
-; CHECK-LV-NEXT:    vmovx.f16 s8, s8
-; CHECK-LV-NEXT:    vins.f16 s22, s8
-; CHECK-LV-NEXT:    vmovx.f16 s8, s11
-; CHECK-LV-NEXT:    vmov.f32 s23, s10
-; CHECK-LV-NEXT:    vmovx.f16 s4, s16
-; CHECK-LV-NEXT:    vins.f16 s23, s8
-; CHECK-LV-NEXT:    vmovx.f16 s8, s17
-; CHECK-LV-NEXT:    vins.f16 s16, s8
-; CHECK-LV-NEXT:    vmovx.f16 s8, s12
-; CHECK-LV-NEXT:    vmovx.f16 s5, s19
-; CHECK-LV-NEXT:    vins.f16 s19, s8
-; CHECK-LV-NEXT:    vmovx.f16 s8, s15
-; CHECK-LV-NEXT:    vmovx.f16 s7, s9
-; CHECK-LV-NEXT:    vins.f16 s14, s8
-; CHECK-LV-NEXT:    vmovx.f16 s8, s10
-; CHECK-LV-NEXT:    vins.f16 s4, s18
+; CHECK-LV-NEXT:    vadd.i16 q0, q0, q1
+; CHECK-LV-NEXT:    vldrw.u32 q1, [r0, #32]
+; CHECK-LV-NEXT:    vmovx.f16 s14, s10
+; CHECK-LV-NEXT:    vmov.f32 s22, s11
+; CHECK-LV-NEXT:    vins.f16 s14, s4
+; CHECK-LV-NEXT:    vmovx.f16 s4, s4
+; CHECK-LV-NEXT:    vins.f16 s22, s4
+; CHECK-LV-NEXT:    vmovx.f16 s4, s7
+; CHECK-LV-NEXT:    vmov.f32 s23, s6
+; CHECK-LV-NEXT:    vmovx.f16 s12, s16
+; CHECK-LV-NEXT:    vins.f16 s23, s4
+; CHECK-LV-NEXT:    vmovx.f16 s4, s17
+; CHECK-LV-NEXT:    vins.f16 s16, s4
+; CHECK-LV-NEXT:    vmovx.f16 s4, s8
+; CHECK-LV-NEXT:    vmovx.f16 s13, s19
+; CHECK-LV-NEXT:    vins.f16 s19, s4
+; CHECK-LV-NEXT:    vmovx.f16 s4, s11
+; CHECK-LV-NEXT:    vmovx.f16 s15, s5
+; CHECK-LV-NEXT:    vins.f16 s10, s4
+; CHECK-LV-NEXT:    vmovx.f16 s4, s6
+; CHECK-LV-NEXT:    vins.f16 s12, s18
 ; CHECK-LV-NEXT:    vmov.f32 s20, s17
 ; CHECK-LV-NEXT:    vmovx.f16 s18, s18
-; CHECK-LV-NEXT:    vins.f16 s9, s8
-; CHECK-LV-NEXT:    vins.f16 s5, s13
+; CHECK-LV-NEXT:    vins.f16 s5, s4
+; CHECK-LV-NEXT:    vins.f16 s13, s9
 ; CHECK-LV-NEXT:    vins.f16 s20, s18
 ; CHECK-LV-NEXT:    vmov.f32 s17, s19
-; CHECK-LV-NEXT:    vins.f16 s7, s11
-; CHECK-LV-NEXT:    vmovx.f16 s13, s13
-; CHECK-LV-NEXT:    vmov.f32 s21, s12
-; CHECK-LV-NEXT:    vmov.f32 s18, s14
-; CHECK-LV-NEXT:    vins.f16 s21, s13
-; CHECK-LV-NEXT:    vmov.f32 s19, s9
+; CHECK-LV-NEXT:    vins.f16 s15, s7
+; CHECK-LV-NEXT:    vmovx.f16 s9, s9
+; CHECK-LV-NEXT:    vmov.f32 s21, s8
+; CHECK-LV-NEXT:    vmov.f32 s18, s10
+; CHECK-LV-NEXT:    vins.f16 s21, s9
+; CHECK-LV-NEXT:    vmov.f32 s19, s5
 ; CHECK-LV-NEXT:    vstrw.32 q0, [r1, #16]
-; CHECK-LV-NEXT:    vadd.i16 q1, q4, q1
+; CHECK-LV-NEXT:    vadd.i16 q1, q4, q3
 ; CHECK-LV-NEXT:    vadd.i16 q1, q1, q5
 ; CHECK-LV-NEXT:    vstrw.32 q1, [r1]
 ; CHECK-LV-NEXT:    vpop {d8, d9, d10, d11}
@@ -449,55 +449,55 @@ define void @vld3_v16i16(ptr %src, ptr %dst) {
 ; CHECK-LIS-NEXT:    vmovx.f16 s2, s8
 ; CHECK-LIS-NEXT:    vins.f16 s3, s2
 ; CHECK-LIS-NEXT:    vmovx.f16 s2, s11
-; CHECK-LIS-NEXT:    vmovx.f16 s8, s14
 ; CHECK-LIS-NEXT:    vmovx.f16 s18, s10
-; CHECK-LIS-NEXT:    vmovx.f16 s19, s13
 ; CHECK-LIS-NEXT:    vins.f16 s10, s2
-; CHECK-LIS-NEXT:    vins.f16 s13, s8
+; CHECK-LIS-NEXT:    vmovx.f16 s2, s14
+; CHECK-LIS-NEXT:    vmovx.f16 s19, s13
+; CHECK-LIS-NEXT:    vins.f16 s13, s2
 ; CHECK-LIS-NEXT:    vmov.f32 s1, s3
 ; CHECK-LIS-NEXT:    vins.f16 s18, s12
 ; CHECK-LIS-NEXT:    vins.f16 s19, s15
 ; CHECK-LIS-NEXT:    vmov.f32 s3, s13
 ; CHECK-LIS-NEXT:    vins.f16 s17, s9
 ; CHECK-LIS-NEXT:    vmov.f32 s2, s10
-; CHECK-LIS-NEXT:    vldrw.u32 q2, [r0, #32]
+; CHECK-LIS-NEXT:    vldrw.u32 q3, [r0, #16]
 ; CHECK-LIS-NEXT:    vadd.i16 q0, q0, q4
-; CHECK-LIS-NEXT:    vldrw.u32 q4, [r0, #16]
+; CHECK-LIS-NEXT:    vldrw.u32 q4, [r0]
 ; CHECK-LIS-NEXT:    vadd.i16 q0, q0, q1
-; CHECK-LIS-NEXT:    vldrw.u32 q3, [r0]
-; CHECK-LIS-NEXT:    vmovx.f16 s6, s18
-; CHECK-LIS-NEXT:    vmov.f32 s22, s19
-; CHECK-LIS-NEXT:    vins.f16 s6, s8
-; CHECK-LIS-NEXT:    vmovx.f16 s8, s8
-; CHECK-LIS-NEXT:    vins.f16 s22, s8
-; CHECK-LIS-NEXT:    vmovx.f16 s8, s11
-; CHECK-LIS-NEXT:    vmov.f32 s23, s10
-; CHECK-LIS-NEXT:    vmovx.f16 s4, s12
-; CHECK-LIS-NEXT:    vins.f16 s23, s8
-; CHECK-LIS-NEXT:    vmovx.f16 s8, s13
-; CHECK-LIS-NEXT:    vins.f16 s12, s8
+; CHECK-LIS-NEXT:    vldrw.u32 q1, [r0, #32]
+; CHECK-LIS-NEXT:    vmovx.f16 s10, s14
+; CHECK-LIS-NEXT:    vmov.f32 s22, s15
+; CHECK-LIS-NEXT:    vins.f16 s10, s4
+; CHECK-LIS-NEXT:    vmovx.f16 s4, s4
+; CHECK-LIS-NEXT:    vins.f16 s22, s4
+; CHECK-LIS-NEXT:    vmovx.f16 s4, s7
+; CHECK-LIS-NEXT:    vmov.f32 s23, s6
 ; CHECK-LIS-NEXT:    vmovx.f16 s8, s16
-; CHECK-LIS-NEXT:    vmovx.f16 s5, s15
-; CHECK-LIS-NEXT:    vins.f16 s15, s8
-; CHECK-LIS-NEXT:    vmovx.f16 s8, s19
-; CHECK-LIS-NEXT:    vins.f16 s4, s14
-; CHECK-LIS-NEXT:    vmov.f32 s20, s13
-; CHECK-LIS-NEXT:    vmovx.f16 s14, s14
-; CHECK-LIS-NEXT:    vins.f16 s18, s8
-; CHECK-LIS-NEXT:    vmovx.f16 s8, s10
-; CHECK-LIS-NEXT:    vmovx.f16 s7, s9
-; CHECK-LIS-NEXT:    vins.f16 s20, s14
-; CHECK-LIS-NEXT:    vmovx.f16 s14, s17
-; CHECK-LIS-NEXT:    vmov.f32 s21, s16
-; CHECK-LIS-NEXT:    vins.f16 s9, s8
-; CHECK-LIS-NEXT:    vins.f16 s21, s14
-; CHECK-LIS-NEXT:    vmov.f32 s13, s15
-; CHECK-LIS-NEXT:    vins.f16 s7, s11
-; CHECK-LIS-NEXT:    vins.f16 s5, s17
-; CHECK-LIS-NEXT:    vmov.f32 s14, s18
-; CHECK-LIS-NEXT:    vmov.f32 s15, s9
+; CHECK-LIS-NEXT:    vins.f16 s23, s4
+; CHECK-LIS-NEXT:    vmovx.f16 s4, s17
+; CHECK-LIS-NEXT:    vins.f16 s16, s4
+; CHECK-LIS-NEXT:    vmovx.f16 s4, s12
+; CHECK-LIS-NEXT:    vmovx.f16 s9, s19
+; CHECK-LIS-NEXT:    vins.f16 s19, s4
+; CHECK-LIS-NEXT:    vmovx.f16 s4, s15
+; CHECK-LIS-NEXT:    vmovx.f16 s11, s5
+; CHECK-LIS-NEXT:    vins.f16 s14, s4
+; CHECK-LIS-NEXT:    vmovx.f16 s4, s6
+; CHECK-LIS-NEXT:    vins.f16 s8, s18
+; CHECK-LIS-NEXT:    vmov.f32 s20, s17
+; CHECK-LIS-NEXT:    vmovx.f16 s18, s18
+; CHECK-LIS-NEXT:    vins.f16 s5, s4
+; CHECK-LIS-NEXT:    vins.f16 s9, s13
+; CHECK-LIS-NEXT:    vins.f16 s20, s18
+; CHECK-LIS-NEXT:    vmov.f32 s17, s19
+; CHECK-LIS-NEXT:    vins.f16 s11, s7
+; CHECK-LIS-NEXT:    vmovx.f16 s13, s13
+; CHECK-LIS-NEXT:    vmov.f32 s21, s12
+; CHECK-LIS-NEXT:    vmov.f32 s18, s14
+; CHECK-LIS-NEXT:    vins.f16 s21, s13
+; CHECK-LIS-NEXT:    vmov.f32 s19, s5
 ; CHECK-LIS-NEXT:    vstrw.32 q0, [r1, #16]
-; CHECK-LIS-NEXT:    vadd.i16 q1, q3, q1
+; CHECK-LIS-NEXT:    vadd.i16 q1, q4, q2
 ; CHECK-LIS-NEXT:    vadd.i16 q1, q1, q5
 ; CHECK-LIS-NEXT:    vstrw.32 q1, [r1]
 ; CHECK-LIS-NEXT:    vpop {d8, d9, d10, d11}
@@ -1194,24 +1194,25 @@ define void @vld3_v4f16(ptr %src, ptr %dst) {
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    ldrd r2, r3, [r0, #16]
 ; CHECK-NEXT:    vldrw.u32 q1, [r0]
-; CHECK-NEXT:    vmov.32 q2[0], r2
-; CHECK-NEXT:    vmovx.f16 s12, s4
-; CHECK-NEXT:    vmov.32 q2[1], r3
-; CHECK-NEXT:    vmovx.f16 s13, s7
-; CHECK-NEXT:    vmovx.f16 s0, s9
-; CHECK-NEXT:    vmov.f32 s1, s8
-; CHECK-NEXT:    vins.f16 s1, s0
-; CHECK-NEXT:    vmovx.f16 s0, s5
-; CHECK-NEXT:    vins.f16 s4, s0
-; CHECK-NEXT:    vmovx.f16 s0, s6
-; CHECK-NEXT:    vins.f16 s5, s0
-; CHECK-NEXT:    vmovx.f16 s0, s8
+; CHECK-NEXT:    vmov.32 q0[0], r2
+; CHECK-NEXT:    vmovx.f16 s8, s4
+; CHECK-NEXT:    vmov q3, q0
+; CHECK-NEXT:    vmov.f32 s1, s0
+; CHECK-NEXT:    vmov.32 q3[1], r3
+; CHECK-NEXT:    vmovx.f16 s0, s0
+; CHECK-NEXT:    vmovx.f16 s2, s13
+; CHECK-NEXT:    vmovx.f16 s9, s7
+; CHECK-NEXT:    vins.f16 s1, s2
+; CHECK-NEXT:    vmovx.f16 s2, s5
+; CHECK-NEXT:    vins.f16 s4, s2
+; CHECK-NEXT:    vmovx.f16 s2, s6
+; CHECK-NEXT:    vins.f16 s5, s2
 ; CHECK-NEXT:    vins.f16 s7, s0
 ; CHECK-NEXT:    vmov.f32 s0, s5
-; CHECK-NEXT:    vins.f16 s12, s6
-; CHECK-NEXT:    vins.f16 s13, s9
+; CHECK-NEXT:    vins.f16 s8, s6
+; CHECK-NEXT:    vins.f16 s9, s13
 ; CHECK-NEXT:    vmov.f32 s5, s7
-; CHECK-NEXT:    vadd.f16 q1, q1, q3
+; CHECK-NEXT:    vadd.f16 q1, q1, q2
 ; CHECK-NEXT:    vadd.f16 q0, q1, q0
 ; CHECK-NEXT:    vmov r0, r2, d0
 ; CHECK-NEXT:    strd r0, r2, [r1]
@@ -1228,93 +1229,49 @@ entry:
 }
 
 define void @vld3_v8f16(ptr %src, ptr %dst) {
-; CHECK-LV-LABEL: vld3_v8f16:
-; CHECK-LV:       @ %bb.0: @ %entry
-; CHECK-LV-NEXT:    .vsave {d8, d9}
-; CHECK-LV-NEXT:    vpush {d8, d9}
-; CHECK-LV-NEXT:    vldrw.u32 q2, [r0, #16]
-; CHECK-LV-NEXT:    vldrw.u32 q0, [r0]
-; CHECK-LV-NEXT:    vldrw.u32 q3, [r0, #32]
-; CHECK-LV-NEXT:    vmov.f32 s5, s8
-; CHECK-LV-NEXT:    vmovx.f16 s8, s8
-; CHECK-LV-NEXT:    vmovx.f16 s17, s3
-; CHECK-LV-NEXT:    vins.f16 s3, s8
-; CHECK-LV-NEXT:    vmovx.f16 s8, s11
-; CHECK-LV-NEXT:    vmovx.f16 s18, s10
-; CHECK-LV-NEXT:    vmovx.f16 s16, s0
-; CHECK-LV-NEXT:    vins.f16 s10, s8
-; CHECK-LV-NEXT:    vmovx.f16 s6, s2
-; CHECK-LV-NEXT:    vmov.f32 s4, s1
-; CHECK-LV-NEXT:    vmovx.f16 s8, s14
-; CHECK-LV-NEXT:    vmovx.f16 s19, s13
-; CHECK-LV-NEXT:    vins.f16 s4, s6
-; CHECK-LV-NEXT:    vmovx.f16 s6, s9
-; CHECK-LV-NEXT:    vins.f16 s16, s2
-; CHECK-LV-NEXT:    vmovx.f16 s2, s15
-; CHECK-LV-NEXT:    vmovx.f16 s7, s12
-; CHECK-LV-NEXT:    vins.f16 s18, s12
-; CHECK-LV-NEXT:    vmovx.f16 s12, s1
-; CHECK-LV-NEXT:    vins.f16 s13, s8
-; CHECK-LV-NEXT:    vins.f16 s5, s6
-; CHECK-LV-NEXT:    vmov.f32 s6, s11
-; CHECK-LV-NEXT:    vins.f16 s14, s2
-; CHECK-LV-NEXT:    vmov.f32 s1, s3
-; CHECK-LV-NEXT:    vins.f16 s19, s15
-; CHECK-LV-NEXT:    vins.f16 s17, s9
-; CHECK-LV-NEXT:    vins.f16 s0, s12
-; CHECK-LV-NEXT:    vmov.f32 s2, s10
-; CHECK-LV-NEXT:    vmov.f32 s3, s13
-; CHECK-LV-NEXT:    vins.f16 s6, s7
-; CHECK-LV-NEXT:    vmov.f32 s7, s14
-; CHECK-LV-NEXT:    vadd.f16 q0, q0, q4
-; CHECK-LV-NEXT:    vadd.f16 q0, q0, q1
-; CHECK-LV-NEXT:    vstrw.32 q0, [r1]
-; CHECK-LV-NEXT:    vpop {d8, d9}
-; CHECK-LV-NEXT:    bx lr
-;
-; CHECK-LIS-LABEL: vld3_v8f16:
-; CHECK-LIS:       @ %bb.0: @ %entry
-; CHECK-LIS-NEXT:    .vsave {d8, d9}
-; CHECK-LIS-NEXT:    vpush {d8, d9}
-; CHECK-LIS-NEXT:    vldrw.u32 q0, [r0]
-; CHECK-LIS-NEXT:    vldrw.u32 q2, [r0, #16]
-; CHECK-LIS-NEXT:    vldrw.u32 q4, [r0, #32]
-; CHECK-LIS-NEXT:    vmov.f32 s4, s1
-; CHECK-LIS-NEXT:    vmovx.f16 s6, s2
-; CHECK-LIS-NEXT:    vins.f16 s4, s6
-; CHECK-LIS-NEXT:    vmov.f32 s5, s8
-; CHECK-LIS-NEXT:    vmovx.f16 s6, s9
-; CHECK-LIS-NEXT:    vmovx.f16 s8, s8
-; CHECK-LIS-NEXT:    vmovx.f16 s13, s3
-; CHECK-LIS-NEXT:    vins.f16 s5, s6
-; CHECK-LIS-NEXT:    vins.f16 s3, s8
-; CHECK-LIS-NEXT:    vmov.f32 s6, s11
-; CHECK-LIS-NEXT:    vmovx.f16 s12, s16
-; CHECK-LIS-NEXT:    vmovx.f16 s8, s11
-; CHECK-LIS-NEXT:    vmovx.f16 s14, s10
-; CHECK-LIS-NEXT:    vins.f16 s6, s12
-; CHECK-LIS-NEXT:    vmovx.f16 s12, s0
-; CHECK-LIS-NEXT:    vins.f16 s10, s8
-; CHECK-LIS-NEXT:    vmovx.f16 s8, s18
-; CHECK-LIS-NEXT:    vmovx.f16 s15, s17
-; CHECK-LIS-NEXT:    vins.f16 s12, s2
-; CHECK-LIS-NEXT:    vmovx.f16 s2, s19
-; CHECK-LIS-NEXT:    vmovx.f16 s1, s1
-; CHECK-LIS-NEXT:    vins.f16 s17, s8
-; CHECK-LIS-NEXT:    vins.f16 s18, s2
-; CHECK-LIS-NEXT:    vins.f16 s0, s1
-; CHECK-LIS-NEXT:    vmov.f32 s1, s3
-; CHECK-LIS-NEXT:    vins.f16 s14, s16
-; CHECK-LIS-NEXT:    vins.f16 s15, s19
-; CHECK-LIS-NEXT:    vins.f16 s13, s9
-; CHECK-LIS-NEXT:    vmov.f32 s2, s10
-; CHECK-LIS-NEXT:    vmov.f32 s3, s17
-; CHECK-LIS-NEXT:    vmov.f32 s7, s18
-; CHECK-LIS-NEXT:    vadd.f16 q0, q0, q3
-; CHECK-LIS-NEXT:    vadd.f16 q0, q0, q1
-; CHECK-LIS-NEXT:    vstrw.32 q0, [r1]
-; CHECK-LIS-NEXT:    vpop {d8, d9}
-; CHECK-LIS-NEXT:    bx lr
+; CHECK-LABEL: vld3_v8f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    vldrw.u32 q2, [r0, #16]
+; CHECK-NEXT:    vldrw.u32 q0, [r0]
+; CHECK-NEXT:    vldrw.u32 q3, [r0, #32]
+; CHECK-NEXT:    vmov.f32 s5, s8
+; CHECK-NEXT:    vmovx.f16 s8, s8
+; CHECK-NEXT:    vmovx.f16 s17, s3
+; CHECK-NEXT:    vins.f16 s3, s8
+; CHECK-NEXT:    vmovx.f16 s8, s11
+; CHECK-NEXT:    vmovx.f16 s18, s10
+; CHECK-NEXT:    vmovx.f16 s16, s0
+; CHECK-NEXT:    vins.f16 s10, s8
+; CHECK-NEXT:    vmovx.f16 s6, s2
+; CHECK-NEXT:    vmov.f32 s4, s1
+; CHECK-NEXT:    vmovx.f16 s8, s14
+; CHECK-NEXT:    vmovx.f16 s19, s13
+; CHECK-NEXT:    vins.f16 s4, s6
+; CHECK-NEXT:    vmovx.f16 s6, s9
+; CHECK-NEXT:    vins.f16 s16, s2
+; CHECK-NEXT:    vmovx.f16 s2, s15
+; CHECK-NEXT:    vmovx.f16 s7, s12
+; CHECK-NEXT:    vins.f16 s18, s12
+; CHECK-NEXT:    vmovx.f16 s12, s1
+; CHECK-NEXT:    vins.f16 s13, s8
+; CHECK-NEXT:    vins.f16 s5, s6
+; CHECK-NEXT:    vmov.f32 s6, s11
+; CHECK-NEXT:    vins.f16 s14, s2
+; CHECK-NEXT:    vmov.f32 s1, s3
+; CHECK-NEXT:    vins.f16 s19, s15
+; CHECK-NEXT:    vins.f16 s17, s9
+; CHECK-NEXT:    vins.f16 s0, s12
+; CHECK-NEXT:    vmov.f32 s2, s10
+; CHECK-NEXT:    vmov.f32 s3, s13
+; CHECK-NEXT:    vins.f16 s6, s7
+; CHECK-NEXT:    vmov.f32 s7, s14
+; CHECK-NEXT:    vadd.f16 q0, q0, q4
+; CHECK-NEXT:    vadd.f16 q0, q0, q1
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    bx lr
 entry:
   %l1 = load <24 x half>, ptr %src, align 4
   %s1 = shufflevector <24 x half> %l1, <24 x half> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
@@ -1327,167 +1284,86 @@ entry:
 }
 
 define void @vld3_v16f16(ptr %src, ptr %dst) {
-; CHECK-LV-LABEL: vld3_v16f16:
-; CHECK-LV:       @ %bb.0: @ %entry
-; CHECK-LV-NEXT:    .vsave {d8, d9}
-; CHECK-LV-NEXT:    vpush {d8, d9}
-; CHECK-LV-NEXT:    vldrw.u32 q0, [r0, #48]
-; CHECK-LV-NEXT:    vldrw.u32 q2, [r0, #64]
-; CHECK-LV-NEXT:    vldrw.u32 q3, [r0, #80]
-; CHECK-LV-NEXT:    vmovx.f16 s6, s2
-; CHECK-LV-NEXT:    vmov.f32 s4, s1
-; CHECK-LV-NEXT:    vins.f16 s4, s6
-; CHECK-LV-NEXT:    vmovx.f16 s6, s9
-; CHECK-LV-NEXT:    vmov.f32 s5, s8
-; CHECK-LV-NEXT:    vmovx.f16 s7, s12
-; CHECK-LV-NEXT:    vins.f16 s5, s6
-; CHECK-LV-NEXT:    vmov.f32 s6, s11
-; CHECK-LV-NEXT:    vins.f16 s6, s7
-; CHECK-LV-NEXT:    vmovx.f16 s16, s15
-; CHECK-LV-NEXT:    vmov.f32 s7, s14
-; CHECK-LV-NEXT:    vmovx.f16 s17, s3
-; CHECK-LV-NEXT:    vins.f16 s7, s16
-; CHECK-LV-NEXT:    vmovx.f16 s16, s0
-; CHECK-LV-NEXT:    vins.f16 s16, s2
-; CHECK-LV-NEXT:    vmovx.f16 s2, s1
-; CHECK-LV-NEXT:    vins.f16 s0, s2
-; CHECK-LV-NEXT:    vmovx.f16 s2, s8
-; CHECK-LV-NEXT:    vins.f16 s3, s2
-; CHECK-LV-NEXT:    vmovx.f16 s2, s11
-; CHECK-LV-NEXT:    vmovx.f16 s18, s10
-; CHECK-LV-NEXT:    vins.f16 s10, s2
-; CHECK-LV-NEXT:    vmovx.f16 s2, s14
-; CHECK-LV-NEXT:    vmovx.f16 s19, s13
-; CHECK-LV-NEXT:    vins.f16 s13, s2
-; CHECK-LV-NEXT:    vmov.f32 s1, s3
-; CHECK-LV-NEXT:    vins.f16 s18, s12
-; CHECK-LV-NEXT:    vins.f16 s19, s15
-; CHECK-LV-NEXT:    vmov.f32 s3, s13
-; CHECK-LV-NEXT:    vins.f16 s17, s9
-; CHECK-LV-NEXT:    vmov.f32 s2, s10
-; CHECK-LV-NEXT:    vldrw.u32 q3, [r0, #16]
-; CHECK-LV-NEXT:    vadd.f16 q0, q0, q4
-; CHECK-LV-NEXT:    vadd.f16 q2, q0, q1
-; CHECK-LV-NEXT:    vldrw.u32 q0, [r0]
-; CHECK-LV-NEXT:    vldrw.u32 q1, [r0, #32]
-; CHECK-LV-NEXT:    vstrw.32 q2, [r1, #16]
-; CHECK-LV-NEXT:    vmovx.f16 s10, s2
-; CHECK-LV-NEXT:    vmov.f32 s8, s1
-; CHECK-LV-NEXT:    vins.f16 s8, s10
-; CHECK-LV-NEXT:    vmovx.f16 s10, s13
-; CHECK-LV-NEXT:    vmov.f32 s9, s12
-; CHECK-LV-NEXT:    vmovx.f16 s11, s4
-; CHECK-LV-NEXT:    vins.f16 s9, s10
-; CHECK-LV-NEXT:    vmov.f32 s10, s15
-; CHECK-LV-NEXT:    vins.f16 s10, s11
-; CHECK-LV-NEXT:    vmovx.f16 s16, s7
-; CHECK-LV-NEXT:    vmov.f32 s11, s6
-; CHECK-LV-NEXT:    vmovx.f16 s17, s3
-; CHECK-LV-NEXT:    vins.f16 s11, s16
-; CHECK-LV-NEXT:    vmovx.f16 s16, s0
-; CHECK-LV-NEXT:    vins.f16 s16, s2
-; CHECK-LV-NEXT:    vmovx.f16 s2, s1
-; CHECK-LV-NEXT:    vins.f16 s0, s2
-; CHECK-LV-NEXT:    vmovx.f16 s2, s12
-; CHECK-LV-NEXT:    vins.f16 s3, s2
-; CHECK-LV-NEXT:    vmovx.f16 s2, s15
-; CHECK-LV-NEXT:    vmovx.f16 s18, s14
-; CHECK-LV-NEXT:    vins.f16 s14, s2
-; CHECK-LV-NEXT:    vmovx.f16 s2, s6
-; CHECK-LV-NEXT:    vmovx.f16 s19, s5
-; CHECK-LV-NEXT:    vins.f16 s5, s2
-; CHECK-LV-NEXT:    vmov.f32 s1, s3
-; CHECK-LV-NEXT:    vins.f16 s18, s4
-; CHECK-LV-NEXT:    vins.f16 s19, s7
-; CHECK-LV-NEXT:    vins.f16 s17, s13
-; CHECK-LV-NEXT:    vmov.f32 s2, s14
-; CHECK-LV-NEXT:    vmov.f32 s3, s5
-; CHECK-LV-NEXT:    vadd.f16 q0, q0, q4
-; CHECK-LV-NEXT:    vadd.f16 q0, q0, q2
-; CHECK-LV-NEXT:    vstrw.32 q0, [r1]
-; CHECK-LV-NEXT:    vpop {d8, d9}
-; CHECK-LV-NEXT:    bx lr
-;
-; CHECK-LIS-LABEL: vld3_v16f16:
-; CHECK-LIS:       @ %bb.0: @ %entry
-; CHECK-LIS-NEXT:    .vsave {d8, d9}
-; CHECK-LIS-NEXT:    vpush {d8, d9}
-; CHECK-LIS-NEXT:    vldrw.u32 q0, [r0, #48]
-; CHECK-LIS-NEXT:    vldrw.u32 q2, [r0, #64]
-; CHECK-LIS-NEXT:    vldrw.u32 q3, [r0, #80]
-; CHECK-LIS-NEXT:    vmovx.f16 s6, s2
-; CHECK-LIS-NEXT:    vmov.f32 s4, s1
-; CHECK-LIS-NEXT:    vins.f16 s4, s6
-; CHECK-LIS-NEXT:    vmovx.f16 s6, s9
-; CHECK-LIS-NEXT:    vmov.f32 s5, s8
-; CHECK-LIS-NEXT:    vmovx.f16 s7, s12
-; CHECK-LIS-NEXT:    vins.f16 s5, s6
-; CHECK-LIS-NEXT:    vmov.f32 s6, s11
-; CHECK-LIS-NEXT:    vins.f16 s6, s7
-; CHECK-LIS-NEXT:    vmovx.f16 s16, s15
-; CHECK-LIS-NEXT:    vmov.f32 s7, s14
-; CHECK-LIS-NEXT:    vmovx.f16 s17, s3
-; CHECK-LIS-NEXT:    vins.f16 s7, s16
-; CHECK-LIS-NEXT:    vmovx.f16 s16, s0
-; CHECK-LIS-NEXT:    vins.f16 s16, s2
-; CHECK-LIS-NEXT:    vmovx.f16 s2, s1
-; CHECK-LIS-NEXT:    vins.f16 s0, s2
-; CHECK-LIS-NEXT:    vmovx.f16 s2, s8
-; CHECK-LIS-NEXT:    vins.f16 s3, s2
-; CHECK-LIS-NEXT:    vmovx.f16 s2, s11
-; CHECK-LIS-NEXT:    vmovx.f16 s18, s10
-; CHECK-LIS-NEXT:    vins.f16 s10, s2
-; CHECK-LIS-NEXT:    vmovx.f16 s2, s14
-; CHECK-LIS-NEXT:    vmovx.f16 s19, s13
-; CHECK-LIS-NEXT:    vins.f16 s13, s2
-; CHECK-LIS-NEXT:    vmov.f32 s1, s3
-; CHECK-LIS-NEXT:    vins.f16 s18, s12
-; CHECK-LIS-NEXT:    vins.f16 s19, s15
-; CHECK-LIS-NEXT:    vmov.f32 s3, s13
-; CHECK-LIS-NEXT:    vins.f16 s17, s9
-; CHECK-LIS-NEXT:    vmov.f32 s2, s10
-; CHECK-LIS-NEXT:    vldrw.u32 q3, [r0, #16]
-; CHECK-LIS-NEXT:    vadd.f16 q0, q0, q4
-; CHECK-LIS-NEXT:    vldrw.u32 q2, [r0, #32]
-; CHECK-LIS-NEXT:    vadd.f16 q1, q0, q1
-; CHECK-LIS-NEXT:    vldrw.u32 q0, [r0]
-; CHECK-LIS-NEXT:    vstrw.32 q1, [r1, #16]
-; CHECK-LIS-NEXT:    vmov.f32 s5, s12
-; CHECK-LIS-NEXT:    vmovx.f16 s6, s2
-; CHECK-LIS-NEXT:    vmov.f32 s4, s1
-; CHECK-LIS-NEXT:    vins.f16 s4, s6
-; CHECK-LIS-NEXT:    vmovx.f16 s6, s13
-; CHECK-LIS-NEXT:    vins.f16 s5, s6
-; CHECK-LIS-NEXT:    vmov.f32 s6, s15
-; CHECK-LIS-NEXT:    vmovx.f16 s7, s8
-; CHECK-LIS-NEXT:    vmovx.f16 s16, s11
-; CHECK-LIS-NEXT:    vins.f16 s6, s7
-; CHECK-LIS-NEXT:    vmov.f32 s7, s10
-; CHECK-LIS-NEXT:    vins.f16 s7, s16
-; CHECK-LIS-NEXT:    vmovx.f16 s16, s0
-; CHECK-LIS-NEXT:    vins.f16 s16, s2
-; CHECK-LIS-NEXT:    vmovx.f16 s2, s1
-; CHECK-LIS-NEXT:    vins.f16 s0, s2
-; CHECK-LIS-NEXT:    vmovx.f16 s2, s12
-; CHECK-LIS-NEXT:    vmovx.f16 s17, s3
-; CHECK-LIS-NEXT:    vins.f16 s3, s2
-; CHECK-LIS-NEXT:    vmovx.f16 s2, s15
-; CHECK-LIS-NEXT:    vmovx.f16 s18, s14
-; CHECK-LIS-NEXT:    vins.f16 s14, s2
-; CHECK-LIS-NEXT:    vmovx.f16 s2, s10
-; CHECK-LIS-NEXT:    vmovx.f16 s19, s9
-; CHECK-LIS-NEXT:    vins.f16 s9, s2
-; CHECK-LIS-NEXT:    vmov.f32 s1, s3
-; CHECK-LIS-NEXT:    vins.f16 s18, s8
-; CHECK-LIS-NEXT:    vins.f16 s19, s11
-; CHECK-LIS-NEXT:    vins.f16 s17, s13
-; CHECK-LIS-NEXT:    vmov.f32 s2, s14
-; CHECK-LIS-NEXT:    vmov.f32 s3, s9
-; CHECK-LIS-NEXT:    vadd.f16 q0, q0, q4
-; CHECK-LIS-NEXT:    vadd.f16 q0, q0, q1
-; CHECK-LIS-NEXT:    vstrw.32 q0, [r1]
-; CHECK-LIS-NEXT:    vpop {d8, d9}
-; CHECK-LIS-NEXT:    bx lr
+; CHECK-LABEL: vld3_v16f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    vldrw.u32 q0, [r0, #48]
+; CHECK-NEXT:    vldrw.u32 q2, [r0, #64]
+; CHECK-NEXT:    vldrw.u32 q3, [r0, #80]
+; CHECK-NEXT:    vmovx.f16 s6, s2
+; CHECK-NEXT:    vmov.f32 s4, s1
+; CHECK-NEXT:    vins.f16 s4, s6
+; CHECK-NEXT:    vmovx.f16 s6, s9
+; CHECK-NEXT:    vmov.f32 s5, s8
+; CHECK-NEXT:    vmovx.f16 s7, s12
+; CHECK-NEXT:    vins.f16 s5, s6
+; CHECK-NEXT:    vmov.f32 s6, s11
+; CHECK-NEXT:    vins.f16 s6, s7
+; CHECK-NEXT:    vmovx.f16 s16, s15
+; CHECK-NEXT:    vmov.f32 s7, s14
+; CHECK-NEXT:    vmovx.f16 s17, s3
+; CHECK-NEXT:    vins.f16 s7, s16
+; CHECK-NEXT:    vmovx.f16 s16, s0
+; CHECK-NEXT:    vins.f16 s16, s2
+; CHECK-NEXT:    vmovx.f16 s2, s1
+; CHECK-NEXT:    vins.f16 s0, s2
+; CHECK-NEXT:    vmovx.f16 s2, s8
+; CHECK-NEXT:    vins.f16 s3, s2
+; CHECK-NEXT:    vmovx.f16 s2, s11
+; CHECK-NEXT:    vmovx.f16 s18, s10
+; CHECK-NEXT:    vins.f16 s10, s2
+; CHECK-NEXT:    vmovx.f16 s2, s14
+; CHECK-NEXT:    vmovx.f16 s19, s13
+; CHECK-NEXT:    vins.f16 s13, s2
+; CHECK-NEXT:    vmov.f32 s1, s3
+; CHECK-NEXT:    vins.f16 s18, s12
+; CHECK-NEXT:    vins.f16 s19, s15
+; CHECK-NEXT:    vins.f16 s17, s9
+; CHECK-NEXT:    vmov.f32 s2, s10
+; CHECK-NEXT:    vmov.f32 s3, s13
+; CHECK-NEXT:    vldrw.u32 q2, [r0, #16]
+; CHECK-NEXT:    vadd.f16 q0, q0, q4
+; CHECK-NEXT:    vadd.f16 q3, q0, q1
+; CHECK-NEXT:    vldrw.u32 q1, [r0]
+; CHECK-NEXT:    vldrw.u32 q0, [r0, #32]
+; CHECK-NEXT:    vstrw.32 q3, [r1, #16]
+; CHECK-NEXT:    vmovx.f16 s14, s6
+; CHECK-NEXT:    vmov.f32 s12, s5
+; CHECK-NEXT:    vins.f16 s12, s14
+; CHECK-NEXT:    vmovx.f16 s14, s9
+; CHECK-NEXT:    vmov.f32 s13, s8
+; CHECK-NEXT:    vmovx.f16 s18, s10
+; CHECK-NEXT:    vins.f16 s13, s14
+; CHECK-NEXT:    vmovx.f16 s15, s0
+; CHECK-NEXT:    vmov.f32 s14, s11
+; CHECK-NEXT:    vins.f16 s18, s0
+; CHECK-NEXT:    vins.f16 s14, s15
+; CHECK-NEXT:    vmovx.f16 s16, s3
+; CHECK-NEXT:    vmov.f32 s15, s2
+; CHECK-NEXT:    vmovx.f16 s0, s5
+; CHECK-NEXT:    vins.f16 s15, s16
+; CHECK-NEXT:    vmovx.f16 s16, s4
+; CHECK-NEXT:    vins.f16 s4, s0
+; CHECK-NEXT:    vmovx.f16 s0, s8
+; CHECK-NEXT:    vmovx.f16 s17, s7
+; CHECK-NEXT:    vins.f16 s7, s0
+; CHECK-NEXT:    vmovx.f16 s0, s11
+; CHECK-NEXT:    vmovx.f16 s19, s1
+; CHECK-NEXT:    vins.f16 s10, s0
+; CHECK-NEXT:    vmovx.f16 s0, s2
+; CHECK-NEXT:    vins.f16 s1, s0
+; CHECK-NEXT:    vins.f16 s16, s6
+; CHECK-NEXT:    vmov.f32 s5, s7
+; CHECK-NEXT:    vins.f16 s19, s3
+; CHECK-NEXT:    vins.f16 s17, s9
+; CHECK-NEXT:    vmov.f32 s6, s10
+; CHECK-NEXT:    vmov.f32 s7, s1
+; CHECK-NEXT:    vadd.f16 q0, q1, q4
+; CHECK-NEXT:    vadd.f16 q0, q0, q3
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    bx lr
 entry:
   %l1 = load <48 x half>, ptr %src, align 4
   %s1 = shufflevector <48 x half> %l1, <48 x half> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
diff --git a/llvm/test/CodeGen/Thumb2/mve-vld4.ll b/llvm/test/CodeGen/Thumb2/mve-vld4.ll
index b49f19e..bf16c5b 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vld4.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vld4.ll
@@ -360,50 +360,50 @@ define void @vld4_v8i16_align1(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11}
 ; CHECK-NEXT:    vldrb.u8 q1, [r0, #32]
-; CHECK-NEXT:    vldrb.u8 q2, [r0, #48]
 ; CHECK-NEXT:    vmovx.f16 s18, s5
 ; CHECK-NEXT:    vmovx.f16 s0, s7
 ; CHECK-NEXT:    vins.f16 s18, s0
-; CHECK-NEXT:    vmovx.f16 s19, s9
-; CHECK-NEXT:    vmovx.f16 s0, s11
+; CHECK-NEXT:    vldrb.u8 q0, [r0, #48]
 ; CHECK-NEXT:    vins.f16 s5, s7
-; CHECK-NEXT:    vins.f16 s19, s0
-; CHECK-NEXT:    vldrb.u8 q0, [r0]
-; CHECK-NEXT:    vins.f16 s9, s11
+; CHECK-NEXT:    vmovx.f16 s19, s1
+; CHECK-NEXT:    vmovx.f16 s8, s3
+; CHECK-NEXT:    vins.f16 s19, s8
+; CHECK-NEXT:    vldrb.u8 q2, [r0]
+; CHECK-NEXT:    vins.f16 s1, s3
 ; CHECK-NEXT:    vmov.f32 s22, s5
-; CHECK-NEXT:    vmovx.f16 s16, s1
-; CHECK-NEXT:    vmovx.f16 s12, s3
+; CHECK-NEXT:    vmovx.f16 s16, s9
+; CHECK-NEXT:    vmovx.f16 s12, s11
 ; CHECK-NEXT:    vins.f16 s16, s12
 ; CHECK-NEXT:    vldrb.u8 q3, [r0, #16]
-; CHECK-NEXT:    vins.f16 s1, s3
-; CHECK-NEXT:    vmov.f32 s23, s9
+; CHECK-NEXT:    vins.f16 s9, s11
+; CHECK-NEXT:    vmov.f32 s23, s1
 ; CHECK-NEXT:    vmovx.f16 s17, s13
 ; CHECK-NEXT:    vmovx.f16 s20, s15
 ; CHECK-NEXT:    vins.f16 s13, s15
 ; CHECK-NEXT:    vins.f16 s17, s20
-; CHECK-NEXT:    vmov.f32 s20, s1
+; CHECK-NEXT:    vmov.f32 s20, s9
 ; CHECK-NEXT:    vmovx.f16 s1, s6
 ; CHECK-NEXT:    vmov.f32 s21, s13
 ; CHECK-NEXT:    vadd.i16 q4, q5, q4
 ; CHECK-NEXT:    vmovx.f16 s22, s4
 ; CHECK-NEXT:    vins.f16 s22, s1
-; CHECK-NEXT:    vmovx.f16 s23, s8
-; CHECK-NEXT:    vmovx.f16 s1, s10
-; CHECK-NEXT:    vmovx.f16 s20, s0
-; CHECK-NEXT:    vins.f16 s23, s1
+; CHECK-NEXT:    vmovx.f16 s23, s0
 ; CHECK-NEXT:    vmovx.f16 s1, s2
+; CHECK-NEXT:    vmovx.f16 s20, s8
+; CHECK-NEXT:    vins.f16 s23, s1
+; CHECK-NEXT:    vmovx.f16 s1, s10
 ; CHECK-NEXT:    vins.f16 s20, s1
 ; CHECK-NEXT:    vmovx.f16 s21, s12
 ; CHECK-NEXT:    vmovx.f16 s1, s14
-; CHECK-NEXT:    vins.f16 s8, s10
-; CHECK-NEXT:    vins.f16 s4, s6
+; CHECK-NEXT:    vins.f16 s0, s2
 ; CHECK-NEXT:    vins.f16 s12, s14
+; CHECK-NEXT:    vins.f16 s4, s6
+; CHECK-NEXT:    vins.f16 s8, s10
 ; CHECK-NEXT:    vins.f16 s21, s1
-; CHECK-NEXT:    vins.f16 s0, s2
-; CHECK-NEXT:    vmov.f32 s3, s8
-; CHECK-NEXT:    vmov.f32 s1, s12
-; CHECK-NEXT:    vmov.f32 s2, s4
-; CHECK-NEXT:    vadd.i16 q0, q0, q5
+; CHECK-NEXT:    vmov.f32 s9, s12
+; CHECK-NEXT:    vmov.f32 s10, s4
+; CHECK-NEXT:    vmov.f32 s11, s0
+; CHECK-NEXT:    vadd.i16 q0, q2, q5
 ; CHECK-NEXT:    vadd.i16 q0, q0, q4
 ; CHECK-NEXT:    vstrw.32 q0, [r1]
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11}
@@ -1081,51 +1081,51 @@ define void @vld4_v8f16_align1(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
 ; CHECK-NEXT:    vldrb.u8 q0, [r0, #32]
-; CHECK-NEXT:    vldrb.u8 q2, [r0, #48]
-; CHECK-NEXT:    vmovx.f16 s18, s1
+; CHECK-NEXT:    vldrb.u8 q4, [r0, #16]
+; CHECK-NEXT:    vmovx.f16 s14, s1
 ; CHECK-NEXT:    vmovx.f16 s4, s3
-; CHECK-NEXT:    vins.f16 s18, s4
-; CHECK-NEXT:    vmovx.f16 s19, s9
-; CHECK-NEXT:    vmovx.f16 s4, s11
+; CHECK-NEXT:    vins.f16 s14, s4
+; CHECK-NEXT:    vldrb.u8 q1, [r0, #48]
 ; CHECK-NEXT:    vins.f16 s1, s3
-; CHECK-NEXT:    vins.f16 s19, s4
-; CHECK-NEXT:    vldrb.u8 q1, [r0]
 ; CHECK-NEXT:    vmovx.f16 s22, s0
+; CHECK-NEXT:    vmovx.f16 s15, s5
+; CHECK-NEXT:    vmovx.f16 s8, s7
+; CHECK-NEXT:    vins.f16 s15, s8
+; CHECK-NEXT:    vldrb.u8 q2, [r0]
 ; CHECK-NEXT:    vmovx.f16 s3, s2
-; CHECK-NEXT:    vmovx.f16 s16, s5
-; CHECK-NEXT:    vmovx.f16 s12, s7
-; CHECK-NEXT:    vins.f16 s16, s12
-; CHECK-NEXT:    vldrb.u8 q3, [r0, #16]
+; CHECK-NEXT:    vmovx.f16 s23, s4
+; CHECK-NEXT:    vmovx.f16 s12, s9
+; CHECK-NEXT:    vmovx.f16 s13, s11
+; CHECK-NEXT:    vins.f16 s12, s13
+; CHECK-NEXT:    vmovx.f16 s13, s17
+; CHECK-NEXT:    vmovx.f16 s20, s19
 ; CHECK-NEXT:    vins.f16 s22, s3
-; CHECK-NEXT:    vmovx.f16 s23, s8
-; CHECK-NEXT:    vmovx.f16 s17, s13
-; CHECK-NEXT:    vmovx.f16 s20, s15
-; CHECK-NEXT:    vmovx.f16 s3, s10
-; CHECK-NEXT:    vins.f16 s17, s20
-; CHECK-NEXT:    vins.f16 s23, s3
-; CHECK-NEXT:    vmovx.f16 s20, s4
 ; CHECK-NEXT:    vmovx.f16 s3, s6
-; CHECK-NEXT:    vins.f16 s9, s11
+; CHECK-NEXT:    vins.f16 s13, s20
+; CHECK-NEXT:    vins.f16 s23, s3
+; CHECK-NEXT:    vmovx.f16 s20, s8
+; CHECK-NEXT:    vmovx.f16 s3, s10
 ; CHECK-NEXT:    vins.f16 s5, s7
-; CHECK-NEXT:    vins.f16 s13, s15
+; CHECK-NEXT:    vins.f16 s9, s11
+; CHECK-NEXT:    vins.f16 s17, s19
 ; CHECK-NEXT:    vins.f16 s20, s3
-; CHECK-NEXT:    vmovx.f16 s21, s12
-; CHECK-NEXT:    vmovx.f16 s3, s14
-; CHECK-NEXT:    vins.f16 s8, s10
-; CHECK-NEXT:    vins.f16 s0, s2
-; CHECK-NEXT:    vins.f16 s12, s14
+; CHECK-NEXT:    vmovx.f16 s21, s16
+; CHECK-NEXT:    vmovx.f16 s3, s18
 ; CHECK-NEXT:    vins.f16 s4, s6
-; CHECK-NEXT:    vmov.f32 s24, s5
+; CHECK-NEXT:    vins.f16 s0, s2
+; CHECK-NEXT:    vins.f16 s16, s18
+; CHECK-NEXT:    vins.f16 s8, s10
+; CHECK-NEXT:    vmov.f32 s24, s9
 ; CHECK-NEXT:    vins.f16 s21, s3
 ; CHECK-NEXT:    vmov.f32 s26, s1
-; CHECK-NEXT:    vmov.f32 s27, s9
-; CHECK-NEXT:    vmov.f32 s25, s13
-; CHECK-NEXT:    vmov.f32 s6, s0
-; CHECK-NEXT:    vadd.f16 q4, q6, q4
-; CHECK-NEXT:    vmov.f32 s7, s8
-; CHECK-NEXT:    vmov.f32 s5, s12
-; CHECK-NEXT:    vadd.f16 q0, q1, q5
-; CHECK-NEXT:    vadd.f16 q0, q0, q4
+; CHECK-NEXT:    vmov.f32 s25, s17
+; CHECK-NEXT:    vmov.f32 s27, s5
+; CHECK-NEXT:    vmov.f32 s10, s0
+; CHECK-NEXT:    vadd.f16 q3, q6, q3
+; CHECK-NEXT:    vmov.f32 s9, s16
+; CHECK-NEXT:    vmov.f32 s11, s4
+; CHECK-NEXT:    vadd.f16 q0, q2, q5
+; CHECK-NEXT:    vadd.f16 q0, q0, q3
 ; CHECK-NEXT:    vstrw.32 q0, [r1]
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
 ; CHECK-NEXT:    bx lr
diff --git a/llvm/test/CodeGen/Thumb2/mve-vldst4.ll b/llvm/test/CodeGen/Thumb2/mve-vldst4.ll
index 2e51e9e..c82bc58 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vldst4.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vldst4.ll
@@ -6,118 +6,114 @@ define void @vldst4(ptr nocapture readonly %pIn, ptr nocapture %pOut, i32 %numRo
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .save {r7, lr}
 ; CHECK-NEXT:    push {r7, lr}
-; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    mul r12, r3, r2
 ; CHECK-NEXT:    lsrs.w r2, r12, #2
 ; CHECK-NEXT:    beq.w .LBB0_3
 ; CHECK-NEXT:  @ %bb.1: @ %vector.ph
 ; CHECK-NEXT:    mvn r3, #7
-; CHECK-NEXT:    ldr r2, [sp, #56]
+; CHECK-NEXT:    ldr r2, [sp, #72]
 ; CHECK-NEXT:    and.w r3, r3, r12, lsr #2
 ; CHECK-NEXT:    sub.w r12, r3, #8
 ; CHECK-NEXT:    movs r3, #1
 ; CHECK-NEXT:    add.w lr, r3, r12, lsr #3
 ; CHECK-NEXT:  .LBB0_2: @ %vector.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vldrh.u16 q1, [r0, #32]
-; CHECK-NEXT:    vldrh.u16 q4, [r0, #48]
-; CHECK-NEXT:    vldrh.u16 q3, [r0], #64
-; CHECK-NEXT:    vmovx.f16 s26, s4
-; CHECK-NEXT:    vins.f16 s4, s6
-; CHECK-NEXT:    vmovx.f16 s6, s6
+; CHECK-NEXT:    vldrh.u16 q3, [r0, #32]
+; CHECK-NEXT:    vldrh.u16 q0, [r0, #48]
+; CHECK-NEXT:    vldrh.u16 q4, [r0], #64
+; CHECK-NEXT:    vmovx.f16 s8, s15
+; CHECK-NEXT:    vmovx.f16 s10, s13
 ; CHECK-NEXT:    vldrh.u16 q5, [r0, #-48]
-; CHECK-NEXT:    vmovx.f16 s27, s16
-; CHECK-NEXT:    vins.f16 s26, s6
-; CHECK-NEXT:    vmovx.f16 s6, s18
-; CHECK-NEXT:    vmovx.f16 s8, s7
-; CHECK-NEXT:    vmovx.f16 s10, s5
-; CHECK-NEXT:    vmovx.f16 s24, s12
 ; CHECK-NEXT:    vins.f16 s10, s8
-; CHECK-NEXT:    vins.f16 s27, s6
-; CHECK-NEXT:    vmovx.f16 s6, s14
-; CHECK-NEXT:    vmovx.f16 s8, s19
-; CHECK-NEXT:    vmovx.f16 s11, s17
-; CHECK-NEXT:    vmov.f32 s0, s13
+; CHECK-NEXT:    vmovx.f16 s8, s3
+; CHECK-NEXT:    vmovx.f16 s11, s1
+; CHECK-NEXT:    vmov.f32 s7, s1
 ; CHECK-NEXT:    vins.f16 s11, s8
-; CHECK-NEXT:    vmovx.f16 s25, s20
-; CHECK-NEXT:    vins.f16 s24, s6
-; CHECK-NEXT:    vmovx.f16 s6, s22
-; CHECK-NEXT:    vmovx.f16 s1, s15
-; CHECK-NEXT:    vmovx.f16 s8, s13
-; CHECK-NEXT:    vins.f16 s20, s22
-; CHECK-NEXT:    vins.f16 s16, s18
-; CHECK-NEXT:    vmov.f32 s2, s5
-; CHECK-NEXT:    vins.f16 s25, s6
-; CHECK-NEXT:    vmov.f32 s3, s17
-; CHECK-NEXT:    vins.f16 s0, s15
+; CHECK-NEXT:    vmovx.f16 s1, s19
+; CHECK-NEXT:    vmovx.f16 s8, s17
+; CHECK-NEXT:    vmov.f32 s6, s13
 ; CHECK-NEXT:    vmovx.f16 s9, s21
+; CHECK-NEXT:    vmov.f32 s4, s17
+; CHECK-NEXT:    vins.f16 s21, s23
 ; CHECK-NEXT:    vins.f16 s8, s1
 ; CHECK-NEXT:    vmovx.f16 s1, s23
-; CHECK-NEXT:    vins.f16 s12, s14
-; CHECK-NEXT:    vins.f16 s21, s23
-; CHECK-NEXT:    vmov.f32 s14, s4
-; CHECK-NEXT:    vmov.f32 s15, s16
 ; CHECK-NEXT:    vins.f16 s9, s1
-; CHECK-NEXT:    vmov.f32 s13, s20
-; CHECK-NEXT:    vmul.f16 q6, q6, r2
-; CHECK-NEXT:    vmul.f16 q3, q3, r2
-; CHECK-NEXT:    vins.f16 s2, s7
-; CHECK-NEXT:    vins.f16 s3, s19
-; CHECK-NEXT:    vmov.f32 s1, s21
-; CHECK-NEXT:    vmul.f16 q0, q0, r2
-; CHECK-NEXT:    vmovx.f16 s4, s12
-; CHECK-NEXT:    vmovx.f16 s6, s24
+; CHECK-NEXT:    vins.f16 s6, s15
+; CHECK-NEXT:    vins.f16 s7, s3
+; CHECK-NEXT:    vins.f16 s4, s19
+; CHECK-NEXT:    vmov.f32 s5, s21
+; CHECK-NEXT:    vmovx.f16 s27, s0
+; CHECK-NEXT:    vmul.f16 q1, q1, r2
+; CHECK-NEXT:    vins.f16 s0, s2
+; CHECK-NEXT:    vmovx.f16 s2, s2
 ; CHECK-NEXT:    vmul.f16 q2, q2, r2
-; CHECK-NEXT:    vmovx.f16 s7, s0
-; CHECK-NEXT:    vins.f16 s0, s8
+; CHECK-NEXT:    vmovx.f16 s3, s4
+; CHECK-NEXT:    vins.f16 s4, s8
 ; CHECK-NEXT:    vmovx.f16 s8, s8
-; CHECK-NEXT:    vins.f16 s4, s6
-; CHECK-NEXT:    vmovx.f16 s5, s1
-; CHECK-NEXT:    vmovx.f16 s6, s9
-; CHECK-NEXT:    vins.f16 s7, s8
-; CHECK-NEXT:    vins.f16 s5, s6
-; CHECK-NEXT:    vmovx.f16 s6, s13
-; CHECK-NEXT:    vmovx.f16 s8, s25
-; CHECK-NEXT:    vins.f16 s6, s8
-; CHECK-NEXT:    vmovx.f16 s19, s2
-; CHECK-NEXT:    vmovx.f16 s8, s10
-; CHECK-NEXT:    vmovx.f16 s18, s14
-; CHECK-NEXT:    vins.f16 s19, s8
-; CHECK-NEXT:    vmovx.f16 s8, s26
-; CHECK-NEXT:    vins.f16 s18, s8
-; CHECK-NEXT:    vmovx.f16 s23, s3
-; CHECK-NEXT:    vmovx.f16 s8, s11
-; CHECK-NEXT:    vins.f16 s14, s26
-; CHECK-NEXT:    vins.f16 s23, s8
-; CHECK-NEXT:    vmovx.f16 s22, s15
-; CHECK-NEXT:    vins.f16 s15, s27
-; CHECK-NEXT:    vmovx.f16 s8, s27
-; CHECK-NEXT:    vins.f16 s12, s24
-; CHECK-NEXT:    vins.f16 s13, s25
-; CHECK-NEXT:    vins.f16 s2, s10
-; CHECK-NEXT:    vins.f16 s3, s11
-; CHECK-NEXT:    vins.f16 s1, s9
-; CHECK-NEXT:    vins.f16 s22, s8
-; CHECK-NEXT:    vmov q2, q3
-; CHECK-NEXT:    vmov q6, q0
-; CHECK-NEXT:    vmov.f32 s10, s4
-; CHECK-NEXT:    vmov.f32 s11, s7
-; CHECK-NEXT:    vmov.f32 s9, s0
-; CHECK-NEXT:    vmov.f32 s17, s2
-; CHECK-NEXT:    vmov.f32 s16, s14
-; CHECK-NEXT:    vmov.f32 s21, s3
-; CHECK-NEXT:    vstrh.16 q4, [r1, #32]
-; CHECK-NEXT:    vmov.f32 s20, s15
-; CHECK-NEXT:    vmov.f32 s7, s5
-; CHECK-NEXT:    vstrh.16 q5, [r1, #48]
-; CHECK-NEXT:    vstrh.16 q2, [r1], #64
-; CHECK-NEXT:    vmov.f32 s4, s13
-; CHECK-NEXT:    vmov.f32 s5, s25
-; CHECK-NEXT:    vstrh.16 q1, [r1, #-48]
+; CHECK-NEXT:    vmovx.f16 s24, s16
+; CHECK-NEXT:    vins.f16 s27, s2
+; CHECK-NEXT:    vmovx.f16 s2, s18
+; CHECK-NEXT:    vins.f16 s3, s8
+; CHECK-NEXT:    vmovx.f16 s26, s12
+; CHECK-NEXT:    vmovx.f16 s25, s20
+; CHECK-NEXT:    vins.f16 s20, s22
+; CHECK-NEXT:    vmovx.f16 s8, s14
+; CHECK-NEXT:    vins.f16 s24, s2
+; CHECK-NEXT:    vmovx.f16 s2, s22
+; CHECK-NEXT:    vins.f16 s12, s14
+; CHECK-NEXT:    vins.f16 s16, s18
+; CHECK-NEXT:    vins.f16 s26, s8
+; CHECK-NEXT:    vins.f16 s25, s2
+; CHECK-NEXT:    vmov.f32 s18, s12
+; CHECK-NEXT:    vmov.f32 s17, s20
+; CHECK-NEXT:    vmul.f16 q6, q6, r2
+; CHECK-NEXT:    vmov.f32 s19, s0
+; CHECK-NEXT:    vmovx.f16 s0, s24
+; CHECK-NEXT:    vmul.f16 q4, q4, r2
+; CHECK-NEXT:    vmovx.f16 s15, s5
+; CHECK-NEXT:    vmovx.f16 s2, s16
+; CHECK-NEXT:    vmovx.f16 s14, s17
+; CHECK-NEXT:    vins.f16 s2, s0
+; CHECK-NEXT:    vmovx.f16 s0, s9
+; CHECK-NEXT:    vins.f16 s15, s0
+; CHECK-NEXT:    vmovx.f16 s0, s25
+; CHECK-NEXT:    vins.f16 s14, s0
+; CHECK-NEXT:    vmovx.f16 s23, s6
+; CHECK-NEXT:    vmovx.f16 s0, s10
+; CHECK-NEXT:    vmovx.f16 s22, s18
+; CHECK-NEXT:    vins.f16 s23, s0
+; CHECK-NEXT:    vmovx.f16 s0, s26
+; CHECK-NEXT:    vins.f16 s22, s0
+; CHECK-NEXT:    vmovx.f16 s31, s7
+; CHECK-NEXT:    vmovx.f16 s0, s11
+; CHECK-NEXT:    vins.f16 s6, s10
+; CHECK-NEXT:    vins.f16 s18, s26
+; CHECK-NEXT:    vins.f16 s7, s11
+; CHECK-NEXT:    vins.f16 s31, s0
+; CHECK-NEXT:    vmovx.f16 s30, s19
+; CHECK-NEXT:    vins.f16 s19, s27
+; CHECK-NEXT:    vmovx.f16 s0, s27
+; CHECK-NEXT:    vins.f16 s16, s24
+; CHECK-NEXT:    vins.f16 s5, s9
+; CHECK-NEXT:    vins.f16 s17, s25
+; CHECK-NEXT:    vins.f16 s30, s0
+; CHECK-NEXT:    vmov.f32 s1, s4
+; CHECK-NEXT:    vmov.f32 s0, s16
+; CHECK-NEXT:    vmov.f32 s21, s6
+; CHECK-NEXT:    vmov.f32 s20, s18
+; CHECK-NEXT:    vmov.f32 s29, s7
+; CHECK-NEXT:    vstrh.16 q5, [r1, #32]
+; CHECK-NEXT:    vmov.f32 s28, s19
+; CHECK-NEXT:    vstrh.16 q7, [r1, #48]
+; CHECK-NEXT:    vstrh.16 q0, [r1], #64
+; CHECK-NEXT:    vmov.f32 s12, s17
+; CHECK-NEXT:    vmov.f32 s13, s5
+; CHECK-NEXT:    vstrh.16 q3, [r1, #-48]
 ; CHECK-NEXT:    le lr, .LBB0_2
 ; CHECK-NEXT:  .LBB0_3: @ %while.end
-; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    pop {r7, pc}
 entry:
   %tmp.0.extract.trunc = trunc i32 %scale.coerce to i16
diff --git a/llvm/test/CodeGen/Thumb2/mve-vst2.ll b/llvm/test/CodeGen/Thumb2/mve-vst2.ll
index 57d08a7..f5a129f 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vst2.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vst2.ll
@@ -503,25 +503,28 @@ entry:
 define void @vst2_v4f16(ptr %src, ptr %dst) {
 ; CHECK-LABEL: vst2_v4f16:
 ; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
 ; CHECK-NEXT:    ldrd r2, r12, [r0]
 ; CHECK-NEXT:    ldrd r3, r0, [r0, #8]
-; CHECK-NEXT:    vmov.32 q0[0], r2
-; CHECK-NEXT:    vmov.32 q1[0], r3
+; CHECK-NEXT:    vmov.32 q4[0], r2
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vmov.32 q3[0], r3
+; CHECK-NEXT:    vmov q2, q3
 ; CHECK-NEXT:    vmov.32 q0[1], r12
-; CHECK-NEXT:    vmov.32 q1[1], r0
-; CHECK-NEXT:    vmovx.f16 s2, s0
-; CHECK-NEXT:    vins.f16 s0, s4
-; CHECK-NEXT:    vmovx.f16 s4, s4
-; CHECK-NEXT:    vins.f16 s2, s4
-; CHECK-NEXT:    vmovx.f16 s4, s1
-; CHECK-NEXT:    vins.f16 s1, s5
-; CHECK-NEXT:    vmovx.f16 s6, s5
-; CHECK-NEXT:    vmov q2, q0
-; CHECK-NEXT:    vins.f16 s4, s6
-; CHECK-NEXT:    vmov.f32 s9, s2
-; CHECK-NEXT:    vmov.f32 s10, s1
-; CHECK-NEXT:    vmov.f32 s11, s4
-; CHECK-NEXT:    vstrh.16 q2, [r1]
+; CHECK-NEXT:    vmov.32 q2[1], r0
+; CHECK-NEXT:    vmovx.f16 s0, s12
+; CHECK-NEXT:    vmovx.f16 s5, s16
+; CHECK-NEXT:    vmov.f32 s4, s16
+; CHECK-NEXT:    vins.f16 s5, s0
+; CHECK-NEXT:    vmovx.f16 s7, s1
+; CHECK-NEXT:    vins.f16 s1, s9
+; CHECK-NEXT:    vmovx.f16 s0, s9
+; CHECK-NEXT:    vins.f16 s4, s12
+; CHECK-NEXT:    vins.f16 s7, s0
+; CHECK-NEXT:    vmov.f32 s6, s1
+; CHECK-NEXT:    vstrh.16 q1, [r1]
+; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    bx lr
 entry:
   %l1 = load <4 x half>, ptr %src, align 4
diff --git a/llvm/test/CodeGen/Thumb2/mve-vst3.ll b/llvm/test/CodeGen/Thumb2/mve-vst3.ll
index 85317e1..903069b 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vst3.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vst3.ll
@@ -12,13 +12,11 @@ define void @vst3_v2i32(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    ldrd r3, r2, [r0, #8]
 ; CHECK-NEXT:    ldrd r4, r0, [r0, #16]
 ; CHECK-NEXT:    vmov q1[2], q1[0], lr, r3
-; CHECK-NEXT:    str r2, [r1, #16]
-; CHECK-NEXT:    vmov.32 q0[0], r4
+; CHECK-NEXT:    strd r2, r0, [r1, #16]
 ; CHECK-NEXT:    vmov q1[3], q1[1], r12, r2
-; CHECK-NEXT:    vmov.32 q0[1], r0
+; CHECK-NEXT:    vmov.32 q0[0], r4
 ; CHECK-NEXT:    vmov.f32 s8, s4
 ; CHECK-NEXT:    vmov.f32 s9, s6
-; CHECK-NEXT:    str r0, [r1, #20]
 ; CHECK-NEXT:    vmov.f32 s10, s0
 ; CHECK-NEXT:    vmov.f32 s11, s5
 ; CHECK-NEXT:    vstrw.32 q2, [r1]
@@ -337,51 +335,49 @@ define void @vst3_v8i16(ptr %src, ptr %dst) {
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11}
-; CHECK-NEXT:    vldrw.u32 q1, [r0]
+; CHECK-NEXT:    vldrw.u32 q0, [r0]
 ; CHECK-NEXT:    vldrw.u32 q2, [r0, #16]
-; CHECK-NEXT:    vmov.f32 s12, s7
+; CHECK-NEXT:    vmov.f32 s12, s3
 ; CHECK-NEXT:    vmov.u16 r2, q2[5]
-; CHECK-NEXT:    vmov.16 q0[0], r2
+; CHECK-NEXT:    vmov.16 q1[0], r2
 ; CHECK-NEXT:    vins.f16 s12, s11
-; CHECK-NEXT:    vmov.f32 s1, s12
+; CHECK-NEXT:    vmov.f32 s5, s12
 ; CHECK-NEXT:    vmov.u16 r2, q2[7]
 ; CHECK-NEXT:    vldrw.u32 q3, [r0, #32]
-; CHECK-NEXT:    vmov.16 q0[6], r2
-; CHECK-NEXT:    vmov.f32 s2, s7
+; CHECK-NEXT:    vmov.16 q1[6], r2
+; CHECK-NEXT:    vmov.f32 s20, s0
 ; CHECK-NEXT:    vmov.u16 r0, q2[3]
-; CHECK-NEXT:    vmovx.f16 s7, s14
+; CHECK-NEXT:    vmovx.f16 s6, s14
 ; CHECK-NEXT:    vmov.16 q4[2], r0
-; CHECK-NEXT:    vins.f16 s0, s7
-; CHECK-NEXT:    vmovx.f16 s7, s15
-; CHECK-NEXT:    vins.f16 s3, s7
-; CHECK-NEXT:    vmov.f32 s7, s6
+; CHECK-NEXT:    vins.f16 s4, s6
+; CHECK-NEXT:    vmovx.f16 s6, s3
+; CHECK-NEXT:    vmovx.f16 s3, s15
+; CHECK-NEXT:    vins.f16 s15, s6
+; CHECK-NEXT:    vmov.f32 s6, s15
+; CHECK-NEXT:    vins.f16 s7, s3
+; CHECK-NEXT:    vmov.f32 s3, s2
+; CHECK-NEXT:    vmovx.f16 s15, s1
 ; CHECK-NEXT:    vmovx.f16 s2, s2
-; CHECK-NEXT:    vins.f16 s7, s10
-; CHECK-NEXT:    vmov.f32 s20, s4
-; CHECK-NEXT:    vins.f16 s15, s2
-; CHECK-NEXT:    vmov.f32 s18, s7
 ; CHECK-NEXT:    vins.f16 s20, s8
-; CHECK-NEXT:    vmov.f32 s7, s6
-; CHECK-NEXT:    vmovx.f16 s6, s5
-; CHECK-NEXT:    vmov.f32 s2, s15
-; CHECK-NEXT:    vmovx.f16 s15, s13
-; CHECK-NEXT:    vins.f16 s13, s6
-; CHECK-NEXT:    vmovx.f16 s6, s7
 ; CHECK-NEXT:    vmov.u16 r0, q2[1]
-; CHECK-NEXT:    vmovx.f16 s4, s4
-; CHECK-NEXT:    vins.f16 s14, s6
-; CHECK-NEXT:    vmovx.f16 s6, s12
+; CHECK-NEXT:    vmovx.f16 s0, s0
+; CHECK-NEXT:    vmovx.f16 s16, s13
+; CHECK-NEXT:    vins.f16 s14, s2
+; CHECK-NEXT:    vins.f16 s3, s10
+; CHECK-NEXT:    vmovx.f16 s2, s12
 ; CHECK-NEXT:    vmov.16 q5[4], r0
-; CHECK-NEXT:    vins.f16 s5, s9
-; CHECK-NEXT:    vins.f16 s12, s4
-; CHECK-NEXT:    vins.f16 s17, s15
+; CHECK-NEXT:    vins.f16 s1, s9
+; CHECK-NEXT:    vins.f16 s12, s0
+; CHECK-NEXT:    vins.f16 s13, s15
+; CHECK-NEXT:    vins.f16 s17, s16
 ; CHECK-NEXT:    vmov.f32 s16, s13
-; CHECK-NEXT:    vins.f16 s22, s6
+; CHECK-NEXT:    vmov.f32 s18, s3
+; CHECK-NEXT:    vins.f16 s22, s2
 ; CHECK-NEXT:    vmov.f32 s19, s14
-; CHECK-NEXT:    vstrw.32 q0, [r1, #32]
-; CHECK-NEXT:    vmov.f32 s23, s5
-; CHECK-NEXT:    vstrw.32 q4, [r1, #16]
+; CHECK-NEXT:    vstrw.32 q1, [r1, #32]
 ; CHECK-NEXT:    vmov.f32 s21, s12
+; CHECK-NEXT:    vstrw.32 q4, [r1, #16]
+; CHECK-NEXT:    vmov.f32 s23, s1
 ; CHECK-NEXT:    vstrw.32 q5, [r1]
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11}
 ; CHECK-NEXT:    bx lr
@@ -405,110 +401,105 @@ define void @vst3_v16i16(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    .pad #48
 ; CHECK-NEXT:    sub sp, #48
-; CHECK-NEXT:    vldrw.u32 q2, [r0]
-; CHECK-NEXT:    vldrw.u32 q1, [r0, #32]
-; CHECK-NEXT:    vldrw.u32 q7, [r0, #80]
-; CHECK-NEXT:    vmov.f32 s0, s11
-; CHECK-NEXT:    vmov.u16 r2, q1[5]
-; CHECK-NEXT:    vmov.16 q3[0], r2
-; CHECK-NEXT:    vins.f16 s0, s7
-; CHECK-NEXT:    vmov.f32 s2, s11
-; CHECK-NEXT:    vmov.u16 r2, q1[7]
-; CHECK-NEXT:    vmov.f64 d12, d4
-; CHECK-NEXT:    vstrw.32 q1, [sp, #32] @ 16-byte Spill
-; CHECK-NEXT:    vldrw.u32 q1, [r0, #16]
-; CHECK-NEXT:    vmov.f32 s26, s10
+; CHECK-NEXT:    vldrw.u32 q2, [r0, #32]
+; CHECK-NEXT:    vldrw.u32 q7, [r0]
+; CHECK-NEXT:    vldrw.u32 q3, [r0, #16]
+; CHECK-NEXT:    vldrw.u32 q6, [r0, #80]
+; CHECK-NEXT:    vmov.f32 s0, s31
+; CHECK-NEXT:    vmov.u16 r2, q2[5]
+; CHECK-NEXT:    vmov.16 q1[0], r2
+; CHECK-NEXT:    vins.f16 s0, s11
+; CHECK-NEXT:    vstrw.32 q2, [sp, #32] @ 16-byte Spill
+; CHECK-NEXT:    vmov.u16 r2, q2[7]
 ; CHECK-NEXT:    vldrw.u32 q2, [r0, #64]
-; CHECK-NEXT:    vmov.f32 s13, s0
-; CHECK-NEXT:    vstrw.32 q6, [sp] @ 16-byte Spill
-; CHECK-NEXT:    vmov.16 q3[6], r2
+; CHECK-NEXT:    vmov.f32 s5, s0
+; CHECK-NEXT:    vmov.16 q1[6], r2
+; CHECK-NEXT:    vmov.f32 s20, s15
 ; CHECK-NEXT:    vmovx.f16 s0, s10
-; CHECK-NEXT:    vins.f16 s12, s0
-; CHECK-NEXT:    vmovx.f16 s0, s2
-; CHECK-NEXT:    vmov.f32 s14, s11
-; CHECK-NEXT:    vins.f16 s14, s0
-; CHECK-NEXT:    vmov.f32 s20, s7
-; CHECK-NEXT:    vmov q0, q3
-; CHECK-NEXT:    vldrw.u32 q3, [r0, #48]
-; CHECK-NEXT:    vmov.u16 r2, q3[5]
-; CHECK-NEXT:    vins.f16 s20, s15
+; CHECK-NEXT:    vmov.f32 s6, s11
+; CHECK-NEXT:    vins.f16 s4, s0
+; CHECK-NEXT:    vmovx.f16 s0, s31
+; CHECK-NEXT:    vins.f16 s6, s0
+; CHECK-NEXT:    vmovx.f16 s11, s11
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:    vldrw.u32 q1, [r0, #48]
+; CHECK-NEXT:    vins.f16 s3, s11
+; CHECK-NEXT:    vmovx.f16 s15, s15
+; CHECK-NEXT:    vmov.u16 r2, q1[5]
+; CHECK-NEXT:    vins.f16 s20, s7
 ; CHECK-NEXT:    vmov.16 q4[0], r2
-; CHECK-NEXT:    vmov.u16 r2, q3[7]
+; CHECK-NEXT:    vmov.u16 r2, q1[7]
 ; CHECK-NEXT:    vmov.f32 s17, s20
-; CHECK-NEXT:    vmovx.f16 s20, s31
+; CHECK-NEXT:    vmovx.f16 s20, s27
 ; CHECK-NEXT:    vmov.16 q4[6], r2
-; CHECK-NEXT:    vmov.f32 s18, s7
-; CHECK-NEXT:    vmovx.f16 s7, s30
-; CHECK-NEXT:    vins.f16 s16, s7
-; CHECK-NEXT:    vmovx.f16 s7, s18
-; CHECK-NEXT:    vins.f16 s31, s7
-; CHECK-NEXT:    vmovx.f16 s7, s11
-; CHECK-NEXT:    vins.f16 s3, s7
-; CHECK-NEXT:    vins.f16 s19, s20
 ; CHECK-NEXT:    vstrw.32 q0, [sp, #16] @ 16-byte Spill
+; CHECK-NEXT:    vins.f16 s19, s20
+; CHECK-NEXT:    vmov.f64 d10, d14
 ; CHECK-NEXT:    vldrw.u32 q0, [sp, #32] @ 16-byte Reload
-; CHECK-NEXT:    vmov.f32 s20, s24
-; CHECK-NEXT:    vmovx.f16 s11, s8
-; CHECK-NEXT:    vmov.f32 s7, s25
-; CHECK-NEXT:    vins.f16 s20, s0
+; CHECK-NEXT:    vins.f16 s27, s15
+; CHECK-NEXT:    vmovx.f16 s18, s26
+; CHECK-NEXT:    vins.f16 s28, s0
 ; CHECK-NEXT:    vmov.u16 r0, q0[1]
-; CHECK-NEXT:    vins.f16 s7, s1
+; CHECK-NEXT:    vmov.f32 s22, s30
+; CHECK-NEXT:    vmov.16 q7[4], r0
+; CHECK-NEXT:    vmovx.f16 s15, s20
+; CHECK-NEXT:    vstrw.32 q5, [sp] @ 16-byte Spill
+; CHECK-NEXT:    vmovx.f16 s20, s8
+; CHECK-NEXT:    vmov.u16 r0, q1[1]
+; CHECK-NEXT:    vins.f16 s30, s20
+; CHECK-NEXT:    vmov.f32 s20, s12
+; CHECK-NEXT:    vins.f16 s20, s4
+; CHECK-NEXT:    vmov.f32 s11, s21
 ; CHECK-NEXT:    vmov.16 q5[4], r0
-; CHECK-NEXT:    vmov.u16 r0, q3[1]
-; CHECK-NEXT:    vmov.f32 s23, s7
-; CHECK-NEXT:    vmovx.f16 s7, s24
-; CHECK-NEXT:    vmov.f32 s24, s4
-; CHECK-NEXT:    vins.f16 s8, s7
-; CHECK-NEXT:    vins.f16 s24, s12
-; CHECK-NEXT:    vmov.f32 s21, s8
-; CHECK-NEXT:    vmov.f32 s8, s5
-; CHECK-NEXT:    vmov.16 q6[4], r0
-; CHECK-NEXT:    vins.f16 s8, s13
-; CHECK-NEXT:    vmovx.f16 s4, s4
-; CHECK-NEXT:    vmov.f32 s27, s8
-; CHECK-NEXT:    vmovx.f16 s8, s28
-; CHECK-NEXT:    vins.f16 s28, s4
-; CHECK-NEXT:    vmov.f32 s4, s6
-; CHECK-NEXT:    vmov.u16 r0, q3[3]
-; CHECK-NEXT:    vins.f16 s4, s14
+; CHECK-NEXT:    vmov.u16 r0, q1[3]
+; CHECK-NEXT:    vins.f16 s11, s1
 ; CHECK-NEXT:    vmov.16 q0[2], r0
-; CHECK-NEXT:    vins.f16 s26, s8
-; CHECK-NEXT:    vmov.f32 s2, s4
-; CHECK-NEXT:    vmovx.f16 s4, s29
-; CHECK-NEXT:    vins.f16 s1, s4
-; CHECK-NEXT:    vmovx.f16 s4, s6
-; CHECK-NEXT:    vmovx.f16 s0, s5
-; CHECK-NEXT:    vins.f16 s30, s4
+; CHECK-NEXT:    vmovx.f16 s12, s12
+; CHECK-NEXT:    vins.f16 s8, s15
+; CHECK-NEXT:    vmov.f32 s31, s11
+; CHECK-NEXT:    vmovx.f16 s11, s24
+; CHECK-NEXT:    vins.f16 s24, s12
+; CHECK-NEXT:    vmov.f32 s12, s14
+; CHECK-NEXT:    vmovx.f16 s2, s25
+; CHECK-NEXT:    vmov.f32 s29, s8
+; CHECK-NEXT:    vmov.f32 s8, s13
+; CHECK-NEXT:    vins.f16 s1, s2
+; CHECK-NEXT:    vmovx.f16 s2, s14
+; CHECK-NEXT:    vins.f16 s12, s6
+; CHECK-NEXT:    vins.f16 s8, s5
+; CHECK-NEXT:    vins.f16 s26, s2
+; CHECK-NEXT:    vmovx.f16 s0, s13
+; CHECK-NEXT:    vmov.f32 s2, s12
+; CHECK-NEXT:    vldrw.u32 q3, [sp, #32] @ 16-byte Reload
 ; CHECK-NEXT:    vldrw.u32 q1, [sp] @ 16-byte Reload
-; CHECK-NEXT:    vins.f16 s29, s0
-; CHECK-NEXT:    vmov.f32 s0, s29
-; CHECK-NEXT:    vins.f16 s22, s11
-; CHECK-NEXT:    vmov.f32 s3, s30
-; CHECK-NEXT:    vstrw.32 q5, [r1]
-; CHECK-NEXT:    vmov.f32 s29, s5
-; CHECK-NEXT:    vstrw.32 q0, [r1, #64]
-; CHECK-NEXT:    vmov.f32 s30, s6
+; CHECK-NEXT:    vmov.f32 s23, s8
+; CHECK-NEXT:    vins.f16 s25, s0
 ; CHECK-NEXT:    vmov.f32 s8, s6
-; CHECK-NEXT:    vldrw.u32 q1, [sp, #32] @ 16-byte Reload
-; CHECK-NEXT:    vmov.f32 s18, s31
-; CHECK-NEXT:    vmov.u16 r0, q1[3]
-; CHECK-NEXT:    vins.f16 s8, s6
-; CHECK-NEXT:    vmov.16 q1[2], r0
-; CHECK-NEXT:    vmov.f32 s25, s28
-; CHECK-NEXT:    vmov.f32 s6, s8
-; CHECK-NEXT:    vmovx.f16 s8, s9
-; CHECK-NEXT:    vmovx.f16 s4, s29
-; CHECK-NEXT:    vins.f16 s5, s8
-; CHECK-NEXT:    vmovx.f16 s8, s30
-; CHECK-NEXT:    vins.f16 s9, s4
-; CHECK-NEXT:    vins.f16 s10, s8
-; CHECK-NEXT:    vmov.f32 s4, s9
-; CHECK-NEXT:    vmov.f32 s7, s10
-; CHECK-NEXT:    vstrw.32 q6, [r1, #48]
-; CHECK-NEXT:    vstrw.32 q1, [r1, #16]
+; CHECK-NEXT:    vmov.u16 r0, q3[3]
+; CHECK-NEXT:    vins.f16 s8, s14
+; CHECK-NEXT:    vmov.16 q3[2], r0
+; CHECK-NEXT:    vmovx.f16 s14, s9
+; CHECK-NEXT:    vmovx.f16 s12, s5
+; CHECK-NEXT:    vins.f16 s13, s14
+; CHECK-NEXT:    vmovx.f16 s14, s6
+; CHECK-NEXT:    vins.f16 s9, s12
+; CHECK-NEXT:    vins.f16 s10, s14
 ; CHECK-NEXT:    vldrw.u32 q1, [sp, #16] @ 16-byte Reload
-; CHECK-NEXT:    vstrw.32 q4, [r1, #80]
+; CHECK-NEXT:    vins.f16 s16, s18
+; CHECK-NEXT:    vmov.f32 s18, s27
+; CHECK-NEXT:    vins.f16 s22, s11
+; CHECK-NEXT:    vmov.f32 s21, s24
 ; CHECK-NEXT:    vstrw.32 q1, [r1, #32]
+; CHECK-NEXT:    vmov.f32 s0, s25
+; CHECK-NEXT:    vstrw.32 q5, [r1, #48]
+; CHECK-NEXT:    vmov.f32 s3, s26
+; CHECK-NEXT:    vstrw.32 q4, [r1, #80]
+; CHECK-NEXT:    vmov.f32 s12, s9
+; CHECK-NEXT:    vstrw.32 q0, [r1, #64]
+; CHECK-NEXT:    vmov.f32 s14, s8
+; CHECK-NEXT:    vstrw.32 q7, [r1]
+; CHECK-NEXT:    vmov.f32 s15, s10
+; CHECK-NEXT:    vstrw.32 q3, [r1, #16]
 ; CHECK-NEXT:    add sp, #48
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    bx lr
@@ -628,13 +619,13 @@ define void @vst3_v8i8(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    vmovx.f16 s2, s7
 ; CHECK-NEXT:    vmovx.f16 s3, s11
 ; CHECK-NEXT:    vins.f16 s3, s2
-; CHECK-NEXT:    vmovx.f16 s16, s1
+; CHECK-NEXT:    vmovx.f16 s2, s1
 ; CHECK-NEXT:    vmov.f32 s1, s15
-; CHECK-NEXT:    vmovx.f16 s18, s15
+; CHECK-NEXT:    vmovx.f16 s16, s15
+; CHECK-NEXT:    vins.f16 s1, s2
 ; CHECK-NEXT:    vmov.f32 s2, s7
 ; CHECK-NEXT:    vmov.u16 r0, q3[0]
-; CHECK-NEXT:    vins.f16 s1, s16
-; CHECK-NEXT:    vins.f16 s2, s18
+; CHECK-NEXT:    vins.f16 s2, s16
 ; CHECK-NEXT:    vmov.8 q4[0], r0
 ; CHECK-NEXT:    vmov.u16 r0, q2[0]
 ; CHECK-NEXT:    vmov.8 q4[1], r0
@@ -1183,16 +1174,18 @@ define void @vst3_v2f16(ptr %src, ptr %dst) {
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    ldrd r2, r3, [r0]
 ; CHECK-NEXT:    ldr r0, [r0, #8]
-; CHECK-NEXT:    vmov.32 q0[0], r2
+; CHECK-NEXT:    vmov.32 q1[0], r2
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:    vmov.32 q2[0], r0
 ; CHECK-NEXT:    vmov.32 q0[1], r3
-; CHECK-NEXT:    vmov.32 q1[0], r0
-; CHECK-NEXT:    vmovx.f16 s2, s0
-; CHECK-NEXT:    vmovx.f16 s6, s4
-; CHECK-NEXT:    vins.f16 s4, s2
+; CHECK-NEXT:    vmovx.f16 s2, s4
+; CHECK-NEXT:    vmov.f32 s0, s4
+; CHECK-NEXT:    vmovx.f16 s4, s8
+; CHECK-NEXT:    vins.f16 s8, s2
 ; CHECK-NEXT:    vmovx.f16 s2, s1
 ; CHECK-NEXT:    vins.f16 s0, s1
-; CHECK-NEXT:    vmov.f32 s1, s4
-; CHECK-NEXT:    vins.f16 s2, s6
+; CHECK-NEXT:    vmov.f32 s1, s8
+; CHECK-NEXT:    vins.f16 s2, s4
 ; CHECK-NEXT:    vmov r3, s2
 ; CHECK-NEXT:    vmov r0, r2, d0
 ; CHECK-NEXT:    stm r1!, {r0, r2, r3}
@@ -1219,25 +1212,27 @@ define void @vst3_v4f16(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    ldrd r3, r2, [r0, #8]
 ; CHECK-NEXT:    ldrd r4, r0, [r0, #16]
 ; CHECK-NEXT:    vmov q0[2], q0[0], lr, r3
-; CHECK-NEXT:    vmov.32 q1[0], r4
+; CHECK-NEXT:    vmov.32 q2[0], r4
 ; CHECK-NEXT:    vmov q0[3], q0[1], r12, r2
-; CHECK-NEXT:    vmov.32 q1[1], r0
+; CHECK-NEXT:    vmov q1, q2
 ; CHECK-NEXT:    vmovx.f16 s9, s3
-; CHECK-NEXT:    vmovx.f16 s6, s0
+; CHECK-NEXT:    vmov.32 q1[1], r0
+; CHECK-NEXT:    vmovx.f16 s4, s0
+; CHECK-NEXT:    vmov.f32 s6, s8
 ; CHECK-NEXT:    vins.f16 s0, s2
-; CHECK-NEXT:    vmovx.f16 s8, s4
+; CHECK-NEXT:    vins.f16 s6, s4
+; CHECK-NEXT:    vmovx.f16 s4, s8
 ; CHECK-NEXT:    vmovx.f16 s2, s2
-; CHECK-NEXT:    vins.f16 s4, s6
-; CHECK-NEXT:    vmovx.f16 s6, s1
-; CHECK-NEXT:    vins.f16 s2, s8
+; CHECK-NEXT:    vins.f16 s2, s4
+; CHECK-NEXT:    vmovx.f16 s4, s1
 ; CHECK-NEXT:    vmovx.f16 s8, s5
-; CHECK-NEXT:    vins.f16 s5, s6
+; CHECK-NEXT:    vins.f16 s5, s4
 ; CHECK-NEXT:    vins.f16 s9, s8
 ; CHECK-NEXT:    vmov.f32 s8, s5
 ; CHECK-NEXT:    vins.f16 s1, s3
 ; CHECK-NEXT:    vmov r0, r2, d4
 ; CHECK-NEXT:    vmov q2, q0
-; CHECK-NEXT:    vmov.f32 s9, s4
+; CHECK-NEXT:    vmov.f32 s9, s6
 ; CHECK-NEXT:    vmov.f32 s10, s2
 ; CHECK-NEXT:    vmov.f32 s11, s1
 ; CHECK-NEXT:    vstrw.32 q2, [r1]
@@ -1259,58 +1254,58 @@ entry:
 define void @vst3_v8f16(ptr %src, ptr %dst) {
 ; CHECK-LABEL: vst3_v8f16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .vsave {d8, d9}
-; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    vldrw.u32 q4, [r0, #16]
-; CHECK-NEXT:    vldrw.u32 q3, [r0]
-; CHECK-NEXT:    vmovx.f16 s0, s18
-; CHECK-NEXT:    vmov.f32 s4, s15
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11}
+; CHECK-NEXT:    vldrw.u32 q5, [r0, #16]
+; CHECK-NEXT:    vldrw.u32 q1, [r0]
+; CHECK-NEXT:    vmovx.f16 s0, s22
+; CHECK-NEXT:    vmov.f32 s8, s7
 ; CHECK-NEXT:    vmov r2, s0
-; CHECK-NEXT:    vins.f16 s4, s19
+; CHECK-NEXT:    vins.f16 s8, s23
 ; CHECK-NEXT:    vmov.16 q0[0], r2
-; CHECK-NEXT:    vmovx.f16 s10, s16
-; CHECK-NEXT:    vmov.f32 s1, s4
-; CHECK-NEXT:    vmovx.f16 s4, s19
-; CHECK-NEXT:    vmov r2, s4
-; CHECK-NEXT:    vldrw.u32 q1, [r0, #32]
+; CHECK-NEXT:    vmov.f32 s16, s4
+; CHECK-NEXT:    vmov.f32 s1, s8
+; CHECK-NEXT:    vmovx.f16 s8, s23
+; CHECK-NEXT:    vmov r2, s8
+; CHECK-NEXT:    vldrw.u32 q2, [r0, #32]
 ; CHECK-NEXT:    vmov.16 q0[6], r2
-; CHECK-NEXT:    vmov r0, s10
-; CHECK-NEXT:    vmovx.f16 s8, s6
-; CHECK-NEXT:    vmovx.f16 s2, s15
-; CHECK-NEXT:    vins.f16 s0, s8
-; CHECK-NEXT:    vmovx.f16 s8, s7
-; CHECK-NEXT:    vins.f16 s3, s8
-; CHECK-NEXT:    vmov.f32 s8, s12
-; CHECK-NEXT:    vins.f16 s8, s16
-; CHECK-NEXT:    vins.f16 s7, s2
-; CHECK-NEXT:    vmov.f32 s2, s13
-; CHECK-NEXT:    vmov.16 q2[4], r0
-; CHECK-NEXT:    vins.f16 s2, s17
-; CHECK-NEXT:    vmov.f32 s11, s2
-; CHECK-NEXT:    vmovx.f16 s2, s12
-; CHECK-NEXT:    vmovx.f16 s12, s4
-; CHECK-NEXT:    vins.f16 s4, s2
-; CHECK-NEXT:    vins.f16 s10, s12
-; CHECK-NEXT:    vmovx.f16 s12, s17
-; CHECK-NEXT:    vmov.f32 s2, s14
-; CHECK-NEXT:    vmov r0, s12
-; CHECK-NEXT:    vins.f16 s2, s18
-; CHECK-NEXT:    vmov.16 q4[2], r0
-; CHECK-NEXT:    vmovx.f16 s12, s5
-; CHECK-NEXT:    vmov.f32 s18, s2
-; CHECK-NEXT:    vmovx.f16 s2, s13
-; CHECK-NEXT:    vins.f16 s5, s2
-; CHECK-NEXT:    vmovx.f16 s2, s14
-; CHECK-NEXT:    vins.f16 s6, s2
-; CHECK-NEXT:    vmov.f32 s2, s7
-; CHECK-NEXT:    vmov.f32 s9, s4
-; CHECK-NEXT:    vins.f16 s17, s12
-; CHECK-NEXT:    vmov.f32 s16, s5
+; CHECK-NEXT:    vins.f16 s16, s20
+; CHECK-NEXT:    vmovx.f16 s2, s10
+; CHECK-NEXT:    vmovx.f16 s12, s11
+; CHECK-NEXT:    vins.f16 s0, s2
+; CHECK-NEXT:    vmovx.f16 s2, s7
+; CHECK-NEXT:    vins.f16 s11, s2
+; CHECK-NEXT:    vmovx.f16 s2, s20
+; CHECK-NEXT:    vmov r0, s2
+; CHECK-NEXT:    vmovx.f16 s2, s4
+; CHECK-NEXT:    vmovx.f16 s4, s8
+; CHECK-NEXT:    vins.f16 s8, s2
+; CHECK-NEXT:    vmovx.f16 s2, s21
+; CHECK-NEXT:    vmov.16 q4[4], r0
+; CHECK-NEXT:    vmov r0, s2
+; CHECK-NEXT:    vmovx.f16 s2, s5
+; CHECK-NEXT:    vins.f16 s3, s12
+; CHECK-NEXT:    vmov.f32 s12, s5
+; CHECK-NEXT:    vins.f16 s18, s4
+; CHECK-NEXT:    vmov.f32 s4, s6
+; CHECK-NEXT:    vmovx.f16 s14, s9
+; CHECK-NEXT:    vins.f16 s9, s2
+; CHECK-NEXT:    vmovx.f16 s2, s6
+; CHECK-NEXT:    vins.f16 s12, s21
+; CHECK-NEXT:    vins.f16 s4, s22
+; CHECK-NEXT:    vmov.16 q5[2], r0
+; CHECK-NEXT:    vins.f16 s10, s2
+; CHECK-NEXT:    vmov.f32 s2, s11
+; CHECK-NEXT:    vins.f16 s21, s14
+; CHECK-NEXT:    vmov.f32 s20, s9
+; CHECK-NEXT:    vmov.f32 s22, s4
 ; CHECK-NEXT:    vstrw.32 q0, [r1, #32]
-; CHECK-NEXT:    vmov.f32 s19, s6
-; CHECK-NEXT:    vstrw.32 q2, [r1]
-; CHECK-NEXT:    vstrw.32 q4, [r1, #16]
-; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    vmov.f32 s23, s10
+; CHECK-NEXT:    vmov.f32 s17, s8
+; CHECK-NEXT:    vstrw.32 q5, [r1, #16]
+; CHECK-NEXT:    vmov.f32 s19, s12
+; CHECK-NEXT:    vstrw.32 q4, [r1]
+; CHECK-NEXT:    vpop {d8, d9, d10, d11}
 ; CHECK-NEXT:    bx lr
 entry:
   %l1 = load <8 x half>, ptr %src, align 4
@@ -1330,128 +1325,118 @@ define void @vst3_v16f16(ptr %src, ptr %dst) {
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEXT:    .pad #48
-; CHECK-NEXT:    sub sp, #48
-; CHECK-NEXT:    vldrw.u32 q3, [r0, #16]
+; CHECK-NEXT:    .pad #72
+; CHECK-NEXT:    sub sp, #72
+; CHECK-NEXT:    vldrw.u32 q5, [r0, #16]
 ; CHECK-NEXT:    vldrw.u32 q1, [r0, #48]
-; CHECK-NEXT:    vldrw.u32 q6, [r0, #32]
-; CHECK-NEXT:    vmov.f32 s8, s12
-; CHECK-NEXT:    vmovx.f16 s2, s4
-; CHECK-NEXT:    vmov.f32 s0, s13
-; CHECK-NEXT:    vins.f16 s8, s4
-; CHECK-NEXT:    vmov r2, s2
-; CHECK-NEXT:    vins.f16 s0, s5
-; CHECK-NEXT:    vmov.16 q2[4], r2
-; CHECK-NEXT:    vmov q4, q3
-; CHECK-NEXT:    vmov.f32 s11, s0
-; CHECK-NEXT:    vmovx.f16 s0, s16
-; CHECK-NEXT:    vmov.f32 s12, s8
-; CHECK-NEXT:    vmov.f64 d11, d9
-; CHECK-NEXT:    vmov.f32 s21, s17
-; CHECK-NEXT:    vmov.f64 d7, d5
 ; CHECK-NEXT:    vldrw.u32 q2, [r0, #80]
+; CHECK-NEXT:    vldrw.u32 q7, [r0, #32]
+; CHECK-NEXT:    vmov.f32 s0, s20
+; CHECK-NEXT:    vldrw.u32 q4, [r0, #64]
+; CHECK-NEXT:    vins.f16 s0, s4
 ; CHECK-NEXT:    vmovx.f16 s2, s8
+; CHECK-NEXT:    vmov.f32 s12, s0
+; CHECK-NEXT:    vmov.f32 s0, s21
+; CHECK-NEXT:    vins.f16 s0, s5
+; CHECK-NEXT:    vstr s0, [sp, #68] @ 4-byte Spill
+; CHECK-NEXT:    vmovx.f16 s0, s4
+; CHECK-NEXT:    vmov r2, s0
+; CHECK-NEXT:    vmovx.f16 s0, s20
+; CHECK-NEXT:    vmov.16 q3[4], r2
 ; CHECK-NEXT:    vins.f16 s8, s0
 ; CHECK-NEXT:    vins.f16 s14, s2
-; CHECK-NEXT:    vmovx.f16 s2, s24
-; CHECK-NEXT:    vstrw.32 q3, [sp, #16] @ 16-byte Spill
-; CHECK-NEXT:    vldrw.u32 q3, [r0]
-; CHECK-NEXT:    vmov r2, s2
-; CHECK-NEXT:    vmov.f32 s16, s12
-; CHECK-NEXT:    vmov.f32 s0, s13
-; CHECK-NEXT:    vins.f16 s16, s24
-; CHECK-NEXT:    vmov.16 q4[4], r2
-; CHECK-NEXT:    vins.f16 s0, s25
-; CHECK-NEXT:    vmov.f32 s19, s0
-; CHECK-NEXT:    vmovx.f16 s0, s12
-; CHECK-NEXT:    vmov.f64 d15, d13
-; CHECK-NEXT:    vmov.f32 s17, s13
-; CHECK-NEXT:    vmov.f32 s24, s16
-; CHECK-NEXT:    vmov.f64 d13, d9
-; CHECK-NEXT:    vmov.f64 d9, d7
-; CHECK-NEXT:    vldrw.u32 q3, [r0, #64]
-; CHECK-NEXT:    vmovx.f16 s2, s12
-; CHECK-NEXT:    vins.f16 s12, s0
-; CHECK-NEXT:    vins.f16 s26, s2
+; CHECK-NEXT:    vldrw.u32 q0, [r0]
+; CHECK-NEXT:    vstrw.32 q3, [sp, #48] @ 16-byte Spill
+; CHECK-NEXT:    vmov.f32 s12, s0
+; CHECK-NEXT:    vmov q6, q0
+; CHECK-NEXT:    vmovx.f16 s0, s28
+; CHECK-NEXT:    vins.f16 s12, s28
+; CHECK-NEXT:    vmov r2, s0
+; CHECK-NEXT:    vmovx.f16 s2, s16
+; CHECK-NEXT:    vmov.16 q3[4], r2
+; CHECK-NEXT:    vmovx.f16 s0, s24
+; CHECK-NEXT:    vins.f16 s14, s2
 ; CHECK-NEXT:    vmovx.f16 s2, s30
-; CHECK-NEXT:    vmov.f32 s0, s19
-; CHECK-NEXT:    vstrw.32 q6, [sp, #32] @ 16-byte Spill
+; CHECK-NEXT:    vins.f16 s16, s0
+; CHECK-NEXT:    vmov.f32 s0, s27
 ; CHECK-NEXT:    vmov r0, s2
+; CHECK-NEXT:    vstrw.32 q3, [sp, #32] @ 16-byte Spill
 ; CHECK-NEXT:    vins.f16 s0, s31
-; CHECK-NEXT:    vmov.f32 s29, s25
-; CHECK-NEXT:    vmov.16 q6[0], r0
-; CHECK-NEXT:    vmov.f32 s25, s0
+; CHECK-NEXT:    vmov.16 q3[0], r0
+; CHECK-NEXT:    vmov.f32 s13, s0
 ; CHECK-NEXT:    vmovx.f16 s0, s31
 ; CHECK-NEXT:    vmov r0, s0
-; CHECK-NEXT:    vmovx.f16 s0, s14
-; CHECK-NEXT:    vmov.16 q6[6], r0
-; CHECK-NEXT:    vmovx.f16 s2, s15
-; CHECK-NEXT:    vins.f16 s24, s0
-; CHECK-NEXT:    vmovx.f16 s0, s19
-; CHECK-NEXT:    vins.f16 s15, s0
+; CHECK-NEXT:    vmovx.f16 s0, s18
+; CHECK-NEXT:    vmov.16 q3[6], r0
+; CHECK-NEXT:    vmov.f32 s4, s1
+; CHECK-NEXT:    vins.f16 s12, s0
+; CHECK-NEXT:    vmovx.f16 s0, s27
+; CHECK-NEXT:    vmovx.f16 s2, s19
+; CHECK-NEXT:    vins.f16 s19, s0
+; CHECK-NEXT:    vins.f16 s4, s29
 ; CHECK-NEXT:    vmovx.f16 s0, s6
+; CHECK-NEXT:    vstr s4, [sp, #4] @ 4-byte Spill
 ; CHECK-NEXT:    vmov.f32 s4, s23
-; CHECK-NEXT:    vins.f16 s27, s2
 ; CHECK-NEXT:    vmov r0, s0
+; CHECK-NEXT:    vins.f16 s15, s2
 ; CHECK-NEXT:    vins.f16 s4, s7
 ; CHECK-NEXT:    vmov.16 q0[0], r0
-; CHECK-NEXT:    vstrw.32 q7, [sp] @ 16-byte Spill
 ; CHECK-NEXT:    vmov.f32 s1, s4
 ; CHECK-NEXT:    vmovx.f16 s4, s7
 ; CHECK-NEXT:    vmov r0, s4
-; CHECK-NEXT:    vmovx.f16 s4, s10
-; CHECK-NEXT:    vmov.16 q0[6], r0
-; CHECK-NEXT:    vldrw.u32 q7, [sp, #32] @ 16-byte Reload
-; CHECK-NEXT:    vins.f16 s0, s4
 ; CHECK-NEXT:    vmovx.f16 s4, s11
-; CHECK-NEXT:    vmovx.f16 s2, s23
+; CHECK-NEXT:    vmov.16 q0[6], r0
+; CHECK-NEXT:    vstrw.32 q6, [sp, #8] @ 16-byte Spill
+; CHECK-NEXT:    vmovx.f16 s2, s10
 ; CHECK-NEXT:    vins.f16 s3, s4
-; CHECK-NEXT:    vmovx.f16 s4, s5
+; CHECK-NEXT:    vins.f16 s0, s2
+; CHECK-NEXT:    vmovx.f16 s2, s23
 ; CHECK-NEXT:    vins.f16 s11, s2
 ; CHECK-NEXT:    vmov.f32 s2, s22
-; CHECK-NEXT:    vmov r0, s4
 ; CHECK-NEXT:    vins.f16 s2, s6
+; CHECK-NEXT:    vldrw.u32 q6, [sp, #32] @ 16-byte Reload
+; CHECK-NEXT:    vstr s2, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT:    vmovx.f16 s2, s5
+; CHECK-NEXT:    vmov r0, s2
+; CHECK-NEXT:    vldr s27, [sp, #4] @ 4-byte Reload
 ; CHECK-NEXT:    vmov.16 q1[2], r0
-; CHECK-NEXT:    vmov.f32 s29, s12
+; CHECK-NEXT:    vmov.f32 s2, s11
 ; CHECK-NEXT:    vmovx.f16 s4, s21
-; CHECK-NEXT:    vmovx.f16 s12, s9
+; CHECK-NEXT:    vmovx.f16 s6, s9
 ; CHECK-NEXT:    vins.f16 s9, s4
 ; CHECK-NEXT:    vmovx.f16 s4, s22
+; CHECK-NEXT:    vldrw.u32 q5, [sp, #8] @ 16-byte Reload
 ; CHECK-NEXT:    vins.f16 s10, s4
-; CHECK-NEXT:    vmov.f32 s21, s17
-; CHECK-NEXT:    vmov.f32 s22, s18
-; CHECK-NEXT:    vins.f16 s5, s12
-; CHECK-NEXT:    vmov.f32 s4, s18
-; CHECK-NEXT:    vldrw.u32 q4, [sp] @ 16-byte Reload
-; CHECK-NEXT:    vstrw.32 q7, [sp, #32] @ 16-byte Spill
-; CHECK-NEXT:    vmov.f32 s6, s2
-; CHECK-NEXT:    vmovx.f16 s12, s17
-; CHECK-NEXT:    vins.f16 s4, s18
-; CHECK-NEXT:    vmov r0, s12
-; CHECK-NEXT:    vmovx.f16 s12, s13
-; CHECK-NEXT:    vmov.16 q7[2], r0
-; CHECK-NEXT:    vmov.f32 s2, s11
-; CHECK-NEXT:    vmov.f32 s30, s4
+; CHECK-NEXT:    vmovx.f16 s4, s29
+; CHECK-NEXT:    vins.f16 s5, s6
+; CHECK-NEXT:    vmov.f32 s11, s22
+; CHECK-NEXT:    vmovx.f16 s6, s17
+; CHECK-NEXT:    vmov r0, s4
 ; CHECK-NEXT:    vmovx.f16 s4, s21
-; CHECK-NEXT:    vins.f16 s13, s4
+; CHECK-NEXT:    vins.f16 s11, s30
+; CHECK-NEXT:    vmov.16 q7[2], r0
+; CHECK-NEXT:    vins.f16 s17, s4
 ; CHECK-NEXT:    vmovx.f16 s4, s22
-; CHECK-NEXT:    vins.f16 s14, s4
-; CHECK-NEXT:    vldrw.u32 q5, [sp, #16] @ 16-byte Reload
+; CHECK-NEXT:    vldrw.u32 q5, [sp, #48] @ 16-byte Reload
+; CHECK-NEXT:    vins.f16 s18, s4
+; CHECK-NEXT:    vins.f16 s29, s6
+; CHECK-NEXT:    vldr s23, [sp, #68] @ 4-byte Reload
+; CHECK-NEXT:    vldr s6, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    vmov.f32 s25, s16
+; CHECK-NEXT:    vmov.f32 s14, s19
 ; CHECK-NEXT:    vstrw.32 q0, [r1, #80]
-; CHECK-NEXT:    vldrw.u32 q0, [sp, #32] @ 16-byte Reload
-; CHECK-NEXT:    vmov.f32 s26, s15
-; CHECK-NEXT:    vins.f16 s29, s12
 ; CHECK-NEXT:    vmov.f32 s21, s8
-; CHECK-NEXT:    vstrw.32 q6, [r1, #32]
+; CHECK-NEXT:    vstrw.32 q3, [r1, #32]
 ; CHECK-NEXT:    vmov.f32 s4, s9
 ; CHECK-NEXT:    vstrw.32 q5, [r1, #48]
 ; CHECK-NEXT:    vmov.f32 s7, s10
-; CHECK-NEXT:    vstrw.32 q0, [r1]
-; CHECK-NEXT:    vmov.f32 s28, s13
+; CHECK-NEXT:    vstrw.32 q6, [r1]
+; CHECK-NEXT:    vmov.f32 s28, s17
 ; CHECK-NEXT:    vstrw.32 q1, [r1, #64]
-; CHECK-NEXT:    vmov.f32 s31, s14
+; CHECK-NEXT:    vmov.f32 s30, s11
+; CHECK-NEXT:    vmov.f32 s31, s18
 ; CHECK-NEXT:    vstrw.32 q7, [r1, #16]
-; CHECK-NEXT:    add sp, #48
+; CHECK-NEXT:    add sp, #72
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    bx lr
 entry:
diff --git a/llvm/test/CodeGen/Thumb2/mve-vst4.ll b/llvm/test/CodeGen/Thumb2/mve-vst4.ll
index b369044..9dba1cc 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vst4.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vst4.ll
@@ -397,55 +397,51 @@ define void @vst4_v8i16_align1(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
 ; CHECK-NEXT:    vldrw.u32 q1, [r0, #32]
 ; CHECK-NEXT:    vldrw.u32 q2, [r0, #48]
-; CHECK-NEXT:    vldrw.u32 q4, [r0, #16]
-; CHECK-NEXT:    vmovx.f16 s12, s5
-; CHECK-NEXT:    vmovx.f16 s0, s9
-; CHECK-NEXT:    vins.f16 s5, s9
-; CHECK-NEXT:    vins.f16 s12, s0
-; CHECK-NEXT:    vmov q0, q1
-; CHECK-NEXT:    vmovx.f16 s27, s4
-; CHECK-NEXT:    vins.f16 s4, s8
-; CHECK-NEXT:    vmov.f32 s3, s12
 ; CHECK-NEXT:    vldrw.u32 q3, [r0]
-; CHECK-NEXT:    vmov.f32 s5, s4
-; CHECK-NEXT:    vmovx.f16 s8, s8
-; CHECK-NEXT:    vmovx.f16 s0, s17
-; CHECK-NEXT:    vmovx.f16 s2, s13
-; CHECK-NEXT:    vins.f16 s27, s8
-; CHECK-NEXT:    vmovx.f16 s4, s12
-; CHECK-NEXT:    vmovx.f16 s8, s16
-; CHECK-NEXT:    vins.f16 s13, s17
-; CHECK-NEXT:    vins.f16 s12, s16
-; CHECK-NEXT:    vmov q5, q3
+; CHECK-NEXT:    vldrw.u32 q4, [r0, #16]
+; CHECK-NEXT:    vmovx.f16 s23, s4
 ; CHECK-NEXT:    vins.f16 s4, s8
-; CHECK-NEXT:    vmov.f32 s22, s4
-; CHECK-NEXT:    vmovx.f16 s4, s11
-; CHECK-NEXT:    vmov.f32 s23, s27
+; CHECK-NEXT:    vmov.f32 s21, s4
+; CHECK-NEXT:    vmovx.f16 s22, s12
+; CHECK-NEXT:    vmovx.f16 s4, s16
 ; CHECK-NEXT:    vmovx.f16 s27, s7
+; CHECK-NEXT:    vins.f16 s22, s4
+; CHECK-NEXT:    vmovx.f16 s4, s11
 ; CHECK-NEXT:    vins.f16 s7, s11
 ; CHECK-NEXT:    vins.f16 s27, s4
 ; CHECK-NEXT:    vmovx.f16 s26, s15
 ; CHECK-NEXT:    vmovx.f16 s4, s19
+; CHECK-NEXT:    vmovx.f16 s3, s5
+; CHECK-NEXT:    vins.f16 s5, s9
+; CHECK-NEXT:    vmovx.f16 s0, s9
 ; CHECK-NEXT:    vmov.f32 s25, s7
 ; CHECK-NEXT:    vins.f16 s26, s4
 ; CHECK-NEXT:    vmovx.f16 s7, s6
 ; CHECK-NEXT:    vmovx.f16 s4, s10
 ; CHECK-NEXT:    vins.f16 s6, s10
-; CHECK-NEXT:    vmov.f32 s21, s5
+; CHECK-NEXT:    vins.f16 s3, s0
+; CHECK-NEXT:    vmovx.f16 s2, s13
+; CHECK-NEXT:    vmovx.f16 s0, s17
+; CHECK-NEXT:    vmov.f32 s1, s5
+; CHECK-NEXT:    vmovx.f16 s8, s8
+; CHECK-NEXT:    vins.f16 s12, s16
 ; CHECK-NEXT:    vins.f16 s15, s19
 ; CHECK-NEXT:    vins.f16 s7, s4
 ; CHECK-NEXT:    vmov.f32 s5, s6
 ; CHECK-NEXT:    vmovx.f16 s6, s14
 ; CHECK-NEXT:    vmovx.f16 s4, s18
+; CHECK-NEXT:    vins.f16 s13, s17
 ; CHECK-NEXT:    vins.f16 s14, s18
 ; CHECK-NEXT:    vins.f16 s2, s0
 ; CHECK-NEXT:    vmov.f32 s0, s13
-; CHECK-NEXT:    vmov.f32 s24, s15
 ; CHECK-NEXT:    vins.f16 s6, s4
-; CHECK-NEXT:    vmov.f32 s4, s14
+; CHECK-NEXT:    vmov.f32 s24, s15
+; CHECK-NEXT:    vins.f16 s23, s8
+; CHECK-NEXT:    vmov.f32 s20, s12
 ; CHECK-NEXT:    vstrb.8 q6, [r1, #48]
-; CHECK-NEXT:    vstrb.8 q1, [r1, #32]
+; CHECK-NEXT:    vmov.f32 s4, s14
 ; CHECK-NEXT:    vstrb.8 q0, [r1, #16]
+; CHECK-NEXT:    vstrb.8 q1, [r1, #32]
 ; CHECK-NEXT:    vstrb.8 q5, [r1]
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
 ; CHECK-NEXT:    bx lr
@@ -1002,22 +998,28 @@ entry:
 define void @vst4_v2f16(ptr %src, ptr %dst) {
 ; CHECK-LABEL: vst4_v2f16:
 ; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
 ; CHECK-NEXT:    ldm.w r0, {r2, r3, r12}
-; CHECK-NEXT:    vmov.32 q1[0], r12
+; CHECK-NEXT:    vmov.32 q3[0], r12
+; CHECK-NEXT:    vmov q4, q3
+; CHECK-NEXT:    vmov.f32 s1, s12
 ; CHECK-NEXT:    ldr r0, [r0, #12]
-; CHECK-NEXT:    vmov.32 q0[0], r2
-; CHECK-NEXT:    vmov.32 q0[1], r3
-; CHECK-NEXT:    vmov.32 q1[1], r0
-; CHECK-NEXT:    vmovx.f16 s2, s0
-; CHECK-NEXT:    vmovx.f16 s6, s1
-; CHECK-NEXT:    vmovx.f16 s3, s4
+; CHECK-NEXT:    vmovx.f16 s3, s12
+; CHECK-NEXT:    vmov.32 q1[0], r2
+; CHECK-NEXT:    vmov q2, q1
+; CHECK-NEXT:    vmov.32 q4[1], r0
+; CHECK-NEXT:    vmov.32 q2[1], r3
+; CHECK-NEXT:    vmov.f32 s0, s4
+; CHECK-NEXT:    vmovx.f16 s2, s4
+; CHECK-NEXT:    vmovx.f16 s6, s9
+; CHECK-NEXT:    vmovx.f16 s4, s17
+; CHECK-NEXT:    vins.f16 s0, s9
+; CHECK-NEXT:    vins.f16 s1, s17
 ; CHECK-NEXT:    vins.f16 s2, s6
-; CHECK-NEXT:    vmovx.f16 s6, s5
-; CHECK-NEXT:    vins.f16 s4, s5
-; CHECK-NEXT:    vins.f16 s0, s1
-; CHECK-NEXT:    vins.f16 s3, s6
-; CHECK-NEXT:    vmov.f32 s1, s4
+; CHECK-NEXT:    vins.f16 s3, s4
 ; CHECK-NEXT:    vstrh.16 q0, [r1]
+; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    bx lr
 entry:
   %l1 = load <2 x half>, ptr %src, align 4
@@ -1155,61 +1157,57 @@ entry:
 define void @vst4_v8f16_align1(ptr %src, ptr %dst) {
 ; CHECK-LABEL: vst4_v8f16_align1:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .vsave {d9, d10, d11, d12, d13}
-; CHECK-NEXT:    vpush {d9, d10, d11, d12, d13}
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
 ; CHECK-NEXT:    vldrw.u32 q1, [r0, #32]
 ; CHECK-NEXT:    vldrw.u32 q5, [r0, #48]
-; CHECK-NEXT:    vldrw.u32 q6, [r0, #16]
 ; CHECK-NEXT:    vldrw.u32 q2, [r0]
-; CHECK-NEXT:    vmovx.f16 s0, s5
-; CHECK-NEXT:    vmovx.f16 s2, s21
-; CHECK-NEXT:    vins.f16 s0, s2
+; CHECK-NEXT:    vldrw.u32 q6, [r0, #16]
+; CHECK-NEXT:    vmovx.f16 s3, s5
+; CHECK-NEXT:    vmovx.f16 s0, s21
+; CHECK-NEXT:    vins.f16 s3, s0
 ; CHECK-NEXT:    vmovx.f16 s2, s9
-; CHECK-NEXT:    vmovx.f16 s12, s25
-; CHECK-NEXT:    vmovx.f16 s19, s4
-; CHECK-NEXT:    vins.f16 s2, s12
-; CHECK-NEXT:    vmovx.f16 s12, s20
-; CHECK-NEXT:    vins.f16 s19, s12
-; CHECK-NEXT:    vmovx.f16 s12, s8
-; CHECK-NEXT:    vmovx.f16 s14, s24
-; CHECK-NEXT:    vmovx.f16 s15, s7
-; CHECK-NEXT:    vins.f16 s12, s14
-; CHECK-NEXT:    vmovx.f16 s14, s23
-; CHECK-NEXT:    vins.f16 s15, s14
-; CHECK-NEXT:    vmovx.f16 s14, s11
-; CHECK-NEXT:    vmovx.f16 s1, s27
+; CHECK-NEXT:    vmovx.f16 s0, s25
+; CHECK-NEXT:    vmovx.f16 s15, s4
+; CHECK-NEXT:    vins.f16 s2, s0
+; CHECK-NEXT:    vmovx.f16 s0, s20
+; CHECK-NEXT:    vins.f16 s15, s0
+; CHECK-NEXT:    vmovx.f16 s14, s8
+; CHECK-NEXT:    vmovx.f16 s0, s24
+; CHECK-NEXT:    vmovx.f16 s19, s7
+; CHECK-NEXT:    vins.f16 s14, s0
+; CHECK-NEXT:    vmovx.f16 s0, s23
+; CHECK-NEXT:    vins.f16 s19, s0
+; CHECK-NEXT:    vmovx.f16 s18, s11
+; CHECK-NEXT:    vmovx.f16 s0, s27
 ; CHECK-NEXT:    vins.f16 s7, s23
-; CHECK-NEXT:    vins.f16 s14, s1
+; CHECK-NEXT:    vins.f16 s18, s0
 ; CHECK-NEXT:    vmovx.f16 s23, s6
-; CHECK-NEXT:    vmovx.f16 s1, s22
-; CHECK-NEXT:    vins.f16 s6, s22
+; CHECK-NEXT:    vmovx.f16 s0, s22
 ; CHECK-NEXT:    vins.f16 s5, s21
+; CHECK-NEXT:    vins.f16 s6, s22
 ; CHECK-NEXT:    vins.f16 s4, s20
-; CHECK-NEXT:    vins.f16 s23, s1
+; CHECK-NEXT:    vins.f16 s8, s24
+; CHECK-NEXT:    vins.f16 s11, s27
+; CHECK-NEXT:    vins.f16 s23, s0
 ; CHECK-NEXT:    vmovx.f16 s22, s10
 ; CHECK-NEXT:    vins.f16 s10, s26
-; CHECK-NEXT:    vmovx.f16 s1, s26
+; CHECK-NEXT:    vmovx.f16 s0, s26
 ; CHECK-NEXT:    vins.f16 s9, s25
-; CHECK-NEXT:    vins.f16 s8, s24
-; CHECK-NEXT:    vins.f16 s11, s27
-; CHECK-NEXT:    vmov q6, q1
-; CHECK-NEXT:    vins.f16 s22, s1
-; CHECK-NEXT:    vmov.f32 s1, s25
-; CHECK-NEXT:    vmov q6, q2
-; CHECK-NEXT:    vmov.f32 s3, s0
+; CHECK-NEXT:    vins.f16 s22, s0
 ; CHECK-NEXT:    vmov.f32 s0, s9
-; CHECK-NEXT:    vmov.f32 s26, s12
+; CHECK-NEXT:    vmov.f32 s1, s5
+; CHECK-NEXT:    vmov.f32 s13, s4
 ; CHECK-NEXT:    vstrb.8 q0, [r1, #16]
-; CHECK-NEXT:    vmov.f32 s25, s4
-; CHECK-NEXT:    vmov.f32 s27, s19
-; CHECK-NEXT:    vmov.f32 s13, s7
-; CHECK-NEXT:    vstrb.8 q6, [r1]
-; CHECK-NEXT:    vmov.f32 s12, s11
+; CHECK-NEXT:    vmov.f32 s12, s8
+; CHECK-NEXT:    vmov.f32 s17, s7
+; CHECK-NEXT:    vstrb.8 q3, [r1]
+; CHECK-NEXT:    vmov.f32 s16, s11
 ; CHECK-NEXT:    vmov.f32 s21, s6
-; CHECK-NEXT:    vstrb.8 q3, [r1, #48]
+; CHECK-NEXT:    vstrb.8 q4, [r1, #48]
 ; CHECK-NEXT:    vmov.f32 s20, s10
 ; CHECK-NEXT:    vstrb.8 q5, [r1, #32]
-; CHECK-NEXT:    vpop {d9, d10, d11, d12, d13}
+; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
 ; CHECK-NEXT:    bx lr
 entry:
   %l1 = load <8 x half>, ptr %src, align 4
diff --git a/llvm/test/CodeGen/VE/Scalar/select.ll b/llvm/test/CodeGen/VE/Scalar/select.ll
index 184513a..0a98b91 100644
--- a/llvm/test/CodeGen/VE/Scalar/select.ll
+++ b/llvm/test/CodeGen/VE/Scalar/select.ll
@@ -352,9 +352,9 @@ define fp128 @select_quad_mimm(i1 zeroext %0, fp128 %1) {
 ; CHECK-NEXT:    and %s1, %s1, (32)0
 ; CHECK-NEXT:    lea.sl %s1, .LCPI{{[0-9]+}}_0@hi(, %s1)
 ; CHECK-NEXT:    ld %s4, 8(, %s1)
-; CHECK-NEXT:    ld %s5, (, %s1)
+; CHECK-NEXT:    ld %s1, (, %s1)
 ; CHECK-NEXT:    cmov.w.ne %s2, %s4, %s0
-; CHECK-NEXT:    cmov.w.ne %s3, %s5, %s0
+; CHECK-NEXT:    cmov.w.ne %s3, %s1, %s0
 ; CHECK-NEXT:    or %s0, 0, %s2
 ; CHECK-NEXT:    or %s1, 0, %s3
 ; CHECK-NEXT:    b.l.t (, %s10)
diff --git a/llvm/test/CodeGen/VE/Scalar/va_caller.ll b/llvm/test/CodeGen/VE/Scalar/va_caller.ll
index 3cffc5e..421a4195 100644
--- a/llvm/test/CodeGen/VE/Scalar/va_caller.ll
+++ b/llvm/test/CodeGen/VE/Scalar/va_caller.ll
@@ -24,15 +24,15 @@ define i32 @caller() {
 ; CHECK-NEXT:    st %s1, 192(, %s11)
 ; CHECK-NEXT:    or %s1, 1, (0)1
 ; CHECK-NEXT:    st %s1, 184(, %s11)
-; CHECK-NEXT:    lea %s1, .LCPI{{[0-9]+}}_0@lo
-; CHECK-NEXT:    and %s1, %s1, (32)0
-; CHECK-NEXT:    lea.sl %s1, .LCPI{{[0-9]+}}_0@hi(, %s1)
-; CHECK-NEXT:    ld %s34, 8(, %s1)
-; CHECK-NEXT:    ld %s35, (, %s1)
 ; CHECK-NEXT:    st %s0, 176(, %s11)
-; CHECK-NEXT:    lea.sl %s0, 1086324736
-; CHECK-NEXT:    st %s0, 224(, %s11)
-; CHECK-NEXT:    st %s34, 280(, %s11)
+; CHECK-NEXT:    lea %s0, .LCPI{{[0-9]+}}_0@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s0,  .LCPI{{[0-9]+}}_0@hi(, %s0)
+; CHECK-NEXT:    ld %s2, 8(, %s0)
+; CHECK-NEXT:    lea.sl %s1, 1086324736
+; CHECK-NEXT:    st %s1, 224(, %s11)
+; CHECK-NEXT:    ld %s34, (, %s0)
+; CHECK-NEXT:    st %s2, 280(, %s11)
 ; CHECK-NEXT:    lea %s0, func@lo
 ; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    lea.sl %s12, func@hi(, %s0)
@@ -44,10 +44,13 @@ define i32 @caller() {
 ; CHECK-NEXT:    or %s5, 5, (0)1
 ; CHECK-NEXT:    lea.sl %s6, 1086324736
 ; CHECK-NEXT:    or %s7, 0, (0)1
-; CHECK-NEXT:    st %s35, 272(, %s11)
+; CHECK-NEXT:    st %s34, 272(, %s11)
 ; CHECK-NEXT:    bsic %s10, (, %s12)
 ; CHECK-NEXT:    or %s0, 0, (0)1
 ; CHECK-NEXT:    or %s11, 0, %s9
+; CHECK-NEXT:    ld %s10, 8(, %s11)
+; CHECK-NEXT:    ld %s9, (, %s11)
+; CHECK-NEXT:    b.l.t (, %s10)
   call i32 (i32, ...) @func(i32 0, i16 1, i8 2, i32 3, i16 4, i8 5, float 6.0, ptr null, i64 8, double 9.0, i128 10, fp128 0xLA000000000000000)
   ret i32 0
 }
diff --git a/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll b/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll
index 8595024..06cf968 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll
@@ -230,7 +230,6 @@ exit:
 define dso_local void @test5(i16 signext %0, i16 signext %1) nounwind {
 ; CHECK-LABEL: test5:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
 ; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT:    vmovups %zmm0, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb $1, -{{[0-9]+}}(%rsp)
@@ -239,7 +238,8 @@ define dso_local void @test5(i16 signext %0, i16 signext %1) nounwind {
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    movl $buf, %ecx
 ; CHECK-NEXT:    movl $32, %edx
-; CHECK-NEXT:    leal -1(%rsi), %r8d
+; CHECK-NEXT:    movl %esi, %r8d
+; CHECK-NEXT:    decl %r8d
 ; CHECK-NEXT:    jmp .LBB4_1
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB4_3: # %if.false
diff --git a/llvm/test/CodeGen/X86/avx512-ext.ll b/llvm/test/CodeGen/X86/avx512-ext.ll
index d19eaf4..c60d9a3 100644
--- a/llvm/test/CodeGen/X86/avx512-ext.ll
+++ b/llvm/test/CodeGen/X86/avx512-ext.ll
@@ -1897,7 +1897,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    movw $-5, %ax
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    kmovw %k1, %k7
+; KNL-NEXT:    kmovw %k1, %k2
 ; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k1
@@ -1906,8 +1906,8 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    korw %k1, %k0, %k0
 ; KNL-NEXT:    movw $-9, %ax
 ; KNL-NEXT:    kmovw %eax, %k1
-; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; KNL-NEXT:    kandw %k1, %k0, %k0
+; KNL-NEXT:    kmovw %k1, %k7
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
@@ -1924,9 +1924,8 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    korw %k1, %k0, %k0
 ; KNL-NEXT:    movw $-33, %ax
 ; KNL-NEXT:    kmovw %eax, %k1
-; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    kmovw %k1, %k2
 ; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; KNL-NEXT:    kandw %k1, %k0, %k0
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
@@ -1934,8 +1933,9 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    korw %k1, %k0, %k0
 ; KNL-NEXT:    movw $-65, %ax
 ; KNL-NEXT:    kmovw %eax, %k1
-; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; KNL-NEXT:    kandw %k1, %k0, %k0
+; KNL-NEXT:    kmovw %k1, %k3
+; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
@@ -1943,9 +1943,8 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    korw %k1, %k0, %k0
 ; KNL-NEXT:    movw $-129, %ax
 ; KNL-NEXT:    kmovw %eax, %k1
-; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    kmovw %k1, %k3
 ; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; KNL-NEXT:    kandw %k1, %k0, %k0
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
@@ -1953,18 +1952,17 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    korw %k1, %k0, %k0
 ; KNL-NEXT:    movw $-257, %ax # imm = 0xFEFF
 ; KNL-NEXT:    kmovw %eax, %k1
-; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; KNL-NEXT:    kandw %k1, %k0, %k0
+; KNL-NEXT:    kmovw %k1, %k4
+; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $7, %k1, %k1
 ; KNL-NEXT:    korw %k1, %k0, %k0
 ; KNL-NEXT:    movw $-513, %ax # imm = 0xFDFF
-; KNL-NEXT:    kmovw %eax, %k1
-; KNL-NEXT:    kandw %k1, %k0, %k0
-; KNL-NEXT:    kmovw %k1, %k4
-; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; KNL-NEXT:    kmovw %eax, %k5
+; KNL-NEXT:    kandw %k5, %k0, %k0
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
@@ -1980,8 +1978,9 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    kshiftrw $5, %k1, %k1
 ; KNL-NEXT:    korw %k1, %k0, %k0
 ; KNL-NEXT:    movw $-2049, %ax # imm = 0xF7FF
-; KNL-NEXT:    kmovw %eax, %k5
-; KNL-NEXT:    kandw %k5, %k0, %k0
+; KNL-NEXT:    kmovw %eax, %k1
+; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; KNL-NEXT:    kandw %k1, %k0, %k0
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    kshiftlw $15, %k1, %k1
@@ -2026,49 +2025,50 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    kshiftrw $14, %k1, %k1
 ; KNL-NEXT:    kmovw %edi, %k6
 ; KNL-NEXT:    korw %k1, %k6, %k1
-; KNL-NEXT:    kandw %k7, %k1, %k1
+; KNL-NEXT:    kandw %k2, %k1, %k1
 ; KNL-NEXT:    kmovw %edx, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $13, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
-; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
+; KNL-NEXT:    kmovw %k7, %k0
+; KNL-NEXT:    kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; KNL-NEXT:    kandw %k7, %k1, %k1
 ; KNL-NEXT:    kmovw %ecx, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $12, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
-; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
-; KNL-NEXT:    kandw %k0, %k1, %k1
+; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
+; KNL-NEXT:    kandw %k2, %k1, %k1
 ; KNL-NEXT:    kmovw %r8d, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $11, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
-; KNL-NEXT:    kandw %k2, %k1, %k1
+; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
+; KNL-NEXT:    kandw %k7, %k1, %k1
 ; KNL-NEXT:    kmovw %r9d, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $10, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
-; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
-; KNL-NEXT:    kandw %k2, %k1, %k1
+; KNL-NEXT:    kandw %k3, %k1, %k1
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $9, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
+; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
 ; KNL-NEXT:    kandw %k3, %k1, %k1
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $8, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
-; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
-; KNL-NEXT:    kandw %k3, %k1, %k1
+; KNL-NEXT:    kandw %k4, %k1, %k1
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $7, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
-; KNL-NEXT:    kandw %k4, %k1, %k1
+; KNL-NEXT:    kandw %k5, %k1, %k1
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
@@ -2081,8 +2081,8 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $5, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
-; KNL-NEXT:    kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; KNL-NEXT:    kandw %k5, %k1, %k1
+; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
+; KNL-NEXT:    kandw %k6, %k1, %k1
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
@@ -2130,46 +2130,45 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $13, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
-; KNL-NEXT:    kandw %k7, %k1, %k1
+; KNL-NEXT:    kandw %k0, %k1, %k1
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $12, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
-; KNL-NEXT:    kandw %k0, %k1, %k1
+; KNL-NEXT:    kandw %k2, %k1, %k1
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $11, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
-; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
-; KNL-NEXT:    kandw %k0, %k1, %k1
+; KNL-NEXT:    kandw %k7, %k1, %k1
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $10, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
-; KNL-NEXT:    kandw %k2, %k1, %k1
+; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
+; KNL-NEXT:    kandw %k0, %k1, %k1
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $9, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
-; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
-; KNL-NEXT:    kandw %k0, %k1, %k1
+; KNL-NEXT:    kandw %k3, %k1, %k1
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $8, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
+; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
 ; KNL-NEXT:    kandw %k3, %k1, %k1
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $7, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
-; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
-; KNL-NEXT:    kandw %k3, %k1, %k1
+; KNL-NEXT:    kandw %k5, %k1, %k1
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
@@ -2181,28 +2180,29 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $5, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
-; KNL-NEXT:    kandw %k5, %k1, %k1
+; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
+; KNL-NEXT:    kandw %k0, %k1, %k1
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $4, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
-; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
-; KNL-NEXT:    kandw %k0, %k1, %k1
+; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
+; KNL-NEXT:    kandw %k2, %k1, %k1
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $3, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
-; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
-; KNL-NEXT:    kandw %k2, %k1, %k1
+; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
+; KNL-NEXT:    kandw %k6, %k1, %k1
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $2, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k1, %k1
-; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
-; KNL-NEXT:    kandw %k5, %k1, %k1
+; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
+; KNL-NEXT:    kandw %k6, %k1, %k1
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k6
 ; KNL-NEXT:    kshiftlw $14, %k6, %k6
@@ -2221,56 +2221,55 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    kshiftrw $14, %k6, %k6
 ; KNL-NEXT:    kmovw %eax, %k7
 ; KNL-NEXT:    korw %k6, %k7, %k6
-; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
-; KNL-NEXT:    kandw %k5, %k6, %k6
+; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
+; KNL-NEXT:    kandw %k7, %k6, %k6
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k7
 ; KNL-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL-NEXT:    kshiftrw $13, %k7, %k7
 ; KNL-NEXT:    korw %k7, %k6, %k6
-; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
-; KNL-NEXT:    kandw %k5, %k6, %k6
+; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
+; KNL-NEXT:    kandw %k7, %k6, %k6
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k7
 ; KNL-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL-NEXT:    kshiftrw $12, %k7, %k7
 ; KNL-NEXT:    korw %k7, %k6, %k6
-; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
-; KNL-NEXT:    kandw %k5, %k6, %k6
+; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
+; KNL-NEXT:    kandw %k7, %k6, %k6
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k7
 ; KNL-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL-NEXT:    kshiftrw $11, %k7, %k7
 ; KNL-NEXT:    korw %k7, %k6, %k6
-; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
-; KNL-NEXT:    kandw %k5, %k6, %k6
+; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
+; KNL-NEXT:    kandw %k7, %k6, %k6
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k7
 ; KNL-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL-NEXT:    kshiftrw $10, %k7, %k7
 ; KNL-NEXT:    korw %k7, %k6, %k6
-; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
-; KNL-NEXT:    kandw %k5, %k6, %k6
+; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
+; KNL-NEXT:    kandw %k7, %k6, %k6
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k7
 ; KNL-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL-NEXT:    kshiftrw $9, %k7, %k7
 ; KNL-NEXT:    korw %k7, %k6, %k6
-; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
-; KNL-NEXT:    kandw %k5, %k6, %k6
+; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
+; KNL-NEXT:    kandw %k7, %k6, %k6
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k7
 ; KNL-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL-NEXT:    kshiftrw $8, %k7, %k7
 ; KNL-NEXT:    korw %k7, %k6, %k6
-; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
-; KNL-NEXT:    kandw %k5, %k6, %k6
+; KNL-NEXT:    kandw %k3, %k6, %k6
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k7
 ; KNL-NEXT:    kshiftlw $15, %k7, %k7
 ; KNL-NEXT:    kshiftrw $7, %k7, %k7
 ; KNL-NEXT:    korw %k7, %k6, %k6
-; KNL-NEXT:    kandw %k3, %k6, %k6
+; KNL-NEXT:    kandw %k5, %k6, %k6
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k7
 ; KNL-NEXT:    kshiftlw $15, %k7, %k7
@@ -2282,20 +2281,20 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; KNL-NEXT:    kshiftlw $15, %k6, %k6
 ; KNL-NEXT:    kshiftrw $5, %k6, %k6
 ; KNL-NEXT:    korw %k6, %k5, %k5
-; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
-; KNL-NEXT:    kandw %k3, %k5, %k4
+; KNL-NEXT:    kandw %k0, %k5, %k4
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k5
 ; KNL-NEXT:    kshiftlw $15, %k5, %k5
 ; KNL-NEXT:    kshiftrw $4, %k5, %k5
 ; KNL-NEXT:    korw %k5, %k4, %k4
-; KNL-NEXT:    kandw %k0, %k4, %k3
+; KNL-NEXT:    kandw %k2, %k4, %k3
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k4
 ; KNL-NEXT:    kshiftlw $15, %k4, %k4
 ; KNL-NEXT:    kshiftrw $3, %k4, %k4
 ; KNL-NEXT:    korw %k4, %k3, %k3
-; KNL-NEXT:    kandw %k2, %k3, %k2
+; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
+; KNL-NEXT:    kandw %k0, %k3, %k2
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; KNL-NEXT:    kmovw %eax, %k3
 ; KNL-NEXT:    kshiftlw $15, %k3, %k3
@@ -2351,7 +2350,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; AVX512DQNOBW-NEXT:    movw $-5, %ax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
 ; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
-; AVX512DQNOBW-NEXT:    kmovw %k1, %k7
+; AVX512DQNOBW-NEXT:    kmovw %k1, %k2
 ; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
@@ -2360,8 +2359,8 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; AVX512DQNOBW-NEXT:    korw %k1, %k0, %k0
 ; AVX512DQNOBW-NEXT:    movw $-9, %ax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
-; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
+; AVX512DQNOBW-NEXT:    kmovw %k1, %k7
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
@@ -2378,9 +2377,8 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; AVX512DQNOBW-NEXT:    korw %k1, %k0, %k0
 ; AVX512DQNOBW-NEXT:    movw $-33, %ax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
-; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
-; AVX512DQNOBW-NEXT:    kmovw %k1, %k2
 ; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
@@ -2388,8 +2386,9 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; AVX512DQNOBW-NEXT:    korw %k1, %k0, %k0
 ; AVX512DQNOBW-NEXT:    movw $-65, %ax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
-; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
+; AVX512DQNOBW-NEXT:    kmovw %k1, %k3
+; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
@@ -2397,9 +2396,8 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; AVX512DQNOBW-NEXT:    korw %k1, %k0, %k0
 ; AVX512DQNOBW-NEXT:    movw $-129, %ax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
-; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
-; AVX512DQNOBW-NEXT:    kmovw %k1, %k3
 ; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
@@ -2407,18 +2405,17 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; AVX512DQNOBW-NEXT:    korw %k1, %k0, %k0
 ; AVX512DQNOBW-NEXT:    movw $-257, %ax # imm = 0xFEFF
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
-; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
+; AVX512DQNOBW-NEXT:    kmovw %k1, %k4
+; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
 ; AVX512DQNOBW-NEXT:    kshiftrw $7, %k1, %k1
 ; AVX512DQNOBW-NEXT:    korw %k1, %k0, %k0
 ; AVX512DQNOBW-NEXT:    movw $-513, %ax # imm = 0xFDFF
-; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
-; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
-; AVX512DQNOBW-NEXT:    kmovw %k1, %k4
-; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; AVX512DQNOBW-NEXT:    kmovw %eax, %k5
+; AVX512DQNOBW-NEXT:    kandw %k5, %k0, %k0
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
@@ -2434,8 +2431,9 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; AVX512DQNOBW-NEXT:    kshiftrw $5, %k1, %k1
 ; AVX512DQNOBW-NEXT:    korw %k1, %k0, %k0
 ; AVX512DQNOBW-NEXT:    movw $-2049, %ax # imm = 0xF7FF
-; AVX512DQNOBW-NEXT:    kmovw %eax, %k5
-; AVX512DQNOBW-NEXT:    kandw %k5, %k0, %k0
+; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
+; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
@@ -2458,215 +2456,216 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
 ; AVX512DQNOBW-NEXT:    kshiftrw $2, %k1, %k1
-; AVX512DQNOBW-NEXT:    korw %k1, %k0, %k0
+; AVX512DQNOBW-NEXT:    korw %k1, %k0, %k1
 ; AVX512DQNOBW-NEXT:    movw $-16385, %ax # imm = 0xBFFF
-; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
-; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
+; AVX512DQNOBW-NEXT:    kmovw %eax, %k0
+; AVX512DQNOBW-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; AVX512DQNOBW-NEXT:    kandw %k0, %k1, %k1
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $14, %k6, %k6
-; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT:    kshiftlw $1, %k0, %k0
-; AVX512DQNOBW-NEXT:    kshiftrw $1, %k0, %k0
+; AVX512DQNOBW-NEXT:    korw %k6, %k1, %k1
+; AVX512DQNOBW-NEXT:    kshiftlw $1, %k1, %k1
+; AVX512DQNOBW-NEXT:    kshiftrw $1, %k1, %k1
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
-; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
+; AVX512DQNOBW-NEXT:    korw %k6, %k1, %k0
 ; AVX512DQNOBW-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; AVX512DQNOBW-NEXT:    andl $1, %edi
-; AVX512DQNOBW-NEXT:    kmovw %esi, %k0
-; AVX512DQNOBW-NEXT:    kshiftlw $15, %k0, %k0
-; AVX512DQNOBW-NEXT:    kshiftrw $14, %k0, %k0
+; AVX512DQNOBW-NEXT:    kmovw %esi, %k1
+; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
+; AVX512DQNOBW-NEXT:    kshiftrw $14, %k1, %k1
 ; AVX512DQNOBW-NEXT:    kmovw %edi, %k6
-; AVX512DQNOBW-NEXT:    korw %k0, %k6, %k0
-; AVX512DQNOBW-NEXT:    kandw %k7, %k0, %k0
+; AVX512DQNOBW-NEXT:    korw %k1, %k6, %k1
+; AVX512DQNOBW-NEXT:    kandw %k2, %k1, %k1
 ; AVX512DQNOBW-NEXT:    kmovw %edx, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $13, %k6, %k6
-; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
-; AVX512DQNOBW-NEXT:    kandw %k7, %k0, %k0
+; AVX512DQNOBW-NEXT:    korw %k6, %k1, %k1
+; AVX512DQNOBW-NEXT:    kmovw %k7, %k0
+; AVX512DQNOBW-NEXT:    kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; AVX512DQNOBW-NEXT:    kandw %k7, %k1, %k1
 ; AVX512DQNOBW-NEXT:    kmovw %ecx, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $12, %k6, %k6
-; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
-; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
+; AVX512DQNOBW-NEXT:    korw %k6, %k1, %k1
+; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
+; AVX512DQNOBW-NEXT:    kandw %k2, %k1, %k1
 ; AVX512DQNOBW-NEXT:    kmovw %r8d, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $11, %k6, %k6
-; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT:    kandw %k2, %k0, %k0
+; AVX512DQNOBW-NEXT:    korw %k6, %k1, %k1
+; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
+; AVX512DQNOBW-NEXT:    kandw %k7, %k1, %k1
 ; AVX512DQNOBW-NEXT:    kmovw %r9d, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $10, %k6, %k6
-; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
-; AVX512DQNOBW-NEXT:    kandw %k2, %k0, %k0
+; AVX512DQNOBW-NEXT:    korw %k6, %k1, %k1
+; AVX512DQNOBW-NEXT:    kandw %k3, %k1, %k1
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $9, %k6, %k6
-; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT:    kandw %k3, %k0, %k0
+; AVX512DQNOBW-NEXT:    korw %k6, %k1, %k1
+; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
+; AVX512DQNOBW-NEXT:    kandw %k3, %k1, %k1
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $8, %k6, %k6
-; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
-; AVX512DQNOBW-NEXT:    kandw %k3, %k0, %k0
+; AVX512DQNOBW-NEXT:    korw %k6, %k1, %k1
+; AVX512DQNOBW-NEXT:    kandw %k4, %k1, %k1
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $7, %k6, %k6
-; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT:    kandw %k4, %k0, %k0
+; AVX512DQNOBW-NEXT:    korw %k6, %k1, %k1
+; AVX512DQNOBW-NEXT:    kandw %k5, %k1, %k1
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $6, %k6, %k6
-; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
+; AVX512DQNOBW-NEXT:    korw %k6, %k1, %k1
 ; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload
-; AVX512DQNOBW-NEXT:    kandw %k4, %k0, %k0
+; AVX512DQNOBW-NEXT:    kandw %k4, %k1, %k1
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $5, %k6, %k6
-; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT:    kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; AVX512DQNOBW-NEXT:    kandw %k5, %k0, %k0
+; AVX512DQNOBW-NEXT:    korw %k6, %k1, %k1
+; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
+; AVX512DQNOBW-NEXT:    kandw %k6, %k1, %k1
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $4, %k6, %k6
-; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
+; AVX512DQNOBW-NEXT:    korw %k6, %k1, %k1
 ; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
-; AVX512DQNOBW-NEXT:    kandw %k6, %k0, %k0
+; AVX512DQNOBW-NEXT:    kandw %k6, %k1, %k1
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $3, %k6, %k6
-; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
+; AVX512DQNOBW-NEXT:    korw %k6, %k1, %k1
 ; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
-; AVX512DQNOBW-NEXT:    kandw %k6, %k0, %k0
+; AVX512DQNOBW-NEXT:    kandw %k6, %k1, %k1
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $2, %k6, %k6
-; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
+; AVX512DQNOBW-NEXT:    korw %k6, %k1, %k1
 ; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
-; AVX512DQNOBW-NEXT:    kandw %k6, %k0, %k0
+; AVX512DQNOBW-NEXT:    kandw %k6, %k1, %k1
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $14, %k6, %k6
-; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT:    kshiftlw $1, %k0, %k0
-; AVX512DQNOBW-NEXT:    kshiftrw $1, %k0, %k0
+; AVX512DQNOBW-NEXT:    korw %k6, %k1, %k1
+; AVX512DQNOBW-NEXT:    kshiftlw $1, %k1, %k1
+; AVX512DQNOBW-NEXT:    kshiftrw $1, %k1, %k1
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
-; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; AVX512DQNOBW-NEXT:    korw %k6, %k1, %k1
+; AVX512DQNOBW-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    andl $1, %eax
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
-; AVX512DQNOBW-NEXT:    kmovw %ecx, %k0
-; AVX512DQNOBW-NEXT:    kshiftlw $15, %k0, %k0
-; AVX512DQNOBW-NEXT:    kshiftrw $14, %k0, %k0
+; AVX512DQNOBW-NEXT:    kmovw %ecx, %k1
+; AVX512DQNOBW-NEXT:    kshiftlw $15, %k1, %k1
+; AVX512DQNOBW-NEXT:    kshiftrw $14, %k1, %k1
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
-; AVX512DQNOBW-NEXT:    korw %k0, %k6, %k0
+; AVX512DQNOBW-NEXT:    korw %k1, %k6, %k1
 ; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
-; AVX512DQNOBW-NEXT:    kandw %k6, %k0, %k0
+; AVX512DQNOBW-NEXT:    kandw %k6, %k1, %k1
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $13, %k6, %k6
-; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT:    kandw %k7, %k0, %k0
+; AVX512DQNOBW-NEXT:    korw %k6, %k1, %k1
+; AVX512DQNOBW-NEXT:    kandw %k0, %k1, %k1
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $12, %k6, %k6
-; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
+; AVX512DQNOBW-NEXT:    korw %k6, %k1, %k1
+; AVX512DQNOBW-NEXT:    kandw %k2, %k1, %k1
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $11, %k6, %k6
-; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
-; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
+; AVX512DQNOBW-NEXT:    korw %k6, %k1, %k1
+; AVX512DQNOBW-NEXT:    kandw %k7, %k1, %k1
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $10, %k6, %k6
-; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT:    kandw %k2, %k0, %k0
+; AVX512DQNOBW-NEXT:    korw %k6, %k1, %k1
+; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
+; AVX512DQNOBW-NEXT:    kandw %k0, %k1, %k1
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $9, %k6, %k6
-; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
-; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
+; AVX512DQNOBW-NEXT:    korw %k6, %k1, %k1
+; AVX512DQNOBW-NEXT:    kandw %k3, %k1, %k1
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $8, %k6, %k6
-; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT:    kandw %k3, %k0, %k0
+; AVX512DQNOBW-NEXT:    korw %k6, %k1, %k1
+; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
+; AVX512DQNOBW-NEXT:    kandw %k3, %k1, %k1
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $7, %k6, %k6
-; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
-; AVX512DQNOBW-NEXT:    kandw %k3, %k0, %k0
+; AVX512DQNOBW-NEXT:    korw %k6, %k1, %k1
+; AVX512DQNOBW-NEXT:    kandw %k5, %k1, %k1
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $6, %k6, %k6
-; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT:    kandw %k4, %k0, %k0
+; AVX512DQNOBW-NEXT:    korw %k6, %k1, %k1
+; AVX512DQNOBW-NEXT:    kandw %k4, %k1, %k1
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $5, %k6, %k6
-; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT:    kandw %k5, %k0, %k0
+; AVX512DQNOBW-NEXT:    korw %k6, %k1, %k1
+; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
+; AVX512DQNOBW-NEXT:    kandw %k0, %k1, %k1
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $4, %k6, %k6
-; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
-; AVX512DQNOBW-NEXT:    kandw %k1, %k0, %k0
+; AVX512DQNOBW-NEXT:    korw %k6, %k1, %k1
+; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
+; AVX512DQNOBW-NEXT:    kandw %k2, %k1, %k1
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $3, %k6, %k6
-; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
-; AVX512DQNOBW-NEXT:    kandw %k2, %k0, %k0
+; AVX512DQNOBW-NEXT:    korw %k6, %k1, %k1
+; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
+; AVX512DQNOBW-NEXT:    kandw %k6, %k1, %k1
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $2, %k6, %k6
-; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
-; AVX512DQNOBW-NEXT:    kandw %k5, %k0, %k0
+; AVX512DQNOBW-NEXT:    korw %k6, %k1, %k1
+; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
+; AVX512DQNOBW-NEXT:    kandw %k6, %k1, %k1
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $14, %k6, %k6
-; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT:    kshiftlw $1, %k0, %k0
-; AVX512DQNOBW-NEXT:    kshiftrw $1, %k0, %k0
+; AVX512DQNOBW-NEXT:    korw %k6, %k1, %k1
+; AVX512DQNOBW-NEXT:    kshiftlw $1, %k1, %k1
+; AVX512DQNOBW-NEXT:    kshiftrw $1, %k1, %k1
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k6
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
-; AVX512DQNOBW-NEXT:    korw %k6, %k0, %k0
+; AVX512DQNOBW-NEXT:    korw %k6, %k1, %k1
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    andl $1, %eax
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
@@ -2675,56 +2674,55 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; AVX512DQNOBW-NEXT:    kshiftrw $14, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
 ; AVX512DQNOBW-NEXT:    korw %k6, %k7, %k6
-; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
-; AVX512DQNOBW-NEXT:    kandw %k5, %k6, %k6
+; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
+; AVX512DQNOBW-NEXT:    kandw %k7, %k6, %k6
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
 ; AVX512DQNOBW-NEXT:    kshiftrw $13, %k7, %k7
 ; AVX512DQNOBW-NEXT:    korw %k7, %k6, %k6
-; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
-; AVX512DQNOBW-NEXT:    kandw %k5, %k6, %k6
+; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
+; AVX512DQNOBW-NEXT:    kandw %k7, %k6, %k6
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
 ; AVX512DQNOBW-NEXT:    kshiftrw $12, %k7, %k7
 ; AVX512DQNOBW-NEXT:    korw %k7, %k6, %k6
-; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
-; AVX512DQNOBW-NEXT:    kandw %k5, %k6, %k6
+; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
+; AVX512DQNOBW-NEXT:    kandw %k7, %k6, %k6
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
 ; AVX512DQNOBW-NEXT:    kshiftrw $11, %k7, %k7
 ; AVX512DQNOBW-NEXT:    korw %k7, %k6, %k6
-; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
-; AVX512DQNOBW-NEXT:    kandw %k5, %k6, %k6
+; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
+; AVX512DQNOBW-NEXT:    kandw %k7, %k6, %k6
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
 ; AVX512DQNOBW-NEXT:    kshiftrw $10, %k7, %k7
 ; AVX512DQNOBW-NEXT:    korw %k7, %k6, %k6
-; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
-; AVX512DQNOBW-NEXT:    kandw %k5, %k6, %k6
+; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
+; AVX512DQNOBW-NEXT:    kandw %k7, %k6, %k6
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
 ; AVX512DQNOBW-NEXT:    kshiftrw $9, %k7, %k7
 ; AVX512DQNOBW-NEXT:    korw %k7, %k6, %k6
-; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
-; AVX512DQNOBW-NEXT:    kandw %k5, %k6, %k6
+; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
+; AVX512DQNOBW-NEXT:    kandw %k7, %k6, %k6
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
 ; AVX512DQNOBW-NEXT:    kshiftrw $8, %k7, %k7
 ; AVX512DQNOBW-NEXT:    korw %k7, %k6, %k6
-; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
-; AVX512DQNOBW-NEXT:    kandw %k5, %k6, %k6
+; AVX512DQNOBW-NEXT:    kandw %k3, %k6, %k6
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
 ; AVX512DQNOBW-NEXT:    kshiftrw $7, %k7, %k7
 ; AVX512DQNOBW-NEXT:    korw %k7, %k6, %k6
-; AVX512DQNOBW-NEXT:    kandw %k3, %k6, %k6
+; AVX512DQNOBW-NEXT:    kandw %k5, %k6, %k6
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k7
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k7, %k7
@@ -2736,39 +2734,39 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k6, %k6
 ; AVX512DQNOBW-NEXT:    kshiftrw $5, %k6, %k6
 ; AVX512DQNOBW-NEXT:    korw %k6, %k5, %k5
-; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
-; AVX512DQNOBW-NEXT:    kandw %k3, %k5, %k4
+; AVX512DQNOBW-NEXT:    kandw %k0, %k5, %k4
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k5
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k5, %k5
 ; AVX512DQNOBW-NEXT:    kshiftrw $4, %k5, %k5
 ; AVX512DQNOBW-NEXT:    korw %k5, %k4, %k4
-; AVX512DQNOBW-NEXT:    kandw %k1, %k4, %k3
+; AVX512DQNOBW-NEXT:    kandw %k2, %k4, %k3
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k4
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k4, %k4
 ; AVX512DQNOBW-NEXT:    kshiftrw $3, %k4, %k4
 ; AVX512DQNOBW-NEXT:    korw %k4, %k3, %k3
-; AVX512DQNOBW-NEXT:    kandw %k2, %k3, %k2
+; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
+; AVX512DQNOBW-NEXT:    kandw %k0, %k3, %k2
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k3
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k3, %k3
 ; AVX512DQNOBW-NEXT:    kshiftrw $2, %k3, %k3
 ; AVX512DQNOBW-NEXT:    korw %k3, %k2, %k2
-; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
-; AVX512DQNOBW-NEXT:    kandw %k1, %k2, %k1
+; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
+; AVX512DQNOBW-NEXT:    kandw %k0, %k2, %k0
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k2
 ; AVX512DQNOBW-NEXT:    kshiftlw $14, %k2, %k2
-; AVX512DQNOBW-NEXT:    korw %k2, %k1, %k1
-; AVX512DQNOBW-NEXT:    kshiftlw $1, %k1, %k1
-; AVX512DQNOBW-NEXT:    kshiftrw $1, %k1, %k1
+; AVX512DQNOBW-NEXT:    korw %k2, %k0, %k0
+; AVX512DQNOBW-NEXT:    kshiftlw $1, %k0, %k0
+; AVX512DQNOBW-NEXT:    kshiftrw $1, %k0, %k0
 ; AVX512DQNOBW-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; AVX512DQNOBW-NEXT:    kmovw %eax, %k2
 ; AVX512DQNOBW-NEXT:    kshiftlw $15, %k2, %k2
-; AVX512DQNOBW-NEXT:    korw %k2, %k1, %k1
-; AVX512DQNOBW-NEXT:    vpmovm2d %k1, %zmm2
-; AVX512DQNOBW-NEXT:    vpmovm2d %k0, %zmm3
+; AVX512DQNOBW-NEXT:    korw %k2, %k0, %k0
+; AVX512DQNOBW-NEXT:    vpmovm2d %k0, %zmm2
+; AVX512DQNOBW-NEXT:    vpmovm2d %k1, %zmm3
 ; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
 ; AVX512DQNOBW-NEXT:    vpmovm2d %k0, %zmm4
 ; AVX512DQNOBW-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
index f66d81c..880062b 100644
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
@@ -200,10 +200,9 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
 ; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    sbbl %eax, %edi
 ; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl 40(%ebp), %ecx
-; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    movl 40(%ebp), %esi
+; X86-NEXT:    movl %esi, %edx
 ; X86-NEXT:    sarl $31, %edx
-; X86-NEXT:    movl %ecx, %esi
 ; X86-NEXT:    xorl %edx, %esi
 ; X86-NEXT:    movl 36(%ebp), %ecx
 ; X86-NEXT:    xorl %edx, %ecx
@@ -408,9 +407,9 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X86-NEXT:    adcl $-1, %eax
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT:    adcl $-1, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X86-NEXT:    adcl $-1, %ecx
 ; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
diff --git a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
index 257524e..0530c843 100644
--- a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
+++ b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
@@ -2022,19 +2022,19 @@ define <4 x half> @test_fmaximum_v4f16(<4 x half> %x, <4 x half> %y) nounwind {
 ; X86-NEXT:    fstps {{[0-9]+}}(%esp)
 ; X86-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
 ; X86-NEXT:    fstps {{[0-9]+}}(%esp)
-; X86-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X86-NEXT:    vmovd %xmm2, %eax
+; X86-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-NEXT:    vmovd %xmm1, %eax
 ; X86-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X86-NEXT:    testl %eax, %eax
 ; X86-NEXT:    js .LBB33_1
 ; X86-NEXT:  # %bb.2:
-; X86-NEXT:    vmovdqa %xmm2, %xmm1
+; X86-NEXT:    vmovdqa %xmm1, %xmm2
 ; X86-NEXT:    jmp .LBB33_3
 ; X86-NEXT:  .LBB33_1:
-; X86-NEXT:    vmovdqa %xmm0, %xmm1
-; X86-NEXT:    vmovdqa %xmm2, %xmm0
+; X86-NEXT:    vmovdqa %xmm0, %xmm2
+; X86-NEXT:    vmovdqa %xmm1, %xmm0
 ; X86-NEXT:  .LBB33_3:
-; X86-NEXT:    vmaxss %xmm1, %xmm0, %xmm1
+; X86-NEXT:    vmaxss %xmm2, %xmm0, %xmm1
 ; X86-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm2
 ; X86-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
 ; X86-NEXT:    vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
@@ -2154,43 +2154,43 @@ define <4 x bfloat> @test_fmaximum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) {
 ; SSE2-NEXT:    .cfi_offset %r14, -32
 ; SSE2-NEXT:    .cfi_offset %r15, -24
 ; SSE2-NEXT:    .cfi_offset %rbp, -16
-; SSE2-NEXT:    movdqa %xmm1, %xmm4
-; SSE2-NEXT:    movdqa %xmm0, %xmm5
 ; SSE2-NEXT:    pextrw $0, %xmm1, %r14d
 ; SSE2-NEXT:    pextrw $0, %xmm0, %r15d
-; SSE2-NEXT:    movdqa %xmm1, %xmm0
-; SSE2-NEXT:    psrld $16, %xmm0
-; SSE2-NEXT:    pextrw $0, %xmm0, %eax
-; SSE2-NEXT:    movdqa %xmm5, %xmm0
-; SSE2-NEXT:    psrld $16, %xmm0
-; SSE2-NEXT:    pextrw $0, %xmm0, %ecx
+; SSE2-NEXT:    movdqa %xmm1, %xmm2
+; SSE2-NEXT:    psrld $16, %xmm2
+; SSE2-NEXT:    pextrw $0, %xmm2, %eax
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    psrld $16, %xmm2
+; SSE2-NEXT:    pextrw $0, %xmm2, %ecx
 ; SSE2-NEXT:    shll $16, %ecx
 ; SSE2-NEXT:    movd %ecx, %xmm3
 ; SSE2-NEXT:    shll $16, %eax
 ; SSE2-NEXT:    movd %eax, %xmm2
 ; SSE2-NEXT:    testl %ecx, %ecx
-; SSE2-NEXT:    movdqa %xmm3, %xmm1
+; SSE2-NEXT:    movdqa %xmm3, %xmm7
 ; SSE2-NEXT:    js .LBB34_2
 ; SSE2-NEXT:  # %bb.1:
-; SSE2-NEXT:    movdqa %xmm2, %xmm1
+; SSE2-NEXT:    movdqa %xmm2, %xmm7
 ; SSE2-NEXT:  .LBB34_2:
-; SSE2-NEXT:    movdqa %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,1,1,1]
-; SSE2-NEXT:    movdqa %xmm5, (%rsp) # 16-byte Spill
-; SSE2-NEXT:    shufps {{.*#+}} xmm5 = xmm5[1,1,1,1]
-; SSE2-NEXT:    movdqa %xmm1, %xmm0
-; SSE2-NEXT:    cmpunordss %xmm1, %xmm0
-; SSE2-NEXT:    movaps %xmm0, %xmm6
-; SSE2-NEXT:    andps %xmm1, %xmm6
+; SSE2-NEXT:    movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE2-NEXT:    movdqa %xmm1, %xmm5
+; SSE2-NEXT:    shufps {{.*#+}} xmm5 = xmm5[1,1],xmm1[1,1]
+; SSE2-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
+; SSE2-NEXT:    movdqa %xmm0, %xmm6
+; SSE2-NEXT:    shufps {{.*#+}} xmm6 = xmm6[1,1],xmm0[1,1]
+; SSE2-NEXT:    movdqa %xmm7, %xmm0
+; SSE2-NEXT:    cmpunordss %xmm7, %xmm0
+; SSE2-NEXT:    movaps %xmm0, %xmm4
+; SSE2-NEXT:    andps %xmm7, %xmm4
 ; SSE2-NEXT:    js .LBB34_4
 ; SSE2-NEXT:  # %bb.3:
 ; SSE2-NEXT:    movdqa %xmm3, %xmm2
 ; SSE2-NEXT:  .LBB34_4:
-; SSE2-NEXT:    pextrw $0, %xmm4, %ebp
-; SSE2-NEXT:    pextrw $0, %xmm5, %ebx
-; SSE2-NEXT:    maxss %xmm2, %xmm1
-; SSE2-NEXT:    andnps %xmm1, %xmm0
-; SSE2-NEXT:    orps %xmm6, %xmm0
+; SSE2-NEXT:    pextrw $0, %xmm5, %ebp
+; SSE2-NEXT:    pextrw $0, %xmm6, %ebx
+; SSE2-NEXT:    maxss %xmm2, %xmm7
+; SSE2-NEXT:    andnps %xmm7, %xmm0
+; SSE2-NEXT:    orps %xmm4, %xmm0
 ; SSE2-NEXT:    callq __truncsfbf2@PLT
 ; SSE2-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; SSE2-NEXT:    shll $16, %r15d
@@ -2312,7 +2312,7 @@ define <4 x bfloat> @test_fmaximum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) {
 ; AVX1-NEXT:    vmovshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
 ; AVX1-NEXT:    vpextrw $0, %xmm4, %ebx
 ; AVX1-NEXT:    vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
-; AVX1-NEXT:    vpextrw $0, %xmm4, %ebp
+; AVX1-NEXT:    vpextrw $0, %xmm4, %r14d
 ; AVX1-NEXT:    vpextrw $0, %xmm0, %r12d
 ; AVX1-NEXT:    vpextrw $0, %xmm1, %r13d
 ; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
@@ -2331,7 +2331,7 @@ define <4 x bfloat> @test_fmaximum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) {
 ; AVX1-NEXT:    vmovdqa %xmm0, %xmm1
 ; AVX1-NEXT:    vmovdqa %xmm4, %xmm0
 ; AVX1-NEXT:  .LBB34_3:
-; AVX1-NEXT:    vpextrw $0, %xmm2, %r14d
+; AVX1-NEXT:    vpextrw $0, %xmm2, %ebp
 ; AVX1-NEXT:    vpextrw $0, %xmm3, %r15d
 ; AVX1-NEXT:    vmaxss %xmm1, %xmm0, %xmm1
 ; AVX1-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm2
@@ -2355,8 +2355,8 @@ define <4 x bfloat> @test_fmaximum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) {
 ; AVX1-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    callq __truncsfbf2@PLT
 ; AVX1-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX1-NEXT:    shll $16, %ebp
-; AVX1-NEXT:    vmovd %ebp, %xmm0
+; AVX1-NEXT:    shll $16, %r14d
+; AVX1-NEXT:    vmovd %r14d, %xmm0
 ; AVX1-NEXT:    shll $16, %ebx
 ; AVX1-NEXT:    vmovd %ebx, %xmm2
 ; AVX1-NEXT:    js .LBB34_7
@@ -2374,8 +2374,8 @@ define <4 x bfloat> @test_fmaximum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) {
 ; AVX1-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
 ; AVX1-NEXT:    shll $16, %r15d
 ; AVX1-NEXT:    vmovd %r15d, %xmm0
-; AVX1-NEXT:    shll $16, %r14d
-; AVX1-NEXT:    vmovd %r14d, %xmm2
+; AVX1-NEXT:    shll $16, %ebp
+; AVX1-NEXT:    vmovd %ebp, %xmm2
 ; AVX1-NEXT:    js .LBB34_10
 ; AVX1-NEXT:  # %bb.11:
 ; AVX1-NEXT:    vmovdqa %xmm2, %xmm1
diff --git a/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll b/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll
index bfff6ef..c617b45 100644
--- a/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll
+++ b/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll
@@ -1827,7 +1827,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind
 ; AVX512-NEXT:    kmovw %eax, %k2
 ; AVX512-NEXT:    vmovss %xmm0, %xmm2, %xmm2 {%k2}
 ; AVX512-NEXT:    vcvtps2ph $4, %xmm2, %xmm1
-; AVX512-NEXT:    vmovaps %xmm1, (%rsp) # 16-byte Spill
+; AVX512-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; AVX512-NEXT:    vcvtph2ps %xmm1, %xmm2
 ; AVX512-NEXT:    vmovss %xmm2, %xmm0, %xmm0 {%k1}
 ; AVX512-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
@@ -1842,10 +1842,10 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind
 ; AVX512-NEXT:    vcvtph2ps %xmm0, %xmm0
 ; AVX512-NEXT:    movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax
 ; AVX512-NEXT:    vmovd %eax, %xmm2
-; AVX512-NEXT:    vcvtph2ps %xmm2, %xmm9
-; AVX512-NEXT:    vmulss %xmm0, %xmm9, %xmm0
-; AVX512-NEXT:    vxorps %xmm10, %xmm10, %xmm10
-; AVX512-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm10[1,2,3]
+; AVX512-NEXT:    vcvtph2ps %xmm2, %xmm7
+; AVX512-NEXT:    vmulss %xmm7, %xmm0, %xmm0
+; AVX512-NEXT:    vxorps %xmm9, %xmm9, %xmm9
+; AVX512-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm9[1,2,3]
 ; AVX512-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; AVX512-NEXT:    vmovd %xmm0, %eax
@@ -1866,7 +1866,7 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind
 ; AVX512-NEXT:    vcvtph2ps %xmm1, %xmm3
 ; AVX512-NEXT:    vmovss %xmm3, %xmm2, %xmm2 {%k1}
 ; AVX512-NEXT:    vcvtps2ph $4, %xmm2, %xmm1
-; AVX512-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT:    vmovaps %xmm1, (%rsp) # 16-byte Spill
 ; AVX512-NEXT:    vcvtph2ps %xmm1, %xmm2
 ; AVX512-NEXT:    vucomiss %xmm2, %xmm3
 ; AVX512-NEXT:    seta %al
@@ -1875,8 +1875,8 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind
 ; AVX512-NEXT:    vcvtps2ph $4, %xmm2, %xmm2
 ; AVX512-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
 ; AVX512-NEXT:    vcvtph2ps %xmm2, %xmm2
-; AVX512-NEXT:    vmulss %xmm2, %xmm9, %xmm2
-; AVX512-NEXT:    vblendps {{.*#+}} xmm2 = xmm2[0],xmm10[1,2,3]
+; AVX512-NEXT:    vmulss %xmm7, %xmm2, %xmm2
+; AVX512-NEXT:    vblendps {{.*#+}} xmm2 = xmm2[0],xmm9[1,2,3]
 ; AVX512-NEXT:    vcvtps2ph $4, %xmm2, %xmm1
 ; AVX512-NEXT:    vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; AVX512-NEXT:    vmovd %xmm1, %eax
@@ -1915,8 +1915,9 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind
 ; AVX512-NEXT:    setp %al
 ; AVX512-NEXT:    kmovw %eax, %k2
 ; AVX512-NEXT:    vmovss %xmm0, %xmm5, %xmm5 {%k2}
-; AVX512-NEXT:    vcvtps2ph $4, %xmm5, %xmm15
-; AVX512-NEXT:    vcvtph2ps %xmm15, %xmm5
+; AVX512-NEXT:    vcvtps2ph $4, %xmm5, %xmm1
+; AVX512-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT:    vcvtph2ps %xmm1, %xmm5
 ; AVX512-NEXT:    vmovss %xmm5, %xmm0, %xmm0 {%k1}
 ; AVX512-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1928,16 +1929,16 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind
 ; AVX512-NEXT:    vcvtps2ph $4, %xmm3, %xmm3
 ; AVX512-NEXT:    vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
 ; AVX512-NEXT:    vcvtph2ps %xmm3, %xmm3
-; AVX512-NEXT:    vmulss %xmm3, %xmm9, %xmm3
-; AVX512-NEXT:    vblendps {{.*#+}} xmm3 = xmm3[0],xmm10[1,2,3]
+; AVX512-NEXT:    vmulss %xmm7, %xmm3, %xmm3
+; AVX512-NEXT:    vblendps {{.*#+}} xmm3 = xmm3[0],xmm9[1,2,3]
 ; AVX512-NEXT:    vcvtps2ph $4, %xmm3, %xmm1
 ; AVX512-NEXT:    vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; AVX512-NEXT:    vmovd %xmm1, %eax
 ; AVX512-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
 ; AVX512-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
 ; AVX512-NEXT:    vcvtph2ps %xmm0, %xmm0
-; AVX512-NEXT:    vmulss %xmm0, %xmm9, %xmm0
-; AVX512-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm10[1,2,3]
+; AVX512-NEXT:    vmulss %xmm7, %xmm0, %xmm0
+; AVX512-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm9[1,2,3]
 ; AVX512-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; AVX512-NEXT:    vmovd %xmm0, %ecx
@@ -1957,12 +1958,11 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind
 ; AVX512-NEXT:    setp %al
 ; AVX512-NEXT:    kmovw %eax, %k2
 ; AVX512-NEXT:    vmovss %xmm0, %xmm2, %xmm2 {%k2}
-; AVX512-NEXT:    vcvtps2ph $4, %xmm2, %xmm11
-; AVX512-NEXT:    vcvtph2ps %xmm11, %xmm3
+; AVX512-NEXT:    vcvtps2ph $4, %xmm2, %xmm12
+; AVX512-NEXT:    vcvtph2ps %xmm12, %xmm3
 ; AVX512-NEXT:    vmovss %xmm3, %xmm0, %xmm0 {%k1}
-; AVX512-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
-; AVX512-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX512-NEXT:    vcvtph2ps %xmm0, %xmm2
+; AVX512-NEXT:    vcvtps2ph $4, %xmm0, %xmm15
+; AVX512-NEXT:    vcvtph2ps %xmm15, %xmm2
 ; AVX512-NEXT:    vucomiss %xmm2, %xmm3
 ; AVX512-NEXT:    seta %al
 ; AVX512-NEXT:    kmovw %eax, %k1
@@ -1978,11 +1978,11 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind
 ; AVX512-NEXT:    setp %al
 ; AVX512-NEXT:    kmovw %eax, %k2
 ; AVX512-NEXT:    vmovss %xmm0, %xmm3, %xmm3 {%k2}
-; AVX512-NEXT:    vcvtps2ph $4, %xmm3, %xmm7
-; AVX512-NEXT:    vcvtph2ps %xmm7, %xmm3
+; AVX512-NEXT:    vcvtps2ph $4, %xmm3, %xmm10
+; AVX512-NEXT:    vcvtph2ps %xmm10, %xmm3
 ; AVX512-NEXT:    vmovss %xmm3, %xmm0, %xmm0 {%k1}
-; AVX512-NEXT:    vcvtps2ph $4, %xmm0, %xmm12
-; AVX512-NEXT:    vcvtph2ps %xmm12, %xmm0
+; AVX512-NEXT:    vcvtps2ph $4, %xmm0, %xmm11
+; AVX512-NEXT:    vcvtph2ps %xmm11, %xmm0
 ; AVX512-NEXT:    vucomiss %xmm0, %xmm3
 ; AVX512-NEXT:    seta %al
 ; AVX512-NEXT:    kmovw %eax, %k1
@@ -1990,20 +1990,20 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind
 ; AVX512-NEXT:    vcvtps2ph $4, %xmm2, %xmm2
 ; AVX512-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
 ; AVX512-NEXT:    vcvtph2ps %xmm2, %xmm2
-; AVX512-NEXT:    vmulss %xmm2, %xmm9, %xmm2
-; AVX512-NEXT:    vblendps {{.*#+}} xmm2 = xmm2[0],xmm10[1,2,3]
+; AVX512-NEXT:    vmulss %xmm7, %xmm2, %xmm2
+; AVX512-NEXT:    vblendps {{.*#+}} xmm2 = xmm2[0],xmm9[1,2,3]
 ; AVX512-NEXT:    vcvtps2ph $4, %xmm2, %xmm14
 ; AVX512-NEXT:    vmovd %xmm14, %eax
 ; AVX512-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
 ; AVX512-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
 ; AVX512-NEXT:    vcvtph2ps %xmm0, %xmm0
-; AVX512-NEXT:    vmulss %xmm0, %xmm9, %xmm0
-; AVX512-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm10[1,2,3]
+; AVX512-NEXT:    vmulss %xmm7, %xmm0, %xmm0
+; AVX512-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm9[1,2,3]
 ; AVX512-NEXT:    vcvtps2ph $4, %xmm0, %xmm13
 ; AVX512-NEXT:    vmovd %xmm13, %ecx
 ; AVX512-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
 ; AVX512-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm2
-; AVX512-NEXT:    vpunpcklwd {{.*#+}} xmm5 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; AVX512-NEXT:    vpunpcklwd {{.*#+}} xmm8 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
 ; AVX512-NEXT:    vcvtph2ps %xmm4, %xmm0
 ; AVX512-NEXT:    vucomiss %xmm0, %xmm0
 ; AVX512-NEXT:    setp %al
@@ -2016,8 +2016,8 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind
 ; AVX512-NEXT:    vcvtps2ph $4, %xmm2, %xmm3
 ; AVX512-NEXT:    vcvtph2ps %xmm3, %xmm1
 ; AVX512-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
-; AVX512-NEXT:    vcvtps2ph $4, %xmm0, %xmm8
-; AVX512-NEXT:    vcvtph2ps %xmm8, %xmm2
+; AVX512-NEXT:    vcvtps2ph $4, %xmm0, %xmm5
+; AVX512-NEXT:    vcvtph2ps %xmm5, %xmm2
 ; AVX512-NEXT:    vucomiss %xmm2, %xmm1
 ; AVX512-NEXT:    seta %al
 ; AVX512-NEXT:    kmovw %eax, %k1
@@ -2045,125 +2045,125 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind
 ; AVX512-NEXT:    vcvtps2ph $4, %xmm2, %xmm2
 ; AVX512-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
 ; AVX512-NEXT:    vcvtph2ps %xmm2, %xmm2
-; AVX512-NEXT:    vmulss %xmm2, %xmm9, %xmm2
+; AVX512-NEXT:    vmulss %xmm7, %xmm2, %xmm2
 ; AVX512-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
 ; AVX512-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
 ; AVX512-NEXT:    vcvtph2ps %xmm0, %xmm0
-; AVX512-NEXT:    vmulss %xmm0, %xmm9, %xmm0
-; AVX512-NEXT:    vblendps {{.*#+}} xmm2 = xmm2[0],xmm10[1,2,3]
-; AVX512-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm10[1,2,3]
-; AVX512-NEXT:    vcvtps2ph $4, %xmm2, %xmm9
-; AVX512-NEXT:    vmovd %xmm9, %eax
-; AVX512-NEXT:    vcvtps2ph $4, %xmm0, %xmm10
-; AVX512-NEXT:    vmovd %xmm10, %ecx
+; AVX512-NEXT:    vmulss %xmm7, %xmm0, %xmm0
+; AVX512-NEXT:    vblendps {{.*#+}} xmm2 = xmm2[0],xmm9[1,2,3]
+; AVX512-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm9[1,2,3]
+; AVX512-NEXT:    vcvtps2ph $4, %xmm2, %xmm7
+; AVX512-NEXT:    vmovd %xmm7, %eax
+; AVX512-NEXT:    vcvtps2ph $4, %xmm0, %xmm9
+; AVX512-NEXT:    vmovd %xmm9, %ecx
 ; AVX512-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
 ; AVX512-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm2
 ; AVX512-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; AVX512-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1]
-; AVX512-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm6 # 16-byte Folded Reload
-; AVX512-NEXT:    # xmm6 = xmm0[0],mem[0]
-; AVX512-NEXT:    vmovdqa (%rsp), %xmm0 # 16-byte Reload
-; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; AVX512-NEXT:    vmovd %xmm0, %ecx
-; AVX512-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
-; AVX512-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm2
-; AVX512-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; AVX512-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm8[0],xmm0[1],xmm8[1]
+; AVX512-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT:    # xmm0 = xmm0[0],mem[0]
 ; AVX512-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
 ; AVX512-NEXT:    vmovd %xmm2, %eax
-; AVX512-NEXT:    vmovd %xmm15, %ecx
-; AVX512-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm2
-; AVX512-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm5
-; AVX512-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3]
-; AVX512-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; AVX512-NEXT:    vmovd %xmm11, %eax
-; AVX512-NEXT:    vmovd %xmm7, %ecx
+; AVX512-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
+; AVX512-NEXT:    vmovd %xmm2, %ecx
 ; AVX512-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm2
-; AVX512-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm5
-; AVX512-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3]
+; AVX512-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm6
+; AVX512-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3]
+; AVX512-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
+; AVX512-NEXT:    vmovd %xmm6, %eax
+; AVX512-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
+; AVX512-NEXT:    vmovd %xmm6, %ecx
+; AVX512-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm6
+; AVX512-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm8
+; AVX512-NEXT:    vpunpcklwd {{.*#+}} xmm6 = xmm8[0],xmm6[0],xmm8[1],xmm6[1],xmm8[2],xmm6[2],xmm8[3],xmm6[3]
+; AVX512-NEXT:    vpunpckldq {{.*#+}} xmm2 = xmm6[0],xmm2[0],xmm6[1],xmm2[1]
+; AVX512-NEXT:    vmovd %xmm12, %eax
+; AVX512-NEXT:    vmovd %xmm10, %ecx
+; AVX512-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm6
+; AVX512-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm8
+; AVX512-NEXT:    vpunpcklwd {{.*#+}} xmm6 = xmm8[0],xmm6[0],xmm8[1],xmm6[1],xmm8[2],xmm6[2],xmm8[3],xmm6[3]
 ; AVX512-NEXT:    vmovd %xmm3, %eax
 ; AVX512-NEXT:    vmovd %xmm4, %ecx
 ; AVX512-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm3
 ; AVX512-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm4
 ; AVX512-NEXT:    vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
-; AVX512-NEXT:    vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
-; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
-; AVX512-NEXT:    vpxor %xmm0, %xmm0, %xmm0
-; AVX512-NEXT:    vpcmpeqw %xmm0, %xmm2, %xmm3
-; AVX512-NEXT:    vpblendvb %xmm3, %xmm2, %xmm6, %xmm2
+; AVX512-NEXT:    vpunpckldq {{.*#+}} xmm3 = xmm3[0],xmm6[0],xmm3[1],xmm6[1]
+; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
+; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512-NEXT:    vpcmpeqw %xmm2, %xmm3, %xmm4
+; AVX512-NEXT:    vpblendvb %xmm4, %xmm3, %xmm0, %xmm4
 ; AVX512-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Reload
 ; AVX512-NEXT:    vmovd %xmm3, %eax
-; AVX512-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Reload
+; AVX512-NEXT:    vmovdqa (%rsp), %xmm3 # 16-byte Reload
 ; AVX512-NEXT:    vmovd %xmm3, %ecx
 ; AVX512-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm3
-; AVX512-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm4
-; AVX512-NEXT:    vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; AVX512-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 16-byte Reload
-; AVX512-NEXT:    vmovd %xmm4, %eax
-; AVX512-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 16-byte Reload
-; AVX512-NEXT:    vmovd %xmm4, %ecx
-; AVX512-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm4
-; AVX512-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm5
-; AVX512-NEXT:    vpunpcklwd {{.*#+}} xmm4 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
-; AVX512-NEXT:    vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
-; AVX512-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 16-byte Reload
-; AVX512-NEXT:    vmovd %xmm4, %eax
-; AVX512-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm4
-; AVX512-NEXT:    vmovd %xmm12, %eax
-; AVX512-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm5
-; AVX512-NEXT:    vpunpcklwd {{.*#+}} xmm4 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
-; AVX512-NEXT:    vmovd %xmm8, %eax
+; AVX512-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm6
+; AVX512-NEXT:    vpunpcklwd {{.*#+}} xmm3 = xmm6[0],xmm3[0],xmm6[1],xmm3[1],xmm6[2],xmm3[2],xmm6[3],xmm3[3]
+; AVX512-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
+; AVX512-NEXT:    vmovd %xmm6, %eax
+; AVX512-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
+; AVX512-NEXT:    vmovd %xmm6, %ecx
+; AVX512-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm6
+; AVX512-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm8
+; AVX512-NEXT:    vpunpcklwd {{.*#+}} xmm6 = xmm8[0],xmm6[0],xmm8[1],xmm6[1],xmm8[2],xmm6[2],xmm8[3],xmm6[3]
+; AVX512-NEXT:    vpunpckldq {{.*#+}} xmm3 = xmm6[0],xmm3[0],xmm6[1],xmm3[1]
+; AVX512-NEXT:    vmovd %xmm15, %eax
+; AVX512-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm6
+; AVX512-NEXT:    vmovd %xmm11, %eax
+; AVX512-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm8
+; AVX512-NEXT:    vpunpcklwd {{.*#+}} xmm6 = xmm8[0],xmm6[0],xmm8[1],xmm6[1],xmm8[2],xmm6[2],xmm8[3],xmm6[3]
+; AVX512-NEXT:    vmovd %xmm5, %eax
 ; AVX512-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm5
 ; AVX512-NEXT:    vmovd %xmm1, %eax
 ; AVX512-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm1
 ; AVX512-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3]
-; AVX512-NEXT:    vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
+; AVX512-NEXT:    vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1]
 ; AVX512-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
-; AVX512-NEXT:    vpcmpeqw %xmm0, %xmm1, %xmm0
-; AVX512-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
-; AVX512-NEXT:    vcvtph2ps %xmm10, %xmm1
+; AVX512-NEXT:    vpcmpeqw %xmm2, %xmm1, %xmm2
+; AVX512-NEXT:    vpblendvb %xmm2, %xmm1, %xmm4, %xmm1
+; AVX512-NEXT:    vcvtph2ps %xmm9, %xmm2
 ; AVX512-NEXT:    xorl %eax, %eax
-; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vucomiss %xmm2, %xmm1
+; AVX512-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; AVX512-NEXT:    vucomiss %xmm3, %xmm2
 ; AVX512-NEXT:    movl $65535, %ecx # imm = 0xFFFF
 ; AVX512-NEXT:    movl $0, %edx
 ; AVX512-NEXT:    cmovel %ecx, %edx
-; AVX512-NEXT:    vcvtph2ps %xmm9, %xmm1
-; AVX512-NEXT:    vucomiss %xmm2, %xmm1
+; AVX512-NEXT:    vcvtph2ps %xmm7, %xmm2
+; AVX512-NEXT:    vucomiss %xmm3, %xmm2
 ; AVX512-NEXT:    movl $0, %esi
 ; AVX512-NEXT:    cmovel %ecx, %esi
-; AVX512-NEXT:    vcvtph2ps %xmm13, %xmm1
-; AVX512-NEXT:    vucomiss %xmm2, %xmm1
+; AVX512-NEXT:    vcvtph2ps %xmm13, %xmm2
+; AVX512-NEXT:    vucomiss %xmm3, %xmm2
 ; AVX512-NEXT:    movl $0, %edi
 ; AVX512-NEXT:    cmovel %ecx, %edi
-; AVX512-NEXT:    vcvtph2ps %xmm14, %xmm1
-; AVX512-NEXT:    vucomiss %xmm2, %xmm1
+; AVX512-NEXT:    vcvtph2ps %xmm14, %xmm2
+; AVX512-NEXT:    vucomiss %xmm3, %xmm2
 ; AVX512-NEXT:    movl $0, %r8d
 ; AVX512-NEXT:    cmovel %ecx, %r8d
-; AVX512-NEXT:    vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; AVX512-NEXT:    vucomiss %xmm2, %xmm1
+; AVX512-NEXT:    vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Folded Reload
+; AVX512-NEXT:    vucomiss %xmm3, %xmm2
 ; AVX512-NEXT:    movl $0, %r9d
 ; AVX512-NEXT:    cmovel %ecx, %r9d
-; AVX512-NEXT:    vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; AVX512-NEXT:    vucomiss %xmm2, %xmm1
+; AVX512-NEXT:    vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Folded Reload
+; AVX512-NEXT:    vucomiss %xmm3, %xmm2
 ; AVX512-NEXT:    movl $0, %r10d
 ; AVX512-NEXT:    cmovel %ecx, %r10d
-; AVX512-NEXT:    vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; AVX512-NEXT:    vucomiss %xmm2, %xmm1
+; AVX512-NEXT:    vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Folded Reload
+; AVX512-NEXT:    vucomiss %xmm3, %xmm2
 ; AVX512-NEXT:    movl $0, %r11d
 ; AVX512-NEXT:    cmovel %ecx, %r11d
-; AVX512-NEXT:    vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; AVX512-NEXT:    vucomiss %xmm2, %xmm1
-; AVX512-NEXT:    vmovd %esi, %xmm1
-; AVX512-NEXT:    vpinsrw $1, %edx, %xmm1, %xmm1
-; AVX512-NEXT:    vpinsrw $2, %edi, %xmm1, %xmm1
-; AVX512-NEXT:    vpinsrw $3, %r8d, %xmm1, %xmm1
-; AVX512-NEXT:    vpinsrw $4, %r9d, %xmm1, %xmm1
-; AVX512-NEXT:    vpinsrw $5, %r10d, %xmm1, %xmm1
-; AVX512-NEXT:    vpinsrw $6, %r11d, %xmm1, %xmm1
+; AVX512-NEXT:    vcvtph2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Folded Reload
+; AVX512-NEXT:    vucomiss %xmm3, %xmm2
+; AVX512-NEXT:    vmovd %esi, %xmm2
+; AVX512-NEXT:    vpinsrw $1, %edx, %xmm2, %xmm2
+; AVX512-NEXT:    vpinsrw $2, %edi, %xmm2, %xmm2
+; AVX512-NEXT:    vpinsrw $3, %r8d, %xmm2, %xmm2
+; AVX512-NEXT:    vpinsrw $4, %r9d, %xmm2, %xmm2
+; AVX512-NEXT:    vpinsrw $5, %r10d, %xmm2, %xmm2
+; AVX512-NEXT:    vpinsrw $6, %r11d, %xmm2, %xmm2
 ; AVX512-NEXT:    cmovel %ecx, %eax
-; AVX512-NEXT:    vpinsrw $7, %eax, %xmm1, %xmm1
-; AVX512-NEXT:    vpblendvb %xmm1, %xmm0, %xmm6, %xmm0
+; AVX512-NEXT:    vpinsrw $7, %eax, %xmm2, %xmm2
+; AVX512-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    addq $88, %rsp
 ; AVX512-NEXT:    retq
 ;
@@ -2204,19 +2204,19 @@ define <4 x half> @test_fmaximumnum_v4f16(<4 x half> %x, <4 x half> %y) nounwind
 ; X86-NEXT:    fstps {{[0-9]+}}(%esp)
 ; X86-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
 ; X86-NEXT:    fstps {{[0-9]+}}(%esp)
-; X86-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X86-NEXT:    vmovd %xmm2, %eax
+; X86-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-NEXT:    vmovd %xmm1, %eax
 ; X86-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X86-NEXT:    testl %eax, %eax
 ; X86-NEXT:    js .LBB33_1
 ; X86-NEXT:  # %bb.2:
-; X86-NEXT:    vmovdqa %xmm2, %xmm1
+; X86-NEXT:    vmovdqa %xmm1, %xmm2
 ; X86-NEXT:    jmp .LBB33_3
 ; X86-NEXT:  .LBB33_1:
-; X86-NEXT:    vmovdqa %xmm0, %xmm1
-; X86-NEXT:    vmovdqa %xmm2, %xmm0
+; X86-NEXT:    vmovdqa %xmm0, %xmm2
+; X86-NEXT:    vmovdqa %xmm1, %xmm0
 ; X86-NEXT:  .LBB33_3:
-; X86-NEXT:    vmaxss %xmm1, %xmm0, %xmm1
+; X86-NEXT:    vmaxss %xmm2, %xmm0, %xmm1
 ; X86-NEXT:    vcmpordss %xmm0, %xmm0, %xmm2
 ; X86-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
 ; X86-NEXT:    vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
@@ -2327,43 +2327,43 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n
 ; SSE2-NEXT:    pushq %r14
 ; SSE2-NEXT:    pushq %rbx
 ; SSE2-NEXT:    subq $56, %rsp
-; SSE2-NEXT:    movdqa %xmm1, %xmm4
-; SSE2-NEXT:    movdqa %xmm0, %xmm5
 ; SSE2-NEXT:    pextrw $0, %xmm1, %r14d
 ; SSE2-NEXT:    pextrw $0, %xmm0, %r15d
-; SSE2-NEXT:    movdqa %xmm1, %xmm0
-; SSE2-NEXT:    psrld $16, %xmm0
-; SSE2-NEXT:    pextrw $0, %xmm0, %eax
-; SSE2-NEXT:    movdqa %xmm5, %xmm0
-; SSE2-NEXT:    psrld $16, %xmm0
-; SSE2-NEXT:    pextrw $0, %xmm0, %ecx
+; SSE2-NEXT:    movdqa %xmm1, %xmm2
+; SSE2-NEXT:    psrld $16, %xmm2
+; SSE2-NEXT:    pextrw $0, %xmm2, %eax
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    psrld $16, %xmm2
+; SSE2-NEXT:    pextrw $0, %xmm2, %ecx
 ; SSE2-NEXT:    shll $16, %ecx
 ; SSE2-NEXT:    movd %ecx, %xmm3
 ; SSE2-NEXT:    shll $16, %eax
 ; SSE2-NEXT:    movd %eax, %xmm2
 ; SSE2-NEXT:    testl %ecx, %ecx
-; SSE2-NEXT:    movdqa %xmm3, %xmm1
+; SSE2-NEXT:    movdqa %xmm3, %xmm7
 ; SSE2-NEXT:    js .LBB34_2
 ; SSE2-NEXT:  # %bb.1:
-; SSE2-NEXT:    movdqa %xmm2, %xmm1
+; SSE2-NEXT:    movdqa %xmm2, %xmm7
 ; SSE2-NEXT:  .LBB34_2:
-; SSE2-NEXT:    movdqa %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,1,1,1]
-; SSE2-NEXT:    movdqa %xmm5, (%rsp) # 16-byte Spill
-; SSE2-NEXT:    shufps {{.*#+}} xmm5 = xmm5[1,1,1,1]
-; SSE2-NEXT:    movdqa %xmm1, %xmm0
-; SSE2-NEXT:    cmpordss %xmm1, %xmm0
-; SSE2-NEXT:    movaps %xmm0, %xmm6
-; SSE2-NEXT:    andps %xmm1, %xmm6
+; SSE2-NEXT:    movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE2-NEXT:    movdqa %xmm1, %xmm5
+; SSE2-NEXT:    shufps {{.*#+}} xmm5 = xmm5[1,1],xmm1[1,1]
+; SSE2-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
+; SSE2-NEXT:    movdqa %xmm0, %xmm6
+; SSE2-NEXT:    shufps {{.*#+}} xmm6 = xmm6[1,1],xmm0[1,1]
+; SSE2-NEXT:    movdqa %xmm7, %xmm0
+; SSE2-NEXT:    cmpordss %xmm7, %xmm0
+; SSE2-NEXT:    movaps %xmm0, %xmm4
+; SSE2-NEXT:    andps %xmm7, %xmm4
 ; SSE2-NEXT:    js .LBB34_4
 ; SSE2-NEXT:  # %bb.3:
 ; SSE2-NEXT:    movdqa %xmm3, %xmm2
 ; SSE2-NEXT:  .LBB34_4:
-; SSE2-NEXT:    pextrw $0, %xmm4, %ebp
-; SSE2-NEXT:    pextrw $0, %xmm5, %ebx
-; SSE2-NEXT:    maxss %xmm2, %xmm1
-; SSE2-NEXT:    andnps %xmm1, %xmm0
-; SSE2-NEXT:    orps %xmm6, %xmm0
+; SSE2-NEXT:    pextrw $0, %xmm5, %ebp
+; SSE2-NEXT:    pextrw $0, %xmm6, %ebx
+; SSE2-NEXT:    maxss %xmm2, %xmm7
+; SSE2-NEXT:    andnps %xmm7, %xmm0
+; SSE2-NEXT:    orps %xmm4, %xmm0
 ; SSE2-NEXT:    callq __truncsfbf2@PLT
 ; SSE2-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; SSE2-NEXT:    shll $16, %r15d
@@ -2467,7 +2467,7 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n
 ; AVX1-NEXT:    vmovshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
 ; AVX1-NEXT:    vpextrw $0, %xmm4, %ebx
 ; AVX1-NEXT:    vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
-; AVX1-NEXT:    vpextrw $0, %xmm4, %ebp
+; AVX1-NEXT:    vpextrw $0, %xmm4, %r14d
 ; AVX1-NEXT:    vpextrw $0, %xmm0, %r12d
 ; AVX1-NEXT:    vpextrw $0, %xmm1, %r13d
 ; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
@@ -2486,7 +2486,7 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n
 ; AVX1-NEXT:    vmovdqa %xmm0, %xmm1
 ; AVX1-NEXT:    vmovdqa %xmm4, %xmm0
 ; AVX1-NEXT:  .LBB34_3:
-; AVX1-NEXT:    vpextrw $0, %xmm2, %r14d
+; AVX1-NEXT:    vpextrw $0, %xmm2, %ebp
 ; AVX1-NEXT:    vpextrw $0, %xmm3, %r15d
 ; AVX1-NEXT:    vmaxss %xmm1, %xmm0, %xmm1
 ; AVX1-NEXT:    vcmpordss %xmm0, %xmm0, %xmm2
@@ -2510,8 +2510,8 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n
 ; AVX1-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    callq __truncsfbf2@PLT
 ; AVX1-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX1-NEXT:    shll $16, %ebp
-; AVX1-NEXT:    vmovd %ebp, %xmm0
+; AVX1-NEXT:    shll $16, %r14d
+; AVX1-NEXT:    vmovd %r14d, %xmm0
 ; AVX1-NEXT:    shll $16, %ebx
 ; AVX1-NEXT:    vmovd %ebx, %xmm2
 ; AVX1-NEXT:    js .LBB34_7
@@ -2529,8 +2529,8 @@ define <4 x bfloat> @test_fmaximumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) n
 ; AVX1-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
 ; AVX1-NEXT:    shll $16, %r15d
 ; AVX1-NEXT:    vmovd %r15d, %xmm0
-; AVX1-NEXT:    shll $16, %r14d
-; AVX1-NEXT:    vmovd %r14d, %xmm2
+; AVX1-NEXT:    shll $16, %ebp
+; AVX1-NEXT:    vmovd %ebp, %xmm2
 ; AVX1-NEXT:    js .LBB34_10
 ; AVX1-NEXT:  # %bb.11:
 ; AVX1-NEXT:    vmovdqa %xmm2, %xmm1
diff --git a/llvm/test/CodeGen/X86/fp-round.ll b/llvm/test/CodeGen/X86/fp-round.ll
index 8efd581..1665ef9 100644
--- a/llvm/test/CodeGen/X86/fp-round.ll
+++ b/llvm/test/CodeGen/X86/fp-round.ll
@@ -55,7 +55,7 @@ define half @round_f16(half %h) {
 ; AVX512F-NEXT:    vmovd %eax, %xmm0
 ; AVX512F-NEXT:    vcvtph2ps %xmm0, %xmm0
 ; AVX512F-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; AVX512F-NEXT:    vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm1
+; AVX512F-NEXT:    vpternlogd {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
 ; AVX512F-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 ; AVX512F-NEXT:    vroundss $11, %xmm0, %xmm0, %xmm0
 ; AVX512F-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
@@ -67,7 +67,7 @@ define half @round_f16(half %h) {
 ; AVX512FP16:       ## %bb.0: ## %entry
 ; AVX512FP16-NEXT:    vpbroadcastw {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
 ; AVX512FP16-NEXT:    vpbroadcastw {{.*#+}} xmm2 = [4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1]
-; AVX512FP16-NEXT:    vpternlogq $248, %xmm1, %xmm0, %xmm2
+; AVX512FP16-NEXT:    vpternlogq {{.*#+}} xmm2 = xmm2 | (xmm0 & xmm1)
 ; AVX512FP16-NEXT:    vaddsh %xmm2, %xmm0, %xmm0
 ; AVX512FP16-NEXT:    vrndscalesh $11, %xmm0, %xmm0, %xmm0
 ; AVX512FP16-NEXT:    retq
@@ -103,7 +103,7 @@ define float @round_f32(float %x) {
 ; AVX512F-LABEL: round_f32:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; AVX512F-NEXT:    vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm1
+; AVX512F-NEXT:    vpternlogd {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
 ; AVX512F-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 ; AVX512F-NEXT:    vroundss $11, %xmm0, %xmm0, %xmm0
 ; AVX512F-NEXT:    retq
@@ -111,7 +111,7 @@ define float @round_f32(float %x) {
 ; AVX512FP16-LABEL: round_f32:
 ; AVX512FP16:       ## %bb.0:
 ; AVX512FP16-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; AVX512FP16-NEXT:    vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm1
+; AVX512FP16-NEXT:    vpternlogd {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
 ; AVX512FP16-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 ; AVX512FP16-NEXT:    vroundss $11, %xmm0, %xmm0, %xmm0
 ; AVX512FP16-NEXT:    retq
@@ -147,7 +147,7 @@ define double @round_f64(double %x) {
 ; AVX512F-LABEL: round_f64:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [4.9999999999999994E-1,4.9999999999999994E-1]
-; AVX512F-NEXT:    vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm1
+; AVX512F-NEXT:    vpternlogq {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
 ; AVX512F-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 ; AVX512F-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0
 ; AVX512F-NEXT:    retq
@@ -155,7 +155,7 @@ define double @round_f64(double %x) {
 ; AVX512FP16-LABEL: round_f64:
 ; AVX512FP16:       ## %bb.0:
 ; AVX512FP16-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [4.9999999999999994E-1,4.9999999999999994E-1]
-; AVX512FP16-NEXT:    vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm1
+; AVX512FP16-NEXT:    vpternlogq {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
 ; AVX512FP16-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 ; AVX512FP16-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0
 ; AVX512FP16-NEXT:    retq
@@ -213,7 +213,7 @@ define <4 x float> @round_v4f32(<4 x float> %x) {
 ; AVX512F-LABEL: round_v4f32:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; AVX512F-NEXT:    vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm1
+; AVX512F-NEXT:    vpternlogd {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
 ; AVX512F-NEXT:    vaddps %xmm1, %xmm0, %xmm0
 ; AVX512F-NEXT:    vroundps $11, %xmm0, %xmm0
 ; AVX512F-NEXT:    retq
@@ -221,7 +221,7 @@ define <4 x float> @round_v4f32(<4 x float> %x) {
 ; AVX512FP16-LABEL: round_v4f32:
 ; AVX512FP16:       ## %bb.0:
 ; AVX512FP16-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; AVX512FP16-NEXT:    vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm1
+; AVX512FP16-NEXT:    vpternlogd {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
 ; AVX512FP16-NEXT:    vaddps %xmm1, %xmm0, %xmm0
 ; AVX512FP16-NEXT:    vroundps $11, %xmm0, %xmm0
 ; AVX512FP16-NEXT:    retq
@@ -267,7 +267,7 @@ define <2 x double> @round_v2f64(<2 x double> %x) {
 ; AVX512F-LABEL: round_v2f64:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [4.9999999999999994E-1,4.9999999999999994E-1]
-; AVX512F-NEXT:    vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm1
+; AVX512F-NEXT:    vpternlogq {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
 ; AVX512F-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
 ; AVX512F-NEXT:    vroundpd $11, %xmm0, %xmm0
 ; AVX512F-NEXT:    retq
@@ -275,7 +275,7 @@ define <2 x double> @round_v2f64(<2 x double> %x) {
 ; AVX512FP16-LABEL: round_v2f64:
 ; AVX512FP16:       ## %bb.0:
 ; AVX512FP16-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [4.9999999999999994E-1,4.9999999999999994E-1]
-; AVX512FP16-NEXT:    vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm1
+; AVX512FP16-NEXT:    vpternlogq {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
 ; AVX512FP16-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
 ; AVX512FP16-NEXT:    vroundpd $11, %xmm0, %xmm0
 ; AVX512FP16-NEXT:    retq
@@ -361,7 +361,7 @@ define <8 x float> @round_v8f32(<8 x float> %x) {
 ; AVX512F-LABEL: round_v8f32:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; AVX512F-NEXT:    vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm1
+; AVX512F-NEXT:    vpternlogd {{.*#+}} ymm1 = ymm1 | (ymm0 & mem)
 ; AVX512F-NEXT:    vaddps %ymm1, %ymm0, %ymm0
 ; AVX512F-NEXT:    vroundps $11, %ymm0, %ymm0
 ; AVX512F-NEXT:    retq
@@ -369,7 +369,7 @@ define <8 x float> @round_v8f32(<8 x float> %x) {
 ; AVX512FP16-LABEL: round_v8f32:
 ; AVX512FP16:       ## %bb.0:
 ; AVX512FP16-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; AVX512FP16-NEXT:    vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm1
+; AVX512FP16-NEXT:    vpternlogd {{.*#+}} ymm1 = ymm1 | (ymm0 & mem)
 ; AVX512FP16-NEXT:    vaddps %ymm1, %ymm0, %ymm0
 ; AVX512FP16-NEXT:    vroundps $11, %ymm0, %ymm0
 ; AVX512FP16-NEXT:    retq
@@ -431,7 +431,7 @@ define <4 x double> @round_v4f64(<4 x double> %x) {
 ; AVX512F-LABEL: round_v4f64:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1]
-; AVX512F-NEXT:    vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm1
+; AVX512F-NEXT:    vpternlogq {{.*#+}} ymm1 = ymm1 | (ymm0 & mem)
 ; AVX512F-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
 ; AVX512F-NEXT:    vroundpd $11, %ymm0, %ymm0
 ; AVX512F-NEXT:    retq
@@ -439,7 +439,7 @@ define <4 x double> @round_v4f64(<4 x double> %x) {
 ; AVX512FP16-LABEL: round_v4f64:
 ; AVX512FP16:       ## %bb.0:
 ; AVX512FP16-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1]
-; AVX512FP16-NEXT:    vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm1
+; AVX512FP16-NEXT:    vpternlogq {{.*#+}} ymm1 = ymm1 | (ymm0 & mem)
 ; AVX512FP16-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
 ; AVX512FP16-NEXT:    vroundpd $11, %ymm0, %ymm0
 ; AVX512FP16-NEXT:    retq
@@ -587,7 +587,7 @@ define <16 x float> @round_v16f32(<16 x float> %x) {
 ; AVX512F-LABEL: round_v16f32:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpbroadcastd {{.*#+}} zmm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; AVX512F-NEXT:    vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm1
+; AVX512F-NEXT:    vpternlogd {{.*#+}} zmm1 = zmm1 | (zmm0 & mem)
 ; AVX512F-NEXT:    vaddps %zmm1, %zmm0, %zmm0
 ; AVX512F-NEXT:    vrndscaleps $11, %zmm0, %zmm0
 ; AVX512F-NEXT:    retq
@@ -595,7 +595,7 @@ define <16 x float> @round_v16f32(<16 x float> %x) {
 ; AVX512FP16-LABEL: round_v16f32:
 ; AVX512FP16:       ## %bb.0:
 ; AVX512FP16-NEXT:    vpbroadcastd {{.*#+}} zmm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; AVX512FP16-NEXT:    vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm1
+; AVX512FP16-NEXT:    vpternlogd {{.*#+}} zmm1 = zmm1 | (zmm0 & mem)
 ; AVX512FP16-NEXT:    vaddps %zmm1, %zmm0, %zmm0
 ; AVX512FP16-NEXT:    vrndscaleps $11, %zmm0, %zmm0
 ; AVX512FP16-NEXT:    retq
@@ -695,7 +695,7 @@ define <8 x double> @round_v8f64(<8 x double> %x) {
 ; AVX512F-LABEL: round_v8f64:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpbroadcastq {{.*#+}} zmm1 = [4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1]
-; AVX512F-NEXT:    vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm1
+; AVX512F-NEXT:    vpternlogq {{.*#+}} zmm1 = zmm1 | (zmm0 & mem)
 ; AVX512F-NEXT:    vaddpd %zmm1, %zmm0, %zmm0
 ; AVX512F-NEXT:    vrndscalepd $11, %zmm0, %zmm0
 ; AVX512F-NEXT:    retq
@@ -703,7 +703,7 @@ define <8 x double> @round_v8f64(<8 x double> %x) {
 ; AVX512FP16-LABEL: round_v8f64:
 ; AVX512FP16:       ## %bb.0:
 ; AVX512FP16-NEXT:    vpbroadcastq {{.*#+}} zmm1 = [4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1]
-; AVX512FP16-NEXT:    vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm1
+; AVX512FP16-NEXT:    vpternlogq {{.*#+}} zmm1 = zmm1 | (zmm0 & mem)
 ; AVX512FP16-NEXT:    vaddpd %zmm1, %zmm0, %zmm0
 ; AVX512FP16-NEXT:    vrndscalepd $11, %zmm0, %zmm0
 ; AVX512FP16-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint-fp16.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint-fp16.ll
index bd3cb37..0498f9b 100644
--- a/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint-fp16.ll
+++ b/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint-fp16.ll
@@ -176,7 +176,7 @@ define i64 @fptosi_f16toi64(half %x) #0 {
 ;
 ; X86-LABEL: fptosi_f16toi64:
 ; X86:       # %bb.0:
-; X86-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
 ; X86-NEXT:    vcvttph2qq %xmm0, %xmm0
 ; X86-NEXT:    vmovd %xmm0, %eax
 ; X86-NEXT:    vpextrd $1, %xmm0, %edx
@@ -395,7 +395,7 @@ define i64 @fptoui_f16toi64(half %x) #0 {
 ;
 ; X86-LABEL: fptoui_f16toi64:
 ; X86:       # %bb.0:
-; X86-NEXT:    vmovsh {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
 ; X86-NEXT:    vcvttph2uqq %xmm0, %xmm0
 ; X86-NEXT:    vmovd %xmm0, %eax
 ; X86-NEXT:    vpextrd $1, %xmm0, %edx
diff --git a/llvm/test/CodeGen/X86/half.ll b/llvm/test/CodeGen/X86/half.ll
index 6e7f109..9ae4a64 100644
--- a/llvm/test/CodeGen/X86/half.ll
+++ b/llvm/test/CodeGen/X86/half.ll
@@ -1603,15 +1603,15 @@ define <8 x half> @maxnum_v8f16(<8 x half> %0, <8 x half> %1) #0 {
 ; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm4, %xmm2
 ; BWON-F16C-NEXT:    vmovd %xmm2, %ecx
 ; BWON-F16C-NEXT:    vpsrldq {{.*#+}} xmm2 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; BWON-F16C-NEXT:    vcvtph2ps %xmm2, %xmm3
-; BWON-F16C-NEXT:    vpsrldq {{.*#+}} xmm2 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
 ; BWON-F16C-NEXT:    vcvtph2ps %xmm2, %xmm2
-; BWON-F16C-NEXT:    vucomiss %xmm3, %xmm2
+; BWON-F16C-NEXT:    vpsrldq {{.*#+}} xmm3 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; BWON-F16C-NEXT:    vcvtph2ps %xmm3, %xmm3
+; BWON-F16C-NEXT:    vucomiss %xmm2, %xmm3
 ; BWON-F16C-NEXT:    ja .LBB26_6
 ; BWON-F16C-NEXT:  # %bb.5:
-; BWON-F16C-NEXT:    vmovaps %xmm3, %xmm2
+; BWON-F16C-NEXT:    vmovaps %xmm2, %xmm3
 ; BWON-F16C-NEXT:  .LBB26_6:
-; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm2, %xmm2
+; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm3, %xmm2
 ; BWON-F16C-NEXT:    vmovd %xmm2, %edx
 ; BWON-F16C-NEXT:    vshufpd {{.*#+}} xmm2 = xmm1[1,0]
 ; BWON-F16C-NEXT:    vcvtph2ps %xmm2, %xmm3
diff --git a/llvm/test/CodeGen/X86/post-ra-sched-with-debug.mir b/llvm/test/CodeGen/X86/post-ra-sched-with-debug.mir
index 65675ce..bf72593 100644
--- a/llvm/test/CodeGen/X86/post-ra-sched-with-debug.mir
+++ b/llvm/test/CodeGen/X86/post-ra-sched-with-debug.mir
@@ -1,5 +1,7 @@
 # RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=btver2 -run-pass=post-RA-sched -o - %s | FileCheck %s
+# RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=btver2 -passes=post-RA-sched -o - %s | FileCheck %s
 # RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=btver2 -run-pass=post-RA-sched -o - %s -experimental-debug-variable-locations| FileCheck %s
+# RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=btver2 -passes=post-RA-sched -o - %s -experimental-debug-variable-locations| FileCheck %s
 
 # Test that multiple DBG_VALUE's and DBG_PHIs following an instruction whose
 # register needs # to be changed during the post-RA scheduler pass are updated
diff --git a/llvm/test/CodeGen/X86/pr27681.mir b/llvm/test/CodeGen/X86/pr27681.mir
index e7293fd..d25cb65 100644
--- a/llvm/test/CodeGen/X86/pr27681.mir
+++ b/llvm/test/CodeGen/X86/pr27681.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=i386-unknown-linux-gnu -mcpu=slm -run-pass post-RA-sched -o - %s | FileCheck %s
+# RUN: llc -mtriple=i386-unknown-linux-gnu -mcpu=slm -passes=post-RA-sched -o - %s | FileCheck %s
 #
 # Verify that the critical antidependence breaker does not consider
 # a high byte register as available as a replacement register
diff --git a/llvm/test/CodeGen/X86/pr41619_reduced.mir b/llvm/test/CodeGen/X86/pr41619_reduced.mir
new file mode 100644
index 0000000..5dc2eb6
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr41619_reduced.mir
@@ -0,0 +1,27 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=x86_64-- -mattr=+avx2 -run-pass=peephole-opt -o - %s | FileCheck %s
+
+# When trying to coalesce the operand of VMOVSDto64rr, a query would
+# be made with the same register class but the source has a
+# subregister and the result does not.
+---
+name:            uncoalescable_copy_queries_same_regclass_with_only_one_subreg
+tracksRegLiveness: true
+isSSA:           true
+body:             |
+  bb.0:
+    liveins: $rax
+
+    ; CHECK-LABEL: name: uncoalescable_copy_queries_same_regclass_with_only_one_subreg
+    ; CHECK: liveins: $rax
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rax
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr128 = COPY [[COPY]].sub_32bit
+    ; CHECK-NEXT: [[VMOVSDto64rr:%[0-9]+]]:gr64 = VMOVSDto64rr [[COPY1]]
+    ; CHECK-NEXT: RET 0, implicit [[VMOVSDto64rr]]
+    %0:gr64 = COPY $rax
+    %1:vr128 = COPY %0.sub_32bit
+    %2:gr64 = VMOVSDto64rr %1
+    RET 0, implicit %2
+
+...
diff --git a/llvm/test/CodeGen/X86/smax.ll b/llvm/test/CodeGen/X86/smax.ll
index 2d594229..86891e9 100644
--- a/llvm/test/CodeGen/X86/smax.ll
+++ b/llvm/test/CodeGen/X86/smax.ll
@@ -642,11 +642,11 @@ define i16 @test_signbits_i16(i16 %a, i16 %b) nounwind {
 ;
 ; X86-LABEL: test_signbits_i16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    shrl $15, %eax
-; X86-NEXT:    cmpw %cx, %ax
-; X86-NEXT:    cmovlel %ecx, %eax
+; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movswl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    shrl $15, %ecx
+; X86-NEXT:    cmpw %ax, %cx
+; X86-NEXT:    cmovgl %ecx, %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
   %ax = ashr i16 %a, 15
diff --git a/llvm/test/CodeGen/X86/smin.ll b/llvm/test/CodeGen/X86/smin.ll
index bde61d5..8907f6c 100644
--- a/llvm/test/CodeGen/X86/smin.ll
+++ b/llvm/test/CodeGen/X86/smin.ll
@@ -643,11 +643,11 @@ define i16 @test_signbits_i16(i16 %a, i16 %b) nounwind {
 ;
 ; X86-LABEL: test_signbits_i16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    shrl $15, %eax
-; X86-NEXT:    cmpw %cx, %ax
-; X86-NEXT:    cmovgel %ecx, %eax
+; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movswl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    shrl $15, %ecx
+; X86-NEXT:    cmpw %ax, %cx
+; X86-NEXT:    cmovll %ecx, %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
   %ax = ashr i16 %a, 15
diff --git a/llvm/test/CodeGen/X86/test-shrink.ll b/llvm/test/CodeGen/X86/test-shrink.ll
index 6a7a0ad..03bba9c 100644
--- a/llvm/test/CodeGen/X86/test-shrink.ll
+++ b/llvm/test/CodeGen/X86/test-shrink.ll
@@ -546,7 +546,6 @@ define void @testw(i16 inreg %x) nounwind minsize {
 ; CHECK-WIN32-64-LABEL: testw:
 ; CHECK-WIN32-64:       # %bb.0:
 ; CHECK-WIN32-64-NEXT:    subq $40, %rsp
-; CHECK-WIN32-64-NEXT:    # kill: def $cx killed $cx def $ecx
 ; CHECK-WIN32-64-NEXT:    testw $2049, %cx # imm = 0x801
 ; CHECK-WIN32-64-NEXT:    jne .LBB12_2
 ; CHECK-WIN32-64-NEXT:  # %bb.1: # %yes
diff --git a/llvm/test/CodeGen/X86/umax.ll b/llvm/test/CodeGen/X86/umax.ll
index f0479ae..f589d4a 100644
--- a/llvm/test/CodeGen/X86/umax.ll
+++ b/llvm/test/CodeGen/X86/umax.ll
@@ -1237,11 +1237,11 @@ define i16 @test_signbits_i16(i16 %a, i16 %b) nounwind {
 ;
 ; X86-LABEL: test_signbits_i16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    shrl $15, %eax
-; X86-NEXT:    cmpw %cx, %ax
-; X86-NEXT:    cmovbel %ecx, %eax
+; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movswl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    shrl $15, %ecx
+; X86-NEXT:    cmpw %ax, %cx
+; X86-NEXT:    cmoval %ecx, %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
   %ax = ashr i16 %a, 15
diff --git a/llvm/test/CodeGen/X86/umin.ll b/llvm/test/CodeGen/X86/umin.ll
index e4ce089..7a5cdbb 100644
--- a/llvm/test/CodeGen/X86/umin.ll
+++ b/llvm/test/CodeGen/X86/umin.ll
@@ -652,11 +652,11 @@ define i16 @test_signbits_i16(i16 %a, i16 %b) nounwind {
 ;
 ; X86-LABEL: test_signbits_i16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    shrl $15, %eax
-; X86-NEXT:    cmpw %cx, %ax
-; X86-NEXT:    cmovael %ecx, %eax
+; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movswl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    shrl $15, %ecx
+; X86-NEXT:    cmpw %ax, %cx
+; X86-NEXT:    cmovbl %ecx, %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
   %ax = ashr i16 %a, 15
diff --git a/llvm/test/CodeGen/X86/vector-compress.ll b/llvm/test/CodeGen/X86/vector-compress.ll
index 87a948a..e88387a 100644
--- a/llvm/test/CodeGen/X86/vector-compress.ll
+++ b/llvm/test/CodeGen/X86/vector-compress.ll
@@ -2390,25 +2390,24 @@ define <64 x i8> @test_compress_v64i8(<64 x i8> %vec, <64 x i1> %mask, <64 x i8>
 ; AVX512F-NEXT:    kshiftrw $13, %k1, %k1
 ; AVX512F-NEXT:    korw %k1, %k0, %k0
 ; AVX512F-NEXT:    movw $-9, %ax
-; AVX512F-NEXT:    kmovw %eax, %k7
-; AVX512F-NEXT:    kandw %k7, %k0, %k0
-; AVX512F-NEXT:    kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; AVX512F-NEXT:    kmovw %eax, %k5
+; AVX512F-NEXT:    kandw %k5, %k0, %k0
 ; AVX512F-NEXT:    movzbl 376(%rbp), %eax
 ; AVX512F-NEXT:    kmovw %eax, %k1
 ; AVX512F-NEXT:    kshiftlw $15, %k1, %k1
 ; AVX512F-NEXT:    kshiftrw $12, %k1, %k1
 ; AVX512F-NEXT:    korw %k1, %k0, %k0
 ; AVX512F-NEXT:    movw $-17, %ax
-; AVX512F-NEXT:    kmovw %eax, %k5
-; AVX512F-NEXT:    kandw %k5, %k0, %k0
+; AVX512F-NEXT:    kmovw %eax, %k6
+; AVX512F-NEXT:    kandw %k6, %k0, %k0
 ; AVX512F-NEXT:    movzbl 384(%rbp), %eax
 ; AVX512F-NEXT:    kmovw %eax, %k1
 ; AVX512F-NEXT:    kshiftlw $15, %k1, %k1
 ; AVX512F-NEXT:    kshiftrw $11, %k1, %k1
 ; AVX512F-NEXT:    korw %k1, %k0, %k0
 ; AVX512F-NEXT:    movw $-33, %ax
-; AVX512F-NEXT:    kmovw %eax, %k6
-; AVX512F-NEXT:    kandw %k6, %k0, %k0
+; AVX512F-NEXT:    kmovw %eax, %k7
+; AVX512F-NEXT:    kandw %k7, %k0, %k0
 ; AVX512F-NEXT:    movzbl 392(%rbp), %eax
 ; AVX512F-NEXT:    kmovw %eax, %k1
 ; AVX512F-NEXT:    kshiftlw $15, %k1, %k1
@@ -2516,26 +2515,26 @@ define <64 x i8> @test_compress_v64i8(<64 x i8> %vec, <64 x i1> %mask, <64 x i8>
 ; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
 ; AVX512F-NEXT:    kshiftrw $13, %k2, %k2
 ; AVX512F-NEXT:    korw %k2, %k1, %k1
-; AVX512F-NEXT:    kandw %k7, %k1, %k1
+; AVX512F-NEXT:    kandw %k5, %k1, %k1
 ; AVX512F-NEXT:    movzbl 248(%rbp), %eax
 ; AVX512F-NEXT:    kmovw %eax, %k2
 ; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
 ; AVX512F-NEXT:    kshiftrw $12, %k2, %k2
 ; AVX512F-NEXT:    korw %k2, %k1, %k1
-; AVX512F-NEXT:    kandw %k5, %k1, %k1
+; AVX512F-NEXT:    kandw %k6, %k1, %k1
 ; AVX512F-NEXT:    movzbl 256(%rbp), %eax
 ; AVX512F-NEXT:    kmovw %eax, %k2
 ; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
 ; AVX512F-NEXT:    kshiftrw $11, %k2, %k2
 ; AVX512F-NEXT:    korw %k2, %k1, %k1
-; AVX512F-NEXT:    kandw %k6, %k1, %k1
+; AVX512F-NEXT:    kandw %k7, %k1, %k1
 ; AVX512F-NEXT:    movzbl 264(%rbp), %eax
 ; AVX512F-NEXT:    kmovw %eax, %k2
 ; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
 ; AVX512F-NEXT:    kshiftrw $10, %k2, %k2
 ; AVX512F-NEXT:    korw %k2, %k1, %k1
-; AVX512F-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
-; AVX512F-NEXT:    kandw %k7, %k1, %k1
+; AVX512F-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
+; AVX512F-NEXT:    kandw %k0, %k1, %k1
 ; AVX512F-NEXT:    movzbl 272(%rbp), %eax
 ; AVX512F-NEXT:    kmovw %eax, %k2
 ; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
@@ -2626,27 +2625,31 @@ define <64 x i8> @test_compress_v64i8(<64 x i8> %vec, <64 x i1> %mask, <64 x i8>
 ; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
 ; AVX512F-NEXT:    kshiftrw $13, %k2, %k2
 ; AVX512F-NEXT:    korw %k2, %k1, %k1
-; AVX512F-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload
-; AVX512F-NEXT:    kandw %k4, %k1, %k1
+; AVX512F-NEXT:    kmovw %k5, %k4
+; AVX512F-NEXT:    kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; AVX512F-NEXT:    kandw %k5, %k1, %k1
 ; AVX512F-NEXT:    movzbl 120(%rbp), %eax
 ; AVX512F-NEXT:    kmovw %eax, %k2
 ; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
 ; AVX512F-NEXT:    kshiftrw $12, %k2, %k2
 ; AVX512F-NEXT:    korw %k2, %k1, %k1
-; AVX512F-NEXT:    kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; AVX512F-NEXT:    kandw %k5, %k1, %k1
+; AVX512F-NEXT:    kmovw %k6, %k5
+; AVX512F-NEXT:    kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; AVX512F-NEXT:    kandw %k6, %k1, %k1
 ; AVX512F-NEXT:    movzbl 128(%rbp), %eax
 ; AVX512F-NEXT:    kmovw %eax, %k2
 ; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
 ; AVX512F-NEXT:    kshiftrw $11, %k2, %k2
 ; AVX512F-NEXT:    korw %k2, %k1, %k1
-; AVX512F-NEXT:    kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; AVX512F-NEXT:    kandw %k6, %k1, %k1
+; AVX512F-NEXT:    kmovw %k7, %k6
+; AVX512F-NEXT:    kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; AVX512F-NEXT:    kandw %k7, %k1, %k1
 ; AVX512F-NEXT:    movzbl 136(%rbp), %eax
 ; AVX512F-NEXT:    kmovw %eax, %k2
 ; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
 ; AVX512F-NEXT:    kshiftrw $10, %k2, %k2
 ; AVX512F-NEXT:    korw %k2, %k1, %k1
+; AVX512F-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
 ; AVX512F-NEXT:    kandw %k7, %k1, %k1
 ; AVX512F-NEXT:    movzbl 144(%rbp), %eax
 ; AVX512F-NEXT:    kmovw %eax, %k2
@@ -2667,8 +2670,8 @@ define <64 x i8> @test_compress_v64i8(<64 x i8> %vec, <64 x i1> %mask, <64 x i8>
 ; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
 ; AVX512F-NEXT:    kshiftrw $7, %k2, %k2
 ; AVX512F-NEXT:    korw %k2, %k1, %k1
-; AVX512F-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
-; AVX512F-NEXT:    kandw %k7, %k1, %k1
+; AVX512F-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
+; AVX512F-NEXT:    kandw %k2, %k1, %k1
 ; AVX512F-NEXT:    movzbl 168(%rbp), %eax
 ; AVX512F-NEXT:    kmovw %eax, %k2
 ; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
@@ -2741,8 +2744,7 @@ define <64 x i8> @test_compress_v64i8(<64 x i8> %vec, <64 x i1> %mask, <64 x i8>
 ; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
 ; AVX512F-NEXT:    kshiftrw $10, %k2, %k2
 ; AVX512F-NEXT:    korw %k2, %k1, %k1
-; AVX512F-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
-; AVX512F-NEXT:    kandw %k2, %k1, %k1
+; AVX512F-NEXT:    kandw %k7, %k1, %k1
 ; AVX512F-NEXT:    movzbl 16(%rbp), %eax
 ; AVX512F-NEXT:    kmovw %eax, %k2
 ; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
@@ -2764,7 +2766,8 @@ define <64 x i8> @test_compress_v64i8(<64 x i8> %vec, <64 x i1> %mask, <64 x i8>
 ; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
 ; AVX512F-NEXT:    kshiftrw $7, %k2, %k2
 ; AVX512F-NEXT:    korw %k2, %k1, %k1
-; AVX512F-NEXT:    kandw %k7, %k1, %k1
+; AVX512F-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
+; AVX512F-NEXT:    kandw %k2, %k1, %k1
 ; AVX512F-NEXT:    movzbl 40(%rbp), %eax
 ; AVX512F-NEXT:    kmovw %eax, %k2
 ; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
@@ -3763,25 +3766,24 @@ define <64 x i32> @test_compress_large(<64 x i1> %mask, <64 x i32> %vec, <64 x i
 ; AVX512F-NEXT:    kshiftrw $13, %k1, %k1
 ; AVX512F-NEXT:    korw %k1, %k0, %k0
 ; AVX512F-NEXT:    movw $-9, %ax
-; AVX512F-NEXT:    kmovw %eax, %k7
-; AVX512F-NEXT:    kandw %k7, %k0, %k0
-; AVX512F-NEXT:    kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; AVX512F-NEXT:    kmovw %eax, %k5
+; AVX512F-NEXT:    kandw %k5, %k0, %k0
 ; AVX512F-NEXT:    movzbl 376(%rbp), %eax
 ; AVX512F-NEXT:    kmovw %eax, %k1
 ; AVX512F-NEXT:    kshiftlw $15, %k1, %k1
 ; AVX512F-NEXT:    kshiftrw $12, %k1, %k1
 ; AVX512F-NEXT:    korw %k1, %k0, %k0
 ; AVX512F-NEXT:    movw $-17, %ax
-; AVX512F-NEXT:    kmovw %eax, %k5
-; AVX512F-NEXT:    kandw %k5, %k0, %k0
+; AVX512F-NEXT:    kmovw %eax, %k6
+; AVX512F-NEXT:    kandw %k6, %k0, %k0
 ; AVX512F-NEXT:    movzbl 384(%rbp), %eax
 ; AVX512F-NEXT:    kmovw %eax, %k1
 ; AVX512F-NEXT:    kshiftlw $15, %k1, %k1
 ; AVX512F-NEXT:    kshiftrw $11, %k1, %k1
 ; AVX512F-NEXT:    korw %k1, %k0, %k0
 ; AVX512F-NEXT:    movw $-33, %ax
-; AVX512F-NEXT:    kmovw %eax, %k6
-; AVX512F-NEXT:    kandw %k6, %k0, %k0
+; AVX512F-NEXT:    kmovw %eax, %k7
+; AVX512F-NEXT:    kandw %k7, %k0, %k0
 ; AVX512F-NEXT:    movzbl 392(%rbp), %eax
 ; AVX512F-NEXT:    kmovw %eax, %k1
 ; AVX512F-NEXT:    kshiftlw $15, %k1, %k1
@@ -3889,26 +3891,26 @@ define <64 x i32> @test_compress_large(<64 x i1> %mask, <64 x i32> %vec, <64 x i
 ; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
 ; AVX512F-NEXT:    kshiftrw $13, %k2, %k2
 ; AVX512F-NEXT:    korw %k2, %k1, %k1
-; AVX512F-NEXT:    kandw %k7, %k1, %k1
+; AVX512F-NEXT:    kandw %k5, %k1, %k1
 ; AVX512F-NEXT:    movzbl 248(%rbp), %eax
 ; AVX512F-NEXT:    kmovw %eax, %k2
 ; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
 ; AVX512F-NEXT:    kshiftrw $12, %k2, %k2
 ; AVX512F-NEXT:    korw %k2, %k1, %k1
-; AVX512F-NEXT:    kandw %k5, %k1, %k1
+; AVX512F-NEXT:    kandw %k6, %k1, %k1
 ; AVX512F-NEXT:    movzbl 256(%rbp), %eax
 ; AVX512F-NEXT:    kmovw %eax, %k2
 ; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
 ; AVX512F-NEXT:    kshiftrw $11, %k2, %k2
 ; AVX512F-NEXT:    korw %k2, %k1, %k1
-; AVX512F-NEXT:    kandw %k6, %k1, %k1
+; AVX512F-NEXT:    kandw %k7, %k1, %k1
 ; AVX512F-NEXT:    movzbl 264(%rbp), %eax
 ; AVX512F-NEXT:    kmovw %eax, %k2
 ; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
 ; AVX512F-NEXT:    kshiftrw $10, %k2, %k2
 ; AVX512F-NEXT:    korw %k2, %k1, %k1
-; AVX512F-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
-; AVX512F-NEXT:    kandw %k7, %k1, %k1
+; AVX512F-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
+; AVX512F-NEXT:    kandw %k0, %k1, %k1
 ; AVX512F-NEXT:    movzbl 272(%rbp), %eax
 ; AVX512F-NEXT:    kmovw %eax, %k2
 ; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
@@ -3999,27 +4001,31 @@ define <64 x i32> @test_compress_large(<64 x i1> %mask, <64 x i32> %vec, <64 x i
 ; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
 ; AVX512F-NEXT:    kshiftrw $13, %k2, %k2
 ; AVX512F-NEXT:    korw %k2, %k1, %k1
-; AVX512F-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload
-; AVX512F-NEXT:    kandw %k4, %k1, %k1
+; AVX512F-NEXT:    kmovw %k5, %k4
+; AVX512F-NEXT:    kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; AVX512F-NEXT:    kandw %k5, %k1, %k1
 ; AVX512F-NEXT:    movzbl 120(%rbp), %eax
 ; AVX512F-NEXT:    kmovw %eax, %k2
 ; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
 ; AVX512F-NEXT:    kshiftrw $12, %k2, %k2
 ; AVX512F-NEXT:    korw %k2, %k1, %k1
-; AVX512F-NEXT:    kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; AVX512F-NEXT:    kandw %k5, %k1, %k1
+; AVX512F-NEXT:    kmovw %k6, %k5
+; AVX512F-NEXT:    kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; AVX512F-NEXT:    kandw %k6, %k1, %k1
 ; AVX512F-NEXT:    movzbl 128(%rbp), %eax
 ; AVX512F-NEXT:    kmovw %eax, %k2
 ; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
 ; AVX512F-NEXT:    kshiftrw $11, %k2, %k2
 ; AVX512F-NEXT:    korw %k2, %k1, %k1
-; AVX512F-NEXT:    kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; AVX512F-NEXT:    kandw %k6, %k1, %k1
+; AVX512F-NEXT:    kmovw %k7, %k6
+; AVX512F-NEXT:    kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; AVX512F-NEXT:    kandw %k7, %k1, %k1
 ; AVX512F-NEXT:    movzbl 136(%rbp), %eax
 ; AVX512F-NEXT:    kmovw %eax, %k2
 ; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
 ; AVX512F-NEXT:    kshiftrw $10, %k2, %k2
 ; AVX512F-NEXT:    korw %k2, %k1, %k1
+; AVX512F-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
 ; AVX512F-NEXT:    kandw %k7, %k1, %k1
 ; AVX512F-NEXT:    movzbl 144(%rbp), %eax
 ; AVX512F-NEXT:    kmovw %eax, %k2
@@ -4040,8 +4046,8 @@ define <64 x i32> @test_compress_large(<64 x i1> %mask, <64 x i32> %vec, <64 x i
 ; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
 ; AVX512F-NEXT:    kshiftrw $7, %k2, %k2
 ; AVX512F-NEXT:    korw %k2, %k1, %k1
-; AVX512F-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
-; AVX512F-NEXT:    kandw %k7, %k1, %k1
+; AVX512F-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
+; AVX512F-NEXT:    kandw %k2, %k1, %k1
 ; AVX512F-NEXT:    movzbl 168(%rbp), %eax
 ; AVX512F-NEXT:    kmovw %eax, %k2
 ; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
@@ -4114,8 +4120,7 @@ define <64 x i32> @test_compress_large(<64 x i1> %mask, <64 x i32> %vec, <64 x i
 ; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
 ; AVX512F-NEXT:    kshiftrw $10, %k2, %k2
 ; AVX512F-NEXT:    korw %k2, %k1, %k1
-; AVX512F-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
-; AVX512F-NEXT:    kandw %k2, %k1, %k1
+; AVX512F-NEXT:    kandw %k7, %k1, %k1
 ; AVX512F-NEXT:    movzbl 16(%rbp), %eax
 ; AVX512F-NEXT:    kmovw %eax, %k2
 ; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
@@ -4137,7 +4142,8 @@ define <64 x i32> @test_compress_large(<64 x i1> %mask, <64 x i32> %vec, <64 x i
 ; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
 ; AVX512F-NEXT:    kshiftrw $7, %k2, %k2
 ; AVX512F-NEXT:    korw %k2, %k1, %k1
-; AVX512F-NEXT:    kandw %k7, %k1, %k1
+; AVX512F-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
+; AVX512F-NEXT:    kandw %k2, %k1, %k1
 ; AVX512F-NEXT:    movzbl 40(%rbp), %eax
 ; AVX512F-NEXT:    kmovw %eax, %k2
 ; AVX512F-NEXT:    kshiftlw $15, %k2, %k2
diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll
index 193e570..32ad72b 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll
@@ -1312,7 +1312,6 @@ define void @fancierRotate2(ptr %arr, ptr %control, i32 %rot0, i32 %rot1) {
 ; AVX1-NEXT:    addq $8, %rax
 ; AVX1-NEXT:    jne .LBB8_1
 ; AVX1-NEXT:  # %bb.2: # %exit
-; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: fancierRotate2:
diff --git a/llvm/test/CodeGen/X86/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/X86/wide-scalar-shift-legalization.ll
index 43d2a99..338e104 100644
--- a/llvm/test/CodeGen/X86/wide-scalar-shift-legalization.ll
+++ b/llvm/test/CodeGen/X86/wide-scalar-shift-legalization.ll
@@ -3296,27 +3296,27 @@ define void @lshr_64bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    andl $63, %ecx
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrl $3, %eax
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    andl $56, %eax
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -120(%rsp,%rax), %rdi
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -112(%rsp,%rax), %r8
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, %rdi, %r15
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -120(%rsp,%rax), %r8
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -112(%rsp,%rax), %rdi
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, %r8, %r15
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, -128(%rsp,%rax), %rbx
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, %esi
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -104(%rsp,%rax), %r9
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, %r9, %r13
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, %r8, %r10
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, %rdi, %r10
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -88(%rsp,%rax), %r11
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, %r11, %r14
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, %r12d
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    notl %r12d
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    addq %r8, %r8
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %r12, %r8, %r8
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %r15, %r8
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    addq %rdi, %rdi
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %r12, %rdi, %rdi
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %r15, %rdi
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -96(%rsp,%rax), %r15
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, %r15, %rbp
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    xorb $63, %sil
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    addq %rdi, %rdi
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rsi, %rdi, %rdi
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %rbx, %rdi
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    addq %r8, %r8
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rsi, %r8, %r8
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %rbx, %r8
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    leaq (%r15,%r15), %rbx
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %r12, %rbx, %rbx
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %r13, %rbx
@@ -3342,8 +3342,8 @@ define void @lshr_64bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r10, 40(%rdx)
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r9, 16(%rdx)
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rbx, 24(%rdx)
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rdi, (%rdx)
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r8, 8(%rdx)
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r8, (%rdx)
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rdi, 8(%rdx)
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    addq $8, %rsp
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    popq %rbx
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    popq %r12
@@ -5571,27 +5571,27 @@ define void @ashr_64bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    andl $63, %ecx
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrl $3, %eax
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    andl $56, %eax
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -120(%rsp,%rax), %rdi
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -112(%rsp,%rax), %r8
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, %rdi, %r15
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -120(%rsp,%rax), %r8
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -112(%rsp,%rax), %rdi
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, %r8, %r15
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, -128(%rsp,%rax), %rbx
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, %esi
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -104(%rsp,%rax), %r9
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, %r9, %r13
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, %r8, %r10
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, %rdi, %r10
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -88(%rsp,%rax), %r11
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, %r11, %r14
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movl %ecx, %r12d
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    notl %r12d
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    addq %r8, %r8
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %r12, %r8, %r8
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %r15, %r8
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    addq %rdi, %rdi
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %r12, %rdi, %rdi
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %r15, %rdi
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq -96(%rsp,%rax), %r15
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shrxq %rcx, %r15, %rbp
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    xorb $63, %sil
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    addq %rdi, %rdi
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rsi, %rdi, %rdi
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %rbx, %rdi
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    addq %r8, %r8
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %rsi, %r8, %r8
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %rbx, %r8
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    leaq (%r15,%r15), %rbx
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    shlxq %r12, %rbx, %rbx
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    orq %r13, %rbx
@@ -5617,8 +5617,8 @@ define void @ashr_64bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r10, 40(%rdx)
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r9, 16(%rdx)
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rbx, 24(%rdx)
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rdi, (%rdx)
-; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r8, 8(%rdx)
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %r8, (%rdx)
+; X64-HAVE-BMI2-NO-SHLD-NEXT:    movq %rdi, 8(%rdx)
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    addq $8, %rsp
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    popq %rbx
 ; X64-HAVE-BMI2-NO-SHLD-NEXT:    popq %r12
diff --git a/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll b/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll
index fbbf2a6..81c4d5d 100644
--- a/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll
+++ b/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll
@@ -1672,9 +1672,10 @@ define void @load_16byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i
 ;
 ; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca_with_zero_upper_half:
 ; X64-HAVE-BMI2-HAVE-SHLD:       # %bb.0:
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rsi, %rcx
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movups (%rdi), %xmm0
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    xorps %xmm1, %xmm1
-; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    leal (,%rsi,8), %ecx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shll $3, %ecx
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
diff --git a/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll b/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll
index ff13f4b..8d36eef 100644
--- a/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll
+++ b/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll
@@ -1946,9 +1946,10 @@ define void @load_16byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst
 ;
 ; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_16byte_chunk_of_32byte_alloca:
 ; X64-HAVE-BMI2-HAVE-SHLD:       # %bb.0:
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movq %rsi, %rcx
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movups (%rdi), %xmm0
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movups 16(%rdi), %xmm1
-; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    leal (,%rsi,8), %ecx
+; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    shll $3, %ecx
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    xorps %xmm2, %xmm2
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movaps %xmm2, -{{[0-9]+}}(%rsp)
 ; X64-HAVE-BMI2-HAVE-SHLD-NEXT:    movaps %xmm2, -{{[0-9]+}}(%rsp)
diff --git a/llvm/test/DebugInfo/Generic/discriminated-union.ll b/llvm/test/DebugInfo/Generic/discriminated-union.ll
index d267d9b..0267580 100644
--- a/llvm/test/DebugInfo/Generic/discriminated-union.ll
+++ b/llvm/test/DebugInfo/Generic/discriminated-union.ll
@@ -1,8 +1,8 @@
 ; RUN: %llc_dwarf -O0 -filetype=obj < %s > %t
-; RUN: llvm-dwarfdump -v -debug-info %t | FileCheck %s
+; RUN: llvm-dwarfdump -v -debug-info %t | FileCheck %s --check-prefix %if target-byteorder-big-endian %{ CHECK-BE %} %else %{ CHECK-LE %}
 
 ; RUN: %llc_dwarf --try-experimental-debuginfo-iterators -O0 -filetype=obj < %s > %t
-; RUN: llvm-dwarfdump -v -debug-info %t | FileCheck %s
+; RUN: llvm-dwarfdump -v -debug-info %t | FileCheck %s --check-prefix %if target-byteorder-big-endian %{ CHECK-BE %} %else %{ CHECK-LE %}
 
 ; Check for a variant part that has two members, one of which has a
 ; discriminant value.
@@ -22,7 +22,8 @@
 ;         CHECK: DW_AT_alignment
 ;         CHECK: DW_AT_data_member_location [DW_FORM_data1]	(0x00)
 ;     CHECK: DW_TAG_variant
-;       CHECK: DW_AT_discr_value [DW_FORM_data1]	(0x00)
+;       CHECK-LE: DW_AT_discr_value [DW_FORM_block1]	(<0x10> 00 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 )
+;       CHECK-BE: DW_AT_discr_value [DW_FORM_block1]	(<0x10> 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 00 )
 ;       CHECK: DW_TAG_member
 ;         CHECK: DW_AT_type
 ;         CHECK: DW_AT_alignment
@@ -71,7 +72,7 @@ attributes #0 = { nounwind uwtable }
 !21 = !DIBasicType(name: "u8", size: 8, encoding: DW_ATE_unsigned)
 !22 = !DIDerivedType(tag: DW_TAG_member, name: "__1", scope: !18, file: !7, baseType: !23, size: 64, align: 64)
 !23 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "&u8", baseType: !21, size: 64, align: 64)
-!24 = !DIDerivedType(tag: DW_TAG_member, scope: !14, file: !7, baseType: !25, size: 128, align: 64, extraData: i64 0)
+!24 = !DIDerivedType(tag: DW_TAG_member, scope: !14, file: !7, baseType: !25, size: 128, align: 64, extraData: i128 18446744073709551616)
 !25 = !DICompositeType(tag: DW_TAG_structure_type, name: "Nope", scope: !12, file: !7, size: 128, align: 64, elements: !4, identifier: "7ce1efff6b82281ab9ceb730566e7e20::Nope")
 !27 = !DIBasicType(name: "u64", size: 64, encoding: DW_ATE_unsigned)
 !28 = !DIExpression()
diff --git a/llvm/test/ExecutionEngine/Orc/minimal-throw-catch.ll b/llvm/test/ExecutionEngine/Orc/minimal-throw-catch.ll
index 1b8f451..4ee55c6 100644
--- a/llvm/test/ExecutionEngine/Orc/minimal-throw-catch.ll
+++ b/llvm/test/ExecutionEngine/Orc/minimal-throw-catch.ll
@@ -1,4 +1,4 @@
-; REQUIRES: x86_64-apple
+; REQUIRES: system-darwin && host-unwind-supports-jit
 ; RUN: lli -jit-kind=orc %s
 ;
 ; Basic correctness testing for eh-frame processing and registration.
diff --git a/llvm/test/ExecutionEngine/OrcLazy/minimal-throw-catch.ll b/llvm/test/ExecutionEngine/OrcLazy/minimal-throw-catch.ll
index cd22ec6..83d2d89 100644
--- a/llvm/test/ExecutionEngine/OrcLazy/minimal-throw-catch.ll
+++ b/llvm/test/ExecutionEngine/OrcLazy/minimal-throw-catch.ll
@@ -1,4 +1,4 @@
-; REQUIRES: x86_64-apple
+; REQUIRES: system-darwin && host-unwind-supports-jit
 ; RUN: lli -jit-kind=orc-lazy %s
 ;
 ; Basic correctness testing for eh-frame processing and registration.
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-fminv.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-fminv.ll
new file mode 100644
index 0000000..508decf
--- /dev/null
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-fminv.ll
@@ -0,0 +1,274 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=msan -S | FileCheck %s
+;
+; Forked from llvm/test/CodeGen/AArch64/arm64-fminv.ll
+;
+; Currently handled (suboptimally) by handleUnknownInstruction:
+; - llvm.aarch64.neon.fmaxv
+; - llvm.aarch64.neon.fminv
+; - llvm.aarch64.neon.fmaxnmv
+; - llvm.aarch64.neon.fminnmv
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-android9001"
+
+define float @test_fminv_v2f32(<2 x float> %in) #0 {
+; CHECK-LABEL: define float @test_fminv_v2f32(
+; CHECK-SAME: <2 x float> [[IN:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to i64
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[MIN:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> [[IN]])
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret float [[MIN]]
+;
+  %min = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %in)
+  ret float %min
+}
+
+define float @test_fminv_v4f32(<4 x float> %in) #0 {
+; CHECK-LABEL: define float @test_fminv_v4f32(
+; CHECK-SAME: <4 x float> [[IN:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR3]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[MIN:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> [[IN]])
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret float [[MIN]]
+;
+  %min = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> %in)
+  ret float %min
+}
+
+define double @test_fminv_v2f64(<2 x double> %in) #0 {
+; CHECK-LABEL: define double @test_fminv_v2f64(
+; CHECK-SAME: <2 x double> [[IN:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR3]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[MIN:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> [[IN]])
+; CHECK-NEXT:    store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret double [[MIN]]
+;
+  %min = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %in)
+  ret double %min
+}
+
+declare float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float>)
+declare float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float>)
+declare double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double>)
+
+define float @test_fmaxv_v2f32(<2 x float> %in) #0 {
+; CHECK-LABEL: define float @test_fmaxv_v2f32(
+; CHECK-SAME: <2 x float> [[IN:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to i64
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR3]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[MAX:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> [[IN]])
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret float [[MAX]]
+;
+  %max = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %in)
+  ret float %max
+}
+
+define float @test_fmaxv_v4f32(<4 x float> %in) #0 {
+; CHECK-LABEL: define float @test_fmaxv_v4f32(
+; CHECK-SAME: <4 x float> [[IN:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR3]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[MAX:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> [[IN]])
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret float [[MAX]]
+;
+  %max = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> %in)
+  ret float %max
+}
+
+define double @test_fmaxv_v2f64(<2 x double> %shareholder_value) #0 {
+; CHECK-LABEL: define double @test_fmaxv_v2f64(
+; CHECK-SAME: <2 x double> [[SHAREHOLDER_VALUE:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR3]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[MAX:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> [[SHAREHOLDER_VALUE]])
+; CHECK-NEXT:    store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret double [[MAX]]
+;
+  %max_sv = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %shareholder_value)
+  ret double %max_sv
+}
+
+declare float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float>)
+declare float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float>)
+declare double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double>)
+
+define float @test_fminnmv_v2f32(<2 x float> %in) #0 {
+; CHECK-LABEL: define float @test_fminnmv_v2f32(
+; CHECK-SAME: <2 x float> [[IN:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to i64
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR3]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[MINNM:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> [[IN]])
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret float [[MINNM]]
+;
+  %minnm = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %in)
+  ret float %minnm
+}
+
+define float @test_fminnmv_v4f32(<4 x float> %in) #0 {
+; CHECK-LABEL: define float @test_fminnmv_v4f32(
+; CHECK-SAME: <4 x float> [[IN:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR3]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[MINNM:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> [[IN]])
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret float [[MINNM]]
+;
+  %minnm = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> %in)
+  ret float %minnm
+}
+
+define double @test_fminnmv_v2f64(<2 x double> %in) #0 {
+; CHECK-LABEL: define double @test_fminnmv_v2f64(
+; CHECK-SAME: <2 x double> [[IN:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR3]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[MINNM:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> [[IN]])
+; CHECK-NEXT:    store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret double [[MINNM]]
+;
+  %minnm = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %in)
+  ret double %minnm
+}
+
+declare float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float>)
+declare float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float>)
+declare double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double>)
+
+define float @test_fmaxnmv_v2f32(<2 x float> %in) #0 {
+; CHECK-LABEL: define float @test_fmaxnmv_v2f32(
+; CHECK-SAME: <2 x float> [[IN:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to i64
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR3]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[MAXNM:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> [[IN]])
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret float [[MAXNM]]
+;
+  %maxnm = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %in)
+  ret float %maxnm
+}
+
+define float @test_fmaxnmv_v4f32(<4 x float> %in) #0 {
+; CHECK-LABEL: define float @test_fmaxnmv_v4f32(
+; CHECK-SAME: <4 x float> [[IN:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR3]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[MAXNM:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> [[IN]])
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret float [[MAXNM]]
+;
+  %maxnm = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> %in)
+  ret float %maxnm
+}
+
+define double @test_fmaxnmv_v2f64(<2 x double> %in) #0 {
+; CHECK-LABEL: define double @test_fmaxnmv_v2f64(
+; CHECK-SAME: <2 x double> [[IN:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR3]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[MAXNM:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> [[IN]])
+; CHECK-NEXT:    store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret double [[MAXNM]]
+;
+  %maxnm = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %in)
+  ret double %maxnm
+}
+
+declare float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float>)
+declare float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float>)
+declare double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double>)
+
+attributes #0 = { sanitize_memory }
+;.
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
+;.
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vaddlv.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vaddlv.ll
new file mode 100644
index 0000000..3c2775b
--- /dev/null
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vaddlv.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=msan -S | FileCheck %s
+;
+; Forked from llvm/test/CodeGen/AArch64/arm64-vaddlv.ll
+;
+; Currently handled (suboptimally) by handleUnknownInstruction:
+; - llvm.aarch64.neon.saddlv
+; - llvm.aarch64.neon.uaddlv
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-android9001"
+
+define i64 @test_vaddlv_s32(<2 x i32> %a1) nounwind readnone #0 {
+; CHECK-LABEL: define i64 @test_vaddlv_s32(
+; CHECK-SAME: <2 x i32> [[A1:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to i64
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    [[VADDLV_I:%.*]] = tail call i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> [[A1]]) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT:    store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i64 [[VADDLV_I]]
+;
+entry:
+  %vaddlv.i = tail call i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> %a1) nounwind
+  ret i64 %vaddlv.i
+}
+
+define i64 @test_vaddlv_u32(<2 x i32> %a1) nounwind readnone #0 {
+; CHECK-LABEL: define i64 @test_vaddlv_u32(
+; CHECK-SAME: <2 x i32> [[A1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to i64
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR3]]
+; CHECK-NEXT:    unreachable
+; CHECK:       3:
+; CHECK-NEXT:    [[VADDLV_I:%.*]] = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32> [[A1]]) #[[ATTR2]]
+; CHECK-NEXT:    store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i64 [[VADDLV_I]]
+;
+entry:
+  %vaddlv.i = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32> %a1) nounwind
+  ret i64 %vaddlv.i
+}
+
+declare i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32>) nounwind readnone
+
+declare i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32>) nounwind readnone
+
+attributes #0 = { sanitize_memory }
+;.
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
+;.
diff --git a/llvm/test/Instrumentation/MemorySanitizer/scmp.ll b/llvm/test/Instrumentation/MemorySanitizer/scmp.ll
new file mode 100644
index 0000000..89c5b283
--- /dev/null
+++ b/llvm/test/Instrumentation/MemorySanitizer/scmp.ll
@@ -0,0 +1,492 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=msan -S | FileCheck %s
+;
+; llvm.scmp is correctly handled heuristically when each parameter is the same
+; type as the return type e.g.,
+;   call i8 @llvm.scmp.i8.i8(i8 %x, i8 %y)
+; but handled incorrectly by visitInstruction when the return type is different
+; e.g.,
+;   call i8 @llvm.scmp.i8.i62(i62 %x, i62 %y)
+;   call <4 x i8> @llvm.scmp.v4i8.v4i32(<4 x i32> %x, <4 x i32> %y)
+;
+; Forked from llvm/test/CodeGen/X86/scmp.ll
+
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i8 @scmp.8.8(i8 %x, i8 %y) nounwind #0 {
+; CHECK-LABEL: define i8 @scmp.8.8(
+; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.scmp.i8.i8(i8 [[X]], i8 [[Y]])
+; CHECK-NEXT:    store i8 [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i8 [[TMP3]]
+;
+  %1 = call i8 @llvm.scmp(i8 %x, i8 %y)
+  ret i8 %1
+}
+
+define i8 @scmp.8.16(i16 %x, i16 %y) nounwind #0 {
+; CHECK-LABEL: define i8 @scmp.8.16(
+; CHECK-SAME: i16 [[X:%.*]], i16 [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i16 [[TMP1]], 0
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i16 [[TMP2]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4:[0-9]+]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i8 @llvm.scmp.i8.i16(i16 [[X]], i16 [[Y]])
+; CHECK-NEXT:    store i8 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i8 [[TMP5]]
+;
+  %1 = call i8 @llvm.scmp(i16 %x, i16 %y)
+  ret i8 %1
+}
+
+define i8 @scmp.8.32(i32 %x, i32 %y) nounwind #0 {
+; CHECK-LABEL: define i8 @scmp.8.32(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT:    store i8 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i8 [[TMP5]]
+;
+  %1 = call i8 @llvm.scmp(i32 %x, i32 %y)
+  ret i8 %1
+}
+
+define i8 @scmp.8.64(i64 %x, i64 %y) nounwind #0 {
+; CHECK-LABEL: define i8 @scmp.8.64(
+; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i8 @llvm.scmp.i8.i64(i64 [[X]], i64 [[Y]])
+; CHECK-NEXT:    store i8 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i8 [[TMP5]]
+;
+  %1 = call i8 @llvm.scmp(i64 %x, i64 %y)
+  ret i8 %1
+}
+
+define i8 @scmp.8.128(i128 %x, i128 %y) nounwind #0 {
+; CHECK-LABEL: define i8 @scmp.8.128(
+; CHECK-SAME: i128 [[X:%.*]], i128 [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i128, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i128, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP2]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i8 @llvm.scmp.i8.i128(i128 [[X]], i128 [[Y]])
+; CHECK-NEXT:    store i8 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i8 [[TMP5]]
+;
+  %1 = call i8 @llvm.scmp(i128 %x, i128 %y)
+  ret i8 %1
+}
+
+define i32 @scmp.32.32(i32 %x, i32 %y) nounwind #0 {
+; CHECK-LABEL: define i32 @scmp.32.32(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.scmp.i32.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT:    store i32 [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = call i32 @llvm.scmp(i32 %x, i32 %y)
+  ret i32 %1
+}
+
+define i32 @scmp.32.64(i64 %x, i64 %y) nounwind #0 {
+; CHECK-LABEL: define i32 @scmp.32.64(
+; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.scmp.i32.i64(i64 [[X]], i64 [[Y]])
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[TMP5]]
+;
+  %1 = call i32 @llvm.scmp(i64 %x, i64 %y)
+  ret i32 %1
+}
+
+define i64 @scmp.64.64(i64 %x, i64 %y) nounwind #0 {
+; CHECK-LABEL: define i64 @scmp.64.64(
+; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.scmp.i64.i64(i64 [[X]], i64 [[Y]])
+; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i64 [[TMP3]]
+;
+  %1 = call i64 @llvm.scmp(i64 %x, i64 %y)
+  ret i64 %1
+}
+
+define i4 @scmp_narrow_result(i32 %x, i32 %y) nounwind #0 {
+; CHECK-LABEL: define i4 @scmp_narrow_result(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i4 @llvm.scmp.i4.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT:    store i4 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i4 [[TMP5]]
+;
+  %1 = call i4 @llvm.scmp(i32 %x, i32 %y)
+  ret i4 %1
+}
+
+define i8 @scmp_narrow_op(i62 %x, i62 %y) nounwind #0 {
+; CHECK-LABEL: define i8 @scmp_narrow_op(
+; CHECK-SAME: i62 [[X:%.*]], i62 [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i62, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i62, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i62 [[TMP1]], 0
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i62 [[TMP2]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i8 @llvm.scmp.i8.i62(i62 [[X]], i62 [[Y]])
+; CHECK-NEXT:    store i8 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i8 [[TMP5]]
+;
+  %1 = call i8 @llvm.scmp(i62 %x, i62 %y)
+  ret i8 %1
+}
+
+define i141 @scmp_wide_result(i32 %x, i32 %y) nounwind #0 {
+; CHECK-LABEL: define i141 @scmp_wide_result(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i141 @llvm.scmp.i141.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT:    store i141 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i141 [[TMP5]]
+;
+  %1 = call i141 @llvm.scmp(i32 %x, i32 %y)
+  ret i141 %1
+}
+
+define i8 @scmp_wide_op(i109 %x, i109 %y) nounwind #0 {
+; CHECK-LABEL: define i8 @scmp_wide_op(
+; CHECK-SAME: i109 [[X:%.*]], i109 [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i109, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i109, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i109 [[TMP1]], 0
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i109 [[TMP2]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i8 @llvm.scmp.i8.i109(i109 [[X]], i109 [[Y]])
+; CHECK-NEXT:    store i8 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i8 [[TMP5]]
+;
+  %1 = call i8 @llvm.scmp(i109 %x, i109 %y)
+  ret i8 %1
+}
+
+define i41 @scmp_uncommon_types(i7 %x, i7 %y) nounwind #0 {
+; CHECK-LABEL: define i41 @scmp_uncommon_types(
+; CHECK-SAME: i7 [[X:%.*]], i7 [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i7, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i7, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i7 [[TMP1]], 0
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i7 [[TMP2]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i41 @llvm.scmp.i41.i7(i7 [[X]], i7 [[Y]])
+; CHECK-NEXT:    store i41 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i41 [[TMP5]]
+;
+  %1 = call i41 @llvm.scmp(i7 %x, i7 %y)
+  ret i41 %1
+}
+
+define <4 x i32> @scmp_normal_vectors(<4 x i32> %x, <4 x i32> %y) nounwind #0 {
+; CHECK-LABEL: define <4 x i32> @scmp_normal_vectors(
+; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> [[X]], <4 x i32> [[Y]])
+; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+;
+  %1 = call <4 x i32> @llvm.scmp(<4 x i32> %x, <4 x i32> %y)
+  ret <4 x i32> %1
+}
+
+define <4 x i8> @scmp_narrow_vec_result(<4 x i32> %x, <4 x i32> %y) nounwind #0 {
+; CHECK-LABEL: define <4 x i8> @scmp_narrow_vec_result(
+; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK:       5:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i8> @llvm.scmp.v4i8.v4i32(<4 x i32> [[X]], <4 x i32> [[Y]])
+; CHECK-NEXT:    store <4 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <4 x i8> [[TMP7]]
+;
+  %1 = call <4 x i8> @llvm.scmp(<4 x i32> %x, <4 x i32> %y)
+  ret <4 x i8> %1
+}
+
+define <4 x i32> @scmp_narrow_vec_op(<4 x i8> %x, <4 x i8> %y) nounwind #0 {
+; CHECK-LABEL: define <4 x i32> @scmp_narrow_vec_op(
+; CHECK-SAME: <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i8> [[TMP1]] to i32
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i8> [[TMP2]] to i32
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[TMP4]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK:       5:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.scmp.v4i32.v4i8(<4 x i8> [[X]], <4 x i8> [[Y]])
+; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <4 x i32> [[TMP7]]
+;
+  %1 = call <4 x i32> @llvm.scmp(<4 x i8> %x, <4 x i8> %y)
+  ret <4 x i32> %1
+}
+
+define <16 x i32> @scmp_wide_vec_result(<16 x i8> %x, <16 x i8> %y) nounwind #0 {
+; CHECK-LABEL: define <16 x i32> @scmp_wide_vec_result(
+; CHECK-SAME: <16 x i8> [[X:%.*]], <16 x i8> [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK:       5:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i32> @llvm.scmp.v16i32.v16i8(<16 x i8> [[X]], <16 x i8> [[Y]])
+; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <16 x i32> [[TMP7]]
+;
+  %1 = call <16 x i32> @llvm.scmp(<16 x i8> %x, <16 x i8> %y)
+  ret <16 x i32> %1
+}
+
+define <16 x i8> @scmp_wide_vec_op(<16 x i64> %x, <16 x i64> %y) nounwind #0 {
+; CHECK-LABEL: define <16 x i8> @scmp_wide_vec_op(
+; CHECK-SAME: <16 x i64> [[X:%.*]], <16 x i64> [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i64> [[TMP1]] to i1024
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i1024 [[TMP3]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i64> [[TMP2]] to i1024
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i1024 [[TMP4]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK:       5:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.scmp.v16i8.v16i64(<16 x i64> [[X]], <16 x i64> [[Y]])
+; CHECK-NEXT:    store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <16 x i8> [[TMP7]]
+;
+  %1 = call <16 x i8> @llvm.scmp(<16 x i64> %x, <16 x i64> %y)
+  ret <16 x i8> %1
+}
+
+define <7 x i117> @scmp_uncommon_vectors(<7 x i7> %x, <7 x i7> %y) nounwind #0 {
+; CHECK-LABEL: define <7 x i117> @scmp_uncommon_vectors(
+; CHECK-SAME: <7 x i7> [[X:%.*]], <7 x i7> [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <7 x i7>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <7 x i7>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <7 x i7> [[TMP1]] to i49
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i49 [[TMP3]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <7 x i7> [[TMP2]] to i49
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i49 [[TMP4]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK:       5:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = call <7 x i117> @llvm.scmp.v7i117.v7i7(<7 x i7> [[X]], <7 x i7> [[Y]])
+; CHECK-NEXT:    store <7 x i117> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <7 x i117> [[TMP7]]
+;
+  %1 = call <7 x i117> @llvm.scmp(<7 x i7> %x, <7 x i7> %y)
+  ret <7 x i117> %1
+}
+
+define <1 x i3> @scmp_scalarize(<1 x i33> %x, <1 x i33> %y) nounwind #0 {
+; CHECK-LABEL: define <1 x i3> @scmp_scalarize(
+; CHECK-SAME: <1 x i33> [[X:%.*]], <1 x i33> [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i33>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <1 x i33>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i33> [[TMP1]] to i33
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i33 [[TMP3]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i33> [[TMP2]] to i33
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i33 [[TMP4]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK:       5:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i3> @llvm.scmp.v1i3.v1i33(<1 x i33> [[X]], <1 x i33> [[Y]])
+; CHECK-NEXT:    store <1 x i3> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <1 x i3> [[TMP7]]
+;
+  %1 = call <1 x i3> @llvm.scmp(<1 x i33> %x, <1 x i33> %y)
+  ret <1 x i3> %1
+}
+
+define <2 x i8> @scmp_bool_operands(<2 x i1> %x, <2 x i1> %y) nounwind #0 {
+; CHECK-LABEL: define <2 x i8> @scmp_bool_operands(
+; CHECK-SAME: <2 x i1> [[X:%.*]], <2 x i1> [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i1>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i1>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i1> [[TMP1]] to i2
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i2 [[TMP3]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i1> [[TMP2]] to i2
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i2 [[TMP4]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK:       5:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i8> @llvm.scmp.v2i8.v2i1(<2 x i1> [[X]], <2 x i1> [[Y]])
+; CHECK-NEXT:    store <2 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <2 x i8> [[TMP7]]
+;
+  %1 = call <2 x i8> @llvm.scmp(<2 x i1> %x, <2 x i1> %y)
+  ret <2 x i8> %1
+}
+
+define <2 x i16> @scmp_ret_wider_than_operands(<2 x i8> %x, <2 x i8> %y) nounwind #0 {
+; CHECK-LABEL: define <2 x i16> @scmp_ret_wider_than_operands(
+; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i8> [[TMP1]] to i16
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i16 [[TMP3]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i8> [[TMP2]] to i16
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i16 [[TMP4]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK:       5:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i16> @llvm.scmp.v2i16.v2i8(<2 x i8> [[X]], <2 x i8> [[Y]])
+; CHECK-NEXT:    store <2 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <2 x i16> [[TMP7]]
+;
+  %1 = call <2 x i16> @llvm.scmp(<2 x i8> %x, <2 x i8> %y)
+  ret <2 x i16> %1
+}
+
+attributes #0 = { sanitize_memory }
+;.
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
+;.
diff --git a/llvm/test/Instrumentation/MemorySanitizer/ucmp.ll b/llvm/test/Instrumentation/MemorySanitizer/ucmp.ll
new file mode 100644
index 0000000..5e0a248
--- /dev/null
+++ b/llvm/test/Instrumentation/MemorySanitizer/ucmp.ll
@@ -0,0 +1,420 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=msan -S | FileCheck %s
+;
+; llvm.ucmp is correctly handled heuristically when each parameter is the same
+; type as the return type e.g.,
+;   call i8 @llvm.ucmp.i8.i8(i8 %x, i8 %y)
+; but handled incorrectly by visitInstruction when the return type is different  
+; e.g.,
+;   call i8 @llvm.ucmp.i8.i62(i62 %x, i62 %y)
+;   call <4 x i8> @llvm.ucmp.v4i8.v4i32(<4 x i32> %x, <4 x i32> %y)
+
+; Forked from llvm/test/CodeGen/X86/ucmp.ll
+
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i8 @ucmp.8.8(i8 %x, i8 %y) nounwind #0 {
+; CHECK-LABEL: define i8 @ucmp.8.8(
+; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.ucmp.i8.i8(i8 [[X]], i8 [[Y]])
+; CHECK-NEXT:    store i8 [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i8 [[TMP3]]
+;
+  %1 = call i8 @llvm.ucmp(i8 %x, i8 %y)
+  ret i8 %1
+}
+
+define i8 @ucmp.8.16(i16 %x, i16 %y) nounwind #0 {
+; CHECK-LABEL: define i8 @ucmp.8.16(
+; CHECK-SAME: i16 [[X:%.*]], i16 [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i16 [[TMP1]], 0
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i16 [[TMP2]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4:[0-9]+]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i8 @llvm.ucmp.i8.i16(i16 [[X]], i16 [[Y]])
+; CHECK-NEXT:    store i8 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i8 [[TMP5]]
+;
+  %1 = call i8 @llvm.ucmp(i16 %x, i16 %y)
+  ret i8 %1
+}
+
+define i8 @ucmp.8.32(i32 %x, i32 %y) nounwind #0 {
+; CHECK-LABEL: define i8 @ucmp.8.32(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT:    store i8 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i8 [[TMP5]]
+;
+  %1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
+  ret i8 %1
+}
+
+define i8 @ucmp.8.64(i64 %x, i64 %y) nounwind #0 {
+; CHECK-LABEL: define i8 @ucmp.8.64(
+; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i8 @llvm.ucmp.i8.i64(i64 [[X]], i64 [[Y]])
+; CHECK-NEXT:    store i8 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i8 [[TMP5]]
+;
+  %1 = call i8 @llvm.ucmp(i64 %x, i64 %y)
+  ret i8 %1
+}
+
+define i8 @ucmp.8.128(i128 %x, i128 %y) nounwind #0 {
+; CHECK-LABEL: define i8 @ucmp.8.128(
+; CHECK-SAME: i128 [[X:%.*]], i128 [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i128, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i128, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP2]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i8 @llvm.ucmp.i8.i128(i128 [[X]], i128 [[Y]])
+; CHECK-NEXT:    store i8 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i8 [[TMP5]]
+;
+  %1 = call i8 @llvm.ucmp(i128 %x, i128 %y)
+  ret i8 %1
+}
+
+define i32 @ucmp.32.32(i32 %x, i32 %y) nounwind #0 {
+; CHECK-LABEL: define i32 @ucmp.32.32(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.ucmp.i32.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT:    store i32 [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
+  ret i32 %1
+}
+
+define i32 @ucmp.32.64(i64 %x, i64 %y) nounwind #0 {
+; CHECK-LABEL: define i32 @ucmp.32.64(
+; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.ucmp.i32.i64(i64 [[X]], i64 [[Y]])
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[TMP5]]
+;
+  %1 = call i32 @llvm.ucmp(i64 %x, i64 %y)
+  ret i32 %1
+}
+
+define i64 @ucmp.64.64(i64 %x, i64 %y) nounwind #0 {
+; CHECK-LABEL: define i64 @ucmp.64.64(
+; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.ucmp.i64.i64(i64 [[X]], i64 [[Y]])
+; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i64 [[TMP3]]
+;
+  %1 = call i64 @llvm.ucmp(i64 %x, i64 %y)
+  ret i64 %1
+}
+
+define i4 @ucmp_narrow_result(i32 %x, i32 %y) nounwind #0 {
+; CHECK-LABEL: define i4 @ucmp_narrow_result(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i4 @llvm.ucmp.i4.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT:    store i4 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i4 [[TMP5]]
+;
+  %1 = call i4 @llvm.ucmp(i32 %x, i32 %y)
+  ret i4 %1
+}
+
+define i8 @ucmp_narrow_op(i62 %x, i62 %y) nounwind #0 {
+; CHECK-LABEL: define i8 @ucmp_narrow_op(
+; CHECK-SAME: i62 [[X:%.*]], i62 [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i62, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i62, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i62 [[TMP1]], 0
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i62 [[TMP2]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i8 @llvm.ucmp.i8.i62(i62 [[X]], i62 [[Y]])
+; CHECK-NEXT:    store i8 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i8 [[TMP5]]
+;
+  %1 = call i8 @llvm.ucmp(i62 %x, i62 %y)
+  ret i8 %1
+}
+
+define i141 @ucmp_wide_result(i32 %x, i32 %y) nounwind #0 {
+; CHECK-LABEL: define i141 @ucmp_wide_result(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i141 @llvm.ucmp.i141.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT:    store i141 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i141 [[TMP5]]
+;
+  %1 = call i141 @llvm.ucmp(i32 %x, i32 %y)
+  ret i141 %1
+}
+
+define i8 @ucmp_wide_op(i109 %x, i109 %y) nounwind #0 {
+; CHECK-LABEL: define i8 @ucmp_wide_op(
+; CHECK-SAME: i109 [[X:%.*]], i109 [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i109, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i109, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i109 [[TMP1]], 0
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i109 [[TMP2]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i8 @llvm.ucmp.i8.i109(i109 [[X]], i109 [[Y]])
+; CHECK-NEXT:    store i8 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i8 [[TMP5]]
+;
+  %1 = call i8 @llvm.ucmp(i109 %x, i109 %y)
+  ret i8 %1
+}
+
+define i41 @ucmp_uncommon_types(i7 %x, i7 %y) nounwind #0 {
+; CHECK-LABEL: define i41 @ucmp_uncommon_types(
+; CHECK-SAME: i7 [[X:%.*]], i7 [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i7, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i7, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i7 [[TMP1]], 0
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i7 [[TMP2]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i41 @llvm.ucmp.i41.i7(i7 [[X]], i7 [[Y]])
+; CHECK-NEXT:    store i41 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i41 [[TMP5]]
+;
+  %1 = call i41 @llvm.ucmp(i7 %x, i7 %y)
+  ret i41 %1
+}
+
+define <4 x i32> @ucmp_normal_vectors(<4 x i32> %x, <4 x i32> %y) nounwind #0 {
+; CHECK-LABEL: define <4 x i32> @ucmp_normal_vectors(
+; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> [[X]], <4 x i32> [[Y]])
+; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+;
+  %1 = call <4 x i32> @llvm.ucmp(<4 x i32> %x, <4 x i32> %y)
+  ret <4 x i32> %1
+}
+
+define <4 x i8> @ucmp_narrow_vec_result(<4 x i32> %x, <4 x i32> %y) nounwind #0 {
+; CHECK-LABEL: define <4 x i8> @ucmp_narrow_vec_result(
+; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK:       5:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i8> @llvm.ucmp.v4i8.v4i32(<4 x i32> [[X]], <4 x i32> [[Y]])
+; CHECK-NEXT:    store <4 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <4 x i8> [[TMP7]]
+;
+  %1 = call <4 x i8> @llvm.ucmp(<4 x i32> %x, <4 x i32> %y)
+  ret <4 x i8> %1
+}
+
+define <4 x i32> @ucmp_narrow_vec_op(<4 x i8> %x, <4 x i8> %y) nounwind #0 {
+; CHECK-LABEL: define <4 x i32> @ucmp_narrow_vec_op(
+; CHECK-SAME: <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i8> [[TMP1]] to i32
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i8> [[TMP2]] to i32
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[TMP4]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK:       5:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.ucmp.v4i32.v4i8(<4 x i8> [[X]], <4 x i8> [[Y]])
+; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <4 x i32> [[TMP7]]
+;
+  %1 = call <4 x i32> @llvm.ucmp(<4 x i8> %x, <4 x i8> %y)
+  ret <4 x i32> %1
+}
+
+define <16 x i32> @ucmp_wide_vec_result(<16 x i8> %x, <16 x i8> %y) nounwind #0 {
+; CHECK-LABEL: define <16 x i32> @ucmp_wide_vec_result(
+; CHECK-SAME: <16 x i8> [[X:%.*]], <16 x i8> [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK:       5:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i32> @llvm.ucmp.v16i32.v16i8(<16 x i8> [[X]], <16 x i8> [[Y]])
+; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <16 x i32> [[TMP7]]
+;
+  %1 = call <16 x i32> @llvm.ucmp(<16 x i8> %x, <16 x i8> %y)
+  ret <16 x i32> %1
+}
+
+define <16 x i8> @ucmp_wide_vec_op(<16 x i32> %x, <16 x i32> %y) nounwind #0 {
+; CHECK-LABEL: define <16 x i8> @ucmp_wide_vec_op(
+; CHECK-SAME: <16 x i32> [[X:%.*]], <16 x i32> [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK:       5:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.ucmp.v16i8.v16i32(<16 x i32> [[X]], <16 x i32> [[Y]])
+; CHECK-NEXT:    store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <16 x i8> [[TMP7]]
+;
+  %1 = call <16 x i8> @llvm.ucmp(<16 x i32> %x, <16 x i32> %y)
+  ret <16 x i8> %1
+}
+
+define <17 x i2> @ucmp_uncommon_vectors(<17 x i71> %x, <17 x i71> %y) nounwind #0 {
+; CHECK-LABEL: define <17 x i2> @ucmp_uncommon_vectors(
+; CHECK-SAME: <17 x i71> [[X:%.*]], <17 x i71> [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <17 x i71>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <17 x i71>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 256) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <17 x i71> [[TMP1]] to i1207
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i1207 [[TMP3]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <17 x i71> [[TMP2]] to i1207
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i1207 [[TMP4]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK:       5:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = call <17 x i2> @llvm.ucmp.v17i2.v17i71(<17 x i71> [[X]], <17 x i71> [[Y]])
+; CHECK-NEXT:    store <17 x i2> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <17 x i2> [[TMP7]]
+;
+  %1 = call <17 x i2> @llvm.ucmp(<17 x i71> %x, <17 x i71> %y)
+  ret <17 x i2> %1
+}
+
+attributes #0 = { sanitize_memory }
+;.
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
+;.
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopc.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopc.s
index d1e55d1..8ab6c93 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopc.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopc.s
@@ -128,19 +128,19 @@ v_cmp_class_f16_e64 vcc_lo, 0.5, m0
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
 v_cmp_class_f16_e64 s5, v255.h, v2.l
-// W32: v_cmp_class_f16_e64 s5, v255.h, v2.l    ; encoding: [0x05,0x08,0x7d,0xd4,0xff,0x05,0x02,0x00]
+// W32: v_cmp_class_f16_e64 s5, v255.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x7d,0xd4,0xff,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
 v_cmp_class_f16_e64 s5, s105, v255.h
-// W32: v_cmp_class_f16_e64 s5, s105, v255.h    ; encoding: [0x05,0x10,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+// W32: v_cmp_class_f16_e64 s5, s105, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x7d,0xd4,0x69,0xfe,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
 v_cmp_class_f16_e64 s[10:11], v255.h, v2.l
-// W64: v_cmp_class_f16_e64 s[10:11], v255.h, v2.l ; encoding: [0x0a,0x08,0x7d,0xd4,0xff,0x05,0x02,0x00]
+// W64: v_cmp_class_f16_e64 s[10:11], v255.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x7d,0xd4,0xff,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
 v_cmp_class_f16_e64 s[10:11], s105, v255.h
-// W64: v_cmp_class_f16_e64 s[10:11], s105, v255.h ; encoding: [0x0a,0x10,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+// W64: v_cmp_class_f16_e64 s[10:11], s105, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x7d,0xd4,0x69,0xfe,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
 v_cmp_class_f32_e64 s5, v1, v2
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopcx.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopcx.s
index a4340919..ed397bd 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopcx.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopcx.s
@@ -51,13 +51,13 @@ v_cmpx_class_f16_e64 v1.l, 0.5
 // GFX11: v_cmpx_class_f16_e64 v1.l, 0.5          ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0xe1,0x01,0x00]
 
 v_cmpx_class_f16_e64 v1.h, v2.h
-// GFX11: v_cmpx_class_f16_e64 v1.h, v2.h         ; encoding: [0x7e,0x18,0xfd,0xd4,0x01,0x05,0x02,0x00]
+// GFX11: v_cmpx_class_f16_e64 v1.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xfd,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_class_f16_e64 v255.h, v2.l
-// GFX11: v_cmpx_class_f16_e64 v255.h, v2.l       ; encoding: [0x7e,0x08,0xfd,0xd4,0xff,0x05,0x02,0x00]
+// GFX11: v_cmpx_class_f16_e64 v255.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0xfd,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_class_f16_e64 s105, v255.h
-// GFX11: v_cmpx_class_f16_e64 s105, v255.h       ; encoding: [0x7e,0x10,0xfd,0xd4,0x69,0xfe,0x03,0x00]
+// GFX11: v_cmpx_class_f16_e64 s105, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0xfd,0xd4,0x69,0xfe,0x03,0x00]
 
 v_cmpx_class_f32_e64 v1, v2
 // GFX11: v_cmpx_class_f32_e64 v1, v2             ; encoding: [0x7e,0x00,0xfe,0xd4,0x01,0x05,0x02,0x00]
@@ -195,10 +195,10 @@ v_cmpx_eq_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmpx_eq_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x82,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_eq_f16_e64 v1.h, v2.l
-// GFX11: v_cmpx_eq_f16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0x82,0xd4,0x01,0x05,0x02,0x00]
+// GFX11: v_cmpx_eq_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x82,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_eq_f16_e64 v255.l, v255.h
-// GFX11: v_cmpx_eq_f16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0x82,0xd4,0xff,0xff,0x03,0x00]
+// GFX11: v_cmpx_eq_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x82,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_eq_f32_e64 v1, v2
 // GFX11: v_cmpx_eq_f32_e64 v1, v2                ; encoding: [0x7e,0x00,0x92,0xd4,0x01,0x05,0x02,0x00]
@@ -327,10 +327,10 @@ v_cmpx_eq_i16_e64 0xfe0b, vcc_hi
 // GFX11: v_cmpx_eq_i16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xb2,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_eq_i16_e64 v1.h, v2.l
-// GFX11: v_cmpx_eq_i16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0xb2,0xd4,0x01,0x05,0x02,0x00]
+// GFX11: v_cmpx_eq_i16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0xb2,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_eq_i16_e64 v255.l, v255.h
-// GFX11: v_cmpx_eq_i16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0xb2,0xd4,0xff,0xff,0x03,0x00]
+// GFX11: v_cmpx_eq_i16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0xb2,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_eq_i32_e64 v1, v2
 // GFX11: v_cmpx_eq_i32_e64 v1, v2                ; encoding: [0x7e,0x00,0xc2,0xd4,0x01,0x05,0x02,0x00]
@@ -459,10 +459,10 @@ v_cmpx_eq_u16_e64 0xfe0b, vcc_hi
 // GFX11: v_cmpx_eq_u16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xba,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_eq_u16_e64 v1.h, v2.l
-// GFX11: v_cmpx_eq_u16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0xba,0xd4,0x01,0x05,0x02,0x00]
+// GFX11: v_cmpx_eq_u16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0xba,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_eq_u16_e64 v255.l, v255.h
-// GFX11: v_cmpx_eq_u16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0xba,0xd4,0xff,0xff,0x03,0x00]
+// GFX11: v_cmpx_eq_u16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0xba,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_eq_u32_e64 v1, v2
 // GFX11: v_cmpx_eq_u32_e64 v1, v2                ; encoding: [0x7e,0x00,0xca,0xd4,0x01,0x05,0x02,0x00]
@@ -591,10 +591,10 @@ v_cmpx_f_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmpx_f_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x80,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_f_f16_e64 v1.h, v2.l
-// GFX11: v_cmpx_f_f16_e64 v1.h, v2.l             ; encoding: [0x7e,0x08,0x80,0xd4,0x01,0x05,0x02,0x00]
+// GFX11: v_cmpx_f_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x80,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_f_f16_e64 v255.l, v255.h
-// GFX11: v_cmpx_f_f16_e64 v255.l, v255.h         ; encoding: [0x7e,0x10,0x80,0xd4,0xff,0xff,0x03,0x00]
+// GFX11: v_cmpx_f_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x80,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_f_f32_e64 v1, v2
 // GFX11: v_cmpx_f_f32_e64 v1, v2                 ; encoding: [0x7e,0x00,0x90,0xd4,0x01,0x05,0x02,0x00]
@@ -885,10 +885,10 @@ v_cmpx_ge_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmpx_ge_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x86,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_ge_f16_e64 v1.h, v2.l
-// GFX11: v_cmpx_ge_f16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0x86,0xd4,0x01,0x05,0x02,0x00]
+// GFX11: v_cmpx_ge_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x86,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_ge_f16_e64 v255.l, v255.h
-// GFX11: v_cmpx_ge_f16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0x86,0xd4,0xff,0xff,0x03,0x00]
+// GFX11: v_cmpx_ge_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x86,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_ge_f32_e64 v1, v2
 // GFX11: v_cmpx_ge_f32_e64 v1, v2                ; encoding: [0x7e,0x00,0x96,0xd4,0x01,0x05,0x02,0x00]
@@ -1017,10 +1017,10 @@ v_cmpx_ge_i16_e64 0xfe0b, vcc_hi
 // GFX11: v_cmpx_ge_i16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xb6,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_ge_i16_e64 v1.h, v2.l
-// GFX11: v_cmpx_ge_i16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0xb6,0xd4,0x01,0x05,0x02,0x00]
+// GFX11: v_cmpx_ge_i16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0xb6,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_ge_i16_e64 v255.l, v255.h
-// GFX11: v_cmpx_ge_i16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0xb6,0xd4,0xff,0xff,0x03,0x00]
+// GFX11: v_cmpx_ge_i16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0xb6,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_ge_i32_e64 v1, v2
 // GFX11: v_cmpx_ge_i32_e64 v1, v2                ; encoding: [0x7e,0x00,0xc6,0xd4,0x01,0x05,0x02,0x00]
@@ -1149,10 +1149,10 @@ v_cmpx_ge_u16_e64 0xfe0b, vcc_hi
 // GFX11: v_cmpx_ge_u16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xbe,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_ge_u16_e64 v1.h, v2.l
-// GFX11: v_cmpx_ge_u16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0xbe,0xd4,0x01,0x05,0x02,0x00]
+// GFX11: v_cmpx_ge_u16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0xbe,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_ge_u16_e64 v255.l, v255.h
-// GFX11: v_cmpx_ge_u16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0xbe,0xd4,0xff,0xff,0x03,0x00]
+// GFX11: v_cmpx_ge_u16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0xbe,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_ge_u32_e64 v1, v2
 // GFX11: v_cmpx_ge_u32_e64 v1, v2                ; encoding: [0x7e,0x00,0xce,0xd4,0x01,0x05,0x02,0x00]
@@ -1281,10 +1281,10 @@ v_cmpx_gt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmpx_gt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x84,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_gt_f16_e64 v1.h, v2.l
-// GFX11: v_cmpx_gt_f16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0x84,0xd4,0x01,0x05,0x02,0x00]
+// GFX11: v_cmpx_gt_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x84,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_gt_f16_e64 v255.l, v255.h
-// GFX11: v_cmpx_gt_f16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0x84,0xd4,0xff,0xff,0x03,0x00]
+// GFX11: v_cmpx_gt_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x84,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_gt_f32_e64 v1, v2
 // GFX11: v_cmpx_gt_f32_e64 v1, v2                ; encoding: [0x7e,0x00,0x94,0xd4,0x01,0x05,0x02,0x00]
@@ -1413,10 +1413,10 @@ v_cmpx_gt_i16_e64 0xfe0b, vcc_hi
 // GFX11: v_cmpx_gt_i16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xb4,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_gt_i16_e64 v1.h, v2.l
-// GFX11: v_cmpx_gt_i16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0xb4,0xd4,0x01,0x05,0x02,0x00]
+// GFX11: v_cmpx_gt_i16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0xb4,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_gt_i16_e64 v255.l, v255.h
-// GFX11: v_cmpx_gt_i16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0xb4,0xd4,0xff,0xff,0x03,0x00]
+// GFX11: v_cmpx_gt_i16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0xb4,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_gt_i32_e64 v1, v2
 // GFX11: v_cmpx_gt_i32_e64 v1, v2                ; encoding: [0x7e,0x00,0xc4,0xd4,0x01,0x05,0x02,0x00]
@@ -1545,10 +1545,10 @@ v_cmpx_gt_u16_e64 0xfe0b, vcc_hi
 // GFX11: v_cmpx_gt_u16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xbc,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_gt_u16_e64 v1.h, v2.l
-// GFX11: v_cmpx_gt_u16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0xbc,0xd4,0x01,0x05,0x02,0x00]
+// GFX11: v_cmpx_gt_u16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0xbc,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_gt_u16_e64 v255.l, v255.h
-// GFX11: v_cmpx_gt_u16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0xbc,0xd4,0xff,0xff,0x03,0x00]
+// GFX11: v_cmpx_gt_u16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0xbc,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_gt_u32_e64 v1, v2
 // GFX11: v_cmpx_gt_u32_e64 v1, v2                ; encoding: [0x7e,0x00,0xcc,0xd4,0x01,0x05,0x02,0x00]
@@ -1677,10 +1677,10 @@ v_cmpx_le_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmpx_le_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x83,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_le_f16_e64 v1.h, v2.l
-// GFX11: v_cmpx_le_f16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0x83,0xd4,0x01,0x05,0x02,0x00]
+// GFX11: v_cmpx_le_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x83,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_le_f16_e64 v255.l, v255.h
-// GFX11: v_cmpx_le_f16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0x83,0xd4,0xff,0xff,0x03,0x00]
+// GFX11: v_cmpx_le_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x83,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_le_f32_e64 v1, v2
 // GFX11: v_cmpx_le_f32_e64 v1, v2                ; encoding: [0x7e,0x00,0x93,0xd4,0x01,0x05,0x02,0x00]
@@ -1809,10 +1809,10 @@ v_cmpx_le_i16_e64 0xfe0b, vcc_hi
 // GFX11: v_cmpx_le_i16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xb3,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_le_i16_e64 v1.h, v2.l
-// GFX11: v_cmpx_le_i16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0xb3,0xd4,0x01,0x05,0x02,0x00]
+// GFX11: v_cmpx_le_i16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0xb3,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_le_i16_e64 v255.l, v255.h
-// GFX11: v_cmpx_le_i16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0xb3,0xd4,0xff,0xff,0x03,0x00]
+// GFX11: v_cmpx_le_i16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0xb3,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_le_i32_e64 v1, v2
 // GFX11: v_cmpx_le_i32_e64 v1, v2                ; encoding: [0x7e,0x00,0xc3,0xd4,0x01,0x05,0x02,0x00]
@@ -1941,10 +1941,10 @@ v_cmpx_le_u16_e64 0xfe0b, vcc_hi
 // GFX11: v_cmpx_le_u16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xbb,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_le_u16_e64 v1.h, v2.l
-// GFX11: v_cmpx_le_u16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0xbb,0xd4,0x01,0x05,0x02,0x00]
+// GFX11: v_cmpx_le_u16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0xbb,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_le_u16_e64 v255.l, v255.h
-// GFX11: v_cmpx_le_u16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0xbb,0xd4,0xff,0xff,0x03,0x00]
+// GFX11: v_cmpx_le_u16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0xbb,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_le_u32_e64 v1, v2
 // GFX11: v_cmpx_le_u32_e64 v1, v2                ; encoding: [0x7e,0x00,0xcb,0xd4,0x01,0x05,0x02,0x00]
@@ -2073,10 +2073,10 @@ v_cmpx_lg_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmpx_lg_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x85,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_lg_f16_e64 v1.h, v2.l
-// GFX11: v_cmpx_lg_f16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0x85,0xd4,0x01,0x05,0x02,0x00]
+// GFX11: v_cmpx_lg_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x85,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_lg_f16_e64 v255.l, v255.h
-// GFX11: v_cmpx_lg_f16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0x85,0xd4,0xff,0xff,0x03,0x00]
+// GFX11: v_cmpx_lg_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x85,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_lg_f32_e64 v1, v2
 // GFX11: v_cmpx_lg_f32_e64 v1, v2                ; encoding: [0x7e,0x00,0x95,0xd4,0x01,0x05,0x02,0x00]
@@ -2205,10 +2205,10 @@ v_cmpx_lt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmpx_lt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x81,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_lt_f16_e64 v1.h, v2.l
-// GFX11: v_cmpx_lt_f16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0x81,0xd4,0x01,0x05,0x02,0x00]
+// GFX11: v_cmpx_lt_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x81,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_lt_f16_e64 v255.l, v255.h
-// GFX11: v_cmpx_lt_f16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0x81,0xd4,0xff,0xff,0x03,0x00]
+// GFX11: v_cmpx_lt_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x81,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_lt_f32_e64 v1, v2
 // GFX11: v_cmpx_lt_f32_e64 v1, v2                ; encoding: [0x7e,0x00,0x91,0xd4,0x01,0x05,0x02,0x00]
@@ -2337,10 +2337,10 @@ v_cmpx_lt_i16_e64 0xfe0b, vcc_hi
 // GFX11: v_cmpx_lt_i16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xb1,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_lt_i16_e64 v1.h, v2.l
-// GFX11: v_cmpx_lt_i16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0xb1,0xd4,0x01,0x05,0x02,0x00]
+// GFX11: v_cmpx_lt_i16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0xb1,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_lt_i16_e64 v255.l, v255.h
-// GFX11: v_cmpx_lt_i16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0xb1,0xd4,0xff,0xff,0x03,0x00]
+// GFX11: v_cmpx_lt_i16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0xb1,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_lt_i32_e64 v1, v2
 // GFX11: v_cmpx_lt_i32_e64 v1, v2                ; encoding: [0x7e,0x00,0xc1,0xd4,0x01,0x05,0x02,0x00]
@@ -2469,10 +2469,10 @@ v_cmpx_lt_u16_e64 0xfe0b, vcc_hi
 // GFX11: v_cmpx_lt_u16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xb9,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_lt_u16_e64 v1.h, v2.l
-// GFX11: v_cmpx_lt_u16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0xb9,0xd4,0x01,0x05,0x02,0x00]
+// GFX11: v_cmpx_lt_u16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0xb9,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_lt_u16_e64 v255.l, v255.h
-// GFX11: v_cmpx_lt_u16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0xb9,0xd4,0xff,0xff,0x03,0x00]
+// GFX11: v_cmpx_lt_u16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0xb9,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_lt_u32_e64 v1, v2
 // GFX11: v_cmpx_lt_u32_e64 v1, v2                ; encoding: [0x7e,0x00,0xc9,0xd4,0x01,0x05,0x02,0x00]
@@ -2601,10 +2601,10 @@ v_cmpx_ne_i16_e64 0xfe0b, vcc_hi
 // GFX11: v_cmpx_ne_i16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xb5,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_ne_i16_e64 v1.h, v2.l
-// GFX11: v_cmpx_ne_i16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0xb5,0xd4,0x01,0x05,0x02,0x00]
+// GFX11: v_cmpx_ne_i16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0xb5,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_ne_i16_e64 v255.l, v255.h
-// GFX11: v_cmpx_ne_i16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0xb5,0xd4,0xff,0xff,0x03,0x00]
+// GFX11: v_cmpx_ne_i16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0xb5,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_ne_i32_e64 v1, v2
 // GFX11: v_cmpx_ne_i32_e64 v1, v2                ; encoding: [0x7e,0x00,0xc5,0xd4,0x01,0x05,0x02,0x00]
@@ -2733,10 +2733,10 @@ v_cmpx_ne_u16_e64 0xfe0b, vcc_hi
 // GFX11: v_cmpx_ne_u16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xbd,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_ne_u16_e64 v1.h, v2.l
-// GFX11: v_cmpx_ne_u16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0xbd,0xd4,0x01,0x05,0x02,0x00]
+// GFX11: v_cmpx_ne_u16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0xbd,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_ne_u16_e64 v255.l, v255.h
-// GFX11: v_cmpx_ne_u16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0xbd,0xd4,0xff,0xff,0x03,0x00]
+// GFX11: v_cmpx_ne_u16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0xbd,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_ne_u32_e64 v1, v2
 // GFX11: v_cmpx_ne_u32_e64 v1, v2                ; encoding: [0x7e,0x00,0xcd,0xd4,0x01,0x05,0x02,0x00]
@@ -2865,10 +2865,10 @@ v_cmpx_neq_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmpx_neq_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x8d,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_neq_f16_e64 v1.h, v2.l
-// GFX11: v_cmpx_neq_f16_e64 v1.h, v2.l           ; encoding: [0x7e,0x08,0x8d,0xd4,0x01,0x05,0x02,0x00]
+// GFX11: v_cmpx_neq_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x8d,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_neq_f16_e64 v255.l, v255.h
-// GFX11: v_cmpx_neq_f16_e64 v255.l, v255.h       ; encoding: [0x7e,0x10,0x8d,0xd4,0xff,0xff,0x03,0x00]
+// GFX11: v_cmpx_neq_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x8d,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_neq_f32_e64 v1, v2
 // GFX11: v_cmpx_neq_f32_e64 v1, v2               ; encoding: [0x7e,0x00,0x9d,0xd4,0x01,0x05,0x02,0x00]
@@ -2997,10 +2997,10 @@ v_cmpx_nge_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmpx_nge_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x89,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_nge_f16_e64 v1.h, v2.l
-// GFX11: v_cmpx_nge_f16_e64 v1.h, v2.l           ; encoding: [0x7e,0x08,0x89,0xd4,0x01,0x05,0x02,0x00]
+// GFX11: v_cmpx_nge_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x89,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_nge_f16_e64 v255.l, v255.h
-// GFX11: v_cmpx_nge_f16_e64 v255.l, v255.h       ; encoding: [0x7e,0x10,0x89,0xd4,0xff,0xff,0x03,0x00]
+// GFX11: v_cmpx_nge_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x89,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_nge_f32_e64 v1, v2
 // GFX11: v_cmpx_nge_f32_e64 v1, v2               ; encoding: [0x7e,0x00,0x99,0xd4,0x01,0x05,0x02,0x00]
@@ -3129,10 +3129,10 @@ v_cmpx_ngt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmpx_ngt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x8b,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_ngt_f16_e64 v1.h, v2.l
-// GFX11: v_cmpx_ngt_f16_e64 v1.h, v2.l           ; encoding: [0x7e,0x08,0x8b,0xd4,0x01,0x05,0x02,0x00]
+// GFX11: v_cmpx_ngt_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x8b,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_ngt_f16_e64 v255.l, v255.h
-// GFX11: v_cmpx_ngt_f16_e64 v255.l, v255.h       ; encoding: [0x7e,0x10,0x8b,0xd4,0xff,0xff,0x03,0x00]
+// GFX11: v_cmpx_ngt_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x8b,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_ngt_f32_e64 v1, v2
 // GFX11: v_cmpx_ngt_f32_e64 v1, v2               ; encoding: [0x7e,0x00,0x9b,0xd4,0x01,0x05,0x02,0x00]
@@ -3261,10 +3261,10 @@ v_cmpx_nle_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmpx_nle_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x8c,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_nle_f16_e64 v1.h, v2.l
-// GFX11: v_cmpx_nle_f16_e64 v1.h, v2.l           ; encoding: [0x7e,0x08,0x8c,0xd4,0x01,0x05,0x02,0x00]
+// GFX11: v_cmpx_nle_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x8c,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_nle_f16_e64 v255.l, v255.h
-// GFX11: v_cmpx_nle_f16_e64 v255.l, v255.h       ; encoding: [0x7e,0x10,0x8c,0xd4,0xff,0xff,0x03,0x00]
+// GFX11: v_cmpx_nle_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x8c,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_nle_f32_e64 v1, v2
 // GFX11: v_cmpx_nle_f32_e64 v1, v2               ; encoding: [0x7e,0x00,0x9c,0xd4,0x01,0x05,0x02,0x00]
@@ -3393,10 +3393,10 @@ v_cmpx_nlg_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmpx_nlg_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x8a,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_nlg_f16_e64 v1.h, v2.l
-// GFX11: v_cmpx_nlg_f16_e64 v1.h, v2.l           ; encoding: [0x7e,0x08,0x8a,0xd4,0x01,0x05,0x02,0x00]
+// GFX11: v_cmpx_nlg_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x8a,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_nlg_f16_e64 v255.l, v255.h
-// GFX11: v_cmpx_nlg_f16_e64 v255.l, v255.h       ; encoding: [0x7e,0x10,0x8a,0xd4,0xff,0xff,0x03,0x00]
+// GFX11: v_cmpx_nlg_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x8a,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_nlg_f32_e64 v1, v2
 // GFX11: v_cmpx_nlg_f32_e64 v1, v2               ; encoding: [0x7e,0x00,0x9a,0xd4,0x01,0x05,0x02,0x00]
@@ -3525,10 +3525,10 @@ v_cmpx_nlt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmpx_nlt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x8e,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_nlt_f16_e64 v1.h, v2.l
-// GFX11: v_cmpx_nlt_f16_e64 v1.h, v2.l           ; encoding: [0x7e,0x08,0x8e,0xd4,0x01,0x05,0x02,0x00]
+// GFX11: v_cmpx_nlt_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x8e,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_nlt_f16_e64 v255.l, v255.h
-// GFX11: v_cmpx_nlt_f16_e64 v255.l, v255.h       ; encoding: [0x7e,0x10,0x8e,0xd4,0xff,0xff,0x03,0x00]
+// GFX11: v_cmpx_nlt_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x8e,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_nlt_f32_e64 v1, v2
 // GFX11: v_cmpx_nlt_f32_e64 v1, v2               ; encoding: [0x7e,0x00,0x9e,0xd4,0x01,0x05,0x02,0x00]
@@ -3657,10 +3657,10 @@ v_cmpx_o_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmpx_o_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x87,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_o_f16_e64 v1.h, v2.l
-// GFX11: v_cmpx_o_f16_e64 v1.h, v2.l             ; encoding: [0x7e,0x08,0x87,0xd4,0x01,0x05,0x02,0x00]
+// GFX11: v_cmpx_o_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x87,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_o_f16_e64 v255.l, v255.h
-// GFX11: v_cmpx_o_f16_e64 v255.l, v255.h         ; encoding: [0x7e,0x10,0x87,0xd4,0xff,0xff,0x03,0x00]
+// GFX11: v_cmpx_o_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x87,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_o_f32_e64 v1, v2
 // GFX11: v_cmpx_o_f32_e64 v1, v2                 ; encoding: [0x7e,0x00,0x97,0xd4,0x01,0x05,0x02,0x00]
@@ -4077,10 +4077,10 @@ v_cmpx_tru_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmpx_t_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x8f,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_t_f16_e64 v1.h, v2.l
-// GFX11: v_cmpx_t_f16_e64 v1.h, v2.l             ; encoding: [0x7e,0x08,0x8f,0xd4,0x01,0x05,0x02,0x00]
+// GFX11: v_cmpx_t_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x8f,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_t_f16_e64 v255.l, v255.h
-// GFX11: v_cmpx_t_f16_e64 v255.l, v255.h         ; encoding: [0x7e,0x10,0x8f,0xd4,0xff,0xff,0x03,0x00]
+// GFX11: v_cmpx_t_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x8f,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_tru_f32_e64 v1, v2
 // GFX11: v_cmpx_t_f32_e64 v1, v2                 ; encoding: [0x7e,0x00,0x9f,0xd4,0x01,0x05,0x02,0x00]
@@ -4209,10 +4209,10 @@ v_cmpx_u_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
 // GFX11: v_cmpx_u_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x88,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_u_f16_e64 v1.h, v2.l
-// GFX11: v_cmpx_u_f16_e64 v1.h, v2.l             ; encoding: [0x7e,0x08,0x88,0xd4,0x01,0x05,0x02,0x00]
+// GFX11: v_cmpx_u_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x88,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_u_f16_e64 v255.l, v255.h
-// GFX11: v_cmpx_u_f16_e64 v255.l, v255.h         ; encoding: [0x7e,0x10,0x88,0xd4,0xff,0xff,0x03,0x00]
+// GFX11: v_cmpx_u_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x88,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_u_f32_e64 v1, v2
 // GFX11: v_cmpx_u_f32_e64 v1, v2                 ; encoding: [0x7e,0x00,0x98,0xd4,0x01,0x05,0x02,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s
index d3eba05..e821a3b 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s
@@ -2,7 +2,7 @@
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 %s
 
 v_cmp_class_f16 vcc, v1.h, v255.h
-// GFX11: v_cmp_class_f16_e64 vcc, v1.h, v255.h   ; encoding: [0x6a,0x18,0x7d,0xd4,0x01,0xff,0x03,0x00]
+// GFX11: v_cmp_class_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x7d,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmp_class_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmp_class_f16_e64_dpp vcc, v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -20,10 +20,10 @@ v_cmp_class_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmp_class_f16_e64_dpp vcc, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmp_class_f16 vcc, v127.h, v255.h
-// GFX11: v_cmp_class_f16_e64 vcc, v127.h, v255.h ; encoding: [0x6a,0x18,0x7d,0xd4,0x7f,0xff,0x03,0x00]
+// GFX11: v_cmp_class_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x7d,0xd4,0x7f,0xff,0x03,0x00]
 
 v_cmp_class_f16 vcc, v127.h, v255.h
-// GFX11: v_cmp_class_f16_e64 vcc, v127.h, v255.h ; encoding: [0x6a,0x18,0x7d,0xd4,0x7f,0xff,0x03,0x00]
+// GFX11: v_cmp_class_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x7d,0xd4,0x7f,0xff,0x03,0x00]
 
 v_cmp_class_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmp_class_f16_e64_dpp vcc, v127.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
@@ -56,10 +56,10 @@ v_cmp_class_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmp_class_f16_e64_dpp vcc, v127.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
 v_cmp_class_f16 vcc, v128.h, v2.h
-// GFX11: v_cmp_class_f16_e64 vcc, v128.h, v2.h   ; encoding: [0x6a,0x18,0x7d,0xd4,0x80,0x05,0x02,0x00]
+// GFX11: v_cmp_class_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x7d,0xd4,0x80,0x05,0x02,0x00]
 
 v_cmp_class_f16 vcc, v128.h, v2.h
-// GFX11: v_cmp_class_f16_e64 vcc, v128.h, v2.h   ; encoding: [0x6a,0x18,0x7d,0xd4,0x80,0x05,0x02,0x00]
+// GFX11: v_cmp_class_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x7d,0xd4,0x80,0x05,0x02,0x00]
 
 v_cmp_class_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmp_class_f16_e64_dpp vcc, v128.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x18,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
@@ -92,10 +92,10 @@ v_cmp_class_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: v_cmp_class_f16_e64_dpp vcc, v128.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
 v_cmp_class_f16 vcc, vcc_hi, v255.h
-// GFX11: v_cmp_class_f16_e64 vcc, vcc_hi, v255.h ; encoding: [0x6a,0x10,0x7d,0xd4,0x6b,0xfe,0x03,0x00]
+// GFX11: v_cmp_class_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x7d,0xd4,0x6b,0xfe,0x03,0x00]
 
 v_cmp_class_f16 vcc, vcc_hi, v255.h
-// GFX11: v_cmp_class_f16_e64 vcc, vcc_hi, v255.h ; encoding: [0x6a,0x10,0x7d,0xd4,0x6b,0xfe,0x03,0x00]
+// GFX11: v_cmp_class_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x7d,0xd4,0x6b,0xfe,0x03,0x00]
 
 v_cmp_class_f16 vcc, vcc_hi, v255.l
 // GFX11: v_cmp_class_f16_e64 vcc, vcc_hi, v255.l ; encoding: [0x6a,0x00,0x7d,0xd4,0x6b,0xfe,0x03,0x00]
@@ -104,10 +104,10 @@ v_cmp_class_f16 vcc, vcc_hi, v255.l
 // GFX11: v_cmp_class_f16_e64 vcc, vcc_hi, v255.l ; encoding: [0x6a,0x00,0x7d,0xd4,0x6b,0xfe,0x03,0x00]
 
 v_cmp_class_f16 vcc, vcc_lo, v255.h
-// GFX11: v_cmp_class_f16_e64 vcc, vcc_lo, v255.h ; encoding: [0x6a,0x10,0x7d,0xd4,0x6a,0xfe,0x03,0x00]
+// GFX11: v_cmp_class_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x7d,0xd4,0x6a,0xfe,0x03,0x00]
 
 v_cmp_class_f16 vcc, vcc_lo, v255.h
-// GFX11: v_cmp_class_f16_e64 vcc, vcc_lo, v255.h ; encoding: [0x6a,0x10,0x7d,0xd4,0x6a,0xfe,0x03,0x00]
+// GFX11: v_cmp_class_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x7d,0xd4,0x6a,0xfe,0x03,0x00]
 
 v_cmp_class_f16 vcc, vcc_lo, v255.l
 // GFX11: v_cmp_class_f16_e64 vcc, vcc_lo, v255.l ; encoding: [0x6a,0x00,0x7d,0xd4,0x6a,0xfe,0x03,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_promote.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_promote.s
index 233858f..7e5e59b 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_promote.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_promote.s
@@ -2,7 +2,7 @@
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 %s
 
 v_cmpx_class_f16 v1.h, v255.h
-// GFX11: v_cmpx_class_f16_e64 v1.h, v255.h       ; encoding: [0x7e,0x18,0xfd,0xd4,0x01,0xff,0x03,0x00]
+// GFX11: v_cmpx_class_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xfd,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_class_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_class_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xfd,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -20,7 +20,7 @@ v_cmpx_class_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_class_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_class_f16 v255.h, v2.h
-// GFX11: v_cmpx_class_f16_e64 v255.h, v2.h       ; encoding: [0x7e,0x18,0xfd,0xd4,0xff,0x05,0x02,0x00]
+// GFX11: v_cmpx_class_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xfd,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_class_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_class_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xfd,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -38,7 +38,7 @@ v_cmpx_class_f16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_class_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_eq_f16 v1.h, v255.h
-// GFX11: v_cmpx_eq_f16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0x82,0xd4,0x01,0xff,0x03,0x00]
+// GFX11: v_cmpx_eq_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x82,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_eq_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_eq_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x82,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -56,7 +56,7 @@ v_cmpx_eq_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_eq_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_eq_f16 v255.h, v2.h
-// GFX11: v_cmpx_eq_f16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0x82,0xd4,0xff,0x05,0x02,0x00]
+// GFX11: v_cmpx_eq_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x82,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_eq_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_eq_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x82,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -74,7 +74,7 @@ v_cmpx_eq_f16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_eq_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_eq_i16 v1.h, v255.h
-// GFX11: v_cmpx_eq_i16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0xb2,0xd4,0x01,0xff,0x03,0x00]
+// GFX11: v_cmpx_eq_i16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xb2,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_eq_i16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_eq_i16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xb2,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -92,7 +92,7 @@ v_cmpx_eq_i16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_eq_i16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_eq_i16 v255.h, v2.h
-// GFX11: v_cmpx_eq_i16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0xb2,0xd4,0xff,0x05,0x02,0x00]
+// GFX11: v_cmpx_eq_i16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xb2,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_eq_i16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_eq_i16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xb2,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -110,7 +110,7 @@ v_cmpx_eq_i16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_eq_i16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_eq_u16 v1.h, v255.h
-// GFX11: v_cmpx_eq_u16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0xba,0xd4,0x01,0xff,0x03,0x00]
+// GFX11: v_cmpx_eq_u16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xba,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_eq_u16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_eq_u16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xba,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -128,7 +128,7 @@ v_cmpx_eq_u16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_eq_u16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_eq_u16 v255.h, v2.h
-// GFX11: v_cmpx_eq_u16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0xba,0xd4,0xff,0x05,0x02,0x00]
+// GFX11: v_cmpx_eq_u16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xba,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_eq_u16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_eq_u16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xba,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -146,7 +146,7 @@ v_cmpx_eq_u16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_eq_u16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_f_f16 v1.h, v255.h
-// GFX11: v_cmpx_f_f16_e64 v1.h, v255.h           ; encoding: [0x7e,0x18,0x80,0xd4,0x01,0xff,0x03,0x00]
+// GFX11: v_cmpx_f_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x80,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_f_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_f_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x80,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -164,7 +164,7 @@ v_cmpx_f_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_f_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_f_f16 v255.h, v2.h
-// GFX11: v_cmpx_f_f16_e64 v255.h, v2.h           ; encoding: [0x7e,0x18,0x80,0xd4,0xff,0x05,0x02,0x00]
+// GFX11: v_cmpx_f_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x80,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_f_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_f_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x80,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -182,7 +182,7 @@ v_cmpx_f_f16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_f_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_ge_f16 v1.h, v255.h
-// GFX11: v_cmpx_ge_f16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0x86,0xd4,0x01,0xff,0x03,0x00]
+// GFX11: v_cmpx_ge_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x86,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_ge_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_ge_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x86,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -200,7 +200,7 @@ v_cmpx_ge_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_ge_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_ge_f16 v255.h, v2.h
-// GFX11: v_cmpx_ge_f16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0x86,0xd4,0xff,0x05,0x02,0x00]
+// GFX11: v_cmpx_ge_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x86,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_ge_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_ge_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x86,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -218,7 +218,7 @@ v_cmpx_ge_f16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_ge_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_ge_i16 v1.h, v255.h
-// GFX11: v_cmpx_ge_i16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0xb6,0xd4,0x01,0xff,0x03,0x00]
+// GFX11: v_cmpx_ge_i16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xb6,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_ge_i16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_ge_i16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xb6,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -236,7 +236,7 @@ v_cmpx_ge_i16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_ge_i16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_ge_i16 v255.h, v2.h
-// GFX11: v_cmpx_ge_i16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0xb6,0xd4,0xff,0x05,0x02,0x00]
+// GFX11: v_cmpx_ge_i16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xb6,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_ge_i16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_ge_i16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xb6,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -254,7 +254,7 @@ v_cmpx_ge_i16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_ge_i16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_ge_u16 v1.h, v255.h
-// GFX11: v_cmpx_ge_u16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0xbe,0xd4,0x01,0xff,0x03,0x00]
+// GFX11: v_cmpx_ge_u16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xbe,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_ge_u16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_ge_u16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xbe,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -272,7 +272,7 @@ v_cmpx_ge_u16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_ge_u16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_ge_u16 v255.h, v2.h
-// GFX11: v_cmpx_ge_u16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0xbe,0xd4,0xff,0x05,0x02,0x00]
+// GFX11: v_cmpx_ge_u16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xbe,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_ge_u16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_ge_u16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xbe,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -290,7 +290,7 @@ v_cmpx_ge_u16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_ge_u16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_gt_f16 v1.h, v255.h
-// GFX11: v_cmpx_gt_f16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0x84,0xd4,0x01,0xff,0x03,0x00]
+// GFX11: v_cmpx_gt_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x84,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_gt_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_gt_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x84,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -308,7 +308,7 @@ v_cmpx_gt_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_gt_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_gt_f16 v255.h, v2.h
-// GFX11: v_cmpx_gt_f16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0x84,0xd4,0xff,0x05,0x02,0x00]
+// GFX11: v_cmpx_gt_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x84,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_gt_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_gt_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x84,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -326,7 +326,7 @@ v_cmpx_gt_f16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_gt_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_gt_i16 v1.h, v255.h
-// GFX11: v_cmpx_gt_i16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0xb4,0xd4,0x01,0xff,0x03,0x00]
+// GFX11: v_cmpx_gt_i16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xb4,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_gt_i16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_gt_i16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xb4,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -344,7 +344,7 @@ v_cmpx_gt_i16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_gt_i16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_gt_i16 v255.h, v2.h
-// GFX11: v_cmpx_gt_i16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0xb4,0xd4,0xff,0x05,0x02,0x00]
+// GFX11: v_cmpx_gt_i16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xb4,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_gt_i16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_gt_i16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xb4,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -362,7 +362,7 @@ v_cmpx_gt_i16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_gt_i16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_gt_u16 v1.h, v255.h
-// GFX11: v_cmpx_gt_u16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0xbc,0xd4,0x01,0xff,0x03,0x00]
+// GFX11: v_cmpx_gt_u16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xbc,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_gt_u16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_gt_u16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xbc,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -380,7 +380,7 @@ v_cmpx_gt_u16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_gt_u16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_gt_u16 v255.h, v2.h
-// GFX11: v_cmpx_gt_u16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0xbc,0xd4,0xff,0x05,0x02,0x00]
+// GFX11: v_cmpx_gt_u16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xbc,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_gt_u16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_gt_u16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xbc,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -398,7 +398,7 @@ v_cmpx_gt_u16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_gt_u16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_le_f16 v1.h, v255.h
-// GFX11: v_cmpx_le_f16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0x83,0xd4,0x01,0xff,0x03,0x00]
+// GFX11: v_cmpx_le_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x83,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_le_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_le_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x83,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -416,7 +416,7 @@ v_cmpx_le_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_le_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_le_f16 v255.h, v2.h
-// GFX11: v_cmpx_le_f16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0x83,0xd4,0xff,0x05,0x02,0x00]
+// GFX11: v_cmpx_le_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x83,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_le_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_le_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x83,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -434,7 +434,7 @@ v_cmpx_le_f16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_le_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_le_i16 v1.h, v255.h
-// GFX11: v_cmpx_le_i16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0xb3,0xd4,0x01,0xff,0x03,0x00]
+// GFX11: v_cmpx_le_i16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xb3,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_le_i16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_le_i16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xb3,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -452,7 +452,7 @@ v_cmpx_le_i16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_le_i16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_le_i16 v255.h, v2.h
-// GFX11: v_cmpx_le_i16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0xb3,0xd4,0xff,0x05,0x02,0x00]
+// GFX11: v_cmpx_le_i16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xb3,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_le_i16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_le_i16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xb3,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -470,7 +470,7 @@ v_cmpx_le_i16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_le_i16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_le_u16 v1.h, v255.h
-// GFX11: v_cmpx_le_u16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0xbb,0xd4,0x01,0xff,0x03,0x00]
+// GFX11: v_cmpx_le_u16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xbb,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_le_u16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_le_u16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xbb,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -488,7 +488,7 @@ v_cmpx_le_u16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_le_u16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_le_u16 v255.h, v2.h
-// GFX11: v_cmpx_le_u16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0xbb,0xd4,0xff,0x05,0x02,0x00]
+// GFX11: v_cmpx_le_u16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xbb,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_le_u16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_le_u16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xbb,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -506,7 +506,7 @@ v_cmpx_le_u16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_le_u16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_lg_f16 v1.h, v255.h
-// GFX11: v_cmpx_lg_f16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0x85,0xd4,0x01,0xff,0x03,0x00]
+// GFX11: v_cmpx_lg_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x85,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_lg_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_lg_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x85,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -524,7 +524,7 @@ v_cmpx_lg_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_lg_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_lg_f16 v255.h, v2.h
-// GFX11: v_cmpx_lg_f16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0x85,0xd4,0xff,0x05,0x02,0x00]
+// GFX11: v_cmpx_lg_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x85,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_lg_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_lg_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x85,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -542,7 +542,7 @@ v_cmpx_lg_f16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_lg_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_lt_f16 v1.h, v255.h
-// GFX11: v_cmpx_lt_f16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0x81,0xd4,0x01,0xff,0x03,0x00]
+// GFX11: v_cmpx_lt_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x81,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_lt_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_lt_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x81,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -560,7 +560,7 @@ v_cmpx_lt_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_lt_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_lt_f16 v255.h, v2.h
-// GFX11: v_cmpx_lt_f16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0x81,0xd4,0xff,0x05,0x02,0x00]
+// GFX11: v_cmpx_lt_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x81,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_lt_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_lt_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x81,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -578,7 +578,7 @@ v_cmpx_lt_f16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_lt_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_lt_i16 v1.h, v255.h
-// GFX11: v_cmpx_lt_i16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0xb1,0xd4,0x01,0xff,0x03,0x00]
+// GFX11: v_cmpx_lt_i16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xb1,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_lt_i16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_lt_i16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xb1,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -596,7 +596,7 @@ v_cmpx_lt_i16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_lt_i16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_lt_i16 v255.h, v2.h
-// GFX11: v_cmpx_lt_i16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0xb1,0xd4,0xff,0x05,0x02,0x00]
+// GFX11: v_cmpx_lt_i16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xb1,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_lt_i16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_lt_i16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xb1,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -614,7 +614,7 @@ v_cmpx_lt_i16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_lt_i16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_lt_u16 v1.h, v255.h
-// GFX11: v_cmpx_lt_u16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0xb9,0xd4,0x01,0xff,0x03,0x00]
+// GFX11: v_cmpx_lt_u16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xb9,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_lt_u16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_lt_u16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xb9,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -632,7 +632,7 @@ v_cmpx_lt_u16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_lt_u16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_lt_u16 v255.h, v2.h
-// GFX11: v_cmpx_lt_u16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0xb9,0xd4,0xff,0x05,0x02,0x00]
+// GFX11: v_cmpx_lt_u16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xb9,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_lt_u16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_lt_u16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xb9,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -650,7 +650,7 @@ v_cmpx_lt_u16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_lt_u16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_ne_i16 v1.h, v255.h
-// GFX11: v_cmpx_ne_i16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0xb5,0xd4,0x01,0xff,0x03,0x00]
+// GFX11: v_cmpx_ne_i16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xb5,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_ne_i16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_ne_i16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xb5,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -668,7 +668,7 @@ v_cmpx_ne_i16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_ne_i16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_ne_i16 v255.h, v2.h
-// GFX11: v_cmpx_ne_i16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0xb5,0xd4,0xff,0x05,0x02,0x00]
+// GFX11: v_cmpx_ne_i16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xb5,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_ne_i16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_ne_i16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xb5,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -686,7 +686,7 @@ v_cmpx_ne_i16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_ne_i16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_ne_u16 v1.h, v255.h
-// GFX11: v_cmpx_ne_u16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0xbd,0xd4,0x01,0xff,0x03,0x00]
+// GFX11: v_cmpx_ne_u16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xbd,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_ne_u16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_ne_u16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xbd,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -704,7 +704,7 @@ v_cmpx_ne_u16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_ne_u16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_ne_u16 v255.h, v2.h
-// GFX11: v_cmpx_ne_u16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0xbd,0xd4,0xff,0x05,0x02,0x00]
+// GFX11: v_cmpx_ne_u16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xbd,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_ne_u16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_ne_u16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xbd,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -722,7 +722,7 @@ v_cmpx_ne_u16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_ne_u16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_neq_f16 v1.h, v255.h
-// GFX11: v_cmpx_neq_f16_e64 v1.h, v255.h         ; encoding: [0x7e,0x18,0x8d,0xd4,0x01,0xff,0x03,0x00]
+// GFX11: v_cmpx_neq_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x8d,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_neq_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_neq_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x8d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -740,7 +740,7 @@ v_cmpx_neq_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_neq_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_neq_f16 v255.h, v2.h
-// GFX11: v_cmpx_neq_f16_e64 v255.h, v2.h         ; encoding: [0x7e,0x18,0x8d,0xd4,0xff,0x05,0x02,0x00]
+// GFX11: v_cmpx_neq_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x8d,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_neq_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_neq_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x8d,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -758,7 +758,7 @@ v_cmpx_neq_f16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_neq_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_nge_f16 v1.h, v255.h
-// GFX11: v_cmpx_nge_f16_e64 v1.h, v255.h         ; encoding: [0x7e,0x18,0x89,0xd4,0x01,0xff,0x03,0x00]
+// GFX11: v_cmpx_nge_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x89,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_nge_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_nge_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x89,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -776,7 +776,7 @@ v_cmpx_nge_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_nge_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_nge_f16 v255.h, v2.h
-// GFX11: v_cmpx_nge_f16_e64 v255.h, v2.h         ; encoding: [0x7e,0x18,0x89,0xd4,0xff,0x05,0x02,0x00]
+// GFX11: v_cmpx_nge_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x89,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_nge_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_nge_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x89,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -794,7 +794,7 @@ v_cmpx_nge_f16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_nge_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_ngt_f16 v1.h, v255.h
-// GFX11: v_cmpx_ngt_f16_e64 v1.h, v255.h         ; encoding: [0x7e,0x18,0x8b,0xd4,0x01,0xff,0x03,0x00]
+// GFX11: v_cmpx_ngt_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x8b,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_ngt_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_ngt_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x8b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -812,7 +812,7 @@ v_cmpx_ngt_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_ngt_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_ngt_f16 v255.h, v2.h
-// GFX11: v_cmpx_ngt_f16_e64 v255.h, v2.h         ; encoding: [0x7e,0x18,0x8b,0xd4,0xff,0x05,0x02,0x00]
+// GFX11: v_cmpx_ngt_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x8b,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_ngt_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_ngt_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x8b,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -830,7 +830,7 @@ v_cmpx_ngt_f16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_ngt_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_nle_f16 v1.h, v255.h
-// GFX11: v_cmpx_nle_f16_e64 v1.h, v255.h         ; encoding: [0x7e,0x18,0x8c,0xd4,0x01,0xff,0x03,0x00]
+// GFX11: v_cmpx_nle_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x8c,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_nle_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_nle_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x8c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -848,7 +848,7 @@ v_cmpx_nle_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_nle_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_nle_f16 v255.h, v2.h
-// GFX11: v_cmpx_nle_f16_e64 v255.h, v2.h         ; encoding: [0x7e,0x18,0x8c,0xd4,0xff,0x05,0x02,0x00]
+// GFX11: v_cmpx_nle_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x8c,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_nle_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_nle_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x8c,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -866,7 +866,7 @@ v_cmpx_nle_f16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_nle_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_nlg_f16 v1.h, v255.h
-// GFX11: v_cmpx_nlg_f16_e64 v1.h, v255.h         ; encoding: [0x7e,0x18,0x8a,0xd4,0x01,0xff,0x03,0x00]
+// GFX11: v_cmpx_nlg_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x8a,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_nlg_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_nlg_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x8a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -884,7 +884,7 @@ v_cmpx_nlg_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_nlg_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_nlg_f16 v255.h, v2.h
-// GFX11: v_cmpx_nlg_f16_e64 v255.h, v2.h         ; encoding: [0x7e,0x18,0x8a,0xd4,0xff,0x05,0x02,0x00]
+// GFX11: v_cmpx_nlg_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x8a,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_nlg_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_nlg_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x8a,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -902,7 +902,7 @@ v_cmpx_nlg_f16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_nlg_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_nlt_f16 v1.h, v255.h
-// GFX11: v_cmpx_nlt_f16_e64 v1.h, v255.h         ; encoding: [0x7e,0x18,0x8e,0xd4,0x01,0xff,0x03,0x00]
+// GFX11: v_cmpx_nlt_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x8e,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_nlt_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_nlt_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x8e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -920,7 +920,7 @@ v_cmpx_nlt_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_nlt_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_nlt_f16 v255.h, v2.h
-// GFX11: v_cmpx_nlt_f16_e64 v255.h, v2.h         ; encoding: [0x7e,0x18,0x8e,0xd4,0xff,0x05,0x02,0x00]
+// GFX11: v_cmpx_nlt_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x8e,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_nlt_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_nlt_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x8e,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -938,7 +938,7 @@ v_cmpx_nlt_f16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_nlt_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_o_f16 v1.h, v255.h
-// GFX11: v_cmpx_o_f16_e64 v1.h, v255.h           ; encoding: [0x7e,0x18,0x87,0xd4,0x01,0xff,0x03,0x00]
+// GFX11: v_cmpx_o_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x87,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_o_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_o_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x87,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -956,7 +956,7 @@ v_cmpx_o_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_o_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_o_f16 v255.h, v2.h
-// GFX11: v_cmpx_o_f16_e64 v255.h, v2.h           ; encoding: [0x7e,0x18,0x87,0xd4,0xff,0x05,0x02,0x00]
+// GFX11: v_cmpx_o_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x87,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_o_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_o_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x87,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -974,7 +974,7 @@ v_cmpx_o_f16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_o_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_t_f16 v1.h, v255.h
-// GFX11: v_cmpx_t_f16_e64 v1.h, v255.h           ; encoding: [0x7e,0x18,0x8f,0xd4,0x01,0xff,0x03,0x00]
+// GFX11: v_cmpx_t_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x8f,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_t_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_t_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x8f,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -992,7 +992,7 @@ v_cmpx_t_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_t_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8f,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_t_f16 v255.h, v2.h
-// GFX11: v_cmpx_t_f16_e64 v255.h, v2.h           ; encoding: [0x7e,0x18,0x8f,0xd4,0xff,0x05,0x02,0x00]
+// GFX11: v_cmpx_t_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x8f,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_t_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_t_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x8f,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -1010,7 +1010,7 @@ v_cmpx_t_f16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_t_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8f,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_tru_f16 v1.h, v255.h
-// GFX11: v_cmpx_t_f16_e64 v1.h, v255.h           ; encoding: [0x7e,0x18,0x8f,0xd4,0x01,0xff,0x03,0x00]
+// GFX11: v_cmpx_t_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x8f,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_tru_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_t_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x8f,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -1028,7 +1028,7 @@ v_cmpx_tru_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_t_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8f,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_tru_f16 v255.h, v2.h
-// GFX11: v_cmpx_t_f16_e64 v255.h, v2.h           ; encoding: [0x7e,0x18,0x8f,0xd4,0xff,0x05,0x02,0x00]
+// GFX11: v_cmpx_t_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x8f,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_tru_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_t_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x8f,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -1046,7 +1046,7 @@ v_cmpx_tru_f16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_t_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8f,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_u_f16 v1.h, v255.h
-// GFX11: v_cmpx_u_f16_e64 v1.h, v255.h           ; encoding: [0x7e,0x18,0x88,0xd4,0x01,0xff,0x03,0x00]
+// GFX11: v_cmpx_u_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x88,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_u_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_u_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x88,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -1064,7 +1064,7 @@ v_cmpx_u_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX11: v_cmpx_u_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_u_f16 v255.h, v2.h
-// GFX11: v_cmpx_u_f16_e64 v255.h, v2.h           ; encoding: [0x7e,0x18,0x88,0xd4,0xff,0x05,0x02,0x00]
+// GFX11: v_cmpx_u_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x88,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_u_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: v_cmpx_u_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x88,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3c.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c.s
index d73ffb6..bc8b2bc 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3c.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c.s
@@ -120,19 +120,19 @@ v_cmp_class_f16_e64 null, -|0xfe0b|, vcc_hi
 // GFX12: v_cmp_class_f16_e64 null, -|0xfe0b|, vcc_hi ; encoding: [0x7c,0x01,0x7d,0xd4,0xff,0xd6,0x00,0x20,0x0b,0xfe,0x00,0x00]
 
 v_cmp_class_f16_e64 s5, v255.h, v2.l
-// W32: v_cmp_class_f16_e64 s5, v255.h, v2.l    ; encoding: [0x05,0x08,0x7d,0xd4,0xff,0x05,0x02,0x00]
+// W32: v_cmp_class_f16_e64 s5, v255.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x7d,0xd4,0xff,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
 v_cmp_class_f16_e64 s5, s105, v255.h
-// W32: v_cmp_class_f16_e64 s5, s105, v255.h    ; encoding: [0x05,0x10,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+// W32: v_cmp_class_f16_e64 s5, s105, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x7d,0xd4,0x69,0xfe,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
 v_cmp_class_f16_e64 s[10:11], v255.h, v2.l
-// W64: v_cmp_class_f16_e64 s[10:11], v255.h, v2.l ; encoding: [0x0a,0x08,0x7d,0xd4,0xff,0x05,0x02,0x00]
+// W64: v_cmp_class_f16_e64 s[10:11], v255.h, v2.l op_sel:[1,0,0] ; encoding: [0x0a,0x08,0x7d,0xd4,0xff,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
 v_cmp_class_f16_e64 s[10:11], s105, v255.h
-// W64: v_cmp_class_f16_e64 s[10:11], s105, v255.h ; encoding: [0x0a,0x10,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+// W64: v_cmp_class_f16_e64 s[10:11], s105, v255.h op_sel:[0,1,0] ; encoding: [0x0a,0x10,0x7d,0xd4,0x69,0xfe,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction
 
 v_cmp_class_f16_e64 vcc_lo, 0.5, m0
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx.s
index cfc7b2c5..e40b6a7 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx.s
@@ -48,13 +48,13 @@ v_cmpx_class_f16_e64 -|0xfe0b|, vcc_hi
 // GFX12: v_cmpx_class_f16_e64 -|0xfe0b|, vcc_hi  ; encoding: [0x7e,0x01,0xfd,0xd4,0xff,0xd6,0x00,0x20,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_class_f16_e64 v1.h, v2.h
-// GFX12: v_cmpx_class_f16_e64 v1.h, v2.h         ; encoding: [0x7e,0x18,0xfd,0xd4,0x01,0x05,0x02,0x00]
+// GFX12: v_cmpx_class_f16_e64 v1.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xfd,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_class_f16_e64 v255.h, v2.l
-// GFX12: v_cmpx_class_f16_e64 v255.h, v2.l       ; encoding: [0x7e,0x08,0xfd,0xd4,0xff,0x05,0x02,0x00]
+// GFX12: v_cmpx_class_f16_e64 v255.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0xfd,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_class_f16_e64 s105, v255.h
-// GFX12: v_cmpx_class_f16_e64 s105, v255.h       ; encoding: [0x7e,0x10,0xfd,0xd4,0x69,0xfe,0x03,0x00]
+// GFX12: v_cmpx_class_f16_e64 s105, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0xfd,0xd4,0x69,0xfe,0x03,0x00]
 
 v_cmpx_class_f32_e64 v1, v2
 // GFX12: v_cmpx_class_f32_e64 v1, v2             ; encoding: [0x7e,0x00,0xfe,0xd4,0x01,0x05,0x02,0x00]
@@ -192,10 +192,10 @@ v_cmpx_eq_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
 // GFX12: v_cmpx_eq_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x82,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_eq_f16_e64 v1.h, v2.l
-// GFX12: v_cmpx_eq_f16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0x82,0xd4,0x01,0x05,0x02,0x00]
+// GFX12: v_cmpx_eq_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x82,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_eq_f16_e64 v255.l, v255.h
-// GFX12: v_cmpx_eq_f16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0x82,0xd4,0xff,0xff,0x03,0x00]
+// GFX12: v_cmpx_eq_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x82,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_eq_f32_e64 v1, v2
 // GFX12: v_cmpx_eq_f32_e64 v1, v2                ; encoding: [0x7e,0x00,0x92,0xd4,0x01,0x05,0x02,0x00]
@@ -324,10 +324,10 @@ v_cmpx_eq_i16_e64 0xfe0b, vcc_hi
 // GFX12: v_cmpx_eq_i16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xb2,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_eq_i16_e64 v1.h, v2.l
-// GFX12: v_cmpx_eq_i16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0xb2,0xd4,0x01,0x05,0x02,0x00]
+// GFX12: v_cmpx_eq_i16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0xb2,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_eq_i16_e64 v255.l, v255.h
-// GFX12: v_cmpx_eq_i16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0xb2,0xd4,0xff,0xff,0x03,0x00]
+// GFX12: v_cmpx_eq_i16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0xb2,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_eq_i32_e64 v1, v2
 // GFX12: v_cmpx_eq_i32_e64 v1, v2                ; encoding: [0x7e,0x00,0xc2,0xd4,0x01,0x05,0x02,0x00]
@@ -456,10 +456,10 @@ v_cmpx_eq_u16_e64 0xfe0b, vcc_hi
 // GFX12: v_cmpx_eq_u16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xba,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_eq_u16_e64 v1.h, v2.l
-// GFX12: v_cmpx_eq_u16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0xba,0xd4,0x01,0x05,0x02,0x00]
+// GFX12: v_cmpx_eq_u16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0xba,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_eq_u16_e64 v255.l, v255.h
-// GFX12: v_cmpx_eq_u16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0xba,0xd4,0xff,0xff,0x03,0x00]
+// GFX12: v_cmpx_eq_u16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0xba,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_eq_u32_e64 v1, v2
 // GFX12: v_cmpx_eq_u32_e64 v1, v2                ; encoding: [0x7e,0x00,0xca,0xd4,0x01,0x05,0x02,0x00]
@@ -588,10 +588,10 @@ v_cmpx_ge_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
 // GFX12: v_cmpx_ge_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x86,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_ge_f16_e64 v1.h, v2.l
-// GFX12: v_cmpx_ge_f16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0x86,0xd4,0x01,0x05,0x02,0x00]
+// GFX12: v_cmpx_ge_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x86,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_ge_f16_e64 v255.l, v255.h
-// GFX12: v_cmpx_ge_f16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0x86,0xd4,0xff,0xff,0x03,0x00]
+// GFX12: v_cmpx_ge_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x86,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_ge_f32_e64 v1, v2
 // GFX12: v_cmpx_ge_f32_e64 v1, v2                ; encoding: [0x7e,0x00,0x96,0xd4,0x01,0x05,0x02,0x00]
@@ -720,10 +720,10 @@ v_cmpx_ge_i16_e64 0xfe0b, vcc_hi
 // GFX12: v_cmpx_ge_i16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xb6,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_ge_i16_e64 v1.h, v2.l
-// GFX12: v_cmpx_ge_i16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0xb6,0xd4,0x01,0x05,0x02,0x00]
+// GFX12: v_cmpx_ge_i16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0xb6,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_ge_i16_e64 v255.l, v255.h
-// GFX12: v_cmpx_ge_i16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0xb6,0xd4,0xff,0xff,0x03,0x00]
+// GFX12: v_cmpx_ge_i16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0xb6,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_ge_i32_e64 v1, v2
 // GFX12: v_cmpx_ge_i32_e64 v1, v2                ; encoding: [0x7e,0x00,0xc6,0xd4,0x01,0x05,0x02,0x00]
@@ -852,10 +852,10 @@ v_cmpx_ge_u16_e64 0xfe0b, vcc_hi
 // GFX12: v_cmpx_ge_u16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xbe,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_ge_u16_e64 v1.h, v2.l
-// GFX12: v_cmpx_ge_u16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0xbe,0xd4,0x01,0x05,0x02,0x00]
+// GFX12: v_cmpx_ge_u16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0xbe,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_ge_u16_e64 v255.l, v255.h
-// GFX12: v_cmpx_ge_u16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0xbe,0xd4,0xff,0xff,0x03,0x00]
+// GFX12: v_cmpx_ge_u16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0xbe,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_ge_u32_e64 v1, v2
 // GFX12: v_cmpx_ge_u32_e64 v1, v2                ; encoding: [0x7e,0x00,0xce,0xd4,0x01,0x05,0x02,0x00]
@@ -984,10 +984,10 @@ v_cmpx_gt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
 // GFX12: v_cmpx_gt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x84,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_gt_f16_e64 v1.h, v2.l
-// GFX12: v_cmpx_gt_f16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0x84,0xd4,0x01,0x05,0x02,0x00]
+// GFX12: v_cmpx_gt_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x84,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_gt_f16_e64 v255.l, v255.h
-// GFX12: v_cmpx_gt_f16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0x84,0xd4,0xff,0xff,0x03,0x00]
+// GFX12: v_cmpx_gt_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x84,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_gt_f32_e64 v1, v2
 // GFX12: v_cmpx_gt_f32_e64 v1, v2                ; encoding: [0x7e,0x00,0x94,0xd4,0x01,0x05,0x02,0x00]
@@ -1116,10 +1116,10 @@ v_cmpx_gt_i16_e64 0xfe0b, vcc_hi
 // GFX12: v_cmpx_gt_i16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xb4,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_gt_i16_e64 v1.h, v2.l
-// GFX12: v_cmpx_gt_i16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0xb4,0xd4,0x01,0x05,0x02,0x00]
+// GFX12: v_cmpx_gt_i16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0xb4,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_gt_i16_e64 v255.l, v255.h
-// GFX12: v_cmpx_gt_i16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0xb4,0xd4,0xff,0xff,0x03,0x00]
+// GFX12: v_cmpx_gt_i16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0xb4,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_gt_i32_e64 v1, v2
 // GFX12: v_cmpx_gt_i32_e64 v1, v2                ; encoding: [0x7e,0x00,0xc4,0xd4,0x01,0x05,0x02,0x00]
@@ -1248,10 +1248,10 @@ v_cmpx_gt_u16_e64 0xfe0b, vcc_hi
 // GFX12: v_cmpx_gt_u16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xbc,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_gt_u16_e64 v1.h, v2.l
-// GFX12: v_cmpx_gt_u16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0xbc,0xd4,0x01,0x05,0x02,0x00]
+// GFX12: v_cmpx_gt_u16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0xbc,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_gt_u16_e64 v255.l, v255.h
-// GFX12: v_cmpx_gt_u16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0xbc,0xd4,0xff,0xff,0x03,0x00]
+// GFX12: v_cmpx_gt_u16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0xbc,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_gt_u32_e64 v1, v2
 // GFX12: v_cmpx_gt_u32_e64 v1, v2                ; encoding: [0x7e,0x00,0xcc,0xd4,0x01,0x05,0x02,0x00]
@@ -1380,10 +1380,10 @@ v_cmpx_le_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
 // GFX12: v_cmpx_le_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x83,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_le_f16_e64 v1.h, v2.l
-// GFX12: v_cmpx_le_f16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0x83,0xd4,0x01,0x05,0x02,0x00]
+// GFX12: v_cmpx_le_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x83,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_le_f16_e64 v255.l, v255.h
-// GFX12: v_cmpx_le_f16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0x83,0xd4,0xff,0xff,0x03,0x00]
+// GFX12: v_cmpx_le_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x83,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_le_f32_e64 v1, v2
 // GFX12: v_cmpx_le_f32_e64 v1, v2                ; encoding: [0x7e,0x00,0x93,0xd4,0x01,0x05,0x02,0x00]
@@ -1512,10 +1512,10 @@ v_cmpx_le_i16_e64 0xfe0b, vcc_hi
 // GFX12: v_cmpx_le_i16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xb3,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_le_i16_e64 v1.h, v2.l
-// GFX12: v_cmpx_le_i16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0xb3,0xd4,0x01,0x05,0x02,0x00]
+// GFX12: v_cmpx_le_i16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0xb3,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_le_i16_e64 v255.l, v255.h
-// GFX12: v_cmpx_le_i16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0xb3,0xd4,0xff,0xff,0x03,0x00]
+// GFX12: v_cmpx_le_i16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0xb3,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_le_i32_e64 v1, v2
 // GFX12: v_cmpx_le_i32_e64 v1, v2                ; encoding: [0x7e,0x00,0xc3,0xd4,0x01,0x05,0x02,0x00]
@@ -1644,10 +1644,10 @@ v_cmpx_le_u16_e64 0xfe0b, vcc_hi
 // GFX12: v_cmpx_le_u16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xbb,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_le_u16_e64 v1.h, v2.l
-// GFX12: v_cmpx_le_u16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0xbb,0xd4,0x01,0x05,0x02,0x00]
+// GFX12: v_cmpx_le_u16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0xbb,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_le_u16_e64 v255.l, v255.h
-// GFX12: v_cmpx_le_u16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0xbb,0xd4,0xff,0xff,0x03,0x00]
+// GFX12: v_cmpx_le_u16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0xbb,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_le_u32_e64 v1, v2
 // GFX12: v_cmpx_le_u32_e64 v1, v2                ; encoding: [0x7e,0x00,0xcb,0xd4,0x01,0x05,0x02,0x00]
@@ -1776,10 +1776,10 @@ v_cmpx_lg_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
 // GFX12: v_cmpx_lg_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x85,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_lg_f16_e64 v1.h, v2.l
-// GFX12: v_cmpx_lg_f16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0x85,0xd4,0x01,0x05,0x02,0x00]
+// GFX12: v_cmpx_lg_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x85,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_lg_f16_e64 v255.l, v255.h
-// GFX12: v_cmpx_lg_f16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0x85,0xd4,0xff,0xff,0x03,0x00]
+// GFX12: v_cmpx_lg_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x85,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_lg_f32_e64 v1, v2
 // GFX12: v_cmpx_lg_f32_e64 v1, v2                ; encoding: [0x7e,0x00,0x95,0xd4,0x01,0x05,0x02,0x00]
@@ -1908,10 +1908,10 @@ v_cmpx_lt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
 // GFX12: v_cmpx_lt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x81,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_lt_f16_e64 v1.h, v2.l
-// GFX12: v_cmpx_lt_f16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0x81,0xd4,0x01,0x05,0x02,0x00]
+// GFX12: v_cmpx_lt_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x81,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_lt_f16_e64 v255.l, v255.h
-// GFX12: v_cmpx_lt_f16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0x81,0xd4,0xff,0xff,0x03,0x00]
+// GFX12: v_cmpx_lt_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x81,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_lt_f32_e64 v1, v2
 // GFX12: v_cmpx_lt_f32_e64 v1, v2                ; encoding: [0x7e,0x00,0x91,0xd4,0x01,0x05,0x02,0x00]
@@ -2040,10 +2040,10 @@ v_cmpx_lt_i16_e64 0xfe0b, vcc_hi
 // GFX12: v_cmpx_lt_i16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xb1,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_lt_i16_e64 v1.h, v2.l
-// GFX12: v_cmpx_lt_i16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0xb1,0xd4,0x01,0x05,0x02,0x00]
+// GFX12: v_cmpx_lt_i16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0xb1,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_lt_i16_e64 v255.l, v255.h
-// GFX12: v_cmpx_lt_i16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0xb1,0xd4,0xff,0xff,0x03,0x00]
+// GFX12: v_cmpx_lt_i16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0xb1,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_lt_i32_e64 v1, v2
 // GFX12: v_cmpx_lt_i32_e64 v1, v2                ; encoding: [0x7e,0x00,0xc1,0xd4,0x01,0x05,0x02,0x00]
@@ -2172,10 +2172,10 @@ v_cmpx_lt_u16_e64 0xfe0b, vcc_hi
 // GFX12: v_cmpx_lt_u16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xb9,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_lt_u16_e64 v1.h, v2.l
-// GFX12: v_cmpx_lt_u16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0xb9,0xd4,0x01,0x05,0x02,0x00]
+// GFX12: v_cmpx_lt_u16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0xb9,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_lt_u16_e64 v255.l, v255.h
-// GFX12: v_cmpx_lt_u16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0xb9,0xd4,0xff,0xff,0x03,0x00]
+// GFX12: v_cmpx_lt_u16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0xb9,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_lt_u32_e64 v1, v2
 // GFX12: v_cmpx_lt_u32_e64 v1, v2                ; encoding: [0x7e,0x00,0xc9,0xd4,0x01,0x05,0x02,0x00]
@@ -2304,10 +2304,10 @@ v_cmpx_ne_i16_e64 0xfe0b, vcc_hi
 // GFX12: v_cmpx_ne_i16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xb5,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_ne_i16_e64 v1.h, v2.l
-// GFX12: v_cmpx_ne_i16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0xb5,0xd4,0x01,0x05,0x02,0x00]
+// GFX12: v_cmpx_ne_i16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0xb5,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_ne_i16_e64 v255.l, v255.h
-// GFX12: v_cmpx_ne_i16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0xb5,0xd4,0xff,0xff,0x03,0x00]
+// GFX12: v_cmpx_ne_i16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0xb5,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_ne_i32_e64 v1, v2
 // GFX12: v_cmpx_ne_i32_e64 v1, v2                ; encoding: [0x7e,0x00,0xc5,0xd4,0x01,0x05,0x02,0x00]
@@ -2436,10 +2436,10 @@ v_cmpx_ne_u16_e64 0xfe0b, vcc_hi
 // GFX12: v_cmpx_ne_u16_e64 0xfe0b, vcc_hi        ; encoding: [0x7e,0x00,0xbd,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_ne_u16_e64 v1.h, v2.l
-// GFX12: v_cmpx_ne_u16_e64 v1.h, v2.l            ; encoding: [0x7e,0x08,0xbd,0xd4,0x01,0x05,0x02,0x00]
+// GFX12: v_cmpx_ne_u16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0xbd,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_ne_u16_e64 v255.l, v255.h
-// GFX12: v_cmpx_ne_u16_e64 v255.l, v255.h        ; encoding: [0x7e,0x10,0xbd,0xd4,0xff,0xff,0x03,0x00]
+// GFX12: v_cmpx_ne_u16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0xbd,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_ne_u32_e64 v1, v2
 // GFX12: v_cmpx_ne_u32_e64 v1, v2                ; encoding: [0x7e,0x00,0xcd,0xd4,0x01,0x05,0x02,0x00]
@@ -2568,10 +2568,10 @@ v_cmpx_neq_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
 // GFX12: v_cmpx_neq_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x8d,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_neq_f16_e64 v1.h, v2.l
-// GFX12: v_cmpx_neq_f16_e64 v1.h, v2.l           ; encoding: [0x7e,0x08,0x8d,0xd4,0x01,0x05,0x02,0x00]
+// GFX12: v_cmpx_neq_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x8d,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_neq_f16_e64 v255.l, v255.h
-// GFX12: v_cmpx_neq_f16_e64 v255.l, v255.h       ; encoding: [0x7e,0x10,0x8d,0xd4,0xff,0xff,0x03,0x00]
+// GFX12: v_cmpx_neq_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x8d,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_neq_f32_e64 v1, v2
 // GFX12: v_cmpx_neq_f32_e64 v1, v2               ; encoding: [0x7e,0x00,0x9d,0xd4,0x01,0x05,0x02,0x00]
@@ -2700,10 +2700,10 @@ v_cmpx_nge_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
 // GFX12: v_cmpx_nge_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x89,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_nge_f16_e64 v1.h, v2.l
-// GFX12: v_cmpx_nge_f16_e64 v1.h, v2.l           ; encoding: [0x7e,0x08,0x89,0xd4,0x01,0x05,0x02,0x00]
+// GFX12: v_cmpx_nge_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x89,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_nge_f16_e64 v255.l, v255.h
-// GFX12: v_cmpx_nge_f16_e64 v255.l, v255.h       ; encoding: [0x7e,0x10,0x89,0xd4,0xff,0xff,0x03,0x00]
+// GFX12: v_cmpx_nge_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x89,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_nge_f32_e64 v1, v2
 // GFX12: v_cmpx_nge_f32_e64 v1, v2               ; encoding: [0x7e,0x00,0x99,0xd4,0x01,0x05,0x02,0x00]
@@ -2832,10 +2832,10 @@ v_cmpx_ngt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
 // GFX12: v_cmpx_ngt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x8b,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_ngt_f16_e64 v1.h, v2.l
-// GFX12: v_cmpx_ngt_f16_e64 v1.h, v2.l           ; encoding: [0x7e,0x08,0x8b,0xd4,0x01,0x05,0x02,0x00]
+// GFX12: v_cmpx_ngt_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x8b,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_ngt_f16_e64 v255.l, v255.h
-// GFX12: v_cmpx_ngt_f16_e64 v255.l, v255.h       ; encoding: [0x7e,0x10,0x8b,0xd4,0xff,0xff,0x03,0x00]
+// GFX12: v_cmpx_ngt_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x8b,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_ngt_f32_e64 v1, v2
 // GFX12: v_cmpx_ngt_f32_e64 v1, v2               ; encoding: [0x7e,0x00,0x9b,0xd4,0x01,0x05,0x02,0x00]
@@ -2964,10 +2964,10 @@ v_cmpx_nle_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
 // GFX12: v_cmpx_nle_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x8c,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_nle_f16_e64 v1.h, v2.l
-// GFX12: v_cmpx_nle_f16_e64 v1.h, v2.l           ; encoding: [0x7e,0x08,0x8c,0xd4,0x01,0x05,0x02,0x00]
+// GFX12: v_cmpx_nle_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x8c,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_nle_f16_e64 v255.l, v255.h
-// GFX12: v_cmpx_nle_f16_e64 v255.l, v255.h       ; encoding: [0x7e,0x10,0x8c,0xd4,0xff,0xff,0x03,0x00]
+// GFX12: v_cmpx_nle_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x8c,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_nle_f32_e64 v1, v2
 // GFX12: v_cmpx_nle_f32_e64 v1, v2               ; encoding: [0x7e,0x00,0x9c,0xd4,0x01,0x05,0x02,0x00]
@@ -3096,10 +3096,10 @@ v_cmpx_nlg_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
 // GFX12: v_cmpx_nlg_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x8a,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_nlg_f16_e64 v1.h, v2.l
-// GFX12: v_cmpx_nlg_f16_e64 v1.h, v2.l           ; encoding: [0x7e,0x08,0x8a,0xd4,0x01,0x05,0x02,0x00]
+// GFX12: v_cmpx_nlg_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x8a,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_nlg_f16_e64 v255.l, v255.h
-// GFX12: v_cmpx_nlg_f16_e64 v255.l, v255.h       ; encoding: [0x7e,0x10,0x8a,0xd4,0xff,0xff,0x03,0x00]
+// GFX12: v_cmpx_nlg_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x8a,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_nlg_f32_e64 v1, v2
 // GFX12: v_cmpx_nlg_f32_e64 v1, v2               ; encoding: [0x7e,0x00,0x9a,0xd4,0x01,0x05,0x02,0x00]
@@ -3228,10 +3228,10 @@ v_cmpx_nlt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
 // GFX12: v_cmpx_nlt_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x8e,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_nlt_f16_e64 v1.h, v2.l
-// GFX12: v_cmpx_nlt_f16_e64 v1.h, v2.l           ; encoding: [0x7e,0x08,0x8e,0xd4,0x01,0x05,0x02,0x00]
+// GFX12: v_cmpx_nlt_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x8e,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_nlt_f16_e64 v255.l, v255.h
-// GFX12: v_cmpx_nlt_f16_e64 v255.l, v255.h       ; encoding: [0x7e,0x10,0x8e,0xd4,0xff,0xff,0x03,0x00]
+// GFX12: v_cmpx_nlt_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x8e,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_nlt_f32_e64 v1, v2
 // GFX12: v_cmpx_nlt_f32_e64 v1, v2               ; encoding: [0x7e,0x00,0x9e,0xd4,0x01,0x05,0x02,0x00]
@@ -3360,10 +3360,10 @@ v_cmpx_o_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
 // GFX12: v_cmpx_o_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x87,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_o_f16_e64 v1.h, v2.l
-// GFX12: v_cmpx_o_f16_e64 v1.h, v2.l             ; encoding: [0x7e,0x08,0x87,0xd4,0x01,0x05,0x02,0x00]
+// GFX12: v_cmpx_o_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x87,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_o_f16_e64 v255.l, v255.h
-// GFX12: v_cmpx_o_f16_e64 v255.l, v255.h         ; encoding: [0x7e,0x10,0x87,0xd4,0xff,0xff,0x03,0x00]
+// GFX12: v_cmpx_o_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x87,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_o_f32_e64 v1, v2
 // GFX12: v_cmpx_o_f32_e64 v1, v2                 ; encoding: [0x7e,0x00,0x97,0xd4,0x01,0x05,0x02,0x00]
@@ -3492,10 +3492,10 @@ v_cmpx_u_f16_e64 -|0xfe0b|, -|vcc_hi| clamp
 // GFX12: v_cmpx_u_f16_e64 -|0xfe0b|, -|vcc_hi| clamp ; encoding: [0x7e,0x83,0x88,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
 
 v_cmpx_u_f16_e64 v1.h, v2.l
-// GFX12: v_cmpx_u_f16_e64 v1.h, v2.l             ; encoding: [0x7e,0x08,0x88,0xd4,0x01,0x05,0x02,0x00]
+// GFX12: v_cmpx_u_f16_e64 v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x7e,0x08,0x88,0xd4,0x01,0x05,0x02,0x00]
 
 v_cmpx_u_f16_e64 v255.l, v255.h
-// GFX12: v_cmpx_u_f16_e64 v255.l, v255.h         ; encoding: [0x7e,0x10,0x88,0xd4,0xff,0xff,0x03,0x00]
+// GFX12: v_cmpx_u_f16_e64 v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x7e,0x10,0x88,0xd4,0xff,0xff,0x03,0x00]
 
 v_cmpx_u_f32_e64 v1, v2
 // GFX12: v_cmpx_u_f32_e64 v1, v2                 ; encoding: [0x7e,0x00,0x98,0xd4,0x01,0x05,0x02,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_promote.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_promote.s
index 2005f482..1163aaf 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_promote.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_promote.s
@@ -5,7 +5,7 @@
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 %s 2>&1 > /dev/null | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
 
 v_cmp_class_f16 vcc, v1.h, v255.h
-// W64: v_cmp_class_f16_e64 vcc, v1.h, v255.h   ; encoding: [0x6a,0x18,0x7d,0xd4,0x01,0xff,0x03,0x00]
+// W64: v_cmp_class_f16_e64 vcc, v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x7d,0xd4,0x01,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_class_f16 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
@@ -29,7 +29,7 @@ v_cmp_class_f16 vcc, v1.l, v255.l quad_perm:[3,2,1,0]
 // W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_class_f16 vcc, v127.h, v255.h
-// W64: v_cmp_class_f16_e64 vcc, v127.h, v255.h ; encoding: [0x6a,0x18,0x7d,0xd4,0x7f,0xff,0x03,0x00]
+// W64: v_cmp_class_f16_e64 vcc, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x7d,0xd4,0x7f,0xff,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_class_f16 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
@@ -53,7 +53,7 @@ v_cmp_class_f16 vcc, v127.l, v255.l quad_perm:[3,2,1,0]
 // W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_class_f16 vcc, v128.h, v2.h
-// W64: v_cmp_class_f16_e64 vcc, v128.h, v2.h   ; encoding: [0x6a,0x18,0x7d,0xd4,0x80,0x05,0x02,0x00]
+// W64: v_cmp_class_f16_e64 vcc, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x7d,0xd4,0x80,0x05,0x02,0x00]
 // W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_class_f16 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
@@ -77,7 +77,7 @@ v_cmp_class_f16 vcc, v128.l, v2.l quad_perm:[3,2,1,0]
 // W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_class_f16 vcc, vcc_hi, v255.h
-// W64: v_cmp_class_f16_e64 vcc, vcc_hi, v255.h ; encoding: [0x6a,0x10,0x7d,0xd4,0x6b,0xfe,0x03,0x00]
+// W64: v_cmp_class_f16_e64 vcc, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x7d,0xd4,0x6b,0xfe,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_class_f16 vcc, vcc_hi, v255.l
@@ -85,7 +85,7 @@ v_cmp_class_f16 vcc, vcc_hi, v255.l
 // W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_class_f16 vcc, vcc_lo, v255.h
-// W64: v_cmp_class_f16_e64 vcc, vcc_lo, v255.h ; encoding: [0x6a,0x10,0x7d,0xd4,0x6a,0xfe,0x03,0x00]
+// W64: v_cmp_class_f16_e64 vcc, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x7d,0xd4,0x6a,0xfe,0x03,0x00]
 // W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_class_f16 vcc, vcc_lo, v255.l
@@ -93,7 +93,7 @@ v_cmp_class_f16 vcc, vcc_lo, v255.l
 // W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_class_f16 vcc_lo, v127.h, v255.h
-// W32: v_cmp_class_f16_e64 vcc_lo, v127.h, v255.h ; encoding: [0x6a,0x18,0x7d,0xd4,0x7f,0xff,0x03,0x00]
+// W32: v_cmp_class_f16_e64 vcc_lo, v127.h, v255.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x7d,0xd4,0x7f,0xff,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_class_f16 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
@@ -117,7 +117,7 @@ v_cmp_class_f16 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_class_f16 vcc_lo, v128.h, v2.h
-// W32: v_cmp_class_f16_e64 vcc_lo, v128.h, v2.h ; encoding: [0x6a,0x18,0x7d,0xd4,0x80,0x05,0x02,0x00]
+// W32: v_cmp_class_f16_e64 vcc_lo, v128.h, v2.h op_sel:[1,1,0] ; encoding: [0x6a,0x18,0x7d,0xd4,0x80,0x05,0x02,0x00]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_class_f16 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
@@ -141,7 +141,7 @@ v_cmp_class_f16 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_class_f16 vcc_lo, vcc_hi, v255.h
-// W32: v_cmp_class_f16_e64 vcc_lo, vcc_hi, v255.h ; encoding: [0x6a,0x10,0x7d,0xd4,0x6b,0xfe,0x03,0x00]
+// W32: v_cmp_class_f16_e64 vcc_lo, vcc_hi, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x7d,0xd4,0x6b,0xfe,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_class_f16 vcc_lo, vcc_hi, v255.l
@@ -149,7 +149,7 @@ v_cmp_class_f16 vcc_lo, vcc_hi, v255.l
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_class_f16 vcc_lo, vcc_lo, v255.h
-// W32: v_cmp_class_f16_e64 vcc_lo, vcc_lo, v255.h ; encoding: [0x6a,0x10,0x7d,0xd4,0x6a,0xfe,0x03,0x00]
+// W32: v_cmp_class_f16_e64 vcc_lo, vcc_lo, v255.h op_sel:[0,1,0] ; encoding: [0x6a,0x10,0x7d,0xd4,0x6a,0xfe,0x03,0x00]
 // W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_class_f16 vcc_lo, vcc_lo, v255.l
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_promote.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_promote.s
index 3d02c95..b6f907c 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_promote.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_promote.s
@@ -2,7 +2,7 @@
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12 %s
 
 v_cmpx_class_f16 v1.h, v255.h
-// GFX12: v_cmpx_class_f16_e64 v1.h, v255.h       ; encoding: [0x7e,0x18,0xfd,0xd4,0x01,0xff,0x03,0x00]
+// GFX12: v_cmpx_class_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xfd,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_class_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_class_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xfd,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -20,7 +20,7 @@ v_cmpx_class_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_class_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_class_f16 v255.h, v2.h
-// GFX12: v_cmpx_class_f16_e64 v255.h, v2.h       ; encoding: [0x7e,0x18,0xfd,0xd4,0xff,0x05,0x02,0x00]
+// GFX12: v_cmpx_class_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xfd,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_class_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_class_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xfd,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -38,7 +38,7 @@ v_cmpx_class_f16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_class_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_eq_f16 v1.h, v255.h
-// GFX12: v_cmpx_eq_f16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0x82,0xd4,0x01,0xff,0x03,0x00]
+// GFX12: v_cmpx_eq_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x82,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_eq_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_eq_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x82,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -56,7 +56,7 @@ v_cmpx_eq_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_eq_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_eq_f16 v255.h, v2.h
-// GFX12: v_cmpx_eq_f16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0x82,0xd4,0xff,0x05,0x02,0x00]
+// GFX12: v_cmpx_eq_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x82,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_eq_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_eq_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x82,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -74,7 +74,7 @@ v_cmpx_eq_f16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_eq_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_eq_i16 v1.h, v255.h
-// GFX12: v_cmpx_eq_i16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0xb2,0xd4,0x01,0xff,0x03,0x00]
+// GFX12: v_cmpx_eq_i16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xb2,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_eq_i16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_eq_i16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xb2,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -92,7 +92,7 @@ v_cmpx_eq_i16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_eq_i16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_eq_i16 v255.h, v2.h
-// GFX12: v_cmpx_eq_i16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0xb2,0xd4,0xff,0x05,0x02,0x00]
+// GFX12: v_cmpx_eq_i16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xb2,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_eq_i16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_eq_i16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xb2,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -110,7 +110,7 @@ v_cmpx_eq_i16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_eq_i16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_eq_u16 v1.h, v255.h
-// GFX12: v_cmpx_eq_u16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0xba,0xd4,0x01,0xff,0x03,0x00]
+// GFX12: v_cmpx_eq_u16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xba,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_eq_u16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_eq_u16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xba,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -128,7 +128,7 @@ v_cmpx_eq_u16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_eq_u16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_eq_u16 v255.h, v2.h
-// GFX12: v_cmpx_eq_u16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0xba,0xd4,0xff,0x05,0x02,0x00]
+// GFX12: v_cmpx_eq_u16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xba,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_eq_u16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_eq_u16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xba,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -146,7 +146,7 @@ v_cmpx_eq_u16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_eq_u16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_ge_f16 v1.h, v255.h
-// GFX12: v_cmpx_ge_f16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0x86,0xd4,0x01,0xff,0x03,0x00]
+// GFX12: v_cmpx_ge_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x86,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_ge_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_ge_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x86,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -164,7 +164,7 @@ v_cmpx_ge_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_ge_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_ge_f16 v255.h, v2.h
-// GFX12: v_cmpx_ge_f16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0x86,0xd4,0xff,0x05,0x02,0x00]
+// GFX12: v_cmpx_ge_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x86,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_ge_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_ge_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x86,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -182,7 +182,7 @@ v_cmpx_ge_f16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_ge_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_ge_i16 v1.h, v255.h
-// GFX12: v_cmpx_ge_i16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0xb6,0xd4,0x01,0xff,0x03,0x00]
+// GFX12: v_cmpx_ge_i16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xb6,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_ge_i16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_ge_i16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xb6,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -200,7 +200,7 @@ v_cmpx_ge_i16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_ge_i16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_ge_i16 v255.h, v2.h
-// GFX12: v_cmpx_ge_i16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0xb6,0xd4,0xff,0x05,0x02,0x00]
+// GFX12: v_cmpx_ge_i16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xb6,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_ge_i16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_ge_i16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xb6,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -218,7 +218,7 @@ v_cmpx_ge_i16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_ge_i16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_ge_u16 v1.h, v255.h
-// GFX12: v_cmpx_ge_u16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0xbe,0xd4,0x01,0xff,0x03,0x00]
+// GFX12: v_cmpx_ge_u16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xbe,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_ge_u16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_ge_u16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xbe,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -236,7 +236,7 @@ v_cmpx_ge_u16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_ge_u16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_ge_u16 v255.h, v2.h
-// GFX12: v_cmpx_ge_u16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0xbe,0xd4,0xff,0x05,0x02,0x00]
+// GFX12: v_cmpx_ge_u16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xbe,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_ge_u16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_ge_u16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xbe,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -254,7 +254,7 @@ v_cmpx_ge_u16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_ge_u16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_gt_f16 v1.h, v255.h
-// GFX12: v_cmpx_gt_f16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0x84,0xd4,0x01,0xff,0x03,0x00]
+// GFX12: v_cmpx_gt_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x84,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_gt_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_gt_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x84,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -272,7 +272,7 @@ v_cmpx_gt_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_gt_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_gt_f16 v255.h, v2.h
-// GFX12: v_cmpx_gt_f16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0x84,0xd4,0xff,0x05,0x02,0x00]
+// GFX12: v_cmpx_gt_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x84,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_gt_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_gt_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x84,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -290,7 +290,7 @@ v_cmpx_gt_f16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_gt_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_gt_i16 v1.h, v255.h
-// GFX12: v_cmpx_gt_i16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0xb4,0xd4,0x01,0xff,0x03,0x00]
+// GFX12: v_cmpx_gt_i16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xb4,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_gt_i16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_gt_i16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xb4,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -308,7 +308,7 @@ v_cmpx_gt_i16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_gt_i16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_gt_i16 v255.h, v2.h
-// GFX12: v_cmpx_gt_i16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0xb4,0xd4,0xff,0x05,0x02,0x00]
+// GFX12: v_cmpx_gt_i16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xb4,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_gt_i16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_gt_i16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xb4,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -326,7 +326,7 @@ v_cmpx_gt_i16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_gt_i16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_gt_u16 v1.h, v255.h
-// GFX12: v_cmpx_gt_u16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0xbc,0xd4,0x01,0xff,0x03,0x00]
+// GFX12: v_cmpx_gt_u16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xbc,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_gt_u16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_gt_u16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xbc,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -344,7 +344,7 @@ v_cmpx_gt_u16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_gt_u16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_gt_u16 v255.h, v2.h
-// GFX12: v_cmpx_gt_u16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0xbc,0xd4,0xff,0x05,0x02,0x00]
+// GFX12: v_cmpx_gt_u16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xbc,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_gt_u16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_gt_u16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xbc,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -362,7 +362,7 @@ v_cmpx_gt_u16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_gt_u16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_le_f16 v1.h, v255.h
-// GFX12: v_cmpx_le_f16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0x83,0xd4,0x01,0xff,0x03,0x00]
+// GFX12: v_cmpx_le_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x83,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_le_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_le_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x83,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -380,7 +380,7 @@ v_cmpx_le_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_le_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_le_f16 v255.h, v2.h
-// GFX12: v_cmpx_le_f16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0x83,0xd4,0xff,0x05,0x02,0x00]
+// GFX12: v_cmpx_le_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x83,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_le_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_le_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x83,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -398,7 +398,7 @@ v_cmpx_le_f16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_le_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_le_i16 v1.h, v255.h
-// GFX12: v_cmpx_le_i16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0xb3,0xd4,0x01,0xff,0x03,0x00]
+// GFX12: v_cmpx_le_i16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xb3,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_le_i16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_le_i16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xb3,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -416,7 +416,7 @@ v_cmpx_le_i16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_le_i16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_le_i16 v255.h, v2.h
-// GFX12: v_cmpx_le_i16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0xb3,0xd4,0xff,0x05,0x02,0x00]
+// GFX12: v_cmpx_le_i16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xb3,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_le_i16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_le_i16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xb3,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -434,7 +434,7 @@ v_cmpx_le_i16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_le_i16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_le_u16 v1.h, v255.h
-// GFX12: v_cmpx_le_u16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0xbb,0xd4,0x01,0xff,0x03,0x00]
+// GFX12: v_cmpx_le_u16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xbb,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_le_u16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_le_u16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xbb,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -452,7 +452,7 @@ v_cmpx_le_u16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_le_u16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_le_u16 v255.h, v2.h
-// GFX12: v_cmpx_le_u16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0xbb,0xd4,0xff,0x05,0x02,0x00]
+// GFX12: v_cmpx_le_u16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xbb,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_le_u16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_le_u16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xbb,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -470,7 +470,7 @@ v_cmpx_le_u16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_le_u16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_lg_f16 v1.h, v255.h
-// GFX12: v_cmpx_lg_f16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0x85,0xd4,0x01,0xff,0x03,0x00]
+// GFX12: v_cmpx_lg_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x85,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_lg_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_lg_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x85,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -488,7 +488,7 @@ v_cmpx_lg_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_lg_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_lg_f16 v255.h, v2.h
-// GFX12: v_cmpx_lg_f16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0x85,0xd4,0xff,0x05,0x02,0x00]
+// GFX12: v_cmpx_lg_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x85,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_lg_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_lg_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x85,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -506,7 +506,7 @@ v_cmpx_lg_f16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_lg_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_lt_f16 v1.h, v255.h
-// GFX12: v_cmpx_lt_f16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0x81,0xd4,0x01,0xff,0x03,0x00]
+// GFX12: v_cmpx_lt_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x81,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_lt_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_lt_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x81,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -524,7 +524,7 @@ v_cmpx_lt_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_lt_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_lt_f16 v255.h, v2.h
-// GFX12: v_cmpx_lt_f16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0x81,0xd4,0xff,0x05,0x02,0x00]
+// GFX12: v_cmpx_lt_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x81,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_lt_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_lt_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x81,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -542,7 +542,7 @@ v_cmpx_lt_f16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_lt_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_lt_i16 v1.h, v255.h
-// GFX12: v_cmpx_lt_i16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0xb1,0xd4,0x01,0xff,0x03,0x00]
+// GFX12: v_cmpx_lt_i16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xb1,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_lt_i16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_lt_i16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xb1,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -560,7 +560,7 @@ v_cmpx_lt_i16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_lt_i16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_lt_i16 v255.h, v2.h
-// GFX12: v_cmpx_lt_i16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0xb1,0xd4,0xff,0x05,0x02,0x00]
+// GFX12: v_cmpx_lt_i16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xb1,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_lt_i16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_lt_i16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xb1,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -578,7 +578,7 @@ v_cmpx_lt_i16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_lt_i16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_lt_u16 v1.h, v255.h
-// GFX12: v_cmpx_lt_u16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0xb9,0xd4,0x01,0xff,0x03,0x00]
+// GFX12: v_cmpx_lt_u16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xb9,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_lt_u16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_lt_u16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xb9,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -596,7 +596,7 @@ v_cmpx_lt_u16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_lt_u16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_lt_u16 v255.h, v2.h
-// GFX12: v_cmpx_lt_u16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0xb9,0xd4,0xff,0x05,0x02,0x00]
+// GFX12: v_cmpx_lt_u16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xb9,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_lt_u16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_lt_u16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xb9,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -614,7 +614,7 @@ v_cmpx_lt_u16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_lt_u16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_ne_i16 v1.h, v255.h
-// GFX12: v_cmpx_ne_i16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0xb5,0xd4,0x01,0xff,0x03,0x00]
+// GFX12: v_cmpx_ne_i16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xb5,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_ne_i16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_ne_i16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xb5,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -632,7 +632,7 @@ v_cmpx_ne_i16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_ne_i16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_ne_i16 v255.h, v2.h
-// GFX12: v_cmpx_ne_i16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0xb5,0xd4,0xff,0x05,0x02,0x00]
+// GFX12: v_cmpx_ne_i16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xb5,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_ne_i16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_ne_i16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xb5,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -650,7 +650,7 @@ v_cmpx_ne_i16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_ne_i16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_ne_u16 v1.h, v255.h
-// GFX12: v_cmpx_ne_u16_e64 v1.h, v255.h          ; encoding: [0x7e,0x18,0xbd,0xd4,0x01,0xff,0x03,0x00]
+// GFX12: v_cmpx_ne_u16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xbd,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_ne_u16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_ne_u16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xbd,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -668,7 +668,7 @@ v_cmpx_ne_u16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_ne_u16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_ne_u16 v255.h, v2.h
-// GFX12: v_cmpx_ne_u16_e64 v255.h, v2.h          ; encoding: [0x7e,0x18,0xbd,0xd4,0xff,0x05,0x02,0x00]
+// GFX12: v_cmpx_ne_u16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0xbd,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_ne_u16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_ne_u16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0xbd,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -686,7 +686,7 @@ v_cmpx_ne_u16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_ne_u16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_neq_f16 v1.h, v255.h
-// GFX12: v_cmpx_neq_f16_e64 v1.h, v255.h         ; encoding: [0x7e,0x18,0x8d,0xd4,0x01,0xff,0x03,0x00]
+// GFX12: v_cmpx_neq_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x8d,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_neq_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_neq_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x8d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -704,7 +704,7 @@ v_cmpx_neq_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_neq_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_neq_f16 v255.h, v2.h
-// GFX12: v_cmpx_neq_f16_e64 v255.h, v2.h         ; encoding: [0x7e,0x18,0x8d,0xd4,0xff,0x05,0x02,0x00]
+// GFX12: v_cmpx_neq_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x8d,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_neq_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_neq_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x8d,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -722,7 +722,7 @@ v_cmpx_neq_f16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_neq_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_nge_f16 v1.h, v255.h
-// GFX12: v_cmpx_nge_f16_e64 v1.h, v255.h         ; encoding: [0x7e,0x18,0x89,0xd4,0x01,0xff,0x03,0x00]
+// GFX12: v_cmpx_nge_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x89,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_nge_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_nge_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x89,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -740,7 +740,7 @@ v_cmpx_nge_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_nge_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_nge_f16 v255.h, v2.h
-// GFX12: v_cmpx_nge_f16_e64 v255.h, v2.h         ; encoding: [0x7e,0x18,0x89,0xd4,0xff,0x05,0x02,0x00]
+// GFX12: v_cmpx_nge_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x89,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_nge_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_nge_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x89,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -758,7 +758,7 @@ v_cmpx_nge_f16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_nge_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_ngt_f16 v1.h, v255.h
-// GFX12: v_cmpx_ngt_f16_e64 v1.h, v255.h         ; encoding: [0x7e,0x18,0x8b,0xd4,0x01,0xff,0x03,0x00]
+// GFX12: v_cmpx_ngt_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x8b,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_ngt_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_ngt_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x8b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -776,7 +776,7 @@ v_cmpx_ngt_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_ngt_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_ngt_f16 v255.h, v2.h
-// GFX12: v_cmpx_ngt_f16_e64 v255.h, v2.h         ; encoding: [0x7e,0x18,0x8b,0xd4,0xff,0x05,0x02,0x00]
+// GFX12: v_cmpx_ngt_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x8b,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_ngt_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_ngt_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x8b,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -794,7 +794,7 @@ v_cmpx_ngt_f16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_ngt_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_nle_f16 v1.h, v255.h
-// GFX12: v_cmpx_nle_f16_e64 v1.h, v255.h         ; encoding: [0x7e,0x18,0x8c,0xd4,0x01,0xff,0x03,0x00]
+// GFX12: v_cmpx_nle_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x8c,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_nle_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_nle_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x8c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -812,7 +812,7 @@ v_cmpx_nle_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_nle_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_nle_f16 v255.h, v2.h
-// GFX12: v_cmpx_nle_f16_e64 v255.h, v2.h         ; encoding: [0x7e,0x18,0x8c,0xd4,0xff,0x05,0x02,0x00]
+// GFX12: v_cmpx_nle_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x8c,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_nle_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_nle_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x8c,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -830,7 +830,7 @@ v_cmpx_nle_f16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_nle_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_nlg_f16 v1.h, v255.h
-// GFX12: v_cmpx_nlg_f16_e64 v1.h, v255.h         ; encoding: [0x7e,0x18,0x8a,0xd4,0x01,0xff,0x03,0x00]
+// GFX12: v_cmpx_nlg_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x8a,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_nlg_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_nlg_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x8a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -848,7 +848,7 @@ v_cmpx_nlg_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_nlg_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_nlg_f16 v255.h, v2.h
-// GFX12: v_cmpx_nlg_f16_e64 v255.h, v2.h         ; encoding: [0x7e,0x18,0x8a,0xd4,0xff,0x05,0x02,0x00]
+// GFX12: v_cmpx_nlg_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x8a,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_nlg_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_nlg_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x8a,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -866,7 +866,7 @@ v_cmpx_nlg_f16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_nlg_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_nlt_f16 v1.h, v255.h
-// GFX12: v_cmpx_nlt_f16_e64 v1.h, v255.h         ; encoding: [0x7e,0x18,0x8e,0xd4,0x01,0xff,0x03,0x00]
+// GFX12: v_cmpx_nlt_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x8e,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_nlt_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_nlt_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x8e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -884,7 +884,7 @@ v_cmpx_nlt_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_nlt_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_nlt_f16 v255.h, v2.h
-// GFX12: v_cmpx_nlt_f16_e64 v255.h, v2.h         ; encoding: [0x7e,0x18,0x8e,0xd4,0xff,0x05,0x02,0x00]
+// GFX12: v_cmpx_nlt_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x8e,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_nlt_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_nlt_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x8e,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -902,7 +902,7 @@ v_cmpx_nlt_f16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_nlt_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_o_f16 v1.h, v255.h
-// GFX12: v_cmpx_o_f16_e64 v1.h, v255.h           ; encoding: [0x7e,0x18,0x87,0xd4,0x01,0xff,0x03,0x00]
+// GFX12: v_cmpx_o_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x87,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_o_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_o_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x87,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -920,7 +920,7 @@ v_cmpx_o_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_o_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_o_f16 v255.h, v2.h
-// GFX12: v_cmpx_o_f16_e64 v255.h, v2.h           ; encoding: [0x7e,0x18,0x87,0xd4,0xff,0x05,0x02,0x00]
+// GFX12: v_cmpx_o_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x87,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_o_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_o_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x87,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
@@ -938,7 +938,7 @@ v_cmpx_o_f16 v255.l, v2.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_o_f16_e64_dpp v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
 v_cmpx_u_f16 v1.h, v255.h
-// GFX12: v_cmpx_u_f16_e64 v1.h, v255.h           ; encoding: [0x7e,0x18,0x88,0xd4,0x01,0xff,0x03,0x00]
+// GFX12: v_cmpx_u_f16_e64 v1.h, v255.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x88,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_u_f16 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_u_f16_e64_dpp v1.h, v255.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x88,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
@@ -956,7 +956,7 @@ v_cmpx_u_f16 v1.l, v255.l quad_perm:[3,2,1,0]
 // GFX12: v_cmpx_u_f16_e64_dpp v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
 v_cmpx_u_f16 v255.h, v2.h
-// GFX12: v_cmpx_u_f16_e64 v255.h, v2.h           ; encoding: [0x7e,0x18,0x88,0xd4,0xff,0x05,0x02,0x00]
+// GFX12: v_cmpx_u_f16_e64 v255.h, v2.h op_sel:[1,1,0] ; encoding: [0x7e,0x18,0x88,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_u_f16 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: v_cmpx_u_f16_e64_dpp v255.h, v2.h op_sel:[1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x18,0x88,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
diff --git a/llvm/test/MC/Sparc/sparcv9-synthetic-instructions.s b/llvm/test/MC/Sparc/sparcv9-synthetic-instructions.s
index f8a99cd..0d14ff5 100644
--- a/llvm/test/MC/Sparc/sparcv9-synthetic-instructions.s
+++ b/llvm/test/MC/Sparc/sparcv9-synthetic-instructions.s
@@ -86,3 +86,39 @@ setx (.BB1-.BB0), %g1, %o0
 setuw 32768, %g1
 ! V9: mov 1, %g1 ! encoding: [0x82,0x10,0x20,0x01]
 setuw 1, %g1
+
+! V8:      error: instruction requires a CPU feature not currently enabled
+! V9: mov 4095, %g1                   ! encoding: [0x82,0x10,0x2f,0xff]
+setsw 4095, %g1
+! V8:      error: instruction requires a CPU feature not currently enabled
+! V9: mov -4096, %g1                  ! encoding: [0x82,0x10,0x30,0x00]
+setsw -4096, %g1
+! V8:      error: instruction requires a CPU feature not currently enabled
+! V9: sethi %hi(4096), %g1            ! encoding: [0x03,0b00AAAAAA,A,A]
+! V9:                                 !   fixup A - offset: 0, value: %hi(4096), kind: fixup_sparc_hi22
+setsw 4096, %g1
+! V8:      error: instruction requires a CPU feature not currently enabled
+! V9: sethi %hi(-4097), %g1           ! encoding: [0x03,0b00AAAAAA,A,A]
+! V9:                                 !   fixup A - offset: 0, value: %hi(-4097), kind: fixup_sparc_hi22
+! V9: sra %g1, %g0, %g1               ! encoding: [0x83,0x38,0x40,0x00]
+setsw -4097, %g1
+! V8:      error: instruction requires a CPU feature not currently enabled
+! V9: sethi %hi(2147483647), %o1      ! encoding: [0x13,0b00AAAAAA,A,A]
+! V9:                                 !   fixup A - offset: 0, value: %hi(2147483647), kind: fixup_sparc_hi22
+! V9: or %o1, %lo(2147483647), %o1    ! encoding: [0x92,0x12,0b011000AA,A]
+! V9:                                 !   fixup A - offset: 0, value: %lo(2147483647), kind: fixup_sparc_lo10
+setsw 2147483647, %o1
+! V8:      error: instruction requires a CPU feature not currently enabled
+! V9: sethi %hi(-2147483647), %o1     ! encoding: [0x13,0b00AAAAAA,A,A]
+! V9:                                 !   fixup A - offset: 0, value: %hi(-2147483647), kind: fixup_sparc_hi22
+! V9: or %o1, %lo(-2147483647), %o1   ! encoding: [0x92,0x12,0b011000AA,A]
+! V9:                                 !   fixup A - offset: 0, value: %lo(-2147483647), kind: fixup_sparc_lo10
+! V9: sra %o1, %g0, %o1               ! encoding: [0x93,0x3a,0x40,0x00]
+setsw -2147483647, %o1
+! V8:      error: instruction requires a CPU feature not currently enabled
+! V9: sethi %hi(.Ltmp0), %o1          ! encoding: [0x13,0b00AAAAAA,A,A]
+! V9:                                 !   fixup A - offset: 0, value: %hi(.Ltmp0), kind: fixup_sparc_hi22
+! V9: or %o1, %lo(.Ltmp0), %o1        ! encoding: [0x92,0x12,0b011000AA,A]
+! V9:                                 !   fixup A - offset: 0, value: %lo(.Ltmp0), kind: fixup_sparc_lo10
+! V9: sra %o1, %g0, %o1               ! encoding: [0x93,0x3a,0x40,0x00]
+setsw ., %o1
diff --git a/llvm/test/Transforms/InstCombine/bit_ceil.ll b/llvm/test/Transforms/InstCombine/bit_ceil.ll
index a2e27df..09f90ee 100644
--- a/llvm/test/Transforms/InstCombine/bit_ceil.ll
+++ b/llvm/test/Transforms/InstCombine/bit_ceil.ll
@@ -302,9 +302,9 @@ define i32 @pr91691(i32 %0) {
   ret i32 %7
 }
 
-define i32 @pr91691_keep_nsw(i32 %0) {
-; CHECK-LABEL: @pr91691_keep_nsw(
-; CHECK-NEXT:    [[TMP2:%.*]] = sub nsw i32 -2, [[TMP0:%.*]]
+define i32 @pr91691_drop_nsw(i32 %0) {
+; CHECK-LABEL: @pr91691_drop_nsw(
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 -2, [[TMP0:%.*]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP2]], i1 false)
 ; CHECK-NEXT:    [[TMP4:%.*]] = sub nsw i32 0, [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP4]], 31
@@ -337,6 +337,46 @@ define i32 @test_drop_range_attr(i32 %x) {
   ret i32 %sel
 }
 
+define i32 @bit_ceil_plus_nsw(i32 %x) {
+; CHECK-LABEL: @bit_ceil_plus_nsw(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SUB:%.*]] = add i32 [[X:%.*]], 1
+; CHECK-NEXT:    [[CTLZ:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[SUB]], i1 false)
+; CHECK-NEXT:    [[TMP0:%.*]] = sub nsw i32 0, [[CTLZ]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 31
+; CHECK-NEXT:    [[SEL:%.*]] = shl nuw i32 1, [[TMP1]]
+; CHECK-NEXT:    ret i32 [[SEL]]
+;
+entry:
+  %sub = add nsw i32 %x, 1
+  %ctlz = tail call i32 @llvm.ctlz.i32(i32 %sub, i1 false)
+  %sub2 = sub nuw nsw i32 32, %ctlz
+  %shl = shl nuw i32 1, %sub2
+  %ult = icmp ult i32 %x, 2147483647
+  %sel = select i1 %ult, i32 %shl, i32 1
+  ret i32 %sel
+}
+
+define i32 @bit_ceil_plus_nuw(i32 %x) {
+; CHECK-LABEL: @bit_ceil_plus_nuw(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SUB:%.*]] = add i32 [[X:%.*]], 1
+; CHECK-NEXT:    [[CTLZ:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[SUB]], i1 false)
+; CHECK-NEXT:    [[TMP0:%.*]] = sub nsw i32 0, [[CTLZ]]
+; CHECK-NEXT:    [[SUB2:%.*]] = and i32 [[TMP0]], 31
+; CHECK-NEXT:    [[SHL:%.*]] = shl nuw i32 1, [[SUB2]]
+; CHECK-NEXT:    ret i32 [[SHL]]
+;
+entry:
+  %sub = add nuw i32 %x, 1
+  %ctlz = tail call i32 @llvm.ctlz.i32(i32 %sub, i1 false)
+  %sub2 = sub nuw nsw i32 32, %ctlz
+  %shl = shl nuw i32 1, %sub2
+  %ult = icmp ult i32 %x, 2147483647
+  %sel = select i1 %ult, i32 %shl, i32 1
+  ret i32 %sel
+}
+
 declare i32 @llvm.ctlz.i32(i32, i1 immarg)
 declare i64 @llvm.ctlz.i64(i64, i1 immarg)
 declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)
diff --git a/llvm/test/Transforms/InstCombine/xor-icmps.ll b/llvm/test/Transforms/InstCombine/xor-icmps.ll
index 0384c1a..382355c 100644
--- a/llvm/test/Transforms/InstCombine/xor-icmps.ll
+++ b/llvm/test/Transforms/InstCombine/xor-icmps.ll
@@ -319,3 +319,158 @@ define i1 @xor_icmp_to_icmp_add_multiuse2(i32 %a) {
   %cmp3 = xor i1 %cmp, %cmp1
   ret i1 %cmp3
 }
+
+define i1 @test_xor_of_bittest_ne_ne(i8 %x, i8 %y) {
+; CHECK-LABEL: @test_xor_of_bittest_ne_ne(
+; CHECK-NEXT:    [[Y:%.*]] = xor i8 [[X:%.*]], [[Y1:%.*]]
+; CHECK-NEXT:    [[MASK2:%.*]] = and i8 [[Y]], 2
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ne i8 [[MASK2]], 0
+; CHECK-NEXT:    ret i1 [[CMP2]]
+;
+  %mask1 = and i8 %x, 2
+  %cmp1 = icmp ne i8 %mask1, 0
+  %mask2 = and i8 %y, 2
+  %cmp2 = icmp ne i8 %mask2, 0
+  %xor = xor i1 %cmp1, %cmp2
+  ret i1 %xor
+}
+
+define i1 @test_xor_of_bittest_eq_eq(i8 %x, i8 %y) {
+; CHECK-LABEL: @test_xor_of_bittest_eq_eq(
+; CHECK-NEXT:    [[Y:%.*]] = xor i8 [[X:%.*]], [[Y1:%.*]]
+; CHECK-NEXT:    [[MASK2:%.*]] = and i8 [[Y]], 2
+; CHECK-NEXT:    [[XOR:%.*]] = icmp ne i8 [[MASK2]], 0
+; CHECK-NEXT:    ret i1 [[XOR]]
+;
+  %mask1 = and i8 %x, 2
+  %cmp1 = icmp eq i8 %mask1, 0
+  %mask2 = and i8 %y, 2
+  %cmp2 = icmp eq i8 %mask2, 0
+  %xor = xor i1 %cmp1, %cmp2
+  ret i1 %xor
+}
+
+define i1 @test_xor_of_bittest_ne_eq(i8 %x, i8 %y) {
+; CHECK-LABEL: @test_xor_of_bittest_ne_eq(
+; CHECK-NEXT:    [[Y:%.*]] = xor i8 [[X:%.*]], [[Y1:%.*]]
+; CHECK-NEXT:    [[MASK2:%.*]] = and i8 [[Y]], 2
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i8 [[MASK2]], 0
+; CHECK-NEXT:    ret i1 [[CMP2]]
+;
+  %mask1 = and i8 %x, 2
+  %cmp1 = icmp ne i8 %mask1, 0
+  %mask2 = and i8 %y, 2
+  %cmp2 = icmp eq i8 %mask2, 0
+  %xor = xor i1 %cmp1, %cmp2
+  ret i1 %xor
+}
+
+define i1 @test_xor_of_bittest_eq_ne(i8 %x, i8 %y) {
+; CHECK-LABEL: @test_xor_of_bittest_eq_ne(
+; CHECK-NEXT:    [[X:%.*]] = xor i8 [[X1:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[MASK1:%.*]] = and i8 [[X]], 2
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i8 [[MASK1]], 0
+; CHECK-NEXT:    ret i1 [[CMP1]]
+;
+  %mask1 = and i8 %x, 2
+  %cmp1 = icmp eq i8 %mask1, 0
+  %mask2 = and i8 %y, 2
+  %cmp2 = icmp ne i8 %mask2, 0
+  %xor = xor i1 %cmp1, %cmp2
+  ret i1 %xor
+}
+
+define i1 @test_xor_of_bittest_ne_ne_multiuse1(i8 %x, i8 %y) {
+; CHECK-LABEL: @test_xor_of_bittest_ne_ne_multiuse1(
+; CHECK-NEXT:    [[MASK1:%.*]] = and i8 [[X:%.*]], 2
+; CHECK-NEXT:    call void @usei8(i8 [[MASK1]])
+; CHECK-NEXT:    [[MASK2:%.*]] = and i8 [[Y:%.*]], 2
+; CHECK-NEXT:    call void @usei8(i8 [[MASK2]])
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], 2
+; CHECK-NEXT:    [[XOR:%.*]] = icmp ne i8 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[XOR]]
+;
+  %mask1 = and i8 %x, 2
+  call void @usei8(i8 %mask1)
+  %cmp1 = icmp ne i8 %mask1, 0
+  %mask2 = and i8 %y, 2
+  call void @usei8(i8 %mask2)
+  %cmp2 = icmp ne i8 %mask2, 0
+  %xor = xor i1 %cmp1, %cmp2
+  ret i1 %xor
+}
+
+; Negative tests
+
+define i1 @test_xor_of_bittest_ne_ne_type_mismatch(i8 %x, i16 %y) {
+; CHECK-LABEL: @test_xor_of_bittest_ne_ne_type_mismatch(
+; CHECK-NEXT:    [[MASK1:%.*]] = and i8 [[X:%.*]], 2
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ne i8 [[MASK1]], 0
+; CHECK-NEXT:    [[MASK2:%.*]] = and i16 [[Y:%.*]], 2
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ne i16 [[MASK2]], 0
+; CHECK-NEXT:    [[XOR:%.*]] = xor i1 [[CMP1]], [[CMP2]]
+; CHECK-NEXT:    ret i1 [[XOR]]
+;
+  %mask1 = and i8 %x, 2
+  %cmp1 = icmp ne i8 %mask1, 0
+  %mask2 = and i16 %y, 2
+  %cmp2 = icmp ne i16 %mask2, 0
+  %xor = xor i1 %cmp1, %cmp2
+  ret i1 %xor
+}
+
+define i1 @test_xor_of_bittest_ne_ne_mask_mismatch(i8 %x, i8 %y) {
+; CHECK-LABEL: @test_xor_of_bittest_ne_ne_mask_mismatch(
+; CHECK-NEXT:    [[MASK1:%.*]] = and i8 [[X:%.*]], 4
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ne i8 [[MASK1]], 0
+; CHECK-NEXT:    [[MASK2:%.*]] = and i8 [[Y:%.*]], 2
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ne i8 [[MASK2]], 0
+; CHECK-NEXT:    [[XOR:%.*]] = xor i1 [[CMP1]], [[CMP2]]
+; CHECK-NEXT:    ret i1 [[XOR]]
+;
+  %mask1 = and i8 %x, 4
+  %cmp1 = icmp ne i8 %mask1, 0
+  %mask2 = and i8 %y, 2
+  %cmp2 = icmp ne i8 %mask2, 0
+  %xor = xor i1 %cmp1, %cmp2
+  ret i1 %xor
+}
+
+define i1 @test_xor_of_bittest_ne_ne_nonpower2(i8 %x, i8 %y) {
+; CHECK-LABEL: @test_xor_of_bittest_ne_ne_nonpower2(
+; CHECK-NEXT:    [[MASK1:%.*]] = and i8 [[X:%.*]], 3
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ne i8 [[MASK1]], 0
+; CHECK-NEXT:    [[MASK2:%.*]] = and i8 [[Y:%.*]], 3
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ne i8 [[MASK2]], 0
+; CHECK-NEXT:    [[XOR:%.*]] = xor i1 [[CMP1]], [[CMP2]]
+; CHECK-NEXT:    ret i1 [[XOR]]
+;
+  %mask1 = and i8 %x, 3
+  %cmp1 = icmp ne i8 %mask1, 0
+  %mask2 = and i8 %y, 3
+  %cmp2 = icmp ne i8 %mask2, 0
+  %xor = xor i1 %cmp1, %cmp2
+  ret i1 %xor
+}
+
+define i1 @test_xor_of_bittest_ne_ne_multiuse2(i8 %x, i8 %y) {
+; CHECK-LABEL: @test_xor_of_bittest_ne_ne_multiuse2(
+; CHECK-NEXT:    [[MASK1:%.*]] = and i8 [[X:%.*]], 2
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ne i8 [[MASK1]], 0
+; CHECK-NEXT:    call void @use(i1 [[CMP1]])
+; CHECK-NEXT:    [[MASK2:%.*]] = and i8 [[Y:%.*]], 2
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ne i8 [[MASK2]], 0
+; CHECK-NEXT:    [[XOR:%.*]] = xor i1 [[CMP1]], [[CMP2]]
+; CHECK-NEXT:    ret i1 [[XOR]]
+;
+  %mask1 = and i8 %x, 2
+  %cmp1 = icmp ne i8 %mask1, 0
+  call void @use(i1 %cmp1)
+  %mask2 = and i8 %y, 2
+  %cmp2 = icmp ne i8 %mask2, 0
+  %xor = xor i1 %cmp1, %cmp2
+  ret i1 %xor
+}
+
+declare void @usei8(i8)
diff --git a/llvm/test/Transforms/InstSimplify/assume-non-zero.ll b/llvm/test/Transforms/InstSimplify/assume-non-zero.ll
index 9176b81..22bdf37 100644
--- a/llvm/test/Transforms/InstSimplify/assume-non-zero.ll
+++ b/llvm/test/Transforms/InstSimplify/assume-non-zero.ll
@@ -231,3 +231,17 @@ define i1 @nonnull17_unknown(i8 %x) {
   %q = icmp ne i8 %x, 0
   ret i1 %q
 }
+
+define i1 @nonnull_trunc_true(i8 %x) {
+; CHECK-LABEL: @nonnull_trunc_true(
+; CHECK-NEXT:    [[A:%.*]] = trunc i8 [[X:%.*]] to i1
+; CHECK-NEXT:    call void @llvm.assume(i1 [[A]])
+; CHECK-NEXT:    [[Q:%.*]] = icmp ne i8 [[X]], 0
+; CHECK-NEXT:    ret i1 [[Q]]
+;
+  %a = trunc i8 %x to i1
+  call void @llvm.assume(i1 %a)
+  %q = icmp ne i8 %x, 0
+  ret i1 %q
+}
+
diff --git a/llvm/test/Transforms/InstSimplify/compare.ll b/llvm/test/Transforms/InstSimplify/compare.ll
index 222b258..4192a59 100644
--- a/llvm/test/Transforms/InstSimplify/compare.ll
+++ b/llvm/test/Transforms/InstSimplify/compare.ll
@@ -3450,6 +3450,21 @@ define i1 @icmp_ult_vscale_false(i8 %x, i8 %y) {
   ret i1 %cmp
 }
 
+define i1 @icmp_eq_false_by_trunc(i8 %x) {
+; CHECK-LABEL: @icmp_eq_false_by_trunc(
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i8 [[X:%.*]] to i1
+; CHECK-NEXT:    [[NOT:%.*]] = xor i1 [[TRUNC]], true
+; CHECK-NEXT:    call void @llvm.assume(i1 [[NOT]])
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[X]], 1
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %trunc = trunc i8 %x to i1
+  %not = xor i1 %trunc, true
+  call void @llvm.assume(i1 %not)
+  %cmp = icmp eq i8 %x, 1
+  ret i1 %cmp
+}
+
 declare i64 @llvm.vscale.i64()
 
 ; TODO: Add coverage for global aliases, link once, etc..
diff --git a/llvm/test/Transforms/InstSimplify/shr-nop.ll b/llvm/test/Transforms/InstSimplify/shr-nop.ll
index 29fe222..4b9292a 100644
--- a/llvm/test/Transforms/InstSimplify/shr-nop.ll
+++ b/llvm/test/Transforms/InstSimplify/shr-nop.ll
@@ -381,6 +381,19 @@ define i32 @exact_lshr_lowbit(i32 %shiftval) {
   ret i32 %shr
 }
 
+define i8 @exact_lshr_lowbit_set_assume_trunc(i8 %x) {
+; CHECK-LABEL: @exact_lshr_lowbit_set_assume_trunc(
+; CHECK-NEXT:    [[COND:%.*]] = trunc i8 [[X:%.*]] to i1
+; CHECK-NEXT:    call void @llvm.assume(i1 [[COND]])
+; CHECK-NEXT:    [[SHR:%.*]] = lshr exact i8 [[X]], 1
+; CHECK-NEXT:    ret i8 [[SHR]]
+;
+  %cond = trunc i8 %x to i1
+  call void @llvm.assume(i1 %cond)
+  %shr = lshr exact i8 %x, 1
+  ret i8 %shr
+}
+
 define i32 @exact_ashr_lowbit(i32 %shiftval) {
 ; CHECK-LABEL: @exact_ashr_lowbit(
 ; CHECK-NEXT:    ret i32 7
diff --git a/llvm/test/Transforms/LoopIdiom/memset-pattern-tbaa.ll b/llvm/test/Transforms/LoopIdiom/memset-pattern-tbaa.ll
index 6b7e9a8..57a91a3 100644
--- a/llvm/test/Transforms/LoopIdiom/memset-pattern-tbaa.ll
+++ b/llvm/test/Transforms/LoopIdiom/memset-pattern-tbaa.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
 ; RUN: opt -passes="loop-idiom" < %s -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
@@ -6,6 +6,11 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-darwin10.0.0"
 
 
+;.
+; CHECK: @.memset_pattern = private unnamed_addr constant [2 x double] [double 3.141590e+00, double 3.141590e+00], align 16
+; CHECK: @.memset_pattern.1 = private unnamed_addr constant [2 x double] [double 3.141590e+00, double 3.141590e+00], align 16
+; CHECK: @.memset_pattern.2 = private unnamed_addr constant [2 x double] [double 3.141590e+00, double 3.141590e+00], align 16
+;.
 define dso_local void @double_memset(ptr nocapture %p) {
 ; CHECK-LABEL: @double_memset(
 ; CHECK-NEXT:  entry:
@@ -110,3 +115,15 @@ for.body:
 !20 = !{!"any pointer", !7, i64 0}
 !21 = !{!22, !20, i64 0}
 !22 = !{!"B", !20, i64 0}
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nofree nounwind willreturn memory(argmem: readwrite) }
+;.
+; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"double", [[META2:![0-9]+]], i64 0}
+; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"Simple C++ TBAA"}
+; CHECK: [[TBAA4]] = !{[[META5:![0-9]+]], [[META7:![0-9]+]], i64 0, i64 128}
+; CHECK: [[META5]] = !{[[META6:![0-9]+]], i64 32, !"_ZTS1A", [[META7]], i64 0, i64 8, [[META7]], i64 8, i64 8, [[META7]], i64 16, i64 8, [[META7]], i64 24, i64 8}
+; CHECK: [[META6]] = !{[[META3]], i64 0, !"omnipotent char"}
+; CHECK: [[META7]] = !{[[META6]], i64 8, !"double"}
+;.
diff --git a/llvm/test/Transforms/LoopIdiom/struct_pattern.ll b/llvm/test/Transforms/LoopIdiom/struct_pattern.ll
index 5335b12..b65e953 100644
--- a/llvm/test/Transforms/LoopIdiom/struct_pattern.ll
+++ b/llvm/test/Transforms/LoopIdiom/struct_pattern.ll
@@ -1,10 +1,7 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
 ; RUN: opt -passes=loop-idiom < %s -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 
-; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
-; CHECK: @.memset_pattern.1 = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
-; CHECK: @.memset_pattern.2 = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
 
 target triple = "x86_64-apple-darwin10.0.0"
 
@@ -17,6 +14,13 @@ target triple = "x86_64-apple-darwin10.0.0"
 ;    f[i].b = 2;
 ;  }
 ;}
+
+
+;.
+; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
+; CHECK: @.memset_pattern.1 = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
+; CHECK: @.memset_pattern.2 = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
+;.
 define void @bar1(ptr %f, i32 %n) nounwind ssp {
 ; CHECK-LABEL: @bar1(
 ; CHECK-NEXT:  entry:
@@ -286,3 +290,7 @@ for.end.loopexit:                                 ; preds = %for.body
 for.end:                                          ; preds = %for.end.loopexit, %entry
   ret void
 }
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind ssp }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nofree nounwind willreturn memory(argmem: readwrite) }
+;.
diff --git a/llvm/test/Transforms/LoopIdiom/unroll-custom-dl.ll b/llvm/test/Transforms/LoopIdiom/unroll-custom-dl.ll
index ac50c87..a3b4219 100644
--- a/llvm/test/Transforms/LoopIdiom/unroll-custom-dl.ll
+++ b/llvm/test/Transforms/LoopIdiom/unroll-custom-dl.ll
@@ -1,8 +1,7 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
 ; RUN: opt -passes=loop-idiom < %s -S | FileCheck %s
 target datalayout = "e-p:64:64:64:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 
-; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
 
 target triple = "x86_64-apple-darwin10.0.0"
 
@@ -12,6 +11,9 @@ target triple = "x86_64-apple-darwin10.0.0"
 ;    f[i+1] = 0;
 ;  }
 ;}
+;.
+; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
+;.
 define void @test(ptr %f, i32 %n) nounwind ssp {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
@@ -121,3 +123,8 @@ for.end.loopexit:                                 ; preds = %for.body
 for.end:                                          ; preds = %for.end.loopexit, %entry
   ret void
 }
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind ssp }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nofree nounwind willreturn memory(argmem: readwrite) }
+;.
diff --git a/llvm/test/Transforms/LoopIdiom/unroll.ll b/llvm/test/Transforms/LoopIdiom/unroll.ll
index 7c41310..c70eeef 100644
--- a/llvm/test/Transforms/LoopIdiom/unroll.ll
+++ b/llvm/test/Transforms/LoopIdiom/unroll.ll
@@ -1,8 +1,7 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
 ; RUN: opt -passes=loop-idiom < %s -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 
-; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
 
 target triple = "x86_64-apple-darwin10.0.0"
 
@@ -12,6 +11,9 @@ target triple = "x86_64-apple-darwin10.0.0"
 ;    f[i+1] = 0;
 ;  }
 ;}
+;.
+; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
+;.
 define void @test(ptr %f, i32 %n) nounwind ssp {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
@@ -125,3 +127,8 @@ for.end.loopexit:                                 ; preds = %for.body
 for.end:                                          ; preds = %for.end.loopexit, %entry
   ret void
 }
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind ssp }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nofree nounwind willreturn memory(argmem: readwrite) }
+;.
diff --git a/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll b/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll
index 3c7828a..73a566a 100644
--- a/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll
+++ b/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll
@@ -58,6 +58,14 @@ for.end19:
   ret void
 }
 
+; CHECK: --- !Analysis
+; CHECK-NEXT: Pass:            loop-interchange
+; CHECK-NEXT: Name:            Dependence
+; CHECK-NEXT: Function:        test01
+; CHECK-NEXT: Args:
+; CHECK-NEXT:   - String:          Computed dependence info, invoking the transform.
+; CHECK-NEXT: ...
+
 ; CHECK: --- !Missed
 ; CHECK-NEXT: Pass:            loop-interchange
 ; CHECK-NEXT: Name:            Dependence
@@ -66,6 +74,14 @@ for.end19:
 ; CHECK-NEXT:   - String:          Cannot interchange loops due to dependences.
 ; CHECK-NEXT: ...
 
+; DELIN: --- !Analysis
+; DELIN-NEXT: Pass:            loop-interchange
+; DELIN-NEXT: Name:            Dependence
+; DELIN-NEXT: Function:        test01
+; DELIN-NEXT: Args:
+; DELIN-NEXT:   - String:          Computed dependence info, invoking the transform.
+; DELIN-NEXT: ...
+
 ; DELIN: --- !Missed
 ; DELIN-NEXT: Pass:            loop-interchange
 ; DELIN-NEXT: Name:            InterchangeNotProfitable
@@ -118,6 +134,14 @@ define void @test02(i32 %k, i32 %N) {
    ret void
 }
 
+; CHECK: --- !Analysis
+; CHECK-NEXT: Pass:            loop-interchange
+; CHECK-NEXT: Name:            Dependence
+; CHECK-NEXT: Function:        test02
+; CHECK-NEXT: Args:
+; CHECK-NEXT:   - String:          Computed dependence info, invoking the transform.
+; CHECK-NEXT: ...
+
 ; CHECK: --- !Missed
 ; CHECK-NEXT: Pass:            loop-interchange
 ; CHECK-NEXT: Name:            Dependence
@@ -126,6 +150,14 @@ define void @test02(i32 %k, i32 %N) {
 ; CHECK-NEXT:   - String:          Cannot interchange loops due to dependences.
 ; CHECK-NEXT: ...
 
+; DELIN: --- !Analysis
+; DELIN-NEXT: Pass:            loop-interchange
+; DELIN-NEXT: Name:            Dependence
+; DELIN-NEXT: Function:        test02
+; DELIN-NEXT: Args:
+; DELIN-NEXT:   - String:          Computed dependence info, invoking the transform.
+; DELIN-NEXT: ...
+
 ; DELIN: --- !Passed
 ; DELIN-NEXT: Pass:            loop-interchange
 ; DELIN-NEXT: Name:            Interchanged
@@ -174,6 +206,14 @@ for.body4:                                        ; preds = %for.body4, %for.con
   br i1 %exitcond, label %for.body4, label %for.cond.loopexit
 }
 
+; CHECK: --- !Analysis
+; CHECK-NEXT: Pass:            loop-interchange
+; CHECK-NEXT: Name:            Dependence
+; CHECK-NEXT: Function:        test03
+; CHECK-NEXT: Args:
+; CHECK-NEXT:   - String:          Computed dependence info, invoking the transform.
+; CHECK-NEXT: ...
+
 ; CHECK: --- !Passed
 ; CHECK-NEXT: Pass:            loop-interchange
 ; CHECK-NEXT: Name:            Interchanged
@@ -182,6 +222,14 @@ for.body4:                                        ; preds = %for.body4, %for.con
 ; CHECK-NEXT:   - String:          Loop interchanged with enclosing loop.
 ; CHECK-NEXT: ...
 
+; DELIN: --- !Analysis
+; DELIN-NEXT: Pass:            loop-interchange
+; DELIN-NEXT: Name:            Dependence
+; DELIN-NEXT: Function:        test03
+; DELIN-NEXT: Args:
+; DELIN-NEXT:   - String:          Computed dependence info, invoking the transform.
+; DELIN-NEXT: ...
+
 ; DELIN: --- !Passed
 ; DELIN-NEXT: Pass:            loop-interchange
 ; DELIN-NEXT: Name:            Interchanged
diff --git a/llvm/test/Transforms/LoopInterchange/no-dependence-info.ll b/llvm/test/Transforms/LoopInterchange/no-dependence-info.ll
new file mode 100644
index 0000000..d37fb46
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/no-dependence-info.ll
@@ -0,0 +1,52 @@
+; RUN: opt %s -passes='loop-interchange' -pass-remarks=loop-interchange -disable-output 2>&1 | FileCheck --allow-empty %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; CHECK-NOT: Computed dependence info, invoking the transform.
+
+; For the below test, backedge count cannot be computed. 
+; Computing backedge count requires only SCEV and should
+; not require dependence info. 
+;
+; void bar(int m, int n) {
+; for (unsigned int i = 0; i < m; ++i) {
+;    for (unsigned int j = 0; j < m; ++j) {
+;     // dummy code
+;    }
+;  }
+;}
+
+define void @bar(i32 %m, i32 %n)
+{
+entry:
+  br label %outer.header
+
+outer.header:
+ %m_temp1 = phi i32 [%m, %entry], [%m_temp, %outer.latch]
+ br label %inner.header
+
+
+inner.header:
+ %n_temp1 = phi i32 [%n, %outer.header], [%n_temp, %inner.latch]
+
+ br label %body
+
+body:
+ ; dummy code
+
+br label %inner.latch 
+
+inner.latch:
+%n_temp = add i32 %n_temp1, 1
+%cmp2 = icmp eq i32 %n_temp, 1 
+br i1 %cmp2, label %outer.latch, label %inner.header
+
+outer.latch:
+%m_temp = add i32 %n, 1
+%cmp3 = icmp eq i32 %m_temp, 1 
+br i1 %cmp3, label %exit, label %outer.header
+
+exit:
+ret void
+}
+
diff --git a/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll b/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll
index def68ca..520e1ee 100644
--- a/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll
+++ b/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll
@@ -14,6 +14,14 @@
 ;   }
 ; }
 
+; REMARKS: --- !Analysis
+; REMARKS-NEXT: Pass:            loop-interchange
+; REMARKS-NEXT: Name:            Dependence
+; REMARKS-NEXT: Function:        pr43326-triply-nested
+; REMARKS-NEXT: Args:
+; REMARKS-NEXT:   - String:          Computed dependence info, invoking the transform.
+; REMARKS-NEXT: ...
+
 ; REMARKS: --- !Passed
 ; REMARKS-NEXT: Pass:            loop-interchange
 ; REMARKS-NEXT: Name:            Interchanged
diff --git a/llvm/test/Transforms/LoopInterchange/pr43326.ll b/llvm/test/Transforms/LoopInterchange/pr43326.ll
index 4dbb067..c25c4fa 100644
--- a/llvm/test/Transforms/LoopInterchange/pr43326.ll
+++ b/llvm/test/Transforms/LoopInterchange/pr43326.ll
@@ -8,6 +8,14 @@
 @d = global i32 0
 @e = global [1 x [1 x i32]] zeroinitializer
 
+; REMARKS: --- !Analysis
+; REMARKS-NEXT: Pass:            loop-interchange
+; REMARKS-NEXT: Name:            Dependence
+; REMARKS-NEXT: Function:        pr43326
+; REMARKS-NEXT: Args:
+; REMARKS-NEXT:   - String:          Computed dependence info, invoking the transform.
+; REMARKS-NEXT: ...
+
 ; REMARKS: --- !Passed
 ; REMARKS-NEXT: Pass:            loop-interchange
 ; REMARKS-NEXT: Name:            Interchanged
diff --git a/llvm/test/Transforms/LoopInterchange/pr48212.ll b/llvm/test/Transforms/LoopInterchange/pr48212.ll
index b580794..936c53e 100644
--- a/llvm/test/Transforms/LoopInterchange/pr48212.ll
+++ b/llvm/test/Transforms/LoopInterchange/pr48212.ll
@@ -2,6 +2,14 @@
 ; RUN:     -verify-dom-info -verify-loop-info -verify-loop-lcssa 2>&1
 ; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s
 
+; REMARKS: --- !Analysis
+; REMARKS-NEXT: Pass:            loop-interchange
+; REMARKS-NEXT: Name:            Dependence
+; REMARKS-NEXT: Function:        pr48212
+; REMARKS-NEXT: Args:
+; REMARKS-NEXT:   - String:          Computed dependence info, invoking the transform.
+; REMARKS-NEXT: ...
+
 ; REMARKS: --- !Passed
 ; REMARKS-NEXT: Pass:            loop-interchange
 ; REMARKS-NEXT: Name:            Interchanged
diff --git a/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll b/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll
index eea0c26..27d99e0 100644
--- a/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll
+++ b/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll
@@ -5,6 +5,14 @@
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
+; REMARKS: --- !Analysis
+; REMARKS-NEXT: Pass:            loop-interchange
+; REMARKS-NEXT: Name:            Dependence
+; REMARKS-NEXT: Function:        test1
+; REMARKS-NEXT: Args:
+; REMARKS-NEXT:   - String:          Computed dependence info, invoking the transform.
+; REMARKS-NEXT: ...
+
 ; REMARKS: --- !Passed
 ; REMARKS-NEXT: Pass:            loop-interchange
 ; REMARKS-NEXT: Name:            Interchanged
@@ -77,6 +85,14 @@ for1.loopexit:                                 ; preds = %for1.inc
 
 ; In this test case, the inner reduction PHI %inner does not involve the outer
 ; reduction PHI %sum.outer, do not interchange.
+; REMARKS: --- !Analysis
+; REMARKS-NEXT: Pass:            loop-interchange
+; REMARKS-NEXT: Name:            Dependence
+; REMARKS-NEXT: Function:        test2
+; REMARKS-NEXT: Args:
+; REMARKS-NEXT:   - String:          Computed dependence info, invoking the transform.
+; REMARKS-NEXT: ...
+
 ; REMARKS: --- !Missed
 ; REMARKS-NEXT: Pass:            loop-interchange
 ; REMARKS-NEXT: Name:            UnsupportedPHIOuter
@@ -114,6 +130,14 @@ for1.loopexit:                                 ; preds = %for1.inc
 
 ; Check that we do not interchange if there is an additional instruction
 ; between the outer and inner reduction PHIs.
+; REMARKS: --- !Analysis
+; REMARKS-NEXT: Pass:            loop-interchange
+; REMARKS-NEXT: Name:            Dependence
+; REMARKS-NEXT: Function:        test3
+; REMARKS-NEXT: Args:
+; REMARKS-NEXT:   - String:          Computed dependence info, invoking the transform.
+; REMARKS-NEXT: ...
+
 ; REMARKS: --- !Missed
 ; REMARKS-NEXT: Pass:            loop-interchange
 ; REMARKS-NEXT: Name:            UnsupportedPHIOuter
@@ -151,6 +175,14 @@ for1.loopexit:                                 ; preds = %for1.inc
 }
 
 ; Check that we do not interchange if reduction is stored in an invariant address inside inner loop
+; REMARKS: --- !Analysis
+; REMARKS-NEXT: Pass:            loop-interchange
+; REMARKS-NEXT: Name:            Dependence
+; REMARKS-NEXT: Function:        test4
+; REMARKS-NEXT: Args:
+; REMARKS-NEXT:   - String:          Computed dependence info, invoking the transform.
+; REMARKS-NEXT: ...
+
 ; REMARKS: --- !Missed
 ; REMARKS-NEXT: Pass:            loop-interchange
 ; REMARKS-NEXT: Name:            Dependence
@@ -190,6 +222,14 @@ for1.loopexit:                                 ; preds = %for1.inc
 
 ; Check that we do not interchange or crash if the PHI in the outer loop gets a
 ; constant from the inner loop.
+; REMARKS: --- !Analysis
+; REMARKS-NEXT: Pass:            loop-interchange
+; REMARKS-NEXT: Name:            Dependence
+; REMARKS-NEXT: Function:        test_constant_inner_loop_res
+; REMARKS-NEXT: Args:
+; REMARKS-NEXT:   - String:          Computed dependence info, invoking the transform.
+; REMARKS-NEXT: ...
+
 ; REMARKS: --- !Missed
 ; REMARKS-NEXT: Pass:            loop-interchange
 ; REMARKS-NEXT: Name:            UnsupportedPHIOuter
@@ -229,6 +269,14 @@ for1.loopexit:                                 ; preds = %for1.inc
 
 ; Floating point reductions are interchanged if all the fp instructions
 ; involved allow reassociation.
+; REMARKS: --- !Analysis
+; REMARKS-NEXT: Pass:            loop-interchange
+; REMARKS-NEXT: Name:            Dependence
+; REMARKS-NEXT: Function:        test5
+; REMARKS-NEXT: Args:
+; REMARKS-NEXT:   - String:          Computed dependence info, invoking the transform.
+; REMARKS-NEXT: ...
+
 ; REMARKS: --- !Passed
 ; REMARKS-NEXT: Pass:            loop-interchange
 ; REMARKS-NEXT: Name:            Interchanged
@@ -269,6 +317,14 @@ for.exit:                                         ; preds = %outer.inc
 
 ; Floating point reductions are not interchanged if not all the fp instructions
 ; involved allow reassociation.
+; REMARKS: --- !Analysis
+; REMARKS-NEXT: Pass:            loop-interchange
+; REMARKS-NEXT: Name:            Dependence
+; REMARKS-NEXT: Function:        test6
+; REMARKS-NEXT: Args:
+; REMARKS-NEXT:   - String:          Computed dependence info, invoking the transform.
+; REMARKS-NEXT: ...
+
 ; REMARKS: --- !Missed
 ; REMARKS-NEXT: Pass:            loop-interchange
 ; REMARKS-NEXT: Name:            UnsupportedPHIOuter
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
index a0214ae..9d0d30a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
@@ -2434,7 +2434,7 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 {
 ; CHECK-MAXBW-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK-MAXBW:       vector.body:
 ; CHECK-MAXBW-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-MAXBW-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
+; CHECK-MAXBW-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 8 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-MAXBW-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-MAXBW-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[TMP6]]
 ; CHECK-MAXBW-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i32 0
@@ -2446,12 +2446,12 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 {
 ; CHECK-MAXBW-NEXT:    [[WIDE_LOAD1:%.*]] = load <vscale x 8 x i8>, ptr [[TMP12]], align 1
 ; CHECK-MAXBW-NEXT:    [[TMP13:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD1]] to <vscale x 8 x i64>
 ; CHECK-MAXBW-NEXT:    [[TMP14:%.*]] = mul nuw nsw <vscale x 8 x i64> [[TMP13]], [[TMP9]]
-; CHECK-MAXBW-NEXT:    [[PARTIAL_REDUCE]] = call <vscale x 1 x i64> @llvm.experimental.vector.partial.reduce.add.nxv1i64.nxv8i64(<vscale x 1 x i64> [[VEC_PHI]], <vscale x 8 x i64> [[TMP14]])
+; CHECK-MAXBW-NEXT:    [[TMP19]] = add <vscale x 8 x i64> [[VEC_PHI]], [[TMP14]]
 ; CHECK-MAXBW-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
 ; CHECK-MAXBW-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-MAXBW-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
 ; CHECK-MAXBW:       middle.block:
-; CHECK-MAXBW-NEXT:    [[TMP16:%.*]] = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> [[PARTIAL_REDUCE]])
+; CHECK-MAXBW-NEXT:    [[TMP16:%.*]] = call i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64> [[TMP19]])
 ; CHECK-MAXBW-NEXT:    [[CMP_N:%.*]] = icmp eq i64 16, [[N_VEC]]
 ; CHECK-MAXBW-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK-MAXBW:       scalar.ph:
@@ -3106,89 +3106,188 @@ exit:                                 ; preds = %for.cond.cleanup.loopexit, %ent
   ret i64 %result
 }
 
+define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 {
+; CHECK-INTERLEAVE1-LABEL: define dso_local i32 @not_dotp_vscale1(
+; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[N:%.*]], i64 [[COST:%.*]]) #[[ATTR0]] {
+; CHECK-INTERLEAVE1-NEXT:  entry:
+; CHECK-INTERLEAVE1-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-INTERLEAVE1-NEXT:    br i1 [[CMP]], label [[FOR_BODY_PREHEADER:%.*]], label [[EXIT:%.*]]
+; CHECK-INTERLEAVE1:       for.body.preheader:
+; CHECK-INTERLEAVE1-NEXT:    [[TMP0:%.*]] = zext i32 [[N]] to i64
+; CHECK-INTERLEAVE1-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-INTERLEAVE1-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 2
+; CHECK-INTERLEAVE1-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
+; CHECK-INTERLEAVE1-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-INTERLEAVE1:       vector.ph:
+; CHECK-INTERLEAVE1-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-INTERLEAVE1-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], 2
+; CHECK-INTERLEAVE1-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
+; CHECK-INTERLEAVE1-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
+; CHECK-INTERLEAVE1-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-INTERLEAVE1-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 2
+; CHECK-INTERLEAVE1-NEXT:    [[TMP7:%.*]] = trunc i64 [[N_VEC]] to i32
+; CHECK-INTERLEAVE1-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[A]], i64 [[N_VEC]]
+; CHECK-INTERLEAVE1-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[N_VEC]]
+; CHECK-INTERLEAVE1-NEXT:    [[TMP10:%.*]] = insertelement <vscale x 2 x i64> zeroinitializer, i64 [[COST]], i32 0
+; CHECK-INTERLEAVE1-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-INTERLEAVE1:       vector.body:
+; CHECK-INTERLEAVE1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVE1-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 2 x i64> [ [[TMP10]], [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVE1-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 0
+; CHECK-INTERLEAVE1-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP11]]
+; CHECK-INTERLEAVE1-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 0
+; CHECK-INTERLEAVE1-NEXT:    [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]]
+; CHECK-INTERLEAVE1-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
+; CHECK-INTERLEAVE1-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, ptr [[TMP13]], align 1
+; CHECK-INTERLEAVE1-NEXT:    [[TMP14:%.*]] = zext <vscale x 2 x i8> [[WIDE_LOAD]] to <vscale x 2 x i64>
+; CHECK-INTERLEAVE1-NEXT:    [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP1]], i32 0
+; CHECK-INTERLEAVE1-NEXT:    [[WIDE_LOAD2:%.*]] = load <vscale x 2 x i8>, ptr [[TMP15]], align 1
+; CHECK-INTERLEAVE1-NEXT:    [[TMP16:%.*]] = zext <vscale x 2 x i8> [[WIDE_LOAD2]] to <vscale x 2 x i64>
+; CHECK-INTERLEAVE1-NEXT:    [[TMP17:%.*]] = mul nuw nsw <vscale x 2 x i64> [[TMP16]], [[TMP14]]
+; CHECK-INTERLEAVE1-NEXT:    [[TMP18]] = add <vscale x 2 x i64> [[TMP17]], [[VEC_PHI]]
+; CHECK-INTERLEAVE1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
+; CHECK-INTERLEAVE1-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-INTERLEAVE1-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
+; CHECK-INTERLEAVE1:       middle.block:
+; CHECK-INTERLEAVE1-NEXT:    [[TMP20:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> [[TMP18]])
+; CHECK-INTERLEAVE1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
+; CHECK-INTERLEAVE1-NEXT:    br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+;
+; CHECK-INTERLEAVED-LABEL: define dso_local i32 @not_dotp_vscale1(
+; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[N:%.*]], i64 [[COST:%.*]]) #[[ATTR0]] {
+; CHECK-INTERLEAVED-NEXT:  entry:
+; CHECK-INTERLEAVED-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-INTERLEAVED-NEXT:    br i1 [[CMP]], label [[FOR_BODY_PREHEADER:%.*]], label [[EXIT:%.*]]
+; CHECK-INTERLEAVED:       for.body.preheader:
+; CHECK-INTERLEAVED-NEXT:    [[TMP0:%.*]] = zext i32 [[N]] to i64
+; CHECK-INTERLEAVED-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-INTERLEAVED-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
+; CHECK-INTERLEAVED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
+; CHECK-INTERLEAVED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-INTERLEAVED:       vector.ph:
+; CHECK-INTERLEAVED-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-INTERLEAVED-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], 4
+; CHECK-INTERLEAVED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
+; CHECK-INTERLEAVED-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
+; CHECK-INTERLEAVED-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-INTERLEAVED-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 4
+; CHECK-INTERLEAVED-NEXT:    [[TMP7:%.*]] = trunc i64 [[N_VEC]] to i32
+; CHECK-INTERLEAVED-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[A]], i64 [[N_VEC]]
+; CHECK-INTERLEAVED-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[N_VEC]]
+; CHECK-INTERLEAVED-NEXT:    [[TMP10:%.*]] = insertelement <vscale x 2 x i64> zeroinitializer, i64 [[COST]], i32 0
+; CHECK-INTERLEAVED-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-INTERLEAVED:       vector.body:
+; CHECK-INTERLEAVED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 2 x i64> [ [[TMP10]], [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI1:%.*]] = phi <vscale x 2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVED-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 0
+; CHECK-INTERLEAVED-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP11]]
+; CHECK-INTERLEAVED-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 0
+; CHECK-INTERLEAVED-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]]
+; CHECK-INTERLEAVED-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
+; CHECK-INTERLEAVED-NEXT:    [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-INTERLEAVED-NEXT:    [[TMP15:%.*]] = mul i64 [[TMP14]], 2
+; CHECK-INTERLEAVED-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[TMP15]]
+; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, ptr [[TMP13]], align 1
+; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD3:%.*]] = load <vscale x 2 x i8>, ptr [[TMP16]], align 1
+; CHECK-INTERLEAVED-NEXT:    [[TMP17:%.*]] = zext <vscale x 2 x i8> [[WIDE_LOAD]] to <vscale x 2 x i64>
+; CHECK-INTERLEAVED-NEXT:    [[TMP18:%.*]] = zext <vscale x 2 x i8> [[WIDE_LOAD3]] to <vscale x 2 x i64>
+; CHECK-INTERLEAVED-NEXT:    [[TMP19:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i32 0
+; CHECK-INTERLEAVED-NEXT:    [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-INTERLEAVED-NEXT:    [[TMP21:%.*]] = mul i64 [[TMP20]], 2
+; CHECK-INTERLEAVED-NEXT:    [[TMP22:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i64 [[TMP21]]
+; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD4:%.*]] = load <vscale x 2 x i8>, ptr [[TMP19]], align 1
+; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD5:%.*]] = load <vscale x 2 x i8>, ptr [[TMP22]], align 1
+; CHECK-INTERLEAVED-NEXT:    [[TMP23:%.*]] = zext <vscale x 2 x i8> [[WIDE_LOAD4]] to <vscale x 2 x i64>
+; CHECK-INTERLEAVED-NEXT:    [[TMP24:%.*]] = zext <vscale x 2 x i8> [[WIDE_LOAD5]] to <vscale x 2 x i64>
+; CHECK-INTERLEAVED-NEXT:    [[TMP25:%.*]] = mul nuw nsw <vscale x 2 x i64> [[TMP23]], [[TMP17]]
+; CHECK-INTERLEAVED-NEXT:    [[TMP26:%.*]] = mul nuw nsw <vscale x 2 x i64> [[TMP24]], [[TMP18]]
+; CHECK-INTERLEAVED-NEXT:    [[TMP27]] = add <vscale x 2 x i64> [[TMP25]], [[VEC_PHI]]
+; CHECK-INTERLEAVED-NEXT:    [[TMP28]] = add <vscale x 2 x i64> [[TMP26]], [[VEC_PHI1]]
+; CHECK-INTERLEAVED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
+; CHECK-INTERLEAVED-NEXT:    [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
+; CHECK-INTERLEAVED:       middle.block:
+; CHECK-INTERLEAVED-NEXT:    [[BIN_RDX:%.*]] = add <vscale x 2 x i64> [[TMP28]], [[TMP27]]
+; CHECK-INTERLEAVED-NEXT:    [[TMP30:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> [[BIN_RDX]])
+; CHECK-INTERLEAVED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
+; CHECK-INTERLEAVED-NEXT:    br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+;
+; CHECK-MAXBW-LABEL: define dso_local i32 @not_dotp_vscale1(
+; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[N:%.*]], i64 [[COST:%.*]]) #[[ATTR0]] {
+; CHECK-MAXBW-NEXT:  entry:
+; CHECK-MAXBW-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-MAXBW-NEXT:    br i1 [[CMP]], label [[FOR_BODY_PREHEADER:%.*]], label [[EXIT:%.*]]
+; CHECK-MAXBW:       for.body.preheader:
+; CHECK-MAXBW-NEXT:    [[TMP0:%.*]] = zext i32 [[N]] to i64
+; CHECK-MAXBW-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-MAXBW-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 8
+; CHECK-MAXBW-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
+; CHECK-MAXBW-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-MAXBW:       vector.ph:
+; CHECK-MAXBW-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-MAXBW-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], 8
+; CHECK-MAXBW-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
+; CHECK-MAXBW-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
+; CHECK-MAXBW-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-MAXBW-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 8
+; CHECK-MAXBW-NEXT:    [[TMP7:%.*]] = trunc i64 [[N_VEC]] to i32
+; CHECK-MAXBW-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[A]], i64 [[N_VEC]]
+; CHECK-MAXBW-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[N_VEC]]
+; CHECK-MAXBW-NEXT:    [[TMP10:%.*]] = insertelement <vscale x 8 x i64> zeroinitializer, i64 [[COST]], i32 0
+; CHECK-MAXBW-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-MAXBW:       vector.body:
+; CHECK-MAXBW-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-MAXBW-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 8 x i64> [ [[TMP10]], [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ]
+; CHECK-MAXBW-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 0
+; CHECK-MAXBW-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP11]]
+; CHECK-MAXBW-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 0
+; CHECK-MAXBW-NEXT:    [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]]
+; CHECK-MAXBW-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
+; CHECK-MAXBW-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP13]], align 1
+; CHECK-MAXBW-NEXT:    [[TMP14:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i64>
+; CHECK-MAXBW-NEXT:    [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP1]], i32 0
+; CHECK-MAXBW-NEXT:    [[WIDE_LOAD2:%.*]] = load <vscale x 8 x i8>, ptr [[TMP15]], align 1
+; CHECK-MAXBW-NEXT:    [[TMP16:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD2]] to <vscale x 8 x i64>
+; CHECK-MAXBW-NEXT:    [[TMP17:%.*]] = mul nuw nsw <vscale x 8 x i64> [[TMP16]], [[TMP14]]
+; CHECK-MAXBW-NEXT:    [[TMP20]] = add <vscale x 8 x i64> [[TMP17]], [[VEC_PHI]]
+; CHECK-MAXBW-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
+; CHECK-MAXBW-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-MAXBW-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
+; CHECK-MAXBW:       middle.block:
+; CHECK-MAXBW-NEXT:    [[TMP19:%.*]] = call i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64> [[TMP20]])
+; CHECK-MAXBW-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
+; CHECK-MAXBW-NEXT:    br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+;
+entry:
+  %cmp = icmp sgt i32 %n, 0
+  br i1 %cmp, label %for.body, label %exit
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
+  %accum = phi i64 [ %add, %for.body ], [ %cost, %entry ]
+  %a.ptr = phi ptr [ %a.gep, %for.body ], [ %a, %entry ]
+  %b.ptr = phi ptr [ %b.gep, %for.body ], [ %b, %entry ]
+  %a.load = load i8, ptr %a.ptr, align 1
+  %a.ext = zext i8 %a.load to i64
+  %b.load = load i8, ptr %b.ptr, align 1
+  %b.ext = zext i8 %b.load to i64
+  %mul = mul nuw nsw i64 %b.ext, %a.ext
+  %add = add nsw i64 %mul, %accum
+  %a.gep = getelementptr inbounds nuw i8, ptr %a.ptr, i64 1
+  %b.gep = getelementptr inbounds nuw i8, ptr %b.ptr, i64 1
+  %iv.next = add nuw nsw i32 %iv, 1
+  %cmp.2 = icmp eq i32 %iv.next, %n
+  br i1 %cmp.2, label %exit, label %for.body
+
+exit:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  %cost.result = phi i64 [ %cost, %entry ], [ %add, %for.body ]
+  %result = trunc i64 %cost.result to i32
+  ret i32 %result
+}
+
 !7 = distinct !{!7, !8, !9, !10}
 !8 = !{!"llvm.loop.mustprogress"}
 !9 = !{!"llvm.loop.vectorize.predicate.enable", i1 true}
 !10 = !{!"llvm.loop.vectorize.enable", i1 true}
 attributes #0 = { vscale_range(1,16) "target-features"="+sve" }
-;.
-; CHECK-INTERLEAVE1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
-; CHECK-INTERLEAVE1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
-; CHECK-INTERLEAVE1: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK-INTERLEAVE1: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
-; CHECK-INTERLEAVE1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
-; CHECK-INTERLEAVE1: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
-; CHECK-INTERLEAVE1: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
-; CHECK-INTERLEAVE1: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
-; CHECK-INTERLEAVE1: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
-; CHECK-INTERLEAVE1: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
-; CHECK-INTERLEAVE1: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
-; CHECK-INTERLEAVE1: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
-; CHECK-INTERLEAVE1: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
-; CHECK-INTERLEAVE1: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]}
-; CHECK-INTERLEAVE1: [[LOOP14]] = distinct !{[[LOOP14]], [[META15:![0-9]+]], [[META1]], [[META2]]}
-; CHECK-INTERLEAVE1: [[META15]] = !{!"llvm.loop.mustprogress"}
-; CHECK-INTERLEAVE1: [[LOOP16]] = distinct !{[[LOOP16]], [[META15]], [[META2]], [[META1]]}
-; CHECK-INTERLEAVE1: [[LOOP17]] = distinct !{[[LOOP17]], [[META1]], [[META2]]}
-; CHECK-INTERLEAVE1: [[LOOP18]] = distinct !{[[LOOP18]], [[META2]], [[META1]]}
-; CHECK-INTERLEAVE1: [[LOOP19]] = distinct !{[[LOOP19]], [[META1]], [[META2]]}
-; CHECK-INTERLEAVE1: [[LOOP20]] = distinct !{[[LOOP20]], [[META2]], [[META1]]}
-; CHECK-INTERLEAVE1: [[LOOP21]] = distinct !{[[LOOP21]], [[META1]], [[META2]]}
-; CHECK-INTERLEAVE1: [[LOOP22]] = distinct !{[[LOOP22]], [[META2]], [[META1]]}
-; CHECK-INTERLEAVE1: [[LOOP23]] = distinct !{[[LOOP23]], [[META1]], [[META2]]}
-; CHECK-INTERLEAVE1: [[LOOP24]] = distinct !{[[LOOP24]], [[META2]], [[META1]]}
-;.
-; CHECK-INTERLEAVED: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
-; CHECK-INTERLEAVED: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
-; CHECK-INTERLEAVED: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK-INTERLEAVED: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
-; CHECK-INTERLEAVED: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
-; CHECK-INTERLEAVED: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
-; CHECK-INTERLEAVED: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
-; CHECK-INTERLEAVED: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
-; CHECK-INTERLEAVED: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
-; CHECK-INTERLEAVED: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
-; CHECK-INTERLEAVED: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
-; CHECK-INTERLEAVED: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
-; CHECK-INTERLEAVED: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
-; CHECK-INTERLEAVED: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]}
-; CHECK-INTERLEAVED: [[LOOP14]] = distinct !{[[LOOP14]], [[META15:![0-9]+]], [[META1]], [[META2]]}
-; CHECK-INTERLEAVED: [[META15]] = !{!"llvm.loop.mustprogress"}
-; CHECK-INTERLEAVED: [[LOOP16]] = distinct !{[[LOOP16]], [[META15]], [[META2]], [[META1]]}
-; CHECK-INTERLEAVED: [[LOOP17]] = distinct !{[[LOOP17]], [[META1]], [[META2]]}
-; CHECK-INTERLEAVED: [[LOOP18]] = distinct !{[[LOOP18]], [[META2]], [[META1]]}
-; CHECK-INTERLEAVED: [[LOOP19]] = distinct !{[[LOOP19]], [[META1]], [[META2]]}
-; CHECK-INTERLEAVED: [[LOOP20]] = distinct !{[[LOOP20]], [[META2]], [[META1]]}
-; CHECK-INTERLEAVED: [[LOOP21]] = distinct !{[[LOOP21]], [[META1]], [[META2]]}
-; CHECK-INTERLEAVED: [[LOOP22]] = distinct !{[[LOOP22]], [[META1]]}
-; CHECK-INTERLEAVED: [[LOOP23]] = distinct !{[[LOOP23]], [[META1]], [[META2]]}
-; CHECK-INTERLEAVED: [[LOOP24]] = distinct !{[[LOOP24]], [[META2]], [[META1]]}
-; CHECK-INTERLEAVED: [[LOOP25]] = distinct !{[[LOOP25]], [[META1]], [[META2]]}
-; CHECK-INTERLEAVED: [[LOOP26]] = distinct !{[[LOOP26]], [[META2]], [[META1]]}
-;.
-; CHECK-MAXBW: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
-; CHECK-MAXBW: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
-; CHECK-MAXBW: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK-MAXBW: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
-; CHECK-MAXBW: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
-; CHECK-MAXBW: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
-; CHECK-MAXBW: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
-; CHECK-MAXBW: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
-; CHECK-MAXBW: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
-; CHECK-MAXBW: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
-; CHECK-MAXBW: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
-; CHECK-MAXBW: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
-; CHECK-MAXBW: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
-; CHECK-MAXBW: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]}
-; CHECK-MAXBW: [[LOOP14]] = distinct !{[[LOOP14]], [[META15:![0-9]+]], [[META1]], [[META2]]}
-; CHECK-MAXBW: [[META15]] = !{!"llvm.loop.mustprogress"}
-; CHECK-MAXBW: [[LOOP16]] = distinct !{[[LOOP16]], [[META15]], [[META2]], [[META1]]}
-; CHECK-MAXBW: [[LOOP17]] = distinct !{[[LOOP17]], [[META1]], [[META2]]}
-; CHECK-MAXBW: [[LOOP18]] = distinct !{[[LOOP18]], [[META2]], [[META1]]}
-; CHECK-MAXBW: [[LOOP19]] = distinct !{[[LOOP19]], [[META1]], [[META2]]}
-; CHECK-MAXBW: [[LOOP20]] = distinct !{[[LOOP20]], [[META2]], [[META1]]}
-; CHECK-MAXBW: [[LOOP21]] = distinct !{[[LOOP21]], [[META1]], [[META2]]}
-; CHECK-MAXBW: [[LOOP22]] = distinct !{[[LOOP22]], [[META2]], [[META1]]}
-; CHECK-MAXBW: [[LOOP23]] = distinct !{[[LOOP23]], [[META1]], [[META2]]}
-; CHECK-MAXBW: [[LOOP24]] = distinct !{[[LOOP24]], [[META2]], [[META1]]}
-;.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
index 9d21ea0..b439b64 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
@@ -272,36 +272,52 @@ define i64 @loop_contains_safe_div() #1 {
 ; CHECK-NEXT:    [[P2:%.*]] = alloca [1024 x i8], align 4
 ; CHECK-NEXT:    call void @init_mem(ptr [[P1]], i64 1024)
 ; CHECK-NEXT:    call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT:    [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP12:%.*]] = mul i64 [[TMP11]], 4
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP10]], 4
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 64, [[TMP3]]
+; CHECK-NEXT:    [[INDEX1:%.*]] = sub i64 64, [[N_MOD_VF]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
+; CHECK-NEXT:    [[TMP16:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
+; CHECK-NEXT:    [[TMP17:%.*]] = mul <vscale x 4 x i64> [[TMP16]], splat (i64 1)
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add <vscale x 4 x i64> splat (i64 3), [[TMP17]]
+; CHECK-NEXT:    [[TMP9:%.*]] = mul i64 1, [[TMP5]]
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP9]], i64 0
+; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 3, i64 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[OFFSET_IDX1:%.*]] = add i64 3, [[INDEX2]]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX1]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 1
-; CHECK-NEXT:    [[TMP3:%.*]] = udiv <2 x i32> [[WIDE_LOAD]], splat (i32 20000)
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <2 x i32> [[TMP3]], splat (i32 1)
-; CHECK-NEXT:    [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 2
-; CHECK-NEXT:    [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true)
-; CHECK-NEXT:    [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP5]])
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP2]], align 1
+; CHECK-NEXT:    [[TMP13:%.*]] = udiv <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 20000)
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq <vscale x 4 x i32> [[TMP13]], splat (i32 1)
+; CHECK-NEXT:    [[INDEX_NEXT2]] = add nuw i64 [[INDEX2]], [[TMP5]]
+; CHECK-NEXT:    [[TMP15:%.*]] = xor <vscale x 4 x i1> [[TMP14]], splat (i1 true)
+; CHECK-NEXT:    [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP15]])
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[INDEX1]]
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       middle.split:
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
 ; CHECK:       vector.early.exit:
-; CHECK-NEXT:    [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v2i1(<2 x i1> [[TMP5]], i1 true)
-; CHECK-NEXT:    [[EARLY_EXIT_VALUE:%.*]] = extractelement <2 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT:    [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> [[TMP15]], i1 true)
+; CHECK-NEXT:    [[EARLY_EXIT_VALUE:%.*]] = extractelement <vscale x 4 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
 ; CHECK-NEXT:    br label [[LOOP_END:%.*]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    br i1 true, label [[LOOP_END]], label [[SCALAR_PH]]
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 64, [[INDEX1]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOP_END]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[OFFSET_IDX]], [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll
index 27923f8..fb278c7 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll
@@ -91,10 +91,10 @@ define void @vfwmaccbf16.vv(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64
 ; NO-ZVFBFMIN-LABEL: define void @vfwmaccbf16.vv(
 ; NO-ZVFBFMIN-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
 ; NO-ZVFBFMIN-NEXT:  [[ENTRY:.*]]:
-; NO-ZVFBFMIN-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
+; NO-ZVFBFMIN-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
 ; NO-ZVFBFMIN-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; NO-ZVFBFMIN:       [[VECTOR_PH]]:
-; NO-ZVFBFMIN-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
+; NO-ZVFBFMIN-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
 ; NO-ZVFBFMIN-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; NO-ZVFBFMIN-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; NO-ZVFBFMIN:       [[VECTOR_BODY]]:
@@ -104,16 +104,16 @@ define void @vfwmaccbf16.vv(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64
 ; NO-ZVFBFMIN-NEXT:    [[TMP2:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[TMP0]]
 ; NO-ZVFBFMIN-NEXT:    [[TMP3:%.*]] = getelementptr float, ptr [[C]], i64 [[TMP0]]
 ; NO-ZVFBFMIN-NEXT:    [[TMP4:%.*]] = getelementptr bfloat, ptr [[TMP1]], i32 0
-; NO-ZVFBFMIN-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x bfloat>, ptr [[TMP4]], align 2
+; NO-ZVFBFMIN-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x bfloat>, ptr [[TMP4]], align 2
 ; NO-ZVFBFMIN-NEXT:    [[TMP5:%.*]] = getelementptr bfloat, ptr [[TMP2]], i32 0
-; NO-ZVFBFMIN-NEXT:    [[WIDE_LOAD1:%.*]] = load <8 x bfloat>, ptr [[TMP5]], align 2
+; NO-ZVFBFMIN-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x bfloat>, ptr [[TMP5]], align 2
 ; NO-ZVFBFMIN-NEXT:    [[TMP6:%.*]] = getelementptr float, ptr [[TMP3]], i32 0
-; NO-ZVFBFMIN-NEXT:    [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP6]], align 4
-; NO-ZVFBFMIN-NEXT:    [[TMP7:%.*]] = fpext <8 x bfloat> [[WIDE_LOAD]] to <8 x float>
-; NO-ZVFBFMIN-NEXT:    [[TMP8:%.*]] = fpext <8 x bfloat> [[WIDE_LOAD1]] to <8 x float>
-; NO-ZVFBFMIN-NEXT:    [[TMP9:%.*]] = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> [[TMP7]], <8 x float> [[TMP8]], <8 x float> [[WIDE_LOAD2]])
-; NO-ZVFBFMIN-NEXT:    store <8 x float> [[TMP9]], ptr [[TMP6]], align 4
-; NO-ZVFBFMIN-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; NO-ZVFBFMIN-NEXT:    [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP6]], align 4
+; NO-ZVFBFMIN-NEXT:    [[TMP7:%.*]] = fpext <4 x bfloat> [[WIDE_LOAD]] to <4 x float>
+; NO-ZVFBFMIN-NEXT:    [[TMP8:%.*]] = fpext <4 x bfloat> [[WIDE_LOAD1]] to <4 x float>
+; NO-ZVFBFMIN-NEXT:    [[TMP9:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP7]], <4 x float> [[TMP8]], <4 x float> [[WIDE_LOAD2]])
+; NO-ZVFBFMIN-NEXT:    store <4 x float> [[TMP9]], ptr [[TMP6]], align 4
+; NO-ZVFBFMIN-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; NO-ZVFBFMIN-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; NO-ZVFBFMIN-NEXT:    br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; NO-ZVFBFMIN:       [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll
index a360339..81a2675 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll
@@ -6,8 +6,8 @@
 ; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s
 
 define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF={1}'
-; IF-EVL: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
+; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
+; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
 ;
 ; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
 ; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
@@ -62,8 +62,8 @@ exit:
 }
 
 define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF={1}'
-; IF-EVL: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
+; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
+; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
 ;
 ; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
 ; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
@@ -118,8 +118,8 @@ exit:
 }
 
 define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF={1}'
-; IF-EVL: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
+; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
+; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
 ;
 ; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
 ; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
@@ -174,8 +174,8 @@ exit:
 }
 
 define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF={1}'
-; IF-EVL: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
+; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
+; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
 ;
 ; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
 ; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
@@ -230,8 +230,8 @@ exit:
 }
 
 define void @vp_ctlz(ptr %a, ptr %b, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF={1}'
-; IF-EVL: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
+; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
+; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
 ;
 ; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
 ; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
@@ -281,8 +281,8 @@ exit:
 }
 
 define void @vp_cttz(ptr %a, ptr %b, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF={1}'
-; IF-EVL: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
+; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
+; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
 ;
 ; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
 ; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
@@ -332,8 +332,8 @@ exit:
 }
 
 define void @vp_lrint(ptr %a, ptr %b, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF={1}'
-; IF-EVL: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
+; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
+; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
 ;
 ; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
 ; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
@@ -387,8 +387,8 @@ exit:
 }
 
 define void @vp_llrint(ptr %a, ptr %b, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF={1}'
-; IF-EVL: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
+; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
+; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
 ;
 ; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
 ; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
@@ -442,8 +442,8 @@ exit:
 }
 
 define void @vp_abs(ptr %a, ptr %b, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF={1}'
-; IF-EVL: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
+; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
+; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
 ;
 ; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
 ; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
index d64a24b..40cf02c 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
@@ -5,8 +5,8 @@
 ; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s
 
 define void @vp_sext(ptr %a, ptr %b, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF={1}'
-; IF-EVL: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
+; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
+; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
 ;
 ; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
 ; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
@@ -58,8 +58,8 @@ exit:
 }
 
 define void @vp_zext(ptr %a, ptr %b, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF={1}'
-; IF-EVL: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
+; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
+; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
 ;
 ; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
 ; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
@@ -109,8 +109,8 @@ exit:
 }
 
 define void @vp_trunc(ptr %a, ptr %b, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF={1}'
-; IF-EVL: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
+; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
+; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
 ;
 ; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
 ; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
@@ -160,8 +160,8 @@ exit:
 }
 
 define void @vp_fpext(ptr %a, ptr %b, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF={1}'
-; IF-EVL: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
+; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
+; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
 ;
 ; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
 ; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
@@ -211,8 +211,8 @@ exit:
 }
 
 define void @vp_fptrunc(ptr %a, ptr %b, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF={1}'
-; IF-EVL: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
+; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
+; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
 ;
 ; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
 ; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
@@ -262,8 +262,8 @@ exit:
 }
 
 define void @vp_sitofp(ptr %a, ptr %b, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF={1}'
-; IF-EVL: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
+; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
+; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
 ;
 ; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
 ; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
@@ -313,8 +313,8 @@ exit:
 }
 
 define void @vp_uitofp(ptr %a, ptr %b, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF={1}'
-; IF-EVL: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
+; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
+; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
 ;
 ; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
 ; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
@@ -364,8 +364,8 @@ exit:
 }
 
 define void @vp_fptosi(ptr %a, ptr %b, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF={1}'
-; IF-EVL: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
+; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
+; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
 ;
 ; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
 ; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
@@ -415,8 +415,8 @@ exit:
 }
 
 define void @vp_fptoui(ptr %a, ptr %b, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF={1}'
-; IF-EVL: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
+; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
+; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
 ;
 ; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
 ; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
@@ -466,8 +466,8 @@ exit:
 }
 
 define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF={1}'
-; IF-EVL: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
+; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
+; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
 ;
 ; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
 ; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
index aacf632..0daf15d0 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
@@ -24,8 +24,8 @@
 
 
 define i32 @reduction(ptr %a, i64 %n, i32 %start) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF={1}'
-; IF-EVL: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
+; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
+; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
 
 ; IF-EVL-OUTLOOP: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
 ; IF-EVL-OUTLOOP-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll
index 5824a96..e64ea38 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll
@@ -11,8 +11,8 @@
 ; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefixes=NO-VP,CHECK %s
 
 define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
-; IF-EVL: VPlan 'Initial VPlan for VF={1},UF={1}'
-; IF-EVL: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
+; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
+; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
 ;
 ; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
 ; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
index 74e2503..90de5b6 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
@@ -6,8 +6,8 @@
  ; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s
 
  define void @vp_select(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
- ; IF-EVL: VPlan 'Initial VPlan for VF={1},UF={1}'
- ; IF-EVL: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
+ ; IF-EVL: VPlan 'Initial VPlan for VF={1},UF>=1'
+ ; IF-EVL-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
  ; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}'
  ; IF-EVL: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
  ;
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll
index e1fc7e0..0d80aff 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll
@@ -24,10 +24,12 @@ define <4 x float> @int_sin_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -217,10 +219,12 @@ define <4 x float> @exp_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -297,10 +301,12 @@ define <4 x float> @log_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -470,10 +476,12 @@ define <4 x float> @sin_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @sinf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @sinf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -509,10 +517,12 @@ define <4 x float> @cos_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @cosf(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @cosf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @cosf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -548,10 +558,12 @@ define <4 x float> @tan_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.tan.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -587,10 +599,12 @@ define <4 x float> @asin_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -625,10 +639,12 @@ define <4 x float> @int_asin_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -664,10 +680,12 @@ define <4 x float> @acos_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @acosf(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @acosf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @acosf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -702,10 +720,12 @@ define <4 x float> @int_acos_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -741,10 +761,12 @@ define <4 x float> @atan_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @atanf(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @atanf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @atanf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -779,10 +801,12 @@ define <4 x float> @int_atan_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -822,11 +846,14 @@ define <4 x float> @atan2_4x(ptr %a, ptr %b) {
 ; NOACCELERATE-NEXT:    [[VECEXTB_1:%.*]] = extractelement <4 x float> [[BB]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @atan2f(float [[VECEXT_1]], float [[VECEXTB_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[BB]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.atan2.v2f32(<2 x float> [[TMP3]], <2 x float> [[TMP4]])
-; NOACCELERATE-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXTB_2:%.*]] = extractelement <4 x float> [[BB]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @atan2f(float [[VECEXT_2]], float [[VECEXTB_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[VECEXTB_3:%.*]] = extractelement <4 x float> [[BB]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @atan2f(float [[VECEXT_3]], float [[VECEXTB_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
@@ -870,11 +897,14 @@ define <4 x float> @int_atan2_4x(ptr %a, ptr %b) {
 ; NOACCELERATE-NEXT:    [[VECEXTB_1:%.*]] = extractelement <4 x float> [[BB]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.atan2.f32(float [[VECEXT_1]], float [[VECEXTB_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[BB]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.atan2.v2f32(<2 x float> [[TMP3]], <2 x float> [[TMP4]])
-; NOACCELERATE-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXTB_2:%.*]] = extractelement <4 x float> [[BB]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.atan2.f32(float [[VECEXT_2]], float [[VECEXTB_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[VECEXTB_3:%.*]] = extractelement <4 x float> [[BB]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.atan2.f32(float [[VECEXT_3]], float [[VECEXTB_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -915,10 +945,12 @@ define <4 x float> @sinh_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @sinhf(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @sinhf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @sinhf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -953,10 +985,12 @@ define <4 x float> @int_sinh_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -992,10 +1026,12 @@ define <4 x float> @cosh_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -1030,10 +1066,12 @@ define <4 x float> @int_cosh_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -1069,10 +1107,12 @@ define <4 x float> @tanh_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @tanhf(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @tanhf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @tanhf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -1107,10 +1147,12 @@ define <4 x float> @int_tanh_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -1308,10 +1350,12 @@ define <4 x float> @int_cos_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll
index 666514a..34d65a3 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll
@@ -24,10 +24,12 @@ define <4 x float> @int_sin_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -217,10 +219,12 @@ define <4 x float> @exp_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -297,10 +301,12 @@ define <4 x float> @log_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -470,10 +476,12 @@ define <4 x float> @sin_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @sinf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @sinf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -509,10 +517,12 @@ define <4 x float> @cos_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @cosf(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @cosf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @cosf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -548,10 +558,12 @@ define <4 x float> @tan_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.tan.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -587,10 +599,12 @@ define <4 x float> @asin_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -625,10 +639,12 @@ define <4 x float> @int_asin_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -664,10 +680,12 @@ define <4 x float> @acos_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @acosf(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @acosf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @acosf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -702,10 +720,12 @@ define <4 x float> @int_acos_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -741,10 +761,12 @@ define <4 x float> @atan_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @atanf(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @atanf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @atanf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -779,10 +801,12 @@ define <4 x float> @int_atan_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -822,11 +846,14 @@ define <4 x float> @atan2_4x(ptr %a, ptr %b) {
 ; NOACCELERATE-NEXT:    [[VECEXTB_1:%.*]] = extractelement <4 x float> [[BB]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @atan2f(float [[VECEXT_1]], float [[VECEXTB_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[BB]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.atan2.v2f32(<2 x float> [[TMP3]], <2 x float> [[TMP4]])
-; NOACCELERATE-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXTB_2:%.*]] = extractelement <4 x float> [[BB]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @atan2f(float [[VECEXT_2]], float [[VECEXTB_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[VECEXTB_3:%.*]] = extractelement <4 x float> [[BB]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @atan2f(float [[VECEXT_3]], float [[VECEXTB_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
@@ -870,11 +897,14 @@ define <4 x float> @int_atan2_4x(ptr %a, ptr %b) {
 ; NOACCELERATE-NEXT:    [[VECEXTB_1:%.*]] = extractelement <4 x float> [[BB]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.atan2.f32(float [[VECEXT_1]], float [[VECEXTB_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[BB]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.atan2.v2f32(<2 x float> [[TMP3]], <2 x float> [[TMP4]])
-; NOACCELERATE-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXTB_2:%.*]] = extractelement <4 x float> [[BB]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.atan2.f32(float [[VECEXT_2]], float [[VECEXTB_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[VECEXTB_3:%.*]] = extractelement <4 x float> [[BB]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.atan2.f32(float [[VECEXT_3]], float [[VECEXTB_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -915,10 +945,12 @@ define <4 x float> @sinh_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @sinhf(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @sinhf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @sinhf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -953,10 +985,12 @@ define <4 x float> @int_sinh_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -992,10 +1026,12 @@ define <4 x float> @cosh_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -1030,10 +1066,12 @@ define <4 x float> @int_cosh_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -1069,10 +1107,12 @@ define <4 x float> @tanh_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @tanhf(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @tanhf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @tanhf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -1107,10 +1147,12 @@ define <4 x float> @int_tanh_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
@@ -1308,10 +1350,12 @@ define <4 x float> @int_cos_4x(ptr %a) {
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
 ; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP3]])
-; NOACCELERATE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll
index 29bd819..bb88edf 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll
@@ -607,35 +607,13 @@ define <2 x i32> @sdiv_v2i32_unknown_divisor(<2 x i32> %a, <2 x i32> %x, <2 x i3
 
 ; computes (a/const + x - y) * z
 define <2 x i32> @sdiv_v2i32_const_divisor(<2 x i32> %a, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z)
-; NO-SVE-LABEL: define <2 x i32> @sdiv_v2i32_const_divisor(
-; NO-SVE-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
-; NO-SVE-NEXT:    [[A0:%.*]] = extractelement <2 x i32> [[A]], i64 0
-; NO-SVE-NEXT:    [[A1:%.*]] = extractelement <2 x i32> [[A]], i64 1
-; NO-SVE-NEXT:    [[TMP1:%.*]] = sdiv i32 [[A0]], 2
-; NO-SVE-NEXT:    [[TMP2:%.*]] = sdiv i32 [[A1]], 4
-; NO-SVE-NEXT:    [[X0:%.*]] = extractelement <2 x i32> [[X]], i64 0
-; NO-SVE-NEXT:    [[X1:%.*]] = extractelement <2 x i32> [[X]], i64 1
-; NO-SVE-NEXT:    [[TMP3:%.*]] = add i32 [[TMP1]], [[X0]]
-; NO-SVE-NEXT:    [[TMP4:%.*]] = add i32 [[TMP2]], [[X1]]
-; NO-SVE-NEXT:    [[Y0:%.*]] = extractelement <2 x i32> [[Y]], i64 0
-; NO-SVE-NEXT:    [[Y1:%.*]] = extractelement <2 x i32> [[Y]], i64 1
-; NO-SVE-NEXT:    [[TMP5:%.*]] = sub i32 [[TMP3]], [[Y0]]
-; NO-SVE-NEXT:    [[TMP6:%.*]] = sub i32 [[TMP4]], [[Y1]]
-; NO-SVE-NEXT:    [[Z0:%.*]] = extractelement <2 x i32> [[Z]], i64 0
-; NO-SVE-NEXT:    [[Z1:%.*]] = extractelement <2 x i32> [[Z]], i64 1
-; NO-SVE-NEXT:    [[TMP7:%.*]] = mul i32 [[TMP5]], [[Z0]]
-; NO-SVE-NEXT:    [[TMP8:%.*]] = mul i32 [[TMP6]], [[Z1]]
-; NO-SVE-NEXT:    [[RES0:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
-; NO-SVE-NEXT:    [[RES1:%.*]] = insertelement <2 x i32> [[RES0]], i32 [[TMP8]], i32 1
-; NO-SVE-NEXT:    ret <2 x i32> [[RES1]]
-;
-; SVE-LABEL: define <2 x i32> @sdiv_v2i32_const_divisor(
-; SVE-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
-; SVE-NEXT:    [[TMP1:%.*]] = sdiv <2 x i32> [[A]], <i32 2, i32 4>
-; SVE-NEXT:    [[TMP2:%.*]] = add <2 x i32> [[TMP1]], [[X]]
-; SVE-NEXT:    [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], [[Y]]
-; SVE-NEXT:    [[TMP4:%.*]] = mul <2 x i32> [[TMP3]], [[Z]]
-; SVE-NEXT:    ret <2 x i32> [[TMP4]]
+; CHECK-LABEL: define <2 x i32> @sdiv_v2i32_const_divisor(
+; CHECK-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = sdiv <2 x i32> [[A]], <i32 2, i32 4>
+; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i32> [[TMP1]], [[X]]
+; CHECK-NEXT:    [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], [[Y]]
+; CHECK-NEXT:    [[TMP4:%.*]] = mul <2 x i32> [[TMP3]], [[Z]]
+; CHECK-NEXT:    ret <2 x i32> [[TMP4]]
 ;
 {
   %a0 = extractelement <2 x i32> %a, i64 0
diff --git a/llvm/test/Transforms/SLPVectorizer/NVPTX/vectorizable-intrinsic.ll b/llvm/test/Transforms/SLPVectorizer/NVPTX/vectorizable-intrinsic.ll
index 114bf58..3a56258 100644
--- a/llvm/test/Transforms/SLPVectorizer/NVPTX/vectorizable-intrinsic.ll
+++ b/llvm/test/Transforms/SLPVectorizer/NVPTX/vectorizable-intrinsic.ll
@@ -4,13 +4,17 @@
 target datalayout = "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64"
 target triple = "nvptx--nvidiacl"
 
-; Test that CTLZ can be vectorized currently even though the second argument is a scalar
-
+; Vector versions of the intrinsics are scalarized, so keep them scalar
 define <2 x i8> @cltz_test(<2 x i8> %x) #0 {
 ; CHECK-LABEL: define <2 x i8> @cltz_test(
 ; CHECK-SAME: <2 x i8> [[X:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VEC:%.*]] = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[X]], i1 false)
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i8> [[X]], i32 0
+; CHECK-NEXT:    [[CALL_I:%.*]] = call i8 @llvm.ctlz.i8(i8 [[TMP0]], i1 false)
+; CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i8> zeroinitializer, i8 [[CALL_I]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i8> [[X]], i32 1
+; CHECK-NEXT:    [[CALL_I4:%.*]] = call i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 false)
+; CHECK-NEXT:    [[VEC:%.*]] = insertelement <2 x i8> [[VECINIT]], i8 [[CALL_I4]], i32 1
 ; CHECK-NEXT:    ret <2 x i8> [[VEC]]
 ;
 entry:
@@ -28,7 +32,12 @@ define <2 x i8> @cltz_test_poison(<2 x i8> %x) #0 {
 ; CHECK-LABEL: define <2 x i8> @cltz_test_poison(
 ; CHECK-SAME: <2 x i8> [[X:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VEC:%.*]] = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[X]], i1 false)
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i8> [[X]], i32 0
+; CHECK-NEXT:    [[CALL_I:%.*]] = call i8 @llvm.ctlz.i8(i8 [[TMP0]], i1 false)
+; CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x i8> poison, i8 [[CALL_I]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i8> [[X]], i32 1
+; CHECK-NEXT:    [[CALL_I4:%.*]] = call i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 false)
+; CHECK-NEXT:    [[VEC:%.*]] = insertelement <2 x i8> [[VECINIT]], i8 [[CALL_I4]], i32 1
 ; CHECK-NEXT:    ret <2 x i8> [[VEC]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll
index f6f87dc..a9eb786 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll
@@ -149,27 +149,37 @@ define <4 x float> @exp_4x(ptr %a) {
 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP4]])
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x float> [[VECINS_31]]
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]])
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]])
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]])
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]])
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; DEFAULT-LABEL: define <4 x float> @exp_4x
 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; DEFAULT-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; DEFAULT-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP1]])
-; DEFAULT-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; DEFAULT-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP4]])
-; DEFAULT-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; DEFAULT-NEXT:    ret <4 x float> [[VECINS_31]]
+; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]])
+; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]])
+; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]])
+; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]])
+; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
   %0 = load <4 x float>, ptr %a, align 16
@@ -196,27 +206,37 @@ define <4 x float> @int_exp_4x(ptr %a) {
 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP4]])
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x float> [[VECINS_31]]
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT]])
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]])
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_2]])
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_3]])
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; DEFAULT-LABEL: define <4 x float> @int_exp_4x
 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; DEFAULT-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; DEFAULT-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP1]])
-; DEFAULT-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; DEFAULT-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP4]])
-; DEFAULT-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; DEFAULT-NEXT:    ret <4 x float> [[VECINS_31]]
+; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT]])
+; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]])
+; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_2]])
+; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_3]])
+; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
   %0 = load <4 x float>, ptr %a, align 16
@@ -243,27 +263,37 @@ define <4 x float> @log_4x(ptr %a) {
 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP4]])
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x float> [[VECINS_31]]
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]])
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]])
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]])
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]])
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; DEFAULT-LABEL: define <4 x float> @log_4x
 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; DEFAULT-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; DEFAULT-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP1]])
-; DEFAULT-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; DEFAULT-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP4]])
-; DEFAULT-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; DEFAULT-NEXT:    ret <4 x float> [[VECINS_31]]
+; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]])
+; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]])
+; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]])
+; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]])
+; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
   %0 = load <4 x float>, ptr %a, align 16
@@ -290,27 +320,37 @@ define <4 x float> @int_log_4x(ptr %a) {
 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP4]])
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x float> [[VECINS_31]]
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT]])
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_1]])
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_2]])
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_3]])
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; DEFAULT-LABEL: define <4 x float> @int_log_4x
 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; DEFAULT-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; DEFAULT-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP1]])
-; DEFAULT-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; DEFAULT-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP4]])
-; DEFAULT-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; DEFAULT-NEXT:    ret <4 x float> [[VECINS_31]]
+; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT]])
+; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_1]])
+; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_2]])
+; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_3]])
+; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
   %0 = load <4 x float>, ptr %a, align 16
@@ -337,27 +377,37 @@ define <4 x float> @sin_4x(ptr %a) {
 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP4]])
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x float> [[VECINS_31]]
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]])
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]])
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @sinf(float [[VECEXT_2]])
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @sinf(float [[VECEXT_3]])
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; DEFAULT-LABEL: define <4 x float> @sin_4x
 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; DEFAULT-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; DEFAULT-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP1]])
-; DEFAULT-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; DEFAULT-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP4]])
-; DEFAULT-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; DEFAULT-NEXT:    ret <4 x float> [[VECINS_31]]
+; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]])
+; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]])
+; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @sinf(float [[VECEXT_2]])
+; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @sinf(float [[VECEXT_3]])
+; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
   %0 = load <4 x float>, ptr %a, align 16
@@ -384,27 +434,37 @@ define <4 x float> @int_sin_4x(ptr %a) {
 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP4]])
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x float> [[VECINS_31]]
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_2]])
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]])
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; DEFAULT-LABEL: define <4 x float> @int_sin_4x
 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; DEFAULT-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; DEFAULT-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP1]])
-; DEFAULT-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; DEFAULT-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP4]])
-; DEFAULT-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; DEFAULT-NEXT:    ret <4 x float> [[VECINS_31]]
+; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
+; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
+; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_2]])
+; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]])
+; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
   %0 = load <4 x float>, ptr %a, align 16
@@ -431,27 +491,37 @@ define <4 x float> @asin_4x(ptr %a) {
 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP4]])
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x float> [[VECINS_31]]
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]])
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]])
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]])
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]])
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; DEFAULT-LABEL: define <4 x float> @asin_4x
 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; DEFAULT-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; DEFAULT-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP1]])
-; DEFAULT-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; DEFAULT-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP4]])
-; DEFAULT-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; DEFAULT-NEXT:    ret <4 x float> [[VECINS_31]]
+; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]])
+; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]])
+; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]])
+; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]])
+; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
   %0 = load <4 x float>, ptr %a, align 16
@@ -478,27 +548,37 @@ define <4 x float> @int_asin_4x(ptr %a) {
 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP4]])
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x float> [[VECINS_31]]
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT]])
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]])
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]])
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]])
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; DEFAULT-LABEL: define <4 x float> @int_asin_4x
 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; DEFAULT-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; DEFAULT-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP1]])
-; DEFAULT-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; DEFAULT-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP4]])
-; DEFAULT-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; DEFAULT-NEXT:    ret <4 x float> [[VECINS_31]]
+; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT]])
+; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]])
+; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]])
+; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]])
+; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
   %0 = load <4 x float>, ptr %a, align 16
@@ -525,27 +605,37 @@ define <4 x float> @cos_4x(ptr %a) {
 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP4]])
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x float> [[VECINS_31]]
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @cosf(float [[VECEXT]])
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @cosf(float [[VECEXT_1]])
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @cosf(float [[VECEXT_2]])
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @cosf(float [[VECEXT_3]])
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; DEFAULT-LABEL: define <4 x float> @cos_4x
 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; DEFAULT-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; DEFAULT-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP1]])
-; DEFAULT-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; DEFAULT-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP4]])
-; DEFAULT-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; DEFAULT-NEXT:    ret <4 x float> [[VECINS_31]]
+; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @cosf(float [[VECEXT]])
+; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @cosf(float [[VECEXT_1]])
+; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @cosf(float [[VECEXT_2]])
+; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @cosf(float [[VECEXT_3]])
+; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
   %0 = load <4 x float>, ptr %a, align 16
@@ -572,27 +662,37 @@ define <4 x float> @int_cos_4x(ptr %a) {
 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP4]])
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x float> [[VECINS_31]]
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]])
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]])
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_2]])
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_3]])
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; DEFAULT-LABEL: define <4 x float> @int_cos_4x
 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; DEFAULT-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; DEFAULT-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP1]])
-; DEFAULT-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; DEFAULT-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP4]])
-; DEFAULT-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; DEFAULT-NEXT:    ret <4 x float> [[VECINS_31]]
+; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]])
+; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]])
+; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_2]])
+; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_3]])
+; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
   %0 = load <4 x float>, ptr %a, align 16
@@ -619,27 +719,37 @@ define <4 x float> @acos_4x(ptr %a) {
 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP4]])
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x float> [[VECINS_31]]
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @acosf(float [[VECEXT]])
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @acosf(float [[VECEXT_1]])
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @acosf(float [[VECEXT_2]])
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @acosf(float [[VECEXT_3]])
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; DEFAULT-LABEL: define <4 x float> @acos_4x
 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; DEFAULT-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; DEFAULT-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP1]])
-; DEFAULT-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; DEFAULT-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP4]])
-; DEFAULT-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; DEFAULT-NEXT:    ret <4 x float> [[VECINS_31]]
+; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @acosf(float [[VECEXT]])
+; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @acosf(float [[VECEXT_1]])
+; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @acosf(float [[VECEXT_2]])
+; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @acosf(float [[VECEXT_3]])
+; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
   %0 = load <4 x float>, ptr %a, align 16
@@ -666,27 +776,37 @@ define <4 x float> @int_acos_4x(ptr %a) {
 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP4]])
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x float> [[VECINS_31]]
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT]])
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_1]])
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_2]])
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_3]])
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; DEFAULT-LABEL: define <4 x float> @int_acos_4x
 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; DEFAULT-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; DEFAULT-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP1]])
-; DEFAULT-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; DEFAULT-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP4]])
-; DEFAULT-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; DEFAULT-NEXT:    ret <4 x float> [[VECINS_31]]
+; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT]])
+; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_1]])
+; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_2]])
+; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_3]])
+; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
   %0 = load <4 x float>, ptr %a, align 16
@@ -713,27 +833,37 @@ define <4 x float> @tan_4x(ptr %a) {
 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.tan.v2f32(<2 x float> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.tan.v2f32(<2 x float> [[TMP4]])
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x float> [[VECINS_31]]
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @tanf(float [[VECEXT]])
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]])
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]])
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; DEFAULT-LABEL: define <4 x float> @tan_4x
 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; DEFAULT-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; DEFAULT-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.tan.v2f32(<2 x float> [[TMP1]])
-; DEFAULT-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; DEFAULT-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.tan.v2f32(<2 x float> [[TMP4]])
-; DEFAULT-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; DEFAULT-NEXT:    ret <4 x float> [[VECINS_31]]
+; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @tanf(float [[VECEXT]])
+; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
+; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]])
+; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]])
+; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
   %0 = load <4 x float>, ptr %a, align 16
@@ -760,27 +890,37 @@ define <4 x float> @int_tan_4x(ptr %a) {
 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.tan.v2f32(<2 x float> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.tan.v2f32(<2 x float> [[TMP4]])
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x float> [[VECINS_31]]
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT]])
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_1]])
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_2]])
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_3]])
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; DEFAULT-LABEL: define <4 x float> @int_tan_4x
 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; DEFAULT-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; DEFAULT-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.tan.v2f32(<2 x float> [[TMP1]])
-; DEFAULT-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; DEFAULT-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.tan.v2f32(<2 x float> [[TMP4]])
-; DEFAULT-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; DEFAULT-NEXT:    ret <4 x float> [[VECINS_31]]
+; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT]])
+; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_1]])
+; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_2]])
+; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_3]])
+; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
   %0 = load <4 x float>, ptr %a, align 16
@@ -807,27 +947,37 @@ define <4 x float> @atan_4x(ptr %a) {
 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP4]])
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x float> [[VECINS_31]]
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @atanf(float [[VECEXT]])
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @atanf(float [[VECEXT_1]])
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @atanf(float [[VECEXT_2]])
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @atanf(float [[VECEXT_3]])
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; DEFAULT-LABEL: define <4 x float> @atan_4x
 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; DEFAULT-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; DEFAULT-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP1]])
-; DEFAULT-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; DEFAULT-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP4]])
-; DEFAULT-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; DEFAULT-NEXT:    ret <4 x float> [[VECINS_31]]
+; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @atanf(float [[VECEXT]])
+; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @atanf(float [[VECEXT_1]])
+; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @atanf(float [[VECEXT_2]])
+; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @atanf(float [[VECEXT_3]])
+; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
   %0 = load <4 x float>, ptr %a, align 16
@@ -854,27 +1004,37 @@ define <4 x float> @int_atan_4x(ptr %a) {
 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP4]])
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x float> [[VECINS_31]]
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT]])
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_1]])
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_2]])
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_3]])
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; DEFAULT-LABEL: define <4 x float> @int_atan_4x
 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; DEFAULT-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; DEFAULT-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP1]])
-; DEFAULT-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; DEFAULT-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP4]])
-; DEFAULT-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; DEFAULT-NEXT:    ret <4 x float> [[VECINS_31]]
+; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT]])
+; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_1]])
+; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_2]])
+; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_3]])
+; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
   %0 = load <4 x float>, ptr %a, align 16
@@ -901,27 +1061,37 @@ define <4 x float> @sinh_4x(ptr %a) {
 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP4]])
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x float> [[VECINS_31]]
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @sinhf(float [[VECEXT]])
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @sinhf(float [[VECEXT_1]])
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @sinhf(float [[VECEXT_2]])
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @sinhf(float [[VECEXT_3]])
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; DEFAULT-LABEL: define <4 x float> @sinh_4x
 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; DEFAULT-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; DEFAULT-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP1]])
-; DEFAULT-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; DEFAULT-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP4]])
-; DEFAULT-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; DEFAULT-NEXT:    ret <4 x float> [[VECINS_31]]
+; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @sinhf(float [[VECEXT]])
+; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @sinhf(float [[VECEXT_1]])
+; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @sinhf(float [[VECEXT_2]])
+; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @sinhf(float [[VECEXT_3]])
+; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
   %0 = load <4 x float>, ptr %a, align 16
@@ -948,27 +1118,37 @@ define <4 x float> @int_sinh_4x(ptr %a) {
 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP4]])
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x float> [[VECINS_31]]
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT]])
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_1]])
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_2]])
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_3]])
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; DEFAULT-LABEL: define <4 x float> @int_sinh_4x
 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; DEFAULT-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; DEFAULT-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP1]])
-; DEFAULT-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; DEFAULT-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP4]])
-; DEFAULT-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; DEFAULT-NEXT:    ret <4 x float> [[VECINS_31]]
+; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT]])
+; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_1]])
+; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_2]])
+; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_3]])
+; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
   %0 = load <4 x float>, ptr %a, align 16
@@ -1109,27 +1289,37 @@ define <4 x float> @cosh_4x(ptr %a) {
 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP4]])
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x float> [[VECINS_31]]
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]])
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]])
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]])
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; DEFAULT-LABEL: define <4 x float> @cosh_4x
 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; DEFAULT-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; DEFAULT-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP1]])
-; DEFAULT-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; DEFAULT-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP4]])
-; DEFAULT-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; DEFAULT-NEXT:    ret <4 x float> [[VECINS_31]]
+; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]])
+; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
+; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]])
+; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]])
+; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
   %0 = load <4 x float>, ptr %a, align 16
@@ -1156,27 +1346,37 @@ define <4 x float> @int_cosh_4x(ptr %a) {
 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP4]])
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x float> [[VECINS_31]]
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT]])
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]])
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]])
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]])
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; DEFAULT-LABEL: define <4 x float> @int_cosh_4x
 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; DEFAULT-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; DEFAULT-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP1]])
-; DEFAULT-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; DEFAULT-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP4]])
-; DEFAULT-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; DEFAULT-NEXT:    ret <4 x float> [[VECINS_31]]
+; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT]])
+; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]])
+; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]])
+; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]])
+; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
   %0 = load <4 x float>, ptr %a, align 16
@@ -1317,27 +1517,37 @@ define <4 x float> @tanh_4x(ptr %a) {
 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP4]])
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x float> [[VECINS_31]]
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @tanhf(float [[VECEXT]])
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @tanhf(float [[VECEXT_1]])
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @tanhf(float [[VECEXT_2]])
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @tanhf(float [[VECEXT_3]])
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; DEFAULT-LABEL: define <4 x float> @tanh_4x
 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; DEFAULT-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; DEFAULT-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP1]])
-; DEFAULT-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; DEFAULT-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP4]])
-; DEFAULT-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; DEFAULT-NEXT:    ret <4 x float> [[VECINS_31]]
+; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @tanhf(float [[VECEXT]])
+; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @tanhf(float [[VECEXT_1]])
+; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @tanhf(float [[VECEXT_2]])
+; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @tanhf(float [[VECEXT_3]])
+; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
   %0 = load <4 x float>, ptr %a, align 16
@@ -1364,27 +1574,37 @@ define <4 x float> @int_tanh_4x(ptr %a) {
 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP4]])
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x float> [[VECINS_31]]
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT]])
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_1]])
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_2]])
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_3]])
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; DEFAULT-LABEL: define <4 x float> @int_tanh_4x
 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; DEFAULT-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; DEFAULT-NEXT:    [[TMP2:%.*]] = call fast <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP1]])
-; DEFAULT-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; DEFAULT-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP4]])
-; DEFAULT-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; DEFAULT-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; DEFAULT-NEXT:    ret <4 x float> [[VECINS_31]]
+; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT]])
+; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_1]])
+; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_2]])
+; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_3]])
+; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
   %0 = load <4 x float>, ptr %a, align 16
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll
index 2565d5b..258b0ec 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll
@@ -8,7 +8,7 @@
 ; YAML: Function:        test1
 ; YAML: Args:
 ; YAML:   - String:          'Stores SLP vectorized with cost '
-; YAML:   - Cost:            '4'
+; YAML:   - Cost:            '5'
 ; YAML:   - String:          ' and with tree size '
 ; YAML:   - TreeSize:        '5'
 
@@ -46,7 +46,7 @@ declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>)
 ; YAML: Function:        test2
 ; YAML: Args:
 ; YAML:   - String:          'Stores SLP vectorized with cost '
-; YAML:   - Cost:            '12'
+; YAML:   - Cost:            '14'
 ; YAML:   - String:          ' and with tree size '
 ; YAML:   - TreeSize:        '5'
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll
index 78dd39b..b790e6f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll
@@ -9,23 +9,29 @@
 define <8 x float> @ceil_floor(<8 x float> %a) {
 ; SSE-LABEL: @ceil_floor(
 ; SSE-NEXT:    [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i64 0
+; SSE-NEXT:    [[A1:%.*]] = extractelement <8 x float> [[A]], i64 1
+; SSE-NEXT:    [[A2:%.*]] = extractelement <8 x float> [[A]], i64 2
 ; SSE-NEXT:    [[A3:%.*]] = extractelement <8 x float> [[A]], i64 3
+; SSE-NEXT:    [[A4:%.*]] = extractelement <8 x float> [[A]], i64 4
+; SSE-NEXT:    [[A5:%.*]] = extractelement <8 x float> [[A]], i64 5
+; SSE-NEXT:    [[A6:%.*]] = extractelement <8 x float> [[A]], i64 6
+; SSE-NEXT:    [[A7:%.*]] = extractelement <8 x float> [[A]], i64 7
 ; SSE-NEXT:    [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]])
-; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 1, i32 2>
-; SSE-NEXT:    [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]])
+; SSE-NEXT:    [[AB1:%.*]] = call float @llvm.floor.f32(float [[A1]])
+; SSE-NEXT:    [[AB2:%.*]] = call float @llvm.floor.f32(float [[A2]])
 ; SSE-NEXT:    [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]])
-; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 4, i32 5>
-; SSE-NEXT:    [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]])
-; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 6, i32 7>
-; SSE-NEXT:    [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]])
+; SSE-NEXT:    [[AB4:%.*]] = call float @llvm.ceil.f32(float [[A4]])
+; SSE-NEXT:    [[AB5:%.*]] = call float @llvm.ceil.f32(float [[A5]])
+; SSE-NEXT:    [[AB6:%.*]] = call float @llvm.floor.f32(float [[A6]])
+; SSE-NEXT:    [[AB7:%.*]] = call float @llvm.floor.f32(float [[A7]])
 ; SSE-NEXT:    [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i64 0
-; SSE-NEXT:    [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE-NEXT:    [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> <i32 0, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE-NEXT:    [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[AB1]], i64 1
+; SSE-NEXT:    [[R23:%.*]] = insertelement <8 x float> [[R1]], float [[AB2]], i64 2
 ; SSE-NEXT:    [[R3:%.*]] = insertelement <8 x float> [[R23]], float [[AB3]], i64 3
-; SSE-NEXT:    [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE-NEXT:    [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 poison, i32 poison>
-; SSE-NEXT:    [[TMP9:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE-NEXT:    [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; SSE-NEXT:    [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[AB4]], i64 4
+; SSE-NEXT:    [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i64 5
+; SSE-NEXT:    [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i64 6
+; SSE-NEXT:    [[R71:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i64 7
 ; SSE-NEXT:    ret <8 x float> [[R71]]
 ;
 ; SLM-LABEL: @ceil_floor(
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll
index 27554e6..ef1a670 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll
@@ -9,23 +9,29 @@
 define <8 x float> @ceil_floor(<8 x float> %a) {
 ; SSE-LABEL: @ceil_floor(
 ; SSE-NEXT:    [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i64 0
+; SSE-NEXT:    [[A1:%.*]] = extractelement <8 x float> [[A]], i64 1
+; SSE-NEXT:    [[A2:%.*]] = extractelement <8 x float> [[A]], i64 2
 ; SSE-NEXT:    [[A3:%.*]] = extractelement <8 x float> [[A]], i64 3
+; SSE-NEXT:    [[A4:%.*]] = extractelement <8 x float> [[A]], i64 4
+; SSE-NEXT:    [[A5:%.*]] = extractelement <8 x float> [[A]], i64 5
+; SSE-NEXT:    [[A6:%.*]] = extractelement <8 x float> [[A]], i64 6
+; SSE-NEXT:    [[A7:%.*]] = extractelement <8 x float> [[A]], i64 7
 ; SSE-NEXT:    [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]])
-; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 1, i32 2>
-; SSE-NEXT:    [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]])
+; SSE-NEXT:    [[AB1:%.*]] = call float @llvm.floor.f32(float [[A1]])
+; SSE-NEXT:    [[AB2:%.*]] = call float @llvm.floor.f32(float [[A2]])
 ; SSE-NEXT:    [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]])
-; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 4, i32 5>
-; SSE-NEXT:    [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]])
-; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 6, i32 7>
-; SSE-NEXT:    [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]])
+; SSE-NEXT:    [[AB4:%.*]] = call float @llvm.ceil.f32(float [[A4]])
+; SSE-NEXT:    [[AB5:%.*]] = call float @llvm.ceil.f32(float [[A5]])
+; SSE-NEXT:    [[AB6:%.*]] = call float @llvm.floor.f32(float [[A6]])
+; SSE-NEXT:    [[AB7:%.*]] = call float @llvm.floor.f32(float [[A7]])
 ; SSE-NEXT:    [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i64 0
-; SSE-NEXT:    [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE-NEXT:    [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> <i32 0, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE-NEXT:    [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[AB1]], i64 1
+; SSE-NEXT:    [[R23:%.*]] = insertelement <8 x float> [[R1]], float [[AB2]], i64 2
 ; SSE-NEXT:    [[R3:%.*]] = insertelement <8 x float> [[R23]], float [[AB3]], i64 3
-; SSE-NEXT:    [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE-NEXT:    [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 poison, i32 poison>
-; SSE-NEXT:    [[TMP9:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE-NEXT:    [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; SSE-NEXT:    [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[AB4]], i64 4
+; SSE-NEXT:    [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i64 5
+; SSE-NEXT:    [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i64 6
+; SSE-NEXT:    [[R71:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i64 7
 ; SSE-NEXT:    ret <8 x float> [[R71]]
 ;
 ; SLM-LABEL: @ceil_floor(
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-with-reuses.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-with-reuses.ll
index 579239b..75a413f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-with-reuses.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-with-reuses.ll
@@ -10,15 +10,18 @@ define <4 x double> @test(ptr %ia, ptr %ib, ptr %ic, ptr %id, ptr %ie, ptr %x) {
 ; CHECK-NEXT:    [[I4275:%.*]] = load double, ptr [[ID]], align 8
 ; CHECK-NEXT:    [[I4277:%.*]] = load double, ptr [[IE]], align 8
 ; CHECK-NEXT:    [[I4326:%.*]] = load <4 x double>, ptr [[X]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[I4326]], <4 x double> poison, <2 x i32> <i32 0, i32 poison>
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[I4275]], i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x double> poison, double [[I4238]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x double> [[TMP4]], double [[I4252]], i32 1
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[I4264]], i32 2
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x double> [[TMP6]], double [[I4277]], i32 3
-; CHECK-NEXT:    [[TMP8:%.*]] = fmul fast <4 x double> [[TMP3]], [[TMP7]]
-; CHECK-NEXT:    ret <4 x double> [[TMP8]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[I4326]], <4 x double> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[I4238]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[I4252]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = fmul fast <2 x double> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> [[TMP1]], double [[I4275]], i32 1
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> poison, double [[I4264]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[I4277]], i32 1
+; CHECK-NEXT:    [[TMP8:%.*]] = fmul fast <2 x double> [[TMP5]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT:    [[I44281:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT:    ret <4 x double> [[I44281]]
 ;
   %i4238 = load double, ptr %ia, align 8
   %i4252 = load double, ptr %ib, align 8
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/call.ll b/llvm/test/Transforms/SLPVectorizer/X86/call.ll
index 8835e3b..548a63c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/call.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/call.ll
@@ -15,9 +15,14 @@ declare i64 @round(i64) nounwind willreturn
 
 define void @sin_libm(ptr %a, ptr %b) {
 ; CHECK-LABEL: @sin_libm(
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.sin.v2f64(<2 x double> [[TMP2]])
-; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[B:%.*]], align 8
+; CHECK-NEXT:    [[A0:%.*]] = load double, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 1
+; CHECK-NEXT:    [[A1:%.*]] = load double, ptr [[IDX1]], align 8
+; CHECK-NEXT:    [[SIN1:%.*]] = tail call double @sin(double [[A0]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    [[SIN2:%.*]] = tail call double @sin(double [[A1]]) #[[ATTR3]]
+; CHECK-NEXT:    store double [[SIN1]], ptr [[B:%.*]], align 8
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, ptr [[B]], i64 1
+; CHECK-NEXT:    store double [[SIN2]], ptr [[IDX2]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %a0 = load double, ptr %a, align 8
@@ -33,9 +38,14 @@ define void @sin_libm(ptr %a, ptr %b) {
 
 define void @cos_libm(ptr %a, ptr %b) {
 ; CHECK-LABEL: @cos_libm(
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.cos.v2f64(<2 x double> [[TMP2]])
-; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[B:%.*]], align 8
+; CHECK-NEXT:    [[A0:%.*]] = load double, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 1
+; CHECK-NEXT:    [[A1:%.*]] = load double, ptr [[IDX1]], align 8
+; CHECK-NEXT:    [[COS1:%.*]] = tail call double @cos(double [[A0]]) #[[ATTR3]]
+; CHECK-NEXT:    [[COS2:%.*]] = tail call double @cos(double [[A1]]) #[[ATTR3]]
+; CHECK-NEXT:    store double [[COS1]], ptr [[B:%.*]], align 8
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, ptr [[B]], i64 1
+; CHECK-NEXT:    store double [[COS2]], ptr [[IDX2]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %a0 = load double, ptr %a, align 8
@@ -51,9 +61,14 @@ define void @cos_libm(ptr %a, ptr %b) {
 
 define void @tan_libm(ptr %a, ptr %b) {
 ; CHECK-LABEL: @tan_libm(
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.tan.v2f64(<2 x double> [[TMP2]])
-; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[B:%.*]], align 8
+; CHECK-NEXT:    [[A0:%.*]] = load double, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 1
+; CHECK-NEXT:    [[A1:%.*]] = load double, ptr [[IDX1]], align 8
+; CHECK-NEXT:    [[TAN1:%.*]] = tail call double @tan(double [[A0]]) #[[ATTR3]]
+; CHECK-NEXT:    [[TAN2:%.*]] = tail call double @tan(double [[A1]]) #[[ATTR3]]
+; CHECK-NEXT:    store double [[TAN1]], ptr [[B:%.*]], align 8
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, ptr [[B]], i64 1
+; CHECK-NEXT:    store double [[TAN2]], ptr [[IDX2]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %a0 = load double, ptr %a, align 8
@@ -69,9 +84,14 @@ define void @tan_libm(ptr %a, ptr %b) {
 
 define void @pow_libm(ptr %a, ptr %b) {
 ; CHECK-LABEL: @pow_libm(
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> [[TMP2]], <2 x double> [[TMP2]])
-; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[B:%.*]], align 8
+; CHECK-NEXT:    [[A0:%.*]] = load double, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 1
+; CHECK-NEXT:    [[A1:%.*]] = load double, ptr [[IDX1]], align 8
+; CHECK-NEXT:    [[POW1:%.*]] = tail call double @pow(double [[A0]], double [[A0]]) #[[ATTR3]]
+; CHECK-NEXT:    [[POW2:%.*]] = tail call double @pow(double [[A1]], double [[A1]]) #[[ATTR3]]
+; CHECK-NEXT:    store double [[POW1]], ptr [[B:%.*]], align 8
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, ptr [[B]], i64 1
+; CHECK-NEXT:    store double [[POW2]], ptr [[IDX2]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %a0 = load double, ptr %a, align 8
@@ -87,9 +107,14 @@ define void @pow_libm(ptr %a, ptr %b) {
 
 define void @exp_libm(ptr %a, ptr %b) {
 ; CHECK-LABEL: @exp_libm(
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.exp2.v2f64(<2 x double> [[TMP2]])
-; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[B:%.*]], align 8
+; CHECK-NEXT:    [[A0:%.*]] = load double, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 1
+; CHECK-NEXT:    [[A1:%.*]] = load double, ptr [[IDX1]], align 8
+; CHECK-NEXT:    [[EXP1:%.*]] = tail call double @exp2(double [[A0]]) #[[ATTR3]]
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call double @exp2(double [[A1]]) #[[ATTR3]]
+; CHECK-NEXT:    store double [[EXP1]], ptr [[B:%.*]], align 8
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, ptr [[B]], i64 1
+; CHECK-NEXT:    store double [[EXP2]], ptr [[IDX2]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %a0 = load double, ptr %a, align 8
@@ -134,8 +159,8 @@ define void @sqrt_libm_errno(ptr %a, ptr %b) {
 ; CHECK-NEXT:    [[A0:%.*]] = load double, ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 1
 ; CHECK-NEXT:    [[A1:%.*]] = load double, ptr [[IDX1]], align 8
-; CHECK-NEXT:    [[SQRT1:%.*]] = tail call nnan double @sqrt(double [[A0]]) #[[ATTR3:[0-9]+]]
-; CHECK-NEXT:    [[SQRT2:%.*]] = tail call nnan double @sqrt(double [[A1]]) #[[ATTR3]]
+; CHECK-NEXT:    [[SQRT1:%.*]] = tail call nnan double @sqrt(double [[A0]]) #[[ATTR4:[0-9]+]]
+; CHECK-NEXT:    [[SQRT2:%.*]] = tail call nnan double @sqrt(double [[A1]]) #[[ATTR4]]
 ; CHECK-NEXT:    store double [[SQRT1]], ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, ptr [[B]], i64 1
 ; CHECK-NEXT:    store double [[SQRT2]], ptr [[IDX2]], align 8
@@ -158,8 +183,8 @@ define void @round_custom(ptr %a, ptr %b) {
 ; CHECK-NEXT:    [[A0:%.*]] = load i64, ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 1
 ; CHECK-NEXT:    [[A1:%.*]] = load i64, ptr [[IDX1]], align 8
-; CHECK-NEXT:    [[ROUND1:%.*]] = tail call i64 @round(i64 [[A0]]) #[[ATTR4:[0-9]+]]
-; CHECK-NEXT:    [[ROUND2:%.*]] = tail call i64 @round(i64 [[A1]]) #[[ATTR4]]
+; CHECK-NEXT:    [[ROUND1:%.*]] = tail call i64 @round(i64 [[A0]]) #[[ATTR3]]
+; CHECK-NEXT:    [[ROUND2:%.*]] = tail call i64 @round(i64 [[A1]]) #[[ATTR3]]
 ; CHECK-NEXT:    store i64 [[ROUND1]], ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 1
 ; CHECK-NEXT:    store i64 [[ROUND2]], ptr [[IDX2]], align 8
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_clear_undefs.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_clear_undefs.ll
index de99654..c2369a6 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_clear_undefs.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_clear_undefs.ll
@@ -9,7 +9,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16
 ; YAML-NEXT:  Function:        foo
 ; YAML-NEXT:  Args:
 ; YAML-NEXT:    - String:          'SLP vectorized with cost '
-; YAML-NEXT:    - Cost:            '-3'
+; YAML-NEXT:    - Cost:            '-4'
 ; YAML-NEXT:    - String:          ' and with tree size '
 ; YAML-NEXT:    - TreeSize:        '10'
 ; YAML-NEXT:  ...
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll
index fbde1e8..e7d7ce8 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll
@@ -34,13 +34,39 @@ declare float @llvm.powi.f32.i32(float, i32)
 define void @fn2(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @fn2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[A:%.*]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i32> [[TMP0]], [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = sitofp <4 x i32> [[TMP2]] to <4 x float>
-; CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> [[TMP4]], i32 [[TMP3]])
-; CHECK-NEXT:    store <4 x float> [[TMP5]], ptr [[C:%.*]], align 4
+; CHECK-NEXT:    [[I0:%.*]] = load i32, ptr [[A:%.*]], align 4
+; CHECK-NEXT:    [[I1:%.*]] = load i32, ptr [[B:%.*]], align 4
+; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[I0]], [[I1]]
+; CHECK-NEXT:    [[FP1:%.*]] = sitofp i32 [[ADD1]] to float
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call float @llvm.powi.f32.i32(float [[FP1]], i32 [[ADD1]]) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 1
+; CHECK-NEXT:    [[I2:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 1
+; CHECK-NEXT:    [[I3:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
+; CHECK-NEXT:    [[ADD2:%.*]] = add i32 [[I2]], [[I3]]
+; CHECK-NEXT:    [[FP2:%.*]] = sitofp i32 [[ADD2]] to float
+; CHECK-NEXT:    [[CALL2:%.*]] = tail call float @llvm.powi.f32.i32(float [[FP2]], i32 [[ADD1]]) #[[ATTR2]]
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 2
+; CHECK-NEXT:    [[I4:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 2
+; CHECK-NEXT:    [[I5:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ADD3:%.*]] = add i32 [[I4]], [[I5]]
+; CHECK-NEXT:    [[FP3:%.*]] = sitofp i32 [[ADD3]] to float
+; CHECK-NEXT:    [[CALL3:%.*]] = tail call float @llvm.powi.f32.i32(float [[FP3]], i32 [[ADD1]]) #[[ATTR2]]
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 3
+; CHECK-NEXT:    [[I6:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4
+; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 3
+; CHECK-NEXT:    [[I7:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4
+; CHECK-NEXT:    [[ADD4:%.*]] = add i32 [[I6]], [[I7]]
+; CHECK-NEXT:    [[FP4:%.*]] = sitofp i32 [[ADD4]] to float
+; CHECK-NEXT:    [[CALL4:%.*]] = tail call float @llvm.powi.f32.i32(float [[FP4]], i32 [[ADD1]]) #[[ATTR2]]
+; CHECK-NEXT:    store float [[CALL1]], ptr [[C:%.*]], align 4
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[C]], i32 1
+; CHECK-NEXT:    store float [[CALL2]], ptr [[ARRAYIDX8]], align 4
+; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[C]], i32 2
+; CHECK-NEXT:    store float [[CALL3]], ptr [[ARRAYIDX9]], align 4
+; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[C]], i32 3
+; CHECK-NEXT:    store float [[CALL4]], ptr [[ARRAYIDX10]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/intrinsic.ll b/llvm/test/Transforms/SLPVectorizer/X86/intrinsic.ll
index 0eaf89d..bb7d54c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/intrinsic.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/intrinsic.ll
@@ -378,11 +378,35 @@ declare float @llvm.powi.f32.i32(float, i32)
 define void @vec_powi_f32(ptr %a, ptr %b, ptr %c, i32 %P) {
 ; CHECK-LABEL: @vec_powi_f32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = fadd <4 x float> [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> [[TMP4]], i32 [[P:%.*]])
-; CHECK-NEXT:    store <4 x float> [[TMP5]], ptr [[C:%.*]], align 4
+; CHECK-NEXT:    [[I0:%.*]] = load float, ptr [[A:%.*]], align 4
+; CHECK-NEXT:    [[I1:%.*]] = load float, ptr [[B:%.*]], align 4
+; CHECK-NEXT:    [[ADD1:%.*]] = fadd float [[I0]], [[I1]]
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call float @llvm.powi.f32.i32(float [[ADD1]], i32 [[P:%.*]]) #[[ATTR3]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 1
+; CHECK-NEXT:    [[I2:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[B]], i32 1
+; CHECK-NEXT:    [[I3:%.*]] = load float, ptr [[ARRAYIDX3]], align 4
+; CHECK-NEXT:    [[ADD2:%.*]] = fadd float [[I2]], [[I3]]
+; CHECK-NEXT:    [[CALL2:%.*]] = tail call float @llvm.powi.f32.i32(float [[ADD2]], i32 [[P]]) #[[ATTR3]]
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2
+; CHECK-NEXT:    [[I4:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[B]], i32 2
+; CHECK-NEXT:    [[I5:%.*]] = load float, ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ADD3:%.*]] = fadd float [[I4]], [[I5]]
+; CHECK-NEXT:    [[CALL3:%.*]] = tail call float @llvm.powi.f32.i32(float [[ADD3]], i32 [[P]]) #[[ATTR3]]
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[A]], i32 3
+; CHECK-NEXT:    [[I6:%.*]] = load float, ptr [[ARRAYIDX6]], align 4
+; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[B]], i32 3
+; CHECK-NEXT:    [[I7:%.*]] = load float, ptr [[ARRAYIDX7]], align 4
+; CHECK-NEXT:    [[ADD4:%.*]] = fadd float [[I6]], [[I7]]
+; CHECK-NEXT:    [[CALL4:%.*]] = tail call float @llvm.powi.f32.i32(float [[ADD4]], i32 [[P]]) #[[ATTR3]]
+; CHECK-NEXT:    store float [[CALL1]], ptr [[C:%.*]], align 4
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[C]], i32 1
+; CHECK-NEXT:    store float [[CALL2]], ptr [[ARRAYIDX8]], align 4
+; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[C]], i32 2
+; CHECK-NEXT:    store float [[CALL3]], ptr [[ARRAYIDX9]], align 4
+; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[C]], i32 3
+; CHECK-NEXT:    store float [[CALL4]], ptr [[ARRAYIDX10]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/intrinsic_with_scalar_param.ll b/llvm/test/Transforms/SLPVectorizer/X86/intrinsic_with_scalar_param.ll
index f87a713..f8f2459 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/intrinsic_with_scalar_param.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/intrinsic_with_scalar_param.ll
@@ -5,9 +5,24 @@ declare float @llvm.powi.f32.i32(float, i32)
 define void @vec_powi_f32(ptr %a, ptr %c, i32 %P) {
 ; CHECK-LABEL: @vec_powi_f32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> [[TMP1]], i32 [[P:%.*]])
-; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[C:%.*]], align 4
+; CHECK-NEXT:    [[I0:%.*]] = load float, ptr [[A:%.*]], align 4
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call float @llvm.powi.f32.i32(float [[I0]], i32 [[P:%.*]])
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 1
+; CHECK-NEXT:    [[I2:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[CALL2:%.*]] = tail call float @llvm.powi.f32.i32(float [[I2]], i32 [[P]])
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2
+; CHECK-NEXT:    [[I4:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
+; CHECK-NEXT:    [[CALL3:%.*]] = tail call float @llvm.powi.f32.i32(float [[I4]], i32 [[P]])
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[A]], i32 3
+; CHECK-NEXT:    [[I6:%.*]] = load float, ptr [[ARRAYIDX6]], align 4
+; CHECK-NEXT:    [[CALL4:%.*]] = tail call float @llvm.powi.f32.i32(float [[I6]], i32 [[P]])
+; CHECK-NEXT:    store float [[CALL1]], ptr [[C:%.*]], align 4
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[C]], i32 1
+; CHECK-NEXT:    store float [[CALL2]], ptr [[ARRAYIDX8]], align 4
+; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[C]], i32 2
+; CHECK-NEXT:    store float [[CALL3]], ptr [[ARRAYIDX9]], align 4
+; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[C]], i32 3
+; CHECK-NEXT:    store float [[CALL4]], ptr [[ARRAYIDX10]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/powi-regression.ll b/llvm/test/Transforms/SLPVectorizer/X86/powi-regression.ll
index 9ae378f..0cf0d77 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/powi-regression.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/powi-regression.ll
@@ -6,7 +6,12 @@
 define <2 x double> @PR53887_v2f64(<2 x double> noundef %x) {
 ; CHECK-LABEL: @PR53887_v2f64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = call fast <2 x double> @llvm.powi.v2f64.i32(<2 x double> [[X:%.*]], i32 6)
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <2 x double> [[X:%.*]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast double @llvm.powi.f64.i32(double [[VECEXT]], i32 6)
+; CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <2 x double> zeroinitializer, double [[TMP2]], i64 0
+; CHECK-NEXT:    [[VECEXT1:%.*]] = extractelement <2 x double> [[X]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast double @llvm.powi.f64.i32(double [[VECEXT1]], i32 6)
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> [[VECINIT]], double [[TMP1]], i64 1
 ; CHECK-NEXT:    ret <2 x double> [[TMP0]]
 ;
 entry:
@@ -22,7 +27,18 @@ entry:
 define <4 x double> @PR53887_v4f64(<4 x double> noundef %x) {
 ; CHECK-LABEL: @PR53887_v4f64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = call fast <4 x double> @llvm.powi.v4f64.i32(<4 x double> [[X:%.*]], i32 6)
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x double> [[X:%.*]], i64 0
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast double @llvm.powi.f64.i32(double [[VECEXT]], i32 6)
+; CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x double> zeroinitializer, double [[TMP4]], i64 0
+; CHECK-NEXT:    [[VECEXT1:%.*]] = extractelement <4 x double> [[X]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast double @llvm.powi.f64.i32(double [[VECEXT1]], i32 6)
+; CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <4 x double> [[VECINIT]], double [[TMP1]], i64 1
+; CHECK-NEXT:    [[VECEXT4:%.*]] = extractelement <4 x double> [[X]], i64 2
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast double @llvm.powi.f64.i32(double [[VECEXT4]], i32 6)
+; CHECK-NEXT:    [[VECINIT6:%.*]] = insertelement <4 x double> [[VECINIT3]], double [[TMP2]], i64 2
+; CHECK-NEXT:    [[VECEXT7:%.*]] = extractelement <4 x double> [[X]], i64 3
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast double @llvm.powi.f64.i32(double [[VECEXT7]], i32 6)
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x double> [[VECINIT6]], double [[TMP3]], i64 3
 ; CHECK-NEXT:    ret <4 x double> [[TMP0]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/powi.ll b/llvm/test/Transforms/SLPVectorizer/X86/powi.ll
index ce5f912f..a177b46 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/powi.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/powi.ll
@@ -6,7 +6,12 @@
 
 define <2 x double> @buildvector_powi_2f64_6(<2 x double> %a) {
 ; CHECK-LABEL: @buildvector_powi_2f64_6(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> [[A:%.*]], i32 6)
+; CHECK-NEXT:    [[A0:%.*]] = extractelement <2 x double> [[A:%.*]], i32 0
+; CHECK-NEXT:    [[A1:%.*]] = extractelement <2 x double> [[A]], i32 1
+; CHECK-NEXT:    [[C0:%.*]] = call double @llvm.powi.f64.i32(double [[A0]], i32 6)
+; CHECK-NEXT:    [[C1:%.*]] = call double @llvm.powi.f64.i32(double [[A1]], i32 6)
+; CHECK-NEXT:    [[R0:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[R0]], double [[C1]], i32 1
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %a0 = extractelement <2 x double> %a, i32 0
@@ -39,7 +44,18 @@ define <2 x double> @buildvector_powi_2f64_var(<2 x double> %a, i32 %b) {
 
 define <4 x float> @buildvector_powi_4f32_3(<4 x float> %a) {
 ; CHECK-LABEL: @buildvector_powi_4f32_3(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> [[A:%.*]], i32 3)
+; CHECK-NEXT:    [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
+; CHECK-NEXT:    [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
+; CHECK-NEXT:    [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
+; CHECK-NEXT:    [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
+; CHECK-NEXT:    [[C0:%.*]] = call float @llvm.powi.f32.i32(float [[A0]], i32 3)
+; CHECK-NEXT:    [[C1:%.*]] = call float @llvm.powi.f32.i32(float [[A1]], i32 3)
+; CHECK-NEXT:    [[C2:%.*]] = call float @llvm.powi.f32.i32(float [[A2]], i32 3)
+; CHECK-NEXT:    [[C3:%.*]] = call float @llvm.powi.f32.i32(float [[A3]], i32 3)
+; CHECK-NEXT:    [[R0:%.*]] = insertelement <4 x float> poison, float [[C0]], i32 0
+; CHECK-NEXT:    [[R1:%.*]] = insertelement <4 x float> [[R0]], float [[C1]], i32 1
+; CHECK-NEXT:    [[R2:%.*]] = insertelement <4 x float> [[R1]], float [[C2]], i32 2
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> [[R2]], float [[C3]], i32 3
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %a0 = extractelement <4 x float> %a, i32 0
@@ -63,7 +79,18 @@ define <4 x float> @buildvector_powi_4f32_3(<4 x float> %a) {
 
 define <4 x double> @buildvector_powi_4f64_16(<4 x double> %a) {
 ; CHECK-LABEL: @buildvector_powi_4f64_16(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> [[A:%.*]], i32 16)
+; CHECK-NEXT:    [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i32 0
+; CHECK-NEXT:    [[A1:%.*]] = extractelement <4 x double> [[A]], i32 1
+; CHECK-NEXT:    [[A2:%.*]] = extractelement <4 x double> [[A]], i32 2
+; CHECK-NEXT:    [[A3:%.*]] = extractelement <4 x double> [[A]], i32 3
+; CHECK-NEXT:    [[C0:%.*]] = call double @llvm.powi.f64.i32(double [[A0]], i32 16)
+; CHECK-NEXT:    [[C1:%.*]] = call double @llvm.powi.f64.i32(double [[A1]], i32 16)
+; CHECK-NEXT:    [[C2:%.*]] = call double @llvm.powi.f64.i32(double [[A2]], i32 16)
+; CHECK-NEXT:    [[C3:%.*]] = call double @llvm.powi.f64.i32(double [[A3]], i32 16)
+; CHECK-NEXT:    [[R0:%.*]] = insertelement <4 x double> poison, double [[C0]], i32 0
+; CHECK-NEXT:    [[R1:%.*]] = insertelement <4 x double> [[R0]], double [[C1]], i32 1
+; CHECK-NEXT:    [[R2:%.*]] = insertelement <4 x double> [[R1]], double [[C2]], i32 2
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x double> [[R2]], double [[C3]], i32 3
 ; CHECK-NEXT:    ret <4 x double> [[TMP1]]
 ;
   %a0 = extractelement <4 x double> %a, i32 0
@@ -83,7 +110,30 @@ define <4 x double> @buildvector_powi_4f64_16(<4 x double> %a) {
 
 define <8 x float> @buildvector_powi_8f32_4(<8 x float> %a) {
 ; CHECK-LABEL: @buildvector_powi_8f32_4(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> [[A:%.*]], i32 4)
+; CHECK-NEXT:    [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i32 0
+; CHECK-NEXT:    [[A1:%.*]] = extractelement <8 x float> [[A]], i32 1
+; CHECK-NEXT:    [[A2:%.*]] = extractelement <8 x float> [[A]], i32 2
+; CHECK-NEXT:    [[A3:%.*]] = extractelement <8 x float> [[A]], i32 3
+; CHECK-NEXT:    [[A4:%.*]] = extractelement <8 x float> [[A]], i32 4
+; CHECK-NEXT:    [[A5:%.*]] = extractelement <8 x float> [[A]], i32 5
+; CHECK-NEXT:    [[A6:%.*]] = extractelement <8 x float> [[A]], i32 6
+; CHECK-NEXT:    [[A7:%.*]] = extractelement <8 x float> [[A]], i32 7
+; CHECK-NEXT:    [[C0:%.*]] = call float @llvm.powi.f32.i32(float [[A0]], i32 4)
+; CHECK-NEXT:    [[C1:%.*]] = call float @llvm.powi.f32.i32(float [[A1]], i32 4)
+; CHECK-NEXT:    [[C2:%.*]] = call float @llvm.powi.f32.i32(float [[A2]], i32 4)
+; CHECK-NEXT:    [[C3:%.*]] = call float @llvm.powi.f32.i32(float [[A3]], i32 4)
+; CHECK-NEXT:    [[C4:%.*]] = call float @llvm.powi.f32.i32(float [[A4]], i32 4)
+; CHECK-NEXT:    [[C5:%.*]] = call float @llvm.powi.f32.i32(float [[A5]], i32 4)
+; CHECK-NEXT:    [[C6:%.*]] = call float @llvm.powi.f32.i32(float [[A6]], i32 4)
+; CHECK-NEXT:    [[C7:%.*]] = call float @llvm.powi.f32.i32(float [[A7]], i32 4)
+; CHECK-NEXT:    [[R0:%.*]] = insertelement <8 x float> poison, float [[C0]], i32 0
+; CHECK-NEXT:    [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[C1]], i32 1
+; CHECK-NEXT:    [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[C2]], i32 2
+; CHECK-NEXT:    [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[C3]], i32 3
+; CHECK-NEXT:    [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[C4]], i32 4
+; CHECK-NEXT:    [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[C5]], i32 5
+; CHECK-NEXT:    [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[C6]], i32 6
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <8 x float> [[R6]], float [[C7]], i32 7
 ; CHECK-NEXT:    ret <8 x float> [[TMP1]]
 ;
   %a0 = extractelement <8 x float> %a, i32 0
@@ -119,7 +169,30 @@ define <8 x float> @buildvector_powi_8f32_4(<8 x float> %a) {
 
 define <8 x double> @buildvector_powi_8f64_5(<8 x double> %a) {
 ; CHECK-LABEL: @buildvector_powi_8f64_5(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> [[A:%.*]], i32 5)
+; CHECK-NEXT:    [[A0:%.*]] = extractelement <8 x double> [[A:%.*]], i32 0
+; CHECK-NEXT:    [[A1:%.*]] = extractelement <8 x double> [[A]], i32 1
+; CHECK-NEXT:    [[A2:%.*]] = extractelement <8 x double> [[A]], i32 2
+; CHECK-NEXT:    [[A3:%.*]] = extractelement <8 x double> [[A]], i32 3
+; CHECK-NEXT:    [[A4:%.*]] = extractelement <8 x double> [[A]], i32 4
+; CHECK-NEXT:    [[A5:%.*]] = extractelement <8 x double> [[A]], i32 5
+; CHECK-NEXT:    [[A6:%.*]] = extractelement <8 x double> [[A]], i32 6
+; CHECK-NEXT:    [[A7:%.*]] = extractelement <8 x double> [[A]], i32 7
+; CHECK-NEXT:    [[C0:%.*]] = call double @llvm.powi.f64.i32(double [[A0]], i32 5)
+; CHECK-NEXT:    [[C1:%.*]] = call double @llvm.powi.f64.i32(double [[A1]], i32 5)
+; CHECK-NEXT:    [[C2:%.*]] = call double @llvm.powi.f64.i32(double [[A2]], i32 5)
+; CHECK-NEXT:    [[C3:%.*]] = call double @llvm.powi.f64.i32(double [[A3]], i32 5)
+; CHECK-NEXT:    [[C4:%.*]] = call double @llvm.powi.f64.i32(double [[A4]], i32 5)
+; CHECK-NEXT:    [[C5:%.*]] = call double @llvm.powi.f64.i32(double [[A5]], i32 5)
+; CHECK-NEXT:    [[C6:%.*]] = call double @llvm.powi.f64.i32(double [[A6]], i32 5)
+; CHECK-NEXT:    [[C7:%.*]] = call double @llvm.powi.f64.i32(double [[A7]], i32 5)
+; CHECK-NEXT:    [[R0:%.*]] = insertelement <8 x double> poison, double [[C0]], i32 0
+; CHECK-NEXT:    [[R1:%.*]] = insertelement <8 x double> [[R0]], double [[C1]], i32 1
+; CHECK-NEXT:    [[R2:%.*]] = insertelement <8 x double> [[R1]], double [[C2]], i32 2
+; CHECK-NEXT:    [[R3:%.*]] = insertelement <8 x double> [[R2]], double [[C3]], i32 3
+; CHECK-NEXT:    [[R4:%.*]] = insertelement <8 x double> [[R3]], double [[C4]], i32 4
+; CHECK-NEXT:    [[R5:%.*]] = insertelement <8 x double> [[R4]], double [[C5]], i32 5
+; CHECK-NEXT:    [[R6:%.*]] = insertelement <8 x double> [[R5]], double [[C6]], i32 6
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <8 x double> [[R6]], double [[C7]], i32 7
 ; CHECK-NEXT:    ret <8 x double> [[TMP1]]
 ;
   %a0 = extractelement <8 x double> %a, i32 0
@@ -206,7 +279,54 @@ define <8 x double> @buildvector_powi_8f64_mismatch(<8 x double> %a) {
 
 define <16 x float> @buildvector_powi_16f32_n13(<16 x float> %a) {
 ; CHECK-LABEL: @buildvector_powi_16f32_n13(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> [[A:%.*]], i32 -13)
+; CHECK-NEXT:    [[A0:%.*]] = extractelement <16 x float> [[A:%.*]], i32 0
+; CHECK-NEXT:    [[A1:%.*]] = extractelement <16 x float> [[A]], i32 1
+; CHECK-NEXT:    [[A2:%.*]] = extractelement <16 x float> [[A]], i32 2
+; CHECK-NEXT:    [[A3:%.*]] = extractelement <16 x float> [[A]], i32 3
+; CHECK-NEXT:    [[A4:%.*]] = extractelement <16 x float> [[A]], i32 4
+; CHECK-NEXT:    [[A5:%.*]] = extractelement <16 x float> [[A]], i32 5
+; CHECK-NEXT:    [[A6:%.*]] = extractelement <16 x float> [[A]], i32 6
+; CHECK-NEXT:    [[A7:%.*]] = extractelement <16 x float> [[A]], i32 7
+; CHECK-NEXT:    [[A8:%.*]] = extractelement <16 x float> [[A]], i32 8
+; CHECK-NEXT:    [[A9:%.*]] = extractelement <16 x float> [[A]], i32 9
+; CHECK-NEXT:    [[A10:%.*]] = extractelement <16 x float> [[A]], i32 10
+; CHECK-NEXT:    [[A11:%.*]] = extractelement <16 x float> [[A]], i32 11
+; CHECK-NEXT:    [[A12:%.*]] = extractelement <16 x float> [[A]], i32 12
+; CHECK-NEXT:    [[A13:%.*]] = extractelement <16 x float> [[A]], i32 13
+; CHECK-NEXT:    [[A14:%.*]] = extractelement <16 x float> [[A]], i32 14
+; CHECK-NEXT:    [[A15:%.*]] = extractelement <16 x float> [[A]], i32 15
+; CHECK-NEXT:    [[C0:%.*]] = call float @llvm.powi.f32.i32(float [[A0]], i32 -13)
+; CHECK-NEXT:    [[C1:%.*]] = call float @llvm.powi.f32.i32(float [[A1]], i32 -13)
+; CHECK-NEXT:    [[C2:%.*]] = call float @llvm.powi.f32.i32(float [[A2]], i32 -13)
+; CHECK-NEXT:    [[C3:%.*]] = call float @llvm.powi.f32.i32(float [[A3]], i32 -13)
+; CHECK-NEXT:    [[C4:%.*]] = call float @llvm.powi.f32.i32(float [[A4]], i32 -13)
+; CHECK-NEXT:    [[C5:%.*]] = call float @llvm.powi.f32.i32(float [[A5]], i32 -13)
+; CHECK-NEXT:    [[C6:%.*]] = call float @llvm.powi.f32.i32(float [[A6]], i32 -13)
+; CHECK-NEXT:    [[C7:%.*]] = call float @llvm.powi.f32.i32(float [[A7]], i32 -13)
+; CHECK-NEXT:    [[C8:%.*]] = call float @llvm.powi.f32.i32(float [[A8]], i32 -13)
+; CHECK-NEXT:    [[C9:%.*]] = call float @llvm.powi.f32.i32(float [[A9]], i32 -13)
+; CHECK-NEXT:    [[C10:%.*]] = call float @llvm.powi.f32.i32(float [[A10]], i32 -13)
+; CHECK-NEXT:    [[C11:%.*]] = call float @llvm.powi.f32.i32(float [[A11]], i32 -13)
+; CHECK-NEXT:    [[C12:%.*]] = call float @llvm.powi.f32.i32(float [[A12]], i32 -13)
+; CHECK-NEXT:    [[C13:%.*]] = call float @llvm.powi.f32.i32(float [[A13]], i32 -13)
+; CHECK-NEXT:    [[C14:%.*]] = call float @llvm.powi.f32.i32(float [[A14]], i32 -13)
+; CHECK-NEXT:    [[C15:%.*]] = call float @llvm.powi.f32.i32(float [[A15]], i32 -13)
+; CHECK-NEXT:    [[R0:%.*]] = insertelement <16 x float> poison, float [[C0]], i32 0
+; CHECK-NEXT:    [[R1:%.*]] = insertelement <16 x float> [[R0]], float [[C1]], i32 1
+; CHECK-NEXT:    [[R2:%.*]] = insertelement <16 x float> [[R1]], float [[C2]], i32 2
+; CHECK-NEXT:    [[R3:%.*]] = insertelement <16 x float> [[R2]], float [[C3]], i32 3
+; CHECK-NEXT:    [[R4:%.*]] = insertelement <16 x float> [[R3]], float [[C4]], i32 4
+; CHECK-NEXT:    [[R5:%.*]] = insertelement <16 x float> [[R4]], float [[C5]], i32 5
+; CHECK-NEXT:    [[R6:%.*]] = insertelement <16 x float> [[R5]], float [[C6]], i32 6
+; CHECK-NEXT:    [[R7:%.*]] = insertelement <16 x float> [[R6]], float [[C7]], i32 7
+; CHECK-NEXT:    [[R8:%.*]] = insertelement <16 x float> [[R7]], float [[C8]], i32 8
+; CHECK-NEXT:    [[R9:%.*]] = insertelement <16 x float> [[R8]], float [[C9]], i32 9
+; CHECK-NEXT:    [[R10:%.*]] = insertelement <16 x float> [[R9]], float [[C10]], i32 10
+; CHECK-NEXT:    [[R11:%.*]] = insertelement <16 x float> [[R10]], float [[C11]], i32 11
+; CHECK-NEXT:    [[R12:%.*]] = insertelement <16 x float> [[R11]], float [[C12]], i32 12
+; CHECK-NEXT:    [[R13:%.*]] = insertelement <16 x float> [[R12]], float [[C13]], i32 13
+; CHECK-NEXT:    [[R14:%.*]] = insertelement <16 x float> [[R13]], float [[C14]], i32 14
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <16 x float> [[R14]], float [[C15]], i32 15
 ; CHECK-NEXT:    ret <16 x float> [[TMP1]]
 ;
   %a0  = extractelement <16 x float> %a, i32 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-transpose.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-transpose.ll
index 2cdbd5c..cb47830 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-transpose.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-transpose.ll
@@ -49,11 +49,24 @@ define i32 @reduce_and4(i32 %acc, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <
 ;
 ; AVX512-LABEL: @reduce_and4(
 ; AVX512-NEXT:  entry:
-; AVX512-NEXT:    [[TMP0:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
-; AVX512-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP0]])
-; AVX512-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
-; AVX512-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP2]])
-; AVX512-NEXT:    [[OP_RDX:%.*]] = and i32 [[TMP1]], [[TMP3]]
+; AVX512-NEXT:    [[VECEXT:%.*]] = extractelement <4 x i32> [[V1:%.*]], i64 0
+; AVX512-NEXT:    [[VECEXT1:%.*]] = extractelement <4 x i32> [[V1]], i64 1
+; AVX512-NEXT:    [[VECEXT2:%.*]] = extractelement <4 x i32> [[V1]], i64 2
+; AVX512-NEXT:    [[VECEXT4:%.*]] = extractelement <4 x i32> [[V1]], i64 3
+; AVX512-NEXT:    [[VECEXT7:%.*]] = extractelement <4 x i32> [[V2:%.*]], i64 0
+; AVX512-NEXT:    [[VECEXT8:%.*]] = extractelement <4 x i32> [[V2]], i64 1
+; AVX512-NEXT:    [[VECEXT10:%.*]] = extractelement <4 x i32> [[V2]], i64 2
+; AVX512-NEXT:    [[VECEXT12:%.*]] = extractelement <4 x i32> [[V2]], i64 3
+; AVX512-NEXT:    [[TMP0:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <16 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX512-NEXT:    [[TMP1:%.*]] = insertelement <16 x i32> [[TMP0]], i32 [[VECEXT8]], i32 8
+; AVX512-NEXT:    [[TMP2:%.*]] = insertelement <16 x i32> [[TMP1]], i32 [[VECEXT7]], i32 9
+; AVX512-NEXT:    [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[VECEXT10]], i32 10
+; AVX512-NEXT:    [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[VECEXT12]], i32 11
+; AVX512-NEXT:    [[TMP5:%.*]] = insertelement <16 x i32> [[TMP4]], i32 [[VECEXT1]], i32 12
+; AVX512-NEXT:    [[TMP6:%.*]] = insertelement <16 x i32> [[TMP5]], i32 [[VECEXT]], i32 13
+; AVX512-NEXT:    [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[VECEXT2]], i32 14
+; AVX512-NEXT:    [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[VECEXT4]], i32 15
+; AVX512-NEXT:    [[OP_RDX:%.*]] = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[TMP8]])
 ; AVX512-NEXT:    [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[ACC:%.*]]
 ; AVX512-NEXT:    ret i32 [[OP_RDX1]]
 ;
@@ -131,11 +144,24 @@ define i32 @reduce_and4_transpose(i32 %acc, <4 x i32> %v1, <4 x i32> %v2, <4 x i
 ; AVX2-NEXT:    ret i32 [[OP_RDX]]
 ;
 ; AVX512-LABEL: @reduce_and4_transpose(
-; AVX512-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-; AVX512-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]])
-; AVX512-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-; AVX512-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP3]])
-; AVX512-NEXT:    [[OP_RDX:%.*]] = and i32 [[TMP2]], [[TMP4]]
+; AVX512-NEXT:    [[VECEXT:%.*]] = extractelement <4 x i32> [[V1:%.*]], i64 0
+; AVX512-NEXT:    [[VECEXT1:%.*]] = extractelement <4 x i32> [[V2:%.*]], i64 0
+; AVX512-NEXT:    [[VECEXT7:%.*]] = extractelement <4 x i32> [[V1]], i64 1
+; AVX512-NEXT:    [[VECEXT8:%.*]] = extractelement <4 x i32> [[V2]], i64 1
+; AVX512-NEXT:    [[VECEXT15:%.*]] = extractelement <4 x i32> [[V1]], i64 2
+; AVX512-NEXT:    [[VECEXT16:%.*]] = extractelement <4 x i32> [[V2]], i64 2
+; AVX512-NEXT:    [[VECEXT23:%.*]] = extractelement <4 x i32> [[V1]], i64 3
+; AVX512-NEXT:    [[VECEXT24:%.*]] = extractelement <4 x i32> [[V2]], i64 3
+; AVX512-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX512-NEXT:    [[TMP2:%.*]] = insertelement <16 x i32> [[TMP1]], i32 [[VECEXT24]], i32 8
+; AVX512-NEXT:    [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[VECEXT16]], i32 9
+; AVX512-NEXT:    [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[VECEXT8]], i32 10
+; AVX512-NEXT:    [[TMP5:%.*]] = insertelement <16 x i32> [[TMP4]], i32 [[VECEXT1]], i32 11
+; AVX512-NEXT:    [[TMP6:%.*]] = insertelement <16 x i32> [[TMP5]], i32 [[VECEXT23]], i32 12
+; AVX512-NEXT:    [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[VECEXT15]], i32 13
+; AVX512-NEXT:    [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[VECEXT7]], i32 14
+; AVX512-NEXT:    [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[VECEXT]], i32 15
+; AVX512-NEXT:    [[OP_RDX:%.*]] = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[TMP9]])
 ; AVX512-NEXT:    [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[ACC:%.*]]
 ; AVX512-NEXT:    ret i32 [[OP_RDX1]]
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/scalarize-ctlz.ll b/llvm/test/Transforms/SLPVectorizer/X86/scalarize-ctlz.ll
new file mode 100644
index 0000000..0f9b2e9
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/scalarize-ctlz.ll
@@ -0,0 +1,203 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-- -mcpu=x86-64 %s | FileCheck %s --check-prefixes=SSE,SSE2
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-- -mcpu=x86-64-v2 %s | FileCheck %s --check-prefixes=SSE,SSE4
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-- -mcpu=x86-64-v3 %s | FileCheck %s --check-prefixes=AVX2
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-- -mcpu=x86-64-v4 %s | FileCheck %s --check-prefixes=AVX512
+
+; PR124993 - ensure scalarized CTLZ calls remain scalarized unless there is a definite cost improvement, the cost of scalarization was being over estimated.
+
+define <2 x i64> @scalarize_ctlz_v2i64(<2 x i64> %v)  {
+; SSE-LABEL: define <2 x i64> @scalarize_ctlz_v2i64(
+; SSE-SAME: <2 x i64> [[V:%.*]]) #[[ATTR0:[0-9]+]] {
+; SSE-NEXT:    [[V0:%.*]] = extractelement <2 x i64> [[V]], i64 0
+; SSE-NEXT:    [[V1:%.*]] = extractelement <2 x i64> [[V]], i64 1
+; SSE-NEXT:    [[C0:%.*]] = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[V0]], i1 false)
+; SSE-NEXT:    [[C1:%.*]] = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[V1]], i1 false)
+; SSE-NEXT:    [[R0:%.*]] = insertelement <2 x i64> poison, i64 [[C0]], i64 0
+; SSE-NEXT:    [[R1:%.*]] = insertelement <2 x i64> [[R0]], i64 [[C1]], i64 1
+; SSE-NEXT:    ret <2 x i64> [[R1]]
+;
+; AVX2-LABEL: define <2 x i64> @scalarize_ctlz_v2i64(
+; AVX2-SAME: <2 x i64> [[V:%.*]]) #[[ATTR0:[0-9]+]] {
+; AVX2-NEXT:    [[V0:%.*]] = extractelement <2 x i64> [[V]], i64 0
+; AVX2-NEXT:    [[V1:%.*]] = extractelement <2 x i64> [[V]], i64 1
+; AVX2-NEXT:    [[C0:%.*]] = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[V0]], i1 false)
+; AVX2-NEXT:    [[C1:%.*]] = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[V1]], i1 false)
+; AVX2-NEXT:    [[R0:%.*]] = insertelement <2 x i64> poison, i64 [[C0]], i64 0
+; AVX2-NEXT:    [[R1:%.*]] = insertelement <2 x i64> [[R0]], i64 [[C1]], i64 1
+; AVX2-NEXT:    ret <2 x i64> [[R1]]
+;
+; AVX512-LABEL: define <2 x i64> @scalarize_ctlz_v2i64(
+; AVX512-SAME: <2 x i64> [[V:%.*]]) #[[ATTR0:[0-9]+]] {
+; AVX512-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> [[V]], i1 false)
+; AVX512-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %v0 = extractelement <2 x i64> %v, i64 0
+  %v1 = extractelement <2 x i64> %v, i64 1
+  %c0 = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 %v0, i1 false)
+  %c1 = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 %v1, i1 false)
+  %r0 = insertelement <2 x i64> poison, i64 %c0, i64 0
+  %r1 = insertelement <2 x i64> %r0, i64 %c1, i64 1
+  ret <2 x i64> %r1
+}
+
+define <4 x i64> @scalarize_ctlz_v4i64(<4 x i64> %v)  {
+; SSE-LABEL: define <4 x i64> @scalarize_ctlz_v4i64(
+; SSE-SAME: <4 x i64> [[V:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[V0:%.*]] = extractelement <4 x i64> [[V]], i64 0
+; SSE-NEXT:    [[V1:%.*]] = extractelement <4 x i64> [[V]], i64 1
+; SSE-NEXT:    [[V2:%.*]] = extractelement <4 x i64> [[V]], i64 2
+; SSE-NEXT:    [[V3:%.*]] = extractelement <4 x i64> [[V]], i64 3
+; SSE-NEXT:    [[C0:%.*]] = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[V0]], i1 false)
+; SSE-NEXT:    [[C1:%.*]] = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[V1]], i1 false)
+; SSE-NEXT:    [[C2:%.*]] = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[V2]], i1 false)
+; SSE-NEXT:    [[C3:%.*]] = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[V3]], i1 false)
+; SSE-NEXT:    [[R0:%.*]] = insertelement <4 x i64> poison, i64 [[C0]], i64 0
+; SSE-NEXT:    [[R1:%.*]] = insertelement <4 x i64> [[R0]], i64 [[C1]], i64 1
+; SSE-NEXT:    [[R2:%.*]] = insertelement <4 x i64> [[R1]], i64 [[C2]], i64 2
+; SSE-NEXT:    [[R3:%.*]] = insertelement <4 x i64> [[R2]], i64 [[C3]], i64 3
+; SSE-NEXT:    ret <4 x i64> [[R3]]
+;
+; AVX2-LABEL: define <4 x i64> @scalarize_ctlz_v4i64(
+; AVX2-SAME: <4 x i64> [[V:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT:    [[V0:%.*]] = extractelement <4 x i64> [[V]], i64 0
+; AVX2-NEXT:    [[V1:%.*]] = extractelement <4 x i64> [[V]], i64 1
+; AVX2-NEXT:    [[V2:%.*]] = extractelement <4 x i64> [[V]], i64 2
+; AVX2-NEXT:    [[V3:%.*]] = extractelement <4 x i64> [[V]], i64 3
+; AVX2-NEXT:    [[C0:%.*]] = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[V0]], i1 false)
+; AVX2-NEXT:    [[C1:%.*]] = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[V1]], i1 false)
+; AVX2-NEXT:    [[C2:%.*]] = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[V2]], i1 false)
+; AVX2-NEXT:    [[C3:%.*]] = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[V3]], i1 false)
+; AVX2-NEXT:    [[R0:%.*]] = insertelement <4 x i64> poison, i64 [[C0]], i64 0
+; AVX2-NEXT:    [[R1:%.*]] = insertelement <4 x i64> [[R0]], i64 [[C1]], i64 1
+; AVX2-NEXT:    [[R2:%.*]] = insertelement <4 x i64> [[R1]], i64 [[C2]], i64 2
+; AVX2-NEXT:    [[R3:%.*]] = insertelement <4 x i64> [[R2]], i64 [[C3]], i64 3
+; AVX2-NEXT:    ret <4 x i64> [[R3]]
+;
+; AVX512-LABEL: define <4 x i64> @scalarize_ctlz_v4i64(
+; AVX512-SAME: <4 x i64> [[V:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT:    [[TMP1:%.*]] = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> [[V]], i1 false)
+; AVX512-NEXT:    ret <4 x i64> [[TMP1]]
+;
+  %v0 = extractelement <4 x i64> %v, i64 0
+  %v1 = extractelement <4 x i64> %v, i64 1
+  %v2 = extractelement <4 x i64> %v, i64 2
+  %v3 = extractelement <4 x i64> %v, i64 3
+  %c0 = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 %v0, i1 false)
+  %c1 = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 %v1, i1 false)
+  %c2 = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 %v2, i1 false)
+  %c3 = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 %v3, i1 false)
+  %r0 = insertelement <4 x i64> poison, i64 %c0, i64 0
+  %r1 = insertelement <4 x i64> %r0, i64 %c1, i64 1
+  %r2 = insertelement <4 x i64> %r1, i64 %c2, i64 2
+  %r3 = insertelement <4 x i64> %r2, i64 %c3, i64 3
+  ret <4 x i64> %r3
+}
+
+define <8 x i64> @scalarize_ctlz_v8i64(<8 x i64> %v)  {
+; SSE2-LABEL: define <8 x i64> @scalarize_ctlz_v8i64(
+; SSE2-SAME: <8 x i64> [[V:%.*]]) #[[ATTR0]] {
+; SSE2-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i64> [[V]], <8 x i64> poison, <2 x i32> <i32 0, i32 1>
+; SSE2-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> [[TMP1]], i1 false)
+; SSE2-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i64> [[V]], <8 x i64> poison, <2 x i32> <i32 2, i32 3>
+; SSE2-NEXT:    [[TMP4:%.*]] = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> [[TMP3]], i1 false)
+; SSE2-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i64> [[V]], <8 x i64> poison, <2 x i32> <i32 4, i32 5>
+; SSE2-NEXT:    [[TMP6:%.*]] = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> [[TMP5]], i1 false)
+; SSE2-NEXT:    [[TMP7:%.*]] = shufflevector <8 x i64> [[V]], <8 x i64> poison, <2 x i32> <i32 6, i32 7>
+; SSE2-NEXT:    [[TMP8:%.*]] = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> [[TMP7]], i1 false)
+; SSE2-NEXT:    [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT:    [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT:    [[R31:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
+; SSE2-NEXT:    [[TMP11:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT:    [[R52:%.*]] = shufflevector <8 x i64> [[R31]], <8 x i64> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; SSE2-NEXT:    [[TMP12:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE2-NEXT:    [[R73:%.*]] = shufflevector <8 x i64> [[R52]], <8 x i64> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; SSE2-NEXT:    ret <8 x i64> [[R73]]
+;
+; SSE4-LABEL: define <8 x i64> @scalarize_ctlz_v8i64(
+; SSE4-SAME: <8 x i64> [[V:%.*]]) #[[ATTR0]] {
+; SSE4-NEXT:    [[V0:%.*]] = extractelement <8 x i64> [[V]], i64 0
+; SSE4-NEXT:    [[V1:%.*]] = extractelement <8 x i64> [[V]], i64 1
+; SSE4-NEXT:    [[V2:%.*]] = extractelement <8 x i64> [[V]], i64 2
+; SSE4-NEXT:    [[V3:%.*]] = extractelement <8 x i64> [[V]], i64 3
+; SSE4-NEXT:    [[V4:%.*]] = extractelement <8 x i64> [[V]], i64 4
+; SSE4-NEXT:    [[V5:%.*]] = extractelement <8 x i64> [[V]], i64 5
+; SSE4-NEXT:    [[V6:%.*]] = extractelement <8 x i64> [[V]], i64 6
+; SSE4-NEXT:    [[V7:%.*]] = extractelement <8 x i64> [[V]], i64 7
+; SSE4-NEXT:    [[C0:%.*]] = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[V0]], i1 false)
+; SSE4-NEXT:    [[C1:%.*]] = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[V1]], i1 false)
+; SSE4-NEXT:    [[C2:%.*]] = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[V2]], i1 false)
+; SSE4-NEXT:    [[C3:%.*]] = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[V3]], i1 false)
+; SSE4-NEXT:    [[C4:%.*]] = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[V4]], i1 false)
+; SSE4-NEXT:    [[C5:%.*]] = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[V5]], i1 false)
+; SSE4-NEXT:    [[C6:%.*]] = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[V6]], i1 false)
+; SSE4-NEXT:    [[C7:%.*]] = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[V7]], i1 false)
+; SSE4-NEXT:    [[R0:%.*]] = insertelement <8 x i64> poison, i64 [[C0]], i64 0
+; SSE4-NEXT:    [[R1:%.*]] = insertelement <8 x i64> [[R0]], i64 [[C1]], i64 1
+; SSE4-NEXT:    [[R2:%.*]] = insertelement <8 x i64> [[R1]], i64 [[C2]], i64 2
+; SSE4-NEXT:    [[R3:%.*]] = insertelement <8 x i64> [[R2]], i64 [[C3]], i64 3
+; SSE4-NEXT:    [[R4:%.*]] = insertelement <8 x i64> [[R3]], i64 [[C4]], i64 4
+; SSE4-NEXT:    [[R5:%.*]] = insertelement <8 x i64> [[R4]], i64 [[C5]], i64 5
+; SSE4-NEXT:    [[R6:%.*]] = insertelement <8 x i64> [[R5]], i64 [[C6]], i64 6
+; SSE4-NEXT:    [[R7:%.*]] = insertelement <8 x i64> [[R6]], i64 [[C7]], i64 7
+; SSE4-NEXT:    ret <8 x i64> [[R7]]
+;
+; AVX2-LABEL: define <8 x i64> @scalarize_ctlz_v8i64(
+; AVX2-SAME: <8 x i64> [[V:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT:    [[V0:%.*]] = extractelement <8 x i64> [[V]], i64 0
+; AVX2-NEXT:    [[V1:%.*]] = extractelement <8 x i64> [[V]], i64 1
+; AVX2-NEXT:    [[V2:%.*]] = extractelement <8 x i64> [[V]], i64 2
+; AVX2-NEXT:    [[V3:%.*]] = extractelement <8 x i64> [[V]], i64 3
+; AVX2-NEXT:    [[V4:%.*]] = extractelement <8 x i64> [[V]], i64 4
+; AVX2-NEXT:    [[V5:%.*]] = extractelement <8 x i64> [[V]], i64 5
+; AVX2-NEXT:    [[V6:%.*]] = extractelement <8 x i64> [[V]], i64 6
+; AVX2-NEXT:    [[V7:%.*]] = extractelement <8 x i64> [[V]], i64 7
+; AVX2-NEXT:    [[C0:%.*]] = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[V0]], i1 false)
+; AVX2-NEXT:    [[C1:%.*]] = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[V1]], i1 false)
+; AVX2-NEXT:    [[C2:%.*]] = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[V2]], i1 false)
+; AVX2-NEXT:    [[C3:%.*]] = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[V3]], i1 false)
+; AVX2-NEXT:    [[C4:%.*]] = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[V4]], i1 false)
+; AVX2-NEXT:    [[C5:%.*]] = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[V5]], i1 false)
+; AVX2-NEXT:    [[C6:%.*]] = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[V6]], i1 false)
+; AVX2-NEXT:    [[C7:%.*]] = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[V7]], i1 false)
+; AVX2-NEXT:    [[R0:%.*]] = insertelement <8 x i64> poison, i64 [[C0]], i64 0
+; AVX2-NEXT:    [[R1:%.*]] = insertelement <8 x i64> [[R0]], i64 [[C1]], i64 1
+; AVX2-NEXT:    [[R2:%.*]] = insertelement <8 x i64> [[R1]], i64 [[C2]], i64 2
+; AVX2-NEXT:    [[R3:%.*]] = insertelement <8 x i64> [[R2]], i64 [[C3]], i64 3
+; AVX2-NEXT:    [[R4:%.*]] = insertelement <8 x i64> [[R3]], i64 [[C4]], i64 4
+; AVX2-NEXT:    [[R5:%.*]] = insertelement <8 x i64> [[R4]], i64 [[C5]], i64 5
+; AVX2-NEXT:    [[R6:%.*]] = insertelement <8 x i64> [[R5]], i64 [[C6]], i64 6
+; AVX2-NEXT:    [[R7:%.*]] = insertelement <8 x i64> [[R6]], i64 [[C7]], i64 7
+; AVX2-NEXT:    ret <8 x i64> [[R7]]
+;
+; AVX512-LABEL: define <8 x i64> @scalarize_ctlz_v8i64(
+; AVX512-SAME: <8 x i64> [[V:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT:    [[TMP1:%.*]] = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> [[V]], i1 false)
+; AVX512-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %v0 = extractelement <8 x i64> %v, i64 0
+  %v1 = extractelement <8 x i64> %v, i64 1
+  %v2 = extractelement <8 x i64> %v, i64 2
+  %v3 = extractelement <8 x i64> %v, i64 3
+  %v4 = extractelement <8 x i64> %v, i64 4
+  %v5 = extractelement <8 x i64> %v, i64 5
+  %v6 = extractelement <8 x i64> %v, i64 6
+  %v7 = extractelement <8 x i64> %v, i64 7
+  %c0 = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 %v0, i1 false)
+  %c1 = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 %v1, i1 false)
+  %c2 = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 %v2, i1 false)
+  %c3 = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 %v3, i1 false)
+  %c4 = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 %v4, i1 false)
+  %c5 = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 %v5, i1 false)
+  %c6 = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 %v6, i1 false)
+  %c7 = tail call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 %v7, i1 false)
+  %r0 = insertelement <8 x i64> poison, i64 %c0, i64 0
+  %r1 = insertelement <8 x i64> %r0, i64 %c1, i64 1
+  %r2 = insertelement <8 x i64> %r1, i64 %c2, i64 2
+  %r3 = insertelement <8 x i64> %r2, i64 %c3, i64 3
+  %r4 = insertelement <8 x i64> %r3, i64 %c4, i64 4
+  %r5 = insertelement <8 x i64> %r4, i64 %c5, i64 5
+  %r6 = insertelement <8 x i64> %r5, i64 %c6, i64 6
+  %r7 = insertelement <8 x i64> %r6, i64 %c7, i64 7
+  ret <8 x i64> %r7
+}
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sin-sqrt.ll b/llvm/test/Transforms/SLPVectorizer/X86/sin-sqrt.ll
index 68fb6c6..e1e80d9 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/sin-sqrt.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/sin-sqrt.ll
@@ -11,16 +11,25 @@ declare double @llvm.sin.f64(double)
 
 define void @test() {
 ; CHECK-LABEL: @test(
-; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x double>, ptr @src, align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> poison, <2 x i32> <i32 2, i32 6>
-; CHECK-NEXT:    [[TMP3:%.*]] = call fast <2 x double> @llvm.sin.v2f64(<2 x double> [[TMP2]])
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> poison, <2 x i32> <i32 3, i32 7>
-; CHECK-NEXT:    [[TMP5:%.*]] = call fast <2 x double> @llvm.sin.v2f64(<2 x double> [[TMP4]])
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> poison, <2 x i32> <i32 0, i32 4>
+; CHECK-NEXT:    [[A2:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 2), align 8
+; CHECK-NEXT:    [[A3:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 3), align 8
+; CHECK-NEXT:    [[A6:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 6), align 8
+; CHECK-NEXT:    [[A7:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 7), align 8
+; CHECK-NEXT:    [[SIN0:%.*]] = call fast double @llvm.sin.f64(double [[A2]])
+; CHECK-NEXT:    [[SIN1:%.*]] = call fast double @llvm.sin.f64(double [[A3]])
+; CHECK-NEXT:    [[SIN2:%.*]] = call fast double @llvm.sin.f64(double [[A6]])
+; CHECK-NEXT:    [[SIN3:%.*]] = call fast double @llvm.sin.f64(double [[A7]])
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @src, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr getelementptr inbounds ([8 x double], ptr @src, i32 0, i64 4), align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP2]], <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[TMP7:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP6]])
-; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> poison, <2 x i32> <i32 1, i32 5>
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP2]], <2 x i32> <i32 1, i32 3>
 ; CHECK-NEXT:    [[TMP9:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP8]])
+; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <2 x double> poison, double [[SIN1]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> [[TMP13]], double [[SIN3]], i32 1
 ; CHECK-NEXT:    [[TMP10:%.*]] = fadd fast <2 x double> [[TMP7]], [[TMP5]]
+; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <2 x double> poison, double [[SIN0]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP14]], double [[SIN2]], i32 1
 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP9]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = fadd fast <2 x double> [[TMP10]], [[TMP11]]
 ; CHECK-NEXT:    store <2 x double> [[TMP12]], ptr @dst, align 8
diff --git a/llvm/test/Transforms/SLPVectorizer/reduction-gather-non-scheduled-extracts.ll b/llvm/test/Transforms/SLPVectorizer/reduction-gather-non-scheduled-extracts.ll
index f1034f3..ae5018a6 100644
--- a/llvm/test/Transforms/SLPVectorizer/reduction-gather-non-scheduled-extracts.ll
+++ b/llvm/test/Transforms/SLPVectorizer/reduction-gather-non-scheduled-extracts.ll
@@ -1,22 +1,43 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
-; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-sie-ps5 < %s | FileCheck %s %}
-; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s %}
+; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-sie-ps5 < %s | FileCheck %s --check-prefix=X86 %}
+; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=AARCH64 %}
 
 define void @tes() {
-; CHECK-LABEL: define void @tes() {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = fcmp ole <2 x double> zeroinitializer, zeroinitializer
-; CHECK-NEXT:    br label [[TMP1:%.*]]
-; CHECK:       1:
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i1> zeroinitializer, <2 x i1> [[TMP0]], <4 x i32> <i32 0, i32 0, i32 0, i32 2>
-; CHECK-NEXT:    [[TMP4:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP3]])
-; CHECK-NEXT:    [[OP_RDX:%.*]] = select i1 false, i1 [[TMP4]], i1 false
-; CHECK-NEXT:    [[OP_RDX1:%.*]] = select i1 false, i1 [[OP_RDX]], i1 false
-; CHECK-NEXT:    br i1 [[OP_RDX1]], label [[TMP6:%.*]], label [[TMP5:%.*]]
-; CHECK:       4:
-; CHECK-NEXT:    ret void
-; CHECK:       5:
-; CHECK-NEXT:    ret void
+; X86-LABEL: define void @tes() {
+; X86-NEXT:  entry:
+; X86-NEXT:    [[TMP0:%.*]] = fcmp ole <2 x double> zeroinitializer, zeroinitializer
+; X86-NEXT:    br label [[TMP1:%.*]]
+; X86:       1:
+; X86-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i1> zeroinitializer, <2 x i1> [[TMP0]], <4 x i32> <i32 0, i32 0, i32 0, i32 2>
+; X86-NEXT:    [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
+; X86-NEXT:    [[OP_RDX:%.*]] = select i1 false, i1 [[TMP3]], i1 false
+; X86-NEXT:    [[OP_RDX1:%.*]] = select i1 false, i1 [[OP_RDX]], i1 false
+; X86-NEXT:    br i1 [[OP_RDX1]], label [[TMP4:%.*]], label [[TMP5:%.*]]
+; X86:       4:
+; X86-NEXT:    ret void
+; X86:       5:
+; X86-NEXT:    ret void
+;
+; AARCH64-LABEL: define void @tes() {
+; AARCH64-NEXT:  entry:
+; AARCH64-NEXT:    [[TMP0:%.*]] = extractelement <2 x i1> zeroinitializer, i64 0
+; AARCH64-NEXT:    [[TMP1:%.*]] = extractelement <2 x i1> zeroinitializer, i64 0
+; AARCH64-NEXT:    [[TMP2:%.*]] = fcmp ole <2 x double> zeroinitializer, zeroinitializer
+; AARCH64-NEXT:    [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i64 0
+; AARCH64-NEXT:    [[TMP4:%.*]] = extractelement <2 x i1> zeroinitializer, i64 0
+; AARCH64-NEXT:    br label [[TMP5:%.*]]
+; AARCH64:       5:
+; AARCH64-NEXT:    [[TMP6:%.*]] = select i1 false, i1 false, i1 false
+; AARCH64-NEXT:    [[TMP7:%.*]] = select i1 [[TMP6]], i1 [[TMP0]], i1 false
+; AARCH64-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i1 [[TMP1]], i1 false
+; AARCH64-NEXT:    [[TMP9:%.*]] = select i1 [[TMP8]], i1 false, i1 false
+; AARCH64-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i1 [[TMP3]], i1 false
+; AARCH64-NEXT:    [[TMP11:%.*]] = select i1 [[TMP10]], i1 [[TMP4]], i1 false
+; AARCH64-NEXT:    br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]]
+; AARCH64:       12:
+; AARCH64-NEXT:    ret void
+; AARCH64:       13:
+; AARCH64-NEXT:    ret void
 ;
 entry:
   %0 = extractelement <2 x i1> zeroinitializer, i64 0
diff --git a/llvm/test/Transforms/SLPVectorizer/scalarazied-result.ll b/llvm/test/Transforms/SLPVectorizer/scalarazied-result.ll
index 2570cdb..0a6b86c 100644
--- a/llvm/test/Transforms/SLPVectorizer/scalarazied-result.ll
+++ b/llvm/test/Transforms/SLPVectorizer/scalarazied-result.ll
@@ -1,11 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: %if x86-registered-target %{ opt -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -S < %s | FileCheck %s %}
-; RUN: %if aarch64-registered-target %{ opt -passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu -S < %s | FileCheck %s %}
+; RUN: %if x86-registered-target %{ opt -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -S < %s | FileCheck %s --check-prefix=X86 %}
+; RUN: %if aarch64-registered-target %{ opt -passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu -S < %s | FileCheck %s --check-prefix=AARCH64 %}
 
 define void @test() {
-; CHECK-LABEL: @test(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    ret void
+; X86-LABEL: @test(
+; X86-NEXT:  entry:
+; X86-NEXT:    ret void
+;
+; AARCH64-LABEL: @test(
+; AARCH64-NEXT:  entry:
+; AARCH64-NEXT:    [[TMP0:%.*]] = extractelement <8 x half> zeroinitializer, i64 1
+; AARCH64-NEXT:    [[TOBOOL:%.*]] = fcmp une half [[TMP0]], 0xH0000
+; AARCH64-NEXT:    [[TMP1:%.*]] = extractelement <8 x half> zeroinitializer, i64 1
+; AARCH64-NEXT:    [[TOBOOL3:%.*]] = fcmp une half [[TMP1]], 0xH0000
+; AARCH64-NEXT:    ret void
 ;
 entry:
   %0 = extractelement <8 x half> zeroinitializer, i64 1
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 3c0069d..aad7a08 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -566,6 +566,54 @@ def have_ld64_plugin_support():
 if have_ld64_plugin_support():
     config.available_features.add("ld64_plugin")
 
+def host_unwind_supports_jit():
+    # Do we expect the host machine to support JIT registration of clang's
+    # default unwind info format for the host (e.g. eh-frames, compact-unwind,
+    # etc.).
+
+    # Linux and the BSDs use DWARF eh-frames and all known unwinders support
+    # register_frame at minimum.
+    if platform.system() in [ "Linux", "FreeBSD", "NetBSD" ]:
+        return True
+
+    # Windows does not support frame info without the ORC runtime.
+    if platform.system() == "Windows":
+        return False
+
+    # On Darwin/x86-64 clang produces both eh-frames and compact-unwind, and
+    # libunwind supports register_frame. On Darwin/arm64 clang produces
+    # compact-unwind only, and JIT'd registration is not available before
+    # macOS 14.0.
+    if platform.system() == "Darwin":
+
+        assert (
+            "arm64" in config.host_triple
+            or "x86_64" in config.host_triple
+        )
+
+        if "x86_64" in config.host_triple:
+            return True
+
+        # Must be arm64. Check the macOS version.
+        try:
+            osx_version = subprocess.check_output(
+                ["sw_vers", "-productVersion"], universal_newlines=True
+            )
+            osx_version = tuple(int(x) for x in osx_version.split("."))
+            if len(osx_version) == 2:
+                osx_version = (osx_version[0], osx_version[1], 0)
+            if osx_version >= (14, 0):
+                return True
+        except:
+            pass
+
+        return False
+
+    return False
+
+if host_unwind_supports_jit():
+    config.available_features.add("host-unwind-supports-jit")
+
 # Ask llvm-config about asserts
 llvm_config.feature_config(
     [
diff --git a/llvm/unittests/Frontend/OpenMPContextTest.cpp b/llvm/unittests/Frontend/OpenMPContextTest.cpp
index 1c999a2..f9683ae 100644
--- a/llvm/unittests/Frontend/OpenMPContextTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPContextTest.cpp
@@ -33,9 +33,11 @@ TEST_F(OpenMPContextTest, RoundTripAndAssociation) {
 #define OMP_TRAIT_SELECTOR(Enum, TraitSetEnum, Str, RequiresProperty)          \
   EXPECT_EQ(TraitSelector::Enum,                                               \
             getOpenMPContextTraitSelectorKind(                                 \
-                getOpenMPContextTraitSelectorName(TraitSelector::Enum)));      \
+                getOpenMPContextTraitSelectorName(TraitSelector::Enum),        \
+                TraitSet::TraitSetEnum));                                      \
   EXPECT_EQ(Str, getOpenMPContextTraitSelectorName(                            \
-                     getOpenMPContextTraitSelectorKind(Str)));
+                     getOpenMPContextTraitSelectorKind(                        \
+                         Str, TraitSet::TraitSetEnum)));
 #define OMP_TRAIT_PROPERTY(Enum, TraitSetEnum, TraitSelectorEnum, Str)         \
   EXPECT_EQ(TraitProperty::Enum,                                               \
             getOpenMPContextTraitPropertyKind(                                 \
@@ -68,10 +70,10 @@ TEST_F(OpenMPContextTest, ValidNesting) {
 }
 
 TEST_F(OpenMPContextTest, ApplicabilityNonConstruct) {
-  OMPContext HostLinux(false, Triple("x86_64-unknown-linux"));
-  OMPContext DeviceLinux(true, Triple("x86_64-unknown-linux"));
-  OMPContext HostNVPTX(false, Triple("nvptx64-nvidia-cuda"));
-  OMPContext DeviceNVPTX(true, Triple("nvptx64-nvidia-cuda"));
+  OMPContext HostLinux(false, Triple("x86_64-unknown-linux"), Triple(), -1);
+  OMPContext DeviceLinux(true, Triple("x86_64-unknown-linux"), Triple(), -1);
+  OMPContext HostNVPTX(false, Triple("nvptx64-nvidia-cuda"), Triple(), -1);
+  OMPContext DeviceNVPTX(true, Triple("nvptx64-nvidia-cuda"), Triple(), -1);
 
   VariantMatchInfo Empty;
   EXPECT_TRUE(isVariantApplicableInContext(Empty, HostLinux));
@@ -129,19 +131,21 @@ TEST_F(OpenMPContextTest, ApplicabilityNonConstruct) {
 }
 
 TEST_F(OpenMPContextTest, ApplicabilityAllTraits) {
-  OMPContext HostLinuxParallelParallel(false, Triple("x86_64-unknown-linux"));
+  OMPContext HostLinuxParallelParallel(false, Triple("x86_64-unknown-linux"),
+                                       Triple(), -1);
   HostLinuxParallelParallel.addTrait(
       TraitProperty::construct_parallel_parallel);
   HostLinuxParallelParallel.addTrait(
       TraitProperty::construct_parallel_parallel);
-  OMPContext DeviceLinuxTargetParallel(true, Triple("x86_64-unknown-linux"));
+  OMPContext DeviceLinuxTargetParallel(true, Triple("x86_64-unknown-linux"),
+                                       Triple(), -1);
   DeviceLinuxTargetParallel.addTrait(TraitProperty::construct_target_target);
   DeviceLinuxTargetParallel.addTrait(
       TraitProperty::construct_parallel_parallel);
-  OMPContext HostNVPTXFor(false, Triple("nvptx64-nvidia-cuda"));
+  OMPContext HostNVPTXFor(false, Triple("nvptx64-nvidia-cuda"), Triple(), -1);
   HostNVPTXFor.addTrait(TraitProperty::construct_for_for);
-  OMPContext DeviceNVPTXTargetTeamsParallel(true,
-                                            Triple("nvptx64-nvidia-cuda"));
+  OMPContext DeviceNVPTXTargetTeamsParallel(true, Triple("nvptx64-nvidia-cuda"),
+                                            Triple(), -1);
   DeviceNVPTXTargetTeamsParallel.addTrait(
       TraitProperty::construct_target_target);
   DeviceNVPTXTargetTeamsParallel.addTrait(TraitProperty::construct_teams_teams);
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 4783006..9e0a338 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -6180,6 +6180,7 @@ TEST_F(OpenMPIRBuilderTest, TargetRegion) {
   F->addFnAttr("target-features", "+mmx,+sse");
   IRBuilder<> Builder(BB);
   auto *Int32Ty = Builder.getInt32Ty();
+  Builder.SetCurrentDebugLocation(DL);
 
   AllocaInst *APtr = Builder.CreateAlloca(Int32Ty, nullptr, "a_ptr");
   AllocaInst *BPtr = Builder.CreateAlloca(Int32Ty, nullptr, "b_ptr");
@@ -6189,6 +6190,8 @@ TEST_F(OpenMPIRBuilderTest, TargetRegion) {
   Builder.CreateStore(Builder.getInt32(20), BPtr);
   auto BodyGenCB = [&](InsertPointTy AllocaIP,
                        InsertPointTy CodeGenIP) -> InsertPointTy {
+    IRBuilderBase::InsertPointGuard guard(Builder);
+    Builder.SetCurrentDebugLocation(llvm::DebugLoc());
     Builder.restoreIP(CodeGenIP);
     LoadInst *AVal = Builder.CreateLoad(Int32Ty, APtr);
     LoadInst *BVal = Builder.CreateLoad(Int32Ty, BPtr);
@@ -6206,6 +6209,8 @@ TEST_F(OpenMPIRBuilderTest, TargetRegion) {
       [&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal,
           llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
           llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) {
+        IRBuilderBase::InsertPointGuard guard(Builder);
+        Builder.SetCurrentDebugLocation(llvm::DebugLoc());
         if (!OMPBuilder.Config.isTargetDevice()) {
           RetVal = cast<llvm::Value>(&Arg);
           return CodeGenIP;
@@ -6252,6 +6257,7 @@ TEST_F(OpenMPIRBuilderTest, TargetRegion) {
                               Builder.saveIP(), EntryInfo, DefaultAttrs,
                               RuntimeAttrs, /*IfCond=*/nullptr, Inputs,
                               GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB));
+  EXPECT_EQ(DL, Builder.getCurrentDebugLocation());
   Builder.restoreIP(AfterIP);
 
   OMPBuilder.finalize();
@@ -6350,6 +6356,7 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) {
   F->addFnAttr("target-features", "+gfx9-insts,+wavefrontsize64");
   IRBuilder<> Builder(BB);
   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+  Builder.SetCurrentDebugLocation(DL);
 
   LoadInst *Value = nullptr;
   StoreInst *TargetStore = nullptr;
@@ -6361,6 +6368,8 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) {
       [&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal,
           llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
           llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) {
+        IRBuilderBase::InsertPointGuard guard(Builder);
+        Builder.SetCurrentDebugLocation(llvm::DebugLoc());
         if (!OMPBuilder.Config.isTargetDevice()) {
           RetVal = cast<llvm::Value>(&Arg);
           return CodeGenIP;
@@ -6394,6 +6403,8 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) {
   auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
                        OpenMPIRBuilder::InsertPointTy CodeGenIP)
       -> OpenMPIRBuilder::InsertPointTy {
+    IRBuilderBase::InsertPointGuard guard(Builder);
+    Builder.SetCurrentDebugLocation(llvm::DebugLoc());
     Builder.restoreIP(CodeGenIP);
     Value = Builder.CreateLoad(Type::getInt32Ty(Ctx), CapturedArgs[0]);
     TargetStore = Builder.CreateStore(Value, CapturedArgs[1]);
@@ -6415,6 +6426,7 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) {
                               EntryInfo, DefaultAttrs, RuntimeAttrs,
                               /*IfCond=*/nullptr, CapturedArgs, GenMapInfoCB,
                               BodyGenCB, SimpleArgAccessorCB));
+  EXPECT_EQ(DL, Builder.getCurrentDebugLocation());
   Builder.restoreIP(AfterIP);
 
   Builder.CreateRetVoid();
@@ -6722,6 +6734,7 @@ TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) {
   F->setName("func");
   IRBuilder<> Builder(BB);
   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+  Builder.SetCurrentDebugLocation(DL);
 
   LoadInst *Value = nullptr;
   StoreInst *TargetStore = nullptr;
@@ -6732,6 +6745,8 @@ TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) {
       [&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal,
           llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
           llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) {
+        IRBuilderBase::InsertPointGuard guard(Builder);
+        Builder.SetCurrentDebugLocation(llvm::DebugLoc());
         if (!OMPBuilder.Config.isTargetDevice()) {
           RetVal = cast<llvm::Value>(&Arg);
           return CodeGenIP;
@@ -6767,6 +6782,8 @@ TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) {
   auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
                        OpenMPIRBuilder::InsertPointTy CodeGenIP)
       -> OpenMPIRBuilder::InsertPointTy {
+    IRBuilderBase::InsertPointGuard guard(Builder);
+    Builder.SetCurrentDebugLocation(llvm::DebugLoc());
     Builder.restoreIP(CodeGenIP);
     RaiseAlloca = Builder.CreateAlloca(Builder.getInt32Ty());
     Value = Builder.CreateLoad(Type::getInt32Ty(Ctx), CapturedArgs[0]);
@@ -6789,6 +6806,7 @@ TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) {
                               EntryInfo, DefaultAttrs, RuntimeAttrs,
                               /*IfCond=*/nullptr, CapturedArgs, GenMapInfoCB,
                               BodyGenCB, SimpleArgAccessorCB));
+  EXPECT_EQ(DL, Builder.getCurrentDebugLocation());
   Builder.restoreIP(AfterIP);
 
   Builder.CreateRetVoid();
diff --git a/llvm/utils/TableGen/DAGISelMatcherOpt.cpp b/llvm/utils/TableGen/DAGISelMatcherOpt.cpp
index 1092b33e..713092c 100644
--- a/llvm/utils/TableGen/DAGISelMatcherOpt.cpp
+++ b/llvm/utils/TableGen/DAGISelMatcherOpt.cpp
@@ -135,13 +135,11 @@ static void ContractNodes(std::unique_ptr<Matcher> &MatcherPtr,
       // variants.
     }
 
-  // If we have a CheckType/CheckChildType/Record node followed by a
-  // CheckOpcode, invert the two nodes.  We prefer to do structural checks
-  // before type checks, as this opens opportunities for factoring on targets
-  // like X86 where many operations are valid on multiple types.
-  if ((isa<CheckTypeMatcher>(N) || isa<CheckChildTypeMatcher>(N) ||
-       isa<RecordMatcher>(N)) &&
-      isa<CheckOpcodeMatcher>(N->getNext())) {
+  // If we have a Record node followed by a CheckOpcode, invert the two nodes.
+  // We prefer to do structural checks before type checks, as this opens
+  // opportunities for factoring on targets like X86 where many operations are
+  // valid on multiple types.
+  if (isa<RecordMatcher>(N) && isa<CheckOpcodeMatcher>(N->getNext())) {
     // Unlink the two nodes from the list.
     Matcher *CheckType = MatcherPtr.release();
     Matcher *CheckOpcode = CheckType->takeNext();
diff --git a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/JITLink/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/JITLink/BUILD.gn
index b9399fa..96db1ab4 100644
--- a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/JITLink/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/JITLink/BUILD.gn
@@ -22,6 +22,7 @@ static_library("JITLink") {
     "COFFDirectiveParser.cpp",
     "COFFLinkGraphBuilder.cpp",
     "COFF_x86_64.cpp",
+    "CompactUnwindSupport.cpp",
     "DWARFRecordSectionSplitter.cpp",
     "EHFrameSupport.cpp",
     "ELF.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/BUILD.gn
index a13c3cd..b0d4d94 100644
--- a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/BUILD.gn
@@ -68,5 +68,6 @@ static_library("Orc") {
     "Speculation.cpp",
     "TaskDispatch.cpp",
     "ThreadSafeModule.cpp",
+    "UnwindInfoRegistrationPlugin.cpp",
   ]
 }
diff --git a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/TargetProcess/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/TargetProcess/BUILD.gn
index d62f504..49bf1516 100644
--- a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/TargetProcess/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/TargetProcess/BUILD.gn
@@ -16,6 +16,7 @@ static_library("TargetProcess") {
     "SimpleExecutorMemoryManager.cpp",
     "SimpleRemoteEPCServer.cpp",
     "TargetExecutionUtils.cpp",
+    "UnwindInfoManager.cpp",
   ]
   if (current_os == "linux") {
     libs = [ "rt" ]
diff --git a/mlir/include/mlir/Dialect/Affine/Analysis/Utils.h b/mlir/include/mlir/Dialect/Affine/Analysis/Utils.h
index b8f3548..b1fbf44 100644
--- a/mlir/include/mlir/Dialect/Affine/Analysis/Utils.h
+++ b/mlir/include/mlir/Dialect/Affine/Analysis/Utils.h
@@ -37,9 +37,16 @@ struct MemRefAccess;
 // was encountered in the loop nest.
 struct LoopNestStateCollector {
   SmallVector<AffineForOp, 4> forOps;
+  // Affine loads.
   SmallVector<Operation *, 4> loadOpInsts;
+  // Affine stores.
   SmallVector<Operation *, 4> storeOpInsts;
-  bool hasNonAffineRegionOp = false;
+  // Non-affine loads.
+  SmallVector<Operation *, 4> memrefLoads;
+  // Non-affine stores.
+  SmallVector<Operation *, 4> memrefStores;
+  // Free operations.
+  SmallVector<Operation *, 4> memrefFrees;
 
   // Collects load and store operations, and whether or not a region holding op
   // other than ForOp and IfOp was encountered in the loop nest.
@@ -47,9 +54,11 @@ struct LoopNestStateCollector {
 };
 
 // MemRefDependenceGraph is a graph data structure where graph nodes are
-// top-level operations in a `Block` which contain load/store ops, and edges
-// are memref dependences between the nodes.
-// TODO: Add a more flexible dependence graph representation.
+// top-level operations in a `Block` and edges are memref dependences or SSA
+// dependences (on memrefs) between the nodes. Nodes are created for all
+// top-level operations except in certain cases (see `init` method). Edges are
+// created between nodes with a dependence (see `Edge` documentation). Edges
+// aren't created from/to nodes that have no memory effects.
 struct MemRefDependenceGraph {
 public:
   // Node represents a node in the graph. A Node is either an entire loop nest
@@ -60,10 +69,18 @@ public:
     unsigned id;
     // The top-level statement which is (or contains) a load/store.
     Operation *op;
-    // List of load operations.
+    // List of affine loads.
     SmallVector<Operation *, 4> loads;
-    // List of store op insts.
+    // List of non-affine loads.
+    SmallVector<Operation *, 4> memrefLoads;
+    // List of affine store ops.
     SmallVector<Operation *, 4> stores;
+    // List of non-affine stores.
+    SmallVector<Operation *, 4> memrefStores;
+    // List of free operations.
+    SmallVector<Operation *, 4> memrefFrees;
+    // Set of private memrefs used in this node.
+    DenseSet<Value> privateMemrefs;
 
     Node(unsigned id, Operation *op) : id(id), op(op) {}
 
@@ -73,6 +90,13 @@ public:
     // Returns the store op count for 'memref'.
     unsigned getStoreOpCount(Value memref) const;
 
+    /// Returns true if there exists an operation with a write memory effect to
+    /// `memref` in this node.
+    unsigned hasStore(Value memref) const;
+
+    // Returns true if the node has a free op on `memref`.
+    unsigned hasFree(Value memref) const;
+
     // Returns all store ops in 'storeOps' which access 'memref'.
     void getStoreOpsForMemref(Value memref,
                               SmallVectorImpl<Operation *> *storeOps) const;
@@ -86,7 +110,16 @@ public:
     void getLoadAndStoreMemrefSet(DenseSet<Value> *loadAndStoreMemrefSet) const;
   };
 
-  // Edge represents a data dependence between nodes in the graph.
+  // Edge represents a data dependence between nodes in the graph. It can either
+  // be a memory dependence or an SSA dependence. In the former case, it
+  // corresponds to a pair of memory accesses to the same memref or aliasing
+  // memrefs where at least one of them has a write or free memory effect. The
+  // memory accesses need not be affine load/store operations. Operations are
+  // checked for read/write effects and edges may be added conservatively. Edges
+  // are not created to/from nodes that have no memory effect. An exception to
+  // this are SSA dependences between operations that define memrefs (like
+  // alloc's, view-like ops) and their memory-effecting users that are enclosed
+  // in loops.
   struct Edge {
     // The id of the node at the other end of the edge.
     // If this edge is stored in Edge = Node.inEdges[i], then
@@ -182,9 +215,12 @@ public:
   // of sibling node 'sibId' into node 'dstId'.
   void updateEdges(unsigned sibId, unsigned dstId);
 
-  // Adds ops in 'loads' and 'stores' to node at 'id'.
-  void addToNode(unsigned id, const SmallVectorImpl<Operation *> &loads,
-                 const SmallVectorImpl<Operation *> &stores);
+  // Adds the specified ops to lists of node at 'id'.
+  void addToNode(unsigned id, ArrayRef<Operation *> loads,
+                 ArrayRef<Operation *> stores,
+                 ArrayRef<Operation *> memrefLoads,
+                 ArrayRef<Operation *> memrefStores,
+                 ArrayRef<Operation *> memrefFrees);
 
   void clearNodeLoadAndStores(unsigned id);
 
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
index a7d6834..72fae1bd 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
@@ -107,7 +107,6 @@ def LLVM_IsFPClass : LLVM_OneResultIntrOp<"is.fpclass", [], [0], [Pure],
 }
 
 def LLVM_CopySignOp : LLVM_BinarySameArgsIntrOpF<"copysign">;
-def LLVM_CosOp : LLVM_UnaryIntrOpF<"cos">;
 def LLVM_ExpOp : LLVM_UnaryIntrOpF<"exp">;
 def LLVM_Exp2Op : LLVM_UnaryIntrOpF<"exp2">;
 def LLVM_FAbsOp : LLVM_UnaryIntrOpF<"fabs">;
@@ -125,7 +124,6 @@ def LLVM_Prefetch : LLVM_ZeroResultIntrOp<"prefetch", [0],
 > {
   let arguments = (ins LLVM_AnyPointer:$addr, I32Attr:$rw, I32Attr:$hint, I32Attr:$cache);
 }
-def LLVM_SinOp : LLVM_UnaryIntrOpF<"sin">;
 def LLVM_RoundEvenOp : LLVM_UnaryIntrOpF<"roundeven">;
 def LLVM_RoundOp : LLVM_UnaryIntrOpF<"round">;
 def LLVM_FTruncOp : LLVM_UnaryIntrOpF<"trunc">;
@@ -167,6 +165,11 @@ def LLVM_SMaxOp : LLVM_BinarySameArgsIntrOpI<"smax">;
 def LLVM_SMinOp : LLVM_BinarySameArgsIntrOpI<"smin">;
 def LLVM_UMaxOp : LLVM_BinarySameArgsIntrOpI<"umax">;
 def LLVM_UMinOp : LLVM_BinarySameArgsIntrOpI<"umin">;
+
+def LLVM_SinOp : LLVM_UnaryIntrOpF<"sin">;
+def LLVM_CosOp : LLVM_UnaryIntrOpF<"cos">;
+def LLVM_TanOp : LLVM_UnaryIntrOpF<"tan">;
+
 def LLVM_SinhOp : LLVM_UnaryIntrOpF<"sinh">;
 def LLVM_CoshOp : LLVM_UnaryIntrOpF<"cosh">;
 def LLVM_TanhOp : LLVM_UnaryIntrOpF<"tanh">;
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h b/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h
index d474ba8..a9270c6 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h
@@ -39,7 +39,11 @@ enum NVVMMemorySpace {
   /// Shared memory space identifier.
   kSharedMemorySpace = 3,
   /// Constant memory space identifier.
-  kConstantMemorySpace = 4
+  kConstantMemorySpace = 4,
+  /// Tensor memory space identifier.
+  /// Tensor memory is available only in arch-accelerated
+  /// variants from sm100 onwards.
+  kTensorMemorySpace = 6
 };
 
 /// Return the element type and number of elements associated with a wmma matrix
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 23db937..3d37875 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -23,6 +23,7 @@ include "mlir/Interfaces/InferIntRangeInterface.td"
 def LLVM_PointerGeneric : LLVM_PointerInAddressSpace<0>;
 def LLVM_PointerGlobal : LLVM_PointerInAddressSpace<1>;
 def LLVM_PointerShared : LLVM_PointerInAddressSpace<3>;
+def LLVM_PointerTensor : LLVM_PointerInAddressSpace<6>;
 
 //===----------------------------------------------------------------------===//
 // NVVM dialect definitions
@@ -476,8 +477,7 @@ def NVVM_BarrierArriveOp : NVVM_PTXBuilder_Op<"barrier.arrive">
     The default barrier id is 0 that is similar to `nvvm.barrier` Op. When 
     `barrierId` is not present, the default barrier id is used. 
 
-    [For more information, see PTX ISA]
-    (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-bar)
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-bar)
   }];
   
   let assemblyFormat = "(`id` `=` $barrierId^)? `number_of_threads` `=` $numberOfThreads attr-dict";
@@ -503,8 +503,7 @@ def NVVM_ClusterArriveOp : NVVM_Op<"cluster.arrive"> {
 
     The `aligned` attribute, when provided, generates the .aligned version of the PTX instruction.
 
-    [For more information, see PTX ISA]
-    (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-barrier-cluster)
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-barrier-cluster)
   }];
 
   string llvmBuilder = [{
@@ -530,8 +529,7 @@ def NVVM_ClusterArriveRelaxedOp : NVVM_Op<"cluster.arrive.relaxed"> {
     ordering and visibility guarantees provided for the memory accesses performed prior to
     `cluster.arrive`.
 
-    [For more information, see PTX ISA]
-    (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-barrier-cluster)
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-barrier-cluster)
   }];
 
   string llvmBuilder = [{
@@ -552,8 +550,7 @@ def NVVM_ClusterWaitOp : NVVM_Op<"cluster.wait"> {
     of the cluster to perform `cluster.arrive`. The `aligned` attribute, when provided,
     generates the .aligned version of the PTX instruction.
 
-    [For more information, see PTX ISA]
-    (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-barrier-cluster)
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-barrier-cluster)
   }];
 
   string llvmBuilder = [{
@@ -605,8 +602,8 @@ def NVVM_FenceProxyOp : NVVM_PTXBuilder_Op<"fence.proxy">,
   let description = [{
     Fence operation with proxy to establish an ordering between memory accesses
     that may happen through different proxies.
-    [For more information, see PTX ISA]
-    (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar)
+
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar)
   }];
   
   let assemblyFormat = "attr-dict";
@@ -656,8 +653,8 @@ def NVVM_FenceProxyAcquireOp : NVVM_Op<"fence.proxy.acquire">,
     value for the `size` operand is 128 and must be an immediate. Generic Addressing
     is used unconditionally, and the address specified by the operand `addr` must
     fall within the `.global` state space. Otherwise, the behavior is undefined
-    [For more information, see PTX ISA]
-    (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar)
+    
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar)
   }];
 
   let assemblyFormat = "$scope $addr `,` $size (`from_proxy` `=` $fromProxy^)? (`to_proxy` `=` $toProxy^)? attr-dict";
@@ -684,8 +681,8 @@ def NVVM_FenceProxyReleaseOp : NVVM_Op<"fence.proxy.release">,
     subsequent memory access performed via the tensormap proxy. `fence.proxy.release`
     operation can form a release sequence that synchronizes with an acquire
     sequence that contains the fence.proxy.acquire proxy fence operation
-    [For more information, see PTX ISA]
-    (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar)
+    
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar)
   }];
 
   let assemblyFormat = "$scope (`from_proxy` `=` $fromProxy^)? (`to_proxy` `=` $toProxy^)? attr-dict";
@@ -723,8 +720,8 @@ def NVVM_FenceMbarrierInitOp : NVVM_PTXBuilder_Op<"fence.mbarrier.init"> {
   let arguments = (ins );
     let description = [{
     Fence operation that applies on the prior nvvm.mbarrier.init
-    [For more information, see PTX ISA]
-    (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar)
+    
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar)
   }];
   
   let assemblyFormat = "attr-dict";
@@ -767,8 +764,8 @@ def NVVM_ShflOp :
     the source. The `mask_and_clamp` contains two packed values specifying
     a mask for logically splitting warps into sub-segments and an upper bound
     for clamping the source lane index.
-    [For more information, refer PTX ISA]
-    (https://docs.nvidia.com/cuda/parallel-thread-execution/#data-movement-and-conversion-instructions-shfl-sync)
+    
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#data-movement-and-conversion-instructions-shfl-sync)
   }];
   string llvmBuilder = [{
       auto intId = getShflIntrinsicId(
@@ -813,8 +810,7 @@ def NVVM_ElectSyncOp : NVVM_Op<"elect.sync">
     of this Op. The predicate result is set to `True` for the
     leader thread, and `False` for all other threads.
 
-    [For more information, see PTX ISA]
-    (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-elect-sync)
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-elect-sync)
   }];
 
   let results = (outs I1:$pred);
@@ -898,8 +894,8 @@ def NVVM_CpAsyncMBarrierArriveOp : NVVM_Op<"cp.async.mbarrier.arrive"> {
     The `addr` operand specifies the address of the mbarrier object
     in generic address space. The `noinc` attr impacts how the
     mbarrier's state is updated.
-    [For more information, refer PTX ISA]
-    (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-cp-async-mbarrier-arrive)
+    
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-cp-async-mbarrier-arrive)
   }];
   let assemblyFormat = "$addr attr-dict `:` type(operands)";
 
@@ -922,8 +918,9 @@ def NVVM_CpAsyncMBarrierArriveSharedOp : NVVM_Op<"cp.async.mbarrier.arrive.share
     track all prior cp.async operations initiated by the executing thread.
     The `addr` operand specifies the address of the mbarrier object in
     shared memory. The `noinc` attr impacts how the mbarrier's state
-    is updated. [For more information, refer PTX ISA]
-    (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-cp-async-mbarrier-arrive)
+    is updated. 
+    
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-cp-async-mbarrier-arrive)
   }];
   let assemblyFormat = "$addr attr-dict `:` type(operands)";
 
@@ -981,8 +978,8 @@ def NVVM_CvtFloatToTF32Op : NVVM_Op<"cvt.float.to.tf32"> {
     The `relu` attribute, when set, lowers to the '.relu' variant of
     the cvt instruction. The `rnd` and `sat` attributes specify the
     the rounding and saturation modes respectively.
-    [For more information, see PTX ISA]
-    (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt)
+    
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt)
   }];
 
   let hasVerifier = 1;
@@ -1632,8 +1629,8 @@ def NVVM_StMatrixOp: NVVM_PTXBuilder_Op<"stmatrix">,
   let description = [{
     Collectively store one or more matrices across all threads in a warp to the
     location indicated by the address operand $ptr in shared memory.
-    [For more information, see PTX ISA]
-    (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-store-instruction-stmatrix)
+    
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-store-instruction-stmatrix)
   }];
   
   let assemblyFormat = "$ptr `,` $sources attr-dict `:` type(operands)";
@@ -1845,8 +1842,7 @@ def NVVM_CpAsyncBulkCommitGroupOp : NVVM_Op<"cp.async.bulk.commit.group">,
     This Op commits all prior initiated but uncommitted cp.async.bulk
     instructions into a cp.async.bulk-group.
 
-    [For more information, see PTX ISA]
-    (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-commit-group)
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-commit-group)
   }];
 
   string llvmBuilder = [{
@@ -1870,8 +1866,7 @@ def NVVM_CpAsyncBulkWaitGroupOp : NVVM_Op<"cp.async.bulk.wait_group">,
     async operations in the specified bulk async-group have completed reading 
     from their source locations.
 
-    [For more information, see PTX ISA]
-    (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-wait-group)
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-wait-group)
   }];
   
   string llvmBuilder = [{
@@ -1916,8 +1911,7 @@ def NVVM_CpAsyncBulkTensorGlobalToSharedClusterOp :
     The `l2CacheHint` operand is optional, and it is used to specify cache 
     eviction policy that may be used during the memory access.
     
-    [For more information, see PTX ISA]
-    (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-tensor)
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-tensor)
   }];
 
   let assemblyFormat = [{ 
@@ -2033,8 +2027,7 @@ def NVVM_CpAsyncBulkTensorPrefetchOp :
     The `l2CacheHint` operand is optional, and it is used to specify cache
     eviction policy that may be used during the memory access.
 
-    [For more information, see PTX ISA]
-    (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-prefetch-tensor)
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-prefetch-tensor)
   }];
 
   let assemblyFormat = [{
@@ -2133,8 +2126,7 @@ def NVVM_CpAsyncBulkTensorReduceOp :
     The `l2CacheHint` operand is optional, and it is used to specify cache
     eviction policy that may be used during the memory access.
 
-    [For more information, see PTX ISA]
-    (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-reduce-async-bulk-tensor)
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-reduce-async-bulk-tensor)
   }];
 
   let assemblyFormat = [{
@@ -2193,8 +2185,8 @@ def NVVM_CpAsyncBulkGlobalToSharedClusterOp :
 
     The `l2CacheHint` operand is optional, and it is used to specify cache
     eviction policy that may be used during the memory access.
-    [For more information, see PTX ISA]
-    (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk)
+    
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk)
   }];
 
   let arguments = (ins
@@ -2251,8 +2243,7 @@ def NVVM_CpAsyncBulkSharedCTAToSharedClusterOp :
     Initiates an asynchronous copy operation from Shared CTA memory to Shared
     cluster memory.
 
-    [For more information, see PTX ISA]
-    (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk)
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk)
   }];
 
   let arguments = (ins
@@ -2282,8 +2273,8 @@ def NVVM_CpAsyncBulkSharedCTAToGlobalOp :
 
     The `l2CacheHint` operand is optional, and it is used to specify cache
     eviction policy that may be used during the memory access.
-    [For more information, see PTX ISA]
-    (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk)
+    
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk)
   }];
 
   let arguments = (ins
@@ -2523,6 +2514,8 @@ def NVVM_GriddepcontrolWaitOp : NVVM_IntrOp<"griddepcontrol.wait", [], 0> {
     Causes the executing thread to wait until all prerequisite grids in flight 
     have completed and all the memory operations from the prerequisite grids 
     are performed and made visible to the current grid.
+
+
     [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-griddepcontrol)
   }];
 }
@@ -2535,6 +2528,8 @@ def NVVM_GriddepcontrolLaunchDependentsOp
     Signals that specific dependents the runtime system designated to react to 
     this instruction can be scheduled as soon as all other CTAs in the grid 
     issue the same instruction or have completed.
+
+
     [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-griddepcontrol)
   }];
 }
@@ -2593,6 +2588,110 @@ def NVVM_Breakpoint : NVVM_Op<"breakpoint"> {
 }
 
 //===----------------------------------------------------------------------===//
+// NVVM TCGEN05 Ops
+//===----------------------------------------------------------------------===//
+// Num CTAs in a group participating in the TCGEN05 operation.
+// This corresponds to the "cta_group::1", "cta_group::2"
+// modifiers in the PTX instructions.
+def Tcgen05GroupCTA_1 : I32EnumAttrCase<"CTA_1", 0, "cta_1">;
+def Tcgen05GroupCTA_2 : I32EnumAttrCase<"CTA_2", 1, "cta_2">;
+
+def Tcgen05GroupKind : I32EnumAttr<"Tcgen05GroupKind",
+                            "NVVM Tcgen05 group kind",
+  [Tcgen05GroupCTA_1, Tcgen05GroupCTA_2]> {
+  let genSpecializedAttr = 0;
+  let cppNamespace = "::mlir::NVVM";
+}
+def Tcgen05GroupKindAttr :
+  EnumAttr<NVVM_Dialect, Tcgen05GroupKind, "tcgen05_group"> {
+  let assemblyFormat = "`<` $value `>`";
+}
+
+def NVVM_Tcgen05AllocOp : NVVM_Op<"tcgen05.alloc"> {
+  let summary = "Tcgen05 alloc operation";
+  let description = [{
+    The `tcgen05.alloc` Op allocates tensor core memory for
+    the amount specified by `nCols` and writes the destination
+    address to the `addr` argument. The `nCols` operand specifies the
+    number of columns to be allocated and it must be a power-of-two.
+    [For more information, refer to the PTX ISA]
+    (https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-memory-alloc-manage-instructions)
+  }];
+
+  let arguments = (ins
+    AnyTypeOf<[LLVM_AnyPointer, LLVM_PointerShared]>:$addr,
+    I32:$nCols,
+    DefaultValuedAttr<Tcgen05GroupKindAttr, "Tcgen05GroupKind::CTA_1">:$group);
+
+  let assemblyFormat = "$addr `,` $nCols attr-dict `:` type(operands)";
+
+  let extraClassDeclaration = [{
+    static llvm::Intrinsic::ID
+      getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
+                            llvm::SmallVector<llvm::Value *> &args);
+  }];
+  string llvmBuilder = [{
+    llvm::SmallVector<llvm::Value *> args;
+    auto id = NVVM::Tcgen05AllocOp::getIntrinsicIDAndArgs(
+      *op, moduleTranslation, args);
+    createIntrinsicCall(builder, id, args);
+  }];
+}
+
+def NVVM_Tcgen05DeallocOp : NVVM_Op<"tcgen05.dealloc"> {
+  let summary = "Tcgen05 dealloc operation";
+  let description = [{
+    The `tcgen05.dealloc` Op de-allocates the tensor core memory
+    specified by `tmemAddr`, which must be from a previous tensor
+    memory allocation. The `nCols` operand specifies the number
+    of columns to be de-allocated, and it must be a power-of-two.
+    [For more information, refer to the PTX ISA]
+    (https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-memory-alloc-manage-instructions)
+  }];
+
+  let arguments = (ins LLVM_PointerTensor:$taddr, I32:$nCols,
+    DefaultValuedAttr<Tcgen05GroupKindAttr, "Tcgen05GroupKind::CTA_1">:$group);
+
+  let assemblyFormat = "$taddr `,` $nCols attr-dict `:` type(operands)";
+
+  let extraClassDeclaration = [{
+    static llvm::Intrinsic::ID
+      getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
+                            llvm::SmallVector<llvm::Value *> &args);
+  }];
+  string llvmBuilder = [{
+    llvm::SmallVector<llvm::Value *> args;
+    auto id = NVVM::Tcgen05DeallocOp::getIntrinsicIDAndArgs(
+      *op, moduleTranslation, args);
+    createIntrinsicCall(builder, id, args);
+  }];
+}
+
+def NVVM_Tcgen05RelinquishAllocPermitOp : NVVM_Op<"tcgen05.relinquish_alloc_permit"> {
+  let summary = "Tcgen05 Op to relinquish the right to allocate";
+  let description = [{
+    The `tcgen05.relinquish_alloc_permit` Op specifies that the CTA
+    of the executing thread is relinquishing the right to allocate
+    Tensor Memory. So, it is illegal for a CTA to perform `tcgen05.alloc`
+    after any of its constituent threads execute `tcgen05.relinquish_alloc_permit`.
+    [For more information, refer to the PTX ISA]
+    (https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-memory-alloc-manage-instructions)
+  }];
+
+  let arguments = (ins
+    DefaultValuedAttr<Tcgen05GroupKindAttr, "Tcgen05GroupKind::CTA_1">:$group);
+
+  let assemblyFormat = "attr-dict";
+
+  string llvmBuilder = [{
+    auto id = ($group == NVVM::Tcgen05GroupKind::CTA_1) ?
+      llvm::Intrinsic::nvvm_tcgen05_relinq_alloc_permit_cg1 :
+      llvm::Intrinsic::nvvm_tcgen05_relinq_alloc_permit_cg2;
+    createIntrinsicCall(builder, id);
+  }];
+}
+
+//===----------------------------------------------------------------------===//
 // NVVM target attribute.
 //===----------------------------------------------------------------------===//
 
diff --git a/mlir/include/mlir/Dialect/NVGPU/IR/NVGPUDialect.h b/mlir/include/mlir/Dialect/NVGPU/IR/NVGPUDialect.h
index aad2ac6..db4c63b 100644
--- a/mlir/include/mlir/Dialect/NVGPU/IR/NVGPUDialect.h
+++ b/mlir/include/mlir/Dialect/NVGPU/IR/NVGPUDialect.h
@@ -22,8 +22,20 @@
 
 #include "mlir/Dialect/NVGPU/IR/NVGPUEnums.h.inc"
 
+// Maximum warp size
 constexpr int kWarpSize = 32;
 
+// Maximum number of threads in a block and block in a grid
+// https://docs.nvidia.com/cuda/cuda-c-programming-guide/#features-and-technical-specifications-technical-specifications-per-compute-capability
+constexpr int kMaxTotalBlockdim = 1024;
+constexpr int kMaxBlockdimx = 1024;
+constexpr int kMaxBlockdimy = 1024;
+constexpr int kMaxBlockdimz = 64;
+constexpr int kMaxTotalGriddim = 2147483647;
+constexpr int kMaxGriddimx = 2147483647;
+constexpr int kMaxGriddimy = 65535;
+constexpr int kMaxGriddimz = 65535;
+
 /// M size of wgmma.mma_async instruction
 constexpr int kWgmmaSizeM = 64;
 
diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td
index 8ede271..98bcbca 100644
--- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td
+++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td
@@ -78,7 +78,8 @@ def Tosa_AvgPool2dOp : Tosa_InferShapedTypeOp<"avg_pool2d"> {
     Tosa_IntArrayAttr2:$stride,
     Tosa_IntArrayAttr4:$pad,
     TypeAttrOf<Tosa_AccType>:$acc_type,
-    OptionalAttr<Tosa_UnaryOpQuantizationAttr>:$quantization_info
+    OptionalAttr<I32Attr>:$input_zp,
+    OptionalAttr<I32Attr>:$output_zp
   );
 
   let results = (outs
@@ -237,7 +238,8 @@ def Tosa_FullyConnectedOp : Tosa_InferShapedTypeOp<"fully_connected"> {
     Tosa_Tensor2D:$input,
     TosaTensorRankOf<[Tosa_Weight], [2]>:$weight,
     Tosa_Tensor1D:$bias,
-    OptionalAttr<Tosa_ConvOpQuantizationAttr>:$quantization_info
+    OptionalAttr<I32Attr>:$input_zp,
+    OptionalAttr<I32Attr>:$weight_zp
   );
 
   let results = (outs
@@ -263,7 +265,8 @@ def Tosa_MatMulOp : Tosa_InferShapedTypeOp<"matmul"> {
   let arguments = (ins
     Tosa_Tensor3D:$a,
     Tosa_Tensor3D:$b,
-    OptionalAttr<Tosa_MatMulOpQuantizationAttr>:$quantization_info
+    OptionalAttr<I32Attr>:$a_zp,
+    OptionalAttr<I32Attr>:$b_zp
   );
 
   let results = (outs
@@ -1114,7 +1117,8 @@ def Tosa_NegateOp : Tosa_ElementwiseUnaryOp<"negate"> {
 
   let arguments = (ins
       Tosa_Tensor:$input1,
-      OptionalAttr<Tosa_UnaryOpQuantizationAttr>:$quantization_info
+      OptionalAttr<I32Attr>:$input1_zp,
+      OptionalAttr<I32Attr>:$output_zp
   );
 
   let results = (outs
@@ -1589,7 +1593,7 @@ def Tosa_PadOp : Tosa_InferShapedTypeOp<"pad"> {
     Tosa_RankedTensor:$input1,
     Tosa_Shape:$padding,
     Optional<Tosa_ScalarTensor>:$pad_const,
-    OptionalAttr<Tosa_PadOpQuantizationAttr>:$quantization_info
+    OptionalAttr<I32Attr>:$input_zp
   );
 
   let results = (outs
diff --git a/mlir/include/mlir/IR/OpImplementation.h b/mlir/include/mlir/IR/OpImplementation.h
index d9c925a..5eb8b4a 100644
--- a/mlir/include/mlir/IR/OpImplementation.h
+++ b/mlir/include/mlir/IR/OpImplementation.h
@@ -202,7 +202,8 @@ public:
   /// special or non-printable characters in it.
   virtual void printSymbolName(StringRef symbolRef);
 
-  /// Print a handle to the given dialect resource.
+  /// Print a handle to the given dialect resource. The handle key is quoted and
+  /// escaped if it has any special or non-printable characters in it.
   virtual void printResourceHandle(const AsmDialectResourceHandle &resource);
 
   /// Print an optional arrow followed by a type list.
diff --git a/mlir/lib/AsmParser/AsmParserImpl.h b/mlir/lib/AsmParser/AsmParserImpl.h
index d5b72d6..1f8fbfd 100644
--- a/mlir/lib/AsmParser/AsmParserImpl.h
+++ b/mlir/lib/AsmParser/AsmParserImpl.h
@@ -248,13 +248,7 @@ public:
 
   /// Parses a quoted string token if present.
   ParseResult parseOptionalString(std::string *string) override {
-    if (!parser.getToken().is(Token::string))
-      return failure();
-
-    if (string)
-      *string = parser.getToken().getStringValue();
-    parser.consumeToken();
-    return success();
+    return parser.parseOptionalString(string);
   }
 
   /// Parses a Base64 encoded string of bytes.
@@ -355,13 +349,7 @@ public:
 
   /// Parse a keyword, if present, into 'keyword'.
   ParseResult parseOptionalKeyword(StringRef *keyword) override {
-    // Check that the current token is a keyword.
-    if (!parser.isCurrentTokenAKeyword())
-      return failure();
-
-    *keyword = parser.getTokenSpelling();
-    parser.consumeToken();
-    return success();
+    return parser.parseOptionalKeyword(keyword);
   }
 
   /// Parse a keyword if it is one of the 'allowedKeywords'.
@@ -387,13 +375,7 @@ public:
 
   /// Parse an optional keyword or string and set instance into 'result'.`
   ParseResult parseOptionalKeywordOrString(std::string *result) override {
-    StringRef keyword;
-    if (succeeded(parseOptionalKeyword(&keyword))) {
-      *result = keyword.str();
-      return success();
-    }
-
-    return parseOptionalString(result);
+    return parser.parseOptionalKeywordOrString(result);
   }
 
   //===--------------------------------------------------------------------===//
@@ -514,7 +496,7 @@ public:
       return parser.emitError() << "dialect '" << dialect->getNamespace()
                                 << "' does not expect resource handles";
     }
-    StringRef resourceName;
+    std::string resourceName;
     return parser.parseResourceHandle(interface, resourceName);
   }
 
diff --git a/mlir/lib/AsmParser/Parser.cpp b/mlir/lib/AsmParser/Parser.cpp
index eccb324..b5f1d2e 100644
--- a/mlir/lib/AsmParser/Parser.cpp
+++ b/mlir/lib/AsmParser/Parser.cpp
@@ -271,6 +271,17 @@ ParseResult Parser::parseToken(Token::Kind expectedToken,
   return emitWrongTokenError(message);
 }
 
+/// Parses a quoted string token if present.
+ParseResult Parser::parseOptionalString(std::string *string) {
+  if (!getToken().is(Token::string))
+    return failure();
+
+  if (string)
+    *string = getToken().getStringValue();
+  consumeToken();
+  return success();
+}
+
 /// Parse an optional integer value from the stream.
 OptionalParseResult Parser::parseOptionalInteger(APInt &result) {
   // Parse `false` and `true` keywords as 0 and 1 respectively.
@@ -412,15 +423,25 @@ ParseResult Parser::parseOptionalKeyword(StringRef *keyword) {
   return success();
 }
 
+ParseResult Parser::parseOptionalKeywordOrString(std::string *result) {
+  StringRef keyword;
+  if (succeeded(parseOptionalKeyword(&keyword))) {
+    *result = keyword.str();
+    return success();
+  }
+
+  return parseOptionalString(result);
+}
+
 //===----------------------------------------------------------------------===//
 // Resource Parsing
 
 FailureOr<AsmDialectResourceHandle>
 Parser::parseResourceHandle(const OpAsmDialectInterface *dialect,
-                            StringRef &name) {
+                            std::string &name) {
   assert(dialect && "expected valid dialect interface");
   SMLoc nameLoc = getToken().getLoc();
-  if (failed(parseOptionalKeyword(&name)))
+  if (failed(parseOptionalKeywordOrString(&name)))
     return emitError("expected identifier key for 'resource' entry");
   auto &resources = getState().symbols.dialectResources;
 
@@ -451,7 +472,7 @@ Parser::parseResourceHandle(Dialect *dialect) {
     return emitError() << "dialect '" << dialect->getNamespace()
                        << "' does not expect resource handles";
   }
-  StringRef resourceName;
+  std::string resourceName;
   return parseResourceHandle(interface, resourceName);
 }
 
@@ -2530,8 +2551,8 @@ private:
 /// textual format.
 class ParsedResourceEntry : public AsmParsedResourceEntry {
 public:
-  ParsedResourceEntry(StringRef key, SMLoc keyLoc, Token value, Parser &p)
-      : key(key), keyLoc(keyLoc), value(value), p(p) {}
+  ParsedResourceEntry(std::string key, SMLoc keyLoc, Token value, Parser &p)
+      : key(std::move(key)), keyLoc(keyLoc), value(value), p(p) {}
   ~ParsedResourceEntry() override = default;
 
   StringRef getKey() const final { return key; }
@@ -2607,7 +2628,7 @@ public:
   }
 
 private:
-  StringRef key;
+  std::string key;
   SMLoc keyLoc;
   Token value;
   Parser &p;
@@ -2736,7 +2757,7 @@ ParseResult TopLevelOperationParser::parseDialectResourceFileMetadata() {
     return parseCommaSeparatedListUntil(Token::r_brace, [&]() -> ParseResult {
       // Parse the name of the resource entry.
       SMLoc keyLoc = getToken().getLoc();
-      StringRef key;
+      std::string key;
       if (failed(parseResourceHandle(handler, key)) ||
           parseToken(Token::colon, "expected ':'"))
         return failure();
@@ -2763,8 +2784,8 @@ ParseResult TopLevelOperationParser::parseExternalResourceFileMetadata() {
     return parseCommaSeparatedListUntil(Token::r_brace, [&]() -> ParseResult {
       // Parse the name of the resource entry.
       SMLoc keyLoc = getToken().getLoc();
-      StringRef key;
-      if (failed(parseOptionalKeyword(&key)))
+      std::string key;
+      if (failed(parseOptionalKeywordOrString(&key)))
         return emitError(
             "expected identifier key for 'external_resources' entry");
       if (parseToken(Token::colon, "expected ':'"))
diff --git a/mlir/lib/AsmParser/Parser.h b/mlir/lib/AsmParser/Parser.h
index 37670bd..1b8aa7c 100644
--- a/mlir/lib/AsmParser/Parser.h
+++ b/mlir/lib/AsmParser/Parser.h
@@ -146,6 +146,9 @@ public:
   /// output a diagnostic and return failure.
   ParseResult parseToken(Token::Kind expectedToken, const Twine &message);
 
+  /// Parses a quoted string token if present.
+  ParseResult parseOptionalString(std::string *string);
+
   /// Parse an optional integer value from the stream.
   OptionalParseResult parseOptionalInteger(APInt &result);
 
@@ -171,13 +174,16 @@ public:
   /// Parse a keyword, if present, into 'keyword'.
   ParseResult parseOptionalKeyword(StringRef *keyword);
 
+  /// Parse an optional keyword or string and set instance into 'result'.`
+  ParseResult parseOptionalKeywordOrString(std::string *result);
+
   //===--------------------------------------------------------------------===//
   // Resource Parsing
   //===--------------------------------------------------------------------===//
 
   /// Parse a handle to a dialect resource within the assembly format.
   FailureOr<AsmDialectResourceHandle>
-  parseResourceHandle(const OpAsmDialectInterface *dialect, StringRef &name);
+  parseResourceHandle(const OpAsmDialectInterface *dialect, std::string &name);
   FailureOr<AsmDialectResourceHandle> parseResourceHandle(Dialect *dialect);
 
   //===--------------------------------------------------------------------===//
diff --git a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
index 0caf3ad..12e3c07 100644
--- a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
+++ b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
@@ -226,7 +226,7 @@ void mlir::configureOpenMPToLLVMConversionLegality(
   target.addDynamicallyLegalOp<
       omp::AtomicReadOp, omp::AtomicWriteOp, omp::CancellationPointOp,
       omp::CancelOp, omp::CriticalDeclareOp, omp::FlushOp, omp::MapBoundsOp,
-      omp::MapInfoOp, omp::OrderedOp, omp::TargetEnterDataOp,
+      omp::MapInfoOp, omp::OrderedOp, omp::ScanOp, omp::TargetEnterDataOp,
       omp::TargetExitDataOp, omp::TargetUpdateOp, omp::ThreadprivateOp,
       omp::YieldOp>([&](Operation *op) {
     return typeConverter.isLegal(op->getOperandTypes()) &&
@@ -274,6 +274,7 @@ void mlir::populateOpenMPToLLVMConversionPatterns(LLVMTypeConverter &converter,
       RegionLessOpConversion<omp::CancelOp>,
       RegionLessOpConversion<omp::CriticalDeclareOp>,
       RegionLessOpConversion<omp::OrderedOp>,
+      RegionLessOpConversion<omp::ScanOp>,
       RegionLessOpConversion<omp::TargetEnterDataOp>,
       RegionLessOpConversion<omp::TargetExitDataOp>,
       RegionLessOpConversion<omp::TargetUpdateOp>,
diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp
index b0eb2d6..67218ce 100644
--- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp
+++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp
@@ -141,63 +141,65 @@ static Value createLinalgBodyCalculationForElementwiseOp(
   }
 
   // tosa::NegateOp
-  if (isa<tosa::NegateOp>(op) && isa<FloatType>(elementTy))
-    return rewriter.create<arith::NegFOp>(loc, resultTypes, args);
+  if (isa<tosa::NegateOp>(op)) {
+    if (isa<FloatType>(elementTy))
+      return rewriter.create<arith::NegFOp>(loc, resultTypes, args);
 
-  if (isa<tosa::NegateOp>(op) && isa<IntegerType>(elementTy)) {
-    int64_t inZp = 0, outZp = 0;
+    if (isa<IntegerType>(elementTy)) {
+      auto inputZpAttr = cast<tosa::NegateOp>(op).getInput1Zp();
+      auto outputZpAttr = cast<tosa::NegateOp>(op).getOutputZp();
 
-    if (cast<tosa::NegateOp>(op).getQuantizationInfo()) {
-      auto quantizationInfo = cast<tosa::NegateOp>(op).getQuantizationInfo();
-      inZp = quantizationInfo.value().getInputZp();
-      outZp = quantizationInfo.value().getOutputZp();
-    }
+      const int64_t inZp = inputZpAttr ? *inputZpAttr : 0;
+      const int64_t outZp = outputZpAttr ? *outputZpAttr : 0;
 
-    int32_t inputBitWidth = elementTy.getIntOrFloatBitWidth();
-    if (!inZp && !outZp) {
-      auto constant = rewriter.create<arith::ConstantOp>(
-          loc, IntegerAttr::get(elementTy, 0));
-      return rewriter.create<arith::SubIOp>(loc, resultTypes, constant,
-                                            args[0]);
-    }
+      if (!inZp && !outZp) {
+        auto constant = rewriter.create<arith::ConstantOp>(
+            loc, IntegerAttr::get(elementTy, 0));
+        return rewriter.create<arith::SubIOp>(loc, resultTypes, constant,
+                                              args[0]);
+      }
 
-    // Compute the maximum value that can occur in the intermediate buffer.
-    int64_t zpAdd = inZp + outZp;
-    int64_t maxValue = APInt::getSignedMaxValue(inputBitWidth).getSExtValue() +
-                       std::abs(zpAdd) + 1;
-
-    // Convert that maximum value into the maximum bitwidth needed to represent
-    // it. We assume 48-bit numbers may be supported further in the pipeline.
-    int intermediateBitWidth = 64;
-    if (maxValue <= APInt::getSignedMaxValue(16).getSExtValue()) {
-      intermediateBitWidth = 16;
-    } else if (maxValue <= APInt::getSignedMaxValue(32).getSExtValue()) {
-      intermediateBitWidth = 32;
-    } else if (maxValue <= APInt::getSignedMaxValue(48).getSExtValue()) {
-      intermediateBitWidth = 48;
-    }
+      // Compute the maximum value that can occur in the intermediate buffer.
+      const int32_t inputBitWidth = elementTy.getIntOrFloatBitWidth();
+      const int64_t zpAdd = inZp + outZp;
+      const int64_t maxValue =
+          APInt::getSignedMaxValue(inputBitWidth).getSExtValue() +
+          std::abs(zpAdd) + 1;
+
+      // Convert that maximum value into the maximum bitwidth needed to
+      // represent it. We assume 48-bit numbers may be supported further in
+      // the pipeline.
+      int intermediateBitWidth = 64;
+      if (maxValue <= APInt::getSignedMaxValue(16).getSExtValue()) {
+        intermediateBitWidth = 16;
+      } else if (maxValue <= APInt::getSignedMaxValue(32).getSExtValue()) {
+        intermediateBitWidth = 32;
+      } else if (maxValue <= APInt::getSignedMaxValue(48).getSExtValue()) {
+        intermediateBitWidth = 48;
+      }
 
-    Type intermediateType = rewriter.getIntegerType(intermediateBitWidth);
-    Value zpAddValue = rewriter.create<arith::ConstantOp>(
-        loc, rewriter.getIntegerAttr(intermediateType, zpAdd));
-
-    // The negation can be applied by doing:
-    //  outputValue = inZp + outZp - inputValue
-    auto ext = rewriter.create<arith::ExtSIOp>(loc, intermediateType, args[0]);
-    auto sub = rewriter.create<arith::SubIOp>(loc, zpAddValue, ext);
-
-    // Clamp to the negation range.
-    Value min = rewriter.create<arith::ConstantIntOp>(
-        loc, APInt::getSignedMinValue(inputBitWidth).getSExtValue(),
-        intermediateType);
-    Value max = rewriter.create<arith::ConstantIntOp>(
-        loc, APInt::getSignedMaxValue(inputBitWidth).getSExtValue(),
-        intermediateType);
-    auto clamp =
-        clampIntHelper(loc, sub, min, max, rewriter, /*isUnsigned=*/false);
-
-    // Truncate to the final value.
-    return rewriter.create<arith::TruncIOp>(loc, elementTy, clamp);
+      Type intermediateType = rewriter.getIntegerType(intermediateBitWidth);
+      Value zpAddValue = rewriter.create<arith::ConstantOp>(
+          loc, rewriter.getIntegerAttr(intermediateType, zpAdd));
+
+      // The negation can be applied by doing:
+      //  outputValue = inZp + outZp - inputValue
+      auto ext =
+          rewriter.create<arith::ExtSIOp>(loc, intermediateType, args[0]);
+      auto sub = rewriter.create<arith::SubIOp>(loc, zpAddValue, ext);
+
+      // Clamp to the negation range.
+      Value min = rewriter.create<arith::ConstantIntOp>(
+          loc, APInt::getSignedMinValue(inputBitWidth).getSExtValue(),
+          intermediateType);
+      Value max = rewriter.create<arith::ConstantIntOp>(
+          loc, APInt::getSignedMaxValue(inputBitWidth).getSExtValue(),
+          intermediateType);
+      auto clamp = clampIntHelper(loc, sub, min, max, rewriter, false);
+
+      // Truncate to the final value.
+      return rewriter.create<arith::TruncIOp>(loc, elementTy, clamp);
+    }
   }
 
   // tosa::BitwiseAndOp
diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp
index cf9852e..6321cb6 100644
--- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp
+++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp
@@ -590,18 +590,15 @@ public:
                            .create<linalg::FillOp>(loc, ValueRange{zero},
                                                    ValueRange{emptyTensor})
                            .result();
-    if (!op.getQuantizationInfo()) {
+    if (!op.getAZp() && !op.getBZp()) {
       rewriter.replaceOpWithNewOp<linalg::BatchMatmulOp>(
           op, TypeRange{op.getType()},
           ValueRange{adaptor.getA(), adaptor.getB()}, ValueRange{zeroTensor});
       return success();
     }
 
-    auto quantizationInfo = *op.getQuantizationInfo();
-    auto aZp = rewriter.create<arith::ConstantOp>(
-        loc, rewriter.getI32IntegerAttr(quantizationInfo.getAZp()));
-    auto bZp = rewriter.create<arith::ConstantOp>(
-        loc, rewriter.getI32IntegerAttr(quantizationInfo.getBZp()));
+    auto aZp = rewriter.create<arith::ConstantOp>(loc, op.getAZpAttr());
+    auto bZp = rewriter.create<arith::ConstantOp>(loc, op.getBZpAttr());
     rewriter.replaceOpWithNewOp<linalg::QuantizedBatchMatmulOp>(
         op, TypeRange{op.getType()},
         ValueRange{adaptor.getA(), adaptor.getB(), aZp, bZp}, zeroTensor);
@@ -661,7 +658,7 @@ public:
     Value broadcastBias =
         linalgBroadcastAndMaybeExtSI(rewriter, loc, bias, biasEmptyTensor);
 
-    if (!op.getQuantizationInfo()) {
+    if (!op.getInputZp() && !op.getWeightZp()) {
       Value matmul = rewriter
                          .create<linalg::MatmulOp>(
                              loc, TypeRange{op.getType()},
@@ -672,11 +669,9 @@ public:
       return success();
     }
 
-    auto quantizationInfo = *op.getQuantizationInfo();
-    auto inputZp = rewriter.create<arith::ConstantOp>(
-        loc, rewriter.getI32IntegerAttr(quantizationInfo.getInputZp()));
-    auto outputZp = rewriter.create<arith::ConstantOp>(
-        loc, rewriter.getI32IntegerAttr(quantizationInfo.getWeightZp()));
+    auto inputZp = rewriter.create<arith::ConstantOp>(loc, op.getInputZpAttr());
+    auto outputZp =
+        rewriter.create<arith::ConstantOp>(loc, op.getWeightZpAttr());
     Value matmul =
         rewriter
             .create<linalg::QuantizedMatmulOp>(
@@ -958,10 +953,9 @@ public:
 
             // If we have quantization information we need to apply an offset
             // for the input zp value.
-            if (op.getQuantizationInfo()) {
-              auto quantizationInfo = *op.getQuantizationInfo();
-              auto inputZp = rewriter.create<arith::ConstantOp>(
-                  loc, b.getIntegerAttr(accETy, quantizationInfo.getInputZp()));
+            if (op.getInputZp()) {
+              auto inputZp =
+                  rewriter.create<arith::ConstantOp>(loc, op.getInputZpAttr());
               Value offset =
                   rewriter.create<arith::MulIOp>(loc, accETy, count, inputZp);
               poolVal =
@@ -1013,11 +1007,9 @@ public:
 
             // If we have quantization information we need to apply output
             // zeropoint.
-            if (op.getQuantizationInfo()) {
-              auto quantizationInfo = *op.getQuantizationInfo();
-              auto outputZp = rewriter.create<arith::ConstantOp>(
-                  loc, b.getIntegerAttr(scaled.getType(),
-                                        quantizationInfo.getOutputZp()));
+            if (op.getOutputZp()) {
+              auto outputZp =
+                  rewriter.create<arith::ConstantOp>(loc, op.getOutputZpAttr());
               scaled = rewriter.create<arith::AddIOp>(loc, scaled, outputZp)
                            .getResult();
             }
diff --git a/mlir/lib/Conversion/TosaToTensor/TosaToTensor.cpp b/mlir/lib/Conversion/TosaToTensor/TosaToTensor.cpp
index c4b787d..2a9b4d1 100644
--- a/mlir/lib/Conversion/TosaToTensor/TosaToTensor.cpp
+++ b/mlir/lib/Conversion/TosaToTensor/TosaToTensor.cpp
@@ -358,10 +358,10 @@ public:
       TypedAttr constantAttr;
       if (isa<FloatType>(elementTy)) {
         constantAttr = rewriter.getFloatAttr(elementTy, 0.0);
-      } else if (isa<IntegerType>(elementTy) && !padOp.getQuantizationInfo()) {
+      } else if (isa<IntegerType>(elementTy) && !padOp.getInputZpAttr()) {
         constantAttr = rewriter.getIntegerAttr(elementTy, 0);
-      } else if (isa<IntegerType>(elementTy) && padOp.getQuantizationInfo()) {
-        int64_t value = padOp.getQuantizationInfo()->getInputZp();
+      } else if (isa<IntegerType>(elementTy) && padOp.getInputZpAttr()) {
+        int64_t value = padOp.getInputZpAttr().getInt();
         constantAttr = rewriter.getIntegerAttr(elementTy, value);
       }
       if (constantAttr)
diff --git a/mlir/lib/Conversion/VectorToSPIRV/VectorToSPIRV.cpp b/mlir/lib/Conversion/VectorToSPIRV/VectorToSPIRV.cpp
index af882cb..2c8bc14 100644
--- a/mlir/lib/Conversion/VectorToSPIRV/VectorToSPIRV.cpp
+++ b/mlir/lib/Conversion/VectorToSPIRV/VectorToSPIRV.cpp
@@ -137,6 +137,33 @@ struct VectorBroadcastConvert final
   }
 };
 
+// SPIR-V does not have a concept of a poison index for certain instructions,
+// which creates a UB hazard when lowering from otherwise equivalent Vector
+// dialect instructions, because this index will be considered out-of-bounds.
+// To avoid this, this function implements a dynamic sanitization that returns
+// some arbitrary safe index. For power-of-two vector sizes, this uses a bitmask
+// (presumably more efficient), and otherwise index 0 (always in-bounds).
+static Value sanitizeDynamicIndex(ConversionPatternRewriter &rewriter,
+                                  Location loc, Value dynamicIndex,
+                                  int64_t kPoisonIndex, unsigned vectorSize) {
+  if (llvm::isPowerOf2_32(vectorSize)) {
+    Value inBoundsMask = rewriter.create<spirv::ConstantOp>(
+        loc, dynamicIndex.getType(),
+        rewriter.getIntegerAttr(dynamicIndex.getType(), vectorSize - 1));
+    return rewriter.create<spirv::BitwiseAndOp>(loc, dynamicIndex,
+                                                inBoundsMask);
+  }
+  Value poisonIndex = rewriter.create<spirv::ConstantOp>(
+      loc, dynamicIndex.getType(),
+      rewriter.getIntegerAttr(dynamicIndex.getType(), kPoisonIndex));
+  Value cmpResult =
+      rewriter.create<spirv::IEqualOp>(loc, dynamicIndex, poisonIndex);
+  return rewriter.create<spirv::SelectOp>(
+      loc, cmpResult,
+      spirv::ConstantOp::getZero(dynamicIndex.getType(), loc, rewriter),
+      dynamicIndex);
+}
+
 struct VectorExtractOpConvert final
     : public OpConversionPattern<vector::ExtractOp> {
   using OpConversionPattern::OpConversionPattern;
@@ -154,14 +181,26 @@ struct VectorExtractOpConvert final
     }
 
     if (std::optional<int64_t> id =
-            getConstantIntValue(extractOp.getMixedPosition()[0]))
-      rewriter.replaceOpWithNewOp<spirv::CompositeExtractOp>(
-          extractOp, dstType, adaptor.getVector(),
-          rewriter.getI32ArrayAttr(id.value()));
-    else
+            getConstantIntValue(extractOp.getMixedPosition()[0])) {
+      // TODO: ExtractOp::fold() already can fold a static poison index to
+      //       ub.poison; remove this once ub.poison can be converted to SPIR-V.
+      if (id == vector::ExtractOp::kPoisonIndex) {
+        // Arbitrary choice of poison result, intended to stick out.
+        Value zero =
+            spirv::ConstantOp::getZero(dstType, extractOp.getLoc(), rewriter);
+        rewriter.replaceOp(extractOp, zero);
+      } else
+        rewriter.replaceOpWithNewOp<spirv::CompositeExtractOp>(
+            extractOp, dstType, adaptor.getVector(),
+            rewriter.getI32ArrayAttr(id.value()));
+    } else {
+      Value sanitizedIndex = sanitizeDynamicIndex(
+          rewriter, extractOp.getLoc(), adaptor.getDynamicPosition()[0],
+          vector::ExtractOp::kPoisonIndex,
+          extractOp.getSourceVectorType().getNumElements());
       rewriter.replaceOpWithNewOp<spirv::VectorExtractDynamicOp>(
-          extractOp, dstType, adaptor.getVector(),
-          adaptor.getDynamicPosition()[0]);
+          extractOp, dstType, adaptor.getVector(), sanitizedIndex);
+    }
     return success();
   }
 };
@@ -266,13 +305,25 @@ struct VectorInsertOpConvert final
     }
 
     if (std::optional<int64_t> id =
-            getConstantIntValue(insertOp.getMixedPosition()[0]))
-      rewriter.replaceOpWithNewOp<spirv::CompositeInsertOp>(
-          insertOp, adaptor.getSource(), adaptor.getDest(), id.value());
-    else
+            getConstantIntValue(insertOp.getMixedPosition()[0])) {
+      // TODO: ExtractOp::fold() already can fold a static poison index to
+      //       ub.poison; remove this once ub.poison can be converted to SPIR-V.
+      if (id == vector::InsertOp::kPoisonIndex) {
+        // Arbitrary choice of poison result, intended to stick out.
+        Value zero = spirv::ConstantOp::getZero(insertOp.getDestVectorType(),
+                                                insertOp.getLoc(), rewriter);
+        rewriter.replaceOp(insertOp, zero);
+      } else
+        rewriter.replaceOpWithNewOp<spirv::CompositeInsertOp>(
+            insertOp, adaptor.getSource(), adaptor.getDest(), id.value());
+    } else {
+      Value sanitizedIndex = sanitizeDynamicIndex(
+          rewriter, insertOp.getLoc(), adaptor.getDynamicPosition()[0],
+          vector::InsertOp::kPoisonIndex,
+          insertOp.getDestVectorType().getNumElements());
       rewriter.replaceOpWithNewOp<spirv::VectorInsertDynamicOp>(
-          insertOp, insertOp.getDest(), adaptor.getSource(),
-          adaptor.getDynamicPosition()[0]);
+          insertOp, insertOp.getDest(), adaptor.getSource(), sanitizedIndex);
+    }
     return success();
   }
 };
diff --git a/mlir/lib/Dialect/Affine/Analysis/Utils.cpp b/mlir/lib/Dialect/Affine/Analysis/Utils.cpp
index 2960864..9c0b5db 100644
--- a/mlir/lib/Dialect/Affine/Analysis/Utils.cpp
+++ b/mlir/lib/Dialect/Affine/Analysis/Utils.cpp
@@ -42,23 +42,49 @@ using Node = MemRefDependenceGraph::Node;
 // was encountered in the loop nest.
 void LoopNestStateCollector::collect(Operation *opToWalk) {
   opToWalk->walk([&](Operation *op) {
-    if (isa<AffineForOp>(op))
-      forOps.push_back(cast<AffineForOp>(op));
-    else if (op->getNumRegions() != 0 && !isa<AffineIfOp>(op))
-      hasNonAffineRegionOp = true;
-    else if (isa<AffineReadOpInterface>(op))
+    if (auto forOp = dyn_cast<AffineForOp>(op)) {
+      forOps.push_back(forOp);
+    } else if (isa<AffineReadOpInterface>(op)) {
       loadOpInsts.push_back(op);
-    else if (isa<AffineWriteOpInterface>(op))
+    } else if (isa<AffineWriteOpInterface>(op)) {
       storeOpInsts.push_back(op);
+    } else {
+      auto memInterface = dyn_cast<MemoryEffectOpInterface>(op);
+      if (!memInterface) {
+        if (op->hasTrait<OpTrait::HasRecursiveMemoryEffects>())
+          // This op itself is memory-effect free.
+          return;
+        // Check operands. Eg. ops like the `call` op are handled here.
+        for (Value v : op->getOperands()) {
+          if (!isa<MemRefType>(v.getType()))
+            continue;
+          // Conservatively, we assume the memref is read and written to.
+          memrefLoads.push_back(op);
+          memrefStores.push_back(op);
+        }
+      } else {
+        // Non-affine loads and stores.
+        if (hasEffect<MemoryEffects::Read>(op))
+          memrefLoads.push_back(op);
+        if (hasEffect<MemoryEffects::Write>(op))
+          memrefStores.push_back(op);
+        if (hasEffect<MemoryEffects::Free>(op))
+          memrefFrees.push_back(op);
+      }
+    }
   });
 }
 
-// Returns the load op count for 'memref'.
 unsigned Node::getLoadOpCount(Value memref) const {
   unsigned loadOpCount = 0;
   for (Operation *loadOp : loads) {
-    if (memref == cast<AffineReadOpInterface>(loadOp).getMemRef())
+    // Common case: affine reads.
+    if (auto affineLoad = dyn_cast<AffineReadOpInterface>(loadOp)) {
+      if (memref == affineLoad.getMemRef())
+        ++loadOpCount;
+    } else if (hasEffect<MemoryEffects::Read>(loadOp, memref)) {
       ++loadOpCount;
+    }
   }
   return loadOpCount;
 }
@@ -66,13 +92,40 @@ unsigned Node::getLoadOpCount(Value memref) const {
 // Returns the store op count for 'memref'.
 unsigned Node::getStoreOpCount(Value memref) const {
   unsigned storeOpCount = 0;
-  for (Operation *storeOp : stores) {
-    if (memref == cast<AffineWriteOpInterface>(storeOp).getMemRef())
+  for (auto *storeOp : llvm::concat<Operation *const>(stores, memrefStores)) {
+    // Common case: affine writes.
+    if (auto affineStore = dyn_cast<AffineWriteOpInterface>(storeOp)) {
+      if (memref == affineStore.getMemRef())
+        ++storeOpCount;
+    } else if (hasEffect<MemoryEffects::Write>(const_cast<Operation *>(storeOp),
+                                               memref)) {
       ++storeOpCount;
+    }
   }
   return storeOpCount;
 }
 
+// Returns the store op count for 'memref'.
+unsigned Node::hasStore(Value memref) const {
+  return llvm::any_of(
+      llvm::concat<Operation *const>(stores, memrefStores),
+      [&](Operation *storeOp) {
+        if (auto affineStore = dyn_cast<AffineWriteOpInterface>(storeOp)) {
+          if (memref == affineStore.getMemRef())
+            return true;
+        } else if (hasEffect<MemoryEffects::Write>(storeOp, memref)) {
+          return true;
+        }
+        return false;
+      });
+}
+
+unsigned Node::hasFree(Value memref) const {
+  return llvm::any_of(memrefFrees, [&](Operation *freeOp) {
+    return hasEffect<MemoryEffects::Free>(freeOp, memref);
+  });
+}
+
 // Returns all store ops in 'storeOps' which access 'memref'.
 void Node::getStoreOpsForMemref(Value memref,
                                 SmallVectorImpl<Operation *> *storeOps) const {
@@ -106,8 +159,88 @@ void Node::getLoadAndStoreMemrefSet(
   }
 }
 
-// Initializes the data dependence graph by walking operations in `block`.
-// Assigns each node in the graph a node id based on program order in 'f'.
+/// Returns the values that this op has a memref effect of type `EffectTys` on,
+/// not considering recursive effects.
+template <typename... EffectTys>
+static void getEffectedValues(Operation *op, SmallVectorImpl<Value> &values) {
+  auto memOp = dyn_cast<MemoryEffectOpInterface>(op);
+  if (!memOp) {
+    if (op->hasTrait<OpTrait::HasRecursiveMemoryEffects>())
+      // No effects.
+      return;
+    // Memref operands have to be considered as being affected.
+    for (Value operand : op->getOperands()) {
+      if (isa<MemRefType>(operand.getType()))
+        values.push_back(operand);
+    }
+    return;
+  }
+  SmallVector<SideEffects::EffectInstance<MemoryEffects::Effect>, 4> effects;
+  memOp.getEffects(effects);
+  for (auto &effect : effects) {
+    Value effectVal = effect.getValue();
+    if (isa<EffectTys...>(effect.getEffect()) && effectVal &&
+        isa<MemRefType>(effectVal.getType()))
+      values.push_back(effectVal);
+  };
+}
+
+/// Add `op` to MDG creating a new node and adding its memory accesses (affine
+/// or non-affine to memrefAccesses (memref -> list of nodes with accesses) map.
+Node *addNodeToMDG(Operation *nodeOp, MemRefDependenceGraph &mdg,
+                   DenseMap<Value, SetVector<unsigned>> &memrefAccesses) {
+  auto &nodes = mdg.nodes;
+  // Create graph node 'id' to represent top-level 'forOp' and record
+  // all loads and store accesses it contains.
+  LoopNestStateCollector collector;
+  collector.collect(nodeOp);
+  unsigned newNodeId = mdg.nextNodeId++;
+  Node &node = nodes.insert({newNodeId, Node(newNodeId, nodeOp)}).first->second;
+  for (Operation *op : collector.loadOpInsts) {
+    node.loads.push_back(op);
+    auto memref = cast<AffineReadOpInterface>(op).getMemRef();
+    memrefAccesses[memref].insert(node.id);
+  }
+  for (Operation *op : collector.storeOpInsts) {
+    node.stores.push_back(op);
+    auto memref = cast<AffineWriteOpInterface>(op).getMemRef();
+    memrefAccesses[memref].insert(node.id);
+  }
+  for (Operation *op : collector.memrefLoads) {
+    SmallVector<Value> effectedValues;
+    getEffectedValues<MemoryEffects::Read>(op, effectedValues);
+    if (llvm::any_of(((ValueRange)effectedValues).getTypes(),
+                     [](Type type) { return !isa<MemRefType>(type); }))
+      // We do not know the interaction here.
+      return nullptr;
+    for (Value memref : effectedValues)
+      memrefAccesses[memref].insert(node.id);
+    node.memrefLoads.push_back(op);
+  }
+  for (Operation *op : collector.memrefStores) {
+    SmallVector<Value> effectedValues;
+    getEffectedValues<MemoryEffects::Write>(op, effectedValues);
+    if (llvm::any_of((ValueRange(effectedValues)).getTypes(),
+                     [](Type type) { return !isa<MemRefType>(type); }))
+      return nullptr;
+    for (Value memref : effectedValues)
+      memrefAccesses[memref].insert(node.id);
+    node.memrefStores.push_back(op);
+  }
+  for (Operation *op : collector.memrefFrees) {
+    SmallVector<Value> effectedValues;
+    getEffectedValues<MemoryEffects::Free>(op, effectedValues);
+    if (llvm::any_of((ValueRange(effectedValues)).getTypes(),
+                     [](Type type) { return !isa<MemRefType>(type); }))
+      return nullptr;
+    for (Value memref : effectedValues)
+      memrefAccesses[memref].insert(node.id);
+    node.memrefFrees.push_back(op);
+  }
+
+  return &node;
+}
+
 bool MemRefDependenceGraph::init() {
   LLVM_DEBUG(llvm::dbgs() << "--- Initializing MDG ---\n");
   // Map from a memref to the set of ids of the nodes that have ops accessing
@@ -116,36 +249,19 @@ bool MemRefDependenceGraph::init() {
 
   DenseMap<Operation *, unsigned> forToNodeMap;
   for (Operation &op : block) {
-    if (dyn_cast<AffineForOp>(op)) {
-      // Create graph node 'id' to represent top-level 'forOp' and record
-      // all loads and store accesses it contains.
-      LoopNestStateCollector collector;
-      collector.collect(&op);
-      // Return false if a region holding op other than 'affine.for' and
-      // 'affine.if' was found (not currently supported).
-      if (collector.hasNonAffineRegionOp)
+    if (auto forOp = dyn_cast<AffineForOp>(op)) {
+      Node *node = addNodeToMDG(&op, *this, memrefAccesses);
+      if (!node)
         return false;
-      Node node(nextNodeId++, &op);
-      for (auto *opInst : collector.loadOpInsts) {
-        node.loads.push_back(opInst);
-        auto memref = cast<AffineReadOpInterface>(opInst).getMemRef();
-        memrefAccesses[memref].insert(node.id);
-      }
-      for (auto *opInst : collector.storeOpInsts) {
-        node.stores.push_back(opInst);
-        auto memref = cast<AffineWriteOpInterface>(opInst).getMemRef();
-        memrefAccesses[memref].insert(node.id);
-      }
-      forToNodeMap[&op] = node.id;
-      nodes.insert({node.id, node});
-    } else if (dyn_cast<AffineReadOpInterface>(op)) {
+      forToNodeMap[&op] = node->id;
+    } else if (isa<AffineReadOpInterface>(op)) {
       // Create graph node for top-level load op.
       Node node(nextNodeId++, &op);
       node.loads.push_back(&op);
       auto memref = cast<AffineReadOpInterface>(op).getMemRef();
       memrefAccesses[memref].insert(node.id);
       nodes.insert({node.id, node});
-    } else if (dyn_cast<AffineWriteOpInterface>(op)) {
+    } else if (isa<AffineWriteOpInterface>(op)) {
       // Create graph node for top-level store op.
       Node node(nextNodeId++, &op);
       node.stores.push_back(&op);
@@ -155,8 +271,9 @@ bool MemRefDependenceGraph::init() {
     } else if (op.getNumResults() > 0 && !op.use_empty()) {
       // Create graph node for top-level producer of SSA values, which
       // could be used by loop nest nodes.
-      Node node(nextNodeId++, &op);
-      nodes.insert({node.id, node});
+      Node *node = addNodeToMDG(&op, *this, memrefAccesses);
+      if (!node)
+        return false;
     } else if (!isMemoryEffectFree(&op) &&
                (op.getNumRegions() == 0 || isa<RegionBranchOpInterface>(op))) {
       // Create graph node for top-level op unless it is known to be
@@ -165,9 +282,10 @@ bool MemRefDependenceGraph::init() {
       // well-defined control flow. During the fusion validity checks, we look
       // for non-affine ops on the path from source to destination, at which
       // point we check which memrefs if any are used in the region.
-      Node node(nextNodeId++, &op);
-      nodes.insert({node.id, node});
-    } else if (op.getNumRegions() != 0) {
+      Node *node = addNodeToMDG(&op, *this, memrefAccesses);
+      if (!node)
+        return false;
+    } else if (op.getNumRegions() != 0 && !isa<RegionBranchOpInterface>(op)) {
       // Return false if non-handled/unknown region-holding ops are found. We
       // won't know what such ops do or what its regions mean; for e.g., it may
       // not be an imperative op.
@@ -175,6 +293,9 @@ bool MemRefDependenceGraph::init() {
                  << "MDG init failed; unknown region-holding op found!\n");
       return false;
     }
+    // We aren't creating nodes for memory-effect free ops either with no
+    // regions (unless it has results being used) or those with branch op
+    // interface.
   }
 
   for (auto &idAndNode : nodes) {
@@ -216,16 +337,20 @@ bool MemRefDependenceGraph::init() {
   // Walk memref access lists and add graph edges between dependent nodes.
   for (auto &memrefAndList : memrefAccesses) {
     unsigned n = memrefAndList.second.size();
+    Value srcMemRef = memrefAndList.first;
+    // Add edges between all dependent pairs among the node IDs on this memref.
     for (unsigned i = 0; i < n; ++i) {
       unsigned srcId = memrefAndList.second[i];
-      bool srcHasStore =
-          getNode(srcId)->getStoreOpCount(memrefAndList.first) > 0;
+      Node *srcNode = getNode(srcId);
+      bool srcHasStoreOrFree =
+          srcNode->hasStore(srcMemRef) || srcNode->hasFree(srcMemRef);
       for (unsigned j = i + 1; j < n; ++j) {
         unsigned dstId = memrefAndList.second[j];
-        bool dstHasStore =
-            getNode(dstId)->getStoreOpCount(memrefAndList.first) > 0;
-        if (srcHasStore || dstHasStore)
-          addEdge(srcId, dstId, memrefAndList.first);
+        Node *dstNode = getNode(dstId);
+        bool dstHasStoreOrFree =
+            dstNode->hasStore(srcMemRef) || dstNode->hasFree(srcMemRef);
+        if (srcHasStoreOrFree || dstHasStoreOrFree)
+          addEdge(srcId, dstId, srcMemRef);
       }
     }
   }
@@ -565,12 +690,17 @@ void MemRefDependenceGraph::updateEdges(unsigned sibId, unsigned dstId) {
 }
 
 // Adds ops in 'loads' and 'stores' to node at 'id'.
-void MemRefDependenceGraph::addToNode(
-    unsigned id, const SmallVectorImpl<Operation *> &loads,
-    const SmallVectorImpl<Operation *> &stores) {
+void MemRefDependenceGraph::addToNode(unsigned id, ArrayRef<Operation *> loads,
+                                      ArrayRef<Operation *> stores,
+                                      ArrayRef<Operation *> memrefLoads,
+                                      ArrayRef<Operation *> memrefStores,
+                                      ArrayRef<Operation *> memrefFrees) {
   Node *node = getNode(id);
   llvm::append_range(node->loads, loads);
   llvm::append_range(node->stores, stores);
+  llvm::append_range(node->memrefLoads, memrefLoads);
+  llvm::append_range(node->memrefStores, memrefStores);
+  llvm::append_range(node->memrefFrees, memrefFrees);
 }
 
 void MemRefDependenceGraph::clearNodeLoadAndStores(unsigned id) {
diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp
index 6fefe44..c22ec21 100644
--- a/mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp
@@ -343,51 +343,6 @@ static Value createPrivateMemRef(AffineForOp forOp, Operation *srcStoreOpInst,
   return newMemRef;
 }
 
-/// Returns true if there are any non-affine uses of `memref` in any of
-/// the operations between `start` and `end` (both exclusive). Any other
-/// than affine read/write are treated as non-affine uses of `memref`.
-static bool hasNonAffineUsersOnPath(Operation *start, Operation *end,
-                                    Value memref) {
-  assert(start->getBlock() == end->getBlock());
-  assert(start->isBeforeInBlock(end) && "start expected to be before end");
-  Block *block = start->getBlock();
-  // Check if there is a non-affine memref user in any op between `start` and
-  // `end`.
-  return llvm::any_of(memref.getUsers(), [&](Operation *user) {
-    if (isa<AffineReadOpInterface, AffineWriteOpInterface>(user))
-      return false;
-    Operation *ancestor = block->findAncestorOpInBlock(*user);
-    return ancestor && start->isBeforeInBlock(ancestor) &&
-           ancestor->isBeforeInBlock(end);
-  });
-}
-
-/// Check whether a memref value used in any operation of 'src' has a
-/// non-affine operation that is between `src` and `end` (exclusive of `src`
-/// and `end`)  where `src` and `end` are expected to be in the same Block.
-/// Any other than affine read/write are treated as non-affine uses of memref.
-static bool hasNonAffineUsersOnPath(Operation *src, Operation *end) {
-  assert(src->getBlock() == end->getBlock() && "same block expected");
-
-  // Trivial case. `src` and `end` are exclusive.
-  if (src == end || end->isBeforeInBlock(src))
-    return false;
-
-  // Collect relevant memref values.
-  llvm::SmallDenseSet<Value, 2> memRefValues;
-  src->walk([&](Operation *op) {
-    for (Value v : op->getOperands())
-      // Collect memref values only.
-      if (isa<MemRefType>(v.getType()))
-        memRefValues.insert(v);
-    return WalkResult::advance();
-  });
-  // Look for non-affine users between `src` and `end`.
-  return llvm::any_of(memRefValues, [&](Value memref) {
-    return hasNonAffineUsersOnPath(src, end, memref);
-  });
-}
-
 // Checks the profitability of fusing a backwards slice of the loop nest
 // surrounding 'srcOpInst' into the loop nest surrounding 'dstLoadOpInsts'.
 // The argument 'srcStoreOpInst' is used to calculate the storage reduction on
@@ -864,19 +819,6 @@ public:
         DenseSet<Value> srcEscapingMemRefs;
         gatherEscapingMemrefs(srcNode->id, mdg, srcEscapingMemRefs);
 
-        // Skip if there are non-affine operations in between the 'srcNode'
-        // and 'dstNode' using their memrefs. If so, we wouldn't be able to
-        // compute a legal insertion point for now. 'srcNode' and 'dstNode'
-        // memrefs with non-affine operation users would be considered
-        // escaping memrefs so we can limit this check to only scenarios with
-        // escaping memrefs.
-        if (!srcEscapingMemRefs.empty() &&
-            hasNonAffineUsersOnPath(srcNode->op, dstNode->op)) {
-          LLVM_DEBUG(llvm::dbgs()
-                     << "Can't fuse: non-affine users in between the loops\n");
-          continue;
-        }
-
         // Compute an operation list insertion point for the fused loop
         // nest which preserves dependences.
         Operation *fusedLoopInsPoint =
@@ -1039,8 +981,10 @@ public:
 
         // Clear and add back loads and stores.
         mdg->clearNodeLoadAndStores(dstNode->id);
-        mdg->addToNode(dstId, dstLoopCollector.loadOpInsts,
-                       dstLoopCollector.storeOpInsts);
+        mdg->addToNode(
+            dstId, dstLoopCollector.loadOpInsts, dstLoopCollector.storeOpInsts,
+            dstLoopCollector.memrefLoads, dstLoopCollector.memrefStores,
+            dstLoopCollector.memrefFrees);
 
         if (removeSrcNode) {
           LLVM_DEBUG(llvm::dbgs()
@@ -1229,15 +1173,7 @@ public:
         storeMemrefs.insert(
             cast<AffineWriteOpInterface>(storeOpInst).getMemRef());
       }
-      if (storeMemrefs.size() > 1)
-        return false;
-
-      // Skip if a memref value in one node is used by a non-affine memref
-      // access that lies between 'dstNode' and 'sibNode'.
-      if (hasNonAffineUsersOnPath(dstNode->op, sibNode->op) ||
-          hasNonAffineUsersOnPath(sibNode->op, dstNode->op))
-        return false;
-      return true;
+      return storeMemrefs.size() <= 1;
     };
 
     // Search for siblings which load the same memref block argument.
@@ -1339,7 +1275,8 @@ public:
     // Clear and add back loads and stores
     mdg->clearNodeLoadAndStores(dstNode->id);
     mdg->addToNode(dstNode->id, dstLoopCollector.loadOpInsts,
-                   dstLoopCollector.storeOpInsts);
+                   dstLoopCollector.storeOpInsts, dstLoopCollector.memrefLoads,
+                   dstLoopCollector.memrefStores, dstLoopCollector.memrefFrees);
     // Remove old sibling loop nest if it no longer has outgoing dependence
     // edges, and it does not write to a memref which escapes the block.
     if (mdg->getOutEdgeCount(sibNode->id) == 0) {
diff --git a/mlir/lib/Dialect/Arith/Transforms/EmulateWideInt.cpp b/mlir/lib/Dialect/Arith/Transforms/EmulateWideInt.cpp
index 2d49854..61f8d82 100644
--- a/mlir/lib/Dialect/Arith/Transforms/EmulateWideInt.cpp
+++ b/mlir/lib/Dialect/Arith/Transforms/EmulateWideInt.cpp
@@ -1044,9 +1044,9 @@ struct EmulateWideIntPass final
       return typeConverter.isLegal(op);
     };
     target.addDynamicallyLegalOp<func::CallOp, func::ReturnOp>(opLegalCallback);
-    target
-        .addDynamicallyLegalDialect<arith::ArithDialect, vector::VectorDialect>(
-            opLegalCallback);
+    target.addDynamicallyLegalOp<vector::PrintOp>(opLegalCallback);
+    target.addDynamicallyLegalDialect<arith::ArithDialect>(opLegalCallback);
+    target.addLegalDialect<vector::VectorDialect>();
 
     RewritePatternSet patterns(ctx);
     arith::populateArithWideIntEmulationPatterns(typeConverter, patterns);
diff --git a/mlir/lib/Dialect/GPU/TransformOps/Utils.cpp b/mlir/lib/Dialect/GPU/TransformOps/Utils.cpp
index f4d3612..f5a6d08 100644
--- a/mlir/lib/Dialect/GPU/TransformOps/Utils.cpp
+++ b/mlir/lib/Dialect/GPU/TransformOps/Utils.cpp
@@ -14,6 +14,7 @@
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/GPU/TransformOps/GPUTransformOps.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h"
 #include "mlir/Dialect/SCF/IR/DeviceMappingInterface.h"
 #include "mlir/Dialect/SCF/IR/SCF.h"
 #include "mlir/Dialect/Transform/IR/TransformDialect.h"
@@ -237,25 +238,17 @@ DiagnosedSilenceableFailure checkGpuLimits(TransformOpInterface transformOp,
                                            std::optional<int64_t> blockDimZ) {
 
   // TODO: pass a configuration object to set the limits properly.
-  static constexpr int maxTotalBlockdim = 1024;
-  static constexpr int maxBlockdimx = 1024;
-  static constexpr int maxBlockdimy = 1024;
-  static constexpr int maxBlockdimz = 64;
-  static constexpr int maxTotalGriddim = 2147483647;
-  static constexpr int maxGriddimx = 2147483647;
-  static constexpr int maxGriddimy = 65535;
-  static constexpr int maxGriddimz = 65535;
 
   if ((blockDimX.value_or(1) * blockDimY.value_or(1) * blockDimZ.value_or(1)) >
-          maxTotalBlockdim ||
+          kMaxTotalBlockdim ||
       (gridDimX.value_or(1) * gridDimY.value_or(1) * gridDimZ.value_or(1)) >
-          maxTotalGriddim ||
-      blockDimX.value_or(1) > maxBlockdimx ||
-      blockDimY.value_or(1) > maxBlockdimy ||
-      blockDimZ.value_or(1) > maxBlockdimz ||
-      gridDimY.value_or(1) > maxGriddimy ||
-      gridDimZ.value_or(1) > maxGriddimz ||
-      gridDimX.value_or(1) > maxGriddimx) {
+          kMaxTotalGriddim ||
+      blockDimX.value_or(1) > kMaxBlockdimx ||
+      blockDimY.value_or(1) > kMaxBlockdimy ||
+      blockDimZ.value_or(1) > kMaxBlockdimz ||
+      gridDimY.value_or(1) > kMaxGriddimy ||
+      gridDimZ.value_or(1) > kMaxGriddimz ||
+      gridDimX.value_or(1) > kMaxGriddimx) {
     return transformOp.emitSilenceableError()
            << "Trying to launch a GPU kernel with grid_dims = ("
            << gridDimX.value_or(1) << ", " << gridDimY.value_or(1) << ", "
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index a5d09ea..241b25c 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -1243,6 +1243,47 @@ llvm::Intrinsic::ID CvtFloatToTF32Op::getIntrinsicID(NVVM::FPRoundingMode rnd,
   }
 }
 
+llvm::Intrinsic::ID
+Tcgen05AllocOp::getIntrinsicIDAndArgs(Operation &op,
+                                      LLVM::ModuleTranslation &mt,
+                                      llvm::SmallVector<llvm::Value *> &args) {
+  auto curOp = cast<NVVM::Tcgen05AllocOp>(op);
+  unsigned AS = llvm::cast<LLVM::LLVMPointerType>(curOp.getAddr().getType())
+                    .getAddressSpace();
+  bool isShared = AS == NVVMMemorySpace::kSharedMemorySpace;
+  bool is2CTAMode = curOp.getGroup() == Tcgen05GroupKind::CTA_2;
+
+  llvm::Intrinsic::ID id;
+  if (isShared) {
+    id = is2CTAMode ? llvm::Intrinsic::nvvm_tcgen05_alloc_shared_cg2
+                    : llvm::Intrinsic::nvvm_tcgen05_alloc_shared_cg1;
+  } else {
+    id = is2CTAMode ? llvm::Intrinsic::nvvm_tcgen05_alloc_cg2
+                    : llvm::Intrinsic::nvvm_tcgen05_alloc_cg1;
+  }
+
+  // Fill the Intrinsic Args
+  args.push_back(mt.lookupValue(curOp.getAddr()));
+  args.push_back(mt.lookupValue(curOp.getNCols()));
+
+  return id;
+}
+
+llvm::Intrinsic::ID Tcgen05DeallocOp::getIntrinsicIDAndArgs(
+    Operation &op, LLVM::ModuleTranslation &mt,
+    llvm::SmallVector<llvm::Value *> &args) {
+  auto curOp = cast<NVVM::Tcgen05DeallocOp>(op);
+  auto id = (curOp.getGroup() == Tcgen05GroupKind::CTA_1)
+                ? llvm::Intrinsic::nvvm_tcgen05_dealloc_cg1
+                : llvm::Intrinsic::nvvm_tcgen05_dealloc_cg2;
+
+  // Fill the Intrinsic Args
+  args.push_back(mt.lookupValue(curOp.getTaddr()));
+  args.push_back(mt.lookupValue(curOp.getNCols()));
+
+  return id;
+}
+
 /// Infer the result ranges for the NVVM SpecialRangeableRegisterOp that might
 /// have ConstantRangeAttr.
 static void nvvmInferResultRanges(Operation *op, Value result,
diff --git a/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp b/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp
index 9d36947..8e22c87 100644
--- a/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp
+++ b/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp
@@ -207,10 +207,10 @@ struct MaterializePadValue : public OpRewritePattern<tosa::PadOp> {
     Attribute constantAttr;
     if (llvm::isa<FloatType>(elementTy)) {
       constantAttr = rewriter.getFloatAttr(elementTy, 0.0);
-    } else if (llvm::isa<IntegerType>(elementTy) && !op.getQuantizationInfo()) {
+    } else if (llvm::isa<IntegerType>(elementTy) && !op.getInputZpAttr()) {
       constantAttr = rewriter.getIntegerAttr(elementTy, 0);
-    } else if (llvm::isa<IntegerType>(elementTy) && op.getQuantizationInfo()) {
-      auto value = op.getQuantizationInfo()->getInputZp();
+    } else if (llvm::isa<IntegerType>(elementTy) && op.getInputZpAttr()) {
+      int64_t value = op.getInputZpAttr().getInt();
       constantAttr = rewriter.getIntegerAttr(elementTy, value);
     }
 
diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp
index e8b2890..031c279 100644
--- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp
+++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp
@@ -271,11 +271,11 @@ static LogicalResult verifyConvOp(T op) {
     }
   }
 
-  bool inputIsQuant = !llvm::isa<FloatType>(inputEType);
-  bool weightIsQuant = !llvm::isa<FloatType>(weightEType);
+  bool inputIsFloat = llvm::isa<FloatType>(inputEType);
+  bool weightIsFloat = llvm::isa<FloatType>(weightEType);
 
-  // Either both must be quantized or both unquantized.
-  if (inputIsQuant != weightIsQuant) {
+  // Either both must be float or both non-float.
+  if (inputIsFloat != weightIsFloat) {
     op.emitOpError(
         "expect both input and weight to be float or not together, got ")
         << inputEType << " and " << weightEType;
@@ -527,7 +527,12 @@ static void buildTransConvOpWithQuantInfo(
   auto quantAttr = ::buildConvOpQuantizationAttr(builder, input, weight);
 
   if (quantAttr) {
-    result.addAttribute("quantization_info", quantAttr);
+    result.addAttribute("input_zp",
+                        builder.getI32IntegerAttr(
+                            static_cast<int32_t>(quantAttr.getInputZp())));
+    result.addAttribute("weight_zp",
+                        builder.getI32IntegerAttr(
+                            static_cast<int32_t>(quantAttr.getWeightZp())));
     result.addTypes(
         buildConvOpResultTypeInfo(builder, outputType, input, weight));
   } else {
@@ -563,7 +568,10 @@ static void buildMatMulOpWithQuantInfo(OpBuilder &builder,
   auto quantAttr = ::buildMatMulOpQuantizationAttr(builder, a, b);
 
   if (quantAttr) {
-    result.addAttribute("quantization_info", quantAttr);
+    result.addAttribute("a_zp", builder.getI32IntegerAttr(
+                                    static_cast<int32_t>(quantAttr.getAZp())));
+    result.addAttribute("b_zp", builder.getI32IntegerAttr(
+                                    static_cast<int32_t>(quantAttr.getBZp())));
 
     auto inputType = llvm::dyn_cast<ShapedType>(a.getType());
     assert(inputType && "Input must be a shaped tensor type!");
@@ -603,8 +611,14 @@ buildAvgPool2dOpWithQuantInfo(OpBuilder &builder, OperationState &result,
   result.addAttribute("pad", pad);
   result.addAttribute("acc_type", accType);
   auto quantAttr = buildUnaryOpQuantizationAttr(builder, input, outputType);
-  if (quantAttr)
-    result.addAttribute("quantization_info", quantAttr);
+  if (quantAttr) {
+    result.addAttribute("input_zp",
+                        builder.getI32IntegerAttr(
+                            static_cast<int32_t>(quantAttr.getInputZp())));
+    result.addAttribute("output_zp",
+                        builder.getI32IntegerAttr(
+                            static_cast<int32_t>(quantAttr.getOutputZp())));
+  }
   result.types.push_back(outputType);
 }
 
@@ -616,8 +630,15 @@ static void buildUnaryOpWithQuantInfo(OpBuilder &builder,
                                       Value input) {
   result.addOperands(input);
   auto quantAttr = buildUnaryOpQuantizationAttr(builder, input, outputType);
-  if (quantAttr)
-    result.addAttribute("quantization_info", quantAttr);
+  if (quantAttr) {
+    // note: negateOp has attributes input1_zp and output_zp
+    result.addAttribute("input1_zp",
+                        builder.getI32IntegerAttr(
+                            static_cast<int32_t>(quantAttr.getInputZp())));
+    result.addAttribute("output_zp",
+                        builder.getI32IntegerAttr(
+                            static_cast<int32_t>(quantAttr.getOutputZp())));
+  }
   result.types.push_back(outputType);
 }
 
@@ -629,8 +650,11 @@ static void buildPadOpWithQuantInfo(OpBuilder &builder, OperationState &result,
                                     Value paddings) {
   result.addOperands({input, paddings});
   auto quantAttr = buildPadOpQuantizationAttr(builder, input);
-  if (quantAttr)
-    result.addAttribute("quantization_info", quantAttr);
+  if (quantAttr) {
+    result.addAttribute("input_zp",
+                        builder.getI32IntegerAttr(
+                            static_cast<int32_t>(quantAttr.getInputZp())));
+  }
   result.types.push_back(outputType);
 }
 
@@ -643,8 +667,11 @@ static void buildExplicitValuePadOpWithQuantInfo(OpBuilder &builder,
                                                  Value padConst) {
   result.addOperands({input, paddings, padConst});
   auto quantAttr = buildPadOpQuantizationAttr(builder, input);
-  if (quantAttr)
-    result.addAttribute("quantization_info", quantAttr);
+  if (quantAttr) {
+    result.addAttribute("input_zp",
+                        builder.getI32IntegerAttr(
+                            static_cast<int32_t>(quantAttr.getInputZp())));
+  }
   result.types.push_back(outputType);
 }
 
@@ -898,9 +925,8 @@ LogicalResult FullyConnectedOp::verify() {
 
   // Quantized type must have constructed the quantizationattr, and unquantized
   // types should not have a quantizationattr.
-  if ((inputIsQuant && !getQuantizationInfo()) ||
-      (!inputIsQuant && getQuantizationInfo())) {
-    emitOpError("quantizationattr is required for quantized type, and not "
+  if ((inputIsQuant && !getInputZp()) || (!inputIsQuant && getInputZp())) {
+    emitOpError("input zero point is required for quantized type, and not "
                 "allowed for float type");
     return failure();
   }
diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeConv2D.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeConv2D.cpp
index 7d3deae..4eba89b 100644
--- a/mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeConv2D.cpp
+++ b/mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeConv2D.cpp
@@ -130,13 +130,13 @@ struct Conv2DIsFullyConnected : public OpRewritePattern<tosa::Conv2DOp> {
     auto maybeZps = failureOrMaybeZps.value();
     Value fullyConnectedValue;
     if (maybeZps) {
-      auto zeroPointAttr = rewriter.getAttr<tosa::ConvOpQuantizationAttr>(
-          maybeZps->inputZp, maybeZps->weightZp);
       fullyConnectedValue =
           rewriter
               .create<tosa::FullyConnectedOp>(
                   op.getLoc(), fullyConnectedShapeType, reshapedInput,
-                  reshapedWeight, op.getBias(), zeroPointAttr)
+                  reshapedWeight, op.getBias(),
+                  rewriter.getI32IntegerAttr(maybeZps->inputZp),
+                  rewriter.getI32IntegerAttr(maybeZps->weightZp))
               .getResult();
     } else {
       fullyConnectedValue = rewriter
diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeTransposeConv.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeTransposeConv.cpp
index ae22467..b5b3e9d 100644
--- a/mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeTransposeConv.cpp
+++ b/mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeTransposeConv.cpp
@@ -143,8 +143,7 @@ public:
       weight = CreateOpAndInferShape<tosa::PadOp>(
           rewriter, loc, UnrankedTensorType::get(weightETy), weight,
           weightPaddingVal, nullptr,
-          rewriter.getAttr<PadOpQuantizationAttr>(maybeZps->weightZp));
-
+          rewriter.getI32IntegerAttr(maybeZps->weightZp));
     } else {
       weight = CreateOpAndInferShape<tosa::PadOp>(
           rewriter, loc, UnrankedTensorType::get(weightETy), weight,
@@ -203,7 +202,7 @@ public:
       input = CreateOpAndInferShape<tosa::PadOp>(
           rewriter, loc, UnrankedTensorType::get(inputETy), input,
           inputPaddingVal, nullptr,
-          rewriter.getAttr<PadOpQuantizationAttr>(maybeZps->inputZp));
+          rewriter.getI32IntegerAttr(maybeZps->inputZp));
     } else {
       input = CreateOpAndInferShape<tosa::PadOp>(
           rewriter, loc, UnrankedTensorType::get(inputETy), input,
diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
index 93f89ed..7a10d2f 100644
--- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
+++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
@@ -26,7 +26,6 @@
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinAttributes.h"
-#include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/DialectImplementation.h"
 #include "mlir/IR/IRMapping.h"
@@ -42,7 +41,6 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/ADT/TypeSwitch.h"
-#include "llvm/ADT/bit.h"
 
 #include <cassert>
 #include <cstdint>
@@ -437,6 +435,9 @@ void VectorDialect::initialize() {
 Operation *VectorDialect::materializeConstant(OpBuilder &builder,
                                               Attribute value, Type type,
                                               Location loc) {
+  if (isa<ub::PoisonAttrInterface>(value))
+    return value.getDialect().materializeConstant(builder, value, type, loc);
+
   return arith::ConstantOp::materialize(builder, value, type, loc);
 }
 
@@ -2273,20 +2274,6 @@ LogicalResult foldExtractFromFromElements(ExtractOp extractOp,
   return success();
 }
 
-/// Fold an insert or extract operation into an poison value when a poison index
-/// is found at any dimension of the static position.
-template <typename OpTy>
-LogicalResult
-canonicalizePoisonIndexInsertExtractOp(OpTy op, PatternRewriter &rewriter) {
-  if (auto poisonAttr = foldPoisonIndexInsertExtractOp(
-          op.getContext(), op.getStaticPosition(), OpTy::kPoisonIndex)) {
-    rewriter.replaceOpWithNewOp<ub::PoisonOp>(op, op.getType(), poisonAttr);
-    return success();
-  }
-
-  return failure();
-}
-
 } // namespace
 
 void ExtractOp::getCanonicalizationPatterns(RewritePatternSet &results,
@@ -2295,7 +2282,6 @@ void ExtractOp::getCanonicalizationPatterns(RewritePatternSet &results,
               ExtractOpFromBroadcast, ExtractOpFromCreateMask>(context);
   results.add(foldExtractFromShapeCastToShapeCast);
   results.add(foldExtractFromFromElements);
-  results.add(canonicalizePoisonIndexInsertExtractOp<ExtractOp>);
 }
 
 static void populateFromInt64AttrArray(ArrayAttr arrayAttr,
@@ -2696,25 +2682,45 @@ OpFoldResult vector::ShuffleOp::fold(FoldAdaptor adaptor) {
   if (!v1Attr || !v2Attr)
     return {};
 
+  // Fold shuffle poison, poison -> poison.
+  bool isV1Poison = isa<ub::PoisonAttr>(v1Attr);
+  bool isV2Poison = isa<ub::PoisonAttr>(v2Attr);
+  if (isV1Poison && isV2Poison)
+    return ub::PoisonAttr::get(getContext());
+
   // Only support 1-D for now to avoid complicated n-D DenseElementsAttr
   // manipulation.
   if (v1Type.getRank() != 1)
     return {};
 
-  int64_t v1Size = v1Type.getDimSize(0);
+  // Poison input attributes need special handling as they are not
+  // DenseElementsAttr. If an index is poison, we select the first element of
+  // the first non-poison input.
+  SmallVector<Attribute> v1Elements, v2Elements;
+  Attribute poisonElement;
+  if (!isV2Poison) {
+    v2Elements =
+        to_vector(cast<DenseElementsAttr>(v2Attr).getValues<Attribute>());
+    poisonElement = v2Elements[0];
+  }
+  if (!isV1Poison) {
+    v1Elements =
+        to_vector(cast<DenseElementsAttr>(v1Attr).getValues<Attribute>());
+    poisonElement = v1Elements[0];
+  }
 
   SmallVector<Attribute> results;
-  auto v1Elements = cast<DenseElementsAttr>(v1Attr).getValues<Attribute>();
-  auto v2Elements = cast<DenseElementsAttr>(v2Attr).getValues<Attribute>();
+  int64_t v1Size = v1Type.getDimSize(0);
   for (int64_t maskIdx : mask) {
     Attribute indexedElm;
-    // Select v1[0] for poison indices.
     // TODO: Return a partial poison vector when supported by the UB dialect.
     if (maskIdx == ShuffleOp::kPoisonIndex) {
-      indexedElm = v1Elements[0];
+      indexedElm = poisonElement;
     } else {
-      indexedElm =
-          maskIdx < v1Size ? v1Elements[maskIdx] : v2Elements[maskIdx - v1Size];
+      if (maskIdx < v1Size)
+        indexedElm = isV1Poison ? poisonElement : v1Elements[maskIdx];
+      else
+        indexedElm = isV2Poison ? poisonElement : v2Elements[maskIdx - v1Size];
     }
 
     results.push_back(indexedElm);
@@ -3068,7 +3074,6 @@ void InsertOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                            MLIRContext *context) {
   results.add<InsertToBroadcast, BroadcastFolder, InsertSplatToSplat,
               InsertOpConstantFolder>(context);
-  results.add(canonicalizePoisonIndexInsertExtractOp<InsertOp>);
 }
 
 OpFoldResult vector::InsertOp::fold(FoldAdaptor adaptor) {
@@ -3332,13 +3337,15 @@ public:
         !destVector.hasOneUse())
       return failure();
 
-    auto denseDest = llvm::cast<DenseElementsAttr>(vectorDestCst);
-
     TypedValue<VectorType> sourceValue = op.getSource();
     Attribute sourceCst;
     if (!matchPattern(sourceValue, m_Constant(&sourceCst)))
       return failure();
 
+    // TODO: Support poison.
+    if (isa<ub::PoisonAttr>(vectorDestCst) || isa<ub::PoisonAttr>(sourceCst))
+      return failure();
+
     // TODO: Handle non-unit strides when they become available.
     if (op.hasNonUnitStrides())
       return failure();
@@ -3355,6 +3362,7 @@ public:
     // increasing linearized position indices.
     // Because the destination may have higher dimensionality then the slice,
     // we keep track of two overlapping sets of positions and offsets.
+    auto denseDest = llvm::cast<DenseElementsAttr>(vectorDestCst);
     auto denseSlice = llvm::cast<DenseElementsAttr>(sourceCst);
     auto sliceValuesIt = denseSlice.value_begin<Attribute>();
     auto newValues = llvm::to_vector(denseDest.getValues<Attribute>());
diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp
index fa4a1b4..eea4f7f 100644
--- a/mlir/lib/IR/AsmPrinter.cpp
+++ b/mlir/lib/IR/AsmPrinter.cpp
@@ -2188,13 +2188,6 @@ void AsmPrinter::Impl::printLocation(LocationAttr loc, bool allowAlias) {
   os << ')';
 }
 
-void AsmPrinter::Impl::printResourceHandle(
-    const AsmDialectResourceHandle &resource) {
-  auto *interface = cast<OpAsmDialectInterface>(resource.getDialect());
-  os << interface->getResourceKey(resource);
-  state.getDialectResources()[resource.getDialect()].insert(resource);
-}
-
 /// Returns true if the given dialect symbol data is simple enough to print in
 /// the pretty form. This is essentially when the symbol takes the form:
 ///   identifier (`<` body `>`)?
@@ -2279,6 +2272,13 @@ static void printElidedElementsAttr(raw_ostream &os) {
   os << R"(dense_resource<__elided__>)";
 }
 
+void AsmPrinter::Impl::printResourceHandle(
+    const AsmDialectResourceHandle &resource) {
+  auto *interface = cast<OpAsmDialectInterface>(resource.getDialect());
+  ::printKeywordOrString(interface->getResourceKey(resource), os);
+  state.getDialectResources()[resource.getDialect()].insert(resource);
+}
+
 LogicalResult AsmPrinter::Impl::printAlias(Attribute attr) {
   return state.getAliasState().getAlias(attr, os);
 }
@@ -3373,41 +3373,41 @@ void OperationPrinter::printResourceFileMetadata(
     auto printFn = [&](StringRef key, ResourceBuilder::ValueFn valueFn) {
       checkAddMetadataDict();
 
-      auto printFormatting = [&]() {
-        // Emit the top-level resource entry if we haven't yet.
-        if (!std::exchange(hadResource, true)) {
-          if (needResourceComma)
-            os << "," << newLine;
-          os << "  " << dictName << "_resources: {" << newLine;
-        }
-        // Emit the parent resource entry if we haven't yet.
-        if (!std::exchange(hadEntry, true)) {
-          if (needEntryComma)
-            os << "," << newLine;
-          os << "    " << name << ": {" << newLine;
-        } else {
-          os << "," << newLine;
-        }
-      };
-
+      std::string resourceStr;
+      auto printResourceStr = [&](raw_ostream &os) { os << resourceStr; };
       std::optional<uint64_t> charLimit =
           printerFlags.getLargeResourceStringLimit();
       if (charLimit.has_value()) {
-        std::string resourceStr;
         llvm::raw_string_ostream ss(resourceStr);
         valueFn(ss);
 
-        // Only print entry if it's string is small enough
+        // Only print entry if its string is small enough.
         if (resourceStr.size() > charLimit.value())
           return;
 
-        printFormatting();
-        os << "      " << key << ": " << resourceStr;
+        // Don't recompute resourceStr when valueFn is called below.
+        valueFn = printResourceStr;
+      }
+
+      // Emit the top-level resource entry if we haven't yet.
+      if (!std::exchange(hadResource, true)) {
+        if (needResourceComma)
+          os << "," << newLine;
+        os << "  " << dictName << "_resources: {" << newLine;
+      }
+      // Emit the parent resource entry if we haven't yet.
+      if (!std::exchange(hadEntry, true)) {
+        if (needEntryComma)
+          os << "," << newLine;
+        os << "    " << name << ": {" << newLine;
       } else {
-        printFormatting();
-        os << "      " << key << ": ";
-        valueFn(os);
+        os << "," << newLine;
       }
+      os << "      ";
+      ::printKeywordOrString(key, os);
+      os << ": ";
+      // Call printResourceStr or original valueFn, depending on charLimit.
+      valueFn(os);
     };
     ResourceBuilder entryBuilder(printFn);
     provider.buildResources(op, providerArgs..., entryBuilder);
diff --git a/mlir/lib/Target/Cpp/TranslateToCpp.cpp b/mlir/lib/Target/Cpp/TranslateToCpp.cpp
index 01de0e4..3ba1244 100644
--- a/mlir/lib/Target/Cpp/TranslateToCpp.cpp
+++ b/mlir/lib/Target/Cpp/TranslateToCpp.cpp
@@ -1387,11 +1387,9 @@ LogicalResult CppEmitter::emitOperand(Value value) {
     // as they might be evaluated in the wrong order depending on the shape of
     // the expression tree.
     bool encloseInParenthesis = precedence.value() <= getExpressionPrecedence();
-    if (encloseInParenthesis) {
+    if (encloseInParenthesis)
       os << "(";
-      pushExpressionPrecedence(lowestPrecedence());
-    } else
-      pushExpressionPrecedence(precedence.value());
+    pushExpressionPrecedence(precedence.value());
 
     if (failed(emitOperation(*def, /*trailingSemicolon=*/false)))
       return failure();
diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt
index 4be147d..83fbf7a 100644
--- a/mlir/lib/Target/LLVM/CMakeLists.txt
+++ b/mlir/lib/Target/LLVM/CMakeLists.txt
@@ -125,7 +125,7 @@ function(embed_binary_to_src file output_file symbol)
     # Convert hex data for C compatibility
     string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," filedata ${filedata})
     # Write data to output file
-    file(WRITE ${output_file} "const char ${symbol}[] = {${filedata}};\nconst int ${symbol}_size = sizeof(${symbol});\n")
+    file(WRITE ${output_file} "const unsigned char ${symbol}[] = {${filedata}};\nconst int ${symbol}_size = sizeof(${symbol});\n")
 endfunction()
 
 set(MLIR_NVVM_EMBED_LIBDEVICE 0 CACHE BOOL "Embed CUDA libdevice.bc in the binary at build time instead of looking it up at runtime")
diff --git a/mlir/lib/Target/LLVM/NVVM/Target.cpp b/mlir/lib/Target/LLVM/NVVM/Target.cpp
index 86ff848..b7d60ed 100644
--- a/mlir/lib/Target/LLVM/NVVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/NVVM/Target.cpp
@@ -47,7 +47,7 @@ using namespace mlir::NVVM;
 #define __DEFAULT_CUDATOOLKIT_PATH__ ""
 #endif
 
-extern "C" const char _mlir_embedded_libdevice[];
+extern "C" const unsigned char _mlir_embedded_libdevice[];
 extern "C" const unsigned _mlir_embedded_libdevice_size;
 
 namespace {
@@ -160,7 +160,8 @@ LogicalResult SerializeGPUModuleBase::appendStandardLibs() {
   // Allocate a resource using one of the UnManagedResourceBlob method to wrap
   // the embedded data.
   auto unmanagedBlob = UnmanagedAsmResourceBlob::allocateInferAlign(
-      ArrayRef<char>{_mlir_embedded_libdevice, _mlir_embedded_libdevice_size});
+      ArrayRef<char>{(const char *)_mlir_embedded_libdevice,
+                     _mlir_embedded_libdevice_size});
   librariesToLink.push_back(DenseResourceElementsAttr::get(
       type, resourceManager.insert("_mlir_embedded_libdevice",
                                    std::move(unmanagedBlob))));
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index ea044fe..9c16388 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -4229,6 +4229,8 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
   auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP)
       -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
+    llvm::IRBuilderBase::InsertPointGuard guard(builder);
+    builder.SetCurrentDebugLocation(llvm::DebugLoc());
     // Forward target-cpu and target-features function attributes from the
     // original function to the new outlined function.
     llvm::Function *llvmParentFn =
@@ -4324,6 +4326,8 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
                            llvm::Value *&retVal, InsertPointTy allocaIP,
                            InsertPointTy codeGenIP)
       -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
+    llvm::IRBuilderBase::InsertPointGuard guard(builder);
+    builder.SetCurrentDebugLocation(llvm::DebugLoc());
     // We just return the unaltered argument for the host function
     // for now, some alterations may be required in the future to
     // keep host fallback functions working identically to the device
diff --git a/mlir/test/Bytecode/resources.mlir b/mlir/test/Bytecode/resources.mlir
index 33ed01d..3ef220e 100644
--- a/mlir/test/Bytecode/resources.mlir
+++ b/mlir/test/Bytecode/resources.mlir
@@ -4,21 +4,21 @@
 module @TestDialectResources attributes {
   // CHECK: bytecode.test = dense_resource<decl_resource> : tensor<2xui32>
   // CHECK: bytecode.test2 = dense_resource<resource> : tensor<4xf64>
-  // CHECK: bytecode.test3 = dense_resource<resource_2> : tensor<4xf64>
+  // CHECK: bytecode.test3 = dense_resource<"resource\09two"> : tensor<4xf64>
   bytecode.test = dense_resource<decl_resource> : tensor<2xui32>,
   bytecode.test2 = dense_resource<resource> : tensor<4xf64>,
-  bytecode.test3 = dense_resource<resource_2> : tensor<4xf64>
+  bytecode.test3 = dense_resource<"resource\09two"> : tensor<4xf64>
 } {}
 
 // CHECK: builtin: {
 // CHECK-NEXT: resource: "0x08000000010000000000000002000000000000000300000000000000"
-// CHECK-NEXT: resource_2: "0x08000000010000000000000002000000000000000300000000000000"
+// CHECK-NEXT: "resource\09two": "0x08000000010000000000000002000000000000000300000000000000"
 
 {-#
   dialect_resources: {
     builtin: {
       resource: "0x08000000010000000000000002000000000000000300000000000000",
-      resource_2: "0x08000000010000000000000002000000000000000300000000000000"
+      "resource\09two": "0x08000000010000000000000002000000000000000300000000000000"
     }
   }
 #-}
diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir
index 116cd04..87c388b 100644
--- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir
+++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir
@@ -23,7 +23,7 @@ func.func @matmul_quantized(%arg0: tensor<1x5x3xi8>, %arg1: tensor<1x3x6xi8>) ->
   // CHECK: [[ONE:%.+]] = arith.constant 1
   // CHECK: [[TWO:%.+]] = arith.constant 2
   // CHECK: linalg.quantized_batch_matmul ins(%arg0, %arg1, [[ONE]], [[TWO]] : tensor<1x5x3xi8>, tensor<1x3x6xi8>, i32, i32) outs([[FILLED]] : tensor<1x5x6xi32>) -> tensor<1x5x6xi32>
-  %0 = tosa.matmul %arg0, %arg1 {quantization_info = #tosa.matmul_quant<a_zp = 1, b_zp = 2>} : (tensor<1x5x3xi8>, tensor<1x3x6xi8>) -> tensor<1x5x6xi32>
+  %0 = tosa.matmul %arg0, %arg1 {a_zp = 1 : i32, b_zp = 2 : i32} : (tensor<1x5x3xi8>, tensor<1x3x6xi8>) -> tensor<1x5x6xi32>
   return %0 : tensor<1x5x6xi32>
 }
 
@@ -124,7 +124,7 @@ func.func @quantized_fully_connected(%arg0: tensor<5x3xi8>, %arg1: tensor<6x3xi8
   // CHECK: %[[C2:.+]] = arith.constant 2 : i32
   // CHECK: linalg.quantized_matmul ins(%arg0, %[[TRANSPOSE]], %[[C1]], %[[C2]] : tensor<5x3xi8>, tensor<3x6xi8>, i32, i32) outs(%[[BROADCAST]] : tensor<5x6xi32>) -> tensor<5x6xi32>
 
-  %0 = tosa.fully_connected %arg0, %arg1, %arg2 {quantization_info = #tosa.conv_quant<input_zp = 1, weight_zp = 2>} : (tensor<5x3xi8>, tensor<6x3xi8>, tensor<6xi32>) -> tensor<5x6xi32>
+  %0 = tosa.fully_connected %arg0, %arg1, %arg2 {input_zp = 1 : i32, weight_zp = 2 : i32} : (tensor<5x3xi8>, tensor<6x3xi8>, tensor<6xi32>) -> tensor<5x6xi32>
   return %0 : tensor<5x6xi32>
 }
 
diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
index f9bdcef..6e8501a 100644
--- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
+++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
@@ -880,26 +880,36 @@ func.func @test_bool(%arg0: tensor<1xi1>, %arg1: tensor<1xi1>) -> () {
 func.func @test_negate_quantized(%arg0: tensor<1xi8>) -> () {
   // CHECK: linalg.generic
   // CHECK: ^bb0(%[[BBARG0:.+]]: i8,
-  // CHECK: [[CNST:%.+]] = arith.constant 7
+  // CHECK: [[ZERO:%.+]] = arith.constant 0
+  // CHECK: [[SUB:%.+]] = arith.subi [[ZERO]], %[[BBARG0]]
+  // CHECK: linalg.yield [[SUB]]
+  %0 = tosa.negate %arg0 {input_zp1 = 0 : i32, output_zp = 0 : i32} : (tensor<1xi8>) -> tensor<1xi8>
+
+  // CHECK: linalg.generic
+  // CHECK: ^bb0(%[[BBARG0:.+]]: i8,
+  // CHECK: [[C32639:%.+]] = arith.constant 32639
   // CHECK: [[EXT:%.+]] = arith.extsi %[[BBARG0]] : i8 to i16
-  // CHECK: [[SUB:%.+]] = arith.subi [[CNST]], [[EXT]]
+  // CHECK: [[SUB:%.+]] = arith.subi [[C32639]], [[EXT]]
   // CHECK: [[MIN:%.+]] = arith.constant -128
   // CHECK: [[MAX:%.+]] = arith.constant 127
   // CHECK: [[LBOUND:%.+]] = arith.maxsi [[MIN]], [[SUB]]
   // CHECK: [[UBOUND:%.+]] = arith.minsi [[MAX]], [[LBOUND]]
   // CHECK: [[TRUNC:%.+]] = arith.trunci [[UBOUND]]
   // CHECK: linalg.yield [[TRUNC]]
-  %0 = tosa.negate %arg0 {quantization_info = #tosa.unary_quant<input_zp = 0, output_zp = 7>} : (tensor<1xi8>) -> tensor<1xi8>
-
-  // CHECK: linalg.generic
-  // CHECK: ^bb0(%[[BBARG0:.+]]: i8,
-  // CHECK: [[EXT:%.+]] = arith.extsi %[[BBARG0]] : i8 to i16
-  %1 = tosa.negate %arg0 {quantization_info = #tosa.unary_quant<input_zp = 32639, output_zp = 0>} : (tensor<1xi8>) -> tensor<1xi8>
+  %1 = tosa.negate %arg0 {input1_zp = 32639 : i32, output_zp = 0 : i32} : (tensor<1xi8>) -> tensor<1xi8>
 
   // CHECK: linalg.generic
   // CHECK: ^bb0(%[[BBARG0:.+]]: i8,
+  // CHECK: [[C32640:%.+]] = arith.constant 32640
   // CHECK: [[EXT:%.+]] = arith.extsi %[[BBARG0]] : i8 to i32
-  %2 = tosa.negate %arg0 {quantization_info = #tosa.unary_quant<input_zp = 32640, output_zp = 0>} : (tensor<1xi8>) -> tensor<1xi8>
+  // CHECK: [[SUB:%.+]] = arith.subi [[C32640]], [[EXT]]
+  // CHECK: [[MIN:%.+]] = arith.constant -128
+  // CHECK: [[MAX:%.+]] = arith.constant 127
+  // CHECK: [[LBOUND:%.+]] = arith.maxsi [[MIN]], [[SUB]]
+  // CHECK: [[UBOUND:%.+]] = arith.minsi [[MAX]], [[LBOUND]]
+  // CHECK: [[TRUNC:%.+]] = arith.trunci [[UBOUND]]
+  // CHECK: linalg.yield [[TRUNC]]
+  %2 = tosa.negate %arg0 {input1_zp = 32640 : i32, output_zp = 0 : i32} : (tensor<1xi8>) -> tensor<1xi8>
 
   // CHECK: linalg.generic
   // CHECK: ^bb0(%[[BBARG0:.+]]: i8,
diff --git a/mlir/test/Conversion/TosaToTensor/tosa-to-tensor.mlir b/mlir/test/Conversion/TosaToTensor/tosa-to-tensor.mlir
index f95de79..e83e898 100644
--- a/mlir/test/Conversion/TosaToTensor/tosa-to-tensor.mlir
+++ b/mlir/test/Conversion/TosaToTensor/tosa-to-tensor.mlir
@@ -492,7 +492,7 @@ func.func @pad_quant(%arg0 : tensor<1x2xi32>) -> (tensor<4x9xi32>) {
   // CHECK: [[CST:%.+]] = arith.constant 42 : i32
   // CHECK: tensor.pad
   // CHECK:   tensor.yield [[CST]]
-  %1 = "tosa.pad"(%arg0, %0) {quantization_info = #tosa.pad_quant<input_zp = 42>} : (tensor<1x2xi32>, !tosa.shape<4>)  -> (tensor<4x9xi32>)
+  %1 = "tosa.pad"(%arg0, %0) {input_zp = 42 : i32} : (tensor<1x2xi32>, !tosa.shape<4>)  -> (tensor<4x9xi32>)
   return %1 : tensor<4x9xi32>
 }
 
diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
index 7df6def..9a6337f 100644
--- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
+++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
@@ -1250,13 +1250,13 @@ func.func @extract_scalar_from_vec_1d_f32(%arg0: vector<16xf32>) -> f32 {
 
 // -----
 
-func.func @extract_poison_idx(%arg0: vector<16xf32>) -> f32 {
+func.func @extract_scalar_from_vec_1d_f32_poison_idx(%arg0: vector<16xf32>) -> f32 {
   %0 = vector.extract %arg0[-1]: f32 from vector<16xf32>
   return %0 : f32
 }
-// CHECK-LABEL: @extract_poison_idx
-//       CHECK:   %[[IDX:.*]] = llvm.mlir.constant(-1 : i64) : i64
-//       CHECK:   llvm.extractelement {{.*}}[%[[IDX]] : i64] : vector<16xf32>
+// CHECK-LABEL: @extract_scalar_from_vec_1d_f32_poison_idx
+//       CHECK:   %[[UB:.*]] = ub.poison : f32
+//       CHECK:   return %[[UB]] : f32
 
 // -----
 
@@ -1335,6 +1335,16 @@ func.func @extract_vec_2d_from_vec_3d_f32(%arg0: vector<4x3x16xf32>) -> vector<3
 
 // -----
 
+func.func @extract_vec_2d_from_vec_3d_f32_poison_idx(%arg0: vector<4x3x16xf32>) -> vector<3x16xf32> {
+  %0 = vector.extract %arg0[-1]: vector<3x16xf32> from vector<4x3x16xf32>
+  return %0 : vector<3x16xf32>
+}
+// CHECK-LABEL: @extract_vec_2d_from_vec_3d_f32_poison_idx
+//       CHECK:   %[[UB:.*]] = ub.poison : vector<3x16xf32>
+//       CHECK:   return %[[UB]] : vector<3x16xf32>
+
+// -----
+
 func.func @extract_vec_2d_from_vec_3d_f32_scalable(%arg0: vector<4x3x[16]xf32>) -> vector<3x[16]xf32> {
   %0 = vector.extract %arg0[0]: vector<3x[16]xf32> from vector<4x3x[16]xf32>
   return %0 : vector<3x[16]xf32>
diff --git a/mlir/test/Conversion/VectorToSPIRV/vector-to-spirv.mlir b/mlir/test/Conversion/VectorToSPIRV/vector-to-spirv.mlir
index 383215c..5fd7324 100644
--- a/mlir/test/Conversion/VectorToSPIRV/vector-to-spirv.mlir
+++ b/mlir/test/Conversion/VectorToSPIRV/vector-to-spirv.mlir
@@ -176,7 +176,8 @@ func.func @extract(%arg0 : vector<2xf32>) -> (vector<1xf32>, f32) {
 // -----
 
 func.func @extract_poison_idx(%arg0 : vector<4xf32>) -> f32 {
-  // expected-error@+1 {{index -1 out of bounds for 'vector<4xf32>'}}
+  // CHECK: %[[ZERO:.+]] = spirv.Constant 0.000000e+00
+  // CHECK: return %[[ZERO]]
   %0 = vector.extract %arg0[-1] : f32 from vector<4xf32>
   return %0: f32
 }
@@ -208,12 +209,31 @@ func.func @extract_size1_vector_dynamic(%arg0 : vector<1xf32>, %id : index) -> f
 // CHECK-LABEL: @extract_dynamic
 //  CHECK-SAME: %[[V:.*]]: vector<4xf32>, %[[ARG1:.*]]: index
 //       CHECK:   %[[ID:.+]] = builtin.unrealized_conversion_cast %[[ARG1]] : index to i32
-//       CHECK:   spirv.VectorExtractDynamic %[[V]][%[[ID]]] : vector<4xf32>, i32
+//       CHECK:   %[[MASK:.+]] = spirv.Constant 3 :
+//       CHECK:   %[[MASKED:.+]] = spirv.BitwiseAnd %[[ID]], %[[MASK]] :
+//       CHECK:   spirv.VectorExtractDynamic %[[V]][%[[MASKED]]] : vector<4xf32>, i32
 func.func @extract_dynamic(%arg0 : vector<4xf32>, %id : index) -> f32 {
   %0 = vector.extract %arg0[%id] : f32 from vector<4xf32>
   return %0: f32
 }
 
+// -----
+
+// CHECK-LABEL: @extract_dynamic_non_pow2
+//  CHECK-SAME: %[[V:.*]]: vector<3xf32>, %[[ARG1:.*]]: index
+//       CHECK:   %[[ID:.+]] = builtin.unrealized_conversion_cast %[[ARG1]] : index to i32
+//       CHECK:   %[[POISON:.+]] = spirv.Constant -1 :
+//       CHECK:   %[[CMP:.+]] = spirv.IEqual %[[ID]], %[[POISON]]
+//       CHECK:   %[[ZERO:.+]] = spirv.Constant 0 :
+//       CHECK:   %[[SELECT:.+]] = spirv.Select %[[CMP]], %[[ZERO]], %[[ID]] :
+//       CHECK:   spirv.VectorExtractDynamic %[[V]][%[[SELECT]]] : vector<3xf32>, i32
+func.func @extract_dynamic_non_pow2(%arg0 : vector<3xf32>, %id : index) -> f32 {
+  %0 = vector.extract %arg0[%id] : f32 from vector<3xf32>
+  return %0: f32
+}
+
+// -----
+
 // CHECK-LABEL: @extract_dynamic_cst
 //  CHECK-SAME: %[[V:.*]]: vector<4xf32>
 //       CHECK:   spirv.CompositeExtract %[[V]][1 : i32] : vector<4xf32>
@@ -264,8 +284,10 @@ func.func @insert(%arg0 : vector<4xf32>, %arg1: f32) -> vector<4xf32> {
 
 // -----
 
+// CHECK-LABEL: @insert_poison_idx
+// CHECK: %[[ZERO:.+]] = spirv.Constant dense<0.000000e+00>
+// CHECK: return %[[ZERO]]
 func.func @insert_poison_idx(%arg0 : vector<4xf32>, %arg1: f32) -> vector<4xf32> {
-  // expected-error@+1 {{index -1 out of bounds for 'vector<4xf32>'}}
   %1 = vector.insert %arg1, %arg0[-1] : f32 into vector<4xf32>
   return %1: vector<4xf32>
 }
@@ -306,7 +328,9 @@ func.func @insert_size1_vector_dynamic(%arg0 : vector<1xf32>, %arg1: f32, %id :
 // CHECK-LABEL: @insert_dynamic
 //  CHECK-SAME: %[[VAL:.*]]: f32, %[[V:.*]]: vector<4xf32>, %[[ARG2:.*]]: index
 //       CHECK: %[[ID:.+]] = builtin.unrealized_conversion_cast %[[ARG2]] : index to i32
-//       CHECK:   spirv.VectorInsertDynamic %[[VAL]], %[[V]][%[[ID]]] : vector<4xf32>, i32
+//       CHECK:   %[[MASK:.+]] = spirv.Constant 3 :
+//       CHECK:   %[[MASKED:.+]] = spirv.BitwiseAnd %[[ID]], %[[MASK]] :
+//       CHECK:   spirv.VectorInsertDynamic %[[VAL]], %[[V]][%[[MASKED]]] : vector<4xf32>, i32
 func.func @insert_dynamic(%val: f32, %arg0 : vector<4xf32>, %id : index) -> vector<4xf32> {
   %0 = vector.insert %val, %arg0[%id] : f32 into vector<4xf32>
   return %0: vector<4xf32>
@@ -314,6 +338,21 @@ func.func @insert_dynamic(%val: f32, %arg0 : vector<4xf32>, %id : index) -> vect
 
 // -----
 
+// CHECK-LABEL: @insert_dynamic_non_pow2
+//  CHECK-SAME: %[[VAL:.*]]: f32, %[[V:.*]]: vector<3xf32>, %[[ARG2:.*]]: index
+//       CHECK: %[[ID:.+]] = builtin.unrealized_conversion_cast %[[ARG2]] : index to i32
+//       CHECK:   %[[POISON:.+]] = spirv.Constant -1 :
+//       CHECK:   %[[CMP:.+]] = spirv.IEqual %[[ID]], %[[POISON]]
+//       CHECK:   %[[ZERO:.+]] = spirv.Constant 0 :
+//       CHECK:   %[[SELECT:.+]] = spirv.Select %[[CMP]], %[[ZERO]], %[[ID]] :
+//       CHECK:   spirv.VectorInsertDynamic %[[VAL]], %[[V]][%[[SELECT]]] : vector<3xf32>, i32
+func.func @insert_dynamic_non_pow2(%val: f32, %arg0 : vector<3xf32>, %id : index) -> vector<3xf32> {
+  %0 = vector.insert %val, %arg0[%id] : f32 into vector<3xf32>
+  return %0: vector<3xf32>
+}
+
+// -----
+
 // CHECK-LABEL: @insert_dynamic_cst
 //  CHECK-SAME: %[[VAL:.*]]: f32, %[[V:.*]]: vector<4xf32>
 //       CHECK:   spirv.CompositeInsert %[[VAL]], %[[V]][2 : i32] : f32 into vector<4xf32>
diff --git a/mlir/test/Dialect/Affine/loop-fusion-3.mlir b/mlir/test/Dialect/Affine/loop-fusion-3.mlir
index 6bc4fea..d291f46 100644
--- a/mlir/test/Dialect/Affine/loop-fusion-3.mlir
+++ b/mlir/test/Dialect/Affine/loop-fusion-3.mlir
@@ -351,8 +351,157 @@ func.func @should_not_fuse_since_top_level_non_affine_mem_write_users(
 // CHECK:  affine.for
 // CHECK:    arith.addf
 
+// Tests that fusion isn't prevented by the presence of a dealloc op in
+// between since we can move the fused nest.
+
+// CHECK-LABEL: func @fuse_non_affine_intervening_op
+func.func @fuse_non_affine_intervening_op() {
+  %cst = arith.constant 0.0 : f32
+
+  %a = memref.alloc() : memref<100xf32>
+  %b = memref.alloc() : memref<100xf32>
+  %c = memref.alloc() : memref<100xf32>
+
+  affine.for %i = 0 to 100 {
+    affine.store %cst, %a[%i] : memref<100xf32>
+    affine.store %cst, %c[%i] : memref<100xf32>
+  }
+
+  // The source is fused into the destination while being moved here.
+  // CHECK:      affine.for %{{.*}} = 0 to 100
+  // CHECK-NEXT:   affine.store %cst
+  // CHECK-NEXT:   affine.store %cst{{.*}}
+  // CHECK-NEXT:   affine.load
+  // CHECK-NEXT:   affine.store
+  // CHECK-NEXT: }
+  // CHECK-NEXT: memref.dealloc
+
+  memref.dealloc %c : memref<100xf32>
+
+  affine.for %i = 0 to 100 {
+    %v = affine.load %a[%i] : memref<100xf32>
+    affine.store %v, %b[%i] : memref<100xf32>
+  }
+
+  return
+}
+
+// Tests that fusion happens in the presence of intervening non-affine reads.
+
+// CHECK-LABEL: func @fuse_non_affine_intervening_read
+func.func @fuse_non_affine_intervening_read() {
+  %cst = arith.constant 0.0 : f32
+
+  %a = memref.alloc() : memref<100xf32>
+  %b = memref.alloc() : memref<100xf32>
+  %c = memref.alloc() : memref<100xf32>
+
+  affine.for %i = 0 to 100 {
+    affine.store %cst, %a[%i] : memref<100xf32>
+  }
+
+  // The source is fused into the destination while being moved here.
+  // CHECK:      affine.for %{{.*}} = 0 to 100
+  // CHECK-NEXT:   affine.store %cst
+  // CHECK-NEXT:   affine.load
+  // CHECK-NEXT:   affine.store
+  // CHECK-NEXT: }
+
+  // CHECK:     affine.for %{{.*}} = 0 to 100
+  // CHECK-NEXT:  memref.load
+  affine.for %i = 0 to 100 {
+    memref.load %a[%i] : memref<100xf32>
+  }
+
+  affine.for %i = 0 to 100 {
+    %v = affine.load %a[%i] : memref<100xf32>
+    affine.store %v, %b[%i] : memref<100xf32>
+  }
+
+  return
+}
+
+// Tests that fusion happens in the presence of intervening non-affine region
+// ops.
+
+// CHECK-LABEL: func @fuse_non_affine_intervening_read_nest
+func.func @fuse_non_affine_intervening_read_nest() {
+  %cst = arith.constant 0.0 : f32
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %c100 = arith.constant 100 : index
+
+  %a = memref.alloc() : memref<100xf32>
+  %b = memref.alloc() : memref<100xf32>
+  %c = memref.alloc() : memref<100xf32>
+
+  affine.for %i = 0 to 100 {
+    affine.store %cst, %a[%i] : memref<100xf32>
+  }
+
+  // The source is fused into the destination while being moved here.
+  // CHECK:      affine.for %{{.*}} = 0 to 100
+  // CHECK-NEXT:   affine.store %cst
+  // CHECK-NEXT:   affine.load
+  // CHECK-NEXT:   affine.store
+  // CHECK-NEXT: }
+
+  // CHECK: scf.for
+  // CHECK-NEXT:  memref.load
+  scf.for %i = %c0 to %c100 step %c1 {
+    memref.load %a[%i] : memref<100xf32>
+  }
+
+  affine.for %i = 0 to 100 {
+    %v = affine.load %a[%i] : memref<100xf32>
+    affine.store %v, %b[%i] : memref<100xf32>
+  }
+
+  return
+}
+
+// Tests that fusion does not happen when there are non-affine sources
+// intervening.
+
+// CHECK-LABEL: func @no_fusion_scf_for_store
+func.func @no_fusion_scf_for_store() {
+  %cst = arith.constant 0.0 : f32
+  %cst1 = arith.constant 1.0 : f32
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %c100 = arith.constant 100 : index
+
+  %a = memref.alloc() : memref<100xf32>
+  %b = memref.alloc() : memref<100xf32>
+  %c = memref.alloc() : memref<100xf32>
+
+  // CHECK:      affine.for %{{.*}} = 0 to 100
+  // CHECK-NEXT:   affine.store
+  // CHECK-NEXT: }
+  affine.for %i = 0 to 100 {
+    affine.store %cst, %a[%i] : memref<100xf32>
+  }
+
+  // CHECK: scf.for
+  scf.for %i = %c0 to %c100 step %c1 {
+    memref.store %cst1, %a[%i] : memref<100xf32>
+  }
+
+  // CHECK:      affine.for %{{.*}} = 0 to 100
+  // CHECK-NEXT:   affine.load
+  // CHECK-NEXT:   affine.store
+  affine.for %i = 0 to 100 {
+    // Non-affine source for this load.
+    %v = affine.load %a[%i] : memref<100xf32>
+    affine.store %v, %b[%i] : memref<100xf32>
+  }
+
+  return
+}
+
 // -----
 
+// CHECK-LABEL: func @fuse_minor_affine_map
 // MAXIMAL-LABEL: func @fuse_minor_affine_map
 func.func @fuse_minor_affine_map(%in: memref<128xf32>, %out: memref<20x512xf32>) {
   %tmp = memref.alloc() : memref<128xf32>
@@ -418,7 +567,7 @@ func.func @should_fuse_multi_store_producer_and_privatize_memfefs() {
   return
 }
 
-
+// CHECK-LABEL: func @should_fuse_multi_store_producer_with_escaping_memrefs_and_remove_src
 func.func @should_fuse_multi_store_producer_with_escaping_memrefs_and_remove_src(
     %a : memref<10xf32>, %b : memref<10xf32>) {
   %cst = arith.constant 0.000000e+00 : f32
@@ -467,7 +616,7 @@ func.func @should_fuse_multi_store_producer_with_escaping_memrefs_and_preserve_s
     %0 = affine.load %b[%i2] : memref<10xf32>
   }
 
-	// Loops '%i0' and '%i2' should be fused first and '%i0' should be removed
+  // Loops '%i0' and '%i2' should be fused first and '%i0' should be removed
   // since fusion is maximal. Then the fused loop and '%i1' should be fused
   // and the fused loop shouldn't be removed since fusion is not maximal.
   // CHECK:       affine.for %{{.*}} = 0 to 10 {
@@ -517,6 +666,31 @@ func.func @should_not_fuse_due_to_dealloc(%arg0: memref<16xf32>){
 // CHECK-NEXT:      arith.addf
 // CHECK-NEXT:      affine.store
 
+// CHECK-LABEL: func @cannot_fuse_intervening_deallocs
+func.func @cannot_fuse_intervening_deallocs(%arg0: memref<16xf32>){
+  %A = memref.alloc() : memref<16xf32>
+  %C = memref.alloc() : memref<16xf32>
+  %cst_1 = arith.constant 1.000000e+00 : f32
+  // CHECK: affine.for %{{.*}} = 0 to 16
+  affine.for %arg1 = 0 to 16 {
+    %a = affine.load %arg0[%arg1] : memref<16xf32>
+    affine.store %a, %A[%arg1] : memref<16xf32>
+    affine.store %a, %C[%arg1] : memref<16xf32>
+  }
+  // The presence of B's alloc prevents placement of the fused nest above C's
+  // dealloc. No fusion here.
+  memref.dealloc %C : memref<16xf32>
+  %B = memref.alloc() : memref<16xf32>
+  // CHECK: affine.for %{{.*}} = 0 to 16
+  affine.for %arg1 = 0 to 16 {
+    %a = affine.load %A[%arg1] : memref<16xf32>
+    %b = arith.addf %cst_1, %a : f32
+    affine.store %b, %B[%arg1] : memref<16xf32>
+  }
+  memref.dealloc %A : memref<16xf32>
+  return
+}
+
 // -----
 
 // CHECK-LABEL: func @should_fuse_defining_node_has_no_dependence_from_source_node
diff --git a/mlir/test/Dialect/Affine/loop-fusion-inner.mlir b/mlir/test/Dialect/Affine/loop-fusion-inner.mlir
index 61af9a4..e76441f 100644
--- a/mlir/test/Dialect/Affine/loop-fusion-inner.mlir
+++ b/mlir/test/Dialect/Affine/loop-fusion-inner.mlir
@@ -90,13 +90,13 @@ func.func @fusion_inner_multiple_nests() {
   // CHECK:      affine.for %{{.*}} = 0 to 4 {
   // Everything inside fused into two nests (the second will be DCE'd).
   // CHECK-NEXT:   memref.alloc() : memref<4xi8>
-  // CHECK-NEXT:   memref.alloc() : memref<1xi8>
-  // CHECK-NEXT:   memref.alloc() : memref<1xi8>
   // CHECK-NEXT:   memref.alloc() : memref<8x4xi8>
   // CHECK-NEXT:   memref.alloc() : memref<4xi8>
-  // CHECK-NEXT:   affine.for %{{.*}} = 0 to 2 {
+  // CHECK-NEXT:   affine.for %{{.*}} = 0 to 4 {
   // CHECK:        }
-  // CHECK:        affine.for %{{.*}} = 0 to 4 {
+  // CHECK-NEXT:   affine.for %{{.*}} = 0 to 2 {
+  // CHECK:          arith.muli
+  // CHECK-NEXT:     arith.extsi
   // CHECK:        }
   // CHECK-NEXT:   memref.dealloc
   // CHECK-NEXT: }
diff --git a/mlir/test/Dialect/Affine/loop-fusion.mlir b/mlir/test/Dialect/Affine/loop-fusion.mlir
index 045b1be..1c119e8 100644
--- a/mlir/test/Dialect/Affine/loop-fusion.mlir
+++ b/mlir/test/Dialect/Affine/loop-fusion.mlir
@@ -448,8 +448,8 @@ func.func @should_fuse_no_top_level_access() {
 
 #set0 = affine_set<(d0) : (1 == 0)>
 
-// CHECK-LABEL: func @should_not_fuse_if_op_at_top_level() {
-func.func @should_not_fuse_if_op_at_top_level() {
+// CHECK-LABEL: func @should_fuse_despite_affine_if() {
+func.func @should_fuse_despite_affine_if() {
   %m = memref.alloc() : memref<10xf32>
   %cf7 = arith.constant 7.0 : f32
 
@@ -462,12 +462,10 @@ func.func @should_not_fuse_if_op_at_top_level() {
   %c0 = arith.constant 4 : index
   affine.if #set0(%c0) {
   }
-  // Top-level IfOp should prevent fusion.
+  // An unrelated affine.if op doesn't prevent fusion.
   // CHECK:      affine.for %{{.*}} = 0 to 10 {
-  // CHECK-NEXT:   affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
-  // CHECK-NEXT: }
-  // CHECK:      affine.for %{{.*}} = 0 to 10 {
-  // CHECK-NEXT:   affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
+  // CHECK-NEXT:   affine.store %{{.*}}, %{{.*}}[0] : memref<1xf32>
+  // CHECK-NEXT:   affine.load %{{.*}}[0] : memref<1xf32>
   // CHECK-NEXT: }
   return
 }
diff --git a/mlir/test/Dialect/Arith/emulate-wide-int-unsupported.mlir b/mlir/test/Dialect/Arith/emulate-wide-int-unsupported.mlir
index 091dd15..44f09b9 100644
--- a/mlir/test/Dialect/Arith/emulate-wide-int-unsupported.mlir
+++ b/mlir/test/Dialect/Arith/emulate-wide-int-unsupported.mlir
@@ -41,3 +41,13 @@ func.func @unsupported_argument_type(%arg0: vector<4xi128>) -> vector<4xi64> {
   return %0 : vector<4xi64>
 }
 
+// -----
+
+// Ensure this case not crash
+func.func @unsupported_vector(%arg0: vector<2xi1>) {
+  // expected-error@+1 {{failed to legalize unresolved materialization from ('vector<2x2xi32>') to ('vector<2xi64>') that remained live after conversion}}
+  %cst_0 = arith.constant dense<0> : vector<2xi64>
+  // expected-note@+1 {{see existing live user here}}
+  %0 = vector.mask %arg0 { vector.multi_reduction <xor>, %cst_0, %cst_0 [] : vector<2xi64> to vector<2xi64> } : vector<2xi1> -> vector<2xi64>
+  return
+}
diff --git a/mlir/test/Dialect/Tosa/canonicalize.mlir b/mlir/test/Dialect/Tosa/canonicalize.mlir
index 71a7e28..e0e1de6 100644
--- a/mlir/test/Dialect/Tosa/canonicalize.mlir
+++ b/mlir/test/Dialect/Tosa/canonicalize.mlir
@@ -317,7 +317,7 @@ func.func @pad_determine_val_f32(%arg0: tensor<?x?xf32>, %arg1 : tensor<2x2xi32>
 
 // CHECK-LABEL: @pad_determine_val_quant
 func.func @pad_determine_val_quant(%arg0: tensor<?x?xi32>, %arg1 : tensor<2x2xi32>) -> tensor<?x?xi32> {
-  // CHECK-DAG: %[[ZERO:.+]] = "tosa.const"() <{value = dense<0> : tensor<i32>}
+  // CHECK-DAG: %[[ZERO:.+]] = "tosa.const"() <{value = dense<42> : tensor<i32>}
   // CHECK-DAG: %[[PADDING:.+]] = tosa.const_shape {value = dense<[1, 0, 0, 1]> : tensor<4xindex>} : () -> !tosa.shape<4>
   // CHECK: tosa.pad %arg0, %[[PADDING]], %[[ZERO]]
   %0 = tosa.const_shape { value = dense<[1, 0, 0, 1]> : tensor<4xindex>} : () -> !tosa.shape<4>
diff --git a/mlir/test/Dialect/Tosa/tosa-decompose-conv2d.mlir b/mlir/test/Dialect/Tosa/tosa-decompose-conv2d.mlir
index 685f799..e4a2897 100644
--- a/mlir/test/Dialect/Tosa/tosa-decompose-conv2d.mlir
+++ b/mlir/test/Dialect/Tosa/tosa-decompose-conv2d.mlir
@@ -28,7 +28,7 @@ func.func @conv2d_as_fully_connected_quant(%arg0: tensor<4x10x10x2xi8>, %arg1: t
   // CHECK: %[[VAR1:.*]] = tosa.reshape %arg1 {new_shape = array<i64: 3, 2>}
   // CHECK-SAME: -> tensor<3x2xi8>
   // CHECK: %[[VAR2:.*]] = tosa.fully_connected %[[VAR0]], %[[VAR1]], %arg2
-  // CHECK-SAME: quantization_info = #tosa.conv_quant<input_zp = 42, weight_zp = 24>
+  // CHECK-SAME: {input_zp = 42 : i32, weight_zp = 24 : i32}
   // CHECK-SAME: -> tensor<400x3xi32>
   // CHECK: %[[VAR3:.*]] = tosa.reshape %[[VAR2]] {new_shape = array<i64: 4, 10, 10, 3>}
   // CHECK-SAME: -> tensor<4x10x10x3xi32>
@@ -48,7 +48,7 @@ func.func @conv2d_as_fully_connected_quant(%arg0: tensor<4x10x10x2xi8>, %arg1: t
 func.func @conv_with_dynamic_dim(%arg0: tensor<?x14x14x64xi8>, %arg1: tensor<384x1x1x64xi8>, %arg2: tensor<384xi32>) -> tensor<?x14x14x384xi32> {
 // CHECK:           %[[VAL_3:.*]] = tosa.reshape %[[VAL_0]] {new_shape = array<i64: -1, 64>} : (tensor<?x14x14x64xi8>) -> tensor<?x64xi8>
 // CHECK:           %[[VAL_4:.*]] = tosa.reshape %[[VAL_1]] {new_shape = array<i64: 384, 64>} : (tensor<384x1x1x64xi8>) -> tensor<384x64xi8>
-// CHECK:           %[[VAL_5:.*]] = tosa.fully_connected %[[VAL_3]], %[[VAL_4]], %[[VAL_2]] {quantization_info = #tosa.conv_quant<input_zp = -6, weight_zp = 11>} : (tensor<?x64xi8>, tensor<384x64xi8>, tensor<384xi32>) -> tensor<?x384xi32>
+// CHECK:           %[[VAL_5:.*]] = tosa.fully_connected %[[VAL_3]], %[[VAL_4]], %[[VAL_2]] {input_zp = -6 : i32, weight_zp = 11 : i32} : (tensor<?x64xi8>, tensor<384x64xi8>, tensor<384xi32>) -> tensor<?x384xi32>
 // CHECK:           %[[VAL_6:.*]] = tosa.reshape %[[VAL_5]] {new_shape = array<i64: -1, 14, 14, 384>} : (tensor<?x384xi32>) -> tensor<?x14x14x384xi32>
 // CHECK:           return %[[VAL_6]] : tensor<?x14x14x384xi32>
 // CHECK:         }
@@ -67,7 +67,7 @@ func.func @conv2d_as_fully_connected_padded(%arg0: tensor<4x10x10x2xi8>, %arg1:
   // CHECK-DAG: %[[PAD:.+]] = tosa.pad %arg0, %[[PAD_SHAPE]], %[[PAD_VAL]] : (tensor<4x10x10x2xi8>, !tosa.shape<8>, tensor<i8>) -> tensor<4x12x12x2xi8>
   // CHECK-DAG: %[[RESHAPE_INPUT:.+]] = tosa.reshape %[[PAD]] {new_shape = array<i64: 576, 2>}
   // CHECK-DAG: %[[RESHAPE_FILTER:.+]] = tosa.reshape %arg1 {new_shape = array<i64: 3, 2>}
-  // CHECK-DAG: %[[FULLY:.+]] = tosa.fully_connected %[[RESHAPE_INPUT]], %[[RESHAPE_FILTER]], %arg2 {quantization_info = #tosa.conv_quant<input_zp = 42, weight_zp = 24>}
+  // CHECK-DAG: %[[FULLY:.+]] = tosa.fully_connected %[[RESHAPE_INPUT]], %[[RESHAPE_FILTER]], %arg2 {input_zp = 42 : i32, weight_zp = 24 : i32}
   // CHECK: %[[RESHAPE:.+]] = tosa.reshape %[[FULLY]] {new_shape = array<i64: 4, 12, 12, 3>}
   %input_zp = "tosa.const"() {value = dense<42> : tensor<1xi8>} : () -> tensor<1xi8>
   %weight_zp = "tosa.const"() {value = dense<24> : tensor<1xi8>} : () -> tensor<1xi8>
diff --git a/mlir/test/Dialect/Tosa/tosa-decompose-transpose-conv.mlir b/mlir/test/Dialect/Tosa/tosa-decompose-transpose-conv.mlir
index bb6de82..82838cc 100644
--- a/mlir/test/Dialect/Tosa/tosa-decompose-transpose-conv.mlir
+++ b/mlir/test/Dialect/Tosa/tosa-decompose-transpose-conv.mlir
@@ -91,7 +91,7 @@ func.func @transpose_conv2d_strided_quantized(%arg0: tensor<2x17x15x3xi8>, %arg1
   // Manipulate the weight matrix to handle striding.
   // CHECK-DAG: %[[PADV:.+]]  = tosa.const_shape {value = dense<[0, 0, 0, 1, 0, 1, 0, 0]> : tensor<8xindex>} : () -> !tosa.shape<8>
   // CHECK-DAG: %[[TRANSV:.+]]  = "tosa.const"() <{value = dense<[2, 4, 0, 1, 3, 5]> : tensor<6xi32>}
-  // CHECK-DAG: %[[PADW:.+]]  = tosa.pad %arg1, %[[PADV]] {quantization_info = #tosa.pad_quant<input_zp = 42>}
+  // CHECK-DAG: %[[PADW:.+]]  = tosa.pad %arg1, %[[PADV]] {input_zp = 42 : i32}
   // CHECK-DAG: %[[RESW1:.+]]  = tosa.reshape %[[PADW]] {new_shape = array<i64: 5, 2, 2, 2, 3, 3>}
   // CHECK-DAG: %[[TRANS:.+]]  = tosa.transpose %[[RESW1]], %[[TRANSV]]
   // CHECK-DAG: %[[RESW2:.+]]  = tosa.reshape %[[TRANS]] {new_shape = array<i64: 30, 2, 2, 3>}
@@ -101,7 +101,7 @@ func.func @transpose_conv2d_strided_quantized(%arg0: tensor<2x17x15x3xi8>, %arg1
   // Pad out the input matrix to handle the transpose conv.
   // CHECK-DAG: %[[PAD:.+]]  = tosa.const_shape {value = dense<[0, 0, 1, 1, 1, 1, 0, 0]> : tensor<8xindex>} : () -> !tosa.shape<8>
   // CHECK-DAG: %[[TRANS2:.+]]  = "tosa.const"() <{value = dense<[0, 1, 3, 2, 4, 5]> : tensor<6xi32>}
-  // CHECK-DAG: %[[NEWINPUT:.+]] = tosa.pad %arg0, %[[PAD]] {quantization_info = #tosa.pad_quant<input_zp = -22>}
+  // CHECK-DAG: %[[NEWINPUT:.+]] = tosa.pad %arg0, %[[PAD]] {input_zp = -22 : i32}
 
   // Manipulate the final shape.
   // CHECK-DAG: %[[BIAS:.+]]  = "tosa.const"() <{value = dense<0> : tensor<30xi32>}
@@ -132,14 +132,14 @@ func.func @transpose_conv2d_strided_overpad(%arg0 : tensor<1x16x1x1xi8>, %arg1 :
   // CHECK-DAG: %[[ZERO:.+]] = "tosa.const"() <{value = dense<0> : tensor<2xi32>}
   // CHECK-DAG: %[[RESULT_PERMS:.+]] = "tosa.const"() <{value = dense<[0, 1, 3, 2, 4, 5]> : tensor<6xi32>}
   // CHECK-DAG: %[[RESULT_PAD:.+]] = tosa.const_shape  {value = dense<[0, 0, 2, 0, 0, 0, 0, 0]> : tensor<8xindex>} : () -> !tosa.shape<8>
-  // CHECK: %[[PAD_WEIGHT:.+]] = tosa.pad %arg1, %[[WEIGHT_PAD]] {quantization_info = #tosa.pad_quant<input_zp = 93>}
+  // CHECK: %[[PAD_WEIGHT:.+]] = tosa.pad %arg1, %[[WEIGHT_PAD]] {input_zp = 93 : i32}
   // CHECK: %[[RESHAPE_WEIGHT_0:.+]] = tosa.reshape %[[PAD_WEIGHT]] {new_shape = array<i64: 1, 2, 1, 1, 2, 1>}
   // CHECK: %[[TRANSPOSE_WEIGHT:.+]] = tosa.transpose %[[RESHAPE_WEIGHT_0]], %[[WEIGHT_PERMS]]
   // CHECK: %[[RESHAPE_WEIGHT_1:.+]] = tosa.reshape %[[TRANSPOSE_WEIGHT]] {new_shape = array<i64: 2, 2, 1, 1>}
   // CHECK: %[[REVERSE:.+]] = tosa.reverse %[[RESHAPE_WEIGHT_1]] {axis = 1 : i32}
-  // CHECK: %[[PAD_INPUT:.+]] = tosa.pad %arg0, %[[INPUT_PAD]] {quantization_info = #tosa.pad_quant<input_zp = -103>}
+  // CHECK: %[[PAD_INPUT:.+]] = tosa.pad %arg0, %[[INPUT_PAD]] {input_zp = -103 : i32}
   // CHECK: %[[CONV:.+]] = tosa.conv2d %[[PAD_INPUT]], %[[REVERSE]], %[[ZERO]]
-  // CHECK-SAME{literal}: dilation = [1, 1], pad = [0, 0, 0, 0], quantization_info = #tosa.conv_quant<input_zp = -103, weight_zp = 93>, stride = [1, 1]}
+  // CHECK-SAME{literal}: dilation = [1, 1], pad = [0, 0, 0, 0], input_zp = -103 : i32, weight_zp = 93 : i32, stride = [1, 1]}
   // CHECK: %[[RESHAPE_RESULT_0:.+]] = tosa.reshape %[[CONV]] {new_shape = array<i64: 1, 17, 1, 1, 2, 1>}
   // CHECK: %[[TRANSPOSE_RESULT:.+]] = tosa.transpose %[[RESHAPE_RESULT_0]], %[[RESULT_PERMS]]
   // CHECK: %[[RESHAPE_RESULT_1:.+]] = tosa.reshape %[[TRANSPOSE_RESULT]] {new_shape = array<i64: 1, 17, 2, 1>}
diff --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir
index 6858f0d..61e858f 100644
--- a/mlir/test/Dialect/Vector/canonicalize.mlir
+++ b/mlir/test/Dialect/Vector/canonicalize.mlir
@@ -2012,17 +2012,56 @@ func.func @shuffle_1d() -> vector<4xi32> {
 // input vector. That is, %v[0] (i.e., 5) in this test.
 
 // CHECK-LABEL: func @shuffle_1d_poison_idx
-//       CHECK:   %[[V:.+]] = arith.constant dense<[2, 5, 0, 5]> : vector<4xi32>
+//       CHECK:   %[[V:.+]] = arith.constant dense<[13, 10, 15, 10]> : vector<4xi32>
 //       CHECK:   return %[[V]]
 func.func @shuffle_1d_poison_idx() -> vector<4xi32> {
-  %v0 = arith.constant dense<[5, 4, 3]> : vector<3xi32>
-  %v1 = arith.constant dense<[2, 1, 0]> : vector<3xi32>
+  %v0 = arith.constant dense<[10, 11, 12]> : vector<3xi32>
+  %v1 = arith.constant dense<[13, 14, 15]> : vector<3xi32>
   %shuffle = vector.shuffle %v0, %v1 [3, -1, 5, -1] : vector<3xi32>, vector<3xi32>
   return %shuffle : vector<4xi32>
 }
 
 // -----
 
+// CHECK-LABEL: func @shuffle_1d_rhs_lhs_poison
+//   CHECK-NOT:   vector.shuffle
+//       CHECK:   %[[V:.+]] = ub.poison : vector<4xi32>
+//       CHECK:   return %[[V]]
+func.func @shuffle_1d_rhs_lhs_poison() -> vector<4xi32> {
+  %v0 = ub.poison : vector<3xi32>
+  %v1 = ub.poison : vector<3xi32>
+  %shuffle = vector.shuffle %v0, %v1 [3, 1, 5, 4] : vector<3xi32>, vector<3xi32>
+  return %shuffle : vector<4xi32>
+}
+
+// -----
+
+// CHECK-LABEL: func @shuffle_1d_lhs_poison
+//   CHECK-NOT:   vector.shuffle
+//       CHECK:   %[[V:.+]] = arith.constant dense<[11, 12, 11, 11]> : vector<4xi32>
+//       CHECK:   return %[[V]]
+func.func @shuffle_1d_lhs_poison() -> vector<4xi32> {
+  %v0 = arith.constant dense<[11, 12, 13]> : vector<3xi32>
+  %v1 = ub.poison : vector<3xi32>
+  %shuffle = vector.shuffle %v0, %v1 [3, 1, 5, 4] : vector<3xi32>, vector<3xi32>
+  return %shuffle : vector<4xi32>
+}
+
+// -----
+
+// CHECK-LABEL: func @shuffle_1d_rhs_poison
+//   CHECK-NOT:   vector.shuffle
+//       CHECK:   %[[V:.+]] = arith.constant dense<[11, 11, 13, 12]> : vector<4xi32>
+//       CHECK:   return %[[V]]
+func.func @shuffle_1d_rhs_poison() -> vector<4xi32> {
+  %v0 = ub.poison : vector<3xi32>
+  %v1 = arith.constant dense<[11, 12, 13]> : vector<3xi32>
+  %shuffle = vector.shuffle %v0, %v1 [3, 1, 5, 4] : vector<3xi32>, vector<3xi32>
+  return %shuffle : vector<4xi32>
+}
+
+// -----
+
 // CHECK-LABEL: func @shuffle_canonicalize_0d
 func.func @shuffle_canonicalize_0d(%v0 : vector<i32>, %v1 : vector<i32>) -> vector<1xi32> {
   // CHECK: vector.broadcast %{{.*}} : vector<i32> to vector<1xi32>
diff --git a/mlir/test/IR/dense-resource-elements-attr.mlir b/mlir/test/IR/dense-resource-elements-attr.mlir
index adba979..44cefc3 100644
--- a/mlir/test/IR/dense-resource-elements-attr.mlir
+++ b/mlir/test/IR/dense-resource-elements-attr.mlir
@@ -11,3 +11,18 @@
     }
   }
 #-}
+
+// -----
+
+// DenseResourceElementsHandle key blob\-"one" is quoted and escaped.
+// CHECK: attr = dense_resource<"blob\\-\22one\22"> : tensor<2xi16>
+"test.user_op"() {attr = dense_resource<"blob\\-\22one\22"> : tensor<2xi16>} : () -> ()
+
+{-#
+  dialect_resources: {
+    builtin: {
+      // CHECK: "blob\\-\22one\22": "0x0200000001000200"
+      "blob\\-\22one\22": "0x0200000001000200"
+    }
+  }
+#-}
diff --git a/mlir/test/IR/pretty-resources-print.mlir b/mlir/test/IR/pretty-resources-print.mlir
index 625967f..297c83b 100644
--- a/mlir/test/IR/pretty-resources-print.mlir
+++ b/mlir/test/IR/pretty-resources-print.mlir
@@ -12,7 +12,7 @@
 // CHECK:      {-#
 // CHECK-NEXT:   external_resources: {
 // CHECK-NEXT:     external: {
-// CHECK-NEXT:       bool: true,
+// CHECK-NEXT:       "backslash\\tab\09": true,
 // CHECK-NEXT:       string: "\22string\22"
 // CHECK-NEXT:     },
 // CHECK-NEXT:     other_stuff: {
@@ -31,8 +31,8 @@
   external_resources: {
     external: {
       blob: "0x08000000010000000000000002000000000000000300000000000000",
-      bool: true,
-      string: "\"string\"" // with escape characters
+      "backslash\\tab\09": true, // quoted key with escape characters
+      string: "\"string\"" // string with escape characters
     },
     other_stuff: {
       bool: true
diff --git a/mlir/test/Target/Cpp/expressions.mlir b/mlir/test/Target/Cpp/expressions.mlir
index 6b67065..3a1694e 100644
--- a/mlir/test/Target/Cpp/expressions.mlir
+++ b/mlir/test/Target/Cpp/expressions.mlir
@@ -70,11 +70,11 @@ func.func @do_not_inline(%arg0: i32, %arg1: i32, %arg2 : i32) -> i32 {
 }
 
 // CPP-DEFAULT:      float parentheses_for_low_precedence(int32_t [[VAL_1:v[0-9]+]], int32_t [[VAL_2:v[0-9]+]], int32_t [[VAL_3:v[0-9]+]]) {
-// CPP-DEFAULT-NEXT:   return (float) ([[VAL_1]] + [[VAL_2]] * [[VAL_3]]);
+// CPP-DEFAULT-NEXT:   return (float) (([[VAL_1]] + [[VAL_2]]) * [[VAL_3]]);
 // CPP-DEFAULT-NEXT: }
 
 // CPP-DECLTOP:      float parentheses_for_low_precedence(int32_t [[VAL_1:v[0-9]+]], int32_t [[VAL_2:v[0-9]+]], int32_t [[VAL_3:v[0-9]+]]) {
-// CPP-DECLTOP-NEXT:   return (float) ([[VAL_1]] + [[VAL_2]] * [[VAL_3]]);
+// CPP-DECLTOP-NEXT:   return (float) (([[VAL_1]] + [[VAL_2]]) * [[VAL_3]]);
 // CPP-DECLTOP-NEXT: }
 
 func.func @parentheses_for_low_precedence(%arg0: i32, %arg1: i32, %arg2: i32) -> f32 {
diff --git a/mlir/test/Target/LLVMIR/Import/intrinsic.ll b/mlir/test/Target/LLVMIR/Import/intrinsic.ll
index bd33532..249a055 100644
--- a/mlir/test/Target/LLVMIR/Import/intrinsic.ll
+++ b/mlir/test/Target/LLVMIR/Import/intrinsic.ll
@@ -101,12 +101,23 @@ define void @floor_test(float %0, <8 x float> %1) {
   %4 = call <8 x float> @llvm.floor.v8f32(<8 x float> %1)
   ret void
 }
-; CHECK-LABEL:  llvm.func @cos_test
-define void @cos_test(float %0, <8 x float> %1) {
+; CHECK-LABEL:  llvm.func @trig_test
+define void @trig_test(float %0, <8 x float> %1) {
+  ; CHECK: llvm.intr.sin(%{{.*}}) : (f32) -> f32
+  %3 = call float @llvm.sin.f32(float %0)
+  ; CHECK: llvm.intr.sin(%{{.*}}) : (vector<8xf32>) -> vector<8xf32>
+  %4 = call <8 x float> @llvm.sin.v8f32(<8 x float> %1)
+
   ; CHECK: llvm.intr.cos(%{{.*}}) : (f32) -> f32
-  %3 = call float @llvm.cos.f32(float %0)
+  %5 = call float @llvm.cos.f32(float %0)
   ; CHECK: llvm.intr.cos(%{{.*}}) : (vector<8xf32>) -> vector<8xf32>
-  %4 = call <8 x float> @llvm.cos.v8f32(<8 x float> %1)
+  %6 = call <8 x float> @llvm.cos.v8f32(<8 x float> %1)
+
+  ; CHECK: llvm.intr.tan(%{{.*}}) : (f32) -> f32
+  %7 = call float @llvm.tan.f32(float %0)
+  ; CHECK: llvm.intr.tan(%{{.*}}) : (vector<8xf32>) -> vector<8xf32>
+  %8 = call <8 x float> @llvm.tan.v8f32(<8 x float> %1)
+
   ret void
 }
 ; CHECK-LABEL:  llvm.func @hyperbolic_trig_test
diff --git a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
index 382b2b9..2c20878 100644
--- a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
+++ b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
@@ -103,12 +103,22 @@ llvm.func @floor_test(%arg0: f32, %arg1: vector<8xf32>) {
   llvm.return
 }
 
-// CHECK-LABEL: @cos_test
-llvm.func @cos_test(%arg0: f32, %arg1: vector<8xf32>) {
+// CHECK-LABEL: @trig_test
+llvm.func @trig_test(%arg0: f32, %arg1: vector<8xf32>) {
+  // CHECK: call float @llvm.sin.f32
+  llvm.intr.sin(%arg0) : (f32) -> f32
+  // CHECK: call <8 x float> @llvm.sin.v8f32
+  llvm.intr.sin(%arg1) : (vector<8xf32>) -> vector<8xf32>
+
   // CHECK: call float @llvm.cos.f32
-  "llvm.intr.cos"(%arg0) : (f32) -> f32
+  llvm.intr.cos(%arg0) : (f32) -> f32
   // CHECK: call <8 x float> @llvm.cos.v8f32
-  "llvm.intr.cos"(%arg1) : (vector<8xf32>) -> vector<8xf32>
+  llvm.intr.cos(%arg1) : (vector<8xf32>) -> vector<8xf32>
+
+  // CHECK: call float @llvm.tan.f32
+  llvm.intr.tan(%arg0) : (f32) -> f32
+  // CHECK: call <8 x float> @llvm.tan.v8f32
+  llvm.intr.tan(%arg1) : (vector<8xf32>) -> vector<8xf32>
   llvm.return
 }
 
diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-alloc.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-alloc.mlir
new file mode 100644
index 0000000..781efa2
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/tcgen05-alloc.mlir
@@ -0,0 +1,42 @@
+// RUN: mlir-opt -split-input-file -verify-diagnostics %s
+// RUN: mlir-translate -mlir-to-llvmir -split-input-file -verify-diagnostics %s | FileCheck %s --check-prefix=CHECK-LLVM
+
+// CHECK-LABEL: @llvm_nvvm_tcgen05_alloc
+llvm.func @llvm_nvvm_tcgen05_alloc(%addr : !llvm.ptr, %ncols : i32) {
+  // CHECK-LLVM: call void @llvm.nvvm.tcgen05.alloc.cg1(ptr %{{.*}}, i32 %{{.*}})
+  nvvm.tcgen05.alloc %addr, %ncols : !llvm.ptr, i32
+
+  // CHECK-LLVM: call void @llvm.nvvm.tcgen05.alloc.cg2(ptr %{{.*}}, i32 %{{.*}})
+  nvvm.tcgen05.alloc %addr, %ncols {group = #nvvm.tcgen05_group<cta_2>} : !llvm.ptr, i32
+  llvm.return
+}
+
+// CHECK-LABEL: @llvm_nvvm_tcgen05_alloc_shared
+llvm.func @llvm_nvvm_tcgen05_alloc_shared(%addr : !llvm.ptr<3>, %ncols : i32) {
+  // CHECK-LLVM: call void @llvm.nvvm.tcgen05.alloc.shared.cg1(ptr addrspace(3) %{{.*}}, i32 %{{.*}})
+  nvvm.tcgen05.alloc %addr, %ncols : !llvm.ptr<3>, i32
+
+  // CHECK-LLVM: call void @llvm.nvvm.tcgen05.alloc.shared.cg2(ptr addrspace(3) %{{.*}}, i32 %{{.*}})
+  nvvm.tcgen05.alloc %addr, %ncols {group = #nvvm.tcgen05_group<cta_2>} : !llvm.ptr<3>, i32
+  llvm.return
+}
+
+// CHECK-LABEL: @llvm_nvvm_tcgen05_dealloc
+llvm.func @llvm_nvvm_tcgen05_dealloc(%addr : !llvm.ptr<6>, %ncols : i32) {
+  // CHECK-LLVM: call void @llvm.nvvm.tcgen05.dealloc.cg1(ptr addrspace(6) %{{.*}}, i32 %{{.*}})
+  nvvm.tcgen05.dealloc %addr, %ncols : !llvm.ptr<6>, i32
+
+  // CHECK-LLVM: call void @llvm.nvvm.tcgen05.dealloc.cg2(ptr addrspace(6) %{{.*}}, i32 %{{.*}})
+  nvvm.tcgen05.dealloc %addr, %ncols {group = #nvvm.tcgen05_group<cta_2>} : !llvm.ptr<6>, i32
+  llvm.return
+}
+
+// CHECK-LABEL: @llvm_nvvm_tcgen05_relinquish_alloc_permit
+llvm.func @llvm_nvvm_tcgen05_relinquish_alloc_permit() {
+  // CHECK-LLVM: call void @llvm.nvvm.tcgen05.relinq.alloc.permit.cg1()
+  nvvm.tcgen05.relinquish_alloc_permit
+
+  // CHECK-LLVM: call void @llvm.nvvm.tcgen05.relinq.alloc.permit.cg2()
+  nvvm.tcgen05.relinquish_alloc_permit {group = #nvvm.tcgen05_group<cta_2>}
+  llvm.return
+}
diff --git a/mlir/test/lib/IR/CMakeLists.txt b/mlir/test/lib/IR/CMakeLists.txt
index eeb9cf1..1abcfc7 100644
--- a/mlir/test/lib/IR/CMakeLists.txt
+++ b/mlir/test/lib/IR/CMakeLists.txt
@@ -28,7 +28,7 @@ add_mlir_library(MLIRTestIR
 
   EXCLUDE_FROM_LIBMLIR
 
-  DEPENDS
+  LINK_LIBS PUBLIC
   MLIRTestDialect
   )
 
@@ -37,7 +37,6 @@ mlir_target_link_libraries(MLIRTestIR PUBLIC
   MLIRBytecodeReader
   MLIRBytecodeWriter
   MLIRFunctionInterfaces
-  MLIRTestDialect
   )
 
 target_include_directories(MLIRTestIR
diff --git a/mlir/test/lib/Transforms/CMakeLists.txt b/mlir/test/lib/Transforms/CMakeLists.txt
index b91265d..1b9b9bf 100644
--- a/mlir/test/lib/Transforms/CMakeLists.txt
+++ b/mlir/test/lib/Transforms/CMakeLists.txt
@@ -34,12 +34,14 @@ add_mlir_library(MLIRTestTransforms
 
   DEPENDS
   ${MLIRTestTransformsPDLDep}
+
+  LINK_LIBS PUBLIC
+  MLIRTestDialect
   )
 mlir_target_link_libraries(MLIRTestTransforms PUBLIC
   MLIRAnalysis
   MLIRFuncDialect
   MLIRInferIntRangeInterface
-  MLIRTestDialect
   MLIRTransforms
   )
 
diff --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt
index 099634e..8f2a1fd 100644
--- a/offload/DeviceRTL/CMakeLists.txt
+++ b/offload/DeviceRTL/CMakeLists.txt
@@ -95,11 +95,10 @@ set (LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL "${LIBOMPTARGET_LLVM_INCLUDE_DIRS}
 list(TRANSFORM LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL PREPEND "-I")
 
 # Set flags for LLVM Bitcode compilation.
-set(bc_flags -c -foffload-lto -std=c++17 -fvisibility=hidden
-              ${clang_opt_flags} --offload-device-only
-             -nocudalib -nogpulib -nogpuinc -nostdlibinc
-             -fopenmp -fopenmp-cuda-mode
-             -Wno-unknown-cuda-version -Wno-openmp-target
+set(bc_flags -c -flto -std=c++17 -fvisibility=hidden
+             ${clang_opt_flags} -nogpulib -nostdlibinc
+             -fno-rtti -fno-exceptions -fconvergent-functions
+             -Wno-unknown-cuda-version
              -DOMPTARGET_DEVICE_RUNTIME
              -I${include_directory}
              -I${devicertl_base_directory}/../include
@@ -123,8 +122,7 @@ function(compileDeviceRTLLibrary target_name target_triple)
     add_custom_command(OUTPUT ${outfile}
       COMMAND ${CLANG_TOOL}
       ${bc_flags}
-      -fopenmp-targets=${target_triple}
-      -Xopenmp-target=${target_triple} -march=
+      --target=${target_triple}
       ${target_bc_flags}
       -MD -MF ${depfile}
       ${infile} -o ${outfile}
@@ -242,10 +240,8 @@ function(compileDeviceRTLLibrary target_name target_triple)
     set(ide_target_name omptarget-ide-${target_name})
     add_library(${ide_target_name} STATIC EXCLUDE_FROM_ALL ${src_files})
     target_compile_options(${ide_target_name} PRIVATE
-      -fopenmp-targets=${target_triple} -Xopenmp-target=${target_triple} -march=
-      -fopenmp -fopenmp-cuda-mode -mllvm -openmp-opt-disable
-      -foffload-lto -fvisibility=hidden --offload-device-only
-      -nocudalib -nogpulib -nogpuinc -nostdlibinc -Wno-unknown-cuda-version
+      -fvisibility=hidden --target=${target_triple}
+      -nogpulib -nostdlibinc -Wno-unknown-cuda-version
     )
     target_compile_definitions(${ide_target_name} PRIVATE SHARED_SCRATCHPAD_SIZE=512)
     target_include_directories(${ide_target_name} PRIVATE
diff --git a/offload/DeviceRTL/include/Allocator.h b/offload/DeviceRTL/include/Allocator.h
index 475f6a2..79c69a2 100644
--- a/offload/DeviceRTL/include/Allocator.h
+++ b/offload/DeviceRTL/include/Allocator.h
@@ -17,8 +17,6 @@
 // Forward declaration.
 struct KernelEnvironmentTy;
 
-#pragma omp begin declare target device_type(nohost)
-
 namespace ompx {
 
 namespace allocator {
@@ -44,6 +42,4 @@ extern "C" {
 [[gnu::weak]] void free(void *Ptr);
 }
 
-#pragma omp end declare target
-
 #endif
diff --git a/offload/DeviceRTL/include/DeviceTypes.h b/offload/DeviceRTL/include/DeviceTypes.h
index 1cd044f..308109b 100644
--- a/offload/DeviceRTL/include/DeviceTypes.h
+++ b/offload/DeviceRTL/include/DeviceTypes.h
@@ -99,14 +99,7 @@ struct TaskDescriptorTy {
   TaskFnTy TaskFn;
 };
 
-#pragma omp begin declare variant match(device = {arch(amdgcn)})
 using LaneMaskTy = uint64_t;
-#pragma omp end declare variant
-
-#pragma omp begin declare variant match(                                       \
-        device = {arch(amdgcn)}, implementation = {extension(match_none)})
-using LaneMaskTy = uint64_t;
-#pragma omp end declare variant
 
 namespace lanes {
 enum : LaneMaskTy { All = ~(LaneMaskTy)0 };
@@ -163,8 +156,7 @@ typedef enum omp_allocator_handle_t {
 #define OMP_PRAGMA(STR) __PRAGMA(omp STR)
 
 #define SHARED(NAME)                                                           \
-  NAME [[clang::loader_uninitialized]];                                        \
-  OMP_PRAGMA(allocate(NAME) allocator(omp_pteam_mem_alloc))
+  [[clang::address_space(3)]] NAME [[clang::loader_uninitialized]];
 
 // TODO: clang should use address space 5 for omp_thread_mem_alloc, but right
 //       now that's not the case.
diff --git a/offload/DeviceRTL/include/DeviceUtils.h b/offload/DeviceRTL/include/DeviceUtils.h
index 2243673..b92514e 100644
--- a/offload/DeviceRTL/include/DeviceUtils.h
+++ b/offload/DeviceRTL/include/DeviceUtils.h
@@ -15,8 +15,6 @@
 #include "DeviceTypes.h"
 #include "Shared/Utils.h"
 
-#pragma omp begin declare target device_type(nohost)
-
 namespace utils {
 
 template <typename T> struct type_identity {
@@ -95,6 +93,4 @@ bool isThreadLocalMemPtr(void *Ptr);
 
 } // namespace utils
 
-#pragma omp end declare target
-
 #endif
diff --git a/offload/DeviceRTL/include/Mapping.h b/offload/DeviceRTL/include/Mapping.h
index 2217eb7..f892a02 100644
--- a/offload/DeviceRTL/include/Mapping.h
+++ b/offload/DeviceRTL/include/Mapping.h
@@ -24,12 +24,8 @@ enum {
   DIM_Z = 2,
 };
 
-#pragma omp begin declare target device_type(nohost)
-
 inline constexpr uint32_t MaxThreadsPerTeam = 1024;
 
-#pragma omp end declare target
-
 /// Initialize the mapping machinery.
 void init(bool IsSPMD);
 
diff --git a/offload/DeviceRTL/include/State.h b/offload/DeviceRTL/include/State.h
index f0500c1..58b619f 100644
--- a/offload/DeviceRTL/include/State.h
+++ b/offload/DeviceRTL/include/State.h
@@ -22,8 +22,6 @@
 // Forward declaration.
 struct KernelEnvironmentTy;
 
-#pragma omp begin declare target device_type(nohost)
-
 namespace ompx {
 
 namespace memory {
@@ -88,8 +86,7 @@ struct TeamStateTy {
   ParallelRegionFnTy ParallelRegionFnVar;
 };
 
-extern TeamStateTy TeamState;
-#pragma omp allocate(TeamState) allocator(omp_pteam_mem_alloc)
+extern TeamStateTy [[clang::address_space(3)]] TeamState;
 
 struct ThreadStateTy {
 
@@ -115,8 +112,7 @@ struct ThreadStateTy {
   }
 };
 
-extern ThreadStateTy **ThreadStates;
-#pragma omp allocate(ThreadStates) allocator(omp_pteam_mem_alloc)
+extern ThreadStateTy **[[clang::address_space(3)]] ThreadStates;
 
 /// Initialize the state machinery. Must be called by all threads.
 void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
@@ -378,6 +374,4 @@ inline state::Value<uint32_t, state::VK_RunSched> RunSched;
 
 } // namespace ompx
 
-#pragma omp end declare target
-
 #endif
diff --git a/offload/DeviceRTL/include/Synchronization.h b/offload/DeviceRTL/include/Synchronization.h
index 5045d3c..f9eb8d0 100644
--- a/offload/DeviceRTL/include/Synchronization.h
+++ b/offload/DeviceRTL/include/Synchronization.h
@@ -15,8 +15,6 @@
 #include "DeviceTypes.h"
 #include "DeviceUtils.h"
 
-#pragma omp begin declare target device_type(nohost)
-
 namespace ompx {
 namespace atomic {
 
@@ -220,6 +218,4 @@ void system(atomic::OrderingTy Ordering);
 
 } // namespace ompx
 
-#pragma omp end declare target
-
 #endif
diff --git a/offload/DeviceRTL/include/Workshare.h b/offload/DeviceRTL/include/Workshare.h
index fa9b3b2..554c327 100644
--- a/offload/DeviceRTL/include/Workshare.h
+++ b/offload/DeviceRTL/include/Workshare.h
@@ -12,8 +12,6 @@
 #ifndef OMPTARGET_WORKSHARE_H
 #define OMPTARGET_WORKSHARE_H
 
-#pragma omp begin declare target device_type(nohost)
-
 namespace ompx {
 
 namespace workshare {
@@ -25,6 +23,4 @@ void init(bool IsSPMD);
 
 } // namespace ompx
 
-#pragma omp end declare target
-
 #endif
diff --git a/offload/DeviceRTL/src/Allocator.cpp b/offload/DeviceRTL/src/Allocator.cpp
index ac662c4..aac2a60 100644
--- a/offload/DeviceRTL/src/Allocator.cpp
+++ b/offload/DeviceRTL/src/Allocator.cpp
@@ -19,8 +19,6 @@
 
 using namespace ompx;
 
-#pragma omp begin declare target device_type(nohost)
-
 [[gnu::used, gnu::retain, gnu::weak,
   gnu::visibility(
       "protected")]] DeviceMemoryPoolTy __omp_rtl_device_memory_pool;
@@ -77,5 +75,3 @@ void *allocator::alloc(uint64_t Size) { return BumpAllocator.alloc(Size); }
 void allocator::free(void *Ptr) { BumpAllocator.free(Ptr); }
 
 ///}
-
-#pragma omp end declare target
diff --git a/offload/DeviceRTL/src/Configuration.cpp b/offload/DeviceRTL/src/Configuration.cpp
index 0b488b8..a2dfa4a 100644
--- a/offload/DeviceRTL/src/Configuration.cpp
+++ b/offload/DeviceRTL/src/Configuration.cpp
@@ -17,8 +17,6 @@
 
 using namespace ompx;
 
-#pragma omp begin declare target device_type(nohost)
-
 // Weak definitions will be overridden by CGOpenmpRuntimeGPU if enabled.
 [[gnu::weak]] extern const uint32_t __omp_rtl_debug_kind = 0;
 [[gnu::weak]] extern const uint32_t __omp_rtl_assume_no_thread_state = 0;
@@ -85,5 +83,3 @@ bool config::mayUseNestedParallelism() {
     return false;
   return state::getKernelEnvironment().Configuration.MayUseNestedParallelism;
 }
-
-#pragma omp end declare target
diff --git a/offload/DeviceRTL/src/Debug.cpp b/offload/DeviceRTL/src/Debug.cpp
index 1d9c962..5b5482d 100644
--- a/offload/DeviceRTL/src/Debug.cpp
+++ b/offload/DeviceRTL/src/Debug.cpp
@@ -21,8 +21,6 @@
 
 using namespace ompx;
 
-#pragma omp begin declare target device_type(nohost)
-
 extern "C" {
 void __assert_assume(bool condition) { __builtin_assume(condition); }
 
@@ -44,5 +42,3 @@ void __assert_fail_internal(const char *expr, const char *msg, const char *file,
   __builtin_trap();
 }
 }
-
-#pragma omp end declare target
diff --git a/offload/DeviceRTL/src/DeviceUtils.cpp b/offload/DeviceRTL/src/DeviceUtils.cpp
index c204a7b..d810953 100644
--- a/offload/DeviceRTL/src/DeviceUtils.cpp
+++ b/offload/DeviceRTL/src/DeviceUtils.cpp
@@ -15,14 +15,10 @@
 #include "Interface.h"
 #include "Mapping.h"
 
-#pragma omp begin declare target device_type(nohost)
-
 using namespace ompx;
 
 namespace impl {
 
-bool isSharedMemPtr(const void *Ptr) { return false; }
-
 void Unpack(uint64_t Val, uint32_t *LowBits, uint32_t *HighBits) {
   static_assert(sizeof(unsigned long) == 8, "");
   *LowBits = static_cast<uint32_t>(Val & 0x00000000FFFFFFFFUL);
@@ -42,7 +38,7 @@ uint64_t ballotSync(uint64_t Mask, int32_t Pred);
 /// AMDGCN Implementation
 ///
 ///{
-#pragma omp begin declare variant match(device = {arch(amdgcn)})
+#ifdef __AMDGPU__
 
 int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width) {
   int Self = mapping::getThreadIdInWarp();
@@ -66,15 +62,13 @@ bool isSharedMemPtr(const void *Ptr) {
   return __builtin_amdgcn_is_shared(
       (const __attribute__((address_space(0))) void *)Ptr);
 }
-#pragma omp end declare variant
+#endif
 ///}
 
 /// NVPTX Implementation
 ///
 ///{
-#pragma omp begin declare variant match(                                       \
-        device = {arch(nvptx, nvptx64)},                                       \
-            implementation = {extension(match_any)})
+#ifdef __NVPTX__
 
 int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width) {
   return __nvvm_shfl_sync_idx_i32(Mask, Var, SrcLane, Width - 1);
@@ -91,7 +85,7 @@ uint64_t ballotSync(uint64_t Mask, int32_t Pred) {
 
 bool isSharedMemPtr(const void *Ptr) { return __nvvm_isspacep_shared(Ptr); }
 
-#pragma omp end declare variant
+#endif
 ///}
 } // namespace impl
 
@@ -137,5 +131,3 @@ int64_t __kmpc_shuffle_int64(int64_t Val, int16_t Delta, int16_t Width) {
   return utils::shuffleDown(lanes::All, Val, Delta, Width);
 }
 }
-
-#pragma omp end declare target
diff --git a/offload/DeviceRTL/src/Kernel.cpp b/offload/DeviceRTL/src/Kernel.cpp
index 8bb275e..9bb8957 100644
--- a/offload/DeviceRTL/src/Kernel.cpp
+++ b/offload/DeviceRTL/src/Kernel.cpp
@@ -25,8 +25,6 @@
 
 using namespace ompx;
 
-#pragma omp begin declare target device_type(nohost)
-
 static void
 inititializeRuntime(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
                     KernelLaunchEnvironmentTy &KernelLaunchEnvironment) {
@@ -155,5 +153,3 @@ void __kmpc_target_deinit() {
 
 int8_t __kmpc_is_spmd_exec_mode() { return mapping::isSPMDMode(); }
 }
-
-#pragma omp end declare target
diff --git a/offload/DeviceRTL/src/LibC.cpp b/offload/DeviceRTL/src/LibC.cpp
index e55008f..83f9233 100644
--- a/offload/DeviceRTL/src/LibC.cpp
+++ b/offload/DeviceRTL/src/LibC.cpp
@@ -8,8 +8,6 @@
 
 #include "LibC.h"
 
-#pragma omp begin declare target device_type(nohost)
-
 #if defined(__AMDGPU__) && !defined(OMPTARGET_HAS_LIBC)
 extern "C" int vprintf(const char *format, __builtin_va_list) { return -1; }
 #else
@@ -48,5 +46,3 @@ namespace ompx {
   return ::vprintf(Format, vlist);
 }
 } // namespace ompx
-
-#pragma omp end declare target
diff --git a/offload/DeviceRTL/src/Mapping.cpp b/offload/DeviceRTL/src/Mapping.cpp
index 8583a53..a0c0f67 100644
--- a/offload/DeviceRTL/src/Mapping.cpp
+++ b/offload/DeviceRTL/src/Mapping.cpp
@@ -15,8 +15,6 @@
 #include "Interface.h"
 #include "State.h"
 
-#pragma omp begin declare target device_type(nohost)
-
 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
 
 using namespace ompx;
@@ -24,24 +22,10 @@ using namespace ompx;
 namespace ompx {
 namespace impl {
 
-// Forward declarations defined to be defined for AMDGCN and NVPTX.
-LaneMaskTy activemask();
-LaneMaskTy lanemaskLT();
-LaneMaskTy lanemaskGT();
-uint32_t getThreadIdInWarp();
-uint32_t getThreadIdInBlock(int32_t Dim);
-uint32_t getNumberOfThreadsInBlock(int32_t Dim);
-uint32_t getNumberOfThreadsInKernel();
-uint32_t getBlockIdInKernel(int32_t Dim);
-uint32_t getNumberOfBlocksInKernel(int32_t Dim);
-uint32_t getWarpIdInBlock();
-uint32_t getNumberOfWarpsInBlock();
-uint32_t getWarpSize();
-
 /// AMDGCN Implementation
 ///
 ///{
-#pragma omp begin declare variant match(device = {arch(amdgcn)})
+#ifdef __AMDGPU__
 
 uint32_t getWarpSize() { return __builtin_amdgcn_wavefrontsize(); }
 
@@ -128,15 +112,13 @@ uint32_t getNumberOfWarpsInBlock() {
   return mapping::getNumberOfThreadsInBlock() / mapping::getWarpSize();
 }
 
-#pragma omp end declare variant
+#endif
 ///}
 
 /// NVPTX Implementation
 ///
 ///{
-#pragma omp begin declare variant match(                                       \
-        device = {arch(nvptx, nvptx64)},                                       \
-            implementation = {extension(match_any)})
+#ifdef __NVPTX__
 
 uint32_t getNumberOfThreadsInBlock(int32_t Dim) {
   switch (Dim) {
@@ -214,7 +196,7 @@ uint32_t getNumberOfWarpsInBlock() {
          mapping::getWarpSize();
 }
 
-#pragma omp end declare variant
+#endif
 ///}
 
 } // namespace impl
@@ -376,7 +358,7 @@ float ompx_shfl_down_sync_f(uint64_t mask, float var, unsigned delta,
 }
 
 long ompx_shfl_down_sync_l(uint64_t mask, long var, unsigned delta, int width) {
-  return utils::shuffleDown(mask, var, delta, width);
+  return utils::shuffleDown(mask, utils::bitCast<int64_t>(var), delta, width);
 }
 
 double ompx_shfl_down_sync_d(uint64_t mask, double var, unsigned delta,
@@ -385,5 +367,3 @@ double ompx_shfl_down_sync_d(uint64_t mask, double var, unsigned delta,
       utils::shuffleDown(mask, utils::bitCast<int64_t>(var), delta, width));
 }
 }
-
-#pragma omp end declare target
diff --git a/offload/DeviceRTL/src/Misc.cpp b/offload/DeviceRTL/src/Misc.cpp
index 010474b..734e937 100644
--- a/offload/DeviceRTL/src/Misc.cpp
+++ b/offload/DeviceRTL/src/Misc.cpp
@@ -17,19 +17,13 @@
 
 #include "Debug.h"
 
-#pragma omp begin declare target device_type(nohost)
-
 namespace ompx {
 namespace impl {
 
-double getWTick();
-
-double getWTime();
-
 /// AMDGCN Implementation
 ///
 ///{
-#pragma omp begin declare variant match(device = {arch(amdgcn)})
+#ifdef __AMDGPU__
 
 double getWTick() {
   // The number of ticks per second for the AMDGPU clock varies by card and can
@@ -42,14 +36,12 @@ double getWTime() {
   return static_cast<double>(__builtin_readsteadycounter()) * getWTick();
 }
 
-#pragma omp end declare variant
+#endif
 
 /// NVPTX Implementation
 ///
 ///{
-#pragma omp begin declare variant match(                                       \
-        device = {arch(nvptx, nvptx64)},                                       \
-            implementation = {extension(match_any)})
+#ifdef __NVPTX__
 
 double getWTick() {
   // Timer precision is 1ns
@@ -61,7 +53,7 @@ double getWTime() {
   return static_cast<double>(nsecs) * getWTick();
 }
 
-#pragma omp end declare variant
+#endif
 
 /// Lookup a device-side function using a host pointer /p HstPtr using the table
 /// provided by the device plugin. The table is an ordered pair of host and
@@ -171,4 +163,3 @@ unsigned long long __llvm_omp_host_call(void *fn, void *data, size_t size) {
 }
 
 ///}
-#pragma omp end declare target
diff --git a/offload/DeviceRTL/src/Parallelism.cpp b/offload/DeviceRTL/src/Parallelism.cpp
index a87e363..08ce616 100644
--- a/offload/DeviceRTL/src/Parallelism.cpp
+++ b/offload/DeviceRTL/src/Parallelism.cpp
@@ -43,8 +43,6 @@
 
 using namespace ompx;
 
-#pragma omp begin declare target device_type(nohost)
-
 namespace {
 
 uint32_t determineNumberOfThreads(int32_t NumThreadsClause) {
@@ -311,5 +309,3 @@ void __kmpc_push_num_teams(IdentTy *loc, int32_t tid, int32_t num_teams,
 
 void __kmpc_push_proc_bind(IdentTy *loc, uint32_t tid, int proc_bind) {}
 }
-
-#pragma omp end declare target
diff --git a/offload/DeviceRTL/src/Profiling.cpp b/offload/DeviceRTL/src/Profiling.cpp
index bb3caaa..df141af 100644
--- a/offload/DeviceRTL/src/Profiling.cpp
+++ b/offload/DeviceRTL/src/Profiling.cpp
@@ -8,8 +8,6 @@
 
 #include "Profiling.h"
 
-#pragma omp begin declare target device_type(nohost)
-
 extern "C" {
 
 // Provides empty implementations for certain functions in compiler-rt
@@ -18,5 +16,3 @@ void __llvm_profile_register_function(void *Ptr) {}
 void __llvm_profile_register_names_function(void *Ptr, long int I) {}
 void __llvm_profile_instrument_memop(long int I, void *Ptr, int I2) {}
 }
-
-#pragma omp end declare target
diff --git a/offload/DeviceRTL/src/Reduction.cpp b/offload/DeviceRTL/src/Reduction.cpp
index 382f6cf..25f3400 100644
--- a/offload/DeviceRTL/src/Reduction.cpp
+++ b/offload/DeviceRTL/src/Reduction.cpp
@@ -22,8 +22,6 @@ using namespace ompx;
 
 namespace {
 
-#pragma omp begin declare target device_type(nohost)
-
 void gpu_regular_warp_reduce(void *reduce_data, ShuffleReductFnTy shflFct) {
   for (uint32_t mask = mapping::getWarpSize() / 2; mask > 0; mask /= 2) {
     shflFct(reduce_data, /*LaneId - not used= */ 0,
@@ -316,5 +314,3 @@ int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
 void *__kmpc_reduction_get_fixed_buffer() {
   return state::getKernelLaunchEnvironment().ReductionBuffer;
 }
-
-#pragma omp end declare target
diff --git a/offload/DeviceRTL/src/State.cpp b/offload/DeviceRTL/src/State.cpp
index 100bc8a..89edb480 100644
--- a/offload/DeviceRTL/src/State.cpp
+++ b/offload/DeviceRTL/src/State.cpp
@@ -23,16 +23,13 @@
 
 using namespace ompx;
 
-#pragma omp begin declare target device_type(nohost)
-
 /// Memory implementation
 ///
 ///{
 
 /// External symbol to access dynamic shared memory.
-[[gnu::aligned(
-    allocator::ALIGNMENT)]] extern unsigned char DynamicSharedBuffer[];
-#pragma omp allocate(DynamicSharedBuffer) allocator(omp_pteam_mem_alloc)
+[[gnu::aligned(allocator::ALIGNMENT)]] extern unsigned char
+    [[clang::address_space(3)]] DynamicSharedBuffer[];
 
 /// The kernel environment passed to the init method by the compiler.
 static KernelEnvironmentTy *SHARED(KernelEnvironmentPtr);
@@ -452,13 +449,10 @@ void *llvm_omp_get_dynamic_shared() { return __kmpc_get_dynamic_shared(); }
 /// NUM_SHARED_VARIABLES_IN_SHARED_MEM we will malloc space for communication.
 constexpr uint64_t NUM_SHARED_VARIABLES_IN_SHARED_MEM = 64;
 
-[[clang::loader_uninitialized]] static void
-    *SharedMemVariableSharingSpace[NUM_SHARED_VARIABLES_IN_SHARED_MEM];
-#pragma omp allocate(SharedMemVariableSharingSpace)                            \
-    allocator(omp_pteam_mem_alloc)
-[[clang::loader_uninitialized]] static void **SharedMemVariableSharingSpacePtr;
-#pragma omp allocate(SharedMemVariableSharingSpacePtr)                         \
-    allocator(omp_pteam_mem_alloc)
+[[clang::loader_uninitialized]] static void *[[clang::address_space(
+    3)]] SharedMemVariableSharingSpace[NUM_SHARED_VARIABLES_IN_SHARED_MEM];
+[[clang::loader_uninitialized]] static void **[[clang::address_space(
+    3)]] SharedMemVariableSharingSpacePtr;
 
 void __kmpc_begin_sharing_variables(void ***GlobalArgs, uint64_t nArgs) {
   if (nArgs <= NUM_SHARED_VARIABLES_IN_SHARED_MEM) {
@@ -481,4 +475,3 @@ void __kmpc_get_shared_variables(void ***GlobalArgs) {
   *GlobalArgs = SharedMemVariableSharingSpacePtr;
 }
 }
-#pragma omp end declare target
diff --git a/offload/DeviceRTL/src/Synchronization.cpp b/offload/DeviceRTL/src/Synchronization.cpp
index b09d480..a5090b9 100644
--- a/offload/DeviceRTL/src/Synchronization.cpp
+++ b/offload/DeviceRTL/src/Synchronization.cpp
@@ -19,8 +19,6 @@
 #include "Mapping.h"
 #include "State.h"
 
-#pragma omp begin declare target device_type(nohost)
-
 using namespace ompx;
 
 namespace impl {
@@ -28,34 +26,12 @@ namespace impl {
 /// Atomics
 ///
 ///{
-/// NOTE: This function needs to be implemented by every target.
-uint32_t atomicInc(uint32_t *Address, uint32_t Val, atomic::OrderingTy Ordering,
-                   atomic::MemScopeTy MemScope);
 ///}
 
-// Forward declarations defined to be defined for AMDGCN and NVPTX.
-uint32_t atomicInc(uint32_t *A, uint32_t V, atomic::OrderingTy Ordering,
-                   atomic::MemScopeTy MemScope);
-void namedBarrierInit();
-void namedBarrier();
-void fenceTeam(atomic::OrderingTy Ordering);
-void fenceKernel(atomic::OrderingTy Ordering);
-void fenceSystem(atomic::OrderingTy Ordering);
-void syncWarp(__kmpc_impl_lanemask_t);
-void syncThreads(atomic::OrderingTy Ordering);
-void syncThreadsAligned(atomic::OrderingTy Ordering) { syncThreads(Ordering); }
-void unsetLock(omp_lock_t *);
-int testLock(omp_lock_t *);
-void initLock(omp_lock_t *);
-void destroyLock(omp_lock_t *);
-void setLock(omp_lock_t *);
-void unsetCriticalLock(omp_lock_t *);
-void setCriticalLock(omp_lock_t *);
-
 /// AMDGCN Implementation
 ///
 ///{
-#pragma omp begin declare variant match(device = {arch(amdgcn)})
+#ifdef __AMDGPU__
 
 uint32_t atomicInc(uint32_t *A, uint32_t V, atomic::OrderingTy Ordering,
                    atomic::MemScopeTy MemScope) {
@@ -202,15 +178,13 @@ void setCriticalLock(omp_lock_t *Lock) {
   }
 }
 
-#pragma omp end declare variant
+#endif
 ///}
 
 /// NVPTX Implementation
 ///
 ///{
-#pragma omp begin declare variant match(                                       \
-        device = {arch(nvptx, nvptx64)},                                       \
-            implementation = {extension(match_any)})
+#ifdef __NVPTX__
 
 uint32_t atomicInc(uint32_t *Address, uint32_t Val, atomic::OrderingTy Ordering,
                    atomic::MemScopeTy MemScope) {
@@ -283,7 +257,7 @@ void unsetCriticalLock(omp_lock_t *Lock) { unsetLock(Lock); }
 
 void setCriticalLock(omp_lock_t *Lock) { setLock(Lock); }
 
-#pragma omp end declare variant
+#endif
 ///}
 
 } // namespace impl
@@ -401,5 +375,3 @@ void ompx_sync_block_divergent(int Ordering) {
   impl::syncThreads(atomic::OrderingTy(Ordering));
 }
 } // extern "C"
-
-#pragma omp end declare target
diff --git a/offload/DeviceRTL/src/Tasking.cpp b/offload/DeviceRTL/src/Tasking.cpp
index 23a967c..d0be0ac 100644
--- a/offload/DeviceRTL/src/Tasking.cpp
+++ b/offload/DeviceRTL/src/Tasking.cpp
@@ -20,8 +20,6 @@
 
 using namespace ompx;
 
-#pragma omp begin declare target device_type(nohost)
-
 extern "C" {
 
 TaskDescriptorTy *__kmpc_omp_task_alloc(IdentTy *, int32_t, int32_t,
@@ -29,7 +27,7 @@ TaskDescriptorTy *__kmpc_omp_task_alloc(IdentTy *, int32_t, int32_t,
                                         size_t SharedValuesSize,
                                         TaskFnTy TaskFn) {
   auto TaskSizeInclPrivateValuesPadded =
-      utils::roundUp(TaskSizeInclPrivateValues, uint64_t(sizeof(void *)));
+      utils::roundUp(TaskSizeInclPrivateValues, sizeof(void *));
   auto TaskSizeTotal = TaskSizeInclPrivateValuesPadded + SharedValuesSize;
   TaskDescriptorTy *TaskDescriptor = (TaskDescriptorTy *)memory::allocGlobal(
       TaskSizeTotal, "explicit task descriptor");
@@ -103,5 +101,3 @@ int omp_in_final(void) {
 
 int omp_get_max_task_priority(void) { return 0; }
 }
-
-#pragma omp end declare target
diff --git a/offload/DeviceRTL/src/Workshare.cpp b/offload/DeviceRTL/src/Workshare.cpp
index cb83f1b..b1f037a 100644
--- a/offload/DeviceRTL/src/Workshare.cpp
+++ b/offload/DeviceRTL/src/Workshare.cpp
@@ -44,8 +44,6 @@ struct DynamicScheduleTracker {
 #define NOT_FINISHED 1
 #define LAST_CHUNK 2
 
-#pragma omp begin declare target device_type(nohost)
-
 // TODO: This variable is a hack inherited from the old runtime.
 static uint64_t SHARED(Cnt);
 
@@ -935,5 +933,3 @@ OMP_LOOP_ENTRY(_4u, uint32_t)
 OMP_LOOP_ENTRY(_8, int64_t)
 OMP_LOOP_ENTRY(_8u, uint64_t)
 }
-
-#pragma omp end declare target
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index 8559cc1..4f52108 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -6001,6 +6001,7 @@ cc_library(
         ":IR",
         ":LLVMCommonConversion",
         ":MemRefDialect",
+        ":NVGPUDialect",
         ":NVVMDialect",
         ":SCFDialect",
         ":Support",
author	mingmingl <mingmingl@google.com>	2025-02-05 22:58:44 -0800
committer	mingmingl <mingmingl@google.com>	2025-02-05 22:58:44 -0800
commit	9f20ebb51dd6879968cf53c39131c68e0421a324 (patch)
tree	6d7a7e7b1d20dddf01628cf98cfa8e054408f36e
parent	fb798fd923517ef559228cbb75f7abfe5526de9d (diff)
parent	eaa5fd57cd0f1c819a3b0c68f9402124842fd212 (diff)
download	llvm-users/mingmingl-llvm/spr/aarch64jumptable.zip llvm-users/mingmingl-llvm/spr/aarch64jumptable.tar.gz llvm-users/mingmingl-llvm/spr/aarch64jumptable.tar.bz2